예제 #1
0
  def get(self):
    """Forward the incoming query to solr and build a paper network from it.

    Every query-string argument except 'max_groups' is passed on to solr,
    with 'rows' capped at VIS_SERVICE_PN_MAX_RECORDS and 'fl'/'wt'
    overridden. The caller's Authorization header is relayed as
    X-Forwarded-Authorization.

    Returns a (payload, status_code) tuple:
      * solr answered with a non-200 status: an error payload carrying the
        solr response text, with solr's status code;
      * the network came back empty: {"Error": "Empty network."} with 200;
      * otherwise: {"msg": <paging info>, "data": <network>} with 200.
    """
    # 'max_groups' is read again further down; it is not a solr parameter.
    solr_args = dict(request.args)
    solr_args.pop('max_groups', None)

    # Cap the number of rows at the configured ceiling. The fallback is
    # wrapped in a list because the looked-up value is indexed with [0]
    # (presumably every value in solr_args is a list -- confirm).
    row_limit = current_app.config.get("VIS_SERVICE_PN_MAX_RECORDS")
    wanted_rows = int(solr_args.get("rows", [row_limit])[0])
    solr_args["rows"] = min(wanted_rows, row_limit)

    solr_args['fl'] = ['bibcode,title,first_author,year,citation_count,read_count,reference']
    solr_args['wt'] = 'json'

    forwarded_headers = {'X-Forwarded-Authorization': request.headers.get('Authorization')}

    response = client().get(
      current_app.config.get("VIS_SERVICE_SOLR_PATH"),
      params=solr_args,
      headers=forwarded_headers)

    # Anything but a 200 from solr is reported back with solr's own status.
    if response.status_code != 200:
      return {"Error": "There was a connection error. Please try again later", "Error Info": response.text}, response.status_code

    full_response = response.json()

    # The list of solr docs is handed to the network builder as-is.
    docs = full_response["response"]["docs"]
    group_limit = request.args.get("max_groups", current_app.config.get("VIS_SERVICE_PN_MAX_GROUPS"))
    paper_network_json = paper_network.get_papernetwork(docs, group_limit)

    if not paper_network_json:
      return {"Error": "Empty network."}, 200

    solr_response = full_response["response"]
    paging = {
      "numFound": solr_response["numFound"],
      "start": solr_response.get("start", 0),
      "rows": int(full_response["responseHeader"]["params"]["rows"]),
    }
    return {"msg": paging, "data": paper_network_json}, 200
예제 #2
0
  def test_paper_network_resource(self):
    """Check the tf-idf helper and the paper-network builder against fixtures.

    Three stages:
      1. tf_idf.get_tf_idf_vals output must equal the stored tf-idf fixture.
      2. The "top_common_references" of the first summary-graph node must
         equal a value recomputed independently from the full graph.
      3. The complete paper_network.get_papernetwork output must equal the
         stored paper-network fixture.

    input_js_paper_network is not defined here; it has to be provided by
    the enclosing module.
    """

    def load_fixture(relative_path):
      # Use a context manager so the fixture file is closed deterministically.
      with open(PROJECT_HOME + relative_path) as handle:
        return json.load(handle)

    #first, test the tf-idf library

    self.maxDiff = None

    input_js_tf_idf = load_fixture("/tests/test_input/tf_idf_input.json")

    test_js_tf_idf = load_fixture("/tests/test_output/tf_idf_output.json")

    # Round-trip through JSON so the result has the same types as the fixture.
    processed_data = json.loads(json.dumps(tf_idf.get_tf_idf_vals(input_js_tf_idf), sort_keys=True))

    self.assertEqual(processed_data, test_js_tf_idf)

    #now test reference counting function

    processed_data = json.loads(json.dumps(paper_network.get_papernetwork(input_js_paper_network["response"]["docs"], 10), sort_keys=True))

    # sorted() returns the ordered list. list.sort() sorts in place and
    # returns None, which used to reduce the assertion below to None == None.
    topCommonReferences = sorted(processed_data["summaryGraph"]["nodes"][0]["top_common_references"].items())

    def get_group_references(group):
      """Recompute the five most shared references of `group` from the full graph.

      Returns a sorted list of (reference, fraction) tuples, where fraction
      is the number of distinct papers sharing the reference divided by
      the paper count of the first summary node.
      """
      # A set keeps the membership tests below O(1).
      indexes = set(i for i, n in enumerate(processed_data["fullGraph"]["nodes"]) if n["group"] == group)
      links = [l for l in processed_data["fullGraph"]["links"] if l["source"] in indexes and l["target"] in indexes]
      freq_dict = defaultdict(list)
      for l in links:
        for o in l["overlap"]:
          freq_dict[o].extend([l["source"], l["target"]])

      # Collapse each list of paper indexes into a count of distinct papers.
      for f in freq_dict:
        freq_dict[f] = len(set(freq_dict[f]))

      final = sorted(freq_dict.items(), key=lambda x: x[1], reverse=True)[:5]

      # NOTE(review): always reads summary node 0, whatever `group` is;
      # only group 0 is exercised below.
      num_papers = processed_data["summaryGraph"]["nodes"][0]["paper_count"]

      return sorted((f[0], f[1] / float(num_papers)) for f in final)

    self.assertEqual(topCommonReferences, get_group_references(0))

    # now just test input/output

    test_js_paper_network = load_fixture("/tests/test_output/paper_network_star.json")

    processed_data = json.loads(json.dumps(paper_network.get_papernetwork(input_js_paper_network["response"]["docs"], 10), sort_keys=True))
    self.assertEqual(processed_data, test_js_paper_network)
예제 #3
0
    def get(self):
        """Query solr with the request's arguments and return a paper network.

        All query-string arguments other than 'max_groups' are sent on to
        solr; 'rows' is limited to VIS_SERVICE_PN_MAX_RECORDS and 'fl' and
        'wt' are overridden. The Authorization header of the request is
        passed along as X-Forwarded-Authorization.

        Returns a (payload, status_code) tuple: an error payload with solr's
        status code when solr does not answer 200, {"Error": "Empty
        network."} with 200 when no network could be built, and otherwise
        {"msg": ..., "data": ...} with 200.
        """
        # 'max_groups' is used below for the network builder, not for solr.
        solr_args = dict(request.args)
        solr_args.pop('max_groups', None)

        # The default is a one-element list because the value is indexed
        # with [0] (presumably all values in solr_args are lists -- confirm).
        max_records = current_app.config.get("VIS_SERVICE_PN_MAX_RECORDS")
        requested = solr_args.get("rows", [max_records])
        solr_args["rows"] = min(int(requested[0]), max_records)

        field_list = 'bibcode,title,first_author,year,citation_count,read_count,reference'
        solr_args['fl'] = [field_list]
        solr_args['wt'] = 'json'

        auth_header = request.headers.get('Authorization')
        headers = {'X-Forwarded-Authorization': auth_header}

        response = client().get(
            current_app.config.get("VIS_SERVICE_SOLR_PATH"),
            params=solr_args,
            headers=headers)

        # Relay solr failures to the caller with solr's own status code.
        if response.status_code != 200:
            failure = {
                "Error":
                "There was a connection error. Please try again later",
                "Error Info": response.text
            }
            return failure, response.status_code

        full_response = response.json()

        # The solr docs are handed to the network builder unchanged.
        data = full_response["response"]["docs"]
        max_groups = request.args.get(
            "max_groups",
            current_app.config.get("VIS_SERVICE_PN_MAX_GROUPS"))
        paper_network_json = paper_network.get_papernetwork(data, max_groups)

        if not paper_network_json:
            return {"Error": "Empty network."}, 200

        body = full_response["response"]
        msg = {
            "numFound": body["numFound"],
            "start": body.get("start", 0),
            "rows": int(full_response["responseHeader"]["params"]["rows"]),
        }
        return {"msg": msg, "data": paper_network_json}, 200
예제 #4
0
class PaperNetwork(Resource):
    '''Returns paper network data for a solr query'''
    decorators = [advertise('scopes', 'rate_limit')]
    scopes = []
    # NOTE(review): presumably [max requests, window in seconds], i.e. 500
    # per day -- confirm against the rate limiter that consumes it.
    rate_limit = [500, 60 * 60 * 24]

    def post(self):
        '''Run the solr query described by the request and build a paper network.

        Returns a (payload, status_code) tuple:
          * make_request raised QueryException: an error payload with 403;
          * solr answered with a non-200 status: an error payload carrying
            the solr response text, with solr's status code;
          * the network came back empty: {"Error": "Empty network."}, 200;
          * otherwise: {"msg": <paging info>, "data": <network>}, 200.
        '''
        # Building the field list cannot fail, so it stays outside the try.
        required_fields = [
            'bibcode,title,first_author,year,citation_count,read_count,reference'
        ]

        try:
            response = make_request(request, "PN", required_fields)
        # "except X as e" is valid on Python 2.6+ and Python 3; the older
        # "except X, e" form is a syntax error on Python 3.
        except QueryException as error:
            return {
                'Error': 'there was a problem with your request',
                'Error Info': str(error)
            }, 403

        if response.status_code == 200:
            full_response = response.json()
        else:
            return {
                "Error":
                "There was a connection error. Please try again later",
                "Error Info": response.text
            }, response.status_code

        # The solr docs are handed to the network builder unchanged.
        data = full_response["response"]["docs"]
        paper_network_json = paper_network.get_papernetwork(
            data,
            request.json.get(
                "max_groups",
                current_app.config.get("VIS_SERVICE_PN_MAX_GROUPS")))
        if paper_network_json:
            return {
                "msg": {
                    "numFound": full_response["response"]["numFound"],
                    "start": full_response["response"].get("start", 0),
                    "rows":
                    int(full_response["responseHeader"]["params"]["rows"])
                },
                "data": paper_network_json
            }, 200
        else:
            return {"Error": "Empty network."}, 200
예제 #5
0
    def test_paper_network_resource(self):
        """Check the tf-idf helper and the paper-network builder against fixtures.

        Three stages:
          1. tf_idf.get_tf_idf_vals output must equal the stored tf-idf
             fixture.
          2. The "top_common_references" of the first summary-graph node
             must equal a value recomputed independently from the full graph.
          3. The complete paper_network.get_papernetwork output must equal
             the stored paper-network fixture.

        input_js_paper_network is not defined here; it has to be provided
        by the enclosing module.
        """

        def load_fixture(relative_path):
            # Use a context manager so the fixture file is always closed.
            with open(PROJECT_HOME + relative_path) as handle:
                return json.load(handle)

        #first, test the tf-idf library

        self.maxDiff = None

        input_js_tf_idf = load_fixture("/tests/test_input/tf_idf_input.json")

        test_js_tf_idf = load_fixture("/tests/test_output/tf_idf_output.json")

        # Round-trip through JSON so the result has the fixture's types.
        processed_data = json.loads(
            json.dumps(tf_idf.get_tf_idf_vals(input_js_tf_idf),
                       sort_keys=True))

        self.assertEqual(processed_data, test_js_tf_idf)

        #now test reference counting function

        processed_data = json.loads(
            json.dumps(paper_network.get_papernetwork(
                input_js_paper_network["response"]["docs"], 10),
                       sort_keys=True))

        # sorted() returns the ordered list. list.sort() sorts in place and
        # returns None, which used to reduce the assertion below to
        # None == None.
        topCommonReferences = sorted(
            processed_data["summaryGraph"]["nodes"][0][
                "top_common_references"].items())

        def get_group_references(group):
            """Recompute the five most shared references of `group`.

            Returns a sorted list of (reference, fraction) tuples, where
            fraction is the number of distinct papers sharing the reference
            divided by the paper count of the first summary node.
            """
            # A set keeps the membership tests below O(1).
            indexes = set(
                i for i, n in enumerate(processed_data["fullGraph"]["nodes"])
                if n["group"] == group)
            links = [
                l for l in processed_data["fullGraph"]["links"]
                if l["source"] in indexes and l["target"] in indexes
            ]
            freq_dict = defaultdict(list)
            for l in links:
                for o in l["overlap"]:
                    freq_dict[o].extend([l["source"], l["target"]])

            # Collapse each list of paper indexes into a distinct-paper count.
            for f in freq_dict:
                freq_dict[f] = len(set(freq_dict[f]))

            final = sorted(freq_dict.items(), key=lambda x: x[1],
                           reverse=True)[:5]

            # NOTE(review): always reads summary node 0, whatever `group`
            # is; only group 0 is exercised below.
            num_papers = processed_data["summaryGraph"]["nodes"][0][
                "paper_count"]

            return sorted((f[0], f[1] / float(num_papers)) for f in final)

        self.assertEqual(topCommonReferences, get_group_references(0))

        # now just test input/output

        test_js_paper_network = load_fixture(
            "/tests/test_output/paper_network_star.json")

        processed_data = json.loads(
            json.dumps(paper_network.get_papernetwork(
                input_js_paper_network["response"]["docs"], 10),
                       sort_keys=True))
        self.assertEqual(processed_data, test_js_paper_network)