def get(self):
    """Return paper-network data for the solr query given in the request args.

    Forwards the caller's query (minus the service-local ``max_groups``
    argument) to solr with the caller's Authorization header, then builds
    the paper network from the returned docs.

    Returns:
        (dict, int): a JSON-serializable payload and an HTTP status code.
    """
    solr_args = dict(request.args)
    # max_groups is consumed by this service, not understood by solr
    if 'max_groups' in solr_args:
        del solr_args['max_groups']

    max_records = current_app.config.get("VIS_SERVICE_PN_MAX_RECORDS")
    # cap the requested row count at the configured maximum;
    # dict(request.args) values are lists here, hence the [0]
    solr_args["rows"] = min(
        int(solr_args.get("rows", [max_records])[0]), max_records)
    solr_args['fl'] = [
        'bibcode,title,first_author,year,citation_count,read_count,reference'
    ]
    solr_args['wt'] = 'json'

    headers = {
        'X-Forwarded-Authorization': request.headers.get('Authorization')
    }
    response = client().get(
        current_app.config.get("VIS_SERVICE_SOLR_PATH"),
        params=solr_args,
        headers=headers)

    if response.status_code == 200:
        full_response = response.json()
    else:
        return {
            "Error": "There was a connection error. Please try again later",
            "Error Info": response.text
        }, response.status_code

    #get_network_with_groups expects a list of normalized authors
    data = full_response["response"]["docs"]
    # query-string values arrive as strings while the config default is an
    # int; coerce so get_papernetwork always receives an integer group count
    paper_network_json = paper_network.get_papernetwork(
        data,
        int(request.args.get(
            "max_groups",
            current_app.config.get("VIS_SERVICE_PN_MAX_GROUPS"))))
    if paper_network_json:
        return {
            "msg": {
                "numFound": full_response["response"]["numFound"],
                "start": full_response["response"].get("start", 0),
                "rows": int(full_response["responseHeader"]["params"]["rows"])
            },
            "data": paper_network_json
        }, 200
    else:
        return {"Error": "Empty network."}, 200
def test_paper_network_resource(self):
    """Exercise the tf-idf helper, the reference-counting logic, and the
    end-to-end paper-network output against stored fixtures."""
    #first, test the tf-idf library
    self.maxDiff = None
    with open(PROJECT_HOME + "/tests/test_input/tf_idf_input.json") as f:
        input_js_tf_idf = json.load(f)
    with open(PROJECT_HOME + "/tests/test_output/tf_idf_output.json") as f:
        test_js_tf_idf = json.load(f)
    processed_data = json.loads(
        json.dumps(tf_idf.get_tf_idf_vals(input_js_tf_idf), sort_keys=True))
    self.assertEqual(processed_data, test_js_tf_idf)

    #now test reference counting function
    processed_data = json.loads(
        json.dumps(paper_network.get_papernetwork(
            input_js_paper_network["response"]["docs"], 10),
            sort_keys=True))
    # BUG FIX: list.sort() returns None, so the original
    # `.items().sort()` made this assertion compare None == None and
    # pass vacuously; use sorted() so real data is compared.
    topCommonReferences = sorted(
        processed_data["summaryGraph"]["nodes"][0]
        ["top_common_references"].items())

    def get_group_references(group):
        # independent re-computation of the top-5 shared references
        # (normalized by group paper count) for the given summary group
        indexes = [
            i for i, n in enumerate(processed_data["fullGraph"]["nodes"])
            if n["group"] == group
        ]
        links = [
            l for l in processed_data["fullGraph"]["links"]
            if l["source"] in indexes and l["target"] in indexes
        ]
        freq_dict = defaultdict(list)
        for l in links:
            for o in l["overlap"]:
                freq_dict[o].extend([l["source"], l["target"]])
        # count distinct papers sharing each reference
        for f in freq_dict:
            freq_dict[f] = len(list(set(freq_dict[f])))
        final = sorted(freq_dict.items(), key=lambda x: x[1],
                       reverse=True)[:5]
        num_papers = processed_data["summaryGraph"]["nodes"][0]["paper_count"]
        # BUG FIX: the original `[...].sort()` returned None; return the
        # sorted list so the assertion above compares actual values
        return sorted((f[0], f[1] / float(num_papers)) for f in final)

    self.assertEqual(topCommonReferences, get_group_references(0))

    # now just test input/output
    with open(PROJECT_HOME +
              "/tests/test_output/paper_network_star.json") as f:
        test_js_paper_network = json.load(f)
    processed_data = json.loads(
        json.dumps(paper_network.get_papernetwork(
            input_js_paper_network["response"]["docs"], 10),
            sort_keys=True))
    self.assertEqual(processed_data, test_js_paper_network)
def get(self):
    """Return paper-network data for the solr query given in the request args.

    Strips the service-local ``max_groups`` argument, forwards the rest to
    solr (with the caller's Authorization header), and builds the paper
    network from the returned docs.

    Returns:
        (dict, int): a JSON-serializable payload and an HTTP status code.
    """
    solr_args = dict(request.args)
    # max_groups is handled by this service, not by solr
    if 'max_groups' in solr_args:
        del solr_args['max_groups']

    max_records = current_app.config.get("VIS_SERVICE_PN_MAX_RECORDS")
    # cap requested rows at the configured maximum; dict(request.args)
    # values are lists here, hence the [0]
    solr_args["rows"] = min(
        int(solr_args.get("rows", [max_records])[0]), max_records)
    solr_args['fl'] = [
        'bibcode,title,first_author,year,citation_count,read_count,reference'
    ]
    solr_args['wt'] = 'json'

    headers = {
        'X-Forwarded-Authorization': request.headers.get('Authorization')
    }
    response = client().get(
        current_app.config.get("VIS_SERVICE_SOLR_PATH"),
        params=solr_args,
        headers=headers)

    if response.status_code == 200:
        full_response = response.json()
    else:
        return {
            "Error": "There was a connection error. Please try again later",
            "Error Info": response.text
        }, response.status_code

    #get_network_with_groups expects a list of normalized authors
    data = full_response["response"]["docs"]
    # query-string values arrive as strings while the config default is an
    # int; coerce so get_papernetwork always receives an integer group count
    paper_network_json = paper_network.get_papernetwork(
        data,
        int(request.args.get(
            "max_groups",
            current_app.config.get("VIS_SERVICE_PN_MAX_GROUPS"))))
    if paper_network_json:
        return {
            "msg": {
                "numFound": full_response["response"]["numFound"],
                "start": full_response["response"].get("start", 0),
                "rows": int(full_response["responseHeader"]["params"]["rows"])
            },
            "data": paper_network_json
        }, 200
    else:
        return {"Error": "Empty network."}, 200
class PaperNetwork(Resource):
    '''Returns paper network data for a solr query'''
    decorators = [advertise('scopes', 'rate_limit')]
    scopes = []
    rate_limit = [500, 60 * 60 * 24]  # 500 requests per day

    def post(self):
        """Build a paper network from a POSTed query.

        Delegates the solr query to make_request (which enforces the
        required fields), then builds the paper network from the docs.

        Returns:
            (dict, int): a JSON-serializable payload and an HTTP status code.
        """
        try:
            required_fields = [
                'bibcode,title,first_author,year,citation_count,read_count,reference'
            ]
            response = make_request(request, "PN", required_fields)
        # BUG FIX: the old `except QueryException, error:` form is a
        # SyntaxError on Python 3; `as` works on Python 2.6+ and 3.x
        except QueryException as error:
            return {
                'Error': 'there was a problem with your request',
                'Error Info': str(error)
            }, 403

        if response.status_code == 200:
            full_response = response.json()
        else:
            return {
                "Error": "There was a connection error. Please try again later",
                "Error Info": response.text
            }, response.status_code

        #get_network_with_groups expects a list of normalized authors
        data = full_response["response"]["docs"]
        paper_network_json = paper_network.get_papernetwork(
            data,
            request.json.get(
                "max_groups",
                current_app.config.get("VIS_SERVICE_PN_MAX_GROUPS")))
        if paper_network_json:
            return {
                "msg": {
                    "numFound": full_response["response"]["numFound"],
                    "start": full_response["response"].get("start", 0),
                    "rows": int(
                        full_response["responseHeader"]["params"]["rows"])
                },
                "data": paper_network_json
            }, 200
        else:
            return {"Error": "Empty network."}, 200
def test_paper_network_resource(self):
    """Exercise the tf-idf helper, the reference-counting logic, and the
    end-to-end paper-network output against stored fixtures."""
    #first, test the tf-idf library
    self.maxDiff = None
    with open(PROJECT_HOME + "/tests/test_input/tf_idf_input.json") as f:
        input_js_tf_idf = json.load(f)
    with open(PROJECT_HOME + "/tests/test_output/tf_idf_output.json") as f:
        test_js_tf_idf = json.load(f)
    processed_data = json.loads(
        json.dumps(tf_idf.get_tf_idf_vals(input_js_tf_idf), sort_keys=True))
    self.assertEqual(processed_data, test_js_tf_idf)

    #now test reference counting function
    processed_data = json.loads(
        json.dumps(paper_network.get_papernetwork(
            input_js_paper_network["response"]["docs"], 10),
            sort_keys=True))
    # BUG FIX: list.sort() returns None, so the original
    # `.items().sort()` made the assertion below compare None == None
    # and pass vacuously; sorted() keeps the real data.
    topCommonReferences = sorted(
        processed_data["summaryGraph"]["nodes"][0]
        ["top_common_references"].items())

    def get_group_references(group):
        # independent re-computation of the top-5 shared references
        # (normalized by group paper count) for the given summary group
        indexes = [
            i for i, n in enumerate(processed_data["fullGraph"]["nodes"])
            if n["group"] == group
        ]
        links = [
            l for l in processed_data["fullGraph"]["links"]
            if l["source"] in indexes and l["target"] in indexes
        ]
        freq_dict = defaultdict(list)
        for l in links:
            for o in l["overlap"]:
                freq_dict[o].extend([l["source"], l["target"]])
        # count distinct papers sharing each reference
        for f in freq_dict:
            freq_dict[f] = len(list(set(freq_dict[f])))
        final = sorted(freq_dict.items(), key=lambda x: x[1],
                       reverse=True)[:5]
        num_papers = processed_data["summaryGraph"]["nodes"][0][
            "paper_count"]
        # BUG FIX: the original `[...].sort()` returned None; return the
        # sorted list so the assertion above compares actual values
        return sorted((f[0], f[1] / float(num_papers)) for f in final)

    self.assertEqual(topCommonReferences, get_group_references(0))

    # now just test input/output
    with open(PROJECT_HOME +
              "/tests/test_output/paper_network_star.json") as f:
        test_js_paper_network = json.load(f)
    processed_data = json.loads(
        json.dumps(paper_network.get_papernetwork(
            input_js_paper_network["response"]["docs"], 10),
            sort_keys=True))
    self.assertEqual(processed_data, test_js_paper_network)