def get_coauthors(author, tags, cache):
    """Return the set of coauthors of AUTHOR.

    A coauthor is any author found (via get_authors_from_record with
    TAGS) on a record that AUTHOR is attached to.

    AUTHOR is first tried as an integer person id, in which case the
    records come from get_person_bibrecs(); when the integer conversion
    raises ValueError it is searched as a name in the 'author' field.

    CACHE is a mapping author -> set of coauthors.  It is consulted
    first and filled with every freshly computed result.  An empty
    AUTHOR yields an empty set that is not stored in CACHE.
    """
    if author in cache:
        return cache[author]

    coauthors = set()

    # sanity check: author may not exist
    if not author:
        return coauthors

    try:
        recids = get_person_bibrecs(int(author))
    except ValueError:
        # not a numeric id: look the author up by name instead
        recids = search_engine.search_pattern(p=author, f='author')

    for recid in recids:
        coauthors.update(get_authors_from_record(recid, tags))

    cache[author] = coauthors
    return coauthors
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

    SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
    for instance p='Smith, Paul' and f='author'.  They are used for links.
    REQ is the Apache/mod_python request object.

    For OF='hcs' (HTML cite summary) every part of the report is written
    to REQ as soon as it is computed and '' is returned.  When REQ is
    None the parts are collected instead and returned joined by
    newlines (previously this case crashed on ``None.write``).

    For OF='xcs' (XML cite summary) the value of
    print_citation_summary_xml() is returned.  Any other OF returns None.
    """
    # NOTE(review): function-level import kept exactly as in the original;
    # presumably it avoids a circular import at module load -- confirm.
    import search_engine

    if of == 'xcs':
        # this is XML cite summary
        return print_citation_summary_xml(get_cited_by_list(recids))
    if of != 'hcs':
        return None

    # this is HTML cite summary
    html = []

    def emit(chunk):
        """Send CHUNK to REQ, or buffer it when there is no request."""
        if req is None:
            html.append(chunk)
        else:
            req.write(chunk)

    # 1) hcs prologue: restrict RECIDS to every configured collection
    d_recids = {}
    d_total_recs = {}
    for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
        if colldef:
            d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
        else:
            # an empty definition means "all the records"
            d_recids[coll] = recids
        d_total_recs[coll] = len(d_recids[coll])
    emit(websearch_templates.tmpl_citesummary_prologue(
        d_total_recs, CFG_CITESUMMARY_COLLECTIONS,
        searchpattern, searchfield, ln))

    # 2) hcs overview: total and average citations per collection
    d_citecounts = {}
    d_total_cites = {}
    d_avg_cites = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        # one citation count per record; records without citers count 0
        citecounts = [len(lciters or ()) for dummy_recid, lciters
                      in get_cited_by_list(d_recids[coll])]
        d_citecounts[coll] = citecounts
        d_total_cites[coll] = sum(citecounts)
        d_avg_cites[coll] = 0
        if d_total_cites[coll] != 0:
            d_avg_cites[coll] = \
                d_total_cites[coll] * 1.0 / d_total_recs[coll]
    emit(websearch_templates.tmpl_citesummary_overview(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 3) hcs break down by fame: records per citation-count bracket
    for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
        d_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_cites[coll] = len([numcites
                                 for numcites in d_citecounts[coll]
                                 if low <= numcites <= high])
        emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
            d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
            searchpattern, searchfield, ln))

    # 4) hcs epilogue:
    emit(websearch_templates.tmpl_citesummary_epilogue(ln))

    # with a request object nothing was buffered, so this is '' as before
    return "\n".join(html)
def get_coauthors(author, tags, cache):
    """Get all coauthors for an author

    Given author A, returns all the authors having published
    a record with author A.

    The authors of a record are read with get_authors_from_record()
    using TAGS.  CACHE is a mapping author -> set of coauthors; it is
    consulted first and filled with every freshly computed result.

    An empty AUTHOR (None or '') returns an empty set without querying
    the search engine and without touching CACHE.
    """
    if author in cache:
        return cache[author]

    friends = set()

    # sanity check: author may not exist; do not hand an empty pattern
    # to the search engine
    if not author:
        return friends

    for recid in search_engine.search_pattern(p=author, f='author'):
        friends.update(get_authors_from_record(recid, tags))

    cache[author] = friends
    return friends
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

    SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
    for instance p='Smith, Paul' and f='author'.  They are used for links.
    REQ is the Apache/mod_python request object.

    For OF='hcs' (HTML cite summary) the report parts are, in order:
    prologue, overview, self-citations (skipped when any collection
    holds more than CFG_CITESUMMARY_SELFCITES_THRESHOLD records),
    breakdown header, breakdown by fame, h-index and epilogue.  When
    REQ is false the parts are returned joined by newlines; otherwise
    they are written to REQ (if it has a ``write`` attribute) as soon
    as they are computed and '' is returned.

    For OF='xcs' (XML cite summary) the value of
    print_citation_summary_xml() is returned.  Any other OF returns None.
    """
    if of == 'xcs':
        # this is XML cite summary
        return print_citation_summary_xml(get_cited_by_list(recids))
    if of != 'hcs':
        return None

    # this is HTML cite summary
    html = []

    def emit(chunk):
        """Buffer CHUNK when there is no request, else stream it to REQ."""
        if not req:
            html.append(chunk)
        elif hasattr(req, "write"):
            req.write(chunk)

    # 1) hcs prologue: restrict RECIDS to every configured collection
    compute_self_citations_p = True
    d_recids = {}
    d_total_recs = {}
    for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
        if colldef:
            d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
        else:
            # an empty definition means "all the records"
            d_recids[coll] = recids
        d_total_recs[coll] = len(d_recids[coll])
        if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD:
            # one oversized collection disables self-citations altogether
            compute_self_citations_p = False
    emit(websearch_templates.tmpl_citesummary_prologue(
        d_total_recs, CFG_CITESUMMARY_COLLECTIONS,
        searchpattern, searchfield, ln))

    # 2) hcs overview: total and average citations per collection
    d_citecounts = {}
    d_total_cites = {}
    d_avg_cites = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        # one citation count per record; records without citers count 0
        citecounts = [len(lciters or ()) for dummy_recid, lciters
                      in get_cited_by_list(d_recids[coll])]
        d_citecounts[coll] = citecounts
        d_total_cites[coll] = sum(citecounts)
        d_avg_cites[coll] = 0
        if d_total_recs[coll] != 0:
            d_avg_cites[coll] = \
                d_total_cites[coll] * 1.0 / d_total_recs[coll]
    emit(websearch_templates.tmpl_citesummary_overview(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 3) compute self-citations
    if compute_self_citations_p:
        emit(render_self_citations(d_recids, d_total_recs, ln))

    emit(websearch_templates.tmpl_citesummary_breakdown_header(ln))

    # 4) hcs break down by fame: records per citation-count bracket
    for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
        d_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_cites[coll] = len([numcites
                                 for numcites in d_citecounts[coll]
                                 if low <= numcites <= high])
        emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
            d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
            searchpattern, searchfield, ln))

    # 5) hcs calculate h index: with the counts in decreasing order, h is
    # the number of leading records whose count is at least their rank
    d_h_factors = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        h_factor = 0
        for numcites in sorted(d_citecounts[coll], reverse=True):
            if numcites < h_factor + 1:
                break
            h_factor += 1
        d_h_factors[coll] = h_factor
    emit(websearch_templates.tmpl_citesummary_h_index(
        d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 6) hcs epilogue:
    emit(websearch_templates.tmpl_citesummary_epilogue(ln))

    if not req:
        return "\n".join(html)
    return ''
def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None): """Write summary report for records RECIDS in the format OF in language LN. SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS, for instance p='Smith, Paul' and f='author'. They are used for links. REQ is the Apache/mod_python request object. """ if of == 'hcs': # this is HTML cite summary html = [] compute_self_citations = True # 1) hcs prologue: d_recids = {} d_total_recs = {} for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: if not colldef: d_recids[coll] = recids else: d_recids[coll] = recids & search_engine.search_pattern( p=colldef) d_total_recs[coll] = len(d_recids[coll]) if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD: compute_self_citations = False prologue = websearch_templates.tmpl_citesummary_prologue( d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern, searchfield, ln) if not req: html.append(prologue) elif hasattr(req, "write"): req.write(prologue) # 2) hcs overview: d_recid_citers = {} d_total_cites = {} d_avg_cites = {} d_recid_citecount_l = {} for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: d_total_cites[coll] = 0 d_avg_cites[coll] = 0 d_recid_citecount_l[coll] = [] d_recid_citers[coll] = get_cited_by_list(d_recids[coll]) for recid, lciters in d_recid_citers[coll]: if lciters: d_total_cites[coll] += len(lciters) d_recid_citecount_l[coll].append((recid, len(lciters))) if d_total_cites[coll] != 0: d_avg_cites[ coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll] overview = websearch_templates.tmpl_citesummary_overview( d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln) if not req: html.append(overview) elif hasattr(req, "write"): req.write(overview) # 3) compute self-citations if compute_self_citations: try: tags = get_authors_tags() except (IndexError, ConfigParser.NoOptionError), msg: register_exception(prefix="attribute " + \ str(msg) + " missing in config", alert_admin=True) compute_self_citations = False if 
compute_self_citations: d_recid_citers = {} d_total_cites = {} d_avg_cites = {} for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: d_total_cites[coll] = 0 d_avg_cites[coll] = 0 d_recid_citers[coll] = get_cited_by_list(d_recids[coll]) authors_cache = {} for recid, lciters in d_recid_citers[coll]: if lciters: authors = get_authors_from_record(recid, tags) if len(authors) > 20: # Use collaboration names collaborations = get_collaborations_from_record( recid, tags) for cit in lciters: cit_collaborations = get_collaborations_from_record( cit, tags) if len( collaborations.intersection( cit_collaborations)) == 0: d_total_cites[coll] += 1 else: # Use author names for cit in lciters: cit_authors = get_authors_from_record( cit, tags) #extend with circle of friends for author in list(cit_authors)[:20]: author_friends = get_coauthors( author, tags, authors_cache) cit_authors.update(author_friends) if len(authors.intersection(cit_authors)) == 0: d_total_cites[coll] += 1 if d_total_cites[coll] != 0: d_avg_cites[ coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll] overview = websearch_templates.tmpl_citesummary_minus_self_cites( d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln) if not req: html.append(overview) elif hasattr(req, "write"): req.write(overview) header = websearch_templates.tmpl_citesummary_breakdown_header(ln) if not req: html.append(header) elif hasattr(req, "write"): req.write(header) # 4) hcs break down by fame: for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS: d_cites = {} for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: d_cites[coll] = 0 for recid, lciters in d_recid_citers[coll]: numcites = 0 if lciters: numcites = len(lciters) if numcites >= low and numcites <= high: d_cites[coll] += 1 fame_info = websearch_templates.tmpl_citesummary_breakdown_by_fame( d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS, searchpattern, searchfield, ln) if not req: html.append(fame_info) elif hasattr(req, "write"): req.write(fame_info) # 5) hcs calculate 
h index d_h_factors = {} def comparator(x, y): if x[1] > y[1]: return -1 elif x[1] == y[1]: return 0 else: return +1 for coll, colldef in CFG_CITESUMMARY_COLLECTIONS: d_h_factors[coll] = 0 d_recid_citecount_l[coll].sort(cmp=comparator) #req.write(repr(d_recid_citecount_l[coll])) # DEBUG for citecount in d_recid_citecount_l[coll]: d_h_factors[coll] += 1 if d_h_factors[coll] > citecount[1]: d_h_factors[coll] -= 1 break h_idx = websearch_templates.tmpl_citesummary_h_index( d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln) if not req: html.append(h_idx) elif hasattr(req, "write"): req.write(h_idx) # 6) hcs epilogue: eplilogue = websearch_templates.tmpl_citesummary_epilogue(ln) if not req: html.append(eplilogue) elif hasattr(req, "write"): req.write(eplilogue) if not req: return "\n".join(html) else: return ''
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

    SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
    for instance p='Smith, Paul' and f='author'.  They are used for links.
    REQ is the Apache/mod_python request object.

    For OF='hcs' (HTML cite summary) the report parts are, in order:
    prologue, overview, breakdown by fame, h-index and epilogue.  When
    REQ is false the parts are returned joined by newlines; otherwise
    they are written to REQ (if it has a ``write`` attribute) as soon
    as they are computed and '' is returned.

    For OF='xcs' (XML cite summary) the value of
    print_citation_summary_xml() is returned.  Any other OF returns None.
    """
    if of == 'xcs':
        # this is XML cite summary
        return print_citation_summary_xml(get_cited_by_list(recids))
    if of != 'hcs':
        return None

    # this is HTML cite summary
    html = []

    def emit(chunk):
        """Buffer CHUNK when there is no request, else stream it to REQ."""
        if not req:
            html.append(chunk)
        elif hasattr(req, "write"):
            req.write(chunk)

    # 1) hcs prologue: restrict RECIDS to every configured collection
    d_recids = {}
    d_total_recs = {}
    for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
        if colldef:
            d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
        else:
            # an empty definition means "all the records"
            d_recids[coll] = recids
        d_total_recs[coll] = len(d_recids[coll])
    emit(websearch_templates.tmpl_citesummary_prologue(
        d_total_recs, CFG_CITESUMMARY_COLLECTIONS,
        searchpattern, searchfield, ln))

    # 2) hcs overview: total and average citations per collection
    d_citecounts = {}
    d_total_cites = {}
    d_avg_cites = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        # one citation count per record; records without citers count 0
        citecounts = [len(lciters or ()) for dummy_recid, lciters
                      in get_cited_by_list(d_recids[coll])]
        d_citecounts[coll] = citecounts
        d_total_cites[coll] = sum(citecounts)
        d_avg_cites[coll] = 0
        if d_total_cites[coll] != 0:
            d_avg_cites[coll] = \
                d_total_cites[coll] * 1.0 / d_total_recs[coll]
    emit(websearch_templates.tmpl_citesummary_overview(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 3) hcs break down by fame: records per citation-count bracket
    for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
        d_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_cites[coll] = len([numcites
                                 for numcites in d_citecounts[coll]
                                 if low <= numcites <= high])
        emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
            d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
            searchpattern, searchfield, ln))

    # 4) hcs calculate h index: with the counts in decreasing order, h is
    # the number of leading records whose count is at least their rank
    d_h_factors = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        h_factor = 0
        for numcites in sorted(d_citecounts[coll], reverse=True):
            if numcites < h_factor + 1:
                break
            h_factor += 1
        d_h_factors[coll] = h_factor
    emit(websearch_templates.tmpl_citesummary_h_index(
        d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 5) hcs epilogue:
    emit(websearch_templates.tmpl_citesummary_epilogue(ln))

    if not req:
        return "\n".join(html)
    return ''