def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

    SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
    for instance p='Smith, Paul' and f='author'.  They are used for links.
    REQ is the Apache/mod_python request object.

    Return value:
      - OF == 'hcs' (HTML cite summary): each section is written to REQ
        and '' is returned.  When REQ is None the sections are collected
        instead and returned as one newline-joined string.
      - OF == 'xcs' (XML cite summary): the value returned by
        print_citation_summary_xml() for the citers of RECIDS.
      - any other OF: None.
    """
    import search_engine

    if of == 'xcs':
        # this is XML cite summary
        return print_citation_summary_xml(get_cited_by_list(recids))
    if of != 'hcs':
        return None

    # this is HTML cite summary
    html = []
    if req is None:
        # REQ is optional in the signature: buffer the output rather than
        # failing on None.write().
        emit = html.append
    else:
        emit = req.write

    # 1) hcs prologue: restrict RECIDS to every configured collection.
    d_recids = {}
    d_total_recs = {}
    for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
        if colldef:
            d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
        else:
            # an empty definition means "all records"
            d_recids[coll] = recids
        d_total_recs[coll] = len(d_recids[coll])
    emit(websearch_templates.tmpl_citesummary_prologue(
        d_total_recs, CFG_CITESUMMARY_COLLECTIONS,
        searchpattern, searchfield, ln))

    # 2) hcs overview: total and average number of citations.
    d_recid_citers = {}
    d_total_cites = {}
    d_avg_cites = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
        total = 0
        for dummy_recid, lciters in d_recid_citers[coll]:
            if lciters:
                total += len(lciters)
        d_total_cites[coll] = total
        # a non-zero total implies at least one record, so no division
        # by zero is possible here
        if total != 0:
            d_avg_cites[coll] = total * 1.0 / d_total_recs[coll]
        else:
            d_avg_cites[coll] = 0
    emit(websearch_templates.tmpl_citesummary_overview(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 3) hcs break down by fame: number of records per citation bracket.
    for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
        d_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            in_bracket = 0
            for dummy_recid, lciters in d_recid_citers[coll]:
                numcites = len(lciters) if lciters else 0
                if low <= numcites <= high:
                    in_bracket += 1
            d_cites[coll] = in_bracket
        emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
            d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
            searchpattern, searchfield, ln))

    # 4) hcs epilogue:
    emit(websearch_templates.tmpl_citesummary_epilogue(ln))

    if req is None:
        return "\n".join(html)
    return ''
def main(key, value, start, end):
    '''Add up all citations over a period.

    Searches the HEP collection for the papers matching KEY/VALUE that
    have at least one citation, then prints one tab-separated line per
    year in range(START, END): the year, the citations counted for that
    year and the running total.
    '''
    if key == 'exp':
        # experiment names are matched as a prefix
        query = 'find {0} {1}* and topcite 1+'.format(key, value)
    else:
        query = 'find {0} {1} and topcite 1+'.format(key, value)
    entity_papers = intbitset(perform_request_search(p=query, cc='HEP'))

    citers_of = dict((entry[0], intbitset(entry[1]))
                     for entry in get_cited_by_list(entity_papers))
    print('The {0} papers of {1}'.format(len(entity_papers), value))

    years = range(start, end)

    # Fetch the per-year record sets up front, before any counting.
    papers_in = {}
    for year in years:
        papers_in[year] = intbitset(
            perform_request_search(p='earliestdate:' + str(year), cc='HEP'))

    running_total = 0
    for year in years:
        year_total = sum(len(citers_of[paper] & papers_in[year])
                         for paper in entity_papers)
        running_total += year_total
        print('{0:6d}\t{1:6d}\t{2:6d}'.format(year, year_total,
                                              running_total))
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None, collections=CFG_CITESUMMARY_COLLECTIONS):
    """Produce the citation summary of RECIDS in format OF and language LN.

    SEARCHPATTERN and SEARCHFIELD are the search query that led to
    RECIDS, for instance p='Smith, Paul' and f='author'; they are handed
    to the renderer together with COLLECTIONS.
    REQ is the Apache/mod_python request object.

    For OF == 'xcs' the XML summary is returned directly.  For every
    other OF the summary is rendered (render_citation_summary for 'hcs',
    render_extended_citation_summary otherwise) followed by the footer
    template.  The output goes to REQ and '' is returned; when REQ is
    None it is rendered into a string buffer and that text is returned.
    """
    if of == 'xcs':
        # this is XML cite summary
        return render_citation_summary_xml(get_cited_by_list(recids))

    buffered = req is None
    out = StringIO() if buffered else req

    if of == 'hcs':
        render = render_citation_summary
    else:
        render = render_extended_citation_summary
    render(out, ln, recids, collections, searchpattern, searchfield)
    out.write(websearch_templates.tmpl_citesummary_footer())

    return out.getvalue() if buffered else ''
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

    SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
    for instance p='Smith, Paul' and f='author'.  They are used for links.
    REQ is the Apache/mod_python request object.

    Return value:
      - OF == 'hcs' (HTML cite summary): each section is written to REQ
        and '' is returned.  When REQ is false-y the sections are
        collected instead and returned as one newline-joined string.  A
        REQ without a ``write`` attribute receives nothing.
      - OF == 'xcs' (XML cite summary): the value returned by
        print_citation_summary_xml() for the citers of RECIDS.
      - any other OF: None.
    """
    if of == 'xcs':
        # this is XML cite summary
        return print_citation_summary_xml(get_cited_by_list(recids))
    if of != 'hcs':
        return None

    # this is HTML cite summary
    html = []

    def emit(chunk):
        """Buffer CHUNK when there is no REQ, otherwise write it to REQ."""
        if not req:
            html.append(chunk)
        elif hasattr(req, "write"):
            req.write(chunk)

    # 1) hcs prologue: restrict RECIDS to every configured collection.
    compute_self_citations_p = True
    d_recids = {}
    d_total_recs = {}
    for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
        if colldef:
            d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
        else:
            # an empty definition means "all records"
            d_recids[coll] = recids
        d_total_recs[coll] = len(d_recids[coll])
        if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD:
            # one oversized collection disables the self-citation section
            compute_self_citations_p = False
    emit(websearch_templates.tmpl_citesummary_prologue(
        d_total_recs, CFG_CITESUMMARY_COLLECTIONS,
        searchpattern, searchfield, ln))

    # 2) hcs overview: total and average number of citations.
    d_recid_citers = {}
    d_total_cites = {}
    d_avg_cites = {}
    d_recid_citecount_l = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        d_total_cites[coll] = 0
        d_avg_cites[coll] = 0
        d_recid_citecount_l[coll] = []
        d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
        for recid, lciters in d_recid_citers[coll]:
            if lciters:
                d_total_cites[coll] += len(lciters)
                d_recid_citecount_l[coll].append((recid, len(lciters)))
        if d_total_recs[coll] != 0:
            d_avg_cites[coll] = \
                d_total_cites[coll] * 1.0 / d_total_recs[coll]
    emit(websearch_templates.tmpl_citesummary_overview(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 3) compute self-citations
    if compute_self_citations_p:
        emit(render_self_citations(d_recids, d_total_recs, ln))
    emit(websearch_templates.tmpl_citesummary_breakdown_header(ln))

    # 4) hcs break down by fame: number of records per citation bracket.
    for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
        d_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            in_bracket = 0
            for dummy_recid, lciters in d_recid_citers[coll]:
                numcites = len(lciters) if lciters else 0
                if low <= numcites <= high:
                    in_bracket += 1
            d_cites[coll] = in_bracket
        emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
            d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
            searchpattern, searchfield, ln))

    # 5) hcs calculate h index: largest h such that h records have at
    #    least h citations each.
    d_h_factors = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        # most cited first; a stable key sort replaces the cmp= comparator
        d_recid_citecount_l[coll].sort(key=lambda pair: pair[1],
                                       reverse=True)
        h_index = 0
        for dummy_recid, citecount in d_recid_citecount_l[coll]:
            if citecount <= h_index:
                break
            h_index += 1
        d_h_factors[coll] = h_index
    emit(websearch_templates.tmpl_citesummary_h_index(
        d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 6) hcs epilogue:
    emit(websearch_templates.tmpl_citesummary_epilogue(ln))

    if not req:
        return "\n".join(html)
    return ''
def render_self_citations(d_recids, d_total_recs, ln): try: tags = get_authors_tags() except IndexError, e: register_exception(prefix="attribute " + \ str(e) + " missing in config", alert_admin=True) return "" d_recid_citers = {} d_total_cites = {} d_avg_cites = {} for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS: d_total_cites[coll] = 0 d_avg_cites[coll] = 0 d_recid_citers[coll] = get_cited_by_list(d_recids[coll]) authors_cache = {} for recid, lciters in d_recid_citers[coll]: d_total_cites[coll] += \ compute_self_citations(recid, lciters, authors_cache, tags) if d_total_recs[coll] != 0: d_avg_cites[coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll] return websearch_templates.tmpl_citesummary_minus_self_cites( d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln) def summarize_records(recids, of, ln,
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

    SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
    for instance p='Smith, Paul' and f='author'.  They are used for links.
    REQ is the Apache/mod_python request object.

    Only OF == 'hcs' (HTML cite summary) is handled here: each section
    is written to REQ and '' is returned.  When REQ is false-y the
    sections are collected instead and returned as one newline-joined
    string.  A REQ without a ``write`` attribute receives nothing.  Any
    other OF returns None.
    """
    if of != 'hcs':
        return None

    # this is HTML cite summary
    html = []

    def emit(chunk):
        """Buffer CHUNK when there is no REQ, otherwise write it to REQ."""
        if not req:
            html.append(chunk)
        elif hasattr(req, "write"):
            req.write(chunk)

    def count_external_citers(recid, lciters, tags, authors_cache):
        """Count the citers in LCITERS that do not count as self-citations."""
        authors = get_authors_from_record(recid, tags)
        external = 0
        if len(authors) > 20:
            # Use collaboration names
            collaborations = get_collaborations_from_record(recid, tags)
            for cit in lciters:
                cit_collaborations = get_collaborations_from_record(cit,
                                                                    tags)
                if len(collaborations.intersection(cit_collaborations)) == 0:
                    external += 1
        else:
            # Use author names
            for cit in lciters:
                cit_authors = get_authors_from_record(cit, tags)
                # Extend with the circle of friends of the first 20
                # authors.  Work on a copy: whether the helper hands out
                # a shared/cached set is not visible from here, so the
                # returned object is never mutated.
                circle = set(cit_authors)
                for author in list(cit_authors)[:20]:
                    circle.update(get_coauthors(author, tags, authors_cache))
                if len(authors.intersection(circle)) == 0:
                    external += 1
        return external

    # 1) hcs prologue: restrict RECIDS to every configured collection.
    with_self_cites = True
    d_recids = {}
    d_total_recs = {}
    for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
        if colldef:
            d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
        else:
            # an empty definition means "all records"
            d_recids[coll] = recids
        d_total_recs[coll] = len(d_recids[coll])
        if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD:
            # one oversized collection disables the self-citation section
            with_self_cites = False
    emit(websearch_templates.tmpl_citesummary_prologue(
        d_total_recs, CFG_CITESUMMARY_COLLECTIONS,
        searchpattern, searchfield, ln))

    # 2) hcs overview: total and average number of citations.
    d_recid_citers = {}
    d_total_cites = {}
    d_avg_cites = {}
    d_recid_citecount_l = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        d_total_cites[coll] = 0
        d_avg_cites[coll] = 0
        d_recid_citecount_l[coll] = []
        d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
        for recid, lciters in d_recid_citers[coll]:
            if lciters:
                d_total_cites[coll] += len(lciters)
                d_recid_citecount_l[coll].append((recid, len(lciters)))
        # a non-zero total implies at least one record
        if d_total_cites[coll] != 0:
            d_avg_cites[coll] = \
                d_total_cites[coll] * 1.0 / d_total_recs[coll]
    emit(websearch_templates.tmpl_citesummary_overview(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 3) compute self-citations
    tags = None
    if with_self_cites:
        try:
            tags = get_authors_tags()
        except (IndexError, ConfigParser.NoOptionError) as msg:
            register_exception(
                prefix="attribute " + str(msg) + " missing in config",
                alert_admin=True)
            with_self_cites = False
    if with_self_cites:
        # The citer lists fetched for the overview are reused instead of
        # querying them a second time, and the totals live in their own
        # dictionaries so the overview figures are not overwritten.
        d_external_cites = {}
        d_external_avg = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            authors_cache = {}
            external = 0
            for recid, lciters in d_recid_citers[coll]:
                if lciters:
                    external += count_external_citers(recid, lciters, tags,
                                                      authors_cache)
            d_external_cites[coll] = external
            if external != 0:
                d_external_avg[coll] = external * 1.0 / d_total_recs[coll]
            else:
                d_external_avg[coll] = 0
        emit(websearch_templates.tmpl_citesummary_minus_self_cites(
            d_external_cites, d_external_avg,
            CFG_CITESUMMARY_COLLECTIONS, ln))
    emit(websearch_templates.tmpl_citesummary_breakdown_header(ln))

    # 4) hcs break down by fame: number of records per citation bracket.
    for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
        d_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            in_bracket = 0
            for dummy_recid, lciters in d_recid_citers[coll]:
                numcites = len(lciters) if lciters else 0
                if low <= numcites <= high:
                    in_bracket += 1
            d_cites[coll] = in_bracket
        emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
            d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
            searchpattern, searchfield, ln))

    # 5) hcs calculate h index: largest h such that h records have at
    #    least h citations each.
    d_h_factors = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        # most cited first; a stable key sort replaces the cmp= comparator
        d_recid_citecount_l[coll].sort(key=lambda pair: pair[1],
                                       reverse=True)
        h_index = 0
        for dummy_recid, citecount in d_recid_citecount_l[coll]:
            if citecount <= h_index:
                break
            h_index += 1
        d_h_factors[coll] = h_index
    emit(websearch_templates.tmpl_citesummary_h_index(
        d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 6) hcs epilogue:
    emit(websearch_templates.tmpl_citesummary_epilogue(ln))

    if not req:
        return "\n".join(html)
    return ''
# 6) hcs epilogue: eplilogue = websearch_templates.tmpl_citesummary_epilogue(ln) if not req: html.append(eplilogue) elif hasattr(req, "write"): req.write(eplilogue) if not req: return "\n".join(html) else: return '' elif of == 'xcs': # this is XML cite summary citedbylist = get_cited_by_list(recids) return print_citation_summary_xml(citedbylist) #for citation summary, code xcs/hcs (unless changed) def print_citation_summary_xml(citedbylist): """Prints citation summary in xml.""" alldict = calculate_citations(citedbylist) avgstr = str(alldict['avgcites']) totalcites = str(alldict['totalcites']) #format avg so that it does not span 10 digits avgstr = avgstr[0:4] reciddict = alldict['reciddict'] #output formatting outp = "<citationsummary records=\"" + str(len(citedbylist)) outp += "\" citations=\"" + str(totalcites) + "\">"
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

    SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
    for instance p='Smith, Paul' and f='author'.  They are used for links.
    REQ is the Apache/mod_python request object.

    Return value:
      - OF == 'hcs' (HTML cite summary): each section is written to REQ
        and '' is returned.  When REQ is false-y the sections are
        collected instead and returned as one newline-joined string.  A
        REQ without a ``write`` attribute receives nothing.
      - OF == 'xcs' (XML cite summary): the value returned by
        print_citation_summary_xml() for the citers of RECIDS.
      - any other OF: None.
    """
    if of == 'xcs':
        # this is XML cite summary
        return print_citation_summary_xml(get_cited_by_list(recids))
    if of != 'hcs':
        return None

    # this is HTML cite summary
    html = []

    def emit(chunk):
        """Buffer CHUNK when there is no REQ, otherwise write it to REQ."""
        if not req:
            html.append(chunk)
        elif hasattr(req, "write"):
            req.write(chunk)

    # 1) hcs prologue: restrict RECIDS to every configured collection.
    d_recids = {}
    d_total_recs = {}
    for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
        if colldef:
            d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
        else:
            # an empty definition means "all records"
            d_recids[coll] = recids
        d_total_recs[coll] = len(d_recids[coll])
    emit(websearch_templates.tmpl_citesummary_prologue(
        d_total_recs, CFG_CITESUMMARY_COLLECTIONS,
        searchpattern, searchfield, ln))

    # 2) hcs overview: total and average number of citations.
    d_recid_citers = {}
    d_total_cites = {}
    d_avg_cites = {}
    d_recid_citecount_l = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        d_total_cites[coll] = 0
        d_avg_cites[coll] = 0
        d_recid_citecount_l[coll] = []
        d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
        for recid, lciters in d_recid_citers[coll]:
            if lciters:
                d_total_cites[coll] += len(lciters)
                d_recid_citecount_l[coll].append((recid, len(lciters)))
        # a non-zero total implies at least one record
        if d_total_cites[coll] != 0:
            d_avg_cites[coll] = \
                d_total_cites[coll] * 1.0 / d_total_recs[coll]
    emit(websearch_templates.tmpl_citesummary_overview(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 3) hcs break down by fame: number of records per citation bracket.
    for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
        d_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            in_bracket = 0
            for dummy_recid, lciters in d_recid_citers[coll]:
                numcites = len(lciters) if lciters else 0
                if low <= numcites <= high:
                    in_bracket += 1
            d_cites[coll] = in_bracket
        emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
            d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
            searchpattern, searchfield, ln))

    # 4) hcs calculate h index: largest h such that h records have at
    #    least h citations each.
    d_h_factors = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        # most cited first; a stable key sort replaces the cmp= comparator
        d_recid_citecount_l[coll].sort(key=lambda pair: pair[1],
                                       reverse=True)
        h_index = 0
        for dummy_recid, citecount in d_recid_citecount_l[coll]:
            if citecount <= h_index:
                break
            h_index += 1
        d_h_factors[coll] = h_index
    emit(websearch_templates.tmpl_citesummary_h_index(
        d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

    # 5) hcs epilogue:
    emit(websearch_templates.tmpl_citesummary_epilogue(ln))

    if not req:
        return "\n".join(html)
    return ''
def get_citers(d_recids):
    """Map every collection of D_RECIDS to its cited-by list.

    D_RECIDS maps a collection name to a set of record ids; the result
    maps the same names to whatever get_cited_by_list() returns for the
    corresponding record set.
    """
    return dict((coll, get_cited_by_list(recids))
                for coll, recids in d_recids.iteritems())
def render_self_citations(d_recids, d_total_recs, ln): try: tags = get_authors_tags() except IndexError, e: register_exception(prefix="attribute " + \ str(e) + " missing in config", alert_admin=True) return "" d_recid_citers = {} d_total_cites = {} d_avg_cites = {} for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS: d_total_cites[coll] = 0 d_avg_cites[coll] = 0 d_recid_citers[coll] = get_cited_by_list(d_recids[coll]) authors_cache = {} for recid, lciters in d_recid_citers[coll]: d_total_cites[coll] += \ compute_self_citations(recid, lciters, authors_cache, tags) if d_total_recs[coll] != 0: d_avg_cites[coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll] return websearch_templates.tmpl_citesummary_minus_self_cites( d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln) def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None): """Write summary report for records RECIDS in the format OF in language LN. SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,