def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF='hcs' produces the HTML cite summary: every part is written to
       REQ and '' is returned.  When REQ is None the parts are collected
       and returned as one newline-joined string instead.
       OF='xcs' returns the XML cite summary.
       For any other OF nothing is produced and None is returned.
    """
    import search_engine
    if of == 'hcs':
        # this is HTML cite summary
        parts = []
        if req is None:
            # no request object to stream to: gather the output instead
            emit = parts.append
        else:
            emit = req.write

        # 1) hcs prologue: narrow RECIDS down to each configured collection
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if not colldef:
                # no collection definition: take RECIDS unfiltered
                d_recids[coll] = recids
            else:
                d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
            d_total_recs[coll] = len(d_recids[coll])
        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview: total and average number of citations
        d_numcites = {}  # coll -> citation count of every record, computed once
        d_total_cites = {}
        d_avg_cites = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            counts = []
            for dummy_recid, lciters in get_cited_by_list(d_recids[coll]):
                if lciters:
                    counts.append(len(lciters))
                else:
                    counts.append(0)
            d_numcites[coll] = counts
            d_total_cites[coll] = sum(counts)
            d_avg_cites[coll] = 0
            if d_total_cites[coll] != 0:
                # a non-zero total implies at least one record, so the
                # division is safe
                d_avg_cites[coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll]
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) hcs break down by fame: records per citation-count bracket
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = sum(1 for numcites in d_numcites[coll]
                                    if low <= numcites <= high)
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 4) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))
        if req is None:
            return "\n".join(parts)
        return ''

    elif of == 'xcs':
        # this is XML cite summary
        citedbylist = get_cited_by_list(recids)
        return print_citation_summary_xml(citedbylist)
def main(key, value, start, end):
    '''Add up all citations over a period.

    KEY and VALUE form the search ('find KEY VALUE and topcite 1+', with a
    trailing wildcard on VALUE when KEY is 'exp') selecting the papers
    whose citations are counted.  For every year in range(START, END) --
    END itself is excluded -- one line is printed with the year, the
    number of citations coming from papers whose earliestdate is that
    year, and the running total.
    '''

    if key == 'exp':
        query = 'find {0} {1}* and topcite 1+'.format(key, value)
    else:
        query = 'find {0} {1} and topcite 1+'.format(key, value)
    entity_papers = intbitset(perform_request_search(p=query, cc='HEP'))

    # paper -> set of records citing it
    citers_of = {}
    for cite in get_cited_by_list(entity_papers):
        citers_of[cite[0]] = intbitset(cite[1])
    # single-argument print(...) gives the same output under the Python 2
    # print statement and the Python 3 print function
    print('The {0} papers of {1}'.format(len(entity_papers), value))

    # all searches are done before anything is counted or printed
    years = range(start, end)
    papers_of_year = {}
    for year in years:
        papers_of_year[year] = intbitset(
            perform_request_search(p='earliestdate:' + str(year), cc='HEP'))

    running_total = 0
    for year in years:
        year_papers = papers_of_year[year]
        year_count = sum(len(citers_of[paper] & year_papers)
                         for paper in entity_papers)
        running_total += year_count
        print('{0:6d}\t{1:6d}\t{2:6d}'.format(year, year_count,
                                              running_total))
def summarize_records(recids, of, ln, searchpattern="", searchfield="",
                            req=None, collections=CFG_CITESUMMARY_COLLECTIONS):
    """Produce the citation summary of RECIDS in format OF and language LN.

       SEARCHPATTERN and SEARCHFIELD are the query that led to RECIDS (for
       instance p='Smith, Paul' and f='author'); they are handed on to the
       renderer together with COLLECTIONS.
       REQ is the Apache/mod_python request object.  When it is None the
       output is rendered into a StringIO buffer and returned as a string;
       otherwise it is written to REQ and '' is returned.
       OF='xcs' returns the XML cite summary straight away.
    """
    if of == 'xcs':
        # this is XML cite summary
        return render_citation_summary_xml(get_cited_by_list(recids))

    # without a request object, render into an in-memory buffer
    buffered = req is None
    if buffered:
        out = StringIO()
    else:
        out = req

    # 'hcs' gets the plain summary, every other format the extended one
    if of == 'hcs':
        render = render_citation_summary
    else:
        render = render_extended_citation_summary
    render(out, ln, recids, collections, searchpattern, searchfield)

    out.write(websearch_templates.tmpl_citesummary_footer())

    if buffered:
        return out.getvalue()
    return ''
Exemple #4
0
def summarize_records(recids,
                      of,
                      ln,
                      searchpattern="",
                      searchfield="",
                      req=None,
                      collections=CFG_CITESUMMARY_COLLECTIONS):
    """Render the citation summary for RECIDS in format OF, language LN.

       SEARCHPATTERN and SEARCHFIELD describe the search that produced
       RECIDS, e.g. p='Smith, Paul' and f='author'; the renderer receives
       them along with COLLECTIONS.
       REQ is the Apache/mod_python request object.  If it is given, the
       summary is written to it and '' is returned; if it is None, the
       summary is written to a StringIO and its contents are returned.
       For OF='xcs' the XML cite summary is returned directly.
    """
    if of == 'xcs':
        # this is XML cite summary
        return render_citation_summary_xml(get_cited_by_list(recids))

    if req is None:
        # nobody to stream to: capture the output ourselves
        sink = StringIO()
        own_sink = True
    else:
        sink = req
        own_sink = False

    if of == 'hcs':
        render_citation_summary(
            sink, ln, recids, collections, searchpattern, searchfield)
    else:
        render_extended_citation_summary(
            sink, ln, recids, collections, searchpattern, searchfield)

    footer = websearch_templates.tmpl_citesummary_footer()
    sink.write(footer)

    if own_sink:
        return sink.getvalue()
    return ''
def summarize_records(recids,
                      of,
                      ln,
                      searchpattern="",
                      searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF='hcs' produces the HTML cite summary.  If REQ is false the parts
       are collected and returned joined by newlines; otherwise they are
       written to REQ (provided it has a ``write`` attribute) and '' is
       returned.  OF='xcs' returns the XML cite summary.  Any other OF
       returns None.
    """
    if of == 'hcs':
        # this is HTML cite summary
        html = []

        def emit(chunk):
            """Collect CHUNK when there is no REQ, else write it to REQ."""
            if not req:
                html.append(chunk)
            elif hasattr(req, "write"):
                req.write(chunk)

        compute_self_citations_p = True

        # 1) hcs prologue:
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if not colldef:
                # no collection definition: take RECIDS unfiltered
                d_recids[coll] = recids
            else:
                d_recids[coll] = recids & search_engine.search_pattern(
                    p=colldef)
            d_total_recs[coll] = len(d_recids[coll])
            if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD:
                # one collection above the threshold switches the
                # self-citation section off altogether
                compute_self_citations_p = False

        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview:
        d_numcites = {}           # coll -> citation count of every record
        d_total_cites = {}
        d_avg_cites = {}
        d_recid_citecount_l = {}  # coll -> [(recid, count)] for cited records
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_total_cites[coll] = 0
            d_avg_cites[coll] = 0
            d_numcites[coll] = []
            d_recid_citecount_l[coll] = []
            for recid, lciters in get_cited_by_list(d_recids[coll]):
                numcites = 0
                if lciters:
                    numcites = len(lciters)
                    d_total_cites[coll] += numcites
                    d_recid_citecount_l[coll].append((recid, numcites))
                d_numcites[coll].append(numcites)
            if d_total_recs[coll] != 0:
                d_avg_cites[
                    coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll]
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) compute self-citations
        if compute_self_citations_p:
            emit(render_self_citations(d_recids, d_total_recs, ln))

        emit(websearch_templates.tmpl_citesummary_breakdown_header(ln))

        # 4) hcs break down by fame:
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = sum(1 for numcites in d_numcites[coll]
                                    if low <= numcites <= high)
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 5) hcs calculate h index
        d_h_factors = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            # most cited first; the sort is stable, so records with equal
            # counts keep their relative order
            ranked = sorted(d_recid_citecount_l[coll],
                            key=lambda pair: pair[1], reverse=True)
            h_factor = 0
            for dummy_recid, numcites in ranked:
                # stop at the first record whose rank exceeds its count
                if numcites < h_factor + 1:
                    break
                h_factor += 1
            d_h_factors[coll] = h_factor
        emit(websearch_templates.tmpl_citesummary_h_index(
            d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 6) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))

        if not req:
            return "\n".join(html)
        else:
            return ''

    elif of == 'xcs':
        # this is XML cite summary
        citedbylist = get_cited_by_list(recids)
        return print_citation_summary_xml(citedbylist)
def render_self_citations(d_recids, d_total_recs, ln):
    """Render the 'minus self cites' section of the cite summary.

    D_RECIDS maps each collection name to its records and D_TOTAL_RECS maps
    it to the number of those records; LN is the language.  For every
    collection in CFG_CITESUMMARY_COLLECTIONS the values returned by
    compute_self_citations() for its records are summed, and the sum is
    averaged over the number of records.

    Returns the output of tmpl_citesummary_minus_self_cites(), or "" when
    get_authors_tags() raises IndexError (the exception is registered and
    the admin alerted).
    """
    try:
        tags = get_authors_tags()
    except IndexError as err:
        register_exception(prefix="attribute " +
                           str(err) + " missing in config", alert_admin=True)
        return ""

    d_total_cites = {}
    d_avg_cites = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        citers = get_cited_by_list(d_recids[coll])
        # one cache per collection, handed to every compute_self_citations call
        authors_cache = {}
        coll_total = 0
        for recid, lciters in citers:
            coll_total += compute_self_citations(recid, lciters,
                                                 authors_cache, tags)
        d_total_cites[coll] = coll_total

        d_avg_cites[coll] = 0
        if d_total_recs[coll] != 0:
            d_avg_cites[coll] = coll_total * 1.0 / d_total_recs[coll]

    return websearch_templates.tmpl_citesummary_minus_self_cites(
        d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln)


def summarize_records(recids,
                      of,
                      ln,
def summarize_records(recids,
                      of,
                      ln,
                      searchpattern="",
                      searchfield="",
                      req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF='hcs' produces the HTML cite summary.  If REQ is false the parts
       are collected and returned joined by newlines; otherwise they are
       written to REQ (provided it has a ``write`` attribute) and '' is
       returned.  OF='xcs' returns the XML cite summary.  Any other OF
       returns None.
    """
    if of == 'hcs':
        # this is HTML cite summary
        html = []

        def emit(chunk):
            """Collect CHUNK when there is no REQ, else write it to REQ."""
            if not req:
                html.append(chunk)
            elif hasattr(req, "write"):
                req.write(chunk)

        compute_self_citations_p = True

        # 1) hcs prologue:
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if not colldef:
                # no collection definition: take RECIDS unfiltered
                d_recids[coll] = recids
            else:
                d_recids[coll] = recids & search_engine.search_pattern(
                    p=colldef)
            d_total_recs[coll] = len(d_recids[coll])
            if d_total_recs[coll] > CFG_CITESUMMARY_SELFCITES_THRESHOLD:
                # one collection above the threshold switches the
                # self-citation section off altogether
                compute_self_citations_p = False

        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview:
        d_recid_citers = {}
        d_total_cites = {}
        d_avg_cites = {}
        d_recid_citecount_l = {}  # coll -> [(recid, count)] for cited records
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_total_cites[coll] = 0
            d_avg_cites[coll] = 0
            d_recid_citecount_l[coll] = []
            d_recid_citers[coll] = get_cited_by_list(d_recids[coll])
            for recid, lciters in d_recid_citers[coll]:
                if lciters:
                    d_total_cites[coll] += len(lciters)
                    d_recid_citecount_l[coll].append((recid, len(lciters)))
            if d_total_cites[coll] != 0:
                # a non-zero total implies at least one record
                d_avg_cites[
                    coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll]
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) compute self-citations
        if compute_self_citations_p:
            try:
                tags = get_authors_tags()
            except (IndexError, ConfigParser.NoOptionError) as msg:
                register_exception(prefix="attribute " +
                                   str(msg) + " missing in config",
                                   alert_admin=True)
                compute_self_citations_p = False

        if compute_self_citations_p:
            d_total_minus_self = {}
            d_avg_minus_self = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                # the citers fetched for the overview are reused here
                # rather than being fetched a second time
                authors_cache = {}
                total = 0
                for recid, lciters in d_recid_citers[coll]:
                    total += _count_non_self_citations(
                        recid, lciters, tags, authors_cache)
                d_total_minus_self[coll] = total
                d_avg_minus_self[coll] = 0
                if total != 0:
                    d_avg_minus_self[
                        coll] = total * 1.0 / d_total_recs[coll]
            emit(websearch_templates.tmpl_citesummary_minus_self_cites(
                d_total_minus_self, d_avg_minus_self,
                CFG_CITESUMMARY_COLLECTIONS, ln))

        emit(websearch_templates.tmpl_citesummary_breakdown_header(ln))

        # 4) hcs break down by fame:
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = 0
                for dummy_recid, lciters in d_recid_citers[coll]:
                    numcites = 0
                    if lciters:
                        numcites = len(lciters)
                    if low <= numcites <= high:
                        d_cites[coll] += 1
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 5) hcs calculate h index
        d_h_factors = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            # most cited first; the sort is stable, so records with equal
            # counts keep their relative order
            ranked = sorted(d_recid_citecount_l[coll],
                            key=lambda pair: pair[1], reverse=True)
            h_factor = 0
            for dummy_recid, numcites in ranked:
                # stop at the first record whose rank exceeds its count
                if numcites < h_factor + 1:
                    break
                h_factor += 1
            d_h_factors[coll] = h_factor
        emit(websearch_templates.tmpl_citesummary_h_index(
            d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 6) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))

        if not req:
            return "\n".join(html)
        else:
            return ''

    elif of == 'xcs':
        # this is XML cite summary
        citedbylist = get_cited_by_list(recids)
        return print_citation_summary_xml(citedbylist)


def _count_non_self_citations(recid, lciters, tags, authors_cache):
    """Count the citers in LCITERS that share nobody with record RECID.

    When RECID has more than 20 authors the comparison is made on
    collaboration names; otherwise on author names, each citer's authors
    being extended with the co-authors of up to 20 of them.  TAGS and
    AUTHORS_CACHE are passed through to the lookup helpers.
    """
    if not lciters:
        return 0

    count = 0
    authors = get_authors_from_record(recid, tags)
    if len(authors) > 20:
        # Use collaboration names
        collaborations = get_collaborations_from_record(recid, tags)
        for cit in lciters:
            cit_collaborations = get_collaborations_from_record(cit, tags)
            if len(collaborations.intersection(cit_collaborations)) == 0:
                count += 1
    else:
        # Use author names
        for cit in lciters:
            cit_authors = get_authors_from_record(cit, tags)
            # extend with circle of friends; iterate over a copy because
            # cit_authors is updated inside the loop
            for author in list(cit_authors)[:20]:
                cit_authors.update(
                    get_coauthors(author, tags, authors_cache))
            if len(authors.intersection(cit_authors)) == 0:
                count += 1
    return count


#for citation summary, code xcs/hcs (unless changed)
def print_citation_summary_xml(citedbylist):
    """Build the citation summary for CITEDBYLIST in XML.

    The figures come from calculate_citations(CITEDBYLIST), which is read
    here through its 'avgcites', 'totalcites' and 'reciddict' entries.

    NOTE(review): the body visible here only builds the opening
    <citationsummary> tag and has no return statement -- the remainder of
    the function appears to be missing; confirm against the full source.
    """
    alldict = calculate_citations(citedbylist)
    avgstr = str(alldict['avgcites'])
    totalcites = str(alldict['totalcites'])
    # keep only the first four characters of the average so that it does
    # not span 10 digits
    avgstr = avgstr[0:4]
    reciddict = alldict['reciddict']
    # output formatting: the opening tag carries the number of records and
    # the total number of citations as attributes
    outp = "<citationsummary records=\"" + str(len(citedbylist))
    outp += "\" citations=\"" + str(totalcites) + "\">"
def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None):
    """Write summary report for records RECIDS in the format OF in language LN.

       SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,
       for instance p='Smith, Paul' and f='author'.  They are used for links.
       REQ is the Apache/mod_python request object.

       OF='hcs' produces the HTML cite summary.  If REQ is false the parts
       are collected and returned joined by newlines; otherwise they are
       written to REQ (provided it has a ``write`` attribute) and '' is
       returned.  OF='xcs' returns the XML cite summary.  Any other OF
       returns None.
    """
    if of == 'hcs':
        # this is HTML cite summary
        html = []

        def emit(chunk):
            """Collect CHUNK when there is no REQ, else write it to REQ."""
            if not req:
                html.append(chunk)
            elif hasattr(req, "write"):
                req.write(chunk)

        # 1) hcs prologue:
        d_recids = {}
        d_total_recs = {}
        for coll, colldef in CFG_CITESUMMARY_COLLECTIONS:
            if not colldef:
                # no collection definition: take RECIDS unfiltered
                d_recids[coll] = recids
            else:
                d_recids[coll] = recids & search_engine.search_pattern(p=colldef)
            d_total_recs[coll] = len(d_recids[coll])

        emit(websearch_templates.tmpl_citesummary_prologue(
            d_total_recs, CFG_CITESUMMARY_COLLECTIONS, searchpattern,
            searchfield, ln))

        # 2) hcs overview:
        d_numcites = {}           # coll -> citation count of every record
        d_total_cites = {}
        d_avg_cites = {}
        d_recid_citecount_l = {}  # coll -> [(recid, count)] for cited records
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            d_total_cites[coll] = 0
            d_avg_cites[coll] = 0
            d_numcites[coll] = []
            d_recid_citecount_l[coll] = []
            for recid, lciters in get_cited_by_list(d_recids[coll]):
                numcites = 0
                if lciters:
                    numcites = len(lciters)
                    d_total_cites[coll] += numcites
                    d_recid_citecount_l[coll].append((recid, numcites))
                d_numcites[coll].append(numcites)
            if d_total_cites[coll] != 0:
                # a non-zero total implies at least one record
                d_avg_cites[coll] = d_total_cites[coll] * 1.0 / d_total_recs[coll]
        emit(websearch_templates.tmpl_citesummary_overview(
            d_total_cites, d_avg_cites, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 3) hcs break down by fame:
        for low, high, fame in CFG_CITESUMMARY_FAME_THRESHOLDS:
            d_cites = {}
            for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
                d_cites[coll] = sum(1 for numcites in d_numcites[coll]
                                    if low <= numcites <= high)
            emit(websearch_templates.tmpl_citesummary_breakdown_by_fame(
                d_cites, low, high, fame, CFG_CITESUMMARY_COLLECTIONS,
                searchpattern, searchfield, ln))

        # 4) hcs calculate h index
        d_h_factors = {}
        for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
            # most cited first; the sort is stable, so records with equal
            # counts keep their relative order
            ranked = sorted(d_recid_citecount_l[coll],
                            key=lambda pair: pair[1], reverse=True)
            h_factor = 0
            for dummy_recid, numcites in ranked:
                # stop at the first record whose rank exceeds its count
                if numcites < h_factor + 1:
                    break
                h_factor += 1
            d_h_factors[coll] = h_factor
        emit(websearch_templates.tmpl_citesummary_h_index(
            d_h_factors, CFG_CITESUMMARY_COLLECTIONS, ln))

        # 5) hcs epilogue:
        emit(websearch_templates.tmpl_citesummary_epilogue(ln))

        if not req:
            return "\n".join(html)
        else:
            return ''

    elif of == 'xcs':
        # this is XML cite summary
        citedbylist = get_cited_by_list(recids)
        return print_citation_summary_xml(citedbylist)
def get_citers(d_recids):
    """Map every key of D_RECIDS to get_cited_by_list() of its records.

    D_RECIDS is a dictionary whose values are sets of record ids; the
    result has the same keys.
    """
    return dict((coll, get_cited_by_list(recids))
                for coll, recids in d_recids.iteritems())
Exemple #11
0
def get_citers(d_recids):
    """Fetch the cited-by list for each group of records in D_RECIDS.

    Returns a new dictionary with the keys of D_RECIDS, each one holding
    what get_cited_by_list() returned for the records stored under it.
    """
    citers_by_coll = {}
    for coll_name, coll_recids in d_recids.iteritems():
        cited_by = get_cited_by_list(coll_recids)
        citers_by_coll[coll_name] = cited_by
    return citers_by_coll
def render_self_citations(d_recids, d_total_recs, ln):
    """Return the 'minus self cites' part of the cite summary.

    D_RECIDS holds the records of each collection and D_TOTAL_RECS how many
    there are; LN is the language.  Per collection listed in
    CFG_CITESUMMARY_COLLECTIONS, the results of compute_self_citations()
    over all its records are added up and divided by the record count to
    get the average (left at 0 for a collection without records).

    The totals and averages are rendered with
    tmpl_citesummary_minus_self_cites().  If get_authors_tags() raises
    IndexError the exception is registered, the admin is alerted and ""
    is returned.
    """
    try:
        tags = get_authors_tags()
    except IndexError as exc:
        prefix = "attribute " + str(exc) + " missing in config"
        register_exception(prefix=prefix, alert_admin=True)
        return ""

    totals = {}
    averages = {}
    for coll, dummy_colldef in CFG_CITESUMMARY_COLLECTIONS:
        totals[coll] = 0
        averages[coll] = 0

        cited_by = get_cited_by_list(d_recids[coll])
        # shared by all compute_self_citations calls of this collection
        authors_cache = {}
        for recid, lciters in cited_by:
            totals[coll] += compute_self_citations(
                recid, lciters, authors_cache, tags)

        nb_recs = d_total_recs[coll]
        if nb_recs != 0:
            averages[coll] = totals[coll] * 1.0 / nb_recs

    return websearch_templates.tmpl_citesummary_minus_self_cites(
        totals, averages, CFG_CITESUMMARY_COLLECTIONS, ln)


def summarize_records(recids, of, ln, searchpattern="", searchfield="", req=None):
    """Write summary report for records RECIDS in the format OF in language LN.
       SEARCHPATTERN and SEARCHFIELD are search query that led to RECIDS,