def report(query, label, refresh=True):
    """Print out an org-mode report for search results.

    The report is written to stdout and consists of a table of document
    types, a table of the 20 most productive authors, a table of the 12
    most frequent journals, a table of the 10 most cited papers (together
    with the h-index of the result set) and a bibliography.  As a side
    effect a histogram of the number of authors per publication is saved
    to ``<label>-nauthors-per-publication.png`` in the current working
    directory.

    Parameters
    ----------
    query : str
        The search query based on which results the report should be
        generated.

    label : str
        The label used in the document title ("Report for ...").

    refresh : bool (optional, default=True)
        Whether to refresh a cached file containing results of a previous
        query or not.
    """
    # Header
    print('*** Report for {}\n'.format(label))
    print('#+attr_latex: :placement [H] :center nil')

    # Perform query.  The results are read once and reused; ``or []`` keeps
    # the report printable should the search expose "no results" as None.
    s = ScopusSearch(query, refresh=refresh)
    results = s.results or []
    journal_res = [p for p in results if p.aggregationType == "Journal"]

    # Parse results
    doc_types = Counter(p.aggregationType for p in results)
    paper_cites = {(p.title, p.doi): int(p.citedby_count) for p in journal_res}
    Ncites = sum(paper_cites.values())
    papers = len(journal_res)
    author_count = []
    au_counts = defaultdict(int)
    j_counts = defaultdict(int)
    for p in journal_res:
        # Records without author data count as zero authors instead of
        # raising on ``None.split`` (or adding a phantom blank author).
        ids = p.authid.split(";") if p.authid else []
        names = p.authname.split(";") if p.authname else []
        author_count.append(len(ids))
        for key in zip(names, ids):
            au_counts[key] += 1
        j_counts[(p.publicationName, p.source_id, p.issn)] += 1

    # Document information
    print('#+caption: Types of documents found for {}.'.format(label))
    print('| Document type | count |\n|-')
    for key, value in doc_types.items():
        print('| {} | {} |'.format(key, value))

    print('\n\n{} articles ({} citations) '
          'found by {} authors'.format(papers, Ncites, len(au_counts)))

    # Author counts {(name, scopus-id): count}
    auth_url = "[[https://www.scopus.com/authid/detail.uri?authorId={}][{}]]"
    view = [(auth_url.format(k[1], k[0]), v, k[1])
            for k, v in au_counts.items()]
    view.sort(reverse=True, key=itemgetter(1))
    print('\n#+attr_latex: :placement [H] :center nil')
    print('#+caption: Author publication counts for {0}.'.format(label))
    print('| name | count | categories |\n|-')
    for name, count, identifier in view[:20]:
        # Only the first three subject categories of each author are listed.
        cats = ', '.join(
            '{} ({})'.format(cat[0], cat[1])
            for cat in get_subject_docs(identifier, refresh)[0:3]
        )
        print('| {} | {} | {} |'.format(name, count, cats))

    # Journal information
    jour_url = '[[https://www.scopus.com/source/sourceInfo.url?sourceId={}][{}]]'
    jview = [(jour_url.format(k[1], k[0][0:50]), k[1], k[2], v)
             for k, v in j_counts.items()]
    jview.sort(reverse=True, key=itemgetter(3))
    print('\n\n#+attr_latex: :placement [H] :center nil')
    print('#+caption: Journal publication counts for {}.'.format(label))
    print('| Journal | count |\n|-')
    for journal, _sid, _issn, count in jview[0:12]:
        print('| {} | {} |'.format(journal, count))

    # Top cited papers
    pview = [('[[{}][{}]]'.format(k[1], k[0][0:60]), v)
             for k, v in paper_cites.items()]
    pview.sort(reverse=True, key=itemgetter(1))
    h_index = hindex([p[1] for p in pview])
    print('\n\n#+attr_latex: :placement [H] :center nil')
    print('#+caption: Top cited publication'
          ' counts for {}. h-index = {}.'.format(label, h_index))
    print('| title | cite count |\n|-')
    for title, count in pview[0:10]:
        print('| {} | {} |'.format(title, count))

    # Plot authors per publication.  The figure is closed explicitly so that
    # repeated calls do not accumulate open pyplot figures.
    fig = plt.figure()
    try:
        plt.hist(author_count, 20)
        plt.xlabel('# authors')
        plt.ylabel('frequency')
        plt.savefig('{}-nauthors-per-publication.png'.format(label))
    finally:
        plt.close(fig)

    # Bibliography
    print('\n\n#+caption: Number of authors '
          'on each publication for {}.'.format(label))
    print('[[./{}-nauthors-per-publication.png]]'.format(label))
    print('''**** Bibliography  :noexport:
     :PROPERTIES:
     :VISIBILITY: folded
     :END:''')
    for i, p in enumerate(journal_res, 1):
        abstract = AbstractRetrieval(p.eid)
        print('{}. {}\n'.format(i, abstract))
def calculate_h_index(df_authors, db_path, has_key=False):
    """Look up, or compute through the API, the h-index of each author.

    Three stats are collected: (i) author name and his/her h-index,
    (ii) the citation list of each pmid, and (iii) the pmids of each author.
    Authors already present in the JSON database at ``db_path`` are answered
    from it; any other author whose name is longer than five characters is
    queried through ``pull_url`` / ``get_links_ids``.  The merged database is
    then written to ``data/author_h_indexes.json`` next to this module.

    Parameters
    ----------
    df_authors : iterable
        Author names (presumably strings; verify against the caller).  A
        single leading space is dropped from each name.

    db_path : str
        Path of the JSON database to read.  It is expected to hold the
        sections 'h_indices', 'pmids' and 'citations'; a missing section is
        treated as empty.

    has_key : bool (optional, default=False)
        Whether ``Entrez.api_key`` is passed on to ``pull_url``.

    Returns
    -------
    dict
        Maps each processed author name to its h-index.  Names of five
        characters or fewer that are not in the database map to -1.
    """
    author_2_hindex = {}
    author_2_hindex_return = {}
    pmid_2_cite = {}
    author_2_pmids = {}

    with open(db_path) as h_index_db_file:
        h_index_db = json.load(h_index_db_file)

    # Tolerate a database file that lacks one of the expected sections.
    for section in ('h_indices', 'pmids', 'citations'):
        h_index_db.setdefault(section, {})
    cached_h_indices = h_index_db['h_indices']

    for name in tqdm(df_authors):
        # startswith() copes with an empty name, where name[0] would raise.
        author = name[1:] if name.startswith(' ') else name

        # First check if the name already exists in our db.
        if author in cached_h_indices:
            # TODO: a cached value of 0 is meant to trigger a new API call;
            # for the demo it is returned unchanged like any other value.
            author_2_hindex_return[author] = cached_h_indices[author]
            continue
        print(f"{author} not found in current database ({db_path})")

        # This ensures that we are not checking short and very common names
        # which takes forever to collect information.
        if len(author) <= 5:
            author_2_hindex[author] = -1
            author_2_hindex_return[author] = -1
            continue

        if has_key:
            page = pull_url(author, Entrez.email, Entrez.api_key)
        else:
            page = pull_url(author, Entrez.email)

        soup = BeautifulSoup(page.content, 'xml')
        author_pmids = [id_.get_text() for id_ in soup.find_all('Id', {})]
        author_2_pmids[author] = author_pmids

        # Split the author's pmids into those whose citation list was already
        # collected for an earlier author in this run and those still to fetch.
        known_pmids = []
        missing_pmids = []
        for pmid in author_pmids:
            pmid_number = int(pmid)
            if pmid_number in pmid_2_cite:
                known_pmids.append(pmid_number)
            else:
                missing_pmids.append(pmid_number)

        retrieved = get_links_ids(missing_pmids)

        # Papers shared with an earlier author must still count towards this
        # author's h-index, so their stored citation lists are included too.
        citations = [len(pmid_2_cite[pmid]) for pmid in known_pmids]
        for pmid, link_list in retrieved.items():
            pmid_2_cite[pmid] = link_list
            citations.append(len(link_list))

        author_2_hindex[author] = int(hindex(citations))
        author_2_hindex_return[author] = author_2_hindex[author]

    h_index_db['h_indices'].update(author_2_hindex)
    h_index_db['pmids'].update(author_2_pmids)
    h_index_db['citations'].update(pmid_2_cite)

    #TODO: make changes to path to overwrite current file
    output_path = os.path.join(os.path.dirname(__file__),
                               './data/author_h_indexes.json')
    with open(output_path, 'w') as output:
        output.write(
            json.dumps({
                'h_indices': h_index_db['h_indices'],
                'pmids': h_index_db['pmids'],
                'citations': h_index_db['citations']
            }))

    return author_2_hindex_return
 def test_hindex_with_nan(self):
     # A NaN entry among the citation counts must still yield an h-index of 4
     # when hindex is called with its default arguments.
     counts_with_gap = [6, 10, 5, 46, np.nan, 2]
     self.assertEqual(hindex(counts_with_gap), 4)
 def test_hindex_with_only_nan(self):
     # With ignore_nan=False and nothing but NaN input, the result must be NaN.
     all_missing = [np.nan] * 2
     result = hindex(all_missing, ignore_nan=False)
     self.assertTrue(np.isnan(result))
 def test_hindex(self):
     # Four of the six papers have at least four citations, so h-index = 4.
     self.assertEqual(hindex([6, 10, 5, 46, 0, 2]), 4)
 def calculateHIndex(self):
     """Return the h-index of the entries in ``self.citationArticles``.

     The "Citations" value of every entry is collected, in order, and the
     resulting list is passed to ``hindex``.
     """
     paperCitations = [
         eachCitation["Citations"] for eachCitation in self.citationArticles
     ]
     return hindex(paperCitations)
 def test_hindex_with_only_nan(self):
     # Only NaN citation counts, and NaN values are not ignored: expect NaN.
     nan_only = [np.nan for _ in range(2)]
     outcome = hindex(nan_only, ignore_nan=False)
     self.assertTrue(np.isnan(outcome))
 def test_hindex_with_nan(self):
     # One citation count is NaN; the default call is expected to return 4.
     observed = hindex([6, 10, 5, 46, np.nan, 2])
     self.assertEqual(observed, 4)
 def test_hindex(self):
     # Plain integer citation counts: 6, 10, 5 and 46 are each >= 4, so h = 4.
     observed = hindex([6, 10, 5, 46, 0, 2])
     self.assertEqual(observed, 4)