Example #1
def testOverlap():
    """
    See if there is any overlap betwen linked list for two identical authors
    """

    name = 'Williams, B'
    year = 2002
    phd1 = list(
        ads.query('bibstem:*PhDT',
                  authors=name,
                  dates=year,
                  database='astronomy',
                  rows='all'))[0]
    year = 2010
    phd2 = list(
        ads.query('bibstem:*PhDT',
                  authors=name,
                  dates=year,
                  database='astronomy',
                  rows='all'))[0]

    list1 = phdArticle2row(phd1, returnLinkedPapers=True)
    list2 = phdArticle2row(phd2, returnLinkedPapers=True)

    overlap = list(set(list1).intersection(list2))
    print('number of overlapping papers in linked lists = %i' % len(overlap))
Example #2
def generate_df(arXiv_dict):

    df = pd.DataFrame()

    for key in arXiv_dict:
        pages, figures = search_comments(arXiv_dict[key]['comment'])

        try:
            ads_paper = list(ads.query(title=str(arXiv_dict[key]['title'])))
            if len(ads_paper) == 1:
                new_row = pd.DataFrame({
                    'Arxiv_key': [key],
                    'Title': [ads_paper[0].title],
                    'Author': [ads_paper[0].author],
                    'Citation_count': [ads_paper[0].citation_count],
                    'Year': [ads_paper[0].year],
                    'Pub': [ads_paper[0].pub],
                    'Pages': [pages],
                    'Figures': [figures]
                })
                df = df.append(new_row)
        except:
            continue

    return df
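A minimal usage sketch, assuming generate_df and its search_comments helper (partially shown in Example #15) are defined in the same module, the legacy ads package exposing ads.query is installed and configured with an API key, and arXiv_dict maps arXiv identifiers to metadata dicts with 'title' and 'comment' keys. The entry below is a placeholder, not a real record.

import pandas as pd
import ads  # legacy ads client exposing ads.query

arXiv_dict = {
    'arXiv:1401.00001': {'title': 'Placeholder paper title',
                         'comment': '12 pages, 5 figures'},
}
df = generate_df(arXiv_dict)
print(df.head())  # empty if no unique ADS match was found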
Example #3
def test_citation_tree():

    output_filename = "citation-tree.json"

    paper = list(
        ads.query(author="^Casey, Andrew R.",
                  sort="citations",
                  order="desc",
                  rows=1))[0]
    paper.build_citation_tree(depth=2)

    ads.network.export(
        paper,
        "citations",
        output_filename,
        article_repr=lambda article: article.author[0],
        new_branch_repr=lambda article, branch: {
            "name": article.author[0],
            "children": branch
        },
        end_branch_repr=lambda article: {"name": article.author[0]},
        indent=2,
        clobber=True)

    if os.path.exists(output_filename):
        os.unlink(output_filename)
def testPerson(name, phdyear):
    """
    Test a random person and see if it gives the correct answer
    """

    phdA = list(ads.query('bibstem:*PhDT', authors=name, dates=phdyear,
                          database='astronomy', rows='all'))

    result = phdArticle2row(phdA[0], checkUSA=False, verbose=True, plot=True)

    print('PhD Institution: %s' % result['phd aff'])
    print('Latest Institution: %s' % result['latest aff'])
    print('Last year: %i' % result['latest year'])
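A hypothetical invocation, reusing the author name and thesis year from Example #1; any author/year pair with a PhD thesis record in ADS should work.

testPerson('Williams, B', 2002)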
Example #8
    def __getitem__(self, bibcode):
        """Access a paper given its bibcode."""
        # grab from the cache
        if self._ads_cache is not None:
            if bibcode in self._ads_cache:
                return self._ads_cache[bibcode]

        # or query from ADS
        ads_query = ads.query(query=bibcode)
        pub = ADSPub(ads_query.next())

        # cache it if we can
        if self._ads_cache is not None:
            self._ads_cache.insert(pub)

        return pub
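The method relies on only three cache operations: membership testing, item access by bibcode, and insert(). A minimal in-memory stand-in satisfying that interface is sketched below; it is purely illustrative, not the project's actual cache class, and it assumes cached pub objects expose a .bibcode attribute (not confirmed by the snippet above).

class DictCache(object):
    """Toy bibcode-keyed cache matching the interface used by __getitem__ above."""

    def __init__(self):
        self._store = {}

    def __contains__(self, bibcode):
        return bibcode in self._store

    def __getitem__(self, bibcode):
        return self._store[bibcode]

    def insert(self, pub):
        # assumes pub carries its own bibcode
        self._store[pub.bibcode] = pub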
Example #9
    def add_by_bibcode(self, bibcode, interactive=False, **kwargs):
        if ads is None:
            log.error("This action requires the ADS key to be setup.")
            return

        q = ads.query(bibcode)
        for article in q:
            # Data products are sometimes returned as NONARTICLE entries
            if article in self:
                log.warning("{} is already in the db.".format(article.bibcode))
            elif 'NONARTICLE' in article.property:
                log.warning("{} is not an article.".format(article.bibcode))
            else:
                if interactive:
                    self.add_interactively(article)
                else:
                    self.add(article, **kwargs)
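A hypothetical call, where db stands for an instance of the publication-database class this method belongs to (the variable name is assumed, not taken from the source):

db.add_by_bibcode("2014MNRAS.443..828C", interactive=False)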
Example #11
def abstract(request, bibcode):
    # return HttpResponse(f"Viewing abstract for bibcode {bibcode}")
    q = list(
        ads.query(bibcode,
                  fl=[
                      'bibcode', 'title', 'author', 'aff', 'doi', 'pub',
                      'pubdate', 'citation_count', 'abstract', 'arxiv_class',
                      'volume', 'issue', 'page', 'year', 'keyword',
                      'orcid_pub', 'orcid_user', 'orcid_other'
                  ]))
    assert len(q) == 1, "Non-unique bibcode"
    paper = q[0]

    bibtex = ads.ExportQuery(bibcode).execute()
    try:
        eprint = re.search(r'eprint = \{(.+)\}', bibtex)[1]
    except:
        eprint = None

    orcid = paper.orcid_pub
    try:
        orcid = [
            pub if pub != '-' else auth
            for pub, auth in zip(paper.orcid_pub, paper.orcid_user)
        ]
    except:
        pass
    try:
        orcid = [
            o if o != '-' else other
            for o, other in zip(orcid, paper.orcid_other)
        ]
    except:
        pass

    template = loader.get_template('abstract.html')
    context = {
        'paper': paper,
        'eprint': eprint,
        'bibtex': bibtex,
        'authors': zip(paper.author, paper.aff, orcid)
    }
    return HttpResponse(template.render(context, request))
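For context, a Django view like this is normally exposed through a URL pattern that captures the bibcode; the route, module, and name below are illustrative guesses rather than the project's actual urls.py.

# urls.py (hypothetical wiring for the abstract view)
from django.urls import path

from . import views

urlpatterns = [
    path('abs/<str:bibcode>/', views.abstract, name='abstract'),
]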
Example #12
def testRow():
    myphd = list(ads.query('bibcode:2007PhDT.........3Y', database='astronomy', rows='all'))[0]
    phdArticle2row(myphd)
Example #13
def phdArticle2row(phdArticle, yearsPrePhD=7, verbose=False, checkUSA=True,
                   justKeys=False, plot=False, returnNetwork=False, returnLinkedPapers=False):
    """
    Take an ads article object and return a dict of information with keys:
    [name, phd year, phd bibcode, phd.aff, latest paper bibcode, latest year,
    latest aff, latest 1st author bibcode, latest 1st year, latest 1st aff,
    largest publication gap]

    Note:  Currently not making any cut based on peer-review. Thus, latest 1st author
    paper could be a AAS poster, SPIE paper, arXive posting, etc.

    XXX-consider pulling some metrics from ADS and putting them in the row.
    """
    if verbose:
        print('searching for papers linked to:', phdArticle)


    result = {}
    resultKeys = ['name', 'phd year', 'phd bibcode', 'phd aff',
                  'latest year',
                  'latest aff', 'latest 1st year',
                  'latest 1st aff', 'largest publication gap',
                  'numRecords', 'numLinked', 'uniqueName', 'latest year unlinked',
                  'noAstroJournal', 'nonUS', 'hindex', '1st auth hindex']

    if justKeys:
        return resultKeys


    for key in resultKeys:
        result[key] = None

    maxYear = datetime.date.today().year
    minYear = int(phdArticle.year) - yearsPrePhD
    years = '%i-%i' % (minYear, maxYear)

    result['name'] = authSimple(phdArticle.author[0])
    result['phd year'] = int(phdArticle.year)
    result['phd aff'] = phdArticle.aff[0]
    result['phd bibcode'] = phdArticle.bibcode


    # Check that phd is from the US
    if checkUSA:
        if not checkUSAff(phdArticle.aff[0]):
            result['nonUS'] = True
            if verbose:
                print('%s does not test as a USA affiliation' % phdArticle.aff[0])
            return result

    # Query for all the papers by this author name
    paperList = authorsPapers(phdArticle.author[0], years=years)

    if verbose:
        print('Found %i papers' % len(paperList))

    result['numRecords'] = len(paperList)
    # Check that there's an astro paper in here
    if not inAstroJ(paperList):
        result['noAstroJournal'] = True
        if verbose:
            print('Did not find an astro paper in results')
        return result

    # Find all the papers linked to the PHD in question
    linkedPapers, linkedGraph = authorGroup(paperList, phdArticle,
                                            authSimple(phdArticle.author[0]))

    # Compute the h-index from citations to the linked papers
    citations = [paper.citation_count for paper in linkedPapers]
    result['hindex'] = hindex(citations)

    if returnLinkedPapers:
        return linkedPapers
    result['numLinked'] = len(linkedPapers)
    if plot:
        years = [float(paper.year) for paper in linkedPapers]
        nx.draw_spring(linkedGraph)  # node_color=np.array(years)

    if verbose:
        print('Found %i papers linked to phd' % len(linkedPapers))
    # Make sure there's still a publication in an astro journal
    if not inAstroJ(linkedPapers):
        result['noAstroJournal'] = True
        if verbose:
            print('Did not find an astro paper in linked results')
        return result

    linkedYears = []
    linked1stA = []
    linked1stAYears = []
    latestPaper = linkedPapers[0]
    latest1stApaper = phdArticle

    latestAff = phdArticle.aff[0]
    affDate = phdArticle.pubdate.split('-')
    month = int(affDate[1])
    if month < 1:
        month = 1
    affDate = datetime.date(year=int(phdArticle.year), month=month, day=1)

    for paper in linkedPapers:
        if hasattr(paper, 'year'):
            linkedYears.append(int(paper.year))
            if int(paper.year) > int(latestPaper.year):
                latestPaper = paper

        if authSimple(paper.author[0]) == authSimple(phdArticle.author[0]):
            linked1stA.append(paper)
            if hasattr(paper, 'year'):
                linked1stAYears.append(int(paper.year))
                if int(paper.year) > int(latest1stApaper.year):
                    latest1stApaper = paper
        paperDate = int(paper.pubdate.split('-')[1])
        if paperDate < 1:
            paperDate = 1
        if hasattr(paper,'year'):
            paperDate = datetime.date(int(paper.year), paperDate, 1)

            if paperDate >= affDate:
                for auth,aff in zip(paper.author, paper.aff):
                    if authSimple(auth) == authSimple(phdArticle.author[0]):
                        if aff is not None:
                            if len(aff) > 3:
                                latestAff = aff
                                affYear = int(paper.year)


    result['largest publication gap'] = np.max(np.diff(np.sort(linkedYears)))
    result['latest year'] = int(latestPaper.year)
    result['latest 1st year'] = int(latest1stApaper.year)
    result['latest aff'] = latestAff

    allYears = [int(paper.year) for paper in paperList if hasattr(paper,'year')]
    result['latest year unlinked'] = np.max(allYears)
    citations = [paper.citation_count for paper in linked1stA]
    result['1st auth hindex'] = hindex(citations)

    # Test to see if this is the only person with this name and a phd in astro
    ack = list(ads.query('bibstem:"*PhDT", author:"%s"' % authSimple(phdArticle.author[0]),
                         database='astronomy'))
    titles = []
    if len(ack) > 1:
        # Make sure the titles are different
        for paper in ack:
            if hasattr(paper, 'title'):
                if paper.title is not None:
                    titles.append(paper.title[0].lower())
        titles = set(titles)
        # titles = set([paper.title[0].lower() for paper in ack if hasattr(paper, 'title')])
    if len(titles) > 1:
        if verbose:
            print(authSimple(phdArticle.author[0]) + ' returns multiple PhDT.')
        result['uniqueName'] = False
    else:
        result['uniqueName'] = True

    if returnNetwork:
        return result, linkedGraph
    return result
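hindex() is called above but not defined in any of these snippets. A minimal sketch of the standard h-index computation is given below; it may differ in detail from the original helper.

def hindex(citations):
    """Largest h such that at least h papers have h or more citations each."""
    counts = sorted((c for c in citations if c is not None), reverse=True)
    return sum(1 for rank, c in enumerate(counts, start=1) if c >= rank)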
Example #14
    def update(self,
               month=None,
               exclude=[
                   'keplerian', 'johannes', 'k<sub>2</sub>', "kepler equation",
                   "kepler's equation", "xmm-newton", "kepler's law",
                   "kepler's third law", "kepler problem", "kepler crater",
                   "kepler's supernova", "kepler's snr"
               ]):
        """Query ADS for new publications.

        Parameters
        ----------
        month : str
            Of the form "YYYY-MM".

        exclude : list of str
            Ignore articles if they contain any of the strings given
            in this list. (Case-insensitive.)
        """
        if ads is None:
            log.error("This action requires the ADS key to be setup.")
            return

        if month is None:
            month = datetime.datetime.now().strftime("%Y-%m")

        # First show all the papers with the Kepler funding message in the ack
        log.info("Querying ADS for acknowledgements (month={}).".format(month))
        qry = ads.query("""ack:"Kepler mission"
                           OR ack:"K2 mission"
                           OR ack:"Kepler team"
                           OR ack:"K2 team"
                           -ack:"partial support from"
                        """,
                        dates=month,
                        rows='all',
                        database='astronomy')
        articles = list(qry)
        for idx, article in enumerate(articles):
            statusmsg = ("Showing article {} out of {} that mentions Kepler "
                         "in the acknowledgements.\n\n".format(
                             idx + 1, len(articles)))
            self.add_interactively(article, statusmsg=statusmsg)

        # Then search for keywords in the title and abstracts
        log.info(
            "Querying ADS for titles and abstracts (month={}).".format(month))
        qry = ads.query("""abs:"Kepler" OR abs:"K2"
                           OR abs:"KIC" OR abs:"EPIC" OR abs:"KOI"
                           OR title:"Kepler" OR title:"K2"
                        """,
                        dates=month,
                        rows='all',
                        database='astronomy')  # ,property='refereed')
        articles = list(qry)

        for idx, article in enumerate(articles):
            # Ignore articles without abstract
            if not hasattr(article, 'abstract'):
                continue
            abstract_lower = article.abstract.lower()

            ignore = False

            # Ignore articles containing any of the excluded terms
            for term in exclude:
                if term.lower() in abstract_lower:
                    ignore = True

            # Ignore articles already in the database
            if article in self:
                ignore = True

            # Ignore all the unrefereed non-arxiv stuff
            try:
                if "NOT REFEREED" in article.property and article.pub != "ArXiv e-prints":
                    ignore = True
            except AttributeError:
                pass  # no .pub attribute

            # Ignore proposals and cospar abstracts
            if ".prop." in article.bibcode or "cosp.." in article.bibcode:
                ignore = True

            if not ignore:  # Propose to the user
                statusmsg = '(Reviewing article {} out of {}.)\n\n'.format(
                    idx + 1, len(articles))
                self.add_interactively(article, statusmsg=statusmsg)
        log.info('Finished reviewing all articles for {}.'.format(month))
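A hypothetical call, where db stands for an instance of the publication-database class this method belongs to, and an ADS API key is assumed to be configured:

db.update(month="2018-03")   # review publications from a specific month
db.update()                  # defaults to the current month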
Example #15
            page_numbers = int(comments[page_number_ind])
    if figure_number_ind != -1:
        if comments[figure_number_ind].isdigit():
            figure_numbers = int(comments[figure_number_ind])

    return [page_numbers, figure_numbers]


# In[273]:

df = pd.DataFrame()
for i, key in enumerate(arXiv_dict.keys()):
    pages, figures = search_comments(arXiv_dict[key]['comment'])
    try:
        ads_paper = list(ads.query(title=str(arXiv_dict[key]['title'])))
        if len(ads_paper) == 1:
            new_row = pd.DataFrame({'Title': [ads_paper[0].title],
                                    'Author': [ads_paper[0].author],
                                    'Citation_count': [ads_paper[0].citation_count],
                                    'Year': [ads_paper[0].year],
                                    'Pub': [ads_paper[0].pub],
                                    'Pages': [pages],
                                    'Figures': [figures]})
            df = df.append(new_row)
        else:
            print(i, 'key', key, 'has multiple query returns')
    except:
        continue


# In[282]:

df.plot(x='Pages',y='Citation_count',kind='scatter')
Example #16
def recommend_references(bibcode, num=3, ratio=0.5):
    """
    Return bibcodes of recommended articles that might have been worth citing
    in the bibcode provided.

    :param bibcode:
        The bibcode of the article that you would like to have recommended
        citations for.

    :type bibcode:
        str

    :param num:
        Number of article bibcodes to recommend.

    :type num:
        int

    :param ratio:
        Self-similarity ratio in names in order to identify two author names as
        being the same person.

    :type ratio:
        float

    :returns:
        Article bibcodes that probably should have been cited.
    """

    # OK I have used crazy variable names for "humans" because this is otherwise
    # a conceptually annoying problem.
    num = int(num)
    if 1 > num:
        raise ValueError("number of requested articles must be a a positive integer")

    if not (1 >= ratio > 0):
        raise ValueError("self-similarity ratio must be between (0, 1]")

    # First get the article
    try:
        original_article = list(ads.query(bibcode))[0]

    except:
        raise ValueError("could not find original article with bibcode {0}".format(bibcode))

    #logging.debug("Article has bibcode, title: {0} {1}".format(
    #    original_article.bibcode, original_article.title))

    # Find all the citations that the paper made
    articles_i_cited = list(original_article.references)

    #logging.debug("This article cited {0} papers".format(len(articles_i_cited)))

    bibcodes_of_articles_i_cited = [each.bibcode for each in articles_i_cited]

    # Who else cited the papers that I cited?
    which_articles_cite_what_we_cite = []

    # Go to all of those papers
    for article in articles_i_cited:

        # Find all of the references from those papers
        # (let's call them "all_references")
        which_articles_cite_what_we_cite.extend(
            [each.bibcode for each in article.citations])

    most_popular_articles_cited = [article for article in which_articles_cite_what_we_cite \
        if article not in bibcodes_of_articles_i_cited]

    # Create a collection counter for these items
    most_popular_articles_cited = collections.Counter(most_popular_articles_cited)

    # Sort by most values
    most_popular_articles_cited = sorted(
        most_popular_articles_cited.items(), key=operator.itemgetter(1))[::-1]

    # Let's not include papers written by the current author
    recommended_bibcodes = []
    original_author = original_article.author[0].lower().replace(".", "")
    for bibcode, popularity in most_popular_articles_cited:

        article = list(ads.query(bibcode))[0]
        this_author = article.author[0].lower().replace(".", "")

        if difflib.SequenceMatcher(a=this_author, b=original_author).ratio() > ratio:
            # Same author as the original one, so let's ignore it.
            continue

        else:
            recommended_bibcodes.append(bibcode)
            if len(recommended_bibcodes) == num:
                break

    return recommended_bibcodes
Example #17
            continue

        else:
            recommended_bibcodes.append(bibcode)
            if len(recommended_bibcodes) == num:
                break

    return recommended_bibcodes


if __name__ == "__main__":

    # citation-buddy bibtex_code
    # bibtex_code = 2014MNRAS.443..828C

    # STEPS
    # (1) Find all the citations that the paper made
    # (2) Go to all of those papers, and find out who else cited those papers
    # (3) Go to all of those papers, and find out which papers they cited that
    #     you didn't. Count the frequency and find the highest ones

    if len(sys.argv) > 1:
        bibcodes = recommend_references(sys.argv[1])

        for bibcode in bibcodes:

            article = list(ads.query(bibcode))[0]
            et_al = ["", " et al"][len(article.author) > 1]
            print("I recommend this paper: {0} by {1}{2} at {3}".format(
                article.title[0], article.author[0], et_al, article.url))