def get_paper_count(query, rows_per_page=500, max_pages=100):
    """
    Return the number of ADS records matching a query.

    Parameters
    ----------
    query : str
        ADS query string.
    rows_per_page : int (optional)
        Currently unused.
    max_pages : int (optional)
        Currently unused.

    Returns
    -------
    count : int
        Total number of matching records (``numFound``).
    """
    # Only a single row is requested because only the hit count is needed.
    q = ads.SearchQuery(q=query, rows=1)
    q.execute()
    return q.response.numFound
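A minimal usage sketch (the token value is a placeholder and the query string is only illustrative):

import ads

ads.config.token = "YOUR_ADS_TOKEN"  # placeholder: use your own ADS API token

# Count refereed records whose abstract mentions "exoplanet"
n = get_paper_count('abs:"exoplanet" property:refereed')
print("Matching records:", n)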
Example 2
def qsearch(request, qstring=None):
    if qstring is None:
        try:
            qstring = request.GET['qsearch']
        except KeyError:
            return HttpResponseRedirect('/')

    if 'qsort' in request.GET:
        sort = request.GET['qsort']
    else:
        sort = 'classic_factor'

    if 'page' in request.GET:
        page = int(request.GET['page'])
    else:
        page = 0

    results = list(
        ads.SearchQuery(q=qstring,
                        fl=[
                            'bibcode', 'title', 'author', 'pubdate', 'doi',
                            'classic_factor'
                        ],
                        rows=400,
                        start=page * 400,
                        sort=sort))

    if sort == 'classic_factor':
        try:
            norm = max(r.classic_factor for r in results)
            for r in results:
                r.classic_factor /= norm / 50
                r.classic_factor += 50
        except ValueError:
            # This usually means an empty result
            # just carry on
            pass

    template = loader.get_template('qsearch.html')
    context = {
        'qstring': qstring,
        'results': results,
        'total': len(results),
        'page': page,
        'sort': sort
    }
    return HttpResponse(template.render(context, request))
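A hypothetical URL configuration for wiring up this view (module layout and route names are assumptions, not from the original project):

# urls.py -- illustrative only
from django.urls import path

from . import views

urlpatterns = [
    path('qsearch/', views.qsearch, name='qsearch'),
    path('qsearch/<str:qstring>/', views.qsearch, name='qsearch-direct'),
]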
Example 3
def ReadADSAuthor(authlist):
    papers = []
    for author in authlist:
        try:
            papers += list(
                ads.SearchQuery(author=author,
                                sort="pubdate asc",
                                rows=400,
                                fl=[
                                    'id', 'bibcode', 'title', 'date',
                                    'citation_count', 'author', 'citation',
                                    'pubdate', 'year', 'pub', 'volume', 'page'
                                ]))
        except Exception:
            print('No connection with ADS, no updates...')
    # Report and return only after every author has been queried
    print('From ADS retrieved total {} papers'.format(len(papers)))
    return papers
Example 4
def get_total():
    print("Getting total #'s")
    years = np.arange(1970, thisyear + 1)
    values = []

    for year in years:
        print("Getting {0}".format(year))
        result = ads.SearchQuery(database='astronomy',
                                 year="{0}".format(year),
                                 property='refereed',
                                 fl=['year'],
                                 rows=1,
                                 max_pages=1)
        result.execute()
        values.append(result.response.numFound)

    return years, np.array(values)
Example 5
    def __init__(self, name, year=None):
        p = list(
            ads.SearchQuery(author=name,
                            max_pages=10,
                            fl=[
                                "id", "bibcode", "citation_count", "author",
                                "year", "property"
                            ]))

        # filter by year, if desired
        if year is None:
            self.mypapers = p
        else:
            pyr = [q for q in p if int(q.year) >= year]
            self.mypapers = pyr

        self.num = len(self.mypapers)
Example 6
def save_query_to_collection(query_params, collection, api_token=None):

    if api_token is None:
        # don't put this on github
        with open('api_token.txt', 'r') as f:
            ads.config.token = f.read()
    else:
        ads.config.token = api_token

    q = ads.SearchQuery(**query_params)  #  max allowed rows
    all_responses = []
    for paper in q:
        response = {}
        for field in query_params['fl']:
            response[field] = getattr(paper, field)
        all_responses.append(response)

    if len(all_responses) > 0:
        print('Inserting {} papers'.format(len(all_responses)))
        collection.insert_many(all_responses)  # modifies inplace
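A minimal usage sketch, assuming a local MongoDB reachable through pymongo (database, collection, and query values are made up):

from pymongo import MongoClient

client = MongoClient('localhost', 27017)      # assumed local MongoDB instance
collection = client['ads_cache']['papers']    # hypothetical database/collection

query_params = {
    'q': 'author:"Hubble, E."',
    'fl': ['bibcode', 'title', 'year', 'citation_count'],
    'rows': 2000,
}
save_query_to_collection(query_params, collection, api_token='YOUR_ADS_TOKEN')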
Example 7
def get_all_papers(author):
    papers = ads.SearchQuery(author=author,
                             sort="date",
                             max_pages=128,
                             fl=[
                                 "id", "title", "author", "doi", "year",
                                 "pubdate", "pub", "volume", "page",
                                 "identifier", "doctype", "citation_count",
                                 "bibcode"
                             ])

    all_dicts = []
    for paper in papers:
        # Get arxiv ID
        aid = [
            ":".join(t.split(":")[1:]) for t in paper.identifier
            if t.startswith("arXiv:")
        ]

        try:
            page = int(paper.page[0])
        except (ValueError, TypeError):
            page = None
            if paper.page is not None and paper.page[0].startswith("arXiv:"):
                aid.append(":".join(paper.page[0].split(":")[1:]))

        all_dicts.append(
            dict(
                doctype=paper.doctype,
                authors=paper.author,
                year=paper.year,
                pubdate=paper.pubdate,
                doi=paper.doi[0] if paper.doi is not None else None,
                title=paper.title[0],
                pub=paper.pub,
                volume=paper.volume,
                page=page,
                arxiv=aid[0] if len(aid) else None,
                citations=(paper.citation_count
                           if paper.citation_count is not None else 0),
                url="http://adsabs.harvard.edu/abs/" + paper.bibcode,
            ))

    return sorted(all_dicts, key=lambda x: x['pubdate'], reverse=True)
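A short usage sketch (author name and output path are illustrative):

import json

papers = get_all_papers("Hogg, D. W.")   # illustrative author name
with open("papers.json", "w") as f:      # illustrative output path
    json.dump(papers, f, indent=2)
print("Saved {} papers".format(len(papers)))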
Example 8
def get_numbers(language):
    print("Getting {0} #'s".format(language))

    years = np.arange(1970, thisyear + 1)
    values = []

    for year in years:
        print("Getting {0}".format(year))
        query = ads.SearchQuery(full=language,
                                database='astronomy',
                                property='refereed',
                                year="{0}".format(year),
                                fl=['year'],
                                rows=100,
                                max_pages=1000)
        query.execute()
        values.append(query.response.numFound)

    return years, np.array(values, dtype=float)
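Combined with get_total above, these per-year counts can be turned into fractions; a sketch (the plotting details and output file name are assumptions):

import matplotlib.pyplot as plt

years, totals = get_total()
_, counts = get_numbers("Python")

# Fraction of refereed astronomy papers whose full text mentions the language
frac = counts / totals
plt.plot(years, frac, label="Python")
plt.xlabel("Year")
plt.ylabel("Fraction of refereed papers")
plt.legend()
plt.savefig("language_fraction.png")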
Example 9
def search_ads(results_dir=None, verbose=False):
    """Search NASA ADS for publications mentioning the TOI.

    Parameters
    ----------
    results_dir : str
        directory location of downloaded files
    verbose : bool
        print texts

    Returns
    -------
    toi_pub : dict
        dictionary with TIC as key and a list of paper titles as value
    """
    try:
        import ads
        ads.config.token = ADS_TOKEN
    except ImportError:
        raise ImportError('please install ads first')

    if results_dir is None:
        results_dir = '.'
    if not exists(results_dir):
        sys.exit('{} does not exist!'.format(results_dir))
    tics = glob(join(results_dir,'tic*'))

    toi_pub = {}
    if len(tics)>0:
        for tic in tqdm(tics):
            #TOI.01
            tic = tic.split('/')[-1][3:]
            q = query_toi(tic=int(tic),clobber=False)
            toi = q['TOI'].values[0]
            toi = str(toi).split('.')[0]
            #FIXME: filter by year > 2018
            papers = ads.SearchQuery(q='TOI ' + toi,
                                     sort="citation_count",
                                     fq='database:astronomy')
            toi_pub[tic] = [paper.title for paper in papers]
    else:
        sys.exit('No tic* directories found in {}'.format(results_dir))
    if verbose:
        print(toi_pub)
    return toi_pub
Example 10
def cached_query(q):
    global CACHED_PAPERS
    global REMAINING_API_CALLS

    if not CACHED_PAPERS:
        with open('/opt/cache/cache.json') as cache_f:
            CACHED_PAPERS = json.load(cache_f)

    if q not in CACHED_PAPERS:
        results = ads.SearchQuery(author=q)
        CACHED_PAPERS[q] = {}
        for paper in results:
            if not paper.author:
                continue
            CACHED_PAPERS[q][paper.bibcode] = paper.author

        REMAINING_API_CALLS = results.response.get_ratelimits()['remaining']

    return CACHED_PAPERS[q]
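The snippet only reads the cache from disk; a complementary sketch for persisting new entries (same path and structure as above, but this part is an assumption):

import json

def save_cache():
    """Write the in-memory cache back so later runs can reuse it."""
    with open('/opt/cache/cache.json', 'w') as cache_f:
        json.dump(CACHED_PAPERS, cache_f)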
Example 11
    def add_by_bibcode(self, bibcode, interactive=False, **kwargs):
        if ads is None:
            log.error("This action requires the ADS key to be setup.")
            return

        q = ads.SearchQuery(q="identifier:{}".format(bibcode), fl=FIELDS)
        for article in q:
            # Print useful warnings
            if bibcode != article.bibcode:
                log.warning("Requested {} but ADS API returned {}".format(bibcode, article.bibcode))
            if 'NONARTICLE' in article.property:
                # Note: data products are sometimes tagged as NONARTICLE
                log.warning("{} is not an article.".format(article.bibcode))

            if article in self:
                log.warning("{} is already in the db.".format(article.bibcode))
            else:
                if interactive:
                    self.add_interactively(article)
                else:
                    self.add(article, **kwargs)
Example 12
    def __init__(self,
                 bibcode: str = None,
                 db_article: ads.search.Article = None,
                 judgement: bool = False):
        """
        Create new publication node
        :param bibcode: A bibcode to be queried from ADS
        :param db_article: An Article object to be used instead of querying the ADS
        """
        if db_article:
            self._article = db_article
        elif bibcode:
            _query = ads.SearchQuery(bibcode=bibcode,
                                     token=ADS_API_KEY,
                                     fl=[
                                         'bibcode', 'year', 'author', 'title',
                                         'reference', 'citation'
                                     ])
            self._article = next(_query)
            self._modularity_id: int = 0
        self.judgement = judgement
Example 13
    def __init__(self):
        p = list(
            ads.SearchQuery(author="Zingale, M",
                            max_pages=10,
                            fl=[
                                "id", "bibcode", "citation_count", "author",
                                "pub", "volume", "issue", "page", "year",
                                "title", "property", "authors"
                            ]))

        self.mypapers = p

        # hack around a bug whereby some papers might have "None" as the number of cites
        for paper in self.mypapers:
            if paper.citation_count is None:
                paper.citation_count = 0
            if paper.property is None:
                paper.property = []

        # do some sorting and splitting
        self.refereed = [q for q in self.mypapers if "REFEREED" in q.property]
        self.num = len(self.mypapers)
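As an illustration of what the collected citation counts allow, an h-index could be computed along these lines (a sketch, not part of the original class; `pc` below is a hypothetical instance):

def h_index(papers):
    # Largest h such that h papers have at least h citations each
    counts = sorted((p.citation_count for p in papers), reverse=True)
    h = 0
    for i, c in enumerate(counts, start=1):
        if c >= i:
            h = i
        else:
            break
    return h

# e.g. h_index(pc.mypapers)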
Example 14
    def _get_all_bibcodes(self, q):
        limits = {"remaining": "unknown"}

        with shelve.open(self.cache_file) as cache:
            if q in cache:
                result = cache[q]
                if result["expires"] >= time.time():
                    return result["bibcodes"], limits
                else:
                    del cache[q]

        sort = "bibcode desc"
        query = ads.SearchQuery(q=q, sort=sort, fl=["bibcode", "title"])
        bibcodes = []
        while True:
            query.execute()
            limits = query.response.get_ratelimits()
            new_bibcodes = []
            for a in query.response.articles:
                code = a.bibcode
                self.article_cache[code] = dict(a.items())
                new_bibcodes.append(a.bibcode)
            bibcodes += new_bibcodes
            if len(new_bibcodes) < 50:
                break

            # Check rate limits
            if int(limits["remaining"]) <= 0:
                wait = int(limits["reset"]) - time.time()
                print("Request has been rate limited. "
                      "Resets in {0} minutes".format(wait / 60.0))
                time.sleep(wait)

        with shelve.open(self.cache_file) as cache:
            cache[q] = dict(expires=time.time() + self.cache_ttl,
                            bibcodes=bibcodes)

        return bibcodes, limits
Example 15
    def getby_bibcode(bibcode):
        """
        Query an ADS item by bibcode.
        :param bibcode: bibcode (ADS unique identifier)
        :return: queried item as Node object
        """
        for i in range(5):
            try:
                query = ads.SearchQuery(bibcode=bibcode,
                                        fl=[
                                            'author', 'year', 'title',
                                            'bibcode', 'reference', 'citation'
                                        ])

                for item in query:
                    new_node = Node(item)
                    if new_node is not None:
                        return new_node
                    else:
                        print('Couldn\'t make node for bibcode {}'.format(
                            bibcode))
            except (IndexError, APIResponseError):
                print('Error occurred while querying ADS. Retrying...')
                continue
Example 16
    def by_keywords(self, keywords):
        """Query ADS for the publications containing any of a list of keywords.

        Aliases of the keywords (as determined by ADS) are included in the search.

        Params:
        -------
        keywords: list of str
            The list of keywords to search for.

        Returns:
        --------
        dict
            A dict mapping each publication's bibcode to its requested fields,
            plus a 'keywords' list of the search keywords that matched it.
        """

        publications = dict()
        for keyword in keywords:

            print('Searching for ' + keyword)

            q = 'full:"{keyword}" AND pubdate:{pubdate}'.format(
                keyword=keyword, pubdate=self.pubdate)
            query = ads.SearchQuery(q=q,
                                    fl=self.fields,
                                    fq='database:astronomy')
            for result in list(query):
                if result.bibcode not in publications:
                    publications[result.bibcode] = {
                        f: getattr(result, f)
                        for f in self.fields
                    }
                    publications[result.bibcode]['keywords'] = []
                publications[result.bibcode]['keywords'].append(keyword)

        return publications
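The returned mapping can then be summarized per keyword, for example (a sketch; `searcher` is a hypothetical instance of the class this method belongs to):

from collections import Counter

pubs = searcher.by_keywords(["dark energy", "dark matter"])

keyword_counts = Counter()
for bibcode, record in pubs.items():
    for kw in record['keywords']:
        keyword_counts[kw] += 1

for kw, n in keyword_counts.most_common():
    print("{}: {} publications".format(kw, n))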
Example 17
def query_ads(people):
    """
    Query ads for given list of people for given affiliation
    """
    all_pubs = []
    for p in people:
        query = list(
            ads.SearchQuery(author=p,
                            fl=PROPERTIES + OTHER_PROPERTIES,
                            rows=MAX_ROWS,
                            max_pages=MAX_PAGES))
        for q in query:
            if not accept_publication(q, people):
                continue
            tmp = {prop: q.__dict__[prop] for prop in PROPERTIES}
            tmp['year'] = int(tmp['year'])
            tmp['rs_author'] = [a for a in q.author if a in people]
            tmp['citation_count'] = len(
                q.citation) if q.citation is not None else 0
            tmp['date'] = '-'.join(
                ['01' if dt == '00' else dt for dt in q.pubdate.split('-')])
            all_pubs.append(tmp)

    return all_pubs
Example 18
def main(output_path, figure_format, orcid=False, bibcodes=False, query=False, save=False, plot=False, printable=False, test=False, desc=None):

    # Imports should not be here, but I don't care....
    if test:
        import ads.sandbox as ads
    else:
        import ads

    fl = ['id', 'bibcode']
    rows = 2000
    max_pages = 1

    print('Using rows: {} with max_pages: {}'.format(rows, max_pages))

    # See what the user has given to generate the metrics plot
    if query:
        sq = ads.SearchQuery(q=query, fl=fl, rows=rows, max_pages=max_pages)
        sq.execute()
        bibcodes = [i.bibcode for i in sq.articles]
        print('You gave a query: {}'.format(query))
        print('Found {} bibcodes (e.g., {})'.format(len(bibcodes), bibcodes[0:4]))
    elif orcid:
        query = 'orcid:{}'.format(orcid)
        sq = ads.SearchQuery(q=query, fl=fl, rows=rows, max_pages=max_pages)
        sq.execute()
        bibcodes = [i.bibcode for i in sq.articles]
        print('You gave an ORCiD iD: {}'.format(orcid))
        print('Found {} bibcodes (e.g., {})'.format(len(bibcodes), bibcodes[0:4]))
    elif bibcodes:
        sq = False
        print('You gave {} bibcodes: {}'.format(len(bibcodes), bibcodes[0:4]))
    else:
        sys.exit()

    # Collect the metrics from the API
    mq = ads.MetricsQuery(bibcodes=bibcodes)
    metrics = mq.execute()

    if plot:
        # Number of papers
        y, t, r = get_numbers_of_papers(metrics)

        number = dict(name='numbers', year=y, total=t, refereed=r)
        # Number of citations
        y, r2r, r2nr, nr2r, nr2nr = get_citations_of_papers(metrics)
        citation = dict(name='citations', year=y, ref_to_ref=r2r, non_ref_to_ref=nr2r, ref_to_non_ref=r2nr, non_ref_to_non_ref=nr2nr)

        # Indices
        y, h, g, tori, i10, read10, i100 = get_indices_of_papers(metrics)
        index = dict(name='indices', year=y, h=h, g=g, tori=tori, i10=i10, read10=read10, i100=i100)

        # Number of reads
        y, t, rr = get_reads_of_papers(metrics)
        reads = dict(name='reads', year=y, total=t, reads_ref=rr)

        # Define the figure and the axes
        fig = plt.figure(0, figsize=(8.27, 11.69))
        ax1 = fig.add_subplot(411)
        ax2 = fig.add_subplot(412)
        ax3 = fig.add_subplot(413)
        ax4 = fig.add_subplot(414)

        # Number of papers
        step(ax1, number['year'], number['total'] - number['refereed'], label='Not refereed', color='green')
        step(ax1, number['year'], number['refereed'], label='Refereed', color='blue')
        ax1.set_ylim([0, max(number['total'])+1])
        ax1.set_ylabel('Number of papers')
        leg1 = ax1.legend(loc=0)
        leg1.draw_frame(False)

        # Number of citations
        step(ax2, citation['year'], citation['ref_to_ref'], label='Ref. citations to ref. papers', color='blue')
        step(ax2, citation['year'], citation['ref_to_non_ref'], label='Ref. citations to non ref. papers', color='green')
        step(ax2, citation['year'], citation['non_ref_to_ref'], label='Non ref. citations to ref. papers', color='gold')
        step(ax2, citation['year'], citation['non_ref_to_non_ref'], label='Non ref. citations to non ref. papers', color='red')

        ax2.set_ylabel('Number of citations')
        max_citation = max(
            citation['ref_to_ref'].max(),
            citation['ref_to_non_ref'].max(),
            citation['non_ref_to_ref'].max(),
            citation['non_ref_to_non_ref'].max()
        )
        ax2.set_ylim([0, max_citation+1])
        leg2 = ax2.legend(loc=0)
        leg2.draw_frame(False)

        # Indices
        ax3.errorbar(index['year'], index['h'], label='h Index', color='blue', lw=2, ls='-')
        ax3.errorbar(index['year'], index['g'], label='g Index', color='green', lw=2, ls='-')
        ax3.errorbar(index['year'], index['i10'], label='i10 Index', color='gold', lw=2, ls='-')
        ax3.errorbar(index['year'], index['tori'], label='tori Index', color='red', lw=2, ls='-')
        ax3.errorbar(index['year'], index['i100'], label='i100 Index', color='purple', lw=2, ls='-')
        ax3.errorbar(index['year'], index['read10'], label='read10 Index', color='darkblue', lw=2, ls='-')
        max_index = max(h.max(), g.max(), i10.max(), tori.max(), i100.max(), read10.max())

        ax3.set_ylim([0, max_index+1])
        leg3 = ax3.legend(loc=0, ncol=2)
        leg3.draw_frame(False)

        # Number of reads
        step(ax4, reads['year'], reads['total'] - reads['reads_ref'], label='Non refereed', color='green')
        step(ax4, reads['year'], reads['reads_ref'], label='Refereed', color='blue')
        max_reads = max(
            reads['total'].max(),
            reads['reads_ref'].max()
        )

        min_year = reads['year'][0]
        for i in range(len(reads['year'])):
            if reads['total'][i] > 0 or reads['reads_ref'][i] > 0:
                break
            min_year = reads['year'][i]

        ax4.set_xlim([min_year, reads['year'].max()])
        ax4.set_ylim([0, max_reads+1])

        ax4.set_xlabel('Year')
        ax4.set_ylabel('Number of reads')
        leg4 = ax4.legend(loc=0)
        leg4.draw_frame(False)

        figure_path = '{}/metrics.{}'.format(output_path, figure_format)
        plt.savefig(figure_path)

    # Save to disk if requested
    # (note: number/citation/index/reads are built in the plotting branch above,
    #  so saving CSVs currently also requires plot=True)
    if save == 'csv':
        for output in [number, citation, index, reads]:
            with open('{}/{}.{}'.format(output_path, output['name'], save), 'w') as f:

                keys = [i for i in output.keys() if i != 'name' and i != 'year']
                f.write('#year,{}\n'.format(','.join(keys)))

                for i in range(len(output['year'])):
                    f.write('{year},{other}\n'.format(
                        year=output['year'][i].year,
                        other=','.join([str(output[k][i]) for k in keys])
                    ))

        save_metrics(metrics)

    # Does the user want a printable PDF?
    if printable:
        build_latex(metrics, orcid_id=orcid, plot=plot, desc=desc)
Example 19
def P(c):
    return min(5, 3 + 2 * c / 12.)

num_pubs = len(bibcodes)
if num_pubs < 11:
    raise ValueError("The publication list must have at least 10 articles")

PI_name_length = len(PI_name)

l_array = []
p_array = []
N = 10

for b in bibcodes:
    res = ads.SearchQuery(bibcode=b,
                          fl=['year', 'author', 'citation_count', 'title'])
    for paper in res:
        print("Processing paper: " + b)
        print('Title: "' + unidecode(paper.title[0]) + '"')
        yr = int(paper.year)
        years = 2016 - yr
        if yr < 2011:
            raise ValueError("No papers prior to 2011 can be used")
        if years == 0:
            years = 1
        p_i = P(paper.citation_count / years)
        if (yr == 2016) and (p_i < 4):
            p_i = 4.0
        p_array.append(p_i)
        author_list_length = len(paper.author)
        bis = min(author_list_length, 5)
Example 20
import pandas as pd
import ads

yt_bib = '2011ApJS..192....9T'
fields = ['author', 'bibcode', 'pubdate', 'title', 'author_norm']

q = ads.SearchQuery(reference=yt_bib, fl=fields, max_pages=100)
l = list(q)

data = {_: [] for _ in fields}

for r in l:
    for f in fields:
        data[f].append(getattr(r, f))

df = pd.DataFrame(data)

df.to_json("data/yt_citations.json", orient="records")
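The resulting JSON can be reloaded for quick checks, e.g. counting citing papers per year (a sketch; the file path matches the script above):

import pandas as pd

df = pd.read_json("data/yt_citations.json", orient="records")
# pubdate strings look like "YYYY-MM-00"; keep only the year part
per_year = df["pubdate"].str[:4].value_counts().sort_index()
print(per_year)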
Example 21
    for keyword in keywords:
        print(keyword)

        publication_data = []

        for journal in bibstemlist:
            # Initiate the dictionary for this journal
            journal_data = {"name": journal, "articles": [], "total": 0}

            for year in range(years[0], years[1] + 1):
                # Perform the query
                # We actually don't want all the results, we just want the metadata
                # which tells us how many publications there were
                q = ads.SearchQuery(
                    q="abstract:(=\"{keyword}\") bibstem:\"{journal}\" year:{year}"
                    .format(keyword=keyword, journal=journal, year=year),
                    fl=['id'],
                    rows=1)
                q.execute()

                num = int(q.response.numFound)
                print(
                    "using {keyword} in abstract {journal} had {num} publications in {year}"
                    .format(keyword=keyword,
                            journal=journal,
                            num=num,
                            year=year))

                # Save this data
                journal_data["articles"].append([year, num])
                journal_data["total"] += num
Example 22
            except:
                print("querying for some name that can't print")
            papers = authorsPapers(phd.author[0].encode('utf-8'),
                                   years='%i-%i' %
                                   (int(phd.year) - yearsPrePhD, now.year))
            phdDict = article2dict(phd)
            result = [article2dict(paper) for paper in papers]
            # Make sure the phd is in there!
            if phdDict not in result:
                result.append(phdDict)
            flags['nonUS'] = False

            # Need to add a search for similar named phds
            ack = list(
                ads.SearchQuery(q='bibstem:"*PhDT" author:"%s"' %
                                authSimple(phd.author[0].encode('utf-8')),
                                database='astronomy'))
            if len(ack) > 1:
                titles = set([
                    paper.title[0].lower() for paper in ack
                    if paper.title is not None
                ])
                if len(titles) > 1:
                    flags['uniqueName'] = False
                else:
                    flags['uniqueName'] = True
            else:
                flags['uniqueName'] = True

        savefile = phd.bibcode.replace('.', '_') + '.npz'
        np.savez(os.path.join(outDir, savefile), result=result, flags=flags)
Example 23
# encoding: utf-8
""" Who are the most cited astronomers? """

__author__ = "Andy Casey <*****@*****.**>"

import ads

# Let's assume the most cited people have the most cited papers, since we can
# only search for papers, not people
most_cited_papers = ads.SearchQuery(q='*',
                                    sort='citation_count desc',
                                    fq='database:astronomy',
                                    rows=50,
                                    fl=['first_author'])

# Who are these successful people, anyways?
successful_astronomers = [paper.first_author for paper in most_cited_papers]

# Okay, let's get the top 200 most-cited papers for each person and see how
# many citations they have in total
total_citations = {}
for astronomer in successful_astronomers:
    papers = ads.SearchQuery(first_author=astronomer,
                             sort='citation_count desc',
                             fq='database:astronomy',
                             rows=50,
                             fl=['citation_count'])
    total_citations[astronomer] = sum(
        [paper.citation_count for paper in papers])

# Now there's a problem because astronomers publish under "Aaronson, A" and
Example 24
    item = json.loads(filetext, object_pairs_hook=OrderedDict)
    item = item[list(item.keys())[0]]

    if 'sources' in item:
        for source in item['sources']:
            if 'bibcode' in source:
                bc = source['bibcode']
                if bc not in biblio:
                    tqdm.write(bc)

                    authors = ''
                    if bc in bibauthordict:
                        authors = bibauthordict[bc]

                    allauthors = list(ads.SearchQuery(bibcode=bc))
                    if allauthors and allauthors[0].author:
                        allauthors = allauthors[0].author
                    else:
                        allauthors = []
                    biblio[bc] = OrderedDict([('authors', authors),
                                              ('allauthors', allauthors),
                                              ('bibcode', bc), ('events', []),
                                              ('eventdates', []),
                                              ('types', []), ('photocount', 0),
                                              ('spectracount', 0),
                                              ('metacount', 0)])

                biblio[bc]['events'].append(item['name'])

                if 'discoverdate' in item and item['discoverdate']:
Example 25
#  listing with code names
arg1 = sys.argv[1]

codes = open(arg1).readlines()
for code in codes:
    code = code.strip()
    if code[0] == '#':
        continue

    print("# CODE:", code)

    # lazy loading (expensive)
    #q = ads.SearchQuery(full=code, sort=year)

    # loading with fields ahead of time
    q = ads.SearchQuery(
        full=code,
        fl=['title', 'first_author', 'year', 'citation_count', 'bibcode'],
        sort='year',
        rows=10)

    n1 = 0
    for paper in q:
        print("%s\t%s\t%s\t%s\t%s" %
              (paper.year, paper.citation_count, paper.bibcode,
               paper.first_author, paper.title[0]))
        n1 = n1 + 1

    q1 = q.response.get_ratelimits()
    print('# %d %s\n' % (n1, q1['remaining']))
Example 26
def add_paper_using_galpy(arxiv_id):
    # Read current file
    with open(os.path.join(_PAPERS_FILE_DIR, 'papers-using-galpy.json'),
              'r') as jsonFile:
        papers_data = json.load(jsonFile)
    duplicate = numpy.any([
        papers_data[p]['url'] == 'https://arxiv.org/abs/{}'.format(arxiv_id)
        for p in papers_data.keys()
    ])
    if duplicate:
        print("This appears to be a duplicate of an existing entry:")
        dup_indx = [
            papers_data[p]['url'] == 'https://arxiv.org/abs/{}'.format(
                arxiv_id) for p in papers_data.keys()
        ].index(True)
        print(
            json.dumps(papers_data[list(papers_data.keys())[dup_indx]],
                       indent=4,
                       separators=(',', ': ')).replace('\\n', '\n'))
        cont = input("Continue? [y/N] ")
        cont = cont.lower() == 'y'
        if not cont:
            print("Okay, aborting then...")
            sys.exit(-1)
    # Find paper on ADS
    if True:
        ads_paper = list(
            ads.SearchQuery(
                arxiv=arxiv_id,
                fl=['author', 'title', 'year', 'pub', 'volume', 'page']))[0]
    else:
        # Mock up
        class ads_paper_example():
            def __init__(self):
                self.author = ['Qian, Yansong', 'Arshad, Yumna', 'Bovy, Jo']
                self.title = ['The structure of accreted stellar streams']
                self.year = '2022'
                self.pub = 'Monthly Notices of the Royal Astronomical Society'
                self.volume = '511'
                self.page = ['2339']

        ads_paper = ads_paper_example()
    internal_id = build_internal_id(ads_paper, papers_data)
    new_entry = build_and_edit_new_entry(ads_paper, internal_id, arxiv_id)
    print("Adding entry {}".format(arxiv_id))
    # Move the screenshot in the right place
    done = input("""Now please take a screen shot of an example figure 
  and place it in the paper-figs directory. Just take 
  it with the standard Mac Screenshot app and have it 
  be saved to that directory. I'll do the rest! 
  Please press enter when done, any other input will 
  lead me to abort the operation! """)
    if not done == '':
        print("Okay, aborting then...")
        sys.exit(-1)
    # Find the Screenshot file and move it
    possible_screenshots = glob.glob(
        os.path.join(_PAPERS_FILE_DIR, 'paper-figs', 'Screen Shot*'))
    if len(possible_screenshots) > 1:
        print("Found multiple possible screen shots... aborting ...")
        sys.exit(-1)
    shutil.move(
        possible_screenshots[0],
        os.path.join(_PAPERS_FILE_DIR, 'paper-figs',
                     '{}.png'.format(internal_id.lower())))
    print("Moved file to {}".format(
        os.path.join('paper-figs', '{}.png'.format(internal_id.lower()))))
    num_lines = sum(1 for line in open(
        os.path.join(_PAPERS_FILE_DIR, 'papers-using-galpy.json')))
    with open(os.path.join(_PAPERS_FILE_DIR, 'papers-using-galpy.json'),
              'r+') as jsonFile:
        contents = jsonFile.readlines()
        pretty_print_new_entry(
            arxiv_id,
            internal_id,
            new_entry,
            print_func=lambda x: contents.insert(-11, x + '\n'))
        jsonFile.seek(0)
        jsonFile.writelines(contents)
    print("Success!")
    return None
Example 27
def process_token(article_identifier, prefs, bibdesk):
    """
    Process a single article token from the user, adding it to BibDesk.

    Parameters
    ----------
    article_identifier : str
        Any user-supplied `str` token.
    prefs : :class:`Preferences`
        A `Preferences` instance.
    bibdesk : :class:`BibDesk`
        A `BibDesk` AppKit hook instance.
    """
    """
    print((prefs['default']['ads_token']))
    print(article_identifier)
    """

    if 'true' in prefs['options']['alert_sound'].lower():
        alert_sound = 'Frog'
    else:
        alert_sound = None

    if 'dev_key' not in prefs['default']['ads_token']:
        ads.config.token = prefs['default']['ads_token']

    ads_query = ads.SearchQuery(identifier=article_identifier,
                                fl=[
                                    'author', 'first_author', 'bibcode',
                                    'identifier', 'alternate_bibcode', 'id',
                                    'year', 'title', 'abstract'
                                ])
    try:
        ads_articles = list(ads_query)
    except Exception:
        logging.info(
            "API response error; likely no authorized key is provided!")
        notify('API response error',
               'key:' + prefs['default']['ads_token'],
               'Likely no authorized key is provided!',
               alert_sound=alert_sound)
        return False

    if len(ads_articles) != 1:
        logging.debug(
            ' Zero or Multiple ADS entries for the article identifier: {}'.
            format(article_identifier))
        logging.debug('Matching Number: {}'.format(len(ads_articles)))
        notify('Found Zero or Multiple ADS entries for ',
               article_identifier,
               ' No update in BibDesk',
               alert_sound=alert_sound)
        logging.info("Found Zero or Multiple ADS entries for {}".format(
            article_identifier))
        logging.info("No update in BibDesk")

        return False

    ads_article = ads_articles[0]

    use_bibtexabs = False
    #   use "bibtex" by default
    #   another option could be "bibtexabs":
    #       https://github.com/andycasey/ads/pull/109
    #   however, a change in ads() is required and the abstract field from the "bibtexabs" option doesn't
    #   always comply with the tex syntax.
    if use_bibtexabs == True:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtexabs').execute()
    else:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtex').execute()

    logging.debug("process_token: >>>API limits")
    logging.debug("process_token:    {}".format(
        ads_query.response.get_ratelimits()))
    logging.debug("process_token: >>>ads_bibtex")
    logging.debug("process_token:    {}".format(ads_bibtex))

    for k, v in ads_article.items():
        logging.debug('process_token: >>>{}'.format(k))
        logging.debug('process_token:    {}'.format(v))

    article_bibcode = ads_article.bibcode
    gateway_url = 'https://' + prefs['default']['ads_mirror'] + '/link_gateway'
    #   https://ui.adsabs.harvard.edu/link_gateway by default

    if 'true' in prefs['options']['download_pdf'].lower():
        pdf_filename, pdf_status = process_pdf(article_bibcode,
                                               prefs=prefs,
                                               gateway_url=gateway_url)
    else:
        pdf_filename = '.null'

    kept_pdfs = []
    kept_fields = {}
    kept_groups = []

    found = difflib.get_close_matches(ads_article.title[0],
                                      bibdesk.titles,
                                      n=1,
                                      cutoff=.7)

    # first author is the same
    if len(found) > 0:
        if found and difflib.SequenceMatcher(
                None,
                bibdesk.authors(bibdesk.pid(found[0]))[0],
                ads_article.author[0]).ratio() > .6:
            # further comparison on abstract
            abstract = bibdesk('abstract', bibdesk.pid(found[0])).stringValue()
            if not abstract or difflib.SequenceMatcher(
                    None, abstract, ads_article.abstract).ratio() > .6:
                pid = bibdesk.pid(found[0])
                kept_groups = bibdesk.get_groups(pid)
                # keep all fields for later comparison
                # (especially rating + read bool)
                kept_fields = dict((k, v) for k, v in zip(
                    bibdesk('return name of fields', pid, True),
                    bibdesk('return value of fields', pid, True))
                                   # Adscomment may be arXiv only
                                   if k != 'Adscomment')
                # plus BibDesk annotation
                kept_fields['BibDeskAnnotation'] = bibdesk(
                    'return its note', pid).stringValue()
                kept_pdfs += bibdesk.safe_delete(pid)
                notify('Duplicate publication removed',
                       article_identifier,
                       ads_article.title[0],
                       alert_sound=alert_sound)
                logging.info('Duplicate publication removed:')
                logging.info(article_identifier)
                logging.info(ads_article.title[0])
                bibdesk.refresh()

    # add new entry
    ads_bibtex_clean = ads_bibtex.replace('\\', r'\\').replace('"', r'\"')
    pub = bibdesk(f'import from "{ads_bibtex_clean}"')

    # pub id
    pub = pub.descriptorAtIndex_(1).descriptorAtIndex_(3).stringValue()

    # automatic cite key
    bibdesk('set cite key to generated cite key', pub)

    # abstract
    if ads_article.abstract is not None:
        ads_abstract_clean = ads_article.abstract.replace('\\', r'\\').replace(
            '"', r'\"').replace('}', ' ').replace('{', ' ')
        bibdesk(f'set abstract to "{ads_abstract_clean}"', pub)

    doi = bibdesk('value of field "doi"', pub).stringValue()

    if pdf_filename.endswith('.pdf') and pdf_status:
        # register PDF into BibDesk
        bibdesk(
            f'add POSIX file "{pdf_filename}" to beginning of linked files',
            pub)
        # automatic file name
        bibdesk('auto file', pub)
    elif 'http' in pdf_filename and not doi:
        # URL for electronic version - only add it if no DOI link present
        # (they are very probably the same)
        bibdesk(
            f'make new linked URL at end of linked URLs with data "{pdf_filename}"',
            pub)

    # add URLs as linked URL if not there yet
    urls = bibdesk('value of fields whose name ends with "url"',
                   pub,
                   strlist=True)
    if 'arxiv' in article_bibcode.lower():
        article_gateway = get_article_gateway(article_bibcode,
                                              gateway_url=gateway_url)
        urls += [article_gateway['eprint_html']]

    urlspub = bibdesk('linked URLs', pub, strlist=True)

    for u in [u for u in urls if u not in urlspub]:
        bibdesk(f'make new linked URL at end of linked URLs with data "{u}"',
                pub)

    # add old annotated files
    for kept_pdf in kept_pdfs:
        bibdesk(f'add POSIX file "{kept_pdf}" to end of linked files', pub)

    # re-insert custom fields
    bibdesk_annotation = kept_fields.pop("BibDeskAnnotation", '')
    bibdesk(f'set its note to "{bibdesk_annotation}"', pub)
    newFields = bibdesk('return name of fields', pub, True)
    for k, v in list(kept_fields.items()):
        if k not in newFields:
            bibdesk(f'set value of field "{k}" to "{v}"', pub)
    notify('New publication added',
           bibdesk('cite key', pub).stringValue(),
           ads_article.title[0],
           alert_sound=alert_sound)
    logging.info('New publication added:')
    logging.info(bibdesk('cite key', pub).stringValue())
    logging.info(ads_article.title[0])

    # add back the static groups assignment
    if kept_groups != []:
        new_groups = bibdesk.add_groups(pub, kept_groups)

    return True
Example 28
from networkx.readwrite.gexf import write_gexf

CONSTANTS.titles.remove(
    *CONSTANTS.titles)  # This data set of names contains no titles

SOLAR_ASTROPHYSICS_QUERY = (
    'keyword:"Astrophysics - Solar and Stellar Astrophysics" '
    'title:("solar" OR "sun" OR "helio" OR "cme" OR "corona")'
    # 'year:2020-2021'
)

if __name__ == "__main__":
    # Make sure environment variable ADS_DEV_KEY is defined
    solar_papers = ads.SearchQuery(
        q=SOLAR_ASTROPHYSICS_QUERY,
        fl=["author"],
        max_pages=1000,
    )
    solar_coauthorship = nx.DiGraph()
    for paper in solar_papers:
        paper_authors = list(paper.author)
        if len(paper_authors) > 1:
            for second_author in paper_authors[1:]:
                solar_coauthorship.add_edge(paper_authors[0], second_author)
        else:
            solar_coauthorship.add_node(paper_authors[0])
    # Merge duplicate author names
    authors = [(author, HumanName(author))
               for author in solar_coauthorship.nodes]
    lnfi = defaultdict(list)  # Last name, first initial
    for author, parsed in authors:
        if len(parsed.first) > 0:
Example 29
    outfile = sys.argv[2]
else:
    infile = 'ads_refs.tsv'
    outfile = 'ads_refs.dat'

# Read the infile

ads_data = {}

with open(infile, 'r') as f:

    reader = csv.reader(f, delimiter='\t')
    next(reader, None)

    for line in reader:
        ref = line[0]
        bibcode = line[1]
        ads_data[ref] = bibcode

# Replace the bibcodes with the ADS articles

for key, value in ads_data.items():
    print('Processing key:', key)
    ads_data[key] = list(ads.SearchQuery(bibcode=value))[0]

# Write the data

with open(outfile, 'wb') as f:

    pickle.dump(ads_data, f)
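A corresponding read-back sketch (using the default output file name from the script above):

import pickle

with open('ads_refs.dat', 'rb') as f:
    ads_data = pickle.load(f)

for ref, article in ads_data.items():
    print(ref, article.bibcode)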
Example 30
ads.config.token = get_dev_key()

with open('cv.bib', 'r') as fh:
    bib_database = bibtexparser.load(fh, parser=parser)

firstauthor_entries = []
nonfirstauthor_entries = []

total_cites = 0
total_firstauthor_cites = 0

for entry in bib_database.entries:
    if 'doi' in entry:
        paper = ads.SearchQuery(
            doi=entry['doi'],
            fl=['citation_count', 'author', 'year', 'id', 'bibcode'])
        pfx = "Loaded from doi {0}".format(entry['doi'])
    elif 'adsurl' in entry:
        adsurl = entry['adsurl'].split("/")[-1].replace(
            "%26", "&") if 'adsurl' in entry else None
        paper = ads.SearchQuery(
            bibcode=adsurl,
            fl=['citation_count', 'author', 'year', 'id', 'bibcode'])
        pfx = "Loaded from adsurl {0}".format(adsurl)
    else:
        print("Skipped {0} because it has no DOI or ADSURL".format(
            entry['title']))
        continue

    paper.execute()