Example #1
def retrieve_bibtex_from_ads(bibcode):
    """
    Get the bibtex entry for a given bibcode
    """
    q = ads.ExportQuery(bibcode, format="bibtex")
    export = q.execute()
    return export
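A minimal usage sketch, assuming a valid ADS API token (the ads package also reads the ADS_DEV_KEY environment variable or ~/.ads/dev_key):

import ads

# Assumption: replace with a real ADS API token, or rely on ADS_DEV_KEY
# or ~/.ads/dev_key instead of setting it here.
ads.config.token = "my-ads-token"

print(retrieve_bibtex_from_ads("2005IAUS..216..170H"))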
Example #2
def export_citation(bibcodes, output_format="aastex"):
    """Export the bibcodes in the form of aastex 

    Args:
        bibcodes (list): string list of bibcodes
        output_format (str): output format
                             ("aastex")

    Returns:
        bibs (list): string list of bibs
    """
    if len(bibcodes) == 0:
        return []
    q = ads.ExportQuery(bibcodes, format=output_format)
    try:
        export_response = q.execute()
    except ads.exceptions.APIResponseError:
        print("{0} is not in ADS library!".format(bibcodes))
        return []
    bibs = [bib for bib in export_response.split("\n") if len(bib) > 0]
    return bibs
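A related sketch: when exporting in bibtex format, ADS separates entries with blank lines, so splitting the response on "\n\n" recovers whole entries rather than single lines (Example #17 below relies on this):

import ads

bibcodes = ["2005IAUS..216..170H", "2017A&A...608A.116C"]
response = ads.ExportQuery(bibcodes, format="bibtex").execute()
# One string per BibTeX entry; drop empty chunks.
entries = [entry for entry in response.split("\n\n") if entry.strip()]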
Example #3
def export(ctx, format, bibcodes):
    """
    Export article(s) to the specified format.

    - Export one article to bibtex:

        ads export 2005IAUS..216..170H
    
    - Export multiple articles to bibtex:

        ads export 2005IAUS..216..170H '2017A&A...608A.116C'
    
    NOTE: If a bibcode contains `&` e.g., "2017A&A...608A.116C",
    either `&` needs to be escaped as in
    
        ads export 2017A\&A...608A.116C
    
    or put in quotes

        ads export "2017A&A...608A.116C"

    because in bash, `&` sends the process to the background.
    """
    if ctx.obj["debug"]:
        logger.setLevel(logging.DEBUG)
    # TODO: This breaks up the string if a single item is given from stdin.
    bibcodes = list(map(find_bibcode, bibcodes))
    logger.debug(f"bibcodes: {bibcodes}")
    if len(bibcodes) == 0:
        raise click.UsageError("At least one bibcode must be specified.")

    if not ctx.obj["debug"]:
        q = ads.ExportQuery(bibcodes, format=format)
        click.echo(q())
Example #4
 def update_bib(self):
     # Fetch BibTeX entries only for bibcodes not already cached in self._bib.
     bibcodes = list(set(self.get_bibcodes()) - set(self._bib.keys()))
     if bibcodes:
         self._bib.update(
             bibtexparser.loads(
                 ads.ExportQuery(bibcodes,
                                 'bibtex').execute()).entries_dict)
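A sketch of the class context this method appears to assume: self._bib caches parsed entries keyed by bibcode (ADS exports use the bibcode as the BibTeX key), and get_bibcodes() lists the bibcodes of interest. Everything outside update_bib below is illustrative:

import ads
import bibtexparser

class Library:
    def __init__(self, bibcodes):
        self._bibcodes = list(bibcodes)
        self._bib = {}  # parsed BibTeX entries keyed by bibcode

    def get_bibcodes(self):
        return self._bibcodes

    def update_bib(self):
        # Fetch only the bibcodes that are not cached yet.
        bibcodes = list(set(self.get_bibcodes()) - set(self._bib.keys()))
        if bibcodes:
            self._bib.update(
                bibtexparser.loads(
                    ads.ExportQuery(bibcodes,
                                    'bibtex').execute()).entries_dict)

lib = Library(["2005IAUS..216..170H"])
lib.update_bib()  # first call fetches and caches the entry
lib.update_bib()  # second call is a no-op: nothing new to fetch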
Example #5
def get_bibtex_from_ADS(arg_dict):

    papers = ads.SearchQuery(**arg_dict)
    papers = [paper for paper in papers]

    if len(papers) == 0:
        print('No papers found in ADS')
        return
    opts_ = []

    max_res = int(config.config['ADS']['max results'])

    if len(papers) > max_res:
        papers = papers[:max_res]

    for paper in papers:
        if len(paper.author) > 1:
            et_al = 'et al. '
        else:
            et_al = ''
        opts_ += [f'{paper.author[0]} {et_al}[{paper.year}]: {paper.title}' ]

    questions = [
        inquirer.Checkbox('ads',
            message="Add to bibtex and attempt PDF fetch?",
            choices=opts_,
        ),
    ]
    answers = inquirer.prompt(questions)

    save_papers = []
    for ans in answers['ads']:
        save_papers.append(papers[opts_.index(ans)])

    if len(save_papers) == 0:
        return 

    papers = save_papers
    del save_papers

    parser = bib.get_parser()

    bibcodes = [paper.bibcode for paper in papers]

    bibtex_data = ads.ExportQuery(
        bibcodes=bibcodes,
        format='bibtex',
    ).execute()

    bib_database = bibtexparser.loads(bibtex_data, parser)

    return bib_database, bibcodes
Example #6
def _to_format(format, filter=None):
    """Convert bibcodes to a range of different output formats.


    Parameters
    ----------
    format : string
        Output format: ``bibtex`` | ``aastex`` | ``icarus`` | ``mnras``.

    filter : string, optional
        Filter the bibliography by key, showing only those that start
        with this string.


    Returns
    -------
    text : string
        ADS entries for all the bibliographic items in the given
        format.  Uses a query to the export service to get the data
        for each reference.

    """
    import ads

    output = ''
    for task, ref in _filter(filter).items():
        with warnings.catch_warnings():
            # warnings.filterwarnings('error')
            try:
                for key, val in ref.items():
                    # This method avoids using multiple calls to the
                    # API that may impact rate limits
                    # https://github.com/adsabs/adsabs-dev-api/blob/master/Export_API.ipynb
                    query = ads.ExportQuery(list(val), format=format)
                    data = query.execute()
                    output += '% {:s}/{:s}:\n{:s}\n'.format(task, key, data)
            except ads.exceptions.APIResponseError as e:
                e = str(e)
                if '<title>' in e:
                    e = e[e.find('<title>') + 7:e.find('</title>')]
                warnings.warn(
                    'cannot obtain ADS data for {:s}/{:s}: ({:s})'.format(
                        task, key, e), RuntimeWarning)

    return output
Example #7
def abstract(request, bibcode):
    # return HttpResponse(f"Viewing abstract for bibcode {bibcode}")
    q = list(
        ads.query(bibcode,
                  fl=[
                      'bibcode', 'title', 'author', 'aff', 'doi', 'pub',
                      'pubdate', 'citation_count', 'abstract', 'arxiv_class',
                      'volume', 'issue', 'page', 'year', 'keyword',
                      'orcid_pub', 'orcid_user', 'orcid_other'
                  ]))
    assert len(q) == 1, "Non-unique bibcode"
    paper = q[0]

    bibtex = ads.ExportQuery(bibcode).execute()
    try:
        eprint = re.search(r'eprint = \{(.+)\}', bibtex)[1]
    except TypeError:  # re.search returned None: no eprint field
        eprint = None

    orcid = paper.orcid_pub
    try:
        orcid = [
            pub if pub != '-' else auth
            for pub, auth in zip(paper.orcid_pub, paper.orcid_user)
        ]
    except Exception:
        pass
    try:
        orcid = [
            o if o != '-' else other
            for o, other in zip(orcid, paper.orcid_other)
        ]
    except Exception:
        pass

    template = loader.get_template('abstract.html')
    context = {
        'paper': paper,
        'eprint': eprint,
        'bibtex': bibtex,
        'authors': zip(paper.author, paper.aff, orcid)
    }
    return HttpResponse(template.render(context, request))
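A hedged sketch of the URL route this Django view implies; the path, module layout, and names are assumptions:

# urls.py (illustrative only)
from django.urls import path

from . import views

urlpatterns = [
    path('abs/<str:bibcode>/', views.abstract, name='abstract'),
]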
Example #8
 def query_ads_bibtex(self, bibcodes):
     """
     Query ADS for the paper bibtexes specified by a list of bibcodes ('bibcodes')
     """
     bc_ads = BibtexCollection()
     try:
         bibtex_string = ads.ExportQuery(bibcodes=bibcodes, format='bibtex').execute()
         bc_ads.read_from_string(bibtex_string)
         bibcodes_found = bc_ads.bibcode_entries.keys()
         nresults = len(bibcodes_found)
         nbibcodes = len(bibcodes)
         if nresults == nbibcodes:
             return bc_ads
         else:
             print('WARNING: did not retrieve bibtex for {} bibcodes:'.format(nbibcodes - nresults))
             for bc in bibcodes:
                 if bc not in bibcodes_found:
                     print(bc)
             
     except ads.exceptions.APIResponseError:
         print('ERROR: ADS APIResponseError. You probably exceeded your rate limit.')
         raise
Example #9
    def getBibtexs(self, bibcodes):
        """Obtain a string containing the bibtex entries for all the
        requested bibcodes

        Parameter:
            bibcodes: a single bibcode
                (string containing the ADS identifier of a given entry)
                or a list of bibcodes

        Output:
            a string with all the bibtex entries
        """
        ads.config.token = pbConfig.params["ADSToken"]
        try:
            self.q = ads.ExportQuery(bibcodes=bibcodes, format="bibtex")
            export = self.q.execute()
        except ads.exceptions.APIResponseError:
            pBLogger.exception(self.unauthorized)
        except Exception:
            pBLogger.exception(self.genericExportError, exc_info=True)
        else:
            pBLogger.info(self.getLimitInfo())
            return export
        return ""
Example #10
def main():
    parser = ArgumentParser()
    parser.add_argument('files', metavar='TEX', nargs='+', help='tex files to search citation keys')
    parser.add_argument('-o', '--output', metavar='BIB', help='main bibtex file; new entries will be added to this file, existing entries may be updated')
    parser.add_argument('-r', '--other', nargs='+', metavar='BIB', help='other bibtex files that contain existing references (read-only)')
    parser.add_argument('--no-update', dest='update', action='store_false', help='for existing entries, do not check ADS for updates')
    parser.add_argument('--force-regenerate', action='store_true', help='for all existing entries, regenerate the bibtex with the latest version from ADS if found')
    parser.add_argument('--include-physics', action='store_true', help='include physics database when searching ADS')
    parser.add_argument('--no-backup', dest='backup', action='store_false', help='back up output file if being overwritten')
    parser.add_argument('--version', action='version', version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    if args.include_physics:
        global _database  # pylint: disable=global-statement
        _database = '("astronomy" OR "physics")'

    if len(args.files) == 1 and args.files[0].lower().endswith('.bib'): # bib update mode
        if args.output or args.other:
            parser.error('Input file is a bib file, not tex file. This will enter bib update mode. Do not specify "output" and "other".')
        if not args.update:
            parser.error('Input file is a bib file, not tex file. This will enter bib update mode. Must not specify --no-update')
        if not os.path.isfile(args.files[0]):
            parser.error('Cannot locate input bib file {}'.format(args.files[0]))
        keys = None
        args.output = args.files[0]

    elif args.output: # bib output is specified
        keys, _ = search_keys(args.files, find_bib=False)

    else: # bib output is missing, auto-identify
        keys, bib = search_keys(args.files, find_bib=True)
        if not bib:
            parser.error('Cannot identify bibtex file from the tex source. Use -o to specify a bibtex file as output.')
        args.output = bib.pop(0)
        if args.other:
            args.other.extend(bib)
        else:
            args.other = bib

        msg = 'Auto-identifying bibtex files...\n'
        msg += 'Main bibtex source (output file): {}\n'.format(args.output)
        if args.other:
            msg += 'Additional bibtex sources: {}\n'.format(', '.join(args.other))
        print(_headerize(msg))

    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(' ', parser=get_bparser())

    bib_other = bibtexparser.loads(' ', parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(bib_other, bibtexparser.load(fp, parser=get_bparser()))

    if keys is None: # bib update mode
        keys = list(bib.entries_dict)

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)

    for key in keys:
        if key in bib.entries_dict:
            if args.update:
                bibcode = extract_bibcode(bib.entries_dict[key])
                bibcode_new = entry2bibcode(bib.entries_dict[key])
                if bibcode_new:
                    all_entries[bibcode_new].append(key)
                    if bibcode_new != bibcode or args.force_regenerate:
                        to_retrieve.add(bibcode_new)
                        print('{}: UPDATE => {}'.format(key, bibcode_new))
                        continue
            print('{}: EXISTING'.format(key))
            continue

        if key in bib_other.entries_dict:
            print('{}: FOUND IN OTHER BIB SOURCE, IGNORED'.format(key))
            continue

        bibcode = find_bibcode(key)
        if bibcode:
            to_retrieve.add(bibcode)
            all_entries[bibcode].append(key)
            print('{}: NEW ENTRY => {}'.format(key, bibcode))
        else:
            not_found.add(key)
            print('{}: NOT FOUND'.format(key))

    if not_found:
        print(_headerize('Please check the following keys'))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize('The following keys refer to the same entry'))
        for b, k in repeated_keys:
            print('{1} has been referred as the following keys; please keep only one:\n{0}\n'.format(' '.join(k), b))

    if to_retrieve:
        print(_headerize('Building new bibtex file, please wait...'))
        bib_new = bibtexparser.loads(ads.ExportQuery(list(to_retrieve), 'bibtex').execute(), parser=get_bparser())
        for entry in bib_new.entries:
            entry['ID'] = all_entries[entry['ID']][0]
        bib = update_bib(bib, bib_new)
        bib_dump_str = bibtexparser.dumps(bib).encode('utf8')
        if args.backup and os.path.isfile(args.output):
            copyfile(args.output, args.output + '.bak')
        with open(args.output, 'wb') as fp:
            fp.write(bib_dump_str)

    print(_headerize('Done!'))

    # check version
    try:
        latest_version = StrictVersion(requests.get(
            'https://pypi.python.org/pypi/adstex/json').json()['info']['version'])
    except (requests.RequestException, KeyError, ValueError):
        pass
    else:
        if latest_version > StrictVersion(__version__):
            msg = 'A newer version of adstex (v{}) is now available!\n'.format(latest_version)
            msg += 'Please consider updating it by running:\n\n'
            msg += 'pip install adstex=={}'.format(latest_version)
            print(_headerize(msg))
Example #11
        "--bib-format",
        choices=["bibtex", "bibtexabs"],
        help="""[[DISABLED]] Format for bibtex file. 
                       bibtexabs only works if using the git version of the abs module""",
        default="bibtex",
    )

    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args()
    bibcodefile = args.bibcodes
    bibfile = args.bibfile
    token = args.token

    bibcodes = args.bibcode_list
    print(bibcodes)

    if args.debug:
        print(bibcodefile)
        print(bibfile)
        print(token)

    print("Adding {} new items".format(len(bibcodes)))
    bibtex = ads.ExportQuery(bibcodes, format=args.bib_format).execute()

    with open(bibcodefile, "a+") as f:
        f.writelines("{}\n".format(bc) for bc in bibcodes)
    with open(bibfile, "a+") as f:
        f.write("\n\n\n\n\n")
        f.write(bibtex)
Example #12
    def add_to_library(self, bibcode):

        if len(bibcode.split('arXiv')) == 2:
            print(bibcode)
            try:
                connection = 0
                while connection == 0:
                    try:
                        ads_paper = list(
                            ads.SearchQuery(alternate_bibcode=bibcode,
                                            fl=['bibcode']))[0]
                        try:
                            del self.database[bibcode]
                        except KeyError:
                            pass
                        bibcode = ads_paper.bibcode
                        connection = 1
                        time.sleep(1)
                    except requests.ConnectionError:
                        time.sleep(1)
            except IndexError:
                pass
        else:
            pass

        if bibcode not in self.database:
            connection = 0
            while connection == 0:
                try:
                    a = ads.SearchQuery(
                        bibcode=bibcode,
                        fl=['adsurl', 'title', 'author', 'abstract'])
                    a = list(a)[0]
                    b = ads.ExportQuery(bibcodes=bibcode,
                                        format="bibtex").execute()
                    connection = 1
                    self.database[bibcode] = {
                        'adsurl': get_adsurl_from_bibtex(b),
                        'title': a.title,
                        'author': a.author,
                        'abstarct': a.abstract,
                        'bibtex': b,
                        'call': bibcode
                    }

                    old_call = self.database[bibcode]['call']
                    new_call = a.author[0].split(',')[0].replace(
                        ' ', '').replace('-', '') + bibcode[:4]
                    new_call = ud.normalize('NFKD', new_call).encode(
                        'ascii', 'ignore').decode("utf-8")
                    print(str(new_call))
                    self.database[bibcode]['bibtex'] = \
                        self.database[bibcode]['bibtex'].replace('{0}{1}'.format('{', old_call), '{0}{1}'.format('{', new_call))
                    self.database[bibcode]['call'] = new_call
                    calls = [
                        self.database[ff]['call'].split('B')[0]
                        for ff in self.database
                    ]
                    for i in self.database:
                        if calls.count(self.database[i]['call']) > 1:
                            old_call = self.database[i]['call']
                            new_call = self.database[i]['call'].split(
                                'B')[0] + 'B' + i
                            new_call = new_call.replace('&', 'a')
                            self.database[i]['bibtex'] = \
                                self.database[i]['bibtex'].replace('{0}{1}'.format('{', old_call), '{0}{1}'.format('{', new_call))
                            self.database[i]['call'] = new_call

                except requests.ConnectionError:
                    time.sleep(1)

        self.update_library_pickle()

        self.update_library_bib()
Example #13
def add(search_query, author, title):
    fl = [
        'id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title',
        'abstract', 'doi', 'pubdate', "pub", "keyword", "doctype",
        "identifier", "links_data"
    ]
    if author:
        search_query += "author:" + author
    if title:
        search_query += "title:" + title
    papers = list(ads.SearchQuery(q=search_query, fl=fl))
    if len(papers) == 0:
        exit()
    elif len(papers) == 1:
        selection = papers[0]  # type:ads.search.Article
    else:
        # first_ten = itertools.islice(papers, 10)
        first_ten = papers[:10]
        single_paper: ads.search.Article
        for index, single_paper in enumerate(first_ten):
            print(index, single_paper.title[0], single_paper.first_author)
        selected_index = click.prompt('select paper', type=int)
        selection = papers[selected_index]  # type:ads.search.Article

    assert len(selection.doi) == 1
    doi = selection.doi[0]

    try:

        paper = Paper.get(Paper.doi == doi)
        print("this paper has already been added")
        exit(1)

    except peewee.DoesNotExist:
        pass

    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()

    print("saving in db")

    paper = Paper()
    assert len(selection.title) == 1
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = [
        ident for ident in selection.identifier if "arXiv:" in ident
    ][0].split("arXiv:")[-1]
    paper.bibtex = bibtex
    links = [json.loads(string) for string in selection.links_data]
    print(links)
    paper.save()
    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)
    keywords = [
        Keyword.get_or_create(keyword=keyword)[0]
        for keyword in selection.keyword
    ]
    for keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=keyword, paper=paper)
    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    r = requests.get(arxiv_url, stream=True)
    print(arxiv_url)
    with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
        chunk_size = 1024  # bytes
        file_size = int(r.headers.get('content-length', 0))
        progress_length = math.ceil(file_size / chunk_size)
        with click.progressbar(r.iter_content(chunk_size=chunk_size),
                               length=progress_length) as progress_chunks:
            for chunk in progress_chunks:
                f.write(chunk)
    paper.pdf_downloaded = True
    paper.save()
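Since add() uses click prompts and progress bars, it is presumably registered as a click command; a sketch of what that wiring might look like (the decorator details are assumptions):

import click

@click.command()
@click.argument("search_query")
@click.option("--author", default="", help="narrow the search by author")
@click.option("--title", default="", help="narrow the search by title")
def add_command(search_query, author, title):
    add(search_query, author, title)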
Example #14
def doQuery(args, config, **kwargs):
    query = BuildQuery()

    query.setKey('sort', 'citation_count')
    for key in vars(args):
        if key not in ['interactive', 'func']:
            query.setKey(key, vars(args)[key])
    for key in kwargs:
        if key not in ['interactive', 'func']:
            query.setKey(key, kwargs[key])

    fl = [
        'abstract', 'first_author', 'author', 'year', 'pub', 'title', 'bibcode'
    ]
    query.setKey('fl', fl)

    # get results
    results = query.execute()

    res_as_array = []

    # loop over results
    try:  # sometimes, the ads script gets an IndexError, hide this
        res_as_array = [res for res in results]
    except IndexError:
        res_as_array = []
    printResults(res_as_array)

    if len(res_as_array) == 0:
        print('No results')
        return

    if args.interactive or ('interactive' in kwargs and kwargs['interactive']):
        print('')
        inp = input(
            'Comma separated articles to download [e.g. 1-3, 4], [m] for more'
            ' [q] to quit or'
            ' add more parameters to request [e.g. year:2016]: ')

        # Match quit request
        if inp == 'q':
            return

        # Match requests for more articles
        # match any string like "5m", "10 more", …
        grp = re.match(r'(\d+) ?[mM](ore)?', inp)
        if grp is not None:
            nmore = int(grp.group(1))

            # load more
            if 'rows' in kwargs:
                kwargs['rows'] += nmore
            else:
                kwargs['rows'] = 50 + nmore

            print('Loading {} more!'.format(nmore))
            doQuery(args, config, **kwargs)
            return

        # Match selection
        mask = parseAsList(inp)
        if len(mask) > 0:
            papers = [r for i, r in enumerate(res_as_array) if i in mask]
            print('Selected:')
            printResults(papers)

            action = getInput('Download [d], bibtex[b], quit[q]? ',
                              lambda e: e.lower())

            # Download selected articles
            if 'd' in action:
                for paper in papers:
                    print('Downloading "{}"'.format(paper.title[0]))
                    downloadPaper(paper, config)

            # Get bibtex reference
            if 'b' in action:
                try:
                    import pyperclip
                    clip = True
                except ModuleNotFoundError:
                    clip = False

                print('Downloading bibtex entries')
                # Get the list of bibcodes
                bibcodes = [p.bibcode for p in papers]
                eq = ads.ExportQuery(bibcodes)
                bib = eq.execute()
                print(bib)
                if clip:
                    print('Copied to clipboard!')
                    pyperclip.copy(bib)

            return papers

        else:  # match more request
            if args.q is not None:
                args.q = args.q + ' ' + inp
            elif 'q' in kwargs:
                kwargs['q'] += ' ' + inp
            else:
                kwargs['q'] = inp
            doQuery(args, config, **kwargs)
            return
    else:
        return res_as_array
Example #15
##
## Note : To strip an existing BibTeX file down to bibcodes with vim,
##	:v/^@/d
##	:%s/@.*{//g
##	:%s/,//g

import ads

## Setup the argument parser
import argparse

parser = argparse.ArgumentParser(description='bibcode to import')
parser.add_argument('bibcode', help='A bibcode for input')
args = parser.parse_args()

## Read bibcode input from file if not specified
#bibcode = args.bibcode
with open(args.bibcode) as f:
    bibcode = f.read().splitlines()

## Query ADS with the set of bibcodes
q = ads.ExportQuery(bibcodes=bibcode, format='bibtex')
bibtex = q.execute()

## Write BibTeX entries to file
with open(args.bibcode + '.bib', 'a') as bibfile:
    print(bibtex, file=bibfile)

Example #16
def main():

    desc = 'a simple tool to find out-of-date arXiv preprints, optionally updating and writing a new file'
    parser = argparse.ArgumentParser(description=desc)

    h = 'the input bib file to search through'
    parser.add_argument('bibfile', type=str, help=h)

    h = 'do a dry run, simply printing out all of the out-of-date references'
    parser.add_argument('--dry-run', '-n', action='store_true', help=h)

    h = 'the output bib file to write; if not provided, any new entries will be written to stdout'
    parser.add_argument('-o', '--output', type=str, help=h)

    h = "string specifying NASA ADS API token; see https://github.com/adsabs/adsabs-dev-api#access. "
    h += "The token can also be stored in ~/.ads/dev_key for repeated use"
    parser.add_argument('-t', '--token', type=str, help=h)

    h = 'whether to use verbose output'
    parser.add_argument('-v', '--verbose', action='store_true', help=h)

    ns = parser.parse_args()

    # set the token
    if ns.token is not None:
        os.environ['ADS_DEV_KEY'] = ns.token

    # parse the bib file
    with open(ns.bibfile, 'r') as ff:
        refs = bibtexparser.load(ff)

    # the indices of pre-prints
    preprints = []
    for i, r in enumerate(refs.entries):
        adsurl = r.get('adsurl', None)
        if is_preprint(r):
            preprints.append(i)
        elif ns.verbose:
            print("entry '%s' appears to be a published work" %(r['ID']))

    # sort from largest to smallest
    preprints = sorted(preprints, reverse=True)
    args = (len(preprints), len(refs.entries))
    print("%d out of %d references possibly out-of-date..." % args)

    # get the relevant info from ADS
    updated = []
    for i in preprints:
        r = refs.entries[i]
        print("checking publication status of the '%s' bib entry" %r['ID'])
        arxiv_id = None

        # try to match the pattern
        for field in r:
            for pattern in ARXIV_PATTERNS:
                matches = pattern.search(r[field])
                if matches:
                    arxiv_id = matches.group(0)
                    break

        # check ads url too
        if arxiv_id is None and 'adsurl' in r and 'abs/' in r['adsurl']:
            arxiv_id = r['adsurl'].split('abs/')[-1]

        # skip this one and warn!
        if arxiv_id is None:
            warnings.warn("cannot check entry '%s'; please add 'eprint' or proper 'adsurl' fields" %r['ID'])
            continue

        # query for the bibcode
        try:
            q = ads.SearchQuery(q="arxiv:%s" %arxiv_id, fl=['bibcode'])
        except Exception:
            raise ValueError("syntax error in bib file; check 'eprint' and 'adsurl' fields for '%s'" %r['ID'])

        # check for token
        if q.token is None:
            raise RuntimeError("no ADS API token found; cannot query the ADS database. "
                               "See https://github.com/adsabs/adsabs-dev-api#access")

        # process each paper
        for paper in q:

            # get the bibtex
            bibquery = ads.ExportQuery(paper.bibcode)
            bibtex = bibquery.execute()

            # new ref entry
            new_ref = bibtexparser.loads(bibtex).entries[0]


            # update if published
            if not is_preprint(new_ref):
                updated.append(new_ref['ID'])
                print("  '%s' entry found to be out-of-date" %r['ID'])

                # remove old entry
                refs.entries.pop(i)

                # add new entry
                refs.entries.append(new_ref)

    # write output file
    if len(updated) and not ns.dry_run:

        writer = bibtexparser.bwriter.BibTexWriter()
        if ns.output is not None:
            with open(ns.output, 'w') as ff:
                ff.write(writer.write(refs))
        else:
            # only print out the new ones
            indices = [i for i, ref in enumerate(refs.entries) if ref['ID'] in updated]
            refs.entries = [refs.entries[i] for i in indices]
            print(writer.write(refs))
Example #17
    def multi_add_to_library(self, bibcodes):

        for nbib, bibcode in enumerate(bibcodes):

            if len(bibcode.split('arXiv')) == 2:
                try:
                    connection = 0
                    while connection == 0:
                        try:
                            ads_paper = list(
                                ads.SearchQuery(alternate_bibcode=bibcode,
                                                fl=['bibcode']))[0]
                            try:
                                del self.database[bibcode]
                            except KeyError:
                                pass
                            bibcodes[nbib] = ads_paper.bibcode
                            connection = 1
                        except requests.ConnectionError:
                            time.sleep(1)
                except IndexError:
                    pass
            else:
                pass

        bibcodes = list(np.unique(bibcodes))

        bibcodes_to_include = []
        for i in bibcodes:
            if i not in self.database:
                bibcodes_to_include.append(i)

        connection = 0
        bibtexs_to_include = [ff for ff in bibcodes_to_include]
        while connection == 0:
            try:
                print('connecting...')
                bibtexs_to_include = ads.ExportQuery(
                    bibcodes=bibcodes_to_include, format='bibtex').execute()
                bibtexs_to_include = bibtexs_to_include.split('\n\n')[:-1]
                connection = 1
            except requests.ConnectionError:
                time.sleep(1)

        bibcodes_to_include = [
            str(ff.split('\n')[0].split('{')[1][:-1])
            for ff in bibtexs_to_include
        ]

        print('bibtexs loaded')
        for nbib, bibcode in enumerate(bibcodes_to_include):
            print(bibcode)
            if bibcode not in self.database:
                connection = 0
                while connection == 0:
                    try:
                        a = ads.SearchQuery(
                            bibcode=bibcode,
                            fl=['adsurl', 'title', 'author', 'abstract'])
                        a = list(a)[0]
                        b = bibtexs_to_include[nbib]
                        connection = 1
                        self.database[bibcode] = {
                            'adsurl': get_adsurl_from_bibtex(b),
                            'title': a.title,
                            'author': a.author,
                            'abstarct': a.abstract,
                            'bibtex': b,
                            'call': bibcode
                        }

                        old_call = self.database[bibcode]['call']
                        new_call = a.author[0].split(',')[0].replace(
                            ' ', '').replace('-', '') + bibcode[:4]
                        new_call = ud.normalize('NFKD', new_call).encode(
                            'ascii', 'ignore').decode("utf-8")
                        new_call = new_call.replace('&', 'a')
                        self.database[bibcode]['bibtex'] = \
                            self.database[bibcode]['bibtex'].replace('{0}{1}'.format('{', old_call), '{0}{1}'.format('{', new_call))
                        self.database[bibcode]['call'] = new_call
                    except requests.ConnectionError:
                        time.sleep(1)

        calls = [
            self.database[ff]['call'].split('B')[0] for ff in self.database
        ]
        for i in self.database:
            if calls.count(self.database[i]['call']) > 1:
                old_call = self.database[i]['call']
                new_call = self.database[i]['call'].split('B')[0] + 'B' + i
                new_call = new_call.replace('&', 'a')
                self.database[i]['bibtex'] = \
                    self.database[i]['bibtex'].replace('{0}{1}'.format('{', old_call), '{0}{1}'.format('{', new_call))
                self.database[i]['call'] = new_call

        self.update_library_pickle()

        self.update_library_bib()
Example #18
def process_token(article_identifier, prefs, bibdesk):
    """
    Process a single article token from the user, adding it to BibDesk.

    Parameters
    ----------
    article_identifier : str
        Any user-supplied `str` token.
    prefs : :class:`Preferences`
        A `Preferences` instance.
    bibdesk : :class:`BibDesk`
        A `BibDesk` AppKit hook instance.
    """
    """
    print((prefs['default']['ads_token']))
    print(article_identifier)
    """

    if 'true' in prefs['options']['alert_sound'].lower():
        alert_sound = 'Frog'
    else:
        alert_sound = None

    if 'dev_key' not in prefs['default']['ads_token']:
        ads.config.token = prefs['default']['ads_token']

    ads_query = ads.SearchQuery(identifier=article_identifier,
                                fl=[
                                    'author', 'first_author', 'bibcode',
                                    'identifier', 'alternate_bibcode', 'id',
                                    'year', 'title', 'abstract'
                                ])
    try:
        ads_articles = list(ads_query)
    except Exception:
        logging.info(
            "API response error; likely no authorized key was provided!")
        notify('API response error',
               'key:' + prefs['default']['ads_token'],
               'Likely no authorized key was provided!',
               alert_sound=alert_sound)
        return False

    if len(ads_articles) != 1:
        logging.debug(
            ' Zero or multiple ADS entries for the article identifier: {}'.
            format(article_identifier))
        logging.debug('Matching Number: {}'.format(len(ads_articles)))
        notify('Found zero or multiple ADS entries for ',
               article_identifier,
               ' No update in BibDesk',
               alert_sound=alert_sound)
        logging.info("Found Zero or Multiple ADS antries for {}".format(
            article_identifier))
        logging.info("No update in BibDesk")

        return False

    ads_article = ads_articles[0]

    use_bibtexabs = False
    #   use "bibtex" by default
    #   another option could be "bibtexabs":
    #       https://github.com/andycasey/ads/pull/109
    #   however, a change in ads() is required, and the abstract field from
    #   the "bibtexabs" option doesn't always comply with TeX syntax.
    if use_bibtexabs:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtexabs').execute()
    else:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtex').execute()

    logging.debug("process_token: >>>API limits")
    logging.debug("process_token:    {}".format(
        ads_query.response.get_ratelimits()))
    logging.debug("process_token: >>>ads_bibtex")
    logging.debug("process_token:    {}".format(ads_bibtex))

    for k, v in ads_article.items():
        logging.debug('process_token: >>>{}'.format(k))
        logging.debug('process_token:    {}'.format(v))

    article_bibcode = ads_article.bibcode
    gateway_url = 'https://' + prefs['default']['ads_mirror'] + '/link_gateway'
    #   https://ui.adsabs.harvard.edu/link_gateway by default

    if 'true' in prefs['options']['download_pdf'].lower():
        pdf_filename, pdf_status = process_pdf(article_bibcode,
                                               prefs=prefs,
                                               gateway_url=gateway_url)
    else:
        pdf_filename = '.null'

    kept_pdfs = []
    kept_fields = {}
    kept_groups = []

    found = difflib.get_close_matches(ads_article.title[0],
                                      bibdesk.titles,
                                      n=1,
                                      cutoff=.7)

    # first author is the same
    if len(found) > 0:
        if difflib.SequenceMatcher(
                None,
                bibdesk.authors(bibdesk.pid(found[0]))[0],
                ads_article.author[0]).ratio() > .6:
            # further comparison on abstract
            abstract = bibdesk('abstract', bibdesk.pid(found[0])).stringValue()
            if not abstract or difflib.SequenceMatcher(
                    None, abstract, ads_article.abstract).ratio() > .6:
                pid = bibdesk.pid(found[0])
                kept_groups = bibdesk.get_groups(pid)
                # keep all fields for later comparison
                # (especially rating + read bool)
                kept_fields = dict((k, v) for k, v in zip(
                    bibdesk('return name of fields', pid, True),
                    bibdesk('return value of fields', pid, True))
                                   # Adscomment may be arXiv only
                                   if k != 'Adscomment')
                # plus BibDesk annotation
                kept_fields['BibDeskAnnotation'] = bibdesk(
                    'return its note', pid).stringValue()
                kept_pdfs += bibdesk.safe_delete(pid)
                notify('Duplicate publication removed',
                       article_identifier,
                       ads_article.title[0],
                       alert_sound=alert_sound)
                logging.info('Duplicate publication removed:')
                logging.info(article_identifier)
                logging.info(ads_article.title[0])
                bibdesk.refresh()

    # add new entry
    ads_bibtex_clean = ads_bibtex.replace('\\', r'\\').replace('"', r'\"')
    pub = bibdesk(f'import from "{ads_bibtex_clean}"')

    # pub id
    pub = pub.descriptorAtIndex_(1).descriptorAtIndex_(3).stringValue()

    # automatic cite key
    bibdesk('set cite key to generated cite key', pub)

    # abstract
    if ads_article.abstract is not None:
        ads_abstract_clean = ads_article.abstract.replace('\\', r'\\').replace(
            '"', r'\"').replace('}', ' ').replace('{', ' ')
        bibdesk(f'set abstract to "{ads_abstract_clean}"', pub)

    doi = bibdesk('value of field "doi"', pub).stringValue()

    if pdf_filename.endswith('.pdf') and pdf_status:
        # register PDF into BibDesk
        bibdesk(
            f'add POSIX file "{pdf_filename}" to beginning of linked files',
            pub)
        # automatic file name
        bibdesk('auto file', pub)
    elif 'http' in pdf_filename and not doi:
        # URL for electronic version - only add it if no DOI link present
        # (they are very probably the same)
        bibdesk(
            f'make new linked URL at end of linked URLs with data "{pdf_filename}"',
            pub)

    # add URLs as linked URL if not there yet
    urls = bibdesk('value of fields whose name ends with "url"',
                   pub,
                   strlist=True)
    if 'arxiv' in article_bibcode.lower():
        article_gateway = get_article_gateway(article_bibcode,
                                              gateway_url=gateway_url)
        urls += [article_gateway['eprint_html']]

    urlspub = bibdesk('linked URLs', pub, strlist=True)

    for u in [u for u in urls if u not in urlspub]:
        bibdesk(f'make new linked URL at end of linked URLs with data "{u}"',
                pub)

    # add old annotated files
    for kept_pdf in kept_pdfs:
        bibdesk(f'add POSIX file "{kept_pdf}" to end of linked files', pub)

    # re-insert custom fields
    bibdesk_annotation = kept_fields.pop("BibDeskAnnotation", '')
    bibdesk(f'set its note to "{bibdesk_annotation}"', pub)
    newFields = bibdesk('return name of fields', pub, True)
    for k, v in list(kept_fields.items()):
        if k not in newFields:
            bibdesk(f'set value of field "{k}" to "{v}"', pub)
    notify('New publication added',
           bibdesk('cite key', pub).stringValue(),
           ads_article.title[0],
           alert_sound=alert_sound)
    logging.info('New publication added:')
    logging.info(bibdesk('cite key', pub).stringValue())
    logging.info(ads_article.title[0])

    # add back the static groups assignment
    if kept_groups != []:
        new_groups = bibdesk.add_groups(pub, kept_groups)

    return True
Example #19
        # if we are running for the first time
        # then there is no file of bibcodes to compare to
        # so we will just download the whole library
        if (not os.path.isfile(bibcodefile)) or refresh:
            print('Creating new bib file for ADS Library "{}", id: {}'.format(
                metadata["name"], metadata["id"]))

            library = get_library(
                library_id=metadata["id"],
                num_documents=metadata["num_documents"],
                rows=rows,
            )
            print("New bib file has {} items".format(len(library)))

            bibtex = ads.ExportQuery(library, format=args.bib_format).execute()

            with open("library.id", "w") as f:
                f.write(library_id)
            #                 f.write(bibcodefile)
            #                 f.write(bibfile)

            with open(bibcodefile, "w") as f:
                f.writelines("{}\n".format(bc) for bc in library)

            with open(bibfile, "w") as f:
                f.write(bibtex)

        # if there is a file of bibcodes we
        # need to compare with it. Unfortunately,
        # we have to download the entire library
Example #20
def get_references(useads=False,
                   cache=True,
                   updaterefcache=False,
                   bibtex=False,
                   showfails=False):
    """
    Return a dictionary of paper
    `reference <http://www.atnf.csiro.au/research/pulsar/psrcat/psrcat_ref.html>`_
    in the ATNF catalogue. The keys are the ref strings given in the ATNF
    catalogue.

    Note: The way that the ATNF references are stored has changed, so if you
    downloaded the catalogue with a version of psrqpy before v1.0.8 you may
    need to run this function with ``updaterefcache=True`` to allow references
    to work. You may also want to update the ATNF catalogue tarball with:

    >>> import psrqpy
    >>> psrqpy.QueryATNF(checkupdate=True)

    Args:
        useads (bool): boolean to set whether to use the Python :mod:`ads`
            module to get the NASA ADS URL for the references.
        cache (bool): use cached, or cache, the reference bundled with the
            catalogue tarball.
        updaterefcache (bool): update the cached references.
        bibtex (bool): if using ADS return the bibtex for the reference along
            with the ADS URL.
        showfails (bool): if outputting NASA ADS references set this flag to
            True to output the reference tags of references that fail to be
            found (mainly for debugging purposes).

    Returns:
        dict: a dictionary of references.
    """

    import tempfile
    import json

    # get the tarball
    try:
        dbtarfile = download_file(ATNF_TARBALL, cache=not updaterefcache)
    except IOError:
        raise IOError("Problem accessing ATNF catalogue tarball")

    try:
        # open tarball
        pulsargz = tarfile.open(dbtarfile, mode="r:gz")

        # extract the references
        reffile = pulsargz.extractfile("psrcat_tar/psrcat_ref")
    except IOError:
        raise IOError("Problem extracting the database file")

    refdic = {
        line.split()[0]: " ".join(line.split()[2:])
        for line in reffile.read().decode("utf-8").strip().split("***")
        if len(line) > 0
    }

    reffile.close()
    pulsargz.close()  # close tar file

    # if not requiring ADS references just return the current dictionary
    if not useads:
        return refdic
    else:
        try:
            import ads
            from ads.exceptions import APIResponseError
        except ImportError:
            warnings.warn(
                "Could not import ADS module, so no ADS information "
                "will be included",
                UserWarning,
            )
            return refdic, None

    # try getting cached references
    if not cache:
        adsrefs = {}
    else:
        from astropy.utils.data import is_url_in_cache

        tmpdir = tempfile.gettempdir()  # get system "temporary" directory
        dummyurl = "file://{}/ads_cache".format(tmpdir)
        dummyfile = os.path.join("{}".format(tmpdir), "ads_cache")

        # check if cached ADS refs list exists (using dummy URL)
        if is_url_in_cache(dummyurl) and not updaterefcache:
            adsfile = download_file(dummyurl, cache=True, show_progress=False)

            try:
                fp = open(adsfile, "r")
            except IOError:
                warnings.warn("Could not load ADS URL cache for references",
                              UserWarning)
                return refdic, None

            cachedrefs = json.load(fp)
            fp.close()

            adsrefs = None
            adsbibtex = None
            failures = None
            if "urls" in cachedrefs:
                adsrefs = cachedrefs["urls"]
            if bibtex and "bibtex" in cachedrefs:
                adsbibtex = cachedrefs["bibtex"]
            if showfails and "failures" in cacherefs:
                failures = cachedrefs["failures"]

            if bibtex:
                if failures is None:
                    return refdic, adsrefs, adsbibtex
                else:
                    return refdic, adsrefs, adsbibtex, failures
            else:
                if failures is None:
                    return refdic, adsrefs
                else:
                    return refdic, adsrefs, failures
        else:
            adsrefs = {}

    # loop over references
    j = 0
    bibcodes = {}
    failures = []
    for reftag in refdic:
        j = j + 1

        refstring = refdic[reftag]

        # check if IAU Circular or PhD thesis
        iaucirc = True if "IAU Circ" in refstring else False
        thesis = True if "PhD thesis" in refstring else False

        sepauthors = ""

        # check for arXiv identifier
        arxivid = None
        if "arXiv:" in refstring or "ArXiv:" in refstring:
            for searchterm in [
                    r"[Aa]rXiv:[0-9]{4}.[0-9]*",
                    r"[Aa]rXiv:astro-ph/[0-9]{7}",
            ]:
                match = re.search(searchterm, refstring)

                if match is not None:
                    arxivid = match.group().lower()
                    break
        else:
            if iaucirc:
                # get circular number (value after IAU Circ. No.)
                spl = re.split(r"([0-9]{4})", refstring)
                noidx = 1
                for val in spl:
                    if "IAU Circ" in val:
                        break
                    noidx += 1
                volume = spl[noidx]
            else:
                # do splitting on the year (allows between 1000-2999)
                spl = re.split(r"([1-2][0-9]{3})", refstring)

                if len(spl) < 2:
                    # no authors + year, so ignore!
                    failures.append(reftag)
                    continue

                year = spl[1] if len(spl[1]) == 4 else None

                try:
                    int(year)
                except (ValueError, TypeError):
                    # "year" is not an integer
                    failures.append(reftag)
                    continue

                # get the authors (remove line breaks/extra spaces and final full-stop)
                authors = spl[0].strip().strip(".")

                # remove " Jr." from any author names (as it causes issues!)
                authors = authors.replace(" Jr.", "")

                # replace ampersands/and with ".," for separation
                authors = authors.replace(" &", ".,").replace(" and", ".,")

                # separate out authors
                sepauthors = [
                    auth.lstrip() for auth in authors.split(".,")
                    if len(auth.strip()) > 0 and "et al" not in auth
                ]

                # remove any "'s for umlauts in author names
                sepauthors = [a.replace(r'"', "") for a in sepauthors]

                if len(sepauthors) == 0:
                    # no authors were parsed
                    failures.append(reftag)
                    continue

            if not thesis and not iaucirc:
                volume = None
                page = None
                if len(spl) > 2:
                    # join the remaining values and split on ","
                    extrainfo = [
                        info
                        for info in ("".join(spl[2:])).lstrip(".").split(",")
                        if len(info.strip()) > 0
                    ]

                    # get the journal volume (assumed to be second from last)
                    try:
                        # in case volume contains issue number in brackets perform split
                        volume = int(extrainfo[-2].strip().split("(")[0])
                    except (IndexError, TypeError, ValueError):
                        # could not get the volume
                        pass

                    # get the page if given (assumed to be the last value)
                    try:
                        testpage = re.sub("[\+\-\.]", "",
                                          extrainfo[-1].strip().split("-")[0])
                        if not testpage.startswith(
                                "eaao"):  # Science Advances page string
                            if (testpage[0].upper() in ["L", "A", "E"]
                                    or testpage[0:4]
                                    == ""):  # e.g. for ApJL, A&A, PASA
                                _ = int(testpage[1:])
                            elif testpage[-1].upper(
                            ) == "P":  # e.g., for early MNRAS
                                _ = int(testpage[:-1])
                            else:
                                _ = int(testpage)
                        page = testpage
                    except (IndexError, TypeError, ValueError):
                        # could not get the page
                        pass

                if volume is None or page is None:
                    failures.append(reftag)
                    continue

        # generate the query string
        if arxivid is None:
            if not thesis:
                if iaucirc:
                    myquery = 'bibstem:"IAUC" volume:"{}"'.format(volume)
                else:
                    # default query without authors
                    myquery = "year:{} AND volume:{} AND page:{}".format(
                        year, volume, page)

                    # add author if given
                    if len(sepauthors) > 0:
                        # check if authors have spaces in last names (a few cases due to formatting of some accented names),
                        # if so try next author...
                        for k, thisauthor in enumerate(sepauthors):
                            if len(thisauthor.split(",")[0].split()) == 1:
                                myquery += ' AND author:"{}{}"'.format(
                                    "^" if k == 0 else "", thisauthor)
                                break
            else:
                myquery = 'year: {} AND author:"^{}" AND bibstem:"PhDT"'.format(
                    year, sepauthors[0])
        else:
            myquery = arxivid

        try:
            article = ads.SearchQuery(q=myquery)
        except APIResponseError:
            failures.append(reftag)
            warnings.warn(
                "Could not get reference information, so no ADS "
                "information for {} will be included".format(reftag),
                UserWarning,
            )
            continue

        for paper in article:
            bibcodes[reftag] = paper.bibcode
            adsrefs[reftag] = ADS_URL.format(bibcodes[reftag])

        # check if paper bibcode was found
        if reftag not in bibcodes:
            failures.append(reftag)

    if bibtex:
        # use ExportQuery to get bibtex
        expquery = ads.ExportQuery(list(
            bibcodes.values())).execute().split("\n\n")

        adsbibtex = {}
        for reftag in bibcodes:
            for equery in expquery:
                if bibcodes[reftag] in equery:
                    adsbibtex[reftag] = equery
                    break

    if cache:
        # output adsrefs to cache file
        try:
            # output to dummy temporary file and then "download" to cache
            fp = open(dummyfile, "w")

            cachedic = {}
            cachedic["urls"] = adsrefs

            if bibtex:
                cachedic["bibtex"] = adsbibtex

            if showfails:
                cachedic["failures"] = failures

            json.dump(cachedic, fp, indent=2)
            fp.close()
        except IOError:
            raise IOError("Could not output the ADS references to a file")

        # cache the file
        _ = download_file(dummyurl, cache=True, show_progress=False)

        # remove the temporary file
        os.remove(dummyfile)

    if bibtex:
        if showfails:
            return refdic, adsrefs, adsbibtex, failures
        else:
            return refdic, adsrefs, adsbibtex
    else:
        if showfails:
            return refdic, adsrefs, failures
        else:
            return refdic, adsrefs
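A usage sketch following the docstring's return conventions: with useads=True and bibtex=True (and showfails left as False), the function returns the reference dictionary, the ADS URLs, and the BibTeX entries:

refdic, adsrefs, adsbibtex = get_references(useads=True, bibtex=True)
for reftag, url in adsrefs.items():
    print(reftag, url)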
Example #21
def main():
    parser = ArgumentParser()
    parser.add_argument("files",
                        metavar="TEX",
                        nargs="+",
                        help="tex files to search citation keys")
    parser.add_argument(
        "-o",
        "--output",
        metavar="BIB",
        help=
        "main bibtex file; new entries will be added to this file, existing entries may be updated",
    )
    parser.add_argument(
        "-r",
        "--other",
        nargs="+",
        metavar="BIB",
        help="other bibtex files that contain existing references (read-only)",
    )
    parser.add_argument(
        "--no-update",
        dest="update",
        action="store_false",
        help="for existing entries, do not check ADS for updates",
    )
    parser.add_argument(
        "--force-regenerate",
        action="store_true",
        help=
        "for all existing entries, regenerate the bibtex with the latest version from ADS if found",
    )
    parser.add_argument(
        "--merge-other",
        action="store_true",
        help="merge the entries from other bibtex files",
    )  # thanks to syrte for adding this option
    parser.add_argument(
        "--include-physics",
        action="store_true",
        help="include physics database when searching ADS",
    )
    parser.add_argument(
        "--no-backup",
        dest="backup",
        action="store_false",
        help="back up output file if being overwritten",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s {version}".format(version=__version__),
    )
    args = parser.parse_args()

    if args.include_physics:
        global _database  # pylint: disable=global-statement
        _database = '("astronomy" OR "physics")'

    if len(args.files) == 1 and args.files[0].lower().endswith(
            ".bib"):  # bib update mode
        if args.output or args.other:
            parser.error(
                "Input file is a bib file, not a tex file. This will enter bib update mode. Do not specify `--output` or `--other` in this mode."
            )
        if not args.update:
            parser.error(
                "Input file is a bib file, not a tex file. This will enter bib update mode. Do not specify `--no-update` in this mode."
            )
        if not os.path.isfile(args.files[0]):
            parser.error("Cannot locate input bib file {}".format(
                args.files[0]))
        keys = None
        args.output = args.files[0]

    elif args.output:  # bib output is specified
        keys, _ = search_keys(args.files, find_bib=False)

    else:  # bib output is missing, auto-identify
        keys, bib = search_keys(args.files, find_bib=True)
        if not bib:
            parser.error(
                "Cannot identify bibtex file from the tex source. Use -o to specify a bibtex file as output."
            )
        args.output = bib.pop(0)
        if args.other:
            args.other.extend(bib)
        else:
            args.other = bib

        msg = "Auto-identifying bibtex files...\n"
        msg += "Main bibtex source (output file): {}\n".format(args.output)
        if args.other:
            msg += "Additional bibtex sources: {}\n".format(", ".join(
                args.other))
        print(_headerize(msg))

    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(" ", parser=get_bparser())

    bib_other = bibtexparser.loads(" ", parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(
                    bib_other, bibtexparser.load(fp, parser=get_bparser()))

    if keys is None:  # bib update mode
        keys = list(bib.entries_dict)

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)

    for key in keys:
        key_exists = key in bib.entries_dict
        key_exists_in_others = key in bib_other.entries_dict

        if (key_exists
                and args.update) or (key_exists_in_others and args.merge_other
                                     and args.update):
            bibcode = extract_bibcode(bib.entries_dict[key])
            bibcode_new = entry2bibcode(bib.entries_dict[key])
            if bibcode_new:
                all_entries[bibcode_new].append(key)
                if bibcode_new != bibcode or args.force_regenerate:
                    to_retrieve.add(bibcode_new)
                    print("{}:{} UPDATE => {}".format(
                        key,
                        ""
                        if key_exists else "FOUND IN SECONDARY BIB SOURCES,",
                        bibcode_new,
                    ))
                    continue

        if key_exists:
            print("{}: EXISTING".format(key))
            continue

        if key_exists_in_others and args.merge_other:
            bib.entries_dict[key] = bib_other.entries_dict[key]
            bib.entries = list(bib.entries_dict.values())
            print("{}: FOUND IN OTHER BIB SOURCE, MERGED".format(key))
            continue

        if key_exists_in_others:
            print("{}: FOUND IN OTHER BIB SOURCE, IGNORED".format(key))
            continue

        bibcode = find_bibcode(key)
        if bibcode:
            to_retrieve.add(bibcode)
            all_entries[bibcode].append(key)
            print("{}: NEW ENTRY => {}".format(key, bibcode))
        else:
            not_found.add(key)
            print("{}: NOT FOUND".format(key))

    if not_found:
        print(_headerize("Please check the following keys"))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize("The following keys refer to the same entry"))
        for b, k in repeated_keys:
            print(
                "{1} has been referred to by the following keys; please keep only one:\n{0}\n"
                .format(" ".join(k), b))

    if to_retrieve:
        print(_headerize("Building new bibtex file, please wait..."))
        bib_new = bibtexparser.loads(ads.ExportQuery(list(to_retrieve),
                                                     "bibtex").execute(),
                                     parser=get_bparser())
        for entry in bib_new.entries:
            entry["ID"] = all_entries[entry["ID"]][0]
        bib = update_bib(bib, bib_new)
        bib_dump_str = bibtexparser.dumps(bib).encode("utf8")
        if args.backup and os.path.isfile(args.output):
            copyfile(args.output, args.output + ".bak")
        with open(args.output, "wb") as fp:
            fp.write(bib_dump_str)

    print(_headerize("Done!"))

    # check version
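    # NOTE: distutils.version.StrictVersion is deprecated (distutils was
    # removed from the standard library in Python 3.12); packaging's
    # Version class is the usual replacement for comparisons like this one.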
    try:
        latest_version = StrictVersion(
            requests.get(
                "https://pypi.python.org/pypi/adstex/json",
                timeout=0.1,
            ).json()["info"]["version"])
    except (requests.RequestException, KeyError, ValueError):
        pass
    else:
        if latest_version > StrictVersion(__version__):
            msg = "A newer version of adstex (v{}) is now available!\n".format(
                latest_version)
            msg += "Please consider updating it by running:\n\n"
            msg += "pip install adstex=={}".format(latest_version)
            print(_headerize(msg))
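Both this main() and the older variant in Example #22 below call an update_bib helper that does not appear in these excerpts. The following is a plausible minimal sketch, not the package's actual code, written against bibtexparser's BibDatabase interface (entries is a list of entry dicts keyed by their "ID" field):

def update_bib(base, extra):
    # merge two bibtexparser databases in place; on a key clash the entry
    # from `extra` wins
    base.entries_dict.update(extra.entries_dict)
    # rebuild the entries list so both views of the database stay in step
    base.entries = list(base.entries_dict.values())
    return base

The in-place update matters: main() later reads both entries_dict and entries, so the two views must be kept consistent rather than replaced with a fresh copy.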
Example #22
def main():
    parser = ArgumentParser()
    parser.add_argument('files',
                        metavar='TEX',
                        nargs='+',
                        help='tex files to search citation keys')
    parser.add_argument(
        '-o',
        '--output',
        metavar='BIB',
        required=True,
        help=
        'main bibtex file; new entries will be added to this file, existing entries may be updated'
    )
    parser.add_argument(
        '-r',
        '--other',
        nargs='+',
        metavar='BIB',
        help='other bibtex files that contain existing references (read-only)')
    parser.add_argument(
        '--no-update',
        dest='update',
        action='store_false',
        help='for existing entries, do not check ADS for updates')
    parser.add_argument(
        '--force-update',
        dest='force_update',
        action='store_true',
        help=
        'for all existing entries, overwrite with the latest version from ADS')
    parser.add_argument('--include-physics',
                        dest='include_physics',
                        action='store_true',
                        help='include physics database when searching ADS')
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    if args.include_physics:
        # declare the module-level name; without this, the assignment below
        # would only create an unused local variable
        global _database  # pylint: disable=global-statement
        _database = '("astronomy" OR "physics")'

    keys = search_keys(args.files)

    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(' ', parser=get_bparser())

    bib_other = bibtexparser.loads(' ', parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(
                    bib_other, bibtexparser.load(fp, parser=get_bparser()))

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)
    try:
        for key in keys:
            if key in bib.entries_dict:
                if args.update:
                    bibcode = extract_bibcode(bib.entries_dict[key])
                    bibcode_new = entry2bibcode(bib.entries_dict[key])
                    if bibcode_new:
                        all_entries[bibcode_new].append(key)
                        if bibcode_new != bibcode or args.force_update:
                            to_retrieve.add(bibcode_new)
                            print('{}: UPDATE => {}'.format(key, bibcode_new))
                            continue
                print('{}: EXISTING'.format(key))
                continue

            if key in bib_other.entries_dict:
                print('{}: FOUND IN OTHER REFS, IGNORED'.format(key))
                continue

            bibcode = find_bibcode(key)
            if bibcode:
                to_retrieve.add(bibcode)
                all_entries[bibcode].append(key)
                print('{}: NEW ENTRY => {}'.format(key, bibcode))
            else:
                not_found.add(key)
                print('{}: NOT FOUND'.format(key))
    except KeyboardInterrupt:
        print()

    if not_found:
        print(_headerize('Please check the following keys'))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize('The following keys refer to the same entry'))
        for b, k in repeated_keys:
            print(
                '{1} has been referred to by the following keys; please keep only one:\n{0}\n'
                .format(' '.join(k), b))

    if to_retrieve:
        print(_headerize('Building new bibtex file, please wait...'))
        bib_new = bibtexparser.loads(ads.ExportQuery(list(to_retrieve),
                                                     'bibtex').execute(),
                                     parser=get_bparser())
        for entry in bib_new.entries:
            entry['ID'] = all_entries[entry['ID']][0]
        bib = update_bib(bib, bib_new)
        bib_dump_str = bibtexparser.dumps(bib).encode('utf8')
        with open(args.output, 'wb') as fp:
            fp.write(bib_dump_str)

    print(_headerize('Done!'))
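One more helper that both excerpts take for granted is extract_bibcode, which recovers the bibcode already recorded in an entry. A hypothetical reimplementation for illustration only (the real helper may differ): it assumes ADS-exported entries carry an adsurl field whose last path component is the percent-encoded bibcode.

from urllib.parse import unquote

def extract_bibcode(entry):
    # `entry` is a bibtexparser entry dict; ADS exports record the article
    # URL in an `adsurl` field ending in the bibcode
    adsurl = entry.get("adsurl", "")
    if not adsurl:
        return None
    # the bibcode is the last path component, percent-encoded (e.g. %26 for &)
    return unquote(adsurl.rstrip("/").rsplit("/", 1)[-1])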