def retrieve_bibtex_from_ads(bibcode):
    """Get the bibtex entry for a given bibcode"""
    q = ads.ExportQuery(bibcode, format="bibtex")
    export = q.execute()
    return export
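# Usage sketch for retrieve_bibtex_from_ads (not from the original source).
# The ads package looks for an API token in ads.config.token, the
# ADS_DEV_KEY environment variable, or the file ~/.ads/dev_key; the token
# value below is a placeholder and the bibcode is one of the examples used
# elsewhere on this page.
import ads

ads.config.token = "YOUR-ADS-API-TOKEN"  # placeholder token
entry = retrieve_bibtex_from_ads("2005IAUS..216..170H")
print(entry)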
def export_citation(bibcodes, output_format="aastex"):
    """Export the bibcodes in the form of aastex

    Args:
        bibcodes (list): string list of bibcodes
        output_format (str): output format ("aastex")

    Returns:
        bibs (list): string list of bibs
    """
    if len(bibcodes) == 0:
        return []
    else:
        q = ads.ExportQuery(bibcodes, format=output_format)
        try:
            export_response = q.execute()
        except ads.exceptions.APIResponseError:
            print("{0} is not in ADS library!".format(bibcodes))
            return []
        else:
            bibs = list()
            for bib in export_response.split("\n"):
                if len(bib) > 0:
                    bibs.append(bib)
            return bibs
def export(ctx, format, bibcodes):
    """Export article(s) to the specified format.

    - Export one article to bibtex:

        ads export 2005IAUS..216..170H

    - Export multiple articles to bibtex:

        ads export 2005IAUS..216..170H '2017A&A...608A.116C'

    NOTE: If a bibcode contains `&`, e.g., "2017A&A...608A.116C", either `&`
    needs to be escaped as in

        ads export 2017A\&A...608A.116C

    or put in quotes

        ads export "2017A&A...608A.116C"

    because in bash, `&` means put the process in the background.
    """
    if ctx.obj["debug"]:
        logger.setLevel(logging.DEBUG)
    # TODO: This is breaking up the string if one item is given from stdin.
    bibcodes = list(map(find_bibcode, bibcodes))
    logger.debug(f"bibcodes: {bibcodes}")
    if len(bibcodes) == 0:
        raise click.UsageError("At least one bibcode must be specified.")
    if not ctx.obj["debug"]:
        q = ads.ExportQuery(bibcodes, format=format)
        click.echo(q())
def update_bib(self):
    bibcodes = list(set(self.get_bibcodes()) - set(self._bib.keys()))
    if bibcodes:
        self._bib.update(
            bibtexparser.loads(
                ads.ExportQuery(bibcodes, 'bibtex').execute()).entries_dict)
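# A minimal standalone sketch of the caching pattern used by update_bib
# above: only bibcodes missing from a local dict are sent to ExportQuery,
# so repeated calls do not spend API quota on entries already fetched.
# This relies on ADS bibtex exports using the bibcode as the entry key;
# the names `wanted` and `cache` are illustrative, not from the original.
import ads
import bibtexparser

def fetch_missing(wanted, cache):
    missing = list(set(wanted) - set(cache))
    if missing:
        cache.update(
            bibtexparser.loads(
                ads.ExportQuery(missing, 'bibtex').execute()).entries_dict)
    return cache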
def get_bibtex_from_ADS(arg_dict):
    papers = ads.SearchQuery(**arg_dict)
    papers = [paper for paper in papers]
    if len(papers) == 0:
        print('No papers found in ADS')
        return
    opts_ = []
    max_res = int(config.config['ADS']['max results'])
    if len(papers) > max_res:
        papers = papers[:max_res]
    for paper in papers:
        if len(paper.author) > 1:
            et_al = 'et al. '
        else:
            et_al = ''
        opts_ += [f'{paper.author[0]} {et_al}[{paper.year}]: {paper.title}']
    questions = [
        inquirer.Checkbox('ads',
                          message="Add to bibtex and attempt PDF fetch?",
                          choices=opts_,
                          ),
    ]
    answers = inquirer.prompt(questions)
    save_papers = []
    for ans in answers['ads']:
        save_papers.append(papers[opts_.index(ans)])
    if len(save_papers) == 0:
        return
    papers = save_papers
    del save_papers
    parser = bib.get_parser()
    bibcodes = [paper.bibcode for paper in papers]
    bibtex_data = ads.ExportQuery(
        bibcodes=bibcodes,
        format='bibtex',
    ).execute()
    bib_database = bibtexparser.loads(bibtex_data, parser)
    return bib_database, bibcodes
def _to_format(format, filter=None):
    """Convert bibcodes to a range of different output formats.

    Parameters
    ----------
    format : string
        Output format: ``bibtex`` | ``aastex`` | ``icarus`` | ``mnras``.
    filter : string, optional
        Filter the bibliography by key, showing only those that start
        with this string.

    Returns
    -------
    text : string
        ADS entries for all the bibliographic items in the given format.
        Uses a query to the export service to get the data for each
        reference.

    """
    import ads

    output = ''
    for task, ref in _filter(filter).items():
        with warnings.catch_warnings():
            # warnings.filterwarnings('error')
            try:
                for key, val in ref.items():
                    # This method avoids using multiple calls to the
                    # API that may impact rate limits
                    # https://github.com/adsabs/adsabs-dev-api/blob/master/Export_API.ipynb
                    query = ads.ExportQuery(list(val), format=format)
                    data = query.execute()
                    output += '% {:s}/{:s}:\n{:s}\n'.format(task, key, data)
            except ads.exceptions.APIResponseError as e:
                e = str(e)
                if '<title>' in e:
                    e = e[e.find('<title>') + 7:e.find('</title>')]
                warnings.warn(
                    'cannot obtain ADS data for {:s}/{:s}: ({:s})'.format(
                        task, key, e), RuntimeWarning)
                pass
    return output
def abstract(request, bibcode):
    # return HttpResponse(f"Viewing abstract for bibcode {bibcode}")
    q = list(
        ads.query(bibcode,
                  fl=[
                      'bibcode', 'title', 'author', 'aff', 'doi', 'pub',
                      'pubdate', 'citation_count', 'abstract', 'arxiv_class',
                      'volume', 'issue', 'page', 'year', 'keyword',
                      'orcid_pub', 'orcid_user', 'orcid_other'
                  ]))
    assert len(q) == 1, "Non-unique bibcode"
    paper = q[0]
    bibtex = ads.ExportQuery(bibcode).execute()
    try:
        eprint = re.search(r'eprint = \{(.+)\}', bibtex)[1]
    except TypeError:  # re.search returned None: no eprint in the bibtex
        eprint = None
    orcid = paper.orcid_pub
    try:
        orcid = [
            pub if pub != '-' else auth
            for pub, auth in zip(paper.orcid_pub, paper.orcid_user)
        ]
    except TypeError:  # one of the ORCID fields is None
        pass
    try:
        orcid = [
            o if o != '-' else other
            for o, other in zip(orcid, paper.orcid_other)
        ]
    except TypeError:
        pass
    template = loader.get_template('abstract.html')
    context = {
        'paper': paper,
        'eprint': eprint,
        'bibtex': bibtex,
        'authors': zip(paper.author, paper.aff, orcid)
    }
    return HttpResponse(template.render(context, request))
def query_ads_bibtex(self, bibcodes):
    """Query ADS for the paper bibtexes specified by a list of
    bibcodes ('bibcodes')"""
    bc_ads = BibtexCollection()
    try:
        bibtex_string = ads.ExportQuery(bibcodes=bibcodes,
                                        format='bibtex').execute()
        bc_ads.read_from_string(bibtex_string)
        bibcodes_found = bc_ads.bibcode_entries.keys()
        nresults = len(bibcodes_found)
        nbibcodes = len(bibcodes)
        if nresults == nbibcodes:
            return bc_ads
        else:
            print('WARNING: did not retrieve bibtex for {} bibcodes:'.format(
                nbibcodes - nresults))
            for bc in bibcodes:
                if bc not in bibcodes_found:
                    print(bc)
    except ads.exceptions.APIResponseError:
        print('ERROR: ADS APIResponseError. '
              'You probably exceeded your rate limit.')
        raise
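# A hedged sketch of splitting a long bibcode list into chunks before
# calling ExportQuery, so that a partial failure or an oversized request
# only affects one batch. The chunk size of 100 is an arbitrary choice for
# illustration, not a documented ADS limit, and `export_in_chunks` is not
# part of the original source.
import ads

def export_in_chunks(bibcodes, size=100):
    parts = []
    for i in range(0, len(bibcodes), size):
        parts.append(
            ads.ExportQuery(bibcodes=bibcodes[i:i + size],
                            format='bibtex').execute())
    return '\n\n'.join(parts)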
def getBibtexs(self, bibcodes):
    """Obtain a string containing the bibtex entries
    for all the requested bibcodes

    Parameter:
        bibcodes: a single bibcode (string containing the ADS
            identifier of a given entry) or a list of bibcodes

    Output:
        a string with all the bibtex entries
    """
    ads.config.token = pbConfig.params["ADSToken"]
    try:
        self.q = ads.ExportQuery(bibcodes=bibcodes, format="bibtex")
        export = self.q.execute()
    except ads.exceptions.APIResponseError:
        pBLogger.exception(self.unauthorized)
    except Exception:
        pBLogger.exception(self.genericExportError, exc_info=True)
    else:
        pBLogger.info(self.getLimitInfo())
        return export
    return ""
def main():
    parser = ArgumentParser()
    parser.add_argument('files', metavar='TEX', nargs='+',
                        help='tex files to search citation keys')
    parser.add_argument('-o', '--output', metavar='BIB',
                        help='main bibtex file; new entries will be added to this file, existing entries may be updated')
    parser.add_argument('-r', '--other', nargs='+', metavar='BIB',
                        help='other bibtex files that contain existing references (read-only)')
    parser.add_argument('--no-update', dest='update', action='store_false',
                        help='for existing entries, do not check ADS for updates')
    parser.add_argument('--force-regenerate', action='store_true',
                        help='for all existing entries, regenerate the bibtex with the latest version from ADS if found')
    parser.add_argument('--include-physics', action='store_true',
                        help='include physics database when searching ADS')
    parser.add_argument('--no-backup', dest='backup', action='store_false',
                        help='back up output file if being overwritten')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    if args.include_physics:
        _database = '("astronomy" OR "physics")'

    if len(args.files) == 1 and args.files[0].lower().endswith('.bib'):
        # bib update mode
        if args.output or args.other:
            parser.error('Input file is a bib file, not tex file. This will enter bib update mode. Do not specify "output" and "other".')
        if not args.update:
            parser.error('Input file is a bib file, not tex file. This will enter bib update mode. Must not specify --no-update')
        if not os.path.isfile(args.files[0]):
            parser.error('Cannot locate input bib file {}'.format(args.files[0]))
        keys = None
        args.output = args.files[0]
    elif args.output:  # bib output is specified
        keys, _ = search_keys(args.files, find_bib=False)
    else:  # bib output is missing, auto-identify
        keys, bib = search_keys(args.files, find_bib=True)
        if not bib:
            parser.error('Cannot identify bibtex file from the tex source. Use -o to specify a bibtex file as output.')
        args.output = bib.pop(0)
        if args.other:
            args.other.extend(bib)
        else:
            args.other = bib
        msg = 'Auto-identifying bibtex files...\n'
        msg += 'Main bibtex source (output file): {}\n'.format(args.output)
        if args.other:
            msg += 'Additional bibtex sources: {}\n'.format(', '.join(args.other))
        print(_headerize(msg))

    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(' ', parser=get_bparser())

    bib_other = bibtexparser.loads(' ', parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(bib_other, bibtexparser.load(fp, parser=get_bparser()))

    if keys is None:  # bib update mode
        keys = list(bib.entries_dict)

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)
    for key in keys:
        if key in bib.entries_dict:
            if args.update:
                bibcode = extract_bibcode(bib.entries_dict[key])
                bibcode_new = entry2bibcode(bib.entries_dict[key])
                if bibcode_new:
                    all_entries[bibcode_new].append(key)
                    if bibcode_new != bibcode or args.force_regenerate:
                        to_retrieve.add(bibcode_new)
                        print('{}: UPDATE => {}'.format(key, bibcode_new))
                        continue
            print('{}: EXISTING'.format(key))
            continue

        if key in bib_other.entries_dict:
            print('{}: FOUND IN OTHER BIB SOURCE, IGNORED'.format(key))
            continue

        bibcode = find_bibcode(key)
        if bibcode:
            to_retrieve.add(bibcode)
            all_entries[bibcode].append(key)
            print('{}: NEW ENTRY => {}'.format(key, bibcode))
        else:
            not_found.add(key)
            print('{}: NOT FOUND'.format(key))

    if not_found:
        print(_headerize('Please check the following keys'))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize('The following keys refer to the same entry'))
        for b, k in repeated_keys:
            print('{1} has been referred as the following keys; please keep only one:\n{0}\n'.format(' '.join(k), b))

    if to_retrieve:
        print(_headerize('Building new bibtex file, please wait...'))
        bib_new = bibtexparser.loads(
            ads.ExportQuery(list(to_retrieve), 'bibtex').execute(),
            parser=get_bparser())
        for entry in bib_new.entries:
            entry['ID'] = all_entries[entry['ID']][0]
        bib = update_bib(bib, bib_new)

    bib_dump_str = bibtexparser.dumps(bib).encode('utf8')
    if args.backup and os.path.isfile(args.output):
        copyfile(args.output, args.output + '.bak')
    with open(args.output, 'wb') as fp:
        fp.write(bib_dump_str)
    print(_headerize('Done!'))

    # check version
    try:
        latest_version = StrictVersion(requests.get(
            'https://pypi.python.org/pypi/adstex/json').json()['info']['version'])
    except (requests.RequestException, KeyError, ValueError):
        pass
    else:
        if latest_version > StrictVersion(__version__):
            msg = 'A newer version of adstex (v{}) is now available!\n'.format(latest_version)
            msg += 'Please consider updating it by running:\n\n'
            msg += 'pip install adstex=={}'.format(latest_version)
            print(_headerize(msg))
"--bib-format", choices=["bibtex", "bibtexabs"], help="""[[DISABLED]] Format for bibtex file. bibtexabs only works if using the git version of the abs module""", default="bibtex", ) parser.add_argument("--debug", action="store_true") args = parser.parse_args() bibcodefile = args.bibcodes bibfile = args.bibfile token = args.token bibcodes = args.bibcode_list print(bibcodes) if args.debug: print(bibcodefile) print(bibfile) print(token) print("Adding {} new items".format(len(bibcodes))) bibtex = ads.ExportQuery(bibcodes, format=args.bib_format).execute() with open(bibcodefile, "a+") as f: f.writelines("{}\n".format(bc) for bc in bibcodes) with open(bibfile, "a+") as f: f.write("\n\n\n\n\n") f.write(bibtex)
def add_to_library(self, bibcode):
    if len(bibcode.split('arXiv')) == 2:
        print(bibcode)
        try:
            connection = 0
            while connection == 0:
                try:
                    ads_paper = list(
                        ads.SearchQuery(alternate_bibcode=bibcode,
                                        fl=['bibcode']))[0]
                    try:
                        del self.database[bibcode]
                    except KeyError:
                        pass
                    bibcode = ads_paper.bibcode
                    connection = 1
                    time.sleep(1)
                except requests.ConnectionError:
                    time.sleep(1)
        except IndexError:
            pass
    else:
        pass
    if bibcode not in self.database:
        connection = 0
        while connection == 0:
            try:
                a = ads.SearchQuery(
                    bibcode=bibcode,
                    fl=['adsurl', 'title', 'author', 'abstract'])
                a = list(a)[0]
                b = ads.ExportQuery(bibcodes=bibcode,
                                    format="bibtex").execute()
                connection = 1
                self.database[bibcode] = {
                    'adsurl': get_adsurl_from_bibtex(b),
                    'title': a.title,
                    'author': a.author,
                    'abstract': a.abstract,
                    'bibtex': b,
                    'call': bibcode
                }
                old_call = self.database[bibcode]['call']
                new_call = a.author[0].split(',')[0].replace(
                    ' ', '').replace('-', '') + bibcode[:4]
                new_call = ud.normalize('NFKD', new_call).encode(
                    'ascii', 'ignore').decode("utf-8")
                print(str(new_call))
                self.database[bibcode]['bibtex'] = \
                    self.database[bibcode]['bibtex'].replace(
                        '{0}{1}'.format('{', old_call),
                        '{0}{1}'.format('{', new_call))
                self.database[bibcode]['call'] = new_call
                calls = [
                    self.database[ff]['call'].split('B')[0]
                    for ff in self.database
                ]
                for i in self.database:
                    if calls.count(self.database[i]['call']) > 1:
                        old_call = self.database[i]['call']
                        new_call = self.database[i]['call'].split(
                            'B')[0] + 'B' + i
                        new_call = new_call.replace('&', 'a')
                        self.database[i]['bibtex'] = \
                            self.database[i]['bibtex'].replace(
                                '{0}{1}'.format('{', old_call),
                                '{0}{1}'.format('{', new_call))
                        self.database[i]['call'] = new_call
            except requests.ConnectionError:
                time.sleep(1)
    self.update_library_pickle()
    self.update_library_bib()
def add(search_query, author, title):
    fl = [
        'id', 'author', 'first_author', 'bibcode', 'id', 'year', 'title',
        'abstract', 'doi', 'pubdate', "pub", "keyword", "doctype",
        "identifier", "links_data"
    ]
    if author:
        search_query += "author:" + author
    if title:
        search_query += "title:" + title
    papers = list(ads.SearchQuery(q=search_query, fl=fl))
    if len(papers) == 0:
        selection = ads.search.Article
        exit()
    elif len(papers) == 1:
        selection = papers[0]  # type: ads.search.Article
    else:
        # first_ten = itertools.islice(papers, 10)
        first_ten = papers[:10]
        single_paper: ads.search.Article
        for index, single_paper in enumerate(first_ten):
            print(index, single_paper.title[0], single_paper.first_author)
        selected_index = click.prompt('select paper', type=int)
        selection = papers[selected_index]  # type: ads.search.Article

    assert len(selection.doi) == 1
    doi = selection.doi[0]

    try:
        paper = Paper.get(Paper.doi == doi)
        print("this paper has already been added")
        exit(1)
    except peewee.DoesNotExist:
        pass

    print("fetching bibcode")
    q = ads.ExportQuery([selection.bibcode])
    bibtex = q.execute()

    print("saving in db")
    paper = Paper()
    assert len(selection.title) == 1
    paper.doi = doi
    paper.title = selection.title[0]
    paper.abstract = selection.abstract
    paper.bibcode = selection.bibcode
    paper.year = selection.year
    paper.pubdate = selection.pubdate
    paper.pdf_downloaded = False
    paper.first_author = Author.get_or_create(name=selection.first_author)[0]
    paper.publication = Publication.get_or_create(name=selection.pub)[0]
    paper.doctype = Doctype.get_or_create(name=selection.doctype)[0]
    paper.arxiv_identifier = [
        ident for ident in selection.identifier if "arXiv:" in ident
    ][0].split("arXiv:")[-1]
    paper.bibtex = bibtex
    links = [json.loads(string) for string in selection.links_data]
    print(links)
    paper.save()

    authors = [Author.get_or_create(name=name)[0] for name in selection.author]
    for author in db.batch_commit(authors, 100):
        PaperAuthors.create(author=author, paper=paper)
    keywords = [
        Keyword.get_or_create(keyword=keyword)[0]
        for keyword in selection.keyword
    ]
    for keyword in db.batch_commit(keywords, 100):
        PaperKeywords.create(keyword=keyword, paper=paper)

    print("fetching PDF")
    arxiv_url = "https://arxiv.org/pdf/{id}".format(id=paper.arxiv_identifier)
    r = requests.get(arxiv_url, stream=True)
    print(arxiv_url)
    with open('library/{filename}.pdf'.format(filename=paper.id), 'wb') as f:
        chunk_size = 1024  # bytes
        file_size = int(r.headers.get('content-length', 0))
        progress_length = math.ceil(file_size // chunk_size)
        with click.progressbar(r.iter_content(chunk_size=20),
                               length=progress_length) as progress_chunks:
            for chunk in progress_chunks:
                f.write(chunk)
        paper.pdf_downloaded = True
        paper.save()
def doQuery(args, config, **kwargs):
    query = BuildQuery()
    query.setKey('sort', 'citation_count')
    for key in vars(args):
        if key not in ['interactive', 'func']:
            query.setKey(key, vars(args)[key])
    for key in kwargs:
        if key not in ['interactive', 'func']:
            query.setKey(key, kwargs[key])

    fl = [
        'abstract', 'first_author', 'author', 'year', 'pub', 'title', 'bibcode'
    ]
    query.setKey('fl', fl)

    # get results
    results = query.execute()
    res_as_array = []
    # loop over results
    try:
        # sometimes, the ads script gets an IndexError, hide this
        res_as_array = [res for res in results]
    except IndexError:
        res_as_array = []
    printResults(res_as_array)

    if len(res_as_array) == 0:
        print('No results')
        return

    if args.interactive or ('interactive' in kwargs and kwargs['interactive']):
        print('')
        inp = input(
            'Comma separated articles to download [e.g. 1-3, 4], [m] for more'
            ' [q] to quit or'
            ' add more parameters to request [e.g. year:2016]: ')

        # Match quit request
        if inp == 'q':
            return

        # Match requests for more articles
        # match any string like "5m", "10 more", …
        grp = re.match(r'(\d+) ?[mM](ore)?', inp)
        if grp is not None:
            nmore = int(grp.group(1))
            # load more
            if 'rows' in kwargs:
                kwargs['rows'] += nmore
            else:
                kwargs['rows'] = 50 + nmore
            print('Loading {} more!'.format(nmore))
            doQuery(args, config, **kwargs)
            return

        # Match selection
        mask = parseAsList(inp)
        if len(mask) > 0:
            papers = [r for i, r in enumerate(res_as_array) if i in mask]
            print('Selected:')
            printResults(papers)
            action = getInput('Download [d], bibtex[b], quit[q]? ',
                              lambda e: e.lower())
            # Download selected articles
            if 'd' in action:
                for paper in papers:
                    print('Downloading "{}"'.format(paper.title[0]))
                    downloadPaper(paper, config)
            # Get bibtex reference
            if 'b' in action:
                try:
                    import pyperclip
                    clip = True
                except ModuleNotFoundError:
                    clip = False
                print('Downloading bibtex entries')
                # Get the list of bibcodes
                bibcodes = [p.bibcode for p in papers]
                eq = ads.ExportQuery(bibcodes)
                bib = eq.execute()
                print(bib)
                if clip:
                    print('Copied to clipboard!')
                    pyperclip.copy(bib)
            return papers
        else:
            # match more request
            if args.q is not None:
                args.q = args.q + ' ' + inp
            elif 'q' in kwargs:
                kwargs['q'] += ' ' + inp
            else:
                kwargs['q'] = inp
            doQuery(args, config, **kwargs)
            return
    else:
        return res_as_array
##
## Note : To strip an existing BibTeX file down to bibcodes with vim,
##   :v/^@/d
##   :%s/@.*{//g
##   :%s/,//g

import ads

## Setup the argument parser
import argparse
parser = argparse.ArgumentParser(description='bibcode to import')
parser.add_argument('bibcode', help='A bibcode for input')
args = parser.parse_args()

## Read bibcode input from file if not specified
#bibcode = args.bibcode
with open(args.bibcode) as f:
    bibcode = f.read().splitlines()

## Query ADS with the set of bibcodes
q = ads.ExportQuery(bibcodes=bibcode, format='bibtex')
bibtex = q.execute()

## Write BibTeX entries to file
## (the `with` blocks close the files; no explicit close() is needed)
with open(args.bibcode + '.bib', 'a') as bibfile:
    print(bibtex, file=bibfile)
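## Example invocation of the script above (illustrative; the file names are
## placeholders, not from the original source). Given a file "bibcodes.txt"
## with one bibcode per line, e.g.
##   2005IAUS..216..170H
##   2017A&A...608A.116C
## running
##   python import_bibcodes.py bibcodes.txt
## appends the exported BibTeX entries to "bibcodes.txt.bib".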
def main():
    desc = 'a simple tool to find out-of-date arXiv preprints, optionally updating and writing a new file'
    parser = argparse.ArgumentParser(description=desc)

    h = 'the input bib file to search through'
    parser.add_argument('bibfile', type=str, help=h)

    h = 'do a dry run, simply printing out all of the out-of-date references'
    parser.add_argument('--dry-run', '-n', action='store_true', help=h)

    h = 'the output bib file to write; if not provided, any new entries will be written to stdout'
    parser.add_argument('-o', '--output', type=str, help=h)

    h = "string specifying NASA ADS API token; see https://github.com/adsabs/adsabs-dev-api#access. "
    h += "The token can also be stored in ~/.ads/dev_key for repeated use"
    parser.add_argument('-t', '--token', type=str, help=h)

    h = 'whether to use verbose output'
    parser.add_argument('-v', '--verbose', action='store_true', help=h)

    ns = parser.parse_args()

    # set the token
    if ns.token is not None:
        os.environ['ADS_DEV_KEY'] = ns.token

    # parse the bib file
    with open(ns.bibfile, 'r') as ff:
        refs = bibtexparser.load(ff)

    # the indices of pre-prints
    preprints = []
    for i, r in enumerate(refs.entries):
        adsurl = r.get('adsurl', None)
        if is_preprint(r):
            preprints.append(i)
        elif ns.verbose:
            print("entry '%s' appears to be a published work" % (r['ID']))

    # sort from largest to smallest
    preprints = sorted(preprints, reverse=True)
    args = (len(preprints), len(refs.entries))
    print("%d out of %d references possibly out-of-date..." % args)

    # get the relevant info from ADS
    updated = []
    for i in preprints:
        r = refs.entries[i]
        print("checking publication status of the '%s' bib entry" % r['ID'])
        arxiv_id = None

        # try to match the pattern
        for field in r:
            for pattern in ARXIV_PATTERNS:
                matches = pattern.search(r[field])
                if matches:
                    arxiv_id = matches.group(0)
                    break

        # check ads url too
        if arxiv_id is None and 'adsurl' in r and 'abs/' in r['adsurl']:
            arxiv_id = r['adsurl'].split('abs/')[-1]

        # skip this one and warn!
        if arxiv_id is None:
            warnings.warn("cannot check entry '%s'; please add 'eprint' or proper 'adsurl' fields" % r['ID'])
            continue

        # query for the bibcode
        try:
            q = ads.SearchQuery(q="arxiv:%s" % arxiv_id, fl=['bibcode'])
        except Exception:
            raise ValueError("syntax error in bib file; check 'eprint' and 'adsurl' fields for '%s'" % r['ID'])

        # check for token
        if q.token is None:
            raise RuntimeError("no ADS API token found; cannot query the ADS database. "
                               "See https://github.com/adsabs/adsabs-dev-api#access")

        # process each paper
        for paper in q:
            # get the bibtex
            bibquery = ads.ExportQuery(paper.bibcode)
            bibtex = bibquery.execute()

            # new ref entry
            new_ref = bibtexparser.loads(bibtex).entries[0]

            # update if published
            if not is_preprint(new_ref):
                updated.append(new_ref['ID'])
                print("  '%s' entry found to be out-of-date" % r['ID'])

                # remove old entry
                refs.entries.pop(i)

                # add new entry
                refs.entries.append(new_ref)

    # write output file
    if len(updated) and not ns.dry_run:
        writer = bibtexparser.bwriter.BibTexWriter()
        if ns.output is not None:
            with open(ns.output, 'w') as ff:
                ff.write(writer.write(refs))
        else:
            # only print out the new ones
            indices = [i for i, ref in enumerate(refs.entries) if ref['ID'] in updated]
            refs.entries = [refs.entries[i] for i in indices]
            print(writer.write(refs))
def multi_add_to_library(self, bibcodes):
    for nbib, bibcode in enumerate(bibcodes):
        if len(bibcode.split('arXiv')) == 2:
            try:
                connection = 0
                while connection == 0:
                    try:
                        ads_paper = list(
                            ads.SearchQuery(alternate_bibcode=bibcode,
                                            fl=['bibcode']))[0]
                        try:
                            del self.database[bibcode]
                        except KeyError:
                            pass
                        bibcodes[nbib] = ads_paper.bibcode
                        connection = 1
                    except requests.ConnectionError:
                        time.sleep(1)
            except IndexError:
                pass
        else:
            pass
    bibcodes = list(np.unique(bibcodes))
    bibcodes_to_include = []
    for i in bibcodes:
        if i not in self.database:
            bibcodes_to_include.append(i)
    connection = 0
    bibtexs_to_include = [ff for ff in bibcodes_to_include]
    while connection == 0:
        try:
            print('connecting...')
            bibtexs_to_include = ads.ExportQuery(
                bibcodes=bibcodes_to_include, format='bibtex').execute()
            bibtexs_to_include = bibtexs_to_include.split('\n\n')[:-1]
            connection = 1
        except requests.ConnectionError:
            time.sleep(1)
    bibcodes_to_include = [
        str(ff.split('\n')[0].split('{')[1][:-1]) for ff in bibtexs_to_include
    ]
    print('bibtexs loaded')
    for nbib, bibcode in enumerate(bibcodes_to_include):
        print(bibcode)
        if bibcode not in self.database:
            connection = 0
            while connection == 0:
                try:
                    a = ads.SearchQuery(
                        bibcode=bibcode,
                        fl=['adsurl', 'title', 'author', 'abstract'])
                    a = list(a)[0]
                    b = bibtexs_to_include[nbib]
                    connection = 1
                    self.database[bibcode] = {
                        'adsurl': get_adsurl_from_bibtex(b),
                        'title': a.title,
                        'author': a.author,
                        'abstract': a.abstract,
                        'bibtex': b,
                        'call': bibcode
                    }
                    old_call = self.database[bibcode]['call']
                    new_call = a.author[0].split(',')[0].replace(
                        ' ', '').replace('-', '') + bibcode[:4]
                    new_call = ud.normalize('NFKD', new_call).encode(
                        'ascii', 'ignore').decode("utf-8")
                    new_call = new_call.replace('&', 'a')
                    self.database[bibcode]['bibtex'] = \
                        self.database[bibcode]['bibtex'].replace(
                            '{0}{1}'.format('{', old_call),
                            '{0}{1}'.format('{', new_call))
                    self.database[bibcode]['call'] = new_call
                except requests.ConnectionError:
                    time.sleep(1)
    calls = [self.database[ff]['call'].split('B')[0] for ff in self.database]
    for i in self.database:
        if calls.count(self.database[i]['call']) > 1:
            old_call = self.database[i]['call']
            new_call = self.database[i]['call'].split('B')[0] + 'B' + i
            new_call = new_call.replace('&', 'a')
            self.database[i]['bibtex'] = \
                self.database[i]['bibtex'].replace(
                    '{0}{1}'.format('{', old_call),
                    '{0}{1}'.format('{', new_call))
            self.database[i]['call'] = new_call
    self.update_library_pickle()
    self.update_library_bib()
def process_token(article_identifier, prefs, bibdesk):
    """Process a single article token from the user, adding it to BibDesk.

    Parameters
    ----------
    article_identifier : str
        Any user-supplied `str` token.
    prefs : :class:`Preferences`
        A `Preferences` instance.
    bibdesk : :class:`BibDesk`
        A `BibDesk` AppKit hook instance.
    """
    """
    print((prefs['default']['ads_token']))
    print(article_identifier)
    """

    if 'true' in prefs['options']['alert_sound'].lower():
        alert_sound = 'Frog'
    else:
        alert_sound = None

    if 'dev_key' not in prefs['default']['ads_token']:
        ads.config.token = prefs['default']['ads_token']

    ads_query = ads.SearchQuery(identifier=article_identifier,
                                fl=[
                                    'author', 'first_author', 'bibcode',
                                    'identifier', 'alternate_bibcode', 'id',
                                    'year', 'title', 'abstract'
                                ])
    try:
        ads_articles = list(ads_query)
    except Exception:
        logging.info(
            "API response error; likely no authorized key is provided!")
        notify('API response error',
               'key:' + prefs['default']['ads_token'],
               'Likely no authorized key is provided!',
               alert_sound=alert_sound)
        return False

    if len(ads_articles) != 1:
        logging.debug(
            ' Zero or multiple ADS entries for the article identifier: {}'.
            format(article_identifier))
        logging.debug('Matching number: {}'.format(len(ads_articles)))
        notify('Found zero or multiple ADS entries for ',
               article_identifier,
               ' No update in BibDesk',
               alert_sound=alert_sound)
        logging.info("Found zero or multiple ADS entries for {}".format(
            article_identifier))
        logging.info("No update in BibDesk")
        return False

    ads_article = ads_articles[0]

    use_bibtexabs = False
    # use "bibtex" by default
    # another option could be "bibtexabs":
    #     https://github.com/andycasey/ads/pull/109
    # however, a change in ads() is required and the abstract field from
    # the "bibtexabs" option doesn't always comply with the tex syntax.
    if use_bibtexabs:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtexabs').execute()
    else:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtex').execute()

    logging.debug("process_token: >>>API limits")
    logging.debug("process_token: {}".format(
        ads_query.response.get_ratelimits()))
    logging.debug("process_token: >>>ads_bibtex")
    logging.debug("process_token: {}".format(ads_bibtex))

    for k, v in ads_article.items():
        logging.debug('process_token: >>>{}'.format(k))
        logging.debug('process_token: {}'.format(v))

    article_bibcode = ads_article.bibcode
    gateway_url = 'https://' + prefs['default']['ads_mirror'] + '/link_gateway'
    # https://ui.adsabs.harvard.edu/link_gateway by default

    if 'true' in prefs['options']['download_pdf'].lower():
        pdf_filename, pdf_status = process_pdf(article_bibcode,
                                               prefs=prefs,
                                               gateway_url=gateway_url)
    else:
        pdf_filename = '.null'

    kept_pdfs = []
    kept_fields = {}
    kept_groups = []

    found = difflib.get_close_matches(ads_article.title[0],
                                      bibdesk.titles,
                                      n=1,
                                      cutoff=.7)

    # first author is the same
    if len(found) > 0:
        if found and difflib.SequenceMatcher(
                None,
                bibdesk.authors(bibdesk.pid(found[0]))[0],
                ads_article.author[0]).ratio() > .6:
            # further comparison on abstract
            abstract = bibdesk('abstract', bibdesk.pid(found[0])).stringValue()
            if not abstract or difflib.SequenceMatcher(
                    None, abstract, ads_article.abstract).ratio() > .6:
                pid = bibdesk.pid(found[0])
                kept_groups = bibdesk.get_groups(pid)
                # keep all fields for later comparison
                # (especially rating + read bool)
                kept_fields = dict(
                    (k, v)
                    for k, v in zip(bibdesk('return name of fields', pid, True),
                                    bibdesk('return value of fields', pid, True))
                    # Adscomment may be arXiv only
                    if k != 'Adscomment')
                # plus BibDesk annotation
                kept_fields['BibDeskAnnotation'] = bibdesk(
                    'return its note', pid).stringValue()
                kept_pdfs += bibdesk.safe_delete(pid)
                notify('Duplicate publication removed',
                       article_identifier,
                       ads_article.title[0],
                       alert_sound=alert_sound)
                logging.info('Duplicate publication removed:')
                logging.info(article_identifier)
                logging.info(ads_article.title[0])
                bibdesk.refresh()

    # add new entry
    ads_bibtex_clean = ads_bibtex.replace('\\', r'\\').replace('"', r'\"')
    pub = bibdesk(f'import from "{ads_bibtex_clean}"')

    # pub id
    pub = pub.descriptorAtIndex_(1).descriptorAtIndex_(3).stringValue()

    # automatic cite key
    bibdesk('set cite key to generated cite key', pub)

    # abstract
    if ads_article.abstract is not None:
        ads_abstract_clean = ads_article.abstract.replace(
            '\\', r'\\').replace('"', r'\"').replace('}', ' ').replace('{', ' ')
        bibdesk(f'set abstract to "{ads_abstract_clean}"', pub)

    doi = bibdesk('value of field "doi"', pub).stringValue()

    if pdf_filename.endswith('.pdf') and pdf_status:
        # register PDF into BibDesk
        bibdesk(f'add POSIX file "{pdf_filename}" to beginning of linked files',
                pub)
        # automatic file name
        bibdesk('auto file', pub)
    elif 'http' in pdf_filename and not doi:
        # URL for electronic version - only add it if no DOI link present
        # (they are very probably the same)
        bibdesk(
            f'make new linked URL at end of linked URLs with data "{pdf_filename}"',
            pub)

    # add URLs as linked URL if not there yet
    urls = bibdesk('value of fields whose name ends with "url"',
                   pub,
                   strlist=True)
    if 'arxiv' in article_bibcode.lower():
        article_gateway = get_article_gateway(article_bibcode,
                                              gateway_url=gateway_url)
        urls += [article_gateway['eprint_html']]

    urlspub = bibdesk('linked URLs', pub, strlist=True)
    for u in [u for u in urls if u not in urlspub]:
        bibdesk(f'make new linked URL at end of linked URLs with data "{u}"',
                pub)

    # add old annotated files
    for kept_pdf in kept_pdfs:
        bibdesk(f'add POSIX file "{kept_pdf}" to end of linked files', pub)

    # re-insert custom fields
    bibdesk_annotation = kept_fields.pop("BibDeskAnnotation", '')
    bibdesk(f'set its note to "{bibdesk_annotation}"', pub)
    newFields = bibdesk('return name of fields', pub, True)
    for k, v in list(kept_fields.items()):
        if k not in newFields:
            bibdesk(f'set value of field "{k}" to "{v}"', pub)
    notify('New publication added',
           bibdesk('cite key', pub).stringValue(),
           ads_article.title[0],
           alert_sound=alert_sound)
    logging.info('New publication added:')
    logging.info(bibdesk('cite key', pub).stringValue())
    logging.info(ads_article.title[0])

    # add back the static groups assignment
    if kept_groups != []:
        new_groups = bibdesk.add_groups(pub, kept_groups)

    return True
# if we are running for the first time
# then there is no file of bibcodes to compare to
# so we will just download the whole library
if (not os.path.isfile(bibcodefile)) or refresh:
    print('Creating new bib file for ADS Library "{}", id: {}'.format(
        metadata["name"], metadata["id"]))
    library = get_library(
        library_id=metadata["id"],
        num_documents=metadata["num_documents"],
        rows=rows,
    )
    print("New bib file has {} items".format(len(library)))
    bibtex = ads.ExportQuery(library, format=args.bib_format).execute()
    with open("library.id", "w") as f:
        f.write(library_id)
        # f.write(bibcodefile)
        # f.write(bibfile)
    with open(bibcodefile, "w") as f:
        f.writelines("{}\n".format(bc) for bc in library)
    with open(bibfile, "w") as f:
        f.write(bibtex)
# if there is a file of bibcodes we
# need to compare with it. Unfortunately,
# we have to download the entire library
def get_references(useads=False, cache=True, updaterefcache=False,
                   bibtex=False, showfails=False):
    """
    Return a dictionary of paper
    `reference <http://www.atnf.csiro.au/research/pulsar/psrcat/psrcat_ref.html>`_
    in the ATNF catalogue. The keys are the ref strings given in the ATNF
    catalogue.

    Note:
        The way that the ATNF references are stored has changed, so if you
        downloaded the catalogue with a version of psrqpy before v1.0.8 you
        may need to run this function with ``updaterefcache=True`` to allow
        references to work. You may also want to update the ATNF catalogue
        tarball with:

        >>> import psrqpy
        >>> psrqpy.QueryATNF(checkupdate=True)

    Args:
        useads (bool): boolean to set whether to use the python mod:`ads`
            module to get the NASA ADS URL for the references.
        cache (bool): use cached, or cache, the reference bundled with the
            catalogue tarball.
        updaterefcache (bool): update the cached references.
        bibtex (bool): if using ADS return the bibtex for the reference
            along with the ADS URL.
        showfails (bool): if outputting NASA ADS references set this flag to
            True to output the reference tags of references that fail to be
            found (mainly for debugging purposes).

    Returns:
        dict: a dictionary of references.
    """
    import tempfile
    import json

    # get the tarball
    try:
        dbtarfile = download_file(ATNF_TARBALL, cache=not updaterefcache)
    except IOError:
        raise IOError("Problem accessing ATNF catalogue tarball")

    try:
        # open tarball
        pulsargz = tarfile.open(dbtarfile, mode="r:gz")

        # extract the references
        reffile = pulsargz.extractfile("psrcat_tar/psrcat_ref")
    except IOError:
        raise IOError("Problem extracting the database file")

    refdic = {
        line.split()[0]: " ".join(line.split()[2:])
        for line in reffile.read().decode("utf-8").strip().split("***")
        if len(line) > 0
    }

    reffile.close()
    pulsargz.close()  # close tar file

    # if not requiring ADS references just return the current dictionary
    if not useads:
        return refdic
    else:
        try:
            import ads
            from ads.exceptions import APIResponseError
        except ImportError:
            warnings.warn(
                "Could not import ADS module, so no ADS information "
                "will be included",
                UserWarning,
            )
            return refdic, None

    # try getting cached references
    if not cache:
        adsrefs = {}
    else:
        from astropy.utils.data import is_url_in_cache

        tmpdir = tempfile.gettempdir()  # get system "temporary" directory
        dummyurl = "file://{}/ads_cache".format(tmpdir)
        dummyfile = os.path.join("{}".format(tmpdir), "ads_cache")

        # check if cached ADS refs list exists (using dummy URL)
        if is_url_in_cache(dummyurl) and not updaterefcache:
            adsfile = download_file(dummyurl, cache=True, show_progress=False)

            try:
                fp = open(adsfile, "r")
            except IOError:
                warnings.warn("Could not load ADS URL cache for references",
                              UserWarning)
                return refdic, None

            cachedrefs = json.load(fp)
            fp.close()

            adsrefs = None
            adsbibtex = None
            failures = None
            if "urls" in cachedrefs:
                adsrefs = cachedrefs["urls"]
            if bibtex and "bibtex" in cachedrefs:
                adsbibtex = cachedrefs["bibtex"]
            if showfails and "failures" in cachedrefs:
                failures = cachedrefs["failures"]

            if bibtex:
                if failures is None:
                    return refdic, adsrefs, adsbibtex
                else:
                    return refdic, adsrefs, adsbibtex, failures
            else:
                if failures is None:
                    return refdic, adsrefs
                else:
                    return refdic, adsrefs, failures
        else:
            adsrefs = {}

    # loop over references
    j = 0
    bibcodes = {}
    failures = []
    for reftag in refdic:
        j = j + 1
        refstring = refdic[reftag]

        # check if IAU Circular or PhD thesis
        iaucirc = True if "IAU Circ" in refstring else False
        thesis = True if "PhD thesis" in refstring else False
        sepauthors = ""

        # check for arXiv identifier
        arxivid = None
        if "arXiv:" in refstring or "ArXiv:" in refstring:
            for searchterm in [
                    r"[Aa]rXiv:[0-9]{4}.[0-9]*",
                    r"[Aa]rXiv:astro-ph/[0-9]{7}",
            ]:
                match = re.search(searchterm, refstring)
                if match is not None:
                    arxivid = match.group().lower()
                    break
        else:
            if iaucirc:
                # get circular number (value after IAU Circ. No.)
                spl = re.split(r"([0-9]{4})", refstring)
                noidx = 1
                for val in spl:
                    if "IAU Circ" in val:
                        break
                    noidx += 1
                volume = spl[noidx]
            else:
                # do splitting on the year (allows between 1000-2999)
                spl = re.split(r"([1-2][0-9]{3})", refstring)
                if len(spl) < 2:
                    # no authors + year, so ignore!
                    failures.append(reftag)
                    continue

                year = spl[1] if len(spl[1]) == 4 else None
                try:
                    int(year)
                except (ValueError, TypeError):
                    # "year" is not an integer
                    failures.append(reftag)
                    continue

            # get the authors (remove line breaks/extra spaces and final
            # full-stop)
            authors = spl[0].strip().strip(".")

            # remove " Jr." from any author names (as it causes issues!)
            authors = authors.replace(" Jr.", "")

            # replace ampersands/and with ".," for separation
            authors = authors.replace(" &", ".,").replace(" and", ".,")

            # separate out authors
            sepauthors = [
                auth.lstrip() for auth in authors.split(".,")
                if len(auth.strip()) > 0 and "et al" not in auth
            ]

            # remove any "'s for umlauts in author names
            sepauthors = [a.replace(r'"', "") for a in sepauthors]

            if len(sepauthors) == 0:
                # no authors were parsed
                failures.append(reftag)
                continue

            if not thesis and not iaucirc:
                volume = None
                page = None
                if len(spl) > 2:
                    # join the remaining values and split on ","
                    extrainfo = [
                        info
                        for info in ("".join(spl[2:])).lstrip(".").split(",")
                        if len(info.strip()) > 0
                    ]

                    # get the journal volume (assumed to be second from last)
                    try:
                        # in case volume contains issue number in brackets
                        # perform split
                        volume = int(extrainfo[-2].strip().split("(")[0])
                    except (IndexError, TypeError, ValueError):
                        # could not get the volume
                        pass

                    # get the page if given (assumed to be the last value)
                    try:
                        testpage = re.sub(r"[\+\-\.]", "",
                                          extrainfo[-1].strip().split("-")[0])
                        if not testpage.startswith("eaao"):
                            # Science Advances page string
                            if (testpage[0].upper() in ["L", "A", "E"]
                                    or testpage[0:4] == ""):
                                # e.g. for ApJL, A&A, PASA
                                _ = int(testpage[1:])
                            elif testpage[-1].upper() == "P":
                                # e.g., for early MNRAS
                                _ = int(testpage[:-1])
                            else:
                                _ = int(testpage)
                        page = testpage
                    except (IndexError, TypeError, ValueError):
                        # could not get the page
                        pass

                if volume is None or page is None:
                    failures.append(reftag)
                    continue

        # generate the query string
        if arxivid is None:
            if not thesis:
                if iaucirc:
                    myquery = 'bibstem:"IAUC" volume:"{}"'.format(volume)
                else:
                    # default query without authors
                    myquery = "year:{} AND volume:{} AND page:{}".format(
                        year, volume, page)

                # add author if given
                if len(sepauthors) > 0:
                    # check if authors have spaces in last names (a few cases
                    # due to formatting of some accented names), if so try
                    # next author...
                    for k, thisauthor in enumerate(sepauthors):
                        if len(thisauthor.split(",")[0].split()) == 1:
                            myquery += ' AND author:"{}{}"'.format(
                                "^" if k == 0 else "", thisauthor)
                            break
            else:
                myquery = 'year: {} AND author:"^{}" AND bibstem:"PhDT"'.format(
                    year, sepauthors[0])
        else:
            myquery = arxivid

        try:
            article = ads.SearchQuery(q=myquery)
        except APIResponseError:
            failures.append(reftag)
            warnings.warn(
                "Could not get reference information, so no ADS "
                "information for {} will be included".format(reftag),
                UserWarning,
            )
            continue

        for paper in article:
            bibcodes[reftag] = paper.bibcode
            adsrefs[reftag] = ADS_URL.format(bibcodes[reftag])

        # check if paper bibcode was found
        if reftag not in bibcodes:
            failures.append(reftag)

    if bibtex:
        # use ExportQuery to get bibtex
        expquery = ads.ExportQuery(
            list(bibcodes.values())).execute().split("\n\n")

        adsbibtex = {}
        for reftag in bibcodes:
            for equery in expquery:
                if bibcodes[reftag] in equery:
                    adsbibtex[reftag] = equery
                    break

    if cache:
        # output adsrefs to cache file
        try:
            # output to dummy temporary file and then "download" to cache
            fp = open(dummyfile, "w")
            cachedic = {}
            cachedic["urls"] = adsrefs
            if bibtex:
                cachedic["bibtex"] = adsbibtex
            if showfails:
                cachedic["failures"] = failures
            json.dump(cachedic, fp, indent=2)
            fp.close()
        except IOError:
            raise IOError("Could not output the ADS references to a file")

        # cache the file
        _ = download_file(dummyurl, cache=True, show_progress=False)

        # remove the temporary file
        os.remove(dummyfile)

    if bibtex:
        if showfails:
            return refdic, adsrefs, adsbibtex, failures
        else:
            return refdic, adsrefs, adsbibtex
    else:
        if showfails:
            return refdic, adsrefs, failures
        else:
            return refdic, adsrefs
def main():
    parser = ArgumentParser()
    parser.add_argument("files", metavar="TEX", nargs="+",
                        help="tex files to search citation keys")
    parser.add_argument(
        "-o",
        "--output",
        metavar="BIB",
        help="main bibtex file; new entries will be added to this file, existing entries may be updated",
    )
    parser.add_argument(
        "-r",
        "--other",
        nargs="+",
        metavar="BIB",
        help="other bibtex files that contain existing references (read-only)",
    )
    parser.add_argument(
        "--no-update",
        dest="update",
        action="store_false",
        help="for existing entries, do not check ADS for updates",
    )
    parser.add_argument(
        "--force-regenerate",
        action="store_true",
        help="for all existing entries, regenerate the bibtex with the latest version from ADS if found",
    )
    parser.add_argument(
        "--merge-other",
        action="store_true",
        help="merge the entries from other bibtex files",
    )  # thanks to syrte for adding this option
    parser.add_argument(
        "--include-physics",
        action="store_true",
        help="include physics database when searching ADS",
    )
    parser.add_argument(
        "--no-backup",
        dest="backup",
        action="store_false",
        help="back up output file if being overwritten",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s {version}".format(version=__version__),
    )
    args = parser.parse_args()

    if args.include_physics:
        global _database  # pylint: disable=global-statement
        _database = '("astronomy" OR "physics")'

    if len(args.files) == 1 and args.files[0].lower().endswith(".bib"):
        # bib update mode
        if args.output or args.other:
            parser.error(
                "Input file is a bib file, not tex file. This will enter bib update mode. Do not specify `--output` and `--other` together in this mode."
            )
        if not args.update:
            parser.error(
                "Input file is a bib file, not tex file. This will enter bib update mode. Must not specify --no-update"
            )
        if not os.path.isfile(args.files[0]):
            parser.error("Cannot locate input bib file {}".format(
                args.files[0]))
        keys = None
        args.output = args.files[0]
    elif args.output:  # bib output is specified
        keys, _ = search_keys(args.files, find_bib=False)
    else:  # bib output is missing, auto-identify
        keys, bib = search_keys(args.files, find_bib=True)
        if not bib:
            parser.error(
                "Cannot identify bibtex file from the tex source. Use -o to specify a bibtex file as output."
            )
        args.output = bib.pop(0)
        if args.other:
            args.other.extend(bib)
        else:
            args.other = bib
        msg = "Auto-identifying bibtex files...\n"
        msg += "Main bibtex source (output file): {}\n".format(args.output)
        if args.other:
            msg += "Additional bibtex sources: {}\n".format(", ".join(
                args.other))
        print(_headerize(msg))

    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(" ", parser=get_bparser())

    bib_other = bibtexparser.loads(" ", parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(
                    bib_other, bibtexparser.load(fp, parser=get_bparser()))

    if keys is None:  # bib update mode
        keys = list(bib.entries_dict)

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)
    for key in keys:
        key_exists = key in bib.entries_dict
        key_exists_in_others = key in bib_other.entries_dict

        if (key_exists and args.update) or (key_exists_in_others
                                            and args.merge_other
                                            and args.update):
            bibcode = extract_bibcode(bib.entries_dict[key])
            bibcode_new = entry2bibcode(bib.entries_dict[key])
            if bibcode_new:
                all_entries[bibcode_new].append(key)
                if bibcode_new != bibcode or args.force_regenerate:
                    to_retrieve.add(bibcode_new)
                    print("{}:{} UPDATE => {}".format(
                        key,
                        "" if key_exists else
                        " FOUND IN SECONDARY BIB SOURCES,",
                        bibcode_new,
                    ))
                    continue

        if key_exists:
            print("{}: EXISTING".format(key))
            continue

        if key_exists_in_others and args.merge_other:
            bib.entries_dict[key] = bib_other.entries_dict[key]
            bib.entries = list(bib.entries_dict.values())
            print("{}: FOUND IN OTHER BIB SOURCE, MERGED".format(key))
            continue

        if key_exists_in_others:
            print("{}: FOUND IN OTHER BIB SOURCE, IGNORED".format(key))
            continue

        bibcode = find_bibcode(key)
        if bibcode:
            to_retrieve.add(bibcode)
            all_entries[bibcode].append(key)
            print("{}: NEW ENTRY => {}".format(key, bibcode))
        else:
            not_found.add(key)
            print("{}: NOT FOUND".format(key))

    if not_found:
        print(_headerize("Please check the following keys"))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize("The following keys refer to the same entry"))
        for b, k in repeated_keys:
            print(
                "{1} has been referred as the following keys; please keep only one:\n{0}\n"
                .format(" ".join(k), b))

    if to_retrieve:
        print(_headerize("Building new bibtex file, please wait..."))
        bib_new = bibtexparser.loads(
            ads.ExportQuery(list(to_retrieve), "bibtex").execute(),
            parser=get_bparser())
        for entry in bib_new.entries:
            entry["ID"] = all_entries[entry["ID"]][0]
        bib = update_bib(bib, bib_new)

    bib_dump_str = bibtexparser.dumps(bib).encode("utf8")
    if args.backup and os.path.isfile(args.output):
        copyfile(args.output, args.output + ".bak")
    with open(args.output, "wb") as fp:
        fp.write(bib_dump_str)
    print(_headerize("Done!"))

    # check version
    try:
        latest_version = StrictVersion(
            requests.get(
                "https://pypi.python.org/pypi/adstex/json",
                timeout=0.1,
            ).json()["info"]["version"])
    except (requests.RequestException, KeyError, ValueError):
        pass
    else:
        if latest_version > StrictVersion(__version__):
            msg = "A newer version of adstex (v{}) is now available!\n".format(
                latest_version)
            msg += "Please consider updating it by running:\n\n"
            msg += "pip install adstex=={}".format(latest_version)
            print(_headerize(msg))
def main():
    parser = ArgumentParser()
    parser.add_argument('files', metavar='TEX', nargs='+',
                        help='tex files to search citation keys')
    parser.add_argument(
        '-o',
        '--output',
        metavar='BIB',
        required=True,
        help='main bibtex file; new entries will be added to this file, existing entries may be updated')
    parser.add_argument(
        '-r',
        '--other',
        nargs='+',
        metavar='BIB',
        help='other bibtex files that contain existing references (read-only)')
    parser.add_argument(
        '--no-update',
        dest='update',
        action='store_false',
        help='for existing entries, do not check ADS for updates')
    parser.add_argument(
        '--force-update',
        dest='force_update',
        action='store_true',
        help='for all existing entries, overwrite with the latest version from ADS')
    parser.add_argument('--include-physics',
                        dest='include_physics',
                        action='store_true',
                        help='include physics database when searching ADS')
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s {version}'.format(version=__version__))
    args = parser.parse_args()

    if args.include_physics:
        _database = '("astronomy" OR "physics")'

    keys = search_keys(args.files)

    if os.path.isfile(args.output):
        with open(args.output) as fp:
            bib = bibtexparser.load(fp, parser=get_bparser())
    else:
        bib = bibtexparser.loads(' ', parser=get_bparser())

    bib_other = bibtexparser.loads(' ', parser=get_bparser())
    if args.other:
        for f in args.other:
            with open(f) as fp:
                bib_other = update_bib(
                    bib_other, bibtexparser.load(fp, parser=get_bparser()))

    not_found = set()
    to_retrieve = set()
    all_entries = defaultdict(list)
    try:
        for key in keys:
            if key in bib.entries_dict:
                if args.update:
                    bibcode = extract_bibcode(bib.entries_dict[key])
                    bibcode_new = entry2bibcode(bib.entries_dict[key])
                    if bibcode_new:
                        all_entries[bibcode_new].append(key)
                        if bibcode_new != bibcode or args.force_update:
                            to_retrieve.add(bibcode_new)
                            print('{}: UPDATE => {}'.format(key, bibcode_new))
                            continue
                print('{}: EXISTING'.format(key))
                continue

            if key in bib_other.entries_dict:
                print('{}: FOUND IN OTHER REFS, IGNORED'.format(key))
                continue

            bibcode = find_bibcode(key)
            if bibcode:
                to_retrieve.add(bibcode)
                all_entries[bibcode].append(key)
                print('{}: NEW ENTRY => {}'.format(key, bibcode))
            else:
                not_found.add(key)
                print('{}: NOT FOUND'.format(key))
    except KeyboardInterrupt:
        print()

    if not_found:
        print(_headerize('Please check the following keys'))
        for key in not_found:
            print(key)

    repeated_keys = [t for t in all_entries.items() if len(t[1]) > 1]
    if repeated_keys:
        print(_headerize('The following keys refer to the same entry'))
        for b, k in repeated_keys:
            print(
                '{1} has been referred as the following keys; please keep only one:\n{0}\n'
                .format(' '.join(k), b))

    if to_retrieve:
        print(_headerize('Building new bibtex file, please wait...'))
        bib_new = bibtexparser.loads(
            ads.ExportQuery(list(to_retrieve), 'bibtex').execute(),
            parser=get_bparser())
        for entry in bib_new.entries:
            entry['ID'] = all_entries[entry['ID']][0]
        bib = update_bib(bib, bib_new)

    bib_dump_str = bibtexparser.dumps(bib).encode('utf8')
    with open(args.output, 'wb') as fp:
        fp.write(bib_dump_str)
    print(_headerize('Done!'))