def check_arxiv_published(value, field="id", get_first=True, keep_eprint=False):
    """Look up an arXiv entry and build a BibTeX string for it.

    Parameters
    ----------
    value: str
        Search value. Every ``arxiv:`` marker (case-insensitive) is removed
        before the lookup.
    field: str
        Field name forwarded to ``get_arxiv_info``.
    get_first: bool
        When this is exactly ``False``, ``field`` is ``"ti"`` and several
        items come back, the choice is delegated to ``ask_which_is``.
        In every other case the first item is used.
    keep_eprint: bool
        When true and the item carries an ``arxiv_doi``, the arXiv number is
        added to the DOI-derived entry through ``add_eprint_to_bib``.

    Returns
    -------
    found: bool
        Flag reported by ``get_arxiv_info`` (or by ``ask_which_is``).
    published: bool
        First value returned by ``get_bib_from_doi``; stays ``False`` when
        the item has no ``arxiv_doi``.
    bib: str
        The BibTeX text, or ``""`` when nothing was found.
    """
    published = False
    bib = ""

    # Raw string: the former "arxiv\:" literal relied on an invalid escape
    # sequence, which newer Python versions warn about at compile time.
    value = re.sub(r"arxiv:", "", value, flags=re.I)

    found, items = get_arxiv_info(value, field)
    if found:
        if get_first is False and field == "ti" and len(items) > 1:
            found, item = ask_which_is(value, items)
        else:
            item = items[0]

    if not found:
        print("\t\nArxiv not found.")
        return found, published, bib

    if "arxiv_doi" in item:
        published, bib = get_bib_from_doi(item["arxiv_doi"])
        if keep_eprint:
            # NOTE(review): takes the second-to-last piece after splitting
            # the id on "/" and "v". Presumably ids look like
            # ".../abs/1234.5678v1"; an old-style "category/number" id would
            # lose its category here -- confirm against get_arxiv_info.
            eprint = re.split('/|v', item["id"])[-2]
            bib = add_eprint_to_bib(bib, eprint)
    else:
        bib = generate_bib_from_arxiv(item, value, field)

    return found, published, bib
def update_bib(bib):
    """Refresh a bibliography entry from its DOI while keeping its key.

    :param bib: entry mapping; must contain ``"ID"`` and may contain ``"doi"``
    :return: the first entry parsed from the ``get_bib_from_doi`` result with
        its ``"ID"`` reset to the original key, or ``bib`` itself when there
        is no ``"doi"``, the lookup reports failure, or the returned text
        parses to no entries
    :raises KeyError: if ``bib`` has no ``"ID"``
    """
    bib_id = bib["ID"]
    if "doi" not in bib:
        return bib

    found, bib_string = get_bib_from_doi(bib["doi"])
    if not found:
        return bib

    entries = bibtexparser.loads(bib_string).entries
    if not entries:
        # The lookup claimed success but nothing parseable came back; keep
        # the existing entry instead of failing with IndexError.
        return bib

    refreshed = entries[0]
    refreshed["ID"] = bib_id
    return refreshed
def get_bib_from_title(title, get_first=False, abbrev_journal=False):
    """Resolve a paper title to a BibTeX string.

    The title is resolved with ``get_from_title``. Items flagged
    ``is_crossref`` are converted through their ``DOI``. Other items are
    converted through their ``arxiv_doi`` when present, or through
    ``generate_bib_from_arxiv`` otherwise.

    :param title: title to search for
    :param get_first: forwarded to ``get_from_title``
    :param abbrev_journal: forwarded to ``get_bib_from_doi`` on every
        DOI-based path (it used to be dropped for Crossref items)
    :return: ``(found, bib)``. For a Crossref item ``found`` is the flag
        returned by ``get_bib_from_doi``. A Crossref item without a ``DOI``
        key yields ``(True, "")``.
    """
    bib = ""
    found, item = get_from_title(title, get_first)
    if not found:
        return found, bib

    if item["is_crossref"]:
        if "DOI" in item:
            found, bib = get_bib_from_doi(item["DOI"], abbrev_journal)
    elif "arxiv_doi" in item:
        # As before, the success flag of this lookup is not propagated to
        # ``found``; only the returned text is used.
        _, bib = get_bib_from_doi(item["arxiv_doi"], abbrev_journal)
    else:
        bib = generate_bib_from_arxiv(item, title, field="ti")

    # TODO: an earlier draft read item["short-container-title"][0] here to
    # obtain an abbreviated journal name; that code was never enabled.
    return found, bib
def pdf2bib(pdf_file):
    """
    Extract a DOI from the text of a PDF and return the matching BibTeX entry.

    :param pdf_file: the path to the PDF file
    :return: the BibTeX entry as a string
    :raises DoiNotFoundError: if no pattern in ``all_doi_res`` matches the text
    :raises BibRetrievalError: if a DOI was matched but no lookup produced a
        non-empty entry
    """
    text = get_pdf_page_text(pdf_file)
    any_match = False
    entry = ''

    # Walk the patterns in order and stop at the first DOI that resolves.
    for pattern in all_doi_res:
        hit = pattern.search(text)
        if hit is None:
            continue
        # Remember that something matched: it decides which error is raised
        # if every lookup fails.
        any_match = True

        candidate = hit.group(0)
        # Cut the DOI at the first non-ASCII character. A symbol printed
        # directly after the DOI in the PDF (e.g. a copyright sign after
        # doi:10.5194/acp-11-8543-2011) otherwise ends up inside the match.
        cut = next(
            (pos for pos, ch in enumerate(candidate) if ord(ch) > 127),
            len(candidate),
        )
        candidate = candidate[:cut]
        root_logger.debug('Looking up DOI "{}"'.format(candidate))

        ok, entry = get_bib_from_doi(candidate)
        if ok:
            break
        # Failed lookup: discard whatever came back and try the next pattern.
        entry = ''

    if not any_match:
        raise DoiNotFoundError('DOI search failed on {}'.format(pdf_file))
    if len(entry) == 0:
        raise BibRetrievalError(
            'Bib string lookup failed on {}'.format(pdf_file))
    return entry
def main(section):
    """Write a BibTeX file from the cached DOI list and return LaTeX markup.

    Reads ``dois.txt`` from ``reportinator.cache``, one DOI per line (only
    the text before the first space is used). Each DOI is looked up with
    ``get_bib_from_doi`` and every entry found is written to ``output.bib``
    in the same directory.

    :param section: unused; kept for interface compatibility
    :return: LaTeX source for a "References" section that cites every entry
        and prints the bibliography without a heading
    """
    bib_path = reportinator.cache + "/output.bib"
    doi_path = reportinator.cache + "/dois.txt"

    # Context managers guarantee both files are flushed and closed; the
    # handles used to be leaked. The output is opened first, as before.
    with open(bib_path, "w+") as bib_file, open(doi_path, "r") as doi_file:
        for line in doi_file:
            # Strip so a line without a space does not keep its newline
            # inside the DOI; skip blank lines instead of looking them up.
            doi = line.split(" ")[0].strip()
            if not doi:
                continue
            found, bib = get_bib_from_doi(doi)
            if found:
                bib_file.write(bib + "\n")

    return (
        "\n\\section{References}\n"
        "\\nocite{*}\n"
        "\\printbibliography[heading=none]\n\n"
    )
def _to_bibtex(doi, template, idx):
    """Convert a DOI link to a BibTeX entry keyed by template name and index.

    :param doi: citation string; only values containing ``doi.org`` are
        looked up
    :param template: name whose lower-cased form becomes the key prefix
    :param idx: value appended to the prefix to form the key
    :return: the BibTeX text with its identifier replaced by
        ``<template.lower()><idx>``. ``doi`` is returned unchanged when
        doi2bib is not installed, when ``doi`` is not a doi.org link, or when
        the lookup reports failure.
    """
    try:
        from doi2bib.crossref import get_bib_from_doi
    except ImportError:
        print(
            "Cannot generate BibTeX citation, missing doi2bib dependency",
            file=sys.stderr,
        )
        return doi

    if "doi.org" not in doi:
        return doi

    result = get_bib_from_doi(doi)
    if not result[0]:
        # Failed lookup: fall back to the plain DOI, like the other failure
        # paths, instead of post-processing an error response.
        return doi
    bib = result[1]

    # Replace the identifier with the template name. The identifier is taken
    # to be the first capitalised word in the entry.
    m = re.search(r"([A-Z])\w+", bib)
    if m is None:
        # Nothing that looks like an identifier; leave the entry untouched.
        return bib
    new_key = "%s%s" % (template.lower(), idx)
    # Splice at the match position so later occurrences of the same text
    # (for example inside a field value) are not rewritten as well.
    return bib[:m.start()] + new_key + bib[m.end():]
def check_arxiv_published(value, field="id", get_first=True):
    """Find an arXiv entry and return a BibTeX string for it.

    :param value: search value; every ``arxiv:`` marker (case-insensitive)
        is stripped before the lookup
    :param field: field name forwarded to ``get_arxiv_info``
    :param get_first: when exactly ``False`` and ``field`` is ``"ti"`` with
        more than one hit, ``ask_which_is`` picks the item; otherwise the
        first hit is used
    :return: ``(found, published, bib)``. ``published`` is the flag returned
        by ``get_bib_from_doi`` and stays ``False`` when the item has no DOI.
        ``bib`` is ``""`` when nothing was found.
    """
    published = False
    bib = ""

    # Raw string: "arxiv\:" in a plain literal is an invalid escape sequence.
    value = re.sub(r"arxiv:", "", value, flags=re.I)

    found, items = get_arxiv_info(value, field)
    if found:
        if get_first is False and field == "ti" and len(items) > 1:
            found, item = ask_which_is(value, items)
        else:
            item = items[0]

    if not found:
        print("\t\nArxiv not found.")
        return found, published, bib

    # Identity test instead of "!= None"; .get() treats a missing "doi" key
    # the same as an explicit None rather than raising KeyError.
    doi = item.get("doi")
    if doi is not None:
        published, bib = get_bib_from_doi(doi)
    else:
        bib = generate_bib_from_arxiv(item, value, field)

    return found, published, bib
# Collect every DOI cited in the input file, written either as
# ``{doi:a, b}`` or ``{https://doi.org/a, b}``. Entries in a comma-separated
# group that lack the prefix get it added, so all stored keys share one form.
dois = set()
with open(f) as h:
    for line in h:
        for prefix in ('doi:', 'https://doi.org/'):
            for x in re.findall('{' + prefix + '([^}]*)}', line):
                for y in x.split(','):
                    y = y.replace(' ', '')
                    if not y.startswith(prefix):
                        y = prefix + y
                    dois.add(y)

# Write the bibliography: optional base file first, then one entry per DOI
# that is not already among the known labels.
# Binary mode keeps the copy of the base file byte-exact and makes the
# encoded write valid on Python 3, where bytes + str used to raise TypeError
# on every entry.
with open(args.bib, 'wb') as upv:
    if args.b is not None:
        with open(args.b, 'rb') as h:
            for line in h:
                upv.write(line)
    # Sorted so the generated file is reproducible between runs.
    for y in sorted(dois - labels):
        out = get_bib_from_doi(y)
        if out[0]:
            x = out[1].split(',')
            # Use the cited DOI as the entry key. A callable replacement
            # keeps any backslash in the DOI literal instead of having it
            # read as a regex template escape.
            x[0] = re.sub('{[^,]*', lambda _match, key=y: '{' + key, x[0])
            bib = ','.join(x)
            # Errors now propagate with a traceback; the former bare
            # "except" dropped into an interactive debugger, which hangs
            # unattended runs.
            upv.write(bib.encode('UTF-8') + b'\n')
def check_arxiv_published(value, field="id", get_first=True, keep_eprint=False):
    """
    Look up an arXiv entry and build its BibTeX string.

    Parameters
    ----------
    value: str
        value of the field; every ``arxiv:`` marker (case-insensitive) is
        removed before the search
    field: str
        field used for the arxiv search API
    get_first: bool
        When exactly False, ``field`` is "ti" and several candidates remain
        after the exact-title check, ``ask_which_is`` chooses the item.
        Otherwise the first candidate is used.
    keep_eprint: bool
        If True keep the arxiv number if the paper has already been published

    Returns
    -------
    found: bool
        True if found the arxiv item
    published: bool
        True if the arxiv has already been published
    bib: str
        bibtext string
    """
    published = False
    bib = ""

    # Raw string: "arxiv\:" in a plain literal is an invalid escape sequence.
    value = re.sub(r"arxiv:", "", value, flags=re.I)

    found, items = get_arxiv_info(value, field)
    if found:
        if field == "ti":
            # Prefer a result whose title equals the query once case, spaces
            # and (on the result side) newlines are ignored.
            wanted = value.lower().replace(" ", "")
            for candidate in items:
                normalized = (
                    candidate["title"].lower().replace(" ", "").replace("\n", "")
                )
                if normalized == wanted:
                    items = [candidate]
                    break

        if get_first is False and field == "ti" and len(items) > 1:
            found, item = ask_which_is(value, items)
        else:
            item = items[0]

    if not found:
        print("\t\nArxiv not found.")
        return found, published, bib

    if "arxiv_doi" in item:
        published, bib = get_bib_from_doi(item["arxiv_doi"])
        if keep_eprint:
            # NOTE(review): second-to-last piece of the id after splitting on
            # "/" and "v". Presumably ids end in "<number>v<version>" --
            # confirm that old-style "category/number" ids are handled.
            eprint = re.split('/|v', item["id"])[-2]
            bib = add_eprint_to_bib(bib, eprint)
    else:
        bib = generate_bib_from_arxiv(item, value, field)

    return found, published, bib