Esempio n. 1
0
def process_id(id_):
    """
    Resolve a raw identifier to a local file lookup or a normalized BibTeX.

    The identifier is stripped, one trailing ``,`` or ``}`` is dropped, and
    ``...iv:`` prefixes, ``...iv.org`` URLs and ``doi.org`` URLs are reduced to
    the bare identifier before dispatching:

    * an identifier containing letters-digits-letters is handed to
      ``find_file`` (with ``.pdf`` appended) and that result is returned as-is;
    * an identifier that starts with the category reported by
      ``check_categories``, or that contains no ``/``, goes through
      ``arxiv2bib`` and ``postprocess_arxiv``;
    * any other identifier containing ``/`` goes through ``doi2bib``.

    :param id_: The raw identifier string.
    :returns: The ``find_file`` result, or the ``normalize``-d BibTeX.
    """
    cleaned = id_.strip()
    if cleaned.endswith((",", "}")):
        cleaned = cleaned[:-1]
    lowered = cleaned.lower()
    category = check_categories(cleaned)

    # Strip the prefix / URL decoration, if any.
    if "iv:" in lowered:
        cleaned = lowered[lowered.index(":") + 1:]
    elif "iv.org" in lowered:
        if category is None:
            start = lowered.rindex("/") + 1
        else:
            start = lowered.index(category)
        cleaned = lowered[start:]
    elif "doi.org" in lowered:
        # The DOI keeps its original case; only the search is case-insensitive.
        cleaned = cleaned[lowered.index("doi.org/") + len("doi.org/"):]

    if re.search(r'[a-zA-Z]+[0-9]+[a-zA-Z]+', cleaned):
        return find_file(cleaned + ".pdf")

    has_category_prefix = category is not None and cleaned.startswith(category)
    if not has_category_prefix and "/" in cleaned:
        bibtex = doi2bib(cleaned)
    else:
        bibtex = postprocess_arxiv(arxiv2bib([cleaned])[0])
    return normalize(bibtex)
Esempio n. 2
0
def get_bibtex(arxiv_id):
    """
    Fetch the BibTeX entry matching an arXiv identifier.

    .. note::

        The lookup is delegated to the https://pypi.python.org/pypi/arxiv2bib/
        module.

    :param arxiv_id: The canonical arXiv id to look up.
    :returns: The BibTeX string of the first reference that is not an \
            error marker, or ``None`` when the request fails or nothing \
            usable comes back.

    >>> get_bibtex('1506.06690')
    "@article{1506.06690v2,\\nAuthor        = {Lucas Verney and Lev Pitaevskii and Sandro Stringari},\\nTitle         = {Hybridization of first and second sound in a weakly-interacting Bose gas},\\nEprint        = {1506.06690v2},\\nDOI           = {10.1209/0295-5075/111/40005},\\nArchivePrefix = {arXiv},\\nPrimaryClass  = {cond-mat.quant-gas},\\nAbstract      = {Using Landau's theory of two-fluid hydrodynamics we investigate the sound\\nmodes propagating in a uniform weakly-interacting superfluid Bose gas for\\nvalues of temperature, up to the critical point. In order to evaluate the\\nrelevant thermodynamic functions needed to solve the hydrodynamic equations,\\nincluding the temperature dependence of the superfluid density, we use\\nBogoliubov theory at low temperatures and the results of a perturbative\\napproach based on Beliaev diagrammatic technique at higher temperatures.\\nSpecial focus is given on the hybridization phenomenon between first and second\\nsound which occurs at low temperatures of the order of the interaction energy\\nand we discuss explicitly the behavior of the two sound velocities near the\\nhybridization point.},\\nYear          = {2015},\\nMonth         = {Jun},\\nUrl           = {http://arxiv.org/abs/1506.06690v2},\\nFile          = {1506.06690v2.pdf}\\n}"

    >>> get_bibtex('1506.06690v1')
    "@article{1506.06690v1,\\nAuthor        = {Lucas Verney and Lev Pitaevskii and Sandro Stringari},\\nTitle         = {Hybridization of first and second sound in a weakly-interacting Bose gas},\\nEprint        = {1506.06690v1},\\nDOI           = {10.1209/0295-5075/111/40005},\\nArchivePrefix = {arXiv},\\nPrimaryClass  = {cond-mat.quant-gas},\\nAbstract      = {Using Landau's theory of two-fluid hydrodynamics we investigate the sound\\nmodes propagating in a uniform weakly-interacting superfluid Bose gas for\\nvalues of temperature, up to the critical point. In order to evaluate the\\nrelevant thermodynamic functions needed to solve the hydrodynamic equations,\\nincluding the temperature dependence of the superfluid density, we use\\nBogoliubov theory at low temperatures and the results of a perturbative\\napproach based on Beliaev diagrammatic technique at higher temperatures.\\nSpecial focus is given on the hybridization phenomenon between first and second\\nsound which occurs at low temperatures of the order of the interaction energy\\nand we discuss explicitly the behavior of the two sound velocities near the\\nhybridization point.},\\nYear          = {2015},\\nMonth         = {Jun},\\nUrl           = {http://arxiv.org/abs/1506.06690v1},\\nFile          = {1506.06690v1.pdf}\\n}"
    """
    try:
        references = arxiv2bib.arxiv2bib([arxiv_id])
    except HTTPError:
        # The request itself failed: there is nothing to inspect.
        return None

    for reference in references:
        # Error markers are skipped; the first real reference wins.
        if not isinstance(reference, arxiv2bib.ReferenceErrorInfo):
            return reference.bibtex()
    return None
Esempio n. 3
0
def get_bibtex(arxiv_id):
    """
    Look up an arXiv identifier and return its BibTeX entry.

    .. note::

        Relies on the https://pypi.python.org/pypi/arxiv2bib/ module for the
        actual query.

    :param arxiv_id: The canonical arXiv id to get BibTeX from.
    :returns: A BibTeX string, or ``None`` if the query raised an HTTP \
            error or only error markers were returned.

    >>> get_bibtex('1506.06690')
    "@article{1506.06690v2,\\nAuthor        = {Lucas Verney and Lev Pitaevskii and Sandro Stringari},\\nTitle         = {Hybridization of first and second sound in a weakly-interacting Bose gas},\\nEprint        = {1506.06690v2},\\nDOI           = {10.1209/0295-5075/111/40005},\\nArchivePrefix = {arXiv},\\nPrimaryClass  = {cond-mat.quant-gas},\\nAbstract      = {Using Landau's theory of two-fluid hydrodynamics we investigate the sound\\nmodes propagating in a uniform weakly-interacting superfluid Bose gas for\\nvalues of temperature, up to the critical point. In order to evaluate the\\nrelevant thermodynamic functions needed to solve the hydrodynamic equations,\\nincluding the temperature dependence of the superfluid density, we use\\nBogoliubov theory at low temperatures and the results of a perturbative\\napproach based on Beliaev diagrammatic technique at higher temperatures.\\nSpecial focus is given on the hybridization phenomenon between first and second\\nsound which occurs at low temperatures of the order of the interaction energy\\nand we discuss explicitly the behavior of the two sound velocities near the\\nhybridization point.},\\nYear          = {2015},\\nMonth         = {Jun},\\nUrl           = {http://arxiv.org/abs/1506.06690v2},\\nFile          = {1506.06690v2.pdf}\\n}"

    >>> get_bibtex('1506.06690v1')
    "@article{1506.06690v1,\\nAuthor        = {Lucas Verney and Lev Pitaevskii and Sandro Stringari},\\nTitle         = {Hybridization of first and second sound in a weakly-interacting Bose gas},\\nEprint        = {1506.06690v1},\\nDOI           = {10.1209/0295-5075/111/40005},\\nArchivePrefix = {arXiv},\\nPrimaryClass  = {cond-mat.quant-gas},\\nAbstract      = {Using Landau's theory of two-fluid hydrodynamics we investigate the sound\\nmodes propagating in a uniform weakly-interacting superfluid Bose gas for\\nvalues of temperature, up to the critical point. In order to evaluate the\\nrelevant thermodynamic functions needed to solve the hydrodynamic equations,\\nincluding the temperature dependence of the superfluid density, we use\\nBogoliubov theory at low temperatures and the results of a perturbative\\napproach based on Beliaev diagrammatic technique at higher temperatures.\\nSpecial focus is given on the hybridization phenomenon between first and second\\nsound which occurs at low temperatures of the order of the interaction energy\\nand we discuss explicitly the behavior of the two sound velocities near the\\nhybridization point.},\\nYear          = {2015},\\nMonth         = {Jun},\\nUrl           = {http://arxiv.org/abs/1506.06690v1},\\nFile          = {1506.06690v1.pdf}\\n}"
    """
    fetched = []
    try:
        fetched = arxiv2bib.arxiv2bib([arxiv_id])
    except HTTPError:
        # Treated exactly like an empty answer.
        pass

    for candidate in fetched:
        is_error = isinstance(candidate, arxiv2bib.ReferenceErrorInfo)
        if not is_error:
            # First usable reference: hand back its BibTeX.
            return candidate.bibtex()
    # Nothing usable was fetched.
    return None
Esempio n. 4
0
    def run(self):
        """Resolve the citation keys of all input files to BibTeX.

        Keys are read from every file in ``self.args.filenames`` through
        ``self._read``; a file that cannot be opened is reported in
        ``self.messages``.  Keys accepted by ``arxiv2bib.is_valid`` and
        ``mr2bib.is_valid`` are looked up with the matching module.  Each
        successful reference is rendered into ``self.output``, each failed
        one increments ``self.error_count``, and ``self.code`` is set from
        ``self.tally_errors``.
        """
        citation_keys = []

        # Gather the citation keys of every input file.
        for path in self.args.filenames:
            try:
                with open(path) as handle:
                    citation_keys = citation_keys + self._read(handle)
            except IOError:
                self.messages.append("Could not open %s" % path)

        # arxiv2bib resolves the whole batch of ids in one call.
        arxiv_ids = set(filter(arxiv2bib.is_valid, citation_keys))
        references = list(arxiv2bib.arxiv2bib(arxiv_ids))

        # mr2bib works key by key and can refuse unauthenticated access; in
        # that case the arXiv results gathered so far are kept untouched.
        mr_ids = set(filter(mr2bib.is_valid, citation_keys))
        try:
            references = references + list(mr2bib.mr2bib(mr_ids))
        except mr2bib.AuthenticationException:
            self.messages.append("Not authenticated to Mathematical Reviews")

        failure_types = (arxiv2bib.ReferenceErrorInfo,
                         mr2bib.ReferenceErrorInfo)
        for reference in references:
            if isinstance(reference, failure_types):
                self.error_count += 1
            else:
                self.output.append(reference.bibtex())

        self.code = self.tally_errors(references)
Esempio n. 5
0
def arxiv_parser(filename):
    """
    Build a small metadata record for an arXiv PDF from its file name.

    The part of ``filename`` after the last ``/`` and before ``.pdf`` is used
    as the arXiv id and looked up through ``arxiv2bib``.

    :param filename: Path of the PDF, named after its arXiv id.
    :returns: A dict with the keys ``'Author'`` (last word of the last listed
        author's name), ``'Year'`` and ``'Journal'`` (always ``'arXiv'``).
    """
    from arxiv2bib import arxiv2bib
    id_ = filename.split("/")[-1].split(".pdf")[0]
    ref = arxiv2bib([id_])[0]
    output = {}
    output['Author'] = ref.authors[-1].split(" ")[-1]
    # arxiv2bib references expose the publication year as ``year``; the
    # previous ``ref.years`` lookup raised AttributeError.
    output['Year'] = ref.year
    output['Journal'] = 'arXiv'
    # The record used to be built and then dropped; hand it to the caller.
    return output
Esempio n. 6
0
def get_arxiv_bibtex(arxiv_id):
    """
    Fetch the BibTeX of an arXiv preprint through the arxiv2bib package.

    Raises ``KeyError`` (carrying arxiv2bib's message and the id) when the
    lookup yields an error marker instead of a reference.
    """
    import arxiv2bib
    # Exactly one id is requested, so exactly one result is unpacked.
    [reference] = arxiv2bib.arxiv2bib([arxiv_id])
    lookup_failed = isinstance(reference, arxiv2bib.ReferenceErrorInfo)
    if not lookup_failed:
        return reference.bibtex()
    raise KeyError(f'{reference.message}: {arxiv_id}')
Esempio n. 7
0
def pdf_scraper(filename, info):
    """
    Extract bibliographic information from the text of a PDF.

    The text returned by ``pdf2txt`` is scanned for a list of DOI markers.
    For each marker found, the text following it is cleaned into a DOI and
    passed to ``requests_by_doi``; the first non-empty answer wins.  If no DOI
    gives a result and the file name contains ``_arXiv.pdf``, the id following
    the first ``arXiv:`` marker in the text is looked up with ``arxiv2bib``.

    :param filename: Path of the PDF to scan.
    :param info: Not used by this function.
    :returns: The ``requests_by_doi`` result, or ``{'FullEntry': ...}`` built
        from the arXiv authors and title, or ``{}`` when nothing was found.
    """
    doi_pattern = [
        'DOI:', "doi:", "http://dx.doi.org/", "doi/", "DOI ",
        "https://doi.org/"
    ]

    output = pdf2txt(filename)
    parsed_info = {}
    print(filename)
    for doi_string in doi_pattern:
        marker_position = output.find(doi_string)
        if marker_position == -1:
            continue

        doi_position = marker_position + len(doi_string)
        doish = output[doi_position:doi_position + 50]
        lines = doish.split("\n")
        doi = lines[0]

        # The DOI may start on the line after the marker.  Only fall back to
        # that line when it exists (previously an IndexError otherwise).
        if (len(doi) == 0 or doi.find("/") == -1) and len(lines) > 1:
            doi = lines[1]

        for delimiter in [' ', ',', ')', '\t', 'http:']:
            doi = doi_splitter(doi, delimiter)

        # Nothing left after cleaning: try the next marker instead of
        # crashing on ``doi[-1]``.
        if not doi:
            continue

        if doi[-1] == '.':
            doi = doi[:-1]

        print("parsed doish", filename, repr(doish), repr(doi))
        parsed_info = requests_by_doi(doi)

        if parsed_info != {}:
            break

    if "_arXiv.pdf" in filename and parsed_info == {}:
        from arxiv2bib import arxiv2bib
        marker = 'arXiv:'
        # Test the raw ``find`` result: adding the marker length first made
        # the ``-1`` check unreachable.
        idx = output.find(marker)
        if idx == -1:
            return {}
        # Skip the marker exactly once (it used to be skipped twice, so the
        # id was read six characters too late).
        start = idx + len(marker)
        id_ = output[start:start + 10]
        ref = arxiv2bib([id_])[0]
        author = (",").join([a.split(" ")[-1] for a in ref.authors])
        title = ref.title
        parsed_info['FullEntry'] = author + " " + title

    return parsed_info
Esempio n. 8
0
def get_bib(doi, filename=None):
    """ look up a bibliography entry on arXiv.org or crossref.org

    Identifiers starting with "arxiv" (any case) lose their first six
    characters and are resolved through arxiv2bib; anything else is requested
    from the Crossref API as BibTeX and, when ``filename`` is given, the raw
    BibTeX is also written to that file.

    Returns a ``(found, bib)`` tuple; ``(False, None)`` for non-string input.
    """
    # None and every other non-string input are rejected the same way.
    if not isinstance(doi, str):
        return False, None

    # for arXiv:XXXX case
    if doi.lower().startswith("arxiv"):
        entry = arxiv2bib([doi[6:]])[0].bibtex()
        if len(entry) > 0:
            return True, bib_to_dict(entry)
        return False, entry

    # for crossref
    bare_url = "http://api.crossref.org/"
    url = "{}works/{}/transform/application/x-bibtex".format(bare_url, doi)
    response = requests.get(url)
    found = response.status_code == 200
    bibtex_str = str(response.content, "utf-8")

    if bibtex_str.find("Resource not found") != -1:
        return False, None

    if filename is not None:
        with open(filename, "w") as f:
            f.write(bibtex_str)

    return found, bib_to_dict(bibtex_str)
Esempio n. 9
0
def arXiv2Bib(arxiv):
    """Fetch the metadata of an arXiv entry as a bibTeX string

    arxiv is an arxiv id

    The first reference that is not an error marker is parsed, stripped of
    its 'file' field and rendered with tools.parsed2Bibtex. An empty string
    is returned when no usable reference comes back.
    """
    for reference in arxiv_metadata.arxiv2bib([arxiv]):
        if isinstance(reference, arxiv_metadata.ReferenceErrorInfo):
            continue
        entries = BibTexParser(reference.bibtex()).get_entry_dict()
        first_key = list(entries.keys())[0]
        entry = entries[first_key]
        # Drop the 'file' field if there is one.
        entry.pop('file', None)
        return tools.parsed2Bibtex(entry)
    return ''
Esempio n. 10
0
def arXiv2Bib(arxiv):
    """Fetch the metadata of an arXiv entry as a bibTeX string

    arxiv is an arxiv id

    The first reference that is not an error marker is parsed with
    bibtexparser, stripped of its 'file' field and rendered with
    tools.parsed2Bibtex. An empty string is returned when no usable
    reference comes back.
    """
    for reference in arxiv_metadata.arxiv2bib([arxiv]):
        if isinstance(reference, arxiv_metadata.ReferenceErrorInfo):
            continue
        entries = bibtexparser.loads(reference.bibtex()).entries_dict
        first_key = list(entries.keys())[0]
        entry = entries[first_key]
        # Drop the 'file' field if there is one.
        entry.pop('file', None)
        return tools.parsed2Bibtex(entry)
    return ''
Esempio n. 11
0
 def setUp(self):
     """Start the fake data source and fetch the four fixture references."""
     fakedata.start()
     requested_ids = ['1011.9999', '1001.1001v1', 'x', '1205.1001']
     # One result per requested id, in request order.
     (self.not_found,
      self.judge,
      self.invalid,
      self.frv) = a2b.arxiv2bib(requested_ids)
Esempio n. 12
0
    elif 'published-online' in entry:
        dp = entry['published-online']['date-parts'][0]
    year, month, *a = dp
    doi = entry['DOI']
    url = entry['URL']
    journal = entry['container-title']
    if isinstance(journal, list):
        journal = journal[0]
    yearmonth = '%04i-%02i' % (year, month)
    return {'title': title, 'authors': authors, 'year': year, 'month': month,
            'doi': doi, 'url': url, 'journal': journal, 'yearmonth': yearmonth}

# Resolve every DOI listed under pubs['dois']; entries that doi2bib reports as
# found are converted and collected in `bibs`.
for ref in pubs['dois']:
    found, val = doi2bib.get_json(ref)
    if found:
        bibs.append(convert_doientry(val['message']))

# All arXiv ids are looked up in a single arxiv2bib call.
arxivs = arxiv2bib.arxiv2bib(pubs['arxiv'])
for ref in arxivs:
    # NOTE(review): `m` is defined outside this view; presumably a sequence of
    # month names, so index + 1 is the 1-based month number — confirm.
    month = m.index(ref.month) + 1
    yearmonth = '%04i-%02i' % (int(ref.year), month)
    # arXiv entries carry no 'doi' key, unlike the DOI-based entries above.
    bibs.append( {'title': ref.title, 'authors': ref.authors,
                  'year': ref.year,
                  'month': month,
                  'yearmonth': yearmonth,
                  'url': ref.url,
                  'journal': "arXiv"} )

# Write all collected entries to the YAML data file.
with open("_data/citations.yml", "w") as f:
    yaml.dump(bibs, f, default_flow_style=False)
Esempio n. 13
0
def try_get_bib(url, acl_anthology_by_id, acl_anthology_by_title):
    """
    Gets a URL to a publication and tries to extract a bib entry for it
    Returns None if it fails (e.g. if it's not a publication)

    The sources are tried in this order: a ``.bib`` file behind the URL,
    TACL, arXiv, the ACL anthology (by ID), Semantic Scholar and finally the
    downloaded PDF itself. For arXiv and Semantic Scholar, an entry with the
    same title in the ACL anthology is preferred over the fetched one.

    :param url: the URL to extract a publication from
    :param acl_anthology_by_id: a dictionary of publications by ID (from ACL anthology)
    :param acl_anthology_by_title: a dictionary of publications by title (from ACL anthology)
    :return: a tuple of (bib entry, title) or None if not found / error occurred
    """
    lowercased_url = url.lower()
    filename = lowercased_url.split('/')[-1]

    # Only try to open papers with extension pdf or bib or without extension
    if '.' in filename and not filename.endswith(('.pdf', '.bib')):
        return None

    # If ends with bib, read it
    if filename.endswith('.bib'):
        with urllib.request.urlopen(url) as response:
            bib_entry = response.read().decode('utf-8')
        title = get_title_from_bib_entry(bib_entry)
        return (bib_entry, title)

    paper_id = filename.replace('.pdf', '')

    # Paper from TACL
    if 'transacl.org' in lowercased_url or 'tacl' in lowercased_url:
        result = get_bib_from_tacl(paper_id)

        if result is not None:
            try:
                bib_entry, title = result
                title = normalize_title(title)
                return (bib_entry, title)
            except Exception:
                # Best effort: fall through to the other sources.
                logger.debug('Could not use TACL result for {}'.format(url),
                             exc_info=True)

    # If arXiv URL, get paper details from arXiv
    if 'arxiv.org' in lowercased_url:
        # Start from a known state: without this, an empty result list or a
        # failure inside the try block left these names unbound (or stale
        # from the TACL attempt) when tested below.
        bib_entry = None
        title = None
        results = arxiv2bib([paper_id])

        if len(results) > 0:
            try:
                result = results[0]
                title = normalize_title(result.title)

                # First, try searching for the title in the ACL anthology. If the paper
                # was published in a *CL conference, it should be cited from there and not from arXiv
                bib_entry = acl_anthology_by_title.get(title, None)

                # Not found in ACL - take from arXiv
                if bib_entry is None:
                    bib_entry = result.bibtex()
            except Exception:
                # Best effort: e.g. an error marker without a title.
                logger.debug('Could not use arXiv result for {}'.format(url),
                             exc_info=True)

        if bib_entry:
            return (bib_entry, title)

    # If the URL is from the ACL anthology, take it by ID
    if 'aclanthology' in lowercased_url or 'aclweb.org' in lowercased_url:
        bib_entry = acl_anthology_by_id.get(paper_id.upper(), None)

        if bib_entry:
            title = get_title_from_bib_entry(bib_entry)
            return (bib_entry, title)

    # If the URL is from Semantic Scholar
    if 'semanticscholar.org' in lowercased_url and not lowercased_url.endswith(
            'pdf'):
        result = get_bib_from_semantic_scholar(url)

        if result is not None:
            try:
                semantic_scholar_bib_entry, title = result
                title = normalize_title(title)

                # First, try searching for the title in the ACL anthology. If the paper
                # was published in a *CL conference, it should be cited from there and not from Semantic Scholar
                bib_entry = acl_anthology_by_title.get(title, None)

                # Not found in ACL - take from Semantic Scholar
                if bib_entry is None:
                    bib_entry = semantic_scholar_bib_entry

                return (bib_entry, title)
            except Exception:
                # Best effort: fall through to the PDF-based lookup.
                logger.debug(
                    'Could not use Semantic Scholar result for {}'.format(url),
                    exc_info=True)

    # Else: try to read the pdf and find it in the acl anthology by the title
    if lowercased_url.endswith('pdf'):

        # Download the file to a temporary file
        with urllib.request.urlopen(url) as response:
            data = response.read()
        with open('temp.pdf', 'wb') as f_out:
            f_out.write(data)

        result = get_from_pdf('temp.pdf', acl_anthology_by_title)

        if result is not None:
            bib_entry, title = result
            title = normalize_title(title)
            return (bib_entry, title)

    # Didn't find
    logger.warning('Could not find {}'.format(url))
    return None
Esempio n. 14
0
            citationkeylist.append(citationkey)

    citationkeylist = list(set(citationkeylist))

    # =============================================================================
    #     Classement
    # =============================================================================

    # Liste de reference provenant d'arXiv
    list_arxiv_id = []

    for doi in citationkeylist:
        if arxiv2bib.is_valid(doi):
            list_arxiv_id.append(doi)

    refs_arxiv = arxiv2bib.arxiv2bib(list_arxiv_id)
    list_text_refs_arxiv = [arxref.bibtex() for arxref in refs_arxiv]

    # liste de reference au format doi
    list_doi_refs = []
    doi_pattern = '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)'

    for doi in citationkeylist:
        if re.search(doi_pattern, doi):
            list_doi_refs.append(doi)

    list_text_refs_doi = [pydoi2bib(doi) for doi in list_doi_refs]

    # Ajout des references doi et arxiv au fichier texte final
    if len(list_text_refs_arxiv) > 0:
        text_complet += ' \n \n ' + ' \n \n '.join(
Esempio n. 15
0
# Python 2 script section: resolve every reference line of `biblist` to a
# BibTeX entry (arXiv or DOI) and suffix citation keys that are already taken.
new_bibkey = list(new_bibkey)
doi = ones_like(biblist)
print bib
j = 0
bibfile = codecs.open('../biblio/' + name + '.bib', 'w', 'utf-8')
# NOTE(review): the file is closed here when `bib` is empty, but the loop
# below still runs — confirm that nothing writes to `bibfile` afterwards.
if len(bib) == 0:
    bibfile.close()
# bibfile=open('../biblio/'+name+'.bib','w')
for i, ref in enumerate(biblist):
    # The old citation key is the text between the first '[' and ']'.
    old_bibkey[i] = ref[ref.find("[") + 1:ref.find("]")]
    # The last space-separated token is the identifier; known URL prefixes
    # are stripped from it.
    doi[i] = ref.split(' ')[-1]
    doi[i] = doi[i].replace('https://doi.org/', '')
    doi[i] = doi[i].replace('https://journals.aps.org/prb/abstract/', '')
    if doi[i][:5] == 'arXiv':
        # arXiv reference: look up the part after the last ':'.
        try:
            out = a2b.arxiv2bib([doi[i].split(':')[-1]])
            bibitem = out[0].bibtex()
        except urllib2.HTTPError:
            bibitem = ''
    else:
        try:
            print 'searching DOI:', doi[i]
            bibitem = doi2bib(doi[i])
        except urllib2.HTTPError:
            bibitem = ''
    # Split "@type{key" from the rest of the entry to extract the key.
    # NOTE(review): after a failed lookup `bibitem` is '' and this unpacking
    # raises ValueError — confirm whether that is intended.
    y, z = bibitem.split(",", 1)
    x, key = y.split("{", 1)
    if key in new_bibkey:
        # Key already present: suffix it with the next letter of `alphabet`.
        new_bibkey[i] = key + alphabet[j]
        j += 1
        bibitem = bibitem.replace(key, new_bibkey[i])
Esempio n. 16
0
 def setUp(self):
     """Start the fake data source and fetch the four fixture references."""
     fakedata.start()
     fixture_ids = ['1011.9999', '1001.1001v1', 'x', '1205.1001']
     fetched = a2b.arxiv2bib(fixture_ids)
     # One result per requested id, in request order.
     self.not_found, self.judge, self.invalid, self.frv = fetched