def test_align(self):
    """Check that ``align_values`` pads field names so the '=' signs line up.

    Two cases are covered: a single hand-built entry with one very long
    field name, and the entries-only dump of the multi-entry fixture file.
    """
    # Case 1: in-memory entry mixing a short and a very long field name.
    single_db = BibDatabase()
    single_db.entries = [{
        'ID': 'abc123',
        'ENTRYTYPE': 'book',
        'author': 'test',
        'thisisaverylongkey': 'longvalue',
    }]
    aligning_writer = BibTexWriter()
    aligning_writer.align_values = True
    dumped_single = bibtexparser.dumps(single_db, aligning_writer)
    expected_single = (
"""@book{abc123,
 author             = {test},
 thisisaverylongkey = {longvalue}
}

""")
    self.assertEqual(dumped_single, expected_single)

    # Case 2: fixture file, writing entries only (comments are left out).
    fixture_path = 'bibtexparser/tests/data/multiple_entries_and_comments.bib'
    with open(fixture_path) as fixture:
        fixture_db = bibtexparser.load(fixture)
    entries_writer = BibTexWriter()
    entries_writer.contents = ['entries']
    entries_writer.align_values = True
    dumped_fixture = bibtexparser.dumps(fixture_db, entries_writer)
    expected_fixture = (
"""@book{Toto3000,
 author    = {Toto, A and Titi, B},
 title     = {A title}
}

@article{Wigner1938,
 author    = {Wigner, E.},
 doi       = {10.1039/TF9383400029},
 issn      = {0014-7672},
 journal   = {Trans. Faraday Soc.},
 owner     = {fr},
 pages     = {29--41},
 publisher = {The Royal Society of Chemistry},
 title     = {The transition state method},
 volume    = {34},
 year      = {1938}
}

@book{Yablon2005,
 author    = {Yablon, A.D.},
 publisher = {Springer},
 title     = {Optical fiber fusion slicing},
 year      = {2005}
}

""")
    self.assertEqual(dumped_fixture, expected_fixture)
def save_citation(citation_record):
    """Fetch the BibTeX entry for one search result and append it to the citation file.

    The citation id is extracted from the ``onclick`` attribute of the result's
    "Cite" anchor, the cite page and then the BibTeX page are downloaded, the
    parsed entry is tagged with ``gscholar_id`` and appended (UTF-8 encoded)
    to ``opts.citation_name``. When ``opts.should_download`` is set, the PDF
    linked from the result is downloaded as well.

    :param citation_record: parsed HTML element of one search result
        (presumably a BeautifulSoup tag -- it is queried with ``find``).
    :return: None. Returns early, after logging a warning, when no cite
        anchor, no BibTeX link, or no parsable BibTeX entry is found.
    """
    cite_anchor = citation_record.find('a', {'class': 'gs_nph', 'href': '#', "role": "button"})
    # .get() so that an anchor without an onclick attribute takes the warning
    # path instead of raising KeyError.
    if not cite_anchor or not cite_anchor.get('onclick'):
        logging.warning("No Cite anchor for citation: %s" % citation_record)
        return
    # The id is the second comma-separated argument of the onclick call,
    # with its surrounding quote characters stripped.
    citation_id = cite_anchor['onclick'].split(',')[1][1:-1]
    logging.info("Getting formated cite from citation id: " + citation_id)
    params = {"q": "info:%s:scholar.google.com/" % citation_id, "output": "cite"}
    soup = create_soup_by_url("https://scholar.google.com/scholar", params)
    bib_anchor = soup.find('a', {"class": "gs_citi"})
    if not bib_anchor:
        logging.debug("BibTex page soup is: %s" % soup.getText())
        logging.warning("No BibTex citation provided for citation: %s" % citation_id)
        return
    soup = create_soup_by_url(bib_anchor['href'])
    entries = bibtexparser.loads(soup.getText()).entries
    if not entries:
        # The downloaded page did not contain a parsable BibTeX record.
        logging.warning("No BibTex citation provided for citation: %s" % citation_id)
        return
    global citation_num
    citation_num += 1
    # Adding a tag to the bib entry about google scholar citation ID
    citation_entry = entries[0]
    citationID = citation_entry['ID']  # e.g., melville2004review
    citation_entry["gscholar_id"] = citation_id
    db = BibDatabase()
    db.entries = [citation_entry]
    g_bib_entry = bibtexparser.dumps(db)
    bib_entry = "%% [%d]\n%s" % (citation_num, g_bib_entry)
    logging.info(bib_entry.strip())
    # Binary append mode: the payload is explicitly encoded to bytes, which a
    # text-mode file rejects on Python 3.
    with open(opts.citation_name, "ab") as f:
        f.write(bib_entry.encode('utf-8'))
    if opts.should_download:
        pdf_div = citation_record.find('div', {"class": "gs_ggs gs_fl"})
        if pdf_div:
            download_pdf(pdf_div.a['href'], citationID)
Example #3
0
File: zbl.py Project: siudej/Cite
 def _processResults(self, data):
     """ Get bibtex data from zbMATH website.

     Every ``bibtex/...<digits>.bib`` link found in ``data`` is downloaded and
     parsed. When ``self.otherID`` is set and an MRef match is found, the MSN
     entry is used instead, with the zbl number added and the doi carried over
     if missing. The serialized records are joined into ``self.refs``.
     """
     import bibtexparser
     from bibtexparser.bparser import BibTexParser
     # Raw string: "\d" and "\." are regex escapes, not string escapes.
     bibs = re.findall(r"(?si)bibtex/.*?\d{3,}\.bib", data)
     data = []
     if self.otherID:
         # setup for MRef fetching
         from msn import MRef
         mr = MRef()
     for bib in bibs:
         bibtext = urllib.urlopen("https://zbmath.org/" + bib).read()
         # A fresh parser per record: a reused BibTexParser keeps adding to
         # the same database, so entries[0] would stay the first record.
         parser = BibTexParser()
         parser.customization = customizations
         zbl = bibtexparser.loads(bibtext, parser=parser)
         if self.otherID and mr.fetch(bibtext):
             # found MRef match for zbMATH record
             msn = bibtexparser.loads(mr.refs)
             # use MSN bibtex entry with zbl number added
             # and doi transfered if missing
             msn.entries[0]['zbl'] = zbl.entries[0]['zbl']
             if 'doi' not in msn.entries[0] and 'doi' in zbl.entries[0]:
                 msn.entries[0]['doi'] = zbl.entries[0]['doi']
             zbl = msn
         data.append(bibtexparser.dumps(zbl))
     self.refs = "\n".join(data)
 def test_multiple_string_write(self):
     """Two @string definitions are written in the order they were added."""
     db = BibDatabase()
     # Order is important: name1 has to be emitted before name2.
     for key, value in (('name1', 'value1'), ('name2', 'value2')):
         db.strings[key] = value
     dumped = bibtexparser.dumps(db)
     self.assertEqual(
         dumped,
         '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n')
Example #5
0
def normalize_keyword_case():
    """Lower-case the ``keyword`` field of the first BibTeX entry of every review document.

    Each document in ``review.documents`` has its ``bib`` text parsed; when
    the first entry has a ``keyword`` field, that field is lower-cased, the
    database is serialized back into ``d.bib`` and the document is saved.
    Documents whose text yields no entries, or no keyword, are left untouched.
    """
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        if not bib.entries:
            # Nothing parsable in this document; skip instead of IndexError.
            continue
        entry = bib.entries[0]
        # "in" instead of dict.has_key(), which does not exist on Python 3.
        if 'keyword' in entry:
            entry['keyword'] = entry['keyword'].lower()
            d.bib = bibtexparser.dumps(bib)
            d.save()
Example #6
0
def normalize_keyword_delimitter():
    """Replace ';' with ',' in the ``keyword`` field of every review document.

    Each document in ``review.documents`` has its ``bib`` text parsed; when
    the first entry has a ``keyword`` field, every ';' in it becomes ',', the
    database is serialized back into ``d.bib`` and the document is saved.
    Documents whose text yields no entries, or no keyword, are left untouched.
    """
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        if not bib.entries:
            # Nothing parsable in this document; skip instead of IndexError.
            continue
        entry = bib.entries[0]
        # "in" instead of dict.has_key(), which does not exist on Python 3.
        if 'keyword' in entry:
            entry['keyword'] = entry['keyword'].replace(';', ',')
            d.bib = bibtexparser.dumps(bib)
            d.save()
Example #7
0
def normalize_keyword_visualization():
    """Rewrite 'visualis' as 'visualiz' in the ``keyword`` field of every review document.

    Each document in ``review.documents`` has its ``bib`` text parsed; when
    the first entry has a ``keyword`` field, the substring is replaced, the
    database is serialized back into ``d.bib`` and the document is saved.
    Documents whose text yields no entries, or no keyword, are left untouched.
    """
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        if not bib.entries:
            # Nothing parsable in this document; skip instead of IndexError.
            continue
        entry = bib.entries[0]
        # "in" instead of dict.has_key(), which does not exist on Python 3.
        if 'keyword' in entry:
            entry['keyword'] = entry['keyword'].replace('visualis', 'visualiz')
            d.bib = bibtexparser.dumps(bib)
            d.save()
    def test_content_entries_only(self):
        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['entries']
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{Toto3000,
 author = {Toto, A and Titi, B},
 title = {A title}
}

@article{Wigner1938,
 author = {Wigner, E.},
 doi = {10.1039/TF9383400029},
 issn = {0014-7672},
 journal = {Trans. Faraday Soc.},
 owner = {fr},
 pages = {29--41},
 publisher = {The Royal Society of Chemistry},
 title = {The transition state method},
 volume = {34},
 year = {1938}
}

@book{Yablon2005,
 author = {Yablon, A.D.},
 publisher = {Springer},
 title = {Optical fiber fusion slicing},
 year = {2005}
}

"""
        self.assertEqual(result, expected)
Example #9
0
def main():
    """Write a copy of a BibTeX file reduced to the keys cited in a TeX file.

    Reads ``args.input_bib`` and ``args.input_tex`` (from ``_args()``),
    collects every key that appears inside ``cite{...}`` in the TeX source,
    keeps only the entries whose ``ID`` is among those keys and writes the
    result, UTF-8 encoded, to ``args.output_bib``.
    """
    args = _args()

    with open(args.input_bib) as bibtex_file:
        parser = BibTexParser()
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    citation_keys = set()
    # Raw string so that "\{" and "\s" reach the regex engine untouched.
    re_cite = re.compile(r'cite\{([0-9A-Za-z,\s]+)\}')
    with open(args.input_tex) as tex_file:
        for line in tex_file:
            # One cite{...} group may hold several comma-separated keys.
            for label_group in re_cite.findall(line):
                for key in label_group.split(','):
                    citation_keys.add(key.strip())
    print('Found {} citation keys'.format(len(citation_keys)))

    bib_database.entries = [
        x for x in bib_database.entries if x['ID'] in citation_keys
    ]

    bibtex_string = bibtexparser.dumps(bib_database)
    # Binary mode: the text is encoded to bytes before writing, which a
    # text-mode file rejects on Python 3.
    with open(args.output_bib, 'wb') as new_bibtex_file:
        new_bibtex_file.write(bibtex_string.encode('utf8'))
    print('Cleaned file saved in {}'.format(args.output_bib))
Example #10
0
def normalize(input_file, output_file):
    """
    read a *.bib file, change every 'title' and 'booktitle' field to only
    use uppercase for the first letter and write the changes to the output
    file.

    Fields matching ``FIXED_TITLE_RE`` or containing a ``BRACKETS_RE`` match
    are left untouched. For a field containing ':', the part before the first
    colon is kept as is and the part after it is lower-cased.

    Parameters
    ----------
    input_file : file
        the *.bib file to normalized
    output_file : file
        the *.bib output file; it receives UTF-8 encoded bytes
    """
    bibtex_str = input_file.read()
    bib_database = bibtexparser.loads(bibtex_str)

    for entry in bib_database.entries:
        for field in ('title', 'booktitle'):
            if field not in entry:
                continue
            field_str = entry[field]
            # don't touch titles that are (partially) enclosed in brackets
            if (FIXED_TITLE_RE.match(field_str)
                    or BRACKETS_RE.search(field_str)):
                continue
            if ':' in field_str:
                # split no more than once
                title, subtitle = field_str.split(':', 1)
                # The subtitle still carries the blank that followed the
                # colon; drop it so the result is "a: b", not "a:  b".
                entry[field] = u'{}: {}'.format(title,
                                                subtitle.lstrip().lower())
            else:
                entry[field] = field_str.capitalize()

    new_bibstr = bibtexparser.dumps(bib_database)
    output_file.write(new_bibstr.encode('utf-8'))
 def test_write_dependent_strings(self):
     """A @string defined via another @string is written as a '#' expression."""
     db = BibDatabase()
     db.strings['title'] = 'Mr'
     title_reference = BibDataString(db, 'title')
     db.strings['name'] = BibDataStringExpression([title_reference, 'Smith'])
     dumped = bibtexparser.dumps(db)
     self.assertEqual(
         dumped,
         '@string{title = {Mr}}\n\n@string{name = title # {Smith}}\n\n')
 def test_write_common_strings(self):
     """With ``write_common_strings=True`` the dump equals the common-strings fixture."""
     db = BibDatabase()
     db.load_common_strings()
     common_writer = BibTexWriter(write_common_strings=True)
     dumped = bibtexparser.dumps(db, writer=common_writer)
     with io.open('bibtexparser/tests/data/common_strings.bib') as fixture:
         wanted = fixture.read()
     self.assertEqual(dumped, wanted)
Example #13
0
def bibdatabase2bibtex(data):
    """
    Serialize a BibDatabase object into BibTeX text.

    :param data: The ``bibtexparser.BibDatabase`` object to serialize.
    :return: The result of ``bibtexparser.dumps`` for ``data``.
    """
    bibtex_text = bibtexparser.dumps(data)
    return bibtex_text
Example #14
0
def export_citation(paper_id):
    """Return the BibTeX record of ``paper_id`` with its abstract added.

    The page at ``cfg.URL_BIBTEX`` is fetched and the text of its ``<pre>``
    element is parsed as BibTeX; the first entry gets an ``abstract`` field
    from ``get_abstract``. Returns ``''`` when the page has no ``<pre>``.
    """
    page = get(cfg.URL_BIBTEX.format(id=paper_id), execute_js=False)
    pre_tag = BeautifulSoup(page, 'html.parser').pre
    if not pre_tag:
        return ''
    database = bibtexparser.loads(pre_tag.get_text())
    database.entries[0]['abstract'] = get_abstract(paper_id)
    return bibtexparser.dumps(database)
Example #15
0
def get_citation_text(citation_id):
    """Download the BibTeX record of ``citation_id`` and add its abstract.

    :param citation_id: id substituted into ``cfg.URL_BIBTEX``.
    :return: ``{citation_id: bibtex_text}`` when the page contains a ``<pre>``
        element, otherwise ``''`` (the two return types are kept as they are
        for existing callers).
    """
    log.info(citation_id)
    res = urlopen(cfg.URL_BIBTEX.format(id=citation_id))
    if res:
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser is installed, so results can differ between machines.
        soup = BeautifulSoup(res.read(), 'html.parser')
        if soup.pre:
            bibtex = soup.pre.get_text()
            bib = bp.loads(bibtex)
            bib.entries[0]['abstract'] = get_abstract(citation_id)
            return {citation_id: bp.dumps(bib)}
    return ''
 def file_update(self,is_update):
     """
         Update the current local file with changes selected by the user
         Args:
             is_update: Flag indicates if the user made any selections to update the current local file

         The file named by ``self.view.local_file.get()`` is overwritten with
         the UTF-8 encoded dump of ``self.model.bibdb_local``.
     """
     if is_update:
         # Serialize first, so a failure here does not leave an emptied file.
         bibtex_str = bibtexparser.dumps(self.model.bibdb_local)
         # 'wb' already truncates, so no separate open/close is needed, and
         # binary mode is required because encoded bytes are written.
         with open(self.view.local_file.get(), 'wb') as bibtex_file:
             bibtex_file.write(bibtex_str.encode('utf8'))
Example #17
0
 def writeFile(self, fname):
     """Write ``self.bibdb`` as UTF-8 BibTeX to ``fname``.

     Characters that cannot be encoded are replaced. Errors are reported on
     standard output rather than raised.
     """
     try:
         btex = bibtexparser.dumps(self.bibdb)
         # "with" closes the file even when the write itself fails.
         with open(fname, 'wb') as fd:
             fd.write(btex.encode('utf8', 'replace'))
     except IOError as e:
         # One format call: mixing .format() and % breaks as soon as the
         # error text contains a '%'.
         print("I/O error({0}): '{1}': {2}".format(e.errno, fname, e.strerror))
     except Exception:
         # Exception, not a bare except, so Ctrl-C and SystemExit propagate.
         print("Unexpected error:", sys.exc_info()[0])
Example #18
0
def write_bibtex_file(filename, db):
    """
        Write BiBTeX file with content from db

        Entries are ordered by the fields 'counter', 'year' and 'ID'; the
        output is UTF-8 encoded. The number of records written is printed.
    """

    writer = BibTexWriter()
    writer.order_entries_by = ('counter', 'year', 'ID')
    # Serialize before opening, so a failure does not leave an empty file.
    bibtex_str = bibtexparser.dumps(db, writer=writer)
    # Binary mode: encoded bytes cannot be written to a text-mode file on
    # Python 3.
    with open(filename, 'wb') as output_file:
        output_file.write(bibtex_str.encode('utf8'))
        print("Wrote %i records into filename '%s'" %
              (len(db.entries), filename))
Example #19
0
    def bibtex(self) -> str:
        """Returns the publication as a Bibtex entry

        The publication is filled first if needed. ``author_id`` is joined
        into one comma-separated string for the output only; ``self.bib``
        itself is not modified.

        :getter: Returns a Bibtex entry in text format
        :type: str
        """
        if not self._filled:
            self.fill()
        # Work on a copy: joining in place would turn self.bib['author_id']
        # into a string, and a second call would then join its characters.
        converted_dict = dict(self.bib)
        converted_dict['author_id'] = ', '.join(converted_dict['author_id'])
        a = BibDatabase()
        a.entries = [converted_dict]
        return bibtexparser.dumps(a)
    def test_content_comment_only(self):
        with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file)
        writer = BibTexWriter()
        writer.contents = ['comments']
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@comment{}

@comment{A comment}

"""
        self.assertEqual(result, expected)
Example #21
0
def bibtex_passthrough(text, set_id=None):
    """
    Re-serialize a single bibtex record, repairing its author list and
    optionally replacing its ID.

    ``text`` must parse to exactly one entry (non-standard entry types are
    accepted); otherwise the unpacking below raises. A ';'-separated author
    list is rewritten with ' and ' separators.
    """
    permissive_parser = bibtexparser.bparser.BibTexParser()
    permissive_parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(text, permissive_parser)
    (record,) = database.entries
    if 'author' in record:
        names = record['author'].rstrip(';').split('; ')
        record['author'] = ' and '.join(names)
    if set_id is not None:
        record['ID'] = set_id
    return bibtexparser.dumps(database)
Example #22
0
    def handle(self, *args, **options):
        """Import every entry of a BibTeX file as a ``Paper``.

        The file named by ``options["bibtex_file"]`` is read as UTF-8, falling
        back to Latin-1. Each entry is serialized on its own (entries only,
        four-space indent) and saved as a ``Paper`` together with a link taken
        from the first of ``url``/``URL``/``doi``/``DOI`` present, or else a
        Google search for its title. Exits with status 1 when the file is
        missing or unreadable in both encodings.
        """
        inBibtexFile = options["bibtex_file"]

        if not os.path.isfile(inBibtexFile):
            self.stdout.write(
                self.style.ERROR(
                    "File '{}' does not exist.".format(inBibtexFile)))
            # Non-zero status: a bare sys.exit() would report success.
            sys.exit(1)

        try:
            with open(inBibtexFile, encoding="utf-8") as bibtexFile:
                bibtexData = bibtexparser.load(bibtexFile)
        except Exception:
            # Exception rather than a bare except, so that Ctrl-C and
            # SystemExit are not swallowed by the fallback.
            self.stdout.write(
                self.style.WARNING(
                    "Failed reading file with UTF-8 encoding, atttempting to read as Latin-1."
                ))
            try:
                with open(inBibtexFile, encoding="ISO-8859-1") as bibtexFile:
                    bibtexData = bibtexparser.load(bibtexFile)
            except Exception:
                self.stdout.write(
                    self.style.ERROR(
                        "Failed reading file with either UTF-8 or Latin-1 encoding."
                    ))
                sys.exit(1)

        bibWriter = bibtexparser.bwriter.BibTexWriter()
        bibWriter.contents = ["entries"]
        bibWriter.indent = "    "
        for entry in bibtexData.entries:
            # Instantiate the database: assigning to the class itself would
            # share one `entries` attribute across every BibDatabase.
            singleEntryBibDatabase = bibtexparser.bibdatabase.BibDatabase()
            singleEntryBibDatabase.entries = [entry]
            bibtex = bibtexparser.dumps(singleEntryBibDatabase, bibWriter)
            link = None
            for linkKey in ["url", "URL", "doi", "DOI"]:
                if linkKey in entry:
                    link = entry[linkKey]
                    break
            if not link:
                # Entries without a title fall back to their ID for the search.
                link = 'https://www.google.com/search?q="{}"'.format(
                    entry.get("title", entry["ID"]))

            paper = Paper(bibtex=bibtex, link=link)
            paper.save()
            self.stdout.write(
                self.style.SUCCESS("  -- Imported: {}".format(paper)))

        self.stdout.write(
            self.style.SUCCESS("Successfully imported {} papers.".format(
                len(bibtexData.entries))))
Example #23
0
def entry_sdiff(entries, color=True, bcolors=bcolors, best=None):
    """split diff

    Each entry is rendered as BibTeX under a numbered header. Fields whose
    merged value is a ``ConflictingField`` are wrapped in the WARNING colour,
    fields missing from at least one entry in BOLD. When ``best`` is given,
    only entries equal to it keep the '*' marker in their header. Returns
    ``''`` for an empty ``entries``.
    """
    if not entries:
        return ''
    assert all(entries), 'some entries are empty'

    palette = bcolors if color else dummybcolors

    # One-slot database that is re-used to serialize each entry in turn.
    db = bibtexparser.bibdatabase.BibDatabase()
    db.entries.append(None)

    merged = merge_entries(entries)
    conflicting_fields = [
        key for key in merged if isinstance(merged[key], ConflictingField)
    ]
    somemissing = [
        key for key in merged if any(key not in e for e in entries)
    ]
    highlighted = conflicting_fields + somemissing

    def paint(text, key):
        """Wrap ``text`` in the colour that belongs to ``key``."""
        if key in conflicting_fields:
            start = palette.WARNING
        else:
            start = palette.BOLD
        return start + text + palette.ENDC

    rendered = []
    for number, entry in enumerate(entries, start=1):
        db.entries[0] = entry
        text = bibtexparser.dumps(db)
        if six.PY2:
            # decode to avoid failure in replace
            text = text.decode('utf-8')

        # color the conflicting fields
        painted = []
        for line in text.splitlines():
            for key in highlighted:
                if key != key.lower() and '@' in line:
                    # Keys that are not all lower-case are matched by their
                    # value on the '@' header line.
                    line = line.replace(entry[key], paint(entry[key], key))
                elif line.strip().startswith('{} = {{'.format(key)):
                    line = paint(line, key)
            painted.append(line)
        text = '\n'.join(painted)

        marker = '* (' if best is None or entry == best else '  ('
        rendered.append(palette.OKBLUE + marker + str(number) + ')' +
                        palette.ENDC + '\n' + text)

    return '\n'.join(rendered)
Example #24
0
    def active_bibliography(self) -> str:
        """BibTeX text restricted to the active entries.

        An entry is active when its key appears in at least one of the key
        sets stored in ``self.citations``. With no citations recorded the
        result is the empty string.
        """
        result_db = BibDatabase()
        if len(self.citations) == 0:
            return ""
        wanted_keys = set.union(*self.citations.values())
        selected = []
        for key, entry in self.bib_database.entries_dict.items():
            if key in wanted_keys:
                selected.append(entry)
        result_db.entries = selected
        return bibtexparser.dumps(result_db)
    def test_entry_separator(self):
        bib_database = BibDatabase()
        bib_database.entries = [{'ID': 'abc123',
                                 'ENTRYTYPE': 'book',
                                 'author': 'test'}]
        writer = BibTexWriter()
        writer.entry_separator = ''
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{abc123,
 author = {test}
}
"""
        self.assertEqual(result, expected)
Example #26
0
def main():
    """Clean a BibTeX entry and put it back on the clipboard.

    The entry comes from ``bibtex_from_doi`` when ``--doi`` is given,
    otherwise from the clipboard. A fixed list of fields is dropped from
    every entry; the result is copied to the clipboard and printed.
    """
    # Get the command line arguments
    cli = docopt(__doc__)

    doi = cli['--doi']
    raw_entry = bibtex_from_doi(doi) if doi else pyperclip.paste()

    # setup a parser customization to deal with non-english characters in Latex
    # options are needed to deal with months.
    parser = bibtexparser.bparser.BibTexParser(interpolate_strings=True,
                                               common_strings=True,
                                               customization=format)

    # create the bibfile object and parse to get the dictionary
    database = bibtexparser.loads(raw_entry, parser=parser)

    # fields we want to delete from every entry
    unwanted_fields = (
        'month', 'keyword', 'language', 'read', 'rating', 'date-added',
        'date-modified', 'abstract', 'local-url', 'file', 'uri', 'ISSN',
        'issn', 'keywords',
    )

    # Strip those fields; pop() with a default ignores absent ones
    for paper in database.entries:
        for field in unwanted_fields:
            paper.pop(field, None)

    # Copy back to the pasteboard
    pyperclip.copy(bibtexparser.dumps(database))

    # output to the terminal
    print(bibtexparser.dumps(database))
Example #27
0
def crossref_to_bibtex(r):
    """convert crossref result to bibtex

    :param r: one Crossref work record as a dict (keys such as ``author``,
        ``issued``, ``DOI``, ``title``, ``container-title``).
    :return: BibTeX text with a single entry. The key is
        ``<family>_<year>`` built from the first author that has a family
        name, or just the year when there is none.
    """
    bib = {}

    authors = r.get('author', [])
    if 'author' in r:
        def family(p):
            # Multi-word family names are braced so BibTeX keeps them whole.
            name = p['family']
            return name if len(name.split()) == 1 else u'{' + name + u'}'

        bib['author'] = ' and '.join(
            family(p) + ', ' + p.get('given', '') for p in authors
            if 'family' in p)

    # for k in ['issued','published-print', 'published-online']:
    k = 'issued'
    if k in r and 'date-parts' in r[k] and len(r[k]['date-parts']) > 0:
        date = r[k]['date-parts'][0]
        bib['year'] = str(date[0])
        if len(date) >= 2:
            bib['month'] = str(date[1])

    if 'DOI' in r: bib['doi'] = r['DOI']
    if 'URL' in r: bib['url'] = r['URL']
    # These two are lists in the record; an empty list is skipped instead of
    # raising IndexError.
    if r.get('title'): bib['title'] = r['title'][0]
    if r.get('container-title'): bib['journal'] = r['container-title'][0]
    if 'volume' in r: bib['volume'] = r['volume']
    if 'issue' in r: bib['number'] = r['issue']
    if 'page' in r: bib['pages'] = r['page']
    if 'publisher' in r: bib['publisher'] = r['publisher']

    # entry type
    # NOTE(review): `bib` never holds a 'type' key at this point, so this is
    # always 'journal-article'; the lookup was probably meant to read
    # r['type']. Left unchanged because it would alter the emitted types.
    entry_type = bib.get('type', 'journal-article')
    type_mapping = {'journal-article': 'article'}
    bib['ENTRYTYPE'] = type_mapping.get(entry_type, entry_type)

    # bibtex key
    year = str(bib.get('year', '0000'))
    # Same filter as the author field above: authors without a family name
    # cannot supply the key.
    first_family = next((p['family'] for p in authors if 'family' in p), None)
    if first_family:
        ID = first_family + u'_' + six.u(year)
    else:
        ID = year
    bib['ID'] = ID

    db = bibtexparser.bibdatabase.BibDatabase()
    db.entries.append(bib)
    return bibtexparser.dumps(db)
Example #28
0
    def test_indent(self):
        bib_database = BibDatabase()
        bib_database.entries = [{'id': 'abc123',
                                 'type': 'book',
                                 'author': 'test'}]
        writer = BibTexWriter()
        writer.indent = '  '
        result = bibtexparser.dumps(bib_database, writer)
        expected = \
"""@book{abc123,
  author = {test}
}

"""
        self.assertEqual(result, expected)
 def test_sort_missing_field(self):
     """Sorting by 'year' puts the entry that has no year first."""
     db = BibDatabase()
     db.entries = [
         {'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
         {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
         {'ID': 'a', 'ENTRYTYPE': 'book'},
     ]
     by_year = BibTexWriter()
     by_year.order_entries_by = ('year', )
     dumped = bibtexparser.dumps(db, by_year)
     wanted = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n"
     self.assertEqual(dumped, wanted)
Example #30
0
def extract_bibtex(bib_database, id):
    """Return the BibTeX text of the single entry whose ``ID`` equals ``id``.

    The database is deep-copied and trimmed to that entry, so
    ``bib_database`` is left untouched. If several entries share the id, the
    last one is used.

    :param bib_database: database to search.
    :param id: value compared against each entry's ``'ID'``.
    :raises ValueError: if no entry has that id.
    """
    pos = None
    for i, entry in enumerate(bib_database.entries):
        if entry['ID'] == id:
            pos = i

    if pos is None:
        # Fail with a clear message instead of the TypeError that
        # `None + 1` would produce below.
        raise ValueError("no BibTeX entry with ID {!r}".format(id))

    bib_db = copy.deepcopy(bib_database)
    # Drop everything after, then everything before, the wanted entry.
    del bib_db.entries[pos + 1:]
    del bib_db.entries[:pos]
    return bibtexparser.dumps(bib_db)
Example #31
0
    def do_clip(self, args):
        '''Copy bibtex entry to clipboard'''

        index = self._get_bibid(args)
        if index is None:
            print('Index out of range')
            return

        # Wrap the selected entry in a database of its own for serialization.
        single_db = bibtexparser.bibdatabase.BibDatabase()
        single_db.entries = [self.bibtex.entries[index]]
        bibtex_text = bibtexparser.dumps(single_db)

        # Feed the text to xsel on its standard input.
        subprocess.run(['xsel', '-b', '-i'],
                       universal_newlines=True,
                       input=bibtex_text)
        print('Copied bibtex entry to clipboard')
Example #32
0
File: fetch.py Project: siudej/Cite
    def _cleanupBibTex(self, count):
        """ Clean up bibtex and ensure uniform look.

        ``self.refs`` is parsed with LaTeX-encoding homogenisation and written
        back (entries only, four-space indent, ordered by ID);
        ``self.number`` is set to the number of entries. ``count`` is not used.
        """
        import bibtexparser
        from bibtexparser.bparser import BibTexParser
        parser = BibTexParser()
        parser.customization = homogeneize_latex_encoding
        bib = bibtexparser.loads(self.refs, parser=parser)

        # save results
        from bibtexparser.bwriter import BibTexWriter
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.indent = '    '
        # A one-element tuple needs the trailing comma: ('id') is just the
        # string 'id', which the writer would read as the fields 'i' and 'd'.
        # 'ID' is the key under which entries store their citation key.
        writer.order_entries_by = ('ID',)
        self.number = len(bib.entries)
        self.refs = bibtexparser.dumps(bib, writer)
Example #33
0
def compile_bibtex(citations, big_bibtex_ffp):
    """
    finds the bibtex entries that correspond to a list of cite keys,
    from a large bibtex file and returns new bibtex text
    with just the required references.

    Keys that are not in the file are reported on standard output and
    skipped. A fixed set of bookkeeping fields is removed from every entry.

    :param citations: a list of citation keys.
    :param big_bibtex_ffp: full file path to bibtex file
    :return: the reduced bibliography as a BibTeX string
    """

    # Each name needs its own comma: adjacent string literals are
    # concatenated, which used to turn "read" "abstract" into "readabstract".
    remove_keys = [
        "annote", "date-added", "date-modified", "local-url", "file", "rating",
        "month", "uri", "read", "abstract",
    ]

    with open(big_bibtex_ffp) as org_bibtex_file:
        org_bibtex_database = bibtexparser.load(org_bibtex_file)

    new_bibtex_db = bibtexparser.loads(" ")  # create a new bibtex DB obj
    for key in citations:
        if key not in org_bibtex_database.entries_dict:
            print("Not found: ", key)
        else:
            new_bibtex_db.entries.append(
                org_bibtex_database.entries_dict[key])

    # The selected entries are the same dict objects as in the source
    # database, which is local to this function.
    for entry in new_bibtex_db.entries:
        for r_key in remove_keys:
            entry.pop(r_key, None)
    return bibtexparser.dumps(new_bibtex_db)
Example #34
0
 def __str__(self):
     """Render this object as the BibTeX text of one ``article`` entry."""
     # NOTE(review): the values are passed through as they are; presumably
     # all of them are strings -- confirm.
     field_names = ('author', 'journal', 'title', 'year', 'volume',
                    'number', 'pages', 'abstract', 'keyword', 'doi', 'issn')
     record = {'ENTRYTYPE': 'article', 'ID': self.entry_number}
     for name in field_names:
         record[name] = getattr(self, name)
     database = BibDatabase()
     database.entries = [record]
     return bibtexparser.dumps(database)
Example #35
0
def fix_bibliography(bibtex_string):
    """
    Parse a bibliography with LaTeX-encoding homogenisation, run
    ``fix_entry`` on every entry and return the result as UTF-8 encoded
    BibTeX.
    """

    # A parser that converts accents and special characters to LaTeX form:
    # See: https://bibtexparser.readthedocs.io/en/v0.6.2/tutorial.html#accents-and-weird-characters
    ascii_parser = BibTexParser()
    ascii_parser.customization = homogeneize_latex_encoding

    database = bibtexparser.loads(bibtex_string, parser=ascii_parser)
    for record in database.entries:
        fix_entry(record)

    # TODO: if py3k, do not encode.
    serialized = bibtexparser.dumps(database)
    return serialized.encode("UTF-8")
    def bibtex(self, publication: Publication) -> str:
        """Returns the publication as a Bibtex entry

        The publication is filled first when its ``'filled'`` flag is falsy.
        Its ``'bib'`` mapping is remapped with ``_BIB_REVERSE_MAPPING`` and
        every value is converted to ``str`` before serialization.

        :param publication: Scholar or Citation publication container object
        :type publication: Publication

        :getter: Returns a Bibtex entry in text format
        :type: str
        """
        if not publication['filled']:
            publication = self.fill(publication)
        database = BibDatabase()
        remapped = remap_bib(publication['bib'], _BIB_REVERSE_MAPPING)
        # Every value is turned into a string to be Bibtex compatible.
        database.entries = [
            {field: str(value) for field, value in remapped.items()}
        ]
        return bibtexparser.dumps(database)
Example #37
0
def fix_bibliography(bibtex_string):
    """
    Parse a bibliography with LaTeX-encoding homogenisation, run
    ``fix_entry`` on every entry and return the result as UTF-8 encoded
    BibTeX.
    """

    # A parser that converts accents and special characters to LaTeX form:
    # See: https://bibtexparser.readthedocs.io/en/v0.6.2/tutorial.html#accents-and-weird-characters
    latex_parser = BibTexParser()
    latex_parser.customization = homogeneize_latex_encoding

    parsed = bibtexparser.loads(bibtex_string, parser=latex_parser)
    for item in parsed.entries:
        fix_entry(item)

    # TODO: if py3k, do not encode.
    bibtex_text = bibtexparser.dumps(parsed)
    return bibtex_text.encode("UTF-8")
Example #38
0
def main():
    """Build a BibTeX file from the YAML header of a list of text files.

    ``sys.argv[1]`` is the output path; every further argument is an input
    file. For each input the first line is skipped and the lines up to the
    next ``---`` line are parsed as YAML into one entry. The entry ID is
    derived from the file name, '#' and '&' are escaped in titles, the title
    is wrapped in braces, ``authors`` is joined into ``author``, numeric
    fields are converted to strings and bookkeeping keys are dropped.
    """
    output = sys.argv[1]
    mds = sys.argv[2:]
    es = []
    for fn in mds:
        # print(f"loading {fn}")
        with open(fn, "r", encoding='UTF-8') as f:
            ls = f.readlines()[1:]
        ls = itertools.takewhile(lambda x: x != "---\n", ls)
        # NOTE(review): FullLoader is not meant for untrusted input; consider
        # yaml.safe_load if these files can come from outside.
        e = yaml.load("".join(ls), Loader=yaml.FullLoader)
        # NOTE(review): assumes paths of the form "<dir>/<name>.md".
        e['ID'] = fn.split("/")[1][0:-3]
        for i in ['title', 'booktitle']:
            if i in e:
                s = e[i]
                # Escaped backslashes: "\#" and "\&" are invalid escape
                # sequences in a normal string literal.
                s = s.replace("#", "\\#")
                s = s.replace("&", "\\&")
                e[i] = s
        if 'title' in e:
            e['title'] = "{" + e['title'] + "}"
        if 'authors' in e:
            e['author'] = " and ".join(e['authors'])
            del e['authors']
        for i in ['isbn', 'pages', 'volume', 'year']:
            if i in e: e[i] = str(e[i])
        for i in [
                'added', 'layout', 'notes', 'papers', 'read', 'readings',
                'topics'
        ]:
            if i in e: del e[i]
        es.append(e)

    db = BibDatabase()
    db.entries = es

    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.indent = '  '
    # writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
    bibtex_str = bibtexparser.dumps(db, writer)
    # Explicit encoding: the inputs are read as UTF-8, so the output must
    # not depend on the locale's default encoding.
    with open(output, "w", encoding='UTF-8') as f:
        print(("#############################################\n"
               "# This file is machine generated, do not edit\n"
               "#############################################\n"),
              file=f)
        print(bibtex_str, file=f)
Example #39
0
def doi2bib(doi, cache):
    """
    Return the parsed BibTeX entry (a dict) of metadata for a given DOI.
    Reference: https://gist.github.com/jrsmith3/5513926

    A cached DOI is served from ``cache``, which maps DOIs to BibTeX text.
    Otherwise the DOI is resolved twice, once as BibTeX and once as citeproc
    JSON; the first subtitle from the JSON, if any, is appended to the
    title. The serialized result is stored in ``cache[doi]``. An empty dict
    is returned when the BibTeX response contains no entry.
    """

    if doi in cache:
        return bibtexparser.loads(cache[doi]).entries[0]

    url = "http://dx.doi.org/" + doi

    bib_headers = {"accept": "application/x-bibtex"}
    r = requests.get(url, headers=bib_headers)
    new_bibtex = r.text

    json_headers = {"accept": "application/citeproc+json"}
    r = requests.get(url, headers=json_headers)
    r_json = json.loads(r.text)
    # Not every record carries a subtitle; treat a missing or null value as
    # "no subtitles" instead of raising KeyError.
    subtitles = r_json.get("subtitle") or []

    new_bib_entry_local = {}
    parsed_bibtex = bibtexparser.loads(new_bibtex)
    if len(parsed_bibtex.entries) > 0:
        new_bib_entry_local = parsed_bibtex.entries[0]
        # NOTE(review): `bib_entry` is not defined in this function;
        # presumably a module-level global -- confirm.
        new_bib_entry_local['ID'] = bib_entry['ID']
        if len(subtitles) > 0:
            new_bib_entry_local["title"] = new_bib_entry_local[
                "title"] + ": " + subtitles[0]
        if len(subtitles) > 1:
            print("Multiple subtitles:", file=sys.stderr)
            print(subtitles, file=sys.stderr)
        if "Andrew J. Ko" in new_bib_entry_local["author"]:
            new_bib_entry_local["author"] = new_bib_entry_local[
                "author"].replace("Andrew J. Ko", "Amy J. Ko")
            print("Update Amy's name!", file=sys.stderr)
        parsed_bibtex.entries[0] = new_bib_entry_local
        # NOTE(review): `i` and `j` are not defined in this function either;
        # presumably module-level counters -- confirm.
        print('%d / %d BibTex entries fixed!' % (i, j), file=sys.stderr)
    else:
        print("parse failed")
        print(doi)
        print(new_bibtex)

    cache[doi] = bibtexparser.dumps(parsed_bibtex)
    return new_bib_entry_local
def save_citation(citation_record):
    """
    Fetch the BibTeX entry for one search-result record and append it to
    the citation file named by ``opts.citation_name``.

    The entry is tagged with a ``gscholar_id`` field holding the citation
    id extracted from the record's "Cite" anchor. When
    ``opts.should_download`` is set and the record has a PDF block, the
    PDF is downloaded as well. The module-level ``citation_num`` counter
    is incremented for every entry that is fetched.

    Parameters
    ----------
    citation_record
        Parsed HTML element of a single result; it must support
        ``find(name, attrs)``.
    """
    global citation_num

    cite_anchor = citation_record.find('a', {
        'class': 'gs_nph',
        'href': '#',
        "role": "button"
    })
    # .get() instead of [...]: an anchor without an onclick attribute must
    # take the "no anchor" path rather than raise KeyError.
    if not cite_anchor or not cite_anchor.get('onclick'):
        logging.warning("No Cite anchor for citation: %s" % citation_record)
        return
    # The id is the second comma-separated argument of the onclick call,
    # with its surrounding quote characters stripped.
    citation_id = cite_anchor['onclick'].split(',')[1][1:-1]
    logging.info("Getting formated cite from citation id: " + citation_id)
    params = {
        "q": "info:%s:scholar.google.com/" % citation_id,
        "output": "cite"
    }
    soup = create_soup_by_url("https://scholar.google.com/scholar", params)
    bib_anchor = soup.find('a', {"class": "gs_citi"})
    if not bib_anchor:
        logging.debug("BibTex page soup is: %s" % soup.getText())
        logging.warning("No BibTex citation provided for citation: %s" %
                        citation_id)
        return
    soup = create_soup_by_url(bib_anchor['href'])
    citation_num += 1
    # Adding a tag to the bib entry about google scholar citation ID
    citation_entry = bibtexparser.loads(soup.getText()).entries[0]
    citationID = citation_entry['ID']  # e.g., melville2004review
    citation_entry["gscholar_id"] = citation_id
    db = BibDatabase()
    db.entries = [citation_entry]
    g_bib_entry = bibtexparser.dumps(db)
    bib_entry = "%% [%d]\n%s" % (citation_num, g_bib_entry)
    logging.info(bib_entry.strip())
    with open(opts.citation_name, "ab+") as f:
        f.write(bib_entry.encode('utf-8'))
    if opts.should_download:
        pdf_div = citation_record.find('div', {"class": "gs_ggs gs_fl"})
        if pdf_div:
            download_pdf(pdf_div.a['href'], citationID)
def bibtex_passthrough(text, set_id=None):
    """
    Clean up a single-entry bibtex record and optionally change its ID.

    The author list is rewritten from ``'; '``-separated to
    ``' and '``-separated, a ``link`` field is renamed to ``url`` when no
    ``url`` exists, and ``http://arxiv.org/`` URLs are upgraded to HTTPS.

    Parameters
    ----------
    text : str
        Bibtex source holding exactly one entry.
    set_id : str, optional
        Replacement entry ID; the parsed ID is kept when ``None``.

    Returns
    -------
    str
        The re-serialised record.
    """
    bib_parser = bibtexparser.bparser.BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(text, bib_parser)

    # Exactly one entry is expected; unpacking raises ValueError otherwise.
    [record] = database.entries

    if 'author' in record:
        authors = record['author'].rstrip(';').split('; ')
        record['author'] = ' and '.join(authors)

    # Set URL as url rather than link attribute
    if 'link' in record and 'url' not in record:
        record['url'] = record.pop('link')

    # Upgrade arxiv links to HTTPS
    if 'url' in record:
        record['url'] = re.sub(r'^http://arxiv\.org/', 'https://arxiv.org/',
                               record['url'])

    if set_id is not None:
        record['ID'] = set_id

    return bibtexparser.dumps(database)
Example #42
0
def main():
    """
    Command-line entry point: load two bibtex files, pass them to
    ``subtract`` and emit the result.

    The result is written to ``args.output`` when that is set, otherwise
    it is printed to standard output. If either input path is not an
    existing file the process exits with status 1.
    """
    # Local import so this block does not depend on a file-level import.
    import sys

    args = parse_arg()
    if not (os.path.isfile(args.bib[0]) and os.path.isfile(args.bib[1])):
        print("input file not found")
        # A missing input is a failure, so report a non-zero exit status.
        sys.exit(1)

    with open(args.bib[0]) as bibtex_file:
        database1 = bibtexparser.load(bibtex_file)

    with open(args.bib[1]) as bibtex_file:
        database2 = bibtexparser.load(bibtex_file)

    result = subtract(database1, database2)

    if args.output:
        with open(args.output, 'w') as bibtex_file:
            bibtexparser.dump(result, bibtex_file)
    else:
        print(bibtexparser.dumps(result))
Example #43
0
def proc_bibtex(text, reverse=False):
    """
    Run a text converter over selected fields of every bibtex entry.

    Parameters
    ----------
    text : str
        Bibtex source to process.
    reverse : bool, optional
        Apply ``l2u`` when true, ``u2l`` otherwise.

    Returns
    -------
    str
        The re-serialised database.

    Notes
    -----
    Only the ``author``, ``title`` and ``journal`` fields are converted.
    A field containing an escaped dollar sign aborts the program with
    exit status 1; a field containing a plain ``$`` is left unconverted
    after a warning on stderr.
    """
    targets = ['author', 'title', 'journal']
    converter = l2u if reverse else u2l
    parser = BibTexParser()
    parser.homogenise_fields = False
    bib = bibtex.loads(text, parser)
    for item in bib.entries:
        for target in targets:
            if target not in item:
                continue
            # Raw string: backslash followed by '$', without relying on an
            # invalid escape sequence in a normal string literal.
            if r'\$' in item[target]:
                sys.stderr.write('error: quoted latex math expression in {}:{}, abort\n'
                                 .format(item['id'], target))
                sys.exit(1)
            elif '$' in item[target]:
                sys.stderr.write('warning: latex math expression in {}:{}, skipping\n'
                                 .format(item['id'], target))
                continue
            item[target] = converter(item[target])
    return bibtex.dumps(bib)
Example #44
0
def cleanbib(in_fp):
    """
    Load the bibtex file at ``in_fp``, strip unwanted fields from every
    entry and return the database serialised back to a bibtex string.
    """
    unwanted_fields = (
        'file',
        'annote',
        'abstract',
        'keywords',
        'archivePrefix',
        'mendeley-tags',
        'mendeley-groups',
    )

    with open(in_fp, 'r') as source:
        database = bibtexparser.loads(source.read())

    for entry in database.entries:
        for field in unwanted_fields:
            # Fields that are not present are simply skipped.
            entry.pop(field, None)

    return bibtexparser.dumps(database)
Example #45
0
def write_bibtex(bibtex_entries):
    """
    Serialise a list of entry dicts to a bibtex string.

    The entries are modified in place: the ``created_time``, ``file`` and
    ``abstract`` keys are removed, list values are joined with ``' and '``
    and every value is passed through ``unicode_to_latex``. Output uses a
    two-space indent and is ordered by entry type, author and year.
    """
    for entry in bibtex_entries:
        # pop the useless contents
        for junk in ('created_time', 'file', 'abstract'):
            entry.pop(junk, None)
        # Only existing keys are reassigned, so iterating items() is safe.
        for field, value in entry.items():
            if isinstance(value, list):
                value = ' and '.join(value)
            entry[field] = unicode_to_latex(value)

    database = bibtexparser.bibdatabase.BibDatabase()
    database.entries = bibtex_entries

    bib_writer = BibTexWriter()
    bib_writer.contents = ['comments', 'entries']
    bib_writer.indent = '  '
    bib_writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

    return bibtexparser.dumps(database, bib_writer)
Example #46
0
def entry_ndiff(entries, color=True):
    """
    Render a textual diff of many entries.

    The entries are merged with ``merge_entries``. Each conflicting field
    is temporarily replaced by a placeholder, the merged entry is dumped
    to bibtex, and every placeholder line is then expanded into one line
    per candidate value, separated by ``---`` and framed by marker lines.
    Lines for fields missing from at least one entry are passed to
    ``_colordiffline`` with sign ``'*'`` when ``color`` is true.
    """
    merged = merge_entries(entries)
    SECRET_STRING = 'REPLACE_{}_FIELD'
    pattern = re.compile(SECRET_STRING.format('(.*)'))  # finds placeholders
    choices = {}
    somemissing = []
    for field in merged:
        if isinstance(merged[field], ConflictingField):
            choices[field] = merged[field].choices
            merged[field] = SECRET_STRING.format(field)
        elif any(field not in e for e in entries):
            somemissing.append(field)

    database = bibtexparser.bibdatabase.BibDatabase()
    database.entries.append(merged)
    dumped = bibtexparser.dumps(database)

    out = []
    for row in dumped.splitlines():
        found = pattern.findall(row)
        if found:
            field = found[0]
            placeholder = SECRET_STRING.format(field)
            out.append('\u2304' * 3)
            for choice in choices[field]:
                candidate = '  ' + row.replace(placeholder,
                                               '{}'.format(choice))
                out.append(
                    _colordiffline(candidate, '!') if color else candidate)
                out.append('---')
            out.pop()  # remove last ---
            out.append('\u2303' * 3)
            continue

        if any('{} = {{'.format(name) in row for name in somemissing):
            candidate = '  ' + row
            out.append(
                _colordiffline(candidate, sign='*') if color else candidate)
            continue

        # Entry header and closing brace stay flush left; field lines are
        # indented to line up with the expanded ones.
        if row.startswith(('@', '}')):
            out.append(row)
        else:
            out.append('  ' + row)
    return '\n'.join(out)
Example #47
0
def biblatex_entry_by_doi(doi):
    """
    Return a BibLaTeX entry for ``doi``, or ``None``.

    The entry obtained from ``printable_bibtex_entry_by_doi`` is parsed
    with unicode conversion, each ``journal`` field is renamed to
    ``journaltitle``, and the re-serialised record is passed to
    ``bibtex_entry_from_str``, whose result is returned.

    Returns ``None`` when no entry is found for the DOI.
    """
    entry = printable_bibtex_entry_by_doi(doi)
    if not entry:
        return None

    entry_str = bibtex_entry_str(entry)
    parser = BibTexParser()
    parser.customization = convert_to_unicode

    bib_database = bibtexparser.loads(entry_str, parser=parser)

    # convert 'journal' to 'journaltitle'
    for e in bib_database.entries:
        if 'journal' in e:
            e['journaltitle'] = e.pop('journal')

    bibtex_string = bibtexparser.dumps(bib_database)
    return bibtex_entry_from_str(bibtex_string)
Example #48
0
    def build_citation(self, bib_database):
        """
        Build the citation data model for a bibtex database.

        Only the first entry is read. ``article`` and ``unpublished``
        entries have their fields mapped; any other entry type yields a
        citation holding just the ``bibtex`` dump.

        Parameters
        ----------
        bib_database : bibtexparser.bibdatabase.BibDatabase
            Object containing bibtex data

        Returns
        -------
        DM
            The citation, including the serialised database under the
            ``bibtex`` key.
        """
        entry = bib_database.entries[0]
        entry_type = entry['ENTRYTYPE']
        citation = DM()

        # Key insertion order is kept exactly as-is in both branches.
        if entry_type == 'article':
            citation['document-type'] = 'journal'
            citation['title'] = entry['title']
            citation['author'] = self.parse_authors(entry['author'])
            citation['publication-name'] = entry['journal']
            citation['publication-date'] = DM()
            citation['publication-date']['year'] = entry['year']
            citation['volume'] = entry['volume']
            # 'number' takes precedence over 'issue' when both exist.
            for issue_key in ('number', 'issue'):
                if issue_key in entry:
                    citation['issue'] = entry[issue_key]
                    break
            if 'abstract' in entry:
                citation['abstract'] = entry['abstract']
            if 'pages' in entry:
                citation['pages'] = entry['pages'].replace('--', '-')
            citation['DOI'] = entry['doi']

        elif entry_type == 'unpublished':
            citation['document-type'] = 'unspecified'
            citation['title'] = entry['title']
            citation['author'] = self.parse_authors(entry['author'])
            citation['publication-date'] = DM()
            citation['publication-date']['year'] = entry['year']

        citation['bibtex'] = bibtexparser.dumps(bib_database)
        return citation
Example #49
0
 def bibtex(self, id=None):
     """Build the bibtex entry describing this article.
     
     Parameters
     ==========
     id : str
       Entry ID to write. When omitted, the value returned by
       ``self.default_id()`` is used instead.
     
     Returns
     =======
     bibtex : str
       The serialised bibtex entry.
     
     """
     entry = self.metadata()
     if id is None:
         id = self.default_id()
     entry['ID'] = id
     # Wrap the single entry in a database so it can be dumped
     database = bibtexparser.bibdatabase.BibDatabase()
     database.entries = [entry]
     return bibtexparser.dumps(database)
Example #50
0
def proc_bibtex(text, reverse=False):
    """
    Run a text converter over selected fields of every bibtex entry.

    Parameters
    ----------
    text : str
        Bibtex source to process.
    reverse : bool, optional
        Apply ``l2u`` when true, ``u2l`` otherwise.

    Returns
    -------
    str
        The re-serialised database.

    Notes
    -----
    Only the ``author``, ``title`` and ``journal`` fields are converted.
    A field containing an escaped dollar sign aborts the program with
    exit status 1; a field containing a plain ``$`` is left unconverted
    after a warning on stderr.
    """
    targets = ['author', 'title', 'journal']
    converter = l2u if reverse else u2l
    parser = BibTexParser()
    parser.homogenise_fields = False
    bib = bibtex.loads(text, parser)
    for item in bib.entries:
        for target in targets:
            if target not in item:
                continue
            # Raw string: backslash followed by '$', without relying on an
            # invalid escape sequence in a normal string literal.
            if r'\$' in item[target]:
                sys.stderr.write(
                    'error: quoted latex math expression in {}:{}, abort\n'.
                    format(item['id'], target))
                sys.exit(1)
            elif '$' in item[target]:
                sys.stderr.write(
                    'warning: latex math expression in {}:{}, skipping\n'.
                    format(item['id'], target))
                continue
            item[target] = converter(item[target])
    return bibtex.dumps(bib)
Example #51
0
def scholar_get(title, db):
    """
    Look up ``title`` and return basic metadata about the first hit.

    Parameters
    ----------
    title : str
        Publication title to search for.
    db : dict
        Cache mapping a title to the bibtex string of its first search
        result; it is filled in when the title is not cached yet.

    Returns
    -------
    dict
        Keys ``title``, ``authors`` (list of str), ``year`` and
        ``bibtex``. All values are empty when the search has no result.
    """
    if title not in db:
        query = gscholar.query(title)
        if len(query) < 1:
            return {"title": "", "authors": [], "year": "", "bibtex": ""}
        db[title] = query[0]
        # Random pause of up to 10 seconds after each real query.
        time.sleep(random.randint(0, 10))

    meta = {}

    parser = bibtexparser.bparser.BibTexParser()
    parser.customization = customizations
    raw_entry = bibtexparser.loads(db[title], parser=parser)
    entry = raw_entry.entries[0]

    meta["title"] = entry['title'].strip()
    # Split on the " and " separator, not the bare substring "and", which
    # would also cut names such as "Anderson" or "Sandberg" apart.
    meta["authors"] = entry['author'].replace(", ", " ").split(" and ")
    meta["year"] = entry.get('year', "").strip()
    meta["bibtex"] = bibtexparser.dumps(raw_entry)

    return meta
Example #52
0
def simplify_bibtex(args):
    """
    Replace journal titles with string keys and write the patched bibtex.

    Reads ``args.bibfile`` and ``args.bib_journal``. For each entry whose
    DOI key is found in the titles collected from the journal file, the
    entry's ``journaltitle`` is set to the matching string key and that
    string is kept. Any ``shortjournal`` field is dropped. The dumped
    text is run through ``JournalPatcher`` and then ``StringPatcher``
    before being written to ``args.output``.
    """
    database = load_bibtex(args.bibfile)
    journal_db = load_bibtex(args.bib_journal)

    title_lookup = collect_ieee_titles(journal_db.strings)
    doi_parser = DOIParser()
    kept_strings = OrderedDict()

    for record in database.entries:
        if 'doi' in record:
            prefix = doi_parser.get_doi_key(record['doi'])
            if prefix in title_lookup:
                string_key, journal_name = title_lookup[prefix]
                kept_strings[string_key] = journal_name
                record['journaltitle'] = string_key
        record.pop('shortjournal', None)

    database.strings = kept_strings

    dumped = bibtexparser.dumps(database)
    journal_patched = JournalPatcher().patch(dumped)
    fully_patched = StringPatcher().patch(journal_patched)

    with open(args.output, 'w') as sink:
        sink.write(fully_patched)
 def test_sort_default(self):
     """Dumping without a writer is expected to list a, b, c in order."""
     dumped = bibtexparser.dumps(self.bib_database)
     wanted = ("@book{a\n}\n\n"
               "@article{b\n}\n\n"
               "@book{c\n}\n\n")
     self.assertEqual(dumped, wanted)
 def test_sort_none(self):
     """With ``order_entries_by = None`` the expected order is b, c, a."""
     unsorted_writer = BibTexWriter()
     unsorted_writer.order_entries_by = None
     dumped = bibtexparser.dumps(self.bib_database, unsorted_writer)
     wanted = ("@article{b\n}\n\n"
               "@book{c\n}\n\n"
               "@book{a\n}\n\n")
     self.assertEqual(dumped, wanted)
 def test_sort_type_id(self):
     """Ordering by ('ENTRYTYPE', 'ID') is expected to give b, a, c."""
     writer = BibTexWriter()
     # Sort key: entry type first, then entry ID.
     writer.order_entries_by = ('ENTRYTYPE', 'ID')
     result = bibtexparser.dumps(self.bib_database, writer)
     expected = "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n"
     self.assertEqual(result, expected)