Exemple #1
0
def ircrebibmerge():
    """Concatenate three BibTeX files into ``newircre.bib``.

    ``sorted-articles.bib``, ``top15.bib`` and ``others.bib`` are parsed in
    that order (non-standard entry types are kept).  Their entries are then
    shallow-copied and written out in the order top15, sorted articles,
    others, without any re-sorting by the writer.

    Returns:
        0 after the merged file has been written.
    """

    def _read_entries(path):
        # Each file gets its own parser instance, configured identically.
        bib_parser = BibTexParser(common_strings=False)
        bib_parser.ignore_nonstandard_types = False
        with open(path, encoding='utf8') as handle:
            database = bibtexparser.load(handle, bib_parser)
        return database.entries.copy()

    sortedarticles = _read_entries('/home/limingtao/ircre-bibtex/ircreupdate/sorted-articles.bib')
    top15articles = _read_entries('/home/limingtao/ircre-bibtex/ircreupdate/top15.bib')
    others = _read_entries('/home/limingtao/ircre-bibtex/ircreupdate/others.bib')

    alldb = BibDatabase()
    # Output order differs from load order: top15 first, then sorted, then others.
    alldb.entries = [
        record.copy()
        for group in (top15articles, sortedarticles, others)
        for record in group
    ]

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = None  # keep the concatenation order

    with open('/home/limingtao/ircre-bibtex/ircreupdate/newircre.bib', 'w', encoding='utf8') as merged_file:
        bibtexparser.dump(alldb, merged_file, writer=writer)

    return 0
Exemple #2
0
def _ingest_citations(rc):
    """Load the BibTeX file ``rc.filename`` and upsert each entry via ``rc.client``.

    While parsing, the ``author`` and ``editor`` fields are split on ``", "``
    and ``" and "`` and passed through ``getnames``.  Afterwards every entry
    has its ``ID`` removed (it becomes the ``_id`` of the upsert filter), its
    ``ENTRYTYPE`` moved to ``entrytype``, its authors re-split with
    ``RE_AND`` and the whitespace in its title normalised with ``RE_SPACE``.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    def customizations(record):
        # Turn the people fields into lists of individual names.
        for field in ("author", "editor"):
            if field not in record:
                continue
            single_line = record[field].replace("\n", " ")
            people = [
                piece.strip()
                for chunk in single_line.split(", ")
                for piece in chunk.split(" and ")
            ]
            record[field] = getnames(people)
        return record

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False
    parser.customization = customizations

    with open(rc.filename, "r", encoding='utf-8') as f:
        bibs = bibtexparser.load(f, parser=parser)

    # The looked-up collection is not used below; the lookup itself is kept.
    coll = rc.client[rc.db][rc.coll]

    for record in bibs.entries:
        record_id = record.pop("ID")
        record["entrytype"] = record.pop("ENTRYTYPE")
        if "author" in record:
            names = []
            for raw_name in record["author"]:
                names.extend(part.strip() for part in RE_AND.split(raw_name))
            record["author"] = names
        if "title" in record:
            record["title"] = RE_SPACE.sub(" ", record["title"])
        rc.client.update_one(rc.db, rc.coll, {"_id": record_id}, record, upsert=True)
Exemple #3
0
def annotes_dicts(bibfile, pdfdir, filters, include_all=False):
    """Build one info dict per selected entry of a BibTeX file.

    Args:
        bibfile: path of the BibTeX file (read as UTF-8).
        pdfdir: directory joined with part of an entry's ``file`` value to
            form the path handed to ``get_annotes``.
        filters: iterable of ``(key, pattern)`` pairs.  An entry is kept only
            if every key is present and ``re.search(pattern, entry[key])``
            finds a match.
        include_all: when true, entries are reported even if they have no
            ``file``/``review`` field and no annotations.

    Returns:
        A list of dicts sorted by their ``'ID'`` value.  Each dict holds
        ``'file'``, the fields author/year/title/journal/review/ID/doi
        (passed through ``_to_utf``) and an ``'annotations'`` list.
    """
    with open(bibfile, encoding="utf-8") as handle:
        raw_bibtex = handle.read()
    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    database = bibtexparser.loads(raw_bibtex, bib_parser)

    copied_fields = ('author', 'year', 'title', 'journal', 'review', 'ID', 'doi')
    collected = []

    for entry in database.entries:
        passes_filters = all(
            key in entry and re.search(pattern, entry[key])
            for key, pattern in filters
        )
        if not passes_filters:
            continue
        if not (entry.get('file') or entry.get('review') or include_all):
            continue

        if entry.get('file'):
            # NOTE(review): the slice drops the first character and the last
            # four of the 'file' value -- presumably wrapper characters of a
            # reference-manager path format; confirm against real data.
            filepath = os.path.join(pdfdir, entry['file'][1:-4])
            sys.stderr.write("%s\n" % filepath)
            annotes = get_annotes(filepath)
        else:
            filepath = ''
            annotes = []

        if not (annotes or entry.get('review') is not None or include_all):
            continue

        info = {'file': filepath}
        collected.append(info)
        for field in copied_fields:
            info[field] = _to_utf(entry.get(field, None))
        info['annotations'] = [
            {name: _to_utf(value) for name, value in annotation._asdict().items()}
            for annotation in annotes
        ]

    collected.sort(key=lambda item: item['ID'])
    return collected
Exemple #4
0
def get_bibtex(f):
    """Parse the BibTeX file object ``f`` and return the loaded database.

    The parser is created with ``common_strings=False``, keeps non-standard
    entry types, has ``homogenise_fields`` set to True and uses ``clean_tex``
    as its customization hook.
    """
    bib_parser = BibTexParser(common_strings=False)
    bib_parser.customization = clean_tex
    bib_parser.homogenise_fields = True
    bib_parser.ignore_nonstandard_types = False

    database = bibtexparser.load(f, bib_parser)
    return database
def collect_author_name(fn):
    """Write every author name of a BibTeX file to a ``.author`` file.

    The output path is ``fn`` with each ``.bib`` occurrence replaced by
    ``.author``.  The input is read as ISO-8859-1, and every value stored
    under an ``author`` key (compared case-insensitively) is split into
    individual names, which are written one per line.

    :param fn: path of the BibTeX file to read.
    :return: path of the author-name file that was written.
    """
    import re

    output = fn.replace('.bib', '.author')
    # BibTeX separates authors with the word "and" surrounded by whitespace.
    # Splitting on the bare substring 'and' would cut names such as
    # "Alexander" or "Sandra" in two.
    author_separator = re.compile(r'\s+and\s+')

    # Both files are context-managed so the output handle is always closed
    # (the original left it open). The output is still opened first, as before.
    with open(output, 'w+') as output_file, \
            open(fn, encoding='ISO-8859-1') as bibtex_file:
        parser = BibTexParser(common_strings=True)
        parser.ignore_nonstandard_types = True
        parser.homogenise_fields = False
        bib_database = bibtexparser.loads(bibtex_file.read(), parser)
        for entry in bib_database.entries:
            for key, val in entry.items():
                if key.lower() != 'author':
                    continue
                for one_author in author_separator.split(val):
                    output_file.write(one_author.strip() + '\n')
        # NOTE(review): format_errors / parsing_errors are not defined in this
        # function; presumably module-level names -- confirm they exist.
        print(format_errors)
        print(parsing_errors)
    return output
def convert_entry_to_ccg_style(bib_str):
    """
    Convert every entry (@article, @book, @inproceedings etc.) of a BibTeX string to ccg style.

    Note:
    1.  Only entries are processed, so other items such as @string or @comments do not
        appear in the output.

    2.  (TODO) Two versions of a publication may end up with an identical key (because
        they have the same author and year).

    :param bib_str: a string which contains one or more entries

    :return:    A list of triples, one per input entry, each of the form
                (old_key, new_key, new_bib) --

                old_key: the entry key as found in the input
                new_key: the ccg-style entry key
                new_bib: the converted ccg-style entry in string form
    """
    ccg_parser = BibTexParser()
    ccg_parser.ignore_nonstandard_types = False  # keep non-standard entry types
    database = bibtexparser.loads(bib_str, ccg_parser)

    def _convert(entry):
        # Read the key first, before the entry is handed to the converter.
        original_key = entry["ID"]
        ccg_key, ccg_entry = _entry_to_ccg_style(entry)
        return original_key, ccg_key, _entry_to_str(ccg_entry)

    return [_convert(entry) for entry in database.entries]
def inject_labels(input_fn, output_fn, writer):
    """Wrap every field value of a BibTeX file in begin/end label markers.

    The input is read as ISO-8859-1.  For each entry, all fields except
    ``id``, ``entrytype`` and ``author`` (compared case-insensitively) are
    rewritten as ``@@@key@@@ value @@@@key@@@@``; the three exempt fields are
    copied unchanged.  The labelled database is rendered with ``writer`` and
    written to ``output_fn``.

    :param input_fn: path of the BibTeX file to read.
    :param output_fn: path of the file to write.
    :param writer: object whose ``write(database)`` returns the text to store.
    """
    untouched_keys = ('id', 'entrytype', 'author')
    label_template = '@@@{}@@@ {} @@@@{}@@@@'

    with open(input_fn, encoding='ISO-8859-1') as source:
        bib_parser = BibTexParser(common_strings=True)
        bib_parser.ignore_nonstandard_types = True
        bib_parser.homogenise_fields = False
        bib_database = bibtexparser.loads(source.read(), bib_parser)
        # Rebuild each entry dict with labelled values.
        bib_database.entries = [
            {
                field: (value if field.lower() in untouched_keys
                        else label_template.format(field, value, field))
                for field, value in record.items()
            }
            for record in bib_database.entries
        ]

    with open(output_fn, 'w') as out_file:
        out_file.write(writer.write(bib_database))
Exemple #8
0
def import_bibtex(
    bibtex,
    pub_dir="publication",
    featured=False,
    overwrite=False,
    normalize=False,
    dry_run=False,
):
    """Import publications from BibTeX file

    Every entry of the file is handed to ``parse_bibtex_entry`` together with
    the ``pub_dir``, ``featured``, ``overwrite``, ``normalize`` and ``dry_run``
    options.  Logs an error and raises ``AcademicError`` when ``bibtex`` is
    not an existing file.
    """
    from academic.cli import AcademicError, log

    # Refuse to continue without an existing BibTeX file.
    bibtex_path = Path(bibtex)
    if not bibtex_path.is_file():
        err = "Please check the path to your BibTeX file and re-run"
        log.error(err)
        raise AcademicError(err)

    # The same options are forwarded for every entry.
    entry_options = dict(
        pub_dir=pub_dir,
        featured=featured,
        overwrite=overwrite,
        normalize=normalize,
        dry_run=dry_run,
    )

    with open(bibtex, "r", encoding="utf-8") as bibtex_file:
        parser = BibTexParser(common_strings=True)
        parser.ignore_nonstandard_types = False
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.load(bibtex_file, parser=parser)
        for entry in bib_database.entries:
            parse_bibtex_entry(entry, **entry_options)
Exemple #9
0
def getentries(filename):
    """Parse a BibTeX import file and return its entries.

    ``save_import_file`` is called first.  The file is then parsed with the
    encodings ``utf-8-sig``, ``utf-16`` and finally the ``codecs.open``
    default (``encoding=None``); the next encoding is tried only when the
    failure looks like a decoding problem.

    :param filename: path of the BibTeX file to import.
    :return: the ``entries`` of the parsed database.
    :raises IOError: if ``save_import_file`` fails.
    :raises ValueError: if parsing fails for a reason other than the encoding.
    """
    try:
        save_import_file(filename)
    except IOError as e:
        logg.error("bibtex import: save import file failed: {}".format(e))
        raise IOError("save import file failed")

    bibtex = None
    error = None
    # use utf-8-sig instead of utf-8 to get rid of BOM_UTF8, which confuses bibtex parser
    for encoding in ('utf-8-sig', 'utf-16', None):
        try:
            # Context manager: the handle is closed even when parsing fails.
            with codecs.open(filename, "r", encoding=encoding) as fi:
                parser = BibTexParser()
                # accept also non standard records like @SCIENCEREPORT
                parser.ignore_nonstandard_types = False
                parser.customization = _bibteximport_customize
                bibtex = bibtex_load(fi, parser=parser)
            # seems to be the correct encoding, don't try other encodings
            error = None
            break
        except Exception as e:
            message = str(e).lower()
            # Decoding problems surface as UnicodeError; the substring checks
            # are kept for messages that name the codec as 'utf8'/'utf-16'.
            # Strings are compared with ==, not identity.
            decoding_failed = (
                isinstance(e, UnicodeError)
                or (encoding == 'utf-8-sig' and 'utf8' in message)
                or (encoding == 'utf-16' and 'utf-16' in message)
            )
            if encoding is not None and decoding_failed:
                continue
            # Keep the exception in a name that survives the except block
            # (Python 3 unbinds ``e`` when the block ends).
            error = e
            break

    if error is not None:
        logg.error("bibtex import: bibtexparser failed: {}".format(error))
        raise ValueError("bibtexparser failed")

    return bibtex.entries
Exemple #10
0
def _ingest_citations(rc):
    """Read the BibTeX file ``rc.filename`` and upsert every entry through ``rc.client``.

    During parsing the ``author`` and ``editor`` fields are split on ``', '``
    and ``" and "`` and converted with ``getnames``.  For each parsed entry
    the ``ID`` is popped and used as ``_id`` in the upsert filter,
    ``ENTRYTYPE`` is stored as ``entrytype``, authors are re-split with
    ``RE_AND`` and title whitespace is normalised with ``RE_SPACE``.

    The file is read explicitly as UTF-8 so the result does not depend on
    the platform's default encoding.
    """
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import getnames

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False

    def customizations(record):
        # Turn the people fields into lists of individual names.
        for n in ['author', 'editor']:
            if n in record:
                chunks = record[n].replace('\n', ' ').split(', ')
                names = [
                    piece.strip()
                    for chunk in chunks
                    for piece in chunk.split(" and ")
                ]
                record[n] = getnames(names)

        return record

    parser.customization = customizations
    with open(rc.filename, 'r', encoding='utf-8') as f:
        bibs = bibtexparser.load(f, parser=parser)
    # The looked-up collection is not used below; the lookup itself is kept.
    coll = rc.client[rc.db][rc.coll]
    for bib in bibs.entries:
        bibid = bib.pop('ID')
        bib['entrytype'] = bib.pop('ENTRYTYPE')
        if 'author' in bib:
            bib['author'] = [a.strip() for b in bib['author'] for a in
                             RE_AND.split(b)]
        if 'title' in bib:
            bib['title'] = RE_SPACE.sub(' ', bib['title'])
        rc.client.update_one(rc.db, rc.coll, {'_id': bibid},
                             bib, upsert=True)
Exemple #11
0
def getcitation():
    """Refresh the ``cited`` count of up to 20 random entries of ``articles.bib``.

    For every sampled entry whose ``clusterid`` is not ``"unknown"`` the
    ``scholar.py`` script is run and the last token of its "Citations" line is
    taken as the new count.  The stored ``cited`` value is replaced only when
    the new count is larger than the old one by less than 8.  After each
    entry the whole database is written to ``cited-add-articles.bib`` and
    copied to ``tempcited-add-articles.bib`` in the working directory.
    Finally ``articles.bib`` is backed up to ``oldarticles.bib`` and rewritten.

    Returns:
        0 on completion.
    """
    import random
    import shlex
    import shutil

    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)

    articleentries = articles_database.entries

    # Created before the loop so the final dump works even with no samples.
    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)

    # random.sample() raises ValueError when asked for more items than exist.
    samplelist = random.sample(range(len(articleentries)), min(20, len(articleentries)))
    print(samplelist)

    for i in samplelist:
        entry = articleentries[i]
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entry['title']
        clusterid = entry['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)

        if not clusterid == "unknown":
            print(str(i))
            # The cluster id comes from the .bib file; quote it before it is
            # placed in a shell command line.
            command = (
                '''/usr/bin/python3 /home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py -c 1 -C '''
                + shlex.quote(clusterid)
                + ''' |grep -v list |grep Citations''')
            try:
                with os.popen(command) as pipe:
                    citations = pipe.read().strip().split()[-1]
            except Exception:
                # Best effort: e.g. empty output makes split()[-1] fail.
                citations = "unknown"
        else:
            citations = "unknown"

        print("new Citations: " + citations)

        if 'cited' in entry:
            oldcitednumber = int(entry['cited'])
        else:
            oldcitednumber = 0

        print("Old Cited Number: " + str(oldcitednumber))

        if not citations == "unknown":
            citednumber = int(citations)
            # Accept only increases smaller than 8 per run.
            if citednumber > oldcitednumber and ((citednumber - oldcitednumber) < 8):
                entry['cited'] = str(citednumber)

        # Checkpoint after every entry.
        with open('/home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib', 'w', encoding='utf8') as newarticlefile:
            bibtexparser.dump(articles_database, newarticlefile, writer=writer)

        try:
            shutil.copy('/home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib',
                        'tempcited-add-articles.bib')
        except OSError as exc:
            # The scratch copy is a convenience; do not abort the run for it.
            print("could not copy checkpoint: " + str(exc))

    # Synchronous backup: the previous os.popen("cp ...") returned immediately,
    # so the file could be truncated below before the copy had finished.
    shutil.copy('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib',
                '/home/limingtao/ircre-bibtex/ircreupdate/oldarticles.bib')
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', 'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)

    return 0
Exemple #12
0
def updatestatistics():
    """Compute publication statistics from ``articles.bib`` and write ``newstatistics.js``.

    Sums the ``cited`` and ``impactfactor`` fields (missing values count as
    0), counts entries that have a ``hihosubject`` field, counts distinct
    ``journal`` values and derives the h-index and i10-index through
    ``Hindex`` / ``I10index``.  The results are written as JavaScript
    assignments.

    Returns:
        0 on completion.
    """
    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)

    articleentries = articles_database.entries
    totalarticles = len(articleentries)
    # NOTE(review): fixed offset of 28 -- presumably publications that are not
    # listed in articles.bib; confirm.
    totalpublications = totalarticles + 28

    totalcitations = 0
    totalif = 0.0
    hihonumber = 0
    citationlist = []
    jourallist = []
    for entry in articleentries:
        citednumber = int(entry['cited']) if 'cited' in entry else 0
        impactfactor = float(entry['impactfactor']) if 'impactfactor' in entry else 0.0
        if 'hihosubject' in entry:
            hihonumber += 1

        citationlist.append(citednumber)
        jourallist.append(entry['journal'])
        totalcitations += citednumber
        totalif += impactfactor

    hindex = Hindex(citationlist)
    i10index = I10index(citationlist)
    # NOTE(review): fixed offset of 19 -- presumably citations of items that
    # are not listed in articles.bib; confirm.
    totalcitations = totalcitations + 19
    journalnumber = len(set(jourallist))
    # Guard the averages against an empty database (was ZeroDivisionError).
    citationperpaper = totalcitations / totalarticles if totalarticles else 0.0
    averageif = totalif / totalarticles if totalarticles else 0.0

    with open('/home/limingtao/ircre-bibtex/ircreupdate/newstatistics.js', 'w', encoding='utf8') as statisticsjsfile:
        statisticsjsfile.write('totalpublications = "%d";\n' % totalpublications)
        statisticsjsfile.write('totalarticles = "%d";\n' % totalarticles)
        statisticsjsfile.write('totalcitations = "%d";\n' % totalcitations)
        statisticsjsfile.write('hindex = "%d";\n' % hindex)
        statisticsjsfile.write('i10index = "%d";\n' % i10index)
        statisticsjsfile.write('numberjournals = "%d";\n' % journalnumber)
        statisticsjsfile.write('numberesihighlycited = "%d";\n' % hihonumber)
        statisticsjsfile.write('citationperpaper = "%.2f";\n' % citationperpaper)
        statisticsjsfile.write('averageif = "%.3f";\n' % averageif)
    return 0
Exemple #13
0
def read_bibtex(bibtex_str):
    """Parse a BibTeX string and return its entries after three customizations.

    Each entry is passed through ``keyword`` and ``convert_to_unicode`` from
    ``bibtexparser.customization``; once that pass has finished for all
    entries, ``author`` is applied to each result.

    :param bibtex_str: BibTeX source text.
    :return: list of the values returned by ``author``, one per entry.
    """
    bib_parser = BibTexParser(common_strings=True)
    bib_parser.homogenize_fields = True
    bib_parser.ignore_nonstandard_types = False
    database = bib_parser.parse(bibtex_str)

    # First pass: keyword + unicode conversion, entry by entry.
    converted = [
        bibtexparser.customization.convert_to_unicode(
            bibtexparser.customization.keyword(record))
        for record in database.entries
    ]
    # Second pass: author handling, only after the first pass has completed.
    return [bibtexparser.customization.author(record) for record in converted]
def get_bibtex_data(filename):
    """Return the entries of the BibTeX file ``filename`` as a new list.

    Non-standard entry types are kept.  The file is opened with the default
    text encoding.
    """
    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    with open(filename) as handle:
        database = bibtexparser.loads(handle.read(), bib_parser)
    # Fresh list holding the same entry objects, in file order.
    return list(database.entries)
Exemple #15
0
    def database(self) -> BibDatabase:
        """Parse the master BibTeX file and return the loaded database.

        The path is taken from ``self.master_bib``; non-standard entry types
        are kept.

        """
        logger.info(f'parsing master bibtex file: {self.master_bib}')
        bib_parser = BibTexParser()
        bib_parser.ignore_nonstandard_types = False
        with open(self.master_bib) as handle:
            loaded = bibtexparser.load(handle, bib_parser)
            return loaded
Exemple #16
0
    def add_entry_by_string(self,
                            bib_string,
                            file_name=None,
                            skip_if_file_exists=True,
                            skip_if_doi_exists=False,
                            parser=None):
        """
        Parse a BibTex string and append its first entry to this database.
        :param bib_string: BibTex source text describing the reference to add.
        :param file_name: optional name of a local file; when given it is stored in the new entry's ``file`` field.
        :param skip_if_file_exists: boolean, default True. When True and ``file_name`` is already present in
        ``self.files``, nothing is added, so re-running an update does not duplicate entries for known files.
        :param skip_if_doi_exists: boolean, default False. When True and the parsed entry carries a ``doi`` that is
        already present in ``self.dois``, nothing is added.
        :param parser: parser used for ``bib_string``. When omitted, a BibTexParser is created with:
            * ignore_nonstandard_types = False
            * homogenise_fields = True
            * customization = a lambda that forwards each entry to ``self.format_entry``
        :return: none, the entry is appended to ``self.entries`` in place.
        """
        file_already_known = (skip_if_file_exists
                              and file_name is not None
                              and file_name in self.files)
        if file_already_known:
            root_logger.info(
                'Not adding {}, entry for that file already in .bib file'.format(file_name))
            return

        # Parse into a throw-away database and take the entry from there, so
        # the stored entry has gone through the parser's formatting.
        if parser is None:
            parser = BibTexParser()
            parser.ignore_nonstandard_types = False
            parser.homogenise_fields = True
            # The lambda resolves self.format_entry at the time an entry is parsed.
            parser.customization = lambda entry: self.format_entry(entry)

        parsed = parser.parse(bib_string)
        candidate = parsed.entries[0]

        if skip_if_doi_exists and 'doi' in candidate and candidate['doi'] in self.dois:
            root_logger.info(
                'Not adding {}, entry with DOI "{}" already in bib file'.format(
                    file_name, candidate['doi']))
            return

        if file_name is not None:
            candidate['file'] = file_name

        # Appending to the entries list is all that is done here.
        self.entries.append(candidate)
Exemple #17
0
def get_bibtex_dict (stream):
    """Parse the BibTeX ``stream`` and return the result of ``get_entry_dict()``.

    Non-standard entry types are kept and ``homogenise_fields`` is off.
    """
    from bibtexparser.bparser import BibTexParser

    bib_parser = BibTexParser()
    bib_parser.homogenise_fields = False
    bib_parser.ignore_nonstandard_types = False

    # TODO: newlines appear to be preserved, in `author` records at least.
    # It might be nice to replace them with spaces (and collapse runs of
    # spaces if needed).

    database = bib_parser.parse_file(stream)
    return database.get_entry_dict()
Exemple #18
0
def get_bibtex_dict(stream):
    """Return ``get_entry_dict()`` of the database parsed from ``stream``.

    The parser keeps non-standard entry types and has ``homogenise_fields``
    switched off.
    """
    from bibtexparser.bparser import BibTexParser

    bib_parser = BibTexParser()
    bib_parser.homogenise_fields = False
    bib_parser.ignore_nonstandard_types = False

    # TODO: a possible extra clean-up step -- newlines seem to survive
    # parsing, in `author` records at least; they could be replaced with
    # spaces (collapsing repeated spaces if needed).

    parsed = bib_parser.parse_file(stream)
    entry_dict = parsed.get_entry_dict()
    return entry_dict
Exemple #19
0
    def _open_bib_db(self, bibfile):
        """Load the bibtex database at ``bibfile`` and return it.

        The directory part of ``bibfile`` is stored in ``self._bib_path``
        before the file is parsed.
        """
        self._bib_path = os.path.dirname(bibfile)

        bib_parser = BibTexParser()
        bib_parser.homogenize_fields = True
        bib_parser.ignore_nonstandard_types = False

        with open(bibfile) as handle:
            database = bibtexparser.load(handle, bib_parser)
        return database
Exemple #20
0
    def __init__(self, bfile, jfile="data/journals.csv"):
        """Load a BibTeX file and index its entries by their ``id``.

        :param bfile: path of the BibTeX file; a falsy value selects the
            hard-coded default path.
        :param jfile: path handed to ``csv2list``; the first two columns of
            each row become the ``self._journals`` mapping.

        After loading, fields of a ``crossref`` target are copied into the
        referring entry where the referring entry lacks them, and the
        ``_clean_keys``, ``_clean_entries`` and ``_load_templates`` steps run.
        """
        # Fall back to the default bibliography.  The original stored this
        # path in an unused name, so a falsy ``bfile`` still reached open().
        if not bfile:
            bfile = '/home/mattis/DropBox/evobib/basic.bib'

        with open(bfile) as f:
            bd = f.read()

        # try open journals file
        self._journals = {a: b[0] for a, *b in csv2list(jfile)}

        # customize stuff
        parser = BTP()
        parser.ignore_nonstandard_types = False  # keep non-standard entry types
        bdb = btp.loads(bd, parser=parser)

        self._entries = bdb.entries

        # make entries to keys
        self._dict = {}
        crossrefs = []
        for entry in self._entries:
            self._dict[entry['id']] = defaultdict(str)
            for k in entry:
                if k != 'id':
                    self._dict[entry['id']][k] = entry[k]
            if 'crossref' in entry:
                crossrefs.append((entry['id'], entry['crossref']))

        # resolve crossrefs: inherit fields the referring entry does not have
        for source, target in crossrefs:
            if source in self._dict and target in self._dict:
                for k in self._dict[target]:
                    if k not in self._dict[source]:
                        self._dict[source][k] = self._dict[target][k]
            else:
                print("[!] WARNING: target for <{0}> missing!".format(target))

        self._alias = dict(
                location = ['address'],
                year = ['date']
                )
        self._modifiers = {
            'paperconference' : 'Paper, presented at the conference',
            'paperworkshop': 'Paper, presented at the workshop',
            'talkconference': 'Talk, held at the conference',
            'talkworkshop': 'Talk, held at the workshop',
            'talkatm': 'Talk, held at the'
            }

        self._clean_keys()
        self._clean_entries()
        self._load_templates()
Exemple #21
0
    def __init__(self, bfile, jfile="data/journals.csv"):
        """Load a BibTeX file and index its entries by their ``id``.

        :param bfile: path of the BibTeX file; a falsy value selects the
            hard-coded default path.
        :param jfile: path handed to ``csv2list``; the first two columns of
            each row become the ``self._journals`` mapping.

        After loading, fields of a ``crossref`` target are copied into the
        referring entry where the referring entry lacks them, and the
        ``_clean_keys``, ``_clean_entries`` and ``_load_templates`` steps run.
        """
        # Fall back to the default bibliography.  The original stored this
        # path in an unused name, so a falsy ``bfile`` still reached open().
        if not bfile:
            bfile = '/home/mattis/DropBox/evobib/basic.bib'

        with open(bfile) as f:
            bd = f.read()

        # try open journals file
        self._journals = {a: b[0] for a, *b in csv2list(jfile)}

        # customize stuff
        parser = BTP()
        parser.ignore_nonstandard_types = False  # keep non-standard entry types
        bdb = btp.loads(bd, parser=parser)

        self._entries = bdb.entries

        # make entries to keys
        self._dict = {}
        crossrefs = []
        for entry in self._entries:
            self._dict[entry['id']] = defaultdict(str)
            for k in entry:
                if k != 'id':
                    self._dict[entry['id']][k] = entry[k]
            if 'crossref' in entry:
                crossrefs.append((entry['id'], entry['crossref']))

        # resolve crossrefs: inherit fields the referring entry does not have
        for source, target in crossrefs:
            if source in self._dict and target in self._dict:
                for k in self._dict[target]:
                    if k not in self._dict[source]:
                        self._dict[source][k] = self._dict[target][k]
            else:
                print("[!] WARNING: target for <{0}> missing!".format(target))

        self._alias = dict(
                location = ['address'],
                year = ['date']
                )
        self._modifiers = {
            'paperconference' : 'Paper, presented at the conference',
            'paperworkshop': 'Paper, presented at the workshop',
            'talkconference': 'Talk, held at the conference',
            'talkworkshop': 'Talk, held at the workshop',
            'talkatm': 'Talk, held at the'
            }

        self._clean_keys()
        self._clean_entries()
        self._load_templates()
Exemple #22
0
def collection_from_bibtex_str(bib_str, **kwargs):
    """
    Build a BibJSON collection from a Bibtex string (e.g. the content of a .bib-file).
    :param bib_str: input bibtex string
    :param kwargs: metadata for the BibJSON collection, forwarded to ``collection_from_dict``.
        The "collection" parameter must be set.
    :return BibJSON collection dictionary
    """
    entry_parser = BibTexParser()
    entry_parser.customization = _parse_bib_entry
    # NOTE(review): the original author remarked that this flag seems
    # "flipped" and suspected an error in the library -- not verified here.
    entry_parser.ignore_nonstandard_types = False

    parsed = bibtexparser.loads(bib_str, parser=entry_parser)
    entries_by_key = parsed.entries_dict
    return collection_from_dict(entries_by_key, **kwargs)
def load_bibtex(bibfile):
    """Load a UTF-8 BibTeX file and return its entries sorted for display.

    Three stable sorts are applied in sequence: by ``author`` ascending,
    then by ``mo_co(month)`` descending, then by ``year`` descending.  The
    last sort is therefore the primary ordering.  Missing fields count as
    the empty string.
    """
    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False

    with io.open(bibfile, 'r', encoding='utf-8') as handle:
        database = bibtexparser.load(handle, parser=bib_parser)

    records = database.entries

    # (key function, descending?) -- applied in order, least significant first.
    sort_passes = (
        (lambda rec: rec.get("author", ""), False),
        (lambda rec: mo_co(rec.get("month", "")), True),
        (lambda rec: rec.get("year", ""), True),
    )
    for key_fn, descending in sort_passes:
        records.sort(key=key_fn, reverse=descending)

    return records
Exemple #24
0
def read_bib(filename):
    """ read bibtex file and return bibtexparser object

    If ``filename`` does not exist, a message is printed and the process
    exits with status 0 (raises ``SystemExit``).  Otherwise the file is
    parsed with common strings enabled, non-standard entry types kept and
    field homogenisation off.
    """
    import sys

    if not os.path.exists(filename):
        print("... no bib file: {}".format(filename))
        # The ``os`` module has no ``exit``; the original call raised
        # AttributeError instead of terminating.
        # NOTE(review): status 0 is kept from the original call -- consider a
        # non-zero status for a missing input file.
        sys.exit(0)

    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenise_fields = False

    with open(filename) as f:
        bibtex_str = f.read()

    bib_database = bibtexparser.loads(bibtex_str, parser)
    return bib_database
Exemple #25
0
 def bibtex_reader(self, bibtextdata):
     """
     Parse bibtex data and return its first entry.
     
     Arguments:
         bibtextdata {str} -- bibtex source text
     
     Returns:
         the first element of the parsed database's entries
         (raises IndexError when no entry was parsed)
     """
     bib_parser = BibTexParser()
     bib_parser.common_strings = False
     bib_parser.homogenise_fields = False
     bib_parser.ignore_nonstandard_types = False
     parsed = bibtexparser.loads(bibtextdata, bib_parser)
     first_entry = parsed.entries[0]
     return first_entry
Exemple #26
0
def getclusterid(title, author):
    """Look up a cluster id for every entry of ``articles.bib`` that has none.

    For each entry whose ``clusterid`` is ``'unknown'`` the ``scholar.py``
    script is run with the entry's title as phrase, and the last token of
    its "Cluster ID" line is stored as the new ``clusterid`` (``"unknown"``
    when the lookup fails).  After every entry the database is written to
    ``clusterid-added-ircre.bib`` and copied to
    ``tempclusterid-added-ircre.bib``; it is written once more at the end.

    :param title: not used; each entry's own title is looked up.
    :param author: not used.
    :return: 0 on completion.
    """
    import shlex
    import shutil

    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False

    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as article_file:
        article_database = bibtexparser.load(article_file, parser)

    # The original read from an undefined ``bib_database``; the database
    # loaded above is the one that must be updated and dumped.
    entries = article_database.entries
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)

    for i, entry in enumerate(entries):
        if entry['clusterid'] == 'unknown':
            print("---------------------------")
            print("Entry number: " + str(i))
            entry_title = entry['title']
            print("Title: " + entry_title)
            # Titles may contain quotes or shell metacharacters; quote the
            # phrase instead of wrapping it in bare double quotes.
            command = (
                '/home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py -c 1 -t --phrase='
                + shlex.quote(entry_title)
                + ' |grep ID| grep Cluster')
            try:
                with os.popen(command) as pipe:
                    clusterid = pipe.read().strip().split()[-1]
            except Exception:
                # Best effort: e.g. empty output makes split()[-1] fail.
                clusterid = "unknown"

            print("new Cluster ID: " + clusterid)
            entry['clusterid'] = clusterid

        # Checkpoint after every entry, whether or not it was updated.
        with open('/home/limingtao/ircre-bibtex/ircreupdate/clusterid-added-ircre.bib', 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(article_database, newbibfile, writer=writer)
        try:
            # Synchronous copy; os.popen("cp ...") returned before cp finished.
            shutil.copy('/home/limingtao/ircre-bibtex/ircreupdate/clusterid-added-ircre.bib',
                        '/home/limingtao/ircre-bibtex/ircreupdate/tempclusterid-added-ircre.bib')
        except OSError as exc:
            # The scratch copy is a convenience; do not abort the run for it.
            print("could not copy checkpoint: " + str(exc))

    with open('/home/limingtao/ircre-bibtex/ircreupdate/clusterid-added-ircre.bib', 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(article_database, newbibfile, writer=writer)

    return 0
def _process_bibtex(file, expected_count=1) -> "typing.List[EditableFM]":
    """
    Parse a BibTeX .bib file from ``bibtex_dir`` and return the parsed metadata
    :param file: Name of the .bib file, resolved relative to ``bibtex_dir``
    :param expected_count: Number of entries the .bib must yield (asserted)
    :return: One ``import_bibtex.parse_bibtex_entry(..., dry_run=True)`` result per entry
    """
    bib_parser = BibTexParser(common_strings=True)
    bib_parser.ignore_nonstandard_types = False
    bib_parser.customization = import_bibtex.convert_to_unicode

    with Path(bibtex_dir, file).open("r", encoding="utf-8") as handle:
        database = bibtexparser.load(handle, parser=bib_parser)
        # Dry run: entries are parsed with dry_run=True.
        results = [
            import_bibtex.parse_bibtex_entry(record, dry_run=True)
            for record in database.entries
        ]
        assert len(results) == expected_count
        return results
Exemple #28
0
def get_bibtex_entry(doi, bibtext_cache={}, shortdoi_cache={}):
    """
    Return a bibtexparser entry for a DOI, or None when no bibtex text is found.

    The default cache dicts are created once and shared between calls that
    do not pass their own; they are forwarded to ``get_bibtext`` and
    ``shorten``.  The parsed text must contain exactly one entry.  The entry
    gets a ``link`` field with the doi.org URL, its ``'; '``-separated
    authors are re-joined with ``' and '``, and its ``ID`` becomes the short
    DOI without its first three characters.
    """
    bibtext = get_bibtext(doi, cache=bibtext_cache)
    if not bibtext:
        return None

    short_doi = shorten(doi, cache=shortdoi_cache)

    doi_parser = BibTexParser()
    doi_parser.ignore_nonstandard_types = False
    parsed = bibtexparser.loads(bibtext, doi_parser)

    # Unpacking fails with ValueError unless there is exactly one entry.
    (entry,) = parsed.entries

    entry['link'] = 'https://doi.org/{}'.format(urllib.request.quote(doi))
    if 'author' in entry:
        author_names = entry['author'].rstrip(';').split('; ')
        entry['author'] = ' and '.join(author_names)
    entry['ID'] = short_doi[3:]
    return entry
def parse_bibfile(bibfile):
    """given a bibtex .bib file, parse it and return the papers found

    Every entry is passed through ``extract_paper_info``; results that are
    ``None`` are dropped. The remaining papers are returned sorted in
    descending order (the ordering is whatever the paper objects define).
    """

    parser = BibTexParser()
    parser.customization = customizations
    parser.ignore_nonstandard_types = False

    with open(bibfile) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    papers = []
    for e in bib_database.entries:
        p = extract_paper_info(e)
        # Guard on the extracted paper, not on the entry: the entry is never
        # None, and a None paper would break the sort below.
        if p is not None:
            papers.append(p)

    papers.sort(reverse=True)

    return papers
Exemple #30
0
    def read(self, filename):
        """Render the BibTeX file ``filename`` through the module's ``template``.

        Entries are converted with ``self._parse_entry`` and bucketed by their
        ``'type'`` key; entries whose type matches none of the buckets are not
        rendered. The ``'type'`` key is removed from every entry before
        rendering.

        Returns a ``(html, parsed_metadata)`` tuple, where the metadata is a
        fixed title/category plus the current timestamp, each passed through
        ``self.process_metadata``.
        """
        metadata = {
            'title': 'Publications',
            'category': 'Publications',
            'date': str(datetime.datetime.now())
        }
        parsed = {
            key: self.process_metadata(key, value)
            for key, value in metadata.items()
        }

        with open(filename) as bib_file:
            bib_parser = BibTexParser()
            bib_parser.ignore_nonstandard_types = False
            database = bib_parser.parse_file(bib_file)

        entries = [self._parse_entry(raw) for raw in database.entries]

        # (template variable, value of the entry's 'type' key)
        sections = (
            ('thesis', 'thesis'),
            ('publications', 'publication'),
            ('arxiv', 'arxiv'),
            ('workshops', 'workshop'),
            ('non_refereed', 'non-refereed'),
            ('media', 'media'),
            ('projects', 'project'),
        )
        grouped = {
            variable: [entry for entry in entries if entry['type'] == kind]
            for variable, kind in sections
        }

        # The type was only needed for bucketing; strip it before rendering.
        for entry in entries:
            del entry['type']

        jinja_env = Environment()
        jinja_env.filters['compile_jsx'] = compile_jsx
        html = jinja_env.from_string(template).render(**grouped)
        return html, parsed
Exemple #31
0
def parse_bibfile(bibfile):
    """given a bibtex .bib file, parse it and return the papers found

    Every entry is passed through ``extract_paper_info``; results that are
    ``None`` are dropped. The remaining papers are returned sorted in
    descending order (the ordering is whatever the paper objects define).
    """

    parser = BibTexParser()
    parser.customization = customizations
    parser.ignore_nonstandard_types = False

    with open(bibfile) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    papers = []
    for e in bib_database.entries:
        p = extract_paper_info(e)
        # Guard on the extracted paper, not on the entry: the entry is never
        # None, and a None paper would break the sort below.
        if p is not None:
            papers.append(p)

    papers.sort(reverse=True)

    return papers
Exemple #32
0
    def main(self):
        """Parse the command line, load the BibTeX database, and run the chosen command.

        The command name from the arguments is mapped to a method on this
        object by replacing ``-`` with ``_``. The method is called with the
        keyword arguments stored under the command name in ``self.config``
        (none if the command has no section there).
        """
        # Parse arguments
        self.args = self.argument_parser.parse_args()
        self.verbose = self.args.verbose

        def to_bib_item(record):
            # Wrap each parsed record; attributes are looked up at call time.
            return BibItem(record, self.keywords.update, self.config)

        # Set up the BibTeX parser
        bib_parser = BibTexParser()
        bib_parser.homogenise_fields = False
        bib_parser.ignore_nonstandard_types = False
        bib_parser.customization = to_bib_item

        # Parse the database
        self.db = bibtexparser.load(self.args.infile, parser=bib_parser)

        # Invoke the command chosen by the user
        chosen = self.args.command
        handler = getattr(self, chosen.replace('-', '_'))
        handler(**self.config.get(chosen, {}))
    def open(self, bibfile):
        """Load ``bibfile`` into ``self.bib_database`` and index its attached files.

        For every entry, the second ``:``-separated piece of its ``file``
        field is stored in ``self.pdf_files`` under the entry's ``ID``.
        An entry without a ``file`` field raises ``KeyError``; a ``file``
        value without a ``:`` raises ``IndexError``.
        """
        # read file
        with open(bibfile) as handle:
            raw_text = handle.read()

        # tune the parser
        bib_parser = BibTexParser(common_strings=True)
        bib_parser.homogenise_fields = True
        bib_parser.ignore_nonstandard_types = True

        # generate database
        self.bib_database = bibtexparser.loads(raw_text, bib_parser)

        # map entry id -> path portion of the entry's file field
        for entry in self.bib_database.entries:
            attached_path = entry['file'].split(':')[1]
            self.pdf_files[entry['ID']] = attached_path
Exemple #34
0
def bib_to_dict(bib_string):
    """Parse a BibTeX string into entry dictionaries.

    A non-empty ``keywords`` field is split on ``,`` into a list. Returns
    ``None`` when nothing was parsed, the single entry dict when exactly one
    entry was parsed, and the list of entry dicts otherwise.
    """
    bib_parser = BibTexParser(common_strings=True)
    bib_parser.customization = convert_to_unicode
    bib_parser.homogenise_fields = False
    bib_parser.ignore_nonstandard_types = False

    records = bibtexparser.loads(bib_string, bib_parser).entries

    if not len(records) > 0:
        return None

    for record in records:
        if record.get('keywords', '') != '':
            record['keywords'] = record.get('keywords').split(',')

    if len(records) == 1:
        return records[0]
    return records
Exemple #35
0
def parse(filename):
    """
    Parse a BibTeX file and build the Unite text for each of its entries.

    Each entry is formatted by the function in ``formatter.apalike`` named
    after its entry type, falling back to ``formatter.apalike.default``.

    :returns: dictionary
        - key of dictionary item is the BibTeX entry key (as unicode)
        - val of dictionary item is what the formatter returned for that entry
    """
    # 1. parse the file
    with open(filename) as bibtex_file:
        bib_parser = BibTexParser()
        bib_parser.ignore_nonstandard_types = False
        bib_parser.customization = customizations
        records = bibtexparser.load(bibtex_file, parser=bib_parser).entries

    # 2. build the Unite text for each entry
    unite_keyvals = {}
    for record in records:
        render = getattr(
            formatter.apalike, record['ENTRYTYPE'], formatter.apalike.default)
        unite_keyvals[unicode(record['ID'])] = render(record)
    return unite_keyvals
#see https://docs.python.org/3/howto/argparse.html
# Command-line options, all optional: --target (default "~/Mega/library.bib"),
# --output (default "bibtex") and --library (default "~/MEGA/Mendeley").
# NOTE: the arguments are parsed at module level, i.e. as soon as this runs.
parser = argparse.ArgumentParser("")
parser.add_argument('-t', '--target', default="~/Mega/library.bib")
parser.add_argument('-o', '--output', default="bibtex")
parser.add_argument('-l', '--library', default="~/MEGA/Mendeley")
args = parser.parse_args()

# Expand "~" and resolve both paths to absolute, symlink-free form.
args.target = realpath(abspath(expanduser(args.target)))
args.library = realpath(abspath(expanduser(args.library)))
# Only the target is checked for existence; the library path is not.
# NOTE(review): an assert is skipped when Python runs with -O.
assert(exists(args.target))

logging.info("Targeting: {}".format(args.target))
logging.info("Output to: {}".format(args.output))

# NOTE: `parser` is rebound here -- from this point on it is the BibTeX
# parser, no longer the argparse parser created above.
parser = BibTexParser(common_strings=False)
parser.ignore_nonstandard_types = False
parser.homogenise_fields = True

def make_bar(k, v, left_pad_v, right_scale_v):
    """Format one row of a text bar chart.

    The row is the label ``k``, spaces padding it out to ``10 + left_pad_v``
    columns, the value ``v`` in parentheses, and a bar of ``=`` characters
    ending in ``>`` and a newline. The bar length is the remaining width
    (``100 - padding``) divided by ``right_scale_v`` and multiplied by ``v``,
    rounded up.
    """
    label_width = 10 + left_pad_v
    pad = label_width - len(k)
    bar = ceil(((100 - pad) / right_scale_v) * v)
    spacer = " " * pad
    strokes = "=" * bar
    return "{}{}({}) : {}>\n".format(k, spacer, v, strokes)

def file_to_hash(filename):
    """Return the SHA-256 hex digest of the contents of ``filename``.

    Raises ``Exception(filename)`` when ``filename`` is not an existing
    regular file.
    """
    if isfile(filename):
        with open(filename, 'rb') as stream:
            payload = stream.read()
        return sha256(payload).hexdigest()
    raise Exception(filename)

def add_slash_if_necessary(x):
def parse_urlfile(url_file):
    """
    take a file of the form

    category: ads url

    and get the bibtex from the URL and return a list of Paper objects
    with the category stored as the subject

    Blank lines and lines starting with "#" are skipped.  Each remaining
    line is split on its first ": "; a line without that separator raises
    ValueError.  The returned papers are sorted in descending order.
    """

    papers = []

    with open(url_file) as f:

        for line in f:
            if line.startswith("#") or line.strip() == "":
                continue

            # split on the first ": " only, so a later ": " in the line
            # cannot break the unpacking
            subject, url = line.split(": ", 1)

            # for the ADS bibtex URL, lop off the paper_id
            paper_id = url.strip().split("/")[-1]
            bibtex_url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX".format(paper_id)

            # urlopen gives us a bytes object; split it into lines and
            # decode each one into unicode before we play with it.
            print(bibtex_url)
            with urllib.request.urlopen(bibtex_url) as response:
                bibtex_html = response.read()

            bib_lines = [raw.decode("utf8") for raw in bibtex_html.splitlines()]

            # strip off any header and just leave the bibtex: keep everything
            # from the first line that begins an entry ("@...") onwards
            start = next(
                (i for i, bibline in enumerate(bib_lines)
                 if bibline.lstrip().startswith("@")),
                None)
            if start is None:
                bibtex = ""
            else:
                bibtex = "".join(
                    "{}\n".format(bibline) for bibline in bib_lines[start:])

            # parse the bibtex string with a fresh parser per response, so
            # nothing parsed for one URL can carry over into the next
            # NOTE(review): confirm whether a reused bibtexparser parser
            # keeps accumulating entries in its database.
            parser = BibTexParser()
            parser.customization = customizations
            parser.ignore_nonstandard_types = False
            bib_database = bibtexparser.loads(bibtex, parser=parser)

            for e in bib_database.entries:
                p = extract_paper_info(e)
                # guard on the extracted paper (not the entry): a None paper
                # has no subject to set and must not reach the sort below
                if p is not None:
                    p.subject = subject
                    papers.append(p)

    papers.sort(reverse=True)
    return papers