Example #1
0
def main():
    """Sanitize a .bib file according to a JSON config and write the result."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("input_file",
                            help='The original .bib file you want to sanitize.')
    arg_parser.add_argument("config", help='The config file in JSON format.')
    arg_parser.add_argument("output_file", help='Name of the new sanitized file.')
    args = arg_parser.parse_args()

    # Load the bibliography to sanitize.
    with open(args.input_file) as bib_file:
        bib_db = bibtexparser.load(bib_file)

    # Load the JSON configuration driving the checks below.
    with open(args.config) as config_file:
        conf_db = json.load(config_file)

    checkDuplicates(args.input_file)
    bib_db = checkMandatoryFieldsAndKeywords(bib_db, conf_db['read_config'])
    checkTags(bib_db, conf_db['read_config']['tag_regex'])

    # Emit only the entries, ordered by the configured sort keys.
    writer = BibTexWriter()
    writer.contents = ['entries']
    writer.order_entries_by = conf_db["write_config"]["sort_order"]

    with open(args.output_file, 'w') as result_file:
        result_file.write(bibtexparser.dumps(bib_db, writer))
Example #2
0
def getcitation():
    """Refresh the 'cited' counts of 20 random entries in articles.bib.

    For each sampled entry with a known Google Scholar cluster id, scrape the
    current citation count via scholar.py and, if it grew by a plausible
    amount (less than 8), store it back into the entry.  The database is
    rewritten after every entry so progress survives a crash.  Returns 0.
    """
    import random

    articlesparser = BibTexParser(common_strings=False)
    articlesparser.ignore_nonstandard_types = False
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, articlesparser)

    articleentries = articles_database.entries

    samplelist = random.sample(range(len(articleentries)), 20)
    print(samplelist)

    # BUG FIX: build the writer once.  The original recreated it on every
    # loop iteration and hit a NameError at the final dump whenever the loop
    # body never ran.
    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)

    for i in samplelist:
        print("---------------------------")
        print("Entry number: " + str(i))
        title = articleentries[i]['title']
        clusterid = articleentries[i]['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)

        if clusterid != "unknown":
            print(str(i))
            try:
                # NOTE(review): clusterid is interpolated into a shell
                # command; it is presumably a numeric Scholar cluster id —
                # confirm upstream validation.
                citations = os.popen(
                    '''/usr/bin/python3 /home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py -c 1 -C ''' + clusterid + ''' |grep -v list |grep Citations''').read().strip().split()[
                    -1]
            # BUG FIX: bare `except:` also swallowed KeyboardInterrupt; catch
            # only what the popen/parse pipeline can raise.
            except (IndexError, OSError):
                citations = "unknown"
        else:
            citations = "unknown"

        print("new Citations: " + citations)

        # EAFP: default to 0 when the entry has no 'cited' field yet.
        oldcitednumber = int(articleentries[i].get('cited', 0))
        print("Old Cited Number: " + str(oldcitednumber))

        if citations != "unknown":
            citednumber = int(citations)
            # Accept only plausible increases (guards against scrape glitches).
            if citednumber > oldcitednumber and (citednumber - oldcitednumber) < 8:
                articleentries[i]['cited'] = str(citednumber)

        # Checkpoint after every entry.
        with open('/home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib', 'w', encoding='utf8') as newarticlefile:
            bibtexparser.dump(articles_database, newarticlefile, writer=writer)
        os.popen("cp /home/limingtao/ircre-bibtex/ircreupdate/cited-add-articles.bib tempcited-add-articles.bib")

    os.popen("cp /home/limingtao/ircre-bibtex/ircreupdate/articles.bib /home/limingtao/ircre-bibtex/ircreupdate/oldarticles.bib")
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', 'w', encoding='utf8') as newarticlefile:
        bibtexparser.dump(articles_database, newarticlefile, writer=writer)

    return 0
def write_bib(bib_database, filen="dl4m.bib"):
    """Serialize bib_database into the file named by filen.

    Entries are written UTF-8 with two-space indentation, ordered by the
    'noneyear' and 'author' fields.
    """
    bib_writer = BibTexWriter()
    bib_writer.indent = '  '
    bib_writer.order_entries_by = ('noneyear', "author")
    with open(filen, "w", encoding="utf-8") as handle:
        handle.write(bib_writer.write(bib_database))
Example #4
0
def _writer():
    '''
    Return a configured bibtex writer.

    Two-space indent, entries sorted by ID, and title/author/editor
    emitted first within each entry.
    '''
    configured = BibTexWriter()
    configured.indent = '  '
    configured.order_entries_by = ('ID',)
    configured.display_order = ['title', 'author', 'editor']
    return configured
Example #5
0
 def dumps(bibman):
     """Render bibman's cleaned entries (comments first) as a bibtex string."""
     database = bibtexparser.bparser.BibDatabase()
     database._entries_dict = bibman.cleaned
     database.entries = list(bibman.cleaned.values())
     bib_writer = BibTexWriter()
     bib_writer.order_entries_by = None  # keep insertion order
     bib_writer.contents = ['comments', 'entries']
     bib_writer.indent = '    '
     return bibtexparser.dumps(database, bib_writer)
Example #6
0
def bibtexclassify():
    """Split ircre.bib into articles.bib and others.bib by entry type.

    Journal articles go to articles.bib; inbook/inproceedings/incollection
    entries go to others.bib.  Returns 0.
    """
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False

    with open('/home/limingtao/ircre-bibtex/ircreupdate/ircre.bib', encoding='utf8') as bibtexfile:
        ircrebib_database = bibtexparser.load(bibtexfile, parser)

    allentries = ircrebib_database.entries.copy()

    # ----------------------------------------
    # Journal articles get their own database.
    # ----------------------------------------
    article_database = BibDatabase()
    article_database.entries = [
        entry.copy() for entry in allentries
        if entry['ENTRYTYPE'] == 'article'
    ]

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)
    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', 'w', encoding='utf8') as article_file:
        bibtexparser.dump(article_database, article_file, writer=writer)

    # Chapters and conference papers form the "others" database.
    other_database = BibDatabase()
    other_database.entries = [
        entry.copy() for entry in allentries
        if entry['ENTRYTYPE'] in ('inbook', 'inproceedings', 'incollection')
    ]

    writer2 = BibTexWriter()
    writer2.indent = '    '
    writer2.order_entries_by = ('order',)
    with open('/home/limingtao/ircre-bibtex/ircreupdate/others.bib', 'w', encoding='utf8') as others_file:
        bibtexparser.dump(other_database, others_file, writer=writer2)

    return 0
Example #7
0
def write_bibtex_file(filename, db):
    """
        Write BiBTeX file with content from db

        Entries are sorted by year then ID and encoded as UTF-8; a summary
        line is printed after the write.
    """
    bib_writer = BibTexWriter()
    bib_writer.order_entries_by = ('year', 'ID')
    with open(filename, 'wb') as output_file:
        rendered = bibtexparser.dumps(db, writer=bib_writer)
        output_file.write(rendered.encode('utf8'))
        print("Wrote %i records into filename '%s'" %
              (len(db.entries), filename))
def write_output(bib_entries, config):
    """Format bib_entries per config; write in place or print a preview."""
    database = BibDatabase()
    # Drop the internal signature field before emitting.
    for bib_entry in bib_entries:
        bib_entry.pop("sig1")
    database.entries = bib_entries

    bib_writer = BibTexWriter()
    bib_writer.indent = ' ' * config['space']
    # 'ne' sort mode flips the key priority.
    if config['sort'] == 'ne':
        bib_writer.order_entries_by = ('title', 'ENTRYTYPE')
    else:
        bib_writer.order_entries_by = ('ENTRYTYPE', 'title')

    if config['inplace']:
        with open(config['input_path'], 'w') as output_file:
            output_file.write(bib_writer.write(database))
    else:
        print('''
        ##########################################
        # Formatted file preview
        ##########################################
        ''')
        print(bib_writer.write(database))
Example #9
0
def ircrebibmerge():
    """Merge sorted-articles, top15 and others into newircre.bib.

    Entries are concatenated (top15 first, then sorted articles, then
    others) and written without re-sorting.  Returns 0.
    """
    base = '/home/limingtao/ircre-bibtex/ircreupdate/'

    def load_entries(filename):
        # One fresh parser per file, as bibtexparser requires.
        file_parser = BibTexParser(common_strings=False)
        file_parser.ignore_nonstandard_types = False
        with open(base + filename, encoding='utf8') as handle:
            return bibtexparser.load(handle, file_parser).entries.copy()

    sortedarticles = load_entries('sorted-articles.bib')
    top15articles = load_entries('top15.bib')
    others = load_entries('others.bib')

    alldb = BibDatabase()
    alldb.entries = [entry.copy()
                     for entry in top15articles + sortedarticles + others]

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = None  # preserve the concatenation order

    with open(base + 'newircre.bib', 'w', encoding='utf8') as newircrebibfile:
        bibtexparser.dump(alldb, newircrebibfile, writer=writer)

    return 0
 def test_sort_missing_field(self):
     """Entries lacking the sort field must sort before those that have it."""
     bib_database = BibDatabase()
     bib_database.entries = [
         {'ID': 'b', 'ENTRYTYPE': 'article', 'year': '2000'},
         {'ID': 'c', 'ENTRYTYPE': 'book', 'year': '2010'},
         {'ID': 'a', 'ENTRYTYPE': 'book'},
     ]
     writer = BibTexWriter()
     writer.order_entries_by = ('year', )
     result = bibtexparser.dumps(bib_database, writer)
     expected = ("@book{a\n}\n\n"
                 "@article{b,\n year = {2000}\n}\n\n"
                 "@book{c,\n year = {2010}\n}\n\n")
     self.assertEqual(result, expected)
Example #11
0
def parse_csv_file(args):
    """Parse a CSV file into bibtex format."""
    entries = []
    filter_rows = get_filter_rows(args)

    with open(args.csv_file) as csv_file:
        for row_num, row in enumerate(csv.DictReader(csv_file), 1):
            # Skip everything before the requested starting row.
            if row_num < args.starting_row:
                continue

            row = fix_columns_headers(row)

            # Rows already present in the filter list are skipped.
            if filter_rows.get(entry_key(row)):
                continue

            if row['Item Type'] not in TYPES:
                print('ItemType not found: "{}"'.format(row['Item Type']))
                sys.exit()

            mapping = TYPES[row['Item Type']]
            entries.append(add_entry(row, mapping['type'], mapping['remap']))

    if args.randomize:
        entries = np.random.permutation(entries)  # pylint: disable=no-member

    # Emit the entries in chunks of args.max_entries, numbering the output
    # files when a non-default chunk size is in effect.
    for chunk_num, beg in enumerate(range(0, len(entries), args.max_entries), 1):
        file_name = args.bibtex_file
        if args.max_entries != MAX_ENTRIES:
            root, ext = os.path.splitext(file_name)
            file_name = '{}{}{}'.format(root, chunk_num, ext)

        chunk = entries[beg:beg + args.max_entries]
        print(chunk_num, len(chunk))

        bibtex_db = BibDatabase()
        bibtex_db.entries = chunk
        writer = BibTexWriter()
        writer.order_entries_by = None
        with open(file_name, 'w') as bibtex_file:
            bibtex_file.write(writer.write(bibtex_db))
Example #12
0
File: fetch.py Project: siudej/Cite
    def _cleanupBibTex(self, count):
        """ Clean up bibtex and ensure uniform look. """
        import bibtexparser
        from bibtexparser.bparser import BibTexParser
        parser = BibTexParser()
        parser.customization = homogeneize_latex_encoding
        bib = bibtexparser.loads(self.refs, parser=parser)

        # save results
        from bibtexparser.bwriter import BibTexWriter
        writer = BibTexWriter()
        writer.contents = ['entries']
        writer.indent = '    '
        writer.order_entries_by = ('id')
        self.number = len(bib.entries)
        self.refs = bibtexparser.dumps(bib, writer)
Example #13
0
def getclusterid(title, author):
    """Fill in missing Google Scholar cluster ids in articles.bib.

    Scans every entry whose 'clusterid' is 'unknown', scrapes an id via
    scholar.py using the entry's own title, and checkpoints the database
    to clusterid-added-ircre.bib after each entry.  The title/author
    parameters are currently unused (kept for interface compatibility).
    Returns 0.
    """
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False

    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as article_file:
        article_database = bibtexparser.load(article_file, parser)

    # BUG FIX: the original referenced an undefined name `bib_database`
    # (NameError at runtime); everything below now uses the database that
    # was actually loaded above.
    entries = article_database.entries
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)

    for i in range(len(entries)):
        if entries[i]['clusterid'] == 'unknown':
            print("---------------------------")
            print("Entry number: " + str(i))
            # Use a local name so the `title` parameter is not shadowed.
            entry_title = entries[i]['title']
            print("Title: " + entry_title)
            try:
                clusterid = os.popen(
                    '''/home/limingtao/ircre-bibtex/ircreupdate/scholarpy/scholar.py -c 1 -t --phrase="''' + entry_title + '''" |grep ID| grep Cluster''').read().strip().split()[
                    -1]
            # BUG FIX: bare `except:` also swallowed KeyboardInterrupt;
            # only the popen/parse failures are expected here.
            except (IndexError, OSError):
                clusterid = "unknown"

            print("new Cluster ID: " + clusterid)
            entries[i]['clusterid'] = clusterid
        # Checkpoint after every entry so partial progress is kept.
        with open('/home/limingtao/ircre-bibtex/ircreupdate/clusterid-added-ircre.bib', 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(article_database, newbibfile, writer=writer)
        os.popen("cp /home/limingtao/ircre-bibtex/ircreupdate/clusterid-added-ircre.bib /home/limingtao/ircre-bibtex/ircreupdate/tempclusterid-added-ircre.bib")

    with open('/home/limingtao/ircre-bibtex/ircreupdate/clusterid-added-ircre.bib', 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(article_database, newbibfile, writer=writer)

    return 0
Example #14
0
def main():
    """Fill in missing Google Scholar cluster ids in ircre.bib.

    For every entry whose 'clusterid' is 'unknown', scrape an id via
    scholar.py using the entry title, checkpointing the whole database to
    clusterid-added-ircre.bib after each entry.  Returns 0.
    """
    import bibtexparser
    from bibtexparser.bwriter import BibTexWriter

    with open('ircre.bib', encoding='utf8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    entries = bib_database.entries
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order', )

    for i in range(len(entries)):
        if entries[i]['clusterid'] == 'unknown':
            print("---------------------------")
            print("Entry number: " + str(i))
            title = entries[i]['title']
            print("Title: " + title)
            clusterid = ''
            try:
                clusterid = os.popen(
                    '''./scholarpy/scholar.py -c 1 -t --phrase="''' + title +
                    '''" |grep ID| grep Cluster''').read().strip().split()[-1]
            # BUG FIX: bare `except:` also swallowed KeyboardInterrupt and
            # SystemExit; catch only the failures popen/parsing can raise.
            except (IndexError, OSError):
                clusterid = "unknown"

            print("new Cluster ID: " + clusterid)
            entries[i]['clusterid'] = clusterid
        # Checkpoint after every entry so a crash loses no scraped ids.
        with open('clusterid-added-ircre.bib', 'w',
                  encoding='utf8') as newbibfile:
            bibtexparser.dump(bib_database, newbibfile, writer=writer)
        os.popen("cp clusterid-added-ircre.bib tempclusterid-added-ircre.bib")

    with open('clusterid-added-ircre.bib', 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(bib_database, newbibfile, writer=writer)

    return 0
def proc_bib(input_io: TextIOWrapper,
             output_io: TextIOWrapper,
             jdb: JournalDB,
             silent: bool = False,
             output_format: str = "bib",
             abbrev_type: str = "iso4"):
    """Abbreviate journal titles in a bibtex stream.

    Reads a bibtex database from ``input_io``, looks up each entry's
    ``journaltitle`` in ``jdb`` and, when found, replaces it with the
    abbreviation attribute named by ``abbrev_type`` (e.g. ``iso4``).
    With ``output_format="bib"`` the rewritten database is written to
    ``output_io``; with ``"sourcemap"`` a mapping is emitted through
    ``gen_sourcemap_map`` instead.

    Raises:
        ValueError: if ``Journal`` has no attribute named ``abbrev_type``.
    """
    if not hasattr(Journal, abbrev_type):
        raise ValueError(f"Invalid abbreviation type `{abbrev_type}`")

    bib_db = bibtexparser.load(input_io)

    for entry in bib_db.entries:
        journaltitle = entry.get("journaltitle")
        if journaltitle is None:
            continue
        # Strip brace protection before matching against the journal index.
        journaltitle = braces_regex.sub("", journaltitle)

        # Match the title as a case-insensitive prefix (tolerates subtitles).
        # NOTE(review): `(:?.*)` looks like it was meant to be the
        # non-capturing group `(?:.*)` — behavior is equivalent here, but
        # worth confirming the intent.
        name_pattern = re.compile(fr"^{re.escape(journaltitle)}(:?.*)$",
                                  RegexFlag.IGNORECASE)
        # TODO: query using lambdas?
        # TODO: normalize names (just in index?).
        res = jdb.journals.query_one(Journal.names_key, name_pattern)
        if res:
            _, journal = res
            abbrev = getattr(journal, abbrev_type)

            if output_format == "bib":
                # Fall back to the original title when the abbreviation is
                # empty/None; re-wrap in braces for bibtex.
                entry["journaltitle"] = f"{{{abbrev or journaltitle}}}"
            elif output_format == "sourcemap":
                gen_sourcemap_map(journal, journaltitle, abbrev, output_io)

        # `abbrev` is only bound when `res` is truthy; the conditional
        # expression below relies on that ordering.
        abbrev_msg = f"abbreviating to '{abbrev}'" if res else f"no abbreviation found"
        if not silent:
            info(f"found journal name '{journaltitle}'; {abbrev_msg}.")

    if output_format == "bib":
        bib_writer = BibTexWriter()
        bib_writer.add_trailing_comma = True
        bib_writer.display_order = None
        bib_writer.indent = "\t"
        bib_writer.order_entries_by = None  # keep the input entry order
        bibtex_code = bib_writer.write(bib_db)
        output_io.write(bibtex_code)
    elif output_format == "sourcemap":
        # Sourcemap output was already streamed inside the loop.
        pass
Example #16
0
def parse_bibtex(file, build_dir):
    """
    Parse merged bibtex file again with customization to clean citations.

    Writes one '<year>parsed.bib' file per distinct publication year
    (newest first), each containing that year's entries sorted by type.

    @type file: .bib file
    @param file: file to be parsed
    @type build_dir: file path
    @param build_dir: where to save
    """
    parser = BibTexParser()
    parser.customization = customizations
    with open(file, 'r') as f:
        bibtex = bibtexparser.load(f, parser=parser)

    # Distinct publication years, newest first.  The original collected all
    # years, sort+reversed, then de-duplicated with an O(n^2) .count() loop;
    # sorted(set(...)) is equivalent and linear-ithmic.
    years = [int(entry['year']) for entry in bibtex.entries
             if 'year' in entry]
    years_no_repeat = sorted(set(years), reverse=True)

    # The writer is configuration-only, so one instance serves every file
    # (the original rebuilt it on each iteration).
    writer = BibTexWriter()
    writer.order_entries_by = ('ENTRYTYPE', )

    for year in years_no_repeat:
        # Deep copy keeps all database metadata; only the entry list is
        # replaced with this year's entries.
        per_year = copy.deepcopy(bibtex)
        per_year.entries = [entry for entry in bibtex.entries
                            if 'year' in entry and int(entry['year']) == year]
        parse_file = os.path.join(build_dir, str(year) + 'parsed.bib')
        with open(parse_file, 'w') as f:
            f.write(writer.write(per_year))
Example #17
0
def write_bib(db, order=False):
    """
    Write bibtex string.

    Args:
        db (BibDatabase): database object to dump.
        order (bool): whether to reorder entries upon writing.

    Returns:
        The dumped string.
    """
    bib_writer = BibTexWriter()
    bib_writer.indent = '\t'
    bib_writer.order_entries_by = None

    # Normalize month names (short or full form) to zero-padded numbers.
    for entry in db.entries:
        if 'month' not in entry:
            continue
        for table in (MONTHS, MONTHS_FULL):
            if entry['month'] in table:
                entry['month'] = '{:02d}'.format(
                    table.index(entry['month']) + 1)

    if order:
        # Manual sort by year, author and ID.
        sort_entries(db, ('year', 'author', 'ID'))

    if not config.use_utf8_characters:
        db.entries = [nomenclature.encode_ascii_latex(e) for e in db.entries]

    if config.protect_uppercase:
        for entry in db.entries:
            entry["title"] = latex.protect_uppercase(entry["title"])

    return bib_writer.write(db)
Example #18
0
def write_bibtex(bibtex_entries):
    """Render entries as a bibtex string, joining lists and LaTeX-escaping."""
    database = bibtexparser.bibdatabase.BibDatabase()

    for entry in bibtex_entries:
        # Strip fields that should not appear in the output.
        for useless in ('created_time', 'file', 'abstract'):
            entry.pop(useless, None)
        for field in entry:
            if isinstance(entry[field], list):
                entry[field] = ' and '.join(entry[field])
            entry[field] = unicode_to_latex(entry[field])
    database.entries = bibtex_entries

    bib_writer = BibTexWriter()
    bib_writer.contents = ['comments', 'entries']
    bib_writer.indent = '  '
    bib_writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
    return bibtexparser.dumps(database, bib_writer)
Example #19
0
def articlessort():
    """Sort articles.bib by impact factor, journal, citations and year.

    Temporary numeric sort keys are attached, the entries are sorted in
    descending order, each entry gets a zero-padded 'order' field, and
    the result is written to sorted-articles.bib.  Returns 0.
    """
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False

    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as articlesfile:
        articles_database = bibtexparser.load(articlesfile, parser)

    articles = articles_database.entries.copy()

    for article in articles:
        # Entries with a missing or malformed field sort last (key 0).
        # BUG FIX: bare `except:` replaced with the exceptions the
        # conversion can actually raise.
        try:
            article['sortkey1'] = float(article['impactfactor'])
        except (KeyError, ValueError, TypeError):
            article['sortkey1'] = 0.0
        try:
            article['sortkey2'] = int(article['cited'])
        except (KeyError, ValueError, TypeError):
            article['sortkey2'] = 0

    sorted_by_journalif_cited = sorted(
        articles,
        key=lambda x: (x['sortkey1'], x['journal'], x['sortkey2'], x['year']),
        reverse=True)

    # Record the final rank and drop the temporary keys in one pass.
    for rank, article in enumerate(sorted_by_journalif_cited):
        article['order'] = str(rank).zfill(6)
        article.pop('sortkey1')
        article.pop('sortkey2')

    sortedarticlesdatabase = BibDatabase()
    sortedarticlesdatabase.entries = sorted_by_journalif_cited

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order',)
    with open('/home/limingtao/ircre-bibtex/ircreupdate/sorted-articles.bib', 'w', encoding='utf8') as sortedarticlesfile:
        bibtexparser.dump(sortedarticlesdatabase, sortedarticlesfile, writer=writer)

    return 0
Example #20
0
def getop15articles():
    """Extract the 15 most-cited articles into top15.bib.

    Each selected entry is retyped as 'toparticle' and its ID suffixed
    with 'a' so it can coexist with the original entry.  Returns 0.
    """
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False

    with open('/home/limingtao/ircre-bibtex/ircreupdate/articles.bib', encoding='utf8') as article_file:
        article_database = bibtexparser.load(article_file, parser)

    article_entries = article_database.entries.copy()

    for entry in article_entries:
        # Entries without a parseable citation count sort last.
        # BUG FIX: bare `except:` narrowed to the conversion failures.
        try:
            entry['sortkey1'] = int(entry['cited'])
        except (KeyError, ValueError, TypeError):
            entry['sortkey1'] = 0

    articles_sorted_by_cited = sorted(article_entries,
                                      key=lambda x: x['sortkey1'],
                                      reverse=True)

    # ROBUSTNESS: slicing handles databases with fewer than 15 articles
    # (the original's range(15) indexing raised IndexError in that case).
    top15articles = [entry.copy() for entry in articles_sorted_by_cited[:15]]

    for entry in top15articles:
        entry['ENTRYTYPE'] = 'toparticle'
        entry['ID'] = entry['ID'] + 'a'
        entry.pop('sortkey1')

    top15_database = BibDatabase()
    top15_database.entries = top15articles

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = None

    with open('/home/limingtao/ircre-bibtex/ircreupdate/top15.bib', 'w', encoding='utf8') as top15_file:
        bibtexparser.dump(top15_database, top15_file, writer=writer)
    return 0
Example #21
0
def post_processing(output_bib_entries, removed_value_names, abbr_dict, sort):
    """Reparse raw bib entries, strip unwanted fields, apply abbreviations.

    Falls back to returning the raw concatenated text when parsing loses
    more than five entries.  When `sort` is falsy the original entry order
    is preserved in the output.
    """
    bibparser = bibtexparser.bparser.BibTexParser(
        ignore_nonstandard_types=False)

    # Concatenate all entry lines, dropping @string-variable lines.
    chunks = []
    for entry in output_bib_entries:
        chunks.extend(line for line in entry if not is_contain_var(line))
        chunks.append("\n")
    parsed_entries = bibtexparser.loads("".join(chunks), bibparser)

    if len(parsed_entries.entries) < len(output_bib_entries) - 5:
        # Parsing dropped too many entries: return the raw text untouched.
        print(
            "Warning: len(parsed_entries.entries) < len(output_bib_entries) -5 -->",
            len(parsed_entries.entries), len(output_bib_entries))
        raw_chunks = []
        for entry in output_bib_entries:
            raw_chunks.extend(entry)
            raw_chunks.append("\n")
        return "".join(raw_chunks)

    for output_entry in parsed_entries.entries:
        for remove_name in removed_value_names:
            output_entry.pop(remove_name, None)
        for (short, pattern) in abbr_dict:
            for place in ("booktitle", "journal"):
                if place in output_entry and re.match(pattern,
                                                      output_entry[place]):
                    output_entry[place] = short

    writer = BibTexWriter()
    if not sort:
        writer.order_entries_by = None
    return bibtexparser.dumps(parsed_entries, writer=writer)
Example #22
0
def write_bib(db, order=False):
    """
    Write bibtex string.

    Args:
        db (BibDatabase): database object to dump.
        order (bool): whether to reorder entries upon writing.

    Returns:
        The dumped string.
    """
    bib_writer = BibTexWriter()
    bib_writer.indent = '\t'
    bib_writer.order_entries_by = None

    # Normalize month names to zero-padded numeric strings.
    for entry in db.entries:
        month = entry.get('month')
        if month in MONTHS:
            entry['month'] = '{:02d}'.format(MONTHS.index(month) + 1)

    if order:
        # Manual sort by year, author and ID.
        sort_entries(db, ('year', 'author', 'ID'))

    if not config.use_utf8_characters:
        db.entries = [nomenclature.encode_ascii_latex(e) for e in db.entries]

    if config.protect_uppercase:
        for entry in db.entries:
            entry["title"] = latex.protect_uppercase(entry["title"])

    return bib_writer.write(db)
 def test_sort_type_id(self):
     """Sorting by (ENTRYTYPE, ID) groups articles before books, then by ID."""
     writer = BibTexWriter()
     writer.order_entries_by = ('ENTRYTYPE', 'ID')
     dumped = bibtexparser.dumps(self.bib_database, writer)
     self.assertEqual(dumped,
                      "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n")
 def test_sort_none(self):
     """With order_entries_by=None the input order must be preserved."""
     writer = BibTexWriter()
     writer.order_entries_by = None
     dumped = bibtexparser.dumps(self.bib_database, writer)
     self.assertEqual(dumped,
                      "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n")
Example #25
0
def write_bib(bib_database, filen="dl4m.bib"):
    """Write bib_database to filen, two-space indent, noneyear/author order."""
    bib_writer = BibTexWriter()
    bib_writer.indent = '  '
    bib_writer.order_entries_by = ('noneyear', "author")
    with open(filen, "w", encoding="utf-8") as handle:
        handle.write(bib_writer.write(bib_database))
Example #26
0
            else:
                print(f'I detected a duplicate based on the key {id}. '
                      f'I will augment it with a letter and try again. '
                      f'Please double-check, if this is correct.. '
                      f'my duplicate detection algorithm is pretty bad.\n\n')
                id = id_orig + letters[i]
                i += 1

        if not duplicate:
            bib = re.sub(r'(@[a-z]*{)(.*),', r'\1' + id + ',', bib)
            bib_db = bibtexparser.loads(bib)
            db.entries.extend(bib_db.get_entry_list())
        else:
            bib_db = None

    if id_list:
        writer = BibTexWriter()
        writer.indent = '\t'
        writer.order_entries_by = ('year', 'ID')
        writer.add_trailing_comma = True
        with open('../_bibliography/pint.bib', 'w') as bibfile:
            bibfile.write(writer.write(db))

        for line in fileinput.input('../_bibliography/pint.bib', inplace=True):
            if '@comment{' in line:
                line = line.replace('@comment{', '')
            if re.match(r'%}+', line) is not None:
                line = re.sub(r'%}+', '%', line)
            line = line.rstrip('\r\n')
            print(line)
Example #27
0
        },
    },
    'loggers': {
        '': {
            'handlers': ['default'],
            'level': 'ERROR',
            'formatter': 'standard',
            'propagate': True
        }
    }
})

# Module-level writer shared by the functions below: emit comments then
# entries, two-space indent, entries sorted by type, author and year.
writer = BibTexWriter()
writer.contents = ['comments', 'entries']
writer.indent = '  '
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

def create_id(t, year, title):
	"""Build an id of the form <type>_<year>_<underscored title>."""
	return "_".join([str(t), str(year), str(space_to_underscore(title))])

def pdf(pdf_files, shared_pdf, bibtex_folder, bibtex_files, gscholar):
	for pdf in pdf_files:
		txt = re.sub("\W", " ", gs.convert_pdf_to_txt(pdf)).lower()
		#Research determined that the cutting of 35 words gives the 
		#highest accuracy
		words = txt.strip().split()[:35]
		words = " ".join(words)		
		print words
		if gscholar == True:
			bib = load(gs.pdflookup(pdf, all, gs.FORMAT_BIBTEX)[0])
			keys = bib.entries[0].keys()
Example #28
0
    bib_database = bibtexparser.load(bibtex_file, parser=parser)

# Report whether the bibtex input parsed; `bib_database` and `input_b` are
# defined earlier in the script.  An empty/None database aborts here.
if bib_database :
    now = datetime.datetime.now()
    success = "{0} Loaded {1} found {2} entries".format(now, input_b, len(bib_database.entries))
    print(success)
else :
    now = datetime.datetime.now()
    errs = "{0} Failed to read {1}".format(now, input_b)
    print(errs)
    sys.exit(errs)

# Re-serialize the database sorted by author, year and type, writing the
# result to `output_b`.
bibtex_str = None
if bib_database:
    writer = BibTexWriter()
    writer.order_entries_by = ('author', 'year', 'type')
    bibtex_str = bibtexparser.dumps(bib_database, writer)
    #print(str(bibtex_str))
    with open(output_b, "w") as text_file:
        #print >> text_file, bibtex_str
        print(bibtex_str, file=text_file)

# Final status: confirm the write or exit with a timestamped error.
if bibtex_str:
    now = datetime.datetime.now()
    success = "{0} Wrote to {1} with len {2}".format(now, output_b, len(bibtex_str))
    print(success)
else:
    now = datetime.datetime.now()
    errs = "{0} Failed to write {1}".format(now, output_b)
    print(errs)
    sys.exit(errs)
Example #29
0
    'conf-papers.bib', 'articles.bib', 'journal-issues.bib', 'reports.bib'
]

# Tag every entry of each source file with a publication category, collect
# them into `complete_entries` (defined earlier), and write the merged,
# author-sorted result to publications.bib.
for filename in all_bibs:
    parser = bibtexparser.bparser.BibTexParser(common_strings=True)
    with open(filename, encoding='utf-8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    entries = bib_database.entries

    for entry in entries:
        if filename == 'conf-papers.bib':
            entry['category'] = 'Conference'
        elif filename in ('articles.bib', 'journal-issues.bib'):
            entry['category'] = 'Journal'
        elif filename == 'reports.bib':
            entry['category'] = 'Report'
        else:
            print("Unknown filename!")
            break

    complete_entries.extend(entries)

db = BibDatabase()
db.entries = complete_entries

writer = BibTexWriter()
# BUG FIX: ('author') is just the string 'author', so the writer iterated
# its characters as field names and never actually sorted; a one-element
# tuple needs the trailing comma.
writer.order_entries_by = ('author',)

with open('publications.bib', 'w', encoding='utf-8') as bibfile:
    bibtexparser.dump(db, bibfile, writer)
Example #30
0
# Report whether the bibtex input parsed; `bib_database` and `input_b` are
# defined earlier in the script.  An empty/None database aborts here.
if bib_database:
    now = datetime.datetime.now()
    success = "{0} Loaded {1} found {2} entries".format(
        now, input_b, len(bib_database.entries))
    print(success)
else:
    now = datetime.datetime.now()
    errs = "{0} Failed to read {1}".format(now, input_b)
    print(errs)
    sys.exit(errs)

# Re-serialize the database sorted by author, year and type, writing the
# result to `output_b`.
bibtex_str = None
if bib_database:
    writer = BibTexWriter()
    writer.order_entries_by = ('author', 'year', 'type')
    bibtex_str = bibtexparser.dumps(bib_database, writer)
    #print(str(bibtex_str))
    with open(output_b, "w") as text_file:
        print(bibtex_str, file=text_file)

# Final status: confirm the write or exit with a timestamped error.
if bibtex_str:
    now = datetime.datetime.now()
    success = "{0} Wrote to {1} with len {2}".format(now, output_b,
                                                     len(bibtex_str))
    print(success)
else:
    now = datetime.datetime.now()
    errs = "{0} Failed to write {1}".format(now, output_b)
    print(errs)
    sys.exit(errs)
Example #31
0
if os.path.exists(folder + '-clean'):
    print 'cleaning ' + folder + '-clean/'
    for file in os.listdir(folder + '-clean'):
        try:
            if os.path.isfile(folder + '-clean/' + file):
                os.unlink(folder + '-clean/' + file)
        except Exception as e:
            print(e)
else:
    os.makedirs(folder + '-clean')

#Writer customization
writer = BibTexWriter()
writer.contents = ['entries']  # emit only entries (no comments/preambles)
writer.indent = '  '  # two-space indentation for entry fields
# Group output by entry type, then sort by author and year within a type.
writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')

#parser customization, need a new parser for each file
#parser = BibTexParser()
#parser.common_strings = True

#Bib dictionary for months
Months = """@STRING{ jan = "jan"}
@STRING{ feb = "feb"}
@STRING{ mar = "mar"}
@STRING{ apr = "apr"}
@STRING{ may = "may"}
@STRING{ jun = "jun"}
@STRING{ jul = "jul"}
@STRING{ aug = "aug"}
@STRING{ sep = "sep"}
Example #32
0
def fix_conference_title_names(clean_text, key_list=None):
    """
    Mass bibtex fixes: normalize venue titles and author names.

    Parses ``clean_text`` (a bibtex string), replaces recognized venue
    titles in the 'journal'/'booktitle' fields with canonical accronyms
    from ``constants_tex_fixes``, coerces entry types to match the venue
    kind (conference vs journal), clips abstracts, strips noisy fields,
    canonicalizes author name aliases, and returns the re-serialized
    bibtex string.

    Args:
        clean_text (str): bibtex source to fix.
        key_list (list, optional): citation keys actually used in the tex
            documents; unmatched venues are only reported for these keys.
            Defaults to the citations found in the tex documents.

    Returns:
        str: the fixed bibtex string.

    CommandLine:
        ./fix_bib.py
    """

    # Find citations from the tex documents
    if key_list is None:
        key_list = find_used_citations(testdata_fpaths())
        key_list = list(set(key_list))
        ignore = ['JP', '?']
        for item in ignore:
            try:
                key_list.remove(item)
            except ValueError:
                pass

    # Venue strings that matched no known conference/journal (reported).
    unknown_confkeys = []

    # Fields that may carry a venue title.
    conference_keys = [
        'journal',
        'booktitle',
    ]

    ignore_confkey = []

    bib_database = bibtexparser.loads(clean_text)

    bibtex_dict = bib_database.get_entry_dict()

    # Sanity check: the ignore list must not shadow a mapped title.
    isect = set(ignore_confkey).intersection(
        set(constants_tex_fixes.CONFERENCE_TITLE_MAPS.keys()))
    assert len(isect) == 0, repr(isect)

    #ut.embed()
    #conftitle_to_types_hist = ut.ddict(list)

    # bibtexparser stores the entry type under 'ENTRYTYPE', not 'type'.
    type_key = 'ENTRYTYPE'

    debug_author = ut.get_argval('--debug-author', type_=str, default=None)
    # ./fix_bib.py --debug_author=Kappes

    for key in bibtex_dict.keys():
        entry = bibtex_dict[key]

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')
        else:
            debug = False

        if debug:
            print(' --- ENTRY ---')
            print(ut.repr3(entry))

        #if type_key not in entry:
        #    #entry[type_key] = entry['ENTRYTYPE']
        #    ut.embed()

        # Clip abstrat
        if 'abstract' in entry:
            entry['abstract'] = ' '.join(entry['abstract'].split(' ')[0:7])

        # Remove Keys
        remove_keys = [
            'note',
            'urldate',
            'series',
            'publisher',
            'isbn',
            'editor',
            'shorttitle',
            'copyright',
            'language',
            'month',
            # These will be put back in
            #'number',
            #'pages',
            #'volume',
        ]
        entry = ut.delete_dict_keys(entry, remove_keys)

        # Fix conference names
        confkeys = list(set(entry.keys()).intersection(set(conference_keys)))
        #entry = ut.delete_dict_keys(entry, ['abstract'])
        # TODO: FIX THESE IF NEEDBE
        #if len(confkeys) == 0:
        #    print(ut.dict_str(entry))
        #    print(entry.keys())
        if len(confkeys) == 1:
            confkey = confkeys[0]
            old_confval = entry[confkey]
            # Remove curly braces
            old_confval = old_confval.replace('{', '').replace('}', '')
            if old_confval in ignore_confkey:
                print(ut.dict_str(entry))
                continue

            new_confval_candiates = []
            # arXiv preprints keep their venue string untouched.
            if old_confval.startswith('arXiv'):
                continue

            # for conf_title, patterns in constants_tex_fixes.CONFERENCE_TITLE_MAPS.items():
            for conf in constants_tex_fixes.CONFERENCES:
                if conf.matches(old_confval):
                    conf_title = conf.accro()
                    if debug:
                        print('old_confval = %r' % (old_confval, ))
                        print('conf_title = %r' % (conf_title, ))
                    new_confval = conf_title
                    new_confval_candiates.append(new_confval)

            if len(new_confval_candiates) == 0:
                new_confval = None
            elif len(new_confval_candiates) == 1:
                new_confval = new_confval_candiates[0]
            else:
                assert False, 'double match'

            if new_confval is None:
                if key in key_list:
                    unknown_confkeys.append(old_confval)
                #print(old_confval)
            else:
                # Overwrite old confval
                entry[confkey] = new_confval

            # Record info about types of conferneces
            true_confval = entry[confkey].replace('{', '').replace('}', '')

            # FIX ENTRIES THAT SHOULD BE CONFERENCES
            if true_confval in constants_tex_fixes.CONFERENCE_LIST:
                if entry[type_key] == 'inproceedings':
                    pass
                    #print(confkey)
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'article':
                    entry['booktitle'] = entry['journal']
                    del entry['journal']
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'incollection':
                    pass
                else:
                    raise AssertionError('UNKNOWN TYPE: %r' %
                                         (entry[type_key], ))

                if 'booktitle' not in entry:
                    print('DOES NOT HAVE CORRECT CONFERENCE KEY')
                    print(ut.dict_str(entry))

                assert 'journal' not in entry, 'should not have journal'

                #print(entry['type'])
                entry[type_key] = 'inproceedings'

            # FIX ENTRIES THAT SHOULD BE JOURNALS
            if true_confval in constants_tex_fixes.JOURNAL_LIST:

                if entry[type_key] == 'article':
                    pass
                elif entry[type_key] == 'inproceedings':
                    pass
                    #print(ut.dict_str(entry))
                elif entry[type_key] == 'incollection':
                    pass
                else:
                    # BUGFIX: was entry['type'], which raises KeyError here
                    # (the type lives under 'ENTRYTYPE' / type_key) and
                    # masked the intended AssertionError message.
                    raise AssertionError('UNKNOWN TYPE: %r' %
                                         (entry[type_key], ))

                if 'journal' not in entry:
                    print('DOES NOT HAVE CORRECT CONFERENCE KEY')
                    print(ut.dict_str(entry))

                assert 'booktitle' not in entry, 'should not have booktitle'
                #print(entry['type'])
                #entry['type'] = 'article'

            #conftitle_to_types_hist[true_confval].append(entry['type'])

        elif len(confkeys) > 1:
            raise AssertionError('more than one confkey=%r' % (confkeys, ))

        # Fix Authors
        if 'author' in entry:
            authors = six.text_type(entry['author'])
            for truename, alias_list in constants_tex_fixes.AUTHOR_NAME_MAPS.items(
            ):
                pattern = six.text_type(
                    ut.regex_or([
                        ut.util_regex.whole_word(alias) for alias in alias_list
                    ]))
                authors = re.sub(pattern,
                                 six.text_type(truename),
                                 authors,
                                 flags=re.UNICODE)
            entry['author'] = authors
    """
    article = journal
    inprocedings = converence paper

    """

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ut.dict_str(conftitle_to_types_set_hist))

    print(ut.list_str(sorted(unknown_confkeys)))
    print('len(unknown_confkeys) = %r' % (len(unknown_confkeys), ))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    # NOTE(review): entries carry 'ENTRYTYPE', not 'type'; sorting on
    # 'type' is effectively a no-op — confirm before changing output order.
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)
    return new_bibtex_str
Example #33
0
def main(bib_fpath=None):
    r"""
    Entry point of the fixbib script.

    Loads a bibtex library, sanitizes its citation keys, cleans every
    entry cited in the tex documents, drops uncited entries, and writes
    the result to '<name>_clean.bib' (unless --dryrun).

    Args:
        bib_fpath (str, optional): path to the library file; defaults to
            'My Library.bib'.

    CommandLine:
        fixbib
        python -m fixtex bib
        python -m fixtex bib --dryrun
        python -m fixtex bib --dryrun --debug
    """

    if bib_fpath is None:
        bib_fpath = 'My Library.bib'

    # DEBUG = ub.argflag('--debug')
    # Read in text and ensure ascii format
    dirty_text = ut.readfrom(bib_fpath)

    from fixtex.fix_tex import find_used_citations, testdata_fpaths

    # Optional second bibfile whose entries take precedence over the
    # main library (merged in further below).
    if exists('custom_extra.bib'):
        extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False)
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        print('Parsing extra bibtex file')
        extra_text = ut.readfrom('custom_extra.bib')
        extra_database = extra_parser.parse(extra_text, partial=False)
        print('Finished parsing extra')
        extra_dict = extra_database.get_entry_dict()
    else:
        extra_dict = None

    #udata = dirty_text.decode("utf-8")
    #dirty_text = udata.encode("ascii", "ignore")
    #dirty_text = udata

    # parser = bparser.BibTexParser()
    # bib_database = parser.parse(dirty_text)
    # d = bib_database.get_entry_dict()

    print('BIBTEXPARSER LOAD')
    parser = bparser.BibTexParser(ignore_nonstandard_types=False,
                                  common_strings=True)
    ut.delete_keys(parser.alt_dict, ['url', 'urls'])
    print('Parsing bibtex file')
    bib_database = parser.parse(dirty_text, partial=False)
    print('Finished parsing')

    # Sanitize citation keys: drop ':' and collapse '-'/'_' runs into a
    # single underscore.
    bibtex_dict = bib_database.get_entry_dict()
    old_keys = list(bibtex_dict.keys())
    new_keys = []
    for key in ub.ProgIter(old_keys, label='fixing keys'):
        new_key = key
        new_key = new_key.replace(':', '')
        new_key = new_key.replace('-', '_')
        new_key = re.sub('__*', '_', new_key)
        new_keys.append(new_key)

    # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict'
    assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict'

    # Re-key the dictionary and update each entry's ID in place.
    for key, new_key in zip(old_keys, new_keys):
        if key != new_key:
            entry = bibtex_dict[key]
            entry['ID'] = new_key
            bibtex_dict[new_key] = entry
            del bibtex_dict[key]

    # The bibtext is now clean. Print it to stdout
    #print(clean_text)
    verbose = None
    if verbose is None:
        verbose = 1

    # Find citations from the tex documents
    key_list = None
    if key_list is None:
        # Cache disabled (enabled=0): citations are re-scanned every run.
        cacher = ub.Cacher('texcite1', enabled=0)
        data = cacher.tryload()
        if data is None:
            fpaths = testdata_fpaths()
            key_list, inverse = find_used_citations(fpaths,
                                                    return_inverse=True)
            # ignore = ['JP', '?', 'hendrick']
            # for item in ignore:
            #     try:
            #         key_list.remove(item)
            #     except ValueError:
            #         pass
            if verbose:
                print('Found %d citations used in the document' %
                      (len(key_list), ))
            data = key_list, inverse
            cacher.save(data)
        key_list, inverse = data

    # else:
    #     key_list = None

    unknown_pubkeys = []
    debug_author = ub.argval('--debug-author', default=None)
    # ./fix_bib.py --debug_author=Kappes

    if verbose:
        print('Fixing %d/%d bibtex entries' %
              (len(key_list), len(bibtex_dict)))

    # debug = True
    debug = False
    if debug_author is not None:
        debug = False

    # Report citation keys used in the tex files but absent from the
    # library (keys supplied by custom_extra.bib are not missing).
    known_keys = list(bibtex_dict.keys())
    missing_keys = set(key_list) - set(known_keys)
    if extra_dict is not None:
        missing_keys.difference_update(set(extra_dict.keys()))

    if missing_keys:
        print('The library is missing keys found in tex files %s' %
              (ub.repr2(missing_keys), ))

    # Search for possible typos:
    candidate_typos = {}
    sedlines = []
    for key in missing_keys:
        candidates = ut.closet_words(key, known_keys, num=3, subset=True)
        if len(candidates) > 1:
            top = candidates[0]
            # Edit distance 1 => almost certainly a typo; emit a sed
            # one-liner that fixes it in the tex files that cite it.
            if ut.edit_distance(key, top) == 1:
                # "sed -i -e 's/{}/{}/g' *.tex".format(key, top)
                import os
                replpaths = ' '.join(
                    [relpath(p, os.getcwd()) for p in inverse[key]])
                sedlines.append("sed -i -e 's/{}/{}/g' {}".format(
                    key, top, replpaths))
        candidate_typos[key] = candidates
        print('Cannot find key = %r' % (key, ))
        print('Did you mean? %r' % (candidates, ))

    print('Quick fixes')
    print('\n'.join(sedlines))

    # group by file
    # 'just' is the column width for aligning the missing-key report.
    just = max([0] + list(map(len, missing_keys)))
    missing_fpaths = [inverse[key] for key in missing_keys]
    for fpath in sorted(set(ub.flatten(missing_fpaths))):
        # ut.fix_embed_globals()
        subkeys = [k for k in missing_keys if fpath in inverse[k]]
        print('')
        ut.cprint('--- Missing Keys ---', 'blue')
        ut.cprint('fpath = %r' % (fpath, ), 'blue')
        ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'),
                  'blue')
        for key in subkeys:
            print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'),
                                   ' '.join(candidate_typos[key])))

    # for key in list(bibtex_dict.keys()):

    if extra_dict is not None:
        # Extra database takes precidence over regular
        key_list = list(ut.unique(key_list + list(extra_dict.keys())))
        for k, v in extra_dict.items():
            bibtex_dict[k] = v

    full = ub.argflag('--full')

    # Run BibTexCleaner over every cited entry.
    for key in key_list:
        try:
            entry = bibtex_dict[key]
        except KeyError:
            continue
        self = BibTexCleaner(key, entry, full=full)

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')

        if debug:
            ut.cprint(' --- ENTRY ---', 'yellow')
            print(ub.repr2(entry, nl=1))

        entry = self.fix()
        # self.clip_abstract()
        # self.shorten_keys()
        # self.fix_authors()
        # self.fix_year()
        # old_pubval = self.fix_pubkey()
        # if old_pubval:
        #     unknown_pubkeys.append(old_pubval)
        # self.fix_arxiv()
        # self.fix_general()
        # self.fix_paper_types()

        if debug:
            print(ub.repr2(entry, nl=1))
            ut.cprint(' --- END ENTRY ---', 'yellow')
        bibtex_dict[key] = entry

    # Drop entries that are never cited in the tex documents.
    unwanted_keys = set(bibtex_dict.keys()) - set(key_list)
    if verbose:
        print('Removing unwanted %d entries' % (len(unwanted_keys)))
    ut.delete_dict_keys(bibtex_dict, unwanted_keys)

    # Dead diagnostic branch (if 0): compares cleaner pubkey heuristics.
    if 0:
        d1 = bibtex_dict.copy()
        full = True
        for key, entry in d1.items():
            self = BibTexCleaner(key, entry, full=full)
            pub = self.publication()
            if pub is None:
                print(self.entry['ENTRYTYPE'])

            old = self.fix_pubkey()
            x1 = self._pubval()
            x2 = self.standard_pubval(full=full)
            # if x2 is not None and len(x2) > 5:
            #     print(ub.repr2(self.entry))

            if x1 != x2:
                print('x2 = %r' % (x2, ))
                print('x1 = %r' % (x1, ))
                print(ub.repr2(self.entry))

            # if 'CVPR' in self.entry.get('booktitle', ''):
            #     if 'CVPR' != self.entry.get('booktitle', ''):
            #         break
            if old:
                print('old = %r' % (old, ))
            d1[key] = self.entry

    # With --full: dataframe-based reports of missing fields per pub type.
    if full:
        d1 = bibtex_dict.copy()

        import numpy as np
        import pandas as pd
        df = pd.DataFrame.from_dict(d1, orient='index')

        paged_items = df[~pd.isnull(df['pub_accro'])]
        has_pages = ~pd.isnull(paged_items['pages'])
        print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
        print(ub.repr2(paged_items[~has_pages]['title'].values.tolist()))

        entrytypes = dict(list(df.groupby('pub_type')))
        if False:
            # entrytypes['misc']
            g = entrytypes['online']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            entrytypes['book']
            entrytypes['thesis']
            g = entrytypes['article']
            g = entrytypes['incollection']
            g = entrytypes['conference']

        def lookup_pub(e):
            # Map an entry type to its (pub kind, venue field) pair.
            if e == 'article':
                return 'journal', 'journal'
            elif e == 'incollection':
                return 'booksection', 'booktitle'
            elif e == 'conference':
                return 'conference', 'booktitle'
            return None, None

        for e, g in entrytypes.items():
            print('e = %r' % (e, ))
            # Drop all-null columns before reporting.
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            if 'pub_full' in g.columns:
                place_title = g['pub_full'].tolist()
                print(ub.repr2(ub.dict_hist(place_title)))
            else:
                print('Unknown publications')

        if 'report' in entrytypes:
            g = entrytypes['report']
            missing = g[pd.isnull(g['title'])]
            if len(missing):
                print('Missing Title')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'journal' in entrytypes:
            g = entrytypes['journal']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['journal'])]
            if len(missing):
                print('Missing Journal')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'conference' in entrytypes:
            g = entrytypes['conference']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'incollection' in entrytypes:
            g = entrytypes['incollection']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'thesis' in entrytypes:
            g = entrytypes['thesis']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['institution'])]
            if len(missing):
                print('Missing Institution')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        # import utool
        # utool.embed()

    # Overwrite BibDatabase structure
    bib_database._entries_dict = bibtex_dict
    bib_database.entries = list(bibtex_dict.values())

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ub.repr2(conftitle_to_types_set_hist))

    print('Unknown conference keys:')
    print(ub.repr2(sorted(unknown_pubkeys)))
    print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), ))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)

    # Need to check
    #jegou_aggregating_2012

    # Fix the Journal Abreviations
    # References:
    # https://www.ieee.org/documents/trans_journal_names.pdf

    # Write out clean bibfile in ascii format
    clean_bib_fpath = ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean')

    if not ub.argflag('--dryrun'):
        ut.writeto(clean_bib_fpath, new_bibtex_str)
Example #34
0
def main():
    """
    Refresh Google-Scholar citation counts for the entries in ircre.bib.

    For every entry after the first 100, queries scholar.py with the
    entry's cluster id, updates the 'cited' field when the new count is a
    plausible increase (less than +8), and writes the whole database to
    cited-add-ircre.bib after each entry (checkpoint) and once at the end.

    Returns:
        int: 0 on completion.
    """
    import bibtexparser
    from bibtexparser.bwriter import BibTexWriter

    with open('ircre.bib', encoding='utf8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    entries = bib_database.entries
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")
    print("Total articles number: " + str(len(entries)))
    print("---------------------------")
    print("---------------------------")
    print("---------------------------")

    writer = BibTexWriter()
    writer.indent = '    '
    writer.order_entries_by = ('order', )

    # Copies of the @article entries (kept for parity with the original
    # script; not used further below).
    articleentries = [entry.copy() for entry in entries
                      if entry['ENTRYTYPE'] == 'article']

    # Skip the first 100 entries, process the rest.
    for i in range(100, len(entries)):
        print("---------------------------")
        print("Entry number: " + str(i))
        title = entries[i]['title']
        clusterid = entries[i]['clusterid']
        print("Title: " + title)
        print("Cluster ID: " + clusterid)

        if not clusterid == "unknown":
            print("hello" + str(i))
            try:
                # NOTE(review): clusterid is interpolated into a shell
                # command — fine for trusted .bib data, unsafe otherwise.
                citations = os.popen('''./scholarpy/scholar.py -c 1 -C ''' +
                                     clusterid +
                                     ''' |grep -v list |grep Citations'''
                                     ).read().strip().split()[-1]
            except Exception:
                # Best-effort: any scraping/parsing failure counts as
                # unknown. (Was a bare except, which also swallowed
                # KeyboardInterrupt/SystemExit.)
                citations = "unknown"
        else:
            citations = "unknown"

        print("new Citations: " + citations)

        if 'cited' in entries[i]:
            oldcitednumber = int(entries[i]['cited'])
        else:
            oldcitednumber = 0

        print("Old Cited Number: " + str(oldcitednumber))

        # Only accept increases, and reject implausible jumps (>= 8).
        if not citations == "unknown":
            citednumber = int(citations)
            if citednumber > oldcitednumber and (
                (citednumber - oldcitednumber) < 8):
                entries[i]['cited'] = str(citednumber)

        # Checkpoint after every entry so progress survives interruption.
        with open('cited-add-ircre.bib', 'w', encoding='utf8') as newbibfile:
            bibtexparser.dump(bib_database, newbibfile, writer=writer)
        os.popen("cp cited-add-ircre.bib tempcited-add-ircre.bib")

    with open('cited-add-ircre.bib', 'w', encoding='utf8') as newbibfile:
        bibtexparser.dump(bib_database, newbibfile, writer=writer)

    return 0