Beispiel #1
0
def metaDictToBib(jobid, metadict, omit_keys, path_prefix):
    """Export meta data to bibtex format

    Args:
        jobid (int): id of job.
        metadict (DocMeta): meta dict of a doc.
        alt_dict (dict): dict for key changes.
        omit_keys (list): keys to omit in the converted dict.
        path_prefix (str): folder path to prepend to attachment file paths.

    Returns:
        rec (int): 0 if successful, 1 otherwise.
        jobid (int): the input jobid as it is.
        dbtext (str): formated bibtex entry, '' if <rec>==1.
        docid (int): id of the processed document.
    """

    try:
        alt_dict = INV_ALT_KEYS
        ord_dict = toOrdinaryDict(metadict, alt_dict, omit_keys, path_prefix)

        db = BibDatabase()
        db.entries = [
            ord_dict,
        ]
        writer = BibTexWriter()
        writer.indent = '    '
        writer.comma_first = False
        dbtext = writer.write(db)

        return 0, jobid, dbtext, metadict['id']

    except Exception:
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']
Beispiel #2
0
    def save(self, bibfile=-1):

        """
        save the biblist with :
            - the original filename without any arg
              or
            - the given file name if not empty

        """

        if bibfile == -1:
            bibfile = self.name

        db = BibDatabase()
        for item in self:
            db.entries.append(item)

        writer = BibTexWriter()    # this class is needed to prepare format
        writer.indent = '   '      # indent entries with 4 spaces instead of one
        writer.comma_first = False # place the comma at the beginning of the line
        writer.align_values = True # with a nice indentation

        print('')
        print(os.path.join(os.path.expandvars('$PYBLIO_BIB'), bibfile))
        print('')

        with open(os.path.join(os.path.expandvars('$PYBLIO_BIB'), bibfile), 'w') as bf:
            bf.write('\n')
            bf.write(writer.write(db))
            bf.write('\n')
Beispiel #3
0
def convert_to_bib(content, save_fpath):
    papers = parse_api_response(content)
    db = BibDatabase()

    db.entries = papers
    writer = BibTexWriter()
    writer.indent = "    "
    writer.comma_first = True
    with open(save_fpath, "w+") as bibfile:
        bibfile.write(writer.write(db))
    def test_comma_first(self):
        with io.open(_data_path('book.bib'), 'r') as bibfile:
            bib = BibTexParser(bibfile.read())

        with io.open(_data_path('book_comma_first.bib'), 'r') as bibfile:
            expected = bibfile.read()
        writer = BibTexWriter()
        writer.indent = '   '
        writer.comma_first = True
        result = writer.write(bib)
        self.maxDiff = None
        self.assertEqual(expected, result)
Beispiel #5
0
    def test_comma_first(self):
        with io.open(_data_path('book.bib'), 'r') as bibfile:
            bib = BibTexParser(bibfile.read())

        with io.open(_data_path('book_comma_first.bib'), 'r') as bibfile:
            expected = bibfile.read()
        writer = BibTexWriter()
        writer.indent = '   '
        writer.comma_first = True
        result = writer.write(bib)
        self.maxDiff = None
        self.assertEqual(expected, result)
    def test_comma_first_and_trailing_comma(self):
        with io.open(_data_path('article.bib'), 'r') as bibfile:
            bib = BibTexParser(bibfile.read())

        with io.open(_data_path('article_comma_first_and_trailing_comma_output.bib'), 'r') as bibfile:
            expected = bibfile.read()
        writer = BibTexWriter()
        writer.add_trailing_comma = True
        writer.comma_first = True
        result = writer.write(bib)
        self.maxDiff = None
        self.assertEqual(expected, result)
Beispiel #7
0
    def test_comma_first_and_trailing_comma(self):
        with io.open(_data_path('article.bib'), 'r') as bibfile:
            bib = BibTexParser(bibfile.read())

        with io.open(
                _data_path(
                    'article_comma_first_and_trailing_comma_output.bib'),
                'r') as bibfile:
            expected = bibfile.read()
        writer = BibTexWriter()
        writer.add_trailing_comma = True
        writer.comma_first = True
        result = writer.write(bib)
        self.maxDiff = None
        self.assertEqual(expected, result)
Beispiel #8
0
    def write_res(self, passed_entries, passed_name, failed_entries,
                  failed_name):
        db = BibDatabase()
        db.entries = passed_entries
        writer = BibTexWriter()
        writer.indent = '    '
        writer.comma_first = False
        with open("results/" + passed_name, 'w') as bibfile:
            bibfile.write(writer.write(db))
        with open("results/" + failed_name, 'w') as f:
            json.dump(failed_entries, f, indent=4)

        print("Writing data to filesystem!")
        print("  -successful results can be found in: results/" + passed_name)
        print("  -failed results can be found in: results/" + failed_name)
Beispiel #9
0
        None

dup(entries, 'ID')
dup(entries, 'title')

entries = list(filter(lambda x: not hasattr(x, 'ID'), entries))

entries.sort(key=lambda x: x['year'], reverse=True)
entries.sort(key=lambda x: x['ID'], reverse=True)

db.entries = entries

if len(keys) != len(set(keys)):
    print('\u001b[31m' + 'Duplicate Keys' + '\u001b[0m')
    for i in set(keys):
        if keys.count(i) > 1:
            print('-- Duplicate ', i, '--')

if len(set(db.entries_dict.keys())) != len(set(keys)):
    print('\u001b[31m' + 'Unparsed Keys' + '\u001b[0m')
    for i in set(keys) - set(db.entries_dict.keys()):
        print('-- MISSING: ', i, '--')

writer = BibTexWriter()
writer.indent = '   '
writer.comma_first = False  # place the comma at the beginning of the line
with open('bibtex.bib', 'w') as bibfile:
    bibfile.write(writer.write(db))

print('bibtex written')
def toString(e):
    writer = BibTexWriter()
    writer.indent = '    '     # indent entries with 4 spaces instead of one
    writer.comma_first = True  # place the comma at the beginning of the line

    return writer._entry_to_bibtex(e)
Beispiel #11
0
    def formatText(self):
        if self.BibtexfilePath != '':
            self.openfile()
        else:
            self.readcontent()

        m = self.getMap()
        m['IEEE Global Communications Conference'] = m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        del m['IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        print m

        length = 0
        nb = {}
        for bibtex in self.allbibtex:
            for key in bibtex.keys():
                if len(key) > length and key != 'ENTRYTYPE':
                    length = len(key)
            for k, v in bibtex.items():
                if k == 'ENTRYTYPE' or k == 'ID':
                    nb[k] = v
                    continue
                elif k == 'ID':
                    nb[k] = v
                    continue
                elif k == 'doi' or k == 'ISSN' or k == 'keywords':
                    continue
                elif v == '':
                    continue
                elif 'url' in k:
                    continue

                nk = k + (length - len(k)) * ' '

                if 'booktitle' in nk:
                    if '(' in v:
                        v1 = v.split('(')[1].split(')')[0]
                        nb[nk] = 'Proc. of ' + v1
                        continue
                    flag = 0 # 未更改booktitle

                    to_remove = "~`!@#$%^&*(){}[];':<>|-=_+"
                    table = {ord(char): None for char in to_remove}
                    clean_v = v.translate(table)

                    #clean_v = v.translate(string.punctuation)
                    #print clean_v
                    for kk, vv in m.items():
                        if kk in clean_v:
                            nb[nk] = 'Proc. of ' + vv[0]
                            publish = 'publish' + (length - 7) * ' '
                            nb[publish] = vv[1]
                            flag = 1
                            break
                    if flag == 0:
                        nb[nk] = v
                        print v
                    continue

                elif nk.strip() == 'title' and 'booktitle' not in nk:
                    self.tilte = v
                    nv = v.split(' ')
                    for i in range(len(nv)):
                        # 标题除介词和冠词外,首字母大写
                        if nv[i] in self.prep or nv[i] in self.artie:
                            continue
                        # 首字母大写
                        else:
                            if 97 <= ord(nv[i][0]) <= 122:
                                nv[i] = chr(ord(nv[i][0])-32)+nv[i][1:]

                    v = ' '.join(nv)
                    nb[nk] = '{' + v + '}'
                    continue

                elif 'pages' in nk:
                    if '--' in v:
                        nb[nk] = v
                        continue
                    nb[nk] = v.replace('-', '--')
                    continue
                elif 'author' in nk:
                    if '\n' in v:
                        nb[nk] = v.replace('\n', ' ')
                        continue

                # 其他不做改变
                nb[nk] = v

            db = BibDatabase()
            db.entries = [nb]
            writer = BibTexWriter()
            writer.indent = '\t'  # indent entries with 4 spaces instead of one
            writer.comma_first = False  # place the comma at the beginning of the line
            with open(self.tilte+'.bib', 'wb') as bibfile:
                bibfile.write(writer.write(db))
Beispiel #12
0
        dbtext = writer.write(db)

        return 0, jobid, dbtext, metadict['id']

    except Exception:
        LOGGER.exception('Failed to write to bibtex')
        return 1, jobid, '', metadict['id']


if __name__ == '__main__':
    aa = readBibFile('test.bib')
    pprint(aa[-1])

    # test export

    entries = []
    for eii in aa:
        dii = toOrdinaryDict(eii, INV_ALT_KEYS, OMIT_KEYS, '/home/')
        entries.append(dii)

    db = BibDatabase()
    db.entries = entries
    writer = BibTexWriter()
    writer.indent = '    '
    writer.comma_first = False

    with open('testexport.bib', 'w') as fout:
        dbtext = writer.write(db)
        print('dbtext', dbtext)
        fout.write(dbtext)
Beispiel #13
0
    def formatText(self):
        if self.BibtexfilePath != '':
            self.openfile()
        else:
            self.readcontent()

        m = self.getMap()
        m['IEEE Global Communications Conference'] = m[
            'IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        del m[
            'IEEE Global Communications Conference, incorporating the Global Internet Symposium']
        print m

        length = 0
        nb = {}
        for bibtex in self.allbibtex:
            for key in bibtex.keys():
                if len(key) > length and key != 'ENTRYTYPE':
                    length = len(key)
            for k, v in bibtex.items():
                if k == 'ENTRYTYPE' or k == 'ID':
                    nb[k] = v
                    continue
                elif k == 'ID':
                    nb[k] = v
                    continue
                elif k == 'doi' or k == 'ISSN' or k == 'keywords':
                    continue
                elif v == '':
                    continue
                elif 'url' in k:
                    continue

                nk = k + (length - len(k)) * ' '

                if 'booktitle' in nk:
                    if '(' in v:
                        v1 = v.split('(')[1].split(')')[0]
                        nb[nk] = 'Proc. of ' + v1
                        continue
                    flag = 0  # 未更改booktitle

                    to_remove = "~`!@#$%^&*(){}[];':<>|-=_+"
                    table = {ord(char): None for char in to_remove}
                    clean_v = v.translate(table)

                    #clean_v = v.translate(string.punctuation)
                    #print clean_v
                    for kk, vv in m.items():
                        if kk in clean_v:
                            nb[nk] = 'Proc. of ' + vv[0]
                            publish = 'publish' + (length - 7) * ' '
                            nb[publish] = vv[1]
                            flag = 1
                            break
                    if flag == 0:
                        nb[nk] = v
                        print v
                    continue

                elif nk.strip() == 'title' and 'booktitle' not in nk:
                    self.tilte = v
                    nv = v.split(' ')
                    for i in range(len(nv)):
                        # 标题除介词和冠词外,首字母大写
                        if nv[i] in self.prep or nv[i] in self.artie:
                            continue
                        # 首字母大写
                        else:
                            if 97 <= ord(nv[i][0]) <= 122:
                                nv[i] = chr(ord(nv[i][0]) - 32) + nv[i][1:]

                    v = ' '.join(nv)
                    nb[nk] = '{' + v + '}'
                    continue

                elif 'pages' in nk:
                    if '--' in v:
                        nb[nk] = v
                        continue
                    nb[nk] = v.replace('-', '--')
                    continue
                elif 'author' in nk:
                    if '\n' in v:
                        nb[nk] = v.replace('\n', ' ')
                        continue

                # 其他不做改变
                nb[nk] = v

            db = BibDatabase()
            db.entries = [nb]
            writer = BibTexWriter()
            writer.indent = '\t'  # indent entries with 4 spaces instead of one
            writer.comma_first = False  # place the comma at the beginning of the line
            with open(self.tilte + '.bib', 'wb') as bibfile:
                bibfile.write(writer.write(db))
def toString(e):
    writer = BibTexWriter()
    writer.indent = '    '  # indent entries with 4 spaces instead of one
    writer.comma_first = True  # place the comma at the beginning of the line

    return writer._entry_to_bibtex(e)