Ejemplo n.º 1
0
def read_bibtex(bibtex_str):
    """Parse a BibTeX string and return its entries as a list of dicts,
    with keyword fields split, LaTeX escapes converted to unicode, and
    author names normalized."""
    bib_parser = BibTexParser(common_strings=True)
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenize_fields = True

    entries = bib_parser.parse(bibtex_str).entries
    customized = []
    for entry in entries:
        # Same customization pipeline as before, applied per entry.
        entry = bibtexparser.customization.keyword(entry)
        entry = bibtexparser.customization.convert_to_unicode(entry)
        entry = bibtexparser.customization.author(entry)
        customized.append(entry)
    return customized
Ejemplo n.º 2
0
    def _open_bib_db(self, bibfile):
        """Open the bibtex database"""
        # Remember the directory the .bib file lives in for later lookups.
        self._bib_path = os.path.dirname(bibfile)

        db_parser = BibTexParser()
        db_parser.ignore_nonstandard_types = False
        db_parser.homogenize_fields = True

        with open(bibfile) as handle:
            return bibtexparser.load(handle, db_parser)
    def __init__(self, file):
        """Parse *file* (BibTeX source) and classify its entries into
        papers with a usable doi/title, ignored raw entries, and entries
        missing from the database."""

        def _customize(record):
            """Use some functions delivered by the library

            :param record: a record
            :returns: -- customized record
            """
            record = convert_to_unicode(record)
            # add_plaintext_fields removes {} etc. from a field and stores
            # the cleaned value under plain_<field>.
            return add_plaintext_fields(record)

        bib_parser = BibTexParser()
        bib_parser.homogenize_fields = True
        bib_parser.customization = _customize
        database = bibtexparser.loads(self.fix_missing_cite_keys(file),
                                      bib_parser)

        file_papers = []
        self._ignored_raw_entries = []
        self._missing_entries = []

        for raw in database.entries:
            has_doi = "doi" in raw
            has_title = "title" in raw
            if not has_doi and not has_title:
                # Nothing usable to identify this entry by — keep it raw.
                self._ignored_raw_entries.append(raw)
                continue
            doi = raw["doi"] if has_doi else ""
            title = raw["plain_title"] if has_title else ""
            file_papers.append(LiteratureEntry(doi=doi, title=title))

        self._papers = self.load_entries_from_database(file_papers)

        # Record the entries the database lookup could not resolve.
        for candidate in file_papers:
            if not candidate.result_found:
                self._missing_entries.append(candidate)
Ejemplo n.º 4
0
def sync():
    """Pull the latest bibliography repository and reload the BibTeX and
    token databases into the module-level globals.

    Returns a short status string for the caller.
    """
    global repo, bib_database, token_db

    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenize_fields = True

    repo = git.Repo(repo_path)
    origin = repo.remotes.origin
    origin.pull()

    with open(repo_path + "/" + repo_name) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser)

    # Tokens are optional. The previous bare `except:` swallowed every
    # exception (including KeyboardInterrupt) and left the global
    # `token_db` undefined; catch only read/parse failures and fall back
    # to an empty mapping instead.
    try:
        with open(repo_path + "/tokens.json") as tokens:
            token_db = json.load(tokens)
    except (OSError, ValueError):
        token_db = {}

    return "Synced!"
Ejemplo n.º 5
0
def main():
    """Read the input .bib file, optionally strip noisy fields, protect
    titles from BibTeX case-mangling, and write the result to the
    output .bib file."""
    args = _args()

    bibfile = args.input_bib
    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenize_fields = True
        parser.common_strings = True
        parser.customization = keep_uppercase
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

        if args.clean:
            # Drop bulky / tool-specific fields. ('file' was listed twice
            # in the original tuple; the duplicate is removed.)
            for entry in bib_database.entries:
                for k in ('file', 'annote', 'abstract', 'url', 'link'):
                    entry.pop(k, None)

        # Wrap titles in an extra brace pair so BibTeX preserves casing.
        for entry in bib_database.entries:
            if 'title' in entry:  # some entry types carry no title
                entry['title'] = '{{{}}}'.format(entry['title'])

        bibwriter = BibTexWriter()
        with open(args.output_bib, 'w') as outbib:
            bibtexparser.dump(bib_database, outbib, bibwriter)
Ejemplo n.º 6
0
def readBibFile(bibfile):
    """Read and parse bibtex file.

    Args:
        bibfile (str): abspath to input bibtex file.

    Returns: results (list): DocMeta dicts, each for an parsed entry in the
                             bibtex file. None if the file does not exist.
    """

    bibfile = os.path.abspath(bibfile)
    if not os.path.exists(bibfile):
        return None

    with open(bibfile, 'r') as fin:
        bib_parser = BibTexParser()
        bib_parser.homogenize_fields = True
        bib_parser.customization = customizations
        database = bibtexparser.load(fin, parser=bib_parser)

        LOGGER.info('Read in bib file: %s' % bibfile)

    results = []

    for entry in database.entries:
        entry = splitNames(entry)
        entry = altKeys(entry, ALT_KEYS)
        entry['citationkey'] = entry['ID']

        # 'folders_l' is internal bookkeeping; keep it out of the DocMeta.
        entry.pop('folders_l', None)

        meta = sqlitedb.DocMeta()
        meta.update(entry)
        results.append(meta)

    return results
Ejemplo n.º 7
0
    os.environ['PYTHONUNBUFFERED'] = '1'  # child processes inherit unbuffered stdio too
    buf_arg = 1
# Re-wrap stdout/stderr with line buffering so progress output appears immediately.
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', buf_arg)
sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', buf_arg)

# After an earlier renaming mistake the bibtex ids/keys are out of sync with
# numpaperorder; this script fixes that here.

# Safety switch: when False, only report what would be moved/renamed.
DOTHEMOVES = False  # True # False

# get list of immediate child subdirs SO:973473 :
subdirs = sorted(next(os.walk(dir_data))[1])  #ok
# 02_SMC Conference 2015:044/74: orig 'G. Presti and D.A. Mauro and G. Haus' ->  _DATA_/02_SMC\ Conference\ 2015/smc_2015_044.pdf
nummoved = 0
# homogenize_fields: Sanitize BibTeX field names, for example change `url` to `link` etc.
tbparser = BibTexParser()
tbparser.homogenize_fields = False  # no dice
# Mapping 'url' -> 'url' in alt_dict stops bibtexparser rewriting it to 'link'.
tbparser.alt_dict[
    'url'] = 'url'  # this finally prevents change 'url' to 'link'
# One <subdir>/<subdir>.bib file per conference; parse each in turn.
for subdir in subdirs:
    bibsubdir = os.path.join(dir_data, subdir)
    bibfile = os.path.join(bibsubdir, "%s.bib" % (subdir))
    print((bibfile, os.path.isfile(bibfile)))
    with open(bibfile) as bibtex_file:
        bibtex_str = bibtex_file.read()
    bib_database = bibtexparser.loads(bibtex_str, tbparser)
    #pprint.pprint(bib_database.entries) # already here,replaces 'url' with 'link'
    confbiblen = len(bib_database.entries)
    havemoves = False
    for icpbe, confpaperbibentry in enumerate(bib_database.entries):
        oldid = confpaperbibentry['ID']
        year = confpaperbibentry['year']
        # NOTE(review): the loop body is truncated in this excerpt.
def create_bibtex_parser() -> BibTexParser:
    """Build a BibTexParser that accepts common @string macros, keeps
    nonstandard entry types, normalizes field names, and applies
    homogenize_latex_encoding to every record."""
    p = BibTexParser(common_strings=True)
    p.ignore_nonstandard_types = False
    p.homogenize_fields = True
    p.customization = homogenize_latex_encoding
    return p
Ejemplo n.º 9
0
    def parse_bib(self):
        """Walk ``self.directory``, parse every Web-of-Science ``.bib``
        export found there, and turn each entry into a document — a list
        of ``(field, value)`` tuples — handed to ``self.generateDoc``.

        NOTE(review): this is Python 2 code (``print`` statement,
        ``dict.iteritems``); keep that in mind before modernizing.
        """
        # docs = []
        content_saved = 0
        for root, dirs, files in os.walk(self.directory):
            for file in files:
                if file.endswith('.bib'):
                    with open(self.directory + file) as bibtex_file:
                        try:
                            parser = BibTexParser()
                            bibtex_str = bibtex_file.read()
                            # Pre-clean WoS export quirks (escaped braces,
                            # nonstandard field names) so the parser accepts it.
                            st = bibtex_str.replace("Web of Science-Category", "Web-of-Science-Category").replace("\{",
                                                                                                                  "[").replace(
                                "\}", "]").replace("\{", "[").replace("\%", "%").replace("\\", '')
                            st = st.replace('Early Access Date', 'month').replace('Early Access Year',
                                                                                  'year')  # .replace('\n', ' ')
                            parser.ignore_nonstandard_types = False
                            parser.customization = convert_to_unicode
                            parser.homogenize_fields = True
                            bib_database = bibtexparser.loads(st, parser=parser)
                        except:
                            # Remember the offending file name, then re-raise.
                            self.error.append(file)
                            raise

                    content_entries = len(bib_database.entries)
                    self.total_entries += content_entries
                    print 'O arquivo ' + file + ' tem: ' + str(content_entries) + ' registros'
                    # Build one output document per parsed entry.
                    for key, item in bib_database.entries_dict.items():
                        # Strip braces from every field value up front.
                        for k,v in item.iteritems():
                            item[k]=self.remove_chaves(v)
                        doc = []
                        # article = key

                        # 1-based running id across all processed files.
                        id = ('id', str(content_saved + 1))
                        doc.append(id)

                        try:
                            unique_id = ('unique_id', (self.remove_chaves(item['unique-id'])).strip())
                            doc.append(unique_id)

                            # print(unique_id)
                        except KeyError:
                            pass

                        try:
                            title = ('title', (self.remove_chaves(item['title'])).strip())
                            doc.append(title)
                            # print(title)
                        except KeyError:
                            pass

                        # Delimiters used to split author / keyword lists.
                        padrao = ',|\n|\*|;'
                        try:
                            authors = item['author']
                            for author in re.split('\sand\s', authors):
                                # Reverse "Last, First" into "First Last".
                                name_list = re.split(padrao, author)
                                name_list.reverse()
                                name = ('Author', (' '.join(name_list)).strip())
                                doc.append(name)

                            # docs.append(doc)
                        except KeyError:
                            pass

                        try:
                            publisher = (item['publisher']).strip()
                            publisher_add = ('publisher', publisher)
                            doc.append(publisher_add)
                            journal = (item['journal']).strip()

                            try:
                                volume = item['volume']
                                pub_journal = ('publisher_journal_volume_facet',
                                               "{}|{}|{}".format(publisher.strip(), journal.strip(), volume.strip()))
                                doc.append(pub_journal)
                            except:
                                # No volume: leave the last facet slot blank.
                                pub_journal = (
                                    'publisher_journal_volume_facet',
                                    "{}|{}| ".format(publisher.strip(), journal.strip()))
                                doc.append(pub_journal)
                        except KeyError:
                            pass

                        # print(pub_journal)
                        try:

                            year = item['year']

                            group = gYear(int(year))

                        except:
                            year = ''
                            group = ''

                        doc.append(('Year', year.strip()))

                        try:

                            month = item['month'][0:4]
                            year_month = (
                                'Year_Month_facet', '{}|{}|{}'.format(group.strip(), year.strip(), month.strip()))
                            doc.append(year_month)
                        except KeyError:
                            year_month = ('Year_Month_facet', '{}|{}|'.format(group.strip(), year.strip()))
                            doc.append(year_month)
                        # print(year_month)

                        try:
                            abstract = ('abstract', self.remove_chaves(item['abstract']))
                            doc.append(abstract)
                            # print(abstract)
                        except KeyError:
                            pass

                        try:
                            # NOTE(review): comparing a list to 2 is always
                            # False under Py2 mixed-type ordering, so the
                            # else branch is always taken — verify intent.
                            if (item['address'].split(',')[-1]).split(' ') < 2:
                                address = ('address', (item['address'].split(',')[-1]).strip())
                            else:
                                address = (
                                    'address', (item['address'].split(',')[-1].split(' ')[-1]).strip())
                            doc.append(address)
                            # print((item['address'].split())[-1][0:-1])
                        except:
                            pass

                        try:

                            tipe = ('type', (item['type']).strip())
                            doc.append(tipe)
                            # print(type)
                        except KeyError:
                            pass

                        try:

                            # One affiliation per line; the country is taken
                            # as the last word of the last comma-field.
                            affiliations = (self.remove_chaves(item['affiliation'])).split('\n')
                            for aff in affiliations:
                                affiliation = ('affiliation', aff)
                                # c = aff.split(',')[-1].replace('.','')
                                country_aff = (
                                'affiliation_country', aff.split(',')[-1].split(' ')[-1].replace('.', ''))
                                # print country_aff
                                doc.append(affiliation)
                                doc.append(country_aff)

                                # print(type)
                        except KeyError:
                            pass

                        try:
                            language = ('language', (self.remove_chaves(item['language'])).strip())
                            doc.append(language)
                            # print(language)
                        except KeyError:
                            pass

                        try:

                            DOI = ('DOI', (self.remove_chaves(item['doi'])).strip())
                            doc.append(DOI)
                        except KeyError:
                            pass

                        # print(DOI)
                        try:
                            ISSN = ('ISSN', (item['issn']).strip())
                            doc.append(ISSN)
                        except KeyError:
                            pass

                        # print(ISSN)

                        try:
                            EISSN = ('EISSN', (item['eissn']).strip())
                            doc.append(EISSN)

                            # print(EISSN)
                        except KeyError:
                            pass

                        try:
                            keywords = re.split(padrao, self.remove_chaves(item['keywords']))
                            for k in keywords:
                                if k:
                                    doc.append(('keyword', k.strip()))

                        except KeyError:
                            pass

                        try:
                            keywords_plus = re.split(padrao, (item['keywords-plus']).strip())
                            for k in keywords_plus:
                                if k:
                                    doc.append(('keyword_plus', k.strip()))
                        except KeyError:
                            pass

                        try:

                            research_area = (
                                'research_areas', ((item['research-areas']).replace('; ', '|')).strip())
                            doc.append(research_area)
                            # print(research_area)
                        except KeyError:
                            pass

                        try:
                            web_of_science_categories = (
                                'web_of_science_categories',
                                ((item['web-of-science-categories']).replace('; ', '|')).strip())
                            doc.append(web_of_science_categories)

                            # print(web_of_science_categories)
                        except KeyError:
                            pass

                        try:
                            funding_acknowledgement = (
                                'funding_acknowledgement', (item['funding-acknowledgement']).strip())
                            # print(funding_acknowledgement)
                            doc.append(funding_acknowledgement)
                        except KeyError:
                            pass

                        try:
                            nro_cited_ref = (
                                'number_of_cited_references', (item['number-of-cited-references']).strip())
                            doc.append(nro_cited_ref)

                            # print(nro_cited_ref)
                        except KeyError:
                            pass

                        try:
                            tc = (item['times-cited']).strip()
                            times_cited = ('times_cited', tc)
                            doc.append(times_cited)

                            # print(times_cited)
                        except KeyError:
                            pass

                        try:
                            # NOTE(review): `tc` may be stale from a previous
                            # entry when 'times-cited' was missing above —
                            # verify this is intended.
                            times_cited_facet = ('times_cited_facet',facet_citations(tc))
                            doc.append(times_cited_facet)
                        except:
                            pass


                        try:
                            journal_iso = ('journal_iso', (item['journal-iso']).strip())
                            doc.append(journal_iso)

                            # print(journal_iso)
                        except KeyError:
                            pass

                        # docs.append(doc)
                        self.generateDoc(doc)
                        content_saved += 1
                    # print("{} arquivos salvos".format(content))
                    # print(file)
        # for doc in docs:
        #     self.generateDoc(doc)

        # Persist the accumulated documents and record run statistics.
        self.save_xml()
        self.total_saved = content_saved
        self.logger()
Ejemplo n.º 10
0
Archivo: u2o.py Proyecto: USEPA/USEEIO
def generate_sources(bib_path, bibids):
    """Read the .bib file at *bib_path* and build an olca _Source object
    for every requested citation id in *bibids* (a mapping of bib id to
    display name). Unknown ids are reported and skipped."""
    import bibtexparser
    from bibtexparser.bparser import BibTexParser


    def customize(record):
        """Use some functions delivered by the library

        :param record: a record
        :returns: -- customized record
        """
        #record = bibtexparser.customization.author(record)
        record = bibtexparser.customization.add_plaintext_fields(record)
        return bibtexparser.customization.doi(record)

    bib_parser = BibTexParser(common_strings=True)
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenize_fields = True
    bib_parser.customization = customize

    def load_entries(path: str):
        """Parse the bib file and key its entries by citation id."""
        with open(path) as handle:
            return bib_parser.parse_file(handle).entries_dict


    def build_sources(wanted, entries):
        """Map requested bib ids onto _Source objects."""
        # Target source field -> bib field(s); lists are joined with ', '.
        field_map = {'description': ['plain_author',
                                     'plain_publisher',
                                     'plain_title',
                                     'plain_journal',
                                     'year'],
                     'textReference': '',
                     'year': 'plain_year',
                     'url': 'url',
                     }
        sources = []
        for bibid, name in wanted.items():
            if bibid not in entries:
                print(f'{bibid} not found')
                continue
            record = entries[bibid]
            source = {'name': name}
            for key, value in field_map.items():
                try:
                    if isinstance(value, list):
                        source[key] = ', '.join(record[v] for v in value if v in record)
                    else:
                        source[key] = record[value]
                except KeyError:
                    # Missing bib field -> empty string, as before.
                    source[key] = ''
            sources.append(_Source(source))
        return sources

    entries = load_entries(bib_path)
    return build_sources(bibids, entries)
Ejemplo n.º 11
0
    save_bib_hash()


def show_error(obj):
    """Print a human-readable diagnostic for a known server error
    response object; silently ignore anything unrecognized."""
    if "reason" not in obj:
        return
    if obj["reason"] == "access_denied":
        print(
            "[!] Access denied! Your token is not valid for this operation. Verify whether the file '%s' contains a valid token."
            % args.token_file)


# CLI-selected action to dispatch on later.
action = args.action

parser = BibTexParser(common_strings=True)
parser.ignore_nonstandard_types = False
parser.homogenize_fields = True

# Start from an empty database when main.bib is missing or empty;
# otherwise parse it, treating any parse failure as fatal.
if not os.path.exists("main.bib") or os.stat("main.bib").st_size == 0:
    bib_database = bibtexparser.loads("\n")
else:
    try:
        with open('main.bib') as bibtex_file:
            bib_database = bibtexparser.load(bibtex_file, parser)
            #print(bib_database.entries)
    except Exception as e:
        print("Malformed bibliography file!\n")
        print(e)
        sys.exit(1)

# Query the server's version endpoint before doing anything else.
response = requests.get(server + "version")
version_info = response.json()
Ejemplo n.º 12
0
def convert():
    """Flask view: accept an uploaded bibliography file (BibTeX, RIS, or
    EndNote XML), normalize it into a pandas DataFrame, and render it as
    an HTML table; re-render the index page otherwise."""
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types=False
    parser.homogenize_fields= False

    if request.method== 'POST':
        file = request.files['filer_input']
        if file.filename == '':
            flash('no selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            if file and file_bib(file.filename):
                # --- BibTeX upload ---
                bibtex_file = file.stream
                bibtex_str = bibtex_file.read()
                bib_database = bibtexparser.loads(bibtex_str, parser)
                df = pd.DataFrame(bib_database.entries)
                df.index += 1  # 1-based row numbers for display
                return render_template_string(html, filenm = file.filename,
                                                table = df.to_html(index='Nomor', header='true', table_id='example',
                                                classes='table table-striped table-bordered'))
            elif file and file_ris(file.filename):
                # --- RIS upload ---
                ris_file = file.stream.read()
                reader = io.BytesIO(ris_file)
                wrapper = io.TextIOWrapper(reader, encoding='utf-8')
                entries = readris(wrapper)
                df = pd.DataFrame(entries)
                df['publication_year'] = df['publication_year'].apply(remove_punch)
                df.index += 1
                return render_template_string(html, filenm = file.filename,
                                                table = df.to_html(header='true', table_id='example', classes='table table-striped table-bordered'))
            else:
                # --- EndNote XML upload: XML -> dict -> JSON -> DataFrame ---
                data = xmltodict.parse(file)
                json_data = json.dumps(data)
                wanda = pd.read_json(StringIO(json_data))
                vision = wanda["xml"]['records']['record']
                axel = pd.DataFrame(vision)
                # NOTE(review): a pandas column selection is never None, so
                # both `is not None` checks below are always True — verify
                # the original intent (probably a column-existence check).
                if axel.index[0] == '@name':
                    if axel['database'] is not None:
                        del axel['database']
                        # axel.index +=1
                    else:
                        None
                else:
                    if axel['database'] is not None:
                        del axel['database']
                    else:
                        None
                    # extract the '@name' attribute as the ref-type value
                    type_data = []
                    for i, each in enumerate(axel['ref-type']):
                        if '@name' in axel['ref-type'][i].keys():
                            try:
                                data_type = axel['ref-type'][i]['@name']
                                type_data.append(data_type)
                            except Exception as e:
                                print(e)
                    axel['ref-type'] = type_data
                    # extract the author list from each contributors dict
                    authors = []
                    for i, each in enumerate(axel['contributors']):
                        if 'authors' in axel['contributors'][i].keys():
                            try:
                                data_author = axel['contributors'][i]['authors']['author']
                                authors.append(data_author)
                            except Exception as e:
                                print(e)
                    axel['contributors'] = authors
                    # extract the title values
                    titles=[]
                    for i, each in enumerate(axel['titles']):
                        title_data = axel['titles'][i].values()
                        titles.append(title_data)
                    axel['titles']= titles
                    # extract the full-title (periodical) values
                    periodical=[]
                    for i, each in enumerate(axel['periodical']):
                        hulk = axel['periodical'][i]
                        if hulk is not None: # else hulk = 'NaN'
                            periodical_data = hulk.values()
                        # NOTE(review): if the first periodical is None,
                        # periodical_data is unbound here (NameError); later
                        # Nones silently reuse the previous value.
                        periodical.append(periodical_data)
                    axel['periodical']= periodical
                    # FOR GET keywords
                    # keyw=[]
                    # for i, each in enumerate(axel['keywords']):
                    #     hawk_eye = axel['keywords'][i]
                    #     if hawk_eye is not None: # else hulk = 'NaN'
                    #         keyw_data = hawk_eye.values()
                    # keyw.append(keyw_data)
                    # axel['keywords']= keyw
                    axel.index += 1
                return render_template_string(html, filenm = file.filename,
                                                table = axel.to_html(header='true', table_id='example',
                                                classes='table table-striped table-bordered'))
        else:
            return render_template('output1.html')
    return render_template('index.html')