Ejemplo n.º 1
0
def collect_author_name(fn):
    """Extract every author name from a BibTeX file into a ``.author`` file.

    :param fn: path to the input ``.bib`` file.
    :return: path of the generated ``.author`` file (one author per line).
    """
    import re

    output = fn.replace('.bib', '.author')
    # Use context managers so both files are closed even on parse errors
    # (the original left output_file dangling open).
    with open(fn, encoding='ISO-8859-1') as bibtex_file, \
            open(output, 'w+') as output_file:
        parser = BibTexParser(common_strings=True)
        parser.ignore_nonstandard_types = True
        parser.homogenise_fields = False
        bib_database = bibtexparser.loads(bibtex_file.read(), parser)
        for entry in bib_database.entries:
            for key, val in entry.items():
                if key.lower() == 'author':
                    # Split only on the BibTeX separator " and " (with
                    # surrounding whitespace) — splitting on the bare
                    # substring 'and' also cut names such as "Alexander".
                    for one_author in re.split(r'\s+and\s+', val):
                        one_author = one_author.strip()
                        if one_author:
                            output_file.write(one_author + '\n')
    return output
Ejemplo n.º 2
0
def get_bibtex(f):
    """Parse an open BibTeX file handle and return the resulting database.

    Fields are homogenised and run through the ``clean_tex`` customization;
    nonstandard entry types are rejected.
    """
    bib_parser = BibTexParser(common_strings=False)
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenise_fields = True
    bib_parser.customization = clean_tex
    return bibtexparser.load(f, bib_parser)
Ejemplo n.º 3
0
def inject_labels(input_fn, output_fn, writer):
    """Wrap every field value of a BibTeX file in @@@key@@@ ... @@@@key@@@@ markers.

    Fields named id, entrytype or author are copied through untouched; the
    relabelled database is serialised to ``output_fn`` with ``writer``.
    """
    # Fields whose values must stay exactly as parsed.
    passthrough = ('id', 'entrytype', 'author')
    with open(input_fn, encoding='ISO-8859-1') as bibtex_file:
        parser = BibTexParser(common_strings=True)
        parser.ignore_nonstandard_types = True
        parser.homogenise_fields = False
        bib_database = bibtexparser.loads(bibtex_file.read(), parser)
        relabelled = []
        for entry in bib_database.entries:
            new_entry = {}
            for key, val in entry.items():
                if key.lower() in passthrough:
                    new_entry[key] = val
                else:
                    # Surround the value with opening and closing markers.
                    new_entry[key] = '@@@{}@@@ {} @@@@{}@@@@'.format(key, val, key)
            relabelled.append(new_entry)
        # Swap the relabelled entries into the database.
        bib_database.entries = relabelled
    with open(output_fn, 'w') as out_file:
        out_file.write(writer.write(bib_database))
Ejemplo n.º 4
0
def load_bib(bib, titles):
    """Returns dict {'BibTeX ID': {record}}

    ``titles`` presumably maps lowercase journal names to canonical titles
    via BibTeX @string definitions — verify against the caller.
    """
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    # Find the url field of a misc entry
    # https://github.com/sciunto-org/python-bibtexparser/issues/93
    parser.homogenise_fields = False
    with open(bib) as bibtex_file:
        bib = bibtexparser.load(bibtex_file, parser=parser)

    titles_parser = BibTexParser()
    titles_parser.customization = convert_to_unicode
    with open(titles) as titles_file:
        titles = bibtexparser.load(titles_file, parser=titles_parser)

    res = {}
    for entry in bib.entries:
        journal = entry.get('journal')
        if journal is not None and journal.lower() in titles.strings:
            # Canonicalise the journal name via the @string table.
            entry['journal'] = titles.strings[journal.lower()]
        if 'author' in entry:
            # Repair a stray combining accent left over after unicode conversion.
            entry['author'] = entry['author'].replace('{́i}', 'í')
        res[entry['id'].strip()] = entry
    return res
Ejemplo n.º 5
0
    def add_entry_by_string(self,
                            bib_string,
                            file_name=None,
                            skip_if_file_exists=True,
                            skip_if_doi_exists=False,
                            parser=None):
        """
        Add a new entry corresponding to a BibTex string.
        :param bib_string: a string giving the section in a BibTex file that would represent this reference.
        :param file_name: the name of a local file to include in the reference section. Optional.
        :param skip_if_file_exists: boolean, default True: do nothing when a reference pointing at the same
        local file is already in the database, so a database can be refreshed without overwriting files.
        :param skip_if_doi_exists: boolean, default False: when True, do nothing when a reference with the
        same DOI already exists, to avoid adding duplicates.
        :param parser: an instance of bibtexparser.bparser.BibTexParser used to parse ``bib_string``. When
        omitted, a default parser is built with ignore_nonstandard_types=False, homogenise_fields=True and
        a customization bound to this instance's format_entry method at call time.
        :return: none, adds entry in place.
        """
        # Bail out early when the local file is already referenced.
        if skip_if_file_exists and file_name is not None and file_name in self.files:
            root_logger.info(
                'Not adding {}, entry for that file already in .bib file'.
                format(file_name))
            return

        if parser is None:
            parser = BibTexParser()
            parser.ignore_nonstandard_types = False
            parser.homogenise_fields = True
            # The lambda captures the current instance so format_entry sees
            # the database state at the time this method was called.
            parser.customization = lambda entry: self.format_entry(entry)

        # Parse into a scratch database to get a properly formatted entry,
        # then take that entry for ourselves.
        new_entry = parser.parse(bib_string).entries[0]

        if skip_if_doi_exists and 'doi' in new_entry and new_entry['doi'] in self.dois:
            root_logger.info(
                'Not adding {}, entry with DOI "{}" already in bib file'.
                format(file_name, new_entry['doi']))
            return

        if file_name is not None:
            new_entry['file'] = file_name

        # Other views of the database (e.g. the entries dict) appear to be
        # derived on the fly from this list, so appending is sufficient.
        self.entries.append(new_entry)
Ejemplo n.º 6
0
def get_bibtex_dict(stream):
    """Parse *stream* as BibTeX and return its entries keyed by entry ID."""
    from bibtexparser.bparser import BibTexParser

    parser = BibTexParser()
    parser.ignore_nonstandard_types = False
    parser.homogenise_fields = False

    # TODO: newlines inside `author` values seem to be preserved by the
    # parser; collapsing them (and any repeated spaces) to single spaces
    # would be a nice bit of homogenization to add here.

    db = parser.parse_file(stream)
    return db.get_entry_dict()
Ejemplo n.º 7
0
def get_bibtex_dict(stream):
    """Read BibTeX from *stream*; return a dict of entries keyed by ID."""
    from bibtexparser.bparser import BibTexParser

    bib_parser = BibTexParser()
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenise_fields = False

    # TODO: it seems newlines are preserved, in `author` records at least;
    # those should be replaced with spaces (and multiple spaces collapsed
    # if needed).

    return bib_parser.parse_file(stream).get_entry_dict()
Ejemplo n.º 8
0
def generate_pubs(bibfile):
    """
        - parse the bibfile
        - transform each entry into an html string
        - keep pdf, doi, url separate if available

    Create a list of pubs:
        'id'     : bib key
        'type'   : pub type
        'year'   : year of bib (for sorting I guess)
        'bibtex' : raw bibtex text
        'html'   : formatted text
        'pdf'    : path/to/localpaper.pdf
        'doi'    : doi
        'url'    : some url
    """

    with open(bibfile) as bibtex_file:
        parser = BibTexParser()
        parser.customization = cust
        parser.homogenise_fields = False
        rawdata = bibtexparser.load(bibtex_file, parser=parser).entries

    pubs = []
    for rawentry in rawdata:
        entry = {}

        # first three: id, type, year
        # (removed a leftover debug print of entries carrying a 'pdf' field)
        entry['id'] = rawentry.pop('ID')
        entry['type'] = rawentry.pop('ENTRYTYPE')
        if 'year' in rawentry:
            entry['year'] = rawentry['year']

        # generate raw bibtex string
        entry['bibtex'] = generate_bibtex(rawentry, entry['id'], entry['type'])

        # generate html string
        entry['html'] = generate_html(rawentry, entry['id'], entry['type'])

        # copy the optional extras straight through when present
        for field in ('pdf', 'doi', 'url'):
            if field in rawentry:
                entry[field] = rawentry[field]
        pubs.append(entry)

    return pubs
Ejemplo n.º 9
0
 def bibtex_reader(self, bibtextdata):
     """
     Parse the bibtex data.

     Arguments:
         bibtextdata {str} -- bibtexdata

     Returns:
         dict -- the first entry of the parsed bibtex
     """
     # common_strings is only honoured as a constructor argument; assigning
     # parser.common_strings after construction has no effect in
     # bibtexparser, so pass it here.
     parser = BibTexParser(common_strings=False)
     parser.ignore_nonstandard_types = False
     parser.homogenise_fields = False
     bib_database = bibtexparser.loads(bibtextdata, parser)
     return bib_database.entries[0]
Ejemplo n.º 10
0
def read_bib(filename):
    """Read a bibtex file and return the parsed bibtexparser database.

    Exits the process with a non-zero status when the file is missing.
    """
    import sys

    if not os.path.exists(filename):
        print("... no bib file: {}".format(filename))
        # Was `os.exit(0)`, which raised AttributeError (os has no exit);
        # exit explicitly with an error status instead.
        sys.exit(1)

    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenise_fields = False

    with open(filename) as f:
        bibtex_str = f.read()

    bib_database = bibtexparser.loads(bibtex_str, parser)
    return bib_database
Ejemplo n.º 11
0
    def main(self):
        """Entry point: parse CLI arguments, load the BibTeX database and
        dispatch to the sub-command chosen by the user."""
        # Parse arguments
        self.args = self.argument_parser.parse_args()
        self.verbose = self.args.verbose

        # Set up the BibTeX parser
        parser = BibTexParser()
        parser.homogenise_fields = False
        parser.ignore_nonstandard_types = False
        # Wrap every record in a BibItem; presumably this also collects
        # keywords via self.keywords.update — verify against BibItem.
        parser.customization = lambda r: BibItem(r,
                                                 self.keywords.update,
                                                 self.config)
        # Parse the database
        self.db = bibtexparser.load(self.args.infile, parser=parser)

        # Invoke the command chosen by the user
        # (sub-command names use dashes, method names use underscores)
        command = getattr(self, self.args.command.replace('-', '_'))
        args = self.config.get(self.args.command, {})
        command(**args)
Ejemplo n.º 12
0
def bib_to_dict(bib_string):
    """ convert bibtex string to dictionary """
    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenise_fields = False
    parser.customization = convert_to_unicode

    database = bibtexparser.loads(bib_string, parser)
    entries = database.entries
    if not entries:
        return None

    # Turn comma-separated keyword strings into lists.
    for entry in entries:
        keywords = entry.get('keywords', '')
        if keywords != '':
            entry['keywords'] = keywords.split(',')

    # A single entry is returned bare, several as a list.
    return entries[0] if len(entries) == 1 else entries
Ejemplo n.º 13
0
def proc_bibtex(text, reverse=False):
    """Convert author/title/journal fields between latex and unicode forms.

    Fields containing TeX math are skipped with a warning; brace-quoted
    math aborts the whole run.
    """
    fields = ['author', 'title', 'journal']
    convert = l2u if reverse else u2l
    parser = BibTexParser()
    parser.homogenise_fields = False
    database = bibtex.loads(text, parser)
    for entry in database.entries:
        for field in fields:
            if field not in entry:
                continue
            value = entry[field]
            if '\$' in value:
                sys.stderr.write('error: quoted latex math expression in {}:{}, abort\n'
                                 .format(entry['id'], field))
                sys.exit(1)
            if '$' in value:
                sys.stderr.write('warning: latex math expression in {}:{}, skipping\n'
                                 .format(entry['id'], field))
                continue
            entry[field] = convert(value)
    return bibtex.dumps(database)
Ejemplo n.º 14
0
    def open(self, bibfile):
        """Load *bibfile* and index each entry's PDF path by its entry ID."""
        # read the whole file up front
        with open(bibfile) as bibtex_file:
            bibtex_str = bibtex_file.read()

        # permissive parser: common strings, nonstandard types, homogenised fields
        parser = BibTexParser(common_strings=True)
        parser.ignore_nonstandard_types = True
        parser.homogenise_fields = True

        self.bib_database = bibtexparser.loads(bibtex_str, parser)

        # keep the second ':'-separated token of each entry's 'file' field
        # (assumes every entry carries a 'file' field — TODO confirm)
        for entry in self.bib_database.entries:
            self.pdf_files[entry['ID']] = entry['file'].split(':')[1]
Ejemplo n.º 15
0
def main():
    """Load every .bib file under the target paths and report the entry keys
    missing from the largest (master) database."""
    args = parser.parse_args()
    args.target = [abspath(expanduser(x)) for x in args.target]

    logging.info("Targeting: {}".format(args.target))

    # Get Bibtex files (fixed: was `args.targeet`, which raised AttributeError)
    all_bib_paths = retrieval.get_data_files(args.target, ".bib")
    all_dbs = []
    for t in all_bib_paths:
        # Use a new bib_parser for each so library isn't shared
        bib_parser = BibTexParser(common_strings=False)
        bib_parser.ignore_nonstandard_types = False
        bib_parser.homogenise_fields = True

        with open(t, 'r') as f:
            all_dbs.append(b.load(f, bib_parser))

    logging.info("DB Sizes: {}".format(", ".join(
        str(len(x.entries)) for x in all_dbs)))

    # Sort the databases by entry count, largest first. Sort on the count
    # only — comparing (count, database) tuples raised TypeError whenever
    # two databases had the same size.
    sorted_dbs = sorted(((len(x.entries), x) for x in all_dbs),
                        key=lambda pair: pair[0], reverse=True)

    # Use the largest as Primary
    head = sorted_dbs[0][1]
    rest = sorted_dbs[1:]
    head_set = {x['ID'] for x in head.entries}
    missing_keys = set()

    # For the remaining databases, collect keys absent from the master
    for _, db in rest:
        db_set = {x['ID'] for x in db.entries}
        if head_set.issuperset(db_set):
            continue

        missing_keys.update(db_set.difference(head_set))

    logging.info("{} Keys missing from master: {}".format(
        len(missing_keys), "\n".join(missing_keys)))
Ejemplo n.º 16
0
def proc_bibtex(text, reverse=False):
    """Run the latex<->unicode converter over author/title/journal fields.

    Plain math expressions are left untouched (warning); brace-quoted math
    aborts the process with an error.
    """
    wanted = ('author', 'title', 'journal')
    conv = l2u if reverse else u2l
    bparser = BibTexParser()
    bparser.homogenise_fields = False
    db = bibtex.loads(text, bparser)
    for entry in db.entries:
        for name in wanted:
            if name not in entry:
                continue
            value = entry[name]
            if '\$' in value:
                sys.stderr.write(
                    'error: quoted latex math expression in {}:{}, abort\n'.
                    format(entry['id'], name))
                sys.exit(1)
            if '$' in value:
                sys.stderr.write(
                    'warning: latex math expression in {}:{}, skipping\n'.
                    format(entry['id'], name))
                continue
            entry[name] = conv(value)
    return bibtex.dumps(db)
Ejemplo n.º 17
0
# Command-line options: the master .bib file, an output name, and the
# directory holding the Mendeley library.
parser = argparse.ArgumentParser("")
parser.add_argument('-t', '--target', default="~/Mega/library.bib")
parser.add_argument('-o', '--output', default="bibtex")
parser.add_argument('-l', '--library', default="~/MEGA/Mendeley")
args = parser.parse_args()

# Normalise both paths; the target .bib file must already exist.
args.target = realpath(abspath(expanduser(args.target)))
args.library = realpath(abspath(expanduser(args.library)))
assert(exists(args.target))

logging.info("Targeting: {}".format(args.target))
logging.info("Output to: {}".format(args.output))

# NOTE(review): `parser` is rebound here — from this point on it is the
# BibTeX parser, no longer the argparse instance above.
parser = BibTexParser(common_strings=False)
parser.ignore_nonstandard_types = False
parser.homogenise_fields = True

def make_bar(k, v, left_pad_v, right_scale_v):
    """Render one ASCII histogram bar for label *k* with value *v*.

    ``left_pad_v`` widens the label column; ``right_scale_v`` shrinks the bar.
    """
    # Pad the label so all bars start at the same column.
    pad = 10 + left_pad_v - len(k)
    # Scale the value into a number of '=' characters.
    bar_len = ceil((100 - pad) / right_scale_v * v)
    return "{}{}({}) : {}>\n".format(k, " " * pad, v, "=" * bar_len)

def file_to_hash(filename):
    """Return the SHA-256 hex digest of *filename*'s contents."""
    if not isfile(filename):
        # Same behaviour as before: a missing file is reported by its path.
        raise Exception(filename)
    with open(filename, 'rb') as handle:
        return sha256(handle.read()).hexdigest()

def add_slash_if_necessary(x):
    if x[0] != '/':
Ejemplo n.º 18
0
from habanero import Crossref
import pandas as pd
import bibtexparser
from bibtexparser.bparser import BibTexParser
import datetime

work = Crossref()
# common_strings must be passed to the constructor: bibtexparser silently
# ignores the attribute when it is assigned after the parser is created,
# so the previous `parser.common_strings = False` line had no effect.
parser = BibTexParser(common_strings=False)
parser.ignore_nonstandard_types = False
parser.homogenise_fields = False

# This list contains words that are not decisive to identify the publications,
# such as articles and adverbs. They must be removed because the Crossref
# API retrieves all the publications whose titles that have at least a word 
# in common with the publication under analysis
common_words = ['a','the','on','an','for','of','at','with', 'without', 'toward',
                'towards', 'not', 'but', 'in', 'is', 'are', 'that', 'and', 'or',
                'learning','into','to','its','which','do','does','using','via',
                'from']

def doisearcher(origtitle, inauthor):
    """
    This function calls the Crossref API and returns the most relevant publications
    whose title has at least one word in common with the input title (origtitle).
    This list is limited to the first 1000 entries.
    It is possible to run the search on the list of authors. In this version, this
    option is commented out.
    :param origtitle: title of the input publication 
    :param inauthor: list of authors of the input publication
    :return: the list of the matched publications (w2) and the title of the 
Ejemplo n.º 19
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 13 09:14:09 2020

@author: rayhan, rezvan

Converts ScienceDirect BibTeX exports into CSV files (one header row per
output file).  Input paths are hard-coded to the authors' Google Drive.
"""

import csv
import bibtexparser
from bibtexparser.bparser import BibTexParser

# Lenient parser: @string constants, nonstandard entry types and string
# interpolation are all enabled; field names are homogenised as well.
parser = BibTexParser(common_strings=True,
                      ignore_nonstandard_types=True,
                      interpolate_strings=True)
parser.homogenise_fields = True

# NOTE: range(1, 2) means only the file "1.bib" is processed here.
for index in range(1, 2):
    with open(
            f'/Volumes/GoogleDrive/My Drive/Research/Structured data from text/tools/data/sciencedirect/q13/{index}.bib'
    ) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

    #file = open(f'wiley-sm-20002020-{index}.csv', "a")
    #file.write(f'Document Title, Abstract, year, pdf Link ,label\n')

    # Append a header row to the per-index CSV output file.
    with open(f'sciencedirect-q12-20002020-{index}.csv', mode='a') as file:
        file_writer = csv.writer(file, delimiter=',')
        file_writer.writerow(
            ['Document Title', 'Abstract', 'year', 'pdf Link', 'label'])
Ejemplo n.º 20
0
def main():
    """Export the BibTeX entries cited by local .tex files from Zotero.

    Reads the bibliography info from the tex sources, pulls the database
    from the local Zotero HTTP API, keeps only the cited entries and writes
    them to the output .bib file.
    """
    local_dir =  os.getcwd() # directory of the tex source files

    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--tex', 
                        help='the path of tex file')
    parser.add_argument('-o', '--output', 
                        help='the path of bib file you are using for latex. By default the current path')
    args = parser.parse_args()

    
    tex_files = args.tex.replace(' ', '').split(',') if args.tex else [os.path.join(local_dir, f) for f in get_tex_file(local_dir) ]  # if not given, look for tex files in the current directory
    bib_keys = []
    bib_name = None  # todo cannot handle several bib names; usually only main.tex carries that command, so this is acceptable
    for f in tex_files:
        key, temp_name = get_bibinfo(f)  # collect the cited keys and the bib file name
        bib_keys.extend(key)
        if temp_name:
            bib_name = temp_name
            bib_dir = os.path.split(f)
    
    # NOTE(review): if no tex file yields a bib name, bib_dir stays unbound
    # and the next line raises NameError — confirm get_bibinfo always finds one.
    tex_dir = bib_dir if args.tex else local_dir    # split the tex file path from the file name
    bib_name = os.path.join(tex_dir, bib_name) # join so the path points next to the tex sources
    output_bib = args.output if args.output else bib_name   # prefer the CLI argument; otherwise use the name from the tex file, in the same directory


    # Fetch the database from the Zotero API
    try:
        r = requests.get(ZOTERO_API)
    except requests.exceptions.ConnectionError:
        print('zotero未启动,获取数据库失败')
        sys.exit(1)
    if r.status_code == 200:
        print('成功从zotero读取数据')
    else:
        raise Exception('未能从zotero读取数据,状态码:{}'.format(r.status_code))
        # NOTE(review): unreachable — the raise above already aborts.
        sys.exit(1)
    r.encoding = 'utf-8'
    bib_str = modify_bibs(r.text)

    # with open('./bib_str.txt', 'w', encoding='utf8') as out_bib:
    #     out_bib.write(bib_str)


    # Build the BibtexParser
    bibParser = BibTexParser(common_strings=False)
    bibParser.ignore_nonstandard_types = True
    bibParser.homogenise_fields = True
    bibdata = bp.loads(bib_str, bibParser)

    # for i in range(100,120):
    #     print(bibdata.entries[i])
    #     print(type(bibdata.entries[i]), '\n')

    # Format the bib database.
    # Inefficient: it would be better to look up each cited bib id in the
    # big database directly, appending when present and reporting otherwise.
    bibdata_out = bp.bibdatabase.BibDatabase()
    for d in bibdata.entries:
        if d['ID'] in bib_keys:
            bibdata_out.entries.append(d)
            entity_check = check_entity(d)
            entity_check_consequence = '---->题目:'+ re.sub(r'[{}]','', d['title']) +' 缺少字段:'+ str(entity_check) if entity_check else ''
            print('成功导入---->'+d['ID'], entity_check_consequence)
            bib_keys.remove(d['ID'])

    # TODO
    # check whether the failed keys live in other referenced bib files
    
    bibkey_not_found = '\n'.join(bib_keys)
    print('以下导入失败(共{}个):\n'.format(len(bib_keys)), bibkey_not_found)
    print('------------end---------------')

    # print(bibdata_out)
    with open(output_bib, 'w', encoding='utf8') as bib_write:
        bp.dump(bibdata_out, bib_write)
Ejemplo n.º 21
0
def import_command(ctx, paths):
    """Read new entries into the database.

    PATHS may be zero or more .bib files or directories containing .bib files
    to import.

    Configuration file keys

    \b
    import:
      path: a default path to check for .bib files to import, if no PATHS are
            given.
    """
    # If no files
    if len(paths) == 0:
        # Directory from which to import entries
        paths = [ctx.cmd_config("import").get("path", ".")]

    # Expand directories into globs over their .bib files.
    paths = [
        os.path.join(p, "*.bib") if os.path.isdir(p) else p for p in paths
    ]

    # A parser for reading entries
    parser = BibTexParser()
    parser.homogenise_fields = False
    parser.customization = _add_clean

    # Iterate over files in the add_dir
    for fn in chain(*map(iglob, paths)):
        os.system("clear")
        print("Importing", fn, end=":\n\n")

        # Read and parse the file
        with open(fn, "r") as f:
            s = f.read()
            print(s, end="\n\n")

        try:
            # Take the last entry in case the file holds more than one.
            e = parser.parse(clean_str(s)).entries[-1]
        except ParseException:
            print(clean_str(s))
            raise

        # The abstract is stored in a separate file, not in the database.
        abstract = e.pop("abstract", None)

        print("Parsed entry:", to_string(e), sep="\n\n")

        if abstract is not None:
            print("Abstract:", abstract, sep="\n\n")

        # Ask user for a key; repeat until it does not collide with an
        # existing entry (get_entry raising KeyError means the key is free).
        while True:
            key = input_with_prefill(
                "\nEnter key for imported entry "
                "([] Skip, [D]elete without importing, [Q]uit): ",
                guess_key(e),
            )
            try:
                ctx.db.get_entry(key)
                print("Key already exists.")
            except KeyError:
                break

        if key == "":
            continue
        elif key.lower() == "d":
            os.remove(fn)
            continue
        elif key.lower() == "q":
            break
        else:
            # Change the entry key
            e["ID"] = key

        # Add a custom field with the current date
        e["entrydate"] = datetime.now().isoformat(timespec="minutes")

        # Select a full text file to go with the entry
        fn_local = _select_file(e["ID"],
                                ctx.cmd_config("import").get("path", "."))
        if fn_local:
            e["localfile"] = os.path.basename(fn_local)

        # Append the entry to the database
        with open(ctx.config["database"], "a") as f_db:
            f_db.write("\n")
            f_db.write(to_string(e))

        # Write the abstract
        if abstract:
            fn_abstract = ctx.config["path"] / "abstracts" / ("%s.tex" % key)
            with open(fn_abstract, "x") as f_abstract:
                f_abstract.write(abstract)

        # Move the full text file (mv -n never overwrites the destination)
        if fn_local:
            os.system('mv -n "{}" "{}"'.format(
                fn_local, ctx.config["path"] / e["localfile"]))

        # Remove the imported entry file
        remove = input("\nRemove imported file %s ([Y]es, [enter] to "
                       "keep)? " % fn)
        if remove.lower() == "y":
            os.remove(fn)
0
def main():
    """Parse an ORCID BibTeX export and print a flat citation list.

    For every entry, authors are shortened to "Last, F.M." form, then the
    CrossRef and Altmetric APIs are queried for the citation count and the
    Altmetric score.  Entries without a DOI and Zenodo datasets are skipped
    (and logged).  Output goes to stdout, diagnostics to the log file.
    """
    parser = argparse.ArgumentParser(
        description=
        'Parses an ORCID bibtex export, and prints a nice flat text file with Altmetric and citations added.'
    )
    parser.add_argument(
        '-file',
        help='Alternative filename for the ORCID export. Default: "works.bib".',
        default='works.bib')
    parser.add_argument(
        '-logname',
        help='Where the logs should be sorted. Default: "arc_friend.log"',
        default='arc_friend.log')

    args = parser.parse_args()

    logging.basicConfig(filename=args.logname,
                        level=logging.INFO,
                        filemode='w')

    parser = BibTexParser(
        common_strings=False
    )  #The common_strings option needs to be set when the parser object is created and has no effect if changed afterwards.
    parser.ignore_nonstandard_types = False
    parser.homogenise_fields = True

    with open(args.file) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser)
    '''
    I want the final output to look like this:
        10. Person, A., Person, B., Person, C., Another Person, C., 2011. Cautionary notes on the use of APIs. API journal 23, 3871–8.
        Impact Factor = 10.11
        Citations = 12
        AltMetric score = 0
    '''

    now = datetime.datetime.now()
    now = now.strftime('%dth %B, %Y')

    print(f'Citations and Altmetric scores obtained on {now}.')
    print('Citations obtained from CrossRef.'
          )  # important to mention this as Google Scholar has higher citations
    counter = 1
    for e in bib_database.entries:
        e = bibtexparser.customization.homogenize_latex_encoding(e)
        # get rid of names like "Marie Kubal\\'akov\\'a"
        e = bibtexparser.customization.convert_to_unicode(e)
        author_string = e['author']
        author_list = author_string.replace('{', '').replace('}',
                                                             '').split(' and ')
        # shorten the first names
        # Farquard Banana becomes F.B.
        shortened_author_list = []
        # two possible names for some reason returned by ORCID
        # Bayer, {Philipp E.} so the last name is first
        # Candy M. Taylor so the last name is last
        for a in author_list:
            a = a.strip()
            newa = ''
            if not a: continue
            # I have now encountered three ways names are encoded
            if ',' in a:
                # Bayer, Philipp E
                # last name is first:
                # last name
                newa += a.split()[0] + ' '
                # first name
                newa += '.'.join([substring[0] for substring in a.split()[1:]])
            elif a.split()[-1].isupper():
                # Bayer PE
                newa += a.replace(' ', ', ')
            else:
                # Philipp Bayer
                # last name is last
                newa += a.split()[-1] + ', '
                newa += '.'.join(
                    [substring[0] for substring in a.split()[:-1]])

            # add missing dot at end of first name
            if newa[-1] != '.':
                newa += '.'
            shortened_author_list.append(newa)

        shortened_author_string = ', '.join(shortened_author_list)

        # is this a book chapter, or a paper?
        if 'booktitle' in e:
            journal = e['booktitle']
        else:
            try:
                journal = e['journal'].replace('\\', '').replace('}',
                                                                 '').replace(
                                                                     '{', '')
            except KeyError:
                journal = False

        # Clean the title *before* the DOI lookup: the missing-DOI log line
        # below references it (previously `title` was assigned only after
        # this point, so that line raised NameError).
        title = e['title'].replace('}', '').replace('{', '').replace(
            '\n', '').replace('\r', '')

        try:
            doi = e['doi']
        except KeyError:
            logging.info(f'{title} has no doi, skipping (for now?)')
            continue

        if journal == 'Zenodo' or 'ZENODO' in doi:
            logging.info(
                f'Skipping cited dataset {title}, {doi} at Zenodo (for now?)')
            continue
        try:
            year = e['year']
        except KeyError:
            year = False

        try:
            volume = e['volume']
        except KeyError:
            volume = False
        try:
            pages = e['pages']
        except KeyError:
            pages = False

        overall_string = f'{counter}. {shortened_author_string}, {year}. {title}.'
        if journal:
            overall_string += f' {journal}, '
        if volume:
            overall_string += f' {volume}, '
        if pages:
            overall_string += f' {pages}.'

        overall_string = overall_string.strip()
        overall_string = overall_string.replace('  ', ' ')
        if overall_string[-1] == ',':
            overall_string = overall_string.rstrip(',') + '.'

        # now get the citations
        # http://api.crossref.org/works/10.1179/1942787514y.0000000039 for example
        crossref_url = f'http://api.crossref.org/works/{doi}'

        r = requests.get(crossref_url)
        reference_count = r.json()['message']['is-referenced-by-count']

        # 'https://api.altmetric.com/v1/doi/10.1038/news.2011.490'
        altmetric_url = f'https://api.altmetric.com/v1/doi/{doi}'
        r = requests.get(altmetric_url)
        try:
            altmetric_score = r.json()['score']
        except json.decoder.JSONDecodeError:
            # try again once before giving up on this entry
            r = requests.get(altmetric_url)
            try:
                altmetric_score = r.json()['score']
            except json.decoder.JSONDecodeError:
                logging.info(f'Cannot get Altmetric score for {doi}. {title}')
                continue

        overall_string += f'\nImpact Factor = FILLME\nCitations = {reference_count}\nAltmetric score = {altmetric_score}\n'
        overall_string = overall_string.replace('..', '')
        print(overall_string)
        counter += 1
Ejemplo n.º 23
0
def make_parser(func):
    """Build a strict BibTexParser whose customization hook is *func*."""
    parser = BibTexParser(common_strings=False)
    parser.ignore_nonstandard_types = False
    parser.homogenise_fields = True
    parser.customization = func
    return parser
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 13 09:14:09 2020

@author: rayhan, rezvan
"""

import csv
import bibtexparser
from bibtexparser.bparser import BibTexParser

# For Springerlink
# Lenient parser: @string constants, nonstandard entry types and string
# interpolation are all enabled; field names are homogenised as well.
parser = BibTexParser(common_strings=True,
                      ignore_nonstandard_types=True,
                      interpolate_strings=True)
parser.homogenise_fields = True

for index in range(1, 22):
    with open(
            f'/Volumes/GoogleDrive/My Drive/Research/Structured data from text/tools/data/for rezvan/springerlink/{index}.bib'
    ) as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file, parser=parser)

        with open(
                f'/Volumes/GoogleDrive/My Drive/Research/Structured data from text/tools/data/for rezvan/csv/springer-{index}.csv',
                mode='a') as file:
            file_writer = csv.writer(file, delimiter=',')
            file_writer.writerow(
                ['Document Title', 'Abstract', 'year', 'pdf Link', 'label'])

            for item in bib_database.entries:
Ejemplo n.º 25
0
def get_parser():
    """Return a lenient BibTexParser: common strings enabled, no string
    interpolation, nonstandard entry types rejected, fields left as-is."""
    bib_parser = BibTexParser(common_strings=True, interpolate_strings=False)
    bib_parser.ignore_nonstandard_types = False
    bib_parser.homogenise_fields = False
    return bib_parser