Example 1
File: zbl.py Project: siudej/Cite
 def _processResults(self, data):
     """ Get bibtex data from zbMATH website. """
     bibs = re.findall("(?si)bibtex/.*?\d{3,}\.bib", data)
     data = []
     import bibtexparser
     from bibtexparser.bparser import BibTexParser
     parser = BibTexParser()
     parser.customization = customizations
     if self.otherID:
         # setup for MRef fetching
         from msn import MRef
         mr = MRef()
     for bib in bibs:
         bibtext = urllib.urlopen("https://zbmath.org/" + bib).read()
         zbl = bibtexparser.loads(bibtext, parser=parser)
         if self.otherID and mr.fetch(bibtext):
             # found MRef match for zbMATH record
             msn = bibtexparser.loads(mr.refs)
             # use MSN bibtex entry with zbl number added
             # and doi transfered if missing
             msn.entries[0]['zbl'] = zbl.entries[0]['zbl']
             if 'doi' not in msn.entries[0] and 'doi' in zbl.entries[0]:
                 msn.entries[0]['doi'] = zbl.entries[0]['doi']
             zbl = msn
         data.append(bibtexparser.dumps(zbl))
     self.refs = "\n".join(data)
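
The core pattern above (a BibTexParser with a customization hook, loads, then dumps) recurs throughout these examples. A minimal, self-contained sketch; the inlined record and convert_to_unicode stand in for the project's zbMATH fetch and its customizations function:

import bibtexparser
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import convert_to_unicode

bibtext = """@article{smith2020,
  author  = {Smith, John},
  title   = {An Example},
  journal = {J. Examples},
  year    = {2020},
}"""

parser = BibTexParser(common_strings=True)
parser.customization = convert_to_unicode  # stands in for the project's customizations
db = bibtexparser.loads(bibtext, parser=parser)
db.entries[0]['zbl'] = '1234.56789'  # add a field, as Example 1 does with the Zbl number
print(bibtexparser.dumps(db))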
Example 2
def prototype():
    md5     = request.forms.md5
    bib     = request.forms.bib
    notes   = request.forms.notes
    tags    = request.forms.tags.split(',')
    if md5:
        doc = Document.select().where(Document.md5 == md5).get()
        if doc:
            if notes:
                doc.notes = notes
            if bib:
                try:
                    bibtexparser.loads(bib)
                    doc.bib = bib.strip()
                except Exception:
                    session()['msg'] = "Invalid bibtex."
                    return redirect('/annotate/'+md5)
            if tags:
                with db.atomic():
                    Tag.delete().where(Tag.document == doc).execute()
                    for tag in tags:
                        try:
                            Tag.insert(document=doc, value=tag).execute()
                        except Exception:
                            pass
            doc.save()
            session()['msg'] = " Success"
            return redirect('/annotate/'+md5)
    else:
        session()['msg'] = "Invalid request. No document specified."
        return redirect('/annotate/'+md5)

    session()['msg'] = "You missed a field, or something went wrong."
    return redirect('/annotate/'+md5)
Example 3
def get_all_sources():
	config = get_credentials()
	start = 0
	limit = 100
	chunks = [get_sources(config['user'], config['key'], start, limit)]
	while len(bibtexparser.loads(chunks[-1]).entries) == limit:
		start += limit
		# print('Chunk {0} full, getting another one with items {1}-{2}'.format(len(chunks), start, start+limit))
		chunks.append(get_sources(config['user'], config['key'], start, limit))
	print('Successfully fetched {0} items.'.format((len(chunks)-1) * limit + len(bibtexparser.loads(chunks[-1]).entries)))
	return '\n'.join(chunks)
Example 4
def get_publications(path):
    """
    Get a list of all publications.

    Parameters
    ----------
    path : str
        Path to a BibTeX file.
    """
    with open(path) as bibtex_file:
        bibtex_str = bibtex_file.read()
    bib_database = bibtexparser.loads(bibtex_str)
    months = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
              'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}
    return sorted(bib_database.entries,
                  key=lambda n: (n['year'], months[n['month']]),
                  reverse=True)
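
The hand-built month table can also be derived from the standard library; a sketch assuming the same three-letter lowercase keys:

import calendar

# {'jan': 1, ..., 'dec': 12}, equivalent to the table above
months = {abbr.lower(): i for i, abbr in enumerate(calendar.month_abbr) if abbr}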
Example 5
def parseBibtexFile(fileString):
    "Opens a bibtext file and prints a list of dictionaries for reference entries"
    with open(fileString) as bibtex_file:
        bibtex_str = bibtex_file.read()
 
    bib_database = bibtexparser.loads(bibtex_str)
    return bib_database
Example 6
    def run(self):
        sort_type = self.options.get('sort', 'date')

        # Load the publications template
        if 'template' in self.options:
            template_path = self.options['template']
            template_dir, template_name = os.path.split(template_path)
            env = Environment(loader=FileSystemLoader(template_dir))
            template = env.get_template(template_name)
        else:
            # Use template from the Pelican theme
            template = pelican_generator.get_template('publications')

        parser = BibTexParser(common_strings=True)
        parser.customization = customize

        if self.arguments:
            bibtex_path = self.arguments[0].strip()
            with open(bibtex_path, 'r') as bibtex_file:
                bib = bibtexparser.load(bibtex_file, parser=parser)
        else:
            bib = bibtexparser.loads('\n'.join(self.content), parser=parser)

        entries_to_select = self.options.get('entries', [])
        if entries_to_select:
            d = bib.entries_dict
            entries = [d[e] for e in entries_to_select]
        else:
            entries = bib.entries
        entries = sort_entries(entries, sort_type)

        rendered_template = template.render(publications=entries)
        return [nodes.raw('', rendered_template, format='html')]
Example 7
def main(argv=None) :
    if argv is None:
        argv = sys.argv
        # etc., replacing sys.argv with argv in the getopt() call.

    filename = ""

    parser = BibTexParser()
    parser.customization = customizations

    if len(argv) > 1 : 
        filename = argv[1]
    else:
        filename = "example.bib"

    with open(filename) as bibtex_file:
        bibtex_str = bibtex_file.read()

    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    #print_books(bib_database.entries)
    print_summary(bib_database.entries)
    print_journals(bib_database.entries)
    print_conferences(bib_database.entries)

    return 0
Example 8
def read_bib_file(filename, homogenize=False):
    """
    Read bibtex file.

    Args:
        filename (str): path of the bibtex file.
        homogenize (bool): whether to homogenize the entries upon reading.

    Returns:
        A BibDatabase object.
    """

    # Read input bibtex file
    bibtex_str = " "
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as bibfile:
            bibtex_str = bibfile.read()

    # Choose parser
    parser = None
    if homogenize:
        parser = BibTexParser()
        parser.customization = nomenclature.homogenize_latex_encoding

    # Create database from string
    return bibtexparser.loads(bibtex_str, parser=parser)
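
A usage sketch for the function above, assuming a refs.bib file on disk:

db = read_bib_file('refs.bib', homogenize=True)
for entry in db.entries:
    print(entry['ID'], entry.get('title', ''))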
Example 9
def main(bibtexfilepath, out_fh, output_type):
    with open(bibtexfilepath) as bibtex_file:
        bibtex_str = bibtex_file.read()
        bib_database = bibtexparser.loads(bibtex_str)
        #print(bib_database.entries)

        (topics_to_titles_with_id, id_to_entry) = build_topics_to_titles_with_id(bib_database)

        ignore_topics = ['', 'misc']

        out_fh.write(codecs.open('header.html',encoding="utf-8").read())

        # a) create hyperlinks to topics
        create_hyperlinks_to_topics(topics_to_titles_with_id, ignore_topics, out_fh, output_type=HTML)

        # b) create list of titles per topic
        create_list_of_titles_per_topic(topics_to_titles_with_id, ignore_topics, out_fh, output_type=HTML)

        # c) create bibtex list at the end, that get pointed to by 2
        #for pubid in sorted(id_to_entry):
        #    print '''<a name="%s"></a>''' % (pubid)

        #parser = BibTexParser()
        #parser.customization = customizations
        #bib_database = bibtexparser.loads(bibtex_str, parser=parser)
        #print(bib_database.entries)
        out_fh.write("<h1>BIBLIOGRAPHY</h1>")
        out_fh.write("<pre>\n")
        create_bibtex_bibliography(id_to_entry,out_fh=out_fh,output_type=HTML)
        out_fh.write("</pre>\n")
        out_fh.write("</ul>")
Example 10
def import_bibtex(request):
    review_id = request.POST['review-id']
    source_id = request.POST['source-id']

    review = Review.objects.get(pk=review_id)
    source = Source.objects.get(pk=source_id)

    bibtex_file = request.FILES['bibtex']
    list_bibtex_file = fix_bibtex_file(bibtex_file.readlines())
    str_bibtex_file = '\r\n'.join(list_bibtex_file)

    ext = os.path.splitext(bibtex_file.name)[1]
    valid_extensions = ['.bib', '.bibtex']

    if ext in valid_extensions or bibtex_file.content_type == 'application/x-bibtex':
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bib_database = bibtexparser.loads(str_bibtex_file, parser=parser)
        articles = bibtex_to_article_object(bib_database, review, source)
        _import_articles(request, source, articles)
    else:
        messages.error(request, u'Invalid file type. Only .bib or .bibtex files are accepted.')

    return redirect(r('import_studies', args=(review.author.username, review.name)))
Example 11
def get_latest_version(arxiv_id):
    """
    Find the latest version of a given arXiv eprint.

    :param arxiv_id: The (canonical) arXiv ID to query.
    :returns: The latest version on eprint as a string, or ``None``.

    >>> get_latest_version('1401.2910')
    '1401.2910v1'

    >>> get_latest_version('1401.2910v1')
    '1401.2910v1'

    >>> get_latest_version('1506.06690v1')
    '1506.06690v2'

    >>> get_latest_version('1506.06690')
    '1506.06690v2'
    """
    # Get updated bibtex
    # Trick: strip the version from the arXiv id, to query updated BibTeX for
    # the preprint and not the specific version
    arxiv_preprint_id = strip_version(arxiv_id)
    updated_bibtex = bibtexparser.loads(get_bibtex(arxiv_preprint_id))
    updated_bibtex = next(iter(updated_bibtex.entries_dict.values()))

    try:
        return updated_bibtex["eprint"]
    except KeyError:
        return None
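
strip_version is defined elsewhere in that project; a plausible stand-in with the same contract (hypothetical, not the project's code):

import re

def strip_version(arxiv_id):
    # '1506.06690v2' -> '1506.06690'; ids without a version pass through
    return re.sub(r'v\d+$', '', arxiv_id)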
Example 12
    def process_bibtex(self, item):
        '''
        process all bibtex links and update general self.args.bibtex_database

        :param tuple item: (identifier,bibtex_url,bibtex_pdf)
        '''
        identifier,bibtex_url,bibtex_pdf = item
        response =  requests.get(bibtex_url)
        if response.status_code == 200:

            #load bibtex as dict
            bibtex_string = response.text[1:]
            bibtex = bibtexparser.loads(bibtex_string)

            bibtex.entries_dict[identifier]['Keyword'] = self.args.date

            #add pdf link to bibtex
            if 'url' not in bibtex.entries_dict[identifier]:
                bibtex.entries_dict[identifier]['url'] = bibtex_pdf

            self.total_found += 1

            if identifier not in self.args.bibtex_database.entries_dict:
                self.total_added += 1
                self.args.bibtex_database.entries.append(bibtex.entries[0])
Example 13
File: fetch.py Project: siudej/Cite
 def _bibtexQuery(self, query):
     """ Turn query into bibtex dictionary. """
     import bibtexparser
     from bibtexparser.bparser import BibTexParser
     parser = BibTexParser()
     parser.customization = homogeneize_latex_encoding
     bib = bibtexparser.loads(query, parser=parser)
     if bib.entries:
         # only the first record
         record = bib.entries[0]
         # clean up entries
         if "author" in record:
             # just last name
             record["author"] = re.sub(r',.*?(and\s*|$)', ' ',
                                       record['author'])
         if "title" in record:
             record["title"] = self._citationQuery(record["title"])[0][1]
         if "journal" in record:
             record["journal"] = self._citationQuery(record["journal"])[0][1]
         if "year" in record:
             record["date"] = record["year"]
         # only use a few fields
         # TODO add numbers
         return [(k, v) for k, v in record.items() if k in
                 {"author", "title", "journal", "mrnumber", "date",
                  "arxiv", "zbl"}]
     else:
         return []
Example 14
def normalize_keyword_case():
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        if 'keyword' in bib.entries[0]:
            bib.entries[0]['keyword'] = bib.entries[0]['keyword'].lower()
            d.bib = bibtexparser.dumps(bib)
            d.save()
Example 15
def parse_bibtex(bib):
    '''Parses the BibTex returned by the DOI resolver

    Args:
        bib (str): a BibTex record

    Returns:
        Dict containing reference data
    '''
    for entity, repl in ENTITIES.iteritems():
        bib = bib.replace(entity, repl)
    # Parse BibTex using the handy dandy bibtexparser module
    import bibtexparser
    from bibtexparser.bparser import BibTexParser
    from bibtexparser.customization import convert_to_unicode
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    parsed = bibtexparser.loads(bib, parser=parser).entries[0]
    # Miscellaneous clean up
    braces = re.compile(u'\{([A-Z_ \-]+|[\u0020-\uD7FF])\}', re.U)
    for key, val in parsed.iteritems():
        val = braces.sub(r'\1', val)
        if '{' in val:
            raise Exception('Unhandled LaTeX: {}'.format(val.encode('cp1252')))
        parsed[key] = val
    parsed['pages'] = parsed.get('pages', '').replace('--', '-')
    if parsed.get('publisher', '').endswith(')'):
        parsed['publisher'] = parsed['publisher'].rsplit('(', 1)[0].rstrip()
    #pp.pprint(parsed)
    return parsed
Example 16
def bib2jekyllcol(inputFile, outputDir):
    "This prints the bibtex file to output directory as jekyll collection folder(s)" 
           
    # read and parse bib file
    with open(inputFile) as bibtex_file:
        bibtex_str = bibtex_file.read()
    
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bib_database = bibtexparser.loads(bibtex_str, parser=parser)

    # list of month names for transforming a month to its number (index + 1)
    month_list = ["jan", "feb", "mar", "apr", "may", "june", "july", "aug", "sept", "oct", "nov", "dec"]
    
    # type names:
    type_list = ["title", "author", "journal", "volume", "number",
                  "year", "month", "doi", "pages", "publisher", "booktitle", "note"]
    
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    else:
        print("Deleting existing collection file...\n")
        for file in os.listdir(outputDir):
            file_path = os.path.join(outputDir, file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception, e:
                print e
Example 17
 def __init__(self, path, ads_cache=None):
     super(BibTexDB, self).__init__()
     self._filepath = path
     with open(path) as bibtex_file:
         bibtex_str = bibtex_file.read()
     self._db = bibtexparser.loads(bibtex_str)
     self._ads_cache = ads_cache
Example 18
def normalize_keyword_delimitter():
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        if 'keyword' in bib.entries[0]:
            bib.entries[0]['keyword'] = bib.entries[0]['keyword'].replace(';',',')
            d.bib = bibtexparser.dumps(bib)
            d.save()
Example 19
def normalize_keyword_visualization():
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        if 'keyword' in bib.entries[0]:
            bib.entries[0]['keyword'] = bib.entries[0]['keyword'].replace('visualis','visualiz')
            d.bib = bibtexparser.dumps(bib)
            d.save()
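
Examples 14, 18, and 19 share one shape; a generic sketch (hypothetical helper, relying on the same review global the originals use):

def normalize_keyword(transform):
    for d in review.documents:
        bib = bibtexparser.loads(d.bib)
        entry = bib.entries[0]
        if 'keyword' in entry:
            entry['keyword'] = transform(entry['keyword'])
            d.bib = bibtexparser.dumps(bib)
            d.save()

# normalize_keyword(str.lower)
# normalize_keyword(lambda k: k.replace(';', ','))
# normalize_keyword(lambda k: k.replace('visualis', 'visualiz'))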
Example 20
def parse_urlfile(url_file):
    """
    take a file of the form

    category: ads url

    and get the bibtex from the URL and return a list of Paper objects
    with the category stored as the subject

    """

    papers = []

    with open(url_file) as f:

        parser = BibTexParser()
        parser.customization = customizations

        for line in f:
            if line.startswith("#") or line.strip() == "": continue

            subject, url = line.split(": ")

            # for the ADS bibtex URL, lop off the paper_id
            paper_id = url.strip().split("/")[-1]
            bibtex_url = "http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX".format(paper_id)

            # get the bibtex in html -- this is a little tricky, since
            # urlopen gives us a byte object that we need to decode
            # into unicode before we can play with it.
            print(bibtex_url)
            with urllib.request.urlopen(bibtex_url) as response:
                bibtex_html = response.read()

            raw_bibtex_html = bibtex_html.splitlines()

            bibtex_string = ""
            for line in raw_bibtex_html:
                bibtex_string += "{}\n".format(line.decode("utf8"))

            # strip off any header and just leave the bibtex
            found_start = False
            bibtex = ""
            for line in bibtex_string:
                if line.startswith("@"):
                    found_start = True
                if found_start:
                    bibtex += line

            # parse the bibtex string
            bib_database = bibtexparser.loads(bibtex, parser=parser)

            for e in bib_database.entries:
                p = extract_paper_info(e)
                if p is not None:
                    p.subject = subject
                    papers.append(p)

    papers.sort(reverse=True)
    return papers
Example 21
 def test_multiple_string_parse(self):
     bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
     bib_database = bibtexparser.loads(bibtex_str)
     expected = OrderedDict()
     expected['name1'] = 'value1'
     expected['name2'] = 'value2'
     self.assertEqual(bib_database.strings, expected)
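
For reference, @string definitions never show up in entries; loads collects them into the database's strings mapping, and dumps writes them back out:

import bibtexparser

db = bibtexparser.loads('@string{name1 = "value1"}\n@article{a, title = {T}}')
print(db.strings['name1'])  # 'value1'
print(len(db.entries))      # 1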
Example 22
def normalize(input_file, output_file):
    """
    read a *.bib file, change every 'title' and 'booktitle' field to only
    use uppercase for the first letter and write the changes to the output
    file.

    Parameters
    ----------
    input_file : file
        the *.bib file to normalize
    output_file : file
        the *.bib output file
    """
    bibtex_str = input_file.read()
    bib_database = bibtexparser.loads(bibtex_str)

    for entry in bib_database.entries:
        for field in ('title', 'booktitle'):
            if field in entry:
                field_str = entry[field]
                # don't touch titles that are (partially) enclosed in brackets
                if (not FIXED_TITLE_RE.match(field_str)
                   and not BRACKETS_RE.search(field_str)):
                    if ':' in field_str:
                        # split no more than once
                        title, subtitle = field_str.split(':', 1)
                        entry[field] = u'{}: {}'.format(title,
                                                        subtitle.lower())
                    else:
                        new_field_str = field_str.capitalize()
                        entry[field] = new_field_str

    new_bibstr = bibtexparser.dumps(bib_database)
    output_file.write(new_bibstr.encode('utf-8'))
Example 23
def merge_data(paper_list, scholar_data_list):
    # Merge yml data with google scholar data
    assert(len(paper_list) == len(scholar_data_list))
    papers = []
    for yaml_paper_info, scholar_data in zip(paper_list, scholar_data_list):
        paper = dict()

        # see __getitem__ of ScholarArticle
        attrs = dict([(key, scholar_data.attrs[key][0]) for key in scholar_data.attrs.keys()])
        paper.update(attrs)

        if scholar_data.citation_data:
            paper['citation_data'] = scholar_data.citation_data
            print 'citation data %s' % scholar_data.citation_data
            bibdata = bibtexparser.loads(scholar_data.citation_data)
            bibinfo = bibdata.entries[0]
            paper.update(bibdata.entries[0])
        else:
            print 'Warning: %s does not have citation_data' % yaml_paper_info['title']

        paper.update(yaml_paper_info)
        # This should have the highest priority and overwrite others

        # if len(papers) == 0:
        #     # Only do it once
        #     print 'Scholar data field %s' % attrs.keys()
        #     print 'Bib data fields %s' % bibinfo.keys()

        if paper.get('author'):
            paper['first_author'] = paper['author'].split('and')[0].strip()
        papers.append(paper)


    print 'Available data fields %s' % papers[0].keys()
    return papers
Example 24
def save_citation(citation_record):
    cite_anchor = citation_record.find('a', {'class': 'gs_nph', 'href': '#', "role": "button"})
    if not cite_anchor or not cite_anchor['onclick']:
        logging.warn("No Cite anchor for citation: %s" % citation_record)
        return
    citation_id = cite_anchor['onclick'].split(',')[1][1:-1]
    logging.info("Getting formated cite from citation id: " + citation_id)
    params = {"q": "info:%s:scholar.google.com/" % citation_id, "output": "cite"}
    soup = create_soup_by_url("https://scholar.google.com/scholar", params)
    bib_anchor = soup.find('a', {"class": "gs_citi"})
    if not bib_anchor:
        logging.debug("BibTex page soup is: %s" % soup.getText())
        logging.warn("No BibTex citation provided for citation: %s" % citation_id)
        return
    soup = create_soup_by_url(bib_anchor['href'])
    global citation_num
    citation_num += 1
    # Adding a tag to the bib entry about google scholar citation ID
    citation_entry = bibtexparser.loads(soup.getText()).entries[0]
    citationID = citation_entry['ID'] # e.g., melville2004review
    citation_entry["gscholar_id"] = citation_id
    db_entry=[]
    db_entry.append(citation_entry)
    db = BibDatabase()
    db.entries = db_entry
    g_bib_entry = bibtexparser.dumps(db)
    bib_entry = "%% [%d]\n%s" % (citation_num, g_bib_entry)
    logging.info(bib_entry.strip())
    with open(opts.citation_name, "a+") as f:
        f.write(bib_entry.encode('utf-8'))
    if opts.should_download:
        pdf_div = citation_record.find('div', {"class": "gs_ggs gs_fl"})
        if pdf_div:
            download_pdf(pdf_div.a['href'], citationID)
Example 25
def parseEntry(s):
    # normalize unicode by reparsing
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    db1 = bibtexparser.loads(s, parser=parser)
    es = db1.entries
    return es[0]
Example 26
def getsource(material):
    print('Grabbing MP BIB info for ' + material + '...')

    key = '0cVziFePTUfsawW8'

    url = 'https://www.materialsproject.org/materials/' + material + '/bibtex?API_KEY=' + key
    
    t=0
    rbib = []
    while t<4:
        try:
            r = requests.get(url)
            if r.status_code == 200:
                rbib = bibtexparser.loads(r.text).entries
                break
            else:
                print('error' + str(t))
                t = t+1
            
        except requests.ConnectionError:
            print('error' + str(t))
            t = t+1
    
    source = []
    
    for entry in rbib:
        if entry['ID'] != 'MaterialsProject' and entry['ID'] != 'Bergerhoff1983' and entry['ID'] != 'Karlsruhe':
            try:
                source.append(entry)
            except KeyError:
                pass
    
    return source
Example 27
def get_bibtex_dict(bib_fpath):
    r"""
    Args:
        bib_fpath (str):

    Returns:
        dict: bibtex_dict

    CommandLine:
        python -m utool.util_latex --test-get_bibtex_dict
        pip install bibtexparser

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_latex import *  # NOQA
        >>> import utool as ut
        >>> bib_fpath = ut.truepath('~/latex/crall-candidacy-2015/My_Library_clean.bib')
        >>> bibtex_dict = get_bibtex_dict(bib_fpath)
        >>> result = ('bibtex_dict = %s' % (str(bibtex_dict),))
        >>> print(result)
    """
    import bibtexparser
    import utool as ut
    bibtex_str   = ut.readfrom(bib_fpath, verbose=False)
    bib_database = bibtexparser.loads(bibtex_str)
    bibtex_dict  = bib_database.get_entry_dict()
    return bibtex_dict
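
get_entry_dict() (also exposed as entries_dict) keys the database by citation key; a small sketch with a hypothetical doe2015 entry:

import bibtexparser

bib_database = bibtexparser.loads('@book{doe2015, title = {A Book}, year = {2015}}')
bibtex_dict = bib_database.get_entry_dict()
print(bibtex_dict['doe2015']['title'])  # 'A Book'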
Example 28
 def parse_volume_bib(self, response):
     """
     Parses the volume bib page.
     :param category: in which the volume bib should be stored.
     :return: nothing but the bib page is stored.
     """
     category = response.meta["category"]
     txt = response.body.decode("utf-8")
     volume = response.meta["volume_url"].split("/")[-1][: -4]  # extract volume name from url
     if txt.startswith(u'\ufeff'):
         txt = txt[1:]
     if len(txt) == 0:
         logger.warning("empty volume bib on %s", response.url)
         request = scrapy.Request(response.meta["event_url"], callback=self.parse_event_page_precisely, dont_filter=True)
         request.meta["volume_url"] = response.meta["volume_url"]
         request.meta["category"] = category
         yield request
     else:
         bib_tex = bibtexparser.loads(txt)
         entries = bib_tex.entries
         # print file_name, len(entries)
         for bib in entries:
             bib["event"] = category
             bib["volume"] = volume
             self.insert(bib)
         self.num_volume_crawled += 1
         self.num_paper_crawled += len(entries)
         self.db_mark_volume(response.url)
Example 29
File: backend.py Project: m000/BMC
def updateArXiv(entry):
    """Look for new versions of arXiv entry `entry`

    Returns False if no new versions or not an arXiv entry,
    Returns the new bibtex otherwise.
    """
    bibtex = getBibtex(entry)
    # Check arXiv
    if('archiveprefix' not in bibtex or
       'arXiv' not in bibtex['archiveprefix']):
        return False

    arxiv_id = bibtex['eprint']
    arxiv_id_no_v = re.sub(r'v\d+\Z', '', arxiv_id)
    ids = {arxiv_id}

    # collect the eprint ids of all stored arXiv entries
    for other in getEntries():
        other_bibtex = getBibtex(other)
        if('archiveprefix' not in other_bibtex or
           'arXiv' not in other_bibtex['archiveprefix']):
            continue
        ids.add(other_bibtex['eprint'])

    last_bibtex = bibtexparser.loads(fetcher.arXiv2Bib(arxiv_id_no_v))
    last_bibtex = last_bibtex.entries_dict
    last_bibtex = last_bibtex[list(last_bibtex.keys())[0]]

    if last_bibtex['eprint'] not in ids:
        return last_bibtex
    else:
        return False
Example 30
 def load_database(self):
     '''
     load bibtex file if needed
     '''
     if any([not os.path.exists(self.args.output_path),
         self.args.overwrite == 'y']):
         self.args.bibtex_database = bibtexparser.loads('')
     else:
         self.args.bibtex_database = bibtexparser.load(open(self.args.output_path))
Example 31
    def fill(self, publication: Publication) -> Publication:
        """Populate the Publication with information from its profile

        :param publication: Scholar or Citation publication container object that is not filled
        :type publication: PublicationCitation or PublicationScholar
        """
        if publication['source'] == PublicationSource.AUTHOR_PUBLICATION_ENTRY:
            url = _CITATIONPUB.format(publication['author_pub_id'])
            soup = self.nav._get_soup(url)
            publication['bib']['title'] = soup.find('div',
                                                    id='gsc_oci_title').text
            if publication['bib']['title'][-1] == '\u2026':
                merged_snippet = soup.find('div',
                                           class_='gsc_oci_merged_snippet')
                if merged_snippet:
                    title_div = merged_snippet.find('div')
                    if title_div:
                        publication['bib']['title'] = title_div.text
            if soup.find('a', class_='gsc_oci_title_link'):
                publication['pub_url'] = soup.find(
                    'a', class_='gsc_oci_title_link')['href']
            for item in soup.find_all('div', class_='gs_scl'):
                key = item.find(class_='gsc_oci_field').text.strip().lower()
                val = item.find(class_='gsc_oci_value')
                if key == 'authors' or key == 'inventors':
                    publication['bib']['author'] = ' and '.join(
                        [i.strip() for i in val.text.split(',')])
                elif key == 'journal':
                    publication['bib']['journal'] = val.text
                elif key == 'conference':
                    publication['bib']['conference'] = val.text
                elif key == 'volume':
                    publication['bib']['volume'] = val.text
                elif key == 'issue':
                    publication['bib']['number'] = val.text
                elif key == 'pages':
                    publication['bib']['pages'] = val.text
                elif key == 'publisher':
                    publication['bib']['publisher'] = val.text
                elif key == 'publication date':

                    patterns = [
                        'YYYY/M', 'YYYY/MM/DD', 'YYYY', 'YYYY/M/DD',
                        'YYYY/M/D', 'YYYY/MM/D'
                    ]
                    publication['bib']['pub_year'] = arrow.get(
                        val.text, patterns).year
                elif key == 'description':
                    # try to find all the gsh_csp if they exist
                    abstract = val.find_all(class_='gsh_csp')
                    result = ""

                    # append all gsh_csp together as there can be multiple in certain scenarios
                    for item in abstract:
                        if item.text[0:8].lower() == 'abstract':
                            result += item.text[9:].strip()
                        else:
                            result += item.text

                    if len(abstract) == 0:  # if no gsh_csp were found
                        abstract = val.find(class_='gsh_small')
                        if abstract:
                            if abstract.text[0:8].lower() == 'abstract':
                                result = abstract.text[9:].strip()
                            else:
                                result = abstract.text
                        else:
                            result = ' '.join(
                                [description_part for description_part in val])

                    publication['bib']['abstract'] = result
                elif key == 'total citations':
                    publication['cites_id'] = re.findall(
                        _SCHOLARPUBRE, val.a['href'])[0].split(',')
                    publication['citedby_url'] = _CITEDBYLINK.format(','.join(
                        publication['cites_id']))
                elif key == 'scholar articles':
                    for entry in val.find_all('a'):
                        if entry.text.lower() == 'related articles':
                            publication['url_related_articles'] = entry.get(
                                'href')[26:]
            # number of citation per year
            years = [int(y.text) for y in soup.find_all(class_='gsc_oci_g_t')]
            cites = [int(c.text) for c in soup.find_all(class_='gsc_oci_g_al')]
            cites_year = [
                int(c.get('href')[-4:])
                for c in soup.find_all(class_='gsc_oci_g_a')
            ]
            nonzero_cites_per_year = dict(zip(cites_year, cites))
            res_dict = {}
            for year in years:
                res_dict[year] = (nonzero_cites_per_year[year]
                                  if year in nonzero_cites_per_year else 0)
            publication['cites_per_year'] = res_dict

            if soup.find('div', class_='gsc_vcd_title_ggi'):
                publication['eprint_url'] = soup.find(
                    'div', class_='gsc_vcd_title_ggi').a['href']
            publication['filled'] = True
        elif publication[
                'source'] == PublicationSource.PUBLICATION_SEARCH_SNIPPET:
            bibtex_url = self._get_bibtex(publication['url_scholarbib'])
            bibtex = self.nav._get_page(bibtex_url)
            parser = bibtexparser.bparser.BibTexParser(common_strings=True)
            parsed_bib = remap_bib(
                bibtexparser.loads(bibtex, parser).entries[-1], _BIB_MAPPING,
                _BIB_DATATYPES)
            publication['bib'].update(parsed_bib)
            publication['filled'] = True
        return publication
Example 32
 def fetch_content(self):
     doi_url = self.doi_base_url + self.identifier
     page = self.get_page(doi_url, headers=self.headers)
     bibtex = bibtexparser.loads(page)
     return bibtex.entries[0]
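
doi.org only returns BibTeX under content negotiation, so self.headers in the snippet above presumably carries an Accept header along these lines:

headers = {'Accept': 'application/x-bibtex'}  # assumption; the class's actual headers are not shown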
Example 33
# get list of immediate child subdirs SO:973473 :
subdirs = sorted(next(os.walk(dir_data))[1])  #ok
# 02_SMC Conference 2015:044/74: orig 'G. Presti and D.A. Mauro and G. Haus' ->  _DATA_/02_SMC\ Conference\ 2015/smc_2015_044.pdf
numcommas = 0
# homogenize_fields: Sanitize BibTeX field names, for example change `url` to `link` etc.
tbparser = BibTexParser()
tbparser.homogenize_fields = False  # no dice
tbparser.alt_dict['url'] = 'url'  # this finally prevents the change of 'url' to 'link'
for subdir in subdirs:
    bibfile = os.path.join(dir_data, subdir, "%s.bib" % (subdir))
    print((bibfile, os.path.isfile(bibfile)))
    with open(bibfile) as bibtex_file:
        bibtex_str = bibtex_file.read()
    bib_database = bibtexparser.loads(bibtex_str, tbparser)
    #pprint.pprint(bib_database.entries) # already here,replaces 'url' with 'link'
    confbiblen = len(bib_database.entries)
    for icpbe, confpaperbibentry in enumerate(bib_database.entries):
        authstr = confpaperbibentry['author']
        if ("," in authstr):
            numcommas += 1
            report = "%d/%d: Comma present: '%s'" % (icpbe + 1, confbiblen,
                                                     authstr)
            authstrauthors = authstr.split(" and ")
            for ia, author in enumerate(authstrauthors):
                if ("," in author):
                    authorparts = author.split(", ")
                    # the first part [0] is last name, needs to become last
                    # get and remove the first part, then append it as last
                    lastname = authorparts.pop(0)

Example 34
from utils import generate_md_file
import bibtexparser
import os

file_name = str(os.path.join(os.getcwd(),'bibtex.bib'))

with open(file_name) as bibtex_file:
    bibtex_str = bibtex_file.read()

bib_db = bibtexparser.loads(bibtex_str, parser=bibtexparser.bparser.BibTexParser(ignore_nonstandard_types=False))

################################### Create Readme ####################################
def plot_titles(titles):
    return '\n' + "## " + titles[0] + '\n'

list_types = [["Classics", "Classic"],
               ["Empirical Study", "Empirical"],
               ["Surveys", "Survey", "survey"],
               ["Influentials", "Influential"],
               ["New Settings or Metrics", "Setting", "Metric"],
               ["Regularization Methods", "Regularization"],
               ["Distillation Methods", "Distillation"],
               ["Rehearsal Methods", "Rehearsal"],
               ["Generative Replay Methods", "Generative Replay"],
               ["Dynamic Architectures or Routing Methods", "Architectures", "Dynamic Architecture"],
               ["Hybrid Methods", "Hybrid"],
               ["Continual Few-Shot Learning", "Continual-Meta Learning"],
               ["Meta-Continual Learning"],
               ["Lifelong Reinforcement Learning", "Reinforcement"],
Example 35
Months = """@STRING{ jan = "jan"}
@STRING{ feb = "feb"}
@STRING{ mar = "mar"}
@STRING{ apr = "apr"}
@STRING{ may = "may"}
@STRING{ jun = "jun"}
@STRING{ jul = "jul"}
@STRING{ aug = "aug"}
@STRING{ sep = "sep"}
@STRING{ oct = "oct"}
@STRING{ nov = "nov"}
@STRING{ dec = "dec"}
"""

print('Parsing files in ' + folder + '/')
for file in os.listdir(folder):
    if file.endswith(".bib"):
        print(os.path.join(folder, file))
        with open(os.path.join(folder, file)) as bibtex_file:
            content = Months + bibtex_file.read()
            parser = BibTexParser()
            parser.common_strings = True
            bib_database = bibtexparser.loads(content, parser)
            for entry in bib_database.entries:
                #print(entry['ID'])
                entry['keywords'] = entry.get('keywords', '')
                if (entry['keywords'] != ''):
                    entry['keywords'] = 'cleBib/' + entry[
                        'ID'] + ', article/' + os.path.splitext(
                            file)[0] + ', ' + entry['keywords']
                else:
                    entry['keywords'] = 'cleBib/' + entry[
                        'ID'] + ', article/' + os.path.splitext(file)[0]
            with open(os.path.join(folder + '-clean', file),
                      'w') as bibtex_export:
                bibtex_export_str = bibtexparser.dumps(bib_database, writer)
                bibtex_export.write(bibtex_export_str.encode('utf8'))
Example 36
 def test_multiple_string_parse_count(self):
     bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n'
     bib_database = bibtexparser.loads(bibtex_str)
     self.assertEqual(len(bib_database.strings), 2)
Example 37
    def fill(self):
        """Populate the Publication with information from its profile"""
        if self.source == 'citations':
            url = _CITATIONPUB.format(self.id_citations)
            soup = self.nav._get_soup(url)
            self.bib['title'] = soup.find('div', id='gsc_vcd_title').text
            if soup.find('a', class_='gsc_vcd_title_link'):
                self.bib['url'] = soup.find(
                    'a', class_='gsc_vcd_title_link')['href']
            for item in soup.find_all('div', class_='gs_scl'):
                key = item.find(class_='gsc_vcd_field').text.strip().lower()
                val = item.find(class_='gsc_vcd_value')
                if key == 'authors':
                    self.bib['author'] = ' and '.join(
                        [i.strip() for i in val.text.split(',')])
                elif key == 'journal':
                    self.bib['journal'] = val.text
                elif key == 'volume':
                    self.bib['volume'] = val.text
                elif key == 'issue':
                    self.bib['number'] = val.text
                elif key == 'pages':
                    self.bib['pages'] = val.text
                elif key == 'publisher':
                    self.bib['publisher'] = val.text
                elif key == 'publication date':

                    patterns = [
                        'YYYY/M', 'YYYY/MM/DD', 'YYYY', 'YYYY/M/DD',
                        'YYYY/M/D', 'YYYY/MM/D'
                    ]
                    self.bib['year'] = arrow.get(val.text, patterns).year
                elif key == 'description':
                    # try to find all the gsh_csp if they exist
                    abstract = val.find_all(class_='gsh_csp')
                    result = ""

                    # append all gsh_csp together as there can be multiple in certain scenarios
                    for item in abstract:
                        if item.text[0:8].lower() == 'abstract':
                            result += item.text[9:].strip()
                        else:
                            result += item.text

                    if len(abstract) == 0:  # if no gsh_csp were found
                        abstract = val.find(class_='gsh_small')
                        if abstract:
                            if abstract.text[0:8].lower() == 'abstract':
                                result = abstract.text[9:].strip()
                            else:
                                result = abstract.text
                        else:
                            result = ' '.join(
                                [description_part for description_part in val])

                    self.bib['abstract'] = result
                elif key == 'total citations':
                    self.bib['cites_id'] = re.findall(_SCHOLARPUBRE,
                                                      val.a['href'])[0]
                    self.citations_link = _CITEDBYLINK.format(
                        self.bib['cites_id'])
            # number of citation per year
            years = [int(y.text) for y in soup.find_all(class_='gsc_vcd_g_t')]
            cites = [int(c.text) for c in soup.find_all(class_='gsc_vcd_g_al')]
            self.cites_per_year = dict(zip(years, cites))

            if soup.find('div', class_='gsc_vcd_title_ggi'):
                self.bib['eprint'] = soup.find(
                    'div', class_='gsc_vcd_title_ggi').a['href']
            self._filled = True
        elif self.source == 'scholar':
            bibtex_url = self._get_bibtex(self.url_scholarbib)
            bibtex = self.nav._get_page(bibtex_url)
            parser = bibtexparser.bparser.BibTexParser(common_strings=True)
            self.bib.update(bibtexparser.loads(bibtex, parser).entries[-1])
            self._filled = True
        return self
Example 38
def main():

    local_dir = os.getcwd()  # path of the tex source files

    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--tex', 
                        help='the path of tex file')
    parser.add_argument('-o', '--output', 
                        help='the path of bib file you are using for latex. By default the current path')
    args = parser.parse_args()

    
    tex_files = args.tex.replace(' ', '').split(',') if args.tex else [os.path.join(local_dir, f) for f in get_tex_file(local_dir)]  # if not given, look for tex files in the current path
    bib_keys = []
    bib_name = None  # TODO: cannot handle multiple bib names; in practice only main.tex carries this command
    for f in tex_files:
        key, temp_name = get_bibinfo(f)  # get the bib keys and the bib file
        bib_keys.extend(key)
        if temp_name:
            bib_name = temp_name
            bib_dir = os.path.split(f)[0]  # directory of the tex file

    tex_dir = bib_dir if args.tex else local_dir    # directory the tex file lives in
    bib_name = os.path.join(tex_dir, bib_name)  # join the path so it points next to the tex file
    output_bib = args.output if args.output else bib_name   # prefer the command-line argument, otherwise the name given in the tex file, in the same path


    # read data from the zotero API
    try:
        r = requests.get(ZOTERO_API)
    except requests.exceptions.ConnectionError:
        print('zotero is not running; failed to reach its database')
        sys.exit(1)
    if r.status_code == 200:
        print('successfully read data from zotero')
    else:
        raise Exception('failed to read data from zotero, status code: {}'.format(r.status_code))
    r.encoding = 'utf-8'
    bib_str = modify_bibs(r.text)

    # with open('./bib_str.txt', 'w', encoding='utf8') as out_bib:
    #     out_bib.write(bib_str)


    # build the BibTexParser
    bibParser = BibTexParser(common_strings=False)
    bibParser.ignore_nonstandard_types = True
    bibParser.homogenize_fields = True
    bibdata = bp.loads(bib_str, bibParser)

    # for i in range(100,120):
    #     print(bibdata.entries[i])
    #     print(type(bibdata.entries[i]), '\n')

    # normalize the bib library
    # inefficient: better to read the bib ids from the big library directly and append when present, otherwise report an error
    bibdata_out = bp.bibdatabase.BibDatabase()
    for d in bibdata.entries:
        if d['ID'] in bib_keys:
            bibdata_out.entries.append(d)
            entity_check = check_entity(d)
            entity_check_consequence = '----> title: ' + re.sub(r'[{}]', '', d['title']) + ' missing fields: ' + str(entity_check) if entity_check else ''
            print('imported ----> ' + d['ID'], entity_check_consequence)
            bib_keys.remove(d['ID'])

    # TODO
    # check whether the failed imports are cited in other referenced bib files

    bibkey_not_found = '\n'.join(bib_keys)
    print('the following imports failed ({} in total):\n'.format(len(bib_keys)), bibkey_not_found)
    print('------------end---------------')

    # print(bibdata_out)
    with open(output_bib, 'w', encoding='utf8') as bib_write:
        bp.dump(bibdata_out, bib_write)
Example 39
 def test_single_string_parse(self):
     bibtex_str = '@string{name1 = "value1"}\n\n'
     bib_database = bibtexparser.loads(bibtex_str)
     expected = {'name1': 'value1'}
     self.assertEqual(bib_database.strings, expected)
Example 40
    def __init__(self, j):
        """
        Constructor.

        @param [in] j JSON representation of the case study.
        """

        ## The user community name.
        self.__full_user_community_name = j['full_name']

        ## The label.
        self.__label = j['label']

        ## The sector.
        self.__sector = j['sector']

        ## The raw overview text.
        self.__overview_raw = j['overview']

        ## The raw "The problem" text.
        self.__the_problem_raw = j['the_problem']

        ## The raw "The solution" text.
        self.__the_solution_raw = j['the_solution']

        ## The "What they said" raw quote text.
        self.__what_they_said = None
        #
        ## Who said it?
        self.__who_said_it = None
        #
        if 'what_they_said' in j.keys():
            self.__what_they_said = j['what_they_said']['quote']
            self.__who_said_it = j['what_they_said']['contact']

        ## A dictionary of the supporting sites.
        self.__sites = {}
        #
        for site in j['supporting_sites']:
            self.__sites[site.keys()[0]] = site.values()[0]

        ## A dictionary of services.
        self.__services = {}
        #
        for service in j['services']:
            self.__services[service.keys()[0]] = service.values()[0]

        ## A dictionary of Virtual Organisations (VOs).
        self.__vos = {}
        #
        if 'vos' in j.keys():
            for vo in j['vos']:
                my_vo = VirtualOrganisation(vo)
                self.__vos[my_vo.get_name()] = my_vo

        ## The acknowledgements raw text.
        self.__acknowledgements = None
        #
        if 'acknowledgements' in j.keys():
            self.__acknowledgements = j['acknowledgements']

        ## Dictionary of the hyperlinks.
        self.__links = {}
        #
        for link in j['links']:
            self.__links[link.keys()[0]] = link.values()[0]

        ## A dictionary of figures.
        self.__figures = {}
        #
        for fig in j['figures']:
            self.__figures[fig['label']] = Figure(fig)

        # Get the BibTeX items from the BibTeX file.
        with open("common/bib/GridPP.bib", 'r') as bibtex_file:
            bibtex_str = bibtex_file.read()

        ## The BibTeX database.
        bib_database = bibtexparser.loads(bibtex_str)

        lg.info(" *")
        lg.info(" * Number of entries in the BibTeX file: %d" % (len(bib_database.entries)))
        lg.info(" *")

        ## A dictionary of the papers.
        papers = {}

        # Get the papers (and check whether the PDF is there).
        for entry in bib_database.entries:
            if entry['ENTRYTYPE'] == 'article':
                paper = Paper(entry)
                papers[paper.get_id()] = paper

        ## A dictionary of publications used in the case study.
        self.__papers = {}
        #
        if 'references' in j.keys():
            for p in j['references']:
                # Get the paper.
                citecode = p.keys()[0]
                if citecode in papers.keys():
                    self.__papers[citecode] = papers[citecode]
Example 41
        def prase_wid():
            a = bibtexparser.loads(str(textedit.toPlainText()))
            if len(a.entries) == 0:
                QtWidgets.QMessageBox.critical(self, "Wrong Prasing",
                                               "Wrong Bibtex reference.")
                return False
            d = a.entries[0]
            if 'file' not in d:
                QtWidgets.QMessageBox.critical(
                    self, "Wrong Parsing", "Bibtex does not contain the file.")
                return False
            if not ('year' in d or 'date' in d):
                QtWidgets.QMessageBox.critical(
                    self, "Wrong Parsing",
                    "Bibtex does not contain the year or the date.")
                return False
            if 'author' not in d:
                QtWidgets.QMessageBox.critical(
                    self, "Wrong Parsing", "Bibtex does not contain the author.")
                return False
            if 'title' not in d:
                QtWidgets.QMessageBox.critical(
                    self, "Wrong Parsing", "Bibtex does not contain the title.")
                return False

            if 'year' in d: y = d['year']
            else:
                tmp = re.findall('[0-9][0-9][0-9][0-9]', d['date'])
                if len(tmp) == 0:
                    QtWidgets.QMessageBox.critical(
                        self, "Wrong Parsing",
                        "Cannot understand the year format of "
                        "the date.")
                    return False
                else:
                    y = tmp[0]
            authors = d["author"].split(" and ")
            authors = [a.split(',') for a in authors]
            authors = [[aa.strip() for aa in a] for a in authors]
            short = authors[0][0] + y

            authors_str = []
            for a in authors:
                new_a = a[0]
                if len(a) >= 2:
                    for aa in a[1:]:
                        new_a += ' ' + ' '.join([
                            firstname[0].upper() + '.'
                            for firstname in aa.split(" ")
                        ])
                authors_str.append(new_a)

            if len(authors_str) >= 5:
                authors_str = authors_str[:4] + ['...'] + [authors_str[-1]]
            authors_str = ", ".join(authors_str)
            print("authors_str", authors_str)
            title = d['title']
            title = title.replace('{', '')
            title = title.replace('}', '')

            files = d['file'].split(';')
            motif = ':application/pdf'
            for i, f in enumerate(files):
                if f.endswith(motif):
                    files[i] = f[:-len(motif)]

            files = filter(lambda f: f.endswith('.pdf') or f.endswith('.PDF'),
                           files)
            files = list(files)
            if len(files) == 0:
                QtWidgets.QMessageBox.critical(
                    self, "Wrong Prasing",
                    "No pdf file detected in bibtex file.")
                return False
            elif len(files) == 1:
                file = files[0]
                if onWindows:
                    file = file.replace(r'C\:', 'C:')  # on windows
                    file = file.replace(r'\\', '\\')  # on windows

            else:
                item = QtWidgets.QInputDialog.getItem(textedit,
                                                      'Choose pdf file',
                                                      'File: ', files, 0,
                                                      False)
                if not item[1]:
                    return False
                file = str(item[0])

            m = file.find(':')
            if m < 0:
                QtWidgets.QMessageBox.critical(
                    self, "Wrong Parsing",
                    "No pdf file detected in bibtex file.")
                return False
            file = file[m + 1:]

            notes = ""
            if 'annote' in d:
                notes = d['annote']

            wid.close()
            self.short_edit.setText(short)
            self.authors_edit.setText(authors_str)
            self.title_edit.setText(title)
            self.file_edit.setText(file)
            self.notes_edit.setText(notes)
Example 42
    def fill(self):
        """Populate the Publication with information from its profile"""
        if self.source == 'citations':
            url = _CITATIONPUB.format(self.id_citations)
            soup = self.nav._get_soup(url)
            self.bib['title'] = soup.find('div', id='gsc_vcd_title').text
            if soup.find('a', class_='gsc_vcd_title_link'):
                self.bib['url'] = soup.find(
                    'a', class_='gsc_vcd_title_link')['href']
            for item in soup.find_all('div', class_='gs_scl'):
                key = item.find(class_='gsc_vcd_field').text.strip().lower()
                val = item.find(class_='gsc_vcd_value')
                if key == 'authors':
                    self.bib['author'] = ' and '.join(
                        [i.strip() for i in val.text.split(',')])
                elif key == 'conference':
                    self.bib['conference'] = val.text
                elif key == 'journal':
                    self.bib['journal'] = val.text
                elif key == 'book':
                    self.bib['book'] = val.text
                elif key == 'volume':
                    self.bib['volume'] = val.text
                elif key == 'issue':
                    self.bib['number'] = val.text
                elif key == 'pages':
                    self.bib['pages'] = val.text
                elif key == 'publisher':
                    self.bib['publisher'] = val.text
                elif key == 'publication date':

                    patterns = ['YYYY/M',
                                'YYYY/MM/DD',
                                'YYYY',
                                'YYYY/M/DD',
                                'YYYY/M/D',
                                'YYYY/MM/D']
                    self.bib['year'] = arrow.get(val.text, patterns).year
                elif key == 'description':
                    abstract = val.find(class_='gsh_csp')
                    if abstract is None:
                        abstract = val.find(class_='gsh_small')
                    text = abstract.text
                    if text[0:8].lower() == 'abstract':
                        text = text[9:].strip()
                    self.bib['abstract'] = text
##                elif key == 'total citations':
##                    self.bib['cites'] = re.findall(
##                        _SCHOLARPUBRE, val.a['href'])[0]
                elif key == 'total citations':
                    m=re.search('by (.*?)<',str(val))
                    self.bib['cites'] = m.group(1)

            # number of citation per year
            years = [int(y.text) for y in soup.find_all(class_='gsc_vcd_g_t')]
            cites = [int(c.text) for c in soup.find_all(class_='gsc_vcd_g_al')]
            self.cites_per_year = dict(zip(years, cites))

            if soup.find('div', class_='gsc_vcd_title_ggi'):
                self.bib['eprint'] = soup.find(
                    'div', class_='gsc_vcd_title_ggi').a['href']
            self._filled = True
        elif self.source == 'scholar':
            bibtex = self.nav._get_page(self.url_scholarbib)
            self.bib.update(bibtexparser.loads(bibtex).entries[0])
            self._filled = True
        return self
Example 43
def bib_pars(item1):
    bib_database = bibtexparser.loads(item1)
    return bib_database.entries[0]
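
A usage sketch for bib_pars; the parser injects ID and ENTRYTYPE alongside the fields:

entry = bib_pars('@misc{key1, title = {Hello}, year = {2021}}')
print(entry['ID'], entry['ENTRYTYPE'], entry['title'])  # key1 misc Hello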
Example 44
from tkinter import Tk
import bibtexparser
from bibtexparser.bparser import BibTexParser
import pyperclip # dependency, need to pip install
# import getPDF_url as gpdf
import codecs

# get citation from clipboard
# we assume it is in valid bibtex
# we assume has title, authors, year, and publication; lazy for now, should add edge cases later
r = Tk()
r.withdraw()
clip_text = r.clipboard_get()

# parse the bibtex
# need to define a parser with custom settings bc zotero has nonstandard bibtex items like "jan" for month
# per https://github.com/sciunto-org/python-bibtexparser/issues/192
parser = BibTexParser(common_strings=True)
bib = bibtexparser.loads(clip_text, parser)
entries = bib.entries
# print(entry)
print(f"Processing {len(entries)} entries")

for entry in entries:
    # parse title
    print(entry)
    title = entry['title'].replace("{", "").replace("}", "").replace("\n", " ")

    # build author string
    authors = []
    for author in entry['author'].split(" and "):
        author = author.strip().replace("\n", " ").split(",")
        authors.append("[[%s %s]]" %(author[-1].strip(), author[0].strip()))
Example 45
def save_titles(bibtex_file, username, password):

    # read bibtex file
    with open(bibtex_file) as f:
        bibtex_str = f.read()
    bib_database = bibtexparser.loads(bibtex_str)
    entries = bib_database.entries

    # connect to Arxiv Sanity
    driver = webdriver.PhantomJS()
    driver.get('http://www.arxiv-sanity.com')

    # login
    username_elem = driver.find_element_by_name("username")
    password_elem = driver.find_element_by_name("password")
    username_elem.send_keys(username)
    password_elem.send_keys(password)
    driver.find_element_by_css_selector(".btn-fancy").click()

    # search for the title of each BibTeX entry
    for e, entry in enumerate(entries):

        time.sleep(5)

        title = entry['title']

        print('-' * 100)
        print('%.0f%% | BibTeX title: %s' % (100. *
                                             (e + 1) / len(entries), title))

        qfield = driver.find_element_by_id('qfield')
        qfield.clear()
        qfield.send_keys(title)
        qfield.send_keys(Keys.ENTER)

        papers = driver.find_elements_by_class_name('apaper')
        imgs = driver.find_elements_by_class_name('save-icon')
        assert len(imgs) == len(papers)

        if len(imgs) == 0:
            print('No search results')
            continue

        site_titles = []
        for paper in papers:
            site_title = paper.find_element_by_class_name(
                'paperdesc').find_element_by_tag_name('a').get_attribute(
                    'text')
            site_titles.append(site_title)
        distances = [
            editdistance.eval(title, site_title) for site_title in site_titles
        ]

        # pick the closest title first, so it can be reported either way
        i = distances.index(min(distances))
        if min(distances) > 10:
            print('No match found within threshold, closest was: %s' %
                  site_titles[i])
            continue

        img = imgs[i]

        src = img.get_attribute('src')
        if src.endswith('saved.png'):
            print('Paper already saved')
            continue

        img.click()
        print('Saved paper with title: %s' % site_titles[i])
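A hedged invocation sketch; the file name and credentials below are placeholders:

save_titles('library.bib', 'my_username', 'my_password')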
Esempio n. 46
0
import os
import bibtexparser
import django
from datetime import datetime

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "document_management.settings")
django.setup()

from document.models import Document, Author, Field, Publisher, Project

with open('/home/each/Documents/document_management/static/test/ml.bib',
          'r') as bibfile:
    bibstring = bibfile.read()

bd = bibtexparser.loads(bibstring)
docs = bd.entries

pro = Project(name="deep learning study",
              description="this is project is used for deep learning study")
pro.save()

for doc in docs:
    if "title" in doc:
        title = doc['title'].replace("{", "").replace("}", "")
        try:
            document = Document.objects.get(title=title)
            print "sucess"
        except:
            print "except"
            # exit()
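A possible shape for the missing-document branch, using Django's get_or_create; matching on title alone is an assumption:

document, created = Document.objects.get_or_create(title=title)
print("created" if created else "already present")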
Esempio n. 47
0
def dashboard_publications(request):
    all_journal = JournalImage.objects.all()
    print(all_journal)
    all_publications = Publication.objects.all()
    context = {'all_journal': all_journal,
               'all_publications': all_publications}

    if request.method == 'POST':
        if 'journal' in request.POST:
            submitted_form = AddEditJournalForm(request.POST, request.FILES)
            if submitted_form.is_valid():
                submitted_form.save()
                return redirect(reverse('dashboard_publications'))
            else:
                messages.error(request, submitted_form.errors)
                context['journal_form'] = submitted_form
                return render(request, 'website/dashboard_publications.html', context)

        if 'manual' in request.POST:
            submitted_form = AddEditPublicationForm(request.POST, request.FILES)
            if submitted_form.is_valid():
                submitted_form.save()
                return redirect(reverse('dashboard_publications'))
            else:
                messages.error(request, submitted_form.errors)
                context['form'] = submitted_form
                return render(request, 'website/dashboard_publications.html', context)

        elif 'bibtex' in request.POST:
            bibtex_entered = request.POST.get('bibtex')
            try:
                bib_parsed = bibtexparser.loads(bibtex_entered)
                bib_info = bib_parsed.entries[0]

                if 'title' in bib_info:
                    title = bib_info['title']
                else:
                    title = None

                if 'author' in bib_info:
                    authors = bib_info['author']
                elif 'authors' in bib_info:
                    authors = bib_info['authors']
                else:
                    authors = None

                if 'url' in bib_info:
                    url = bib_info['url']
                elif 'link' in bib_info:
                    url = bib_info['link']
                elif 'doi' in bib_info:
                    url = "http://dx.doi.org/" + bib_info['doi']
                else:
                    url = None

                if title and authors and url:
                    publication_obj = Publication(title=title, author=authors, url=url)
                    if 'ENTRYTYPE' in bib_info:
                        publication_obj.entry_type = bib_info['ENTRYTYPE']
                    if 'doi' in bib_info:
                        publication_obj.doi = bib_info['doi']
                    if 'journal' in bib_info:
                        publication_obj.published_in = bib_info['journal']
                    if 'booktitle' in bib_info:
                        publication_obj.published_in = bib_info['booktitle']
                    if 'publisher' in bib_info:
                        publication_obj.publisher = bib_info['publisher']
                    if 'year' in bib_info:
                        publication_obj.year_of_publication = bib_info['year']
                    if 'month' in bib_info:
                        publication_obj.month_of_publication = bib_info['month']
                    publication_obj.bibtex = bibtex_entered
                    publication_obj.save()
                    return redirect(reverse('dashboard_publications'))

                else:
                    return render(request, 'website/dashboard_publications.html', context)
            except Exception as e:
                messages.error(request, str(e))
                return render(request, 'website/dashboard_publications.html', context)

        else:
            raise Http404("Not a valid method for adding publications.")

    journal_form = AddEditJournalForm()
    form = AddEditPublicationForm()
    context['form'] = form
    context['journal_form'] = journal_form

    return render(request, 'website/dashboard_publications.html', context)
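A standalone sketch of the url fallback chain used in the bibtex branch above, on an invented record:

import bibtexparser

rec = bibtexparser.loads("""@article{X1,
  title = {T},
  author = {A},
  doi = {10.1000/x}
}""").entries[0]
url = rec.get('url') or rec.get('link') or ('http://dx.doi.org/' + rec['doi'] if 'doi' in rec else None)
print(url)  # prints: http://dx.doi.org/10.1000/x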
Esempio n. 48
0
    def test_multiple_preamble_parse(self):
        bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n'
        bib_database = bibtexparser.loads(bibtex_str)
        expected = [' a ', 'b']
        self.assertEqual(bib_database.preambles, expected)
Esempio n. 49
0
    def test_single_preamble_parse_count(self):
        bibtex_str = '@preamble{" a "}\n\n'
        bib_database = bibtexparser.loads(bibtex_str)
        self.assertEqual(len(bib_database.preambles), 1)
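For reference, a minimal sketch showing where @preamble strings land after parsing:

import bibtexparser

db = bibtexparser.loads('@preamble{" maintained by hand "}\n')
print(db.preambles)  # [' maintained by hand ']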
Esempio n. 50
0
ress = []  # collected Google Scholar bibtex entries
for it in db.entries:
    title = it['title'].strip('{}')
    id = it['ID']
    succ = False
    while not succ:
        try:
            res = gscholar.query(title)
            time.sleep(10)  # throttle requests between Scholar queries
            succ = True
        except Exception as e:
            print(e)
            break  # give up on this entry after a failed query
    if not succ:
        break
    it_gs = bibtexparser.loads(res[0])
    it_gs = it_gs.entries[0]
    # from IPython import embed; embed()
    it_gs['ID'] = id

    ress.append(it_gs)
    print(it_gs)
    # break

from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase

db = BibDatabase()
db.entries = ress

writer = BibTexWriter()
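The snippet stops after creating the writer; a plausible final step (the output file name is an assumption):

with open('scholar_merged.bib', 'w') as f:
    f.write(writer.write(db))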
Esempio n. 51
0
def _parse_bibtex(bib):
    return bibtexparser.loads(bib).entries
Esempio n. 52
0
def bibstr2dict(bibstr):
    bibdict = bibtexparser.loads(bibstr)
    return bibdict.entries
Esempio n. 53
0
    def readDBs(self, Directory):

        with open(Directory + os.sep + self.mPapersDBFileNames) as bibtex_file:
            bibtex_str = bibtex_file.read()

        self.mPapersDB = bibtexparser.loads(bibtex_str)
Esempio n. 54
0
# -*- coding: utf-8 -*-
# Copyright (c) Ezcad Development Team. All Rights Reserved.

import matplotlib.pyplot as plt
import bibtexparser

fn = "mcmechan.bib"
with open(fn) as bibtex_file:
    bibtex_str = bibtex_file.read()
bdb = bibtexparser.loads(bibtex_str)

# count publications per year
year_pubs = {}
for entry in bdb.entries:
    year = entry['year']
    year_pubs[year] = year_pubs.get(year, 0) + 1

# plot the bars in chronological order rather than file order
years = sorted(year_pubs)
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(10, 6)
ax.grid(zorder=0)
ax.bar(years, [year_pubs[y] for y in years], color='g', zorder=3)
ax.set_ylabel('Number of Papers')
plt.xticks(rotation='vertical')
# plt.show()

fn = "year_pubs.png"
plt.savefig(fn)
plt.close()
Esempio n. 55
0
import bibtexparser

# Import PURE
fname = 'all.bib'
parser = bibtexparser.bparser.BibTexParser(common_strings=True)
with open(fname, encoding="utf8") as bibtex_file:
    bibtex_str = bibtex_file.read()

bib_pure = bibtexparser.loads(bibtex_str, parser=parser)
print(fname + ' contains ', len(bib_pure.entries), ' entries')

# Import WEBSITE
fname = 'website_export.bib'
parser2 = bibtexparser.bparser.BibTexParser(common_strings=True)
with open(fname, encoding="utf8") as bibtex_file2:
    bibtex_str2 = bibtex_file2.read()

bib_website = bibtexparser.loads(bibtex_str2, parser=parser2)
print(fname + ' contains ', len(bib_website.entries), ' entries')

# Results
mavlab_missing = bibtexparser.bibdatabase.BibDatabase()
mavlab_merged = bibtexparser.bibdatabase.BibDatabase()

verbose = False


def cleanup_title(txt):
    txt = txt.replace('{',
                      '').replace('}',
                                  '').replace('¿',
Esempio n. 56
0
bib = re.sub("\\\\enquote{(.+?)}", r"»\1«", bib)  # Latex-Quotation austauschen
bib = re.sub("\\\\emph{(.+?)}", r"\1", bib)  # Latex-Quotation austauschen
bib = re.sub("„", "»", bib)  # Anführungszeichen austauschen
bib = re.sub("“", "«", bib)  # Abführungszeichen austauschen
bib = re.sub("--", "–", bib)  # Bindestriche austauschen
bib = re.sub("~", " ", bib)  # Gesicherte Spaces austauschen
bib = re.sub("\\\\&", "&", bib)  # Und-Zeichen austauschen
bib = re.sub("%.+?@", "@", bib,
             flags=re.MULTILINE | re.DOTALL)  # %-Umgebung aus dial entfernen

# Parse the BibTeX and export it to JS:
import bibtexparser
from bibtexparser.bparser import BibTexParser
parser = BibTexParser()
parser.ignore_nonstandard_types = False
bibdb = bibtexparser.loads(bib, parser)

with open("dial.js", "w") as fh:
    fh.write('var rows = [\n')
    for entry in bibdb.entries:
        fh.write(' {\n')
        # entries with keywords are first publications:
        if 'keywords' in entry:
            entry['Erstpublikation'] = 'ja'
        else:
            entry['Erstpublikation'] = 'nein'
        # record the genre
        if 'keywords' not in entry:
            # fetch the genre from the work entry with the same wikidata ID:
            for entry2 in bibdb.entries:
                if 'keywords' in entry2 and entry2['wikidata'] == entry[
Esempio n. 57
0
with open(args.infile) as input_file:
    bib_database = bibtexparser.load(input_file)

keys_done = []
dblp_entries = []
non_dblp_entries = []
num_skipped = 0

for entry in bib_database.get_entry_list():
    id = entry['ID']

    if id not in keys_done:
        if id[0:4] == "DBLP":
            print("downloading " + id)
            bib_str = download_dblp(id[5:])
            temp_db = bibtexparser.loads(bib_str)
            dblp_entries.append(temp_db.entries[0])
        else:
            non_dblp_entries.append(entry)

        keys_done.append(id)

    else:
        num_skipped += 1
        print(id + " skipped")

print("#DBLP entries = " + str(len(dblp_entries)))
print("#non DBLP entries = " + str(len(non_dblp_entries)))
print("#entries skipped = " + str(num_skipped))

print("writing new bib...", end="")
Esempio n. 58
0
def tex_to_word(tex_fn, repo_dir, bib_fn=None):
    r"""Convert a LaTeX formatted file to docx format
    
    Parses ``tex_fn`` and converts text and some markup tags and environments
    into Word constructs. Creates a file with the same basename as ``tex_fn``
    but with ``.docx`` extension, e.g. ``main.tex -> main.docx``.
    
    If ``bib_fn`` is not provided, all ``\cite`` tags are replaced by parentheses,
    leaving keys as is. If ``bib_fn`` is provided, all ``\cite`` tags are replaced
    by <Author> <Year> formatted references, and if a ``\bibliography`` tag is
    present, a Reference section is formatted at the end of the document.
    
    :param tex_fn: path to LaTeX formatted file
    :param bib_fn: optional path to BibTeX formatted file containing citation
        information
    :return: nothing
    """

    print(
        '\n-------------------------------------------------------------------'
    )
    print(tex_fn)

    with open(tex_fn) as f:
        tex = f.read()
    lexer.input(tex)  # feed the source to the lexer; tokens are consumed via lexer.token()

    bibdb = None
    if bib_fn:
        with open(bib_fn) as f:
            bibtex_str = f.read()
        if len(bibtex_str) > 0:
            bibdb = bibtexparser.loads(bibtex_str)
            bibdb = {_['ID']: _ for _ in bibdb.entries}

    def is_heading(args):
        return 'section' in args

    def get_heading_level(args):
        return args.count('sub') + 1

    heading_level = 1
    in_doc = False

    # in_section is a stack of nested \begin{XXX} commands
    # useful for nested elements like itemize, enumerate, etc
    in_section = []
    prev_token = None

    doc = docx.Document()
    text_started = False
    refs = set()
    words = []

    while True:
        tok = lexer.token()
        if not tok: break

        # handle commands, which control the structure of the document
        # and special elements like tables and figures (not yet implemented)
        if tok.type == 'COMMAND':

            if tok.command == 'title':
                doc.add_heading(tok.args, 0)

            elif tok.command == 'begin':

                # don't insert anything until we have seen a begin{document}
                if tok.args == 'document':
                    in_doc = True

                # other \begin's to be supported:
                # table
                # tabular
                # figure
                in_section.append(tok.args)
            elif tok.command == 'end':
                in_section.pop()
            elif tok.command == 'item':
                style = None
                level = len(in_section)
                if level == 1:
                    print(
                        "saw \\item outside of a command; I don't know what "
                        "to do, so I will very cowardly ignore token:",
                        tok.command, tok.opts, tok.args, tok.post_opts,
                        tok.rest)
                elif in_section[-1] == 'itemize':
                    style = 'List Bullet'
                elif in_section[-1] == 'enumerate':
                    style = 'List Number'
                else:
                    print(
                        "saw \\item inside a command I don't recognize; "
                        "I don't know what to do, so I will very cowardly "
                        "ignore token:", tok.command, tok.opts, tok.args,
                        tok.post_opts, tok.rest)
                if style is not None:
                    if level > 2:
                        style += ' {}'.format(level - 1)
                    doc.add_paragraph(tok.rest.strip(), style=style)
            # \section, \subsection, \subsubsection, etc
            elif is_heading(tok.command):
                if words:
                    add_paragraph(doc, words)
                    words = []
                heading_level = get_heading_level(tok.command)
                doc.add_heading(tok.args, heading_level)

            # insert citation text
            elif tok.command == 'cite':
                ref_strs = tok.args
                if bibdb:
                    refids = tok.args.split(',')
                    refids = [_ for _ in refids if _]
                    refs.update(set(refids))
                    ref_strs = []
                    for refid in refids:
                        entry = bibdb[refid]
                        author = entry.get('author',
                                           entry.get('title',
                                                     '')).split(',')[0]
                        year = entry.get('year', '')
                        ref_strs.append(' '.join([author, year]))
                    ref_strs = ','.join(ref_strs)
                citation = Text(text=''.join(['(', ref_strs, ')']),
                                type='cite',
                                style=None,
                                props=None)
                words.append(citation)

            elif tok.command == 'clearpage':
                doc.add_paragraph().add_run().add_break(WD_BREAK.PAGE)

            elif tok.command == 'includegraphics':
                pic_path = os.path.join(repo_dir, tok.args)
                img = Image.open(pic_path)
                # calculate the image width in inches assuming 72 dpi,
                # capped at a maximum of 6 inches
                dpi = 72
                img_width = min(img.size[0] / dpi, 6)

                doc.add_picture(pic_path, width=Inches(img_width))

            elif tok.command == 'newline':
                words.append(Word(text='\n'))

            else:
                print('unrecognized command:', tok.command, tok.opts, tok.args,
                      tok.post_opts)

        if tok.type == 'EQUATION':
            print("found an equation, don't know how to handle those yet",
                  tok.value)

        # regular text word
        if tok.type == 'WORD':
            # replace escaped chars with literal chars
            tok.value = tok.value.replace(r'\%', '%')
            tok.value = tok.value.replace(r'\$', '$')

            text_started = True
            text = Word(text=tok.value)
            words.append(text)

        if tok.type == 'NEWLINE':
            # if we hit two newlines in a row, create a new paragraph
            if prev_token and prev_token.type == 'NEWLINE' and text_started:
                add_paragraph(doc, words)
                words = []

        if tok.type == 'MANUALNEWLINE':
            words.append(Word(text='\n'))

        if tok.type == 'TEXTFMT':
            if tok.command == 'textbf':
                bold = Text(text=tok.args,
                            type='textbf',
                            style=None,
                            props={'bold': True})
                words.append(bold)
            if tok.command == 'textit':
                italic = Text(text=tok.args,
                              type='textit',
                              style=None,
                              props={'italic': True})
                words.append(italic)

        prev_token = tok

    # do refs if there are refs

    if refs:
        doc.add_heading('References', heading_level)

        refs = sorted(list(refs))
        for i, refid in enumerate(refs):
            ref = bibdb[refid]
            author = ''
            if 'author' in ref:
                authors = ref['author'].split(' and ')
                # only add "et al." when the entry has co-authors
                author = authors[0] + (u' et al. ' if len(authors) > 1 else u'. ')
            title = (tou(ref.get('title', '')).replace('{', '').replace(
                '}', '').replace('\n', ' '))
            ref_words = [
                Word(text='{}. '.format(i + 1)),
                Word(text=author),
                Word(text=title + u'. ')
            ]

            def fmt(key, pref='', suff=''):
                if key in ref:
                    return Word(tou(pref + ref[key] + suff))

            ref_words.extend([
                fmt('journal', suff=u'. '),
                fmt('booktitle', suff=u'. '),
                fmt('volume', suff=u', '),
                fmt('pages', suff=u' '),
                fmt('year', pref=u'(', suff=u')'),
                fmt('howpublished', pref=u'(', suff=u')'),
                fmt('note'),
                Word(text=u'.')
            ])
            ref_words = [_ for _ in ref_words if _]
            add_paragraph(doc, ref_words)
    """
    [{'journal': 'Nice Journal',
      'comments': 'A comment',
      'pages': '12--23',
      'month': 'jan',
      'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...',
      'title': 'An amazing title',
      'year': '2013',
      'volume': '12',
      'ID': 'Cesar2013',
      'author': 'Jean César',
      'keyword': 'keyword1, keyword2',
      'ENTRYTYPE': 'article'}]
    """

    # write out the doc
    basedir = os.path.dirname(tex_fn)
    basename, ext = os.path.splitext(os.path.basename(tex_fn))
    doc_fn = os.path.join(basedir, '{}.docx'.format(basename))
    doc.save(doc_fn)
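A hedged invocation sketch; the paths below are placeholders:

tex_to_word('paper/main.tex', 'paper', bib_fn='paper/refs.bib')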
Esempio n. 59
0
#!/usr/bin/env python

import sys
import bibtexparser
from collections import Counter
import matplotlib.pyplot as plt

filename = sys.argv[1]
text = open(filename).read()
bib = bibtexparser.loads(text).entries
years = [int(e['year']) for e in bib]
counts = Counter(years)
keys = list(counts.keys())
for i in range(min(keys), max(keys) + 1):
    counts[i] += 0  # touch the year so missing years appear with a zero count
    print('{},{}'.format(i, counts[i]))

plt.hist(years, bins=1 + max(keys) - min(keys))
plt.title("Publications by Year")
plt.savefig('years.png')
Esempio n. 60
0
    title = ' '.join(new_words)
    w1 = work.works(query_title=title,
                    #query_author=inauthor,
                    select=['title','DOI'],
                    cursor="*", limit=1000,
                    sort='relevance', order='desc'
                    )
    w2 = []
    for z in w1:
        w2 = w2 + [item for item in z['message']['items']]
    return w2, new_words

# Retrieve the list of BibTeX-formatted publications
bibtext = pd.read_csv('bibtext.csv', encoding='ISO-8859-1')
for k, row in bibtext.iterrows():
    # each loads() call feeds the same parser, so entries accumulate in bib
    bib = bibtexparser.loads(row[0], parser)

outdf = pd.DataFrame()
# Searching the DOIs of each publication in the input bibtext file and returning
# them as a CSV file
for bibitem in bib.entries:
    intitle = bibitem['title']
    inauthor = bibitem['author'].split()[0]
    inauthor = inauthor.replace(',','')
    w, words = doisearcher(intitle, inauthor)
    for item in w:
        titleword = item['title'][0].split()
        titleword = [x.lower() for x in titleword]
        if all(x.lower() in titleword for x in words):
            newrow = pd.Series([bibitem['ID'], intitle,item['title'][0],item['DOI']],
                                index=['ID','orig_title','doi_title','doi'])