예제 #1
0
def download():
    """Run the "eelbrain" scholar search and cache the results as BibTeX.

    The querier is configured to return BibTeX citations. Result pages are
    requested in steps of 10 until a query returns no articles. Each
    article's citation is parsed into a single entry; an entry without a
    ``url`` field receives the first value of the article's ``url``
    attribute when that value is truthy. The collected bibliography is
    finally written to ``CACHE``.
    """
    querier = ScholarQuerier()
    bibtex_settings = ScholarSettings()
    bibtex_settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(bibtex_settings)

    query = SearchScholarQuery()
    query.set_phrase("eelbrain")
    query.set_timeframe(2012, None)
    query.set_include_patents(False)

    # Page through the results, accumulating every entry in one bibliography.
    collected = BibliographyData()
    offset = 0
    querier.send_query(query)
    while len(querier.articles) > 0:
        for article in querier.articles:
            # Fetch the citation and parse it; exactly one entry is expected.
            querier.get_citation_data(article)
            parsed = parse_bytes(article.citation_data, 'bibtex')
            assert len(parsed.entries) == 1
            for key, entry in parsed.entries.items():
                # Make sure a URL is present whenever the article offers one.
                if 'url' not in entry.fields:
                    link = article.attrs['url'][0]
                    if link:
                        entry.fields['url'] = link
                collected.add_entry(key, entry)
        # Advance to the next page and query again.
        offset += 10
        query.set_start(offset)
        querier.send_query(query)

    # Serialize, collapsing a doubled backslash before '&' into a single one,
    # then write the bytes to the cache file.
    serialized = collected.to_bytes('bibtex')
    CACHE.write_bytes(serialized.replace(br'\\&', br'\&'))
예제 #2
0
def load(collname, bib_content):
    """Load uploaded data into the database.

    Creates a ``Collection`` named ``collname``, parses ``bib_content`` as
    BibTeX, adds one ``Item`` per bibliography entry to the session and
    commits once at the end.

    Args:
        collname: Name handed to ``Collection``.
        bib_content: BibTeX data handed to ``parse_bytes``.

    Raises:
        KeyError: Presumably, when an entry has no ``Title`` or ``Year``
            field; both are indexed directly, unlike the optional fields
            below, which fall back to an empty string.
    """
    coll = Collection(collname)
    db.session.add(coll)
    bib_data = parse_bytes(bib_content, bib_format="bibtex")
    for citation_tag, entry in bib_data.entries.items():
        fields = entry.fields
        # Strip the BibTeX grouping braces from the title.
        title = fields['Title'].replace('{', '').replace('}', '')
        year = fields['Year']

        try:
            journal_book = fields['Journal']
        except KeyError:
            # If nothing in the 'Journal' tag, try 'BookTitle'
            try:
                journal_book = fields['BookTitle']
            except KeyError:
                journal_book = ''
        else:
            # Abbreviate the journal name; a KeyError from journal_abbrev
            # (presumably "no abbreviation known") keeps the full name.
            try:
                journal_book = journal_abbrev(journal_book)
            except KeyError:
                pass

        try:
            volume = fields['Volume']
        except KeyError:
            volume = ''

        # Only a missing field is tolerated here; the previous bare
        # ``except:`` also hid unrelated errors and interrupts.
        try:
            pages = fields['Pages'].replace('+', '')
        except KeyError:
            pages = ''

        author_list, editor_list = get_author_list(entry.persons)
        item = Item(citation_tag=citation_tag,
                    author_list=author_list,
                    editor_list=editor_list,
                    journal_book=journal_book,
                    volume=volume,
                    pages=pages,
                    year=year,
                    title=title,
                    collection=coll)
        db.session.add(item)
    db.session.commit()
# NOTE(review): excerpt of a larger script -- `query` and `querier` are
# created before this point, and the loop body continues past the last
# visible line.
# Configure the search: phrase "eelbrain", timeframe starting at 2012
# (second argument None -- presumably no upper bound, confirm), no patents.
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)
query.set_include_patents(False)


# Load the existing bibliography from DST; incoming entries are compared
# against it below.
bib = parse_file(DST, 'bibtex')
# Not used in the visible part of this excerpt.
start = 0
while True:
    querier.send_query(query)
    # Stop as soon as a query returns no articles.
    if len(querier.articles) == 0:
        break
    # extract articles
    for article in querier.articles:
        querier.get_citation_data(article)
        # convert to pybtex entry
        data = parse_bytes(article.citation_data, 'bibtex')
        # Each citation is expected to parse into exactly one entry.
        assert len(data.entries) == 1
        for entry in data.entries.values():
            # Skip ignored keys, non-article entries, and entries already in
            # `bib` with the same journal. A known key with a different
            # journal falls through and is processed.
            if entry.key in IGNORE:
                continue
            elif entry.type != 'article':
                continue
            elif entry.key in bib.entries:
                if entry.fields['journal'] == bib.entries[entry.key].fields['journal']:
                    continue
            # fix title
            # Wrap every match of each ACRONYMS item in braces; the item is
            # used as a regex pattern (presumably the braces protect
            # capitalization in BibTeX -- confirm).
            for repl in ACRONYMS:
                entry.fields['title'] = re.sub(repl, '{' + repl + '}', entry.fields['title'])
            # add info
            # When the entry has no 'url' field, read the first value of the
            # article's 'url' attribute.
            if 'url' not in entry.fields:
                url = article.attrs['url'][0]
예제 #4
0
 def deserialize(self, string):
     """Parse ``string`` with ``parse_bytes`` using this object's ``bib_format``."""
     parsed = parse_bytes(string, self.bib_format)
     return parsed
예제 #5
0
# NOTE(review): excerpt of a larger script -- `querier` is created before
# this point, and the loop body continues past the last visible line.
# Build the search: phrase "eelbrain", timeframe starting at 2012 (second
# argument None -- presumably no upper bound, confirm), patents excluded.
query = SearchScholarQuery()
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)
query.set_include_patents(False)

# Load the existing bibliography from DST; incoming entries are compared
# against it below.
bib = parse_file(DST, 'bibtex')
# Not used in the visible part of this excerpt.
start = 0
while True:
    querier.send_query(query)
    # Stop as soon as a query returns no articles.
    if len(querier.articles) == 0:
        break
    # extract articles
    for article in querier.articles:
        querier.get_citation_data(article)
        # convert to pybtex entry
        data = parse_bytes(article.citation_data, 'bibtex')
        # Each citation is expected to parse into exactly one entry.
        assert len(data.entries) == 1
        for entry in data.entries.values():
            # Skip ignored keys, non-article entries, and entries already in
            # `bib` with the same journal. A known key with a different
            # journal falls through and is processed.
            if entry.key in IGNORE:
                continue
            elif entry.type != 'article':
                continue
            elif entry.key in bib.entries:
                if entry.fields['journal'] == bib.entries[
                        entry.key].fields['journal']:
                    continue
            # fix title
            # Wrap every match of each ACRONYMS item in braces; the item is
            # used as a regex pattern (presumably the braces protect
            # capitalization in BibTeX -- confirm).
            for repl in ACRONYMS:
                entry.fields['title'] = re.sub(repl, '{' + repl + '}',
                                               entry.fields['title'])
            # add info