def download():
    """Query Google Scholar for "eelbrain" citations and cache them as BibTeX.

    Pages through all Scholar results from 2012 onward (patents excluded),
    collects one BibTeX entry per article, and writes the bibliography to
    ``CACHE``.  Side effects only; returns nothing.
    """
    scholar = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    scholar.apply_settings(settings)

    search = SearchScholarQuery()
    search.set_phrase("eelbrain")
    search.set_timeframe(2012, None)
    search.set_include_patents(False)

    # accumulate entries across all result pages
    bibliography = BibliographyData()
    offset = 0
    while True:
        scholar.send_query(search)
        if not scholar.articles:
            break  # ran out of results
        for hit in scholar.articles:
            # fetch the BibTeX citation for this hit and parse it
            scholar.get_citation_data(hit)
            parsed = parse_bytes(hit.citation_data, 'bibtex')
            assert len(parsed.entries) == 1
            for key, entry in parsed.entries.items():
                # make sure a URL is present on the entry
                if 'url' not in entry.fields:
                    link = hit.attrs['url'][0]
                    if link:
                        entry.fields['url'] = link
                bibliography.add_entry(key, entry)
        # advance to the next page of results
        offset += 10
        search.set_start(offset)
    # collapse doubly-escaped ampersands before writing the cache file
    CACHE.write_bytes(bibliography.to_bytes('bibtex').replace(br'\\&', br'\&'))
def load(collname, bib_content):
    """Load uploaded BibTeX data into the database.

    Parameters
    ----------
    collname
        Name for the new ``Collection`` row.
    bib_content
        Raw BibTeX content (bytes), parsed with pybtex's ``parse_bytes``.

    Side effects: adds one ``Collection`` plus one ``Item`` per BibTeX entry
    to ``db.session`` and commits.
    """
    coll = Collection(collname)
    db.session.add(coll)
    bib_data = parse_bytes(bib_content, bib_format="bibtex")
    for citation_tag, v in bib_data.entries.items():
        # strip protective braces from the title
        title = v.fields['Title'].replace('{', '').replace('}', '')
        year = v.fields['Year']
        try:
            journal_book = v.fields['Journal']
            try:
                # prefer the standard abbreviation when one is known
                journal_book = journal_abbrev(journal_book)
            except KeyError:
                pass
        except KeyError:
            # nothing in 'Journal' — fall back to 'BookTitle', then empty
            try:
                journal_book = v.fields['BookTitle']
            except KeyError:
                journal_book = ''
        try:
            volume = v.fields['Volume']
        except KeyError:
            volume = ''
        try:
            pages = v.fields['Pages'].replace('+', '')
        except KeyError:
            # FIX: was a bare `except:` that swallowed every exception
            # (including KeyboardInterrupt); only a missing field is expected.
            pages = ''
        author_list, editor_list = get_author_list(v.persons)
        item = Item(citation_tag=citation_tag, author_list=author_list,
                    editor_list=editor_list, journal_book=journal_book,
                    volume=volume, pages=pages, year=year, title=title,
                    collection=coll)
        db.session.add(item)
    # single commit after all items are staged — TODO confirm the original
    # intended one commit per upload (collapsed source made this ambiguous)
    db.session.commit()
# NOTE(review): fragment of a larger sync routine — `query`, `querier`, `DST`,
# `IGNORE`, and `ACRONYMS` are defined outside this view; it also ends
# mid-loop (the `url` assignment's consumer is not visible). Left byte-identical.
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)  # 2012 to present
query.set_include_patents(False)
# existing bibliography file to merge new hits into
bib = parse_file(DST, 'bibtex')
start = 0
while True:
    querier.send_query(query)
    if len(querier.articles) == 0:
        break
    # extract articles
    for article in querier.articles:
        querier.get_citation_data(article)
        # convert to pybtex entry
        data = parse_bytes(article.citation_data, 'bibtex')
        assert len(data.entries) == 1
        for entry in data.entries.values():
            # skip ignored keys, non-articles, and entries already present
            # with the same journal
            if entry.key in IGNORE:
                continue
            elif entry.type != 'article':
                continue
            elif entry.key in bib.entries:
                if entry.fields['journal'] == bib.entries[entry.key].fields['journal']:
                    continue
            # fix title: brace-protect known acronyms so BibTeX keeps their case
            for repl in ACRONYMS:
                entry.fields['title'] = re.sub(repl, '{' + repl + '}', entry.fields['title'])
            # add info
            if 'url' not in entry.fields:
                url = article.attrs['url'][0]
def deserialize(self, string):
    """Parse serialized bibliography bytes using this object's format.

    Delegates to pybtex's ``parse_bytes`` with ``self.bib_format``.
    """
    parsed = parse_bytes(string, self.bib_format)
    return parsed
# NOTE(review): fragment of a larger sync routine — `querier`, `DST`, `IGNORE`,
# and `ACRONYMS` are defined outside this view, and the snippet is truncated
# after the "# add info" marker. Left byte-identical.
query = SearchScholarQuery()
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)  # 2012 to present
query.set_include_patents(False)
# existing bibliography file to merge new hits into
bib = parse_file(DST, 'bibtex')
start = 0
while True:
    querier.send_query(query)
    if len(querier.articles) == 0:
        break
    # extract articles
    for article in querier.articles:
        querier.get_citation_data(article)
        # convert to pybtex entry
        data = parse_bytes(article.citation_data, 'bibtex')
        assert len(data.entries) == 1
        for entry in data.entries.values():
            # skip ignored keys, non-articles, and entries already present
            # with the same journal
            if entry.key in IGNORE:
                continue
            elif entry.type != 'article':
                continue
            elif entry.key in bib.entries:
                if entry.fields['journal'] == bib.entries[
                        entry.key].fields['journal']:
                    continue
            # fix title: brace-protect known acronyms so BibTeX keeps their case
            for repl in ACRONYMS:
                entry.fields['title'] = re.sub(repl, '{' + repl + '}',
                                               entry.fields['title'])
            # add info