def persons(s):
    """Yield a database.Person for each author name found in *s*.

    The string is split on '&' or the word 'and' (surrounded by
    whitespace). A resulting chunk with more than one comma is treated
    as a comma-separated list of names; otherwise the whole chunk
    (possibly in "Last, First" form) is taken as a single person.
    """
    for chunk in re.split(r'\s+&\s+|\s+and\s+', s.strip()):
        if not chunk:
            continue
        pieces = chunk.split(',')
        if len(pieces) <= 2:
            # Zero or one comma: a bare name or "Last, First" — one person.
            yield database.Person(chunk)
        else:
            # Two or more commas: a comma-separated list of names.
            for piece in pieces:
                yield database.Person(piece.strip())
def add(self, entry: pybtex.Entry):
    """Insert *entry* into the bib table.

    Returns a ``(added, warnings)`` tuple: *added* is True if the entry
    was inserted, False if it was rejected (no key) or was a duplicate
    of an existing entry; *warnings* is a list of warning strings.

    Fixes over the previous version:
    - the early-exit paths now also return a ``(bool, list)`` tuple, so
      callers that unpack the result no longer crash on rejected entries;
    - the custom-key fallback (``original_key``) is attempted at most
      once, so a collision on the fallback key no longer loops forever.
    """
    # TODO: make this a better sanity checking and perhaps report errors
    if not entry.key:
        return False, []
    if not entry.persons.get("author"):
        entry.persons["author"] = [pybtex.Person("UNKNOWN")]
    original_key = entry.key
    entry.fields["original_key"] = original_key
    utf_author = bibutils.authors_to_unicode(entry)
    utf_title = bibutils.field_to_unicode(entry, "title")
    utf_venue = bibutils.field_to_unicode(entry, "journal")
    if not utf_venue:
        utf_venue = bibutils.field_to_unicode(entry, "booktitle")
    custom_key_tries = 0
    used_fallback_key = False  # True once we have fallen back to original_key
    added = False
    warnings = []
    while not added:
        custom_key = None
        if custom_key_tries < 27:
            try:
                custom_key = bibutils.generate_custom_key(
                    entry, self.config.custom_key_format, custom_key_tries)
            except Exception:
                # Best-effort: a failed generation leaves custom_key as None
                # and we still attempt the insert.
                pass
        else:
            warnings.append(
                "Could not generate a unique custom key for entry %s"
                % original_key)
            custom_key = original_key
            used_fallback_key = True
        try:
            self.cursor.execute(
                'INSERT INTO bib(key, custom_key, author, title, venue, year, fulltext) VALUES (?,?,?,?,?,?,?)',
                (original_key, custom_key, utf_author, utf_title, utf_venue,
                 str(entry.fields.get("year")),
                 bibutils.single_entry_to_fulltext(entry, custom_key)))
            added = True
        except sqlite3.IntegrityError as e:
            error_message = str(e)
            if "UNIQUE" in error_message:
                if "bib.custom_key" in error_message:
                    # custom_key was already in the DB; give up if even the
                    # fallback key collided, otherwise try the next variant.
                    if used_fallback_key:
                        break
                    custom_key_tries += 1
                elif "bib.key" in error_message:
                    # duplicate entry
                    break
                else:
                    raise
            else:
                raise
    return added, warnings
def _arxiv(args, config):
    """Query the arXiv API with *args.query*, print matches, and cache them.

    Each result is printed via format_search_results; when ``args.add`` is
    set the entries are also inserted into the database and saved.

    Fix: the link-scanning loop used a bare ``except:`` which swallowed
    every exception (including KeyboardInterrupt); only AttributeError —
    raised when a link lacks a ``title`` attribute — is expected there,
    and the attribute access is now guarded with ``getattr`` instead.
    """
    import feedparser
    db = BibDB(config)

    query = ' AND '.join(['"{}"'.format(x) for x in args.query])
    # move the query field search terms outside the quotes
    query = re.sub(r'"au(thor)?:', 'au:"', query)
    query = re.sub(r'"ti(tle)?:', 'ti:"', query)
    query = 'http://export.arxiv.org/api/query?{}'.format(
        urllib.parse.urlencode({
            'search_query': query,
            'max_results': args.max_results
        }))
    logging.info('ARXIV QUERY: %s', query)
    response = download_file(query)

    # Register the namespaces arXiv uses so feedparser exposes the
    # opensearch/arxiv extension fields (e.g. entry.arxiv_comment).
    feedparser._FeedParserMixin.namespaces[
        'http://a9.com/-/spec/opensearch/1.1/'] = 'opensearch'
    feedparser._FeedParserMixin.namespaces[
        'http://arxiv.org/schemas/atom'] = 'arxiv'
    feed = feedparser.parse(response)

    # print out feed information
    # print('Feed title: %s' % feed.feed.title)
    # print('Feed last updated: %s' % feed.feed.updated)
    # # print opensearch metadata
    # print('totalResults for this query: %s' % feed.feed.opensearch_totalresults)
    # print('itemsPerPage for this query: %s' % feed.feed.opensearch_itemsperpage)
    # print('startIndex for this query: %s' % feed.feed.opensearch_startindex)

    # Run through each entry, and print out information
    results_to_save = []
    for entry in feed.entries:
        # Strip the version suffix (e.g. "v2") from the arXiv identifier.
        arxiv_id = re.sub(r'v\d+$', '', entry.id.split('/abs/')[-1])
        fields = {
            'title': entry.title,
            'journal': 'Computing Research Repository',
            'year': entry.published[:4],
            'abstract': entry.summary,
            'volume': 'abs/{}'.format(arxiv_id),
            'archivePrefix': 'arXiv',
            'eprint': arxiv_id,
        }
        try:
            fields['comment'] = entry.arxiv_comment
        except AttributeError:
            # Not every arXiv entry carries a comment.
            pass
        # get the links to the pdf; some links have no 'title' attribute
        for link in entry.links:
            if getattr(link, 'title', None) == 'pdf':
                fields['url'] = link.href
        authors = {
            'author': [pybtex.Person(author.name) for author in entry.authors]
        }
        bib_entry = pybtex.Entry('article', persons=authors, fields=fields)
        bib_entry.key = bibutils.generate_custom_key(bib_entry,
                                                     config.custom_key_format)
        print(
            format_search_results(
                [(bibutils.single_entry_to_fulltext(bib_entry), arxiv_id)],
                bibtex_output=False,
                use_original_key=True))
        if args.add:
            db.add(bib_entry)
            results_to_save.append(
                (bibutils.single_entry_to_fulltext(bib_entry), bib_entry.key))
        else:
            results_to_save.append(
                (bibutils.single_entry_to_fulltext(bib_entry), arxiv_id))
    db.save_to_search_cache(results_to_save)
    if args.add:
        db.save()