Code example #1
File: sources.py Project: Anaphory/pycldf
def persons(s):
    # Split a combined author string on "&" or "and" separators.
    for name in re.split(r'\s+&\s+|\s+and\s+', s.strip()):
        if name:
            parts = name.split(',')
            if len(parts) > 2:
                # More than one comma: treat it as a comma-separated list of names.
                for part in parts:
                    yield database.Person(part.strip())
            else:
                # "Last, First" (or a single token): keep the whole name as one person.
                yield database.Person(name)
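A minimal standalone sketch of how that splitting behaves, with database.Person replaced by plain strings so it runs outside pycldf (the sample names are made up):

import re

def split_persons(s):
    # Same splitting rules as persons() above, but yields plain strings.
    for name in re.split(r'\s+&\s+|\s+and\s+', s.strip()):
        if name:
            parts = name.split(',')
            if len(parts) > 2:
                for part in parts:
                    yield part.strip()
            else:
                yield name

print(list(split_persons('Doe, Jane and Roe, Richard')))
# -> ['Doe, Jane', 'Roe, Richard']
print(list(split_persons('Smith, Jones, Brown')))
# -> ['Smith', 'Jones', 'Brown']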
Code example #2
File: bibdb.py Project: mjpost/bibsearch
    def add(self, entry: pybtex.Entry):
        """ Returns if the entry was added or if it was a duplicate"""

        # TODO: make this a better sanity checking and perhaps report errors
        if not entry.key:
            return False
        if not entry.persons.get("author"):
            entry.persons["author"] = [pybtex.Person("UNKNOWN")]

        original_key = entry.key
        entry.fields["original_key"] = original_key
        utf_author = bibutils.authors_to_unicode(entry)
        utf_title = bibutils.field_to_unicode(entry, "title")
        utf_venue = bibutils.field_to_unicode(entry, "journal")
        if not utf_venue:
            utf_venue = bibutils.field_to_unicode(entry, "booktitle")
        custom_key_tries = 0
        added = False
        warnings = []
        while not added:
            custom_key = None
            if custom_key_tries < 27:
                try:
                    custom_key = bibutils.generate_custom_key(
                        entry, self.config.custom_key_format, custom_key_tries)
                except Exception:
                    pass  # key generation failed; custom_key stays None
            else:
                warnings.append(
                    "Could not generate a unique custom key for entry %s" %
                    original_key)
                custom_key = original_key
            try:
                self.cursor.execute(
                    'INSERT INTO bib(key, custom_key, author, title, venue, year, fulltext) VALUES (?,?,?,?,?,?,?)',
                    (original_key, custom_key, utf_author, utf_title,
                     utf_venue, str(entry.fields.get("year")),
                     bibutils.single_entry_to_fulltext(entry, custom_key)))
                added = True
            except sqlite3.IntegrityError as e:
                error_message = str(e)
                if "UNIQUE" in error_message:
                    if "bib.custom_key" in error_message:
                        # custom_key was already in the DB
                        custom_key_tries += 1
                    elif "bib.key" in error_message:
                        # duplicate entry
                        break
                    else:
                        raise
                else:
                    raise
        return added, warnings
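A rough usage sketch for add(), assuming pybtex here refers to pybtex.database (which is where Entry and Person live); the key and field values below are made-up examples, and db stands for a BibDB instance as constructed in example #3:

from pybtex.database import Entry, Person

entry = Entry('article', fields={
    'title': 'An Example Paper',
    'journal': 'Example Journal',
    'year': '2020',
})
entry.persons['author'] = [Person('Doe, Jane')]
entry.key = 'doe2020example'

# db = BibDB(config)  # a bibsearch database handle, as in example #3
# added, warnings = db.add(entry)
# added is False if the key (or generated custom key) already exists;
# warnings lists problems such as failing to build a unique custom key.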
Code example #3
def _arxiv(args, config):
    import feedparser

    db = BibDB(config)

    query = ' AND '.join(['"{}"'.format(x) for x in args.query])
    # move the field prefixes (au:, ti:) outside the quoted search terms
    query = re.sub(r'"au(thor)?:', 'au:"', query)
    query = re.sub(r'"ti(tle)?:', 'ti:"', query)
    query = 'http://export.arxiv.org/api/query?{}'.format(
        urllib.parse.urlencode({
            'search_query': query,
            'max_results': args.max_results
        }))
    logging.info('ARXIV QUERY: %s', query)
    response = download_file(query)

    feedparser._FeedParserMixin.namespaces[
        'http://a9.com/-/spec/opensearch/1.1/'] = 'opensearch'
    feedparser._FeedParserMixin.namespaces[
        'http://arxiv.org/schemas/atom'] = 'arxiv'
    feed = feedparser.parse(response)

    # print out feed information
    # print('Feed title: %s' % feed.feed.title)
    # print('Feed last updated: %s' % feed.feed.updated)

    # # print opensearch metadata
    # print('totalResults for this query: %s' % feed.feed.opensearch_totalresults)
    # print('itemsPerPage for this query: %s' % feed.feed.opensearch_itemsperpage)
    # print('startIndex for this query: %s'   % feed.feed.opensearch_startindex)

    # Run through each entry, and print out information
    results_to_save = []
    for entry in feed.entries:
        arxiv_id = re.sub(r'v\d+$', '', entry.id.split('/abs/')[-1])

        fields = {
            'title': entry.title,
            'journal': 'Computing Research Repository',
            'year': str(entry.published[:4]),
            'abstract': entry.summary,
            'volume': 'abs/{}'.format(arxiv_id),
            'archivePrefix': 'arXiv',
            'eprint': arxiv_id,
        }

        try:
            fields['comment'] = entry.arxiv_comment
        except AttributeError:
            pass

        # get the links to the pdf
        for link in entry.links:
            try:
                if link.title == 'pdf':
                    fields['url'] = link.href
            except AttributeError:
                pass  # some links have no title attribute

        authors = {
            'author': [pybtex.Person(author.name) for author in entry.authors]
        }
        bib_entry = pybtex.Entry('article', persons=authors, fields=fields)
        bib_entry.key = bibutils.generate_custom_key(bib_entry,
                                                     config.custom_key_format)

        print(
            format_search_results(
                [(bibutils.single_entry_to_fulltext(bib_entry), arxiv_id)],
                bibtex_output=False,
                use_original_key=True))

        if args.add:
            db.add(bib_entry)
            results_to_save.append(
                (bibutils.single_entry_to_fulltext(bib_entry), bib_entry.key))
        else:
            results_to_save.append(
                (bibutils.single_entry_to_fulltext(bib_entry), arxiv_id))


    db.save_to_search_cache(results_to_save)

    if args.add:
        db.save()
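For context, a trimmed, standalone version of the arXiv lookup above (no bibsearch internals; it performs a live request to the arXiv API, and the search term is only an example):

import re
import urllib.parse
import feedparser

query_url = 'http://export.arxiv.org/api/query?' + urllib.parse.urlencode({
    'search_query': 'ti:"neural machine translation"',  # example search term
    'max_results': 3,
})
feed = feedparser.parse(query_url)  # feedparser fetches the URL itself

for entry in feed.entries:
    # Same normalization as above: drop the version suffix (e.g. "v2") from the id.
    arxiv_id = re.sub(r'v\d+$', '', entry.id.split('/abs/')[-1])
    print(arxiv_id, entry.published[:4], entry.title)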