def importbib(db, bibfile, tags=None, overwrite=False):
    """Import all entries from a bibtex file into the database.

    Each entry is matched against existing documents by bibkey and by any
    source ids recognized in the entry; a matched document is updated in
    place, otherwise a new Document is created.  Progress and errors are
    reported on stderr.  Exits the process: status 1 if any entry failed
    to import, 0 otherwise.

    db        -- database object documents are created in / retrieved from
    bibfile   -- path to the bibtex file to import
    tags      -- optional list of tags added to every imported document
    overwrite -- currently unused; kept for interface compatibility
    """
    # NOTE: default was the shared mutable `tags=[]`; use None sentinel so
    # a fresh list is made per call and callers can never mutate a shared one.
    if tags is None:
        tags = []
    errors = []
    sources = Sources()
    # sort by key for deterministic, readable import order
    for entry in sorted(Bibtex(bibfile), key=lambda entry: entry.key):
        print >>sys.stderr, entry.key
        try:
            docs = []
            # check for doc with this bibkey
            bdoc = db.doc_for_bib(entry.key)
            if bdoc:
                docs.append(bdoc)
            # check for known sids
            for source in sources.scan_bibentry(entry):
                sdoc = db.doc_for_source(source.sid)
                # FIXME: why can't we match docs in list?
                if sdoc and sdoc.docid not in [doc.docid for doc in docs]:
                    docs.append(sdoc)
            if not docs:
                # nothing matched: create a brand-new document
                doc = Document(db)
            else:
                if len(docs) > 1:
                    print >>sys.stderr, " Multiple distinct docs found for entry. Using first found."
                doc = docs[0]
                print >>sys.stderr, " Updating id:%d..." % (doc.docid)
            doc.add_bibentry(entry)
            filepath = entry.get_file()
            if filepath:
                print >>sys.stderr, " Adding file: %s" % filepath
                doc.add_file(filepath)
            doc.add_tags(tags)
            doc.sync()
        except BibtexError as e:
            # record the failure and continue with remaining entries
            print >>sys.stderr, " Error processing entry %s: %s" % (entry.key, e)
            print >>sys.stderr
            errors.append(entry.key)
    if errors:
        print >>sys.stderr
        print >>sys.stderr, "Failed to import %d" % (len(errors)),
        if len(errors) == 1:
            print >>sys.stderr, "entry",
        else:
            print >>sys.stderr, "entries",
        print >>sys.stderr, "from bibtex:"
        for error in errors:
            print >>sys.stderr, " %s" % (error)
        sys.exit(1)
    else:
        sys.exit(0)
def __repr__(self):
    """Render this node and its children as an XML-like string.

    Child nodes are interleaved with the raw source chunks that lie
    between them, so the full original extent is reproduced.
    """
    attributes = {
        'extent': self.extent,
        'name': self.name,
        'type': self.type,
        'typename': self.typename,
        }
    attr_text = ' '.join("%s='%s'" % (key, xml.escape(str(val)))
                         for key, val in attributes.items())
    pieces = ['<%s %s>\n' % (self.kind, attr_text)]
    # walk children, emitting the source text between consecutive extents
    cursor_pos = self.cursor.extent.start
    for child in self.children:
        pieces.append(Sources.getchunk(Extent(start=cursor_pos,
                                              end=child.cursor.extent.start)))
        pieces.append(str(child) + '\n')
        cursor_pos = child.cursor.extent.end
    # trailing source after the last child, then the closing tag
    pieces.append(Sources.getchunk(Extent(start=cursor_pos,
                                          end=self.cursor.extent.end)))
    pieces.append('</%s>' % self.kind)
    return ''.join(pieces)
def __init__(self, cursor):
    """Derive type/typename/name for a cursor.

    Fixed-width stdint names (u?int{8,16,32,64}_t) are shortened by
    dropping the '_t' suffix; attribute cursors get an '__attr_' prefix;
    everything else is resolved via Type.nameof/typenameof.
    """
    super(Type, self).__init__(cursor)
    display = self.cursor.displayname
    # all u?int{8,16,32,64}_t spellings
    stdint_names = ['%sint%d_t' % (sign, width)
                    for width in [8, 16, 32, 64]
                    for sign in ['', 'u']]
    if display in stdint_names:
        self.type = display.replace('_t', '')
    elif self.cursor.kind.is_attribute():
        if len(display) > 0:
            self.type = '__attr_%s' % display
        else:
            # no displayname: fall back to the raw source text
            self.type = '__attr_%s' % Sources.getchunk(self.extent).strip('_')
    else:
        self.type = Type.nameof(self.cursor.type)
        self.typename = Type.typenameof(self.cursor.type)
    self.name = display
def get_urls(self):
    """Return a list of all URLs associated with this document.

    URLs come first from the document's known source ids, then from the
    'url' and 'adsurl' fields of its bibtex entry (if present).
    """
    # urls derived from known source ids
    urls = [Sources()[sid].url for sid in self.get_sids()]
    # urls recorded in the bibtex entry
    self._load_bib()
    if self.bibentry:
        fields = self.bibentry.get_fields()
        for key in ('url', 'adsurl'):
            if key in fields:
                urls.append(fields[key])
    return urls
def __init__(self, cursor):
    """Parse a preprocessor macro definition and record it in the
    class-level MacroDefinition.macros table."""
    super(MacroDefinition, self).__init__(cursor)
    # raw source text of the whole definition (name + replacement)
    self.value = Sources.getchunk(Extent(cursor=cursor))
    # match either a function-like macro "NAME(...)" or a plain "NAME"
    # NOTE(review): self.name is interpolated unescaped into the pattern —
    # names containing regex metacharacters would misbehave; confirm inputs
    match = re.search(r'(?P<name>%s\([^)]*\)|%s)' % (self.name, self.name), self.value)
    if match is not None:
        self.name = match.group('name')
        # what remains after removing the name is the replacement text
        self.value = self.value.replace(self.name, '').strip('\\\n\t ')
    # chase macros defined in terms of previously-seen macros
    # NOTE(review): a cyclic macro chain would loop forever here — confirm
    # that inputs cannot be cyclic
    while self.value in MacroDefinition.macros:
        self.value = MacroDefinition.macros[self.value]
    self.value = xml.escape(self.value)
    # register for lookup by later definitions/uses
    MacroDefinition.macros[self.name] = self.value
def _index_bibentry(self, bibentry):
    """Index the searchable parts of a bibtex entry: title, year,
    authors, recognized source ids, and the bibkey itself."""
    fields = bibentry.get_fields()
    title = fields.get('title')
    if title is not None:
        self._set_title(title)
    year = fields.get('year')
    if year is not None:
        self._set_year(year)
    author_list = bibentry.get_authors()
    if author_list:
        # authors should be a list, so we make a single text string
        # FIXME: better way to do this?
        self._set_authors(' '.join(author_list))
    # add any sources in the bibtex
    for src in Sources().scan_bibentry(bibentry):
        self.add_sid(src.sid)
    # FIXME: index 'keywords' field as regular terms
    self._set_bibkey(bibentry.key)
def __init__(self, cursor):
    """Extract the explicit value of an enum constant, if it has one."""
    super(EnumValue, self).__init__(cursor)
    if len(self.children) == 1:
        # a single child holds the initializer expression's source text
        child = self.children[0]
        self.value = Sources.getchunk(child.extent)
        # resolve through previously-recorded macro definitions
        if self.value in MacroDefinition.macros:
            self.value = MacroDefinition.macros[self.value]
        if len(self.value) == 0:
            self.value = None
        elif self.value[0] == "'":
            # character literal: use the code point of the quoted char
            self.value = ord(self.value[1])
        elif all([ c in '0123456789' for c in self.value]):
            # all-digit literal; eval (rather than int) makes a leading
            # zero parse as octal, matching C literal semantics —
            # NOTE(review): confirm this octal behavior is intended
            self.value = eval(self.value)
        # anything else (expressions, unresolved macros) is left as the
        # raw source text
    elif len(self.children) == 0:
        # no initializer: value is assigned implicitly by the enum
        self.value = None
    else:
        self.value = None
def importbib(db, bibfile, tags=None, overwrite=False):
    """Import all entries from a bibtex file into the database.

    Each entry is matched against existing documents by bibkey and by any
    source ids recognized in the entry; a matched document is updated in
    place, otherwise a new Document is created.  Progress and errors are
    reported on stderr.  Exits the process: status 1 if any entry failed
    to import, 0 otherwise.

    db        -- database object documents are created in / retrieved from
    bibfile   -- path to the bibtex file to import
    tags      -- optional list of tags added to every imported document
    overwrite -- currently unused; kept for interface compatibility
    """
    # NOTE: default was the shared mutable `tags=[]`; use None sentinel so
    # a fresh list is made per call and callers can never mutate a shared one.
    if tags is None:
        tags = []
    errors = []
    sources = Sources()
    # sort by key for deterministic, readable import order
    for entry in sorted(Bibtex(bibfile), key=lambda entry: entry.key):
        print >> sys.stderr, entry.key
        try:
            docs = []
            # check for doc with this bibkey
            bdoc = db.doc_for_bib(entry.key)
            if bdoc:
                docs.append(bdoc)
            # check for known sids
            for source in sources.scan_bibentry(entry):
                sdoc = db.doc_for_source(source.sid)
                # FIXME: why can't we match docs in list?
                if sdoc and sdoc.docid not in [doc.docid for doc in docs]:
                    docs.append(sdoc)
            if not docs:
                # nothing matched: create a brand-new document
                doc = Document(db)
            else:
                if len(docs) > 1:
                    print >> sys.stderr, " Multiple distinct docs found for entry. Using first found."
                doc = docs[0]
                print >> sys.stderr, " Updating id:%d..." % (doc.docid)
            doc.add_bibentry(entry)
            filepath = entry.get_file()
            if filepath:
                print >> sys.stderr, " Adding file: %s" % filepath
                doc.add_file(filepath)
            doc.add_tags(tags)
            doc.sync()
        except BibtexError as e:
            # record the failure and continue with remaining entries
            print >> sys.stderr, " Error processing entry %s: %s" % (entry.key, e)
            print >> sys.stderr
            errors.append(entry.key)
    if errors:
        print >> sys.stderr
        print >> sys.stderr, "Failed to import %d" % (len(errors)),
        if len(errors) == 1:
            print >> sys.stderr, "entry",
        else:
            print >> sys.stderr, "entries",
        print >> sys.stderr, "from bibtex:"
        for error in errors:
            print >> sys.stderr, " %s" % (error)
        sys.exit(1)
    else:
        sys.exit(0)
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a document to the database, or update an existing one.

    db           -- writable database object
    query_string -- if given, a query that must match exactly one existing
                    document, which is then updated
    infile       -- path of a file to attach, or True to retrieve the file
                    from the source
    sid          -- source id string, or a path to a bibtex file
    tags         -- list of tags to add to the document
    prompt       -- interactively prompt for file, source, and tags

    Prints progress to stderr; exits the process with status 1 on any
    failure.  Returns the docid of the added/updated document.
    """
    doc = None
    bibtex = None
    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update
    if query_string:
        if db.count(query_string) != 1:
            print >> sys.stderr, "Search '%s' did not match a single document." % query_string
            print >> sys.stderr, "Aborting."
            sys.exit(1)
        # grab the single matching document from the search iterator
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting
    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)
            print >> sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >> sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >> sys.stderr, "1 source id found:"
                else:
                    print >> sys.stderr, "%d source ids found:" % (len(ss))
                for sid in ss:
                    print >> sys.stderr, " %s" % (sid)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    # nothing to do without a query, a file, or a source
    if not query_string and not infile and not doc_sid:
        print >> sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid
    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >> sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            if doc and sdoc != doc:
                print >> sys.stderr, "A different document already exists for source '%s'." % (doc_sid)
                print >> sys.stderr, "Aborting."
                sys.exit(1)
            print >> sys.stderr, "Source '%s' found in database. Updating existing document..." % (doc_sid)
            doc = sdoc

        try:
            print >> sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >> sys.stderr, "done."
        except SourceError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        # infile=True means "retrieve the file from the source"
        if infile is True:
            try:
                print >> sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >> sys.stderr, "done."
            except SourceError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)
    elif infile is True:
        print >> sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    # a local file path: read its data directly
    if infile and not file_data:
        with open(infile, 'r') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################
    # if we still don't have a doc, create a new one
    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >> sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >> sys.stderr, "done."
        except BibtexError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, e
            print >> sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            print >> sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >> sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >> sys.stderr, "done."
        except ParseError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            print >> sys.stderr, "\n"
            raise

    if tags:
        try:
            print >> sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >> sys.stderr, "done."
        except:
            print >> sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk
    try:
        print >> sys.stderr, "Syncing document...",
        doc.sync()
        print >> sys.stderr, "done.\n",
    except:
        print >> sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a document to the database, or update an existing one.

    db           -- writable database object
    query_string -- if given, a query that must match exactly one existing
                    document, which is then updated
    infile       -- path of a file to attach, or True to retrieve the file
                    from the source
    sid          -- source id string, or a path to a bibtex file
    tags         -- list of tags to add to the document
    prompt       -- interactively prompt for file, source, and tags

    Prints progress to stderr; exits the process with status 1 on any
    failure.  Returns the docid of the added/updated document.
    """
    doc = None
    bibtex = None
    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update
    if query_string:
        if db.count(query_string) != 1:
            print >>sys.stderr, "Search '%s' did not match a single document." % query_string
            print >>sys.stderr, "Aborting."
            sys.exit(1)
        # grab the single matching document from the search iterator
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting
    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)
            print >>sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >>sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >>sys.stderr, "1 source id found:"
                else:
                    print >>sys.stderr, "%d source ids found:" % (len(ss))
                for sid in ss:
                    print >>sys.stderr, " %s" % (sid)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    # nothing to do without a query, a file, or a source
    if not query_string and not infile and not doc_sid:
        print >>sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid
    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >>sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            if doc and sdoc != doc:
                print >>sys.stderr, "A different document already exists for source '%s'." % (doc_sid)
                print >>sys.stderr, "Aborting."
                sys.exit(1)
            print >>sys.stderr, "Source '%s' found in database. Updating existing document..." % (doc_sid)
            doc = sdoc

        try:
            print >>sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >>sys.stderr, "done."
        except SourceError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        # infile=True means "retrieve the file from the source"
        if infile is True:
            try:
                print >>sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >>sys.stderr, "done."
            except SourceError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)
    elif infile is True:
        print >>sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    # a local file path: read its data directly
    if infile and not file_data:
        with open(infile, 'r') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################
    # if we still don't have a doc, create a new one
    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >>sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >>sys.stderr, "done."
        except BibtexError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, e
            print >>sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            print >>sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >>sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >>sys.stderr, "done."
        except ParseError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            print >>sys.stderr, "\n"
            raise

    if tags:
        try:
            print >>sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >>sys.stderr, "done."
        except:
            print >>sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk
    try:
        print >>sys.stderr, "Syncing document...",
        doc.sync()
        print >>sys.stderr, "done.\n",
    except:
        print >>sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid
########################################
# export matching documents to a directory
elif cmd in ['export']:
    outdir = sys.argv[2]
    query = make_query_string(sys.argv[3:])
    set_stdout_codec()
    with cli.initdb() as db:
        cli.export(db, outdir, query)
########################################
# rebuild the database from files on disk
elif cmd in ['restore']:
    with cli.initdb(writable=True, create=True, force=True) as db:
        db.restore(log=True)
########################################
# list all known document sources
elif cmd in ['sources']:
    sources = Sources()
    # column width = longest source name
    w = 0
    for source in sources:
        w = max(len(source.name), w)
    # NOTE(review): 'format' shadows the builtin of the same name
    format = '%'+str(w)+'s: %s[%s]'
    for source in sources:
        name = source.name
        desc = ''
        try:
            desc += '%s ' % source.description
        except AttributeError:
            pass
        try:
            desc += '(%s) ' % source.url
        except AttributeError:
            pass
        # NOTE(review): this second try overwrites the desc accumulated
        # above — looks like merged duplicate code; confirm intent
        try:
            desc = '%s ' % source.description
        except AttributeError:
            desc = ''
        # NOTE(review): 'path' is not defined anywhere in the visible
        # code — as written this raises NameError; confirm where path
        # is supposed to be set
        print format % (name, desc, path)
########################################
# fetch and print bibtex for a single source id
elif cmd in ['source2bib','s2b']:
    try:
        string = sys.argv[2]
    except IndexError:
        print >>sys.stderr, "Must specify source to retrieve."
        sys.exit(1)
    try:
        item = Sources().match_source(string)
    except SourceError as e:
        print >>sys.stderr, e
        sys.exit(1)
    try:
        bibtex = item.fetch_bibtex()
    except SourceError as e:
        print >>sys.stderr, "Could not retrieve bibtex: %s" % e
        sys.exit(1)
    try:
        print Bibtex(bibtex)[0].as_string()
    except BibtexError as e:
        print >>sys.stderr, "Error parsing bibtex: %s" % e
        print >>sys.stderr, "Outputting raw..."
def __init__(self, root, writable=False, create=False, force=False):
    """Open (or create) the Xapers database rooted at 'root'.

    root     -- path to the xapers root directory
    writable -- open the underlying xapian database read/write
    create   -- create the .xapers directory structure if missing
    force    -- allow creation even when root exists and is non-empty

    Raises DatabaseInitializationError, DatabaseUninitializedError, or
    DatabaseLockError on the corresponding failures.
    """
    # xapers root
    self.root = os.path.abspath(os.path.expanduser(root))

    # xapers db directory
    xapers_path = os.path.join(self.root, '.xapers')

    # xapers directory initialization
    if not os.path.exists(xapers_path):
        if create:
            if os.path.exists(self.root):
                # refuse to take over a non-empty directory unless forced
                if os.listdir(self.root) and not force:
                    raise DatabaseInitializationError(
                        'Uninitialized Xapers root directory exists but is not empty.'
                        )
            os.makedirs(xapers_path)
        else:
            if os.path.exists(self.root):
                raise DatabaseInitializationError(
                    "Xapers directory '%s' does not contain a database." % (self.root))
            else:
                raise DatabaseUninitializedError(
                    "Xapers directory '%s' not found." % (self.root))

    # the Xapian db
    xapian_path = os.path.join(xapers_path, 'xapian')
    if writable:
        try:
            self.xapian = xapian.WritableDatabase(xapian_path, xapian.DB_CREATE_OR_OPEN)
        except xapian.DatabaseLockError:
            # translate xapian's lock error into our own exception type
            raise DatabaseLockError("Xapers database locked.")
    else:
        self.xapian = xapian.Database(xapian_path)

    stemmer = xapian.Stem("english")

    # The Xapian TermGenerator
    # http://trac.xapian.org/wiki/FAQ/TermGenerator
    self.term_gen = xapian.TermGenerator()
    self.term_gen.set_stemmer(stemmer)

    # The Xapian QueryParser
    self.query_parser = xapian.QueryParser()
    self.query_parser.set_database(self.xapian)
    self.query_parser.set_stemmer(stemmer)
    self.query_parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    self.query_parser.set_default_op(xapian.Query.OP_AND)

    # add boolean internal prefixes
    for name, prefix in self.BOOLEAN_PREFIX_EXTERNAL.iteritems():
        self.query_parser.add_boolean_prefix(name, prefix)

    # add probabilistic prefixes
    for name, prefix in self.PROBABILISTIC_PREFIX.iteritems():
        self.query_parser.add_prefix(name, prefix)

    # add value facets
    for name, facet in self.NUMBER_VALUE_FACET.iteritems():
        self.query_parser.add_valuerangeprocessor(
            xapian.NumberValueRangeProcessor(facet, name + ':'))

    # register known source prefixes
    # FIXME: can we do this by just finding all XSOURCE terms in db?
    # Would eliminate dependence on source modules at search time.
    for source in Sources():
        name = source.name
        self.query_parser.add_boolean_prefix(
            name, self._make_source_prefix(name))