def importbib(db, bibfile, tags=None, overwrite=False):
    """Import every entry of a bibtex file into the database.

    Entries are processed in order of their bibtex key.  For each entry an
    existing document is looked up, first by bibtex key and then by every
    source found by scanning the entry.  The first document found is
    updated; if none is found a new Document is created.  The entry, its
    attached file (if ``entry.get_file()`` returns one) and ``tags`` are
    added to the document, which is then synced.

    Progress and errors are reported on stderr.  This function does not
    return: it calls ``sys.exit(1)`` if any entry raised BibtexError and
    ``sys.exit(0)`` otherwise.

    Arguments:
      db        -- database providing doc_for_bib() and doc_for_source()
      bibfile   -- bibtex input handed to Bibtex()
      tags      -- tags added to every imported document (default: none)
      overwrite -- accepted for interface compatibility; not used here
    """
    # Use a fresh list per call rather than a shared mutable default.
    if tags is None:
        tags = []

    errors = []
    sources = Sources()

    for entry in sorted(Bibtex(bibfile), key=lambda entry: entry.key):
        print >>sys.stderr, entry.key

        try:
            docs = []

            # check for doc with this bibkey
            bdoc = db.doc_for_bib(entry.key)
            if bdoc:
                docs.append(bdoc)

            # check for known sids
            for source in sources.scan_bibentry(entry):
                sdoc = db.doc_for_source(source.sid)
                # FIXME: why can't we match docs in list?
                # Compare by docid; the comprehension variable is named
                # distinctly so it cannot clobber `doc` (Python 2 leaks
                # list-comprehension variables into the enclosing scope).
                if sdoc and sdoc.docid not in [known.docid for known in docs]:
                    docs.append(sdoc)

            if docs:
                if len(docs) > 1:
                    print >>sys.stderr, " Multiple distinct docs found for entry. Using first found."
                doc = docs[0]
                print >>sys.stderr, " Updating id:%d..." % (doc.docid)
            else:
                doc = Document(db)

            doc.add_bibentry(entry)

            filepath = entry.get_file()
            if filepath:
                print >>sys.stderr, " Adding file: %s" % filepath
                doc.add_file(filepath)

            doc.add_tags(tags)

            doc.sync()

        except BibtexError as e:
            # Record the failure and carry on with the remaining entries.
            print >>sys.stderr, " Error processing entry %s: %s" % (entry.key, e)
            print >>sys.stderr
            errors.append(entry.key)

    if not errors:
        sys.exit(0)

    if len(errors) == 1:
        noun = "entry"
    else:
        noun = "entries"
    print >>sys.stderr
    print >>sys.stderr, "Failed to import %d" % (len(errors)), noun, "from bibtex:"
    for error in errors:
        print >>sys.stderr, " %s" % (error)
    sys.exit(1)
def restore(self, log=False):
    """Restore a database from an existing root.

    Walks the entries of ``self.root`` in sorted order.  Every non-empty
    directory whose name parses as an integer is treated as a document
    directory with that docid.  The document is fetched with
    ``self[docid]``, or created if that raises xapian.DocNotFoundError.
    Files in the directory are then added according to their name:

      'bibtex' -- doc.add_bibtex(path)
      '*.pdf'  -- doc.add_file(path)
      'tags'   -- one tag per line, passed to doc.add_tags()

    Each document is synced once after its files have been processed.

    If ``log`` is true, progress is written to stderr.
    """
    for ddir in sorted(os.listdir(self.root)):
        if ddir == '.xapers':
            continue
        docdir = os.path.join(self.root, ddir)
        if not os.path.isdir(docdir):
            # skip things that aren't directories
            continue

        if log:
            print >>sys.stderr, docdir

        # if we can't convert the directory name into an integer,
        # assume it's not relevant to us and continue
        try:
            docid = int(ddir)
        except ValueError:
            continue

        docfiles = os.listdir(docdir)
        if not docfiles:
            # skip empty directories
            continue

        if log:
            print >>sys.stderr, ' docid:', docid

        try:
            doc = self[docid]
        except xapian.DocNotFoundError:
            doc = Document(self, docid=docid)

        for dfile in docfiles:
            dpath = os.path.join(docdir, dfile)
            if dfile == 'bibtex':
                if log:
                    print >>sys.stderr, ' adding bibtex'
                doc.add_bibtex(dpath)
            elif os.path.splitext(dpath)[1] == '.pdf':
                if log:
                    print >>sys.stderr, ' adding file:', dfile
                doc.add_file(dpath)
            elif dfile == 'tags':
                if log:
                    print >>sys.stderr, ' adding tags'
                with open(dpath, 'r') as f:
                    lines = f.read().strip().split('\n')
                # An empty tags file splits to [''], and blank lines give
                # '' entries; drop them so no empty tag is ever added.
                tags = [tag for tag in lines if tag]
                doc.add_tags(tags)

        doc.sync()
def restore(self, log=False):
    """Restore a database from an existing root.

    Every non-empty subdirectory of ``self.root`` whose name is an integer
    is taken to be a document directory for that docid ('.xapers',
    non-directories and non-integer names are skipped).  The matching
    document is looked up with ``self[docid]`` and created when that
    raises xapian.DocNotFoundError.  Each file in the directory is then
    dispatched on its name: 'bibtex' goes to doc.add_bibtex(), anything
    ending in '.pdf' goes to doc.add_file(), and 'tags' is read as one tag
    per line for doc.add_tags().  The document is synced after all of its
    files have been handled.

    When ``log`` is true, a trace of the work is printed to stderr.
    """
    docdirs = os.listdir(self.root)
    docdirs.sort()

    for ddir in docdirs:
        if ddir == '.xapers':
            continue
        docdir = os.path.join(self.root, ddir)
        if not os.path.isdir(docdir):
            # skip things that aren't directories
            continue

        if log:
            print >> sys.stderr, docdir

        # if we can't convert the directory name into an integer,
        # assume it's not relevant to us and continue
        try:
            docid = int(ddir)
        except ValueError:
            continue

        docfiles = os.listdir(docdir)
        if not docfiles:
            # skip empty directories
            continue

        if log:
            print >> sys.stderr, ' docid:', docid

        try:
            doc = self[docid]
        except xapian.DocNotFoundError:
            doc = Document(self, docid=docid)

        for dfile in docfiles:
            dpath = os.path.join(docdir, dfile)
            if dfile == 'bibtex':
                if log:
                    print >> sys.stderr, ' adding bibtex'
                doc.add_bibtex(dpath)
            elif os.path.splitext(dpath)[1] == '.pdf':
                if log:
                    print >> sys.stderr, ' adding file:', dfile
                doc.add_file(dpath)
            elif dfile == 'tags':
                if log:
                    print >> sys.stderr, ' adding tags'
                with open(dpath, 'r') as f:
                    # Filter out empty strings: an empty file (or blank
                    # lines) would otherwise produce an empty-string tag.
                    tags = [tag for tag in f.read().strip().split('\n')
                            if tag]
                doc.add_tags(tags)

        doc.sync()
def importbib(db, bibfile, tags=None, overwrite=False):
    """Import every entry of a bibtex file into the database.

    Entries are handled in bibtex-key order.  For each one, candidate
    documents are collected: the document registered for the bibtex key,
    plus the document of every source found by scanning the entry
    (de-duplicated by docid).  The first candidate is updated, or a new
    Document is created when there is none.  The bibtex entry, the file
    returned by ``entry.get_file()`` (if any) and ``tags`` are added, and
    the document is synced.

    Output goes to stderr.  The function always terminates the process:
    ``sys.exit(1)`` when at least one entry raised BibtexError,
    ``sys.exit(0)`` otherwise.

    Arguments:
      db        -- database providing doc_for_bib() and doc_for_source()
      bibfile   -- bibtex input handed to Bibtex()
      tags      -- tags added to every imported document (default: none)
      overwrite -- accepted for interface compatibility; not used here
    """
    # Use a fresh list per call rather than a shared mutable default.
    if tags is None:
        tags = []

    errors = []
    sources = Sources()

    for entry in sorted(Bibtex(bibfile), key=lambda entry: entry.key):
        print >> sys.stderr, entry.key

        try:
            docs = []

            # check for doc with this bibkey
            bdoc = db.doc_for_bib(entry.key)
            if bdoc:
                docs.append(bdoc)

            # check for known sids
            for source in sources.scan_bibentry(entry):
                sdoc = db.doc_for_source(source.sid)
                # FIXME: why can't we match docs in list?
                # Compare by docid; the comprehension variable is named
                # distinctly so it cannot clobber `doc` (Python 2 leaks
                # list-comprehension variables into the enclosing scope).
                if sdoc and sdoc.docid not in [known.docid for known in docs]:
                    docs.append(sdoc)

            if docs:
                if len(docs) > 1:
                    print >> sys.stderr, " Multiple distinct docs found for entry. Using first found."
                doc = docs[0]
                print >> sys.stderr, " Updating id:%d..." % (doc.docid)
            else:
                doc = Document(db)

            doc.add_bibentry(entry)

            filepath = entry.get_file()
            if filepath:
                print >> sys.stderr, " Adding file: %s" % filepath
                doc.add_file(filepath)

            doc.add_tags(tags)

            doc.sync()

        except BibtexError as e:
            # Record the failure and carry on with the remaining entries.
            print >> sys.stderr, " Error processing entry %s: %s" % (
                entry.key, e)
            print >> sys.stderr
            errors.append(entry.key)

    if not errors:
        sys.exit(0)

    if len(errors) == 1:
        noun = "entry"
    else:
        noun = "entries"
    print >> sys.stderr
    print >> sys.stderr, "Failed to import %d" % (len(errors)), noun, "from bibtex:"
    for error in errors:
        print >> sys.stderr, " %s" % (error)
    sys.exit(1)
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a new document to the database, or update an existing one.

    Arguments:
      db           -- database to add to
      query_string -- if given, must match exactly one document, which
                      becomes the document to update
      infile       -- path of the file to add, or True to retrieve the
                      file from the source (which then must be given)
      sid          -- source id string; if it names an existing path it
                      is instead handed to doc.add_bibtex() as bibtex
      tags         -- tags to add to the document
      prompt       -- if true, interactively prompt for file, source and
                      tags (scanning the file for source ids first)

    Progress is reported on stderr.  On any user-level error (ambiguous
    query, unknown source, conflicting document, parse or retrieval
    failure) a message is printed and the process exits via
    ``sys.exit(1)``.  Other exceptions propagate.

    Returns the docid of the added or updated document.
    """
    doc = None
    bibtex = None
    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update

    if query_string:
        if db.count(query_string) != 1:
            print >> sys.stderr, "Search '%s' did not match a single document." % query_string
            print >> sys.stderr, "Aborting."
            sys.exit(1)

        # take the single search result
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting

    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)

            print >> sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >> sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >> sys.stderr, "1 source id found:"
                else:
                    print >> sys.stderr, "%d source ids found:" % (len(ss))
                # loop variable deliberately not named `sid`, so the
                # `sid` argument is not shadowed
                for found in ss:
                    print >> sys.stderr, " %s" % (found)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    if not query_string and not infile and not doc_sid:
        print >> sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid

    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >> sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            # Compare by docid, as importbib does: two objects for the
            # same document must not be reported as a conflict.
            if doc and sdoc.docid != doc.docid:
                print >> sys.stderr, "A different document already exists for source '%s'." % (
                    doc_sid)
                print >> sys.stderr, "Aborting."
                sys.exit(1)
            print >> sys.stderr, "Source '%s' found in database. Updating existing document..." % (
                doc_sid)
            doc = sdoc

        try:
            print >> sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >> sys.stderr, "done."
        except SourceError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        if infile is True:
            try:
                print >> sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >> sys.stderr, "done."
            except SourceError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)

    elif infile is True:
        print >> sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    if infile and not file_data:
        # Binary mode, so the document's bytes are read unmodified on
        # every platform (documents are presumably PDFs -- TODO confirm).
        with open(infile, 'rb') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################
    # if we still don't have a doc, create a new one

    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >> sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >> sys.stderr, "done."
        except BibtexError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, e
            print >> sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            # terminate the pending progress line, then re-raise
            print >> sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >> sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >> sys.stderr, "done."
        except ParseError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            print >> sys.stderr, "\n"
            raise

    if tags:
        try:
            print >> sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >> sys.stderr, "done."
        except:
            print >> sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk

    try:
        print >> sys.stderr, "Syncing document...",
        doc.sync()
        print >> sys.stderr, "done.\n",
    except:
        print >> sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a new document to the database, or update an existing one.

    Arguments:
      db           -- database to add to
      query_string -- if given, must match exactly one document, which
                      becomes the document to update
      infile       -- path of the file to add, or True to retrieve the
                      file from the source (which then must be given)
      sid          -- source id string; if it names an existing path it
                      is instead handed to doc.add_bibtex() as bibtex
      tags         -- tags to add to the document
      prompt       -- if true, interactively prompt for file, source and
                      tags (scanning the file for source ids first)

    Progress is reported on stderr.  On any user-level error (ambiguous
    query, unknown source, conflicting document, parse or retrieval
    failure) a message is printed and the process exits via
    ``sys.exit(1)``.  Other exceptions propagate.

    Returns the docid of the added or updated document.
    """
    doc = None
    bibtex = None
    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update

    if query_string:
        if db.count(query_string) != 1:
            print >>sys.stderr, "Search '%s' did not match a single document." % query_string
            print >>sys.stderr, "Aborting."
            sys.exit(1)

        # take the single search result
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting

    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)

            print >>sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >>sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >>sys.stderr, "1 source id found:"
                else:
                    print >>sys.stderr, "%d source ids found:" % (len(ss))
                # loop variable deliberately not named `sid`, so the
                # `sid` argument is not shadowed
                for found in ss:
                    print >>sys.stderr, " %s" % (found)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    if not query_string and not infile and not doc_sid:
        print >>sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid

    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >>sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            # Compare by docid, as importbib does: two objects for the
            # same document must not be reported as a conflict.
            if doc and sdoc.docid != doc.docid:
                print >>sys.stderr, "A different document already exists for source '%s'." % (doc_sid)
                print >>sys.stderr, "Aborting."
                sys.exit(1)
            print >>sys.stderr, "Source '%s' found in database. Updating existing document..." % (doc_sid)
            doc = sdoc

        try:
            print >>sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >>sys.stderr, "done."
        except SourceError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        if infile is True:
            try:
                print >>sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >>sys.stderr, "done."
            except SourceError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)

    elif infile is True:
        print >>sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    if infile and not file_data:
        # Binary mode, so the document's bytes are read unmodified on
        # every platform (documents are presumably PDFs -- TODO confirm).
        with open(infile, 'rb') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################
    # if we still don't have a doc, create a new one

    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >>sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >>sys.stderr, "done."
        except BibtexError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, e
            print >>sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            # terminate the pending progress line, then re-raise
            print >>sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >>sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >>sys.stderr, "done."
        except ParseError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            print >>sys.stderr, "\n"
            raise

    if tags:
        try:
            print >>sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >>sys.stderr, "done."
        except:
            print >>sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk

    try:
        print >>sys.stderr, "Syncing document...",
        doc.sync()
        print >>sys.stderr, "done.\n",
    except:
        print >>sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid