Example #1
0
def importbib(db, bibfile, tags=None, overwrite=False):
    """Import every entry of a bibtex file into the database.

    Entries are processed in bibkey order.  For each entry an existing
    document is looked up, first by bibkey and then by the source ids
    found in the entry; if none is found a new Document is created.
    The entry, its file (if it names one) and the given tags are added
    to the document, which is then synced.

    Progress and errors are written to stderr.  This function does not
    return: it exits the process with status 1 if any entry raised
    BibtexError, and with status 0 otherwise.

    db        -- database handle used for lookups and for new Documents
    bibfile   -- bibtex input handed to Bibtex()
    tags      -- tags added to every imported document (default: none)
    overwrite -- accepted but not used by this function
    """
    # A list literal as default would be one object shared by all calls;
    # treat an omitted (or None) value as "no tags" instead.
    if tags is None:
        tags = []

    errors = []

    sources = Sources()

    for entry in sorted(Bibtex(bibfile), key=lambda entry: entry.key):
        print >>sys.stderr, entry.key

        try:
            docs = []
            # docids of the documents collected so far
            # FIXME: why can't we match docs in list?
            seen_docids = set()

            # check for doc with this bibkey
            bdoc = db.doc_for_bib(entry.key)
            if bdoc:
                docs.append(bdoc)
                seen_docids.add(bdoc.docid)

            # check for known sids
            for source in sources.scan_bibentry(entry):
                sdoc = db.doc_for_source(source.sid)
                if sdoc and sdoc.docid not in seen_docids:
                    docs.append(sdoc)
                    seen_docids.add(sdoc.docid)

            if docs:
                if len(docs) > 1:
                    print >>sys.stderr, "  Multiple distinct docs found for entry.  Using first found."
                doc = docs[0]
                print >>sys.stderr, "  Updating id:%d..." % (doc.docid)
            else:
                doc = Document(db)

            doc.add_bibentry(entry)

            filepath = entry.get_file()
            if filepath:
                print >>sys.stderr, "  Adding file: %s" % filepath
                doc.add_file(filepath)

            doc.add_tags(tags)

            doc.sync()

        except BibtexError as e:
            # a bad entry is reported and remembered, the import goes on
            print >>sys.stderr, "  Error processing entry %s: %s" % (entry.key, e)
            print >>sys.stderr
            errors.append(entry.key)

    if errors:
        # trailing commas keep the summary sentence on a single line
        print >>sys.stderr
        print >>sys.stderr, "Failed to import %d" % (len(errors)),
        if len(errors) == 1:
            print >>sys.stderr, "entry",
        else:
            print >>sys.stderr, "entries",
        print >>sys.stderr, "from bibtex:"
        for error in errors:
            print >>sys.stderr, "  %s" % (error)
        sys.exit(1)
    else:
        sys.exit(0)
Example #2
0
  def __repr__(self):
    """Render this node as an XML-like element.

    The opening tag carries the node's kind and its extent, name, type
    and typename attributes (values XML-escaped).  The element body is
    the source text of the node's extent, with each child replaced by
    its own string rendering followed by a newline.
    """
    attributes = {
      'extent': self.extent,
      'name': self.name,
      'type': self.type,
      'typename': self.typename,
    }
    tag = self.kind
    attr_text = ' '.join(
      "%s='%s'" % (key, xml.escape(str(value)))
      for key, value in attributes.items()
    )
    pieces = ['<%s %s>\n' % (tag, attr_text)]

    # walk the children, emitting the source text that lies between them
    position = self.cursor.extent.start
    for child in self.children:
      gap = Extent(start=position, end=child.cursor.extent.start)
      pieces.append(Sources.getchunk(gap))
      pieces.append(str(child) + '\n')
      position = child.cursor.extent.end

    # source text after the last child, then the closing tag
    tail = Extent(start=position, end=self.cursor.extent.end)
    pieces.append(Sources.getchunk(tail))
    pieces.append('</%s>' % self.kind)
    return ''.join(pieces)
Example #3
0
  def __init__(self, cursor):
    """Initialise the node and derive its type, typename and name.

    The type is chosen from the cursor's display name when that is one
    of the (u)int{8,16,32,64}_t names, from an '__attr_' prefixed label
    when the cursor kind is an attribute, and from Type.nameof()
    otherwise.
    """
    super(Type, self).__init__(cursor)

    # int8_t, uint8_t, int16_t, ... in the same order as before
    fixed_width_names = []
    for bits in [8, 16, 32, 64]:
      for sign in ['', 'u']:
        fixed_width_names.append('%sint%d_t' % (sign, bits))

    if self.cursor.displayname in fixed_width_names:
      self.type = self.cursor.displayname.replace('_t', '')
    elif not self.cursor.kind.is_attribute():
      self.type = Type.nameof(self.cursor.type)
    elif len(self.cursor.displayname) > 0:
      self.type = '__attr_%s' % self.cursor.displayname
    else:
      # unnamed attribute: label it with its own source text
      self.type = '__attr_%s' % Sources.getchunk(self.extent).strip('_')

    self.typename = Type.typenameof(self.cursor.type)
    self.name = self.cursor.displayname
Example #4
0
 def get_urls(self):
     """Return a list of all URLs associated with the document.

     The list holds the url of the source looked up for each of the
     document's sids, followed by the 'url' and 'adsurl' bibtex fields
     when the document has a bibentry containing them.
     """
     known_sources = Sources()
     # urls associated with known sources
     urls = [known_sources[sid].url for sid in self.get_sids()]
     # urls from bibtex
     self._load_bib()
     if self.bibentry:
         fields = self.bibentry.get_fields()
         for key in ('url', 'adsurl'):
             if key in fields:
                 urls.append(fields[key])
     return urls
Example #5
0
  def __init__(self, cursor):
    """Record the macro's name and value and register it in the macro table.

    The macro text is read with Sources.getchunk() for the cursor's
    extent.  If the name is directly followed by a parenthesised group
    in that text, the group becomes part of the name.  The value is the
    text with the name removed and surrounding backslashes/whitespace
    stripped; it is then resolved through MacroDefinition.macros,
    XML-escaped, and stored in MacroDefinition.macros under the name.
    """
    super(MacroDefinition, self).__init__(cursor)

    self.value = Sources.getchunk(Extent(cursor=cursor))

    # The name is spliced into a pattern, so it must be matched literally.
    name_pattern = re.escape(self.name)
    match = re.search(r'(?P<name>%s\([^)]*\)|%s)' % (name_pattern, name_pattern), self.value)
    if match is not None:
      self.name = match.group('name')

    # Remove only the leading occurrence of the name: the macro body may
    # legitimately contain the same text again (e.g. a longer identifier
    # that starts with the macro's name), and that must be left intact.
    self.value = self.value.replace(self.name, '', 1).strip('\\\n\t ')

    # Follow aliases through the table, stopping if a value repeats so
    # that a self-referential or cyclic definition cannot loop forever.
    visited = set()
    while self.value in MacroDefinition.macros and self.value not in visited:
      visited.add(self.value)
      self.value = MacroDefinition.macros[self.value]

    # NOTE(review): values stored in the table below are already
    # XML-escaped, so a value resolved through the table is escaped a
    # second time here -- confirm whether that is intended.
    self.value = xml.escape(self.value)

    MacroDefinition.macros[self.name] = self.value
Example #6
0
    def _index_bibentry(self, bibentry):
        """Index the title, year, authors, sources and bibkey of a bibentry."""
        authors = bibentry.get_authors()
        fields = bibentry.get_fields()

        # simple fields map straight onto their setters
        for field, setter in (('title', self._set_title),
                              ('year', self._set_year)):
            if field in fields:
                setter(fields[field])

        if authors:
            # authors should be a list, so we make a single text string
            # FIXME: better way to do this?
            author_text = ' '.join(authors)
            self._set_authors(author_text)

        # add any sources in the bibtex
        for found in Sources().scan_bibentry(bibentry):
            self.add_sid(found.sid)

        # FIXME: index 'keywords' field as regular terms

        self._set_bibkey(bibentry.key)
Example #7
0
  def __init__(self, cursor):
    """Initialise the node and work out the enumerator's value.

    With exactly one child, the value starts as that child's source
    text (replaced by its entry in MacroDefinition.macros when present)
    and is then converted: empty text becomes None, text starting with
    a single quote becomes the ordinal of its second character, and
    text made only of decimal digits is evaluated to a number.  Any
    other text is kept as a string.  With zero or several children the
    value is None.
    """
    super(EnumValue, self).__init__(cursor)

    if len(self.children) != 1:
      self.value = None
      return

    only_child = self.children[0]
    self.value = Sources.getchunk(only_child.extent)

    if self.value in MacroDefinition.macros:
      self.value = MacroDefinition.macros[self.value]

    text = self.value
    if len(text) == 0:
      self.value = None
    elif text[0] == "'":
      self.value = ord(text[1])
    elif all(ch in '0123456789' for ch in text):
      # eval() only ever sees a string of decimal digits here
      self.value = eval(text)
Example #8
0
def importbib(db, bibfile, tags=None, overwrite=False):
    """Import every entry of a bibtex file into the database.

    Entries are processed in bibkey order.  For each entry an existing
    document is looked up, first by bibkey and then by the source ids
    found in the entry; if none is found a new Document is created.
    The entry, its file (if it names one) and the given tags are added
    to the document, which is then synced.

    Progress and errors are written to stderr.  This function does not
    return: it exits the process with status 1 if any entry raised
    BibtexError, and with status 0 otherwise.

    db        -- database handle used for lookups and for new Documents
    bibfile   -- bibtex input handed to Bibtex()
    tags      -- tags added to every imported document (default: none)
    overwrite -- accepted but not used by this function
    """
    # A list literal as default would be one object shared by all calls;
    # treat an omitted (or None) value as "no tags" instead.
    if tags is None:
        tags = []

    errors = []

    sources = Sources()

    for entry in sorted(Bibtex(bibfile), key=lambda entry: entry.key):
        print >> sys.stderr, entry.key

        try:
            docs = []
            # docids of the documents collected so far
            # FIXME: why can't we match docs in list?
            seen_docids = set()

            # check for doc with this bibkey
            bdoc = db.doc_for_bib(entry.key)
            if bdoc:
                docs.append(bdoc)
                seen_docids.add(bdoc.docid)

            # check for known sids
            for source in sources.scan_bibentry(entry):
                sdoc = db.doc_for_source(source.sid)
                if sdoc and sdoc.docid not in seen_docids:
                    docs.append(sdoc)
                    seen_docids.add(sdoc.docid)

            if docs:
                if len(docs) > 1:
                    print >> sys.stderr, "  Multiple distinct docs found for entry.  Using first found."
                doc = docs[0]
                print >> sys.stderr, "  Updating id:%d..." % (doc.docid)
            else:
                doc = Document(db)

            doc.add_bibentry(entry)

            filepath = entry.get_file()
            if filepath:
                print >> sys.stderr, "  Adding file: %s" % filepath
                doc.add_file(filepath)

            doc.add_tags(tags)

            doc.sync()

        except BibtexError as e:
            # a bad entry is reported and remembered, the import goes on
            print >> sys.stderr, "  Error processing entry %s: %s" % (
                entry.key, e)
            print >> sys.stderr
            errors.append(entry.key)

    if errors:
        # trailing commas keep the summary sentence on a single line
        print >> sys.stderr
        print >> sys.stderr, "Failed to import %d" % (len(errors)),
        if len(errors) == 1:
            print >> sys.stderr, "entry",
        else:
            print >> sys.stderr, "entries",
        print >> sys.stderr, "from bibtex:"
        for error in errors:
            print >> sys.stderr, "  %s" % (error)
        sys.exit(1)
    else:
        sys.exit(0)
Example #9
0
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a new document to the database, or update an existing one.

    Progress is written to stderr.  Every handled failure prints a
    message and exits the process with status 1.

    db           -- database handle used for searching and for creating
                    a new Document
    query_string -- if non-empty, must match exactly one document, which
                    becomes the document to update
    infile       -- path of a file to attach ('~' is expanded), or True
                    to retrieve the file from the source
    sid          -- source identifier string, or the path of an existing
                    file, which is then handed to the document as bibtex
    tags         -- tags to add to the document
    prompt       -- if true, prompt for file, source and tags

    Returns the docid of the synced document.
    """

    doc = None
    bibtex = None

    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update

    if query_string:
        if db.count(query_string) != 1:
            print >> sys.stderr, "Search '%s' did not match a single document." % query_string
            print >> sys.stderr, "Aborting."
            sys.exit(1)

        # exactly one match: take it
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting

    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)

            print >> sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >> sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >> sys.stderr, "1 source id found:"
                else:
                    print >> sys.stderr, "%d source ids found:" % (len(ss))
                # loop variable deliberately not called 'sid', so the
                # function parameter is not clobbered
                for found in ss:
                    print >> sys.stderr, "  %s" % (found)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    if not query_string and not infile and not doc_sid:
        print >> sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid

    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >> sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            # Compare by docid: two Document objects for the same
            # database entry are distinct objects, so comparing the
            # objects themselves would reject a legitimate update.
            if doc and sdoc.docid != doc.docid:
                print >> sys.stderr, "A different document already exists for source '%s'." % (
                    doc_sid)
                print >> sys.stderr, "Aborting."
                sys.exit(1)
            print >> sys.stderr, "Source '%s' found in database.  Updating existing document..." % (
                doc_sid)
            doc = sdoc

        try:
            print >> sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >> sys.stderr, "done."
        except SourceError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        if infile is True:
            try:
                print >> sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >> sys.stderr, "done."
            except SourceError as e:
                print >> sys.stderr, "\n"
                print >> sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)

    # Retrieving the file needs a source object.  This also covers the
    # case where the given source was a bibtex file path, which would
    # otherwise reach open() below with infile set to True.
    if infile is True and source is None:
        print >> sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    if infile and not file_data:
        # binary mode: the document must be read without any newline
        # translation
        with open(infile, 'rb') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################

    # if we still don't have a doc, create a new one
    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >> sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >> sys.stderr, "done."
        except BibtexError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, e
            print >> sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            # finish the pending progress line, then let the error through
            print >> sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >> sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >> sys.stderr, "done."
        except ParseError as e:
            print >> sys.stderr, "\n"
            print >> sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            print >> sys.stderr, "\n"
            raise

    if tags:
        try:
            print >> sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >> sys.stderr, "done."
        except:
            print >> sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk

    try:
        print >> sys.stderr, "Syncing document...",
        doc.sync()
        print >> sys.stderr, "done.\n",
    except:
        print >> sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid
Example #10
0
def add(db, query_string, infile=None, sid=None, tags=None, prompt=False):
    """Add a new document to the database, or update an existing one.

    Progress is written to stderr.  Every handled failure prints a
    message and exits the process with status 1.

    db           -- database handle used for searching and for creating
                    a new Document
    query_string -- if non-empty, must match exactly one document, which
                    becomes the document to update
    infile       -- path of a file to attach ('~' is expanded), or True
                    to retrieve the file from the source
    sid          -- source identifier string, or the path of an existing
                    file, which is then handed to the document as bibtex
    tags         -- tags to add to the document
    prompt       -- if true, prompt for file, source and tags

    Returns the docid of the synced document.
    """

    doc = None
    bibtex = None

    sources = Sources()
    doc_sid = sid
    source = None
    file_data = None

    if infile and infile is not True:
        infile = os.path.expanduser(infile)

    ##################################
    # if query provided, find single doc to update

    if query_string:
        if db.count(query_string) != 1:
            print >>sys.stderr, "Search '%s' did not match a single document." % query_string
            print >>sys.stderr, "Aborting."
            sys.exit(1)

        # exactly one match: take it
        for doc in db.search(query_string):
            break

    ##################################
    # do fancy option prompting

    if prompt:
        doc_sids = []
        if doc_sid:
            doc_sids = [doc_sid]
        # scan the file for source info
        if infile is not True:
            infile = prompt_for_file(infile)

            print >>sys.stderr, "Scanning document for source identifiers..."
            try:
                ss = sources.scan_file(infile)
            except ParseError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Parse error: %s" % e
                sys.exit(1)
            if len(ss) == 0:
                print >>sys.stderr, "0 source ids found."
            else:
                if len(ss) == 1:
                    print >>sys.stderr, "1 source id found:"
                else:
                    print >>sys.stderr, "%d source ids found:" % (len(ss))
                # loop variable deliberately not called 'sid', so the
                # function parameter is not clobbered
                for found in ss:
                    print >>sys.stderr, "  %s" % (found)
                doc_sids += [s.sid for s in ss]
        doc_sid = prompt_for_source(db, doc_sids)
        tags = prompt_for_tags(db, tags)

    if not query_string and not infile and not doc_sid:
        print >>sys.stderr, "Must specify file or source to import, or query to update existing document."
        sys.exit(1)

    ##################################
    # process source and get bibtex

    # check if source is a file, in which case interpret it as bibtex
    if doc_sid and os.path.exists(doc_sid):
        bibtex = doc_sid

    elif doc_sid:
        # get source object for sid string
        try:
            source = sources.match_source(doc_sid)
        except SourceError as e:
            print >>sys.stderr, e
            sys.exit(1)

        # check that the source doesn't match an existing doc
        sdoc = db.doc_for_source(source.sid)
        if sdoc:
            # Compare by docid: two Document objects for the same
            # database entry are distinct objects, so comparing the
            # objects themselves would reject a legitimate update.
            if doc and sdoc.docid != doc.docid:
                print >>sys.stderr, "A different document already exists for source '%s'." % (doc_sid)
                print >>sys.stderr, "Aborting."
                sys.exit(1)
            print >>sys.stderr, "Source '%s' found in database.  Updating existing document..." % (doc_sid)
            doc = sdoc

        try:
            print >>sys.stderr, "Retrieving bibtex...",
            bibtex = source.fetch_bibtex()
            print >>sys.stderr, "done."
        except SourceError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        if infile is True:
            try:
                print >>sys.stderr, "Retrieving file...",
                file_name, file_data = source.fetch_file()
                print >>sys.stderr, "done."
            except SourceError as e:
                print >>sys.stderr, "\n"
                print >>sys.stderr, "Could not retrieve file: %s" % e
                sys.exit(1)

    # Retrieving the file needs a source object.  This also covers the
    # case where the given source was a bibtex file path, which would
    # otherwise reach open() below with infile set to True.
    if infile is True and source is None:
        print >>sys.stderr, "Must specify source with retrieve file option."
        sys.exit(1)

    if infile and not file_data:
        # binary mode: the document must be read without any newline
        # translation
        with open(infile, 'rb') as f:
            file_data = f.read()
        file_name = os.path.basename(infile)

    ##################################

    # if we still don't have a doc, create a new one
    if not doc:
        doc = Document(db)

    ##################################
    # add stuff to the doc

    if bibtex:
        try:
            print >>sys.stderr, "Adding bibtex...",
            doc.add_bibtex(bibtex)
            print >>sys.stderr, "done."
        except BibtexError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, e
            print >>sys.stderr, "Bibtex must be a plain text file with a single bibtex entry."
            sys.exit(1)
        except:
            # finish the pending progress line, then let the error through
            print >>sys.stderr, "\n"
            raise

    # add source sid if it hasn't been added yet
    if source and not doc.get_sids():
        doc.add_sid(source.sid)

    if infile:
        try:
            print >>sys.stderr, "Adding file...",
            doc.add_file_data(file_name, file_data)
            print >>sys.stderr, "done."
        except ParseError as e:
            print >>sys.stderr, "\n"
            print >>sys.stderr, "Parse error: %s" % e
            sys.exit(1)
        except:
            print >>sys.stderr, "\n"
            raise

    if tags:
        try:
            print >>sys.stderr, "Adding tags...",
            doc.add_tags(tags)
            print >>sys.stderr, "done."
        except:
            print >>sys.stderr, "\n"
            raise

    ##################################
    # sync the doc to db and disk

    try:
        print >>sys.stderr, "Syncing document...",
        doc.sync()
        print >>sys.stderr, "done.\n",
    except:
        print >>sys.stderr, "\n"
        raise

    print_doc_summary(doc)
    return doc.docid
Example #11
0
    ########################################
    elif cmd in ['export']:
        outdir = sys.argv[2]
        query = make_query_string(sys.argv[3:])
        set_stdout_codec()
        with cli.initdb() as db:
            cli.export(db, outdir, query)

    ########################################
    elif cmd in ['restore']:
        with cli.initdb(writable=True, create=True, force=True) as db:
            db.restore(log=True)

    ########################################
    elif cmd in ['sources']:
        sources = Sources()
        w = 0
        for source in sources:
            w = max(len(source.name), w)
        format = '%'+str(w)+'s: %s[%s]'
        for source in sources:
            name = source.name
            desc = ''
            try:
                desc += '%s ' % source.description
            except AttributeError:
                pass
            try:
                desc += '(%s) ' % source.url
            except AttributeError:
                pass
Example #12
0
            try:
                desc = '%s ' % source.description
            except AttributeError:
                desc = ''
            print format % (name, desc, path)

    ########################################
    elif cmd in ['source2bib','s2b']:
        try:
            string = sys.argv[2]
        except IndexError:
            print >>sys.stderr, "Must specify source to retrieve."
            sys.exit(1)

        try:
            item = Sources().match_source(string)
        except SourceError as e:
            print >>sys.stderr, e
            sys.exit(1)

        try:
            bibtex = item.fetch_bibtex()
        except SourceError as e:
            print >>sys.stderr, "Could not retrieve bibtex: %s" % e
            sys.exit(1)

        try:
            print Bibtex(bibtex)[0].as_string()
        except BibtexError as e:
            print >>sys.stderr, "Error parsing bibtex: %s" % e
            print >>sys.stderr, "Outputting raw..."
Example #13
0
    def __init__(self, root, writable=False, create=False, force=False):
        """Open the Xapers database under root and set up query parsing.

        root     -- Xapers root directory ('~' is expanded and the path
                    made absolute)
        writable -- open the Xapian database writable (created if
                    missing) instead of read-only
        create   -- create the '.xapers' directory if it does not exist
        force    -- with create, allow initializing a root directory
                    that exists and is not empty

        Raises DatabaseInitializationError if the '.xapers' directory is
        missing and either create is set but the root is non-empty
        (without force), or create is not set and the root exists.
        Raises DatabaseUninitializedError if create is not set and the
        root does not exist.  Raises DatabaseLockError if the writable
        Xapian database is locked.
        """
        # xapers root
        self.root = os.path.abspath(os.path.expanduser(root))

        # xapers db directory
        xapers_path = os.path.join(self.root, '.xapers')

        # xapers directory initialization
        if not os.path.exists(xapers_path):
            if create:
                if os.path.exists(self.root):
                    if os.listdir(self.root) and not force:
                        raise DatabaseInitializationError(
                            'Uninitialized Xapers root directory exists but is not empty.'
                        )
                os.makedirs(xapers_path)
            else:
                if os.path.exists(self.root):
                    raise DatabaseInitializationError(
                        "Xapers directory '%s' does not contain a database." %
                        (self.root))
                else:
                    raise DatabaseUninitializedError(
                        "Xapers directory '%s' not found." % (self.root))

        # the Xapian db
        xapian_path = os.path.join(xapers_path, 'xapian')
        if writable:
            try:
                self.xapian = xapian.WritableDatabase(xapian_path,
                                                      xapian.DB_CREATE_OR_OPEN)
            except xapian.DatabaseLockError:
                # re-raise as this module's own lock error
                raise DatabaseLockError("Xapers database locked.")
        else:
            self.xapian = xapian.Database(xapian_path)

        # one stemmer, shared by the term generator and the query parser
        stemmer = xapian.Stem("english")

        # The Xapian TermGenerator
        # http://trac.xapian.org/wiki/FAQ/TermGenerator
        self.term_gen = xapian.TermGenerator()
        self.term_gen.set_stemmer(stemmer)

        # The Xapian QueryParser
        self.query_parser = xapian.QueryParser()
        self.query_parser.set_database(self.xapian)
        self.query_parser.set_stemmer(stemmer)
        self.query_parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        self.query_parser.set_default_op(xapian.Query.OP_AND)

        # add boolean external prefixes
        for name, prefix in self.BOOLEAN_PREFIX_EXTERNAL.iteritems():
            self.query_parser.add_boolean_prefix(name, prefix)

        # add probabilistic prefixes
        for name, prefix in self.PROBABILISTIC_PREFIX.iteritems():
            self.query_parser.add_prefix(name, prefix)

        # add value facets
        for name, facet in self.NUMBER_VALUE_FACET.iteritems():
            self.query_parser.add_valuerangeprocessor(
                xapian.NumberValueRangeProcessor(facet, name + ':'))

        # register known source prefixes
        # FIXME: can we do this by just finding all XSOURCE terms in
        #        db?  Would eliminate dependence on source modules at
        #        search time.
        for source in Sources():
            name = source.name
            self.query_parser.add_boolean_prefix(
                name, self._make_source_prefix(name))