def __init__(self, indexname, parser=None): """__init__(self, indexname, parser=None) Open a Prodoc Dictionary. indexname is the name of the index for the dictionary. The index should have been created using the index_file function. parser is an optional Parser object to change the results into another form. If set to None, then the raw contents of the file will be returned. """ self._index = Index.Index(indexname) self._handle = open(self._index[Dictionary.__filename_key]) self._parser = parser
def index_file(filename, indexname, rec2key=None): """index_file(filename, indexname, rec2key=None) Index a Prodoc file. filename is the name of the file. indexname is the name of the dictionary. rec2key is an optional callback that takes a Record and generates a unique key (e.g. the accession number) for the record. If not specified, the id name will be used. """ import os if not os.path.exists(filename): raise ValueError("%s does not exist" % filename) index = Index.Index(indexname, truncate=1) index[Dictionary._Dictionary__filename_key] = filename handle = open(filename) records = parse(handle) end = 0L for record in records: start = end end = long(handle.tell()) length = end - start if rec2key is not None: key = rec2key(record) else: key = record.accession if not key: raise KeyError("empty key was produced") elif key in index: raise KeyError("duplicate key %s found" % key) index[key] = start, length