def initWithFlatFile(self, filepath, useGzip=False):
    """Initialize the database cache with the content of an ott flat file.

    The first line of the file is skipped as header. Every other line is
    split on the tab-pipe-tab separator. Column 0 is used as key and the
    "ncbi" entry of the comma separated "name:id" list in column 4 is used
    as value. Lines with an empty key, without source information or
    without an "ncbi" entry are skipped, as are lines with fewer than five
    columns.

    The first entry of self.cacheList is replaced by a new SqliteCache that
    is built with the same file path and configuration and the parsed
    mapping as indict.

    filepath -- path of the flat file
    useGzip  -- set to True if the file is gzip compressed
    """
    tOpen = gzip.open if useGzip else open
    mapDict = {}
    with tOpen(filepath) as flatfile:
        # skip header; the default keeps an empty file from raising
        next(flatfile, None)
        for line in flatfile:
            if not isinstance(line, str):
                # gzip.open yields bytes under Python 3
                line = line.decode("utf-8")
            arr = line.strip().split("\t|\t")
            if len(arr) < 5 or not arr[0] or not arr[4]:
                continue
            source = {}
            for entry in arr[4].split(","):
                # split at the first colon only, so IDs that contain a
                # colon themselves do not break the parsing
                name, sep, ident = entry.partition(":")
                if sep:
                    source[name] = ident
            if "ncbi" in source:
                mapDict[arr[0]] = source["ncbi"]
    self.cacheList[0] = SqliteCache(filePath=self.cacheList[0].filePath,
                                    indict=mapDict,
                                    **self.cacheList[0].conf)
def __init__(self, dbPath):
    """Combine a SqliteCache with a KeggKoIdToDefMap.

    The KeggKoIdToDefMap is created with useCache=False. The SqliteCache
    uses table "keggkoId2def" with key column "koId" and value column
    "koDef". Both are handed to MultiCachedDict.__init__ with the database
    cache first.

    dbPath -- passed to SqliteCache as filePath
    """
    remoteMap = KeggKoIdToDefMap(useCache=False)
    localCache = SqliteCache(
        filePath=dbPath,
        indict=None,
        table="keggkoId2def",
        key="koId",
        value="koDef",
    )
    cacheLayers = [localCache, remoteMap]
    MultiCachedDict.__init__(self, None, cacheLayers)
def __init__(self, dbPath):
    """Combine a SqliteCache with a KeggPathwayIdToNameMap.

    The KeggPathwayIdToNameMap is created with useCache=False. The
    SqliteCache uses table "keggpathwayId2name" with key column "pathId"
    and value column "pathName". Both are handed to
    MultiCachedDict.__init__ with the database cache first.

    dbPath -- passed to SqliteCache as filePath
    """
    remoteMap = KeggPathwayIdToNameMap(useCache=False)
    localCache = SqliteCache(
        filePath=dbPath,
        indict=None,
        table="keggpathwayId2name",
        key="pathId",
        value="pathName",
    )
    cacheLayers = [localCache, remoteMap]
    MultiCachedDict.__init__(self, None, cacheLayers)
def __init__(self, dbPath, email):
    """Combine a SqliteCache with a TaxonomyParentMap.

    The TaxonomyParentMap is created with the given email and
    useCache=False. The SqliteCache uses table "tax2parent" with key
    column "tax" and value column "parent". Both are handed to
    MultiCachedDict.__init__ with the database cache first.

    dbPath -- passed to SqliteCache as filePath
    email  -- passed to TaxonomyParentMap as first argument
    """
    remoteMap = TaxonomyParentMap(email, useCache=False)
    localCache = SqliteCache(
        filePath=dbPath,
        indict=None,
        table="tax2parent",
        key="tax",
        value="parent",
    )
    cacheLayers = [localCache, remoteMap]
    MultiCachedDict.__init__(self, None, cacheLayers)
def __init__(self, dbPath, email):
    """Combine a SqliteCache with a NuclId2TaxIdMap.

    The NuclId2TaxIdMap is created with the given email and
    useCache=False. The SqliteCache uses table "gi2tax" with key column
    "gi" and value column "tax". Both are handed to
    MultiCachedDict.__init__ with the database cache first.

    dbPath -- passed to SqliteCache as filePath
    email  -- passed to NuclId2TaxIdMap as first argument
    """
    remoteMap = NuclId2TaxIdMap(email, useCache=False)
    localCache = SqliteCache(
        filePath=dbPath,
        indict=None,
        table="gi2tax",
        key="gi",
        value="tax",
    )
    cacheLayers = [localCache, remoteMap]
    MultiCachedDict.__init__(self, None, cacheLayers)
def initWithUniprotFlatFile(self, filepath, gzip=True):
    """Initialize the database cache with the content of a uniprot flat file.

    This will only work if source and target ID type are part of the flat
    file. If the file is not gzipped this has to be specified.

    Every line is split on tabs. The column belonging to self.source is
    used as key and the column belonging to self.target as value. Lines
    where either of the two columns is missing or empty are skipped.

    The first entry of self.cacheList is replaced by a new SqliteCache that
    is built with the same file path and configuration and the parsed
    mapping as indict.

    filepath -- path of the flat file
    gzip     -- set to False if the file is not gzip compressed

    Raises ValueError if self.target or self.source is not a column of the
    flat file.
    """
    # The "gzip" parameter shadows the module of the same name, so the
    # module has to be made available under a different local name.
    import gzip as gzipModule

    tOpen = gzipModule.open if gzip else open
    name2pos = {
        "ACC": 0,
        "ID": 1,
        "P_ENTREZGENEID": 2,
        "P_REFSEQ_AC": 3,
        "P_GI": 4,
        "PDB_ID": 5,
        "NF100": 7,
        "NF90": 8,
        "NF50": 9,
        "UPARC": 10,
        "PIR": 11,
        "MIM_ID": 13,
        "UNIGENE_ID": 14,
        "EMBL_ID": 16,
        "EMBL": 17,
        "ENSEMBL_ID": 18,
        "ENSEMBL_TRS_ID": 19,
        "ENSEMBL_PRO_ID": 20,
    }
    if self.target not in name2pos:
        raise ValueError(
            "Map target '%s' is not available from flat file." % self.target)
    targetPos = name2pos[self.target]
    if self.source not in name2pos:
        raise ValueError(
            "Map source '%s' is not available from flat file." % self.source)
    sourcePos = name2pos[self.source]
    lastPos = max(sourcePos, targetPos)
    mapDict = {}
    with tOpen(filepath) as flatfile:
        for line in flatfile:
            if not isinstance(line, str):
                # gzip.open yields bytes under Python 3
                line = line.decode("utf-8")
            # Only remove the line break: stripping all white space would
            # also remove the tabs of empty trailing columns and shift
            # the number of columns.
            arr = line.rstrip("\r\n").split("\t")
            if len(arr) <= lastPos:
                continue
            sourceId = arr[sourcePos].strip()
            targetId = arr[targetPos].strip()
            if not targetId or not sourceId:
                continue
            mapDict[sourceId] = targetId
    # "filePath" matches the keyword of the SqliteCache constructor
    self.cacheList[0] = SqliteCache(filePath=self.cacheList[0].filePath,
                                    indict=mapDict,
                                    **self.cacheList[0].conf)
def __init__(self, dbpath, context=None, tablename="ott2ncbi",
             keyname="ottId", valuename="ncbiTaxId"):
    """Combine a SqliteCache with an OttId2NcbiTaxIdMap.

    Both are handed to MultiCachedDict.__init__ with the database cache
    first.

    dbpath    -- passed to SqliteCache as first argument
    context   -- not used by this constructor
    tablename -- passed to SqliteCache as table
    keyname   -- passed to SqliteCache as key
    valuename -- passed to SqliteCache as value
    """
    localCache = SqliteCache(
        dbpath,
        table=tablename,
        key=keyname,
        value=valuename,
    )
    remoteMap = OttId2NcbiTaxIdMap()
    cacheLayers = [localCache, remoteMap]
    MultiCachedDict.__init__(self, None, cacheLayers)
def __init__(self, dbpath, source="ACC", target="P_REFSEQ_AC", retry=0,
             delay=1, contact=None, returnNone=False, tablename=None,
             keyname=None, valuename=None):
    """Combine a SqliteCache with a UniprotIdMap.

    source and target are stored on the instance and, together with
    retry, delay, contact and returnNone, passed on to UniprotIdMap in
    that order. Both caches are handed to MultiCachedDict.__init__ with
    the database cache first.

    dbpath    -- passed to SqliteCache as first argument
    tablename -- passed to SqliteCache as table
    keyname   -- passed to SqliteCache as key
    valuename -- passed to SqliteCache as value
    """
    self.source = source
    self.target = target
    localCache = SqliteCache(
        dbpath,
        table=tablename,
        key=keyname,
        value=valuename,
    )
    remoteMap = UniprotIdMap(
        source, target, retry, delay, contact, returnNone
    )
    cacheLayers = [localCache, remoteMap]
    MultiCachedDict.__init__(self, None, cacheLayers)
def __init__(self, dbPath, email):
    """Combine a SqliteCache with a ProtId2ProtNameMap.

    The ProtId2ProtNameMap is created with the given email and
    useCache=False. The SqliteCache uses table "gi2name" with key column
    "gi" and value column "name". Both are handed to
    MultiCachedDict.__init__ with the database cache first.

    dbPath -- passed to SqliteCache as filePath
    email  -- passed to ProtId2ProtNameMap as first argument
    """
    remoteMap = ProtId2ProtNameMap(email, useCache=False)
    localCache = SqliteCache(
        filePath=dbPath,
        indict=None,
        table="gi2name",
        key="gi",
        value="name",
    )
    cacheLayers = [localCache, remoteMap]
    MultiCachedDict.__init__(self, None, cacheLayers)