Esempio n. 1
0
    def initWithFlatFile(self, filepath, useGzip=False):
        """
        Initialize the database cache with the content of a ott flat file.

        The first line of the file is treated as a header and skipped.
        Every other line is split on the tab-pipe-tab column separator.
        Column 0 is used as key and column 4 is parsed as a comma separated
        list of "name:id" source references. Only lines that carry an
        "ncbi" reference end up in the mapping.

        filepath -- path of the flat file to read
        useGzip  -- open the file with gzip.open instead of open if true

        The first cache in self.cacheList is replaced by a new SqliteCache
        that is built from the same file path and configuration, with the
        parsed mapping passed as `indict`.
        """

        tOpen = gzip.open if useGzip else open

        mapDict = {}
        with tOpen(filepath) as flatfile:
            # Skip the header. The builtin next() with a default works with
            # Python 2 and 3 file objects and does not fail on an empty file.
            next(flatfile, None)
            for line in flatfile:
                arr = line.strip().split("\t|\t")
                # Both the id column (0) and the source column (4) are needed;
                # shorter lines are skipped instead of raising IndexError.
                if len(arr) < 5 or not arr[0] or not arr[4]:
                    continue
                # Build the per-line source table from scratch, so a malformed
                # entry can never leave a stale table from a previous line.
                source = {}
                for entry in arr[4].split(","):
                    name, sep, ref = entry.partition(":")
                    if not sep:
                        # entry without "name:id" structure: ignore it
                        continue
                    source[name] = ref
                if "ncbi" in source:
                    mapDict[arr[0]] = source["ncbi"]
        self.cacheList[0] = SqliteCache(filePath=self.cacheList[0].filePath,
                                        indict=mapDict,
                                        **self.cacheList[0].conf)
Esempio n. 2
0
 def __init__(self, dbPath):
     """
     Combine a sqlite backed cache with a KeggKoIdToDefMap.

     dbPath -- file path handed to the SqliteCache

     The SqliteCache uses table "keggkoId2def" with key column "koId" and
     value column "koDef". The KeggKoIdToDefMap is created with its own
     caching switched off (useCache=False). Both are handed to
     MultiCachedDict with the sqlite cache listed first.
     """
     remoteMap = KeggKoIdToDefMap(useCache=False)
     cacheOptions = {
         "filePath": dbPath,
         "indict": None,
         "table": "keggkoId2def",
         "key": "koId",
         "value": "koDef",
     }
     localCache = SqliteCache(**cacheOptions)
     MultiCachedDict.__init__(self, None, [localCache, remoteMap])
Esempio n. 3
0
 def __init__(self, dbPath):
     """
     Combine a sqlite backed cache with a KeggPathwayIdToNameMap.

     dbPath -- file path handed to the SqliteCache

     The SqliteCache uses table "keggpathwayId2name" with key column
     "pathId" and value column "pathName". The KeggPathwayIdToNameMap is
     created with its own caching switched off (useCache=False). Both are
     handed to MultiCachedDict with the sqlite cache listed first.
     """
     remoteMap = KeggPathwayIdToNameMap(useCache=False)
     cacheOptions = {
         "filePath": dbPath,
         "indict": None,
         "table": "keggpathwayId2name",
         "key": "pathId",
         "value": "pathName",
     }
     localCache = SqliteCache(**cacheOptions)
     MultiCachedDict.__init__(self, None, [localCache, remoteMap])
Esempio n. 4
0
 def __init__(self, dbPath, email):
     """
     Combine a sqlite backed cache with a TaxonomyParentMap.

     dbPath -- file path handed to the SqliteCache
     email  -- passed on unchanged to TaxonomyParentMap

     The SqliteCache uses table "tax2parent" with key column "tax" and
     value column "parent". The TaxonomyParentMap is created with its own
     caching switched off (useCache=False). Both are handed to
     MultiCachedDict with the sqlite cache listed first.
     """
     remoteMap = TaxonomyParentMap(email, useCache=False)
     cacheOptions = {
         "filePath": dbPath,
         "indict": None,
         "table": "tax2parent",
         "key": "tax",
         "value": "parent",
     }
     localCache = SqliteCache(**cacheOptions)
     MultiCachedDict.__init__(self, None, [localCache, remoteMap])
Esempio n. 5
0
 def __init__(self, dbPath, email):
     """
     Combine a sqlite backed cache with a NuclId2TaxIdMap.

     dbPath -- file path handed to the SqliteCache
     email  -- passed on unchanged to NuclId2TaxIdMap

     The SqliteCache uses table "gi2tax" with key column "gi" and value
     column "tax". The NuclId2TaxIdMap is created with its own caching
     switched off (useCache=False). Both are handed to MultiCachedDict
     with the sqlite cache listed first.
     """
     remoteMap = NuclId2TaxIdMap(email, useCache=False)
     cacheOptions = {
         "filePath": dbPath,
         "indict": None,
         "table": "gi2tax",
         "key": "gi",
         "value": "tax",
     }
     localCache = SqliteCache(**cacheOptions)
     MultiCachedDict.__init__(self, None, [localCache, remoteMap])
Esempio n. 6
0
    def initWithUniprotFlatFile(self, filepath, gzip=True):
        """
        Initialize the database cache with the content of a uniprot flat file.

        This will only work if source and target ID type are part of the
        flat file. If the file is not gzipped this has to be specified
        (gzip=False).

        filepath -- path of the tab separated flat file
        gzip     -- open the file with the gzip module if true (default)

        Raises a ValueError if self.target or self.source is not one of the
        ID types with a known column in the flat file. The check happens
        before the file is opened.

        The first cache in self.cacheList is replaced by a new SqliteCache
        that is built from the same file path and configuration, with the
        parsed mapping passed as `indict`.
        """

        # The `gzip` parameter hides the gzip module inside this method, so
        # the module is imported locally under a different name.
        import gzip as gzipModule

        tOpen = gzipModule.open if gzip else open

        # column index of each supported ID type in the flat file
        name2pos = {
            "ACC": 0,
            "ID": 1,
            "P_ENTREZGENEID": 2,
            "P_REFSEQ_AC": 3,
            "P_GI": 4,
            "PDB_ID": 5,
            "NF100": 7,
            "NF90": 8,
            "NF50": 9,
            "UPARC": 10,
            "PIR": 11,
            "MIM_ID": 13,
            "UNIGENE_ID": 14,
            "EMBL_ID": 16,
            "EMBL": 17,
            "ENSEMBL_ID": 18,
            "ENSEMBL_TRS_ID": 19,
            "ENSEMBL_PRO_ID": 20,
        }
        if self.target not in name2pos:
            raise ValueError(
                "Map target '%s' is not available from flat file." %
                self.target)
        targetPos = name2pos[self.target]
        if self.source not in name2pos:
            raise ValueError(
                "Map source '%s' is not available from flat file." %
                self.source)
        sourcePos = name2pos[self.source]

        minColumns = max(sourcePos, targetPos) + 1
        mapDict = {}
        with tOpen(filepath) as flatfile:
            for line in flatfile:
                # Only remove the line ending: stripping all whitespace would
                # also drop empty leading/trailing columns and shift or
                # shorten the column list.
                arr = line.rstrip("\r\n").split("\t")
                if len(arr) < minColumns:
                    continue
                if not arr[targetPos] or not arr[sourcePos]:
                    continue
                mapDict[arr[sourcePos]] = arr[targetPos]
        # NOTE(review): attribute spelled `filePath` to agree with the
        # SqliteCache constructor keyword -- confirm against SqliteCache.
        self.cacheList[0] = SqliteCache(filePath=self.cacheList[0].filePath,
                                        indict=mapDict,
                                        **self.cacheList[0].conf)
Esempio n. 7
0
    def __init__(self,
                 dbpath,
                 context=None,
                 tablename="ott2ncbi",
                 keyname="ottId",
                 valuename="ncbiTaxId"):
        """
        Combine a sqlite backed cache with an OttId2NcbiTaxIdMap.

        dbpath    -- first positional argument of the SqliteCache
        context   -- accepted but not used by this constructor
        tablename -- passed to the SqliteCache as `table`
        keyname   -- passed to the SqliteCache as `key`
        valuename -- passed to the SqliteCache as `value`

        Both maps are handed to MultiCachedDict with the sqlite cache
        listed first.
        """

        tableLayout = {
            "table": tablename,
            "key": keyname,
            "value": valuename,
        }
        sqliteMap = SqliteCache(dbpath, **tableLayout)
        remoteMap = OttId2NcbiTaxIdMap()

        MultiCachedDict.__init__(self, None, [sqliteMap, remoteMap])
Esempio n. 8
0
    def __init__(self,
                 dbpath,
                 source="ACC",
                 target="P_REFSEQ_AC",
                 retry=0,
                 delay=1,
                 contact=None,
                 returnNone=False,
                 tablename=None,
                 keyname=None,
                 valuename=None):
        """
        Combine a sqlite backed cache with a UniprotIdMap.

        dbpath     -- first positional argument of the SqliteCache
        source     -- stored as self.source and passed to UniprotIdMap
        target     -- stored as self.target and passed to UniprotIdMap
        retry, delay, contact, returnNone
                   -- passed on unchanged, in this order, to UniprotIdMap
        tablename  -- passed to the SqliteCache as `table`
        keyname    -- passed to the SqliteCache as `key`
        valuename  -- passed to the SqliteCache as `value`

        Both maps are handed to MultiCachedDict with the sqlite cache
        listed first.
        """

        self.source, self.target = source, target

        tableLayout = {
            "table": tablename,
            "key": keyname,
            "value": valuename,
        }
        sqliteMap = SqliteCache(dbpath, **tableLayout)

        remoteArgs = (source, target, retry, delay, contact, returnNone)
        remoteMap = UniprotIdMap(*remoteArgs)

        MultiCachedDict.__init__(self, None, [sqliteMap, remoteMap])
Esempio n. 9
0
 def __init__(self, dbPath, email):
     """
     Combine a sqlite backed cache with a ProtId2ProtNameMap.

     dbPath -- file path handed to the SqliteCache
     email  -- passed on unchanged to ProtId2ProtNameMap

     The SqliteCache uses table "gi2name" with key column "gi" and value
     column "name". The ProtId2ProtNameMap is created with its own caching
     switched off (useCache=False). Both are handed to MultiCachedDict
     with the sqlite cache listed first.
     """
     remoteMap = ProtId2ProtNameMap(email, useCache=False)
     cacheOptions = {
         "filePath": dbPath,
         "indict": None,
         "table": "gi2name",
         "key": "gi",
         "value": "name",
     }
     localCache = SqliteCache(**cacheOptions)
     MultiCachedDict.__init__(self, None, [localCache, remoteMap])