def LoadAssociation(self, line):
    """Load one tab-separated interaction row as a two-gene pathway.

    Columns read: 0 (interaction id, stored as "biogrid:<id>"), 1 and 2
    (integer gene ids), 12 (relationship label passed to RelatePathways),
    15 and 16 (taxonomy ids of the two interactors).

    Only rows where both taxonomy ids are "9606" are considered. Genes that
    are not present in self.biosettings.regions.genes are added to
    self.missing and the row is not stored.

    Returns True when the interaction was committed, False otherwise.
    Raises ValueError if a human row carries a non-integer gene id and
    IndexError if a non-blank row has fewer than 17 columns.
    """
    # Blank lines and '#' header lines carry no interaction.
    if not line.strip() or line.startswith("#"):
        return False

    cols = line.split("\t")

    # Filter on taxonomy before parsing gene ids, so rows that are going to
    # be skipped anyway cannot abort the load with an unparseable id.
    taxA = cols[15].strip()
    taxB = cols[16].strip()
    if taxA != "9606" or taxB != "9606":
        return False

    geneA = int(cols[1])
    geneB = int(cols[2])
    name = "biogrid:%s" % (cols[0])

    knownGenes = self.biosettings.regions.genes
    cantContinue = False
    for gene in (geneA, geneB):
        if gene not in knownGenes:
            self.missing.add(gene)
            cantContinue = True
    if cantContinue:
        print("Unable to find one or more genes (%s, %s) in %s total genes" % (geneA, geneB, len(knownGenes)))
        return False

    groupID = self.biosettings.NextID()
    pathway = Pathway(self.groupID, groupID, name, "")
    pathway.AddGene(geneA)
    pathway.AddGene(geneB)
    pathway.Commit(self.biosettings)
    self.biosettings.RelatePathways(self.groupID, groupID, cols[12].strip(), "")
    return True
def Commit(self, groupTypeID, biosettings):
    """Store this group, its gene associations and its parent links.

    groupTypeID -- id passed as the first Pathway argument; also used as the
                   parent when this group has no parents of its own
    biosettings -- object providing RelatePathways(); also handed to
                   Pathway.Commit()
    """
    # The group record itself
    groupEntry = Pathway(groupTypeID, self.groupID, self.term_id, self.description)

    # Every association contributes its first element as a gene
    for link in self.associations:
        groupEntry.AddGene(link[0])

    # Parent links. A group without parents is attached to the group type
    # instead, since it has no other root to hang from.
    isRoot = len(self.parents) == 0
    for parentID in self.parents:
        relation = self.parents[parentID]
        biosettings.RelatePathways(parentID, self.groupID, relation[0], relation[1])
    if isRoot:
        biosettings.RelatePathways(groupTypeID, self.groupID, "", "")

    groupEntry.Commit(biosettings)
def LoadAssociation(self, line):
    """Load one tab-separated interaction row as a two-gene "mint" pathway.

    Columns read: 0 and 1 (interactors as "type:id"), 9 and 10 (taxonomy,
    e.g. "taxid:9606(Homo sapiens)"), 13 (passed to BuildLookup; its "mint"
    entry becomes the pathway name).

    A row is stored only when both interactors are human (taxid 9606), both
    are "uniprotkb" identifiers that resolve to exactly one gene each, and
    the two genes differ.

    Side effects: self.missingUni / self.ambiguousUni are set to the last
    identifier that resolved to zero / several genes.
    NOTE(review): these are plain assignments, so earlier values are
    overwritten - confirm whether a collection with .add() was intended.

    Returns True when the interaction was committed, False otherwise.
    Raises NameError when the row's "mint" id has already been stored.
    """
    cols = line.split("\t")

    idA = cols[0].split(":")  # type:id
    # Rows whose first interactor is not in "type:id" form are skipped.
    if len(idA) < 2:
        return False
    idB = cols[1].split(":")

    # Compare the numeric taxid only ("taxid:9606(Homo sapiens)" -> "9606")
    # so the match does not depend on the spelling of the species label.
    taxA = cols[9].split(":")[-1].split("(")[0].strip()
    taxB = cols[10].split(":")[-1].split("(")[0].strip()
    if taxA != "9606" or taxB != "9606":
        return False

    acceptableTypes = ["uniprotkb"]

    def resolve(interactor):
        # Return the single gene id for an accepted identifier, else None.
        if interactor[0] not in acceptableTypes:
            return None
        ids = self.biosettings.regions.AliasToGeneID([interactor[1]])
        if len(ids) == 1:
            return list(ids)[0]
        if len(ids) == 0:
            self.missingUni = interactor[1]
        else:
            self.ambiguousUni = interactor[1]
        return None

    geneA = resolve(idA)
    geneB = resolve(idB)
    if geneA is None or geneB is None or geneA == geneB:
        return False

    groupID = self.biosettings.NextID()
    # Built only for rows that are actually stored.
    mintID = BuildLookup(cols[13])["mint"]
    if mintID in self.observedIDs:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        # A duplicate id has always aborted the load with a NameError; the
        # exception type is kept so existing callers behave the same.
        raise NameError("duplicate mint id: %s" % (mintID,))
    self.observedIDs.add(mintID)

    pathway = Pathway(self.groupID, groupID, mintID, "")
    pathway.AddGene(geneA)
    pathway.AddGene(geneB)
    pathway.Commit(self.biosettings)
    self.biosettings.RelatePathways(self.groupID, groupID, "mint", "")
    return True
def LoadAssociation(self, line):
    """Load one tab-separated interaction row as a two-gene pathway.

    Columns read: 0 and 1 ('|'-separated identifier lists; the last entry of
    each, minus its first 10 characters, is used as the alias), 6 (comment),
    9 and 10 (taxonomy, first 6 characters dropped), 13 (pathway name).
    NOTE(review): the fixed offsets presumably strip "uniprotkb:" and
    "taxid:" prefixes - confirm against the input format.

    A row is stored only when both taxonomy values match, both aliases
    resolve to exactly one gene each, and the two genes differ. Aliases that
    resolve to no gene are added to self.missingUni; aliases that resolve to
    more than one gene are added to self.ambiguousUni.

    Returns True when the interaction was committed, False otherwise.
    """
    cols = line.split("\t")
    uniA = cols[0].split("|")[-1][10:]
    uniB = cols[1].split("|")[-1][10:]
    taxA = cols[9][6:]
    taxB = cols[10][6:]
    comment = cols[6]
    name = cols[13]

    if taxA != taxB:
        print("Misaligned Taxonomy (%s), %s %s : %s" % (name, taxA, taxB, comment))
        return False

    idsA = self.biosettings.regions.AliasToGeneID([uniA])
    idsB = self.biosettings.regions.AliasToGeneID([uniB])

    if len(idsA) != 1 or len(idsB) != 1:
        # Record only the alias(es) that actually failed; an alias that
        # resolved to exactly one gene is neither missing nor ambiguous.
        for alias, ids in ((uniA, idsA), (uniB, idsB)):
            if len(ids) == 0:
                self.missingUni.add(alias)
            elif len(ids) > 1:
                self.ambiguousUni.add(alias)
        return False

    geneA = list(idsA)[0]
    geneB = list(idsB)[0]
    if geneA == geneB:
        return False

    groupID = self.biosettings.NextID()
    pathway = Pathway(self.groupID, groupID, name, comment)
    pathway.AddGene(geneA)
    pathway.AddGene(geneB)
    pathway.Commit(self.biosettings)
    # NOTE(review): relationship label is "PFAM" - confirm this is intended.
    self.biosettings.RelatePathways(self.groupID, groupID, "PFAM", "")
    return True
def LoadPathway(self, groupID, remotePathway):
    """Fetch the genes of one remote pathway and store it as a group.

    groupID       -- id under which the pathway is stored
    remotePathway -- mapping with 'definition' and 'entry_id' entries

    The gene list comes from self.srv.get_genes_by_pathway(entry_id). Each
    returned name has its first 4 characters dropped and is resolved through
    self.biosettings.regions.AliasToGeneID; only names that resolve to
    exactly one gene are kept. The pathway is committed only when at least
    one gene was kept. Progress and a summary are printed; nothing is
    returned.
    """
    definition = remotePathway['definition']
    entryID = remotePathway['entry_id']
    genes = set()  # geneIDs identified with pathway
    failedInsertions = 0
    geneCount = 0

    print("\n\nPathway(%s): %s - %s" % (groupID, entryID, definition))

    # Single attempt: a failure of the remote call (e.g. a timeout)
    # propagates to the caller; there is no retry.
    remoteGeneList = self.srv.get_genes_by_pathway(entryID)

    for geneHSA in remoteGeneList:
        gene = geneHSA[4:]
        geneIDs = self.biosettings.regions.AliasToGeneID([gene])
        if len(geneIDs) == 0:
            print("-----(%s, %s)\tUnable to recognize gene" % (gene, geneHSA))
            failedInsertions += 1
        elif len(geneIDs) != 1:
            failedInsertions += 1
            print("-----(%s,%s)\tUnable to disambiguate gene ID (%s distinct ids) " % (gene, geneHSA, len(geneIDs)))
        else:
            # list(...) keeps this working whether the lookup returns a
            # list or a set (sets cannot be indexed directly).
            genes.add(list(geneIDs)[0])

    if genes:
        pathway = Pathway(self.groupID, groupID, entryID[5:], definition)
        for geneID in genes:
            pathway.AddGene(geneID)
            geneCount += 1
        pathway.Commit(self.biosettings)
        self.biosettings.RelatePathways(self.groupID, groupID, "", "")

    print("Genes Identified: %s\nGenes unable to be inserted: %s\nTotal genes associated : %s" % (geneCount, failedInsertions, len(genes)))