Example #1
0
	def LoadAssociation(self, line):
		"""Load one tab-delimited interaction row as a two-gene pathway.

		Columns read from the row:
		  0      - interaction ID, stored as the pathway name "biogrid:<id>"
		  1, 2   - integer gene IDs of the two interactors
		  12     - value passed to RelatePathways as the relationship string
		  15, 16 - taxonomy IDs of the two interactors

		A pathway is only created when both taxonomy IDs are "9606" (human,
		NCBI taxonomy) and both gene IDs are present in
		self.biosettings.regions.genes. Gene IDs that are not present are
		recorded in self.missing.

		Returns True if a pathway was committed, False otherwise (header row,
		blank row, non-human row or unknown gene).
		Raises ValueError / IndexError on rows whose gene columns are not
		integers or that have fewer than 17 columns.
		"""
		# Header rows start with "#"; blank rows carry no data. startswith()
		# is used instead of line[0] so an empty string does not raise.
		if not line.strip() or line.startswith("#"):
			return False

		cols = line.split("\t")
		geneA = int(cols[1])
		geneB = int(cols[2])
		name = "biogrid:%s" % (cols[0])
		taxA = cols[15].strip()
		taxB = cols[16].strip()

		# Only human-human interactions are loaded.
		if not (taxA == taxB and taxA == "9606"):
			print("Skipping interaction %s: taxonomy IDs (%s, %s) are not both 9606" % (name, taxA, taxB))
			return False

		# Record every interactor we have no region for; both are checked so
		# that self.missing is complete even when the first one already fails.
		knownGenes = self.biosettings.regions.genes
		allKnown = True
		for gene in (geneA, geneB):
			if gene not in knownGenes:
				self.missing.add(gene)
				allKnown = False
		if not allKnown:
			return False

		groupID = self.biosettings.NextID()
		pathway = Pathway(self.groupID, groupID, name, "")
		pathway.AddGene(geneA)
		pathway.AddGene(geneB)
		pathway.Commit(self.biosettings)
		self.biosettings.RelatePathways(self.groupID, groupID, cols[12].strip(), "")
		return True
Example #2
0
	def Commit(self, groupTypeID, biosettings):
		"""Write this group, its genes and its parent links through biosettings.

		A Pathway is built from groupTypeID, self.groupID, self.term_id and
		self.description, and the first element of every entry in
		self.associations is added to it as a gene.

		Each key of self.parents is related to self.groupID, passing elements
		0 and 1 of the stored value on to RelatePathways. When self.parents is
		empty the group is related to groupTypeID instead (with empty
		strings), so it is never left without a parent link.

		The pathway itself is committed last, after the relationships.
		"""
		group = Pathway(groupTypeID, self.groupID, self.term_id, self.description)
		for entry in self.associations:
			group.AddGene(entry[0])

		# Decide up front whether the fallback link is needed.
		isRoot = len(self.parents) == 0
		for parentID in self.parents:
			link = self.parents[parentID]
			biosettings.RelatePathways(parentID, self.groupID, link[0], link[1])
		if isRoot:
			biosettings.RelatePathways(groupTypeID, self.groupID, "", "")

		group.Commit(biosettings)
Example #3
0
	def LoadAssociation(self, line):
		"""Load one tab-delimited MINT interaction row as a two-gene pathway.

		Columns read from the row:
		  0, 1  - interactor identifiers in "type:id" form
		  9, 10 - taxonomy fields; the text after the last ":" must be
		          "9606(Homo sapiens)" for both interactors
		  13    - identifier list handed to BuildLookup; its "mint" entry
		          becomes the pathway name

		Only "uniprotkb" identifiers are resolved, via
		self.biosettings.regions.AliasToGeneID. A pathway is created when both
		interactors resolve to exactly one gene each and the two genes differ.

		Returns True if a pathway was committed, False otherwise.
		Raises ValueError if the row's MINT ID was already seen in
		self.observedIDs.
		"""
		cols = line.split("\t")
		# Rows whose first column is not "type:id" are skipped.
		if len(cols[0].split(":")) <= 1:
			return False

		acceptableTypes = ["uniprotkb"]
		taxA = cols[9].split(":")[-1].strip()
		taxB = cols[10].split(":")[-1].strip()
		# Only human-human interactions are loaded.
		if not (taxA == taxB and taxA == "9606(Homo sapiens)"):
			return False

		# Resolve interactor A, then B, to a single gene ID (or None).
		resolved = []
		for typedID in (cols[0].split(":"), cols[1].split(":")):
			gene = None
			if typedID[0] in acceptableTypes:
				ids = self.biosettings.regions.AliasToGeneID([typedID[1]])
				if len(ids) == 1:
					gene = list(ids)[0]
				elif len(ids) == 0:
					# NOTE(review): this overwrites the attribute with the latest
					# accession instead of accumulating; confirm whether
					# missingUni/ambiguousUni are meant to be sets.
					self.missingUni = typedID[1]
				else:
					self.ambiguousUni = typedID[1]
			resolved.append(gene)
		geneA, geneB = resolved

		if geneA is None or geneB is None or geneA == geneB:
			return False

		# The identifier column is only parsed once we know a pathway will be
		# created. BuildLookup is expected to return a mapping with a "mint"
		# key -- TODO confirm against its definition.
		mintID = BuildLookup(cols[13])["mint"]
		groupID = self.biosettings.NextID()
		if mintID in self.observedIDs:
			# A repeated ID would create two pathways with the same name.
			raise ValueError("Duplicate MINT interaction ID: %s" % (mintID,))
		self.observedIDs.add(mintID)

		pathway = Pathway(self.groupID, groupID, mintID, "")
		pathway.AddGene(geneA)
		pathway.AddGene(geneB)
		pathway.Commit(self.biosettings)
		self.biosettings.RelatePathways(self.groupID, groupID, "mint", "")
		return True
Example #4
0
    def LoadAssociation(self, line):
        """Load one tab-delimited interaction row as a two-gene pathway.

        Columns read from the row:
          0, 1  - "|"-separated interactor identifiers; the last entry of
                  each, minus its first 10 characters, is used as the alias
                  (presumably stripping a "uniprotkb:" prefix -- TODO confirm)
          6     - stored as the pathway comment
          9, 10 - taxonomy fields, minus their first 6 characters
                  (presumably a "taxid:" prefix -- TODO confirm)
          13    - stored as the pathway name

        A pathway is created when both taxonomy values match, both aliases
        resolve to exactly one gene via
        self.biosettings.regions.AliasToGeneID, and the two genes differ.
        An alias that resolves to no gene is added to self.missingUni; one
        that resolves to more than one gene is added to self.ambiguousUni.

        Returns True if a pathway was committed, False otherwise.
        """
        cols = line.split("\t")
        uniA = cols[0].split("|")[-1][10:]
        uniB = cols[1].split("|")[-1][10:]
        taxA = cols[9][6:]
        taxB = cols[10][6:]
        comment = cols[6]
        name = cols[13]

        if taxA != taxB:
            sys.stdout.write("Misaligned Taxonomy (%s), %s %s  : %s\n" % (
                name, taxA, taxB, comment))
            return False

        idsA = self.biosettings.regions.AliasToGeneID([uniA])
        idsB = self.biosettings.regions.AliasToGeneID([uniB])
        if len(idsA) != 1 or len(idsB) != 1:
            # Classify each alias on its own: an alias that resolved to
            # exactly one gene is neither missing nor ambiguous, even when
            # its partner failed.
            for alias, ids in ((uniA, idsA), (uniB, idsB)):
                if len(ids) == 0:
                    self.missingUni.add(alias)
                elif len(ids) > 1:
                    self.ambiguousUni.add(alias)
            return False

        geneA = list(idsA)[0]
        geneB = list(idsB)[0]
        if geneA == geneB:
            # Self-interactions are not stored.
            return False

        groupID = self.biosettings.NextID()
        pathway = Pathway(self.groupID, groupID, name, comment)
        pathway.AddGene(geneA)
        pathway.AddGene(geneB)
        pathway.Commit(self.biosettings)
        # NOTE(review): the relationship label is "PFAM" -- confirm this is
        # the intended label for this loader.
        self.biosettings.RelatePathways(self.groupID, groupID,
                                        "PFAM", "")
        return True
Example #5
0
	def LoadPathway(self, groupID, remotePathway):
		"""Fetch the genes of one remote pathway and commit them as a group.

		remotePathway must provide the keys 'definition' and 'entry_id'. The
		gene list is requested from self.srv.get_genes_by_pathway(entry_id).
		Each returned entry, minus its first 4 characters (presumably an
		"hsa:" prefix -- TODO confirm), is resolved through
		self.biosettings.regions.AliasToGeneID. Entries that resolve to zero
		or to several gene IDs are reported and counted as failures.

		If at least one gene resolved, a Pathway named entry_id[5:] is
		committed under self.groupID and related to it. Progress and a summary
		are printed. Returns None.
		"""
		definition = remotePathway['definition']
		entryID = remotePathway['entry_id']

		genes = set()			# gene IDs identified with this pathway
		failedInsertions = 0
		print("\n\nPathway(%s): %s - %s" % (groupID, entryID, definition))

		# A single attempt is made; a failure of the remote call propagates to
		# the caller. TODO: add a bounded retry once the service's timeout
		# exception type is known.
		remoteGeneList = self.srv.get_genes_by_pathway(entryID)
		for geneHSA in remoteGeneList:
			gene = geneHSA[4:]
			geneIDs = self.biosettings.regions.AliasToGeneID([gene])
			if len(geneIDs) == 0:
				print("-----(%s, %s)\tUnable to recognize gene" % (gene, geneHSA))
				failedInsertions += 1
			elif len(geneIDs) != 1:
				failedInsertions += 1
				print("-----(%s,%s)\tUnable to disambiguate gene ID (%s distinct ids) " % (gene, geneHSA, len(geneIDs)))
			else:
				# next(iter(...)) works whether the lookup returns a list or a set.
				genes.add(next(iter(geneIDs)))

		geneCount = 0
		if len(genes) > 0:
			pathway = Pathway(self.groupID, groupID, entryID[5:], definition)
			for geneID in genes:
				pathway.AddGene(geneID)
				geneCount += 1
			pathway.Commit(self.biosettings)
			self.biosettings.RelatePathways(self.groupID, groupID, "", "")
		print("Genes Identified: %s\nGenes unable to be inserted: %s\nTotal genes associated : %s" % (geneCount, failedInsertions, len(genes)))