def save(self, *args, **kwargs):
    """Fill in missing gene identifiers on a new record, then save it.

    The enrichment only runs when the instance has no primary key yet and
    the module-level ``MAPPING`` switch is truthy.  Species and taxid are
    synchronised first; the known identifiers are then passed to ``m`` and,
    if its first result is an integer Entrez gene ID, the matching
    ``Entrez`` row supplies every identifier that is still empty.

    All positional and keyword arguments are forwarded unchanged to the
    parent class ``save``.
    """
    if not self.pk and MAPPING:
        # Keep species and taxid in agreement; species wins when both exist.
        if self.species:
            taxid = self.species.taxid
            self.taxid = taxid
        elif self.taxid:
            taxid = self.taxid
            self.species = Species.objects.get(taxid=taxid)
        else:
            taxid = None

        # Only non-empty identifiers are handed to the mapper, as strings.
        known = (self.entrez_gene_id, self.ensembl_gene_id,
                 self.symbol, self.name)
        mapped = m([str(value) for value in known if value], taxid)
        mapped_entrez_id = mapped[0]

        if mapped_entrez_id and isinstance(mapped_entrez_id, int):
            entrez = Entrez.objects.get(entrez_gene_id=mapped_entrez_id)
            # Values already present on the instance are never overwritten.
            if not self.entrez_gene_id:
                self.entrez_gene_id = mapped_entrez_id
            if not self.ensembl_gene_id:
                self.ensembl_gene_id = entrez.ensembl_gene_id
            if not self.symbol:
                self.symbol = entrez.gene_symbol
            if not self.name:
                self.name = entrez.gene_name
            if not self.taxid:
                # No taxid was given: take the species from the Entrez row.
                taxid = entrez.taxid
                self.species = Species.objects.get(taxid=taxid)

    super(Factor, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Complete the identifiers of a new factor from the mapping, then save.

    The enrichment only runs when the instance has no primary key yet and
    the module-level ``MAPPING`` switch is truthy:

    1. Species and taxid are synchronised (species wins when both are set).
    2. The non-empty identifiers are passed to ``m``; nothing else happens
       unless its first result is an integer Entrez gene ID.
    3. For taxid 4932 the annotation returned by ``retrieve`` overwrites
       symbol, name and description.  For every other taxid the matching
       ``Entrez`` row fills the fields that are still empty; if no such row
       exists only the mapped Entrez gene ID is stored.

    All positional and keyword arguments are forwarded unchanged to the
    parent class ``save``.
    """
    # TODO: split into several methods.
    if not self.pk and MAPPING:
        if self.species:
            self.taxid = taxid = self.species.taxid
        elif self.taxid:
            taxid = self.taxid
            self.species = Species.objects.get(taxid=taxid)
        else:
            taxid = None

        known = [
            self.entrez_gene_id, self.ensembl_gene_id, self.symbol, self.name
        ]
        mapped = m([str(value) for value in known if value], taxid)
        mapped_entrez_id = mapped[0]

        if mapped_entrez_id and isinstance(mapped_entrez_id, int):
            self.entrez_gene_id = self.entrez_gene_id or mapped_entrez_id
            # 4932 is presumably the baker's yeast taxid (the original
            # comment mentions SGD as an alternative source) - confirm.
            if self.taxid == 4932:
                self.ensembl_gene_id = mapped[1]['ensembl_gene']
                annotation = retrieve(self.ensembl_gene_id)
                self.symbol = annotation['symbol']
                self.name = annotation['name']
                self.description = annotation['description']
                # NOTE(review): the guard reads ``function`` but the
                # assignment targets ``functional_description`` - confirm
                # this asymmetry is intended.
                if not self.function:
                    self.functional_description = self.description
                if self.symbol and self.name:
                    # Append the first number found in the symbol to the name.
                    number = re.search(r'\d+', self.symbol)
                    if number:
                        self.name += " " + number.group()
            else:
                try:
                    entrez = Entrez.objects.get(entrez_gene_id=mapped_entrez_id)
                except ObjectDoesNotExist:
                    # No local Entrez row: keep at least the mapped ID.
                    self.entrez_gene_id = mapped_entrez_id
                else:
                    self.ensembl_gene_id = (self.ensembl_gene_id
                                            or entrez.ensembl_gene_id)
                    self.symbol = self.symbol or entrez.gene_symbol
                    self.name = self.name or entrez.gene_name
                    # The species can only be derived when the Entrez row
                    # exists; reading ``entrez`` after a failed lookup used
                    # to raise UnboundLocalError.
                    if not self.taxid:
                        self.species = Species.objects.get(taxid=entrez.taxid)

    super(Factor, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Complete the identifiers of a new factor from the mapping, then save.

    The enrichment only runs when the instance has no primary key yet and
    the module-level ``MAPPING`` switch is truthy:

    1. Species and taxid are synchronised (species wins when both are set).
    2. The non-empty identifiers are passed to ``m``; nothing else happens
       unless its first result is an integer Entrez gene ID.
    3. For taxid 4932 the annotation returned by ``retrieve`` overwrites
       symbol, name and description.  For every other taxid the matching
       ``Entrez`` row fills the fields that are still empty; if no such row
       exists only the mapped Entrez gene ID is stored.

    All positional and keyword arguments are forwarded unchanged to the
    parent class ``save``.
    """
    if not self.pk and MAPPING:
        if self.species:
            self.taxid = taxid = self.species.taxid
        elif self.taxid:
            taxid = self.taxid
            self.species = Species.objects.get(taxid=taxid)
        else:
            taxid = None

        known = [self.entrez_gene_id, self.ensembl_gene_id,
                 self.symbol, self.name]
        mapped = m([str(value) for value in known if value], taxid)
        mapped_entrez_id = mapped[0]

        if mapped_entrez_id and isinstance(mapped_entrez_id, int):
            self.entrez_gene_id = self.entrez_gene_id or mapped_entrez_id
            # 4932 is presumably the baker's yeast taxid (the original
            # comment mentions SGD as an alternative source) - confirm.
            if self.taxid == 4932:
                self.ensembl_gene_id = mapped[1]['ensembl_gene']
                annotation = retrieve(self.ensembl_gene_id)
                self.symbol = annotation['symbol']
                self.name = annotation['name']
                self.description = annotation['description']
                # NOTE(review): the guard reads ``function`` but the
                # assignment targets ``functional_description`` - confirm
                # this asymmetry is intended.
                if not self.function:
                    self.functional_description = self.description
                if self.symbol and self.name:
                    # Append the first number found in the symbol to the name.
                    number = re.search(r'\d+', self.symbol)
                    if number:
                        self.name += " " + number.group()
            else:
                try:
                    entrez = Entrez.objects.get(entrez_gene_id=mapped_entrez_id)
                except ObjectDoesNotExist:
                    # No local Entrez row: keep at least the mapped ID.
                    self.entrez_gene_id = mapped_entrez_id
                else:
                    self.ensembl_gene_id = (self.ensembl_gene_id
                                            or entrez.ensembl_gene_id)
                    self.symbol = self.symbol or entrez.gene_symbol
                    self.name = self.name or entrez.gene_name
                    # The species can only be derived when the Entrez row
                    # exists; reading ``entrez`` after a failed lookup used
                    # to raise UnboundLocalError.
                    if not self.taxid:
                        self.species = Species.objects.get(taxid=entrez.taxid)

    super(Factor, self).save(*args, **kwargs)
def main(memory=True, header=True):
    """Integrate the per-database interaction files and merge duplicate pairs.

    All files live below the module-level ``path``:

    1. For every database except BioGRID, collect the aliases found in
       ``<database>/interactions.txt`` (rows whose two taxids are equal and
       listed in ``taxid_list``) and map each alias list to an identifier
       with ``m(aliases, taxid)[0]``.
    2. Write ``integrated.txt``: every accepted row prefixed with its source
       and target identifier (BioGRID rows use their first alias directly).
    3. Merge rows sharing the same (source, target) pair, fold A-B / B-A
       duplicates into one record unless both directions are directed, and
       write ``merged.txt`` with a trailing ``score`` column.

    Args:
        memory: when true each file is read completely into memory;
            otherwise it is streamed line by line and ``bufcount`` supplies
            the line count used for the progress output.
        header: when true a column header is written as the first line of
            ``merged.txt``.

    Side effects: removes the excluded databases from the module-level
    ``databases`` list, changes the working directory to ``path`` and prints
    progress percentages to stdout.
    """
    for excluded in ['AfCS']:  # 'STRING'
        if excluded in databases:
            databases.remove(excluded)
    os.chdir(path)

    integrated_path = os.path.join(path, 'integrated.txt')
    merged_path = os.path.join(path, 'merged.txt')

    # Collect: register every alias per taxid (BioGRID needs no mapping).
    gene_ids = {}
    for database in databases:
        if database == "BioGRID":
            continue
        data_file = os.path.join(path, database, 'interactions.txt')
        lines = _read_lines(data_file, memory)
        total = len(lines) if memory else bufcount(data_file)
        print("%s(%s):" % (database, total))
        shown = 0
        for count, line in enumerate(lines, 1):
            shown = _report_progress(count, total, shown)
            if not line:
                continue
            fields = line.split('\t')
            taxid = _shared_taxid(fields)
            if taxid is None:
                continue
            species_genes = gene_ids.setdefault(taxid, {})
            species_genes.setdefault(fields[0], '')
            species_genes.setdefault(fields[1], '')

    # Map: replace every placeholder by the identifier the mapper returns.
    for taxid, aliases in gene_ids.items():
        total = len(aliases)
        shown = 0
        for count, alias in enumerate(list(aliases), 1):
            shown = _report_progress(count, total, shown)
            aliases[alias] = m(alias.split('; '), taxid)[0]

    # Integrate: prefix every accepted row with its source/target identifier.
    with open(integrated_path, 'w') as output:
        for database in databases:
            data_file = os.path.join(path, database, 'interactions.txt')
            lines = _read_lines(data_file, memory)
            total = len(lines) if memory else bufcount(data_file)
            shown = 0
            for count, line in enumerate(lines, 1):
                shown = _report_progress(count, total, shown)
                if not line:
                    continue
                fields = line.split('\t')
                taxid = _shared_taxid(fields)
                if taxid is None:
                    continue
                alias_a, alias_b = fields[0], fields[1]
                if database != "BioGRID":
                    # The former, unused 'EnsemblGeneID' lookup on these
                    # values was dropped: nothing ever read its result.
                    source = gene_ids[taxid][alias_a]
                    target = gene_ids[taxid][alias_b]
                else:
                    source = alias_a.split('; ')[0]
                    target = alias_b.split('; ')[0]
                output.write('\t'.join(map(str, [source, target, line])) + '\n')
    del gene_ids  # release the alias table before the merge step

    # Merge: group the integrated rows by (source, target).
    records = {}
    lines = _read_lines(integrated_path, memory)
    if memory:
        print(len(lines))
    for line in lines:
        if not line:
            continue
        fields = line.split('\t')
        try:
            id_a, id_b = int(fields[0]), int(fields[1])
            targets = records.setdefault(id_a, {})
            if id_b in targets:
                targets[id_b] = _combine_records(targets[id_b], fields[2:])
            else:
                targets[id_b] = fields[2:]
        except (ValueError, IndexError):
            # Best effort, as before: rows with unmapped (non-integer)
            # identifiers or too few columns are skipped.
            continue

    # Fold A-B and B-A together.  Snapshots are iterated because entries are
    # deleted on the way.
    for id_a, targets in list(records.items()):
        for id_b in list(targets):
            if id_a == id_b:
                # A self-interaction has no reverse record; it used to be
                # combined with itself and then deleted.
                continue
            reverse = records.get(id_b)
            if not reverse or id_a not in reverse:
                continue
            if not _is_directed(reverse[id_a]):
                # Reverse record carries no direction: fold it into A-B.
                targets[id_b] = _combine_records(reverse[id_a], targets[id_b])
                del reverse[id_a]
            elif not _is_directed(targets[id_b]):
                # Only the reverse record is directed: fold A-B into it.
                reverse[id_a] = _combine_records(targets[id_b], reverse[id_a])
                del targets[id_b]
            # Both directed: keep both records.

    with open(merged_path, 'w') as output:
        if header:
            output.write('\t'.join([
                'unique_id_a', 'unique_id_b', 'alias_a', 'alias_b',
                'experimental_system_type', 'interaction_type',
                'experimental_system', 'modification', 'taxid_a', 'taxid_b',
                'pmid', 'source_database', 'score']) + '\n')
        for id_a, targets in records.items():
            for id_b, info in targets.items():
                # Score = number of distinct evidence entries of the record.
                score = sum(len(_split_values(info[column]))
                            for column in (2, 4, 5, 8, 9))
                info.append(str(score))
                output.write(
                    '\t'.join(map(str, [id_a, id_b, '\t'.join(info)])) + '\n')


def _read_lines(file_path, memory):
    """Return the lines of *file_path* without line endings (list or lazy)."""
    if memory:
        with open(file_path) as handle:
            return handle.read().split('\n')
    return _stream_lines(file_path)


def _stream_lines(file_path):
    """Yield the lines of *file_path* one by one, stripped of the newline."""
    with open(file_path) as handle:
        for line in handle:
            yield line.rstrip('\n')


def _report_progress(done, total, previous):
    """Print the integer percentage when it changed; return the percentage."""
    import sys
    percent = 100 * done // total if total else 100
    if percent != previous:
        sys.stdout.write('%s ' % percent)
    return percent


def _shared_taxid(fields):
    """Return the row's taxid if both partners share an accepted one, else None."""
    if fields[6] and fields[6] == fields[7] and int(fields[6]) in taxid_list:
        return int(fields[6])
    return None


def _split_values(text):
    """Split a '; '-separated field into its non-empty entries."""
    return [value for value in text.split('; ') if value]


def _is_directed(info):
    """Tell whether a record is regulatory or carries a modification."""
    return "regulatory" in info[2] or info[5] != ''


def _combine_records(record_x, record_y):
    """Merge two 10-column records; *record_y* entries come first.

    Columns 6 and 7 (the taxids) are copied from *record_y*; every other
    column becomes the '; '-joined union of both records' entries.
    """
    combined = []
    for column in range(10):
        if column in (6, 7):
            combined.append(record_y[column])
            continue
        values = _split_values(record_y[column])
        for value in _split_values(record_x[column]):
            if value not in values:
                values.append(value)
        combined.append('; '.join(values))
    return combined