def __get_uniprot_reactions(self):
    """
    Purpose: Takes each entry of self.kegg_reactions, read positionally as
    (hsaA, hsaB, reaction type, ...), and converts both hsa ids to the
    UniProt ids they map to in self.hsa_to_uprot. Reactions are dropped when
    either hsa id has no mapping, or when both ids map to the same UniProt
    id (self interactions).

    Reactions are first collected as PPI objects in a set, so duplicates are
    collapsed according to PPI's own equality/hash rules, and only then
    flattened into plain tuples.

    @return: Set of 3-tuples with the form (uprotA, uprotB, reaction label),
        where the label is the value of PPI.get_reaction_type_string().
    """
    if self.__verbose:
        print("Converting KEGG reactions to UniProt reactions...")

    hsa_to_uprot = self.hsa_to_uprot
    unique_ppis = set()
    for reaction in self.kegg_reactions:
        hsa1 = reaction[0]
        hsa2 = reaction[1]
        reaction_type = reaction[2]

        # Test membership on the mapping itself rather than on .keys():
        # on Python 2 .keys() builds a fresh list that is scanned linearly.
        if hsa1 not in hsa_to_uprot or hsa2 not in hsa_to_uprot:
            continue

        u_prot_1 = hsa_to_uprot[hsa1]
        u_prot_2 = hsa_to_uprot[hsa2]
        ppi = PPI(u_prot_1, u_prot_2, [reaction_type])

        # Self interactions are not recorded.
        if u_prot_1 != u_prot_2:
            unique_ppis.add(ppi)
            # NOTE(review): the original indentation was lost; this assumes
            # the verbose line is printed only for reactions that are kept.
            # Confirm against the upstream file.
            if self.__verbose:
                print("\tReaction: {}\t{}\t{}".format(
                    ppi.p1, ppi.p2, ppi.get_reaction_type_string()
                ))

    return {
        (ppi.p1, ppi.p2, ppi.get_reaction_type_string())
        for ppi in unique_ppis
    }
ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0] ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0] for ppi in unknown_ppis: if ppi.p1 != '-': ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0] if ppi.p2 != '-': ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0] # Filter out small samples, self interactions and interactions with missing p2/p1 ppis = filter(lambda ppi: ppi.p1 != '-' and ppi.p2 != '-', ppis) ppis = filter(lambda ppi: ppi.p1 != ppi.p2, ppis) ppis = [ppi.split() for ppi in ppis] ppis = set([ppi for sl in ppis for ppi in sl]) types = [ppi.get_reaction_type_string() for ppi in ppis] type_counts = {k:0 for k in types} for t in types: type_counts[t] += 1 ppis = [ppi for ppi in ppis if type_counts[ppi.get_reaction_type_string()] >= 5] fp1 = open('tmp/hprd_training_ppi.tsv', 'w') fp2 = open('tmp/hprd_testing_ppi.tsv', 'w') fp3 = open('tmp/unknown_hprd_ppi.tsv', 'w') fp1.write("uniprot_a\tuniprot_a\tlabel\n") fp2.write("uniprot_a\tuniprot_a\tlabel\n") fp3.write("uniprot_a\tuniprot_a\tlabel\n") test_type = [ 'phosphorylation', 'dephosphorylation'