def getConservedELMs(virus, subtypes):
    """Combine the ELM annotations of all subtypes of `virus`.

    For every subtype listed under ``subtypes[virus]`` the annotation file
    ``<RESULTSDIR>/<virus>.<subtype>.elms.70.controled`` is loaded with
    ``utils_motif.annotation2protein`` (restricted to the 'ELM' tool).

    virus    -- virus name; used both as the key into `subtypes` and as the
                file-name prefix.
    subtypes -- mapping from virus name to an iterable of subtype names.

    Returns whatever ``utils_graph.intersectLists`` produces for the list of
    per-subtype annotations (presumably the entries common to all of them --
    confirm against utils_graph).
    """
    per_subtype_annotations = []
    for subtype in subtypes[virus]:
        file_name = virus + '.' + subtype + '.elms.70.controled'
        annotation_path = os.path.join(local_settings.RESULTSDIR, file_name)
        per_subtype_annotations.append(
            utils_motif.annotation2protein(annotation_path, {'ELM':True}))
    return utils_graph.intersectLists(per_subtype_annotations)
def expandProteinsForELMs(humanAnnotationFile, elm_pairs_dir, useELMs,
                          proteins, domain_tools):
    """Add proteins reachable through ELM/domain pairs to `useELMs` and `proteins`.

    For each domain tool named in `domain_tools`, the file
    ``elm_pairs_dir + 'ELM.' + domain + '.pairs'`` is read as tab-separated
    rows: column 0 is an ELM name, column 1 is a key into the annotation
    loaded from `humanAnnotationFile`.  For every row whose ELM is already a
    key of `useELMs`, each protein annotated with the column-1 key is
    recorded in both output mappings.

    humanAnnotationFile -- path handed to ``utils_motif.annotation2protein``.
    elm_pairs_dir       -- directory prefix of the pair files; it is joined
                           by plain concatenation, so it must end with a
                           path separator.
    useELMs             -- dict ``elm -> {protein: True}``; updated in place.
                           Only ELMs already present as keys are expanded.
    proteins            -- dict ``protein -> {elm: True}``; updated in place.
    domain_tools        -- mapping whose keys name the domain tools; also
                           passed to ``annotation2protein`` as the tool filter.

    Returns None.  Raises KeyError if a pair file names a column-1 key that
    is missing from the loaded annotation (unchanged from the original
    behaviour); the pair file is now closed even in that case.
    """
    prosite2protein = utils_motif.annotation2protein(humanAnnotationFile,
                                                     domain_tools)
    for domain in domain_tools:
        pairs_path = elm_pairs_dir + 'ELM.' + domain + '.pairs'
        # `with` guarantees the handle is released if a lookup below raises.
        with open(pairs_path) as pairs_file:
            for line in pairs_file:
                fields = [field.strip() for field in line.split('\t')]
                # Skip rows with no second column or an empty one.
                if len(fields) < 2 or fields[1] == '':
                    continue
                elm = fields[0]
                if elm not in useELMs:
                    continue
                for protein in prosite2protein[fields[1]]:
                    useELMs[elm][protein] = True
                    proteins.setdefault(protein, {})[elm] = True
# For every ELM in the annotation file given as the first command-line
# argument, count how often each matched sequence occurs and print one
# tab-separated line per (ELM, sequence):
#     ELM <TAB> sequence <TAB> count <TAB> count / total matches for that ELM
import sys
import utils_motif
import utils
from collections import defaultdict

elm2proteins = utils_motif.annotation2protein(sys.argv[1], {'ELM':True})

for elm in elm2proteins:
    hits_by_protein = elm2proteins[elm]
    seq_counts = defaultdict(utils.init_zero)
    n_hits = 0
    for protein in hits_by_protein:
        # Each hit is a 3-item record; only the last item (the sequence)
        # is used here.  The first two look like start/stop positions --
        # confirm against utils_motif.
        for _start, _stop, matched_seq in hits_by_protein[protein]:
            seq_counts[matched_seq] += 1
            n_hits += 1
    for matched_seq in seq_counts:
        count = seq_counts[matched_seq]
        fraction = float(count) / float(n_hits)
        print('\t'.join([elm, matched_seq, str(count), str(fraction)]))
"../../Data/ELM/HIV-1/Subtypes_B_C/HIV1.BC.70.conserved", "ELM", "../../Data/ELM/Human/human.website.elm", "ELM", "../../Data/ProfileScan/all.ProfileScan.scanHPRD.notNCBI", "ProfileScan", "../../Data/Network/Human/HPRD/hprd.intr", "../../Data/human.hprd.prosite", "../../Data/Network/Human/HPRD/version2entrezgeneid", "../../Data/Binding_Relations/ELM.ProfileScan.pairs", "some out 1", "some out 2", ] utils_scripting.checkStart(sys.argv, req_args, examples, len(req_args), True) virus_elm2protein = utils_motif.annotation2protein(sys.argv[1], {sys.argv[2]: True}) study_hps = utils_graph.getNodes(sys.argv[8]) human_elm2protein = utils_motif.annotation2protein_forProteins(sys.argv[3], {sys.argv[4]: True}, study_hps) human_cd2protein = utils_motif.annotation2protein_forProteins(sys.argv[5], {sys.argv[6]: True}, study_hps) network = utils_graph.getEdges(sys.argv[7]) version2geneid = utils_humanVirus.get_version2entrez(sys.argv[9]) elm2cd = utils_humanVirus.get_elm2prosites(sys.argv[10]) outf1 = sys.argv[11] outf2 = sys.argv[12] vp_to_h1_to_h2 = {} with open(outf1, "w") as f: for elm in virus_elm2protein.keys(): if human_elm2protein.has_key(elm): h2_noRestrictions = human_elm2protein[elm] h2 = {}