Ejemplo n.º 1
0
def getConservedELMs(virus, subtypes):
    """Intersect the ELM annotations loaded for each subtype of *virus*.

    For every subtype in ``subtypes[virus]`` the file
    ``<RESULTSDIR>/<virus>.<subtype>.elms.70.controled`` is loaded through
    ``utils_motif.annotation2protein`` with the tool filter ``{'ELM': True}``.
    The per-subtype results are collected in subtype order and handed to
    ``utils_graph.intersectLists``, whose result is returned unchanged.
    """
    per_subtype = []
    for subtype in subtypes[virus]:
        file_name = virus + '.' + subtype + '.elms.70.controled'
        annotation_path = os.path.join(local_settings.RESULTSDIR, file_name)
        # A fresh filter dict per call, exactly as the original built it.
        per_subtype.append(
            utils_motif.annotation2protein(annotation_path, {'ELM': True}))
    return utils_graph.intersectLists(per_subtype)
Ejemplo n.º 2
0
def expandProteinsForELMs(humanAnnotationFile, elm_pairs_dir,
                          useELMs, proteins, domain_tools):
    """Record, for each ELM in *useELMs*, the proteins of its paired domains.

    For every key of *domain_tools* the tab-separated file
    ``elm_pairs_dir + 'ELM.' + <key> + '.pairs'`` is read.  A line is used
    when it has at least two columns and the second one is non-empty after
    stripping whitespace: column 1 is taken as the ELM, column 2 as the
    domain.  If the ELM is already a key of *useELMs*, every protein listed
    under that domain in the mapping returned by
    ``utils_motif.annotation2protein`` is written into both mappings.

    Args:
        humanAnnotationFile: annotation file passed to
            ``utils_motif.annotation2protein``.
        elm_pairs_dir: prefix of the pair files.  It is concatenated directly
            with the file name; no path separator is inserted.
        useELMs: mapping ``elm -> {protein: True}``; updated in place.
        proteins: mapping ``protein -> {elm: True}``; updated in place, with
            missing proteins added.
        domain_tools: mapping whose keys select the pair files to read; also
            passed to ``utils_motif.annotation2protein``.

    Returns:
        None.  All results are written into *useELMs* and *proteins*.

    Raises:
        KeyError: if a paired domain is absent from the annotation mapping
            (unless that mapping supplies its own defaults).
        IOError/OSError: if a pair file cannot be opened.
    """
    domain2protein = utils_motif.annotation2protein(humanAnnotationFile,
                                                    domain_tools)
    for tool in domain_tools:
        pairs_path = elm_pairs_dir + 'ELM.' + tool + '.pairs'
        # The context manager closes the file even if a lookup below raises.
        with open(pairs_path) as pairs_file:
            for line in pairs_file:
                fields = [field.strip() for field in line.split('\t')]
                if len(fields) < 2 or fields[1] == '':
                    continue
                elm, domain = fields[0], fields[1]
                if elm not in useELMs:
                    continue
                for protein in domain2protein[domain]:
                    useELMs[elm][protein] = True
                    proteins.setdefault(protein, {})[elm] = True
Ejemplo n.º 3
0
import sys, utils_motif, utils
from collections import defaultdict
# For each ELM, count how often every matched sequence occurs across all
# proteins and print one tab-separated row per (ELM, sequence):
#   ELM, sequence, count, count / total hits for that ELM.
elm2proteins = utils_motif.annotation2protein(sys.argv[1], {'ELM': True})
for elm in elm2proteins:
    hits_by_protein = elm2proteins[elm]
    seqs = defaultdict(utils.init_zero)
    total = 0
    for protein in hits_by_protein:
        # Each hit unpacks into three fields; only the third (the sequence)
        # is tallied.  NOTE(review): the first two look like start/stop
        # positions -- confirm against utils_motif.
        for _st, _stp, seq in hits_by_protein[protein]:
            seqs[seq] += 1
            total += 1
    # total is non-zero whenever seqs has entries, so the division is safe.
    for seq, count in seqs.items():
        fraction = float(count) / float(total)
        # A single parenthesised argument prints identically with the
        # Python 2 print statement and the Python 3 print function.
        print('\t'.join([elm, seq, str(count), str(fraction)]))
Ejemplo n.º 4
0
    "../../Data/ELM/HIV-1/Subtypes_B_C/HIV1.BC.70.conserved",
    "ELM",
    "../../Data/ELM/Human/human.website.elm",
    "ELM",
    "../../Data/ProfileScan/all.ProfileScan.scanHPRD.notNCBI",
    "ProfileScan",
    "../../Data/Network/Human/HPRD/hprd.intr",
    "../../Data/human.hprd.prosite",
    "../../Data/Network/Human/HPRD/version2entrezgeneid",
    "../../Data/Binding_Relations/ELM.ProfileScan.pairs",
    "some out 1",
    "some out 2",
]
# Runs before any input file is read.  checkStart, req_args and examples are
# defined outside this view.  NOTE(review): presumably this validates the
# argument count and shows the examples on failure -- confirm in
# utils_scripting.
utils_scripting.checkStart(sys.argv, req_args, examples, len(req_args), True)

# Positional arguments, described only by how the calls below consume them
# (TODO confirm the intended meaning of each against req_args).
# argv[1] / argv[2]: annotation file and the single tool key to keep.
virus_elm2protein = utils_motif.annotation2protein(sys.argv[1], {sys.argv[2]: True})
# argv[8]: file handed to utils_graph.getNodes; the result is passed to both
# annotation2protein_forProteins calls below.
study_hps = utils_graph.getNodes(sys.argv[8])
# argv[3] / argv[4] and argv[5] / argv[6]: two (annotation file, tool key) pairs.
human_elm2protein = utils_motif.annotation2protein_forProteins(sys.argv[3], {sys.argv[4]: True}, study_hps)
human_cd2protein = utils_motif.annotation2protein_forProteins(sys.argv[5], {sys.argv[6]: True}, study_hps)
# argv[7]: file handed to utils_graph.getEdges.
network = utils_graph.getEdges(sys.argv[7])
# argv[9], argv[10]: inputs for two utils_humanVirus lookup loaders.
version2geneid = utils_humanVirus.get_version2entrez(sys.argv[9])
elm2cd = utils_humanVirus.get_elm2prosites(sys.argv[10])
# argv[11], argv[12]: output paths; outf1 is opened for writing further down.
outf1 = sys.argv[11]
outf2 = sys.argv[12]

vp_to_h1_to_h2 = {}
with open(outf1, "w") as f:
    for elm in virus_elm2protein.keys():
        if human_elm2protein.has_key(elm):
            h2_noRestrictions = human_elm2protein[elm]
            h2 = {}