Exemple #1
0
#!/usr/bin/env python

"""For each HCV protein, calculate the likelihood
   of the GO BP similarity between predictions
   and gold standard. Do this for H1H2 & H1.
"""
import sys, utils_stats, utils_graph, utils_humanVirus, random, os

# Positional command-line arguments.
hhe_file = sys.argv[1]
hhp_file = sys.argv[2]
background_file = sys.argv[3]
# NOTE(review): out_file is read here but never written to below; results are
# printed to stdout. Kept so the expected argument list does not change.
out_file = sys.argv[4]

# GO similarity file passed to gene_set_go_sim_pval for every test below.
GO_SIM_FILE = 'results/HPRD.ls.entrez.gosim'

# The call below is left disabled because it takes a long time.
# NOTE(review): presumably this is what produces GO_SIM_FILE -- confirm.
# utils_stats.gene_set_go_sim(background_file, 'results/HPRD.ls.entrez.gosim')

hhe_vp2hp = utils_humanVirus.loadHHETargetPairs(hhe_file)
pred2vp2hp = utils_humanVirus.loadPredictions_predType2vp2hp(hhp_file)
all_hps = utils_graph.getNodes(background_file)

# Emit one tab-separated line (vp, prediction type, p-value) for each viral
# protein that appears in both the predictions and the HHE target pairs.
for pred_type in ('h1', 'h1h2'):
    for vp, vp_preds in pred2vp2hp[pred_type].items():
        # dict.has_key() no longer exists in Python 3; the `in` operator is
        # equivalent and works on both Python 2 and Python 3.
        if vp not in hhe_vp2hp:
            continue
        # Keep only the HHE targets that are also background nodes.
        hhe = utils_graph.intersectLists([hhe_vp2hp[vp], all_hps]).keys()
        preds = vp_preds.keys()
        go_pval = utils_stats.gene_set_go_sim_pval(preds, hhe, GO_SIM_FILE)
        print('%s\t%s\t%.3f' % (vp, pred_type, go_pval))
"""
    vp elm hp predType,
    give precision and recall
    for each vp and all.
"""
import utils_scripting, utils_humanVirus, utils_graph, sys, utils_stats

# Names of the required positional arguments, in command-line order.
req_args = [
    "niaid triplet file",
    "prediction file",
    "human proteins in study",
    "output file",
]
# One sample value per entry of req_args, in the same order.
examples = [
    "../../Runs/Clustering.domain.s/all_niaid_triplets",
    "../../Runs/Conservation70_Cutoff.2_Window10",
    "../../Data/human.hprd.prosite",
    "some out file",
]
# NOTE(review): checkStart presumably validates sys.argv against req_args and
# shows the examples on misuse -- confirm against utils_scripting.
required_arg_count = len(req_args)
utils_scripting.checkStart(sys.argv, req_args, examples, required_arg_count, True)

# Load the three inputs, each from its own positional argument.
niaid_triplet_path = sys.argv[1]
hhe_vp2hp = utils_humanVirus.loadHHETargetPairs(niaid_triplet_path)

prediction_path = sys.argv[2]
pred2vp2hp = utils_humanVirus.loadPredictions_predType2vp2hp(prediction_path)

human_proteins_path = sys.argv[3]
all_hps = utils_graph.getNodes(human_proteins_path)

with open(sys.argv[4], "w") as fout:
    fout.write("Prediction Type\tVP\tHHE\tHHP\tMatch\tPrecsion\tRecall\tRandomPrecision\tPval\n")
    for predtype in pred2vp2hp.keys():
        for vp in pred2vp2hp[predtype].keys():
            if hhe_vp2hp.has_key(vp):
                hhe = utils_graph.intersectLists([hhe_vp2hp[vp], all_hps])

                hhe_len = len(hhe.keys())
                preds = pred2vp2hp[predtype][vp]
                preds_len = len(preds.keys())
                match = utils_graph.intersectLists([hhe, preds])
                match_len = len(match.keys())