# Suffix taken from the command line (sys.argv[5]); it is embedded in the
# names of the files this script generates.
out_suffixe = sys.argv[5]


# Logging setup: the logger itself accepts records from DEBUG upwards, while
# the attached stream handler only emits INFO and above.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.INFO)
logger = logging.getLogger("module1-build phenotypic benchmark")
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)
#
# Step1: Determine data file used to compute similarity score
#
logger.info("1) File used to compute phenotypic semantic similarity score between genes pairs")
# The result of data.get_list_file(dir_data) is unpacked into exactly four
# values; any other length raises ValueError here.
f_dcas, f_term, f_redund, f_annot = data.get_list_file(dir_data)
# Values are passed as logging arguments rather than pre-formatted with "%",
# so the message is only built when a handler actually emits the record.
logger.debug("disjonct common ancestor file: %s", f_dcas)
logger.debug("phenotype information (relation with others terms) file: %s ", f_term)
logger.debug("phenotype redundant file: %s", f_redund)
logger.debug("phenotype annotations of genes file: %s", f_annot)

#
# Step2: Keep only relevant annotation
#
# Output path of the reduced annotation file, placed under dir_work.
f_annot_subset = dir_work + "/genes_ens_to_phenotype_no_red_" + out_suffixe + ".txt"
logger.info("2) Keep only relevant annotation...")
if exclude in ("0", "1"):
    # Command line for the re-annotation script: the annotation file to
    # process, three files located under dir_data, the output file and the
    # exclude flag, in that order.
    list_arg = "python $AP_PLN_HOME/src/scripts_python/reannotation_with_subset/reannotation_with_subset.py %s %s/%s %s/%s %s/%s %s %s" % (
        f_annot_phen,
        dir_data, f_term,
        dir_data, f_redund,
        dir_data, f_annot,
        f_annot_subset,
        exclude,
    )
# Example #2
# 0
#
print "The phenotypic semantic similarity associated with random genes:"
semantic_score = np.loadtxt("%s" % f_bench, usecols=[2])
if median_y_n == "1":
    random_value = np.median(semantic_score)
elif median_y_n == "0":
    random_value = np.mean(semantic_score)
else:
    print "error: median must be 0 or 1"
    exit()

#
# Step2: get the list of file to integrate
#
print "Get the list of file to integrate..."
list_file = data.get_list_file(f_listfile)
print "Number of file", len(list_file)
nb_dataset = len(list_file)

#
# Step3: for each re-scored dataset, compute the cumulative semantic similarity
# over bins of gene pairs sorted in ascending order
#
# Path (under dir_work) of the file opened for writing below; it is only
# opened when more than one dataset is listed.
f_list_cum = dir_work + "/list_cum_" + out
if nb_dataset > 1:
    f = open(f_list_cum, "w")
    for i in range(0, nb_dataset):
        # gene pair value vs benchmark
        # The input is looked up under dir_data; the ".benchcum" output path
        # is built under dir_work from the same file name.
        f_in = dir_data + "/" + list_file[i]
        f_out = dir_work + "/" + list_file[i] + ".benchcum"
        # NOTE(review): this call is cut off in the visible excerpt; the six
        # values substituted into the command line are not shown here.
        os.system(
            "python $AP_PLN_HOME/src/scripts_python/benchcum_versus_dataset/eval_with_scale_cum.py %s %s %s %s %s %s"
# Example #3
# 0
import sys, os
import data

# Parameters (positional command-line arguments):
#   1: f_listfile - list file handed to data.get_list_file
#   2: dir_data   - directory prefixed to every listed file name
#   3: f_out      - output target handed to data.report_integration
#   4: nb_m       - integer upper bound of the 1..nb_m range built for the report
f_listfile, dir_data, f_out = sys.argv[1], sys.argv[2], sys.argv[3]
nb_m = int(sys.argv[4])

# Get the list of file to integrate
print "Get the list of file to integrate..."
list_file = data.get_list_file(f_listfile)
print "Number of file", len(list_file)
nb_dataset = len(list_file)

# Read Score for the different dataset
print "Read Score for the different dataset"
score_pair = dict()
for i in range(0, len(list_file)):
    print "* file", list_file[i]
    data.read_pair_score(dir_data + "/" + list_file[i], nb_dataset, i,
                         score_pair)
print "Number of genes pairs", len(score_pair.keys())

# Integration and report
print "Integration and Report of Pairs"
list_d = range(1, nb_m + 1)
data.report_integration(f_out, score_pair, list_d)
# Record the raw command line. sys.argv is passed as a logging argument so
# the message is only formatted when a handler emits the record.
logger.info("Parameters:%s", sys.argv)

#
# Test the phenotype annotation
#
pheno = data.test_file_annot(f_annot_phen)
# Data directory is <AP_PLN_HOME>/data/<pheno>; an unset AP_PLN_HOME
# environment variable raises KeyError here.
dir_data = os.environ['AP_PLN_HOME'] + "/data/%s" % pheno

#
# Step1: Determine data file used to compute similarity score
#
logger.info(
    "1) File used to compute phenotypic semantic similarity score between genes pairs"
)
# The result of data.get_list_file(dir_data) is unpacked into exactly four
# values; any other length raises ValueError here.
f_dcas, f_term, f_redund, f_annot = data.get_list_file(dir_data)
# Values are passed as logging arguments rather than pre-formatted with "%",
# so the message is only built when a handler actually emits the record.
logger.debug("disjonct common ancestor file: %s", f_dcas)
logger.debug("phenotype information (relation with others terms) file: %s ",
             f_term)
logger.debug("phenotype redundant file: %s", f_redund)
logger.debug("phenotype annotations of genes file: %s", f_annot)
#
# Step2: Keep only relevant annotation
#
# Output path of the reduced annotation file, placed under dir_work.
f_annot_subset = dir_work + "/genes_ens_to_phenotype_no_red_" + out_suffixe + ".txt"
logger.info("2) Keep only relevant annotation...")
exclude = 0
# Command line for the re-annotation script: the annotation file to process,
# three files located under dir_data, the output file and the exclude flag.
list_arg = (
    "python $AP_PLN_HOME/src/scripts_python/reannotation_with_subset/reannotation_with_subset.py %s %s/%s %s/%s %s/%s %s %s"
    % (f_annot_phen, dir_data, f_term, dir_data, f_redund, dir_data, f_annot, f_annot_subset, exclude)
)
# Run through the shell (needed for $AP_PLN_HOME to expand) with stdout
# captured in a pipe.
# NOTE(review): the arguments are interpolated unquoted, so a path containing
# spaces or shell metacharacters would alter the command - confirm inputs.
proc = subprocess.Popen(list_arg, shell=True, stdout=subprocess.PIPE)