out_suffixe = sys.argv[5]

# Logging setup.
# The logger itself accepts everything from DEBUG upwards, but the stream
# handler attached here only emits INFO and above, so the debug messages
# below are not printed through this handler.
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch = logging.StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.INFO)
logger = logging.getLogger("module1-build phenotypic benchmark")
logger.setLevel(logging.DEBUG)
logger.addHandler(ch)

#
# Step 1: determine the data files used to compute the similarity score.
# data.get_list_file() is unpacked into exactly four file names.
#
logger.info("1) File used to compute phenotypic semantic similarity score between genes pairs")
f_dcas, f_term, f_redund, f_annot = data.get_list_file(dir_data)
logger.debug("disjonct common ancestor file: %s", f_dcas)
logger.debug("phenotype information (relation with others terms) file: %s ", f_term)
logger.debug("phenotype redundant file: %s", f_redund)
logger.debug("phenotype annotations of genes file: %s", f_annot)

#
# Step 2: keep only the relevant annotations.
#
f_annot_subset = "".join(
    [dir_work, "/", "genes_ens_to_phenotype_no_red_", out_suffixe, ".txt"]
)
logger.info("2) Keep only relevant annotation...")
# `exclude` is compared as a string; the command line is only prepared here
# when it is "0" or "1".
if exclude in ("0", "1"):
    # Positional arguments of reannotation_with_subset.py, in order; the three
    # reference files are addressed relative to dir_data.
    cmd_values = (
        f_annot_phen,
        dir_data, f_term,
        dir_data, f_redund,
        dir_data, f_annot,
        f_annot_subset,
        exclude,
    )
    list_arg = (
        "python $AP_PLN_HOME/src/scripts_python/reannotation_with_subset/reannotation_with_subset.py %s %s/%s %s/%s %s/%s %s %s"
        % cmd_values
    )
# print "The phenotypic semantic similarity associated with random genes:" semantic_score = np.loadtxt("%s" % f_bench, usecols=[2]) if median_y_n == "1": random_value = np.median(semantic_score) elif median_y_n == "0": random_value = np.mean(semantic_score) else: print "error: median must be 0 or 1" exit() # # Step2: get the list of file to integrate # print "Get the list of file to integrate..." list_file = data.get_list_file(f_listfile) print "Number of file", len(list_file) nb_dataset = len(list_file) # # Step3: for each re-scored dataset: compute for bin of gene pairs sorted in ascending order the cumulative semantic similarity # f_list_cum = dir_work + "/list_cum_" + out if nb_dataset > 1: f = open(f_list_cum, "w") for i in range(0, nb_dataset): # gene pair value vs benchmark f_in = dir_data + "/" + list_file[i] f_out = dir_work + "/" + list_file[i] + ".benchcum" os.system( "python $AP_PLN_HOME/src/scripts_python/benchcum_versus_dataset/eval_with_scale_cum.py %s %s %s %s %s %s"
import os
import sys

import data

# Integrate the pair scores of several datasets and write a report.
#
# Command line: f_listfile dir_data f_out nb_m
#   f_listfile  file handed to data.get_list_file() to obtain the dataset names
#   dir_data    directory the dataset names are resolved against
#   f_out       output path handed to data.report_integration()
#   nb_m        integer; the report is requested for 1..nb_m inclusive
#
# The print calls use a single pre-formatted string so the script prints the
# same text under Python 2 and Python 3.

# Parameters
if len(sys.argv) < 5:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: %s f_listfile dir_data f_out nb_m" % sys.argv[0])
f_listfile = sys.argv[1]
dir_data = sys.argv[2]
f_out = sys.argv[3]
nb_m = int(sys.argv[4])

# Get the list of file to integrate
print("Get the list of file to integrate...")
list_file = data.get_list_file(f_listfile)
nb_dataset = len(list_file)
print("Number of file %d" % nb_dataset)

# Read Score for the different dataset
# score_pair is handed to data.read_pair_score() together with the dataset
# index and the dataset count; the return value is ignored, so the helper is
# expected to fill the dict in place.
print("Read Score for the different dataset")
score_pair = dict()
for i, file_name in enumerate(list_file):
    print("* file %s" % (file_name,))
    data.read_pair_score(dir_data + "/" + file_name, nb_dataset, i, score_pair)
print("Number of genes pairs %d" % len(score_pair))

# Integration and report
print("Integration and Report of Pairs")
# Materialise the range so the helper receives a real list on Python 3 too.
list_d = list(range(1, nb_m + 1))
data.report_integration(f_out, score_pair, list_d)
logger.info("Parameters:%s", sys.argv)

#
# Test the phenotype annotation
#
# The value returned by data.test_file_annot() is used as the name of the
# sub-directory of $AP_PLN_HOME/data that holds the reference files.
pheno = data.test_file_annot(f_annot_phen)
ap_pln_home = os.environ['AP_PLN_HOME']  # raises KeyError when unset, as before
dir_data = ap_pln_home + "/data/%s" % pheno

#
# Step1: Determine data file used to compute similarity score
#
logger.info(
    "1) File used to compute phenotypic semantic similarity score between genes pairs"
)
# data.get_list_file() is unpacked into exactly four file names.
f_dcas, f_term, f_redund, f_annot = data.get_list_file(dir_data)
logger.debug("disjonct common ancestor file: %s", f_dcas)
logger.debug("phenotype information (relation with others terms) file: %s ", f_term)
logger.debug("phenotype redundant file: %s", f_redund)
logger.debug("phenotype annotations of genes file: %s", f_annot)

#
# Step2: Keep only relevant annotation
#
f_annot_subset = dir_work + '/' + "genes_ens_to_phenotype_no_red_" + out_suffixe + ".txt"
logger.info("2) Keep only relevant annotation...")
exclude = 0
# Build the command as an argument vector and run it without a shell: paths
# containing spaces or shell metacharacters are then passed through verbatim
# instead of being split or interpreted. $AP_PLN_HOME is expanded here from
# the environment, since no shell is involved any more.
# NOTE(review): list_arg is now a list rather than a string; confirm nothing
# later in the script concatenates it with a string.
list_arg = [
    "python",
    ap_pln_home + "/src/scripts_python/reannotation_with_subset/reannotation_with_subset.py",
    f_annot_phen,
    "%s/%s" % (dir_data, f_term),
    "%s/%s" % (dir_data, f_redund),
    "%s/%s" % (dir_data, f_annot),
    f_annot_subset,
    str(exclude),
]
# NOTE(review): stdout is piped, so the code following this statement must
# read it (e.g. proc.communicate()) or the child can block on a full pipe.
proc = subprocess.Popen(list_arg, stdout=subprocess.PIPE)