def evaluate_free_param(f_integration, f_bench, bin_size, median_y_n, f_ens_red):
    """Evaluate every free parameter (column) of an integrated dataset
    against a phenotypic benchmark.

    For each free parameter ``d`` the pipeline:
      1. keeps only gene pairs that have a phenotypic semantic similarity
         score (column ``d`` of *f_integration*),
      2. sorts the gene pairs by value (external script),
      3. scales the sorted dataset (external script),
      4. compares the scaled values to the benchmark (external script).

    Intermediate files are deleted once the next step has consumed them;
    the final per-parameter output is ``<f_integration>_d<d>_bench``.

    Parameters:
        f_integration: path to the integrated dataset (TSV; two gene-id
            columns followed by one column per free parameter).
        f_bench: path to the phenotypic benchmark (semantic similarity).
        bin_size: bin size forwarded to the evaluation script.
        median_y_n: median yes/no flag forwarded to the evaluation script.
        f_ens_red: path to the Ensembl redundancy annotation file.
    """
    # define log
    logger = logging.getLogger('evaluation free parameter')
    logger.setLevel(logging.DEBUG)
    # BUG FIX: the original added a new StreamHandler on every call,
    # duplicating every log line after the first invocation.
    if not logger.handlers:
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # determine the number of free parameters: header columns minus the
    # two gene-identifier columns.  BUG FIX: the file handle was never
    # closed in the original.
    with open(f_integration, 'r') as f:
        head = f.readline()
    nb_d = len(head.rstrip().split('\t')) - 2
    # BUG FIX: logging uses %-style lazy formatting; the original passed
    # nb_d as a stray positional argument with no placeholder, which makes
    # the logging module report a formatting error instead of the value.
    logger.debug("number of free parameter %s", nb_d)

    # gene pairs present in the phenotypic benchmark
    sem_sim = data.read_sem_sim(f_bench)

    # comparison phenotypic bench vs integrated dataset
    for d in range(1, nb_d + 1):
        # keep only gene pairs with phenotypic semantic similarity score
        f_out1 = f_integration + "_d" + str(d)
        data.report_pair_with_sem_sim(f_integration, f_out1, sem_sim, d)

        # sort gene pair by value
        f_out2 = f_integration + "_d" + str(d) + ".ord"
        _run_and_log(
            logger,
            "python $AP_PLN_HOME/src/scripts_python/sort_gene_pairs_by_value/sort_pair_value.py %s %s"
            % (f_out1, f_out2))
        # BUG FIX: os.remove instead of os.system("rm ..."): no shell
        # involved, so paths with spaces/metacharacters cannot break it.
        os.remove(f_out1)

        # scale dataset
        f_out3 = f_integration + "_d" + str(d) + ".ord.scale"
        _run_and_log(
            logger,
            "python $AP_PLN_HOME/src/scripts_python/scale_dataset/scale_dataset.py %s %s"
            % (f_out2, f_out3))
        os.remove(f_out2)

        # gene pair value vs benchmark
        f_out4 = f_integration + "_d" + str(d) + "_bench"
        _run_and_log(
            logger,
            "python $AP_PLN_HOME/src/scripts_python/bench_versus_dataset/eval_with_scale.py %s %s %s %s %s %s"
            % (f_ens_red, f_bench, f_out3, f_out4, bin_size, median_y_n))


def _run_and_log(logger, cmd):
    """Run *cmd* through the shell; log stdout at DEBUG, stderr at ERROR.

    BUG FIX: the original did not pipe stderr, so communicate() always
    returned err=None and the error branch could never fire.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, shell=True)
    out, err = proc.communicate()
    # plain loop instead of a side-effect list comprehension
    for val in out.split("\n"):
        logger.debug(val)
    if err:
        logger.error(err)
def evaluate_free_param(f_integration, f_bench, bin_size, median_y_n, f_ens_red):
    """Score every free parameter (column) of an integrated dataset
    against a phenotypic benchmark.

    For each parameter column d:
      1. keep only gene pairs with a semantic similarity score,
      2. sort the pairs by value (external script),
      3. scale the sorted dataset (external script),
      4. compare the scaled values to the benchmark (external script).

    Intermediate files are removed as soon as the next step has used them.
    """
    # dedicated logger: everything at DEBUG internally, INFO+ on console
    logger = logging.getLogger('evaluation free parameter')
    logger.setLevel(logging.DEBUG)
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(console)

    # number of free parameters = header columns minus the two
    # gene-identifier columns
    handle = open(f_integration, 'r')
    header = handle.readline()
    nb_d = len(header.rstrip().split('\t')) - 2
    # NOTE(review): nb_d is a stray %-format argument with no placeholder;
    # the logging module reports (and swallows) the formatting error, so
    # the value is never actually printed — confirm and fix upstream.
    logger.debug("number of free parameter", nb_d)

    # gene pairs known to the phenotypic benchmark
    sem_sim = data.read_sem_sim(f_bench)

    # compare the phenotypic benchmark with every column of the dataset
    for d in range(1, nb_d + 1):
        # 1) keep only gene pairs with phenotypic semantic similarity score
        filtered = f_integration + "_d" + str(d)
        data.report_pair_with_sem_sim(f_integration, filtered, sem_sim, d)

        # 2) sort gene pairs by value
        ordered = f_integration + "_d" + str(d) + ".ord"
        cmd = "python $AP_PLN_HOME/src/scripts_python/sort_gene_pairs_by_value/sort_pair_value.py %s %s" % (filtered, ordered)
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
        out, err = child.communicate()
        for line in out.split("\n"):
            logger.debug(line)
        # NOTE(review): stderr is not piped, so err is always None here
        if err is not None:
            logger.error(err)
        os.system("rm %s" % filtered)

        # 3) scale dataset
        scaled = f_integration + "_d" + str(d) + ".ord.scale"
        cmd = "python $AP_PLN_HOME/src/scripts_python/scale_dataset/scale_dataset.py %s %s" % (ordered, scaled)
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
        out, err = child.communicate()
        for line in out.split("\n"):
            logger.debug(line)
        if err is not None:
            logger.error(err)
        os.system("rm %s" % ordered)

        # 4) gene pair value vs benchmark
        bench_out = f_integration + "_d" + str(d) + "_bench"
        cmd = "python $AP_PLN_HOME/src/scripts_python/bench_versus_dataset/eval_with_scale.py %s %s %s %s %s %s" % (f_ens_red, f_bench, scaled, bench_out, bin_size, median_y_n)
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
        out, err = child.communicate()
        for line in out.split("\n"):
            logger.debug(line)
        if err is not None:
            logger.error(err)
f_ens_rd = sys.argv[1] f_sem_sim = sys.argv[2] f_dataset = sys.argv[3] f_out = sys.argv[4] bin_size = sys.argv[5] bin_size = int(bin_size) median_y_n = sys.argv[6] # Redundant ens annotation print "1) Read ens redundant annotations" gene_convert = data.read_ens_rd(f_ens_rd) print "Number of gene", len(gene_convert.keys()) # Read semantic similarity score (benchmark, y) print "2) Read MGI similarity..." sem_sim = data.read_sem_sim(f_sem_sim) # Read genomic dataset (x) with gene pair benchmark value (y) print "3) Read Score for pair with MGI Score..." score_pair = data.read_score(f_dataset, gene_convert, sem_sim) print "Number of gene pairs", len(score_pair.keys()) # sort pair according to score print "4) Sort pair according Score..." list_pair = functions.sort_by_val(score_pair) print "Number of Pair", len(list_pair) # Report dataset by bin versus semantic similarity print "5) read and look at distribution mgi/hpo..." data.report_data_ben_bin(f_out, score_pair, list_pair, sem_sim, bin_size, median_y_n)
# Script entry point: evaluate each free parameter (column) of an integrated
# dataset against a phenotypic benchmark.
# Usage: <integrated_dataset> <benchmark> <bin_size> <median_y_n>
# NOTE(review): this block is truncated in the visible chunk — the loop body
# continues (scaling and evaluation steps) beyond what is shown here.
f_integration=sys.argv[1]
f_bench=sys.argv[2]
bin_size=sys.argv[3]
median_y_n=sys.argv[4]
# fixed parameters
f_ens_red="../../data/others/ensg_63symb_redundancy"
# determine the number of free parameter to evaluate
# (header columns minus the two gene-identifier columns)
f=open(f_integration,'r')
head=f.readline()
nb_d=len(head.rstrip().split('\t'))-2
print "number of free parameter",nb_d
# gene pair in bench
sem_sim=data.read_sem_sim(f_bench)
# comparison phenotypic bench vs integrated datase
for d in range(1,nb_d+1):
    # keep only gene pairs with phenotypic semantic similarity score
    f_out1=f_integration+"_d"+str(d)
    data.report_pair_with_sem_sim(f_integration,f_out1,sem_sim,d)
    # sort gene pair by value (external script), then drop the unsorted file
    f_out2=f_integration+"_d"+str(d)+".ord"
    os.system("python ../sort_gene_pairs_by_value/sort_pair_value.py %s %s" % (f_out1,f_out2))
    os.system("rm %s" % f_out1)
    # scale dataset
    f_out3=f_integration+"_d"+str(d)+".ord.scale"
f_sem_sim=sys.argv[2] f_dataset=sys.argv[3] f_out=sys.argv[4] bin_size=sys.argv[5] bin_size=int(bin_size) median_y_n=sys.argv[6] # Redundant ens annotation print "1) Read ens redundant annotations" gene_convert=data.read_ens_rd(f_ens_rd) print "Number of gene",len(gene_convert.keys()) # Read semantic similarity score (benchmark, y) print "2) Read MGI similarity..." sem_sim=data.read_sem_sim(f_sem_sim) # Read genomic dataset (x) with gene pair benchmark value (y) print "3) Read Score for pair with MGI Score..." score_pair=data.read_score(f_dataset,gene_convert,sem_sim) print "Number of gene pairs",len(score_pair.keys()) # sort pair according to score print "4) Sort pair according Score..." list_pair=functions.sort_by_val(score_pair) print "Number of Pair",len(list_pair) # Report dataset by bin versus semantic similarity print "5) read and look at distribution mgi/hpo..." data.report_data_ben_bin(f_out,score_pair,list_pair,sem_sim,bin_size,median_y_n)
# Script entry point: evaluate each free parameter (column) of an integrated
# dataset against a phenotypic benchmark.
# Usage: <integrated_dataset> <benchmark> <bin_size> <median_y_n>
# NOTE(review): this block is truncated in the visible chunk — the loop body
# continues (scale dataset step and beyond) past what is shown here.
f_integration = sys.argv[1]
f_bench = sys.argv[2]
bin_size = sys.argv[3]
median_y_n = sys.argv[4]
# fixed parameters
f_ens_red = "../../data/others/ensg_63symb_redundancy"
# determine the number of free parameter to evaluate
# (header columns minus the two gene-identifier columns)
f = open(f_integration, 'r')
head = f.readline()
nb_d = len(head.rstrip().split('\t')) - 2
print "number of free parameter", nb_d
# gene pair in bench
sem_sim = data.read_sem_sim(f_bench)
# comparison phenotypic bench vs integrated datase
for d in range(1, nb_d + 1):
    # keep only gene pairs with phenotypic semantic similarity score
    f_out1 = f_integration + "_d" + str(d)
    data.report_pair_with_sem_sim(f_integration, f_out1, sem_sim, d)
    # sort gene pair by value (external script), then drop the unsorted file
    f_out2 = f_integration + "_d" + str(d) + ".ord"
    os.system("python ../sort_gene_pairs_by_value/sort_pair_value.py %s %s" % (f_out1, f_out2))
    os.system("rm %s" % f_out1)
    # scale dataset