Esempio n. 1
0
def evaluate_free_param(f_integration, f_bench, bin_size, median_y_n,
                        f_ens_red):
    """Benchmark every free-parameter column of an integrated dataset.

    The header line of ``f_integration`` is split on tabs and the number of
    free parameters is taken as the column count minus two.  For each
    parameter index ``d`` (1-based) the function:

    1. writes ``<f_integration>_d<d>`` via ``data.report_pair_with_sem_sim``;
    2. runs ``sort_pair_value.py`` to produce ``..._d<d>.ord`` and deletes
       the file from step 1;
    3. runs ``scale_dataset.py`` to produce ``..._d<d>.ord.scale`` and
       deletes the file from step 2;
    4. runs ``eval_with_scale.py`` to produce ``..._d<d>_bench``.

    The helper scripts are looked up under
    ``$AP_PLN_HOME/src/scripts_python``.

    Args:
        f_integration: path of the tab-separated integrated dataset.
        f_bench: path of the benchmark file read by ``data.read_sem_sim``.
        bin_size: forwarded to ``eval_with_scale.py`` (converted to text).
        median_y_n: forwarded to ``eval_with_scale.py`` (converted to text).
        f_ens_red: forwarded to ``eval_with_scale.py`` as first argument.

    Returns:
        None.  Results are the files written by the helper scripts.
    """
    logger = _get_eval_logger()

    # determine the number of free parameter to evaluate
    with open(f_integration, 'r') as f:
        head = f.readline()
    nb_d = len(head.rstrip().split('\t')) - 2
    logger.debug("number of free parameter %s", nb_d)

    # gene pair in bench
    sem_sim = data.read_sem_sim(f_bench)

    # comparison phenotypic bench vs integrated dataset
    for d in range(1, nb_d + 1):
        prefix = f_integration + "_d" + str(d)

        # keep only gene pairs with phenotypic semantic similarity score
        f_out1 = prefix
        data.report_pair_with_sem_sim(f_integration, f_out1, sem_sim, d)

        # sort gene pair by value
        f_out2 = prefix + ".ord"
        _run_pipeline_script(
            logger, "sort_gene_pairs_by_value/sort_pair_value.py",
            f_out1, f_out2)
        _remove_intermediate(logger, f_out1)

        # scale dataset
        f_out3 = prefix + ".ord.scale"
        _run_pipeline_script(
            logger, "scale_dataset/scale_dataset.py", f_out2, f_out3)
        _remove_intermediate(logger, f_out2)

        # gene pair value vs benchmark
        f_out4 = prefix + "_bench"
        _run_pipeline_script(
            logger, "bench_versus_dataset/eval_with_scale.py",
            f_ens_red, f_bench, f_out3, f_out4, bin_size, median_y_n)


def _get_eval_logger():
    """Return the evaluation logger, attaching its console handler once."""
    logger = logging.getLogger('evaluation free parameter')
    logger.setLevel(logging.DEBUG)
    # getLogger() returns the same object on every call, so adding a handler
    # unconditionally would duplicate each message on repeated invocations.
    if not logger.handlers:
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(ch)
    return logger


def _run_pipeline_script(logger, script, *args):
    """Run one script from $AP_PLN_HOME/src/scripts_python and log its output."""
    script_path = os.path.expandvars(
        "$AP_PLN_HOME/src/scripts_python/" + script)
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = ["python", script_path] + [str(arg) for arg in args]
    try:
        # stderr must be piped, otherwise communicate() always returns None
        # for it; universal_newlines yields text on both Python 2 and 3.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as exc:
        logger.error("could not start %s: %s", script_path, exc)
        return
    out, err = proc.communicate()
    for line in out.splitlines():
        logger.debug(line)
    if err:
        logger.error(err)
    if proc.returncode != 0:
        logger.error("%s exited with status %s", script_path,
                     proc.returncode)


def _remove_intermediate(logger, path):
    """Delete an intermediate file, logging (not raising) on failure."""
    try:
        os.remove(path)
    except OSError as exc:
        logger.error("could not remove %s: %s", path, exc)
Esempio n. 2
0
def evaluate_free_param(f_integration, f_bench, bin_size, median_y_n, f_ens_red):
    """Benchmark every free-parameter column of an integrated dataset.

    The header line of ``f_integration`` is split on tabs and the number of
    free parameters is taken as the column count minus two.  For each
    parameter index ``d`` (1-based) the function:

    1. writes ``<f_integration>_d<d>`` via ``data.report_pair_with_sem_sim``;
    2. runs ``sort_pair_value.py`` to produce ``..._d<d>.ord`` and deletes
       the file from step 1;
    3. runs ``scale_dataset.py`` to produce ``..._d<d>.ord.scale`` and
       deletes the file from step 2;
    4. runs ``eval_with_scale.py`` to produce ``..._d<d>_bench``.

    The helper scripts are looked up under
    ``$AP_PLN_HOME/src/scripts_python``.

    Args:
        f_integration: path of the tab-separated integrated dataset.
        f_bench: path of the benchmark file read by ``data.read_sem_sim``.
        bin_size: forwarded to ``eval_with_scale.py`` (converted to text).
        median_y_n: forwarded to ``eval_with_scale.py`` (converted to text).
        f_ens_red: forwarded to ``eval_with_scale.py`` as first argument.

    Returns:
        None.  Results are the files written by the helper scripts.
    """
    logger = _get_eval_logger()

    # determine the number of free parameter to evaluate
    with open(f_integration, 'r') as f:
        head = f.readline()
    nb_d = len(head.rstrip().split('\t')) - 2
    logger.debug("number of free parameter %s", nb_d)

    # gene pair in bench
    sem_sim = data.read_sem_sim(f_bench)

    # comparison phenotypic bench vs integrated dataset
    for d in range(1, nb_d + 1):
        prefix = f_integration + "_d" + str(d)

        # keep only gene pairs with phenotypic semantic similarity score
        f_out1 = prefix
        data.report_pair_with_sem_sim(f_integration, f_out1, sem_sim, d)

        # sort gene pair by value
        f_out2 = prefix + ".ord"
        _run_pipeline_script(
            logger, "sort_gene_pairs_by_value/sort_pair_value.py",
            f_out1, f_out2)
        _remove_intermediate(logger, f_out1)

        # scale dataset
        f_out3 = prefix + ".ord.scale"
        _run_pipeline_script(
            logger, "scale_dataset/scale_dataset.py", f_out2, f_out3)
        _remove_intermediate(logger, f_out2)

        # gene pair value vs benchmark
        f_out4 = prefix + "_bench"
        _run_pipeline_script(
            logger, "bench_versus_dataset/eval_with_scale.py",
            f_ens_red, f_bench, f_out3, f_out4, bin_size, median_y_n)


def _get_eval_logger():
    """Return the evaluation logger, attaching its console handler once."""
    logger = logging.getLogger('evaluation free parameter')
    logger.setLevel(logging.DEBUG)
    # getLogger() returns the same object on every call, so adding a handler
    # unconditionally would duplicate each message on repeated invocations.
    if not logger.handlers:
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(ch)
    return logger


def _run_pipeline_script(logger, script, *args):
    """Run one script from $AP_PLN_HOME/src/scripts_python and log its output."""
    script_path = os.path.expandvars(
        "$AP_PLN_HOME/src/scripts_python/" + script)
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = ["python", script_path] + [str(arg) for arg in args]
    try:
        # stderr must be piped, otherwise communicate() always returns None
        # for it; universal_newlines yields text on both Python 2 and 3.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as exc:
        logger.error("could not start %s: %s", script_path, exc)
        return
    out, err = proc.communicate()
    for line in out.splitlines():
        logger.debug(line)
    if err:
        logger.error(err)
    if proc.returncode != 0:
        logger.error("%s exited with status %s", script_path,
                     proc.returncode)


def _remove_intermediate(logger, path):
    """Delete an intermediate file, logging (not raising) on failure."""
    try:
        os.remove(path)
    except OSError as exc:
        logger.error("could not remove %s: %s", path, exc)
Esempio n. 3
0
# Positional command-line arguments:
#   1: redundant ens annotation file   2: semantic similarity (benchmark) file
#   3: genomic dataset file            4: output file
#   5: bin size (integer)              6: median flag, passed through as text
f_ens_rd = sys.argv[1]
f_sem_sim = sys.argv[2]
f_dataset = sys.argv[3]
f_out = sys.argv[4]
bin_size = int(sys.argv[5])
median_y_n = sys.argv[6]

# NOTE: every print below takes a single pre-formatted string so the output
# is identical whether print is a statement (Python 2) or a function.

# Redundant ens annotation
print("1) Read ens redundant annotations")
gene_convert = data.read_ens_rd(f_ens_rd)
print("Number of gene %s" % len(gene_convert.keys()))

# Read semantic similarity score (benchmark, y)
print("2) Read MGI similarity...")
sem_sim = data.read_sem_sim(f_sem_sim)

# Read genomic dataset (x) with gene pair benchmark value (y)
print("3) Read Score for pair with MGI Score...")
score_pair = data.read_score(f_dataset, gene_convert, sem_sim)
print("Number of gene pairs %s" % len(score_pair.keys()))

# sort pair according to score
print("4) Sort pair according Score...")
list_pair = functions.sort_by_val(score_pair)
print("Number of Pair %s" % len(list_pair))

# Report dataset by bin versus semantic similarity
print("5) read and look at distribution mgi/hpo...")
data.report_data_ben_bin(f_out, score_pair, list_pair, sem_sim, bin_size,
                         median_y_n)
Esempio n. 4
0
# Positional command-line arguments:
#   1: integrated dataset (tab-separated)   2: benchmark file
#   3: bin size                             4: median flag
f_integration = sys.argv[1]
f_bench = sys.argv[2]
bin_size = sys.argv[3]
median_y_n = sys.argv[4]

# fixed parameters
f_ens_red = "../../data/others/ensg_63symb_redundancy"

# determine the number of free parameter to evaluate:
# header column count minus two
with open(f_integration, 'r') as f:
    head = f.readline()
nb_d = len(head.rstrip().split('\t')) - 2
# single pre-formatted string: same output under Python 2 and 3
print("number of free parameter %s" % nb_d)

# gene pair in bench
sem_sim = data.read_sem_sim(f_bench)

# comparison phenotypic bench vs integrated dataset
for d in range(1, nb_d + 1):

    # keep only gene pairs with phenotypic semantic similarity score
    f_out1 = f_integration + "_d" + str(d)
    data.report_pair_with_sem_sim(f_integration, f_out1, sem_sim, d)

    # sort gene pair by value
    f_out2 = f_integration + "_d" + str(d) + ".ord"
    os.system("python ../sort_gene_pairs_by_value/sort_pair_value.py %s %s" % (f_out1, f_out2))
    # delete the intermediate file without spawning a shell; a failure is
    # reported but does not abort the run (the old `rm` did not either)
    try:
        os.remove(f_out1)
    except OSError as exc:
        sys.stderr.write("could not remove %s: %s\n" % (f_out1, exc))

    # scale dataset
    f_out3 = f_integration + "_d" + str(d) + ".ord.scale"
Esempio n. 5
0
# NOTE(review): f_ens_rd is used below but is not assigned in the visible
# part of this script; presumably it comes from sys.argv[1] -- confirm.
# Positional command-line arguments read here:
#   2: semantic similarity (benchmark) file   3: genomic dataset file
#   4: output file   5: bin size (integer)    6: median flag, kept as text
f_sem_sim = sys.argv[2]
f_dataset = sys.argv[3]
f_out = sys.argv[4]
bin_size = int(sys.argv[5])
median_y_n = sys.argv[6]


# NOTE: every print below takes a single pre-formatted string so the output
# is identical whether print is a statement (Python 2) or a function.

# Redundant ens annotation
print("1) Read ens redundant annotations")
gene_convert = data.read_ens_rd(f_ens_rd)
print("Number of gene %s" % len(gene_convert.keys()))

# Read semantic similarity score (benchmark, y)
print("2) Read MGI similarity...")
sem_sim = data.read_sem_sim(f_sem_sim)

# Read genomic dataset (x) with gene pair benchmark value (y)
print("3) Read Score for pair with MGI Score...")
score_pair = data.read_score(f_dataset, gene_convert, sem_sim)
print("Number of gene pairs %s" % len(score_pair.keys()))

# sort pair according to score
print("4) Sort pair according Score...")
list_pair = functions.sort_by_val(score_pair)
print("Number of Pair %s" % len(list_pair))

# Report dataset by bin versus semantic similarity
print("5) read and look at distribution mgi/hpo...")
data.report_data_ben_bin(f_out, score_pair, list_pair, sem_sim, bin_size, median_y_n)
Esempio n. 6
0
# Positional command-line arguments:
#   1: integrated dataset (tab-separated)   2: benchmark file
#   3: bin size                             4: median flag
f_integration = sys.argv[1]
f_bench = sys.argv[2]
bin_size = sys.argv[3]
median_y_n = sys.argv[4]

# fixed parameters
f_ens_red = "../../data/others/ensg_63symb_redundancy"

# determine the number of free parameter to evaluate:
# header column count minus two
with open(f_integration, 'r') as f:
    head = f.readline()
nb_d = len(head.rstrip().split('\t')) - 2
# single pre-formatted string: same output under Python 2 and 3
print("number of free parameter %s" % nb_d)

# gene pair in bench
sem_sim = data.read_sem_sim(f_bench)

# comparison phenotypic bench vs integrated dataset
for d in range(1, nb_d + 1):

    # keep only gene pairs with phenotypic semantic similarity score
    f_out1 = f_integration + "_d" + str(d)
    data.report_pair_with_sem_sim(f_integration, f_out1, sem_sim, d)

    # sort gene pair by value
    f_out2 = f_integration + "_d" + str(d) + ".ord"
    os.system("python ../sort_gene_pairs_by_value/sort_pair_value.py %s %s" %
              (f_out1, f_out2))
    # delete the intermediate file without spawning a shell; a failure is
    # reported but does not abort the run (the old `rm` did not either)
    try:
        os.remove(f_out1)
    except OSError as exc:
        sys.stderr.write("could not remove %s: %s\n" % (f_out1, exc))

    # scale dataset