Example #1
0
def create_population(number_of_individual, data_file_name, intervals_min, intervals_max):
	"""
	-> Build the initial population of randomly initialized individuals
	-> number_of_individual is the number of Individual objects to create,
	   their ids run from 0 to number_of_individual - 1
	-> data_file_name is given to dichotomization.extract_matrix_from,
	   the second element of its result is used as the variable map
	-> intervals_min and intervals_max are forwarded unchanged to
	   create_random_individual
	-> return the list of created individuals, in id order
	"""
	# only the second element of the extracted pack is needed here
	variable_map = dichotomization.extract_matrix_from(data_file_name)[1]

	def spawn(identifier):
		# one fresh individual carrying its id and a random interval layout
		member = Individual()
		member._id = identifier
		member._intervals_to_variables = create_random_individual(variable_map, intervals_min, intervals_max)
		return member

	return [spawn(identifier) for identifier in range(0, number_of_individual)]
Example #2
0
def run_ag_exploration(data_file, number_of_individual_per_generation, max_iteration, score_method, filter_strat):
	"""
	-> Run the genetic algorithm
	-> data_file used in the evaluation process
	-> number_of_individual_per_generation is an int
	-> max_iteration is an int
	-> score_method is a string, the method to use for scoring, could be:
		- nn (neural network)
		- svm (support vector machine)
		- tree (decision tree)
	-> filter_strat is a string, name of the filter applied to the cohort,
	   could be:
	   			- random
	   			- any disease (SLE, SjS ...)
	-> return nothing but write a few results files in DATA/EXPLORATION:
		- <name>.csv      : iteration, g[0] returned by grade_population
		- <name>_max.csv  : iteration, best individual score
		- <name>_min.csv  : iteration, worst individual score
		- <name>_<iteration>.log : the whole population, written only when
		  at least one individual reaches the score threshold
	   where <name> is built from the data file base name, the filter
	   and the score method
	-> also (re)write time.log in the current directory with the time
	   spent in each zone of the run
	"""
	import os

	def truncate(file_name):
		# create the file, or empty it if it is left from a previous run
		with open(file_name, "w"):
			pass

	def append_line(file_name, line):
		# reopen for every record so each line is on disk as soon as
		# it is produced
		with open(file_name, "a") as handle:
			handle.write(line+"\n")

	# time.clock does not exist anymore on Python >= 3.8; when it is
	# available (Python 2) it is still the timer used
	timer = getattr(time, "perf_counter", None) or time.clock

	#--------------------#
	# General parameters #
	#--------------------#

	progress = 0
	mutation_rate = 5
	intervals_min = 2
	intervals_max = 80
	number_of_good_parents = 6
	number_of_bad_parents = 2
	score_threshold = 80

	filter_name = "control_vs_"+str(filter_strat)
	time_file_name = "time.log"
	truncate(time_file_name)


	# ZONE 1 START
	start_zone_1 = timer()

	# os.path uses the separator of the running platform, so the names
	# are defined on every system and not only on Linux and Windows
	base_name = os.path.basename(data_file).split(".")[0]
	run_name = base_name+"_"+filter_name+"_"+str(score_method)
	output_folder = os.path.join("DATA", "EXPLORATION")
	result_file_name = os.path.join(output_folder, run_name+".csv")
	max_result_file_name = os.path.join(output_folder, run_name+"_max.csv")
	min_result_file_name = os.path.join(output_folder, run_name+"_min.csv")

	# ZONE 1 END
	append_line(time_file_name, "zone1,"+str(timer() - start_zone_1))


	#--------------------#
	# Prepare Population #
	#--------------------#

	# ZONE 2 START
	start_zone_2 = timer()

	# init population (create_population reads the data file itself)
	pop = create_population(number_of_individual_per_generation, data_file, intervals_min, intervals_max)

	# ZONE 2 END
	append_line(time_file_name, "zone2,"+str(timer() - start_zone_2))


	# init results files
	truncate(result_file_name)
	truncate(max_result_file_name)
	truncate(min_result_file_name)

	for x in range(0, max_iteration):

		# ZONE 3 START
		start_zone_3 = timer()

		#-------------------------#
		# Evaluate the individual #
		#-------------------------#

		# evaluate population
		g = grade_population(pop, data_file, score_method, filter_strat, True)

		# ZONE 3 END
		append_line(time_file_name, "zone3,"+str(timer() - start_zone_3))


		# write result in file
		append_line(result_file_name, str(x)+","+str(g[0]))

		# write solution in file if one of the individual in
		# population looks like a good solution (i.e score >= threshold)
		score_list = [g[1][individual._id] for individual in pop]
		save_pop = any(float(score) >= float(score_threshold) for score in score_list)

		if(save_pop):
			solution_file_name = os.path.join(output_folder, run_name+"_"+str(progress)+".log")
			with open(solution_file_name, "w") as solution_file:
				for individual, individual_score in zip(pop, score_list):
					solution_file.write(">"+str(individual._id)+","+str(individual_score)+"\n")
					for key in individual._intervals_to_variables.keys():
						solution_file.write(str(key)+","+str(individual._intervals_to_variables[key])+"\n")

		# Get the best and the worst score in population and write
		# each of them in its own file
		append_line(max_result_file_name, str(progress)+","+str(max(score_list)))
		append_line(min_result_file_name, str(progress)+","+str(min(score_list)))


		#--------#
		# Evolve #
		#--------#

		# ZONE 4 START
		start_zone_4 = timer()

		# => Get the Bests in population
		bests = get_best_individual_in_population(number_of_good_parents, g, pop)

		# => Randomly select bad individuals
		bads = random_selection_of_bad_candidates(bests, pop, number_of_bad_parents)

		# => Mutate a small random portion of the population
		parents = bests + bads
		mutation(mutation_rate, intervals_min, intervals_max, parents)

		# => crossover parents to create children
		children = create_children(parents, pop)

		# => Merge parent and child to constitute the next population
		# NOTE(review): the merged list is never assigned back to pop, so
		# unless the helpers above modify pop in place the next iteration
		# grades the same population again - confirm the intent
		parents.extend(children)

		# progress bar
		# multiply before dividing and clamp to the bar width so float
		# rounding can not produce more than 50 marks
		progress += 1
		progress_perc = progress*100.0/max_iteration
		factor = min(50, int(math.ceil(progress_perc/2)))
		progress_bar = "#"*factor + "-"*(50 - factor)
		display_line = "["+str(score_method)+"]|"+progress_bar+"|"+str(progress)+"|"+str(g[0])
		sys.stdout.write("\r%d%%" % progress_perc)
		sys.stdout.write(display_line)
		sys.stdout.flush()

		# ZONE 4 END
		append_line(time_file_name, "zone4,"+str(timer() - start_zone_4))
Example #3
0
def evaluate_individual(individual, data_file_name, method, filter_strat):
    """
    -> Evaluate the individual using NN project
    -> individual is a Individual object
    -> data_file_name is the matrix file name
    -> method is a string, the method to use to compute the score,
       could be:
            - nn (for neural network)
            - svm (for support vector machine)
            - tree (decision tree)
    -> filter_strat is a string, name of the filter applied to the cohort,
       could be:
            - random
            - any disease (SLE, SjS ...)
    -> run the matching evaluation script in the NN folder (../../NN,
       relative to the current directory) and come back to the directory
       the function was called from
    -> return the last line of ../../NN/evaluation_score.log as a string,
       or -1 if that file is empty
    -> if method is not recognized an error is printed and no script is
       run; the score file is still read, so the value returned is
       whatever was already in it
    """

    # script to launch in the NN folder for each scoring method
    evaluation_scripts = {
        "nn": "evaluation.py",
        "svm": "svm_evaluation.py",
        "tree": "tree_evaluation.py",
    }
    # os.path.join picks the separator of the running platform
    nn_folder = os.path.join("..", "..", "NN")

    # Generate matrix from data file
    pack = dichotomization.extract_matrix_from(data_file_name)
    data = pack[0]
    variable_to_position = pack[1]

    # Create disjonct Table for Matrix
    disjonctif_tables = create_disjonctTable_for_matrix(
        data, variable_to_position, individual._intervals_to_variables)

    # use disjonct table for dichotomization
    #	- use matrix and table as input
    #	- return a new matrix
    data_dichotomized = dichotomize(data, disjonctif_tables)
    save_file_name = os.path.join(
        "DATA", "MATRIX",
        "data_dichotomized_pattern_individual_to_evaluate.csv")
    save_dichotomized_matrix_in_file(pack[1], pack[2], data_dichotomized,
                                     individual._intervals_to_variables,
                                     save_file_name)

    # compute the score
    if method in evaluation_scripts:
        # remember where we are instead of relying on a machine specific
        # absolute path, and always come back even if the run fails
        working_directory = os.getcwd()
        os.chdir(nn_folder)
        try:
            os.system("python " + evaluation_scripts[method] + " " +
                      str(filter_strat))
        finally:
            os.chdir(working_directory)
    else:
        print("[ERROR] method: " + str(method) + " is not recognized")

    # Get the score: the last line of the log wins
    score = -1
    with open(os.path.join(nn_folder, "evaluation_score.log"),
              "r") as score_file:
        for line in score_file:
            score = line.split("\n")[0]
    return score
Example #4
0
def evaluate_individual(individual, data_file_name, method, filter_strat):
	"""
	-> Evaluate the individual using NN project
	-> individual is a Individual object
	-> data_file_name is the matrix file name
	-> method is a string, the method to use to compute the score,
	   could be:
	   		- nn (for neural network)
	   		- svm (for support vector machine)
	   		- tree (decision tree)
	-> filter_strat is a string, name of the filter applied to the cohort,
	   could be:
	   			- random
	   			- any disease (SLE, SjS ...)
	-> return a dict with two keys:
			- "score": last line of evaluation_score.log converted to
			  float (-1.0 if the file is empty)
			- "id": the _id of the individual
	-> if method is not recognized an error is printed and no scoring is
	   run; the score file is still read
	"""

	# Generate matrix from data file
	pack = dichotomization.extract_matrix_from(data_file_name)
	data = pack[0]
	variable_to_position = pack[1]

	# Create disjonct Table for Matrix
	disjonctif_tables = create_disjonctTable_for_matrix(data, variable_to_position, individual._intervals_to_variables)

	# use disjonct table for dichotomization
	#	- use matrix and table as input
	#	- return a new matrix
	data_dichotomized = dichotomize(data, disjonctif_tables)

	# NOTE(review): the Windows file name carries the individual id while
	# the other one does not - confirm which name the scoring modules expect
	if(platform.system() == "Windows"):
		save_file_name = "DATA\\MATRIX\\data_dichotomized_pattern_individual_to_evaluate_"+str(individual._id)+".csv"
	else:
		# every non Windows platform uses the "/" path, so the name is
		# always defined
		save_file_name = "DATA/MATRIX/data_dichotomized_pattern_individual_to_evaluate.csv"
	save_dichotomized_matrix_in_file(pack[1], pack[2], data_dichotomized, individual._intervals_to_variables, save_file_name)

	# compute the score
	if(method == "nn"):
		# Run the NN and clean the data
		nn_evaluation.run_nn_scoring(filter_strat, individual._id)

	elif(method == "svm"):
		# Run SVM evaluation
		svm_evaluation.run_svm_scoring(filter_strat, individual._id)

	elif(method == "tree"):
		# Run decision tree evaluation
		tree_evaluation.run_tree_scoring(filter_strat, individual._id)

	else:
		print("[ERROR] method: "+str(method)+" is not recognized")

	# Get the score: the last line of the log wins
	score = -1
	with open("evaluation_score.log", "r") as score_file:
		for line in score_file:
			score = line.split("\n")[0]

	return {"score": float(score), "id": individual._id}
Example #5
0
File: demo.py Project: Nurtal/RD
            amplitude = max_interval - 2
            step = float((100 / amplitude))

            # progress bar
            progress += 1
            progress_perc = progress * step
            factor = math.ceil((progress_perc / 2))
            progress_bar = "#" * int(factor)
            progress_bar += "-" * int(50 - factor)
            display_line = "[panel " + str(panel) + "]|" + progress_bar + "|"
            sys.stdout.write("\r%d%%" % progress_perc)
            sys.stdout.write(display_line)
            sys.stdout.flush()

            # Generate matrix from data file
            pack = dichotomization.extract_matrix_from(
                "DATA/MATRIX/panel_" + str(panel) + "_filtered_processed.txt")
            data = pack[0]

            # create disjonct table for all variable in a matrix
            #	-> input : a matrix
            #	-> output : dict of table {variableIndex : disjonctTable}
            tables_test = dichotomization.create_disjonctTable_for_matrix(
                data, number_of_interval)

            # use disjonct table for dichotomization
            #	- use matrix and table as input
            #	- return a new matrix
            truc = dichotomization.dichotomize(data, tables_test)

            dichotomization.save_dichotomized_matrix_in_file(
                pack[1], pack[2], truc, number_of_interval,