def read_values_properly():
    """Restore exact parameter values of the current best model from the
    generated dadi/moments code files of a previous run.

    Looks in the resume directory for
    ``current_best_logLL_model_dadi_code.py`` and
    ``current_best_logLL_model_moments_code.py``, searches each for a line
    of the form ``<key>[v1, v2, ...]`` (key is either
    ``'#current best params = '`` or ``'popt = '``) and, if found, rebuilds
    ``self.models[0]`` from that vector.

    Closure: reads ``self.params``, ``self.prefix``, ``self.log_file`` and
    mutates ``self.models[0]``.
    """
    dadi_code_file = os.path.join(
        self.params.resume_dir, self.prefix,
        'current_best_logLL_model_dadi_code.py')
    moments_code_file = os.path.join(
        self.params.resume_dir, self.prefix,
        'current_best_logLL_model_moments_code.py')
    par_values = None
    for code_file in [dadi_code_file, moments_code_file]:
        if not os.path.isfile(code_file):
            continue
        # Read the file once: iterating the open file handle directly would
        # consume it on the first key, leaving nothing for the second key.
        with open(code_file) as f:
            lines = f.readlines()
        key_start_1 = '#current best params = '
        key_start_2 = 'popt = '
        # Bug fix: previously an unconditional `break` left the key loop
        # after the first key, so 'popt = ' was never searched. Now we
        # only stop once values are actually found.
        for key in [key_start_1, key_start_2]:
            for line in lines:
                if line.startswith(key):
                    # Slice skips the '[' right after the key and drops
                    # the trailing ']'.
                    par_values = [
                        float(x)
                        for x in line.strip()[len(key) + 1:-1].split(',')
                    ]
                    break
            if par_values is not None:
                break
    if par_values is not None:
        support.write_log(
            self.log_file,
            'GA number ' + self.prefix + ' find good file to restore')
        self.models[0].construct_from_vector(par_values)
def run_one_ga_and_one_ls():
    # Run the genetic algorithm to convergence, then (optionally) one round
    # of local search on the best model, and record the final model.
    #
    # NOTE(review): the signature has no `self`/`shared_dict` parameters yet
    # the body uses both — presumably this is a method (or closure) whose
    # parameter list was lost in formatting; confirm against the original.
    #
    # `run_before_ls` / `run_ls` are resume flags: False means that phase
    # was already completed in the run being resumed, so it is skipped once
    # and the flag is reset to True for subsequent calls.
    while (not self.is_stoped() and self.run_before_ls):
        self.run_one_iteration()
        if shared_dict is not None:
            # Publish current best model so the parent process can report it.
            shared_dict[self.prefix] = (copy.deepcopy(self.models[0]),
                                        self.final_models)
    if not self.run_before_ls:
        self.run_before_ls = True
    if self.run_ls:
        # Keep a copy so we can roll back if local search blows up.
        best_model = copy.deepcopy(self.models[0])
        # NOTE(review): uses support.write_to_file here while the rest of
        # this file uses support.write_log — verify this is intentional.
        support.write_to_file(
            self.log_file,
            '\nTry to improve best model (' + self.params.optimize_name + ')')
        try:
            # catch error of `Factor is exactly singular`
            if self.params.optimize_name != 'hill_climbing':
                if self.out_dir is not None:
                    self.models[0].run_local_search(
                        self.params.optimize_name,
                        os.path.join(
                            self.out_dir,
                            self.params.optimize_name + '_' +
                            str(self.cur_iteration) + '_out'))
                else:
                    self.models[0].run_local_search(
                        self.params.optimize_name, None)
                self.check_best_aic()
            else:
                self.run_hill_climbing_of_best()
            self.print_and_draw_best_model(suffix='_ls')
        except RuntimeError as e:
            # Python 2 idiom: e.message. Roll back to the pre-search model
            # only for the known singular-factor failure.
            if e.message == 'Factor is exactly singular':
                support.write_log(
                    self.log_file,
                    'Local search failed of the following error: Factor is exactly singular.'
                )
                self.models[0] = best_model
    if not self.run_ls:
        self.run_ls = True
    if shared_dict is not None:
        shared_dict[self.prefix] = (copy.deepcopy(self.models[0]),
                                    self.final_models)
    # Final bookkeeping: scores, persisted final models, report, counter.
    self.check_best_aic()
    self.check_claic()
    self.final_models.append(self.best_model())
    self.pickle_final_models()
    support.print_final_model(self.log_file, self.models[0], self.params)
    self.cur_iteration += 1
def restore(self):
    """Resume a genetic-algorithm run from the logs of a previous launch.

    Parses ``GADMA_GA.log`` under ``self.params.resume_dir/self.prefix`` to
    rebuild the last complete iteration's population, mutation state and
    (if a local-search phase was in progress) the flags controlling which
    phases still need to run. Returns early if the log is missing or empty.
    """
    def restore_from_cur_pop_of_models(list_of_str):
        # Rebuild the population from log lines; the model's restore string
        # is the second-to-last tab-separated field.
        for restore_str in list_of_str:
            self.models.append(
                Demographic_model(
                    self.params,
                    restore_string=restore_str.strip().split('\t')[-2]))
        # Sanity checks: the resumed run must be compatible with the new
        # parameter bounds / structure settings.
        if self.is_custom_model:
            if len(self.params.lower_bound) != len(self.params.upper_bound):
                raise RuntimeError(
                    'New number of parameters is different than in previous run.')
        else:
            if not (self.models[0].get_structure() <=
                    self.params.final_structure).all():
                raise RuntimeError(
                    'New final structure is less than current structure.')

    def restore_iteration_state(iter_out, size):
        # iter_out[0] looks like 'Iteration #N:' — extract N (drop the
        # trailing character).
        if not self.params.only_models:
            self.cur_iteration = int(iter_out[0].split('#')[-1][:-1])
            self.first_iteration = self.cur_iteration
        # Population lines start at a fixed offset of 3 header lines.
        start_ind = 3
        end_ind = 3 + size
        restore_from_cur_pop_of_models(iter_out[start_ind:end_ind])
        if self.params.only_models:
            return
        if not iter_out[end_ind].startswith('Current mean mutation rate:'):
            support.warning(
                "Expect 'Current mean mutation rate:' after current population during restoring.")
        else:
            self.cur_mutation_rate = float(iter_out[end_ind].split(':')[-1])
        if not iter_out[
                end_ind + 1].startswith('Current mean number of params to change:'):
            support.warning(
                "Expect 'Current mean number of params to change:' after current population during restoring.")
        else:
            # Stored as an absolute count; convert back to a fraction.
            self.cur_mutation_strength = float(
                iter_out[end_ind + 1].split(':')[-1]) / self.models[0].get_number_of_params()

    def restore_from_ls_string(ls_string, is_best=False):
        # Field index differs depending on whether the line is a 'BEST' line.
        if is_best:
            index = 1
        else:
            index = 2
        self.models[0] = Demographic_model(
            self.params,
            restore_string=ls_string.strip().split('\t')[index])

    def read_values_properly():
        # Try to restore exact parameter values from the generated
        # dadi/moments code files of the previous run.
        dadi_code_file = os.path.join(self.params.resume_dir, self.prefix,
                                      'current_best_logLL_model_dadi_code.py')
        moments_code_file = os.path.join(self.params.resume_dir, self.prefix,
                                         'current_best_logLL_model_moments_code.py')
        par_values = None
        for code_file in [dadi_code_file, moments_code_file]:
            if os.path.isfile(code_file):
                with open(code_file) as f:
                    key_start_1 = '#current best params = '
                    key_start_2 = 'popt = '
                    for key in [key_start_1, key_start_2]:
                        for line in f:
                            if line.startswith(key):
                                # Skip '[' after the key, drop trailing ']'.
                                par_values = [
                                    float(x)
                                    for x in line.strip()[len(key) + 1:-1].split(',')
                                ]
                                break
                        # NOTE(review): this unconditional break exits the
                        # key loop after the first key, so key_start_2
                        # ('popt = ') is never searched; the file iterator
                        # is also already consumed. Likely a bug — confirm.
                        break
        if par_values is not None:
            support.write_log(
                self.log_file,
                'GA number ' + self.prefix + ' find good file to restore')
            self.models[0].construct_from_vector(par_values)

    # --- main body of restore ---
    if not os.path.isfile(
            os.path.join(
                self.params.resume_dir, self.prefix, 'GADMA_GA.log')):
        return
    support.write_log(
        self.log_file,
        'GA number ' + self.prefix + ' find dir to restore')
    # Collect the lines of the last (and second-to-last) 'Iteration #' block.
    iter_out = []
    prev_iter_out = []
    self.models = []
    with open(os.path.join(self.params.resume_dir, self.prefix,
                           'GADMA_GA.log')) as log_file:
        for line in log_file:
            if line.startswith('Iteration #'):
                prev_iter_out = iter_out
                iter_out = []
            iter_out.append(line.strip())
    if len(iter_out) == 0 or not iter_out[0].startswith('Iteration #'):
        support.write_log(
            self.log_file,
            'GA number ' + self.prefix +
            ' find empty dir to restore. It would be run from random models.')
        return
    # Find the first empty line — it terminates the population listing.
    pos_of_empty_str = 0
    for line in iter_out:
        if line == '':
            break
        pos_of_empty_str += 1
    # if there is no empty line then we need prev iteration
    if len(iter_out) == pos_of_empty_str:
        iter_out = prev_iter_out
        pos_of_empty_str = 0
        for line in iter_out:
            if line == '':
                break
            pos_of_empty_str += 1
    # Population size = lines before the empty line minus 5 header/footer
    # lines of the block.
    size = pos_of_empty_str - 5
    restore_iteration_state(iter_out, size)
    # Trim trailing empty lines to find the last meaningful line.
    pos_of_last_empty_str = len(iter_out)
    for line in reversed(iter_out):
        if line != '':
            break
        pos_of_last_empty_str -= 1
    # try to find file with all parameters
    read_values_properly()
    load_final_models_file = os.path.join(self.params.resume_dir,
                                          self.prefix, 'final_models_pickle')
    # Extra lines beyond the iteration block mean a local-search phase had
    # started; decide which phases still need to run on resume.
    if pos_of_last_empty_str - 11 > size:
        if iter_out[-1].startswith(
                'BEST') and iter_out[-2].startswith('Try to improve'):
            # when we have not print final result
            self.run_before_ls = False
            self.run_ls = False
            self.select(size)
            self.pickle_final_models(load=load_final_models_file)
            return
        if iter_out[-1].startswith('Try to improve'):
            # when we have not print final result too
            self.run_before_ls = False
            self.select(size)
            self.pickle_final_models(load=load_final_models_file)
            return
        if iter_out[-1].startswith('BEST'):
            # remove string with BEST
            iter_out.pop()
        # if hill climbing there can be extra strings (in reverse order):
        if iter_out[-1].startswith(
                'Current mean number of params to change:'):
            # remove string
            iter_out.pop()
        if iter_out[-1].startswith('Current mean mutation rate:'):
            # remove string
            iter_out.pop()
        # now we want to restore model from last string
        self.run_before_ls = False
        restore_from_ls_string(
            iter_out[-1], iter_out[-2].startswith('BEST'))
        if iter_out[-2].startswith('BEST'):
            self.run_ls = False
        read_values_properly()
        self.pickle_final_models(load=load_final_models_file)
    self.select(size)
def write_func(string):
    """Write *string* to this run's log file.

    Echoes to stdout as well, unless the user asked for silence.
    """
    echo_to_stdout = not params.silence
    return support.write_log(log_file, string, write_to_stdout=echo_to_stdout)

def my_str(x):
    """Render the float *x* with the configured number of digits."""
    rendered = support.float_representation(x, precision)
    return rendered
def write_func(string):
    # Log helper bound to this worker's log file; echoes to stdout unless
    # the user asked for silence.
    # NOTE(review): defined before `params`/`log_file` are unpacked below —
    # this only works because the closure resolves the names at call time;
    # confirm the statement order against the original source.
    return support.write_log(log_file, string,
                             write_to_stdout=not params.silence)

# Unpack this worker's arguments (packed into one tuple so the whole set
# can be passed through pool.map_async — see main()).
number, params, log_file, shared_dict = params_tuple
def main():
    """Entry point of the GADMA pipeline.

    Parses CLI options, sets up logging, then runs `params.repeats`
    genetic-algorithm instances in a process pool, periodically printing
    the best solution found so far. Python 2 code (`except Exception, e`).
    """
    params = options.parse_args()
    log_file = os.path.join(
        params.output_dir, 'GADMA.log')
    # Truncate/create the log file before anything is written to it.
    open(log_file, 'w').close()
    support.write_log(log_file, "--Successful arguments' parsing--\n")
    params_filepath = os.path.join(
        params.output_dir, 'params')
    params.save(params.output_dir)
    if not params.test:
        support.write_log(
            log_file,
            'You can find all parameters of this run in:\t\t' +
            params_filepath + '\n')
        # NOTE(review): the '\n' lives inside the joined path string
        # ('GADMA.log\n') — works for display, but confirm it's intentional.
        support.write_log(log_file,
                          'All output is saved (without warnings and errors) in:\t' +
                          os.path.join(params.output_dir, 'GADMA.log\n'))
    support.write_log(log_file, '--Start pipeline--\n')
    # For debug
    # run_genetic_algorithm((1, params, log_file, None))
    # Create shared dictionary
    m = Manager()
    shared_dict = m.dict()
    # Start pool of processes
    start_time = datetime.now()
    pool = Pool(processes=params.processes, initializer=worker_init)
    try:
        # One tuple of arguments per repeat; workers are numbered from 1.
        pool_map = pool.map_async(
            run_genetic_algorithm,
            [(i + 1, params, log_file, shared_dict)
             for i in range(params.repeats)])
        pool.close()
        # Digits of precision for reporting, derived from epsilon.
        precision = 1 - int(math.log(params.epsilon, 10))
        # graceful way to interrupt all processes by Ctrl+C
        min_counter = 0
        while True:
            try:
                # Wait with a timeout so we wake up periodically to print
                # the current best solution, then go back to waiting.
                multiple_results = pool_map.get(
                    60 * params.time_for_print)
                break
            # catch TimeoutError and get again
            except multiprocessing.TimeoutError as ex:
                print_best_solution_now(
                    start_time, shared_dict, params, log_file, precision,
                    draw_model=params.matplotlib_available)
    except Exception, e:
        # Any worker failure (or Ctrl+C) tears down the whole pool.
        pool.terminate()
        support.error(str(e))
    # Final report after all repeats finished.
    print_best_solution_now(start_time, shared_dict, params, log_file,
                            precision,
                            draw_model=params.matplotlib_available)
    support.write_log(log_file, '\n--Finish pipeline--\n')
    if params.test:
        support.write_log(log_file, '--Test passed correctly--')
    if params.theta is None:
        support.write_log(
            log_file,
            "\nYou didn't specify theta at the beginning. If you want change it and rescale parameters, please see tutorial.\n")
    if params.resume_dir is not None and (params.initial_structure != params.final_structure).any():
        support.write_log(
            log_file,
            '\nYou have resumed from another launch. Please, check best AIC model, as information about it was lost.\n')
    support.write_log(log_file, 'Thank you for using GADMA!')