Example #1
def read_values_properly():
    # Generated code files for the current best log-likelihood model.
    dadi_code_file = os.path.join(
        self.params.resume_dir, self.prefix,
        'current_best_logLL_model_dadi_code.py')
    moments_code_file = os.path.join(
        self.params.resume_dir, self.prefix,
        'current_best_logLL_model_moments_code.py')
    par_values = None
    for code_file in [dadi_code_file, moments_code_file]:
        if os.path.isfile(code_file):
            with open(code_file) as f:
                key_start_1 = '#current best params = '
                key_start_2 = 'popt = '
                # Check every line against both prefixes in a single pass
                # over the file; the slice strips the prefix together with
                # the surrounding '[' and ']' around the comma-separated
                # parameter values.
                for line in f:
                    for key in [key_start_1, key_start_2]:
                        if line.startswith(key):
                            par_values = [
                                float(x)
                                for x in line.strip()[len(key) +
                                                      1:-1].split(',')
                            ]
                            break
                    if par_values is not None:
                        break
            break
    if par_values is not None:
        support.write_log(
            self.log_file,
            'GA number ' + self.prefix + ' find good file to restore')
        self.models[0].construct_from_vector(par_values)
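For reference, a minimal standalone sketch of the parsing step above: it pulls the float vector out of a 'popt = [...]' line. The 'popt = ' prefix and the bracketed layout are taken from the example; parse_param_line is an illustrative helper, not part of GADMA.

def parse_param_line(line, key='popt = '):
    # Illustrative helper (not GADMA API): strip the prefix plus the
    # opening '[' and trailing ']', then split the comma-separated values.
    if not line.startswith(key):
        return None
    body = line.strip()[len(key) + 1:-1]
    return [float(x) for x in body.split(',')]

print(parse_param_line('popt = [1.0, 0.5, 2.37]'))  # [1.0, 0.5, 2.37]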
Example #2
        def run_one_ga_and_one_ls():
            # Run GA iterations until the stop condition is reached, then
            # optionally try to improve the best model with a local search.
            while (not self.is_stoped() and self.run_before_ls):
                self.run_one_iteration()
                if shared_dict is not None:
                    shared_dict[self.prefix] = (copy.deepcopy(self.models[0]),
                                                self.final_models)
            if not self.run_before_ls:
                self.run_before_ls = True
            if self.run_ls:
                best_model = copy.deepcopy(self.models[0])
                support.write_to_file(
                    self.log_file, '\nTry to improve best model (' +
                    self.params.optimize_name + ')')
                try:  # the local search may raise RuntimeError('Factor is exactly singular')
                    if self.params.optimize_name != 'hill_climbing':
                        if self.out_dir is not None:
                            self.models[0].run_local_search(
                                self.params.optimize_name,
                                os.path.join(
                                    self.out_dir, self.params.optimize_name +
                                    '_' + str(self.cur_iteration) + '_out'))
                        else:
                            self.models[0].run_local_search(
                                self.params.optimize_name, None)
                        self.check_best_aic()
                    else:
                        self.run_hill_climbing_of_best()
                    self.print_and_draw_best_model(suffix='_ls')
                except RuntimeError as e:
                    # str(e) is used instead of e.message, which exists only
                    # under Python 2.
                    if str(e) == 'Factor is exactly singular':
                        support.write_log(
                            self.log_file,
                            'Local search failed of the following error: Factor is exactly singular.'
                        )
                    self.models[0] = best_model

            if not self.run_ls:
                self.run_ls = True

            if shared_dict is not None:
                shared_dict[self.prefix] = (copy.deepcopy(self.models[0]),
                                            self.final_models)
            self.check_best_aic()
            self.check_claic()
            self.final_models.append(self.best_model())
            self.pickle_final_models()

            support.print_final_model(self.log_file, self.models[0],
                                      self.params)

            self.cur_iteration += 1
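The except branch above compares the exception text; a small self-contained sketch of that pattern, using str(e) so it behaves the same under Python 2 and 3 (safe_local_search and failing_search are illustrative names, not GADMA API):

def safe_local_search(run_search, fallback_model):
    # Run a local search and fall back to the previous best model if the
    # optimizer hits a singular matrix; any other error is re-raised.
    try:
        return run_search()
    except RuntimeError as e:
        if str(e) == 'Factor is exactly singular':
            return fallback_model
        raise

def failing_search():
    raise RuntimeError('Factor is exactly singular')

print(safe_local_search(failing_search, 'previous best model'))  # falls back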
Example #3
    def restore(self):
        def restore_from_cur_pop_of_models(list_of_str):
            for restore_str in list_of_str:
                self.models.append(
                    Demographic_model(
                        self.params,
                        restore_string=restore_str.strip().split('\t')[-2]))
            if self.is_custom_model:
                if len(self.params.lower_bound) != len(self.params.upper_bound):
                    raise RuntimeError(
                        'New number of parameters is different than in previous run.')
            else:
                if not (self.models[0].get_structure() <=
                        self.params.final_structure).all():
                    raise RuntimeError(
                        'New final structure is less than current structure.')

        def restore_iteration_state(iter_out, size):
            if not self.params.only_models:
                self.cur_iteration = int(iter_out[0].split('#')[-1][:-1])
                self.first_iteration = self.cur_iteration

            start_ind = 3
            end_ind = 3 + size
            restore_from_cur_pop_of_models(iter_out[start_ind: end_ind])
            
            if self.params.only_models:
                return

            if not iter_out[end_ind].startswith('Current mean mutation rate:'):
                support.warning(
                    "Expect 'Current mean mutation rate:' after current population during restoring.")
            else:
                self.cur_mutation_rate = float(iter_out[end_ind].split(':')[-1])

            if not iter_out[end_ind + 1].startswith(
                    'Current mean number of params to change:'):
                support.warning(
                    "Expect 'Current mean number of params to change:' after current population during restoring.")
            else:
                self.cur_mutation_strength = float(
                    iter_out[end_ind + 1].split(':')[-1]) / self.models[0].get_number_of_params()

        def restore_from_ls_string(ls_string, is_best=False):
            if is_best:
                index = 1
            else:
                index = 2
            self.models[0] = Demographic_model(
                self.params, restore_string=ls_string.strip().split('\t')[index])

        def read_values_properly():
            dadi_code_file = os.path.join(self.params.resume_dir, self.prefix, 'current_best_logLL_model_dadi_code.py')
            moments_code_file = os.path.join(self.params.resume_dir, self.prefix, 'current_best_logLL_model_moments_code.py')
            par_values = None
            for code_file in [dadi_code_file, moments_code_file]:
                if os.path.isfile(code_file):
                    with open(code_file) as f:
                        key_start_1 = '#current best params = '
                        key_start_2 = 'popt = '
                        # Check each line against both prefixes in a single
                        # pass over the file; the slice strips the prefix and
                        # the surrounding '[' and ']'.
                        for line in f:
                            for key in [key_start_1, key_start_2]:
                                if line.startswith(key):
                                    par_values = [float(x) for x in line.strip()[len(key) + 1: -1].split(',')]
                                    break
                            if par_values is not None:
                                break
                    break
            if par_values is not None:
                support.write_log(
                    self.log_file,
                    'GA number ' + self.prefix + ' find good file to restore')
                self.models[0].construct_from_vector(par_values)


        if not os.path.isfile(
            os.path.join(
                self.params.resume_dir,
                self.prefix,
                'GADMA_GA.log')):
            return
        support.write_log(
            self.log_file,
            'GA number ' +
            self.prefix +
            ' find dir to restore')
        iter_out = []
        prev_iter_out = []
        self.models = []
        with open(os.path.join(self.params.resume_dir, self.prefix, 'GADMA_GA.log')) as log_file:
            for line in log_file:
                if line.startswith('Iteration #'):
                    prev_iter_out = iter_out
                    iter_out = []
                iter_out.append(line.strip())

        if len(iter_out) == 0 or not iter_out[0].startswith('Iteration #'):
            support.write_log(
                self.log_file,
                'GA number ' +
                self.prefix +
                ' find empty dir to restore. It would be run from random models.')
            return

        pos_of_empty_str = 0
        for line in iter_out:
            if line == '':
                break
            pos_of_empty_str += 1
        
        # if there is no empty line, the latest iteration block is incomplete, so fall back to the previous iteration
        if len(iter_out) == pos_of_empty_str:
            iter_out = prev_iter_out
            pos_of_empty_str = 0
            for line in iter_out:
                if line == '':
                    break
                pos_of_empty_str += 1
        size = pos_of_empty_str - 5
        restore_iteration_state(iter_out, size)

        pos_of_last_empty_str = len(iter_out)
        for line in reversed(iter_out):
            if line != '':
                break
            pos_of_last_empty_str -= 1
                
        # try to find file with all parameters
        read_values_properly()

        load_final_models_file = os.path.join(self.params.resume_dir, self.prefix, 'final_models_pickle')
        
        if pos_of_last_empty_str - 11 > size:
            if iter_out[-1].startswith(
                    'BEST') and iter_out[-2].startswith('Try to improve'):
                # the final result has not been printed yet
                self.run_before_ls = False
                self.run_ls = False
                self.select(size)
                self.pickle_final_models(load=load_final_models_file)
                return
            if iter_out[-1].startswith('Try to improve'):
                # the final result has not been printed here either
                self.run_before_ls = False
                self.select(size)
                self.pickle_final_models(load=load_final_models_file)
                return
            if iter_out[-1].startswith('BEST'):
                # remove string with BEST
                iter_out.pop()
            # if hill climbing was used, there can be extra lines (checked in reverse order):
            if iter_out[-1].startswith(
                    'Current mean number of params to change:'):
                # remove string
                iter_out.pop()
            if iter_out[-1].startswith('Current mean mutation rate:'):
                # remove string
                iter_out.pop()

            # now we want to restore model from last string
            self.run_before_ls = False
            restore_from_ls_string(
                iter_out[-1], iter_out[-2].startswith('BEST'))
            if iter_out[-2].startswith('BEST'):
                self.run_ls = False
        read_values_properly()
        self.pickle_final_models(load=load_final_models_file)
        self.select(size)
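A minimal sketch of the log-splitting idea that restore() relies on: collect the lines of the latest 'Iteration #' block and fall back to the previous block when the latest one was cut off before its closing blank line. The 'Iteration #' marker comes from the example; last_iteration_block is an illustrative helper, not GADMA API.

def last_iteration_block(lines):
    # Mirror of the iter_out / prev_iter_out bookkeeping above.
    iter_out, prev_iter_out = [], []
    for line in lines:
        if line.startswith('Iteration #'):
            prev_iter_out = iter_out
            iter_out = []
        iter_out.append(line.strip())
    # No blank line means the population listing was interrupted mid-write,
    # so the previous, complete block is used instead.
    if '' not in iter_out:
        iter_out = prev_iter_out
    return iter_out

log_lines = ['Iteration #1:', 'header', 'model A', 'model B', '',
             'Iteration #2:', 'header', 'model C']
print(last_iteration_block(log_lines))  # the complete block of Iteration #1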
Example #4
    def write_func(string):
        return support.write_log(log_file, string,
                                 write_to_stdout=not params.silence)

    def my_str(x):
        return support.float_representation(x, precision)
Example #5
        def write_func(string):
            return support.write_log(log_file, string,
                                     write_to_stdout=not params.silence)

        number, params, log_file, shared_dict = params_tuple
Example #6
def main():
    params = options.parse_args()

    log_file = os.path.join(
        params.output_dir, 'GADMA.log')
    open(log_file, 'w').close()

    support.write_log(log_file, "--Successful arguments' parsing--\n")
    params_filepath = os.path.join(
        params.output_dir, 'params')
    params.save(params.output_dir)
    if not params.test:
        support.write_log(
            log_file, 'You can find all parameters of this run in:\t\t' + params_filepath + '\n')
        support.write_log(log_file, 'All output is saved (without warnings and errors) in:\t' +
                          os.path.join(params.output_dir, 'GADMA.log\n'))

    support.write_log(log_file, '--Start pipeline--\n')

    # For debug
#    run_genetic_algorithm((1, params, log_file, None))

    # Create shared dictionary
    m = Manager()
    shared_dict = m.dict()

    # Start pool of processes
    start_time = datetime.now()
    
    pool = Pool(processes=params.processes,
                initializer=worker_init)
    try:
        pool_map = pool.map_async(
            run_genetic_algorithm,
            [(i + 1, params, log_file, shared_dict)
             for i in range(params.repeats)])
        pool.close()

        precision = 1 - int(math.log(params.epsilon, 10))

        # graceful way to interrupt all processes by Ctrl+C
        min_counter = 0
        while True:
            try:
                multiple_results = pool_map.get(
                    60 * params.time_for_print)
                break
            # on timeout, print the current best solution and keep waiting
            except multiprocessing.TimeoutError:
                print_best_solution_now(
                    start_time, shared_dict, params, log_file,
                    precision, draw_model=params.matplotlib_available)
            except Exception as e:
                pool.terminate()
                support.error(str(e))
        print_best_solution_now(
            start_time, shared_dict, params, log_file,
            precision, draw_model=params.matplotlib_available)
        support.write_log(log_file, '\n--Finish pipeline--\n')
        if params.test:
            support.write_log(log_file, '--Test passed correctly--')
        if params.theta is None:
            support.write_log(
                log_file, "\nYou didn't specify theta at the beginning. If you want to change it and rescale parameters, please see the tutorial.\n")
        if params.resume_dir is not None and (params.initial_structure != params.final_structure).any():
            support.write_log(
                log_file, '\nYou have resumed from another launch. Please, check best AIC model, as information about it was lost.\n')

        support.write_log(log_file, 'Thank you for using GADMA!')
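The polling loop in main() is the usual map_async pattern: block on get() with a timeout, report progress whenever multiprocessing.TimeoutError fires, and stop once the results arrive. A self-contained sketch of just that pattern (square and report_progress are illustrative stand-ins for run_genetic_algorithm and print_best_solution_now, not GADMA code):

import multiprocessing
from multiprocessing import Pool


def square(x):
    # Stand-in for run_genetic_algorithm.
    return x * x


def report_progress():
    print('still running...')


if __name__ == '__main__':
    pool = Pool(processes=2)
    async_result = pool.map_async(square, range(8))
    pool.close()
    while True:
        try:
            results = async_result.get(5)  # wait up to 5 seconds
            break
        except multiprocessing.TimeoutError:
            # not finished yet: report progress and keep waiting
            report_progress()
    pool.join()
    print(results)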