def main(predictions_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None):
    watch = autosklearn.util.stopwatch.StopWatch()
    watch.start_task("ensemble_builder")

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0

    logging.basicConfig(filename=os.path.join(predictions_dir, "ensemble.log"),
                        level=logging.DEBUG)

    while used_time < limit:
        logging.debug("Time left: %f" % (limit - used_time))
        logging.debug("Time last iteration: %f" % time_iter)

        # Load the true labels of the validation data
        true_labels = np.load(
            os.path.join(predictions_dir, "true_labels_ensemble.npy"))

        # Directories holding the predictions of the individual models
        dir_ensemble = os.path.join(predictions_dir, "predictions_ensemble/")
        dir_valid = os.path.join(predictions_dir, "predictions_valid/")
        dir_test = os.path.join(predictions_dir, "predictions_test/")

        if not os.path.isdir(dir_ensemble) or not os.path.isdir(dir_valid) or \
                not os.path.isdir(dir_test):
            logging.debug("Prediction directory does not exist")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid))
        dir_test_list = sorted(os.listdir(dir_test))

        if len(dir_ensemble_list) == 0:
            logging.debug("Directories are empty")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) != len(dir_valid_list):
            logging.debug("Directories are inconsistent")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) != len(dir_test_list):
            logging.debug("Directories are inconsistent")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) <= current_num_models:
            logging.debug("Nothing has changed since the last time")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        watch.start_task("ensemble_iter_" + str(index_run))

        # Binary mask where True indicates that the corresponding model will
        # be excluded from the ensemble
        exclude_mask = []
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []

        model_idx = 0
        for f in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, f))
            score = evaluator.calculate_score(true_labels, predictions,
                                              task_type, metric,
                                              predictions.shape[1])

            if ensemble_size is not None:
                if score <= 0.001:
                    exclude_mask.append(True)
                    logging.error("Model only predicts at random: " + f +
                                  " has score: " + str(score))
                # If we have fewer models in our ensemble than ensemble_size,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    exclude_mask.append(False)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble, replace the worst model with it
                    if scores_nbest[idx] < score:
                        logging.debug(
                            "Worst model in our ensemble: %d with score %f "
                            "will be replaced by model %d with score %f" %
                            (idx, scores_nbest[idx], model_idx, score))
                        scores_nbest[idx] = score
                        # Exclude the old model
                        exclude_mask[int(indices_nbest[idx])] = True
                        indices_nbest[idx] = model_idx
                        exclude_mask.append(False)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        exclude_mask.append(True)
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    exclude_mask.append(True)
                    logging.error("Model only predicts at random: " + f +
                                  " has score: " + str(score))
                else:
                    exclude_mask.append(False)

            model_idx += 1

        logging.debug("Exclude mask: %s", exclude_mask)

        # Collect the predictions of every model that was not excluded
        all_predictions_train = []
        for i, f in enumerate(dir_ensemble_list):
            predictions = np.load(os.path.join(dir_ensemble, f))
            if not exclude_mask[i]:
                all_predictions_train.append(predictions)

        all_predictions_valid = []
        for i, f in enumerate(dir_valid_list):
            predictions = np.load(os.path.join(dir_valid, f))
            if not exclude_mask[i]:
                all_predictions_valid.append(predictions)

        all_predictions_test = []
        for i, f in enumerate(dir_test_list):
            predictions = np.load(os.path.join(dir_test, f))
            if not exclude_mask[i]:
                all_predictions_test.append(predictions)

        if len(all_predictions_train) == len(all_predictions_test) == \
                len(all_predictions_valid) == 0:
            logging.error("All models do just random guessing")
            time.sleep(2)
            continue

        if len(all_predictions_train) == 1:
            logging.debug("Only one model so far, we just copy its predictions")
            Y_valid = all_predictions_valid[0]
            Y_test = all_predictions_test[0]
        else:
            try:
                # Compute the weights for the ensemble,
                # starting from equally initialized weights
                n_models = len(all_predictions_train)
                init_weights = np.ones([n_models]) / n_models
                weights = weighted_ensemble(np.array(all_predictions_train),
                                            true_labels, task_type, metric,
                                            init_weights)
            except ValueError:
                logging.error("Caught ValueError!")
                used_time = watch.wall_elapsed("ensemble_builder")
                continue
            except Exception:
                logging.error("Caught error!")
                used_time = watch.wall_elapsed("ensemble_builder")
                continue

            # Compute the ensemble predictions for the valid data
            Y_valid = ensemble_prediction(np.array(all_predictions_valid),
                                          weights)
            # Compute the ensemble predictions for the test data
            Y_test = ensemble_prediction(np.array(all_predictions_test),
                                         weights)

        # Save predictions for the valid and test data sets
        filename_test = os.path.join(
            output_dir,
            basename + '_valid_' + str(index_run).zfill(3) + '.predict')
        data_util.save_predictions(
            os.path.join(predictions_dir, filename_test), Y_valid)

        filename_test = os.path.join(
            output_dir,
            basename + '_test_' + str(index_run).zfill(3) + '.predict')
        data_util.save_predictions(
            os.path.join(predictions_dir, filename_test), Y_test)

        current_num_models = len(dir_ensemble_list)

        watch.stop_task("ensemble_iter_" + str(index_run))
        time_iter = watch.get_wall_dur("ensemble_iter_" + str(index_run))
        used_time = watch.wall_elapsed("ensemble_builder")
        index_run += 1

    return
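# The helpers `weighted_ensemble` and `ensemble_prediction` used above are
# defined elsewhere in the project. The sketch below is only a minimal,
# hypothetical illustration of the interface the function above relies on:
# a weight vector fitted on the ensemble-building predictions, and a weighted
# average of the per-model predictions. The Nelder-Mead optimisation is an
# assumption made for illustration, not the project's actual implementation;
# it reuses the module-level `evaluator.calculate_score` call seen above.

from scipy.optimize import minimize


def ensemble_prediction(all_predictions, weights):
    # Weighted average over the model axis (axis 0: one entry per model)
    weights = np.asarray(weights, dtype=float)
    weights = weights / weights.sum()
    return np.average(all_predictions, axis=0, weights=weights)


def weighted_ensemble(all_predictions, true_labels, task_type, metric,
                      init_weights):
    # Maximise the ensemble score by minimising its negative over the weights
    def objective(weights):
        weights = np.abs(weights) + 1e-12  # keep the weights non-negative
        pred = ensemble_prediction(all_predictions, weights)
        return -evaluator.calculate_score(true_labels, pred, task_type,
                                          metric, pred.shape[1])

    result = minimize(objective, init_weights, method="Nelder-Mead")
    weights = np.abs(result.x)
    return weights / weights.sum()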
def main(predictions_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None, seed=1, indices_output_dir="."):
    watch = StopWatch()
    watch.start_task("ensemble_builder")

    task_type = STRING_TO_TASK_TYPES[task_type]

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0

    logging.basicConfig(filename=os.path.join(predictions_dir,
                                              "ensemble_%d.log" % seed),
                        level=logging.DEBUG)

    while used_time < limit:
        logging.debug("Time left: %f", limit - used_time)
        logging.debug("Time last iteration: %f", time_iter)

        # Load the true labels of the validation data
        true_labels = np.load(
            os.path.join(predictions_dir, "true_labels_ensemble.npy"))

        # Directories holding the predictions of the individual models
        dir_ensemble = os.path.join(predictions_dir,
                                    "predictions_ensemble_%s/" % seed)
        dir_valid = os.path.join(predictions_dir,
                                 "predictions_valid_%s/" % seed)
        dir_test = os.path.join(predictions_dir,
                                "predictions_test_%s/" % seed)

        paths_ = [dir_ensemble, dir_valid, dir_test]
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logging.debug("Prediction directory %s does not exist!" %
                          dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
        dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        if len(dir_ensemble_list) == 0:
            logging.debug("Directories are empty")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) <= current_num_models:
            logging.debug("Nothing has changed since the last time")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        watch.start_task("ensemble_iter_" + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        re_num_run = re.compile(r'_([0-9]*)\.npy$')
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []
            # The num run of the models
            num_runs = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, model_name))
            score = evaluator.calculate_score(true_labels, predictions,
                                              task_type, metric,
                                              predictions.shape[1])
            model_names_to_scores[model_name] = score
            num_run = int(re_num_run.search(model_name).group(1))

            if ensemble_size is not None:
                if score <= 0.001:
                    logging.error("Model only predicts at random: " +
                                  model_name + " has score: " + str(score))
                # If we have fewer models in our ensemble than ensemble_size,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append(num_run)
                    model_names.append(model_name)
                    num_runs.append(num_run)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble, replace the worst model with it
                    if scores_nbest[idx] < score:
                        logging.debug("Worst model in our ensemble: %s with "
                                      "score %f will be replaced by model %s "
                                      "with score %f", model_names[idx],
                                      scores_nbest[idx], model_name, score)
                        # Exclude the old model
                        del scores_nbest[idx]
                        scores_nbest.append(score)
                        del include_num_runs[idx]
                        include_num_runs.append(num_run)
                        del indices_nbest[idx]
                        indices_nbest.append(model_idx)
                        del model_names[idx]
                        model_names.append(model_name)
                        del num_runs[idx]
                        num_runs.append(num_run)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        pass
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    logging.error("Model only predicts at random: " +
                                  model_name + " has score: " + str(score))
                else:
                    include_num_runs.append(num_run)

            model_idx += 1

        indices_to_model_names = dict()
        indices_to_run_num = dict()
        for i, model_name in enumerate(dir_ensemble_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                num_indices = len(indices_to_model_names)
                indices_to_model_names[num_indices] = model_name
                indices_to_run_num[num_indices] = num_run

        include_num_runs = set(include_num_runs)

        all_predictions_train = []
        for i, model_name in enumerate(dir_ensemble_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                predictions = np.load(os.path.join(dir_ensemble, model_name))
                all_predictions_train.append(predictions)

        all_predictions_valid = []
        for i, model_name in enumerate(dir_valid_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                predictions = np.load(os.path.join(dir_valid, model_name))
                all_predictions_valid.append(predictions)

        all_predictions_test = []
        for i, model_name in enumerate(dir_test_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                predictions = np.load(os.path.join(dir_test, model_name))
                all_predictions_test.append(predictions)

        if len(all_predictions_train) == len(all_predictions_test) == \
                len(all_predictions_valid) == 0:
            logging.error("All models do just random guessing")
            time.sleep(2)
            continue

        elif len(all_predictions_train) == 1:
            logging.debug("Only one model so far, we just copy its predictions")
            # The single included model receives all of the weight
            ensemble_members_run_numbers = {0: 1.0}
            indices = np.array([0])
            # Output the score
            logging.info("Training performance: %f" %
                         max(model_names_to_scores.values()))
        else:
            try:
                indices, trajectory = ensemble_selection(
                    np.array(all_predictions_train), true_labels,
                    ensemble_size, task_type, metric)
                logging.info("Trajectory and indices!")
                logging.info(trajectory)
                logging.info(indices)
            except ValueError as e:
                logging.error("Caught ValueError: " + str(e))
                used_time = watch.wall_elapsed("ensemble_builder")
                continue
            except Exception as e:
                logging.error("Caught error! %s", str(e))
                used_time = watch.wall_elapsed("ensemble_builder")
                continue

            # Output the score
            logging.info("Training performance: %f" % trajectory[-1])

            # Log the ensemble members
            ensemble_members_run_numbers = dict()
            ensemble_members = Counter(indices).most_common()
            ensemble_members_string = "Ensemble members:\n"
            logging.info(ensemble_members)
            for ensemble_member in ensemble_members:
                weight = float(ensemble_member[1]) / len(indices)
                ensemble_members_string += \
                    ("    %s; weight: %10f; performance: %10f\n" %
                     (indices_to_model_names[ensemble_member[0]],
                      weight,
                      model_names_to_scores[
                          indices_to_model_names[ensemble_member[0]]]))
                ensemble_members_run_numbers[
                    indices_to_run_num[ensemble_member[0]]] = weight
            logging.info(ensemble_members_string)

        # Save the ensemble indices for later use!
        filename_indices = os.path.join(indices_output_dir,
                                        str(index_run).zfill(5) + ".indices")

        logging.info(ensemble_members_run_numbers)
        with open(filename_indices, "wb") as fh:
            pickle.dump(ensemble_members_run_numbers, fh)

        # Save predictions for the valid and test data sets
        if len(dir_valid_list) == len(dir_ensemble_list):
            ensemble_predictions_valid = np.mean(
                np.array(all_predictions_valid)[indices.astype(int)], axis=0)
            filename_test = os.path.join(
                output_dir,
                basename + '_valid_' + str(index_run).zfill(3) + '.predict')
            data_util.save_predictions(
                os.path.join(predictions_dir, filename_test),
                ensemble_predictions_valid)
        else:
            logging.info("Could not find as many validation set predictions "
                         "as ensemble predictions!")

        if len(dir_test_list) == len(dir_ensemble_list):
            ensemble_predictions_test = np.mean(
                np.array(all_predictions_test)[indices.astype(int)], axis=0)
            filename_test = os.path.join(
                output_dir,
                basename + '_test_' + str(index_run).zfill(3) + '.predict')
            data_util.save_predictions(
                os.path.join(predictions_dir, filename_test),
                ensemble_predictions_test)
        else:
            logging.info("Could not find as many test set predictions as "
                         "ensemble predictions!")

        current_num_models = len(dir_ensemble_list)

        watch.stop_task("ensemble_iter_" + str(index_run))
        time_iter = watch.get_wall_dur("ensemble_iter_" + str(index_run))
        used_time = watch.wall_elapsed("ensemble_builder")
        index_run += 1

    return
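# `ensemble_selection` is provided elsewhere in the project. The sketch below
# is only a hypothetical illustration of the interface assumed above: greedy
# (Caruana-style) ensemble selection with replacement, returning the selected
# model indices (repetitions encode the weights recovered via `Counter` above)
# together with the trajectory of training scores after each selection step.
# It assumes `ensemble_size` is a positive integer and reuses the module-level
# `evaluator.calculate_score`; it is not the project's actual implementation.


def ensemble_selection(all_predictions, true_labels, ensemble_size, task_type,
                       metric):
    ensemble_sum = np.zeros_like(all_predictions[0])
    indices = []
    trajectory = []

    for i in range(ensemble_size):
        best_score = -np.inf
        best_idx = 0
        # Try adding each model (with replacement) and keep the best addition
        for j, predictions in enumerate(all_predictions):
            candidate = (ensemble_sum + predictions) / (i + 1)
            score = evaluator.calculate_score(true_labels, candidate,
                                              task_type, metric,
                                              candidate.shape[1])
            if score > best_score:
                best_score = score
                best_idx = j

        ensemble_sum += all_predictions[best_idx]
        indices.append(best_idx)
        trajectory.append(best_score)

    return np.array(indices), trajectory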