def ensemble_selection(predictions, labels, ensemble_size, task_type, metric,
                       do_pruning=False):
    ''' Fast version of Rich Caruana's ensemble selection method '''
    ensemble = []
    trajectory = []
    order = []

    if do_pruning:
        n_best = 20
        indices = pruning(predictions, labels, n_best, task_type, metric)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = evaluator.calculate_score(
                labels, ensemble_, task_type, metric, ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size = ensemble_size - n_best

    for i in range(ensemble_size):
        scores = np.zeros([predictions.shape[0]])
        s = len(ensemble)
        if s == 0:
            weighted_ensemble_prediction = np.zeros(predictions[0].shape)
        else:
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            weighted_ensemble_prediction = (s / float(s + 1)) * \
                ensemble_prediction
        for j, pred in enumerate(predictions):
            # ensemble.append(pred)
            # ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            fant_ensemble_prediction = weighted_ensemble_prediction + \
                (1. / float(s + 1)) * pred
            scores[j] = evaluator.calculate_score(
                labels, fant_ensemble_prediction, task_type, metric,
                fant_ensemble_prediction.shape[1])
            # ensemble.pop()
        best = np.nanargmax(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

    return np.array(order), np.array(trajectory)
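# --- Illustrative usage sketch (not part of the module). It assumes the same
# data layout as the selection routines above: `predictions` has shape
# (n_models, n_samples, n_classes) and `labels` holds class indices. Plain
# accuracy stands in for evaluator.calculate_score, so this is only a toy
# demonstration of the greedy selection idea, not the library's scoring path.
import numpy as np


def _toy_greedy_selection(predictions, labels, ensemble_size):
    ensemble, order = [], []
    for _ in range(ensemble_size):
        scores = np.zeros(predictions.shape[0])
        for j, pred in enumerate(predictions):
            candidate = np.mean(np.array(ensemble + [pred]), axis=0)
            scores[j] = np.mean(candidate.argmax(axis=1) == labels)
        best = int(np.nanargmax(scores))
        ensemble.append(predictions[best])
        order.append(best)
    return np.array(order)


_rng = np.random.RandomState(1)
_toy_predictions = _rng.dirichlet(np.ones(3), size=(5, 20))  # (5, 20, 3)
_toy_labels = _rng.randint(0, 3, size=20)
print(_toy_greedy_selection(_toy_predictions, _toy_labels, ensemble_size=4))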
def predict(self):
    Y_optimization_pred = self.predict_function(self.X_optimization,
                                                self.model, self.task_type)
    if self.X_valid is not None:
        Y_valid_pred = self.predict_function(self.X_valid, self.model,
                                             self.task_type)
    else:
        Y_valid_pred = None
    if self.X_test is not None:
        Y_test_pred = self.predict_function(self.X_test, self.model,
                                            self.task_type)
    else:
        Y_test_pred = None

    score = calculate_score(self.Y_optimization, Y_optimization_pred,
                            self.task_type, self.metric,
                            self.D.info['target_num'],
                            all_scoring_functions=self.all_scoring_functions)
    if hasattr(score, "__len__"):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    if self.with_predictions:
        return err, Y_optimization_pred, Y_valid_pred, Y_test_pred
    return err
def pruning(predictions, labels, n_best, task_type, metric):
    perf = np.zeros([predictions.shape[0]])
    for i, p in enumerate(predictions):
        # Score each model's predictions individually
        perf[i] = evaluator.calculate_score(labels, p, task_type, metric,
                                            p.shape[1])
    indices = np.argsort(perf)[perf.shape[0] - n_best:]
    return indices
def original_ensemble_selection(predictions, labels, ensemble_size, task_type,
                                metric, do_pruning=False):
    ''' Rich Caruana's ensemble selection method '''
    ensemble = []
    trajectory = []
    order = []

    if do_pruning:
        n_best = 20
        indices = pruning(predictions, labels, n_best, task_type, metric)
        for idx in indices:
            ensemble.append(predictions[idx])
            order.append(idx)
            ensemble_ = np.array(ensemble).mean(axis=0)
            ensemble_performance = evaluator.calculate_score(
                labels, ensemble_, task_type, metric, ensemble_.shape[1])
            trajectory.append(ensemble_performance)
        ensemble_size = ensemble_size - n_best

    for i in range(ensemble_size):
        scores = np.zeros([predictions.shape[0]])
        for j, pred in enumerate(predictions):
            ensemble.append(pred)
            ensemble_prediction = np.mean(np.array(ensemble), axis=0)
            scores[j] = evaluator.calculate_score(
                labels, ensemble_prediction, task_type, metric,
                ensemble_prediction.shape[1])
            ensemble.pop()
        best = np.nanargmax(scores)
        ensemble.append(predictions[best])
        trajectory.append(scores[best])
        order.append(best)

    return np.array(order), np.array(trajectory)
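# --- Quick numerical check (toy data, not part of the module) of the shortcut
# ensemble_selection() uses instead of the full re-averaging done in
# original_ensemble_selection(): adding one candidate to an ensemble of size s
# moves the mean to s/(s+1) * old_mean + 1/(s+1) * candidate.
import numpy as np

_rng = np.random.RandomState(0)
_members = [_rng.rand(10, 3) for _ in range(4)]   # s = 4 member predictions
_candidate = _rng.rand(10, 3)

_s = len(_members)
_full_mean = np.mean(np.array(_members + [_candidate]), axis=0)
_incremental = (_s / float(_s + 1)) * np.mean(np.array(_members), axis=0) + \
    (1. / float(_s + 1)) * _candidate
assert np.allclose(_full_mean, _incremental)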
def weighted_ensemble_error(weights, *args):
    predictions = args[0]
    true_labels = args[1]
    metric = args[2]
    task_type = args[3]

    weight_prime = weights / weights.sum()
    weighted_predictions = ensemble_prediction(predictions, weight_prime)
    score = evaluator.calculate_score(true_labels, weighted_predictions,
                                      task_type, metric,
                                      weighted_predictions.shape[1])
    return 1 - score
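# --- Hedged sketch: the weighted_ensemble() helper that the older main()
# below passes init_weights to is not included in this excerpt. One plausible
# way to minimize an objective shaped like weighted_ensemble_error is
# scipy.optimize.minimize; the objective, data, and optimizer settings here
# are stand-ins for illustration only.
import numpy as np
from scipy.optimize import minimize


def _toy_weighted_error(weights, stacked_predictions, true_values):
    # Normalize the weights, as weighted_ensemble_error does
    weights = weights / max(weights.sum(), 1e-12)
    blended = np.tensordot(weights, stacked_predictions, axes=1)
    return np.mean((blended - true_values) ** 2)


_rng = np.random.RandomState(0)
_stacked = _rng.rand(4, 50)                       # 4 models, 50 predictions each
_target = 0.7 * _stacked[0] + 0.3 * _stacked[3]   # best blend uses models 0 and 3
_result = minimize(_toy_weighted_error,
                   x0=np.ones(4) / 4,
                   args=(_stacked, _target),
                   bounds=[(0.0, 1.0)] * 4)
print(_result.x / _result.x.sum())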
def main(predictions_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None):
    watch = autosklearn.util.stopwatch.StopWatch()
    watch.start_task("ensemble_builder")

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0
    logging.basicConfig(filename=os.path.join(predictions_dir, "ensemble.log"),
                        level=logging.DEBUG)

    while used_time < limit:
        logging.debug("Time left: %f" % (limit - used_time))
        logging.debug("Time last iteration: %f" % time_iter)
        # Load the true labels of the validation data
        true_labels = np.load(os.path.join(predictions_dir,
                                           "true_labels_ensemble.npy"))

        # Load the predictions from the models
        all_predictions_train = []
        dir_ensemble = os.path.join(predictions_dir, "predictions_ensemble/")
        dir_valid = os.path.join(predictions_dir, "predictions_valid/")
        dir_test = os.path.join(predictions_dir, "predictions_test/")

        if not os.path.isdir(dir_ensemble) or not os.path.isdir(dir_valid) or \
                not os.path.isdir(dir_test):
            logging.debug("Prediction directory does not exist")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid))
        dir_test_list = sorted(os.listdir(dir_test))

        if len(dir_ensemble_list) == 0:
            logging.debug("Directories are empty")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) != len(dir_valid_list):
            logging.debug("Directories are inconsistent")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) != len(dir_test_list):
            logging.debug("Directories are inconsistent")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) <= current_num_models:
            logging.debug("Nothing has changed since the last time")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        watch.start_task("ensemble_iter_" + str(index_run))

        # Binary mask where True indicates that the corresponding model will
        # be excluded from the ensemble
        exclude_mask = []
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []

        model_idx = 0
        for f in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, f))
            score = evaluator.calculate_score(true_labels, predictions,
                                              task_type, metric,
                                              predictions.shape[1])

            if ensemble_size is not None:
                if score <= 0.001:
                    exclude_mask.append(True)
                    logging.error("Model only predicts at random: " + f +
                                  " has score: " + str(score))
                # If we have fewer models in our ensemble than ensemble_size,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    exclude_mask.append(False)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble, replace it by the current model
                    if scores_nbest[idx] < score:
                        logging.debug(
                            "Worst model in our ensemble: %d with score %f "
                            "will be replaced by model %d with score %f" %
                            (idx, scores_nbest[idx], model_idx, score))
                        scores_nbest[idx] = score
                        # Exclude the old model
                        exclude_mask[int(indices_nbest[idx])] = True
                        indices_nbest[idx] = model_idx
                        exclude_mask.append(False)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        exclude_mask.append(True)
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    exclude_mask.append(True)
                    logging.error("Model only predicts at random: " + f +
                                  " has score: " + str(score))
                else:
                    exclude_mask.append(False)
                    all_predictions_train.append(predictions)

            model_idx += 1

        print(exclude_mask)

        all_predictions_valid = []
        for i, f in enumerate(dir_valid_list):
            predictions = np.load(os.path.join(dir_valid, f))
            if not exclude_mask[i]:
                all_predictions_valid.append(predictions)

        all_predictions_test = []
        for i, f in enumerate(dir_test_list):
            predictions = np.load(os.path.join(dir_test, f))
            if not exclude_mask[i]:
                all_predictions_test.append(predictions)

        if len(all_predictions_train) == len(all_predictions_test) == \
                len(all_predictions_valid) == 0:
            logging.error("All models do just random guessing")
            time.sleep(2)
            continue

        if len(all_predictions_train) == 1:
            logging.debug("Only one model so far we just copy its predictions")
            Y_valid = all_predictions_valid[0]
            Y_test = all_predictions_test[0]
        else:
            try:
                # Compute the weights for the ensemble
                # Use equally initialized weights
                n_models = len(all_predictions_train)
                init_weights = np.ones([n_models]) / n_models
                weights = weighted_ensemble(np.array(all_predictions_train),
                                            true_labels, task_type, metric,
                                            init_weights)
            except ValueError:
                logging.error("Caught ValueError!")
                used_time = watch.wall_elapsed("ensemble_builder")
                continue
            except Exception:
                logging.error("Caught error!")
                used_time = watch.wall_elapsed("ensemble_builder")
                continue

            # Compute the ensemble predictions for the valid data
            Y_valid = ensemble_prediction(np.array(all_predictions_valid),
                                          weights)
            # Compute the ensemble predictions for the test data
            Y_test = ensemble_prediction(np.array(all_predictions_test),
                                         weights)

        # Save predictions for valid and test data set
        filename_test = os.path.join(
            output_dir,
            basename + '_valid_' + str(index_run).zfill(3) + '.predict')
        data_util.save_predictions(
            os.path.join(predictions_dir, filename_test), Y_valid)

        filename_test = os.path.join(
            output_dir,
            basename + '_test_' + str(index_run).zfill(3) + '.predict')
        data_util.save_predictions(
            os.path.join(predictions_dir, filename_test), Y_test)

        current_num_models = len(dir_ensemble_list)
        watch.stop_task("ensemble_iter_" + str(index_run))
        time_iter = watch.get_wall_dur("ensemble_iter_" + str(index_run))
        used_time = watch.wall_elapsed("ensemble_builder")
        index_run += 1
    return
def main(predictions_dir, basename, task_type, metric, limit, output_dir,
         ensemble_size=None, seed=1, indices_output_dir="."):
    watch = StopWatch()
    watch.start_task("ensemble_builder")

    task_type = STRING_TO_TASK_TYPES[task_type]

    used_time = 0
    time_iter = 0
    index_run = 0
    current_num_models = 0
    logging.basicConfig(filename=os.path.join(predictions_dir,
                                              "ensemble_%d.log" % seed),
                        level=logging.DEBUG)

    while used_time < limit:
        logging.debug("Time left: %f", limit - used_time)
        logging.debug("Time last iteration: %f", time_iter)
        # Load the true labels of the validation data
        true_labels = np.load(os.path.join(predictions_dir,
                                           "true_labels_ensemble.npy"))

        # Load the predictions from the models
        dir_ensemble = os.path.join(predictions_dir,
                                    "predictions_ensemble_%s/" % seed)
        dir_valid = os.path.join(predictions_dir,
                                 "predictions_valid_%s/" % seed)
        dir_test = os.path.join(predictions_dir,
                                "predictions_test_%s/" % seed)

        paths_ = [dir_ensemble, dir_valid, dir_test]
        exists = [os.path.isdir(dir_) for dir_ in paths_]
        if not exists[0]:  # all(exists):
            logging.debug("Prediction directory %s does not exist!" %
                          dir_ensemble)
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        dir_ensemble_list = sorted(os.listdir(dir_ensemble))
        dir_valid_list = sorted(os.listdir(dir_valid)) if exists[1] else []
        dir_test_list = sorted(os.listdir(dir_test)) if exists[2] else []

        if len(dir_ensemble_list) == 0:
            logging.debug("Directories are empty")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        if len(dir_ensemble_list) <= current_num_models:
            logging.debug("Nothing has changed since the last time")
            time.sleep(2)
            used_time = watch.wall_elapsed("ensemble_builder")
            continue

        watch.start_task("ensemble_iter_" + str(index_run))

        # List of num_runs (which are in the filename) which will be included
        # later
        include_num_runs = []
        re_num_run = re.compile(r'_([0-9]*)\.npy$')
        if ensemble_size is not None:
            # Keeps track of the single scores of each model in our ensemble
            scores_nbest = []
            # The indices of the models that are currently in our ensemble
            indices_nbest = []
            # The names of the models
            model_names = []
            # The num run of the models
            num_runs = []

        model_names_to_scores = dict()

        model_idx = 0
        for model_name in dir_ensemble_list:
            predictions = np.load(os.path.join(dir_ensemble, model_name))
            score = evaluator.calculate_score(true_labels, predictions,
                                              task_type, metric,
                                              predictions.shape[1])
            model_names_to_scores[model_name] = score
            num_run = int(re_num_run.search(model_name).group(1))

            if ensemble_size is not None:
                if score <= 0.001:
                    # include_num_runs.append(True)
                    logging.error("Model only predicts at random: " +
                                  model_name + " has score: " + str(score))
                # If we have fewer models in our ensemble than ensemble_size,
                # add the current model if it is better than random
                elif len(scores_nbest) < ensemble_size:
                    scores_nbest.append(score)
                    indices_nbest.append(model_idx)
                    include_num_runs.append(num_run)
                    model_names.append(model_name)
                    num_runs.append(num_run)
                else:
                    # Take the worst performing model in our ensemble so far
                    idx = np.argmin(np.array([scores_nbest]))
                    # If the current model is better than the worst model in
                    # our ensemble, replace it by the current model
                    if scores_nbest[idx] < score:
                        logging.debug("Worst model in our ensemble: %s with "
                                      "score %f will be replaced by model %s "
                                      "with score %f", model_names[idx],
                                      scores_nbest[idx], model_name, score)
                        # Exclude the old model
                        del scores_nbest[idx]
                        scores_nbest.append(score)
                        del include_num_runs[idx]
                        del indices_nbest[idx]
                        indices_nbest.append(model_idx)
                        include_num_runs.append(num_run)
                        del model_names[idx]
                        model_names.append(model_name)
                        del num_runs[idx]
                        num_runs.append(num_run)
                    # Otherwise exclude the current model from the ensemble
                    else:
                        # include_num_runs.append(True)
                        pass
            else:
                # Load all predictions that are better than random
                if score <= 0.001:
                    # include_num_runs.append(True)
                    logging.error("Model only predicts at random: " +
                                  model_name + " has score: " + str(score))
                else:
                    include_num_runs.append(num_run)

            model_idx += 1

        indices_to_model_names = dict()
        indices_to_run_num = dict()
        for i, model_name in enumerate(dir_ensemble_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                num_indices = len(indices_to_model_names)
                indices_to_model_names[num_indices] = model_name
                indices_to_run_num[num_indices] = num_run

        # logging.info("Indices to model names:")
        # logging.info(indices_to_model_names)
        # for i, item in enumerate(sorted(model_names_to_scores.items(),
        #                                 key=lambda t: t[1])):
        #     logging.info("%d: %s", i, item)

        include_num_runs = set(include_num_runs)

        all_predictions_train = []
        for i, model_name in enumerate(dir_ensemble_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                predictions = np.load(os.path.join(dir_ensemble, model_name))
                all_predictions_train.append(predictions)

        all_predictions_valid = []
        for i, model_name in enumerate(dir_valid_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                predictions = np.load(os.path.join(dir_valid, model_name))
                all_predictions_valid.append(predictions)

        all_predictions_test = []
        for i, model_name in enumerate(dir_test_list):
            num_run = int(re_num_run.search(model_name).group(1))
            if num_run in include_num_runs:
                predictions = np.load(os.path.join(dir_test, model_name))
                all_predictions_test.append(predictions)

        if len(all_predictions_train) == len(all_predictions_test) == \
                len(all_predictions_valid) == 0:
            logging.error("All models do just random guessing")
            time.sleep(2)
            continue

        elif len(all_predictions_train) == 1:
            logging.debug("Only one model so far we just copy its predictions")
            ensemble_members_run_numbers = {0: 1.0}

            # Output the score
            logging.info("Training performance: %f" %
                         np.max(list(model_names_to_scores.values())))
        else:
            try:
                indices, trajectory = ensemble_selection(
                    np.array(all_predictions_train), true_labels,
                    ensemble_size, task_type, metric)

                logging.info("Trajectory and indices!")
                logging.info(trajectory)
                logging.info(indices)

            except ValueError as e:
                logging.error("Caught ValueError: " + str(e))
                used_time = watch.wall_elapsed("ensemble_builder")
                continue
            except Exception as e:
                logging.error("Caught error! %s", str(e))
                used_time = watch.wall_elapsed("ensemble_builder")
                continue

            # Output the score
            logging.info("Training performance: %f" % trajectory[-1])

            # Print the ensemble members:
            ensemble_members_run_numbers = dict()
            ensemble_members = Counter(indices).most_common()
            ensemble_members_string = "Ensemble members:\n"
            logging.info(ensemble_members)
            for ensemble_member in ensemble_members:
                weight = float(ensemble_member[1]) / len(indices)
                ensemble_members_string += \
                    ("    %s; weight: %10f; performance: %10f\n" %
                     (indices_to_model_names[ensemble_member[0]],
                      weight,
                      model_names_to_scores[
                          indices_to_model_names[ensemble_member[0]]]))
                ensemble_members_run_numbers[
                    indices_to_run_num[ensemble_member[0]]] = weight
            logging.info(ensemble_members_string)

        # Save the ensemble indices for later use!
        filename_indices = os.path.join(indices_output_dir,
                                        str(index_run).zfill(5) + ".indices")

        logging.info(ensemble_members_run_numbers)
        with open(filename_indices, "wb") as fh:
            pickle.dump(ensemble_members_run_numbers, fh)

        # Save predictions for valid and test data set
        if len(dir_valid_list) == len(dir_ensemble_list):
            # Convert the list to an array so it can be fancy-indexed
            ensemble_predictions_valid = np.mean(
                np.array(all_predictions_valid)[indices.astype(int)], axis=0)
            filename_test = os.path.join(
                output_dir,
                basename + '_valid_' + str(index_run).zfill(3) + '.predict')
            data_util.save_predictions(
                os.path.join(predictions_dir, filename_test),
                ensemble_predictions_valid)
        else:
            logging.info("Could not find as many validation set predictions "
                         "as ensemble predictions!")

        if len(dir_test_list) == len(dir_ensemble_list):
            ensemble_predictions_test = np.mean(
                np.array(all_predictions_test)[indices.astype(int)], axis=0)
            filename_test = os.path.join(
                output_dir,
                basename + '_test_' + str(index_run).zfill(3) + '.predict')
            data_util.save_predictions(
                os.path.join(predictions_dir, filename_test),
                ensemble_predictions_test)
        else:
            logging.info("Could not find as many test set predictions as "
                         "ensemble predictions!")

        current_num_models = len(dir_ensemble_list)
        watch.stop_task("ensemble_iter_" + str(index_run))
        time_iter = watch.get_wall_dur("ensemble_iter_" + str(index_run))
        used_time = watch.wall_elapsed("ensemble_builder")
        index_run += 1
    return
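# --- Standalone illustration (toy values, not part of the module) of how
# main() converts the selection order returned by ensemble_selection() into
# per-model weights: each model is weighted by the fraction of greedy rounds
# in which it was picked.
from collections import Counter

import numpy as np

_toy_indices = np.array([2, 0, 2, 1, 2])        # hypothetical selection order
_toy_weights = {int(model_idx): count / float(len(_toy_indices))
                for model_idx, count in Counter(_toy_indices).most_common()}
print(_toy_weights)                             # {2: 0.6, 0: 0.2, 1: 0.2}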
def predict(self):
    Y_optimization_pred = [None] * self.cv_folds
    Y_targets = [None] * self.cv_folds
    Y_valid_pred = [None] * self.cv_folds
    Y_test_pred = [None] * self.cv_folds

    for i in range(self.cv_folds):
        # To support prediction when only partial_fit was called
        if self.models[i] is None:
            continue

        train_indices, test_indices = self.indices[i]
        opt_pred = self.predict_function(self.X_train[test_indices],
                                         self.models[i], self.task_type,
                                         self.Y_train[train_indices])
        Y_optimization_pred[i] = opt_pred
        Y_targets[i] = self.Y_train[test_indices]

        if self.X_valid is not None:
            X_valid = self.X_valid.copy()
            valid_pred = self.predict_function(X_valid, self.models[i],
                                               self.task_type,
                                               self.Y_train[train_indices])
            Y_valid_pred[i] = valid_pred

        if self.X_test is not None:
            X_test = self.X_test.copy()
            test_pred = self.predict_function(X_test, self.models[i],
                                              self.task_type,
                                              self.Y_train[train_indices])
            Y_test_pred[i] = test_pred

    Y_optimization_pred = np.concatenate(
        [Y_optimization_pred[i] for i in range(self.cv_folds)
         if Y_optimization_pred[i] is not None])
    Y_targets = np.concatenate([Y_targets[i] for i in range(self.cv_folds)
                                if Y_targets[i] is not None])

    if self.X_valid is not None:
        Y_valid_pred = np.array([Y_valid_pred[i]
                                 for i in range(self.cv_folds)
                                 if Y_valid_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_valid_pred.shape) == 3:
            Y_valid_pred = np.nanmean(Y_valid_pred, axis=0)

    if self.X_test is not None:
        Y_test_pred = np.array([Y_test_pred[i]
                                for i in range(self.cv_folds)
                                if Y_test_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_test_pred.shape) == 3:
            Y_test_pred = np.nanmean(Y_test_pred, axis=0)

    self.Y_optimization = Y_targets
    score = calculate_score(Y_targets, Y_optimization_pred, self.task_type,
                            self.metric, self.D.info['target_num'],
                            all_scoring_functions=self.all_scoring_functions)

    if hasattr(score, "__len__"):
        err = {key: 1 - score[key] for key in score}
    else:
        err = 1 - score

    if self.with_predictions:
        return err, Y_optimization_pred, Y_valid_pred, Y_test_pred
    return err
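# --- Small toy example (not part of the module) of the fold-averaging step in
# predict() above: per-fold validation/test predictions are stacked into a 3-D
# array of shape (n_folds, n_samples, n_classes), and np.nanmean averages
# across folds while ignoring NaN entries.
import numpy as np

_fold_predictions = np.array([
    [[0.8, 0.2], [0.4, 0.6]],           # fold 0
    [[0.6, 0.4], [np.nan, np.nan]],     # fold 1, one sample missing
])
print(np.nanmean(_fold_predictions, axis=0))    # shape (2, 2)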
def predict(self):
    # First, obtain the predictions for the ensembles, the validation and
    # the test set!
    outer_scores = defaultdict(list)
    inner_scores = defaultdict(list)
    Y_optimization_pred = [None] * self.outer_cv_folds
    Y_targets = [None] * self.outer_cv_folds
    Y_valid_pred = [None] * self.outer_cv_folds
    Y_test_pred = [None] * self.outer_cv_folds

    for i in range(self.outer_cv_folds):
        train_indices, test_indices = self.outer_indices[i]
        opt_pred = self.predict_function(
            self.X_train[test_indices], self.outer_models[i], self.task_type,
            Y_train=self.Y_train[train_indices])
        Y_optimization_pred[i] = opt_pred
        Y_targets[i] = self.Y_train[test_indices]

        if self.X_valid is not None:
            X_valid = self.X_valid.copy()
            valid_pred = self.predict_function(
                X_valid, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_valid_pred[i] = valid_pred

        if self.X_test is not None:
            X_test = self.X_test.copy()
            test_pred = self.predict_function(
                X_test, self.outer_models[i], self.task_type,
                Y_train=self.Y_train[train_indices])
            Y_test_pred[i] = test_pred

    # Calculate the outer scores
    for i in range(self.outer_cv_folds):
        scores = calculate_score(
            Y_targets[i], Y_optimization_pred[i], self.task_type,
            self.metric, self.D.info['target_num'],
            all_scoring_functions=self.all_scoring_functions)
        if self.all_scoring_functions:
            for score_name in scores:
                outer_scores[score_name].append(scores[score_name])
        else:
            outer_scores[self.metric].append(scores)

    Y_optimization_pred = np.concatenate(
        [Y_optimization_pred[i] for i in range(self.outer_cv_folds)
         if Y_optimization_pred[i] is not None])
    Y_targets = np.concatenate(
        [Y_targets[i] for i in range(self.outer_cv_folds)
         if Y_targets[i] is not None])

    if self.X_valid is not None:
        Y_valid_pred = np.array(
            [Y_valid_pred[i] for i in range(self.outer_cv_folds)
             if Y_valid_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_valid_pred.shape) == 3:
            Y_valid_pred = np.nanmean(Y_valid_pred, axis=0)

    if self.X_test is not None:
        Y_test_pred = np.array(
            [Y_test_pred[i] for i in range(self.outer_cv_folds)
             if Y_test_pred[i] is not None])
        # Average the predictions of several models
        if len(Y_test_pred.shape) == 3:
            Y_test_pred = np.nanmean(Y_test_pred, axis=0)

    self.Y_optimization = Y_targets

    # Second, calculate the inner score
    for outer_fold in range(self.outer_cv_folds):
        for inner_fold in range(self.inner_cv_folds):
            inner_train_indices, inner_test_indices = \
                self.inner_indices[outer_fold][inner_fold]
            Y_test = self.Y_train[inner_test_indices]
            X_test = self.X_train[inner_test_indices]
            model = self.inner_models[outer_fold][inner_fold]
            Y_hat = self.predict_function(
                X_test, model, self.task_type,
                Y_train=self.Y_train[inner_train_indices])
            scores = calculate_score(
                Y_test, Y_hat, self.task_type, self.metric,
                self.D.info['target_num'],
                all_scoring_functions=self.all_scoring_functions)
            if self.all_scoring_functions:
                for score_name in scores:
                    inner_scores[score_name].append(scores[score_name])
            else:
                inner_scores[self.metric].append(scores)

    # Average the scores!
    if self.all_scoring_functions:
        inner_err = {key: 1 - np.mean(inner_scores[key])
                     for key in inner_scores}
        outer_err = {"outer:%s" % key: 1 - np.mean(outer_scores[key])
                     for key in outer_scores}
        inner_err.update(outer_err)
    else:
        inner_err = 1 - np.mean(inner_scores[self.metric])

    if self.with_predictions:
        return inner_err, Y_optimization_pred, Y_valid_pred, Y_test_pred
    return inner_err
def score(self, X, y):
    prediction = self.predict(X)
    return evaluator.calculate_score(y, prediction, self.task_,
                                     self.metric_, self.target_num_)