def parameterized_models():
    """Yield (model_name, model) pairs for every hyper-parameter combination.

    Iterates ``MODEL_PARAMS`` — assumed to map a model name to a dict with
    keys ``'model'`` (a class) and ``'parameters'`` (a grid spec) — expands
    each grid with ``grid_search``, and instantiates the model class once per
    combination.

    Yields:
        tuple: ``(model_name, model)`` where ``model`` is an un-fitted
        instance built from one parameter combination.
    """
    # `.items()` instead of the Python-2-only `.iteritems()`: iteration
    # behavior is identical on Python 2, and the generator keeps working
    # if this module is ever run under Python 3.
    for model_name, spec in MODEL_PARAMS.items():
        Model = spec['model']
        param_grid = spec['parameters']
        for param_dict in grid_search(param_grid):
            yield model_name, Model(**param_dict)
# NOTE(review): this chunk starts mid-function — the enclosing `def` (presumably
# `test_params(param_dict, data)`, judging by the __main__ call below) is above
# this view, and the trailing `initial_variances = [[` is truncated mid-expression.

    # Hold out two randomly chosen feature columns as pseudo-targets.
    # NOTE(review): np.random.choice defaults to replace=True, so the same
    # column index can be drawn twice — confirm whether replace=False was meant.
    features_to_withhold = np.random.choice(np.arange(data.shape[1]), 2)
    y = data[:, features_to_withhold]
    # Remaining columns are what the clusterer actually sees.
    data_limited = np.delete(data, features_to_withhold, axis=1)
    slc = FullSLCluster(**param_dict)
    # Within-cluster-variance score of the model on the limited data.
    wcv = WCVScore(slc)
    wcv_score, _ = wcv.score(data_limited)
    assignments = slc.fit_predict(data_limited)
    # For each withheld column, how well the cluster assignments explain it
    # (mean per-cluster variance); average across the withheld columns.
    final_scores = np.apply_along_axis(
        lambda col: mean_cluster_variances(assignments, col), 0, y)
    final_score = np.mean(final_scores)
    print "done testing ", param_dict
    return wcv_score, final_score, features_to_withhold

if __name__ == '__main__':
    # Evaluate every parameter combination on each sklearn toy dataset.
    params_list = grid_search(SLC_PARAMS)
    datasets = [
        load_boston().data,
        load_diabetes().data,
        load_breast_cancer().data
    ]
    outputs = []
    for data in datasets:
        # Fan the grid out across all cores; one result row per param dict.
        outputs.append(
            Parallel(n_jobs=-1)(delayed(test_params)(param_dict, data)
                                for param_dict in params_list))
    # NOTE(review): text-mode 'w' works for pickle only on Python 2; under
    # Python 3 these opens would need 'wb'.
    with open('big_tune_params_by_model.pkl', 'w') as f:
        pickle.dump(outputs, f)
    with open('big_tune_params.pkl', 'w') as f:
        pickle.dump(params_list, f)
    # NOTE(review): statement below is cut off at the chunk boundary.
    initial_variances = [[
# NOTE(review): this chunk starts mid-function — the `def` of what appears to be
# `select_params_per_dataset_per_k` (called in __main__ below) is above this
# view; the final `with open(...)` block is truncated at the chunk boundary.

        # Record, for this dataset, the parameter dict with the minimal WCV.
        output.append((dataset, params[min_wcv]))
    return output

def prepare_tuned_models(select_params_output):
    """Turn (dataset, params) selections into fit-ready parameter dicts.

    Each row is expected to be a ``(dataset, param_dict)`` pair; the dataset
    and the model class are folded into the param dict itself.

    Args:
        select_params_output: iterable of ``(dataset, param_dict)`` rows.

    Returns:
        list of dict: param dicts augmented with 'dataset' and 'model' keys.
    """
    params = []
    for row in select_params_output:
        param = row[1]
        # NOTE(review): mutates the incoming dicts in place — the rows of
        # select_params_output are modified, not copied.
        param['dataset'] = row[0]
        param['model'] = FullSLCluster
        params.append(param)
    return params

if __name__ == '__main__':
    # Pass 1: coarse grid search over SLC parameters.
    models_to_search = grid_search(SLC_PARAMS)
    # Parallel version kept for reference; serial run is currently in use.
    # fit_model_output = Parallel(n_jobs=-1)(delayed(fit_model)(model) for model in models_to_search)
    fit_model_output = [fit_model(model) for model in models_to_search]
    # NOTE(review): text-mode 'w' works for pickle only on Python 2; under
    # Python 3 these opens would need 'wb'.
    with open('big_ole_grid_search_new_datasets.pkl', 'w') as f:
        pickle.dump(fit_model_output, f)
    # Pass 2: refit only the best params per dataset (per k).
    params_per_dataset = select_params_per_dataset_per_k(fit_model_output)
    params_to_retry = prepare_tuned_models(params_per_dataset)
    # final_models = Parallel(n_jobs=-1)(delayed(fit_model)(model) for model in params_to_retry)
    final_models = [fit_model(model) for model in params_to_retry]
    with open('final_models_from_grid_search_new_datasets.pkl', 'w') as f:
        pickle.dump(final_models, f)
    # KMeans baseline sweep over the same datasets.
    models_to_search = grid_search(KMEANS_PARAMS)
    # fit_model_output = Parallel(n_jobs=-1)(delayed(fit_model)(model) for model in models_to_search)
    fit_model_output = [fit_model(model) for model in models_to_search]
    # NOTE(review): `with` body is truncated at the chunk boundary.
    with open('big_ole_grid_search_new_datasets_kmeans.pkl', 'w') as f: