Exemple #1
0
def parameterized_models():
    """Yield one instantiated model per hyper-parameter combination.

    Walks every entry of the module-level ``MODEL_PARAMS`` mapping. Each
    value is a dict with a ``'model'`` entry (a callable that builds the
    model) and a ``'parameters'`` entry that is handed to ``grid_search``.

    Yields:
        ``(model_name, model)`` tuples, where ``model`` is the result of
        calling the model factory with one keyword dict produced by
        ``grid_search``.
    """
    # ``items()`` rather than the Python 2-only ``iteritems()`` so the
    # generator works on both Python 2 and Python 3.
    for model_name, spec in MODEL_PARAMS.items():
        model_factory = spec['model']
        for kwargs in grid_search(spec['parameters']):
            yield model_name, model_factory(**kwargs)
Exemple #2
0
    features_to_withhold = np.random.choice(np.arange(data.shape[1]), 2)
    y = data[:, features_to_withhold]
    data_limited = np.delete(data, features_to_withhold, axis=1)
    slc = FullSLCluster(**param_dict)
    wcv = WCVScore(slc)
    wcv_score, _ = wcv.score(data_limited)
    assignments = slc.fit_predict(data_limited)
    final_scores = np.apply_along_axis(
        lambda col: mean_cluster_variances(assignments, col), 0, y)
    final_score = np.mean(final_scores)
    print "done testing ", param_dict
    return wcv_score, final_score, features_to_withhold


if __name__ == '__main__':
    # Score every parameter combination from SLC_PARAMS on each dataset,
    # then persist both the scores and the parameter list.
    params_list = grid_search(SLC_PARAMS)
    datasets = [
        load_boston().data,
        load_diabetes().data,
        load_breast_cancer().data
    ]
    # One entry per dataset; each entry holds the test_params result for
    # every parameter combination, in params_list order.
    outputs = []
    for data in datasets:
        outputs.append(
            Parallel(n_jobs=-1)(delayed(test_params)(param_dict, data)
                                for param_dict in params_list))
    # Pickle streams are binary data: open the files in 'wb' so the output
    # is written unmodified on every platform and Python version.
    with open('big_tune_params_by_model.pkl', 'wb') as f:
        pickle.dump(outputs, f)
    with open('big_tune_params.pkl', 'wb') as f:
        pickle.dump(params_list, f)
    initial_variances = [[
            output.append((dataset, params[min_wcv]))
    return output


def prepare_tuned_models(select_params_output):
    """Turn selected ``(dataset, params)`` rows into model specifications.

    Args:
        select_params_output: iterable of indexable rows where ``row[0]``
            is the dataset and ``row[1]`` is a parameter dict.

    Returns:
        A list with one new dict per row: the row's parameters plus a
        ``'dataset'`` entry set to ``row[0]`` and a ``'model'`` entry set
        to ``FullSLCluster``. The input dicts are left unmodified.
    """
    tuned = []
    for row in select_params_output:
        # Copy before annotating: the same params dict object may appear in
        # several rows, and writing into it in place would make those rows
        # overwrite each other's 'dataset' (and mutate the caller's data).
        spec = dict(row[1])
        spec['dataset'] = row[0]
        spec['model'] = FullSLCluster
        tuned.append(spec)
    return tuned


if __name__ == '__main__':
    # Stage 1: fit every parameter combination from SLC_PARAMS and save
    # the raw results.
    models_to_search = grid_search(SLC_PARAMS)
    # fit_model_output = Parallel(n_jobs=-1)(delayed(fit_model)(model) for model in models_to_search)
    fit_model_output = [fit_model(model) for model in models_to_search]
    # Pickle streams are binary data: open the files in 'wb' so the output
    # is written unmodified on every platform and Python version.
    with open('big_ole_grid_search_new_datasets.pkl', 'wb') as f:
        pickle.dump(fit_model_output, f)
    # Stage 2: pick parameters per dataset from the stage-1 results, refit
    # with those parameters, and save the refitted results.
    params_per_dataset = select_params_per_dataset_per_k(fit_model_output)
    params_to_retry = prepare_tuned_models(params_per_dataset)
    # final_models = Parallel(n_jobs=-1)(delayed(fit_model)(model) for model in params_to_retry)
    final_models = [fit_model(model) for model in params_to_retry]
    with open('final_models_from_grid_search_new_datasets.pkl', 'wb') as f:
        pickle.dump(final_models, f)

    models_to_search = grid_search(KMEANS_PARAMS)
    # fit_model_output = Parallel(n_jobs=-1)(delayed(fit_model)(model) for model in models_to_search)
    fit_model_output = [fit_model(model) for model in models_to_search]
    with open('big_ole_grid_search_new_datasets_kmeans.pkl', 'w') as f: