Code example #1
File: evaluator.py Project: jjfeng/aACP
from typing import List

import numpy as np

# DataGenerator, TrialMetaData, and score_model_batch come from the
# project's own modules.


def get_true_model_scores(seed, data_generator: DataGenerator,
                          trial_meta: TrialMetaData, num_test_obs: int,
                          approved_models: List):
    """
    @return Dict: key = endpoint, value = matrix of true model scores over time
    """
    print("Evaluating...")
    np.random.seed(seed)
    true_model_score_dict = {}
    # Constant-support settings share one (larger) evaluation batch across all
    # time points; otherwise a fresh batch is drawn per time point below.
    is_const_dist = "constant" in data_generator.support_sim_settings.min_func_name
    if is_const_dist:
        batch_data = data_generator.create_data(num_test_obs * 2, 0)

    for batch_index in range(trial_meta.num_batches):
        if not is_const_dist:
            batch_data = data_generator.create_data(num_test_obs, batch_index)

        true_model_scores = [
            score_model_batch(model, batch_data, batch_index,
                              trial_meta.score_func)
            for model in approved_models
        ]
        # For each endpoint, append this batch's scores as a new column, so
        # the final matrix has shape (num_models, num_batches).
        for k in true_model_scores[0].keys():
            if k not in true_model_score_dict:
                true_model_score_dict[k] = np.array(
                    [[score_dict[k]] for score_dict in true_model_scores])
            else:
                new_scores = np.array([[score_dict[k]]
                                       for score_dict in true_model_scores])
                true_model_score_dict[k] = np.concatenate(
                    [true_model_score_dict[k], new_scores], axis=1)

    print("Done evaluating")
    return true_model_score_dict
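
The concatenation above produces, for each endpoint, a matrix of shape (num_models, num_batches). A minimal standalone sketch of the same pattern, with a made-up endpoint name and score values purely for illustration:

import numpy as np

# Two hypothetical batches of per-model score dicts (two approved models).
batches = [
    [{"auc": 0.81}, {"auc": 0.79}],  # batch 0
    [{"auc": 0.83}, {"auc": 0.80}],  # batch 1
]

score_dict = {}
for batch_scores in batches:
    for k in batch_scores[0].keys():
        new_col = np.array([[d[k]] for d in batch_scores])  # shape (num_models, 1)
        if k not in score_dict:
            score_dict[k] = new_col
        else:
            score_dict[k] = np.concatenate([score_dict[k], new_col], axis=1)

print(score_dict["auc"].shape)  # (2, 2): rows are models, columns are batches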
Code example #2
File: generate_data.py Project: jjfeng/spinn
import pickle
import sys

# DataGenerator, data_gen_funcs, and parse_args come from the project's
# own modules.


def main(args=sys.argv[1:]):
    args = parse_args()
    print(args)

    # Draw training and test sets from the named data-generating function,
    # with num_p covariates and the requested signal-to-noise ratio.
    data_gen = DataGenerator(
            args.num_p,
            getattr(data_gen_funcs, args.func_name),
            data_gen_funcs.CLASSIFICATION_DICT[args.func_name],
            snr=args.snr)
    train_data = data_gen.create_data(args.n_train)
    test_data = data_gen.create_data(args.n_test)

    print("data_file %s" % args.out_file)
    with open(args.out_file, "wb") as f:
        pickle.dump({
            "train": train_data,
            "test": test_data},
            f)
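
Reading the pickled file back only needs the standard library; a minimal sketch, with a placeholder path standing in for args.out_file:

import pickle

with open("data.pkl", "rb") as f:  # placeholder for args.out_file
    data = pickle.load(f)

train_data = data["train"]
test_data = data["test"]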
Code example #3
def main(args=sys.argv[1:]):
    args = parse_args()
    print(args)

    np.random.seed(args.seed)

    data_gen = DataGenerator(
        sim_func_form=args.sim_func_form,
        sim_func_name=args.sim_func,
        num_p=args.num_p,
        num_classes=args.num_classes,
        noise_sd=args.sim_noise_sd,
        std_dev_x=args.std_dev_x,
        max_x=args.max_x,
    )
    # create_data returns both the simulated training set and the support
    # settings that describe the covariate distribution it was drawn from.
    train_data, support_sim_settings = data_gen.create_data(args.num_train)

    # Write data to file
    pickle_to_file(
        {
            "train": train_data,
            "support_sim_settings": support_sim_settings,
            "data_gen": data_gen
        }, args.out_data_file)
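
pickle_to_file here (and pickle_from_file in the next example) are project helpers; assuming they are thin wrappers around the standard pickle module, a plausible implementation would be:

import pickle

def pickle_to_file(obj, file_path):
    # Serialize any Python object to disk.
    with open(file_path, "wb") as f:
        pickle.dump(obj, f)

def pickle_from_file(file_path):
    # Inverse of pickle_to_file.
    with open(file_path, "rb") as f:
        return pickle.load(f)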
Code example #4
def main(args=sys.argv[1:]):
    args = parse_args()
    logging.basicConfig(format="%(message)s",
                        filename=args.log_file,
                        level=logging.DEBUG)
    print(args)
    logging.info(args)

    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    if args.sim_func is not None:
        # Create data
        data_gen = DataGenerator(
            func_name=args.sim_func,
            n_train=args.num_train,
            n_test=args.num_test,
            num_p=args.num_p,
            noise_sd=args.sim_noise_sd,
        )
        data = data_gen.create_data()

        # Write data
        pickle_to_file(data, args.data_file)
    else:
        # Read data
        data = pickle_from_file(args.input_data)

    param_grid = [{
        'layer_sizes': args.layer_sizes,
        'ridge_param': args.ridge_params,
        'max_iters': [args.max_iters],
        'num_inits': [args.num_inits],
        'act_func': [args.act_func],
        'output_act_func': [args.output_act_func],
    }]

    # Fit neural network and calculate variable importance
    var_imports, fitted_models = calculate_var_imports_refits(
        data,
        param_grid=param_grid,
        cond_layer_sizes=args.cond_layer_sizes_separate,
        var_import_idxs=args.var_import_idx)

    # Save model
    pickle_to_file(fitted_models, args.model_file)
    # Store var import results
    pickle_to_file(var_imports, args.var_import_file)

    # Print output
    for i, var_import in enumerate(var_imports):
        v = var_import["std-True"]
        if i == 0:
            logging.info("full final r2 %f (1 is best)", v["r2.full"])
            logging.info("full final r2 test %f", v["r2.test.full"])
        logging.info("small final r2 %d : %f", i, v["r2.small"])
        logging.info("small final r2 test %d : %f", i, v["r2.test.small"])
        logging.info("one step est std=True %d : %f, %s", i, v["onestep"],
                     v["onestep.ci"])
        v_not_std = var_import["std-False"]
        logging.info("one step est std=False %d : %f, %s", i,
                     v_not_std["onestep"], v_not_std["onestep.ci"])
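
The param_grid layout (candidate lists per hyperparameter, with fixed settings wrapped in single-element lists) follows scikit-learn's grid convention. If calculate_var_imports_refits expands it the usual way, the expansion would look roughly like this sketch (the candidate values are invented):

from sklearn.model_selection import ParameterGrid

param_grid = [{
    'layer_sizes': [[10, 5], [20, 10]],  # two candidate architectures
    'ridge_param': [0.01, 0.1],
    'max_iters': [2000],                 # fixed settings: single-element lists
    'num_inits': [3],
}]

for params in ParameterGrid(param_grid):
    # Each `params` is one concrete configuration, e.g.
    # {'layer_sizes': [10, 5], 'max_iters': 2000, 'num_inits': 3, 'ridge_param': 0.01}
    print(params)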
Code example #5
def main(args=sys.argv[1:]):
    args = parse_args()
    logging.basicConfig(format="%(message)s",
                        filename=args.log_file,
                        level=logging.DEBUG)
    logging.info(args)

    # This random seed thing seems to only apply to the data-generation process.
    # The initialization of the neural net doesn't seem to be affected by this
    # ... which is really annoying.
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    if args.sim_func is not None:
        # Create data
        data_gen = DataGenerator(
            func_name=args.sim_func,
            n_train=args.num_train,
            n_test=args.num_test,
            num_p=args.num_p,
            noise_sd=args.sim_noise_sd,
        )
        data = data_gen.create_data()

        # Write data to file
        pickle_to_file(data, args.data_file)
    else:
        # Read data
        data = pickle_from_file(args.input_data)

    param_grid = [{
        'layer_sizes': args.layer_sizes,
        'ridge_param': args.ridge_params,
        'max_iters': [args.max_iters],
        'num_inits': [args.num_inits],
        'act_func': [args.act_func],
        'output_act_func': [args.output_act_func],
        'var_import_idxs': [args.var_import_idx],
        'sgd_sample_size': [args.sgd_sample_size],
        'nan_fill_config': [args.nan_fill_config],
        'missing_value_fill': [args.missing_value_fill],
    }]

    # Fit neural network and calculate variable importance
    reduced_func = getattr(
        data_gen_funcs,
        args.sim_func_reduced) if args.sim_func_reduced else None
    var_imports, fitted_model = calculate_var_imports_no_refit(
        data, param_grid=param_grid, cv=args.cv, reduced_func=reduced_func)

    # Save model
    pickle_to_file(fitted_model, args.model_file)
    # Store var import results
    pickle_to_file(var_imports, args.var_import_file)

    # Print output for each var import estimate
    coverage = []  # CI-coverage indicators; never populated in this snippet (see the sketch below)
    for i, var_group in enumerate(args.var_import_idx):
        v = var_imports[i]["std-True"]
        if i == 0:
            logging.info("full final r2 %f (1 is best)", v["r2.full"])
            logging.info("full final r2 test %f", v["r2.test.full"])
            logging.info("full final mse train %f", v["mse.train.full"])
            logging.info("full final mse test %f", v["mse.test.full"])
        logging.info(" --- small --- %s ---- ", var_group)
        logging.info("small final r2: %f", v["r2.small"])
        logging.info("small final r2 test: %f", v["r2.test.small"])
        if v["mse.train.small"] is not None:
            logging.info("small final mse train: %f", v["mse.train.small"])
            logging.info("small final mse test: %f", v["mse.test.small"])
        logging.info("one step est std=True: %f, %s", v["onestep"],
                     v["onestep.ci"])
        v_not_std = var_imports[i]["std-False"]
        logging.info("one step est std=False: %f, %s", v_not_std["onestep"],
                     v_not_std["onestep.ci"])

    if args.sim_func_reduced:
        logging.info("Average coverage over all the groups: %f (%d/%d)",
                     np.mean(coverage), np.sum(coverage), len(coverage))
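
As noted above, coverage is never appended to in this snippet, so the final summary averages an empty list. Presumably each group is meant to append an indicator of whether its one-step CI covers the true importance; a hypothetical sketch, where ci_covers and true_vals are illustrative names not taken from the project:

def ci_covers(v, true_val):
    # Does the one-step confidence interval cover the true importance value?
    ci_lower, ci_upper = v["onestep.ci"]
    return ci_lower <= true_val <= ci_upper

# Inside the per-group loop above, something like:
#     if args.sim_func_reduced:
#         coverage.append(ci_covers(var_imports[i]["std-True"], true_vals[i]))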