Example #1
def test_cifar10_resnet_const_fixup():
    args = config.config(['--config', '../configs/classify/resnet/cifar10.yaml',
                          '--optim_lr', '0.01',
                          '--epochs', '80',
                          '--dataroot', '../data',
                          '--dataset_test_len', '256',
                          '--dataset_train_len', '256',
                          '--seed', '0',
                          '--run_id', '6'
                          ])
    ave_precision, best_precision, train_accuracy, test_accuracy = train_classifier.main(args)
    assert ave_precision > 0.2
    assert best_precision > 0.2
    assert train_accuracy > 20.0
Example #2
def train_m_then_n_models(m, n, counter, total_evals, start_time, **kwargs):
    if kwargs["bert_embed"]:
        lr_lower_bound = BERT_LR_LOWER_BOUND
        lr_upper_bound = BERT_LR_UPPER_BOUND
    else:
        lr_lower_bound = LR_LOWER_BOUND
        lr_upper_bound = LR_UPPER_BOUND
    best_assignment = None
    best_valid_err = 1
    all_assignments = get_k_sorted_hparams(m, lr_lower_bound, lr_upper_bound)
    for i in range(m):
        cur_assignments = all_assignments[i]
        args = ExperimentParams(counter=counter[0],
                                **kwargs,
                                **cur_assignments)
        cur_valid_err, _, _ = train_classifier.main(args)
        if cur_valid_err < best_valid_err:
            best_assignment = cur_assignments
            best_valid_err = cur_valid_err
        counter[0] = counter[0] + 1
        print(
            "trained {} out of {} hyperparameter assignments, so far {} seconds"
            .format(counter[0], total_evals, round(time.time() - start_time,
                                                   3)))

    for i in range(n):
        args = ExperimentParams(counter=counter[0],
                                filename_suffix="_{}".format(i),
                                **kwargs,
                                **best_assignment)
        cur_valid_err, _, _ = train_classifier.main(args)
        counter[0] = counter[0] + 1
        print(
            "trained {} out of {} hyperparameter assignments, so far {} seconds"
            .format(counter[0], total_evals, round(time.time() - start_time,
                                                   3)))
    return best_assignment
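
# The helper above follows a simple random-search-then-retrain recipe: sample m
# hyperparameter assignments, keep the one with the lowest validation error,
# then retrain that winner n more times. Below is a minimal, self-contained
# sketch of the same idea, with a stand-in train_and_eval() in place of
# train_classifier.main and with only the learning rate searched (the names,
# bounds, and log-uniform sampling are assumptions of this sketch, not taken
# from the code above).

import math
import random
import time


def train_and_eval(lr):
    # Stand-in for a real training run; returns a fake validation error.
    return abs(math.log10(lr) + 2) / 10 + random.random() * 0.05


def train_m_then_n(m, n, lr_lower=1e-4, lr_upper=1e-1):
    start_time = time.time()
    # Sample m learning rates between the bounds and sort them.
    candidates = sorted(
        10 ** random.uniform(math.log10(lr_lower), math.log10(lr_upper))
        for _ in range(m))
    best_lr, best_err = None, float("inf")
    for i, lr in enumerate(candidates, 1):
        err = train_and_eval(lr)
        if err < best_err:
            best_lr, best_err = lr, err
        print("trained {} out of {}, so far {} seconds".format(
            i, m + n, round(time.time() - start_time, 3)))
    # Retrain the best assignment n more times (e.g. with different seeds).
    for j in range(n):
        train_and_eval(best_lr)
        print("trained {} out of {}, so far {} seconds".format(
            m + j + 1, m + n, round(time.time() - start_time, 3)))
    return best_lr
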
Example #3
def test_cifar10_resnet():
    args = config.config(['--config', '../configs/classify/resnet/cifar10-batchnorm.yaml',
                          '--epochs', '80',
                          '--optim_lr', '0.01',
                          '--dataroot', '../data',
                          '--dataset_test_len', '256',
                          '--dataset_train_len', '256',
                          '--seed', '0',
                          '--run_id', '4'
                          ])
    ave_precision, best_precision, train_accuracy, test_accuracy = train_classifier.main(args)

    """ WARNING this model does not run reliably due to the shortcut containing convnets"""
    assert best_precision > 0.13
    assert train_accuracy > 20.0
Example #4
def eval_face(input_dir):
    """
    function that recognizes the face on a picture.
    input: location of the picture. (str)
    output: predicted label of the picture. (str)
    errors:
    + picture_name does not exist/is not and image - throw type error
    + no positive match to any of the labels - throw nonexistent error
    """

    # Preprocess the photo.
    pre.main(input_dir, input_dir, 180)

    # Evaluate the photo with the model.
    return main(input_dir, conf["model_path"], conf["classifier_output_path"],
                conf["batch_size"], conf["num_threads"], conf["num_epochs"],
                conf["min_num_images_per_class"], conf["split_ratio"], False)
Example #5
## Libraries
# Import standard-library and third-party modules
import random

import numpy as np

# Import custom modules
import feature_extraction
import feature_selection
import train_classifier
import train_regressor
import predict

## Random seed
# Set random seeds for reproducibility
seed = 0  # assumed value; any fixed seed works
random.seed(seed)
np.random.seed(seed)

## Code
print('Feature extraction')
feature_extraction.main()
print('Feature selection')
feature_selection.main()
print('Training regressor')
train_regressor.main()
print('Training classifier')
train_classifier.main()
print('Predicting')
predict.main()
Example #6
def main():

    exp_num = 6

    if exp_num != -2:
        loaded_embedding = preload_embed()
    else:
        loaded_data = preload_data()

    start_time = time.time()
    counter = [0]
    categories = get_categories()

    if exp_num == -2:
        patterns = ["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        total_evals = (m + n) * len(patterns)

        for pattern in patterns:
            train_m_then_n_models(
                m,
                n,
                counter,
                total_evals,
                start_time,
                pattern=pattern,
                d_out="24",
                depth=1,
                filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                dataset="bert/sst/",
                use_rho=False,
                seed=None,
                bert_embed=True,
                batch_size=32,
                loaded_data=loaded_data)

    if exp_num == -1:

        args = ExperimentParams(
            pattern="4-gram",
            d_out="24",
            reg_goal_params=20,
            filename_prefix=
            "all_cs_and_equal_rho/saving_model_for_interpretability/",
            seed=314159,
            loaded_embedding=loaded_embedding,
            dataset="amazon_categories/original_mix/",
            use_rho=False,
            clip_grad=2.82,
            dropout=0.1809,
            rnn_dropout=0.1537,
            embed_dropout=0.3141,
            lr=2.407E-02,
            weight_decay=3.64E-07,
            depth=1,
            reg_strength=3.125E-04,
            sparsity_type="states")
        cur_valid_err, _, _ = train_classifier.main(args)

    # a basic experiment
    if exp_num == 0:
        args = ExperimentParams(use_rho=True,
                                pattern="4-gram",
                                sparsity_type="rho_entropy",
                                rho_sum_to_one=True,
                                reg_strength=0.01,
                                d_out="23",
                                lr=0.001,
                                seed=34159)
        train_classifier.main(args)

    # finding the largest learning rate that doesn't diverge, for evaluating the claims in this paper:
    # The Marginal Value of Adaptive Gradient Methods in Machine Learning
    # https://arxiv.org/abs/1705.08292
    # conclusion: their results don't hold for our models.
    elif exp_num == 1:
        lrs = np.linspace(2, 0.1, 10)
        for lr in lrs:
            args = ExperimentParams(pattern="4-gram",
                                    d_out="256",
                                    trainer="sgd",
                                    max_epoch=3,
                                    lr=lr,
                                    filename_prefix="lr_tuning/")
            train_classifier.main(args)

    # baseline experiments for 1-gram up to 4-gram models
    elif exp_num == 3:
        patterns = ["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        total_evals = len(categories) * (len(patterns) + 1) * (m + n)

        for category in categories:
            for pattern in patterns:
                train_m_then_n_models(
                    m,
                    n,
                    counter,
                    total_evals,
                    start_time,
                    pattern=pattern,
                    d_out="24",
                    depth=1,
                    filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                    dataset="amazon_categories/" + category,
                    use_rho=False,
                    seed=None,
                    loaded_embedding=loaded_embedding)

            train_m_then_n_models(
                m,
                n,
                counter,
                total_evals,
                start_time,
                pattern="1-gram,2-gram,3-gram,4-gram",
                d_out="6,6,6,6",
                depth=1,
                filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                dataset="amazon_categories/" + category,
                use_rho=False,
                seed=None,
                loaded_embedding=loaded_embedding)

    # to learn with an L_1 regularizer
    # first train with the regularizer, choose the best structure, then do hyperparameter search for that structure
    elif exp_num == 6:
        d_out = "24"
        k = 20
        l = 5
        m = 20
        n = 5
        reg_goal_params_list = [80, 60, 40, 20]
        total_evals = len(categories) * (m + n + k +
                                         l) * len(reg_goal_params_list)

        all_reg_search_counters = []

        for category in categories:
            for reg_goal_params in reg_goal_params_list:
                best, reg_search_counters = regularization_search_experiments.train_k_then_l_models(
                    k,
                    l,
                    counter,
                    total_evals,
                    start_time,
                    logging_dir=
                    "/home/jessedd/projects/rational-recurrences/classification/logging/",
                    reg_goal_params=reg_goal_params,
                    pattern="4-gram",
                    d_out=d_out,
                    sparsity_type="states",
                    use_rho=False,
                    filename_prefix=
                    "all_cs_and_equal_rho/hparam_opt/structure_search/add_reg_term_to_loss/",
                    seed=None,
                    loaded_embedding=loaded_embedding,
                    reg_strength=8 * 10**-6,
                    distance_from_target=10,
                    dataset="amazon_categories/" + category)

                all_reg_search_counters.append(reg_search_counters)

                args = train_m_then_n_models(
                    m,
                    n,
                    counter,
                    total_evals,
                    start_time,
                    pattern=best['learned_pattern'],
                    d_out=best["learned_d_out"],
                    learned_structure="l1-states-learned",
                    reg_goal_params=reg_goal_params,
                    filename_prefix=
                    "all_cs_and_equal_rho/hparam_opt/structure_search/add_reg_term_to_loss/",
                    seed=None,
                    loaded_embedding=loaded_embedding,
                    dataset="amazon_categories/" + category,
                    use_rho=False)
        print("search counters:")
        for search_counter in all_reg_search_counters:
            print(search_counter)

    # some rho_entropy experiments
    elif exp_num == 8:
        k = 20
        l = 5
        total_evals = len(categories) * (k + l)

        for d_out in ["24"]:  #, "256"]:
            for category in categories:
                # to learn the structure, and train with the regularizer
                best, reg_search_counters = regularization_search_experiments.train_k_then_l_models(
                    k,
                    l,
                    counter,
                    total_evals,
                    start_time,
                    use_rho=True,
                    pattern="4-gram",
                    sparsity_type="rho_entropy",
                    rho_sum_to_one=True,
                    reg_strength=1,
                    d_out=d_out,
                    filename_prefix="only_last_cs/hparam_opt/reg_str_search/",
                    dataset="amazon_categories/" + category,
                    seed=None,
                    distance_from_target=10,
                    loaded_embedding=loaded_embedding)

    # baseline for rho_entropy experiments
    elif exp_num == 9:
        categories = ["dvd/"]
        patterns = ["1-gram",
                    "2-gram"]  #["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        total_evals = len(categories) * (len(patterns) + 1) * (m + n)

        for category in categories:
            for pattern in patterns:
                # train and eval the learned structure
                args = train_m_then_n_models(
                    m,
                    n,
                    counter,
                    total_evals,
                    start_time,
                    pattern=pattern,
                    d_out="24",
                    filename_prefix="only_last_cs/hparam_opt/",
                    dataset="amazon_categories/" + category,
                    use_last_cs=True,
                    use_rho=False,
                    seed=None,
                    loaded_embedding=loaded_embedding)

    # baseline experiments for l1 regularization, on sst. very similar to exp_num 3
    elif exp_num == 10:
        patterns = ["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        total_evals = (len(patterns) + 1) * (m + n)
        for pattern in patterns:
            train_m_then_n_models(
                m,
                n,
                counter,
                total_evals,
                start_time,
                pattern=pattern,
                d_out="24",
                depth=1,
                filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                dataset="sst/",
                use_rho=False,
                seed=None,
                loaded_embedding=loaded_embedding)

        train_m_then_n_models(
            m,
            n,
            counter,
            total_evals,
            start_time,
            pattern="1-gram,2-gram,3-gram,4-gram",
            d_out="6,6,6,6",
            depth=1,
            filename_prefix="all_cs_and_equal_rho/hparam_opt/",
            dataset="sst/",
            use_rho=False,
            seed=None,
            loaded_embedding=loaded_embedding)

    elif exp_num == 11:

        args = ExperimentParams(
            pattern="1-gram,2-gram,3-gram,4-gram",
            d_out="0,4,0,2",
            learned_structure="l1-states-learned",
            reg_goal_params=20,
            filename_prefix=
            "all_cs_and_equal_rho/saving_model_for_interpretability/",
            seed=None,
            loaded_embedding=loaded_embedding,
            dataset="amazon_categories/original_mix/",
            use_rho=False,
            clip_grad=1.09,
            dropout=0.1943,
            rnn_dropout=0.0805,
            embed_dropout=0.3489,
            lr=2.553E-02,
            weight_decay=1.64E-06,
            depth=1,
            batch_size=5)
        cur_valid_err, _, _ = train_classifier.main(args)
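
# For a sense of scale in the exp_num == 6 branch above: total_evals counts one
# entry per training run across all categories and sparsity targets. Assuming
# get_categories() returns four Amazon categories (the real number depends on
# the dataset layout; "books/" and "dvd/" appear elsewhere in these examples),
# the schedule works out to
#     len(categories) * (m + n + k + l) * len(reg_goal_params_list)
#   = 4 * (20 + 5 + 20 + 5) * 4
#   = 800 training runs, not counting the extra regularization-strength search
#     iterations inside train_k_then_l_models.
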
def search_reg_str_l1(cur_assignments,
                      kwargs,
                      global_counter,
                      distance_from_target=10):
    # distance_from_target: the learned model's parameter count must land within this amount of the target
    smallest_reg_str = 10**-9
    largest_reg_str = 10**2
    starting_reg_str = kwargs["reg_strength"]
    found_good_reg_str = False
    too_small = False
    too_large = False
    counter = 0
    reg_str_growth_rate = 2.0
    reduced_model_path = ""

    while not found_good_reg_str:
        # deleting models which aren't going to be used

        save_learned_structure.remove_old(reduced_model_path)

        # if more than 25 regularization strengths have been tried, throw out hparam assignment and resample
        if counter > 25:
            kwargs["reg_strength"] = starting_reg_str
            return counter, "bad_hparams", cur_valid_err, learned_d_out, reduced_model_path

        counter += 1
        args = ExperimentParams(counter=global_counter,
                                **kwargs,
                                **cur_assignments)
        cur_valid_err, learned_d_out, reduced_model_path = train_classifier.main(
            args)

        num_params = sum([
            int(learned_d_out.split(",")[i]) * (i + 1)
            for i in range(len(learned_d_out.split(",")))
        ])

        if num_params < kwargs["reg_goal_params"] - distance_from_target:
            if too_large:
                # reduce size of steps for reg strength
                reg_str_growth_rate = (reg_str_growth_rate + 1) / 2.0
                too_large = False
            too_small = True
            kwargs["reg_strength"] /= reg_str_growth_rate
            if kwargs["reg_strength"] < smallest_reg_str:
                kwargs["reg_strength"] = starting_reg_str
                return counter, "too_small_lr", cur_valid_err, learned_d_out, reduced_model_path
        elif num_params > kwargs["reg_goal_params"] + distance_from_target:
            if too_small:
                # reduce size of steps for reg strength
                reg_str_growth_rate = (reg_str_growth_rate + 1) / 2.0
                too_small = False
            too_large = True
            kwargs["reg_strength"] *= reg_str_growth_rate

            if kwargs["reg_strength"] > largest_reg_str:
                kwargs["reg_strength"] = starting_reg_str

                # it diverged, and for some reason the weights didn't drop
                if (num_params == int(args.d_out) * 4
                        and cur_assignments["lr"] > 0.1
                        and cur_valid_err > 0.3):
                    return counter, "too_big_lr", cur_valid_err, learned_d_out, reduced_model_path
                else:
                    return counter, "too_small_lr", cur_valid_err, learned_d_out, reduced_model_path
        else:
            found_good_reg_str = True

    return counter, "okay_lr", cur_valid_err, learned_d_out, reduced_model_path
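
# The loop above is a multiplicative search over the regularization strength:
# the strength is repeatedly multiplied or divided by a growth rate until the
# learned structure's parameter count lands within distance_from_target of
# reg_goal_params, and the growth rate is damped whenever the search overshoots
# in the opposite direction. The parameter count is read off learned_d_out,
# whose i-th entry is the number of states kept for (i+1)-gram patterns, e.g.
# "0,4,0,2" gives 0*1 + 4*2 + 0*3 + 2*4 = 16 parameters, within 10 of the
# reg_goal_params=20 used in exp_num 11 above. A compact sketch of just that
# search logic, with a stand-in train_fn in place of a full
# train_classifier.main run (an assumption of this sketch):


def count_params(learned_d_out):
    sizes = [int(d) for d in learned_d_out.split(",")]
    return sum(d * (i + 1) for i, d in enumerate(sizes))


def search_reg_strength(train_fn, reg_strength, goal, tol=10, max_tries=25):
    growth = 2.0
    too_small = too_large = False
    for tries in range(1, max_tries + 1):
        num_params = count_params(train_fn(reg_strength))
        if num_params < goal - tol:
            # Over-regularized: weaken the penalty.
            if too_large:
                growth = (growth + 1) / 2.0  # overshot, so damp the step size
                too_large = False
            too_small = True
            reg_strength /= growth
        elif num_params > goal + tol:
            # Under-regularized: strengthen the penalty.
            if too_small:
                growth = (growth + 1) / 2.0
                too_small = False
            too_large = True
            reg_strength *= growth
        else:
            return reg_strength, tries
    return None, max_tries  # give up so the caller can resample hyperparameters
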
def train_k_then_l_models(k, l, counter, total_evals, start_time, logging_dir,
                          distance_from_target, **kwargs):
    if "seed" in kwargs and kwargs["seed"] is not None:
        np.random.seed(kwargs["seed"])

    assert "reg_strength" in kwargs
    if "prox_step" not in kwargs:
        kwargs["prox_step"] = False
    elif kwargs["prox_step"]:
        assert False, "It's too unstable. books/all_cs_and_equal_rho/hparam_opt/structure_search/proximal_gradient too big then too small"
    assert kwargs["sparsity_type"] == "states", \
        "setting kwargs for structure learning works only with states"
    assert "lr_patience" not in kwargs, "lr_patience is set s.t. the lr never decreases during structure learning."
    kwargs["logging_dir"] = logging_dir

    file_base = logging_dir + kwargs["dataset"]
    best = {
        "assignment": None,
        "valid_err": 1,
        "learned_pattern": None,
        "learned_d_out": None,
        "reg_strength": None
    }

    reg_search_counters = []
    if kwargs["bert_embed"]:
        lr_lower_bound = BERT_LR_LOWER_BOUND
        lr_upper_bound = BERT_LR_UPPER_BOUND
    else:
        lr_lower_bound = LR_LOWER_BOUND
        lr_upper_bound = LR_UPPER_BOUND
    all_assignments = get_k_sorted_hparams(k, lr_lower_bound, lr_upper_bound)
    for i in range(len(all_assignments)):

        valid_assignment = False
        while not valid_assignment:
            cur_assignments = all_assignments[i]

            # to prevent the learning rate from decreasing during structure learning
            kwargs["lr_patience"] = 9999999

            if kwargs["sparsity_type"] == "rho_entropy":
                one_search_counter, lr_judgement = search_reg_str_entropy(
                    cur_assignments, kwargs)
            elif kwargs["sparsity_type"] == "states":
                one_search_counter, lr_judgement, cur_valid_err, learned_d_out, reduced_model_path = search_reg_str_l1(
                    cur_assignments, kwargs, counter[0], distance_from_target)
                learned_pattern = "1-gram,2-gram,3-gram,4-gram"

            del kwargs["lr_patience"]

            reg_search_counters.append(one_search_counter)
            if lr_judgement == "okay_lr":
                valid_assignment = True
            else:
                save_learned_structure.remove_old(reduced_model_path)
                new_assignments = get_k_sorted_hparams(k - i,
                                                       lr_lower_bound,
                                                       lr_upper_bound,
                                                       sort=False)
                all_assignments[i:len(all_assignments)] = new_assignments

                #if lr_judgement == "too_big_lr":
                #    # lower the upper bound
                #    lr_upper_bound = cur_assignments['lr']
                #    reverse = True
                #elif lr_judgement == "too_small_lr":
                #    # rase lower bound
                #    lr_lower_bound = cur_assignments['lr']
                #    reverse = False
                #else:
                #    assert False, "shouldn't be here."
                #new_assignments = get_k_sorted_hparams(k-i, lr_lower_bound, lr_upper_bound)
                #if reverse:
                #    new_assignments.reverse()
                #all_assignments[i:len(all_assignments)] = new_assignments

        # to fine tune the learned model
        kwargs_fine_tune = get_kwargs_for_fine_tuning(kwargs,
                                                      reduced_model_path,
                                                      learned_d_out,
                                                      learned_pattern)
        args = ExperimentParams(counter=counter[0],
                                **kwargs_fine_tune,
                                **cur_assignments)
        cur_valid_err, _, _ = train_classifier.main(args)

        if cur_valid_err < best["valid_err"]:
            best = {
                "assignment": cur_assignments,
                "valid_err": cur_valid_err,
                "learned_pattern": learned_pattern,
                "learned_d_out": learned_d_out,
                "reg_strength": kwargs["reg_strength"]
            }

        counter[0] = counter[0] + 1
        print(
            "trained {} out of {} hyperparameter assignments, so far {} seconds"
            .format(counter[0], total_evals, round(time.time() - start_time,
                                                   3)))

    kwargs["reg_strength"] = best["reg_strength"]
    for i in range(l):
        kwargs["lr_patience"] = 9999999
        args = ExperimentParams(counter=counter[0],
                                filename_suffix="_{}".format(i),
                                **kwargs,
                                **best["assignment"])
        cur_valid_err, learned_d_out, reduced_model_path = train_classifier.main(
            args)
        del kwargs["lr_patience"]

        # to fine tune the model trained on the above line
        kwargs_fine_tune = get_kwargs_for_fine_tuning(kwargs,
                                                      reduced_model_path,
                                                      learned_d_out,
                                                      learned_pattern)
        args = ExperimentParams(counter=counter[0],
                                filename_suffix="_{}".format(i),
                                **kwargs_fine_tune,
                                **best["assignment"])
        cur_valid_err, learned_d_out, reduced_model_path = train_classifier.main(
            args)

        counter[0] = counter[0] + 1

    return best, reg_search_counters
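
# One small design choice that recurs throughout these examples: counter is a
# one-element list rather than a plain int, so train_m_then_n_models and
# train_k_then_l_models can advance a single shared run count in place across
# calls. A minimal, self-contained illustration of that idiom (the training
# body is omitted; only the progress bookkeeping is shown):

import time


def run_some_trainings(how_many, counter, total_evals, start_time):
    for _ in range(how_many):
        # ... one training run would happen here ...
        counter[0] += 1  # mutates the caller's list, so progress is shared
        print(
            "trained {} out of {} hyperparameter assignments, so far {} seconds"
            .format(counter[0], total_evals,
                    round(time.time() - start_time, 3)))


# counter = [0]
# start_time = time.time()
# run_some_trainings(3, counter, total_evals=5, start_time=start_time)
# run_some_trainings(2, counter, total_evals=5, start_time=start_time)
# # counter[0] is now 5, even though each call only saw part of the work.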