def test_cifar10_resnet_const_fixup():
    """Smoke test: fixup-initialized ResNet on a tiny CIFAR10 subset (256/256 samples).

    Thresholds are deliberately loose — this only checks that training
    makes progress, not final model quality.
    """
    argv = [
        '--config', '../configs/classify/resnet/cifar10.yaml',
        '--optim_lr', '0.01',
        '--epochs', '80',
        '--dataroot', '../data',
        '--dataset_test_len', '256',
        '--dataset_train_len', '256',
        '--seed', '0',
        '--run_id', '6',
    ]
    args = config.config(argv)
    ave_precision, best_precision, train_accuracy, test_accuracy = \
        train_classifier.main(args)
    assert ave_precision > 0.2
    assert best_precision > 0.2
    assert train_accuracy > 20.0
def train_m_then_n_models(m, n, counter, total_evals, start_time, **kwargs):
    """Random-search hyperparameters: train ``m`` candidates, then retrain the best ``n`` times.

    counter is a single-element list used as a mutable call counter shared
    with the caller; start_time is used only for progress logging.
    Returns the best hyperparameter assignment found.
    """
    # Learning-rate search bounds differ for BERT embeddings.
    if kwargs["bert_embed"]:
        lr_lo, lr_hi = BERT_LR_LOWER_BOUND, BERT_LR_UPPER_BOUND
    else:
        lr_lo, lr_hi = LR_LOWER_BOUND, LR_UPPER_BOUND

    def _log_progress():
        # Shared progress line for both search phases.
        print(
            "trained {} out of {} hyperparameter assignments, so far {} seconds"
            .format(counter[0], total_evals, round(time.time() - start_time, 3)))

    best_assignment = None
    best_valid_err = 1
    candidates = get_k_sorted_hparams(m, lr_lo, lr_hi)

    # Phase 1: evaluate m candidate assignments, tracking the lowest validation error.
    for idx in range(m):
        assignment = candidates[idx]
        args = ExperimentParams(counter=counter[0], **kwargs, **assignment)
        valid_err, _, _ = train_classifier.main(args)
        if valid_err < best_valid_err:
            best_valid_err = valid_err
            best_assignment = assignment
        counter[0] = counter[0] + 1
        _log_progress()

    # Phase 2: retrain the winner n times (distinct filename suffix per run).
    for rerun in range(n):
        args = ExperimentParams(counter=counter[0],
                                filename_suffix="_{}".format(rerun),
                                **kwargs, **best_assignment)
        valid_err, _, _ = train_classifier.main(args)
        counter[0] = counter[0] + 1
        _log_progress()

    return best_assignment
def test_cifar10_resnet():
    """Smoke test: batchnorm ResNet on a tiny CIFAR10 subset (256/256 samples)."""
    argv = [
        '--config', '../configs/classify/resnet/cifar10-batchnorm.yaml',
        '--epochs', '80',
        '--optim_lr', '0.01',
        '--dataroot', '../data',
        '--dataset_test_len', '256',
        '--dataset_train_len', '256',
        '--seed', '0',
        '--run_id', '4',
    ]
    args = config.config(argv)
    ave_precision, best_precision, train_accuracy, test_accuracy = \
        train_classifier.main(args)
    # WARNING: this model does not run reliably due to the shortcut
    # containing convnets, so only loose thresholds are checked.
    assert best_precision > 0.13
    assert train_accuracy > 20.0
def eval_face(input_dir):
    """Recognize the face in a picture.

    input: location of the picture (str).
    output: predicted label of the picture (str).
    errors:
        + picture does not exist / is not an image -> type error
        + no positive match to any of the labels -> nonexistent error
    """
    # Preprocess the photo in place (180 presumably an image size — TODO confirm).
    pre.main(input_dir, input_dir, 180)
    # Evaluate the processed photo with the trained model; config values
    # come from the module-level `conf` mapping.
    return main(input_dir,
                conf["model_path"],
                conf["classifier_output_path"],
                conf["batch_size"],
                conf["num_threads"],
                conf["num_epochs"],
                conf["min_num_images_per_class"],
                conf["split_ratio"],
                False)
## Libraries
# Standard library
import random

# Third-party
import numpy as np  # fix: `np` was used below but never imported

# Import custom modules
import feature_extraction
import feature_selection
import train_classifier
import train_regressor
import predict

## Random seed
# Fixed seed so every pipeline stage is reproducible across runs.
# NOTE(review): `seed` was referenced but never defined in this script —
# confirm the intended value; 0 is used as a deterministic default.
seed = 0
random.seed(seed)
np.random.seed(seed)

## Code
# Run the full pipeline end to end, one stage at a time.
print('Feature extraction')
feature_extraction.main()
print('Feature selection')
feature_selection.main()
print('Training regressor')
train_regressor.main()
print('Training classifier')
train_classifier.main()
print('Predicting')
predict.main()
def main():
    """Experiment dispatcher: runs one of several hyperparameter-search /
    structure-learning experiments selected by the hard-coded ``exp_num``.

    NOTE(review): exp_num is hard-coded to 6; every other branch is dead
    until this constant is edited.
    """
    exp_num = 6
    # Embeddings are preloaded once and shared across all trained models;
    # exp -2 uses preloaded BERT data instead of embeddings.
    if exp_num != -2:
        loaded_embedding = preload_embed()
    else:
        loaded_data = preload_data()
    start_time = time.time()
    # Single-element list: shared mutable counter for models trained so far.
    counter = [0]
    categories = get_categories()
    # BERT/SST n-gram baselines.
    if exp_num == -2:
        patterns = ["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        total_evals = (m + n) * len(patterns)
        for pattern in patterns:
            train_m_then_n_models(
                m, n, counter, total_evals, start_time,
                pattern=pattern, d_out="24", depth=1,
                filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                dataset="bert/sst/", use_rho=False, seed=None,
                bert_embed=True, batch_size=32, loaded_data=loaded_data)
    # Single run with fixed hyperparameters, saved for interpretability work.
    if exp_num == -1:
        args = ExperimentParams(
            pattern="4-gram", d_out="24", reg_goal_params=20,
            filename_prefix="all_cs_and_equal_rho/saving_model_for_interpretability/",
            seed=314159, loaded_embedding=loaded_embedding,
            dataset="amazon_categories/original_mix/", use_rho=False,
            clip_grad=2.82, dropout=0.1809, rnn_dropout=0.1537,
            embed_dropout=0.3141, lr=2.407E-02, weight_decay=3.64E-07,
            depth=1, reg_strength=3.125E-04, sparsity_type="states")
        cur_valid_err, _, _ = train_classifier.main(args)
    # a basic experiment
    if exp_num == 0:
        args = ExperimentParams(use_rho=True, pattern="4-gram",
                                sparsity_type="rho_entropy",
                                rho_sum_to_one=True, reg_strength=0.01,
                                d_out="23", lr=0.001, seed=34159)
        train_classifier.main(args)
    # finding the largest learning rate that doesn't diverge, for evaluating the claims in this paper:
    # The Marginal Value of Adaptive Gradient Methods in Machine Learning
    # https://arxiv.org/abs/1705.08292
    # conclusion: their results don't hold for our models.
    elif exp_num == 1:
        lrs = np.linspace(2, 0.1, 10)
        for lr in lrs:
            args = ExperimentParams(pattern="4-gram", d_out="256",
                                    trainer="sgd", max_epoch=3, lr=lr,
                                    filename_prefix="lr_tuning/")
            train_classifier.main(args)
    # baseline experiments for 1-gram up to 4-gram models
    elif exp_num == 3:
        patterns = ["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        # +1 accounts for the extra mixed-pattern run per category below.
        total_evals = len(categories) * (len(patterns) + 1) * (m + n)
        for category in categories:
            for pattern in patterns:
                train_m_then_n_models(
                    m, n, counter, total_evals, start_time,
                    pattern=pattern, d_out="24", depth=1,
                    filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                    dataset="amazon_categories/" + category,
                    use_rho=False, seed=None,
                    loaded_embedding=loaded_embedding)
            # One additional run mixing all n-gram orders.
            train_m_then_n_models(
                m, n, counter, total_evals, start_time,
                pattern="1-gram,2-gram,3-gram,4-gram", d_out="6,6,6,6",
                depth=1,
                filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                dataset="amazon_categories/" + category,
                use_rho=False, seed=None,
                loaded_embedding=loaded_embedding)
    # to learn with an L_1 regularizer
    # first train with the regularizer, choose the best structure, then do hyperparameter search for that structure
    elif exp_num == 6:
        d_out = "24"
        k = 20
        l = 5
        m = 20
        n = 5
        reg_goal_params_list = [80, 60, 40, 20]
        total_evals = len(categories) * (m + n + k + l) * len(reg_goal_params_list)
        all_reg_search_counters = []
        for category in categories:
            for reg_goal_params in reg_goal_params_list:
                # Phase 1: find a structure whose parameter count hits the goal.
                # NOTE(review): logging_dir is a hard-coded absolute user path.
                best, reg_search_counters = regularization_search_experiments.train_k_then_l_models(
                    k, l, counter, total_evals, start_time,
                    logging_dir="/home/jessedd/projects/rational-recurrences/classification/logging/",
                    reg_goal_params=reg_goal_params, pattern="4-gram",
                    d_out=d_out, sparsity_type="states", use_rho=False,
                    filename_prefix="all_cs_and_equal_rho/hparam_opt/structure_search/add_reg_term_to_loss/",
                    seed=None, loaded_embedding=loaded_embedding,
                    reg_strength=8 * 10**-6, distance_from_target=10,
                    dataset="amazon_categories/" + category)
                all_reg_search_counters.append(reg_search_counters)
                # Phase 2: hyperparameter search on the learned structure.
                args = train_m_then_n_models(
                    m, n, counter, total_evals, start_time,
                    pattern=best['learned_pattern'],
                    d_out=best["learned_d_out"],
                    learned_structure="l1-states-learned",
                    reg_goal_params=reg_goal_params,
                    filename_prefix="all_cs_and_equal_rho/hparam_opt/structure_search/add_reg_term_to_loss/",
                    seed=None, loaded_embedding=loaded_embedding,
                    dataset="amazon_categories/" + category, use_rho=False)
        print("search counters:")
        for search_counter in all_reg_search_counters:
            print(search_counter)
    # some rho_entropy experiments
    elif exp_num == 8:
        k = 20
        l = 5
        total_evals = len(categories) * (k + l)
        for d_out in ["24"]:  #, "256"]:
            for category in categories:
                # to learn the structure, and train with the regularizer
                best, reg_search_counters = regularization_search_experiments.train_k_then_l_models(
                    k, l, counter, total_evals, start_time,
                    use_rho=True, pattern="4-gram",
                    sparsity_type="rho_entropy", rho_sum_to_one=True,
                    reg_strength=1, d_out=d_out,
                    filename_prefix="only_last_cs/hparam_opt/reg_str_search/",
                    dataset="amazon_categories/" + category, seed=None,
                    distance_from_target=10,
                    loaded_embedding=loaded_embedding)
    # baseline for rho_entropy experiments
    elif exp_num == 9:
        categories = ["dvd/"]
        patterns = ["1-gram", "2-gram"]  #["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        total_evals = len(categories) * (len(patterns) + 1) * (m + n)
        for category in categories:
            for pattern in patterns:
                # train and eval the learned structure
                args = train_m_then_n_models(
                    m, n, counter, total_evals, start_time,
                    pattern=pattern, d_out="24",
                    filename_prefix="only_last_cs/hparam_opt/",
                    dataset="amazon_categories/" + category,
                    use_last_cs=True, use_rho=False, seed=None,
                    loaded_embedding=loaded_embedding)
    # baseline experiments for l1 regularization, on sst.
    # very similar to exp_num 3
    elif exp_num == 10:
        patterns = ["4-gram", "3-gram", "2-gram", "1-gram"]
        m = 20
        n = 5
        # NOTE(review): exp 3 uses (len(patterns)+1)*(m+n); `m * n` here looks
        # inconsistent and may under-count total evaluations — confirm intent.
        total_evals = m * n
        for pattern in patterns:
            train_m_then_n_models(
                m, n, counter, total_evals, start_time,
                pattern=pattern, d_out="24", depth=1,
                filename_prefix="all_cs_and_equal_rho/hparam_opt/",
                dataset="sst/", use_rho=False, seed=None,
                loaded_embedding=loaded_embedding)
        # One additional run mixing all n-gram orders.
        train_m_then_n_models(
            m, n, counter, total_evals, start_time,
            pattern="1-gram,2-gram,3-gram,4-gram", d_out="6,6,6,6",
            depth=1,
            filename_prefix="all_cs_and_equal_rho/hparam_opt/",
            dataset="sst/", use_rho=False, seed=None,
            loaded_embedding=loaded_embedding)
    # Single run with a fixed learned structure, saved for interpretability work.
    elif exp_num == 11:
        args = ExperimentParams(
            pattern="1-gram,2-gram,3-gram,4-gram", d_out="0,4,0,2",
            learned_structure="l1-states-learned", reg_goal_params=20,
            filename_prefix="all_cs_and_equal_rho/saving_model_for_interpretability/",
            seed=None, loaded_embedding=loaded_embedding,
            dataset="amazon_categories/original_mix/", use_rho=False,
            clip_grad=1.09, dropout=0.1943, rnn_dropout=0.0805,
            embed_dropout=0.3489, lr=2.553E-02, weight_decay=1.64E-06,
            depth=1, batch_size=5)
        cur_valid_err, _, _ = train_classifier.main(args)
def search_reg_str_l1(cur_assignments, kwargs, global_counter,
                      distance_from_target=10):
    """Binary-ish search over L1 regularization strength until the trained
    model's parameter count lands within ``distance_from_target`` of
    ``kwargs["reg_goal_params"]``.

    Mutates ``kwargs["reg_strength"]`` in place (restored to its starting
    value on every failure path). Returns a 5-tuple:
    (num tries, judgement string, valid err, learned d_out, reduced model path).
    Judgement is one of "okay_lr", "bad_hparams", "too_small_lr", "too_big_lr".
    """
    # the final number of params is within this amount of target
    smallest_reg_str = 10**-9
    largest_reg_str = 10**2
    starting_reg_str = kwargs["reg_strength"]
    found_good_reg_str = False
    # Direction flags for the multiplicative search; when the search
    # overshoots in both directions the growth rate is damped toward 1.
    too_small = False
    too_large = False
    counter = 0
    reg_str_growth_rate = 2.0
    reduced_model_path = ""
    while not found_good_reg_str:
        # deleting models which aren't going to be used
        save_learned_structure.remove_old(reduced_model_path)
        # if more than 25 regularization strengths have been tried, throw out hparam assignment and resample
        if counter > 25:
            kwargs["reg_strength"] = starting_reg_str
            # NOTE(review): cur_valid_err / learned_d_out are loop-carried
            # from the previous iteration here (counter > 25 guarantees at
            # least one training run has happened).
            return counter, "bad_hparams", cur_valid_err, learned_d_out, reduced_model_path
        counter += 1
        args = ExperimentParams(counter=global_counter, **kwargs,
                                **cur_assignments)
        cur_valid_err, learned_d_out, reduced_model_path = train_classifier.main(
            args)
        # learned_d_out is a comma-separated list of widths per n-gram order;
        # a width at position i contributes (i+1) params per unit.
        num_params = sum([
            int(learned_d_out.split(",")[i]) * (i + 1)
            for i in range(len(learned_d_out.split(",")))
        ])
        if num_params < kwargs["reg_goal_params"] - distance_from_target:
            # Model too small -> regularization too strong; weaken it.
            if too_large:
                # reduce size of steps for reg strength
                reg_str_growth_rate = (reg_str_growth_rate + 1) / 2.0
                too_large = False
            too_small = True
            kwargs[
                "reg_strength"] = kwargs["reg_strength"] / reg_str_growth_rate
            if kwargs["reg_strength"] < smallest_reg_str:
                kwargs["reg_strength"] = starting_reg_str
                return counter, "too_small_lr", cur_valid_err, learned_d_out, reduced_model_path
        elif num_params > kwargs["reg_goal_params"] + distance_from_target:
            # Model too large -> regularization too weak; strengthen it.
            if too_small:
                # reduce size of steps for reg strength
                reg_str_growth_rate = (reg_str_growth_rate + 1) / 2.0
                too_small = False
            too_large = True
            kwargs[
                "reg_strength"] = kwargs["reg_strength"] * reg_str_growth_rate
            if kwargs["reg_strength"] > largest_reg_str:
                kwargs["reg_strength"] = starting_reg_str
                # it diverged, and for some reason the weights didn't drop
                if num_params == int(args.d_out) * 4 and cur_assignments[
                        "lr"] > .1 and cur_valid_err > .3:
                    return counter, "too_big_lr", cur_valid_err, learned_d_out, reduced_model_path
                else:
                    return counter, "too_small_lr", cur_valid_err, learned_d_out, reduced_model_path
        else:
            # Parameter count within tolerance: success.
            found_good_reg_str = True
    return counter, "okay_lr", cur_valid_err, learned_d_out, reduced_model_path
def train_k_then_l_models(k, l, counter, total_evals, start_time, logging_dir,
                          distance_from_target, **kwargs):
    """Structure search: try ``k`` hyperparameter assignments (each with a
    regularization-strength search plus a fine-tuning run), then retrain the
    best one ``l`` times.

    ``counter`` is a single-element list used as a mutable shared counter.
    Mutates ``kwargs`` in place (prox_step, logging_dir, lr_patience,
    reg_strength). Returns (best dict, list of per-assignment search counts).
    """
    if "seed" in kwargs and kwargs["seed"] is not None:
        np.random.seed(kwargs["seed"])
    assert "reg_strength" in kwargs
    if "prox_step" not in kwargs:
        kwargs["prox_step"] = False
    elif kwargs["prox_step"]:
        assert False, "It's too unstable. books/all_cs_and_equal_rho/hparam_opt/structure_search/proximal_gradient too big then too small"
    # NOTE(review): this assert makes the "rho_entropy" branch in the loop
    # below unreachable — confirm whether that branch is still needed.
    assert kwargs[
        "sparsity_type"] == "states", "setting kwargs for structure learning works only with states"
    assert "lr_patience" not in kwargs, "lr_patience is set s.t. the lr never decreases during structure learning."
    kwargs["logging_dir"] = logging_dir
    # NOTE(review): file_base appears unused in this function.
    file_base = logging_dir + kwargs["dataset"]
    best = {
        "assignment": None,
        "valid_err": 1,
        "learned_pattern": None,
        "learned_d_out": None,
        "reg_strength": None
    }
    reg_search_counters = []
    # Learning-rate search bounds differ for BERT embeddings.
    if kwargs["bert_embed"]:
        lr_lower_bound = BERT_LR_LOWER_BOUND
        lr_upper_bound = BERT_LR_UPPER_BOUND
    else:
        lr_lower_bound = LR_LOWER_BOUND
        lr_upper_bound = LR_UPPER_BOUND
    all_assignments = get_k_sorted_hparams(k, lr_lower_bound, lr_upper_bound)
    # Phase 1: for each assignment, search the reg strength; if the learning
    # rate was judged bad, resample the remaining assignments and retry.
    for i in range(len(all_assignments)):
        valid_assignment = False
        while not valid_assignment:
            cur_assignments = all_assignments[i]
            # to prevent the learning rate from decreasing during structure learning
            kwargs["lr_patience"] = 9999999
            if kwargs["sparsity_type"] == "rho_entropy":
                one_search_counter, lr_judgement = search_reg_str_entropy(
                    cur_assignments, kwargs)
            elif kwargs["sparsity_type"] == "states":
                one_search_counter, lr_judgement, cur_valid_err, learned_d_out, reduced_model_path = search_reg_str_l1(
                    cur_assignments, kwargs, counter[0], distance_from_target)
                learned_pattern = "1-gram,2-gram,3-gram,4-gram"
            del kwargs["lr_patience"]
            reg_search_counters.append(one_search_counter)
            if lr_judgement == "okay_lr":
                valid_assignment = True
            else:
                # Discard this model and resample fresh assignments for the
                # remainder of the list.
                save_learned_structure.remove_old(reduced_model_path)
                new_assignments = get_k_sorted_hparams(k - i, lr_lower_bound,
                                                       lr_upper_bound,
                                                       sort=False)
                all_assignments[i:len(all_assignments)] = new_assignments
                #if lr_judgement == "too_big_lr":
                #    # lower the upper bound
                #    lr_upper_bound = cur_assignments['lr']
                #    reverse = True
                #elif lr_judgement == "too_small_lr":
                #    # rase lower bound
                #    lr_lower_bound = cur_assignments['lr']
                #    reverse = False
                #else:
                #    assert False, "shouldn't be here."
                #new_assignments = get_k_sorted_hparams(k-i, lr_lower_bound, lr_upper_bound)
                #if reverse:
                #    new_assignments.reverse()
                #all_assignments[i:len(all_assignments)] = new_assignments
        # to fine tune the learned model
        kwargs_fine_tune = get_kwargs_for_fine_tuning(kwargs,
                                                      reduced_model_path,
                                                      learned_d_out,
                                                      learned_pattern)
        args = ExperimentParams(counter=counter[0], **kwargs_fine_tune,
                                **cur_assignments)
        cur_valid_err, _, _ = train_classifier.main(args)
        if cur_valid_err < best["valid_err"]:
            best = {
                "assignment": cur_assignments,
                "valid_err": cur_valid_err,
                "learned_pattern": learned_pattern,
                "learned_d_out": learned_d_out,
                "reg_strength": kwargs["reg_strength"]
            }
        counter[0] = counter[0] + 1
        print(
            "trained {} out of {} hyperparameter assignments, so far {} seconds"
            .format(counter[0], total_evals, round(time.time() - start_time, 3)))
    # Phase 2: retrain the best assignment l times, each followed by a
    # fine-tuning pass on the structure it learned.
    kwargs["reg_strength"] = best["reg_strength"]
    for i in range(l):
        kwargs["lr_patience"] = 9999999
        args = ExperimentParams(counter=counter[0],
                                filename_suffix="_{}".format(i), **kwargs,
                                **best["assignment"])
        cur_valid_err, learned_d_out, reduced_model_path = train_classifier.main(
            args)
        del kwargs["lr_patience"]
        # to fine tune the model trained on the above line
        kwargs_fine_tune = get_kwargs_for_fine_tuning(kwargs,
                                                      reduced_model_path,
                                                      learned_d_out,
                                                      learned_pattern)
        args = ExperimentParams(counter=counter[0],
                                filename_suffix="_{}".format(i),
                                **kwargs_fine_tune, **best["assignment"])
        cur_valid_err, learned_d_out, reduced_model_path = train_classifier.main(
            args)
        counter[0] = counter[0] + 1
    return best, reg_search_counters