def train_cifar10_model(net, learning_rates=(0.001, 0.0001), iters=(50, 50),
                        output_path='resnet18_cifar10.model'):
    """
    Trains a baseline classification model on CIFAR-10.

    :param net: the network to be trained
    :param learning_rates: learning rates to use, one per training stage
    :param iters: number of epochs to train with each learning rate
    :param output_path: path to save the trained model
    :return: None (the trained model is written to output_path)
    """
    # Defaults are tuples instead of lists to avoid the shared mutable
    # default-argument pitfall; callers may still pass lists.
    # Load data (the test loader is not used during training)
    train_loader, test_loader, _ = cifar10_loader(batch_size=128)

    # Define loss
    criterion = nn.CrossEntropyLoss()

    # One fresh optimizer per stage so Adam's moment estimates restart
    # with each new learning rate.
    # ``n_epochs`` (not ``iter``) avoids shadowing the builtin.
    for lr, n_epochs in zip(learning_rates, iters):
        print("Training with lr=%f for %d iters" % (lr, n_epochs))
        optimizer = optim.Adam(net.parameters(), lr=lr)
        train_model(net, optimizer, criterion, train_loader, epochs=n_epochs)

    save_model(net, output_file=output_path)
def run_nn_experiments(config, x_train, y_train, x_test, y_test,
                       vectorizers_dict, x_train_lang_features,
                       x_test_lang_features, x_train_word_lang_features,
                       x_test_word_lang_features):
    """
    Builds and runs a neural-network experiment, reports its metrics, and
    records the results.

    Trains/evaluates via build_and_run, computes accuracy / macro-F1 /
    micro-F1 plus per-class precision, recall and F1, prints them in the
    excel-style format, saves the model, and appends a summary line to the
    shared results file.
    """
    # Train and evaluate; build_and_run returns the (possibly re-ordered)
    # ground-truth labels alongside the predictions, so y_test is rebound.
    (predictions, y_test, model, tensorboard_log_dir,
     train_test_time, callback_list) = build_and_run(
        x_train, y_train, x_test, y_test, vectorizers_dict, config,
        x_train_lang_features, x_test_lang_features,
        x_train_word_lang_features, x_test_word_lang_features)

    # Evaluation metrics
    (accuracy, macro_f1, micro_f1,
     precisions, recalls, f1_scores) = compute_measures(y_test, predictions, None)

    # Report the headline numbers and the timing
    header, row = get_excel_format(accuracy, macro_f1, micro_f1)
    print(header + '\n' + row)
    print("train and test time: ", train_test_time)

    # Persist the trained model together with its metadata and metrics
    saved_model_name = save_model(
        model, config, config['embeddings_file'], tensorboard_log_dir,
        train_test_time, accuracy, macro_f1, micro_f1,
        precisions, recalls, f1_scores, callback_list)

    # Append a one-line summary to the shared results file
    results_path = os.path.join(TRAIN_MOD_DIR, 'allres.txt')
    append_result(results_path, saved_model_name, accuracy, macro_f1, micro_f1)
def perform_transfer_knowledge(net, donor_net, transfer_loader, output_path,
                               transfer_method, distill_temp=2,
                               learning_rates=(0.0001, 0.0001), iters=(1, 1)):
    """
    Transfers knowledge from a donor network into a student network.

    :param net: the student network that receives the knowledge
    :param donor_net: the (teacher) network the knowledge is taken from
    :param transfer_loader: data loader providing the transfer set
    :param output_path: path to save the student network after transfer
    :param transfer_method: one of 'hint', 'hint_optimized', 'distill', 'pkt'
    :param distill_temp: softmax temperature used for 'distill'
    :param learning_rates: learning rates to use, one per transfer stage
    :param iters: number of epochs for each learning rate
    :raises ValueError: if transfer_method is not recognized
    """
    # Move the models into GPU
    net.cuda()
    donor_net.cuda()

    # Perform the transfer. W is a projection carried across stages by the
    # hint-based methods (each stage continues from the previous W).
    # The loop variable is named ``n_epochs`` — the original shadowed the
    # ``iters`` parameter here.
    W = None
    for lr, n_epochs in zip(learning_rates, iters):
        if transfer_method == 'hint':
            W = unsupervised_hint_transfer(net, donor_net, transfer_loader,
                                           epochs=n_epochs, lr=lr, W=W)
        elif transfer_method == 'hint_optimized':
            W = unsupervised_hint_transfer_optimized(net, donor_net,
                                                     transfer_loader,
                                                     epochs=n_epochs, lr=lr,
                                                     W=W)
        elif transfer_method == 'distill':
            unsupervised_distillation(net, donor_net, transfer_loader,
                                      epochs=n_epochs, lr=lr, T=distill_temp)
        elif transfer_method == 'pkt':
            knowledge_transfer(net, donor_net, transfer_loader,
                               epochs=n_epochs, lr=lr)
        else:
            # Raise instead of ``assert False`` — asserts vanish under -O.
            raise ValueError("Unknown transfer method: %s" % transfer_method)

    save_model(net, output_path)
    print("Model saved at ", output_path)
def run_transfer(learning_rates=(0.001, 0.0001), iters=(3, 0), method='mds'):
    """
    Runs a knowledge-transfer experiment from a ResNet18 teacher into a
    Cifar_Tiny student and evaluates the student on retrieval.

    :param learning_rates: learning rates to use, one per transfer stage
    :param iters: number of epochs for each learning rate
    :param method: transfer method name (currently only 'pkt' is supported)
    :raises ValueError: if method is not recognized
    """
    torch.manual_seed(12345)
    # T and loss_params are part of the experiment configuration even
    # though only some are consumed below.
    student_layers, teacher_layers, weights, loss_params, T = (3,), (3,), (1,), {}, 2
    print(method)
    transfer_name = method

    # Output paths
    output_path = 'models/aux_' + transfer_name + '.model'
    results_path = 'results/aux_' + transfer_name

    # Load a pre-trained student network
    student_net = Cifar_Tiny(10)
    # Use a pre-trained model
    load_model(student_net, 'models/tiny_cifar10.model')

    # Load the teacher model
    teacher_net = ResNet18(num_classes=10)
    load_model(teacher_net, 'models/resnet18_cifar10.model')

    train_loader, test_loader, train_loader_raw = cifar10_loader(batch_size=128)

    # Move the models into GPU
    student_net.cuda()
    teacher_net.cuda()

    # Perform the transfer. The loop variable is named ``n_epochs`` — the
    # original shadowed the ``iters`` parameter here.
    W = None
    for lr, n_epochs in zip(learning_rates, iters):
        if method == 'pkt':
            kernel_parameters = {'student': 'combined',
                                 'teacher': 'combined',
                                 'loss': 'combined'}
            prob_transfer(student_net, teacher_net, train_loader,
                          epochs=n_epochs, lr=lr,
                          teacher_layers=teacher_layers,
                          student_layers=student_layers,
                          layer_weights=weights,
                          kernel_parameters=kernel_parameters,
                          loss_params=loss_params)
        else:
            # Raise instead of ``assert False`` — asserts vanish under -O.
            raise ValueError("Unknown transfer method: %s" % method)

    save_model(student_net, output_path)
    print("Model saved at ", output_path)

    # Perform the evaluation (cosine-based and L2-based retrieval)
    evaluate_model_retrieval(net=Cifar_Tiny(num_classes=10),
                             path=output_path,
                             result_path=results_path + '_retrieval.pickle',
                             layer=3)
    evaluate_model_retrieval(net=Cifar_Tiny(num_classes=10),
                             path=output_path,
                             result_path=results_path + '_retrieval_e.pickle',
                             layer=3, metric='l2')
def perform_kt_transfer(kt_type='hint', epochs=10):
    """
    Runs a handcrafted-feature knowledge-transfer experiment on the YouTube
    dataset over 5 seeds and pickles the retrieval results.

    :param kt_type: transfer variant: 'hint', 'kt', 'kt_optimal' (higher lr),
        or 'kt_supervised' (adds a supervised loss term)
    :param epochs: number of transfer epochs per seed
    :raises ValueError: if kt_type is not recognized
    """
    results = []
    for i in range(5):
        # Loaders over the handcrafted "transfer" features for training
        train_loader, test_loader, database_loader = get_yt_loaders(
            batch_size=128, feature_type='transfer', seed=i)
        net = YT_Small()
        net.cuda()

        if kt_type == 'hint':
            unsupervised_hint_transfer_handcrafted(net, train_loader,
                                                   epochs=epochs, lr=0.0001)
        elif kt_type == 'kt':
            knowledge_transfer_handcrafted(net, train_loader,
                                           epochs=epochs, lr=0.0001)
        elif kt_type == 'kt_optimal':
            # Same transfer, but with a 10x larger learning rate
            knowledge_transfer_handcrafted(net, train_loader,
                                           epochs=epochs, lr=0.001)
        elif kt_type == 'kt_supervised':
            knowledge_transfer_handcrafted(net, train_loader,
                                           epochs=epochs, lr=0.0001,
                                           supervised_weight=0.001)
        else:
            # Fail fast instead of silently saving an untrained model.
            raise ValueError("Unknown kt_type: %s" % kt_type)

        save_model(net, 'models/' + kt_type + '_' + str(i) + '.model')

        # Evaluate retrieval on the raw-image feature loaders
        train_loader, test_loader, database_loader = get_yt_loaders(
            batch_size=128, feature_type='image', seed=i)
        cur_res = retrieval_evaluation(net, database_loader, test_loader)
        results.append(cur_res)
        print(cur_res)

    # Use the public pickle.dump API (the original called the private
    # pickle._dump helper).
    with open('results/' + kt_type + '.pickle', 'wb') as f:
        pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)
def train_cifar10_model(net, learning_rates=(0.001, 0.0001), iters=(50, 50),
                        output_path='resnet18_cifar10.model'):
    """
    Trains a baseline (classification model)

    :param net: the network to be trained
    :param learning_rates: the learning rates to be used during the training
    :param iters: number of epochs using each of the supplied learning rates
    :param output_path: path to save the trained model
    :return:
    """
    # Defaults are tuples instead of lists to avoid the shared mutable
    # default-argument pitfall; callers may still pass lists.
    # Load data (the test loader is not used during training)
    train_loader, test_loader, _ = cifar10_loader(batch_size=128)

    # Define loss
    criterion = nn.CrossEntropyLoss()

    # One fresh optimizer per stage so Adam's moment estimates restart
    # with each new learning rate.
    # ``n_epochs`` (not ``iter``) avoids shadowing the builtin.
    for lr, n_epochs in zip(learning_rates, iters):
        print("Training with lr=%f for %d iters" % (lr, n_epochs))
        optimizer = optim.Adam(net.parameters(), lr=lr)
        train_model(net, optimizer, criterion, train_loader, epochs=n_epochs)

    save_model(net, output_file=output_path)
def run_transfer(
        learning_rates=(0.001, ),
        epochs=(10, ),
        decay=0.7,
        init_weight=100):
    """
    Runs the proposed multi-layer probabilistic transfer from a Cifar_Tiny
    teacher into a Cifar_Very_Tiny student, decaying the auxiliary layer
    weights by ``decay`` after every epoch, then evaluates retrieval.

    :param learning_rates: learning rate for each training stage
    :param epochs: number of epochs per training stage
    :param decay: multiplicative decay applied to the auxiliary weight
    :param init_weight: starting weight of the auxiliary transfer layers
    """
    torch.manual_seed(12345)
    print(init_weight, decay)

    # Experiment configuration (T and loss_params are part of the setup
    # even though not all of them are consumed below).
    student_layers, teacher_layers, loss_params, T = (3, 2, 1, 0), (3, 2, 1, 0), {}, 2

    # Output paths
    output_path = 'models/proposed.model'
    results_path = 'results/proposed'

    # Student network to be trained
    student_net = Cifar_Very_Tiny(10)

    # Pre-trained teacher network
    teacher_net = Cifar_Tiny(num_classes=10)
    load_model(teacher_net, 'models/aux_pkt.model')

    kernel_parameters = {
        'student': 'combined',
        'teacher': 'combined',
        'loss': 'combined'
    }

    train_loader, test_loader, train_loader_raw = cifar10_loader(
        batch_size=128)

    # Move the models into GPU
    student_net.cuda()
    teacher_net.cuda()

    np.random.seed(1)

    # Run epoch-by-epoch so the auxiliary layer weight can be decayed
    # between epochs.
    aux_weight = init_weight
    for stage_epochs, stage_lr in zip(epochs, learning_rates):
        print("Running for ", stage_epochs, " epochs with lr = ", stage_lr)
        for _ in range(stage_epochs):
            print(aux_weight)
            layer_weights = (1, aux_weight, aux_weight, aux_weight)
            prob_transfer(student_net, teacher_net, train_loader,
                          epochs=1, lr=stage_lr,
                          teacher_layers=teacher_layers,
                          student_layers=student_layers,
                          layer_weights=layer_weights,
                          kernel_parameters=kernel_parameters,
                          loss_params=loss_params)
            aux_weight = aux_weight * decay

    save_model(student_net, output_path)
    print("Model saved at ", output_path)

    # Perform the evaluation (cosine-based and L2-based retrieval)
    evaluate_model_retrieval(net=student_net, path='',
                             result_path=results_path + '_retrieval.pickle',
                             layer=3)
    evaluate_model_retrieval(net=student_net, path='',
                             result_path=results_path + '_retrieval_e.pickle',
                             layer=3, metric='l2')