def train(model, early_stopping):
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    # data_iter = data_loader.get_loader(batch_size=args.batch_size)
    data_iter = data_loader.get_train_loader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        model.train()
        run_loss = 0.0

        for idx, data in enumerate(data_iter):
            data = utils.to_var(data)
            ret = model.run_on_batch(data, optimizer, epoch)
            run_loss += ret['loss'].item()
            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(
                epoch, (idx + 1) * 100.0 / len(data_iter),
                run_loss / (idx + 1.0)))

        test_data_iter = data_loader.get_test_loader(
            batch_size=args.batch_size)
        valid_loss = evaluate(model, test_data_iter)

        # early stop
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break
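
# NOTE: a minimal sketch of the EarlyStopping helper assumed by train() above.
# The only contract visible in the caller is that it is invoked as
# early_stopping(valid_loss, model) and exposes an .early_stop flag; the
# patience default and checkpoint path below are illustrative assumptions,
# not part of the original code.
import torch


class EarlyStopping:
    def __init__(self, patience=7, checkpoint_path='checkpoint.pth'):
        self.patience = patience
        self.checkpoint_path = checkpoint_path
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, valid_loss, model):
        if self.best_loss is None or valid_loss < self.best_loss:
            # improvement: remember the loss, save weights, reset patience
            self.best_loss = valid_loss
            self.counter = 0
            torch.save(model.state_dict(), self.checkpoint_path)
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True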
def train(model):
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    data_iter = data_loader.get_train_loader(batch_size=args.batch_size)
    val_iter = data_loader.get_val_loader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        model.train()

        if epoch % 100 == 0:
            print('Save checkpoint')
            torch.save(model, './result/models/model_{}_'.format(epoch)
                       + args.exp_name + '.pth')

        run_loss = 0.0
        for idx, data in enumerate(data_iter):
            data = utils.to_var(data)
            ret = model.run_on_batch(data, optimizer, epoch)
            run_loss += ret['loss'].item()
            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(
                epoch, (idx + 1) * 100.0 / len(data_iter),
                run_loss / (idx + 1.0)))

        if epoch % 10 == 0:
            evaluate(model, val_iter)
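
# NOTE: both train() variants above call utils.to_var(data) on each batch, but
# the utils module is not part of this excerpt. A minimal sketch consistent
# with that usage (recursively moving tensors in a batch onto the GPU when one
# is available); treat the exact behavior as an assumption.
import torch


def to_var(data):
    if torch.is_tensor(data):
        return data.cuda() if torch.cuda.is_available() else data
    if isinstance(data, dict):
        return {k: to_var(v) for k, v in data.items()}
    if isinstance(data, (list, tuple)):
        return type(data)(to_var(v) for v in data)
    return data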
def run(config):
    kwargs = {}
    if config.use_gpu:
        # ensure reproducibility
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.manual_seed(0)
        np.random.seed(0)
        kwargs = {'num_workers': config.num_workers}

    # instantiate data loaders
    if config.is_train:
        data_loader = get_train_loader(config.data_dir, config.batch_size,
                                       is_shuffle=True, **kwargs)
    else:
        data_loader = get_test_loader(config.data_dir, config.batch_size,
                                      is_shuffle=False, **kwargs)

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # either train
    if config.is_train:
        trainer.train()
    # or load a pretrained model and test
    else:
        trainer.test()
def get_experiment_dataloaders(experiment_name: str, data_dir: str = './data/'):
    first_level = get_num_levels(experiment_name)[0]
    config = get_experiment_config(experiment_name, first_level)
    config = config_dict_to_namespace(config)
    config.data_dir = os.path.join(data_dir, config.dataset)

    torch.manual_seed(config.random_seed)
    kwargs = {}
    if not config.disable_cuda and torch.cuda.is_available():
        use_gpu = True
        torch.cuda.manual_seed_all(config.random_seed)
        kwargs = {
            'num_workers': config.num_workers,
            'pin_memory': config.pin_memory
        }
    else:
        use_gpu = False

    data_dict = get_dataset(config.dataset, config.data_dir, 'test')
    kwargs.update(data_dict)
    config.num_classes = data_dict['num_classes']
    test_loader = get_test_loader(batch_size=config.batch_size, **kwargs)

    if 'cifar' in config.dataset:
        valid_loader = test_loader
    else:
        valid_loader = get_test_loader(batch_size=config.batch_size, **kwargs)

    if config.is_train:
        data_dict = get_dataset(config.dataset, config.data_dir, 'train')
        teachers = []
        kwargs.update(data_dict)
        train_loader = get_train_loader(batch_size=config.batch_size,
                                        padding=config.padding,
                                        padding_mode=config.padding_mode,
                                        random_seed=config.random_seed,
                                        shuffle=config.shuffle,
                                        model_num=len(config.model_names),
                                        teachers=teachers,
                                        cuda=use_gpu,
                                        **kwargs)
    else:
        train_loader = None

    return train_loader, valid_loader, test_loader
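
# NOTE: config_dict_to_namespace is used above but not defined in this excerpt.
# A minimal sketch assuming a flat config dict; nested dicts would need a
# recursive variant.
from argparse import Namespace


def config_dict_to_namespace(config: dict) -> Namespace:
    # attribute-style access (config.batch_size) over a plain dict
    return Namespace(**config)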
def train(model, fine_tune, pseudo, num_epochs=100, data_sets=None):
    init_lr = 0.0001
    criterion = nn.BCELoss()

    if fine_tune:
        arch = model.name
        if arch.startswith('resnet') or arch.startswith('inception'):
            dense_layers = model.fc
        elif arch.startswith('densenet') or arch.startswith('vgg'):
            dense_layers = model.classifier
        else:
            raise Exception('unknown model')
        optimizer_ft = optim.SGD(dense_layers.parameters(), lr=init_lr,
                                 momentum=0.9)
        init_lr = 0.001
    else:
        optimizer_ft = optim.SGD(model.parameters(), lr=init_lr, momentum=0.9)

    max_num = 2
    if pseudo:
        pseudo_data, valid_data = data_sets
        data_loaders = {
            'train': data_loader.get_pseudo_train_loader(model, pseudo_data),
            'valid': data_loader.get_val_loader(model, valid_data)
        }
        max_num += 2
    else:
        train_data, valid_data = data_sets
        data_loaders = {
            'train': data_loader.get_train_loader(model, train_data),
            'valid': data_loader.get_val_loader(model, valid_data)
        }

    model = train_model(model, criterion, optimizer_ft, lr_scheduler,
                        max_num=max_num, init_lr=init_lr,
                        num_epochs=num_epochs, data_loaders=data_loaders,
                        fine_tune=fine_tune, pseudo=pseudo)
    return model
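
# NOTE: lr_scheduler is passed to train_model() above but not defined in this
# excerpt. A common hand-rolled variant from this era of PyTorch code decays
# the learning rate by a fixed factor every few epochs; the decay factor and
# step size below are illustrative assumptions.
def lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    lr = init_lr * (0.1 ** (epoch // lr_decay_epoch))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer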
def main(config):
    # ensure directories are setup
    prepare_dirs(config)

    # ensure reproducibility
    # torch.manual_seed(config.random_seed)
    kwargs = {}
    if config.use_gpu:
        # torch.cuda.manual_seed_all(config.random_seed)
        kwargs = {
            'num_workers': config.num_workers,
            'pin_memory': config.pin_memory
        }
        # torch.backends.cudnn.deterministic = True

    # instantiate data loaders
    test_data_loader = get_test_loader(config.data_dir, config.batch_size,
                                       **kwargs)
    if config.is_train:
        train_data_loader = get_train_loader(config.data_dir,
                                             config.batch_size,
                                             config.random_seed,
                                             config.shuffle, **kwargs)
        data_loader = (train_data_loader, test_data_loader)
    else:
        data_loader = test_data_loader

    # instantiate trainer
    trainer = Trainer(config, data_loader)

    # either train
    if config.is_train:
        save_config(config)
        trainer.train()
    # or load a pretrained model and test
    else:
        trainer.test()
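
# NOTE: prepare_dirs and save_config are called in main() above but are not
# part of this excerpt. Minimal sketches under the assumption that config
# carries ckpt_dir/logs_dir paths and is an argparse-style namespace; the
# attribute names are illustrative, not confirmed by the original code.
import os
import json


def prepare_dirs(config):
    for path in [config.ckpt_dir, config.logs_dir]:
        os.makedirs(path, exist_ok=True)


def save_config(config):
    # persist the run configuration next to the checkpoints
    with open(os.path.join(config.ckpt_dir, 'config.json'), 'w') as f:
        json.dump(vars(config), f, indent=2)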
def main(config):
    # ensure reproducibility
    torch.manual_seed(config.random_seed)

    scores = []
    times = []
    for i in range(1, 4):
        start = time.time()
        count = i
        train_data, test_data = load_dataset(config.data_dir, str(count))

        # instantiate data loaders
        data_loader = get_train_loader(train_data, config.batch_size,
                                       config.random_seed, config.shuffle)
        test_loader = get_test_loader(test_data, config.batch_size)

        # instantiate trainer
        trainer = Trainer(config, count, data_loader, test_loader)
        trainer.train()
        result = trainer.test()
        scores.append(result)

        elapsed = time.time() - start
        times.append(elapsed)

    scores = np.array(scores)
    times = np.array(times)
    print('aver time', times.mean())
    # print('avg\tacc\tf1\tprec\trec\tauc')
    print('acc:', scores.mean(axis=0)[0],
          '\nf1', scores.mean(axis=0)[1],
          '\nprec', scores.mean(axis=0)[2],
          '\nrec', scores.mean(axis=0)[3])
def init_loaders(train_batch_size, test_batch_size):
    import json
    import h5py
    from gensim.models.keyedvectors import KeyedVectors

    with open('./data/datainfo-v1.1.json', 'r') as f:
        data = json.load(f)

    f = h5py.File('./data/resnet_features.hdf5', 'r')
    img_features = f['resnet152_features'][()]
    f.close()

    wordvectors_file_vec = './data/fasttext-sbwc.vec'
    # count = 1000
    wordvectors = KeyedVectors.load_word2vec_format(
        wordvectors_file_vec)  # , limit=count)

    train_loader = get_train_loader(wordvectors, data, img_features,
                                    train_batch_size)
    test_loader = get_test_loader(wordvectors, data, img_features,
                                  test_batch_size)
    return train_loader, test_loader
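
# Usage sketch for init_loaders(); the batch sizes are illustrative values,
# not taken from the original code.
train_loader, test_loader = init_loaders(train_batch_size=64,
                                         test_batch_size=256)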
def main(args):
    # for fast training.
    torch.backends.cudnn.benchmark = True
    setup_seed(args.seed)

    # create directories if not exist.
    create_folder(args.save_root_dir, args.version, args.model_save_path)
    create_folder(args.save_root_dir, args.version, args.sample_path)
    create_folder(args.save_root_dir, args.version, args.log_path)
    create_folder(args.save_root_dir, args.version, args.val_result_path)
    create_folder(args.save_root_dir, args.version, args.test_result_path)

    if args.mode == 'train':
        loaders = Munch(ref=get_train_loader(root=args.train_img_dir,
                                             img_size=args.image_size,
                                             resize_size=args.resize_size,
                                             batch_size=args.train_batch_size,
                                             shuffle=args.shuffle,
                                             num_workers=args.num_workers,
                                             drop_last=args.drop_last),
                        val=get_test_loader(root=args.val_img_dir,
                                            batch_size=args.val_batch_size,
                                            shuffle=True,
                                            num_workers=args.num_workers))
        trainer = Trainer(loaders, args)
        trainer.train()
    elif args.mode == 'test':
        loaders = Munch(tes=get_test_loader(root=args.test_img_dir,
                                            img_size=args.test_img_size,
                                            batch_size=args.val_batch_size,
                                            shuffle=True,
                                            num_workers=args.num_workers))
        tester = Tester(loaders, args)
        tester.test()
    else:
        raise NotImplementedError('Mode [{}] is not found'.format(args.mode))
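
# NOTE: setup_seed() is called in main() above but not defined in this
# excerpt. A conventional sketch that seeds Python, NumPy, and PyTorch;
# whether the original also touched cudnn flags is unknown (main() already
# sets cudnn.benchmark = True for speed).
import random
import numpy as np
import torch


def setup_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)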
args.cuda = not args.no_cuda and torch.cuda.is_available()
args.span_range_height = args.span_range_width = args.span_range
args.grid_height = args.grid_width = args.grid_size
args.image_height = args.image_width = 28

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

model = mnist_model.get_model(args)
if args.cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
train_loader = data_loader.get_train_loader(args)
test_loader = data_loader.get_test_loader(args)


def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # print(data.shape)
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
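
# NOTE: the evaluation counterpart to train() is not shown in this excerpt.
# A minimal sketch mirroring the loop above (same F.nll_loss criterion, same
# loaders); it uses the modern torch.no_grad() API rather than the legacy
# volatile Variable the original era of code would have used.
def test():
    model.eval()
    test_loss, correct = 0.0, 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            correct += output.argmax(dim=1).eq(target).sum().item()
    test_loss /= len(test_loader.dataset)
    print('Test: avg loss {:.4f}, accuracy {}/{}'.format(
        test_loss, correct, len(test_loader.dataset)))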
# keep only pretrained weights whose names and shapes match the current model
pretrained_state = {
    k: v
    for k, v in pretrained_state.items()
    if k in model_state and v.size() == model_state[k].size()
}
model_state.update(pretrained_state)
net.load_state_dict(model_state)
if cuda:
    net.cuda()

criterion = net_sphere.AngleLoss()

# train_dir = '/Users/josephrobinson/Downloads/'
train_dir = args.data_dir + '/train/'
val_dir = args.data_dir + '/val/'  # 'train'
train_loader = get_train_loader(train_dir, batch_size=args.batch_size)

print('start: time={}'.format(dt()))
# optimizer = optim.Adam(net.parameters(), lr=args.lr)
best_acc = 0
if not args.train:
    print('Begin train')
    for epoch in range(args.n_epochs):
        if epoch in [0, 2, 4, 6, 8]:
            if epoch != 0:
                args.lr *= 0.1
            # hardcoded for now (n_epochs = 3)
            params = [x for x in net.parameters() if x.requires_grad]
            optimizer = optim.SGD(params, lr=args.lr, momentum=0.9,
                                  weight_decay=5e-4)
with open("data.pickle", "rb") as f: dataset = pickle.load(f) # data cut off and shuffle #data_in,data_out = bf.data_cutoff(dataset,output_size,cut_off=70) #data_loader.update_dataset(dataset,data_in,data_out) # spilt dataset train_dataset, test_dataset = data_loader.spilt_train_test_dataset(dataset) #train_dataset,test_dataset = data_loader.advanced_spilt_train_test_dataset(dataset,output_size) # balance train part #train_in,train_out = bf.balance_avg(train_dataset,output_size) #print(f"Before balance:\n{bf.view_count(train_dataset,output_size)}") #data_loader.update_dataset(train_dataset,train_in,train_out) #print(f"After balance:\n{bf.view_count(train_dataset,output_size)}") validate_loader = data_loader.get_validate_loader(test_dataset, 32) train_loader = data_loader.get_train_loader(train_dataset, 32) def validate_one_epoch(device, model, criterion, validate_loader): model.eval() num_validate = len(validate_loader.sampler.indices) if num_validate == 0: print("number of data is 0") return -1, -1 val_loss = 0. num_correct = 0 for b, (batch_input, batch_label) in enumerate(validate_loader): for i in range(len(batch_input)): # read data data_input, data_label = batch_input[i], batch_label[i] print(data_input)
def main_one(csnum):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # prepare neural network
    validate_size = 0.1
    num_bands = 100
    hs_indices = list(range(48))
    # hs_indices = [0, 1, 3, 4, 5, 7, 8, 13, 31, 34, 37]  # 11 hs points in Brillouin zone out of 40

    cs_sizes = crystalsystem.crystalsystem_sizes()
    output_size = (cs_sizes[csnum - 1] - cs_sizes[csnum - 2] + 1
                   if csnum > 1 else 3)

    """
    model = torch.nn.Sequential(
        torch.nn.LeakyReLU(),
        torch.nn.Linear(len(hs_indices) * num_bands, 300),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(300, 100),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(100, output_size),
        torch.nn.LeakyReLU(),
    )
    """
    model = torch.nn.Sequential(
        # torch.nn.LeakyReLU(),
        torch.nn.Linear(len(hs_indices) * num_bands, 128),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(128, 64),
        torch.nn.LeakyReLU(),
        torch.nn.Linear(64, output_size),
        torch.nn.LeakyReLU(),
    )
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1,
                                                gamma=0.75)
    criterion = torch.nn.CrossEntropyLoss()

    with open("data.pickle", "rb") as f:
        dataset = pickle.load(f)

    # data cut off and shuffle
    # data_in, data_out = bf.data_cutoff(dataset, output_size, cut_off=0)
    # data_loader.update_dataset(dataset, data_in, data_out)

    # split dataset
    # train_dataset, test_dataset = data_loader.spilt_train_test_dataset(dataset)
    # train_dataset, test_dataset = data_loader.advanced_spilt_train_test_dataset(dataset, output_size)

    # balance train part
    train_in, train_out = bf.balance_avg(dataset, output_size)
    # print(f"Before balance:\n{bf.view_count(train_dataset, output_size)}")
    data_loader.update_dataset(dataset, train_in, train_out)
    # print(f"After balance:\n{bf.view_count(train_dataset, output_size)}")

    train_dataset, test_dataset = data_loader.advanced_spilt_train_test_dataset(
        dataset, output_size)
    validate_loader = data_loader.get_validate_loader(test_dataset, 32)
    train_loader = data_loader.get_train_loader(train_dataset, 32)

    # train
    ech, loss, ech_a, acc = function_training.validate_train_loop(
        device, model, optimizer, scheduler, criterion,
        validate_loader, train_loader,
        num_epoch=30, num_epoch_per_validate=1,
        state_dict_path=f"state_dicts/state_dict_cs2sg_{csnum}")

    plot_loss(ech, loss, ech_a, acc)
    plot_dist(dataset, output_size, title="Cut-off Raw sample")
    plot_dist(train_dataset, output_size, title="Train sample")
    plot_dist(test_dataset, output_size, title="Test sample")
def main(**kwargs):
    global args
    lowest_error1 = 100

    for arg, v in kwargs.items():
        args.__setattr__(arg, v)

    program_start_time = time.time()
    instanceName = "Classification_Accuracy"
    folder_path = os.path.dirname(os.path.abspath(__file__)) + os.sep + args.model

    timestamp = datetime.datetime.now()
    ts_str = timestamp.strftime('%Y-%m-%d-%H-%M-%S')
    path = (folder_path + os.sep + instanceName + os.sep + args.model_name
            + os.sep + ts_str)
    tensorboard_folder = path + os.sep + "Graph"
    os.makedirs(path)
    args.savedir = path

    writer = SummaryWriter(tensorboard_folder)

    global logFile
    logFile = path + os.sep + "log.txt"
    args.filename = logFile

    global num_outputs
    print(args)

    global device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if args.data == "cifar100" or args.data == "CIFAR100":
        fig_title_str = " on CIFAR-100"
    elif args.data == "cifar10" or args.data == "CIFAR10":
        fig_title_str = " on CIFAR-10"
    elif args.data == "tiny_imagenet":
        fig_title_str = " on tiny_imagenet"
    else:
        LOG("ERROR ============================= dataset should be CIFAR10 or CIFAR100",
            logFile)
        raise NotImplementedError

    captionStrDict = {
        "fig_title": fig_title_str,
        "x_label": "epoch",
        "elastic_final_layer_label": "Final_Layer_Output_Classifier",
        "elastic_intermediate_layer_label": "Intermediate_Layer_Classifier_"
    }

    # save input parameters into log file
    LOG("program start time: " + ts_str + "\n", logFile)

    # if args.layers_weight_change == 1:
    #     LOG("weights for intermediate layers: 1/(34-Depth), giving different weights for different intermediate layers output, using the formula weigh = 1/(34-Depth)", logFile)
    # elif args.layers_weight_change == 0:
    #     LOG("weights for intermediate layers: 1, giving same weights for different intermediate layers output as 1", logFile)
    # else:
    #     print("Parameter --layers_weight_change, Error")
    #     sys.exit()

    if args.model in ["Elastic_ResNet18", "Elastic_ResNet34",
                      "Elastic_ResNet50", "Elastic_ResNet101",
                      "Elastic_ResNet152"]:
        model = Elastic_ResNet(args, logFile)
    elif args.model == "Elastic_InceptionV3":
        # pytorch pretrained InceptionV3 accepts image size (299, 299, 3)
        # instead of (224, 224, 3)
        args.target_size = (299, 299, 3)
        model = Elastic_InceptionV3(args, logFile)
    elif args.model == "Elastic_MobileNet":
        model = Elastic_MobileNet(args, logFile)
    elif args.model == "Elastic_VGG16":
        model = Elastic_VGG16_bn(args, logFile)
    elif args.model == "Elastic_SqueezeNet":
        model = Elastic_SqueezeNet(args, logFile)
    elif args.model in ["Elastic_DenseNet121", "Elastic_DenseNet169",
                        "Elastic_DenseNet201"]:
        model = Elastic_DenseNet(args, logFile)
    else:
        LOG("--model parameter should be in ResNet, InceptionV3, MobileNet, VGG16, SqueezeNet, DenseNet",
            logFile)
        exit()

    num_outputs = model.num_outputs
    # num_outputs = 1
    LOG("num_outputs: " + str(num_outputs), logFile)
    LOG("successfully create model: " + args.model, logFile)

    args_str = str(args)
    LOG(args_str, logFile)

    model = model.to(device)
    if device == 'cuda':
        model = torch.nn.DataParallel(model).cuda()
        cudnn.benchmark = True

    # TUT thinkstation data folder path
    data_folder = "/media/yi/e7036176-287c-4b18-9609-9811b8e33769/tiny_imagenet/tiny-imagenet-200"
    # narvi data folder path
    # data_folder = "/home/zhouy/data/tiny-imagenet-200"
    # XPS 15 laptop data folder path
    # data_folder = "D:\Elastic\data"

    # args.batch_size = 1
    summary(model, (3, 224, 224))

    if args.data == "tiny_imagenet":
        train_loader, test_loader = tiny_image_data_loader(data_folder, args)
    else:
        train_loader = get_train_loader(args.data,
                                        data_dir=data_folder,
                                        batch_size=args.batch_size,
                                        augment=False,
                                        target_size=args.target_size,
                                        random_seed=20180614,
                                        valid_size=0.2,
                                        shuffle=True,
                                        show_sample=False,
                                        num_workers=4,
                                        pin_memory=True,
                                        debug=args.debug)
        test_loader = get_test_loader(args.data,
                                      data_dir=data_folder,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      target_size=args.target_size,
                                      num_workers=4,
                                      pin_memory=True,
                                      debug=args.debug)

    criterion = nn.CrossEntropyLoss().cuda()

    if args.data != "tiny_imagenet":
        pretrain_optimizer = torch.optim.SGD(
            filter(lambda p: p.requires_grad, model.parameters()),
            args.pretrain_learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay)

        LOG("==> Pretraining for **1** epochs \n", logFile)
        for pretrain_epoch in range(0, 1):
            accs, losses, lr = train(train_loader, model, criterion,
                                     pretrain_optimizer, pretrain_epoch)
            epoch_result = (" pretrain epoch: " + str(pretrain_epoch)
                            + ", pretrain error: " + str(accs)
                            + ", pretrain loss: " + str(losses)
                            + ", pretrain learning rate: " + str(lr)
                            + ", pretrain total train sum loss: "
                            + str(sum(losses)))
            LOG(epoch_result, logFile)

        summary(model, (3, 224, 224))

    LOG("==> Full training \n", logFile)
    for param in model.parameters():
        param.requires_grad = True

    optimizers = []
    childs = []
    k = 0
    for child in model.parameters():
        childs.append(child)
        k += 1

    # childs_params = [childs[:9], childs[:15], childs[:21], childs[:27],
    #                  childs[:33], childs[:39], childs[:45], childs[:51],
    #                  childs[:57], childs[:63], childs[:69], childs[:75], childs]
    childs_params = [childs[:25], childs[:43], childs[:61], childs]

    for i in range(num_outputs):
        optimizer = torch.optim.SGD(childs_params[i],
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizers.append(optimizer)

    # optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.weight_decay)
    # summary(model, (3, 224, 224))

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           threshold=1e-4,
                                                           patience=10)

    # implement early stop by own
    EarlyStopping_epoch_count = 0

    epochs_train_accs = []
    epochs_train_top5_accs = []
    epochs_train_losses = []
    epochs_test_accs = []
    epochs_test_losses = []
    epochs_lr = []
    epochs_test_top5_accs = []

    for epoch in range(0, args.epochs):
        epoch_str = "==================================== epoch %d ==============================" % epoch
        LOG(epoch_str, logFile)

        # Train for one epoch
        accs, losses, lr, accs_top5 = train(train_loader, model, criterion,
                                            optimizers, epoch)
        epochs_train_accs.append(accs)
        epochs_train_losses.append(losses)
        epochs_lr.append(lr)
        epochs_train_top5_accs.append(accs_top5)

        writer.add_scalar(tensorboard_folder + os.sep + "data" + os.sep + 'lr',
                          lr, epoch)
        for i, a, l, k in zip(range(len(accs)), accs, losses, accs_top5):
            writer.add_scalar(tensorboard_folder + os.sep + "data" + os.sep
                              + 'train_error_' + str(i), a, epoch)
            writer.add_scalar(tensorboard_folder + os.sep + "data" + os.sep
                              + 'train_losses_' + str(i), l, epoch)
            writer.add_scalar(tensorboard_folder + os.sep + "data" + os.sep
                              + 'train_top5_error_' + str(i), k, epoch)

        epoch_result = ("\ntrain error: " + str(accs)
                        + ", top 5 error: " + str(accs_top5)
                        + ", \nloss: " + str(losses)
                        + ", \nlearning rate " + str(lr)
                        + ", \ntotal train sum loss " + str(sum(losses)))
        LOG(epoch_result, logFile)

        if num_outputs > 1:
            writer.add_scalar(tensorboard_folder + os.sep + "data" + os.sep
                              + 'train_total_sum_losses', sum(losses), epoch)
            losses.append(sum(losses))  # add the total sum loss
LOG("train_total_sum_losses: " + str(sum(losses)), logFile) # run on test dataset LOG("==> test \n", logFile) test_accs, test_losses, test_top5_accs = validate( test_loader, model, criterion) epochs_test_accs.append(test_accs) epochs_test_losses.append(test_losses) epochs_test_top5_accs.append(test_top5_accs) for i, a, l, k in zip(range(len(test_accs)), test_accs, test_losses, test_top5_accs): writer.add_scalar( tensorboard_folder + os.sep + "data" + os.sep + 'test_error_' + str(i), a, epoch) writer.add_scalar( tensorboard_folder + os.sep + "data" + os.sep + 'test_losses_' + str(i), l, epoch) writer.add_scalar( tensorboard_folder + os.sep + "data" + os.sep + 'test_top5_losses_' + str(i), k, epoch) test_result_str = "==> Test epoch: \nfinal output classifier error: " + str( test_accs ) + "test top 5 error: " + str(test_top5_accs) + ", \ntest_loss" + str( test_losses) + ", \ntotal test sum loss " + str(sum(test_losses)) LOG(test_result_str, logFile) total_loss = sum(test_losses) if num_outputs > 1: writer.add_scalar( tensorboard_folder + os.sep + "data" + os.sep + 'test_total_sum_losses', total_loss, epoch) test_losses.append(total_loss) # add the total sum loss LOG("test_total_sum_losses: " + str(total_loss), logFile) log_stats(path, accs, losses, lr, test_accs, test_losses, accs_top5, test_top5_accs) # Remember best prec@1 and save checkpoint is_best = test_accs[ -1] < lowest_error1 #error not accuracy, but i don't want to change variable names if is_best: lowest_error1 = test_accs[-1] #但是有个问题,有时是倒数第二个CLF取得更好的结果 save_checkpoint( { 'epoch': epoch, 'model': args.model_name, 'state_dict': model.state_dict(), 'best_prec1': lowest_error1, 'optimizer': optimizer.state_dict(), }, args) # apply early_stop with monitoring val_loss # EarlyStopping(patience=15, score_function=score_function(val_loss), trainer=model) scheduler.step(total_loss) # adjust learning rate with test_loss if epoch == 0: prev_epoch_loss = total_loss # use all intemediate classifiers sum loss instead of only one classifier loss else: if total_loss >= prev_epoch_loss: # means this current epoch doesn't reduce test losses EarlyStopping_epoch_count += 1 if EarlyStopping_epoch_count > 20: LOG( "No improving test_loss for more than 10 epochs, stop running model", logFile) break # n_flops, n_params = measure_model(model, IMAGE_SIZE, IMAGE_SIZE) # FLOPS_result = 'Finished training! FLOPs: %.2fM, Params: %.2fM' % (n_flops / 1e6, n_params / 1e6) # LOG(FLOPS_result, logFile) # print(FLOPS_result) writer.close() end_timestamp = datetime.datetime.now() end_ts_str = end_timestamp.strftime('%Y-%m-%d-%H-%M-%S') LOG("program end time: " + end_ts_str + "\n", logFile) # here plot figures plot_figs(epochs_train_accs, epochs_train_losses, epochs_test_accs, epochs_test_losses, args, captionStrDict) LOG("============Finish============", logFile)
def __init__(self, args, model, optimizer, lr_policy):
    self.args = args
    self.lr_policy = lr_policy
    self.iter_wise = self.lr_policy.iteration_wise

    # for logging the training
    val_head = ["iter" if self.iter_wise else "epoch", "mean_pixel_accuracy"]
    for i in range(self.args.class_num):
        val_head.append("mean_precision_class_{}".format(i))
    for i in range(self.args.class_num):
        val_head.append("mean_IoU_class_{}".format(i))

    self.tlog = self.get_train_logger(
        {
            "train": ["iter" if self.iter_wise else "epoch",
                      "batch_mean_total_loss"],
            "val": val_head
        },
        save_dir=self.args.save_dir,
        save_name=self.args.save_name,
        arguments=self.get_argparse_arguments(self.args),
        use_http_server=self.args.use_http_server,
        use_msg_server=self.args.use_msg_server,
        notificate=False,
        visualize_fetch_stride=self.args.viz_fetch_stride,
        http_port=self.args.http_server_port,
        msg_port=self.args.msg_server_port)

    # paths
    self.save_dir = self.tlog.log_save_path
    self.model_param_dir = self.tlog.mkdir("model_param")

    if torch.cuda.is_available() and not self.args.nogpu:
        self.map_device = torch.device('cuda:{}'.format(
            self.args.gpu_device_num))
    else:
        self.map_device = torch.device('cpu')

    self.model = model
    if torch.cuda.is_available() and not args.nogpu:
        self.model = self.model.to(self.map_device)

    self.optimizer = optimizer

    self.train_loader = data_loader.get_train_loader(
        self.args, [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)])
        # [(0.485, 0.456, 0.406), (0.229, 0.224, 0.225)])
    self.val_loader = data_loader.get_val_loader(
        self.args, [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)])

    self.cmap = self._gen_cmap()

    if self.args.show_parameters:
        for idx, m in enumerate(model.modules()):
            print(idx, '->', m)
        print(args)

    print("\nsaving at {}\n".format(self.save_dir))
# Define a split for train/valid
valid_size = 0.2
batch_size = 10

num_train = len(cities)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Load data generators
train_data_loader = get_train_loader(cities=cities,
                                     labels=labels,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     collate_fn=collate_fn,
                                     sampler=train_sampler)
valid_data_loader = get_train_loader(cities=cities,
                                     labels=labels,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     collate_fn=collate_fn,
                                     sampler=valid_sampler)

# Initialize the model to train
model = LSTMClassifier(27, 10, 14)

# Loss and Optimizer
criterion = nn.NLLLoss()
learning_rate = 0.8
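
# NOTE: the optimizer and epoch loop are not shown in this excerpt. A minimal
# sketch wired to the names defined above (model, criterion, learning_rate,
# train_data_loader); the optimizer choice, the epoch count, and the batch
# structure produced by collate_fn are all assumptions.
import torch

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(10):
    model.train()
    for sequences, targets in train_data_loader:
        optimizer.zero_grad()
        # LSTMClassifier is assumed to emit log-probabilities, matching NLLLoss
        log_probs = model(sequences)
        loss = criterion(log_probs, targets)
        loss.backward()
        optimizer.step()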
def train(opt):
    # Load models
    print('----------- Network Initialization --------------')
    teacher = select_model(dataset=opt.data_name,
                           model_name=opt.t_name,
                           pretrained=True,
                           pretrained_models_path=opt.t_model,
                           n_classes=opt.num_class).to(opt.device)
    print('finished teacher model init...')

    student = select_model(dataset=opt.data_name,
                           model_name=opt.s_name,
                           pretrained=True,
                           pretrained_models_path=opt.s_model,
                           n_classes=opt.num_class).to(opt.device)
    print('finished student model init...')

    teacher.eval()
    nets = {'snet': student, 'tnet': teacher}
    for param in teacher.parameters():
        param.requires_grad = False

    # initialize optimizer
    optimizer = torch.optim.SGD(student.parameters(),
                                lr=opt.lr,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay,
                                nesterov=True)

    # define loss functions
    if opt.cuda:
        criterionCls = nn.CrossEntropyLoss().cuda()
        criterionAT = AT(opt.p)
    else:
        criterionCls = nn.CrossEntropyLoss()
        criterionAT = AT(opt.p)

    print('----------- DATA Initialization --------------')
    train_loader = get_train_loader(opt)
    test_clean_loader, test_bad_loader = get_test_loader(opt)

    print('----------- Train Initialization --------------')
    for epoch in range(0, opt.epochs):
        adjust_learning_rate(optimizer, epoch, opt.lr)

        # train every epoch
        criterions = {'criterionCls': criterionCls, 'criterionAT': criterionAT}

        if epoch == 0:
            # test once before any training
            test(opt, test_clean_loader, test_bad_loader, nets, criterions,
                 epoch)

        train_step(opt, train_loader, nets, optimizer, criterions, epoch + 1)

        # evaluate on testing set
        print('testing the models......')
        acc_clean, acc_bad = test(opt, test_clean_loader, test_bad_loader,
                                  nets, criterions, epoch + 1)

        # remember best precision and save checkpoint
        # save_root = opt.checkpoint_root + '/' + opt.s_name
        if opt.save:
            is_best = acc_clean[0] > opt.threshold_clean
            opt.threshold_clean = min(acc_bad[0], opt.threshold_clean)
            best_clean_acc = acc_clean[0]
            best_bad_acc = acc_bad[0]
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': student.state_dict(),
                    'best_clean_acc': best_clean_acc,
                    'best_bad_acc': best_bad_acc,
                    'optimizer': optimizer.state_dict(),
                }, is_best, opt.checkpoint_root, opt.s_name)
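
# NOTE: adjust_learning_rate(optimizer, epoch, opt.lr) is called above but not
# defined in this excerpt. A common step-decay sketch; the decay factor and
# the 20-epoch step are illustrative assumptions.
def adjust_learning_rate(optimizer, epoch, base_lr):
    lr = base_lr * (0.1 ** (epoch // 20))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr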
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 mode='min',
                                                 factor=0.1,
                                                 patience=20,
                                                 verbose=True)

print('start: time={}'.format(dt()))
# optimizer = optim.Adam(net.parameters(), lr=args.lr)
best_acc = 0
if not args.train:
    print('Begin train')
    for epoch in range(args.n_epochs):
        train_set, train_loader = get_train_loader(
            image_size=args.img_size,
            batch_size=args.train_batch_size,
            train_steps=args.train_steps,
            val_steps=args.val_steps,
            one_to_zero_train=args.one_to_zero_train,
            one_to_zero_val=args.one_to_zero_val)
        val_loader = get_val_loader(
            image_size=args.img_size,
            batch_size=args.val_batch_size,
            train_steps=args.train_steps,
            val_steps=args.val_steps,
            one_to_zero_train=args.one_to_zero_train,
            one_to_zero_val=args.one_to_zero_val)
        print("epoch:", epoch)
        # if epoch in args.change_lr_for_epochs:
        #     args.lr *= 0.1
        #     optimizer = optim.SGD(param_groups, lr=args.lr, momentum=0.9, weight_decay=5e-4)