def main():
    """Train an image classifier from CLI arguments, then save a checkpoint."""
    cli = get_args()

    # Data splits and the pretrained backbone come from the project helpers.
    class_labels, train_data, test_data, valid_data = utility.load_img(cli.data_dir)
    model = utility.load_pretrained_model(cli.arch, cli.hidden_units)

    # NLLLoss pairs with a log-softmax head; only the classifier parameters
    # are handed to the optimizer.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=cli.learning_rate)

    utility.train(model, cli.learning_rate, criterion, train_data, valid_data,
                  cli.epochs, cli.gpu)
    utility.test(model, test_data, cli.gpu)

    # Move weights back to CPU so the checkpoint loads on CPU-only machines.
    model.to('cpu')

    # Save checkpoint for prediction.
    checkpoint = {
        'arch': cli.arch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'hidden_units': cli.hidden_units,
        'class_labels': class_labels,
    }
    utility.save_checkpoint(checkpoint, cli.save_dir)
    print('Saved checkpoint!')
def run_model():
    """Build the configured segmentation model, optionally load weights, train.

    Reads module-level configuration (MODEL, LOAD_MODEL,
    MODEL_TRAINING_SESSION, EPOCHS, N_PATCH_BATCH, ...).
    Exits the process when MODEL is not a known choice.
    """
    dataset = CD_Dataset(path=DATASET_PATH, download=True, fit=False,
                         num_classes=OUTPUT_CHANNELS[0])
    # Fixed normalization statistics instead of fitting them to the data.
    dataset.mean_features = np.array([0.5, 0.5, 0.5])
    dataset.std_features = np.array([0.5, 0.5, 0.5])

    input_patch_size = [SPATCH, SPATCH]
    # List concatenation: spatial patch size + channel dims.
    model_input_size = input_patch_size + INPUT_CHANNELS

    # BUG FIX: the original assigned to MODEL_PATH_NAME inside this function,
    # which makes it a *local* name in Python, so the read on the right-hand
    # side ('MIMO_' + MODEL_PATH_NAME) raised UnboundLocalError before any
    # model could be built. A plain local variable carries the prefixed name.
    if MODEL == MIMO:
        model_path_name = 'MIMO_' + MODEL_PATH_NAME
        model = MimoNet(model_input_size, classes=OUTPUT_CHANNELS[0],
                        regularized=True)
    elif MODEL == UNET:
        model_path_name = 'UNET_' + MODEL_PATH_NAME
        model = Unet(model_input_size, classes=OUTPUT_CHANNELS[0],
                     regularized=True)
    else:
        print('CHOOSE MODEL: 0:MIMO, 1:UNET')
        # NOTE(review): exit code 0 signals success; a non-zero code would be
        # more conventional for this error path — left unchanged for callers.
        sys.exit(0)

    if LOAD_MODEL:
        print("loading model " + model_path_name + " from disk.")
        model.load_model(model_path_name)
    if MODEL_TRAINING_SESSION:
        print("trainig model")
        train(model, dataset, epochs=EPOCHS, n_batch=N_PATCH_BATCH,
              use_weights=True, name=model_path_name)
        print("saving model " + model_path_name + " to disk.")
# One early-stopping controller per architecture; the loop keeps running
# while any of the three models still wants more epochs.
model_selector_rnn = Early_Stopper(patience)
model_selector_gru = Early_Stopper(patience)
model_selector_lstm = Early_Stopper(patience)

criterion = nn.MSELoss()
optimizer_rnn = torch.optim.Adam(rnn.parameters())
optimizer_gru = torch.optim.Adam(gru.parameters())
optimizer_lstm = torch.optim.Adam(lstm.parameters())

print('Training started at:', time_start)
while (model_selector_rnn.keep_training or model_selector_gru.keep_training
       or model_selector_lstm.keep_training):
    # BUG FIX: the original tested `if model_selector_rnn:` and
    # `if model_selector_gru:` — unless Early_Stopper defines __bool__, an
    # instance is always truthy, so those models kept training even after
    # their stopper fired. Test the .keep_training flag instead, matching
    # the LSTM branch elsewhere in this script.
    if model_selector_rnn.keep_training:
        # Per-epoch record: [train loss, validation loss, test loss].
        rnn_loss.append([
            train(x_tr, y_tr, batch_size, optimizer_rnn, criterion, rnn, False),
            validate(x_va, y_va, batch_size, criterion, rnn, False),
            test(x_te, y_te, batch_size, criterion, rnn, False)
        ])
        rnn_time = str(datetime.datetime.now() - time_start)
        # Early stopping is driven by the validation loss (index 1).
        model_selector_rnn.update(rnn_loss[-1][1], n_epochs)
    if model_selector_gru.keep_training:
        gru_loss.append([
            train(x_tr, y_tr, batch_size, optimizer_gru, criterion, gru, False),
            validate(x_va, y_va, batch_size, criterion, gru, False),
            test(x_te, y_te, batch_size, criterion, gru, False)
        ])
# Feature extractor: VGG-16 with the last classifier layer replaced by
# Identity, so the forward pass yields penultimate-layer features.
model = torchvision.models.vgg16(pretrained=True)
model.classifier[6] = torch.nn.Identity()
# VGG has no `fc` attribute; this assignment just adds an unused one
# (presumably kept for parity with ResNet-style models).
model.fc = torch.nn.Identity()
model = torch.nn.DataParallel(model.cuda(), args.gpus)

# Pre-compute features once so the linear head below trains cheaply.
train_feature, train_label = pre_convolute(model, train_loader)
valid_feature, valid_label = pre_convolute(model, valid_loader)

# Linear model -----------------------
train_data2 = torch.utils.data.TensorDataset(torch.tensor(train_feature),
                                             torch.tensor(train_label))
valid_data2 = torch.utils.data.TensorDataset(torch.tensor(valid_feature),
                                             torch.tensor(valid_label))
train_loader2 = torch.utils.data.DataLoader(train_data2, args.train_batch,
                                            num_workers=len(args.gpus),
                                            shuffle=True)
valid_loader2 = torch.utils.data.DataLoader(valid_data2, args.valid_batch,
                                            num_workers=len(args.gpus),
                                            shuffle=False)

# Two-class linear classifier on top of the frozen features.
feature_dim = train_data2[0][0].shape[0]
fc = torch.nn.Linear(feature_dim, 2).cuda()
optimizer = torch.optim.Adam(fc.parameters(), lr=args.learning_rate)

# Baseline validation before training, then one train/validate pass per epoch.
_, accuracy = utility.valid(fc, valid_loader2, f"Epoch {0:03d} validation")
for epoch in range(args.epochs):
    _, _ = utility.train(fc, train_loader2,
                         f"\nEpoch {epoch+1:03d} training ", optimizer)
    _, accuracy = utility.valid(fc, valid_loader2,
                                f"Epoch {epoch+1:03d} validation")
import torch.utils.data as data

import speech_dataset as sd
import utility as util
import model as md

# Toggle between training a fresh model and evaluating on the test split.
TRAIN = True
ROOT_DIR = "../../../datasets/kws_mcu_dataset/"
WORD_LIST = [
    "yes", "no", "up", "down", "left", "right",
    "on", "off", "stop", "go", "unknown", "silence",
]
NUM_EPOCH = 60

if __name__ == "__main__":
    model = md.TCResNet8(40)
    if TRAIN:
        util.train(model, ROOT_DIR, WORD_LIST, NUM_EPOCH)
    else:
        # NOTE(review): the model here still has freshly initialized weights —
        # confirm whether evaluate_testset loads a checkpoint internally.
        train, dev, test = sd.split_dataset(ROOT_DIR, WORD_LIST)
        ap = sd.AudioPreprocessor()
        test_data = sd.SpeechDataset(test, "train", ap, WORD_LIST)
        test_dataloader = data.DataLoader(test_data, batch_size=64,
                                          shuffle=True)
        util.evaluate_testset(model, test_dataloader)
if model_selector_lstm.keep_training:
    # One LSTM epoch: train, then score on the validation and test sets.
    # The record layout is [train loss, validation loss, test loss].
    lstm_epoch = [
        train(x_tr, y_tr, batch_size, optimizer_lstm, criterion, lstm,
              args.cuda),
        validate(x_va, y_va, batch_size, criterion, lstm, args.cuda),
        test(x_te, y_te, batch_size, criterion, lstm, args.cuda),
    ]
    lstm_loss.append(lstm_epoch)
def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point: build, (optionally) resume, train and
    evaluate the longitudinal ResNet model, logging to TensorBoard.

    gpu            -- device index for this worker (None for CPU/DataParallel)
    ngpus_per_node -- GPUs per node; used to derive the global rank and to
                      split batch size / workers in distributed mode
    args           -- parsed command-line namespace with all hyperparameters
    """
    global best_prec1, sample_size
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
        print("Current Device is ", torch.cuda.get_device_name(0))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model2:
    if args.pretrained:
        print("=> Model (date_diff): using pre-trained model '{}_{}'".format(
            args.model, args.model_depth))
        pretrained_model = models.__dict__[args.arch](pretrained=True)
    else:
        if args.model_type == 2:
            print("=> Model (date_diff regression): creating model '{}_{}'".
                  format(args.model, args.model_depth))
        # NOTE(review): source formatting was collapsed; this assignment is
        # placed at the `else` level so pretrained_model is always bound —
        # confirm against the original indentation.
        pretrained_model = generate_model(args)  # good for resnet
    save_folder = "{}/Model/{}{}".format(args.ROOT, args.model,
                                         args.model_depth)
    # Wrap the backbone with the interval-classification head.
    model = longi_models.ResNet_interval(pretrained_model,
                                         args.num_date_diff_classes,
                                         args.num_reg_labels)
    criterion0 = torch.nn.CrossEntropyLoss().cuda(args.gpu)  # for STO loss
    criterion1 = torch.nn.CrossEntropyLoss().cuda(args.gpu)  # for RISI loss
    criterion = [criterion0, criterion1]
    start_epoch = 0
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 betas=(0.9, 0.999), eps=1e-08,
                                 weight_decay=0, amsgrad=False)
    # all models optionally resume from a checkpoint
    if args.resume_all:
        if os.path.isfile(args.resume_all):
            print("=> Model_all: loading checkpoint '{}'".format(
                args.resume_all))
            # map_location keeps tensors on CPU during load; optimizer state
            # tensors are moved back to GPU below.
            checkpoint = torch.load(args.resume_all,
                                    map_location=lambda storage, loc: storage)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            for state in optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()
            start_epoch = checkpoint['epoch']
            print("=> Model_all: loaded checkpoint '{}' (epoch {})".format(
                args.resume_all, checkpoint['epoch']))
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available
        # GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    print("batch-size = ", args.batch_size)
    print("epochs = ", args.epochs)
    print("range-weight (weight of range loss) = ", args.range_weight)
    cudnn.benchmark = True
    print(model)
    # Data loading code
    traingroup = ["train"]
    evalgroup = ["eval"]
    testgroup = ["test"]
    train_augment = ['normalize', 'flip', 'crop']  # 'rotate',
    test_augment = ['normalize', 'crop']
    eval_augment = ['normalize', 'crop']
    # Stage arguments arrive as "[a, b, c]" strings; strip brackets and split.
    train_stages = args.train_stages.strip('[]').split(', ')
    test_stages = args.test_stages.strip('[]').split(', ')
    eval_stages = args.eval_stages.strip('[]').split(', ')
    ###########################################################################
    # test-retest analysis
    trt_stages = args.trt_stages.strip('[]').split(', ')
    # Pair model reuses the trained sub-network (modelA) for STO-only scoring.
    model_pair = longi_models.ResNet_pair(model.modelA,
                                          args.num_date_diff_classes)
    torch.cuda.set_device(args.gpu)
    model_pair = model_pair.cuda(args.gpu)
    if args.resume_all:
        # Derive the model name from the checkpoint path (drops a fixed
        # 8-character suffix — presumably an extension like ".pth.tar").
        model_name = args.resume_all[:-8]
    else:
        model_name = save_folder + "_" + time.strftime("%Y-%m-%d_%H-%M") + \
            traingroup[0] + '_' + \
            args.train_stages.strip('[]').replace(', ', '')
    data_name = args.datapath.split("/")[-1]
    log_name = (args.ROOT + "/log/" + args.model + str(args.model_depth) +
                "/" + data_name + "/" + time.strftime("%Y-%m-%d_%H-%M"))
    writer = SummaryWriter(log_name)
    trt_dataset = long.LongitudinalDataset3DPair(
        args.datapath, testgroup, args.datapath + "/test_retest_list.csv",
        trt_stages, test_augment, args.max_angle, args.rotate_prob,
        sample_size)
    trt_loader = torch.utils.data.DataLoader(trt_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)
    print("\nEvaluation on Test-Retest Set: ")
    util.validate_pair(trt_loader, model_pair, criterion,
                       model_name + "_test_retest", args.epochs, writer,
                       args.print_freq)
    ###########################################################################
    train_dataset = long.LongitudinalDataset3D(
        args.datapath, traingroup, args.datapath + "/train_list.csv",
        train_stages,
        train_augment,  # advanced transformation: add random rotation
        args.max_angle, args.rotate_prob, sample_size)
    eval_dataset = long.LongitudinalDataset3D(
        args.datapath, evalgroup, args.datapath + "/eval_list.csv",
        eval_stages, eval_augment, args.max_angle, args.rotate_prob,
        sample_size)
    test_dataset = long.LongitudinalDataset3D(
        args.datapath, testgroup, args.datapath + "/test_list.csv",
        test_stages, test_augment, args.max_angle, args.rotate_prob,
        sample_size)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    # NOTE(review): the distributed sampler is created above but the
    # `sampler=` argument is commented out, so it is never attached.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        # sampler = train_sampler,
        num_workers=args.workers, pin_memory=True)
    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=True)
    # (Recomputed; identical to the values derived before the test-retest run.)
    data_name = args.datapath.split("/")[-1]
    if args.resume_all:
        model_name = args.resume_all[:-8]
    else:
        model_name = save_folder + "_" + time.strftime("%Y-%m-%d_%H-%M") + \
            traingroup[0] + '_' + \
            args.train_stages.strip('[]').replace(', ', '')
    # Use a tool at comet.com to keep track of parameters used
    # log model name, loss, and optimizer as well
    hyper_params["loss"] = criterion
    hyper_params["optimizer"] = optimizer
    hyper_params["model_name"] = model_name
    hyper_params["save_folder"] = save_folder
    experiment.log_parameters(hyper_params)
    # End of using comet
    log_name = (args.ROOT + "/log/" + args.model + str(args.model_depth) +
                "/" + data_name + "/" + time.strftime("%Y-%m-%d_%H-%M"))
    writer = SummaryWriter(log_name)
    if args.evaluate:
        print("\nEVALUATE before starting training: ")
        util.validate(eval_loader, model, criterion, model_name + "_eval",
                      writer=writer, range_weight=args.range_weight)
    # training the model
    if start_epoch < args.epochs - 1:
        print("\nTRAIN: ")
        for epoch in range(start_epoch, args.epochs):
            if args.distributed:
                train_sampler.set_epoch(epoch)
            util.adjust_learning_rate(optimizer, epoch, args.lr)
            # train for one epoch
            util.train(train_loader, model, criterion, optimizer, epoch,
                       sample_size, args.print_freq, writer,
                       range_weight=args.range_weight)
            # evaluate on validation set
            if epoch % args.eval_freq == 0:
                # Remove a stale per-model eval CSV before re-validating.
                csv_name = model_name + "_eval.csv"
                if os.path.isfile(csv_name):
                    os.remove(csv_name)
                prec = util.validate(eval_loader, model, criterion,
                                     model_name + "_eval", epoch, writer,
                                     range_weight=args.range_weight)
                if args.early_stop:
                    # NOTE(review): a fresh EarlyStopping object is created
                    # every eval epoch, so its patience counter may never
                    # accumulate across epochs — confirm intent.
                    early_stopping = util.EarlyStopping(
                        patience=args.patience, tolerance=args.tolerance)
                    early_stopping(
                        {
                            'epoch': epoch + 1,
                            'arch1': args.arch1,
                            'arch2': args.model2 + str(args.model2_depth),
                            'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        }, prec, model_name)
                    print("=" * 50)
                    if early_stopping.early_stop:
                        print("Early stopping at epoch", epoch, ".")
                        break
                else:
                    # remember best prec@1 and save checkpoint
                    is_best = prec > best_prec1
                    best_prec1 = max(prec, best_prec1)
                    util.save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'arch': args.model + str(args.model_depth),
                            'state_dict': model.state_dict(),
                            'best_prec1': best_prec1,
                            'optimizer': optimizer.state_dict(),
                        }, is_best, model_name)
    if args.test:
        print("\nTEST: ")
        util.validate(test_loader, model, criterion, model_name + "_test",
                      args.epochs, writer, range_weight=args.range_weight)
        print("\nEvaluation on Train Set: ")
        util.validate(train_loader, model, criterion, model_name + "_train",
                      args.epochs, writer, range_weight=args.range_weight)
    ###########################################################################
    # test on only the basic sub-network (STO loss)
    model_pair = longi_models.ResNet_pair(model.modelA,
                                          args.num_date_diff_classes)
    torch.cuda.set_device(args.gpu)
    model_pair = model_pair.cuda(args.gpu)
    if args.test_pair:
        train_pair_dataset = long.LongitudinalDataset3DPair(
            args.datapath, traingroup,
            args.datapath + "/train_pair_list.csv", train_stages,
            test_augment, args.max_angle, args.rotate_prob, sample_size)
        train_pair_loader = torch.utils.data.DataLoader(
            train_pair_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)
        print("\nEvaluation on Train Pair Set: ")
        util.validate_pair(train_pair_loader, model_pair, criterion,
                           model_name + "_train_pair_update", args.epochs,
                           writer, args.print_freq)
        test_pair_dataset = long.LongitudinalDataset3DPair(
            args.datapath, testgroup, args.datapath + "/test_pair_list.csv",
            test_stages, test_augment, args.max_angle, args.rotate_prob,
            sample_size)
        test_pair_loader = torch.utils.data.DataLoader(
            test_pair_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)
        print("\nEvaluation on Test Pair Set: ")
        util.validate_pair(test_pair_loader, model_pair, criterion,
                           model_name + "_test_pair_update", args.epochs,
                           writer, args.print_freq)
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
# GRU benchmark: train for a fixed number of epochs, tracking the training
# cost and a per-epoch test error, then plot both curves.
bidirectional = False
time_start = datetime.datetime.now()
gru = GRU(n_inputs, n_hidden, n_outputs, n_layers, batch_size, bidirectional)
all_losses = []
test_loss = []
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(gru.parameters())

print('Training started at:', time_start)
for epoch in range(1, n_epochs + 1):
    epoch_cost = train(x_tr, y_tr, batch_size, optimizer, criterion, gru,
                       False)
    all_losses.append(epoch_cost)
    elapsed = datetime.datetime.now() - time_start
    print(pandas.DataFrame([epoch, all_losses[-1], elapsed],
                           ['Iteration', 'Cost', 'Elapsed time'], ['GRU']))
    # Mean absolute error on the first test batch.
    pred = gru.pred(Variable(torch.from_numpy(x_te[0:batch_size, :, :])))
    test_loss.append(np.abs((pred.data.numpy() - y_te)).sum() / batch_size)
    print(test_loss[-1])

plt.figure(8)
plt.plot(all_losses)
plt.plot(test_loss)
# LSTM benchmark: mirrors the GRU run — fixed-epoch training with per-epoch
# cost and test-error tracking.
lstm = LSTM(n_inputs, n_hidden, n_outputs, n_layers, batch_size,
            bidirectional)
all_losses = []
test_loss = []
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters())

print('Training started at:', time_start)
for epoch in range(1, n_epochs + 1):
    epoch_cost = train(x_tr, y_tr, batch_size, optimizer, criterion, lstm,
                       False)
    all_losses.append(epoch_cost)
    elapsed = datetime.datetime.now() - time_start
    print(pandas.DataFrame([epoch, all_losses[-1], elapsed],
                           ['Iteration', 'Cost', 'Elapsed time'], ['LSTM']))
    # Mean absolute error over the whole test set.
    pred = lstm.pred(Variable(torch.from_numpy(x_te)))
    test_loss.append(np.abs((pred.data.numpy() - y_te)).sum() / batch_size)
    print(test_loss[-1])

plt.figure(8)
# CLI options (the parser and earlier options such as --data_dir, --gpu and
# --checkpoint_path are presumably defined above this chunk — verify).
args.add_argument('--save_dir', dest="save_dir", action="store",
                  default="./checkpoint.pth",
                  help='save a trained model to this directory')
# BUG FIX: without type=float a user-supplied --learning_rate arrives as a
# str; the default stays 0.01 so behavior is unchanged for default runs.
args.add_argument('--learning_rate', dest="learning_rate", action="store",
                  type=float, default=0.01, help='learning rate')
args.add_argument('--epochs', dest="epochs", action="store", type=int,
                  default=10, help='epochs')
args.add_argument('--arch', dest="arch", action="store", default="vgg19",
                  type=str, help='select a network architecture')
args.add_argument('--hidden_units', dest="hidden_units", action="store",
                  type=int, default=1024, help='hidden nodes')
args = args.parse_args()

data_dir = args.data_dir
save_dir = args.save_dir
lr = args.learning_rate
arch = args.arch
hidden_units = args.hidden_units
gpu = args.gpu
epochs = args.epochs
checkpoint = args.checkpoint_path

import json
# Category-index -> flower-name mapping; only its size is used here.
with open('cat_to_name.json', 'r') as f:
    flower_to_name = json.load(f)
flower_species = len(flower_to_name)

image_datasets, dataloaders = u.loader(data_dir)
model = u.network(arch, gpu, hidden_units)
criterion, optimizer = u.optimizing(model, lr)

# Let's train
model = u.train(model, './ex_model.pth', epochs, optimizer, dataloaders,
                criterion, gpu)
# Let's test
u.test(dataloaders, model, criterion, gpu)
# BUG FIX: the original passed the undefined name `path` (NameError at
# runtime); the parsed --save_dir value is the intended destination.
u.saver(arch, image_datasets, save_dir, model, lr)
# NOTE(review): this chunk starts mid-script — the two statements below most
# likely finish a model branch (e.g. resnet) selected before this point.
for p in model.parameters():
    # Freeze the backbone when --only_fc is set; otherwise fine-tune all.
    p.requires_grad = not args.only_fc
model.fc = torch.nn.Linear(in_features = model.fc.in_features, out_features = 2, bias=True)
if args.model == "inception":
    model = torchvision.models.inception_v3(pretrained = not args.no_pretrain)
    # Drop the auxiliary classifier output so forward() returns one tensor.
    model.aux_logits = False
    for p in model.parameters():
        p.requires_grad = not args.only_fc
    # Replace the head with a 2-class output layer.
    model.fc = torch.nn.Linear(in_features = model.fc.in_features, out_features = 2, bias=True)
if args.model == "densenet":
    model = torchvision.models.densenet121(pretrained = not args.no_pretrain)
    for p in model.parameters():
        p.requires_grad = not args.only_fc
    model.classifier = torch.nn.Linear(in_features = model.classifier.in_features, out_features = 2, bias=True)
if args.model == "vgg":
    model = torchvision.models.vgg19(pretrained = not args.no_pretrain)  # vgg 16
    for p in model.parameters():
        p.requires_grad = not args.only_fc
    model.classifier[6] = torch.nn.Linear(in_features = model.classifier[6].in_features, out_features = 2, bias=True)
model = torch.nn.DataParallel(model.cuda(), args.gpus)
# NOTE(review): all parameters are passed to Adam even when the backbone is
# frozen; frozen params receive no gradients, so this is harmless but noisy.
optimizer = torch.optim.Adam(model.parameters(), lr = args.learning_rate)
# Bail out when running inside IPython/Jupyter (interactive exploration only).
if get_ipython():
    sys.exit()
# Baseline validation, then one train/validate pass per epoch.
_, accuracy = utility.valid(model, valid_loader, f"Epoch {0:03d} validation")
for epoch in range(args.epochs):
    #pass
    _, _ = utility.train(model, train_loader, f"\nEpoch {epoch+1:03d} training ", optimizer)
    _, accuracy = utility.valid(model, valid_loader, f"Epoch {epoch+1:03d} validation")
# model.cpu()
# torch.save(model.module.state_dict(), f'{args.model}-{accuracy:4.2f}.pt')
# model.cuda()