def train(args):
    assert args.num_classes
    common.make_dir(args.checkout_dir)
    # hidden_layer, hidden_size and dropout are module-level settings
    nnet = DNN((args.left_context + args.right_context + 1) * args.feat_dim,
               hidden_layer, hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)
    train_dataset = THCHS30(root=args.data_dir, data_type='train',
                            left_context=args.left_context,
                            right_context=args.right_context, model_type='dnn')
    train_loader = data.DataLoader(dataset=train_dataset,
                                   batch_size=args.min_batch,
                                   shuffle=True, num_workers=6)
    test_dataset = THCHS30(root=args.data_dir, data_type='test',
                           left_context=args.left_context,
                           right_context=args.right_context, model_type='dnn')
    test_loader = data.DataLoader(dataset=test_dataset,
                                  batch_size=args.min_batch,
                                  shuffle=True, num_workers=6)
    # baseline pass over the cv set before any training
    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)
        th.save(nnet,
                common.join_path(args.checkout_dir,
                                 'dnn.{}.pkl'.format(epoch + 1)))
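# `cross_validate` is called above but not defined in this snippet. A minimal
# sketch of what such a helper could look like, assuming the loader yields
# (feats, labels) pairs and frame accuracy is the metric; only the signature
# comes from the call sites, the body is an assumption.
def cross_validate(epoch, nnet, test_loader, num_frames):
    nnet.eval()
    correct = 0
    with th.no_grad():
        for feats, labels in test_loader:
            feats, labels = feats.cuda(), labels.cuda()
            pred = th.argmax(nnet(feats), dim=1)
            correct += (pred == labels).sum().item()
    print('epoch {}: cv frame accuracy {:.2f}%'.format(
        epoch, 100.0 * correct / num_frames))
    nnet.train()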
def run():
    df = pd.read_csv(config.TRAIN_PATH)
    kfold = KFold(n_splits=5, random_state=config.SEED, shuffle=True)
    fold_losses = []
    for i, (train_idx, val_idx) in enumerate(kfold.split(df)):
        print("-------------------------------------------------------")
        print(f"Training fold {i}")
        print("-------------------------------------------------------")
        train = df.iloc[train_idx]
        validation = df.iloc[val_idx]
        train_dataset = PicDataset(train)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=config.BATCH_SIZE)
        val_dataset = PicDataset(validation)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=config.BATCH_SIZE)
        device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        model = DNN()
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
        loss = 0
        for _ in range(config.EPOCHS):
            engine.train_fn(train_data_loader, model, optimizer, device)
            loss = engine.eval_fn(val_data_loader, model, device)
        print(f"Loss on fold {i} is {loss}")
        fold_losses.append(loss)
        torch.save(model.state_dict(), f'./models/model_{i}.bin')
    print(f"Average loss on cross validation is {sum(fold_losses) / 5}")
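# `engine.train_fn` and `engine.eval_fn` are imported helpers whose bodies are
# not shown here. Minimal sketches, assuming PicDataset yields (image, target)
# pairs and a regression-style objective (matching the single scalar loss
# returned above); the signatures mirror the call sites, the bodies are guesses.
def train_fn(data_loader, model, optimizer, device):
    model.train()
    for images, targets in data_loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(images), targets)
        loss.backward()
        optimizer.step()

def eval_fn(data_loader, model, device):
    model.eval()
    total, n = 0.0, 0
    with torch.no_grad():
        for images, targets in data_loader:
            images, targets = images.to(device), targets.to(device)
            total += torch.nn.functional.mse_loss(
                model(images), targets, reduction='sum').item()
            n += targets.numel()
    return total / n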
weight = torch.FloatTensor(list(weight.values())).to(device)
# sort sequences by length (longest first) for batching
data = sorted(corpus.examples.get('seq'), key=lambda x: len(x), reverse=True)
vocab_size = len(corpus.words2id)
logging.info('vocabulary size: {}'.format(vocab_size))
model = DNN(vocab_size=vocab_size, embedding_size=200, hidden_size=512,
            embedding=embedding)
model.to(device)
loss_function = nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model.train()
total_data = len(data)
batch_size = args['batch_size']
total_step = math.ceil(total_data / batch_size)
last_training_loss = 1000000000000
for epoch in range(args.get('epoch')):
    start = 0
    training_loss = 0
    for _ in tqdm(range(int(total_step)), total=total_step):
        batch = data[start:start + batch_size]
        start += batch_size
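        # The rest of the step is cut off above; a plausible completion of the
        # usual forward/backward pass. `pad_batch` and the label layout are
        # assumptions, not part of the original.
        inputs, targets = pad_batch(batch)  # hypothetical collate helper
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        loss = loss_function(logits.view(-1, logits.size(-1)), targets.view(-1))
        loss.backward()
        optimizer.step()
        training_loss += loss.item()
    # `last_training_loss` above suggests epoch-level early stopping; one way
    # it might be used (also an assumption):
    if training_loss > last_training_loss:
        break
    last_training_loss = training_loss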
def train(args, config, io):
    train_loader, validation_loader = get_loader(args, config)
    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load a saved model
    model = DNN(args).to(device)
    # if device == torch.device("cuda"):
    #     model = nn.DataParallel(model)
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))
    # freeze all but the last few layers (unused alternative):
    # for para in list(model.parameters())[:-5]:
    #     para.requires_grad = False

    if args.use_sgd:
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
    # per-layer learning rates (unused alternative):
    # opt = optim.Adam([
    #     {'params': list(model.parameters())[:-1], 'lr': args.lr / 50, 'weight_decay': 1e-4},
    #     {'params': list(model.parameters())[-1], 'lr': args.lr, 'weight_decay': 1e-4},
    # ])

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    criterion = nn.MSELoss()

    best_test_loss = 9999999.
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        train_dis = 0.0
        count = 0.0
        model.train()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            data = drop(jitter(data, device), device)
            # data = jitter(data, device, delta=0.05)
            batch_size = data.shape[0]
            logits = model(data)
            loss = criterion(logits, label)
            opt.zero_grad()
            loss.backward()
            opt.step()
            dis = distance(logits, label)
            count += batch_size
            train_loss += loss.item() * batch_size
            train_dis += dis.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f, distance: %.6f' % (
            epoch, train_loss * 1.0 / count, train_dis * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        test_dis = 0.0
        count = 0.0
        model.eval()
        with torch.no_grad():
            for data, label in validation_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = model(data)
                loss = criterion(logits, label)
                dis = distance(logits, label)
                count += batch_size
                test_loss += loss.item() * batch_size
                test_dis += dis.item() * batch_size
        outstr = 'Test %d, loss: %.6f, distance: %.6f' % (
            epoch, test_loss * 1.0 / count, test_dis * 1.0 / count)
        io.cprint(outstr)

        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(model, config.root + config.model_path)
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
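# `jitter` and `drop` are augmentation helpers referenced above but not shown.
# Minimal sketches, assuming point-cloud-style inputs of shape
# (batch, num_points, dims); the noise scale and drop rate are guesses, though
# the `delta` keyword matches the commented-out call above.
def jitter(data, device, delta=0.01):
    # add small Gaussian noise to every coordinate
    return data + delta * torch.randn(data.shape, device=device)

def drop(data, device, p=0.1):
    # randomly zero out a fraction of the points
    mask = (torch.rand(data.shape[0], data.shape[1], 1, device=device) > p).float()
    return data * mask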
        pass  # closes a preceding except clause (not shown)
    try:
        os.makedirs(args.log)
    except OSError:
        pass

    # load the training and test data
    train_loader, test_loader = get_data(args)
    # use cuda if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    input_size = 28 * 28
    output_size = args.num_classes
    model = DNN(input_size=input_size, output_size=output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # compute loss and accuracy after every mini-batch
    model.train()
    for epoch in range(args.epochs):
        correct = 0
        total = 0
        for idx, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            _, y_pred_t = torch.max(y_pred.data, 1)
            total += y.size(0)
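            # The loop is truncated above; the usual continuation would count
            # correct predictions and take an optimizer step (an assumption,
            # not original code):
            correct += (y_pred_t == y).sum().item()
            loss = criterion(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch {}: train accuracy {:.2f}%'.format(
            epoch, 100.0 * correct / total))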
def train():
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # data_dict, [group2topic, mem2topic]
    data_dict, topic_dict = dh.load_data()
    train_data, train_label, dev_data, dev_label, test_data, test_label = \
        dh.data_split(data_dict, topic_dict)
    train_dataset = dh.Dataset(train_data, train_label)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    dev_dataset = dh.Dataset(dev_data, dev_label)
    dev_loader = DataLoader(dev_dataset, batch_size=128, shuffle=True)

    # linear warm-up followed by cosine decay; note that this factor is defined
    # but never attached to a scheduler below (CosineAnnealingLR is used instead)
    lambda1 = lambda epoch: (
        (epoch / args.warm_up_step) if epoch < args.warm_up_step
        else 0.5 * (math.cos(
            (epoch - args.warm_up_step) /
            (args.n_epoch * len(train_dataset) - args.warm_up_step) * math.pi) + 1))

    model = DNN(args).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.init_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, len(train_loader) * args.n_epoch)

    global_step = 0
    best_f1 = 0.
    loss_deq = collections.deque([], args.report_step)
    for epoch in range(args.n_epoch):
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            inputs = batch['input'].to(device)
            group_topic = batch['group_topic'].to(device)
            mem_topic = batch['mem_topic'].to(device)
            labels = batch['label'].to(device)
            output = model(inputs, mem_topic, group_topic, label=labels)
            loss = output[0]
            loss.backward()
            loss_deq.append(loss.item())
            optimizer.step()
            scheduler.step()
            global_step += 1
            if global_step % args.report_step == 0:
                # epochs elapsed = steps taken / steps per epoch
                # (the original divided by len(train_dataset), i.e. samples)
                logger.info('loss: {}, lr: {}, epoch: {}'.format(
                    np.average(loss_deq).item(),
                    optimizer.param_groups[0]['lr'],
                    global_step / len(train_loader)))
            if global_step % args.eval_step == 0:
                model.eval()
                eval_result = evaluation(model, data_loader=dev_loader, device=device)
                logger.info(eval_result)
                if eval_result['f1'] > best_f1:
                    torch.save(model, './model/{}/torch.pt'.format(args.task_name))
                    best_f1 = eval_result['f1']
                model.train()
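# If the warm-up factor `lambda1` were meant to drive the learning rate, it
# would be hooked up through LambdaLR instead of CosineAnnealingLR, roughly
# like this (a sketch, not original code):
#
#     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)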
for w in st:
    tmp.add(w)
word_index = {w: i for i, w in enumerate(tmp)}
# import pickle
# with open("bow.pkl", "wb") as f:
#     pickle.dump(word_index, f)

# build bag-of-words features: one count vector per tweet
x = torch.zeros(len(train_x1), len(word_index))
for i in range(len(train_x1)):
    for w in train_x1[i]:
        x[i][word_index[w]] += 1
print(x.size())

print("\nConstructing model...", flush=True)
model = DNN(x.size(1)).to(device)
total_param = sum(p.numel() for p in model.parameters())
trainable_param = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("{} parameters with {} trainable".format(total_param, trainable_param),
      flush=True)

print("\nStart training...", flush=True)
train_dataset1 = TwitterDataset(x, train_y1)
train_loader1 = torch.utils.data.DataLoader(dataset=train_dataset1,
                                            batch_size=BATCH,
                                            shuffle=True,
                                            num_workers=4)
train_model(train_loader1, model, device, LR)

print("\nStart testing...", flush=True)
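# `train_model` is called above but defined elsewhere. A minimal sketch,
# assuming binary sentiment labels and a single-logit output trained with
# BCEWithLogitsLoss; only the positional signature matches the call site, the
# epoch count and loss choice are guesses.
def train_model(train_loader, model, device, lr, epochs=10):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.BCEWithLogitsLoss()
    model.train()
    for _ in range(epochs):
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device).float()
            optimizer.zero_grad()
            loss = criterion(model(xb).squeeze(1), yb)
            loss.backward()
            optimizer.step()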
def train(args, config, io):
    train_loader, validation_loader, unlabelled_loader = get_loader(args, config)
    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models: a student and an EMA (mean-teacher) copy
    model = DNN(args).to(device)
    ema_model = DNN(args).to(device)
    for param in ema_model.parameters():
        param.detach_()  # the teacher is never updated by backprop
    if device == torch.device("cuda"):
        model = nn.DataParallel(model)
        ema_model = nn.DataParallel(ema_model)
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))
        ema_model.load_state_dict(torch.load(args.model_path))

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    criterion = nn.MSELoss()
    consistency_criterion = nn.MSELoss()

    best_test_loss = 9999999.
    global_step = 0
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        ema_model.train()
        i = -1
        for (data, label), (u, _) in zip(cycle(train_loader), unlabelled_loader):
            i = i + 1
            # truncate to a common batch size when the loaders disagree
            if data.shape[0] != u.shape[0]:
                bt_size = np.minimum(data.shape[0], u.shape[0])
                data = data[0:bt_size]
                label = label[0:bt_size]
                u = u[0:bt_size]
            data, label, u = data.to(device), label.to(device), u.to(device)
            batch_size = data.shape[0]

            # supervised loss on labelled data
            logits = model(data)
            class_loss = criterion(logits, label)

            # consistency loss between student and teacher on unlabelled data,
            # each seeing an independently jittered copy
            u_student = jitter(u, device)
            u_teacher = jitter(u, device)
            logits_unlabeled = model(u_student)
            ema_logits_unlabeled = ema_model(u_teacher).detach()
            consistency_loss = consistency_criterion(logits_unlabeled,
                                                     ema_logits_unlabeled)
            if epoch < args.consistency_rampup_starts:
                consistency_weight = 0.0
            else:
                consistency_weight = get_current_consistency_weight(
                    args, args.final_consistency, epoch, i, len(unlabelled_loader))
            consistency_loss = consistency_weight * consistency_loss

            loss = class_loss + consistency_loss
            opt.zero_grad()
            loss.backward()
            opt.step()
            global_step += 1
            update_ema_variables(model, ema_model, args.ema_decay, global_step)
            count += batch_size
            train_loss += loss.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f' % (epoch, train_loss * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        ema_model.eval()
        for data, label in validation_loader:
            data, label = data.to(device), label.to(device)
            batch_size = data.shape[0]
            logits = ema_model(data)
            loss = criterion(logits, label)
            count += batch_size
            test_loss += loss.item() * batch_size
        outstr = 'Test %d, loss: %.6f' % (epoch, test_loss * 1.0 / count)
        io.cprint(outstr)

        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(ema_model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(ema_model, config.root + config.model_path)
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
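# The two mean-teacher helpers referenced above are not shown. Minimal sketches
# in the usual mean-teacher style (assumes `import math`); the signatures come
# from the call sites, and `args.consistency_rampup_ends` is an assumed field.
def update_ema_variables(model, ema_model, ema_decay, global_step):
    # Keep the teacher as an exponential moving average of the student,
    # ramping the decay up from 0 early in training.
    alpha = min(1 - 1 / (global_step + 1), ema_decay)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)

def get_current_consistency_weight(args, final_weight, epoch, step_in_epoch,
                                   steps_per_epoch):
    # Sigmoid-shaped ramp-up from 0 to final_weight over the ramp-up window.
    t = (epoch - args.consistency_rampup_starts) + step_in_epoch / steps_per_epoch
    duration = args.consistency_rampup_ends - args.consistency_rampup_starts
    phase = 1.0 - max(0.0, min(1.0, t / duration))
    return final_weight * math.exp(-5.0 * phase * phase)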
def main():
    print('> Starting execution...')
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fit', action='store_true',
                       help='fit the tuned model on digits 0-4')
    group.add_argument('--transfer', action='store_true',
                       help='train a pretrained model on digits 5-9')
    parser.add_argument('--batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=50, metavar='E',
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr', type=float, default=1e-3, metavar='L',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--early-stopping', type=int, default=7, metavar='E',
                        help='early stopping (default: 7 epochs)')
    parser.add_argument('--size', type=int, default=100, metavar='S',
                        help='size of the training data for transfer learning (default: 100)')
    parser.add_argument('--seed', type=int, default=23, metavar='S',
                        help='random seed (default: 23)')
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()  # use cuda if available
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(args.seed)  # random seed

    print('> Loading MNIST data')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_set = datasets.MNIST(MNIST_DATA_DIR, train=True, download=True,
                               transform=transform)
    test_set = datasets.MNIST(MNIST_DATA_DIR, train=False, download=True,
                              transform=transform)

    # (older torchvision attribute names; newer versions expose .targets)
    train_digits_04 = np.where(train_set.train_labels < 5)[0]
    train_digits_59 = np.where(train_set.train_labels > 4)[0]
    test_digits_04 = np.where(test_set.test_labels < 5)[0]
    test_digits_59 = np.where(test_set.test_labels > 4)[0]

    if args.fit:
        # Training the tuned model on digits 0-4
        print('> Training a new model on MNIST digits 0-4')
        X_train_04, y_train_04, X_valid_04, y_valid_04 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_04, test_digits_04)
        torch.manual_seed(args.seed)
        print('> Initializing the model')
        model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM, batch_norm=True)
        model.apply(init_he_normal)  # He initialization
        model = model.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        print('> Training the model')
        model, _, _ = train_model(model, device, X_train_04, y_train_04,
                                  criterion, optimizer,
                                  X_valid=X_valid_04, y_valid=y_valid_04,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)
        print(f'> Saving the model state at {MODEL_04_PATH}')
        torch.save(model.state_dict(), MODEL_04_PATH)
    elif args.transfer:
        # Transfer learning
        print('> Training a model on MNIST digits 5-9 from a pretrained model for digits 0-4')
        if os.path.isfile(MODEL_04_PATH):
            print('> Loading the pretrained model')
            model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM,
                        batch_norm=True).to(device)
            model.load_state_dict(torch.load(MODEL_04_PATH))
            # freeze the pretrained layers
            for param in model.parameters():
                param.requires_grad = False
            # Parameters of newly constructed modules have requires_grad=True by default
            model.fc4 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.fc5 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.out = nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
            model = model.to(device)  # move the newly created layers to the device too
            print('> Using saved model state')
        else:
            print('> Model state file is not found, fit a model before the transfer learning')
            print('> Stopping execution')
            return
        X_train_59, y_train_59, X_valid_59, y_valid_59 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_59[:args.size],
            test_digits_59)
        # shift the labels 5-9 down to 0-4 to match the 5-way output layer
        y_train_59 = y_train_59 - 5
        y_valid_59 = y_valid_59 - 5
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        print('> Training the model')
        model, _, _ = train_model(model, device, X_train_59, y_train_59,
                                  criterion, optimizer,
                                  X_valid=X_valid_59, y_valid=y_valid_59,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)
        print(f'> Saving the model state at {MODEL_59_PATH}')
        torch.save(model.state_dict(), MODEL_59_PATH)
    else:
        print('> Incorrect mode, try either `--fit` or `--transfer`')
        print('> Stopping execution')
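# `init_he_normal` is applied to the fresh model above but not defined in this
# snippet. A minimal sketch of a He-normal initializer for the linear layers
# (the bias treatment is an assumption):
def init_he_normal(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)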
model = DNN(input_size, hidden_size, out_size)
model = torch.nn.DataParallel(model.to(device), device_ids=use_devices)  # use GPU
print(model)

# load data
print('loading data...')
sample_generator = AudioSampleGenerator(os.path.join(in_path, ser_data_fdr))
random_data_loader = DataLoader(
    dataset=sample_generator,
    batch_size=batch_size,  # specified batch size here
    shuffle=True,
    num_workers=1,
    drop_last=True,  # drop the last batch that cannot be divided by batch_size
    pin_memory=True)
print('DataLoader created')

# optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999),
                       eps=1e-08, weight_decay=0)

# Create the tensorboard writer. The logs are stored NOT under run_time but
# under segan_data_out/<tblog_fdr>, so tensorboard can show the graphs of all
# experiments in one board.
tbwriter = SummaryWriter(log_dir=tblog_path)
print('TensorboardX summary writer created')

print('Starting Training...')
total_steps = 1
MSE = nn.MSELoss()

# load the feature scalers fitted during preprocessing
scaler_path_input = os.path.join(scaler_dir, "scaler_input.p")
scaler_input = pickle.load(open(scaler_path_input, 'rb'))
scaler_path_label = os.path.join(scaler_dir, "scaler_label.p")
scaler_label = pickle.load(open(scaler_path_label, 'rb'))

for epoch in range(num_epochs):
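    # The epoch body is cut off above. A sketch of one plausible step, assuming
    # the loader yields (input, label) pairs and the pickled sklearn scalers
    # normalize both sides before the MSE step; everything in this block is an
    # assumption, not original code.
    for input_batch, label_batch in random_data_loader:
        input_batch = torch.from_numpy(
            scaler_input.transform(input_batch.numpy())).float().to(device)
        label_batch = torch.from_numpy(
            scaler_label.transform(label_batch.numpy())).float().to(device)
        optimizer.zero_grad()
        loss = MSE(model(input_batch), label_batch)
        loss.backward()
        optimizer.step()
        tbwriter.add_scalar('train/mse_loss', loss.item(), total_steps)
        total_steps += 1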