def train(args, model):
    model.train()

    loader = DataLoader(MA(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)

    # zero weight for class 0 so the background class is ignored by the loss
    weight = torch.ones(2)
    weight[0] = 0

    if args.cuda:
        criterion = CrossEntropyLoss2d(weight.cuda())
    else:
        criterion = CrossEntropyLoss2d(weight)

    # keyword args matter here: in SGD(params, 1e-4, .9, 2e-5) the fourth
    # positional argument is dampening, not weight_decay
    optimizer = Adam(model.parameters())
    if args.model.startswith('FCN'):
        optimizer = SGD(model.parameters(), lr=1e-4, momentum=.9, weight_decay=2e-5)
    if args.model.startswith('PSP'):
        optimizer = SGD(model.parameters(), lr=1e-2, momentum=.9, weight_decay=1e-4)
    if args.model.startswith('Seg'):
        optimizer = SGD(model.parameters(), lr=1e-3, momentum=.9)

    if args.steps_plot > 0:
        board = Dashboard(args.port)

    for epoch in range(1, args.num_epochs + 1):
        epoch_loss = []

        for step, (images, labels) in enumerate(loader):
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)

            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())  # loss.data[0] on PyTorch < 0.4

            if args.steps_plot > 0 and step % args.steps_plot == 0:
                # undo the ImageNet normalization before plotting
                image = inputs[0].cpu().data
                image[0] = image[0] * .229 + .485
                image[1] = image[1] * .224 + .456
                image[2] = image[2] * .225 + .406
                board.image(image, f'input (epoch: {epoch}, step: {step})')
                board.image(color_transform(outputs[0].cpu().max(0)[1].data),
                            f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                            f'target (epoch: {epoch}, step: {step})')
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average} (epoch: {epoch}, step: {step})')
            if args.steps_save > 0 and step % args.steps_save == 0:
                filename = f'{args.model}-{epoch:03}-{step:04}.pth'
                torch.save(model.state_dict(), filename)
                print(f'save: {filename} (epoch: {epoch}, step: {step})')
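# CrossEntropyLoss2d is imported from elsewhere in this repo; a minimal sketch of
# the usual 2D cross-entropy wrapper (an assumption -- the real class may differ):
import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLoss2d(nn.Module):
    def __init__(self, weight=None):
        super().__init__()
        self.loss = nn.NLLLoss(weight)  # NLLLoss2d on very old PyTorch

    def forward(self, outputs, targets):
        # outputs: (N, C, H, W) raw logits; targets: (N, H, W) class indices
        return self.loss(F.log_softmax(outputs, dim=1), targets)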
def main(experiment_path, load_path, data_path, loss_type, freqloss, optimizer, upsampling_mode,
         model_type, rnn_type, blocktype, space, weight_init, init_criterion, learning_rate,
         weight_decay, momentum, alpha, eps, beta1, beta2, clip, stochastic_drop, dropcopy,
         real_penalty, imag_penalty, efficient, bottleneck, schedule, nonesterov, rmspop_momentum,
         nopil, avg_copies, stochdrop_schedule, nb_resblocks, growth_rate, start_fmaps, epochs,
         batch_size, n_sources, print_interval, seed, nb_copies, **kwargs):

    if not experiment_path or not load_path:
        raise Exception("You must specify the path of the model that will be used for testing.")
    else:
        load_path = os.path.join(experiment_path, load_path)

    #############################################################################################
    # The following lines are just initialization in order to perform the loading.
    testset = WSJ2MReader(data_path, 'test', random_seed=seed)
    print('...number of test utterances {}'.format(testset.n_examples))
    bptt_len = None
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print('...building model')
    if model_type == 'complexunet':
        model = ComplexUNet(
            in_channels=1, nb_speakers=2, mode=upsampling_mode, nb_residual_blocks=nb_resblocks,
            start_fmaps=start_fmaps, blocktype=blocktype, growth_rate=growth_rate, space=space,
            seed=seed, weight_init=weight_init, init_criterion=init_criterion,
            efficient=efficient, bottleneck=bottleneck, nb_copies=nb_copies,
            avg_copies=avg_copies, stochastic_drop=stochastic_drop,
            stochdrop_schedule=stochdrop_schedule, dropcopy=dropcopy
        ).to(device)
    if torch.cuda.device_count() > 1:
        print('...multi-gpu training')
        model = DataParallel(model)

    # loss function
    print('...define loss function')
    if freqloss is None:
        if loss_type == 'specl2loss':
            maskloss = SequenceLoss(nb_speakers=n_sources, pil=nopil)
        elif loss_type == 'speccosloss':
            maskloss = CosLoss(real_penalty=real_penalty, imag_penalty=imag_penalty,
                               nb_speakers=n_sources, pil=nopil)
        elif loss_type in {'istftl2loss', 'istftcosloss'}:
            maskloss = ISTFTLoss(nb_speakers=n_sources, pil=nopil, loss_type=loss_type)
    else:
        print("USING TIME FREQUENCY LOSS!")
        if loss_type in {'istftl2loss', 'istftcosloss'} and freqloss in {'specl2loss', 'speccosloss'}:
            maskloss = ISTFTLoss(nb_speakers=n_sources, pil=nopil, loss_type=loss_type,
                                 frequencyloss=freqloss, real_penalty=real_penalty,
                                 imag_penalty=imag_penalty)
        else:
            raise Exception(
                "When a frequency loss is used it should be used along with a loss on the "
                "temporal signal. Found freqloss == " + str(freqloss) +
                " and loss_type == " + str(loss_type) + "."
            )
    maskloss = maskloss.to(device)

    # optimizer
    print('...define optimizer')
    if optimizer == 'adam':
        optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         lr=learning_rate, weight_decay=weight_decay,
                         betas=(beta1, beta2), eps=eps)
    elif optimizer == 'sgd':
        optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()),
                        lr=learning_rate, momentum=momentum, weight_decay=weight_decay,
                        nesterov=nonesterov)
    elif optimizer == 'rmsprop':
        rmsmom = momentum if rmspop_momentum else 0
        optimizer = RMSprop(filter(lambda p: p.requires_grad, model.parameters()),
                            lr=learning_rate, alpha=alpha, eps=eps,
                            weight_decay=weight_decay, momentum=rmsmom)
    #############################################################################################

    if load_path:
        if os.path.isfile(load_path):
            print("=> loading checkpoint '{}'".format(load_path))
            checkpoint = torch.load(load_path)
            for k in checkpoint.keys():
                if k not in ['optimizer', 'training_losses', 'validation_losses',
                             'train_reader', 'model_state_dict']:
                    print("    " + str(k) + ": " + str(checkpoint[k]))
            start_epoch = checkpoint['start_epoch']
            clip = checkpoint['clip']
            schedule = checkpoint['schedule']
            epochs = checkpoint['epochs']
            batch_size = checkpoint['batch_size']
            print_interval = checkpoint['print_interval']
            best_devSDR = checkpoint['bestSDR']
            best_epoch = checkpoint['best_epoch']
            trainCost = checkpoint['training_losses']
            devSDR = checkpoint['devSDR']
            devSIR = checkpoint['devSIR']
            devSAR = checkpoint['devSAR']
            trainset = checkpoint['train_reader']
            devset = checkpoint['dev_reader']
            maskloss = checkpoint['maskloss']
            optimizer.load_state_dict(checkpoint['optimizer'])
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(load_path))

    m = model.module if isinstance(model, DataParallel) else model
    print("    loss_type: " + str(maskloss.loss_type))
    if maskloss.loss_type == 'speccosloss':
        print("    real_penalty: " + str(maskloss.real_penalty))
        print("    imag_penalty: " + str(maskloss.imag_penalty))
    print("    PIL: " + str(maskloss.pil))
    print("    maskloss: " + str(maskloss.modules))
    print("    optimizer: " + str(optimizer.__module__))
    print("    upsampling_mode " + str(m.mode))
    print("    model_type " + str(model_type))
    print("    weight_init " + str(m.weight_init))
    print("    init_criterion " + str(m.init_criterion))
    if isinstance(m, ComplexUNet):
        print("    space " + str(m.space))
        print("    blocktype " + str(m.blocktype))
        print("    nb_copies " + str(m.nb_copies))
        print("    avg_copies " + str(m.avg_copies))
        print("    stochastic_drop " + str(m.stochastic_drop))
        print("    stochdrop_schedule " + str(m.stochdrop_schedule))
        print("    nb_residual_blocks " + str(m.nb_residual_blocks))
        if m.blocktype in {'dense'}:
            print("    growth_rate " + str(m.growth_rate))
            if m.space == 'real':
                print("    efficient " + str(m.efficient))
                print("    bottleneck " + str(m.bottleneck))
        print("    start_fmaps " + str(m.start_fmaps))
    print("    learning_rate " + str(optimizer.param_groups[0]['lr']))
    print("    weight_decay " + str(optimizer.param_groups[0]['weight_decay']))
    if isinstance(optimizer, SGD):
        print("    nesterov " + str(optimizer.param_groups[0]['nesterov']))
        print("    momentum " + str(optimizer.param_groups[0]['momentum']))
    elif isinstance(optimizer, RMSprop):
        print("    alpha " + str(optimizer.param_groups[0]['alpha']))
        print("    rms_momentum " + str(optimizer.param_groups[0]['momentum']))
        print("    eps " + str(optimizer.param_groups[0]['eps']))
    elif isinstance(optimizer, Adam):
        print("    beta1 " + str(optimizer.param_groups[0]['betas'][0]))
        print("    beta2 " + str(optimizer.param_groups[0]['betas'][1]))
        print("    eps " + str(optimizer.param_groups[0]['eps']))
    print("    clip: " + str(clip))
    print("    schedule " + str(schedule))
    print("    start_epoch: " + str(start_epoch))
    print("    epochs: " + str(epochs))
    print("    print_interval: " + str(print_interval))
    print("    batch_size: " + str(batch_size))
    print("    nb_speakers: " + str(maskloss.nb_speakers))
    print("    trainset_seed: " + str(trainset.random_seed))
    print("    model_seed: " + str(m.seed))

    params_num = 0
    for param in model.parameters():
        if param.requires_grad:
            params_num += np.prod(param.size())
    print("number of parameters: {}".format(params_num))

    print('...start evaluation')
    test_sdr, test_sir, test_sar = 0, 0, 0
    test_iters = 0
    model.eval()
    test_stream = testset.read(batch=batch_size, sortseq=False, normalize=False,
                               bptt_len=bptt_len)
    with torch.no_grad():
        for idx, data in enumerate(test_stream):
            source, target, mask, sourcelen = (torch.FloatTensor(_data).to(device)
                                               for _data in data)
            output = model(source[:, None])
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            output_pred = []
            for i in range(n_sources):
                if m.nb_copies is None:
                    output_pred.append(complex_product(source, output[i],
                                                       input_type='convolution').cpu().data.numpy())
                else:
                    output_pred.append(output[i].cpu().data.numpy())
            sdr, sir, sar = eval_sources(source.cpu().data.numpy(), target.cpu().data.numpy(),
                                         output_pred, mask.cpu().data.numpy(),
                                         sourcelen.cpu().data.numpy())
            test_sdr += sdr
            test_sir += sir
            test_sar += sar
            print(sdr, sir, sar)
            test_iters += 1
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    testSDR = test_sdr / test_iters
    testSIR = test_sir / test_iters
    testSAR = test_sar / test_iters
    print("SDR on test set {}".format(testSDR))
    print("SIR on test set {}".format(testSIR))
    print("SAR on test set {}".format(testSAR))
    print("------- End -------")
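# complex_product() comes from this codebase; a minimal sketch of element-wise
# complex multiplication of (real, imag)-stacked spectrogram tensors (an
# assumption -- the real helper also supports an 'input_type' argument whose
# 'convolution' mode may have different semantics):
import torch

def complex_mul(x, y):
    # x, y: (..., 2, F, T), channel 0 = real part, channel 1 = imaginary part
    real = x[..., 0, :, :] * y[..., 0, :, :] - x[..., 1, :, :] * y[..., 1, :, :]
    imag = x[..., 0, :, :] * y[..., 1, :, :] + x[..., 1, :, :] * y[..., 0, :, :]
    return torch.stack([real, imag], dim=-3)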
        [path + 'data/super/train_character_tensor_2.npy',
         path + 'data/super/valid_character_tensor_2.npy']]

batch_loader_2 = BatchLoader(data_files, idx_files, tensor_files, path)
params = Parameters(batch_loader_2.max_word_len, batch_loader_2.max_seq_len,
                    batch_loader_2.words_vocab_size, batch_loader_2.chars_vocab_size)

neg_loss = NEG_loss(params.word_vocab_size, params.word_embed_size)
if args.use_cuda:
    neg_loss = neg_loss.cuda()

optimizer = SGD(neg_loss.parameters(), 0.1)

for iteration in range(args.num_iterations):
    input_idx, target_idx = batch_loader_2.next_embedding_seq(args.batch_size)

    input = Variable(t.from_numpy(input_idx).long())
    target = Variable(t.from_numpy(target_idx).long())
    if args.use_cuda:
        input, target = input.cuda(), target.cuda()

    out = neg_loss(input, target, args.num_sample).mean()

    optimizer.zero_grad()
    out.backward()
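# NEG_loss is this repo's negative-sampling objective; a minimal sketch of the
# usual skip-gram NEG loss it presumably implements (an assumption -- the real
# class owns the embeddings and may sample negatives from a unigram table):
import torch
import torch.nn as nn
import torch.nn.functional as F

class NEGLossSketch(nn.Module):
    def __init__(self, vocab_size, embed_size):
        super().__init__()
        self.in_embed = nn.Embedding(vocab_size, embed_size)
        self.out_embed = nn.Embedding(vocab_size, embed_size)

    def forward(self, input_idx, target_idx, num_sample):
        v = self.in_embed(input_idx)       # (B, D) center-word vectors
        u = self.out_embed(target_idx)     # (B, D) context-word vectors
        noise = torch.randint(0, self.out_embed.num_embeddings,
                              (input_idx.size(0), num_sample), device=input_idx.device)
        u_neg = self.out_embed(noise)      # (B, K, D) negative samples
        pos = F.logsigmoid((v * u).sum(-1))                               # (B,)
        neg = F.logsigmoid(-torch.bmm(u_neg, v.unsqueeze(2)).squeeze(2)).sum(-1)
        return -(pos + neg)                # per-example loss; caller takes .mean()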
def main(args):
    # --- CONFIG
    device = torch.device(f"cuda:{args.cuda}"
                          if torch.cuda.is_available() and args.cuda >= 0 else "cpu")
    # ---------

    # --- TRANSFORMATIONS
    train_transform = transforms.Compose([
        RandomCrop(28, padding=4),
        ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    test_transform = transforms.Compose([
        ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    # ---------

    # --- SCENARIO CREATION
    mnist_train = MNIST(root=expanduser("~") + "/.avalanche/data/mnist/",
                        train=True, download=True, transform=train_transform)
    mnist_test = MNIST(root=expanduser("~") + "/.avalanche/data/mnist/",
                       train=False, download=True, transform=test_transform)
    scenario = nc_benchmark(mnist_train, mnist_test, 5, task_labels=False, seed=1234)
    # ---------

    # MODEL CREATION
    model = SimpleMLP(num_classes=scenario.n_classes)

    eval_plugin = EvaluationPlugin(
        accuracy_metrics(epoch=True, experience=True, stream=True),
        loss_metrics(epoch=True, experience=True, stream=True),
        # save_image should be False to appropriately view results in the
        # InteractiveLogger: a tensor will be printed instead of an image file
        confusion_matrix_metrics(save_image=False, normalize='all', stream=True),
        loggers=InteractiveLogger())

    # CREATE THE STRATEGY INSTANCE (NAIVE)
    cl_strategy = Naive(model,
                        SGD(model.parameters(), lr=0.001, momentum=0.9),
                        CrossEntropyLoss(),
                        train_mb_size=100, train_epochs=4, eval_mb_size=100,
                        device=device, evaluator=eval_plugin,
                        plugins=[ReplayPlugin(5000)])

    # TRAINING LOOP
    print('Starting experiment...')
    results = []
    for experience in scenario.train_stream:
        print("Start of experience: ", experience.current_experience)
        print("Current Classes: ", experience.classes_in_this_experience)
        cl_strategy.train(experience)
        print('Training completed')
        print('Computing accuracy on the whole test set')
        results.append(cl_strategy.eval(scenario.test_stream))
                         batch_size=100, shuffle=False,
                         num_workers=args.workers, pin_memory=True)

m = wide_resnet(num_classes=10, depth=28, widen_factor=10, dropRate=args.drop)
model = NormalizedModel(model=m, mean=image_mean, std=image_std).to(DEVICE)  # keep images in the [0, 1] range
model_file = '/home/frankfeng/projects/researchData/AI_security/code/PLP/fast_adv/defenses/weights/cifar10_base/cifar10_valacc0.8339999794960022.pth'
model_dict = torch.load(model_file)
model.load_state_dict(model_dict)
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)

optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                weight_decay=args.weight_decay)
if args.adv == 0:
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_decay)
else:
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[60, 120, 160], gamma=0.2)

attacker = DDN(steps=args.steps, device=DEVICE)

max_loss = torch.log(torch.tensor(10.)).item()  # for callback
best_acc = 0
best_epoch = 0
    net, _ = tf.load_checkpoint(args.pretrain_dir, args.pretrain_epo)
    utils.update_params(model_dir, args.pretrain_dir)
else:
    net = tf.load_architectures(args.arch, args.fd)

transforms = tf.load_transforms(args.transform)
trainset = tf.load_trainset(args.data, transforms, path=args.data_dir)
trainset = tf.corrupt_labels(trainset, args.lcr, args.lcs)
trainloader = DataLoader(trainset, batch_size=args.bs, drop_last=True, num_workers=4)

criterion = MaximalCodingRateReduction(gam1=args.gam1, gam2=args.gam2, eps=args.eps)
optimizer = SGD(net.parameters(), lr=args.lr, momentum=args.mom, weight_decay=args.wd)

## Training
for epoch in range(args.epo):
    lr_schedule(epoch, optimizer)
    for step, (batch_imgs, batch_lbls) in enumerate(trainloader):
        features = net(batch_imgs.cuda())
        loss, loss_empi, loss_theo = criterion(features, batch_lbls,
                                               num_classes=trainset.num_classes)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        utils.save_state(model_dir, epoch, step, loss.item(), *loss_empi, *loss_theo)
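# A hedged sketch of the coding-rate term behind MaximalCodingRateReduction
# (Yu et al., 2020): R(Z) = 1/2 * logdet(I + d / (n * eps^2) * Z^T Z). The real
# criterion combines an expansion term over the whole batch with compression
# terms over per-class subsets, weighted by gam1/gam2.
import torch

def coding_rate(Z, eps=0.5):
    # Z: (n, d) batch of (typically unit-norm) feature vectors
    n, d = Z.shape
    I = torch.eye(d, device=Z.device)
    return 0.5 * torch.logdet(I + d / (n * eps ** 2) * Z.T @ Z)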
        bn += ps
    if isinstance(m, nn.BatchNorm2d):
        bn_count += 1
        if bn_count == 1:
            ps = list(m.parameters())
            bn += ps

optimize_policy = [
    {'name': 'first_conv_weight', 'params': first_conv_weight,
     'lr_mult': 5 if args.modality == 'Flow' else 1, 'decay_mult': 1},
    {'name': 'first_conv_bias', 'params': first_conv_bias,
     'lr_mult': 10 if args.modality == 'Flow' else 2, 'decay_mult': 0},
    {'name': 'normal_weight', 'params': normal_weight, 'lr_mult': 1, 'decay_mult': 1},
    {'name': 'normal_bias', 'params': normal_bias, 'lr_mult': 2, 'decay_mult': 0},
    {'name': 'BN scale/shift', 'params': bn, 'lr_mult': 1, 'decay_mult': 0},
]
for i in optimize_policy:
    print('group {} has {} parameters, lr_mult: {}, decay_mult: {}'.format(
        i['name'], len(i['params']), i['lr_mult'], i['decay_mult']))

optimizer = SGD(optimize_policy, lr=args.lr, momentum=args.momentum,
                weight_decay=args.weight_decay)


class AverageMeter:
    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
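# SGD itself ignores the custom 'lr_mult'/'decay_mult' keys, but it does keep
# them in optimizer.param_groups; a training-loop helper (not shown here)
# applies them. A minimal sketch of the usual TSN-style adjustment:
def adjust_learning_rate(optimizer, base_lr, base_weight_decay):
    # scale each group's lr and weight decay by its per-group multipliers
    for group in optimizer.param_groups:
        group['lr'] = base_lr * group.get('lr_mult', 1)
        group['weight_decay'] = base_weight_decay * group.get('decay_mult', 1)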
if __name__ == '__main__':
    loss_name = 'cross_entropy'  # ['cross_entropy', 'mse']
    train_dataloader, test_dataloader = get_loader('../dataset/MNIST/processed', BATCH_SIZE)
    model = MLPNN(784, HIDDEN_SIZES, NUM_CLASSES, drop_out=0).to(DEVICE)

    train_loss_history = []
    train_acc_history = []
    test_loss_history = []
    test_acc_history = []

    if loss_name == 'cross_entropy':
        loss_func = CrossEntropyLoss()
    elif loss_name == 'mse':
        loss_func = MSELoss()
    optimizer = SGD(model.parameters(), lr=LR)

    for e in tqdm(range(EPOCH)):
        train_loss, train_acc = run_epoch(model, train_dataloader, loss_func, optimizer,
                                          False, DEVICE, loss_name)
        test_loss, test_acc = run_epoch(model, test_dataloader, loss_func, optimizer,
                                        True, DEVICE, loss_name)
        tqdm.write('epoch {}, train loss {}, acc {}'.format(e + 1, train_loss, train_acc))
        tqdm.write('epoch {}, test loss {}, acc {}'.format(e + 1, test_loss, test_acc))
        train_loss_history.append(train_loss)
        train_acc_history.append(train_acc)
        test_loss_history.append(test_loss)
        test_acc_history.append(test_acc)

    fig = plt.figure(figsize=(20, 10))
def create_optimizer(opt, lr):
    # print('creating optimizer with lr = ', lr)
    return SGD(optimizable, lr, momentum=0.9, weight_decay=opt.weight_decay)
def train(model, attention, optimizer, loss_function, epochs_num, train_set, validation_set,
          epochs, path, metrics, log_every=50, validation_log_every=20,
          device=torch.device('cuda')):
    start = timer()
    assert epochs > epochs_num
    attention_loss_function = CrossEntropyLoss(ignore_index=0)
    attention_optimizer = SGD(attention.parameters(), lr=0.01)
    for epoch in range(epochs - epochs_num):
        counter = 1
        epoch_start = timer()
        for batch in train_set:
            x_encoder, x_decoder, y, encoder_lengths, decoder_lengths = batch[0]
            x_encoder = x_encoder.to(device)
            x_decoder = x_decoder.to(device)
            y = y.to(device)
            encoder_lengths = encoder_lengths.to(device)
            decoder_lengths = decoder_lengths.to(device)
            x_encoder_raw, y_raw = batch[1]
            del batch

            optimizer.zero_grad()
            y_hat, attention_input, attention_targets = model(x_encoder, x_decoder,
                                                              encoder_lengths, decoder_lengths,
                                                              attention, device=device)
            del x_encoder, x_decoder, encoder_lengths, decoder_lengths

            loss = 0
            for j in range(y.size(1)):
                y_hat_step = y_hat[:, j]
                y_step = y[:, j]
                loss = loss + loss_function(y_hat_step, y_step)
                del y_hat_step, y_step
            loss.backward()
            optimizer.step()
            batch_loss = loss.detach().cpu() / y_hat.size(1)
            del loss

            attention_loss = 0
            attention_optimizer.zero_grad()
            attention_weights = attention(attention_input)
            for t in range(attention_targets.size(1)):
                attention_loss = attention_loss + attention_loss_function(
                    attention_weights[:, t], attention_targets[:, t])
            attention_loss.backward()
            attention_optimizer.step()
            del attention_loss, attention_weights, attention_input, attention_targets

            if counter % log_every == 0:
                y_hat = F.softmax(y_hat, dim=-1)
                metrics.log('train', x_encoder_raw, y.detach().cpu(), y_raw,
                            torch.argmax(y_hat.detach().cpu(), -1), batch_loss)
                data_dict = {
                    'model': model,
                    'attention': attention,
                    'model_state': model.state_dict(),
                    'optimizer_state': optimizer.state_dict(),
                    'metrics': metrics,
                    'epoch_num': epochs_num
                }
                utils.save(data_dict, path)
                del data_dict
            del y_hat, y
            counter += 1
        epochs_num += 1
        evaluate(model, attention, loss_function, validation_set, metrics,
                 log_every=validation_log_every, mode='validation', device=device)
        time = timer()
        print('Epoch finished: ', epoch + 1, '\tEpoch time: ', time - epoch_start,
              '\tOverall time: ', time - start, '\n\n')
    print('Training finished. Overall time: ', timer() - start, '\n\n')
    return model, optimizer, loss_function, epochs_num + epochs
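# The per-timestep loop above can usually be collapsed into a single call; a
# sketch assuming loss_function is nn.CrossEntropyLoss with the default 'mean'
# reduction and no ignore_index (multiplying by T recovers the loop's
# sum-over-timesteps-of-batch-means):
def sequence_loss(loss_function, y_hat, y):
    # y_hat: (B, T, C) logits, y: (B, T) targets
    return loss_function(y_hat.reshape(-1, y_hat.size(-1)), y.reshape(-1)) * y.size(1)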
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# 4/ Train the model
num_classes = len(dataset.classes)
model = NewsModel(VOCAB_SIZE, 100, 100, 100, 100, num_classes)
# optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=LEARNING_RATE)

# Alternative: BCEWithLogitsLoss needs labels converted to float:
#   criterion = BCEWithLogitsLoss()
#   loss = criterion(outputs.squeeze(), labels.float())
criterion = CrossEntropyLoss()  # needs labels converted to long: criterion(outputs.squeeze(), labels.long())
optimizer = SGD(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)

trainer = Trainer(model, train_loader)
train_losses = trainer.run(criterion, optimizer, EPOCHS, LEARNING_RATE)

# Get the loss
print("2.1/ Loss for {0} epochs".format(EPOCHS))
print(train_losses)

print("\n=============================================\n\t\tTEST\n=============================================\n")

# 5/ Test the model
print("Load the model...")
train_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)  # note: reused name; this now holds the test set
print("Classes : {0}".format(dataset.classes))

# Evaluate the model
def main():
    parser = argparse.ArgumentParser(description='PyTorch AG_NEWS text classification example')
    parser.add_argument('--batch_size', type=int, default=32, metavar='N',
                        help='batch size (default: 32)')
    parser.add_argument('--num_epochs', type=int, default=5, metavar='N',
                        help='number of epochs (default: 5)')
    parser.add_argument('--embedding_dim', type=int, default=128, metavar='N',
                        help='dimension of embedding (default: 128)')
    parser.add_argument('--ngrams', type=int, default=2, metavar='N',
                        help='size of ngrams (default: 2)')
    parser.add_argument('--horovod', action='store_true', default=False,
                        help='use horovod')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='enables CUDA training')
    parser.add_argument('--fp16-allreduce', action='store_true', default=False,
                        help='use fp16 compression during allreduce')
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    if not os.path.isdir('data'):
        os.mkdir('data')
    train_dataset, test_dataset = text_classification.DATASETS['AG_NEWS'](
        root='data', ngrams=args.ngrams, vocab=None)
    args.vocab_size = len(train_dataset.get_vocab())
    args.num_class = len(train_dataset.get_labels())

    model = TextSentiment(args.vocab_size, args.embedding_dim, args.num_class)

    # horovod training
    if args.horovod:
        try:
            import horovod.torch as hvd
        except ImportError:
            raise ImportError(
                "Please install horovod from https://github.com/horovod to use horovod training.")
        hvd.init()
        args.kwargs = {'num_workers': 1, 'pin_memory': True}
        args.hvd_size = hvd.size()
        args.rank = hvd.rank()
        args.hvd = hvd
        if args.cuda:
            # Horovod: pin GPU to local rank.
            torch.cuda.set_device(hvd.local_rank())
            torch.cuda.manual_seed(args.seed)
        torch.set_num_threads(1)

    args.kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

    min_valid_loss = float('inf')
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)
    if args.cuda:
        criterion.cuda()
        model.cuda()

    train_len = int(len(train_dataset) * 0.95)
    sub_train_, sub_valid_ = \
        random_split(train_dataset, [train_len, len(train_dataset) - train_len])

    for epoch in range(args.num_epochs):
        start_time = time.time()
        train_loss, train_acc = train_func(args, sub_train_, model, optimizer,
                                           criterion, scheduler)
        valid_loss, valid_acc = test(args, sub_valid_, model, criterion)

        secs = int(time.time() - start_time)
        mins = secs // 60
        secs = secs % 60

        if args.horovod:
            valid_loss = metric_average(valid_loss, 'avg_loss')
            valid_acc = metric_average(valid_acc, 'avg_accuracy')
            if hvd.rank() == 0:
                print('Epoch: %d' % (epoch + 1),
                      " | time in %d minutes, %d seconds" % (mins, secs))
                print(f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)')
                print(f'\tLoss: {valid_loss:.4f}(valid)\t|\tAcc: {valid_acc * 100:.1f}%(valid)')
        else:
            print('Epoch: %d' % (epoch + 1),
                  " | time in %d minutes, %d seconds" % (mins, secs))
            print(f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)')
            print(f'\tLoss: {valid_loss:.4f}(valid)\t|\tAcc: {valid_acc * 100:.1f}%(valid)')
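# Note: hvd.init() alone does not synchronize gradients; the optimizer is
# normally wrapped and the initial state broadcast. Presumably train_func
# handles this; if not, the standard Horovod setup (real Horovod APIs) is:
#
#   compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none
#   optimizer = hvd.DistributedOptimizer(optimizer,
#                                        named_parameters=model.named_parameters(),
#                                        compression=compression)
#   hvd.broadcast_parameters(model.state_dict(), root_rank=0)
#   hvd.broadcast_optimizer_state(optimizer, root_rank=0)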
def main(args: argparse.Namespace):
    logger = CompleteLogger(args.log, args.phase)
    print(args)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    cudnn.benchmark = True

    # Data loading code
    train_transform = utils.get_train_transform(args.train_resizing, random_horizontal_flip=True,
                                                random_color_jitter=False)
    val_transform = utils.get_val_transform(args.val_resizing)
    print("train_transform: ", train_transform)
    print("val_transform: ", val_transform)

    train_source_dataset, train_target_dataset, val_dataset, test_dataset, num_classes, args.class_names = \
        utils.get_dataset(args.data, args.root, args.source, args.target,
                          train_transform, val_transform)
    train_source_loader = DataLoader(train_source_dataset, batch_size=args.batch_size,
                                     shuffle=True, num_workers=args.workers, drop_last=True)
    train_target_loader = DataLoader(train_target_dataset, batch_size=args.batch_size,
                                     shuffle=True, num_workers=args.workers, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False,
                            num_workers=args.workers)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False,
                             num_workers=args.workers)

    train_source_iter = ForeverDataIterator(train_source_loader)
    train_target_iter = ForeverDataIterator(train_target_loader)

    # create model
    print("=> using pre-trained model '{}'".format(args.arch))
    pool_layer = nn.Identity() if args.no_pool else None
    backbone = models.__dict__[args.arch](pretrained=True)
    classifier = ImageClassifier(backbone, train_source_dataset.num_classes, args.num_blocks,
                                 bottleneck_dim=args.bottleneck_dim, dropout_p=args.dropout_p,
                                 pool_layer=pool_layer).to(device)
    adaptive_feature_norm = AdaptiveFeatureNorm(args.delta).to(device)

    # define optimizer
    # the learning rate is fixed according to the original paper
    optimizer = SGD(classifier.get_parameters(), args.lr, weight_decay=args.weight_decay)

    # resume from the best checkpoint
    if args.phase != 'train':
        checkpoint = torch.load(logger.get_checkpoint_path('best'), map_location='cpu')
        classifier.load_state_dict(checkpoint)

    # analyze the model
    if args.phase == 'analysis':
        # extract features from both domains
        feature_extractor = nn.Sequential(classifier.backbone, classifier.pool_layer,
                                          classifier.bottleneck).to(device)
        source_feature = collect_feature(train_source_loader, feature_extractor, device)
        target_feature = collect_feature(train_target_loader, feature_extractor, device)
        # plot t-SNE
        tSNE_filename = osp.join(logger.visualize_directory, 'TSNE.png')
        tsne.visualize(source_feature, target_feature, tSNE_filename)
        print("Saving t-SNE to", tSNE_filename)
        # calculate A-distance, which is a measure for distribution discrepancy
        A_distance = a_distance.calculate(source_feature, target_feature, device)
        print("A-distance =", A_distance)
        return

    if args.phase == 'test':
        acc1 = utils.validate(test_loader, classifier, args, device)
        print(acc1)
        return

    # start training
    best_acc1 = 0.
    for epoch in range(args.epochs):
        # train for one epoch
        train(train_source_iter, train_target_iter, classifier, adaptive_feature_norm,
              optimizer, epoch, args)

        # evaluate on validation set
        acc1 = utils.validate(val_loader, classifier, args, device)

        # remember best acc@1 and save checkpoint
        torch.save(classifier.state_dict(), logger.get_checkpoint_path('latest'))
        if acc1 > best_acc1:
            shutil.copy(logger.get_checkpoint_path('latest'), logger.get_checkpoint_path('best'))
        best_acc1 = max(acc1, best_acc1)

    print("best_acc1 = {:3.1f}".format(best_acc1))

    # evaluate on test set
    classifier.load_state_dict(torch.load(logger.get_checkpoint_path('best')))
    acc1 = utils.validate(test_loader, classifier, args, device)
    print("test_acc1 = {:3.1f}".format(acc1))

    logger.close()
    # create optimizer
    optimizer_adm = Adam(model_adam.parameters(), lr=lr)
    # create loss
    loss_adm = nn.CrossEntropyLoss()
    loss_adm = loss_adm.to(device=device)
    models.append([model_name_adam, model_adam, optimizer_adm, loss_adm, True])

    # create model and move it to device
    model_name_sgd = f'LeNet_sgd_{lr_str}_bth_{batch_size}_m_{momentum_str}'
    model_sgd = LeNet()
    model_sgd = model_sgd.to(device=device)
    # create optimizer
    optimizer_sgd = SGD(model_sgd.parameters(), lr=lr, momentum=momentum)
    # create loss
    loss_sgd = nn.CrossEntropyLoss()
    loss_sgd = loss_sgd.to(device=device)
    models.append([model_name_sgd, model_sgd, optimizer_sgd, loss_sgd, True])

for n, m, optim, loss, tb in models:
    logger.info(f'starting training for model: {n}')
    training(m, n, train, val, test, optim, loss, tb, epochs, device)
    # clearing the cache of cuda
    def __init__(self, exp_name, ds_train, ds_val, epochs=210, batch_size=16, num_workers=4,
                 loss='JointsMSELoss', lr=0.001, lr_decay=True, lr_decay_steps=(170, 200),
                 lr_decay_gamma=0.1, optimizer='Adam', weight_decay=0., momentum=0.9,
                 nesterov=False, pretrained_weight_path=None, checkpoint_path=None,
                 log_path='./logs', use_tensorboard=True, model_c=48, model_nof_joints=17,
                 model_bn_momentum=0.1, flip_test_images=True, device=None):
        """
        Initializes a new Train object.

        The log folder is created, the HRNet model is initialized and optional pre-trained
        weights or saved checkpoints are loaded.
        The DataLoaders, the loss function, and the optimizer are defined.

        Args:
            exp_name (str): experiment name.
            ds_train (HumanPoseEstimationDataset): train dataset.
            ds_val (HumanPoseEstimationDataset): validation dataset.
            epochs (int): number of epochs. Default: 210
            batch_size (int): batch size. Default: 16
            num_workers (int): number of workers for each DataLoader. Default: 4
            loss (str): loss function. Valid values are 'JointsMSELoss' and
                'JointsOHKMMSELoss'. Default: "JointsMSELoss"
            lr (float): learning rate. Default: 0.001
            lr_decay (bool): learning rate decay. Default: True
            lr_decay_steps (tuple): steps for the learning rate decay scheduler.
                Default: (170, 200)
            lr_decay_gamma (float): scale factor for each learning rate decay step.
                Default: 0.1
            optimizer (str): network optimizer. Valid values are 'Adam' and 'SGD'.
                Default: "Adam"
            weight_decay (float): weight decay. Default: 0.
            momentum (float): momentum factor. Default: 0.9
            nesterov (bool): Nesterov momentum. Default: False
            pretrained_weight_path (str): path to pre-trained weights (such as weights from
                pre-training on imagenet). Default: None
            checkpoint_path (str): path to a previous checkpoint. Default: None
            log_path (str): path where tensorboard data and checkpoints will be saved.
                Default: "./logs"
            use_tensorboard (bool): enables tensorboard use. Default: True
            model_c (int): hrnet parameters - number of channels. Default: 48
            model_nof_joints (int): hrnet parameters - number of joints. Default: 17
            model_bn_momentum (float): hrnet parameters - momentum of the BatchNorm layers.
                Default: 0.1
            flip_test_images (bool): flip images during validation. Default: True
            device (torch.device): device to be used (default: cuda, if available).
                Default: None
        """
        super(Train, self).__init__()

        self.exp_name = exp_name
        self.ds_train = ds_train
        self.ds_val = ds_val
        self.epochs = epochs
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.loss = loss
        self.lr = lr
        self.lr_decay = lr_decay
        self.lr_decay_steps = lr_decay_steps
        self.lr_decay_gamma = lr_decay_gamma
        self.optimizer = optimizer
        self.weight_decay = weight_decay
        self.momentum = momentum
        self.nesterov = nesterov
        self.pretrained_weight_path = pretrained_weight_path
        self.checkpoint_path = checkpoint_path
        self.log_path = os.path.join(log_path, self.exp_name)
        self.use_tensorboard = use_tensorboard
        self.model_c = model_c
        self.model_nof_joints = model_nof_joints
        self.model_bn_momentum = model_bn_momentum
        self.flip_test_images = flip_test_images
        self.epoch = 0

        # torch device
        if device is not None:
            self.device = device
        else:
            if torch.cuda.is_available():
                self.device = torch.device('cuda:0')
            else:
                self.device = torch.device('cpu')
        print(self.device)

        os.makedirs(self.log_path, 0o755, exist_ok=False)  # exist_ok=False to avoid overwriting
        if self.use_tensorboard:
            self.summary_writer = tb.SummaryWriter(self.log_path)

        #
        # write all experiment parameters in parameters.txt and in tensorboard text field
        self.parameters = [x + ': ' + str(y) + '\n' for x, y in locals().items()]
        with open(os.path.join(self.log_path, 'parameters.txt'), 'w') as fd:
            fd.writelines(self.parameters)
        if self.use_tensorboard:
            self.summary_writer.add_text('parameters', '\n'.join(self.parameters))

        #
        # load model
        self.model = HRNet(c=self.model_c, nof_joints=self.model_nof_joints,
                           bn_momentum=self.model_bn_momentum).to(self.device)

        #
        # define loss and optimizers
        if self.loss == 'JointsMSELoss':
            self.loss_fn = JointsMSELoss().to(self.device)
        elif self.loss == 'JointsOHKMMSELoss':
            self.loss_fn = JointsOHKMMSELoss().to(self.device)
        else:
            raise NotImplementedError

        if optimizer == 'SGD':
            self.optim = SGD(self.model.parameters(), lr=self.lr,
                             weight_decay=self.weight_decay, momentum=self.momentum,
                             nesterov=self.nesterov)
        elif optimizer == 'Adam':
            self.optim = Adam(self.model.parameters(), lr=self.lr,
                              weight_decay=self.weight_decay)
        else:
            raise NotImplementedError

        #
        # load pre-trained weights (such as those pre-trained on imagenet)
        if self.pretrained_weight_path is not None:
            missing_keys, unexpected_keys = self.model.load_state_dict(
                torch.load(self.pretrained_weight_path, map_location=self.device),
                strict=False  # strict=False is required to load models pre-trained on imagenet
            )
            print('Pre-trained weights loaded.')
            if len(missing_keys) > 0 or len(unexpected_keys) > 0:
                print('Pre-trained weights missing keys:', missing_keys)
                print('Pre-trained weights unexpected keys:', unexpected_keys)

        #
        # load previous checkpoint
        if self.checkpoint_path is not None:
            print('Loading checkpoint %s...' % self.checkpoint_path)
            if os.path.isdir(self.checkpoint_path):
                path = os.path.join(self.checkpoint_path, 'checkpoint_last.pth')
            else:
                path = self.checkpoint_path
            self.starting_epoch, self.model, self.optim, self.params = load_checkpoint(
                path, self.model, self.optim, self.device)
        else:
            self.starting_epoch = 0

        if lr_decay:
            self.lr_scheduler = MultiStepLR(
                self.optim, list(self.lr_decay_steps), gamma=self.lr_decay_gamma,
                last_epoch=self.starting_epoch if self.starting_epoch else -1)

        #
        # load train and val datasets
        self.dl_train = DataLoader(self.ds_train, batch_size=self.batch_size, shuffle=True,
                                   num_workers=self.num_workers, drop_last=True)
        self.len_dl_train = len(self.dl_train)

        self.dl_val = DataLoader(self.ds_val, batch_size=self.batch_size, shuffle=False,
                                 num_workers=self.num_workers)
        self.len_dl_val = len(self.dl_val)

        #
        # initialize variables
        self.mean_loss_train = 0.
        self.mean_acc_train = 0.
        self.mean_loss_val = 0.
        self.mean_acc_val = 0.
        self.mean_mAP_val = 0.

        self.best_loss = None
        self.best_acc = None
        self.best_mAP = None
        return _build_warm_up_scheduler(optimizer, epochs, last_epoch)
    else:
        return _build_lr_scheduler(optimizer, cfg.TRAIN.LR, epochs, last_epoch)


if __name__ == '__main__':
    import torch.nn as nn
    from torch.optim import SGD

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv = nn.Conv2d(10, 10, kernel_size=3)

    net = Net().parameters()
    optimizer = SGD(net, lr=0.01)

    # test1
    step = {'type': 'step', 'start_lr': 0.01, 'step': 10, 'mult': 0.1}
    lr = build_lr_scheduler(optimizer, step)
    print(lr)

    log = {
        'type': 'log',
        'start_lr': 0.03,
        'end_lr': 5e-4,
    }
    lr = build_lr_scheduler(optimizer, log)
    print(lr)
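# A sketch of what a 'log' schedule like the one tested above typically
# computes (an assumption -- the repo's _build_lr_scheduler may differ):
# geometric interpolation from start_lr down to end_lr over the epoch count.
import numpy as np

def log_lrs(start_lr, end_lr, epochs):
    # e.g. log_lrs(0.03, 5e-4, 5) -> array decaying from 0.03 to 0.0005
    return np.logspace(np.log10(start_lr), np.log10(end_lr), num=epochs)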
    def __init__(self, path_state_dict=''):
        self.writer: Optional[CustomWriter] = None

        meltrans = create_mel_filterbank(hp.sampling_rate, hp.n_fft, fmin=hp.mel_fmin,
                                         fmax=hp.mel_fmax, n_mels=hp.mel_freq)

        self.model = melGen(self.writer, hp.n_freq, meltrans, hp.mel_generator)
        count_parameters(self.model)

        self.module = self.model

        self.lws_processor = lws.lws(hp.n_fft, hp.l_hop, mode='speech', perfectrec=False)

        self.prev_stoi_scores = {}
        self.base_stoi_scores = {}

        if hp.crit == "l1":
            self.criterion = nn.L1Loss(reduction='none')
        elif hp.crit == "l2":
            # PyTorch has no nn.L2Loss; MSELoss is the L2 criterion
            self.criterion = nn.MSELoss(reduction='none')
        else:
            print("Loss not implemented")
            return None

        self.criterion2 = nn.L1Loss(reduction='none')
        self.f_specs = {
            0: [(5, 2), (15, 5)],
            1: [(5, 2)],
            2: [(3, 1)],
            3: [(3, 1), (5, 2)],
            4: [(3, 1), (5, 2), (7, 3)],
            5: [(15, 5)],
            6: [(3, 1), (5, 2), (7, 3), (15, 5), (25, 10)],
            7: [(1, 1)],
            8: [(1, 1), (3, 1), (5, 2), (15, 5), (7, 3), (25, 10), (9, 4), (20, 5), (5, 3)],
        }[hp.loss_mode]
        self.filters = [gen_filter(k) for k, s in self.f_specs]

        if hp.optimizer == "adam":
            self.optimizer = Adam(self.model.parameters(), lr=hp.learning_rate,
                                  weight_decay=hp.weight_decay)
        elif hp.optimizer == "sgd":
            self.optimizer = SGD(self.model.parameters(), lr=hp.learning_rate,
                                 weight_decay=hp.weight_decay)
        elif hp.optimizer == "radam":
            self.optimizer = RAdam(self.model.parameters(), lr=hp.learning_rate,
                                   weight_decay=hp.weight_decay)
        elif hp.optimizer == "novograd":
            self.optimizer = NovoGrad(self.model.parameters(), lr=hp.learning_rate,
                                      weight_decay=hp.weight_decay)
        elif hp.optimizer == "sm3":
            raise NameError('sm3 not implemented')
        else:
            raise NameError('optimizer not implemented')

        self.__init_device(hp.device)

        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, **hp.scheduler)
        self.max_epochs = hp.n_epochs

        self.valid_eval_sample: Dict[str, Any] = dict()

        # Load state dict
        if path_state_dict:
            st_model, st_optim, st_sched = torch.load(path_state_dict,
                                                      map_location=self.in_device)
            try:
                self.module.load_state_dict(st_model)
                self.optimizer.load_state_dict(st_optim)
                self.scheduler.load_state_dict(st_sched)
            except Exception:
                raise Exception('The model is different from the state dict.')

        path_summary = hp.logdir / 'summary.txt'
        if not path_summary.exists():
            with path_summary.open('w') as f:
                f.write('\n')
            with (hp.logdir / 'hparams.txt').open('w') as f:
                f.write(repr(hp))
    def init_sit(self):
        model = self.get_model(fast_test=True)
        optimizer = SGD(model.parameters(), lr=1e-3)
        criterion = CrossEntropyLoss()
        benchmark = self.load_benchmark(use_task_labels=False)
        return model, optimizer, criterion, benchmark
#
# softmax_output = softmax(linear_output)
# print("Shape of softmax output: ", softmax_output.shape)
# print("Shape of target: ", y.shape)
#
# loss = criterion(softmax_output, y)
# print("Loss value: ", loss.data.numpy())

from rnn_clf import RNNClassifier

model = RNNClassifier(vocal_size=vocal_size, embedding_dim=100, hidden_dim=50,
                      output_dim=label_size, batch_size=1)
optim = SGD(params=model.parameters(), lr=0.01)
criterion = NLLLoss()

for i in range(10):
    total_loss = 0
    model.train()
    for it, ex in enumerate(train_data):
        f, t = ex
        X = torch.LongTensor(f)
        y = torch.LongTensor(t)
        model.hidden = model.init_hidden()
        output = model(X)
        optim.zero_grad()
        pred = torch.argmax(output)
        loss = criterion(output, y)
def train_texter(args):
    ruler_pkl_path = args.ruler_pkl
    texter_pkl_path = args.texter_pkl
    sent_count = args.sent_count

    split_dir_path = args.split_dir
    text_dir_path = args.text_dir

    epoch_count = args.epoch_count
    log_dir = args.log_dir
    log_steps = args.log_steps
    lr = args.lr
    overwrite = args.overwrite
    sent_len = args.sent_len

    #
    # Check that (input) POWER Ruler PKL exists
    #
    logging.info('Check that (input) POWER Ruler PKL exists ...')
    ruler_pkl = RulerPkl(Path(ruler_pkl_path))
    ruler_pkl.check()

    #
    # Check that (input) POWER Texter PKL exists
    #
    logging.info('Check that (input) POWER Texter PKL exists ...')
    texter_pkl = TexterPkl(Path(texter_pkl_path))
    texter_pkl.check()

    #
    # Check that (input) POWER Split Directory exists
    #
    logging.info('Check that (input) POWER Split Directory exists ...')
    split_dir = SplitDir(Path(split_dir_path))
    split_dir.check()

    #
    # Check that (input) IRT Text Directory exists
    #
    logging.info('Check that (input) IRT Text Directory exists ...')
    text_dir = TextDir(Path(text_dir_path))
    text_dir.check()

    #
    # Load ruler
    #
    logging.info('Load ruler ...')
    ruler = ruler_pkl.load()

    #
    # Load texter
    #
    logging.info('Load texter ...')
    texter = texter_pkl.load().cpu()

    #
    # Build POWER
    #
    power = Aggregator(texter, ruler)

    #
    # Load facts
    #
    logging.info('Load facts ...')
    ent_to_lbl = split_dir.entities_tsv.load()
    rel_to_lbl = split_dir.relations_tsv.load()

    train_facts = split_dir.train_facts_tsv.load()
    train_facts = {Fact.from_ints(head, rel, tail, ent_to_lbl, rel_to_lbl)
                   for head, _, rel, _, tail, _ in train_facts}

    known_valid_facts = split_dir.valid_facts_known_tsv.load()
    unknown_valid_facts = split_dir.valid_facts_unknown_tsv.load()

    known_eval_facts = known_valid_facts
    all_valid_facts = known_valid_facts + unknown_valid_facts

    known_facts = {Fact.from_ints(head, rel, tail, ent_to_lbl, rel_to_lbl)
                   for head, _, rel, _, tail, _ in known_eval_facts}
    all_valid_facts = {Fact.from_ints(head, rel, tail, ent_to_lbl, rel_to_lbl)
                       for head, _, rel, _, tail, _ in all_valid_facts}

    #
    # Load entities
    #
    logging.info('Load entities ...')
    train_ents = split_dir.train_entities_tsv.load()
    valid_ents = split_dir.valid_entities_tsv.load()

    train_ents = [Ent(ent, lbl) for ent, lbl in train_ents.items()]
    valid_ents = [Ent(ent, lbl) for ent, lbl in valid_ents.items()]

    #
    # Load texts
    #
    logging.info('Load texts ...')
    train_ent_to_sents = text_dir.cw_train_sents_txt.load()
    valid_ent_to_sents = text_dir.ow_valid_sents_txt.load()

    #
    # Prepare training
    #
    criterion = MSELoss()
    writer = SummaryWriter(log_dir=log_dir)

    #
    # Train
    #
    logging.info('Train ...')
    texter_optimizer = SGD([power.texter_weight], lr=lr)
    ruler_optimizer = SGD([power.ruler_weight], lr=lr)

    for epoch in range(epoch_count):
        for ent in train_ents:
            print(power.texter_weight)
            print(power.ruler_weight)
            print()

            #
            # Get entity ground truth facts
            #
            gt_facts = [fact for fact in train_facts if fact.head == ent]

            logging.debug('Ground truth:')
            for fact in gt_facts:
                logging.debug(str(fact))

            #
            # Train texter weight
            #
            sents = list(train_ent_to_sents[ent.id])[:sent_count]
            if len(sents) < sent_count:
                logging.warning(f'Only {len(sents)} sentences for entity'
                                f' "{ent.lbl}" ({ent.id}). Skipping.')
                continue

            texter_preds = texter.predict(ent, sents)

            train_confs = [pred.conf for pred in texter_preds]
            gt_confs = [1 if pred.fact in gt_facts else 0 for pred in texter_preds]

            for train_conf, gt_conf in zip(train_confs, gt_confs):
                loss = criterion(torch.tensor(train_conf) * power.texter_weight,
                                 torch.tensor(gt_conf).float())
                texter_optimizer.zero_grad()
                loss.backward()
                texter_optimizer.step()

            #
            # Train ruler weight
            #
            ruler_preds = ruler.predict(ent)

            train_confs = [pred.conf for pred in ruler_preds]
            gt_confs = [1 if pred.fact in gt_facts else 0 for pred in ruler_preds]

            for train_conf, gt_conf in zip(train_confs, gt_confs):
                loss = criterion(torch.tensor(train_conf) * power.ruler_weight,
                                 torch.tensor(gt_conf).float())
                ruler_optimizer.zero_grad()
                loss.backward()
                ruler_optimizer.step()
    def __init__(self, data_loader):
        """
        Construct a new Trainer instance.

        Args
        ----
        - data_loader: data iterator
        """
        # glimpse network params
        self.patch_size = 64
        self.glimpse_scale = 2
        self.num_patches = 3
        self.loc_hidden = 128
        self.glimpse_hidden = 128

        # core network params
        self.num_glimpses = 16
        self.hidden_size = 256

        # reinforce params
        self.std = 0.17
        self.M = 10

        # data params
        self.train_loader = data_loader[0]
        self.valid_loader = data_loader[1]
        self.num_train = len(self.train_loader.sampler.indices)
        self.num_valid = len(self.valid_loader.sampler.indices)
        self.num_classes = 27
        self.num_channels = 3

        # training params
        self.epochs = 100
        self.start_epoch = 0
        self.saturate_epoch = 150
        self.init_lr = 0.001
        self.min_lr = 1e-06
        self.decay_rate = (self.min_lr - self.init_lr) / self.saturate_epoch
        self.momentum = 0.5
        self.lr = self.init_lr

        # misc params
        self.use_gpu = False
        self.best = True
        self.best_valid_acc = 0.
        self.counter = 0

        # build RAM model
        self.model = RecurrentAttention(
            self.patch_size, self.num_patches, self.glimpse_scale, self.num_channels,
            self.loc_hidden, self.glimpse_hidden, self.std, self.hidden_size,
            self.num_classes,
        )
        if self.use_gpu:
            self.model.cuda()

        print('[*] Number of model parameters: {:,}'.format(
            sum([p.data.nelement() for p in self.model.parameters()])))

        # initialize optimizer and scheduler
        self.optimizer = SGD(self.model.parameters(), lr=self.lr, momentum=self.momentum)
        self.scheduler = ReduceLROnPlateau(self.optimizer, 'min')
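# decay_rate above is negative (min_lr < init_lr), so adding it each epoch
# linearly anneals the learning rate toward min_lr over saturate_epoch epochs;
# a minimal sketch of that per-epoch update (an assumption -- the trainer's
# actual loop is not shown here):
def anneal_lr(optimizer, lr, decay_rate, min_lr):
    lr = max(lr + decay_rate, min_lr)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr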
    def init_optimizer(self, classifier_module):
        return SGD(classifier_module().parameters(), lr=0.05)
def create_optimizer(opt, lr):
    print('creating optimizer with lr = ', lr)
    return SGD(params.values(), lr, momentum=0.9, weight_decay=opt.weight_decay)
        param_groups = [{'params': model.parameters(), 'lr': args.learning_rate},
                        {'params': bridges.parameters(), 'lr': args.learning_rate}]
        t_model = t_model.to(args.device)
        bridge = bridges.to(args.device)
        t_model.eval()
    elif args.task == 'student':
        model = StudentModel(params=args,
                             pretrained_embedding=torch.tensor(pretrained_embedding).float())
        param_groups = [{'params': model.parameters(), 'lr': args.learning_rate}]
    elif args.task == 'teacher':
        model = TeacherModel(params=args,
                             pretrained_embedding=torch.tensor(pretrained_embedding).float())
        param_groups = [{'params': model.parameters(), 'lr': args.learning_rate}]

    if args.classify_loss:
        classifier = PathClassifier(params=args)
        param_groups.append({'params': classifier.parameters(), 'lr': args.learning_rate})
        classifiers = classifier.to(args.device)

    optimizer = SGD(param_groups, lr=args.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=args.gamma)
    model = model.to(args.device)

    total_step = 0
    eval_result = {}
    accum_train_link_loss, accum_train_label_loss = 0, 0
    accum_distill_loss, accum_classify_loss = 0, 0
    accum_eval_loss = 0
    scheduler_step = 0
    best_eval_result = None
    stop_sign = 0

    for epoch in range(args.epoches):
        print('{} epoch training..'.format(epoch + 1))
def create_optimizer(opt, lr):
    print('creating optimizer with lr = ', lr)
    return SGD([v for v in params.values() if v.requires_grad], lr,
               momentum=0.9, weight_decay=opt.weight_decay)
def train(vocab_dict, file_dict, model_dict, batch_size, storage_path, epochs,
          print_every=100, check_every=100, save=False):
    """Trains the model and saves it.

    Args:
        vocab_dict (dict): Parameters required to create tokenizers
        file_dict (dict): Files needed to train
        model_dict (dict): Parameters required to create model
        batch_size (int): Batch size
        storage_path (Path): Path where model is stored
        epochs (int): Number of epochs to train
        print_every (int, optional): How often to print training logs. Defaults to 100.
        check_every (int, optional): How often to validate on the validation set.
            Defaults to 100.
        save (bool, optional): Whether to save the model or not. Defaults to False.

    Returns:
        model: Model
        training_log: Training log used to visualize the results
    """
    source_tokenizer, target_tokenizer = create_source_target_tokenizers(**vocab_dict)
    pad_id = target_tokenizer.padding['pad_id']

    train_dataset = TransliterationDataset(file_dict["train"], source_tokenizer,
                                           target_tokenizer)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=pad_collate,
                              shuffle=False)
    del train_dataset

    val_dataset = TransliterationDataset(file_dict["val"], source_tokenizer, target_tokenizer)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, collate_fn=pad_collate,
                            shuffle=False)
    del val_dataset

    test_dataset = TransliterationDataset(file_dict["test"], source_tokenizer, target_tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=pad_collate,
                             shuffle=False)
    del test_dataset
    del file_dict

    if model_dict['type'] == "simple_seq2seq":
        model = Simple_seq2seq(model_dict['embed_size'], model_dict['hidden_size'],
                               src_tokenizer=source_tokenizer, tgt_tokenizer=target_tokenizer)
        optimizer = SGD(model.parameters(), lr=model_dict['lr'])
    if model_dict['type'] == "attention_seq2seq":
        model = Attention_seq2seq(model_dict['embed_size'], model_dict['hidden_size'],
                                  src_tokenizer=source_tokenizer,
                                  tgt_tokenizer=target_tokenizer,
                                  dropout_rate=model_dict["dropout_rate"])
        optimizer = SGD(model.parameters(), lr=model_dict['lr'])

    model = model.to(device)
    epoch_len = len(train_loader)

    training_losses = []
    training_iteration = []
    validation_losses = []
    validation_accuracy = []
    training_accuracy = []
    validation_iteration = []

    for epoch in range(epochs):
        print('\n')
        print(f'Running epoch {epoch + 1}')
        print('\n')
        for i, batch in enumerate(train_loader):
            iteration = epoch * epoch_len + i
            model.train()
            src_sents, tgt_sents, src_lens = batch
            scores = model(batch)
            del batch, src_sents, src_lens

            loss = masked_loss(scores, tgt_sents, pad_id)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % print_every == 0:
                training_iteration.append(iteration)
                training_losses.append(loss.item())
                accuracy = masked_accuracy(scores, tgt_sents, pad_id)
                training_accuracy.append(accuracy.item())
                print(f'Iteration {i}, loss = {round(loss.item(), 4)}, '
                      f'training accuracy = {round(accuracy.item(), 4)}')
            del scores, tgt_sents

            if i % check_every == 0:
                validation_iteration.append(iteration)
                val_loss, val_accuracy = valid_step(model, val_loader)
                validation_losses.append(val_loss)
                validation_accuracy.append(val_accuracy)
                print(f'Iteration {i}, validation loss = {round(val_loss, 4)}, '
                      f'validation accuracy = {round(val_accuracy, 4)}')

        if save:
            PATH = storage_path / f'model_epoch_{epoch}.pt'
            torch.save({
                'epoch': epoch,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'loss': loss,
            }, PATH)

    training_log = {
        'training_losses': training_losses,
        'training_accuracy': training_accuracy,
        'training_iteration': training_iteration,
        'validation_losses': validation_losses,
        'validation_accuracy': validation_accuracy,
        'validation_iteration': validation_iteration,
    }
    return model, training_log
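# masked_loss/masked_accuracy come from this codebase; a minimal sketch of what
# a pad-masked, token-level cross entropy presumably computes (an assumption --
# the real helper's shapes and reduction may differ):
import torch.nn.functional as F

def masked_loss_sketch(scores, targets, pad_id):
    # scores: (B, T, V) logits; targets: (B, T) token ids; pad positions ignored
    return F.cross_entropy(scores.transpose(1, 2), targets, ignore_index=pad_id)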
def run(train_batch_size, val_batch_size, epochs, lr, momentum, log_interval):
    train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
    model = Net()
    wandb.watch(model)

    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'

    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    trainer = create_supervised_trainer(model, optimizer, F.nll_loss, device=device)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'accuracy': Accuracy(),
                                                     'nll': Loss(F.nll_loss)},
                                            device=device)

    desc = "ITERATION - loss: {:.2f}"
    pbar = tqdm(initial=0, leave=False, total=len(train_loader), desc=desc.format(0))

    @trainer.on(Events.ITERATION_COMPLETED(every=log_interval))
    def log_training_loss(engine):
        pbar.desc = desc.format(engine.state.output)
        pbar.update(log_interval)
        wandb.log({"train loss": engine.state.output})

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        pbar.refresh()
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        tqdm.write("Training Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                   .format(engine.state.epoch, avg_accuracy, avg_nll))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        tqdm.write("Validation Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f}"
                   .format(engine.state.epoch, avg_accuracy, avg_nll))
        pbar.n = pbar.last_print_n = 0
        wandb.log({"validation loss": engine.state.metrics['nll']})
        wandb.log({"validation accuracy": engine.state.metrics['accuracy']})

    trainer.run(train_loader, max_epochs=epochs)
    pbar.close()
def overfit_small_batch_testing(vocab_dict, sample_file, model_dict, title_suffix=None,
                                batch_size=4, iterations=1000):
    """Overfits the model on a single batch to check whether the model has enough capacity.

    Args:
        vocab_dict (dict): Parameters required to create tokenizers
        sample_file (file): Sample file to extract a single batch
        model_dict (dict): Parameters required to create model
        title_suffix (str, optional): Title of experiment. Defaults to None.
        batch_size (int, optional): Batch size. Defaults to 4.
        iterations (int, optional): Number of max iterations to perform. Defaults to 1000.
    """
    # Create tokenizers
    source_tokenizer, target_tokenizer = create_source_target_tokenizers(**vocab_dict)
    pad_id = target_tokenizer.padding['pad_id']

    # Create the dataset and extract a single batch
    sample_dataset = TransliterationDataset(sample_file, source_tokenizer, target_tokenizer)
    batch = next(iter(DataLoader(sample_dataset, batch_size=batch_size,
                                 collate_fn=pad_collate)))

    # Select the appropriate model
    if model_dict['type'] == "simple_seq2seq":
        model = Simple_seq2seq(model_dict['embed_size'], model_dict['hidden_size'],
                               src_tokenizer=source_tokenizer, tgt_tokenizer=target_tokenizer)
        optimizer = SGD(model.parameters(), lr=model_dict['lr'])
    if model_dict['type'] == "attention_seq2seq":
        model = Attention_seq2seq(model_dict['embed_size'], model_dict['hidden_size'],
                                  src_tokenizer=source_tokenizer,
                                  tgt_tokenizer=target_tokenizer,
                                  dropout_rate=model_dict["dropout_rate"])
        optimizer = SGD(model.parameters(), lr=model_dict['lr'])

    model = model.to(device)
    model.train()
    src_sents, tgt_sents, src_lens = batch

    training_losses = []
    training_iteration = []
    training_accuracy = []
    for i in range(iterations):
        scores = model(batch)
        loss = masked_loss(scores, tgt_sents, pad_id)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        accuracy = masked_accuracy(scores, tgt_sents, pad_id)
        training_losses.append(loss.item())
        training_iteration.append(i)
        training_accuracy.append(accuracy.item())
        if i % 50 == 0:
            print(f'Iteration {i}, loss = {round(loss.item(), 4)}, '
                  f'training accuracy = {round(accuracy.item(), 4)}')
        if accuracy == 1:
            break

    # Print the model's predictions at the end
    print_predictions(src_sents, tgt_sents, scores,
                      src_tokenizer=source_tokenizer, tgt_tokenizer=target_tokenizer)

    # Plot the logs
    plt.style.use("dark_background")
    fig, ax = plt.subplots(figsize=(16, 8), ncols=2)

    ax1 = sns.lineplot(x=training_iteration, y=training_losses, label='Training Loss', ax=ax[0])
    ax1.set_xlabel('No. of Iterations', fontsize=15)
    ax1.set_ylabel('Loss', fontsize=15)
    ax1.set_title("Loss", fontsize=18)

    ax2 = sns.lineplot(x=training_iteration, y=training_accuracy, label='Training Accuracy',
                       ax=ax[1])
    ax2.set_xlabel('No. of Iterations', fontsize=15)
    ax2.set_ylabel('Accuracy', fontsize=15)
    ax2.set_title('Accuracy', fontsize=18)

    title = title_suffix
    fig.suptitle(title, size=25)
    fig.tight_layout()  # get_tight_layout() only queries the flag; tight_layout() applies it
def main(random_seed, test_on_gt, only_test, overfit):
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    n_epochs = 3
    lr = 1e-2
    wd = 0
    lr_scheduler = True

    train_db = JointCocoTasks()

    network = JointClassifierWithClass()
    optimizer = SGD(network.parameters(), lr=lr, weight_decay=wd)

    experiment = JointClassifierExperimentWithClass(
        network=network,
        optimizer=optimizer,
        dataset=train_db,
        tensorboard=True,
        seed=random_seed,
    )

    train_folder = "ablation-joint-classifier-withclass-seed:{s}".format(s=random_seed)
    folder = os.path.join(SAVING_DIRECTORY, train_folder)
    mkdir_p(folder)

    if not only_test:
        experiment.train_n_epochs(n_epochs, overfit=overfit, lr_scheduler=lr_scheduler)
        torch.save(network.state_dict(), os.path.join(folder, "model.mdl"))
    else:
        network.load_state_dict(torch.load(os.path.join(folder, "model.mdl")))

    for task_number in TASK_NUMBERS:
        if test_on_gt:
            test_db = CocoTasksTestGT(task_number)
        else:
            test_db = CocoTasksTest(task_number)

        print("testing task {}".format(task_number), "---------------------")

        # test the model
        detections = experiment.do_test(test_db, task_number=task_number)
        detections_file_name = "detections_tn:{}_tgt:{}.json".format(task_number, test_on_gt)

        # save detections
        with open(os.path.join(folder, detections_file_name), "w") as f:
            json.dump(detections, f)

        # perform evaluation
        with redirect_stdout(open(os.devnull, "w")):
            gtCOCO = test_db.task_coco
            dtCOCO = gtCOCO.loadRes(os.path.join(folder, detections_file_name))

            cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox")
            cocoEval.params.catIds = 1
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()

        print("mAP:\t\t %1.6f" % cocoEval.stats[0])
        print("mAP@0.5:\t\t %1.6f" % cocoEval.stats[1])  # stats[1] is AP at IoU=0.50

        # save evaluation performance
        result_file_name = "result_tn:{}_tgt:{}.txt".format(task_number, test_on_gt)
        with open(os.path.join(folder, result_file_name), "w") as f:
            f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
train_set = DATASET_DICT[args.dataset](root=DATA_DIR, train=True, transform=transform,
                                       download=download)
test_set = DATASET_DICT[args.dataset](root=DATA_DIR, train=False, transform=transform,
                                      download=download)
train_loader = DataLoader(train_set, batch_size=1024, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1024, shuffle=False)

model = MODEL_DICT[args.model]()
model.to(DEVICE)

EPOCHS = 100
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.8, nesterov=True)
scheduler = lr_scheduler.StepLR(optimizer, step_size=80, gamma=0.1)
loss_fn = nn.CrossEntropyLoss()

train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []
for e in range(EPOCHS):
    with tqdm(train_loader, desc=f"{e + 1}/{EPOCHS} epochs") as t:
        running_correct = 0
        running_loss = 0
        running_total = 0
        model.train()
        for i, (x, y) in enumerate(t):