def self_training(args, **kwargs):
    torch.manual_seed(args.seed)
    device = kwargs['device']
    file = kwargs['file']
    current_time = kwargs['current_time']

    nclasses = datasets.__dict__[args.dataset].nclasses
    model = models.__dict__[args.arch](nclasses=nclasses)
    model = torch.nn.DataParallel(model).to(device)

    # Multiple losses are needed because we need the intermediate probabilities.
    # The nn.CrossEntropyLoss criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.
    softmax = nn.Softmax(dim=1)
    logsoftmax = nn.LogSoftmax(dim=1)
    nll = nn.NLLLoss().to(device)

    optimizer = utils.select_optimizer(args, model)

    # The supervised dataset is kept directly (not wrapped in a loader)
    # because it is updated with pseudo-labels after each epoch.
    train_supervised_dataset, _, _ = utils.get_dataset(args)
    _, train_unsupervised_loader, val_loader = utils.make_loader(args)

    columns = ['Epoch #', 'Train loss', 'Train Accuracy', 'Train Correct', 'Train Total',
               'Val loss', 'Top-1 Accuracy', 'Top-5 Accuracy', 'Top-1 Correct',
               'Top-5 Correct', 'Val Total', 'Time(secs)']
    report = PrettyTable(columns)

    for epoch in range(1, args.epochs + 1):
        per_epoch = PrettyTable(columns)
        start_time = time.time()
        training_loss, train_correct, train_total = train(
            device, model, logsoftmax, nll, epoch, train_supervised_dataset,
            optimizer, args.batch_size)
        validation_loss, val1_correct, val5_correct, val_total = validation(
            device, model, logsoftmax, nll, val_loader)
        train_supervised_dataset = label_addition(
            device, model, softmax, train_supervised_dataset,
            train_unsupervised_loader, args.tau)
        end_time = time.time()

        row = [epoch, round(training_loss, 4),
               "{:.3f}%".format(round((train_correct * 100.0) / train_total, 3)),
               train_correct, train_total, round(validation_loss, 4),
               "{:.3f}%".format(round((val1_correct * 100.0) / val_total, 3)),
               "{:.3f}%".format(round((val5_correct * 100.0) / val_total, 3)),
               val1_correct, val5_correct, val_total,
               round(end_time - start_time, 2)]
        report.add_row(row)
        per_epoch.add_row(row)
        print(per_epoch)

        if args.save_model == 'y':
            val_folder = "saved_model/" + current_time
            if not os.path.isdir(val_folder):
                os.mkdir(val_folder)
            save_model_file = val_folder + '/model_' + str(epoch) + '.pth'
            torch.save(model.state_dict(), save_model_file)

    file.write(report.get_string())
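# `label_addition` above performs the actual self-training step but is defined
# in the project's utilities, not in this file. A minimal sketch of the usual
# recipe, assuming the torch imports at the top of this file and a hypothetical
# `add_item(image, label)` method on the dataset: keep an unlabeled sample only
# when its peak softmax probability clears the confidence threshold `tau`.
def label_addition(device, model, softmax, supervised_dataset, unsupervised_loader, tau):
    model.eval()
    with torch.no_grad():
        for images, _ in unsupervised_loader:
            images = images.to(device)
            probs = softmax(model(images))      # shape: (batch, nclasses)
            conf, preds = probs.max(dim=1)      # peak probability and predicted class
            for img, p, c in zip(images.cpu(), conf, preds):
                if p.item() >= tau:             # confident enough to pseudo-label
                    supervised_dataset.add_item(img, c.item())
    return supervised_dataset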
def main():
    global args
    args = parser.parse_args()

    if args.save == '':  # `is ''` compared identity, not equality
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    logging.info("using pretrained cnn %s", args.cnn)
    cnn = resnet.__dict__[args.cnn](pretrained=True)

    vocab = build_vocab()
    model = CaptionModel(cnn, vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    train_data = get_iterator(get_coco_data(vocab, train=True),
                              batch_size=args.batch_size,
                              max_length=args.max_length,
                              shuffle=True,
                              num_workers=args.workers)
    val_data = get_iterator(get_coco_data(vocab, train=False),
                            batch_size=args.eval_batch_size,
                            max_length=args.max_length,
                            shuffle=False,
                            num_workers=args.workers)

    if 'cuda' in args.type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(
        args.optimizer, params=model.parameters(), lr=args.lr)
    regime = lambda e: {'lr': args.lr * (args.lr_decay ** e),
                        'momentum': args.momentum,
                        'weight_decay': args.weight_decay}
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                # `async` became a reserved word in Python 3.7; use non_blocking.
                captions = captions.cuda(non_blocking=True)
            # volatile=True disables autograd during evaluation (pre-0.4 PyTorch API).
            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            perplexity.update(math.exp(err.data[0]))

            if training:
                optimizer.zero_grad()
                err.backward()
                clip_grad_norm(model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                # `epoch` is resolved from the enclosing loop below at call time.
                logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                             'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                             'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                                 epoch, i, len(data),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 batch_time=batch_time, data_time=data_time,
                                 perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):
        if epoch >= args.finetune_epoch:
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # Train
        train_perp = forward(
            model, train_data, training=True, optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'
                     .format(epoch + 1, train_perp=train_perp, val_perp=val_perp))
        model.save_checkpoint(checkpoint_file % (epoch + 1))
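# `adjust_optimizer` comes from the shared training utilities and is not shown
# in this file. Going by the `regime` lambda above, a minimal sketch would look
# up the epoch's hyperparameters and write them into every parameter group (the
# real helper may instead rebuild the optimizer; this is an assumption):
def adjust_optimizer(optimizer, epoch, regime):
    settings = regime(epoch)  # e.g. {'lr': ..., 'momentum': ..., 'weight_decay': ...}
    for group in optimizer.param_groups:
        for key, value in settings.items():
            group[key] = value
    return optimizer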
                                elapsed * 1000 / args.log_interval, cur_loss,
                                math.exp(cur_loss), cur_loss / math.log(2)))
            losses.append(cur_loss)
            bpcs.append(cur_loss / math.log(2))
            total_loss = 0
            start_time = time.time()
    return np.mean(losses)


# Loop over epochs.
lr = args.lr
decay = args.weight_decay
best_val_loss = None

optimizer, orthog_optimizer = select_optimizer(model, args)
scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.5)
if orthog_optimizer:
    orthog_scheduler = optim.lr_scheduler.StepLR(orthog_optimizer, 1, gamma=0.5)

# At any point you can hit Ctrl + C to break out of training early.
try:
    exp_time = "{0:%Y-%m-%d}_{0:%H-%M-%S}".format(datetime.now())
    SAVEDIR = os.path.join('./saves', 'sMNIST', NET_TYPE,
                           str(args.random_seed), exp_time)
    if not os.path.exists(SAVEDIR):
        os.makedirs(SAVEDIR)

    # os.path.join avoids the missing-separator bug of SAVEDIR + 'hparams.txt'.
    with open(os.path.join(SAVEDIR, 'hparams.txt'), 'w') as fp:
        for key, val in args.__dict__.items():
            fp.write('{}: {}\n'.format(key, val))
T = args.T
batch_size = args.batch
out_size = args.labels + 1
if args.onehot:
    inp_size = args.labels + 2

rnn = select_network(args, inp_size)
net = Model(hidden_size, rnn)
if CUDA:
    net = net.cuda()
    net.rnn = net.rnn.cuda()

print('Copy task')
print(NET_TYPE)
print('Cuda: {}'.format(CUDA))
print(nonlin)
print(hidden_size)

for name, param in net.named_parameters():
    if param.requires_grad:
        print(name, param.data)

if not os.path.exists(SAVEDIR):
    os.makedirs(SAVEDIR)

optimizer, orthog_optimizer = select_optimizer(net, args)

# Write one hyperparameter per line; the missing newline in the original
# concatenated them all onto a single line.
with open(os.path.join(SAVEDIR, 'hparams.txt'), 'w') as fp:
    for key, val in args.__dict__.items():
        fp.write('{}: {}\n'.format(key, val))

train_model(net, optimizer, batch_size, T, n_steps)
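# Both this script and the sMNIST one unpack `select_optimizer(net, args)` into a
# main optimizer plus an optional second optimizer for the orthogonal recurrent
# weights. A rough sketch under that assumption; the name-based parameter split
# and the `args.lr_orth` option are hypothetical, not taken from the repository:
def select_optimizer(net, args):
    orth_params = [p for n, p in net.named_parameters() if 'recurrent' in n]
    rest_params = [p for n, p in net.named_parameters() if 'recurrent' not in n]
    optimizer = torch.optim.RMSprop(rest_params, lr=args.lr)
    orthog_optimizer = None
    if orth_params and getattr(args, 'lr_orth', None):
        # Separate optimizer so the orthogonal parameters get their own lr.
        orthog_optimizer = torch.optim.RMSprop(orth_params, lr=args.lr_orth)
    return optimizer, orthog_optimizer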
    exit()

log_dir = os.path.join(directory, 'logs/')
model_dir = os.path.join(directory, 'models/')
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

# Create this file to monitor errors on the compute platform.
add_experiment_notfinished(directory)

logging.basicConfig(filename=os.path.join(log_dir, 'train.log'), level=logging.INFO)
logging.info("Logger for model: {}".format(topology))
logging.info("Training specifications: {}".format([args.epochs, args.lr, args.anneal]))
logging.info("Stochastic optimization specs: MC samples {} dkl_after_epoch {} scale factor {}".format(
    args.MC_samples, dkl_after_epoch, dkl_scale))

# Start training
activate_anneal = False  # guard so the annealing check below cannot raise a NameError
for ind, (l, ep) in enumerate(zip(args.lr, args.epochs)):
    optimizer = select_optimizer(train_parameters, lr=l, optim='ADAM')

    # Activate annealing on the last (lr, epochs) stage.
    if ind == len(args.epochs) - 1 and linear_anneal:
        activate_anneal = True
        lr_init = l
        epochs_N = ep

    for e in range(ep):
        # annealing
        if activate_anneal:
            lr_new = anneal_lr(lr_init, epochs_N, e)
            optimizer = select_optimizer(train_parameters, lr=lr_new, optim='ADAM')

        (elbo_d, dkl_d, llh_d, MCtrain, total_train, total_batch,
         MCvalid, total_valid, MCtest, total_test) = [0.0] * 10
def main():
    # Argument settings
    parser = argparse.ArgumentParser(
        description='Image Tagging Classification from Naver Shopping Reviews')
    parser.add_argument('--sess_name', default='example', type=str, help='Session name that is loaded')
    parser.add_argument('--checkpoint', default='best', type=str, help='Checkpoint')
    parser.add_argument('--batch_size', default=256, type=int, help='Batch size')
    parser.add_argument('--num_workers', default=16, type=int, help='The number of workers')
    parser.add_argument('--num_epoch', default=100, type=int, help='The number of epochs')
    parser.add_argument('--model_name', default='mobilenet_v2', type=str,
                        help='[resnet50, rexnet, dnet1244, dnet1222]')
    parser.add_argument('--weight_file', default='model.pth', type=str)
    parser.add_argument('--optimizer', default='SGD', type=str)
    parser.add_argument('--lr', default=1e-2, type=float)
    parser.add_argument('--weight_decay', default=1e-5, type=float)
    parser.add_argument('--learning_anneal', default=1.1, type=float)
    parser.add_argument('--annealing_period', default=10, type=int)
    parser.add_argument('--num_gpu', default=1, type=int)
    parser.add_argument('--pretrain', action='store_true', default=False)
    parser.add_argument('--mode', default='train', help='Mode')
    parser.add_argument('--pause', default=0, type=int)
    parser.add_argument('--iteration', default=0, type=str)
    args = parser.parse_args()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Model
    logger.info('Build Model')
    model = select_model(args.model_name, pretrain=args.pretrain, n_class=41)
    total_param = sum(p.numel() for p in model.parameters())
    logger.info(f'Model size: {total_param} parameters')
    load_weight(model, args.weight_file)
    model = model.to(device)

    nu.bind_model(model)
    nsml.save('best')
    if args.pause:
        nsml.paused(scope=locals())
    if args.num_epoch == 0:
        return

    # Set the dataset
    logger.info('Set the dataset')
    df = pd.read_csv(f'{DATASET_PATH}/train/train_label')
    train_size = int(len(df) * 0.8)
    trainset = TagImageDataset(data_frame=df[:train_size],
                               root_dir=f'{DATASET_PATH}/train/train_data',
                               transform=train_transform)
    testset = TagImageDataset(data_frame=df[train_size:],
                              root_dir=f'{DATASET_PATH}/train/train_data',
                              transform=test_transform)

    train_loader = DataLoader(dataset=trainset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers)
    test_loader = DataLoader(dataset=testset, batch_size=args.batch_size,
                             shuffle=False, num_workers=args.num_workers)

    criterion = nn.CrossEntropyLoss(reduction='mean').to(device)
    optimizer = select_optimizer(model.parameters(), args.optimizer, args.lr, args.weight_decay)

    if args.mode == 'train':
        logger.info('Start to train!')
        train_process(args=args, model=model, train_loader=train_loader,
                      test_loader=test_loader, optimizer=optimizer,
                      criterion=criterion, device=device)
    elif args.mode == 'test':
        nsml.load(args.checkpoint, session=args.sess_name)
        logger.info('[NSML] Model loaded from {}'.format(args.checkpoint))
        model.eval()
        logger.info('Start to test!')
        test_loss, test_acc, test_f1 = evaluate(model=model, test_loader=test_loader,
                                                device=device, criterion=criterion)
        # logger.info takes a format string; passing three bare values raises at render time.
        logger.info('Test loss: {:.4f}, accuracy: {:.4f}, F1: {:.4f}'.format(
            test_loss, test_acc, test_f1))
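# The annealing arguments above are consumed inside `train_process` (not shown
# in this excerpt). A plausible sketch, assuming the learning rate is divided by
# `learning_anneal` every `annealing_period` epochs, as the argument names
# suggest; `anneal_learning_rate` itself is a hypothetical helper:
def anneal_learning_rate(optimizer, epoch, args):
    if epoch > 0 and epoch % args.annealing_period == 0:
        for group in optimizer.param_groups:
            group['lr'] /= args.learning_anneal
        logger.info('Annealed learning rate to {:.6f}'.format(
            optimizer.param_groups[0]['lr']))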
def test(args: Namespace):
    cfg = json.load(open(args.config_path, 'r', encoding='UTF-8'))

    batch_size = 1  # predicting one sentence at a time

    encoder = Encoder(cfg['vocab_input_size'], cfg['embedding_dim'],
                      cfg['units'], batch_size, 0)
    decoder = Decoder(cfg['vocab_target_size'], cfg['embedding_dim'],
                      cfg['units'], cfg['method'], batch_size, 0)

    optimizer = select_optimizer(cfg['optimizer'], cfg['learning_rate'])

    ckpt = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)
    manager = tf.train.CheckpointManager(ckpt, cfg['checkpoint_dir'], max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)

    # The vocabularies and tokenizers only need to be built once, outside the input loop.
    input_vocab = load_vocab('./data/', 'en')
    target_vocab = load_vocab('./data/', 'de')

    input_lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        filters='', oov_token='<unk>')
    input_lang_tokenizer.word_index = input_vocab
    target_lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        filters='', oov_token='<unk>')
    target_lang_tokenizer.word_index = target_vocab

    convert_vocab(input_lang_tokenizer, input_vocab)
    convert_vocab(target_lang_tokenizer, target_vocab)

    while True:
        sentence = input('Input Sentence or If you want to quit, type Enter Key : ')
        if sentence == '':
            break

        # Split off punctuation, collapse repeated spaces, then add sentence markers.
        sentence = re.sub(r"(\.\.\.|[?.!,¿])", r" \1 ", sentence)
        sentence = re.sub(r'[" "]+', " ", sentence)
        sentence = '<s> ' + sentence.lower().strip() + ' </s>'

        inputs = [
            input_lang_tokenizer.word_index[i]
            if i in input_lang_tokenizer.word_index
            else input_lang_tokenizer.word_index['<unk>']
            for i in sentence.split(' ')
        ]
        inputs = tf.keras.preprocessing.sequence.pad_sequences(
            [inputs], maxlen=cfg['max_len_input'], padding='post')
        inputs = tf.convert_to_tensor(inputs)

        result = ''

        enc_hidden = encoder.initialize_hidden_state()
        enc_cell = encoder.initialize_cell_state()
        # One (hidden, cell) pair per LSTM layer.
        enc_state = [[enc_hidden, enc_cell], [enc_hidden, enc_cell],
                     [enc_hidden, enc_cell], [enc_hidden, enc_cell]]

        enc_output, enc_hidden = encoder(inputs, enc_state)
        dec_hidden = enc_hidden
        # dec_input = tf.expand_dims([target_lang_tokenizer.word_index['<eos>']], 0)
        dec_input = tf.expand_dims([target_lang_tokenizer.word_index['<s>']], 1)
        print('dec_input:', dec_input)

        # Input feeding: the initial attentional vector is all zeros.
        h_t = tf.zeros((batch_size, 1, cfg['embedding_dim']))

        for t in range(int(cfg['max_len_target'])):
            predictions, dec_hidden, h_t = decoder(dec_input, dec_hidden,
                                                   enc_output, h_t)
            # predictions shape == (1, 50002)
            predicted_id = tf.argmax(predictions[0]).numpy()
            print('predicted_id', predicted_id)
            result += target_lang_tokenizer.index_word[predicted_id] + ' '
            if target_lang_tokenizer.index_word[predicted_id] == '</s>':
                print('Early stopping')
                break
            dec_input = tf.expand_dims([predicted_id], 1)
            print('dec_input:', dec_input)

        print('<s> ' + result)
        print(sentence)
        sys.stdout.flush()
def train(args: Namespace):
    input_tensor, target_tensor, input_lang_tokenizer, target_lang_tokenizer = load_dataset(
        './data/', args.max_len, limit_size=None)

    max_len_input = len(input_tensor[0])
    max_len_target = len(target_tensor[0])
    print('max len of each seq:', max_len_input, ',', max_len_target)

    input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
        input_tensor, target_tensor, test_size=args.dev_split)

    # init hyperparameters
    EPOCHS = args.epoch
    batch_size = args.batch_size
    steps_per_epoch = len(input_tensor_train) // batch_size
    embedding_dim = args.embedding_dim
    units = args.units
    vocab_input_size = len(input_lang_tokenizer.word_index) + 1
    vocab_target_size = len(target_lang_tokenizer.word_index) + 1
    BUFFER_SIZE = len(input_tensor_train)
    learning_rate = args.learning_rate

    setattr(args, 'max_len_input', max_len_input)
    setattr(args, 'max_len_target', max_len_target)
    setattr(args, 'steps_per_epoch', steps_per_epoch)
    setattr(args, 'vocab_input_size', vocab_input_size)
    setattr(args, 'vocab_target_size', vocab_target_size)
    setattr(args, 'BUFFER_SIZE', BUFFER_SIZE)

    dataset = tf.data.Dataset.from_tensor_slices(
        (input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(batch_size)
    print('dataset shape (batch_size, max_len):', dataset)

    encoder = Encoder(vocab_input_size, embedding_dim, units, batch_size, args.dropout)
    decoder = Decoder(vocab_target_size, embedding_dim, units, args.method,
                      batch_size, args.dropout)

    optimizer = select_optimizer(args.optimizer, args.learning_rate)
    loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                          reduction='none')

    @tf.function
    def train_step(_input, _target, enc_state):
        loss = 0
        with tf.GradientTape() as tape:
            enc_output, enc_state = encoder(_input, enc_state)
            dec_hidden = enc_state
            dec_input = tf.expand_dims(
                [target_lang_tokenizer.word_index['<s>']] * batch_size, 1)

            # First input-feeding vector is all zeros.
            h_t = tf.zeros((batch_size, 1, embedding_dim))

            # idx means target character index.
            for idx in range(1, _target.shape[1]):
                predictions, dec_hidden, h_t = decoder(dec_input, dec_hidden,
                                                       enc_output, h_t)
                # tf.print(tf.argmax(predictions, axis=1))
                loss += loss_function(loss_object, _target[:, idx], predictions)
                # Teacher forcing: feed the ground-truth token as the next input.
                dec_input = tf.expand_dims(_target[:, idx], 1)

        batch_loss = loss / int(_target.shape[1])
        variables = encoder.trainable_variables + decoder.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss

    # Setting checkpoints
    now_time = dt.datetime.now().strftime("%m%d%H%M")
    checkpoint_dir = './training_checkpoints/' + now_time
    setattr(args, 'checkpoint_dir', checkpoint_dir)
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder,
                                     decoder=decoder)
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Save the model configuration alongside the checkpoints.
    with open('{}/config.json'.format(checkpoint_dir), 'w', encoding='UTF-8') as fout:
        json.dump(vars(args), fout, indent=2, sort_keys=True)

    min_total_loss = float('inf')  # a fixed constant like 1000 could miss the first save
    for epoch in range(EPOCHS):
        start = time.time()

        enc_hidden = encoder.initialize_hidden_state()
        enc_cell = encoder.initialize_cell_state()
        enc_state = [[enc_hidden, enc_cell], [enc_hidden, enc_cell],
                     [enc_hidden, enc_cell], [enc_hidden, enc_cell]]

        total_loss = 0
        for batch, (_input, _target) in enumerate(dataset.take(steps_per_epoch)):
            batch_loss = train_step(_input, _target, enc_state)
            total_loss += batch_loss

            if batch % 10 == 0:
                print('Epoch {}/{} Batch {}/{} Loss {:.4f}'.format(
                    epoch + 1, EPOCHS, batch + 1, steps_per_epoch,
                    batch_loss.numpy()))

        print('Epoch {}/{} Total Loss per epoch {:.4f} - {} sec'.format(
            epoch + 1, EPOCHS, total_loss / steps_per_epoch, time.time() - start))

        # Save a checkpoint whenever the mean epoch loss improves.
        if min_total_loss > total_loss / steps_per_epoch:
            print('Saving checkpoint...')
            min_total_loss = total_loss / steps_per_epoch
            checkpoint.save(file_prefix=checkpoint_prefix)

        print('\n')
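# `loss_function` used in `train_step` is not defined in this excerpt. Because
# `loss_object` was built with reduction='none', the usual helper (as in the
# TensorFlow NMT tutorial; assumed here, not confirmed by this repository) masks
# padding positions (token id 0) before averaging:
def loss_function(loss_object, real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))  # True where not padding
    loss_ = loss_object(real, pred)                     # per-token loss, unreduced
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask                                       # zero out padded positions
    return tf.reduce_mean(loss_)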
best_validation_acc = 0

# training loop
for ind, epoch, lr, mmu in zip(range(len(args.epochs)), args.epochs, args.lr, mmu_t):
    # Annealing is only activated on the last (epochs, lr) stage.
    if ind == len(args.epochs) - 1 and args.anneal:
        activate_anneal = True
        lr_init = lr
        epochs_N = epoch
    else:
        activate_anneal = False
    lr_new = lr  # set unconditionally so the optimizer below always has a valid lr

    optim_fc = select_optimizer(parameters_fc, lr=lr_new, mmu=mmu, optim='SGD')

    for e in range(epoch):
        if activate_anneal:
            lr_new = anneal_lr(lr_init, epochs_N, e)
            optim_fc = select_optimizer(parameters_fc, lr=lr_new, mmu=mmu, optim='SGD')

        # Once enough total epochs have elapsed, also update the pretrained
        # convolutional layers, with a scaled-down learning rate.
        if total_ep >= after_total_ep:
            optim_conv = select_optimizer(parameters_conv,
                                          lr=lr_new / factor_over_pretrain,
                                          mmu=mmu, optim='SGD')
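# `anneal_lr` is shared with the Bayesian training loop earlier in this
# collection but is not shown. Given the `linear_anneal` flag there and the
# (lr_init, epochs_N, e) signature, a linear decay to zero over the stage is the
# natural reading (an assumption, not the confirmed implementation):
def anneal_lr(lr_init, epochs_N, e):
    # Linearly interpolate from lr_init at e == 0 down to ~0 at e == epochs_N.
    return lr_init * (1.0 - float(e) / float(epochs_N))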