def self_training(args, **kwargs):
	torch.manual_seed(args.seed)
	device = kwargs['device']
	file = kwargs['file']
	current_time = kwargs['current_time']

	nclasses = datasets.__dict__[args.dataset].nclasses
	model = models.__dict__[args.arch](nclasses = nclasses)
	model = torch.nn.DataParallel(model).to(device)

	# Separate losses are needed because we also need the intermediate (softmax) probabilities.
	# nn.CrossEntropyLoss combines nn.LogSoftmax() and nn.NLLLoss() in a single criterion.
	softmax = nn.Softmax(dim = 1)
	logsoftmax = nn.LogSoftmax(dim = 1)
	nll = nn.NLLLoss().to(device)
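	# Note (added for clarity, not part of the original repo): with the criterion split this way,
	# nll(logsoftmax(logits), target) matches nn.CrossEntropyLoss()(logits, target), while
	# softmax(logits) yields the class probabilities that label_addition uses (presumably
	# thresholded against args.tau).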

	optimizer = utils.select_optimizer(args, model)
	train_supervised_dataset, _, _ = utils.get_dataset(args)	# because we need to update the dataset after each epoch
	_, train_unsupervised_loader, val_loader = utils.make_loader(args)

	report = PrettyTable(['Epoch #', 'Train loss', 'Train Accuracy', 'Train Correct', 'Train Total', 'Val loss', 'Top-1 Accuracy', 'Top-5 Accuracy', 'Top-1 Correct', 'Top-5 Correct', 'Val Total', 'Time(secs)'])
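	# Self-training loop: each epoch fits the model on the current labelled set, evaluates on the
	# validation set, and then label_addition moves confidently predicted unlabelled samples
	# (softmax outputs compared against args.tau) into the labelled set for the next epoch.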
	for epoch in range(1, args.epochs + 1):
		per_epoch = PrettyTable(['Epoch #', 'Train loss', 'Train Accuracy', 'Train Correct', 'Train Total', 'Val loss', 'Top-1 Accuracy', 'Top-5 Accuracy', 'Top-1 Correct', 'Top-5 Correct', 'Val Total', 'Time(secs)'])
		start_time = time.time()
		training_loss, train_correct, train_total = train(device, model, logsoftmax, nll, epoch, train_supervised_dataset, optimizer, args.batch_size)
		validation_loss, val1_correct, val5_correct, val_total = validation(device, model, logsoftmax, nll, val_loader)
		train_supervised_dataset = label_addition(device, model, softmax, train_supervised_dataset, train_unsupervised_loader, args.tau)
		end_time = time.time()
		row = [epoch, round(training_loss, 4), "{:.3f}%".format(round((train_correct*100.0)/train_total, 3)), train_correct, train_total, round(validation_loss, 4), "{:.3f}%".format(round((val1_correct*100.0)/val_total, 3)), "{:.3f}%".format(round((val5_correct*100.0)/val_total, 3)), val1_correct, val5_correct, val_total, round(end_time - start_time, 2)]
		report.add_row(row)
		per_epoch.add_row(row)
		print(per_epoch)
		if args.save_model == 'y':
			val_folder = "saved_model/" + current_time
			if not os.path.isdir(val_folder):
				os.makedirs(val_folder)
			save_model_file = val_folder + '/model_' + str(epoch) +'.pth'
			torch.save(model.state_dict(), save_model_file)
	file.write(report.get_string())
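# The label_addition step used above is not shown in this example. Below is a minimal
# sketch (not the original implementation) of what such a pseudo-labelling step could
# look like, assuming the unlabelled loader yields (images, _) batches; the
# `add_samples` helper is hypothetical.
def label_addition_sketch(device, model, softmax, labeled_dataset, unlabeled_loader, tau):
	model.eval()
	with torch.no_grad():
		for images, _ in unlabeled_loader:
			probs = softmax(model(images.to(device)))     # class probabilities per sample
			confidence, pseudo_labels = probs.max(dim=1)  # most likely class and its probability
			keep = confidence > tau                       # keep only confident predictions
			if keep.any():
				# hypothetical helper that appends (image, pseudo-label) pairs to the labelled set
				labeled_dataset.add_samples(images[keep].cpu(), pseudo_labels[keep].cpu())
	return labeled_dataset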
Example #2
def main():
    global args
    args = parser.parse_args()
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    logging.info("using pretrained cnn %s", args.cnn)
    cnn = resnet.__dict__[args.cnn](pretrained=True)

    vocab = build_vocab()
    model = CaptionModel(cnn, vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    train_data = get_iterator(get_coco_data(vocab, train=True),
                              batch_size=args.batch_size,
                              max_length=args.max_length,
                              shuffle=True,
                              num_workers=args.workers)
    val_data = get_iterator(get_coco_data(vocab, train=False),
                            batch_size=args.eval_batch_size,
                            max_length=args.max_length,
                            shuffle=False,
                            num_workers=args.workers)

    if 'cuda' in args.type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(
        args.optimizer, params=model.parameters(), lr=args.lr)
    regime = lambda e: {'lr': args.lr * (args.lr_decay ** e),
                        'momentum': args.momentum,
                        'weight_decay': args.weight_decay}
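    # The regime is applied by adjust_optimizer in the epoch loop below: at epoch e it prescribes
    # lr = args.lr * args.lr_decay ** e (exponential decay) while momentum and weight decay stay fixed.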
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                captions = captions.cuda(non_blocking=True)
            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]
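            # The caption without its final token is fed as decoder input, while the loss targets
            # are the packed caption sequence (packing drops the padded positions from the loss).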

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            perplexity.update(math.exp(err.data[0]))

            if training:
                optimizer.zero_grad()
                err.backward()
                clip_grad_norm(model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                             'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                             'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                                 epoch, i, len(data),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 batch_time=batch_time,
                                 data_time=data_time, perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):
        if epoch >= args.finetune_epoch:
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(
            optimizer, epoch, regime)
        # Train
        train_perp = forward(
            model, train_data, training=True, optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'
                     .format(epoch + 1, train_perp=train_perp, val_perp=val_perp))
        model.save_checkpoint(checkpoint_file % (epoch + 1))
Example #3
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss), cur_loss / math.log(2)))
            losses.append(cur_loss)
            bpcs.append(cur_loss / math.log(2))

            total_loss = 0
            start_time = time.time()
    return np.mean(losses)


# Loop over epochs.
lr = args.lr
decay = args.weight_decay
best_val_loss = None

optimizer, orthog_optimizer = select_optimizer(model, args)
scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.5)
if orthog_optimizer:
    orthog_scheduler = optim.lr_scheduler.StepLR(orthog_optimizer,
                                                 1,
                                                 gamma=0.5)
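# With step_size=1 and gamma=0.5, each scheduler.step() call (typically once per epoch)
# halves the learning rate of the corresponding optimizer.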
# At any point you can hit Ctrl + C to break out of training early.
try:
    exp_time = "{0:%Y-%m-%d}_{0:%H-%M-%S}".format(datetime.now())
    SAVEDIR = os.path.join('./saves', 'sMNIST', NET_TYPE,
                           str(args.random_seed), exp_time)

    if not os.path.exists(SAVEDIR):
        os.makedirs(SAVEDIR)
    with open(os.path.join(SAVEDIR, 'hparams.txt'), 'w') as fp:
        for key, val in args.__dict__.items():
            fp.write('{}: {}\n'.format(key, val))
Example #4
T = args.T
batch_size = args.batch
out_size = args.labels + 1
if args.onehot:
    inp_size = args.labels + 2
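# (Assumption, following the usual copy-task convention:) the extra output class is the blank
# symbol, and one-hot inputs carry args.labels + 2 channels: the label symbols, the blank,
# and the recall marker.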

rnn = select_network(args, inp_size)
net = Model(hidden_size, rnn)
if CUDA:
    net = net.cuda()
    net.rnn = net.rnn.cuda()

print('Copy task')
print(NET_TYPE)
print('Cuda: {}'.format(CUDA))
print(nonlin)
print(hidden_size)
for name, param in net.named_parameters():
    if param.requires_grad:
        print(name, param.data)
if not os.path.exists(SAVEDIR):
    os.makedirs(SAVEDIR)

orthog_optimizer = None
optimizer, orthog_optimizer = select_optimizer(net, args)

with open(os.path.join(SAVEDIR, 'hparams.txt'), 'w') as fp:
    for key, val in args.__dict__.items():
        fp.write('{}: {}\n'.format(key, val))
train_model(net, optimizer, batch_size, T, n_steps)
Example #5
		exit()
	log_dir = directory + '/logs/'
	model_dir = directory + '/models/'
	if not os.path.exists(log_dir):
		os.makedirs(log_dir)
	if not os.path.exists(model_dir):
		os.makedirs(model_dir)
	add_experiment_notfinished(directory)  # marker file used to detect runs that crash on the compute platform
	logging.basicConfig(filename=log_dir+'train.log',level=logging.INFO)
	logging.info("Logger for model: {}".format(topology))
	logging.info("Training specifications: {}".format([args.epochs, args.lr, args.anneal]))
	logging.info("Stochastic Optimization specs: MC samples {} dkl_after_epoch {} scale factor {}".format(args.MC_samples,dkl_after_epoch,dkl_scale))

	#Start training
	for ind,(l,ep) in enumerate(zip(args.lr,args.epochs)):
		optimizer=select_optimizer(train_parameters,lr=l,optim='ADAM')
		'''
		Activate annealing
		'''
		if ind == len(args.epochs)-1 and linear_anneal:
			activate_anneal = True
			lr_init = l
			epochs_N = ep
		else:
			activate_anneal = False

		for e in range(ep):
			# annealing: in the final (lr, epochs) stage, decay the learning rate each epoch and rebuild the optimizer
			if activate_anneal:
				lr_new=anneal_lr(lr_init,epochs_N,e)
				optimizer=select_optimizer(train_parameters,lr=lr_new,optim='ADAM')

			elbo_d,dkl_d,llh_d,MCtrain,total_train,total_batch,MCvalid,total_valid,MCtest,total_test=[0.0]*10
Example #6
def main():
    # Argument Settings
    parser = argparse.ArgumentParser(
        description='Image Tagging Classification from Naver Shopping Reviews')
    parser.add_argument('--sess_name',
                        default='example',
                        type=str,
                        help='Session name that is loaded')
    parser.add_argument('--checkpoint',
                        default='best',
                        type=str,
                        help='Checkpoint')
    parser.add_argument('--batch_size',
                        default=256,
                        type=int,
                        help='batch size')
    parser.add_argument('--num_workers',
                        default=16,
                        type=int,
                        help='The number of workers')
    parser.add_argument('--num_epoch',
                        default=100,
                        type=int,
                        help='The number of epochs')
    parser.add_argument('--model_name',
                        default='mobilenet_v2',
                        type=str,
                        help='[resnet50, rexnet, dnet1244, dnet1222]')
    parser.add_argument('--weight_file', default='model.pth', type=str)
    parser.add_argument('--optimizer', default='SGD', type=str)
    parser.add_argument('--lr', default=1e-2, type=float)
    parser.add_argument('--weight_decay', default=1e-5, type=float)
    parser.add_argument('--learning_anneal', default=1.1, type=float)
    parser.add_argument('--annealing_period', default=10, type=int)
    parser.add_argument('--num_gpu', default=1, type=int)
    parser.add_argument('--pretrain', action='store_true', default=False)
    parser.add_argument('--mode', default='train', help='Mode')
    parser.add_argument('--pause', default=0, type=int)
    parser.add_argument('--iteration', default=0, type=str)
    args = parser.parse_args()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Model
    logger.info('Build Model')
    model = select_model(args.model_name, pretrain=args.pretrain, n_class=41)
    total_param = sum([p.numel() for p in model.parameters()])
    logger.info(f'Model size: {total_param} parameters')
    load_weight(model, args.weight_file)
    model = model.to(device)

    nu.bind_model(model)
    nsml.save('best')

    if args.pause:
        nsml.paused(scope=locals())

    if args.num_epoch == 0:
        return

    # Set the dataset
    logger.info('Set the dataset')
    df = pd.read_csv(f'{DATASET_PATH}/train/train_label')
    train_size = int(len(df) * 0.8)

    trainset = TagImageDataset(data_frame=df[:train_size],
                               root_dir=f'{DATASET_PATH}/train/train_data',
                               transform=train_transform)
    testset = TagImageDataset(data_frame=df[train_size:],
                              root_dir=f'{DATASET_PATH}/train/train_data',
                              transform=test_transform)
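    # The label file is split 80/20 by row order: the first 80% of rows form the training set
    # and the remaining rows serve as the held-out evaluation set.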

    train_loader = DataLoader(dataset=trainset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    test_loader = DataLoader(dataset=testset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers)

    criterion = nn.CrossEntropyLoss(reduction='mean')
    optimizer = select_optimizer(model.parameters(), args.optimizer, args.lr,
                                 args.weight_decay)

    criterion = criterion.to(device)

    if args.mode == 'train':
        logger.info('Start to train!')
        train_process(args=args,
                      model=model,
                      train_loader=train_loader,
                      test_loader=test_loader,
                      optimizer=optimizer,
                      criterion=criterion,
                      device=device)

    elif args.mode == 'test':
        nsml.load(args.checkpoint, session=args.sess_name)
        logger.info('[NSML] Model loaded from {}'.format(args.checkpoint))

        model.eval()
        logger.info('Start to test!')
        test_loss, test_acc, test_f1 = evaluate(model=model,
                                                test_loader=test_loader,
                                                device=device,
                                                criterion=criterion)
        logger.info('Test loss: %.4f, Test accuracy: %.4f, Test F1: %.4f', test_loss, test_acc, test_f1)
Example #7
def test(args: Namespace):
    with open(args.config_path, 'r', encoding='UTF-8') as f:
        cfg = json.load(f)

    batch_size = 1  # for predicting one sentence.

    encoder = Encoder(cfg['vocab_input_size'], cfg['embedding_dim'],
                      cfg['units'], batch_size, 0)
    decoder = Decoder(cfg['vocab_target_size'], cfg['embedding_dim'],
                      cfg['units'], cfg['method'], batch_size, 0)
    optimizer = select_optimizer(cfg['optimizer'], cfg['learning_rate'])

    ckpt = tf.train.Checkpoint(optimizer=optimizer,
                               encoder=encoder,
                               decoder=decoder)
    manager = tf.train.CheckpointManager(ckpt,
                                         cfg['checkpoint_dir'],
                                         max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)

    while True:
        sentence = input(
            'Input Sentence or If you want to quit, type Enter Key : ')

        if sentence == '': break

        sentence = re.sub(r"(\.\.\.|[?.!,¿])", r" \1 ", sentence)
        sentence = re.sub(r'[" "]+', " ", sentence)
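        # Normalise the input sentence: pad punctuation with spaces, collapse repeated spaces,
        # then wrap it in <s> ... </s> below.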

        sentence = '<s> ' + sentence.lower().strip() + ' </s>'

        input_vocab = load_vocab('./data/', 'en')
        target_vocab = load_vocab('./data/', 'de')

        input_lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
            filters='', oov_token='<unk>')
        input_lang_tokenizer.word_index = input_vocab

        target_lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
            filters='', oov_token='<unk>')
        target_lang_tokenizer.word_index = target_vocab

        convert_vocab(input_lang_tokenizer, input_vocab)
        convert_vocab(target_lang_tokenizer, target_vocab)

        inputs = [
            input_lang_tokenizer.word_index[i]
            if i in input_lang_tokenizer.word_index else
            input_lang_tokenizer.word_index['<unk>']
            for i in sentence.split(' ')
        ]
        inputs = tf.keras.preprocessing.sequence.pad_sequences(
            [inputs], maxlen=cfg['max_len_input'], padding='post')

        inputs = tf.convert_to_tensor(inputs)

        result = ''

        enc_hidden = encoder.initialize_hidden_state()
        enc_cell = encoder.initialize_cell_state()
        enc_state = [[enc_hidden, enc_cell], [enc_hidden, enc_cell],
                     [enc_hidden, enc_cell], [enc_hidden, enc_cell]]
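        # The initial encoder state is four [hidden, cell] pairs, i.e. one pair per layer of what
        # appears to be a 4-layer LSTM encoder (the Encoder class itself is defined elsewhere).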

        enc_output, enc_hidden = encoder(inputs, enc_state)

        dec_hidden = enc_hidden
        #dec_input = tf.expand_dims([target_lang_tokenizer.word_index['<eos>']], 0)
        dec_input = tf.expand_dims([target_lang_tokenizer.word_index['<s>']],
                                   1)

        print('dec_input:', dec_input)

        h_t = tf.zeros((batch_size, 1, cfg['embedding_dim']))

        for t in range(int(cfg['max_len_target'])):
            predictions, dec_hidden, h_t = decoder(dec_input, dec_hidden,
                                                   enc_output, h_t)

            # predictions shape == (1, 50002)

            predicted_id = tf.argmax(predictions[0]).numpy()
            print('predicted_id', predicted_id)

            result += target_lang_tokenizer.index_word[predicted_id] + ' '

            if target_lang_tokenizer.index_word[predicted_id] == '</s>':
                print('Early stopping')
                break
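            # At inference the decoder consumes its own previous prediction as the next input
            # (greedy decoding), unlike the teacher forcing used during training.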

            dec_input = tf.expand_dims([predicted_id], 1)
            print('dec_input:', dec_input)

        print('<s> ' + result)
        print(sentence)
        sys.stdout.flush()
Example #8
def train(args: Namespace):
    input_tensor, target_tensor, input_lang_tokenizer, target_lang_tokenizer = load_dataset(
        './data/', args.max_len, limit_size=None)

    max_len_input = len(input_tensor[0])
    max_len_target = len(target_tensor[0])

    print('max len of each seq:', max_len_input, ',', max_len_target)

    input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
        input_tensor, target_tensor, test_size=args.dev_split)

    # init hyperparameter
    EPOCHS = args.epoch
    batch_size = args.batch_size
    steps_per_epoch = len(input_tensor_train) // batch_size
    embedding_dim = args.embedding_dim
    units = args.units
    vocab_input_size = len(input_lang_tokenizer.word_index) + 1
    vocab_target_size = len(target_lang_tokenizer.word_index) + 1
    BUFFER_SIZE = len(input_tensor_train)
    learning_rate = args.learning_rate

    setattr(args, 'max_len_input', max_len_input)
    setattr(args, 'max_len_target', max_len_target)

    setattr(args, 'steps_per_epoch', steps_per_epoch)
    setattr(args, 'vocab_input_size', vocab_input_size)
    setattr(args, 'vocab_target_size', vocab_target_size)
    setattr(args, 'BUFFER_SIZE', BUFFER_SIZE)

    dataset = tf.data.Dataset.from_tensor_slices(
        (input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(batch_size)

    print('dataset shape (batch_size, max_len):', dataset)

    encoder = Encoder(vocab_input_size, embedding_dim, units, batch_size,
                      args.dropout)
    decoder = Decoder(vocab_target_size, embedding_dim, units, args.method,
                      batch_size, args.dropout)

    optimizer = select_optimizer(args.optimizer, args.learning_rate)

    loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                          reduction='none')
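    # reduction='none' keeps the per-token losses; loss_function (defined elsewhere) presumably
    # masks padded positions before averaging them into a scalar.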

    @tf.function
    def train_step(_input, _target, enc_state):
        loss = 0

        with tf.GradientTape() as tape:
            enc_output, enc_state = encoder(_input, enc_state)

            dec_hidden = enc_state

            dec_input = tf.expand_dims(
                [target_lang_tokenizer.word_index['<s>']] * batch_size, 1)

            # First input feeding definition
            h_t = tf.zeros((batch_size, 1, embedding_dim))
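            # Input feeding (in the style of Luong et al.): the previous attentional vector h_t
            # is passed back into the decoder at every step; it starts as zeros.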

            for idx in range(1, _target.shape[1]):
                # idx means target character index.
                predictions, dec_hidden, h_t = decoder(dec_input, dec_hidden,
                                                       enc_output, h_t)

                #tf.print(tf.argmax(predictions, axis=1))

                loss += loss_function(loss_object, _target[:, idx],
                                      predictions)

                dec_input = tf.expand_dims(_target[:, idx], 1)

        batch_loss = (loss / int(_target.shape[1]))

        variables = encoder.trainable_variables + decoder.trainable_variables

        gradients = tape.gradient(loss, variables)

        optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss

    # Setting checkpoint
    now_time = dt.datetime.now().strftime("%m%d%H%M")
    checkpoint_dir = './training_checkpoints/' + now_time
    setattr(args, 'checkpoint_dir', checkpoint_dir)
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     encoder=encoder,
                                     decoder=decoder)

    os.makedirs(checkpoint_dir, exist_ok=True)

    # saving information of the model
    with open('{}/config.json'.format(checkpoint_dir), 'w',
              encoding='UTF-8') as fout:
        json.dump(vars(args), fout, indent=2, sort_keys=True)

    min_total_loss = 1000

    for epoch in range(EPOCHS):
        start = time.time()

        enc_hidden = encoder.initialize_hidden_state()
        enc_cell = encoder.initialize_cell_state()
        enc_state = [[enc_hidden, enc_cell], [enc_hidden, enc_cell],
                     [enc_hidden, enc_cell], [enc_hidden, enc_cell]]

        total_loss = 0

        for (batch, (_input,
                     _target)) in enumerate(dataset.take(steps_per_epoch)):
            batch_loss = train_step(_input, _target, enc_state)
            total_loss += batch_loss

            if batch % 10 == 0:
                print('Epoch {}/{} Batch {}/{} Loss {:.4f}'.format(
                    epoch + 1, EPOCHS, batch + 10, steps_per_epoch,
                    batch_loss.numpy()))

        print('Epoch {}/{} Total Loss per epoch {:.4f} - {} sec'.format(
            epoch + 1, EPOCHS, total_loss / steps_per_epoch,
            time.time() - start))

        # saving checkpoint
        if min_total_loss > total_loss / steps_per_epoch:
            print('Saving checkpoint...')
            min_total_loss = total_loss / steps_per_epoch
            checkpoint.save(file_prefix=checkpoint_prefix)

        print('\n')
Example #9
    best_validation_acc = 0

    #training loop
    for ind, epoch, lr, mmu in zip(range(len(args.epochs)), args.epochs,
                                   args.lr, mmu_t):

        if ind == len(args.epochs) - 1 and args.anneal:
            activate_anneal = True
            lr_init = lr
            epochs_N = epoch
        else:
            activate_anneal = False
            lr_new = lr

        optim_fc = select_optimizer(parameters_fc,
                                    lr=lr_new,
                                    mmu=mmu,
                                    optim='SGD')

        for e in range(epoch):
            if activate_anneal:
                lr_new = anneal_lr(lr_init, epochs_N, e)
                optim_fc = select_optimizer(parameters_fc,
                                            lr=lr_new,
                                            mmu=mmu,
                                            optim='SGD')

            if total_ep >= after_total_ep:
                optim_conv = select_optimizer(parameters_conv,
                                              lr=lr_new / factor_over_pretrain,
                                              mmu=mmu,
                                              optim='SGD')
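                # i.e. once total_ep reaches after_total_ep, the pre-trained convolutional layers
                # are fine-tuned as well, at a learning rate scaled down by factor_over_pretrain.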