Example #1
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    token_stream = get_data()
    assert START_TOKEN == 0
    words = ['_START'] + list(set(token_stream))
    word2idx = dict((word, i) for i, word in enumerate(words))
    num_words = len(words)
    three_grams = dict((tuple(word2idx[w] for w in token_stream[i:i + 3]), True)
                       for i in range(len(token_stream) - 3))
    print('num words', num_words)
    print('stream length', len(token_stream))
    print('distinct 3-grams', len(three_grams))

    trainable_model = get_trainable_model(num_words)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    print('training')
    for epoch in range(TRAIN_ITER // EPOCH_ITER):
        print('epoch', epoch)
        proportion_supervised = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            sess, trainable_model, EPOCH_ITER,
            proportion_supervised=proportion_supervised,
            g_steps=1, d_steps=D_STEPS,
            next_sequence=lambda: get_random_sequence(token_stream, word2idx),
            verify_sequence=lambda seq: verify_sequence(three_grams, seq),
            words=words)
Example #2
def trainer(model, train_data, test_data, epochs, learning_rate):
    # set loss function and optimizer
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    # loop over the dataset multiple times
    for epoch in range(epochs):
        # train one epoch
        train_epoch(train_data, model, loss_function, optimizer)
        # validate epoch on validation set
        loss_train, accuracy_train, loss_test, accuracy_test = validate_epoch(
            train_data, test_data, model, loss_function)
        # print the metrics
        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(
            template.format(
                epoch,
                np.array2string(loss_train, precision=2, floatmode='fixed'),
                np.array2string(accuracy_train * 100,
                                precision=2,
                                floatmode='fixed'),
                np.array2string(loss_test, precision=2, floatmode='fixed'),
                np.array2string(accuracy_test * 100,
                                precision=2,
                                floatmode='fixed')))

    print('Finished Training')
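
The trainer above relies on train_epoch and validate_epoch helpers that are not shown in this listing. A minimal sketch of what they might look like, with signatures inferred from the call sites above (names and details are assumptions, not the source project's code):

import numpy as np
import torch


def train_epoch(train_data, model, loss_function, optimizer):
    # One pass over the training loader: forward, loss, backward, update.
    model.train()
    for inputs, targets in train_data:
        optimizer.zero_grad()
        loss = loss_function(model(inputs), targets)
        loss.backward()
        optimizer.step()


def validate_epoch(train_data, test_data, model, loss_function):
    # Average loss and accuracy on both splits, without gradient tracking.
    model.eval()
    results = []
    with torch.no_grad():
        for loader in (train_data, test_data):
            total_loss, correct, count = 0.0, 0, 0
            for inputs, targets in loader:
                outputs = model(inputs)
                total_loss += loss_function(outputs, targets).item() * targets.size(0)
                correct += (outputs.argmax(dim=1) == targets).sum().item()
                count += targets.size(0)
            results += [np.array(total_loss / count), np.array(correct / count)]
    return tuple(results)  # loss_train, accuracy_train, loss_test, accuracy_test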
Example #3
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    token_stream = get_data()
    assert START_TOKEN == 0
    words = ['_START'] + list(set(token_stream))
    word2idx = dict((word, i) for i, word in enumerate(words))
    num_words = len(words)
    three_grams = dict((tuple(word2idx[w]
                              for w in token_stream[i:i + 3]), True)
                       for i in range(len(token_stream) - 3))
    print('num words', num_words)
    print('stream length', len(token_stream))
    print('distinct 3-grams', len(three_grams))

    trainable_model = get_trainable_model(num_words)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    print('training')
    for epoch in range(TRAIN_ITER // EPOCH_ITER):
        print('epoch', epoch)
        proportion_supervised = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            sess,
            trainable_model,
            EPOCH_ITER,
            proportion_supervised=proportion_supervised,
            g_steps=1,
            d_steps=D_STEPS,
            next_sequence=lambda: get_random_sequence(token_stream, word2idx),
            verify_sequence=lambda seq: verify_sequence(three_grams, seq),
            words=words)
Example #4
def retrain_with_pseudo_label(loaded_models, train_ids, valid_ids, TRAIN_IMAGE_DIR, DATAFRAME, config):

    if 'pseudo_dataframe' not in loaded_models[list(loaded_models.keys())[0]]:
        return
    
    def worker_init_fn(worker_id):   
        random.seed(worker_id+random_seed)   
        np.random.seed(worker_id+random_seed) 

    for key in loaded_models.keys():    

        # make dataloader with pseudo label
        model_config = loaded_models[key]['config']
        dataframe_with_pseudo = pd.concat([DATAFRAME.loc[DATAFRAME['image_id'].isin(train_ids), :], loaded_models[key]['pseudo_dataframe']], axis=0)
        retrain_dataset = GWDDataset(dataframe_with_pseudo, TRAIN_IMAGE_DIR, model_config, is_train=True, do_transform=False)
        # dataset for retrain
        retrain_data_loader = DataLoader(retrain_dataset, batch_size=1, shuffle=True, num_workers=0, worker_init_fn=worker_init_fn, collate_fn=collate_fn)    

        model = copy.deepcopy(loaded_models[key]['model'])
        model.train()
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = get_optimizer(model_config['train']['optimizer'], trainable_params)

        # retraining
        print("Retraining %s" % key)
        for epoch in range(0, config['epochs']):
            if model_config['general']['kfold'] < 0:
                print("\r[Epoch %d]" % epoch)
            train_epoch(model, retrain_data_loader, None, optimizer)
        model.eval()
        loaded_models[key]['pseudo_model'] = model
    return 
Example #5
def main():
    train_dataset = datasets.MNIST(
        root='C:/Users/user/Documents/InterestingAttempt/VAE/mnist_data/',
        train=True,
        transform=transforms.Compose([transforms.ToTensor()]))
    test_dataset = datasets.MNIST(
        root='C:/Users/user/Documents/InterestingAttempt/VAE/mnist_data/',
        train=False,
        transform=transforms.Compose([transforms.ToTensor()]))
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=100, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1000, shuffle=False)

    criterion = nn.MSELoss()
    # criterion = nn.BCELoss()
    criterion2 = KLLoss()

    epoch_num = 30
    lr = 1e-3
    weight_decay = 1e-5
    lamda = 0.01
    latent_num = 2
    mid_features = 256

    outf = r'C:\Users\user\Documents\InterestingAttempt\VAE\logs\linear_{}_{}_{}_{}_{}'.format(
        latent_num, lr, lamda, weight_decay, epoch_num)
    if not os.path.exists(outf):
        os.makedirs(outf)

    model = VAE(28 * 28, mid_features, latent_num).cuda()
    optimizer = optim.Adam(
        model.parameters(), weight_decay=weight_decay, betas=(0.9, 0.999))
    writer = SummaryWriter(outf)
    for epoch in range(epoch_num):
        current_lr = lr / 2**int(epoch / 40)
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr
        train_epoch(
            model,
            optimizer,
            train_loader,
            criterion,
            epoch,
            writer=writer,
            criterion2=criterion2,
            lamda=lamda)
        test(
            model,
            test_loader,
            criterion,
            epoch,
            writer=writer,
            criterion2=criterion2)
        if (epoch + 1) % 10 == 0:
            torch.save(model.state_dict(),
                       os.path.join(outf, 'model_{}.pth'.format(epoch)))
    writer.close()
    torch.save(model.state_dict(), os.path.join(outf, 'model.pth'))
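
KLLoss above is not defined in this listing. A plausible sketch, assuming the usual Gaussian-prior VAE regularizer KL(q(z|x) || N(0, I)); the actual class in the source project may take different arguments:

import torch
import torch.nn as nn


class KLLoss(nn.Module):
    def forward(self, mu, logvar):
        # Batch mean of -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) per sample.
        return torch.mean(-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1))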
Example #6
def run():
    args = parse_opts()

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "9"

    # GLOBAL VARS #
    MODE = args.mode
    CLASS_WEIGHT = False
    N_EP = 20
    FLATTEN = args.flatten
    RNN = args.rnn
    BATCH_SIZE = args.batch_size
    ####

    datasets, dataloaders = init_dataset(
        BATCH_SIZE, single_channel=args.single_channel)

    print('[Train] class counts', np.unique(
        datasets['train'].target_vals, return_counts=True))
    print('[Test] class counts', np.unique(
        datasets['test'].target_vals, return_counts=True))

    n_ch = 1 if args.single_channel else 3

    if MODE == 'min':
        in_channels = datasets['train'].min_depth*n_ch
    elif MODE == 'max':
        in_channels = datasets['train'].max_depth*n_ch

    torch.manual_seed(0)

    # init net
    net = init_net(opt=args.model_idx, in_channels=in_channels)

    class_weight = None
    if CLASS_WEIGHT:
        cnts = Counter(datasets['train'].target_vals)
        n = len(datasets['train'])
        class_weight = [max(cnts.values())/cnts['0'],
                        max(cnts.values())/cnts['1']]
        class_weight = torch.FloatTensor(class_weight)

    cross_entrp_loss = nn.CrossEntropyLoss(weight=class_weight).cuda()
    focal_loss = FocalLoss().cuda()

    optimizer = optim.Adam(net.parameters(), lr=0.000027)

    criterion = cross_entrp_loss

    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    #     optimizer, 'min', verbose=True, patience=7)

    for ep in range(N_EP):
        train_epoch(net, dataloaders['train'], optimizer,
                    criterion, ep, scheduler=None, flatten=FLATTEN, MODE=MODE, rnn=RNN)
        valid_loss = evaluate(net, dataloaders['test'], criterion,
                              ep, flatten=FLATTEN, MODE=MODE, rnn=RNN)
Example #7
def main():
    # init or load model
    print("init model with input shape",config["input_shape"])
    model = NvNet(config=config,input_shape=config["input_shape"], seg_outChans=config["n_labels"])
    parameters = model.parameters()
    optimizer = optim.Adam(parameters, 
                           lr=config["initial_learning_rate"],
                           weight_decay = config["L2_norm"])
    start_epoch = 1
    if config["VAE_enable"]:
        loss_function = CombinedLoss(k1=config["loss_k1_weight"], k2=config["loss_k2_weight"])
    else:
        loss_function = SoftDiceLoss()
    # data_generator
    print("data generating")
    training_data = BratsDataset(phase="train", config=config)
    train_loader = torch.utils.data.DataLoader(dataset=training_data, 
                                               batch_size=config["batch_size"], 
                                               shuffle=True, 
                                               pin_memory=True)
    validation_data = BratsDataset(phase="validate", config=config)
    validation_loader = torch.utils.data.DataLoader(dataset=validation_data, 
                                               batch_size=config["batch_size"], 
                                               shuffle=True, 
                                               pin_memory=True)
    
    train_logger = Logger(model_name=config["model_file"],header=['epoch', 'loss', 'acc', 'lr'])

    if config["cuda_devices"] is not None:
        model = model.cuda()
        loss_function = loss_function.cuda()
        
    # if not config["overwrite"] and os.path.exists(config["model_file"]) or os.path.exists(config["saved_model_file"]):
    #    model, start_epoch, optimizer = load_old_model(model, optimizer, saved_model_path=config["saved_model_file"])
    
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=config["lr_decay"],patience=config["patience"])
    
    print("training on label:{}".format(config["labels"]))    
    for i in range(start_epoch,config["epochs"]):
        train_epoch(epoch=i, 
                    data_loader=train_loader, 
                    model=model,
                    model_name=config["model_file"], 
                    criterion=loss_function, 
                    optimizer=optimizer, 
                    opt=config, 
                    epoch_logger=train_logger) 
        
        val_loss = val_epoch(epoch=i, 
                  data_loader=validation_loader, 
                  model=model, 
                  criterion=loss_function, 
                  opt=config,
                  optimizer=optimizer, 
                  logger=train_logger)
        scheduler.step(val_loss)
Example #8
 def test_bert(self):
     utt_encoder = model.BertUttEncoder(utt_dims)
     dar_model = model.DARRNN(utt_dims, n_labels, n_hidden, 1, dropout=0)
     train_params = itertools.chain(dar_model.parameters(),
                                    utt_encoder.parameters())
     optimizer = optim.Adam(train_params)
     criterion = nn.CrossEntropyLoss(ignore_index=0)
     print("Testing BERT on random inputs.")
     for epoch in range(epochs):
         train.train_epoch(utt_encoder, dar_model, train_data, n_labels,
                           batch_size, bptt, None, criterion, optimizer,
                           'cpu')
Example #9
def main():
    print(outf)
    print("loading dataset ...")
    trainDataset = TrainDataset(name='/data0/niejiangtao/ICIP2019Deraining/train/train.h5')
    batchSize = opt.batchSize_per_gpu * len(device_ids)
    trainLoader = udata.DataLoader(trainDataset, batch_size=batchSize, shuffle=True, num_workers=0)

    testDataset = TestDataset2(name='/data0/niejiangtao/ICIP2019Deraining/test_a/test.h5')
    print('testDataset len : {}'.format(len(testDataset)))
    
    l1_criterion = nn.L1Loss().cuda()
    # mask_criterion = nn.MSELoss().cuda()
    ssim_criterion = SSIM().cuda()
    
    model = UNet_v2(n_channels=3, n_classes=3)
    # model = RESCAN()

    if len(device_ids) > 1:
        model = nn.DataParallel(model, device_ids=device_ids)
    model.cuda()
    
    beta1 = 0.9
    beta2 = 0.999
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr, weight_decay=weight_decay, betas=(beta1, beta2))

    writer = SummaryWriter(outf)

    for epoch in range(opt.epochs):
        start = time.time()
        current_lr = opt.lr / 2**int(epoch / interval)
        for param_group in optimizer.param_groups:
            param_group["lr"] = current_lr
        print("epoch {} learning rate {}".format(epoch, current_lr))

        # test(model, testDataset, None, epoch, writer=writer)
        train_epoch(model, optimizer, trainLoader, l1_criterion, None, ssim_criterion, epoch, writer=writer, radio=radio)

        if (epoch+1) % 5 == 0:
            test(model, testDataset, None, epoch, writer=writer)
        if (epoch+1) % 20 == 0:
            """ torch.save({
                'epoch': epoch,
                'model_state_dict': model.module.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, os.path.join(outf, 'checkpoint_{}.pth'.format(epoch))) """
            torch.save(model.state_dict(), os.path.join(outf, 'model_{}.pth'.format(epoch)))

        end = time.time()
        print('epoch {} cost {} hour '.format(
            epoch, str((end - start) / (60 * 60))))
            
    torch.save(model.state_dict(), os.path.join(outf, 'model.pth'))
    generate_result(model, outf, testDataset, mat=False, ouput_img=True)
Example #10
def train(rnn_trainer, rnn_predictor, train_data, valid_target_data,
          valid_source_data, dictionary, epoch_size, model_directory,
          beam_size, viterbi_size):
    start_time = time.time()
    log_path = os.path.join(model_directory, 'log.txt')
    log_file = open(log_path, 'w')
    best_epoch = None
    best_metrics = None

    for epoch in range(epoch_size):
        # Train one epoch and save the model
        train_epoch(rnn_trainer, train_data, model_directory, epoch)

        # Decode all sentences
        rnn_predictor.restore_from_directory(model_directory)
        system, decode_time = decode_all(rnn_predictor, valid_source_data,
                                         dictionary, beam_size, viterbi_size)

        # Evaluate results
        metrics = evaluate(system, valid_target_data)

        # Print metrics
        log_text = 'decoding precision: {:.2f} recall: {:.2f} f-score: {:.2f} accuracy: {:.2f}\n'.format(
            *metrics)
        log_text += 'decoding total time: {:.2f} average time: {:.2f}'.format(
            decode_time, decode_time / len(system))
        print(log_text)
        print(log_text, file=log_file)

        # Write decoded results to file
        decode_path = os.path.join(model_directory,
                                   'decode-{}.txt'.format(epoch))
        with open(decode_path, 'w') as file:
            file.write('\n'.join(system))

        # Update best epoch
        if best_epoch is None or best_metrics[2] < metrics[2]:
            best_epoch = epoch
            best_metrics = metrics

    total_time = time.time() - start_time
    print('best epoch:', best_epoch)
    print(
        'best epoch metrics: precision: {:.2f} recall: {:.2f} f-score: {:.2f} accuracy: {:.2f}'
        .format(*best_metrics))
    print('total experiment time:', total_time)
    print()
    return best_metrics, best_epoch
Example #11
def main_worker():
	opt = parse_opts()
	print(opt)

	seed = 1
	random.seed(seed)
	np.random.seed(seed)
	torch.manual_seed(seed)

	# CUDA for PyTorch
	device = torch.device(f"cuda:{opt.gpu}" if opt.use_cuda else "cpu")

	# tensorboard
	summary_writer = tensorboardX.SummaryWriter(log_dir='tf_logs')

	# defining model
	encoder_cnn, decoder_rnn =  generate_model(opt, device)
	# get data loaders
	train_loader, val_loader = get_loaders(opt)

	# optimizer
	crnn_params = list(encoder_cnn.parameters()) + \
		list(decoder_rnn.parameters())
	optimizer = torch.optim.Adam(crnn_params, lr=opt.lr_rate, weight_decay=opt.weight_decay)

	# scheduler = lr_scheduler.ReduceLROnPlateau(
	# 	optimizer, 'min', patience=opt.lr_patience)
	criterion = nn.CrossEntropyLoss()

	# resume model
	if opt.resume_path:
		start_epoch = resume_model(opt, encoder_cnn, decoder_rnn, optimizer)
	else:
		start_epoch = 1

	# start training
	for epoch in range(start_epoch, opt.n_epochs + 1):
		train_loss, train_acc = train_epoch(
			encoder_cnn, decoder_rnn, train_loader, criterion, optimizer, epoch, opt.log_interval, device)
		val_loss, val_acc = val_epoch(
			encoder_cnn, decoder_rnn, val_loader, criterion, device)

		# saving weights to checkpoint
		if (epoch) % opt.save_interval == 0:
			# scheduler.step(val_loss)
			# write summary
			summary_writer.add_scalar(
				'losses/train_loss', train_loss, global_step=epoch)
			summary_writer.add_scalar(
				'losses/val_loss', val_loss, global_step=epoch)
			summary_writer.add_scalar(
				'acc/train_acc', train_acc * 100, global_step=epoch)
			summary_writer.add_scalar(
				'acc/val_acc', val_acc * 100, global_step=epoch)

			state = {'epoch': epoch, 'encoder_state_dict': encoder_cnn.state_dict(),
					 'decoder_state_dict': decoder_rnn.state_dict(), 'optimizer_state_dict': optimizer.state_dict()}
			torch.save(state, os.path.join('snapshots', f'{opt.model}-Epoch-{epoch}-Loss-{val_loss}.pth'))
			print("Epoch {} model saved!\n".format(epoch))
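
The resume_model helper used above is not shown. A minimal sketch consistent with the checkpoint dictionary saved at the end of the loop (the key names are assumptions, not the project's actual code):

import torch


def resume_model(opt, encoder_cnn, decoder_rnn, optimizer):
    # Restore encoder, decoder and optimizer state, then return the next epoch index.
    checkpoint = torch.load(opt.resume_path, map_location='cpu')
    encoder_cnn.load_state_dict(checkpoint['encoder_state_dict'])
    decoder_rnn.load_state_dict(checkpoint['decoder_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch'] + 1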
Example #12
def main():

    opt = set_opts()
    model = load_pretrained_resnet101(opt)
    train_loader, val_loader, test_loader, test_data = get_ucf_data(opt)

    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    # get fine-tune parameters (we fine-tune all of them)
    parameters = get_fine_tuning_parameters(model, opt.ft_begin_index)

    optimizer = optim.SGD(parameters,
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=opt.dampening,
                          weight_decay=opt.weight_decay,
                          nesterov=opt.nesterov)

    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=opt.lr_patience)

    train_logger = Logger(os.path.join(opt.result_path, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                        ['epoch', 'loss', 'acc'])

    # training
    for i in range(opt.begin_epoch, opt.n_epochs + 1):

        train_epoch(i, train_loader, model, criterion, optimizer, opt,
                    train_logger, train_batch_logger)

        validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                    val_logger)

        scheduler.step(validation_loss)

    # testing
    test_results, all_output_buffer = final_test(test_loader, model, opt,
                                                 test_data.class_names)
Example #13
def main(args):
    mode = "evaluation" + str(args.fold)
    traindataloader, testdataloader, meta = get_dataloader(
        args.datapath,
        mode,
        args.batchsize,
        args.workers,
        level=args.level,
        preload_ram=args.preload_ram)

    num_classes = meta["num_classes"]
    ndims = meta["ndims"]
    sequencelength = meta["sequencelength"]

    print(f"Logging results to {args.logdir}")
    logdir = os.path.join(args.logdir, str(args.fold))
    os.makedirs(logdir, exist_ok=True)

    epochs, learning_rate, weight_decay = select_hyperparameter(args.model)

    device = torch.device(args.device)
    model = get_model(args.model, ndims, num_classes, sequencelength, device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)
    model.modelname += f"_learning-rate={learning_rate}_weight-decay={weight_decay}"
    print(f"Initialized {model.modelname}")
    criterion = torch.nn.CrossEntropyLoss(reduction="mean")

    for epoch in range(epochs):
        print(f"train epoch {epoch}")
        train_epoch(model, optimizer, criterion, traindataloader, device)
    losses, y_true, y_pred, y_score, field_ids = test_epoch(
        model, criterion, dataloader=testdataloader, device=device)

    logdir = os.path.join(logdir, args.model)
    os.makedirs(logdir, exist_ok=True)
    print(f"saving results to {logdir}")
    print(sklearn.metrics.classification_report(y_true.cpu(), y_pred.cpu()),
          file=open(os.path.join(logdir, "classification_report.txt"), "w"))
    np.save(os.path.join(logdir, "y_pred.npy"), y_pred.cpu().numpy())
    np.save(os.path.join(logdir, "y_true.npy"), y_true.cpu().numpy())
    np.save(os.path.join(logdir, "y_score.npy"), y_score.cpu().numpy())
    np.save(os.path.join(logdir, "field_ids.npy"), field_ids.numpy())
    save(model, os.path.join(logdir, model.modelname + ".pth"))
Example #14
def train(hidden_size, num_layers, lr, weight_decay):
    region = "germany"

    log_name = log_pattern.format(region=region,
                                  num_layers=num_layers,
                                  hidden_size=hidden_size,
                                  lr=lr,
                                  weight_decay=weight_decay)
    log_path = os.path.join(log_dir, log_name)

    if os.path.exists(log_path):
        print(f"{log_path} exists. skipping...")
        return

    try:
        model, dataset, validdataset, dataloader, validdataloader, optimizer = setup(
            hidden_size, num_layers, lr, weight_decay)
        stats = list()
        for epoch in range(epochs):
            trainloss = train_epoch(model, dataloader, optimizer, criterion,
                                    device)
            testmetrics, testloss = test_epoch(model,
                                               validdataloader,
                                               device,
                                               criterion,
                                               n_predictions=1)
            metric_msg = ", ".join([
                f"{name}={metric.compute():.2f}"
                for name, metric in testmetrics.items()
            ])
            msg = f"epoch {epoch}: train loss {trainloss:.2f}, test loss {testloss:.2f}, {metric_msg}"
            print(msg)

            #test_model(model, validdataset, device)

            model_name = name_pattern.format(region=region,
                                             num_layers=num_layers,
                                             hidden_size=hidden_size,
                                             lr=lr,
                                             weight_decay=weight_decay,
                                             epoch=epoch)
            pth = os.path.join(model_dir, model_name + ".pth")
            print(f"saving model snapshot to {pth}")
            snapshot(model, optimizer, pth)
            stat = dict()
            stat["epoch"] = epoch
            for name, metric in testmetrics.items():
                stat[name] = metric.compute()

            stat["trainloss"] = trainloss.cpu().detach().numpy()
            stat["testloss"] = testloss.cpu().detach().numpy()
            stats.append(stat)

    finally:
        df = pd.DataFrame(stats)
        df.to_csv(log_path)
        print(f"saving log to {log_path}")
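
The snapshot helper used above is not shown; a minimal sketch, assuming it simply serializes model and optimizer state to the given path:

import torch


def snapshot(model, optimizer, path):
    # Persist weights and optimizer state so training can be resumed later.
    torch.save({'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict()}, path)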
Example #15
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    trainable_model = get_trainable_model()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    print('training')
    for epoch in range(TRAIN_ITER // EPOCH_ITER):
        print('epoch', epoch)
        proportion_supervised = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            sess, trainable_model, EPOCH_ITER,
            proportion_supervised=proportion_supervised,
            g_steps=4, d_steps=D_STEPS,
            next_sequence=get_random_sequence)
Example #16
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    trainable_model = get_trainable_model()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    print('training')
    for epoch in range(TRAIN_ITER // EPOCH_ITER):
        print('epoch', epoch)
        proportion_supervised = max(0.0, 1.0 - CURRICULUM_RATE * epoch)
        train.train_epoch(
            sess, trainable_model, EPOCH_ITER,
            proportion_supervised=proportion_supervised,
            g_steps=1, d_steps=D_STEPS,
            next_sequence=get_random_sequence,
            verify_sequence=verify_sequence)
Example #17
def main():
    print('')
    print("training EM model")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"
    opt = parse_opts()
    torch.manual_seed(opt.manual_seed)

    model, parameters = generate_model(opt)
    optimizer = torch.optim.Adam(parameters, lr=opt.learning_rate)

    if not os.path.exists(opt.model_weight):
        os.mkdir(opt.model_weight)

    trainSet = weaklyDataset(opt.train_path)
    train_loader = DataLoader(trainSet, batch_size=1,
                              shuffle=True, num_workers=0)
    E_step = False

    def adjust_learning_rate(optimizer):
        for param_group in optimizer.param_groups:
            param_group['lr'] = opt.learning_rate*4

    for epoch in range(1, 66):

        if epoch <= 10:
            stage = 1
            E_step = False

        elif epoch > 10 and epoch <= 20:
            stage = 1
            E_step = True

        elif epoch > 20 and epoch <= 30:
            stage = 2
            E_step = False
            adjust_learning_rate(optimizer)

        else:
            stage = 2
            E_step = not E_step

        train_epoch(epoch, train_loader, model, optimizer, opt, E_step, stage)
        torch.save(model.state_dict(), opt.model_weight+"/{}.pt".format(epoch))
    return
Example #18
 def test_train_one_epoch(self):
     train_ds, test_ds = train.get_datasets()
     input_rng = onp.random.RandomState(0)
     model = train.create_model(random.PRNGKey(0))
     optimizer = train.create_optimizer(model, 0.1, 0.9)
     optimizer, train_metrics = train.train_epoch(optimizer, train_ds, 128,
                                                  0, input_rng)
     self.assertLessEqual(train_metrics['loss'], 0.27)
     self.assertGreaterEqual(train_metrics['accuracy'], 0.92)
     loss, accuracy = train.eval_model(optimizer.target, test_ds)
     self.assertLessEqual(loss, 0.06)
     self.assertGreaterEqual(accuracy, 0.98)
Example #19
File: lif.py Project: syntapy/normad
 def train(self, a, b, method='resume', threshold=0.7):
     i = 0
     self.r = 1
     while True:
         i += 1
         print("Epoch ", i)
         correct = train.train_epoch(self, a, b, method=method)
         p_correct = float(correct) / (b - a)
         print(": %", p_correct, " correct")
         if p_correct > threshold:
             break
         self.r = 32 / (1 + 1024*p_correct)
     return p_correct
Example #20
File: lif.py Project: liuYing77/normad
 def train(self, a, b, method='resume', threshold=0.7):
     i = 0
     self.r = 1
     while True:
         i += 1
         print("Epoch ", i)
         correct = train.train_epoch(self, a, b, method=method)
         p_correct = float(correct) / (b - a)
         print(": %", p_correct, " correct")
         if p_correct > threshold:
             break
         self.r = 32 / (1 + 1024 * p_correct)
     return p_correct
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-read_test_dir', required=True)
    parser.add_argument('-read_vocab_file', required=True)
    parser.add_argument('-load_model_dir', required=True)
    parser.add_argument('-load_model_file_list', required=True, nargs='+')
    parser.add_argument('-save_model_dir', required=True)
    parser.add_argument('-use_gpu', action='store_true')
    opt = parser.parse_args()
    
    print('[PROCEDURE] combining model with model averaging...')
    models = []
    for file in opt.load_model_file_list:
        checkpoint = torch.load(opt.load_model_dir + '/' + file, map_location=lambda storage, loc: storage)
        train_options = checkpoint['train_options']
        models.append(checkpoint['model'])

    print('[INFO] model loaded')

    print('[INFO] reading test data...')
    batch_size = 96 # dev384/test187
    test_data = train.initialize_batch_loader(opt.read_test_dir + '/feats.scp', opt.read_test_dir + '/text', opt.read_vocab_file, batch_size)
    print('[INFO] batch loader is initialized')

    vocab_size = len(torch.load(opt.read_vocab_file))
    crit = train.get_criterion(vocab_size)
    if opt.use_gpu:
        crit = crit.cuda()
    print('[INFO] using cross entropy loss.')

#---------------------------------------------------------------------------------------------------------------------
    '''
    for model in models:
        if opt.use_gpu:
            model = model.cuda()
        start = time.time()
        test_loss, test_accu = train.train_epoch(model, test_data, crit, mode = 'eval', use_gpu = opt.use_gpu)
        print('[INFO]-----(evaluating test set)----- ppl: {:7.3f}, accuracy: {:3.2f} %, elapse: {:3.2f} min'
            .format(math.exp(min(test_loss, 100)), 100*test_accu, (time.time()-start)/60))
    '''
    model = sum_average_model(models)
    if opt.use_gpu:
        model = model.cuda()
    start = time.time()
    test_loss, test_accu = train.train_epoch(model, test_data, crit, mode = 'eval', use_gpu = opt.use_gpu)
    print('[INFO]-----(evaluating combining set)----- ppl: {:7.3f}, accuracy: {:3.2f} %, elapse: {:3.2f} min'
            .format(math.exp(min(test_loss, 100)), 100*test_accu, (time.time()-start)/60))

    model_name = opt.save_model_dir + '/combined.accu{:3.2f}.torch'.format(100*test_accu)
    checkpoint['model'] = model
    torch.save(checkpoint, model_name)
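
sum_average_model above is not defined in this listing. A plausible sketch that averages the parameters of several checkpoints of the same architecture (the real helper in the source project may differ):

import copy


def sum_average_model(models):
    # Element-wise mean over every tensor in the state dicts, loaded into a copy of the first model.
    averaged = copy.deepcopy(models[0])
    state_dicts = [m.state_dict() for m in models]
    avg_state = {key: sum(sd[key].float() for sd in state_dicts) / len(state_dicts)
                 for key in state_dicts[0]}
    averaged.load_state_dict(avg_state)
    return averaged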
Example #22
def main():
    opt = parse_opts()
    opt.device_ids = list(range(device_count()))
    local2global_path(opt)
    model, parameters = generate_model(opt)

    criterion = get_loss(opt)
    criterion = criterion.cuda()
    optimizer = get_optim(opt, parameters)

    writer = SummaryWriter(logdir=opt.log_path)

    # train
    spatial_transform = get_spatial_transform(opt, 'train')
    temporal_transform = TSN(seq_len=opt.seq_len,
                             snippet_duration=opt.snippet_duration,
                             center=False)
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = get_data_loader(opt, training_data, shuffle=True)

    # validation
    spatial_transform = get_spatial_transform(opt, 'test')
    temporal_transform = TSN(seq_len=opt.seq_len,
                             snippet_duration=opt.snippet_duration,
                             center=False)
    target_transform = ClassLabel()
    validation_data = get_validation_set(opt, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = get_data_loader(opt, validation_data, shuffle=False)

    for i in range(1, opt.n_epochs + 1):
        train_epoch(i, train_loader, model, criterion, optimizer, opt,
                    training_data.class_names, writer)
        val_epoch(i, val_loader, model, criterion, opt, writer, optimizer)

    writer.close()
Example #23
File: main.py Project: Shawn-Hx/TATA
def run(opts):
    # Set the random seed
    torch.manual_seed(opts.seed)
    random.seed(opts.seed)

    # Set the device
    opts.device = torch.device(
        f'cuda:{opts.gpu_id}' if opts.use_cuda else 'cpu')

    # Load and prepare data
    train_graphs = load_graphs(dirname=opts.train_dsp_dataset_dir)
    valid_graphs = load_graphs(dirname=opts.valid_dsp_dataset_dir)
    resources = load_resources(opts.communicate_costs,
                               dirname=opts.res_dataset_dir)

    train_data = build_samples(train_graphs, resources, opts)
    valid_data = build_samples(valid_graphs, resources, opts)

    # train_data, valid_data = data_split(total_data, opts.train_ratio, shuffle=True)
    build_feature(train_data, is_train=True)
    build_feature(valid_data, is_train=False)
    train_data = data_augment(train_data, opts.train_batch_size)

    # Initialize model
    model = Model(opts.op_dim, opts.slot_dim, opts.edge_dim, opts.embed_dim,
                  opts.dsp_conv_iter, opts.res_conv_iter, opts.dsp_gcn_aggr,
                  opts.res_gcn_aggr, opts.gcn_act, opts.rnn_type,
                  opts.tanh_clip).to(opts.device)
    optimizer = optim.Adam(model.parameters(), lr=opts.lr)
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    if opts.save_model:
        model_dir = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        os.mkdir(os.path.join(opts.model_dir, model_dir))

    best_avg_reward = -1
    for epoch in range(1, opts.epochs + 1):
        valid_avg_reward = train_epoch(train_data, valid_data, model,
                                       optimizer, lr_scheduler, epoch, opts)
        if opts.save_model and epoch > opts.save_model_epoch_threshold and valid_avg_reward > best_avg_reward:
            best_avg_reward = valid_avg_reward
            torch.save(model, f'model/{model_dir}/best_model.pt')
Example #24
    # Create loss criterion & optimizer
    # criterion = nn.CrossEntropyLoss()
    criterion = LabelSmoothingCrossEntropy()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=7, threshold=0.0001)

    # Start training
    if phase == 'Train':
        logger.info("Training Started".center(60, '#'))
        for epoch in range(epochs):
            current_lr = get_lr(optimizer)
            if epoch == 15:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = current_lr * 0.1
            print('lr: ', get_lr(optimizer))
            # Train the model
            train_epoch(model, criterion, optimizer, train_loader, device, epoch, logger, log_interval, writer)

            # Validate the model
            val_loss = val_epoch(model, criterion, val_loader, device, epoch, logger, writer)
            # scheduler.step(val_loss)
            
            # Save model
            torch.save(model.state_dict(), os.path.join(model_path, "sign_resnet2d+1_epoch{:03d}.pth".format(epoch+1)))
            logger.info("Epoch {} Model Saved".format(epoch+1).center(60, '#'))
    elif phase == 'Test':
        logger.info("Testing Started".center(60, '#'))
        val_loss = val_epoch(model, criterion, val_loader, device, 0, logger, writer, phase=phase, exp_name=exp_name)

    logger.info("Finished".center(60, '#'))
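
get_lr above is assumed to read the current learning rate from the optimizer; a one-line sketch:

def get_lr(optimizer):
    # The loop above adjusts a single parameter group, so the first one is representative.
    return optimizer.param_groups[0]['lr']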
Example #25
def create_3d_resnet(ema=False, num_classes=101):
    # The function body is omitted in the original example; pass keeps the snippet valid.
    pass

if __name__ == '__main__':
    args = opts.parse_opts()
    if not os.path.exists(args.result_path):
        os.makedirs(args.result_path)
    # for key in cfg.keys():
    #     print('{}: {}'.format(key, cfg[key]))
    # if not os.path.exists(os.path.join(args.result_path, 'config.py')):
    #     shutil.copyfile('./config.py', os.path.join(args.result_path, 'config.py'))
    args.scales = [args.initial_scale]
    for i in range(1, args.n_scales):
        args.scales.append(args.scales[-1] * args.scales_step)
    args.arch = 'resnet18'
    args.mean = get_mean(1, dataset='activitynet')
    args.std = get_std(args.norm_value)

    print(args)
    with open(os.path.join(args.result_path, 'args.json'), 'w') as args_file:
        json.dump(vars(args), args_file)

    torch.manual_seed(args.manual_seed)

    # writer = SummaryWriter(log_dir='./results')
    train_batch_logger = Logger(os.path.join(args.result_path, args.pth_name + '_' + 'train_batch.log'),
                                ['epoch', 'batch', 'iter', 'class_loss', 'consistency_loss', 'prec1', 'ema_prec1', 'lr'])
    train_epoch_logger = Logger(os.path.join(args.result_path, args.pth_name + '_' + 'train_epoch.log'),
                                ['epoch', 'class_loss', 'consistency_loss', 'prec1', 'ema_prec1'])
    val_logger = Logger(os.path.join(args.result_path, args.pth_name + '_' + 'val.log'), ['epoch', 'loss', 'prec1'])

    student_model = create_model().cuda()  # student
    ema_model = create_model(ema=True).cuda()  # teacher

    train_set, val_set, classes = prepare_cifar10(args.dataset_root)
    train_loader, val_loader = sample_train(train_set, val_set, len(classes), args)

    # classification error is ignored for unlabeled samples, but averaged by whole batch, not just labeled samples
    class_criterion = nn.CrossEntropyLoss(ignore_index=args.NO_LABEL, reduction='sum').cuda()
    if args.consistency_type == 'mse':
        consistency_criterion = softmax_mse_loss
    elif args.consistency_type == 'kl':
        consistency_criterion = softmax_kl_loss
    else:
        consistency_criterion = None
        exit('wrong consistency type! Check config file!')

    criterion = {'classification': class_criterion, 'consistency': consistency_criterion}

    optimizer = torch.optim.SGD(student_model.parameters(), args.init_lr,
                                momentum=0.9, weight_decay=args.weight_decay, nesterov=True)

    best_prec1 = 0
    for epoch in range(args.num_epochs):
        train_epoch(epoch, student_model, ema_model, train_loader, optimizer, criterion,
                    train_batch_logger, train_epoch_logger, args)

        state = {'epoch': epoch, 'state_dict': student_model.state_dict(), 'ema_state_dict': ema_model.state_dict(),
                 'optimizer': optimizer.state_dict(), 'best_prec1': best_prec1}
        save_checkpoint(state, False, args.result_path, args.pth_name)

        validation_loss, prec1 = validate_epoch(epoch, student_model, val_loader, criterion, val_logger, args)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        state = {'epoch': epoch, 'state_dict': student_model.state_dict(), 'ema_state_dict': ema_model.state_dict(),
                 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict()}
        save_checkpoint(state, is_best, args.result_path, args.pth_name)
Example #26
def _run_rl(opts):

    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir, "{}_{}".format(opts.problem,
                                                      opts.graph_size),
                         opts.run_name))

    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print('  [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    encoder_class = {
        'gat': GraphAttentionEncoder,
        'gcn': GCNEncoder,
        'mlp': MLPEncoder
    }.get(opts.encoder, None)
    assert encoder_class is not None, "Unknown encoder: {}".format(
        opts.encoder)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        encoder_class,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Compute number of network parameters
    print(model)
    nb_param = 0
    for param in model.parameters():
        nb_param += np.prod(list(param.data.size()))
    print('Number of parameters: ', nb_param)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else CriticNetwork(
                 encoder_class, 2, opts.embedding_dim, opts.hidden_dim,
                 opts.n_encode_layers, opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                # if isinstance(v, torch.Tensor):
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset)
    opts.val_size = val_dataset.size

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Set the random states
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, tb_logger, opts)
Example #27
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': float(opts.lr_model)
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': float(opts.lr_critic)
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size)

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, opts)
Example #28
            os.path.join(opt.result_path, 'val.log'), ['epoch', 'loss', 'acc'])

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])

    print('run')
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            train_epoch(i, train_loader, model, criterion, optimizer, opt,
                        train_logger, train_batch_logger)
        if not opt.no_val:
            validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                        val_logger)

        if not opt.no_train and not opt.no_val:
            scheduler.step(validation_loss)

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
Example #29
                                                        root_path=root_path),
                                              batch_size=100)

    net = ResNeXt(10)
    # net = SKNet(10)
    net.cuda()
    optimizer = optim.Adam(net.parameters(),
                           weight_decay=1e-5,
                           betas=(0.9, 0.999))
    criterion = nn.CrossEntropyLoss().cuda()

    log_path = './logs/'
    writer = SummaryWriter(log_path)

    epoch_num = 300
    lr0 = 1e-3
    for epoch in range(epoch_num):
        current_lr = lr0 / 2**int(epoch / 50)
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_lr
        train_epoch(net,
                    optimizer,
                    train_loader,
                    criterion,
                    epoch,
                    writer=writer)
        test(net, test_loader, criterion, epoch, writer=writer)
        if (epoch + 1) % 5 == 0:
            torch.save(net.state_dict(),
                       os.path.join('./model/model_{}.pth'.format(epoch)))
    torch.save(net.state_dict(), os.path.join('./model/model.pth'))
Example #30
                            ['epoch', 'loss', 'acc'])

    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])

    print('run')
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            train_epoch(i, train_loader, model, criterion, optimizer, opt,
                        train_logger, train_batch_logger)
        if not opt.no_val:
            validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                        val_logger)

        if not opt.no_train and not opt.no_val:
            scheduler.step(validation_loss)

    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
Example #31
    if opt.resume_path:
        print('loading checkpoint {}'.format(opt.resume_path))
        checkpoint = torch.load(opt.resume_path)
        assert opt.arch == checkpoint['arch']

        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        if not opt.no_train:
            optimizer.load_state_dict(checkpoint['optimizer'])

    print('run')
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            train_loss, train_acc = train_epoch(
                i, train_loader, model, criterion, optimizer, opt,
                train_logger, train_batch_logger, viz, train_lot, netD,
                optimizerD, criterion2, netG, optimizerG, criterion3)

        if not opt.no_val:
            validation_loss, validation_acc = val_epoch(
                i, val_loader, model, criterion, opt, val_logger)
            if opt.visdom:
                viz.line(X=torch.ones((1, 2)).cpu() * (i - 1),
                         Y=torch.Tensor(
                             [[validation_loss, validation_acc * 10]]),
                         win=val_lot,
                         update='append')

    # ===================================================================================

    if opt.test:
Example #32
def run(opts):

    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir, "{}_{}".format(opts.problem,
                                                      opts.graph_size),
                         opts.run_name))

    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print('  [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size,
                        steps=opts.awe_steps,
                        graph_size=opts.graph_size).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'constant':
        baseline = ConstantBaseline()
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else CriticNetwork(
                 2, opts.embedding_dim, opts.hidden_dim, opts.n_encode_layers,
                 opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    elif opts.baseline == 'critic_lp':
        assert problem.NAME == 'lp'
        dim_vocab = {2: 2, 3: 5, 4: 15, 5: 52, 6: 203, 7: 877, 8: 4140}
        baseline = CriticBaseline(
            (CriticNetworkLP(dim_vocab[opts.awe_steps], opts.embedding_dim,
                             opts.hidden_dim, opts.n_encode_layers,
                             opts.normalization)).to(opts.device))
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                # if isinstance(v, torch.Tensor):
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution,
                                       size=opts.graph_size,
                                       degree=opts.degree,
                                       steps=opts.awe_steps,
                                       awe_samples=opts.awe_samples)

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Set the random states
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        extra = {'updates': 0, 'avg_reward': 10**8, "best_epoch": -1}
        start = time.time()
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):

            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, tb_logger, opts, extra)

        finish = time.time()
        with open("experiments.log", "a+") as f:
            f.write("{} {:.4f} {} {:.2f}\n".format(
                '-'.join(opts.train_dataset.split('/')[-2:]),
                extra["avg_reward"], extra["best_epoch"], finish - start))
        print("Took {:.2f} sec for {} epochs".format(finish - start,
                                                     opts.n_epochs))
Example #33
    test_loader = torch.utils.data.DataLoader(dataset('test'), batch_size=64)
    # net = SimpleNet_Bin(10)
    net = SimpleNet(10)
    net.cuda()
    optimizer = optim.Adam(net.parameters(),
                           lr=1e-2,
                           weight_decay=1e-6,
                           betas=(0.9, 0.999))
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_test = nn.CrossEntropyLoss(reduction='sum').cuda()

    log_path = 'logs/bin'
    writer = SummaryWriter(log_dir=log_path)

    epoch_num = 20
    lr0 = 1e-4
    for epoch in range(epoch_num):
        current_lr = lr0 / 2**int(epoch / 4)
        for param_group in optimizer.param_groups:
            param_group["lr"] = current_lr
        # train_epoch(net, optimizer, train_loader, criterion, epoch, writer, current_lr=current_lr, mode='Bin')
        train_epoch(net,
                    optimizer,
                    train_loader,
                    criterion,
                    epoch,
                    writer,
                    current_lr=current_lr,
                    mode='normal')
        test(net, test_loader, criterion_test, epoch, writer)
Example #34
def objective(dropout):
    global count
    count += 1
    print(
        '-------------------------------------------------------------------')
    print('%d' % count)
    print(dropout)

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    torch.backends.cudnn.benchmark = True

    root_path = '/data/eaxfjord/deep_LFP'
    matrix = 'shuffled_LR.npy'
    batch_size = 20

    training_dataset = LFP_data(root_path=root_path,
                                data_file=matrix,
                                split='train')
    training_loader = DataLoader(training_dataset,
                                 shuffle=True,
                                 batch_size=batch_size,
                                 pin_memory=True,
                                 num_workers=1)

    validation_set = LFP_data(root_path=root_path,
                              data_file=matrix,
                              split='valid')
    validation_loader = DataLoader(validation_set,
                                   shuffle=False,
                                   batch_size=batch_size,
                                   pin_memory=True,
                                   num_workers=1)

    input_shape = (2, 2110)  # this is a hack to figure out shape of fc layer
    net = conv1d_nn.Net(input_shape=input_shape, dropout=dropout)
    net.cuda()

    criterion = nn.CrossEntropyLoss()
    criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     patience=100,
                                                     threshold=1e-3)
    num_epochs = 200

    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = train_epoch(training_loader, net, criterion,
                                            optimizer)
        validation_loss, validation_accuracy = val_epoch(
            validation_loader, net, criterion)

        scheduler.step(validation_loss)
        print(
            'EPOCH:: %i, train_loss: %.3f, test_loss: %.3f, train_acc: %.3f, test_acc: %.3f'
            % (epoch + 1, train_loss, validation_loss, train_acc,
               validation_accuracy))

    return {
        'loss': -validation_accuracy,
        'status': STATUS_OK,
        'val_loss': validation_loss
    }