Example #1
def model_training(t_alpha, t_beta, t_gamma):
    #model_evaluation()
    dataloader = get_dataloader(squad_train_path)
    dev_data = load_dev_data()
    cvae.train()

    print("training ..", flush=True)
    for epoch in range(n_epochs):
        tot_n_loss = 0
        tot_p_loss = 0

        # as mentioned by the author, turning it off.
        # if epoch % anneal_alpha == 0:
        #     t_alpha = min(t_alpha*2, 1)

        if epoch % anneal_beta == 0:
            t_beta = min(t_beta * 2, 1)

        for i, (q, p_ans, n_ans) in enumerate(dataloader):
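            # (forward pass producing p_loss and n_loss is not shown in this excerpt)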

            tot_p_loss += p_loss.item()
            tot_n_loss += n_loss.item()

            if i % p_epochs == 0:
                print(
                    f'epoch: {epoch}/{n_epochs}, steps: {i}/{len(dataloader)}, c_p_loss: {p_loss.item():.2f}, p_loss: {(tot_p_loss/(i+1)):.2f}, c_n_loss: {n_loss.item():.2f}, n_loss {(tot_n_loss/(i+1)):.2f}',
                    flush=True)

                torch.save({'cvae': cvae.state_dict()}, model_path)

            if i % e_epochs == 0:
                model_evaluation(dev_data, 'train')
                cvae.train()
Example #2
def main(args):
    if args.dataset == 'SYDNEY':
        from load_data import Load_Sydney_Demand_Data
        data = Load_Sydney_Demand_Data(
            os.path.join(base_dir, '1h_data_new3.csv'))
        data = np.expand_dims(data, axis=-1)
        args.dim = 1
        print(data.shape)
    adj = generate_graph_with_data(data, len(data), threshold=args.threshold)

    adj = torch.from_numpy(Cheb_Poly(Scaled_Laplacian(adj),
                                     2)).type(torch.float32)

    model = Network(adj, args, dropout=0.15)

    print_model_parameters(model)
    model.apply(init_weights)
    model = model.to(args.device)
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=args.lr_init,
                                 betas=(0.8, 0.999),
                                 eps=1e-7)

    lr_scheduler = init_lr_scheduler(optimizer, args)
    criterion = nn.MSELoss(reduction='sum')
    criterion.to(args.device)
    train_dataloader, val_dataloader, test_dataloader, scaler = get_dataloader(
        args.dataset,
        args.batch_size,
        args.window,
        args.horizon,
        args.valdays,
        args.testdays,
        normalizer='max')
    print('************START TRAINING************')
    n_batch = len(train_dataloader) / args.batch_size  #1920/

    path = '/home/canli/upload_file/save_model/'
    for epoch in range(1, args.epochs + 1):
        train_epoch_loss = 0
        epoch_norm = 0
        model.train()
        for index, (x, y) in enumerate(train_dataloader):
            optimizer.zero_grad()
            train_pred = model(x)
            train_loss = criterion(train_pred, y)
            train_loss.backward()
            grad_norm = check_gradients(model)
            epoch_norm = epoch_norm + grad_norm
            optimizer.step()
            train_epoch_loss = train_epoch_loss + train_loss.data
        print('Epoch {}/{}: train loss: {:.4f}, grad norm: {:.6f}'.format(
            epoch, args.epochs, train_epoch_loss, (epoch_norm / n_batch)))
        lr_scheduler.step()
        torch.save(model.state_dict(), path + str(epoch) + 'para_model.pkl')

        val_mae, val_rmse, val_mape = eval(model, val_dataloader, scaler)
        print('Val---MAE: {:.4f}, RMSE: {:.4f}, MAPE: {:.4f}'.format(
            val_mae, val_rmse, val_mape))
Example #3
def main(args):
    if args.dataset == 'SYDNEY':
        from load_data import Load_Sydney_Demand_Data
        data = Load_Sydney_Demand_Data(
            os.path.join(base_dir, '1h_data_new3.csv'))
        data = np.expand_dims(data, axis=-1)
        args.dim = 1
        print(data.shape)
    adj = generate_graph_with_data(data, len(data), threshold=args.threshold)
    adj = torch.from_numpy(Cheb_Poly(Scaled_Laplacian(adj),
                                     2)).type(torch.float32)

    model = Network(adj, args, dropout=0.15)
    model_path = '/home/canli/upload_file/save_model/15para_model.pkl'
    model.load_state_dict(torch.load(model_path))

    print_model_parameters(model)
    model = model.to(args.device)

    train_dataloader, val_dataloader, test_dataloader, scaler = get_dataloader(
        args.dataset, args.batch_size, args.window, normalizer='max')

    pred_matrix = []
    pred_tensor = torch.Tensor().cuda()
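    # run the model 10 times and stack the predictions; the spread across runs
    # (e.g. from dropout left active) is treated below as a predictive std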
    for i in range(10):
        test_mae, test_rmse, test_mape, pred, true = eval(
            model, test_dataloader, scaler)
        print('Test---MAE: {:.4f}, RMSE: {:.4f}, MAPE: {:.4f}'.format(
            test_mae, test_rmse, test_mape))
        pred = pred.squeeze()
        true = true.squeeze()
        pred = pred.unsqueeze(0)
        pred_tensor = torch.cat((pred_tensor, pred), 0)

    ave = torch.mean(pred_tensor, dim=0)
    std = torch.std(pred_tensor, dim=0)
    ave = ave.cpu().detach().numpy()
    std = std.cpu().detach().numpy()
    true = true.cpu().detach().numpy()
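    # 95% normal interval per point from the sampled mean/std; `per` is the empirical
    # coverage of the true values and `span` the interval width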
    interval = stats.norm.interval(0.95, ave, std)
    span = interval[1] - interval[0]
    compare = (true < interval[1]) & (true > interval[0])
    per = np.sum(compare) / (ave.shape[0] * ave.shape[1])
    print(per)
    print(np.mean(span))
Example #4
    elif opt.model == "dip_vae":
        parameter = opt.lambda_diag
    else:
        parameter = 0

    out_path = (
        f"../results/{opt.dataset}/{opt.model}/parameter_{parameter}/seed_{opt.seed}"
    )
    os.makedirs(out_path, exist_ok=True)

    # check for GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load data
    print("Loading data")
    train_dataloader = get_dataloader(opt)
    test_dataloader = None
    n = len(train_dataloader.dataset)
    iter_per_epoch = math.ceil(n / opt.batch_size)
    # run
    start = time.time()
    print("Training")

    vae = ConvVAE(opt).to(device)
    optimizer = optim.Adam(vae.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

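    # adversarial discriminator over the latent code with its own optimizer
    # (e.g. a FactorVAE-style density-ratio discriminator)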
    discriminator = Discriminator(opt.latent_dim).to(device)
    optimizer_d = optim.Adam(discriminator.parameters(),
                             lr=opt.lrd,
                             betas=(opt.b1d, opt.b2d))
Example #5
def ids2words(lang, ids):
    return [lang.index2word[idx] for idx in ids]

def greedy_decode(model, dataloader, input_lang, output_lang):
    with torch.no_grad():
        batch = next(iter(dataloader))
        input_tensor  = batch[0]
        input_mask    = batch[1]
        target_tensor = batch[2]

        decoder_outputs, decoder_hidden = model(input_tensor, input_mask)
        topv, topi = decoder_outputs.topk(1)
        decoded_ids = topi.squeeze()

        for idx in range(input_tensor.size(0)):
            input_sent = ids2words(input_lang, input_tensor[idx].cpu().numpy())
            output_sent = ids2words(output_lang, decoded_ids[idx].cpu().numpy())
            target_sent = ids2words(output_lang, target_tensor[idx].cpu().numpy())
            print('Input:  {}'.format(input_sent))
            print('Target: {}'.format(target_sent))
            print('Output: {}'.format(output_sent))


if __name__ == '__main__':
    input_lang, output_lang, train_dataloader = load_data.get_dataloader(batch_size)
    model = model.EncoderDecoder(hidden_size, input_lang.n_words, output_lang.n_words).to(device)
    train(train_dataloader, model, n_epochs=20)
    greedy_decode(model, train_dataloader, input_lang, output_lang)


Example #6
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as f
import numpy as np
from load_data import get_dataloader
from model import JDDA
from itertools import cycle

class_num = 10
batch_size = 128
total_iters = 200200
lr = 0.0001
discriminative_loss_param = 0.03  # 0.03 for the InstanceBased method, 0.01 for the CenterBased method
domain_loss_param = 8
device = torch.device('cuda:2')

source_dataloader = get_dataloader('mnist',
                                   batch_size=batch_size,
                                   split='train')
target_dataloader = get_dataloader('mnistm',
                                   batch_size=batch_size,
                                   split='train')
test_dataloader = get_dataloader('mnistm', batch_size=batch_size, split='test')

model = JDDA()
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# Since the SVHN and MNIST datasets have different lengths, the original author
# trains for 200200 steps and drops any batch whose size is not 128.
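# NOTE: a minimal sketch (not part of the original script) of how such a loop
# could pair the two loaders: itertools.cycle (imported above) re-iterates the
# shorter loader, and batches smaller than batch_size are skipped.
#
#     for step, ((xs, ys), (xt, _)) in enumerate(zip(cycle(source_dataloader),
#                                                    target_dataloader)):
#         if xs.size(0) != batch_size or xt.size(0) != batch_size:
#             continue
#         ...  # forward pass, JDDA losses, optimizer step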
Example #7
        total_loss += loss.item()
        
        if (i+1) % print_every == 0:
            avg_loss = total_loss / float(print_every)
            print_progress((time.time() - start)//60, epoch+1, i+1, avg_loss)
            total_loss = 0
            
        #if (i+1) % SAVE_ITERS == 0:
        #    save(epoch, i+1, NAME, model, optimizer)
       
    avg_loss = total_loss / max(1, (i+1) % print_every)
    print_progress((time.time() - start)//60, epoch+1, i+1, avg_loss)
    save(epoch, model, optimizer)
    
    
dataloader, dataset = get_dataloader(args.batch_size, args.data_path, max_len=args.max_length)

print("Loaded {0} samples from {1}".format(len(dataset), args.data_path))

print("Initializing Transformer...")
model = Transformer(ALPHABET_SIZE, args.embedding_size, args.num_layers)
if torch.cuda.is_available() and not args.cpu:
    model = torch.nn.DataParallel(model)
model = model.to(DEVICE)
print("Transformer Initialized on device(s):", DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.9, 0.98), eps=1e-9)
sched = CosineWithRestarts(optimizer, T_max=len(dataloader))
epoch = 0

if args.checkpoint_path is not None:
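    # (checkpoint loading is cut off in this excerpt; assuming the checkpoint dict
    # stores 'model', 'optimizer' and 'epoch' keys, a typical pattern would be:)
    #     checkpoint = torch.load(args.checkpoint_path, map_location=DEVICE)
    #     model.load_state_dict(checkpoint['model'])
    #     optimizer.load_state_dict(checkpoint['optimizer'])
    #     epoch = checkpoint['epoch']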
Example #8
def train(style_list, content_list, batch_size, num_epochs, style_weight,
          content_weight, ngf, log_interval, save_model_dir):

    ########################
    # Data loader
    ########################

    content_loader = load_data.get_dataloader(content_list, batch_size)
    style_loader = load_data.get_dataloader(style_list, batch_size)

    ########################
    # Init model
    ########################
    vgg = basic_block.Vgg()
    style_model = basic_block.Net(ngf)

    ########################
    # optimizer and loss
    ########################
    mse_loss = tf.keras.losses.mean_squared_error  # functional MSE (a function, not a class)
    optimizer = tf.keras.optimizers.Adam()

    ########################
    # Start training loop
    ########################
    for epoch in range(1, num_epochs):
        agg_content_loss = 0.0
        agg_style_loss = 0.0
        count = 0
        for batch_id, content_img in enumerate(content_loader):
            with tf.GradientTape() as tape:
                n_batch = len(content_img)
                count += n_batch
                # data preparation. TODO: figure out these helper functions
                style_image = next(style_loader)
                #style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())

                feature_style = vgg(style_image)
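                # Gram matrices of the VGG style features encode channel-wise
                # correlations; they are the style targets matched below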
                gram_style = [
                    basic_block.gram_matrix(y) for y in feature_style
                ]

                f_xc_c = vgg(content_img)[1]

                style_model.set_target(style_image)
                y = style_model(content_img)
                features_y = vgg(y)

                # TODO: why the coefficient 2?
                content_loss = 2 * content_weight * mse_loss(
                    features_y[1], f_xc_c)

                style_loss = 0.0
                for m in range(len(features_y)):
                    gram_y = basic_block.gram_matrix(features_y[m])
                    _, C, _ = gram_style[m].shape
                    gram_s = tf.broadcast_to(tf.expand_dims(gram_style[m], 0),
                                             (batch_size, 1, C, C))
                    style_loss += 2 * style_weight * mse_loss(
                        gram_y, gram_s[:n_batch, :, :])
                total_loss = content_loss + style_loss
                agg_content_loss += content_loss[0]
                agg_style_loss += style_loss[0]
            gradients = tape.gradient(total_loss, style_model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, style_model.trainable_variables))

            if (batch_id + 1) % log_interval == 0:
                mesg = "{}\tEpoch {}:\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
                    time.ctime(), epoch + 1, agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1))
                print(mesg)

            if (batch_id + 1) % (4 * log_interval) == 0:
                # save model
                save_model_filename = "Epoch_" + str(epoch) + "iters_" + \
                    str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
                    content_weight) + "_" + str(style_weight) + ".params"
                save_model_path = os.path.join(save_model_dir,
                                               save_model_filename)
                tf.saved_model.save(style_model, save_model_path)
                print("\nCheckpoint, trained model saved at", save_model_path)