Example #1
def main(cfg):
    if cfg['model'] == 'mlp':
        net = MLP(300, 768, cfg['class_num'])
    elif cfg['model'] == 'cnn':
        net = CNN(300, 768, cfg['class_num'])
    elif cfg['model'] == 'lstm':
        net = LSTM(300, cfg['class_num'], cfg['device'])
    elif cfg['model'] == 'gru':
        net = GRU(300, cfg['class_num'], cfg['device'])
    else:
        raise Exception(f"model {cfg['model']} not available")

    if cfg['device'] == 'cuda':
        if len(cfg['gpu_ids']) == 1:
            torch.cuda.set_device(cfg['gpu_ids'][0])
            net = net.cuda()
        else:
            net = net.cuda()
            net = nn.DataParallel(net, device_ids=cfg['gpu_ids'])

    torch.backends.cudnn.benchmark = True

    if cfg['mode'] == 'train':
        train(cfg, net)
    elif cfg['mode'] == 'predict':
        predict(cfg, net, 'checkpoints/{}.pth'.format(cfg['model']))
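None of the listings include the MLP class itself; below is a minimal sketch of a classifier that would satisfy the three-argument call MLP(300, 768, cfg['class_num']) used in Example #1 (interpreting the arguments as input size, hidden size, and number of classes is an assumption, not the original definition):

import torch.nn as nn

class MLP(nn.Module):
    # Minimal two-layer perceptron; constructor arguments assumed to be
    # (input_dim, hidden_dim, n_classes) based on the call in Example #1.
    def __init__(self, input_dim, hidden_dim, n_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_classes),
        )

    def forward(self, x):
        return self.net(x)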
Example #2
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # # Build data loader
    # dataset,targets= load_dataset()
    # np.save("__cache_dataset.npy", dataset)
    # np.save("__cache_targets.npy", targets)
    # return

    dataset = np.load("__cache_dataset.npy")
    targets = np.load("__cache_targets.npy")

    # Build the models
    mlp = MLP(args.input_size, args.output_size)

    mlp.load_state_dict(
        torch.load(
            '_backup_model_statedict/mlp_100_4000_PReLU_ae_dd_final.pkl'))

    if torch.cuda.is_available():
        mlp.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adagrad(mlp.parameters())

    # Train the Models
    total_loss = []
    print(len(dataset))
    print(len(targets))
    sm = 100  # start saving models after 100 epochs
    for epoch in range(args.num_epochs):
        print("epoch" + str(epoch))
        avg_loss = 0
        for i in range(0, len(dataset), args.batch_size):
            # Forward, Backward and Optimize
            mlp.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = mlp(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()
        print("--average loss:")
        print(avg_loss / (len(dataset) / args.batch_size))
        total_loss.append(avg_loss / (len(dataset) / args.batch_size))
        # Save the models
        if epoch == sm:
            model_path = 'mlp_100_4000_PReLU_ae_dd' + str(sm) + '.pkl'
            torch.save(mlp.state_dict(),
                       os.path.join(args.model_path, model_path))
            sm = sm + 50  # save the model every 50 epochs from epoch 100 onwards
    torch.save(total_loss, 'total_loss.dat')
    model_path = 'mlp_100_4000_PReLU_ae_dd_final.pkl'
    torch.save(mlp.state_dict(), os.path.join(args.model_path, model_path))
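Example #2 calls two helpers, get_input and to_var, that are not shown; a minimal sketch of what they are assumed to do (slice one mini-batch out of the cached numpy arrays and move it to the GPU when one is available):

import torch

def get_input(i, dataset, targets, batch_size):
    # Slice one mini-batch out of the cached numpy arrays (assumed behaviour).
    bi = torch.from_numpy(dataset[i:i + batch_size]).float()
    bt = torch.from_numpy(targets[i:i + batch_size]).float()
    return bi, bt

def to_var(x):
    # Move the tensor to the GPU when available (assumed behaviour).
    return x.cuda() if torch.cuda.is_available() else x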
Example #3
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node

    net = MLP(opt)
    net.double()
    print(net)

    criterion = nn.BCELoss()

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': list(range(1, len(train_loss_ls) + 1)),
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
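Example #3 relies on an EarlyStopping helper and later reloads OutputDir + '/checkpoint.pt'; a sketch of the assumed behaviour of that helper (track the best validation loss, checkpoint the best model, and raise the early_stop flag after `patience` epochs without improvement):

import numpy as np
import torch

class EarlyStopping:
    # Assumed behaviour of the helper used in Example #3.
    def __init__(self, patience=7, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = np.inf
        self.early_stop = False

    def __call__(self, val_loss, model, output_dir):
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.counter = 0
            torch.save(model.state_dict(), output_dir + '/checkpoint.pt')
            if self.verbose:
                print('Validation loss improved to {:.6f}; checkpoint saved.'.format(val_loss))
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True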
def main():

    np.random.seed(args.seed)
    cur_acc = 0
    max_acc = 0
    num_param = 20
    cur_param = np.zeros(args.n_epoch)
    max_pt = np.zeros(args.n_epoch)
    for iii in range(args.n_iter):
        for jjj in range(args.n_samples):
            cur_a = np.random.randn(10)
            cur_w = np.random.randn(10)
            cur_b = np.random.randn(10)
            x = np.arange(args.n_epoch) / args.n_epoch
            cur_rt = np.dot(np.outer(x, cur_w) + cur_b, cur_a)
            cur_rt = 1 / (1 + np.exp(-cur_rt))
            cur_param = cur_rt.copy()
            cur_acc = black_box_function(cur_param)
            if max_acc < cur_acc:
                max_acc = cur_acc
                max_pt = cur_param.copy()
    '''
    rate_schedule=np.ones(args.n_epoch)*forget_rate
    rate_schedule[:10]=np.arange(10,dtype=float)/10*forget_rate
    # rate_schedule[10:]=np.arange(args.n_epoch-10,dtype=float)/(args.n_epoch-10)*forget_rate+forget_rate
    rate_schedule=np.zeros(args.n_epoch)
    print(rate_schedule)
    '''
    rate_schedule = max_pt.copy()
    print('Final Schedule:', rate_schedule)

    mean_pure_ratio1 = 0
    mean_pure_ratio2 = 0

    print('building model...')
    cnn1 = MLP(n_outputs=num_classes)
    cnn1.cuda()
    print(cnn1.parameters)
    optimizer1 = torch.optim.Adam(cnn1.parameters(), lr=learning_rate)

    cnn2 = MLP(n_outputs=num_classes)
    cnn2.cuda()
    print(cnn2.parameters)
    optimizer2 = torch.optim.Adam(cnn2.parameters(), lr=learning_rate)

    epoch = 0
    train_acc1 = 0
    train_acc2 = 0
    # evaluate models with random weights
    test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
    print(
        'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %% Pure Ratio1 %.4f %% Pure Ratio2 %.4f %%'
        % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
           mean_pure_ratio1, mean_pure_ratio2))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(
            str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2) +
            ' ' + str(test_acc1) + " " + str(test_acc2) + ' ' +
            str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) + ' ' +
            str(rate_schedule[epoch]) + "\n")

    # training
    for epoch in range(1, args.n_epoch):
        # train models
        cnn1.train()
        adjust_learning_rate(optimizer1, epoch)
        cnn2.train()
        adjust_learning_rate(optimizer2, epoch)
        train_acc1, train_acc2, pure_ratio_1_list, pure_ratio_2_list = train(
            train_loader, epoch, cnn1, optimizer1, cnn2, optimizer2,
            rate_schedule)
        # evaluate models
        test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
        # save results
        mean_pure_ratio1 = sum(pure_ratio_1_list) / len(pure_ratio_1_list)
        mean_pure_ratio2 = sum(pure_ratio_2_list) / len(pure_ratio_2_list)
        print(
            'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %%, Pure Ratio 1 %.4f %%, Pure Ratio 2 %.4f %%'
            % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1,
               test_acc2, mean_pure_ratio1, mean_pure_ratio2))
        with open(txtfile, "a") as myfile:
            myfile.write(
                str(int(epoch)) + ' ' + str(train_acc1) + ' ' +
                str(train_acc2) + ' ' + str(test_acc1) + " " + str(test_acc2) +
                ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) +
                ' ' + str(rate_schedule[epoch]) + "\n")
def black_box_function(opt_param):
    mean_pure_ratio1 = 0
    mean_pure_ratio2 = 0

    print('building model...')
    cnn1 = MLP(n_outputs=num_classes)
    cnn1.cuda()
    print(cnn1.parameters)
    optimizer1 = torch.optim.Adam(cnn1.parameters(), lr=learning_rate)

    cnn2 = MLP(n_outputs=num_classes)
    cnn2.cuda()
    print(cnn2.parameters)
    optimizer2 = torch.optim.Adam(cnn2.parameters(), lr=learning_rate)

    rate_schedule = opt_param.copy()
    print('Schedule:', rate_schedule)

    epoch = 0
    train_acc1 = 0
    train_acc2 = 0
    # evaluate models with random weights
    test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
    print(
        'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %% Pure Ratio1 %.4f %% Pure Ratio2 %.4f %%'
        % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
           mean_pure_ratio1, mean_pure_ratio2))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(
            str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2) +
            ' ' + str(test_acc1) + " " + str(test_acc2) + ' ' +
            str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) + ' ' +
            str(rate_schedule[epoch]) + "\n")

    # training
    for epoch in range(1, args.n_epoch):
        # train models
        cnn1.train()
        adjust_learning_rate(optimizer1, epoch)
        cnn2.train()
        adjust_learning_rate(optimizer2, epoch)
        train_acc1, train_acc2, pure_ratio_1_list, pure_ratio_2_list = train(
            train_loader, epoch, cnn1, optimizer1, cnn2, optimizer2,
            rate_schedule)
        # evaluate models
        test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
        # save results
        mean_pure_ratio1 = sum(pure_ratio_1_list) / len(pure_ratio_1_list)
        mean_pure_ratio2 = sum(pure_ratio_2_list) / len(pure_ratio_2_list)
        print(
            'Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %%, Pure Ratio 1 %.4f %%, Pure Ratio 2 %.4f %%'
            % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1,
               test_acc2, mean_pure_ratio1, mean_pure_ratio2))
        with open(txtfile, "a") as myfile:
            myfile.write(
                str(int(epoch)) + ' ' + str(train_acc1) + ' ' +
                str(train_acc2) + ' ' + str(test_acc1) + " " + str(test_acc2) +
                ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2) +
                ' ' + str(rate_schedule[epoch]) + "\n")

    return (test_acc1 + test_acc2) / 200
Example #6
    # prepare mnist datasets.
    train_datasets = [
        get_dataset('mnist', permutation=p) for p in permutations
    ]
    test_datasets = [
        get_dataset('mnist', train=False, permutation=p) for p in permutations
    ]

    # prepare the model.
    mlp = MLP(
        DATASET_CONFIGS['mnist']['size']**2,
        DATASET_CONFIGS['mnist']['classes'],
        hidden_size=args.hidden_size,
        hidden_layer_num=args.hidden_layer_num,
        hidden_dropout_prob=args.hidden_dropout_prob,
        input_dropout_prob=args.input_dropout_prob,
    )

    # prepare the cuda if needed.
    if cuda:
        mlp.cuda()

    # run the experiment.
    train(
        mlp, train_datasets, test_datasets,
        epochs_per_task=args.epochs_per_task,
        batch_size=args.batch_size, lr=args.lr,
        weight_decay=args.weight_decay,
        cuda=cuda
    )
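Example #6 builds permuted-MNIST tasks by passing a permutation into get_dataset; a sketch of how such a pixel permutation can be generated and applied (make_permutation and apply_permutation are illustrative names, not the original API):

import numpy as np
import torch

def make_permutation(image_size=28 * 28, seed=0):
    # One fixed shuffling of the pixel indices defines one task.
    rng = np.random.RandomState(seed)
    return torch.from_numpy(rng.permutation(image_size))

def apply_permutation(image, permutation):
    # Flatten the image, reorder its pixels, and restore the original shape.
    shape = image.shape
    return image.contiguous().view(-1)[permutation].view(shape)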
Example #7
from ignite.contrib.handlers.neptune_logger import *
from ignite.handlers import Checkpoint

from model import Data, MLP
from data import X, y
from parse import args


scaler = GradScaler()

model = MLP(n_neurons=[(20, 100), (100, 60), (60, 2)],
            activation=nn.LeakyReLU(),
            batch_norm=True,
            dropout=0.2)

model.cuda()

logger = NeptuneLogger(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                       project_name="vladimir.isakov/sandbox",
                       experiment_name='Run',
                       upload_source_files='./train.py',
                       # tags='v1',
                       params={'batch_size': args.batch_size,
                               'epochs': args.epochs,
                               'lr': args.lr,
                               'step_size': args.step_size,
                               'gamma': args.gamma,
                               'weight_decay': args.weight_decay,
                               'model': repr(model)})

optimizer = torch.optim.Adam(model.parameters(),
Example #8
def train_model(config, gpu_id, save_dir, exp_name):
    # Instantiating the model
    model_type = config.get('model_type', 'MLP')
    if model_type == "MLP":
        model = MLP(784, config["hidden_layers"], 10, config["nonlinearity"], config["initialization"], config["dropout"], verbose=True)
    elif model_type == "CNN":
        model = CNN(config["initialization"], config["is_batch_norm"], verbose=True)
    else:
        raise ValueError('config["model_type"] not supported : {}'.format(model_type))

    # Loading the MNIST dataset
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.load_mnist(config["data_file"], data_format=config["data_format"])

    if config['data_reduction'] != 1.:
        x_train, y_train = utils.reduce_trainset_size(x_train, y_train, config['data_reduction'])

    # If GPU is available, sends model and dataset on the GPU
    if torch.cuda.is_available():
        model.cuda(gpu_id)

        x_train = torch.from_numpy(x_train).cuda(gpu_id)
        y_train = torch.from_numpy(y_train).cuda(gpu_id)

        x_valid = Variable(torch.from_numpy(x_valid), volatile=True).cuda(gpu_id)
        y_valid = Variable(torch.from_numpy(y_valid), volatile=True).cuda(gpu_id)

        x_test = Variable(torch.from_numpy(x_test), volatile=True).cuda(gpu_id)
        y_test = Variable(torch.from_numpy(y_test), volatile=True).cuda(gpu_id)
        print("Running on GPU")
    else:
        x_train = torch.from_numpy(x_train)
        y_train = torch.from_numpy(y_train)

        x_valid = Variable(torch.from_numpy(x_valid))
        y_valid = Variable(torch.from_numpy(y_valid))

        x_test = Variable(torch.from_numpy(x_test))
        y_test = Variable(torch.from_numpy(y_test))
        print("WATCH-OUT : torch.cuda.is_available() returned False. Running on CPU.")

    # Instantiate TensorDataset and DataLoader objects
    train_set = torch.utils.data.TensorDataset(x_train, y_train)
    loader = torch.utils.data.DataLoader(train_set, batch_size=config["mb_size"], shuffle=True)

    # Optimizer and Loss Function
    optimizer = optim.SGD(model.parameters(), lr=config['lr'],
                                              momentum=config['momentum'],
                                              weight_decay=config['L2_hyperparam'] * (config['mb_size'] / x_train.size()[0]))
    loss_fn = nn.NLLLoss()

    # Records the model's performance
    train_tape = [[],[]]
    valid_tape = [[],[]]
    test_tape = [[],[]]
    weights_tape = []

    def evaluate(data, labels):

        model.eval()
        if not isinstance(data, Variable):
            if torch.cuda.is_available():
                data = Variable(data, volatile=True).cuda(gpu_id)
                labels = Variable(labels, volatile=True).cuda(gpu_id)
            else:
                data = Variable(data)
                labels = Variable(labels)

        output = model(data)
        loss = loss_fn(output, labels)
        prediction = torch.max(output.data, 1)[1]
        accuracy = (prediction.eq(labels.data).sum() / labels.size(0)) * 100

        return loss.data[0], accuracy

    if not os.path.exists(os.path.join(save_dir, exp_name)):
        os.makedirs(os.path.join(save_dir, exp_name))

    # Record train accuracy
    train_loss, train_acc = evaluate(x_train, y_train)
    train_tape[0].append(train_loss)
    train_tape[1].append(train_acc)

    # Record valid accuracy
    valid_loss, valid_acc = evaluate(x_valid, y_valid)
    valid_tape[0].append(valid_loss)
    valid_tape[1].append(valid_acc)

    # Record test accuracy
    test_loss, test_acc = evaluate(x_test, y_test)
    test_tape[0].append(test_loss)
    test_tape[1].append(test_acc)

    # Record weights L2 norm
    weights_L2_norm = model.get_weights_L2_norm()
    weights_tape.append(float(weights_L2_norm.data.cpu().numpy()))

    print("BEFORE TRAINING \nLoss : {0:.3f} \nAcc : {1:.3f}".format(valid_loss, valid_acc))

    # TRAINING LOOP
    best_valid_acc = 0
    for epoch in range(1, config["max_epochs"]):
        start = time.time()
        model.train()
        for i,(x_batch, y_batch) in enumerate(loader):

            #pdb.set_trace()

            if torch.cuda.is_available():
                x_batch = Variable(x_batch).cuda(gpu_id)
                y_batch = Variable(y_batch).cuda(gpu_id)
            else:
                x_batch = Variable(x_batch)
                y_batch = Variable(y_batch)

            # Empties the gradients
            optimizer.zero_grad()

            # Feedforward through the model
            output = model(x_batch)

            # Computes the loss
            loss = loss_fn(output, y_batch)

            # Backpropagates to compute the gradients
            loss.backward()

            # Takes one training step
            optimizer.step()

            # Record weights L2 norm
            weights_L2_norm = model.get_weights_L2_norm()
            weights_tape.append(float(weights_L2_norm.data.cpu().numpy()))

        # Record train accuracy
        train_loss, train_acc = evaluate(x_train, y_train)
        train_tape[0].append(train_loss)
        train_tape[1].append(train_acc)

        # Record valid accuracy
        valid_loss, valid_acc = evaluate(x_valid, y_valid)
        valid_tape[0].append(valid_loss)
        valid_tape[1].append(valid_acc)

        # Record test accuracy
        test_loss, test_acc = evaluate(x_test, y_test)
        test_tape[0].append(test_loss)
        test_tape[1].append(test_acc)

        print("Epoch {0} \nLoss : {1:.3f} \nAcc : {2:.3f}".format(epoch, valid_loss, valid_acc))
        print("Time : {0:.2f}".format(time.time() - start))

        # Saves the model
        if valid_acc > best_valid_acc:
            print("NEW BEST MODEL")
            torch.save(model.state_dict(), os.path.join(save_dir, exp_name, "model"))
            best_valid_acc = valid_acc

    # Saves the graphs
    utils.save_results(train_tape, valid_tape, test_tape, weights_tape, save_dir, exp_name, config)
    utils.update_comparative_chart(save_dir, config['show_test'])

    return
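The evaluate closure above uses the pre-0.4 Variable(..., volatile=True) idiom; for reference, an equivalent evaluation sketch in current PyTorch, where torch.no_grad() replaces volatile (a rewrite for illustration, not part of the original listing):

import torch

def evaluate(model, loss_fn, data, labels):
    model.eval()
    with torch.no_grad():  # replaces Variable(..., volatile=True)
        output = model(data)
        loss = loss_fn(output, labels)
        prediction = output.argmax(dim=1)
        accuracy = prediction.eq(labels).float().mean().item() * 100
    return loss.item(), accuracy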
def train_model(config, gpu_id, save_dir, exp_name):

    # Instantiating the model
    model_type = config.get('model_type', 'MLP')
    if model_type == "MLP":
        model = MLP(config['input_size'],
                    config["hidden_layers"],
                    1,
                    config["nonlinearity"],
                    config["initialization"],
                    config["dropout"],
                    verbose=True)
    elif model_type == "CNN":
        model = CNN(config["initialization"],
                    config["is_batch_norm"],
                    verbose=True)
    else:
        raise ValueError(
            'config["model_type"] not supported : {}'.format(model_type))

    if config['resume']:
        model.load_state_dict(
            torch.load(os.path.join(save_dir, exp_name, "model")))

    # If GPU is available, sends model and dataset on the GPU
    if torch.cuda.is_available():
        model.cuda(gpu_id)
        print("USING GPU-{}".format(gpu_id))

    # Optimizer and Loss Function
    optimizer = optim.RMSprop(model.parameters(), lr=config['lr'])
    loss_fn = nn.CrossEntropyLoss()
    """ Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
    env = gym.make("Pong-v0")
    observation = env.reset()

    prev_x = None  # used in computing the difference frame
    y_list, LL_list, reward_list = [], [], []
    running_reward = None
    reward_sum = 0
    episode_number = 0

    start = time.time()

    # Initializing recorders
    update = 0
    loss_tape = []
    our_score_tape = []
    opponent_score_tape = []
    our_score = 0
    opponent_score = 0

    # TRAINING LOOP
    while update < config['max_updates']:

        if config['render']: env.render()

        # preprocess the observation and set input to network to be difference image
        cur_x = utils.preprocess(observation,
                                 data_format=config['data_format'])
        if prev_x is None:
            x = np.zeros(cur_x.shape)
        else:
            x = cur_x - prev_x
        prev_x = cur_x

        x_torch = Variable(torch.from_numpy(x).float(), requires_grad=False)
        if config['data_format'] == "array":
            x_torch = x_torch.unsqueeze(dim=0).unsqueeze(dim=0)

        if torch.cuda.is_available():
            x_torch = x_torch.cuda(gpu_id)

        # Feedforward through the policy network
        action_prob = model(x_torch)

        # Sample an action from the returned probability
        if np.random.uniform() < action_prob.cpu().data.numpy():
            action = 2  # UP
        else:
            action = 3  # DOWN

        # record the log-likelihoods
        y = 1 if action == 2 else 0  # a "fake label"
        NLL = -y * torch.log(action_prob) - (1 - y) * torch.log(1 - action_prob)
        LL_list.append(NLL)
        # grad that encourages the action that was taken to be taken
        # TODO: the tensor graph breaks here. Find a way to backpropagate the PG error.
        y_list.append(y)

        # step the environment and get new measurements
        observation, reward, done, info = env.step(action)
        reward_sum += reward

        # record reward (has to be done after we call step() to get the reward for the previous action)
        reward_list.append(reward)

        if done:  # an episode finished (an episode ends when one of the player wins 21 games)
            episode_number += 1

            # Computes loss and reward for each step of the episode
            R = torch.zeros(1, 1)
            loss = 0
            for i in reversed(range(len(reward_list))):
                R = config['gamma'] * R + reward_list[i]
                Return_i = Variable(R)
                if torch.cuda.is_available():
                    Return_i = Return_i.cuda(gpu_id)
                loss = loss + (LL_list[i] *
                               (Return_i)).sum()  # .expand_as(LL_list[i])
            loss = loss / len(reward_list)
            print(loss)

            # Backpropagates to compute the gradients
            loss.backward()

            y_list, LL_list, reward_list = [], [], []  # reset array memory

            # Performs parameter update every config['mb_size'] episodes
            if episode_number % config['mb_size'] == 0:

                # Takes one training step
                optimizer.step()

                # Empties the gradients
                optimizer.zero_grad()

                stop = time.time()
                print("PARAMETER UPDATE ------------ {}".format(stop - start))
                start = time.time()

                utils.save_results(save_dir, exp_name, loss_tape,
                                   our_score_tape, opponent_score_tape, config)

                update += 1
                if update % 10 == 0:
                    torch.save(
                        model.state_dict(),
                        os.path.join(save_dir, exp_name,
                                     "model_" + model.name()))

            # Records the average loss and score of the episode
            loss_tape.append(loss.cpu().data.numpy())

            our_score_tape.append(our_score)
            opponent_score_tape.append(opponent_score)
            our_score = 0
            opponent_score = 0

            # boring book-keeping
            if running_reward is None:
                running_reward = reward_sum
            else:
                running_reward = running_reward * 0.99 + reward_sum * 0.01
            print(
                'resetting env. episode reward total was {0:.2f}. running mean: {1:.2f}'
                .format(reward_sum, running_reward))

            reward_sum = 0
            observation = env.reset()  # reset env
            prev_x = None

        if reward != 0:  # Pong has either +1 or -1 reward exactly when game ends.
            if reward == -1:
                opponent_score += 1
                print('ep {0}: game finished, reward: {1:.2f}'.format(
                    episode_number, reward))
            else:
                our_score += 1
                print(
                    'ep {0}: game finished, reward: {1:.2f} !!!!!!!!!'.format(
                        episode_number, reward))
Example #10
class REINFORCE:
    def __init__(self, obs_space_size, hidden_sizes, action_space_size,
                 learning_rate, use_cuda, gpu_id):

        self.action_space_size = action_space_size
        self.use_cuda = use_cuda
        self.gpu_id = gpu_id

        # Initializes the policy network and optimizer
        self.policy = MLP(obs_space_size,
                          hidden_sizes,
                          action_space_size,
                          "distribution",
                          "relu",
                          "standard",
                          name="PolicyNetwork",
                          verbose=True)
        self.optimizer = torch.optim.Adam(self.policy.parameters(),
                                          lr=learning_rate)

        # Creates counters
        self.action_count = np.zeros(shape=(self.action_space_size, ))

        self.explore_count = 0
        self.exploit_count = 0

        # If GPU is available, sends model on GPU
        if torch.cuda.is_available() and self.use_cuda:
            self.policy.cuda(gpu_id)
            print("USING GPU-{}".format(gpu_id))

        self.policy.train()

    def select_action(self, observation):

        # Transforms the state into a torch Variable
        x = Variable(torch.Tensor([observation]))

        if torch.cuda.is_available() and self.use_cuda:
            x = x.cuda(self.gpu_id)

        # Forward propagation through policy network
        action_probs = self.policy(x)

        # Samples an action
        action = action_probs.multinomial().data

        # Negative log-likelihood of sampled action
        NLL = -torch.log(action_probs[:, action[0, 0]]).view(1, -1)

        if int(action) == int(torch.max(action_probs, 1)[1].cpu().data):
            self.exploit_count += 1
        else:
            self.explore_count += 1
        self.action_count[int(action)] += 1

        return int(action), NLL

    def compute_gradients(self, reward_list, NLL_list, gamma):

        R = torch.zeros(1, 1)
        loss = 0

        # Iterates through the episode in reverse order to compute return for each step
        for i in reversed(range(len(reward_list))):

            # Discounts reward
            R = gamma * R + reward_list[i]
            Return_i = Variable(R)
            if torch.cuda.is_available() and self.use_cuda:
                Return_i = Return_i.cuda(self.gpu_id)

            # Loss is the NLL at each step weighted by the return for that step
            loss = loss + (NLL_list[i] * Return_i).squeeze()

        # Average to get the total loss
        loss = loss / len(reward_list)

        # Backpropagation to compute the gradients
        loss.backward()

        return loss.cpu().data.numpy()

    def update_parameters(self):
        # Clips the gradient and apply the update
        torch.nn.utils.clip_grad_norm(self.policy.parameters(), 40)
        self.optimizer.step()
        self.optimizer.zero_grad()

    def save_policy(self, directory):
        torch.save(self.policy.state_dict(),
                   os.path.join(directory, self.policy.name + "_ckpt.pkl"))

    def load_policy(self, directory):
        self.policy.load_state_dict(
            torch.load(os.path.join(directory, "model_" + self.policy.name)))

    def reset_counters(self):

        self.action_count = np.zeros(shape=(self.action_space_size, ))

        self.explore_count = 0
        self.exploit_count = 0
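A usage sketch for the REINFORCE agent above; the environment, network sizes, and the classic 4-tuple gym step API are assumptions for illustration:

import gym

env = gym.make("CartPole-v1")  # illustrative environment
agent = REINFORCE(obs_space_size=4, hidden_sizes=[64], action_space_size=2,
                  learning_rate=1e-3, use_cuda=False, gpu_id=0)

for episode in range(10):
    observation = env.reset()
    rewards, nlls = [], []
    done = False
    while not done:
        action, nll = agent.select_action(observation)
        observation, reward, done, _ = env.step(action)
        rewards.append(reward)
        nlls.append(nll)
    agent.compute_gradients(rewards, nlls, gamma=0.99)
    agent.update_parameters()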
Example #11
def main():
    # Create a meta optimizer that wraps a model into a meta model
    # to keep track of the meta updates.
    #    meta_model = FullyConnectedNN()
    meta_model = MLP()
    print(meta_model)

    if args.cuda:
        meta_model.cuda()

    meta_optimizer = MetaOptimizer(MetaModel(meta_model), args.num_layers,
                                   args.hidden_size)
    if args.cuda:
        meta_optimizer.cuda()

    optimizer = optim.Adam(meta_optimizer.parameters(), lr=1e-3)

    for epoch in range(args.max_epoch):
        decrease_in_loss = 0.0
        final_loss = 0.0
        train_iter = iter(train_loader)
        for i in range(args.updates_per_epoch):

            # Sample a new model
            #model = FullyConnectedNN()
            model = MLP()
            if args.cuda:
                model.cuda()

            x, y = next(train_iter)
            if args.cuda:
                x, y = x.cuda(), y.cuda()
            x, y = Variable(x), Variable(y)

            # Compute initial loss of the model
            f_x = model(x)
            initial_loss = F.nll_loss(f_x, y)

            for k in range(args.optimizer_steps // args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_lstm(keep_states=k > 0,
                                          model=model,
                                          use_cuda=args.cuda)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda()
                for j in range(args.truncated_bptt_step):
                    x, y = next(train_iter)
                    if args.cuda:
                        x, y = x.cuda(), y.cuda()
                    x, y = Variable(x), Variable(y)

                    # First we need to compute the gradients of the model
                    f_x = model(x)
                    loss = F.nll_loss(f_x, y)
                    model.zero_grad()
                    loss.backward()

                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the optimizer
                    meta_model = meta_optimizer.meta_update(model, loss.data)

                    # Compute a loss for a step of the meta optimizer
                    f_x = meta_model(x)
                    loss = F.nll_loss(f_x, y)

                    loss_sum += (loss - Variable(prev_loss))

                    prev_loss = loss.data

                # Update the parameters of the meta optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for param in meta_optimizer.parameters():
                    param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute relative decrease in the loss function w.r.t initial
            # value
            decrease_in_loss += loss.data[0] / initial_loss.data[0]
            final_loss += loss.data[0]

        print("Epoch: {}, final loss {}, average final/initial loss ratio: {}".
              format(epoch, final_loss / args.updates_per_epoch,
                     decrease_in_loss / args.updates_per_epoch))
Example #12
def train(lr=args.lr,
          n_hidden=args.n_hidden,
          batch_size=args.batch_size,
          dropout=args.dropout,
          valid_freq=3000,
          disp_freq=1000,
          save_freq=100000,
          max_epochs=args.n_epoch,
          patience=15,
          save_name=args.save_name,
          save_dir=args.save_dir,
          device=args.device):
    # Load train and valid dataset
    print('loading train')
    with open(args.train_path, 'rb') as f:
        train_val_y = pickle.load(f)
        train_val_x = pickle.load(f)

    print('loading english test')
    with open(args.en_test_path, 'rb') as f:
        en_test_y = pickle.load(f)
        en_test_x = pickle.load(f)

    print('loading french test')
    with open(args.fr_test_path, 'rb') as f:
        fr_test_y = pickle.load(f)
        fr_test_x = pickle.load(f)

    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1125)
    for train_index, test_index in sss.split(train_val_x, train_val_y):
        train_y = train_val_y[train_index]
        train_x = train_val_x[train_index]
        valid_y = train_val_y[test_index]
        valid_x = train_val_x[test_index]

    print('Number of training sample: %d' % train_x.shape[0])
    print('Number of validation sample: %d' % valid_x.shape[0])
    print('Number of english testing sample: %d' % en_test_x.shape[0])
    print('Number of french testing sample: %d' % fr_test_x.shape[0])
    print('-' * 100)

    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
    kf_en_test = get_minibatches_idx(len(en_test_y), batch_size)
    kf_fr_test = get_minibatches_idx(len(fr_test_y), batch_size)

    # Loader parameter: use CUDA pinned memory for faster data loading
    pin_memory = (device == args.device)
    # Test set

    n_emb = train_x.shape[1]
    n_class = len(set(train_y))
    best_valid_acc = None
    bad_counter = 0

    uidx = 0  # the number of update done
    estop = False  # early stop switch
    net = MLP(n_mlp_layer=args.n_mlp_layers,
              n_hidden=args.n_hidden,
              dropout=args.dropout,
              n_class=n_class,
              n_emb=n_emb,
              device=args.device)

    if args.load_net != '':
        assert os.path.exists(
            args.load_net), 'Path to pretrained net does not exist'
        net.load_state_dict(torch.load(args.load_net))
        print('Loaded existing model stored at:', args.load_net)

    if args.device == 'gpu':
        net = net.cuda()

    # Begin Training
    net.train()
    print('-' * 100)
    print('Model structure: ')
    print('MLP baseline')
    print(net.main)
    print('-' * 100)
    print('Parameters for tuning: ')
    print(net.state_dict().keys())
    print('-' * 100)

    # Define optimizer
    assert args.optimizer in [
        'SGD', 'Adam', "RMSprop", "LBFGS", "Rprop", "ASGD", "Adadelta",
        "Adagrad", "Adamax"
    ], 'Please choose one of the supported optimizers'
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(lr=lr,
                              params=filter(lambda p: p.requires_grad,
                                            net.parameters()),
                              momentum=0.9)
    else:
        optimizer = getattr(optim, args.optimizer)(params=filter(
            lambda p: p.requires_grad, net.parameters()),
                                                   lr=lr)

    #lambda1 = lambda epoch: epoch // 30
    lambda2 = lambda epoch: 0.98**epoch
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda2])
    #scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epochs)
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
    try:
        for eidx in range(max_epochs):
            scheduler.step()
            # print('Training mode on: ' ,net.training)
            start_time = time.time()
            n_samples = 0
            # Get new shuffled index for the training set
            kf = get_minibatches_idx(len(train_y), batch_size, shuffle=True)

            for _, train_index in kf:
                # Remove gradient from previous batch
                #net.zero_grad()
                optimizer.zero_grad()
                uidx += 1
                y_batch = torch.autograd.Variable(
                    torch.from_numpy(train_y[train_index]).long())
                x_batch = torch.autograd.Variable(
                    torch.from_numpy(train_x[train_index]).float())
                if net.device == 'gpu':
                    y_batch = y_batch.cuda()
                scores = net.forward(x_batch)
                loss = net.loss(scores, y_batch)

                loss.backward()
                optimizer.step()
                n_samples += len(x_batch)
                gradient = 0

                # For logging gradient information
                for name, w in net.named_parameters():
                    if w.grad is not None:
                        w_grad = torch.norm(w.grad.data, 2)**2
                        gradient += w_grad
                gradient = gradient**0.5
                if np.mod(uidx, disp_freq) == 0:
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ',
                          loss.data[0], 'Gradient ', gradient)

                if save_name and np.mod(uidx, save_freq) == 0:
                    print('Saving...')
                    torch.save(
                        net.state_dict(), '%s/%s_epoch%d_update%d.net' %
                        (save_dir, save_name, eidx, uidx))

                if np.mod(uidx, valid_freq) == 0:
                    print("=" * 50)
                    print('Evaluation on validation set: ')
                    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
                    top_1_acc, top_n_acc = eval.net_evaluation(
                        net, kf_valid, valid_x, valid_y)
                    #scheduler.step(top_1_acc)

                    # Save best performance state_dict for testing
                    if best_valid_acc is None:
                        best_valid_acc = top_1_acc
                        best_state_dict = net.state_dict()
                        torch.save(best_state_dict,
                                   '%s/%s_best.net' % (save_dir, save_name))
                    else:
                        if top_1_acc > best_valid_acc:
                            print(
                                'Best validation performance so far, saving model parameters'
                            )
                            print("*" * 50)
                            bad_counter = 0  # reset counter
                            best_valid_acc = top_1_acc
                            best_state_dict = net.state_dict()
                            torch.save(
                                best_state_dict,
                                '%s/%s_best.net' % (save_dir, save_name))
                        else:
                            bad_counter += 1
                            print('Validation accuracy: ', 100 * top_1_acc)
                            print('Getting worse, patience left: ',
                                  patience - bad_counter)
                            print('Best validation accuracy now: ',
                                  100 * best_valid_acc)
                            # Learning rate annealing
                            lr /= args.lr_anneal
                            print('Learning rate annealed to: ', lr)
                            print('*' * 100)
                            if args.optimizer == 'SGD':
                                optimizer = optim.SGD(
                                    lr=lr,
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    momentum=0.9)
                            else:
                                optimizer = getattr(optim, args.optimizer)(
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    lr=lr)
                            if bad_counter > patience:
                                print('-' * 100)
                                print('Early Stop!')
                                estop = True
                                break

            epoch_time = time.time() - start_time
            print('Epoch processing time: %.2f s' % epoch_time)
            print('Seen %d samples' % n_samples)
            if estop:
                break
        print('-' * 100)
        print('Training finished')
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)

        # add self connection
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)

        # Evaluate model on test set
        print('Evaluation on test set: ')
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
    except KeyboardInterrupt:
        print('-' * 100)
        print("Training interrupted, saving final model...")
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)

        # Evaluate model on test set
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
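The loop above repeatedly calls get_minibatches_idx; a sketch of the usual implementation assumed here (split a shuffled index list into fixed-size batches and enumerate them):

import numpy as np

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    # Return a list of (batch_number, array_of_sample_indices) pairs.
    idx_list = np.arange(n, dtype="int64")
    if shuffle:
        np.random.shuffle(idx_list)
    minibatches = [idx_list[i:i + minibatch_size]
                   for i in range(0, n, minibatch_size)]
    return list(enumerate(minibatches))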
def train(args, logger, model_save_dir, val_dataset, test_dataset,
          train_dataset):
    # set seed
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    pretrain_embed = pickle.load(
        open('../{}/{}'.format(args.embed_dir, args.embed), 'rb'))

    try:
        pretrain_embed = torch.from_numpy(pretrain_embed).float()
    except:
        pretrain_embed = pretrain_embed.float()

    dataLoader = DataLoader(train_dataset,
                            batch_size=args.batch_sz,
                            shuffle=True)
    if args.model == 'MLP':
        model = MLP(args.hidden_dim, pretrain_embed)
    elif args.model == 'MLP3':
        model = MLP3Diff(args.hidden_dim, pretrain_embed)
    elif args.model == 'BiLinear':
        model = BiLinearDiff1(args.hidden_dim, pretrain_embed)
    else:
        model = BiLinearDiffH(args.hidden_dim, pretrain_embed)

    # model = ListMaxTransformer(args.hidden_dim, pretrain_embed)
    if torch.cuda.is_available():
        model.cuda()

    criterion = torch.nn.MSELoss()
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=args.gamma)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    best_dev_loss = float('+inf')
    best_dev_model = None
    best_dev_test_loss = 0
    counter = 0

    for epoch in range(1, args.n_epoch + 1):
        train_loss = 0
        model.train()
        iteration = 0
        optimizer.zero_grad()

        for batch in dataLoader:
            x = torch.stack(batch['input'])  # 5 x bz
            y = batch['label'].float()  # bz

            if torch.cuda.is_available():
                x = x.cuda()
                y = y.cuda()

            output = model(x)
            loss = criterion(output, y)
            train_loss += loss.item()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            iteration += 1
            # if iteration % args.iter_print == 0:
            #     logger.info('{}-{}-{}-{}'.format(epoch, iteration, train_loss, train_acc))

        train_loss = train_loss / len(dataLoader)
        dev_loss = val(model, val_dataset)
        test_loss = val(model, test_dataset)

        # scheduler.step()

        if dev_loss < best_dev_loss:
            best_dev_model = model.state_dict().copy()
            best_dev_loss = dev_loss
            best_dev_test_loss = test_loss
            counter = 0
        else:
            counter += 1

        if epoch % 5 == 0:
            logger.info('=================================================')
            logger.info('TRAIN: epoch:{}-loss:{}'.format(epoch, train_loss))
            logger.info('DEV: epoch:{}-loss:{}'.format(epoch, dev_loss))
            logger.info('TEST: epoch:{}-loss:{}'.format(epoch, test_loss))
            logger.info('BEST-DEV-LOSS: {}, BEST-DEV-TEST-LOSS:{}'.format(
                best_dev_loss, best_dev_test_loss))

        if counter > 40:
            break

    logger.info('===================[][][][][]====================')
    logger.info('TRAIN: epoch:{}-loss:{}'.format(epoch, train_loss))
    logger.info('DEV: epoch:{}-loss:{}'.format(epoch, dev_loss))
    logger.info('TEST: epoch:{}-loss:{}'.format(epoch, test_loss))
    logger.info('BEST-DEV-LOSS: {}, BEST-DEV-TEST-LOSS:{}'.format(
        best_dev_loss, best_dev_test_loss))
    torch.save(
        best_dev_model, model_save_dir + '/model-{}-{}-{}-{}.pt'.format(
            best_dev_test_loss, args.lr, args.hidden_dim, args.gamma))

    del dataLoader
    del best_dev_model
    del model
    del train_dataset
    del val_dataset
    del test_dataset
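The train function above scores the model through a val helper that is not shown; a sketch of what it is assumed to compute (average MSE over a dataset, mirroring the batch handling of the training loop; the batch size of 32 is illustrative):

import torch

def val(model, dataset):
    # Assumed helper: mean MSE loss of the model over one dataset.
    model.eval()
    criterion = torch.nn.MSELoss()
    loader = torch.utils.data.DataLoader(dataset, batch_size=32)
    total, batches = 0.0, 0
    with torch.no_grad():
        for batch in loader:
            x = torch.stack(batch['input'])
            y = batch['label'].float()
            if torch.cuda.is_available():
                x, y = x.cuda(), y.cuda()
            total += criterion(model(x), y).item()
            batches += 1
    return total / max(batches, 1)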
Example #14
def train_model(config, gpu_id, save_dir, exp_name):
    # Instantiating the model
    model = MLP(784,
                config["hidden_layers"],
                10,
                config["activation"],
                config["initialization"],
                verbose=True)

    # Loading the MNIST dataset
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.load_mnist(
        config["data_file"])

    if config['data_reduction'] != 1.:
        x_train, y_train = utils.reduce_trainset_size(x_train, y_train,
                                                      config['data_reduction'])

    # If GPU is available, sends model and dataset on the GPU
    if torch.cuda.is_available():
        model.cuda(gpu_id)

        x_train = torch.from_numpy(x_train).cuda(gpu_id)
        y_train = torch.from_numpy(y_train).cuda(gpu_id)

        x_valid = Variable(torch.from_numpy(x_valid)).cuda(gpu_id)
        y_valid = Variable(torch.from_numpy(y_valid)).cuda(gpu_id)

        x_test = Variable(torch.from_numpy(x_test)).cuda(gpu_id)
        y_test = Variable(torch.from_numpy(y_test)).cuda(gpu_id)
        print("Running on GPU")
    else:
        x_train = torch.from_numpy(x_train)
        y_train = torch.from_numpy(y_train)

        x_valid = Variable(torch.from_numpy(x_valid))
        y_valid = Variable(torch.from_numpy(y_valid))

        x_test = Variable(torch.from_numpy(x_test))
        y_test = Variable(torch.from_numpy(y_test))
        print(
            "WATCH-OUT : torch.cuda.is_available() returned False. Running on CPU."
        )

    # Instantiate TensorDataset and DataLoader objects
    train_set = torch.utils.data.TensorDataset(x_train, y_train)
    loader = torch.utils.data.DataLoader(train_set,
                                         batch_size=config["mb_size"],
                                         shuffle=True)

    # Optimizer and Loss Function
    optimizer = optim.SGD(model.parameters(),
                          lr=config['lr'],
                          momentum=config['momentum'])
    loss_fn = nn.NLLLoss()

    # Records the model's performance
    train_tape = [[], []]
    valid_tape = [[], []]
    test_tape = [[], []]

    def evaluate(data, labels):

        if not isinstance(data, Variable):
            if torch.cuda.is_available():
                data = Variable(data).cuda(gpu_id)
                labels = Variable(labels).cuda(gpu_id)
            else:
                data = Variable(data)
                labels = Variable(labels)

        output = model(data)
        loss = loss_fn(output, labels)
        prediction = torch.max(output.data, 1)[1]
        accuracy = (prediction.eq(labels.data).sum() / labels.size(0)) * 100

        return loss.data[0], accuracy

    # Record train accuracy
    train_loss, train_acc = evaluate(x_train, y_train)
    train_tape[0].append(train_loss)
    train_tape[1].append(train_acc)

    # Record valid accuracy
    valid_loss, valid_acc = evaluate(x_valid, y_valid)
    valid_tape[0].append(valid_loss)
    valid_tape[1].append(valid_acc)

    # Record test accuracy
    test_loss, test_acc = evaluate(x_test, y_test)
    test_tape[0].append(test_loss)
    test_tape[1].append(test_acc)

    print("BEFORE TRAINING \nLoss : {0:.3f} \nAcc : {1:.3f}".format(
        valid_loss, valid_acc))

    # TRAINING LOOP
    for epoch in range(1, config["max_epochs"]):
        start = time.time()
        for i, (x_batch, y_batch) in enumerate(loader):

            #pdb.set_trace()

            if torch.cuda.is_available():
                x_batch = Variable(x_batch).cuda(gpu_id)
                y_batch = Variable(y_batch).cuda(gpu_id)
            else:
                x_batch = Variable(x_batch)
                y_batch = Variable(y_batch)

            # Empties the gradients
            optimizer.zero_grad()

            # Feedforward through the model
            output = model(x_batch)

            # Computes the loss
            loss = loss_fn(output, y_batch)

            #print(i, loss)

            #if i % 10 == 0:
            #   print("LOSS : {}".format(loss))
            #  print("MAX : {}".format(torch.max(output)[0]))
            # time.sleep(2)

            # Backpropagates to compute the gradients
            loss.backward()

            # Takes one training step
            optimizer.step()

        # Record train accuracy
        train_loss, train_acc = evaluate(x_train, y_train)
        train_tape[0].append(train_loss)
        train_tape[1].append(train_acc)

        # Record valid accuracy
        valid_loss, valid_acc = evaluate(x_valid, y_valid)
        valid_tape[0].append(valid_loss)
        valid_tape[1].append(valid_acc)

        # Record test accuracy
        test_loss, test_acc = evaluate(x_test, y_test)
        test_tape[0].append(test_loss)
        test_tape[1].append(test_acc)

        print("Epoch {0} \nLoss : {1:.3f} \nAcc : {2:.3f}".format(
            epoch, valid_loss, valid_acc))
        print("Time : {0:.2f}".format(time.time() - start))

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Saves the graphs
    utils.save_results(train_tape, valid_tape, test_tape, save_dir, exp_name,
                       config)
    utils.update_comparative_chart(save_dir, config['show_test'])

    return