def driver():
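    """Build the flight-delay dataset, convert categorical and date features to
    numeric values, take up to 10,000 departures each from SFO and OAK, and
    train a FeedForward network to classify the origin airport (SFO vs. OAK).
    """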
    dataset = build()
    delaylist = [
        'ArrDelay', 'DepDelay', 'CarrierDelay', 'WeatherDelay', 'NASDelay',
        'SecurityDelay', 'LateAircraftDelay'
    ]
    #plotStats(dataset, plotlist1, 'SFO')
    #print(dataset.columns.tolist())

    dataset = dataset.reset_index()
    dataset = dataset.fillna(0)
    #Converting categorical features to numerics
    dataset["Dest"] = dataset["Dest"].astype('category')
    dataset["Dest"] = dataset["Dest"].cat.codes

    #dataset = dataset.sample(n=20000)

    dataset['Date'] = dataset['Date'].apply(lambda x: x.timestamp())
    dataSFO = dataset.loc[dataset['Origin'].isin(['SFO'])]
    dataOAK = dataset.loc[dataset['Origin'].isin(['OAK'])]
    dataSFO = dataSFO.iloc[0:10000]
    dataOAK = dataOAK.iloc[0:10000]
    frames = [dataSFO, dataOAK]
    NNdata = pd.concat(frames)
    #NNdata = NNdata.sample(n=20000)
    labels = NNdata["Origin"]
    NNdata.drop('Origin', axis=1, inplace=True)

    delayset = dataset[delaylist]

    c1 = dataset.DayOfWeek.unique()

    #labels = dataset["Origin"]
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    labels = np_utils.to_categorical(labels, 2)
    data = NNdata
    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        train_size=0.8)

    FeedForward(x_train, x_test, y_train, y_test, len(NNdata.dtypes))
Example #2
def main():
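    # Train a FeedForward classifier on the 2D ToyDataset, logging the average
    # training loss (and optionally weight histograms) to TensorBoard, then
    # evaluate on a single full-dataset test batch and save the model and args.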
    parser = argparse.ArgumentParser(
        'Train a simple classifier on a toy dataset')

    parser.add_argument('--dataset', type=str, default='')
    parser.add_argument('--train-fraction',
                        type=float,
                        default=.5,
                        help='proportion of the dataset to use for training')
    parser.add_argument('--n-samples', type=int, default=10000)
    parser.add_argument('--hidden-size',
                        type=int,
                        default=512,
                        help='Hidden size of the cleanup network')
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--seed', type=int, default=13)
    parser.add_argument('--logdir',
                        type=str,
                        default='trained_models/simple_classifier',
                        help='Directory for saved model and tensorboard log')
    parser.add_argument('--load-model',
                        type=str,
                        default='',
                        help='Optional model to continue training from')
    parser.add_argument(
        '--name',
        type=str,
        default='',
        help=
        'Name of output folder within logdir. Will use current date and time if blank'
    )
    parser.add_argument('--weight-histogram',
                        action='store_true',
                        help='Save histograms of the weights if set')

    args = parser.parse_args()

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    rng = np.random.RandomState(seed=args.seed)

    dataset_train = ToyDataset(args.n_samples)
    dataset_test = ToyDataset(args.n_samples)

    trainloader = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=0,
    )

    # For testing just do everything in one giant batch
    testloader = torch.utils.data.DataLoader(
        dataset_test,
        batch_size=len(dataset_test),
        shuffle=False,
        num_workers=0,
    )

    model = FeedForward(input_size=2,
                        hidden_size=args.hidden_size,
                        output_size=4)

    # Open a tensorboard writer if a logging directory is given
    if args.logdir != '':
        current_time = datetime.now().strftime('%b%d_%H-%M-%S')
        save_dir = osp.join(args.logdir, current_time)
        writer = SummaryWriter(log_dir=save_dir)
        if args.weight_histogram:
            # Log the initial parameters
            for name, param in model.named_parameters():
                writer.add_histogram('parameters/' + name,
                                     param.clone().cpu().data.numpy(), 0)

    criterion = nn.CrossEntropyLoss()
    # criterion = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum)

    for e in range(args.epochs):
        print('Epoch: {0}'.format(e + 1))

        avg_loss = 0
        n_batches = 0
        for i, data in enumerate(trainloader):

            locations, labels = data

            if locations.size()[0] != args.batch_size:
                continue  # Drop data, not enough for a batch
            optimizer.zero_grad()

            # outputs = torch.max(model(locations), 1)[1].unsqueeze(1)
            outputs = model(locations)

            loss = criterion(outputs, labels)

            avg_loss += loss.data.item()
            n_batches += 1

            loss.backward()

            # print(loss.data.item())

            optimizer.step()

        print(avg_loss / n_batches)

        if args.logdir != '':
            if n_batches > 0:
                avg_loss /= n_batches
                writer.add_scalar('avg_loss', avg_loss, e + 1)

            if args.weight_histogram and (e + 1) % 10 == 0:
                for name, param in model.named_parameters():
                    writer.add_histogram('parameters/' + name,
                                         param.clone().cpu().data.numpy(),
                                         e + 1)

    print("Testing")
    with torch.no_grad():

        # Everything is in one batch, so this loop will only happen once
        for i, data in enumerate(testloader):

            locations, labels = data

            outputs = model(locations)

            loss = criterion(outputs, labels)

            print(loss.data.item())

        if args.logdir != '':
            # TODO: get a visualization of the performance
            writer.add_scalar('test_loss', loss.data.item())

    # Close tensorboard writer
    if args.logdir != '':
        writer.close()

        torch.save(model.state_dict(), osp.join(save_dir, 'model.pt'))

        params = vars(args)
        with open(osp.join(save_dir, "params.json"), "w") as f:
            json.dump(params, f)
trainloader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=0,
)

# For testing just do everything in one giant batch
testloader = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=len(dataset_test),
    shuffle=False,
    num_workers=0,
)

model = FeedForward(input_size=train_inputs.shape[1],
                    output_size=train_outputs.shape[1])

if args.load_saved_model:
    model.load_state_dict(torch.load(args.load_saved_model), strict=False)

# Open a tensorboard writer if a logging directory is given
if args.logdir != '':
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    save_dir = os.path.join(args.logdir, current_time)
    writer = SummaryWriter(log_dir=save_dir)
    if args.weight_histogram:
        # Log the initial parameters
        for name, param in model.named_parameters():
            writer.add_histogram('parameters/' + name,
                                 param.clone().cpu().data.numpy(), 0)
def train(args):
    # setup metric logging. It's important to log your loss!!
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    if args.model.lower() == "best":
        try:
            train_data = pickle.load(open('./obj/train_data.pkl', 'rb'))
            train_labels = pickle.load(open('./obj/train_label.pkl', 'rb'))
            print('loaded transformed data.')
        except FileNotFoundError:
            transforms.double_batch(train_data.astype(np.float32),
                                    train_labels.astype(np.int64))

    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels, args.cnn_n1_kernel,
                             args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        # TODO: Feel free to change in initialization arguments here to take
        # whatever parameters you need.
        print("training model:: channel_init- " + str(args.channel_size) +
              "  compression_ratio- " + str(args.compression_ratio) +
              " initial kernel- " + str(args.initial_kernel) +
              " interior kernel- " + str(args.interior_kernel) +
              ' first hidden- ' + str(args.hidden))
        model = BestNN(args.hidden, args.channel_size, args.final_channel_size,
                       args.compression_ratio, args.initial_kernel,
                       args.interior_kernel)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # TODO: You can change this loop as you need to, to optimize your training!
    # for example, if you wanted to implement early stopping to make sure you
    # don't overfit your model, you would do so in this loop.
    for step in range(args.train_steps):
        # run the model and backprop for train steps
        i = np.random.choice(train_data.shape[0],
                             size=args.batch_size,
                             replace=False)
        x = torch.from_numpy(train_data[i].astype(np.float32))
        y = torch.from_numpy(train_labels[i].astype(np.int64))
        # Forward pass: Get logits for x
        logits = model(x)
        # Compute loss
        loss = F.cross_entropy(logits, y)
        if step % 25 == 0:
            print(str(step) + "  :Loss (cross entropy): " + str(loss.item()))
        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # every 50 steps (and at step 0), log metrics
        if (step + 1) % 50 == 0 or step == 0:
            train_acc, train_loss = approx_train_acc_and_loss(
                model, train_data, train_labels)
            dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)

            step_metrics = {
                'step': step,
                'train_loss': loss.item(),
                'train_acc': train_acc,
                'dev_loss': dev_loss,
                'dev_acc': dev_acc
            }

            print(
                f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}'
            )
            logger.writerow(step_metrics)

            # stop early once dev accuracy exceeds 0.92
            if dev_acc > .92:
                print(f'Done training. Saving model at {args.model_save}')
                torch.save(model, args.model_save)
                break

    # close the log file
    log_f.close()
    # save model
    print(f'Done training. Saving model at {args.model_save}')
    torch.save(model, args.model_save)
Example #5
def train(args):
    # setup metric logging. It's important to log your loss!!
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels, args.cnn_n1_kernel,
                             args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        # TODO: Feel free to change in initialization arguments here to take
        # whatever parameters you need.
        model = BestNN(args.cnn_n1_channels, args.cnn_n1_kernel,
                       args.cnn_n2_kernel, args.linear_size, args.dropout)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    # TODO: You can change this loop as you need to, to optimize your training!
    # for example, if you wanted to implement early stopping to make sure you
    # don't overfit your model, you would do so in this loop.
    for step in range(args.train_steps):
        # run the model and backprop for train steps
        i = np.random.choice(train_data.shape[0],
                             size=args.batch_size,
                             replace=False)
        x = torch.from_numpy(train_data[i].astype(np.float32))
        y = torch.from_numpy(train_labels[i].astype(np.int64))
        y = y.long()
        # Forward pass: Get logits for x
        logits = model(x)
        # Compute loss
        loss = F.cross_entropy(logits, y)
        # Zero gradients, perform a backward pass, and update the weights.
        if args.model.lower() == "best":
            optimizer_best = torch.optim.Adam(model.parameters(),
                                              lr=args.learning_rate *
                                              (0.5**(step // 1000)))
            optimizer_best.zero_grad()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # every 100 steps, log metrics
        if step % 100 == 0:
            train_acc, train_loss = approx_train_acc_and_loss(
                model, train_data, train_labels)
            dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)

            step_metrics = {
                'step': step,
                'train_loss': loss.item(),
                'train_acc': train_acc,
                'dev_loss': dev_loss,
                'dev_acc': dev_acc
            }

            print(
                f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}'
            )
            logger.writerow(step_metrics)

    # close the log file
    log_f.close()
    # save model
    print(f'Done training. Saving model at {args.model_save}')
    torch.save(model, args.model_save)
Example #6
def main():
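    # For each class in ALL_CLASSES, train a separate binary FeedForward
    # classifier (BCELoss) with step-decayed SGD, keep the best checkpoint by
    # validation loss, and report the running average AUROC across tasks.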

    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    CLASSES = []
    AUROCs = []
    auroc = AverageMeter()

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES = [cls]

        print("==> Creating model")
        model = FeedForward(num_classes=1)

        if CUDA:
            model = model.cuda()
            model = nn.DataParallel(model)
            cudnn.benchmark = True

        print('    Total params: %.2fK' %
              (sum(p.numel() for p in model.parameters()) / 1000))

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=LEARNING_RATE,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)

        print("==> Learning")

        best_loss = 1e10
        learning_rate = LEARNING_RATE

        for epoch in range(EPOCHS):

            # decay learning rate
            if (epoch + 1) % EPOCHS_DROP == 0:
                learning_rate *= LR_DROP
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            print('Epoch: [%d | %d]' % (epoch + 1, EPOCHS))

            train_loss = train(trainloader,
                               model,
                               criterion,
                               CLASSES,
                               CLASSES,
                               optimizer=optimizer,
                               use_cuda=CUDA)
            test_loss = train(validloader,
                              model,
                              criterion,
                              CLASSES,
                              CLASSES,
                              test=True,
                              use_cuda=CUDA)

            # save model
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': test_loss,
                    'optimizer': optimizer.state_dict()
                }, CHECKPOINT, is_best)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        new_auroc = calc_avg_AUROC(model, testloader, CLASSES, CLASSES, CUDA)
        auroc.update(new_auroc)

        print('New Task AUROC: {}'.format(new_auroc))
        print('Average AUROC: {}'.format(auroc.avg))

        AUROCs.append(auroc.avg)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
Example #7
def train(args):
    # setup metric logging
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels, args.cnn_n1_kernel,
                             args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        model = BestNN(args.best_n1_channels, args.best_n2_channels,
                       args.best_n1_kernel, args.best_n2_kernel,
                       args.best_lin1_trans, args.best_lin2_trans)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    for step in range(args.train_steps):
        # run the model and backprop for train steps
        i = np.random.choice(train_data.shape[0],
                             size=args.batch_size,
                             replace=False)
        x = torch.from_numpy(train_data[i].astype(np.float32))
        y = torch.from_numpy(train_labels[i].astype(np.int64))

        # Forward pass: Get logits for x
        logits = model(x)
        # Compute loss
        loss = F.cross_entropy(logits, y)
        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # every 100 steps, log metrics
        if step % 100 == 0:
            train_acc, train_loss = approx_train_acc_and_loss(
                model, train_data, train_labels)
            dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)

            step_metrics = {
                'step': step,
                'train_loss': loss.item(),
                'train_acc': train_acc,
                'dev_loss': dev_loss,
                'dev_acc': dev_acc
            }

            print(
                f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}'
            )
            logger.writerow(step_metrics)

    # close the log file
    log_f.close()
    # save model
    print(f'Done training. Saving model at {args.model_save}')
    torch.save(model, args.model_save)
Example #8
def train(args):
    np.random.seed(42)
    torch.manual_seed(42)

    # setup metric logging. It's important to log your loss!!
    log_f = open(args.log_file, 'w')
    fieldnames = ['step', 'train_loss', 'train_acc', 'dev_loss', 'dev_acc']
    logger = csv.DictWriter(log_f, fieldnames)
    logger.writeheader()

    # load data
    train_data, train_labels = load(args.data_dir, split="train")
    dev_data, dev_labels = load(args.data_dir, split="dev")

    # Build model
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits)
    elif args.model.lower() == "simple-cnn":
        model = SimpleConvNN(args.cnn_n1_channels,
                            args.cnn_n1_kernel,
                            args.cnn_n2_kernel)
    elif args.model.lower() == "best":
        model = BestNN(args.best_n1_channels,
                       args.best_n2_channels,
                       args.best_n3_channels)
    else:
        raise Exception("Unknown model type passed in!")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    if not args.train_my_way:
        for step in range(args.train_steps):
            # run the model and backprop for train steps
            i = np.random.choice(train_data.shape[0], size=args.batch_size, replace=False)
            x = torch.from_numpy(train_data[i].astype(np.float32))
            y = torch.from_numpy(train_labels[i].astype(np.int64))

            # Forward pass: Get logits for x
            logits = model(x)
            # Compute loss
            loss = F.cross_entropy(logits, y)
            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # every 100 steps, log metrics
            if step % 100 == 0:
                train_acc, train_loss = approx_train_acc_and_loss(model,
                                                                  train_data,
                                                                  train_labels)
                dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)

                step_metrics = {
                    'step': step,
                    'train_loss': loss.item(),
                    'train_acc': train_acc,
                    'dev_loss': dev_loss,
                    'dev_acc': dev_acc
                }

                print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
                logger.writerow(step_metrics)
        # close the log file
        log_f.close()
        # save model
        print(f'Done training. Saving model at {args.model_save}')
        torch.save(model, args.model_save)
    else:
        '''
        MY OPTIMIZATION SCHEME

        Three conditions decide whether to continue training:

        1. Always train for at least 'min_iter' steps, and no more than 'max_iter'
        2. If dev acc drops by 'stepwise_cushion' or more between measured points (every 100 steps), stop training
        3. If dev acc has improved by less than 'timesaver_cushion' over the past 1000 steps, stop training
        '''

        # Set up improving
        last_acc = 0
        improving = True

        # Set up got_time
        last1000 = 0
        got_time = True

        step = 0

        while step <= args.max_iter and (step <= args.min_iter or (improving and got_time)):
            # run the model and backprop for train steps
            i = np.random.choice(train_data.shape[0], size=args.batch_size, replace=False)
            x = torch.from_numpy(train_data[i].astype(np.float32))
            y = torch.from_numpy(train_labels[i].astype(np.int64))

            # Forward pass: Get logits for x
            logits = model(x)
            # Compute loss
            loss = F.cross_entropy(logits, y)
            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # every 100 steps, log metrics
            if step % 100 == 0:
                train_acc, train_loss = approx_train_acc_and_loss(model,
                                                                  train_data,
                                                                  train_labels)
                dev_acc, dev_loss = dev_acc_and_loss(model, dev_data, dev_labels)

                step_metrics = {
                    'step': step,
                    'train_loss': loss.item(),
                    'train_acc': train_acc,
                    'dev_loss': dev_loss,
                    'dev_acc': dev_acc
                }

                print(f'On step {step}: Train loss {train_loss} | Dev acc is {dev_acc}')
                logger.writerow(step_metrics)

                # Update conditions
                diff = dev_acc - last_acc
                improving = diff > args.stepwise_cushion
                last_acc = dev_acc

                if step % 1000 == 0:
                    got_time = dev_acc - last1000 > args.timesaver_cushion
                    last1000 = dev_acc

            step += 1

        # close the log file
        log_f.close()
        # save model
        print(f'Done training. Saving model at {args.model_save}')
        torch.save(model, args.model_save)
        spatial_encoding=args.spatial_encoding,
    )
    maze_name = 'singlemaze'
else:
    validation_set = ValidationSet(
        data=data, maze_sps=maze_sps, maze_indices=[0, 1, 2, 3], goal_indices=[0, 1], subsample=args.subsample,
        spatial_encoding=args.spatial_encoding,
    )
    maze_name = 'multimaze'

if args.spatial_encoding == 'learned':
    # input is maze, loc, goal ssps, output is 2D direction to move
    model = LearnedEncoding(input_size=repr_dim, maze_id_size=id_size, hidden_size=512, output_size=2)
else:
    # input is maze, loc, goal ssps, output is 2D direction to move
    model = FeedForward(input_size=id_size + repr_dim * 2, output_size=2)

if args.load_saved_model:
    model.load_state_dict(torch.load(args.load_saved_model), strict=False)

model.eval()

# Open a tensorboard writer if a logging directory is given
if args.logdir != '':
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    save_dir = os.path.join(args.logdir, current_time)
    writer = SummaryWriter(log_dir=save_dir)

criterion = nn.MSELoss()

print("Visualization")
import sys
import torch
import torch.nn as nn
from models import FeedForward
from toy_dataset import ToyDataset, plot_data
import matplotlib.pyplot as plt
import numpy as np

fname = sys.argv[1]

n_samples = 10000
hidden_size = 512

model = FeedForward(input_size=2, hidden_size=hidden_size, output_size=4)
model.load_state_dict(torch.load(fname), strict=True)
model.eval()

dataset_test = ToyDataset(n_samples)

# For testing just do everything in one giant batch
testloader = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=len(dataset_test),
    shuffle=False,
    num_workers=0,
)

criterion = nn.CrossEntropyLoss()

with torch.no_grad():
    # Everything is in one batch, so this loop will only happen once
    for i, data in enumerate(testloader):
        locations, labels = data
        outputs = model(locations)
        loss = criterion(outputs, labels)
        print(loss.data.item())
Example #11
def main():
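    # Incremental class-by-class training: the first class is learned with an
    # L1 penalty; for each later class the output layer is selectively
    # retrained, relevant neurons are selected and retrained, and drifted
    # neurons are split (select_neurons / split_neurons). AUROC is reported
    # per task.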
    if not os.path.isdir(CHECKPOINT):
        os.makedirs(CHECKPOINT)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    print("==> Creating model")
    model = FeedForward(num_classes=len(ALL_CLASSES))

    if CUDA:
        model = model.cuda()
        # model = nn.DataParallel(model)
        cudnn.benchmark = True

    # initialize parameters
    # for name, param in model.named_parameters():
    #     if 'bias' in name:
    #         param.data.zero_()
    #     elif 'weight' in name:
    #         param.data.normal_(0, 0.005)

    print('    Total params: %.2fK' %
          (sum(p.numel() for p in model.parameters()) / 1000))

    criterion = nn.BCELoss()

    CLASSES = []
    AUROCs = []

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES.append(cls)

        if t == 0:
            print("==> Learning")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)

            penalty = L1Penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE
            # epochs = 10
            for epoch in range(MAX_EPOCHS):
                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader,
                                   model,
                                   criterion,
                                   ALL_CLASSES, [cls],
                                   optimizer=optimizer,
                                   penalty=penalty,
                                   use_cuda=CUDA)
                test_loss = train(validloader,
                                  model,
                                  criterion,
                                  ALL_CLASSES, [cls],
                                  test=True,
                                  penalty=penalty,
                                  use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT,
                                is_best)

                suma = 0
                for p in model.parameters():
                    p = p.data.cpu().numpy()
                    suma += (abs(p) < ZERO_THRESHOLD).sum()
                print("Number of zero weights: %d" % suma)
        else:  # if t != 0
            # copy model
            model_copy = copy.deepcopy(model)

            print("==> Selective Retraining")

            # Solve Eq.3

            # freeze all layers except the last one (last 2 parameters)
            params = list(model.parameters())
            for param in params[:-2]:
                param.requires_grad = False

            optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                         model.parameters()),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)

            penalty = L1Penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train(trainloader,
                      model,
                      criterion,
                      ALL_CLASSES, [cls],
                      optimizer=optimizer,
                      penalty=penalty,
                      use_cuda=CUDA)
                train(validloader,
                      model,
                      criterion,
                      ALL_CLASSES, [cls],
                      test=True,
                      penalty=penalty,
                      use_cuda=CUDA)

            for param in model.parameters():
                param.requires_grad = True

            print("==> Selecting Neurons")
            hooks = select_neurons(model, t)

            print("==> Training Selected Neurons")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=1e-4)

            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader,
                                   model,
                                   criterion,
                                   ALL_CLASSES, [cls],
                                   optimizer=optimizer,
                                   use_cuda=CUDA)
                test_loss = train(validloader,
                                  model,
                                  criterion,
                                  ALL_CLASSES, [cls],
                                  test=True,
                                  use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT,
                                is_best)

            # remove hooks
            for hook in hooks:
                hook.remove()

            print("==> Splitting Neurons")
            split_neurons(model_copy, model)

        print("==> Calculating AUROC")
        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        auroc = calc_avg_AUROC(model, testloader, ALL_CLASSES, CLASSES, CUDA)

        print('AUROC: {}'.format(auroc))

        AUROCs.append(auroc)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
# input is maze, loc, goal ssps, output is 2D direction to move
if args.n_hidden_layers > 1:
    model = MLP(input_size=id_size + repr_dim * 2,
                hidden_size=args.hidden_size,
                output_size=2,
                n_layers=args.n_hidden_layers)
else:
    if args.spatial_encoding == 'learned':
        model = LearnedEncoding(input_size=repr_dim,
                                maze_id_size=id_size,
                                hidden_size=args.hidden_size,
                                output_size=2)
    else:
        model = FeedForward(input_size=id_size + repr_dim * 2,
                            hidden_size=args.hidden_size,
                            output_size=2)

if args.load_saved_model:
    model.load_state_dict(torch.load(args.load_saved_model), strict=False)

# Open a tensorboard writer if a logging directory is given
if args.logdir != '':
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    save_dir = os.path.join(args.logdir, current_time)
    writer = SummaryWriter(log_dir=save_dir)
    if args.weight_histogram:
        # Log the initial parameters
        for name, param in model.named_parameters():
            writer.add_histogram('parameters/' + name,
                                 param.clone().cpu().data.numpy(), 0)
def train(args):
    """
    This function trains the models
    :param args: the command line arguments defining the desired actions
    """

    # load data
    train_data_all, dev_data_all, _ = load(args.data_dir,
                                           cachedir=args.cachedir,
                                           override_cache=args.override_cache,
                                           text_only=(args.model.lower()
                                                      in ["bi-lstm", "bert"]),
                                           include_tfidf=args.include_tfidf,
                                           balanced=args.balanced)
    train_data, train_labels = train_data_all.X, train_data_all.y
    dev_data, dev_labels = dev_data_all.X, dev_data_all.y

    # Build model
    apx = get_appendix(args.include_tfidf, args.balanced)
    if args.model.lower() == "simple-ff":
        model = FeedForward(args.ff_hunits, train_data.shape[1])
        train_pytorch(args,
                      model,
                      train_data,
                      train_labels,
                      dev_data,
                      dev_labels,
                      save_model_path=f"models/simple-ff{apx}.torch")
    elif args.model.lower() == "bi-lstm":
        model = BiLSTM(epochs=args.num_epochs,
                       batch_size=args.batch_size,
                       max_seq_len=args.max_seq_len)
        model.train(train_data, train_labels, dev_data, dev_labels)
    elif args.model.lower() == "logreg":
        model = LogisticRegression()
        model.train(train_data,
                    train_labels,
                    dev_data,
                    dev_labels,
                    save_model_path=f"models/logreg{apx}.pkl")
    elif args.model.lower() == "majority-vote":
        model = MajorityVote()
        model.train(train_labels, dev_labels)
    elif args.model.lower() == "bert":
        model = Bert(epochs=args.num_epochs,
                     batch_size=args.batch_size,
                     max_seq_len=args.max_seq_len,
                     learning_rate=args.learning_rate)
        model.train(train_data,
                    train_labels,
                    dev_data,
                    dev_labels,
                    save_model_path=f"models/bert.pkl")
    elif args.model.lower() == "svm":
        model = SVM()
        model.train(train_data,
                    train_labels,
                    save_model_path=f"models/svm{apx}.sav")
    else:
        raise Exception("Unknown model type passed in!")
for i in range(n_goals):
    sp_name = possible_objects[i]
    x_env, y_env = env.object_locations[sp_name][[0, 1]]

    # Need to scale to SSP coordinates
    # Env is 0 to 13, SSP is -5 to 5
    x = ((x_env - 0) / coarse_size) * limit_range + xs[0]
    y = ((y_env - 0) / coarse_size) * limit_range + ys[0]

    item_memory += vocab[sp_name] * encode_point(x, y, x_axis_sp, y_axis_sp)
item_memory.normalize()

# Component functions of the full system
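# - cleanup network: maps a noisy SSP back to a clean SSP (ssp_dim -> ssp_dim)
# - localization network: estimates the agent's SSP position from velocity,
#   distance sensor readings, and the map ID
# - policy network: maps maze ID plus current and goal SSPs to a 2D movement
#   direction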

cleanup_network = FeedForward(input_size=ssp_dim,
                              hidden_size=512,
                              output_size=ssp_dim)
cleanup_network.load_state_dict(torch.load(args.cleanup_network), strict=True)
cleanup_network.eval()

# Input is x and y velocity plus the distance sensor measurements, plus map ID
localization_network = LocalizationModel(
    input_size=2 + n_sensors + n_maps,
    unroll_length=1,  #rollout_length,
    sp_dim=ssp_dim)
localization_network.load_state_dict(torch.load(args.localization_network),
                                     strict=True)
localization_network.eval()

if args.n_hidden_layers_policy == 1:
    policy_network = FeedForward(input_size=id_size + ssp_dim * 2,
Example #15
	rel_pretrained = model.rels.weight.data


# =========================================
# Initialize MODEL
# =========================================
if args.model == 'transE':
	model = TransE(len(rel2id), len(ent2id), dim=config['embedding_dim'], norm=config['norm'], margin=config['margin'], l2reg=config['l2reg'])
elif args.model == 'transH':
	model = TransH(len(rel2id), len(ent2id), dim=config['embedding_dim'], norm=config['norm'], margin=config['margin'], l2reg=config['l2reg'])
elif args.model == 'subjD':
	model = SubjKB_Deviation(len(rel2id), len(ent2id), len(src2id), dim=config['embedding_dim'], norm=config['norm'], margin=config['margin'], l2reg=config['l2reg'], relPretrained=rel_pretrained, entPretrained=ent_pretrained)
elif args.model == 'subjM':
	model = SubjKB_Matrix(len(rel2id), len(ent2id), len(src2id), dim=config['embedding_dim'], norm=config['norm'],  nonlinearity='tanh')
elif args.model == 'ff':
	model = FeedForward(len(rel2id), len(ent2id), dim=config['embedding_dim'])
elif args.model == 'ffs':
	model = FeedForward_Source(len(rel2id), len(ent2id), len(src2id), dim=config['embedding_dim'])
elif args.model == 'hyte':
	model = HyTE(len(rel2id), len(ent2id), len(src2id), dim=config['embedding_dim'], norm=config['norm'], margin=config['margin'], l2reg=config['l2reg'])

# model.to(device)

# Logger
if args.mode.startswith('train'):
	logger = Logger(config['name'], ['loss', 'val_loss', 'MR', 'MRR', 'h@10'])
else:
	logger = None

# Loss function
criterion = MarginRankingLoss(config['margin'], reduction='sum')