Example #1

import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# rrn, rrn_utils, Datasets, and DigitEncoder are project modules whose import
# paths are not shown in this snippet.

def create_encoder():
    all_train_dir = '../small_embed/emb3/'
    dataset = Datasets.load('../4x4_all_reimbed/data/datasets.pkl')

    model_filename = all_train_dir + "model.mdl"
    train_log_filename = all_train_dir + "logs/training.pkl"

    with open(train_log_filename, 'rb') as f:
        train_log = pickle.load(f)
    hp = train_log['hyperparameters']

    model = rrn.RRN(dim_x=hp['dim_x'],
                    dim_y=hp['dim_y'],
                    embed_size=hp['embed_size'],
                    hidden_layer_size=hp['hidden_layer_size'])
    model.load_state_dict(torch.load(model_filename), strict=False)
    model.eval()

    # pull the trained embedding layer out of the loaded RRN
    for k, v in model.named_modules():
        if k == 'embed_layer':
            orig_embed_layer = v

    device = 4  # CUDA device index
    split_inputs, split_outputs = dataset.split_data([100])
    # The solved grids serve as both encoder input and target, so the decoder
    # learns to invert the digit embedding (blank cells have no output class,
    # hence the num_embeddings - 1 decoder width below).
    train_inputs = split_outputs[0]
    train_outputs = split_outputs[0]
    train_x = torch.stack([rrn_utils.encode_input(p)
                           for p in train_inputs]).cuda(device)
    train_y = torch.stack([rrn_utils.encode_output(p)
                           for p in train_outputs]).cuda(device)

    digitEncoder = DigitEncoder(orig_embed_layer).cuda(device)
    optimizer = optim.Adam(digitEncoder.parameters())

    def closure():
        optimizer.zero_grad()
        predictions = digitEncoder(train_x)
        loss = F.cross_entropy(predictions.permute(0, 2, 1), train_y)
        loss.backward()
        return loss

    for i in range(1000):
        optimizer.step(closure)

    encoder = nn.Embedding(digitEncoder.num_embeddings,
                           digitEncoder.embedding_dim)
    encoder.load_state_dict(digitEncoder.encoder.state_dict())
    encoder.eval()
    for p in encoder.parameters():
        p.requires_grad = False

    decoder = nn.Linear(digitEncoder.embedding_dim,
                        digitEncoder.num_embeddings - 1)
    decoder.load_state_dict(digitEncoder.decoder.state_dict())
    decoder.eval()
    for p in decoder.parameters():
        p.requires_grad = False

    return encoder, decoder
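
DigitEncoder itself is not shown on this page. Below is a minimal sketch
consistent with how create_encoder uses it: the attribute names encoder,
decoder, num_embeddings, and embedding_dim follow from the calls above, but
the constructor and forward pass are assumptions.

class DigitEncoder(nn.Module):
    def __init__(self, orig_embed_layer: nn.Embedding):
        super().__init__()
        self.num_embeddings = orig_embed_layer.num_embeddings
        self.embedding_dim = orig_embed_layer.embedding_dim
        # trainable copy of the RRN's digit embedding
        self.encoder = nn.Embedding(self.num_embeddings, self.embedding_dim)
        self.encoder.load_state_dict(orig_embed_layer.state_dict())
        # maps embeddings back to digit logits; the blank class is excluded,
        # matching the num_embeddings - 1 output width used above
        self.decoder = nn.Linear(self.embedding_dim, self.num_embeddings - 1)

    def forward(self, x):
        # x: (batch, cells) LongTensor of digit indices
        # returns (batch, cells, num_embeddings - 1) logits
        return self.decoder(self.encoder(x))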
Example #2
other_inputs = {'validation': split_inputs[1]}
other_outputs = {'validation': split_outputs[1]}

model = RelNet(dim_x=hp['dim_x'],
               dim_y=hp['dim_y'],
               embed_size=hp['embed_size'],
               hidden_layer_size=hp['hidden_layer_size']).cuda(hp['device'])

optimizer = optim.Adam(model.parameters(),
                       lr=hp['learning_rate'],
                       weight_decay=hp['weight_decay'])

train_x_grid = torch.stack([rrn_utils.encode_input(p) for p in train_inputs])
train_x_prob = utils.puzzle_as_dist(train_x_grid).cuda(hp['device'])
train_x_grid = train_x_grid.cuda(hp['device'])
train_y = torch.stack([rrn_utils.encode_output(p)
                       for p in train_outputs]).cuda(hp['device'])

other_x_grid = {}
other_x_prob = {}
other_y = {}
for k in other_inputs:
    other_x_grid[k] = torch.stack(
        [rrn_utils.encode_input(p) for p in other_inputs[k]])
    other_x_prob[k] = utils.puzzle_as_dist(other_x_grid[k]).cuda(hp['device'])
    other_x_grid[k] = other_x_grid[k].cuda(hp['device'])
    other_y[k] = torch.stack([
        rrn_utils.encode_output(p) for p in other_outputs[k]
    ]).cuda(hp['device'])

train_losses = []  # (epoch)
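
utils.puzzle_as_dist is external to this snippet. A plausible sketch,
assuming rrn_utils.encode_input produces per-cell digit indices with 0 marking
a blank cell (both assumptions; num_digits would be dim_x * dim_y):

import torch
import torch.nn.functional as F

def puzzle_as_dist(x_grid: torch.Tensor, num_digits: int = 16) -> torch.Tensor:
    # Givens become one-hot distributions over the digits; blanks become a
    # uniform distribution. Output shape: (batch, cells, num_digits).
    one_hot = F.one_hot(x_grid.clamp(min=1) - 1, num_digits).float()
    uniform = torch.full_like(one_hot, 1.0 / num_digits)
    blank = (x_grid == 0).unsqueeze(-1)
    return torch.where(blank, uniform, one_hot)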
Example #3
dim_x = hyperparameters['dim_x']
dim_y = hyperparameters['dim_y']
num_iters = hyperparameters['num_iters']
batch_size = hyperparameters['batch_size']
epochs = hyperparameters['epochs']
valid_epochs = hyperparameters['valid_epochs']
save_epochs = hyperparameters['save_epochs']
embed_size = hyperparameters['embed_size']
hidden_layer_size = hyperparameters['hidden_layer_size']
learning_rate = hyperparameters['learning_rate']
weight_decay = hyperparameters['weight_decay']
device = hyperparameters['device']

train_x = torch.stack([rrn_utils.encode_input(p)
                       for p in train_inputs]).cuda(device)
train_y = torch.stack([rrn_utils.encode_output(p)
                       for p in train_outputs]).cuda(device)

other_x = {}
other_y = {}
for k in other_inputs:
    other_x[k] = torch.stack(
        [rrn_utils.encode_input(p) for p in other_inputs[k]]).cuda(device)
    other_y[k] = torch.stack(
        [rrn_utils.encode_output(p) for p in other_outputs[k]]).cuda(device)

# model = EmbedRRN(dim_x=dim_x, dim_y=dim_y, embed_size=embed_size, hidden_layer_size=hidden_layer_size).cuda(device)
model = RRN(dim_x=dim_x,
            dim_y=dim_y,
            embed_size=embed_size,
            hidden_layer_size=hidden_layer_size).cuda(device)
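
For reference, the hyperparameters dict unpacked above needs at least the
following keys; the values shown here are placeholders, not the ones used in
the project.

hyperparameters = {
    'dim_x': 4,                # box width of the grid
    'dim_y': 4,                # box height of the grid
    'num_iters': 32,           # message-passing steps per forward pass
    'batch_size': 128,
    'epochs': 1000,
    'valid_epochs': 10,        # validate every N epochs
    'save_epochs': 100,        # checkpoint every N epochs
    'embed_size': 16,
    'hidden_layer_size': 96,
    'learning_rate': 1e-3,
    'weight_decay': 1e-4,
    'device': 0,               # CUDA device index
}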
Example #4

import os
import pickle
import time
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# RRN and rrn_utils are project modules whose import paths are not shown here.

def train_rrn(hyperparameters: dict,
              train_inputs: list,
              train_outputs: list,
              other_inputs: dict = None,
              other_outputs: dict = None):
    """
    :param hyperparameters: Check below for what fields must exist in hyperparameters
    :param train_inputs: list of GridStrings
    :param train_outputs: list of GridStrings, corresponding in index to train_inputs
    :param other_inputs: dictionary of GridStrings where the key is name of the dataset
    :param other_outputs: dictionary of GridStrings where the key is name of the dataset,
        corresponding in index to inputs of same name
    :return:
    """

    if other_inputs is None:
        other_inputs = {}
    if other_outputs is None:
        other_outputs = {}
    assert set(other_inputs.keys()) == set(other_outputs.keys())

    if not os.path.exists('./checkpoints'):
        os.makedirs('./checkpoints')
    if not os.path.exists('./logs'):
        os.makedirs('./logs')

    dim_x = hyperparameters['dim_x']
    dim_y = hyperparameters['dim_y']
    num_iters = hyperparameters['num_iters']
    batch_size = hyperparameters['batch_size']
    epochs = hyperparameters['epochs']
    valid_epochs = hyperparameters['valid_epochs']
    save_epochs = hyperparameters['save_epochs']
    embed_size = hyperparameters['embed_size']
    hidden_layer_size = hyperparameters['hidden_layer_size']
    learning_rate = hyperparameters['learning_rate']
    weight_decay = hyperparameters['weight_decay']
    parallel = False

    if 'devices' in hyperparameters:
        if len(hyperparameters['devices']) > 1:
            devices = hyperparameters['devices']
            parallel = True
        device = hyperparameters['devices'][0]
    else:
        device = hyperparameters['device']

    train_x = torch.stack([rrn_utils.encode_input(p)
                           for p in train_inputs]).cuda(device)
    train_y = torch.stack([rrn_utils.encode_output(p)
                           for p in train_outputs]).cuda(device)

    other_x = {}
    other_y = {}
    for k in other_inputs:
        other_x[k] = torch.stack(
            [rrn_utils.encode_input(p) for p in other_inputs[k]]).cuda(device)
        other_y[k] = torch.stack([
            rrn_utils.encode_output(p) for p in other_outputs[k]
        ]).cuda(device)

    model = RRN(dim_x=dim_x,
                dim_y=dim_y,
                embed_size=embed_size,
                hidden_layer_size=hidden_layer_size)
    # orig_embed_layer is assumed to be defined in the enclosing scope (see the
    # create_encoder example above): the pretrained embedding is copied in and frozen.
    model.embed_layer.load_state_dict(orig_embed_layer.state_dict())
    model.embed_layer.eval()
    for p in model.embed_layer.parameters():
        p.requires_grad = False

    model = model.cuda(device)
    if parallel:
        model = nn.DataParallel(model, device_ids=devices)

    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)

    train_losses = []  # (epoch, )
    train_accuracies = []  # (epoch, grid, timestep)
    other_losses = {name: [] for name in other_x}  # (epoch, )
    other_accuracies = {name: []
                        for name in other_x}  # (epoch, grid, timestep)
    times = []

    def closure():
        optimizer.zero_grad()
        total_loss = 0
        epoch_accuracies = []
        shuffle_indices = np.arange(len(train_x))
        np.random.shuffle(shuffle_indices)
        for i in tqdm(range(0, len(train_x), batch_size), leave=False):
            x_batch = train_x[shuffle_indices[i:i + batch_size]]
            y_batch = train_y[shuffle_indices[i:i + batch_size]]
            loss, accuracies = rrn_utils.get_performance(model=model,
                                                         x=x_batch,
                                                         y=y_batch,
                                                         no_grad=False,
                                                         num_iters=num_iters)
            loss.backward()
            total_loss += loss
            epoch_accuracies.append(accuracies)  # record every batch, not just the last

        train_losses.append(float(total_loss))
        train_accuracies.append(np.concatenate(epoch_accuracies))
        return total_loss

    for i in tqdm(range(epochs)):
        start_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        start_time = time.time()

        train_loss = optimizer.step(closure)

        run_validate = i == 0 or (i + 1) % valid_epochs == 0
        if run_validate:
            for name in other_x:
                loss, accuracy = rrn_utils.get_performance(model=model,
                                                           x=other_x[name],
                                                           y=other_y[name],
                                                           num_iters=num_iters,
                                                           no_grad=True)
                other_losses[name].append(float(loss))
                other_accuracies[name].append(accuracy)

        if (i + 1) % save_epochs == 0:
            model_filename = "./checkpoints/epoch_{}.mdl".format(i + 1)
            train_data_filename = "./logs/training.pkl"
            print("Saving model to {}".format(model_filename))
            torch.save(model.state_dict(), model_filename)
            with open(train_data_filename, 'wb') as f:
                pickle.dump(
                    {
                        'hyperparameters': hyperparameters,
                        'train_losses': train_losses,
                        'train_accuracies': train_accuracies,
                        'other_losses': other_losses,
                        'other_accuracies': other_accuracies,
                        'times': times
                    }, f)

        end_time_str = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
        end_time = time.time()
        runtime = end_time - start_time
        times.append({
            'start_time': start_time_str,
            'end_time': end_time_str,
            'runtime': runtime
        })
        print("duration: {}s\t iter: {}\t| loss: {}\t| accuracy: {}".format(
            round(runtime, 1), i, round(float(train_loss), 3),
            round(np.mean(train_accuracies[-1][:, -1]), 3)))
        if run_validate:
            for name in sorted(other_x):
                print("data: {}\t| loss: {}\t| accuracy: {}".format(
                    name, round(other_losses[name][-1], 3),
                    round(np.mean(other_accuracies[name][-1][:, -1]), 3)))

    model_filename = "./model.mdl"
    print("Saving model to {}".format(model_filename))
    torch.save(model.state_dict(), model_filename)
    return model
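
A hypothetical invocation, stitched together from the other examples on this
page: the dataset path and split come from Example #1, the hyperparameter keys
from Example #3, and the 'validation' split from Examples #2 and #5.

dataset = Datasets.load('../4x4_all_reimbed/data/datasets.pkl')
split_inputs, split_outputs = dataset.split_data([100])
model = train_rrn(hyperparameters,
                  train_inputs=split_inputs[0],
                  train_outputs=split_outputs[0],
                  other_inputs={'validation': split_inputs[1]},
                  other_outputs={'validation': split_outputs[1]})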
Example #5
other_inputs = {'validation': split_inputs[1]}
other_outputs = {'validation': split_outputs[1]}


model = RelNet(dim_x=hp['dim_x'],
               dim_y=hp['dim_y'],
               embed_size=hp['embed_size'],
               hidden_layer_size=hp['hidden_layer_size']).cuda(hp['device'])


optimizer = optim.Adam(model.parameters(),
                       lr=hp['learning_rate'],
                       weight_decay=hp['weight_decay'])

train_x = torch.stack([rrn_utils.encode_input(p) for p in train_inputs])
train_x = utils.puzzle_as_dist(train_x).cuda(hp['device'])
train_y = torch.stack([rrn_utils.encode_output(p) for p in train_outputs]).cuda(hp['device'])

other_x = {}
other_y = {}
for k in other_inputs:
    other_x[k] = torch.stack([rrn_utils.encode_input(p) for p in other_inputs[k]])
    other_x[k] = utils.puzzle_as_dist(other_x[k]).cuda(hp['device'])
    other_y[k] = torch.stack([rrn_utils.encode_output(p) for p in other_outputs[k]]).cuda(hp['device'])

train_losses = []  # (epoch)
train_accuracies = []  # (epoch, grid, timestep)
other_losses = {name: [] for name in other_x} # (epoch)
other_accuracies = {name: [] for name in other_x} # (epoch, grid, timestep)
times = []

if not os.path.exists('./checkpoints'):
    os.makedirs('./checkpoints')
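
rrn_utils.get_performance is called throughout these examples but never shown.
A sketch of what it might do, inferred from its call sites; the model call
signature and the per-step outputs are assumptions:

import contextlib

import numpy as np
import torch
import torch.nn.functional as F

def get_performance(model, x, y, num_iters, no_grad):
    ctx = torch.no_grad() if no_grad else contextlib.nullcontext()
    with ctx:
        # assumed: the model returns one logits tensor per message-passing step
        step_logits = model(x, num_iters)
        loss = sum(F.cross_entropy(logits.permute(0, 2, 1), y)
                   for logits in step_logits)
        # (grid, timestep): whether each whole grid is solved at each step,
        # matching indexing like accuracies[:, -1] in the training loop above
        accuracies = np.stack(
            [(logits.argmax(dim=-1) == y).all(dim=-1).cpu().numpy()
             for logits in step_logits],
            axis=1)
    return loss, accuracies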