# Resolve where the pickled dataset lives and where checkpoints are written;
# the checkpoint directory is created on first use.
model_path = os.path.join(rlvision.RLVISION_MODEL, "grid16-po")
file_name = os.path.join(rlvision.RLVISION_DATA, "chain_data", "grid16_po.pkl")
if not os.path.isdir(model_path):
    os.makedirs(model_path)

# Training hyper-parameters, echoed to stdout followed by a blank line.
batch_size = 256
nb_epochs = 80
for banner in ('# Minibatch-size: {}'.format(batch_size),
               '# epoch: {}'.format(nb_epochs),
               ''):
    print(banner)

# Only the first two values returned by process_map_data are used here.
train, test, _ = utils.process_map_data(file_name)

# l_s is read from axis 2 of the first training array; k is fixed at 20.
model = vin_model(l_s=train[0].shape[2], k=20)
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=['accuracy'])

# The {epoch}/{acc} fields are intentionally left unformatted: the template
# is handed as-is to the checkpoint callback.
# NOTE(review): presumably Keras' ModelCheckpoint fills them in per epoch --
# confirm against the file's imports.
model_file = os.path.join(
    model_path, "vin-model-po-16-{epoch:02d}-{acc:.2f}.h5")
checkpoint = ModelCheckpoint(
    model_file,
    monitor='acc',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1)
history = model.fit([
    train[0].transpose(
    reward = get_layer_output(model, 'reward', im_ary)
    value = get_layer_output(model, 'value{}'.format(k), im_ary)
    reward = np.reshape(reward, im.shape[1:])
    value = np.reshape(value, im.shape[1:])

    return action, reward, value


# Weight file to restore and the grid28 dataset (the variant with indices).
model_file = os.path.join(rlvision.RLVISION_MODEL, "grid28-po",
                          "vin-model-po-28-77-0.89.h5")
file_name = os.path.join(rlvision.RLVISION_DATA, "chain_data",
                         "grid28_with_idx.pkl")

# With return_full=True the loader's result is unpacked into four values.
im_data, state_data, label_data, sample_idx = process_map_data(
    file_name, return_full=True)

# Build the network (l_s taken from axis 2 of im_data) and restore weights.
model = vin_model(l_s=im_data.shape[2], k=20)
model.load_weights(model_file)

sampler = GridSampler(im_data, state_data, label_data, sample_idx, (28, 28))

# Pull entry 77 from the sampler; its state seeds the first collector.
grid, state, label, goal = sampler.get_grid(77)
gt_collector = [state]
po_collector, diff_collector = [], []

# Two 28x28 planes: plane 0 is all ones, plane 1 is overwritten with grid[1].
step_map = np.ones((2, 28, 28))
step_map[1] = grid[1]

# Starting position: the first two entries of row 0 of state.
pos = [state[0, 0], state[0, 1]]
    reward = get_layer_output(model, 'reward', im_ary)
    value = get_layer_output(model, 'value{}'.format(k), im_ary)
    reward = np.reshape(reward, im.shape[1:])
    value = np.reshape(value, im.shape[1:])

    return action, reward, value


# Load the grid16 dataset (variant with indices) and the trained weights.
file_name = os.path.join(rlvision.RLVISION_DATA, "chain_data",
                         "grid16_with_idx.pkl")
model_file = os.path.join(rlvision.RLVISION_MODEL, "vin_model_po_16.h5")

# k is shared between model construction and every predict() call below.
k = 20
# Only the middle value returned by process_map_data is used here.
_, test, _ = process_map_data(file_name)
model = vin_model(l_s=test[0].shape[2], k=k)
model.load_weights(model_file)

# Walk the test arrays in lockstep: d[0] is bound to im, d[1] to pos.
for d in zip(*test):
    im = d[0]
    # print() call form instead of the Python 2 print statement: identical
    # output for a single argument on Python 2, and valid on Python 3.
    print(im.shape)
    pos = d[1]
    print(pos)
    # Prediction at the starting position; reward and value are kept from
    # this call, while action is recomputed inside the loop below.
    action, reward, value = predict(im, pos, model, k)

    # Positions visited so far, starting with the initial one.
    path = [tuple(pos)]
    # At most 30 steps per sample.
    for _ in range(30):
        # Stop once plane 1 of im holds a 1 at the current position
        # (indexed [pos[1], pos[0]]).
        # NOTE(review): presumably plane 1 marks the goal cell -- confirm.
        if im[1][pos[1], pos[0]] == 1:
            break
        action, _, _ = predict(im, pos, model, k)
        dx, dy = get_action(action)