# Resolve the pickled dataset and the directory that receives checkpoints,
# creating that directory when it is not already present.
file_name = os.path.join(
    rlvision.RLVISION_DATA, "chain_data", "grid16_po.pkl")
model_path = os.path.join(rlvision.RLVISION_MODEL, "grid16-po")
if not os.path.isdir(model_path):
    os.makedirs(model_path)

# training hyper-parameters
batch_size = 256
nb_epochs = 80

# Echo the run configuration, then a blank separator line.
for _line in ('# Minibatch-size: {}'.format(batch_size),
              '# epoch: {}'.format(nb_epochs),
              ''):
    print(_line)

# The loader returns three values; the third is not needed here.
train, test, _ = utils.process_map_data(file_name)

# The model's l_s argument is read from axis 2 of the first training array.
model = vin_model(l_s=train[0].shape[2], k=20)
model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",
    metrics=['accuracy'])

# Checkpoint file names embed the epoch number and the 'acc' metric.
# NOTE(review): presumably the Keras ModelCheckpoint callback -- with
# save_best_only=True and mode='max' it should write weights only when
# 'acc' improves; confirm against the installed Keras version.
model_file = os.path.join(
    model_path, "vin-model-po-16-{epoch:02d}-{acc:.2f}.h5")
checkpoint = ModelCheckpoint(
    model_file,
    monitor='acc',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1)

history = model.fit([
from rlvision.grid import GridSampler
from rlvision.utils import process_map_data
from rlvision.dstar import Dstar

# general experiment settings

n_samples = 100  # use a limited amount of data
# NOTE(review): the original note read "twice much as the step"; presumably
# this is twice the side length of the (8, 8) grid used below -- confirm.
n_steps = 16
save_model = True  # if true, all data will be saved for future use
enable_vis = False  # if true, real-time visualization will be enabled

# load the "grid8" dataset in its full form and wrap it in a sampler

file_name = os.path.join(
    rlvision.RLVISION_DATA, "chain_data", "grid8_with_idx.pkl")
im_data, state_data, label_data, sample_idx = process_map_data(
    file_name, return_full=True)
sampler = GridSampler(
    im_data, state_data, label_data, sample_idx, (8, 8))

# lists that the experiment loop below appends its results to
gt_collector, po_collector, diff_collector = [], [], []

print("[MESSAGE] EXPERIMENT STARTED!")
for grid_idx in xrange(0, len(sample_idx), 7):
    # get a grid
    grid, state, label, goal = sampler.get_grid(grid_idx)
    gt_collector.append(state)

    # define step map
    grid = 1 - grid[0]
    step_map = np.ones((8, 8), dtype=np.uint8)
    action = np.argmax(res)
    reward = get_layer_output(model, 'reward', im_ary)
    value = get_layer_output(model, 'value{}'.format(k), im_ary)
    reward = np.reshape(reward, im.shape[1:])
    value = np.reshape(value, im.shape[1:])

    return action, reward, value


# Locations of the saved model weights and of the pickled dataset.
model_file = os.path.join(rlvision.RLVISION_MODEL, "vin_model_po_16.h5")
file_name = os.path.join(
    rlvision.RLVISION_DATA, "chain_data", "grid16_with_idx.pkl")

# k is handed to the model constructor here and to predict() further down.
k = 20

# Only the second of the loader's three return values is used.
_, test, _ = process_map_data(file_name)

# Build the network, reading l_s from axis 2 of the first test array,
# then restore the stored weights.
model = vin_model(k=k, l_s=test[0].shape[2])
model.load_weights(model_file)

for d in zip(*test):
    im = d[0]
    print im.shape
    pos = d[1]
    print pos
    action, reward, value = predict(im, pos, model, k)

    path = [tuple(pos)]
    for _ in range(30):
        if im[1][pos[1], pos[0]] == 1:
            break
        action, _, _ = predict(im, pos, model, k)