# load data file_name = os.path.join(rlvision.RLVISION_DATA, "chain_data", "grid16_po.pkl") model_path = os.path.join(rlvision.RLVISION_MODEL, "grid16-po") if not os.path.isdir(model_path): os.makedirs(model_path) # parameters batch_size = 256 nb_epochs = 80 print('# Minibatch-size: {}'.format(batch_size)) print('# epoch: {}'.format(nb_epochs)) print('') train, test, _ = utils.process_map_data(file_name) model = vin_model(l_s=train[0].shape[2], k=20) model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy']) model_file = os.path.join(model_path, "vin-model-po-16-{epoch:02d}-{acc:.2f}.h5") checkpoint = ModelCheckpoint(model_file, monitor='acc', verbose=1, save_best_only=True, mode='max', save_weights_only=True) history = model.fit([ train[0].transpose(
# NOTE(review): the next five statements are the tail of a function whose
# header lies outside the visible source; the 4-space indent is assumed.
    # read the output of the 'reward' layer and of the layer named 'value<k>'
    reward = get_layer_output(model, 'reward', im_ary)
    value = get_layer_output(model, 'value{}'.format(k), im_ary)
    # reshape both outputs to im's shape without its leading axis
    reward = np.reshape(reward, im.shape[1:])
    value = np.reshape(value, im.shape[1:])
    return action, reward, value


# data file and trained weight file used by this script
file_name = os.path.join(rlvision.RLVISION_DATA,
                         "chain_data", "grid28_with_idx.pkl")
model_file = os.path.join(rlvision.RLVISION_MODEL,
                          "grid28-po", "vin-model-po-28-77-0.89.h5")

# return_full=True yields four values here: images, states, labels, indices
im_data, state_data, label_data, sample_idx = process_map_data(
    file_name, return_full=True)
# l_s is read from axis 2 of the image array
model = vin_model(l_s=im_data.shape[2], k=20)
model.load_weights(model_file)

sampler = GridSampler(im_data, state_data, label_data, sample_idx, (28, 28))

# result accumulators; presumably ground truth / partially observed /
# difference -- TODO confirm against the code that fills them
gt_collector = []
po_collector = []
diff_collector = []

# work on the grid with index 77
grid, state, label, goal = sampler.get_grid(77)
gt_collector.append(state)

# two-channel map: channel 0 is all ones, channel 1 is copied from grid[1]
step_map = np.zeros((2, 28, 28))
step_map[0] = np.ones((28, 28))
step_map[1] = grid[1]
# starting position: first two entries of the first row of state
pos = [state[0, 0], state[0, 1]]
reward = get_layer_output(model, 'reward', im_ary) value = get_layer_output(model, 'value{}'.format(k), im_ary) reward = np.reshape(reward, im.shape[1:]) value = np.reshape(value, im.shape[1:]) return action, reward, value # load data file_name = os.path.join(rlvision.RLVISION_DATA, "chain_data", "grid16_with_idx.pkl") model_file = os.path.join(rlvision.RLVISION_MODEL, "vin_model_po_16.h5") k = 20 _, test, _ = process_map_data(file_name) model = vin_model(l_s=test[0].shape[2], k=k) model.load_weights(model_file) for d in zip(*test): im = d[0] print im.shape pos = d[1] print pos action, reward, value = predict(im, pos, model, k) path = [tuple(pos)] for _ in range(30): if im[1][pos[1], pos[0]] == 1: break action, _, _ = predict(im, pos, model, k) dx, dy = get_action(action)