Example #1
def load_vision_model(model_dict, kp_type, colour_input, batch_size,
                      lsp_layers, patch_sizes, ckpt_load_dir, vis_load):
    # run dummy forward passes so the subclassed models (encoder, lsp_model,
    # pnet) build their weights before load_weights() is called
    if kp_type == "permakey":
        if colour_input:
            inputs = tf.zeros((batch_size, 84, 84, 3))
        else:
            inputs = tf.zeros((batch_size, 84, 84, 1))
        _, _, _ = model_dict["encoder"](inputs, training=True)

        for l in range(len(lsp_layers)):
            lsp_input = tf.zeros(
                (batch_size, 8 * patch_sizes[l]**2 *
                 model_dict["encoder"].filters[lsp_layers[l]]))
            _ = model_dict["lsp_model"][l](lsp_input, training=True)
        pnet_inputs = tf.zeros((batch_size, 42, 42, 2))
        _, _ = model_dict["pnet"](pnet_inputs, training=True)
        # load vision module from ckpts
        model_dict["encoder"].load_weights(ckpt_load_dir + 'ckpt_encoder-' +
                                           str(vis_load) + '.h5')
        for l in range(len(lsp_layers)):
            model_dict["lsp_model"][l].load_weights(ckpt_load_dir +
                                                    'ckpt_lsp_model-layer-' +
                                                    str(lsp_layers[l]) + '-' +
                                                    str(vis_load) + '.h5')

        model_dict["pnet"].load_weights(ckpt_load_dir + 'ckpt_pnet-' +
                                        str(vis_load) + '.h5')

    elif kp_type == "transporter":
        if colour_input:
            inputs = tf.zeros((batch_size, 84, 84, 3, 2))
            _ = transporter_loss(inputs,
                                 model_dict["encoder"],
                                 model_dict["keypointer"],
                                 model_dict["decoder"],
                                 training=True)
        if not colour_input:
            inputs = tf.zeros((batch_size, 84, 84, 1, 2))
            _ = transporter_loss(inputs,
                                 model_dict["encoder"],
                                 model_dict["keypointer"],
                                 model_dict["decoder"],
                                 training=True)

        # load vision module from ckpts
        model_dict["encoder"].load_weights(ckpt_load_dir + 'ckpt_encoder-' +
                                           str(vis_load) + '.h5')
        model_dict["keypointer"].load_weights(ckpt_load_dir +
                                              'ckpt_keypointer-' +
                                              str(vis_load) + '.h5')
        model_dict["decoder"].load_weights(ckpt_load_dir + 'ckpt_decoder-' +
                                           str(vis_load) + '.h5')
    return model_dict
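
The dummy forward passes above are needed because subclassed Keras models create their variables lazily: load_weights() on an HDF5 checkpoint fails until the model has been called once. A minimal, self-contained sketch of that pattern (TinyEncoder and the checkpoint name are hypothetical stand-ins, not part of the repo):

import tensorflow as tf

class TinyEncoder(tf.keras.Model):
    # hypothetical stand-in for the project's encoder
    def __init__(self):
        super().__init__()
        self.conv = tf.keras.layers.Conv2D(8, 3, activation="relu")

    def call(self, x, training=False):
        return self.conv(x)

model = TinyEncoder()
_ = model(tf.zeros((1, 84, 84, 1)))        # builds the variables
# model.load_weights("ckpt_encoder-5.h5")  # would fail before the call above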
Example #2
    def train_step(images, loss_type):
        if loss_type == "transporter":
            with tf.GradientTape() as tape:
                reconstruction_loss = ul_loss.transporter_loss(images,
                                                               encoder,
                                                               keypointer,
                                                               decoder,
                                                               training=True)

            # update params
            model_params = (encoder.trainable_variables +
                            keypointer.trainable_variables +
                            decoder.trainable_variables)
            grads = tape.gradient(reconstruction_loss, model_params)
            optimizer.apply_gradients(zip(grads, model_params))
        else:
            # guard against an unbound reconstruction_loss below
            raise ValueError("Unknown loss_type %s" % loss_type)

        return reconstruction_loss
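
A hypothetical driver loop for train_step; train_dataset, the three models and optimizer are assumed to exist in the enclosing scope, as in the snippet above. In TF2 code of this style, train_step is often decorated with @tf.function for graph execution, but it also runs eagerly as written:

num_epochs = 10                        # hypothetical setting
for epoch in range(num_epochs):
    for images in train_dataset:       # dataset built elsewhere in the repo
        loss = train_step(images, "transporter")
    print("epoch %d  recon loss %.4f" % (epoch, float(loss)))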
Example #3
def vision_forward_pass(inputs, vision_model_dict, lsp_layers, kp_type,
                        patch_sizes, img_size):
    """
	:param inputs: input images (greyscale or colour) for vision_module (batch*timesteps, H, W, C)
	:param vision_model_dict: (dict) of vision networks {"encoder", "lsp_model", "pnet"}
								or {"encoder", "keypointer", "decoder"}
	:param lsp_layers: (list) selected layers for lsp computation
	:param kp_type: "transporter" or "permakey" type of keypoint method used
	:param patch_sizes: (tuple)
	:param img_size: (int) size of input images
	:return:
	mask = botom-up (un)-predictability heatmaps (batch_size, H, W, num_kpts)
	encoder_activations = (b, H, W, C)
	kpts = keypoint locations (b, num_keypoints, 2)
	"""
    # global variables
    encoder_activations, bottom_up_map, kpts = 0.0, 0.0, 0.0
    if kp_type == "permakey":
        mu, var, encoder_activations = vision_model_dict["encoder"](
            inputs, training=False)
        # run lsp on activation patches
        kpts, bottom_up_map, stacked_error_masks, _, _ = lsp_loss(
            vision_model_dict["lsp_model"],
            encoder_activations,
            patch_sizes,
            img_size,
            lsp_layers,
            pnet=vision_model_dict["pnet"],
            training=False)
        # storing only last lsp_layer activations
        encoder_activations = encoder_activations[lsp_layers[-1]]
    elif kp_type == "transporter":
        inputs = tf.stack([inputs, inputs], axis=4)
        kpts, bottom_up_map, encoder_activations, pred, _ = transporter_loss(
            inputs,
            vision_model_dict["encoder"],
            vision_model_dict["keypointer"],
            vision_model_dict["decoder"],
            training=False)
    return bottom_up_map, encoder_activations, kpts
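
A hypothetical call, assuming an already-initialised vision_model_dict for the "permakey" variant and a batch of greyscale 84x84 frames; the layer indices and patch sizes are illustrative only:

frames = tf.zeros((32, 84, 84, 1))
bottom_up_map, activations, kpts = vision_forward_pass(
    frames, vision_model_dict, lsp_layers=[0, 1], kp_type="permakey",
    patch_sizes=(2, 2), img_size=84)
# per the docstring, kpts has shape (32, num_keypoints, 2)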
Example #4
def compare_kpts(data_dir, loss_to_use, num_keypoints, latent_dim_size, env,
                 img_input, img_size, colour_input, patch_sizes, lsp_layers,
                 noise_type, batch_size, eval_split, tp_fname, pkey_fname,
                 tp_epoch, pkey_epoch, save_base_dir, ablation, _run):

    # Input params
    tp_ckpt_load_dir = "transporter_exp/" + img_input + "/" + noise_type \
                       + "/" + env + "/" + str(num_keypoints) + "/" + \
                       tp_fname + "/ckpt_"
    pkey_ckpt_load_dir = "permakey_exp/" + img_input + "/" + noise_type \
                         + "/" + env + "/" + str(num_keypoints) + "/" + \
                         pkey_fname + "/ckpt_"

    model_id = ""
    if not ablation:
        # numerical string after '.' as unique model_id
        model_id = pkey_ckpt_load_dir.split(".")[1][0:6] + "_" \
                   + tp_ckpt_load_dir.split(".")[1][0:6]
    elif ablation:
        model_id = pkey_ckpt_load_dir.split(".")[1][0:6]

    save_dir = save_base_dir + img_input + "/" + noise_type + "/" \
               + env + "/" + str(num_keypoints) + "/" + model_id

    # setup data pipeline
    if img_input == "dm_atari":
        eval_dataset = preprocess.deepmind_atari(data_dir, env, eval_split,
                                                 loss_to_use, batch_size,
                                                 noise_type, colour_input)
    else:
        raise ValueError("Eval data %s does not exist" % img_input)

    # load best pkey ckpt models
    pkey_model_list = create_model()
    tp_kp_model_list = transporter_train.create_model(colour_input,
                                                      num_keypoints, 0.1,
                                                      "transporter")

    # unpacking models from model list
    encoder, decoder, lsp_models, pnet = pkey_model_list
    # run one forward pass so the models build their weights before loading
    if colour_input:
        test_inputs = tf.zeros((batch_size, img_size, img_size, 3))
    else:
        test_inputs = tf.zeros((batch_size, img_size, img_size, 1))
    _ = ul_loss.pkey_loss(pkey_model_list,
                          test_inputs,
                          latent_dim_size,
                          patch_sizes,
                          batch_size,
                          img_size,
                          lsp_layers,
                          loss_to_use,
                          training=True)

    # restore best model weights
    encoder.load_weights(pkey_ckpt_load_dir + 'encoder-' + str(pkey_epoch) +
                         '.h5')
    decoder.load_weights(pkey_ckpt_load_dir + 'decoder-' + str(pkey_epoch) +
                         '.h5')
    pnet.load_weights(pkey_ckpt_load_dir + 'pnet-' + str(pkey_epoch) + '.h5')
    for m in range(len(lsp_models)):
        lsp_models[m].load_weights(pkey_ckpt_load_dir + 'lsp_model-layer-' +
                                   str(lsp_layers[m]) + '-' + str(pkey_epoch) +
                                   '.h5')

    pkey_model_list = [encoder, decoder, lsp_models, pnet]

    # unpacking models from tp_model_list
    tp_encoder, keypointer, decoder = tp_kp_model_list

    if colour_input:
        test_inputs = tf.zeros((batch_size, img_size, img_size, 3, 2))
    else:
        test_inputs = tf.zeros((batch_size, img_size, img_size, 1, 2))
    _ = ul_loss.transporter_loss(test_inputs,
                                 tp_encoder,
                                 keypointer,
                                 decoder,
                                 training=True)

    # restore best model weights
    tp_encoder.load_weights(tp_ckpt_load_dir + 'encoder-' + str(tp_epoch) +
                            '.h5')
    decoder.load_weights(tp_ckpt_load_dir + 'decoder-' + str(tp_epoch) + '.h5')
    keypointer.load_weights(tp_ckpt_load_dir + 'keypointer-' + str(tp_epoch) +
                            '.h5')

    batch_num = 0
    for x_test in eval_dataset:
        batch_num = batch_num + 1
        # inference using pkey model
        x_pred, kpts, gauss_mask, error_mask, _ = ul_loss.pkey_loss(
            pkey_model_list,
            x_test,
            latent_dim_size,
            patch_sizes,
            batch_size,
            img_size,
            lsp_layers,
            loss_to_use,
            training=False)

        # inference using tp_model
        tp_x_test = tf.stack([x_test, x_test], axis=4)
        kpts_tp, gauss_mask_tp, features, x_pred_tp, _ = ul_loss.transporter_loss(
            tp_x_test, tp_encoder, keypointer, decoder, training=False)

        # logging results for viz
        if not os.path.exists(save_dir):
            # create the directory you want to save to
            os.makedirs(save_dir)
        # saving data from pkey model
        np.savez(
            save_dir + "/" + "batch_" + str(batch_num) + "_preds_masks.npz",
            x_pred, x_test.numpy(), kpts, gauss_mask, error_mask)
        # saving data from tp_model
        np.savez(save_dir + "/" + "batch_" + str(batch_num) + "_keypoints.npz",
                 x_pred_tp, x_test.numpy(), kpts_tp, gauss_mask_tp)
    return 0
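
Because np.savez above receives its arrays positionally, they are stored under NumPy's default keys arr_0 ... arr_4 in argument order. A sketch of reading one batch file back (save_dir as constructed in the function above):

import numpy as np

data = np.load(save_dir + "/batch_1_preds_masks.npz")
x_pred, x_test, kpts, gauss_mask, error_mask = (
    data["arr_0"], data["arr_1"], data["arr_2"],
    data["arr_3"], data["arr_4"])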
Example #5
def evaluate(data_dir, env, ckpt_load_dir, test_logs_prefix, loss_to_use,
             noise_type, eval_split, img_input, colour_input, num_keypoints,
             gauss_std, batch_size, epoch, _run):

    test_inputs, keypoints, heatmaps, x_pred = 0.0, 0.0, 0.0, 0.0
    encoder, keypointer, decoder = None, None, None

    # setup data pipeline
    if img_input == "dm_atari":
        eval_dataset = preprocess.deepmind_atari(data_dir, env, eval_split,
                                                 loss_to_use, batch_size,
                                                 noise_type, colour_input)
    else:
        raise ValueError("Eval data %s does not exist" % img_input)

    # load best ckpt models
    if loss_to_use == "transporter":
        encoder = TransporterEncoder()
        keypointer = TransporterKeypointer(num_keypoints=num_keypoints,
                                           gauss_std=gauss_std)
        decoder = TransporterDecoder(colour_input)
    else:
        # avoid passing the None models into transporter_loss below
        raise ValueError("Unknown loss %s" % loss_to_use)

    # run one forward pass so the models build their weights before loading
    if colour_input:
        test_inputs = tf.zeros((batch_size, 84, 84, 3, 2))
    else:
        test_inputs = tf.zeros((batch_size, 84, 84, 1, 2))
    _ = ul_loss.transporter_loss(test_inputs,
                                 encoder,
                                 keypointer,
                                 decoder,
                                 training=True)

    # restore best model weights
    encoder.load_weights(ckpt_load_dir + 'encoder-' + str(epoch) + '.h5')
    decoder.load_weights(ckpt_load_dir + 'decoder-' + str(epoch) + '.h5')
    keypointer.load_weights(ckpt_load_dir + 'keypointer-' + str(epoch) + '.h5')

    batch_num = 0
    test_recon_loss = 0.0
    for x_test in eval_dataset:
        batch_num = batch_num + 1
        if loss_to_use == "transporter":
            keypoints, heatmaps, features, x_pred, loss = ul_loss.transporter_loss(
                x_test, encoder, keypointer, decoder, training=False)

            test_recon_loss = test_recon_loss + loss

        # saving data
        if not os.path.exists(test_logs_prefix):
            # create the directory you want to save to
            os.makedirs(test_logs_prefix)
        np.savez(
            test_logs_prefix + "/" + "epoch_" + str(epoch) +
            "_batch_" + str(batch_num) + "_keypoints.npz", x_pred,
            x_test.numpy(), keypoints, heatmaps)

    # log test loss
    test_recon_loss = test_recon_loss / batch_num

    # logging avg. test epoch losses to Sacred
    add_sacred_log("test.epoch_recon_loss", float(test_recon_loss.numpy()),
                   _run)

    print(" avg. test_nll_loss: %3.4f " % (test_recon_loss.numpy()))
    return 0.
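
The sum-then-divide averaging above can equivalently be written with tf.keras.metrics.Mean, which tracks a running mean incrementally; a minimal sketch with made-up per-batch losses standing in for the transporter_loss values:

import tensorflow as tf

recon_metric = tf.keras.metrics.Mean(name="test_recon_loss")
for loss in [0.31, 0.28, 0.30]:       # hypothetical per-batch losses
    recon_metric.update_state(loss)
print(float(recon_metric.result()))   # mean of the three values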
Example #6
    def test_step(images, loss_type):
        if loss_type == "transporter":
            keypoints, heatmaps, features, x_hat, loss = ul_loss.transporter_loss(
                images, encoder, keypointer, decoder, training=False)
            return keypoints, heatmaps, x_hat, loss
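
A hypothetical evaluation loop around test_step; eval_dataset and the three models are assumed to exist in the enclosing scope, as in the earlier snippets:

total_loss, batches = 0.0, 0
for images in eval_dataset:
    keypoints, heatmaps, x_hat, loss = test_step(images, "transporter")
    total_loss += float(loss)
    batches += 1
print("avg. test recon loss: %.4f" % (total_loss / batches))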