Beispiel #1
0
    def init_model(self, dataset):
        """Initialize self.model, including normalization parameters."""
        self.set_max_obs_vector_length(dataset)

        _, _, info = dataset.random_sample()
        obs_vector = self.info_to_gt_obs(info)

        obs_dim = obs_vector.shape[0]
        act_dim = 6
        if self.six_dof:
            act_dim = 9
        self.model = MlpModel(self.batch_size,
                              obs_dim,
                              act_dim,
                              'relu',
                              self.use_mdn,
                              dropout=0.1)

        sampled_gt_obs = []

        num_samples = 1000
        for _ in range(num_samples):
            _, _, info = dataset.random_sample()
            t_worldaug_world, _ = self.get_augmentation_transform()
            sampled_gt_obs.append(self.info_to_gt_obs(info, t_worldaug_world))

        sampled_gt_obs = np.array(sampled_gt_obs)

        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.model.set_normalization_parameters(obs_train_parameters)
Beispiel #2
0
  def init_model(self, dataset):
    """Initialize models, including normalization parameters."""
    self.set_max_obs_vector_length(dataset)

    _, _, info = dataset.random_sample()
    obs_vector = self.info_to_gt_obs(info)

    # Setup pick model
    obs_dim = obs_vector.shape[0]
    act_dim = 3
    self.pick_model = MlpModel(
        self.batch_size, obs_dim, act_dim, 'relu', self.use_mdn, dropout=0.1)

    sampled_gt_obs = []

    num_samples = 1000
    for _ in range(num_samples):
      _, _, info = dataset.random_sample()
      t_worldaug_world, _ = self.get_augmentation_transform()
      sampled_gt_obs.append(self.info_to_gt_obs(info, t_worldaug_world))

    sampled_gt_obs = np.array(sampled_gt_obs)

    obs_train_parameters = dict()
    obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
        np.float32)
    obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
        np.float32)
    self.pick_model.set_normalization_parameters(obs_train_parameters)

    # Setup pick-conditioned place model
    obs_dim = obs_vector.shape[0] + act_dim
    act_dim = 3
    self.place_model = MlpModel(
        self.batch_size, obs_dim, act_dim, 'relu', self.use_mdn, dropout=0.1)

    sampled_gt_obs = []

    num_samples = 1000
    for _ in range(num_samples):
      _, act, info = dataset.random_sample()
      t_worldaug_world, _ = self.get_augmentation_transform()
      obs = self.info_to_gt_obs(info, t_worldaug_world)
      obs = np.hstack((obs, self.act_to_gt_act(act, t_worldaug_world)[:3]))
      sampled_gt_obs.append(obs)

    sampled_gt_obs = np.array(sampled_gt_obs)

    obs_train_parameters = dict()
    obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
        np.float32)
    obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
        np.float32)
    self.place_model.set_normalization_parameters(obs_train_parameters)
Beispiel #3
0
    def init_model(self, dataset):
        """Initialize self.model, including normalization parameters."""
        self.set_max_obs_vector_length(dataset)

        # Get obs dim and action dim (3 for pick, 3 for place), initialize model.
        if self.goal_conditioned:
            _, _, info, goal = dataset.random_sample(goal_images=True)
            obs_vector = self.info_to_gt_obs(info, goal=goal)
        else:
            _, _, info = dataset.random_sample()
            obs_vector = self.info_to_gt_obs(info)

        obs_dim = obs_vector.shape[0]
        act_dim = 6
        if self.six_dof:
            act_dim = 9
        self.model = MlpModel(self.BATCH_SIZE,
                              obs_dim,
                              act_dim,
                              'relu',
                              self.USE_MDN,
                              dropout=0.1)

        # Sample points from the data to get reasonable mean / std values.
        sampled_gt_obs = []
        num_samples = 1000
        for _ in range(num_samples):
            t_worldaug_world, _ = self.get_augmentation_transform()
            if self.goal_conditioned:
                _, _, info, goal = dataset.random_sample(goal_images=True)
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world, goal=goal))
            else:
                _, _, info = dataset.random_sample()
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world))
        sampled_gt_obs = np.array(sampled_gt_obs)
        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.model.set_normalization_parameters(obs_train_parameters)

        self.obs_train_mean = obs_train_parameters['mean']
        self.obs_train_std = obs_train_parameters['std']
        print('Done initializing self.model for ground truth agent.')
Beispiel #4
0
class GtState2StepAgent(GtStateAgent):
  """Agent which uses ground-truth state information -- useful as a baseline."""

  def __init__(self, name, task):
    super(GtState2StepAgent, self).__init__(name, task)

    # Set up model.
    self.pick_model = None
    self.place_model = None

    self.pick_optim = tf.keras.optimizers.Adam(learning_rate=2e-4)
    self.place_optim = tf.keras.optimizers.Adam(learning_rate=2e-4)
    self.metric = tf.keras.metrics.Mean(name='metric')
    self.val_metric = tf.keras.metrics.Mean(name='val_metric')

  def init_model(self, dataset):
    """Initialize models, including normalization parameters."""
    self.set_max_obs_vector_length(dataset)

    _, _, info = dataset.random_sample()
    obs_vector = self.info_to_gt_obs(info)

    # Setup pick model
    obs_dim = obs_vector.shape[0]
    act_dim = 3
    self.pick_model = MlpModel(
        self.batch_size, obs_dim, act_dim, 'relu', self.use_mdn, dropout=0.1)

    sampled_gt_obs = []

    num_samples = 1000
    for _ in range(num_samples):
      _, _, info = dataset.random_sample()
      t_worldaug_world, _ = self.get_augmentation_transform()
      sampled_gt_obs.append(self.info_to_gt_obs(info, t_worldaug_world))

    sampled_gt_obs = np.array(sampled_gt_obs)

    obs_train_parameters = dict()
    obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
        np.float32)
    obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
        np.float32)
    self.pick_model.set_normalization_parameters(obs_train_parameters)

    # Setup pick-conditioned place model
    obs_dim = obs_vector.shape[0] + act_dim
    act_dim = 3
    self.place_model = MlpModel(
        self.batch_size, obs_dim, act_dim, 'relu', self.use_mdn, dropout=0.1)

    sampled_gt_obs = []

    num_samples = 1000
    for _ in range(num_samples):
      _, act, info = dataset.random_sample()
      t_worldaug_world, _ = self.get_augmentation_transform()
      obs = self.info_to_gt_obs(info, t_worldaug_world)
      obs = np.hstack((obs, self.act_to_gt_act(act, t_worldaug_world)[:3]))
      sampled_gt_obs.append(obs)

    sampled_gt_obs = np.array(sampled_gt_obs)

    obs_train_parameters = dict()
    obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
        np.float32)
    obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
        np.float32)
    self.place_model.set_normalization_parameters(obs_train_parameters)

  def train(self, dataset, num_iter, writer, validation_dataset):
    """Train on dataset for a specific number of iterations."""

    if self.pick_model is None:
      self.init_model(dataset)

    if self.use_mdn:
      loss_criterion = mdn_utils.mdn_loss
    else:
      loss_criterion = tf.keras.losses.MeanSquaredError()

    @tf.function
    def train_step(pick_model, place_model, batch_obs, batch_act,
                   loss_criterion):
      with tf.GradientTape() as tape:
        prediction = pick_model(batch_obs)
        loss0 = loss_criterion(batch_act[:, 0:3], prediction)
        grad = tape.gradient(loss0, pick_model.trainable_variables)
        self.pick_optim.apply_gradients(
            zip(grad, pick_model.trainable_variables))
      with tf.GradientTape() as tape:
        # batch_obs = tf.concat((batch_obs, batch_act[:,0:3] +
        #                        tf.random.normal(shape=batch_act[:,0:3].shape,
        #                                         stddev=0.001)), axis=1)
        batch_obs = tf.concat((batch_obs, batch_act[:, 0:3]), axis=1)
        prediction = place_model(batch_obs)
        loss1 = loss_criterion(batch_act[:, 3:], prediction)
        grad = tape.gradient(loss1, place_model.trainable_variables)
        self.place_optim.apply_gradients(
            zip(grad, place_model.trainable_variables))
      return loss0 + loss1

    print_rate = 100
    for i in range(num_iter):
      start = time.time()

      batch_obs, batch_act, _, _, _ = self.get_data_batch(dataset)

      # Forward through model, compute training loss, update weights.
      self.metric.reset_states()
      loss = train_step(self.pick_model, self.place_model, batch_obs, batch_act,
                        loss_criterion)
      self.metric(loss)
      with writer.as_default():
        tf.summary.scalar(
            'gt_state_loss', self.metric.result(), step=self.total_iter + i)

      if i % print_rate == 0:
        loss = np.float32(loss)
        print(f'Train Iter: {self.total_iter + i} Loss: {loss:.4f} Iter time:',
              time.time() - start)
        # utils.meshcat_visualize(self.vis, obs, act, info)

    self.total_iter += num_iter
    self.save()

  def act(self, obs, info):
    """Run inference and return best action."""
    act = {'camera_config': self.camera_config, 'primitive': None}

    # Get observations and run pick prediction
    gt_obs = self.info_to_gt_obs(info)
    pick_prediction = self.pick_model(gt_obs[None, Ellipsis])
    if self.use_mdn:
      pi, mu, var = pick_prediction
      # prediction = mdn_utils.pick_max_mean(pi, mu, var)
      pick_prediction = mdn_utils.sample_from_pdf(pi, mu, var)
      pick_prediction = pick_prediction[:, 0, :]
    pick_prediction = pick_prediction[0]  # unbatch

    # Get observations and run place prediction
    obs_with_pick = np.hstack((gt_obs, pick_prediction))

    # since the pick at train time is always 0.0,
    # the predictions are unstable if not exactly 0
    obs_with_pick[-1] = 0.0

    place_prediction = self.place_model(obs_with_pick[None, Ellipsis])
    if self.use_mdn:
      pi, mu, var = place_prediction
      # prediction = mdn_utils.pick_max_mean(pi, mu, var)
      place_prediction = mdn_utils.sample_from_pdf(pi, mu, var)
      place_prediction = place_prediction[:, 0, :]
    place_prediction = place_prediction[0]

    prediction = np.hstack((pick_prediction, place_prediction))

    # just go exactly to objects, from observations
    # p0_position = np.hstack((gt_obs[3:5], 0.02))
    # p0_rotation = utils.eulerXYZ_to_quatXYZW(
    #     (0, 0, -gt_obs[5]*self.theta_scale))
    # p1_position = np.hstack((gt_obs[0:2], 0.02))
    # p1_rotation = utils.eulerXYZ_to_quatXYZW(
    #     (0, 0, -gt_obs[2]*self.theta_scale))

    # just go exactly to objects, predicted
    p0_position = np.hstack((prediction[0:2], 0.02))
    p0_rotation = utils.eulerXYZ_to_quatXYZW(
        (0, 0, -prediction[2] * self.theta_scale))
    p1_position = np.hstack((prediction[3:5], 0.02))
    p1_rotation = utils.eulerXYZ_to_quatXYZW(
        (0, 0, -prediction[5] * self.theta_scale))

    # Select task-specific motion primitive.
    act['primitive'] = 'pick_place'
    if self.task == 'sweeping':
      act['primitive'] = 'sweep'
    elif self.task == 'pushing':
      act['primitive'] = 'push'

    params = {
        'pose0': (p0_position, p0_rotation),
        'pose1': (p1_position, p1_rotation)
    }
    act['params'] = params
    return act

  #-------------------------------------------------------------------------
  # Helper Functions
  #-------------------------------------------------------------------------

  def load(self, num_iter):
    """Load something."""

    # Do something here.
    # self.model.load(os.path.join(self.models_dir, model_fname))
    # Update total training iterations of agent.
    self.total_iter = num_iter

  def save(self):
    """Save models."""
    # Do something here.
    # self.model.save(os.path.join(self.models_dir, model_fname))
    pass
Beispiel #5
0
class GtStateAgent:
    """Agent which uses ground-truth state information -- useful as a baseline."""
    def __init__(self, name, task):
        # self.fig, self.ax = plt.subplots(1,1)
        self.name = name
        self.task = task

        if self.task in ['aligning', 'palletizing', 'packing']:
            self.use_box_dimensions = True
        else:
            self.use_box_dimensions = False

        if self.task in ['sorting']:
            self.use_colors = True
        else:
            self.use_colors = False

        self.total_iter = 0
        self.camera_config = cameras.RealSenseD415.CONFIG

        # A place to save pre-trained models.
        self.models_dir = os.path.join('checkpoints', self.name)
        if not os.path.exists(self.models_dir):
            os.makedirs(self.models_dir)

        # Set up model.
        self.model = None
        # boundaries = [1000, 2000, 5000, 10000]
        # values = [1e-2, 1e-2, 1e-2, 1e-3, 1e-5]
        # learning_rate_fn = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        #   boundaries, values)
        # self.optim = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn)
        self.optim = tf.keras.optimizers.Adam(learning_rate=2e-4)
        self.metric = tf.keras.metrics.Mean(name='metric')
        self.val_metric = tf.keras.metrics.Mean(name='val_metric')
        self.theta_scale = 10.0
        self.batch_size = 128
        self.use_mdn = True
        self.vis = utils.create_visualizer()

        self.bounds = np.array([[0.25, 0.75], [-0.5, 0.5], [0, 0.28]])
        self.pixel_size = 0.003125
        self.six_dof = False

    def extract_x_y_theta(self,
                          object_info,
                          t_worldaug_world=None,
                          preserve_theta=False):
        """Extract in-plane theta."""
        object_position = object_info[0]
        object_quat_xyzw = object_info[1]

        if t_worldaug_world is not None:
            object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                                object_quat_xyzw[1], object_quat_xyzw[2])
            t_world_object = transformations.quaternion_matrix(
                object_quat_wxyz)
            t_world_object[0:3, 3] = np.array(object_position)
            t_worldaug_object = t_worldaug_world @ t_world_object

            object_quat_wxyz = transformations.quaternion_from_matrix(
                t_worldaug_object)
            if not preserve_theta:
                object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                                    object_quat_wxyz[3], object_quat_wxyz[0])
            object_position = t_worldaug_object[0:3, 3]

        object_xy = object_position[0:2]
        object_theta = -np.float32(
            utils.quatXYZW_to_eulerXYZ(object_quat_xyzw)[2]) / self.theta_scale
        return np.hstack((object_xy, object_theta)).astype(
            np.float32), object_position, object_quat_xyzw

    def extract_box_dimensions(self, info):
        return np.array(info[2])

    def extract_color(self, info):
        return np.array(info[-1])

    def info_to_gt_obs(self, info, t_worldaug_world=None):
        """Calculate ground-truth observation vector for environment info."""
        object_keys = sorted(info.keys())
        observation_vector = []
        for object_key in object_keys:
            object_x_y_theta, _, _ = self.extract_x_y_theta(
                info[object_key], t_worldaug_world)
            observation_vector.append(object_x_y_theta)
            if self.use_box_dimensions:
                observation_vector.append(
                    self.extract_box_dimensions(info[object_key]))
            if self.use_colors:
                observation_vector.append(self.extract_color(info[object_key]))
        observation_vector = np.array(observation_vector).reshape(-1).astype(
            np.float32)

        # pad with zeros
        if self.max_obs_vector_length != 0:
            observation_vector = np.pad(
                observation_vector,
                [0, self.max_obs_vector_length - len(observation_vector)])

        return observation_vector

    def act_to_gt_act(self, act, t_worldaug_world=None, transform_params=None):
        del transform_params

        # dont update theta due to suction invariance to theta
        pick_se2, _, _ = self.extract_x_y_theta(act['params']['pose0'],
                                                t_worldaug_world,
                                                preserve_theta=True)
        place_se2, _, _ = self.extract_x_y_theta(act['params']['pose1'],
                                                 t_worldaug_world,
                                                 preserve_theta=True)
        return np.hstack((pick_se2, place_se2)).astype(np.float32)

    def set_max_obs_vector_length(self, dataset):
        num_samples = 2000  # just to find the largest environment dimensionality
        self.max_obs_vector_length = 0
        max_obs_vector_length = 0
        for _ in range(num_samples):
            _, _, info = dataset.random_sample()
            obs_vector_length = self.info_to_gt_obs(info).shape[0]
            if obs_vector_length > max_obs_vector_length:
                max_obs_vector_length = obs_vector_length
        self.max_obs_vector_length = max_obs_vector_length

    def init_model(self, dataset):
        """Initialize self.model, including normalization parameters."""
        self.set_max_obs_vector_length(dataset)

        _, _, info = dataset.random_sample()
        obs_vector = self.info_to_gt_obs(info)

        obs_dim = obs_vector.shape[0]
        act_dim = 6
        if self.six_dof:
            act_dim = 9
        self.model = MlpModel(self.batch_size,
                              obs_dim,
                              act_dim,
                              'relu',
                              self.use_mdn,
                              dropout=0.1)

        sampled_gt_obs = []

        num_samples = 1000
        for _ in range(num_samples):
            _, _, info = dataset.random_sample()
            t_worldaug_world, _ = self.get_augmentation_transform()
            sampled_gt_obs.append(self.info_to_gt_obs(info, t_worldaug_world))

        sampled_gt_obs = np.array(sampled_gt_obs)

        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.model.set_normalization_parameters(obs_train_parameters)

    def get_augmentation_transform(self):
        heightmap = np.zeros((320, 160))
        theta, trans, pivot = utils.get_random_image_transform_params(
            heightmap.shape)
        transform_params = theta, trans, pivot
        t_world_center, t_world_centeraug = utils.get_se3_from_image_transform(
            *transform_params, heightmap, self.bounds, self.pixel_size)
        t_worldaug_world = t_world_centeraug @ np.linalg.inv(t_world_center)
        return t_worldaug_world, transform_params

    def get_data_batch(self, dataset):
        """Pre-process info and obs-act, and make batch."""
        batch_obs = []
        batch_act = []
        for _ in range(self.batch_size):
            obs, act, info = dataset.random_sample()
            t_worldaug_world, transform_params = self.get_augmentation_transform(
            )

            batch_obs.append(self.info_to_gt_obs(info, t_worldaug_world))
            batch_act.append(
                self.act_to_gt_act(
                    act, t_worldaug_world,
                    transform_params))  # this samples pick points from surface
            # on insertion task only, this can be used to imagine as if the picks were
            # deterministic.
            # batch_act.append(self.info_to_gt_obs(info))

        batch_obs = np.array(batch_obs)
        batch_act = np.array(batch_act)
        return batch_obs, batch_act, obs, act, info

    def train(self, dataset, num_iter, writer, validation_dataset):
        """Train on dataset for a specific number of iterations."""

        if self.model is None:
            self.init_model(dataset)

        if self.use_mdn:
            loss_criterion = mdn_utils.mdn_loss
        else:
            loss_criterion = tf.keras.losses.MeanSquaredError()

        @tf.function
        def train_step(model, batch_obs, batch_act, loss_criterion):
            with tf.GradientTape() as tape:
                prediction = model(batch_obs)
                loss = loss_criterion(batch_act, prediction)
                grad = tape.gradient(loss, model.trainable_variables)
                self.optim.apply_gradients(zip(grad,
                                               model.trainable_variables))
            return loss

        print_rate = 100
        validation_rate = 1000
        for i in range(num_iter):
            start = time.time()
            batch_obs, batch_act, _, _, _ = self.get_data_batch(dataset)

            # Forward through model, compute training loss, update weights.
            self.metric.reset_states()
            loss = train_step(self.model, batch_obs, batch_act, loss_criterion)
            self.metric(loss)
            with writer.as_default():
                tf.summary.scalar('gt_state_loss',
                                  self.metric.result(),
                                  step=self.total_iter + i)

            if i % print_rate == 0:
                loss = np.float32(loss)
                print(
                    f'Train Iter: {self.total_iter + i} Loss: {loss:.4f} Iter time:',
                    time.time() - start)
                # utils.meshcat_visualize(self.vis, obs, act, info)

            # Compute valid loss
            if (self.total_iter + i) % validation_rate == 0:
                print('Validating!')
                tf.keras.backend.set_learning_phase(0)
                self.val_metric.reset_states()
                batch_obs, batch_act, _, _, _ = self.get_data_batch(
                    validation_dataset)
                prediction = self.model(batch_obs)
                loss = loss_criterion(batch_act, prediction)
                self.val_metric(loss)
                with writer.as_default():
                    tf.summary.scalar('validation_gt_state_loss',
                                      self.val_metric.result(),
                                      step=self.total_iter + i)
                tf.keras.backend.set_learning_phase(1)

        self.total_iter += num_iter
        self.save()

    def plot_act_mdn(self, y, mdn_predictions):
        """Plot actions.

    Args:
      y: true "y", shape (batch_size, d_out)
      mdn_predictions: tuple of:
        - pi: (batch_size, num_gaussians)
        - mu: (batch_size, num_gaussians * d_out)
        - var: (batch_size, num_gaussians)
    """

        pi, mu, _ = mdn_predictions

        self.ax.cla()
        self.ax.scatter(y[:, 0], y[:, 1])
        mu = tf.reshape(mu, (-1, y.shape[-1]))
        pi = tf.reshape(pi, (-1, ))

        pi = tf.clip_by_value(pi, 0.01, 1.0)

        rgba_colors = np.zeros((len(pi), 4))
        # for red the first column needs to be one
        rgba_colors[:, 0] = 1.0
        # the fourth column needs to be your alphas
        rgba_colors[:, 3] = pi

        self.ax.scatter(mu[:, 0], mu[:, 1], color=rgba_colors)

        plt.draw()
        plt.pause(0.001)

    def act(self, obs, info):
        """Run inference and return best action."""
        del obs

        act = {'camera_config': self.camera_config, 'primitive': None}

        # Get observations and run predictions.
        gt_obs = self.info_to_gt_obs(info)

        # just for visualization
        gt_act_center = self.info_to_gt_obs(info)  # pylint: disable=unused-variable

        prediction = self.model(gt_obs[None, Ellipsis])

        if self.use_mdn:
            mdn_prediction = prediction
            pi, mu, var = mdn_prediction
            # prediction = mdn_utils.pick_max_mean(pi, mu, var)
            prediction = mdn_utils.sample_from_pdf(pi, mu, var)
            prediction = prediction[:, 0, :]

        prediction = prediction[0]  # unbatch

        # self.plot_act_mdn(gt_act_center[None, ...], mdn_prediction)

        # just go exactly to objects, from observations
        # p0_position = np.hstack((gt_obs[3:5], 0.02))
        # p0_rotation = utils.eulerXYZ_to_quatXYZW(
        #     (0, 0, -gt_obs[5] * self.theta_scale))
        # p1_position = np.hstack((gt_obs[0:2], 0.02))
        # p1_rotation = utils.eulerXYZ_to_quatXYZW(
        #     (0, 0, -gt_obs[2] * self.theta_scale))

        # just go exactly to objects, predicted
        p0_position = np.hstack((prediction[0:2], 0.02))
        p0_rotation = utils.eulerXYZ_to_quatXYZW(
            (0, 0, -prediction[2] * self.theta_scale))
        p1_position = np.hstack((prediction[3:5], 0.02))
        p1_rotation = utils.eulerXYZ_to_quatXYZW(
            (0, 0, -prediction[5] * self.theta_scale))

        # Select task-specific motion primitive.
        act['primitive'] = 'pick_place'
        if self.task == 'sweeping':
            act['primitive'] = 'sweep'
        elif self.task == 'pushing':
            act['primitive'] = 'push'

        params = {
            'pose0': (p0_position, p0_rotation),
            'pose1': (p1_position, p1_rotation)
        }
        act['params'] = params
        return act

    #-------------------------------------------------------------------------
    # Helper Functions
    #-------------------------------------------------------------------------

    def load(self, num_iter):
        """Load something."""

        # Do something here.
        # self.model.load(os.path.join(self.models_dir, model_fname))
        # Update total training iterations of agent.
        self.total_iter = num_iter

    def save(self):
        """Save models."""
        # Do something here.
        # self.model.save(os.path.join(self.models_dir, model_fname))
        pass
Beispiel #6
0
    def __init__(self, d_action, use_mdn, pretrained=True):
        super(ConvMLP, self).__init__()

        if pretrained:
            inception = hub.KerasLayer(
                "https://tfhub.dev/google/imagenet/inception_v1/feature_vector/4",
                trainable=True)
            for i in inception.weights:
                if "Conv2d_1a_7x7/weights" in i.name:
                    conv1weights = i
                    break

        self.d_action = d_action

        input_shape = (None, 320, 160, 3)

        # filters = [64, 32, 16]
        filters = [64, 64, 64]

        if pretrained:
            self.conv1 = layers.Conv2D(
                filters=filters[0],
                kernel_size=(7, 7),
                strides=(2, 2),
                weights=[conv1weights.numpy(),
                         tf.zeros(64)],
                input_shape=input_shape)

        else:
            self.conv1 = layers.Conv2D(filters=filters[0],
                                       kernel_size=(7, 7),
                                       strides=(2, 2),
                                       input_shape=input_shape)

        self.convd = layers.Conv2D(filters=filters[0],
                                   kernel_size=(7, 7),
                                   strides=(2, 2),
                                   input_shape=input_shape)
        self.bnd = layers.BatchNormalization()
        self.relud = layers.ReLU()

        self.bn1 = layers.BatchNormalization()
        self.relu1 = layers.ReLU()

        self.conv2 = layers.Conv2D(filters=filters[1],
                                   kernel_size=(5, 5),
                                   strides=(1, 1))
        self.bn2 = layers.BatchNormalization()
        self.relu2 = layers.ReLU()

        self.conv3 = layers.Conv2D(filters=filters[2],
                                   kernel_size=(5, 5),
                                   strides=(1, 1))
        self.bn3 = layers.BatchNormalization()
        self.relu3 = layers.ReLU()

        self.flatten = layers.Flatten()

        self.mlp = MlpModel(None,
                            filters[-1] * 2,
                            d_action,
                            "relu",
                            use_mdn,
                            dropout=0.0,
                            use_sinusoid=False)
Beispiel #7
0
    def init_model(self, dataset):
        """Initialize models, including normalization parameters."""
        self.set_max_obs_vector_length(dataset)

        if self.goal_conditioned:
            _, _, info, goal = dataset.random_sample(goal_images=True)
            obs_vector = self.info_to_gt_obs(info, goal=goal)
        else:
            _, _, info = dataset.random_sample()
            obs_vector = self.info_to_gt_obs(info)

        # Setup pick model, which only has act_dim=3 unlike act_dim=6 for gt_state.
        obs_dim = obs_vector.shape[0]
        act_dim = 3
        self.pick_model = MlpModel(self.BATCH_SIZE,
                                   obs_dim,
                                   act_dim,
                                   'relu',
                                   self.USE_MDN,
                                   dropout=0.1)

        # Sample points from the data to get reasonable mean / std values.
        sampled_gt_obs = []
        num_samples = 1000
        for _ in range(num_samples):
            t_worldaug_world, _ = self.get_augmentation_transform()
            if self.goal_conditioned:
                _, _, info, goal = dataset.random_sample(goal_images=True)
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world, goal=goal))
            else:
                _, _, info = dataset.random_sample()
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world))
        sampled_gt_obs = np.array(sampled_gt_obs)
        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.pick_model.set_normalization_parameters(obs_train_parameters)
        self.pick_obs_train_mean = obs_train_parameters['mean']
        self.pick_obs_train_std = obs_train_parameters['std']

        # Setup pick-conditioned place model, which adds `act_dim` to `obs_dim`.
        obs_dim = obs_vector.shape[0] + act_dim
        act_dim = 3
        self.place_model = MlpModel(self.BATCH_SIZE,
                                    obs_dim,
                                    act_dim,
                                    'relu',
                                    self.USE_MDN,
                                    dropout=0.1)

        # Sample points from the data to get reasonable mean / std values.
        sampled_gt_obs = []
        num_samples = 1000
        for _ in range(num_samples):
            t_worldaug_world, _ = self.get_augmentation_transform()
            if self.goal_conditioned:
                _, act, info, goal = dataset.random_sample(goal_images=True)
                obs = self.info_to_gt_obs(info, t_worldaug_world, goal=goal)
            else:
                _, act, info = dataset.random_sample()
                obs = self.info_to_gt_obs(info, t_worldaug_world)
            obs = np.hstack((obs, self.act_to_gt_act(
                act, t_worldaug_world)[:3]))  # Daniel: key difference?
            sampled_gt_obs.append(obs)
        sampled_gt_obs = np.array(sampled_gt_obs)
        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.place_model.set_normalization_parameters(obs_train_parameters)
        self.place_obs_train_mean = obs_train_parameters['mean']
        self.place_obs_train_std = obs_train_parameters['std']

        print('Done initializing self.model for ground truth agent.')
Beispiel #8
0
class GtState2StepAgent(GtStateAgent):
    """Agent which uses ground-truth state information -- useful as a baseline.

    It calls the superclass (the one-step GT agent) for getting the data batch, and
    the subsequent data augmentation. See docs in GtStateAgent.
    """
    def __init__(self, name, task, goal_conditioned=False, one_rot_inf=False):
        super(GtState2StepAgent, self).__init__(name, task, goal_conditioned,
                                                one_rot_inf)

        # Set up model.
        self.pick_model = None
        self.place_model = None
        self.pick_lrate = 2e-4
        self.place_lrate = 2e-4
        self.pick_optim = tf.keras.optimizers.Adam(
            learning_rate=self.pick_lrate)
        self.place_optim = tf.keras.optimizers.Adam(
            learning_rate=self.place_lrate)
        self.metric = tf.keras.metrics.Mean(name='metric')
        self.val_metric = tf.keras.metrics.Mean(name='val_metric')

    def init_model(self, dataset):
        """Initialize models, including normalization parameters."""
        self.set_max_obs_vector_length(dataset)

        if self.goal_conditioned:
            _, _, info, goal = dataset.random_sample(goal_images=True)
            obs_vector = self.info_to_gt_obs(info, goal=goal)
        else:
            _, _, info = dataset.random_sample()
            obs_vector = self.info_to_gt_obs(info)

        # Setup pick model, which only has act_dim=3 unlike act_dim=6 for gt_state.
        obs_dim = obs_vector.shape[0]
        act_dim = 3
        self.pick_model = MlpModel(self.BATCH_SIZE,
                                   obs_dim,
                                   act_dim,
                                   'relu',
                                   self.USE_MDN,
                                   dropout=0.1)

        # Sample points from the data to get reasonable mean / std values.
        sampled_gt_obs = []
        num_samples = 1000
        for _ in range(num_samples):
            t_worldaug_world, _ = self.get_augmentation_transform()
            if self.goal_conditioned:
                _, _, info, goal = dataset.random_sample(goal_images=True)
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world, goal=goal))
            else:
                _, _, info = dataset.random_sample()
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world))
        sampled_gt_obs = np.array(sampled_gt_obs)
        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.pick_model.set_normalization_parameters(obs_train_parameters)
        self.pick_obs_train_mean = obs_train_parameters['mean']
        self.pick_obs_train_std = obs_train_parameters['std']

        # Setup pick-conditioned place model, which adds `act_dim` to `obs_dim`.
        obs_dim = obs_vector.shape[0] + act_dim
        act_dim = 3
        self.place_model = MlpModel(self.BATCH_SIZE,
                                    obs_dim,
                                    act_dim,
                                    'relu',
                                    self.USE_MDN,
                                    dropout=0.1)

        # Sample points from the data to get reasonable mean / std values.
        sampled_gt_obs = []
        num_samples = 1000
        for _ in range(num_samples):
            t_worldaug_world, _ = self.get_augmentation_transform()
            if self.goal_conditioned:
                _, act, info, goal = dataset.random_sample(goal_images=True)
                obs = self.info_to_gt_obs(info, t_worldaug_world, goal=goal)
            else:
                _, act, info = dataset.random_sample()
                obs = self.info_to_gt_obs(info, t_worldaug_world)
            obs = np.hstack((obs, self.act_to_gt_act(
                act, t_worldaug_world)[:3]))  # Daniel: key difference?
            sampled_gt_obs.append(obs)
        sampled_gt_obs = np.array(sampled_gt_obs)
        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.place_model.set_normalization_parameters(obs_train_parameters)
        self.place_obs_train_mean = obs_train_parameters['mean']
        self.place_obs_train_std = obs_train_parameters['std']

        print('Done initializing self.model for ground truth agent.')

    def train(self, dataset, num_iter, writer, validation_dataset=None):
        """Train on dataset for a specific number of iterations.

        As with the gt_state, need a special case to handle the num_iter=0 case.
        """
        if self.pick_model is None:
            self.init_model(dataset)

        if self.USE_MDN:
            loss_criterion = mdn_utils.mdn_loss
        else:
            loss_criterion = tf.keras.losses.MeanSquaredError()

        @tf.function
        def train_step(pick_model, place_model, batch_obs, batch_act,
                       loss_criterion):
            with tf.GradientTape() as tape:
                prediction = pick_model(batch_obs)
                loss0 = loss_criterion(batch_act[:, 0:3], prediction)
                grad = tape.gradient(loss0, pick_model.trainable_variables)
                self.pick_optim.apply_gradients(
                    zip(grad, pick_model.trainable_variables))
            with tf.GradientTape() as tape:
                #batch_obs = tf.concat((batch_obs, batch_act[:,0:3] + tf.random.normal(shape=batch_act[:,0:3].shape, stddev=0.001)), axis=1)
                batch_obs = tf.concat((batch_obs, batch_act[:, 0:3]), axis=1)
                prediction = place_model(batch_obs)
                loss1 = loss_criterion(batch_act[:, 3:], prediction)
                grad = tape.gradient(loss1, place_model.trainable_variables)
                self.place_optim.apply_gradients(
                    zip(grad, place_model.trainable_variables))
            return loss0 + loss1

        # Need this case due to some quirks with saving this type of model. No gradient tupdates.
        if num_iter == 0:
            batch_obs, batch_act, obs, act, info = self.get_data_batch(dataset)
            print(
                'Doing a single forward pass to enable us to save a snapshot for the num_iter == 0 case.'
            )
            prediction = self.pick_model(batch_obs)
            batch_obs = tf.concat((batch_obs, batch_act[:, 0:3]), axis=1)
            prediction = self.place_model(batch_obs)

        print_rate = 50
        for i in range(num_iter):
            start = time.time()
            batch_obs, batch_act, obs, act, info = self.get_data_batch(dataset)

            # Forward through model, compute training loss, update weights.
            self.metric.reset_states()
            loss = train_step(self.pick_model, self.place_model, batch_obs,
                              batch_act, loss_criterion)
            self.metric(loss)
            with writer.as_default():
                tf.summary.scalar('gt_state_loss',
                                  self.metric.result(),
                                  step=self.total_iter + i)
            if i % print_rate == 0:
                loss = np.float32(loss)
                print(
                    f'Train Iter: {self.total_iter + i} Loss: {loss:.4f} Iter time:',
                    time.time() - start)

        self.total_iter += num_iter
        self.save()

    def act(self, obs, info, goal=None):
        """Run inference and return best action."""
        act = {'camera_config': self.camera_config, 'primitive': None}

        # Get observations and run pick prediction
        if self.goal_conditioned:
            gt_obs = self.info_to_gt_obs(info, goal=goal)
        else:
            gt_obs = self.info_to_gt_obs(info)
        pick_prediction = self.pick_model(gt_obs[None, ...])
        if self.USE_MDN:
            pi, mu, var = pick_prediction
            #prediction = mdn_utils.pick_max_mean(pi, mu, var)
            pick_prediction = mdn_utils.sample_from_pdf(pi, mu, var)
            pick_prediction = pick_prediction[:, 0, :]
        pick_prediction = pick_prediction[0]  # unbatch

        # Get observations and run place prediction
        obs_with_pick = np.hstack((gt_obs, pick_prediction))

        # since the pick at train time is always 0.0,
        # the predictions are unstable if not exactly 0
        obs_with_pick[-1] = 0.0

        place_prediction = self.place_model(obs_with_pick[None, ...])
        if self.USE_MDN:
            pi, mu, var = place_prediction
            #prediction = mdn_utils.pick_max_mean(pi, mu, var)
            place_prediction = mdn_utils.sample_from_pdf(pi, mu, var)
            place_prediction = place_prediction[:, 0, :]
        place_prediction = place_prediction[0]

        prediction = np.hstack((pick_prediction, place_prediction))

        # Daniel: like with gt_state, guessing this is just for insertion.
        # just go exactly to objects, from observations
        # p0_position = np.hstack((gt_obs[3:5], 0.02))
        # p0_rotation = utils.get_pybullet_quaternion_from_rot((0, 0, -gt_obs[5]*self.THETA_SCALE))
        # p1_position = np.hstack((gt_obs[0:2], 0.02))
        # p1_rotation = utils.get_pybullet_quaternion_from_rot((0, 0, -gt_obs[2]*self.THETA_SCALE))

        # Just go exactly to objects, predicted. Daniel: adding 1 rotation  inference case.
        p0_position = np.hstack((prediction[0:2], 0.02))
        p0_pred_rot = 0.0 if self.one_rot_inf else -prediction[
            2] * self.THETA_SCALE  # idx 2
        p0_rotation = utils.get_pybullet_quaternion_from_rot(
            (0, 0, p0_pred_rot))
        p1_position = np.hstack((prediction[3:5], 0.02))
        p1_pred_rot = 0.0 if self.one_rot_inf else -prediction[
            5] * self.THETA_SCALE  # idx 5
        p1_rotation = utils.get_pybullet_quaternion_from_rot(
            (0, 0, p1_pred_rot))

        # Select task-specific motion primitive.
        act['primitive'] = 'pick_place'
        if self.task == 'sweeping':
            act['primitive'] = 'sweep'
        elif self.task == 'pushing':
            act['primitive'] = 'push'
        params = {
            'pose0': (p0_position, p0_rotation),
            'pose1': (p1_position, p1_rotation)
        }
        act['params'] = params

        # Daniel: like transporters, determine the task stage if applicable. (AND if loading only)
        if self.task in ['bag-items-easy', 'bag-items-hard']:
            self._determine_task_stage(p0_position, p1_position)

        return act

    #-------------------------------------------------------------------------
    # Helper Functions. See gt_state, we need to do something similar here.
    #-------------------------------------------------------------------------

    def load(self, num_iter):
        """Load in a similar fashion as the 1-step GT agent."""
        pick_fname = 'gt-state-2-step-pick-ckpt-%d' % num_iter
        place_fname = 'gt-state-2-step-place-ckpt-%d' % num_iter
        pick_fname = os.path.join(self.models_dir, pick_fname)
        place_fname = os.path.join(self.models_dir, place_fname)
        self.pick_model = keras.models.load_model(pick_fname, compile=False)
        self.place_model = keras.models.load_model(place_fname, compile=False)
        self.total_iter = num_iter

        # Load other data we need for proper usage of the model.
        data_fname = os.path.join(self.models_dir, 'misc_data.pkl')
        with open(data_fname, 'rb') as fh:
            data = pickle.load(fh)
        self.max_obs_vector_length = data['max_obs_vector_length']

        # Note: we cannot call self.model methods directly, other than __call__.
        # Actually, I'm not even sure if this works, but we don't normalize
        # with the current model. TODO(daniel) does this work?
        self.pick_model.obs_train_mean = data['pick_obs_train_mean']
        self.place_model.obs_train_mean = data['place_obs_train_mean']
        self.pick_model.obs_train_std = data['pick_obs_train_std']
        self.place_model.obs_train_std = data['place_obs_train_std']

    def save(self):
        """Save models."""
        if not os.path.exists(self.models_dir):
            os.makedirs(self.models_dir)
        pick_fname = 'gt-state-2-step-pick-ckpt-%d' % self.total_iter
        place_fname = 'gt-state-2-step-place-ckpt-%d' % self.total_iter
        pick_fname = os.path.join(self.models_dir, pick_fname)
        place_fname = os.path.join(self.models_dir, place_fname)
        self.pick_model.save(pick_fname, save_format='tf')
        self.place_model.save(place_fname, save_format='tf')
        print(f'Saving model: {pick_fname}')
        print(f'Saving model: {place_fname}')

        # Save other data we need for proper usage of the model.
        data_fname = os.path.join(self.models_dir, 'misc_data.pkl')
        data = dict(max_obs_vector_length=self.max_obs_vector_length,
                    pick_obs_train_mean=self.pick_obs_train_mean,
                    place_obs_train_mean=self.place_obs_train_mean,
                    pick_obs_train_std=self.pick_obs_train_std,
                    place_obs_train_std=self.place_obs_train_std)
        with open(data_fname, 'wb') as fh:
            pickle.dump(data, fh)
Beispiel #9
0
class GtStateAgent:
    """Agent which uses ground-truth state information -- useful as a baseline.

    It performs the same set of data augmentation techniques as the image-based
    policies by getting random image transform parameters, and then using those
    to adjust the ground-truth poses.

    THETA_SCALE=10 means discretizing rotations by 36, as in the CoRL submission.
    Added `one_rot_inf`, which is NOT normally true, for backwards compatibility.
    """
    def __init__(self, name, task, goal_conditioned=False, one_rot_inf=False):
        self.name = name
        self.task = task
        if self.task in ['aligning', 'palletizing', 'packing']:
            self.use_box_dimensions = True
        else:
            self.use_box_dimensions = False
        if self.task in ['sorting', 'bag-color-goal']:
            self.use_colors = True
        else:
            self.use_colors = False
        self.total_iter = 0
        self.pixel_size = 0.003125
        self.camera_config = cameras.RealSenseD415.CONFIG
        self.models_dir = os.path.join('checkpoints', self.name)
        self.bounds = np.array([[0.25, 0.75], [-0.5, 0.5], [0, 0.28]])
        self.six_dof = False
        self.goal_conditioned = goal_conditioned
        self.one_rot_inf = one_rot_inf

        # Set up model.
        self.model = None
        self.lrate = 2e-4
        self.optim = tf.keras.optimizers.Adam(learning_rate=self.lrate)
        self.metric = tf.keras.metrics.Mean(name='metric')
        self.val_metric = tf.keras.metrics.Mean(name='val_metric')
        self.THETA_SCALE = 15.0  # 24 rotations in increments of 15.
        self.BATCH_SIZE = 128
        self.USE_MDN = True

        # TODO(daniel) Hacky. I do something similar for `agents/transporter.py`.
        self.real_task = None

    def extract_x_y_theta(self,
                          object_info,
                          t_worldaug_world=None,
                          preserve_theta=False,
                          softbody=False):
        """Given either object OR action pose info, return stuff to put in GT observation.
        Note: only called from within this class and 2-step case via subclassing.

        During training, there is normally data augmentation applied, so t_worldaug_world
        is NOT None, and augmentation is applied as if the 'image' were adjusted. However,
        for actions, we preserve theta so `object_quat_xyzw` does not change, and we query
        index=2 for the z rotation. For most deformables tasks, we did not use rotations,
        hence theta=0.

        Get t_world_object which is a 4x4 homogeneous matrix, then stick the position there (why?).
        Then we do the matrix multiplication with '@' to get the homogeneous matrix representation
        of the object after augmentation is applied.
        https://stackoverflow.com/questions/34142485/difference-between-numpy-dot-and-python-3-5-matrix-multiplication
        Then the position is extracted from the resulting homogeneous matrix. (And we ignore the
        z coordinate for that, but in fact for cloth vertices I would keep it.)

        Augmentation for cloth vertices? For now I'm following what they do for positions only
        (ignoring orientation), and returning updated (x,y,z) where the last value is the z
        (height) and not a rotation (doesn't make sense with vertrics anyway). Augmentations
        are all in SE(2), so the z coordinates should not change. Indeed that's what I see after
        extracting positions from `t_worldaug_object`.

        And yes, with vertices, they are flattened in the same way regardless of whether data
        augmentation is used, so every third value represents a vertex height. :D So we get
        [ v1_x, v1_y, v1_z, v2_x, v2_y, v2_z, ... ] in the flattened version.

        Args:
            t_worldaug_world: Only True if there's an augmentation and we need to change the
                ground truth pose as if we applied the augmentation. Only happens during (a)
                determining mean/stds, and (b) training. During initialization and the action
                (loading) we do not use this.
            preserve_theta: Only True if calling where `object_info` is an action pose.
            softbody: Only True if `object_info` is a vertex mesh of a PyBullet soft body.

        Returns:
            object_x_y_theta, pos, quat: the first is an SE(2) pose and parameterized by
            three numbers, the xy position and a scalar rotation `theta`. In case we have
            a cloth, I'm not returning anything after that (no pos or quat) so that if we
            use the older API for cloth (which we should not be!), it should crash code.
        """
        # ------------------------------------------------------------------------------------------ #
        # Daniel: sanity checks to make sure we're handling the soft body ID correctly.
        # The last `if` of `softbody` helps to distinguish among actions, which are also length 2.
        # ------------------------------------------------------------------------------------------ #
        if (self.task in TASKS_SOFT) and (len(object_info) == 2) and softbody:
            nb_vertices, vert_pos_l = object_info
            assert nb_vertices == 100, f'We should be using 100 vertices but have: {nb_vertices}'
            assert nb_vertices == len(
                vert_pos_l), f'{nb_vertices}, {len(vert_pos_l)}'
            vert_pos_np = np.array(vert_pos_l)  # (100, 3)

            if t_worldaug_world is not None:
                augmented_vertices = []

                # For each vertex, apply data augmentation.
                for i in range(vert_pos_np.shape[0]):
                    vertex_position = vert_pos_np[i, :]

                    # Use identity quaternion (w=1, others 0). Othewrise, follow normal augmentation.
                    t_world_object = transformations.quaternion_matrix(
                        (1, 0, 0, 0))
                    t_world_object[0:3, 3] = np.array(vertex_position)
                    t_worldaug_object = t_worldaug_world @ t_world_object
                    new_v_position = t_worldaug_object[0:3, 3]
                    augmented_vertices.append(new_v_position)

                # Stack everything.
                augmented_flattened = np.concatenate(
                    augmented_vertices).flatten().astype(np.float32)
                return augmented_flattened
            else:
                vert_flattened = vert_pos_np.flatten().astype(np.float32)
                return vert_flattened
        # ------------------------------------------------------------------------------------------ #
        # Now proceed as usual, untouched from normal ravens code except for documentation/assertions.

        object_position = object_info[0]
        object_quat_xyzw = object_info[1]

        if t_worldaug_world is not None:
            object_quat_wxyz = (object_quat_xyzw[3], object_quat_xyzw[0],
                                object_quat_xyzw[1], object_quat_xyzw[2])
            t_world_object = transformations.quaternion_matrix(
                object_quat_wxyz)
            t_world_object[0:3, 3] = np.array(object_position)

            # Daniel: pretty sure this has to be true. Upper left 3x3 is rotation, upper right 3x1 is position.
            assert t_world_object.shape == (
                4, 4), f'shape is not 4x4: {t_world_object.shape}'
            assert t_worldaug_world.shape == (
                4, 4), f'shape is not 4x4: {t_worldaug_world.shape}'

            # Daniel: data augmentation. Then, extract `object_position` from augmented object.
            t_worldaug_object = t_worldaug_world @ t_world_object
            object_quat_wxyz = transformations.quaternion_from_matrix(
                t_worldaug_object)
            if not preserve_theta:
                object_quat_xyzw = (object_quat_wxyz[1], object_quat_wxyz[2],
                                    object_quat_wxyz[3], object_quat_wxyz[0])
            object_position = t_worldaug_object[0:3, 3]

        object_xy = object_position[0:2]
        object_theta = -np.float32(
            utils.get_rot_from_pybullet_quaternion(object_quat_xyzw)
            [2]) / self.THETA_SCALE
        return np.hstack((object_xy, object_theta)).astype(
            np.float32), object_position, object_quat_xyzw

    def extract_box_dimensions(self, info):
        return np.array(info[2])

    def extract_color(self, info):
        # TODO(daniel) assumption is invalid, I think, if using deformable tasks. So let's change this.
        # UPDATE Oct 21: actually I changed the `info` dict for bag-color-goal so that each item (which is
        # really the `info` here) will have the rgb (not 'a' as that's always 1) info at the last part.
        # So this is fine. :D
        return np.array(info[-1])

    def info_to_gt_obs(self, info, t_worldaug_world=None, goal=None):
        """Daniel: from info dict of IDs, create the observation for GT models.

        Assumes `info` consists of just PyBullet object IDs. Creates a numpy array
        from combining the `object_x_y_theta` from all IDs, and potentially add more
        info based on if using box dimensions or colors; see `__init__()` above.

        For soft body tasks, we should have data generated so that info[cloth_id] or
        info[bag_id] contains the 3D vertex position of each point. For now we only
        test with cloth since the bag ones are honestly probably better suited with
        just the beads (but will have to check). Fortunately, all the cloth tasks as
        of late October 2020 will use ID=5 as the soft body andit's also easy to check.

        (14 Oct 2020) adding `goal` as argument for goal conditioning. Use goal['info'].
            We're going to stack the two together into one input.
        (14 Oct 2020) adding support for cloth. We don't use `pos` and `quat`, just use
            the returned object_x_y_theta.
        """
        info = self.remove_nonint_keys(info)
        if goal is not None:
            g_info = self.remove_nonint_keys(goal['info'])
        else:
            g_info = {}

        observation_vector = []
        object_keys = sorted(info.keys())
        for object_key in object_keys:
            # Daniel: adding this special case.
            if (self.task in TASKS_SOFT and len(info[object_key]) == 2):
                object_x_y_theta = self.extract_x_y_theta(info[object_key],
                                                          t_worldaug_world,
                                                          softbody=True)
            else:
                object_x_y_theta, pos, quat = self.extract_x_y_theta(
                    info[object_key], t_worldaug_world)
            observation_vector.append(object_x_y_theta)
            if self.use_box_dimensions:
                observation_vector.append(
                    self.extract_box_dimensions(info[object_key]))
            if self.use_colors:
                # For bag-color-goal we only want IDs 4 and 38.
                if self.task in ['bag-color-goal']:
                    if object_key in [4, 38]:
                        #print(f'for {object_key}, color: {self.extract_color(info[object_key])}')
                        observation_vector.append(
                            self.extract_color(info[object_key]))
                else:
                    observation_vector.append(
                        self.extract_color(info[object_key]))

        # Repeat for goal info using `g_info` instead of `info`, assuming g_info != {}.
        for object_key in sorted(g_info.keys()):
            # Daniel: adding this special case.
            if (self.task in TASKS_SOFT and len(g_info[object_key]) == 2):
                object_x_y_theta = self.extract_x_y_theta(g_info[object_key],
                                                          t_worldaug_world,
                                                          softbody=True)
            else:
                object_x_y_theta, pos, quat = self.extract_x_y_theta(
                    g_info[object_key], t_worldaug_world)
            observation_vector.append(object_x_y_theta)
            if self.use_box_dimensions:
                observation_vector.append(
                    self.extract_box_dimensions(g_info[object_key]))
            if self.use_colors:
                # For bag-color-goal we only want IDs 4 and 38.
                if self.task in ['bag-color-goal']:
                    if object_key in [4, 38]:
                        #print(f'for {object_key}, color: {self.extract_color(g_info[object_key])}')
                        observation_vector.append(
                            self.extract_color(g_info[object_key]))
                else:
                    observation_vector.append(
                        self.extract_color(g_info[object_key]))

        # Finally, make it a single 1D np.array.
        if (self.task in TASKS_SOFT):
            # Daniel: was getting some errors w/setting the type since usually we will have:
            #   [array(x1,y1,z1), array(x2,y2,z2), ...] and doing np.array() on this will return
            #   array([ [x1,y1,z1], [x2,y2,z2], ...] ) and then reshape(-1) will return
            #   array([x1,y1,z1,x2,y2,z2,...]) and it's easy to make as np.float32 type.
            # But with stuff like cloth, we have:
            #   [array(x1,y1,z1), array(c1,c2,c3,c4,...,c100), ...] and this will mean the
            # np.array().reshape(-1) results in a 2D array of arrays, so we can't do np.float32.
            # However, np.concatenate seems to fix these issues.
            observation_vector = np.concatenate(observation_vector).reshape(
                -1).astype(np.float32)
        else:
            observation_vector = np.array(observation_vector).reshape(
                -1).astype(np.float32)

        # pad with zeros
        if self.max_obs_vector_length != 0:
            observation_vector = np.pad(
                observation_vector,
                [0, self.max_obs_vector_length - len(observation_vector)])

        return observation_vector

    def act_to_gt_act(self, act, t_worldaug_world=None, transform_params=None):
        """Daniel: similarly, from action, create the appropriate ground truth action.

        This may involve a transformation if doing data augmentation.
        Comment from Andy/Pete: dont update theta due to suction invariance to theta
        """
        pick_se2, _, _ = self.extract_x_y_theta(act['params']['pose0'],
                                                t_worldaug_world,
                                                preserve_theta=True)
        place_se2, _, _ = self.extract_x_y_theta(act['params']['pose1'],
                                                 t_worldaug_world,
                                                 preserve_theta=True)
        return np.hstack((pick_se2, place_se2)).astype(np.float32)

    def set_max_obs_vector_length(self, dataset):
        """Find largest environment dimensionality.

        Daniel: likely useful for tasks such as palletizing where there's a
        variable number of objects, but I don't think it applies for my
        tasks. Make sure the dataset.path check will work, i.e., don't change
        default paths. Note:; first calls to model initialization come from
        this, can use to qwuickly debug the `info_to_gt_obs()` method as well.
        """
        if ('cable-' in dataset.path or 'insertion-goal' in dataset.path
                or 'cloth-' in dataset.path or 'bag-alone' in dataset.path
                or 'bag-items' in dataset.path):
            num_samples = 1
        else:
            num_samples = 2000

        self.max_obs_vector_length = 0
        max_obs_vector_length = 0
        for _ in range(num_samples):
            if self.goal_conditioned:
                _, _, info, goal = dataset.random_sample(goal_images=True)
                obs_vector_length = self.info_to_gt_obs(info,
                                                        goal=goal).shape[0]
            else:
                _, _, info = dataset.random_sample()
                obs_vector_length = self.info_to_gt_obs(info).shape[0]
            if obs_vector_length > max_obs_vector_length:
                max_obs_vector_length = obs_vector_length
        self.max_obs_vector_length = max_obs_vector_length

    def init_model(self, dataset):
        """Initialize self.model, including normalization parameters."""
        self.set_max_obs_vector_length(dataset)

        # Get obs dim and action dim (3 for pick, 3 for place), initialize model.
        if self.goal_conditioned:
            _, _, info, goal = dataset.random_sample(goal_images=True)
            obs_vector = self.info_to_gt_obs(info, goal=goal)
        else:
            _, _, info = dataset.random_sample()
            obs_vector = self.info_to_gt_obs(info)

        obs_dim = obs_vector.shape[0]
        act_dim = 6
        if self.six_dof:
            act_dim = 9
        self.model = MlpModel(self.BATCH_SIZE,
                              obs_dim,
                              act_dim,
                              'relu',
                              self.USE_MDN,
                              dropout=0.1)

        # Sample points from the data to get reasonable mean / std values.
        sampled_gt_obs = []
        num_samples = 1000
        for _ in range(num_samples):
            t_worldaug_world, _ = self.get_augmentation_transform()
            if self.goal_conditioned:
                _, _, info, goal = dataset.random_sample(goal_images=True)
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world, goal=goal))
            else:
                _, _, info = dataset.random_sample()
                sampled_gt_obs.append(
                    self.info_to_gt_obs(info, t_worldaug_world))
        sampled_gt_obs = np.array(sampled_gt_obs)
        obs_train_parameters = dict()
        obs_train_parameters['mean'] = sampled_gt_obs.mean(axis=(0)).astype(
            np.float32)
        obs_train_parameters['std'] = sampled_gt_obs.std(axis=(0)).astype(
            np.float32)
        self.model.set_normalization_parameters(obs_train_parameters)

        self.obs_train_mean = obs_train_parameters['mean']
        self.obs_train_std = obs_train_parameters['std']
        print('Done initializing self.model for ground truth agent.')

    def get_augmentation_transform(self):
        heightmap = np.zeros((320, 160))
        theta, trans, pivot = utils.get_random_image_transform_params(
            heightmap.shape)
        transform_params = theta, trans, pivot
        t_world_center, t_world_centeraug = utils.get_se3_from_image_transform(
            *transform_params, heightmap, self.bounds, self.pixel_size)
        t_worldaug_world = t_world_centeraug @ np.linalg.inv(t_world_center)
        return t_worldaug_world, transform_params

    def get_data_batch(self, dataset):
        """Pre-process info and obs-act, and make batch.
        Daniel: adding goal-conditioning. To make it easier, just stack observations.
        """
        batch_obs = []
        batch_act = []
        for _ in range(self.BATCH_SIZE):
            t_worldaug_world, transform_params = self.get_augmentation_transform(
            )
            if self.goal_conditioned:
                obs, act, info, goal = dataset.random_sample(goal_images=True)
                gt_obs = self.info_to_gt_obs(info, t_worldaug_world, goal=goal)
            else:
                obs, act, info = dataset.random_sample()
                gt_obs = self.info_to_gt_obs(info, t_worldaug_world)
            batch_obs.append(gt_obs)
            batch_act.append(
                self.act_to_gt_act(
                    act, t_worldaug_world,
                    transform_params))  # this samples pick points from surface
            # on insertion task only, this can be used to imagine as if the picks were deterministic
            # batch_act.append(self.info_to_gt_obs(info))
        batch_obs = np.array(batch_obs)
        batch_act = np.array(batch_act)
        return batch_obs, batch_act, obs, act, info

    def train(self, dataset, num_iter, writer, validation_dataset=None):
        """Train on dataset for a specific number of iterations.

        Daniel: not testing with validation, argument copied over from ravens. Naively,
        we can train with MSE, but better to use a mixture model (MDN) since the output
        should be multi-modal; could be several pick points, and several placing points
        wrt those pick points.

        Also, notice how one iteration involves taking a batch of data, whereas for
        Transporters, we only used one image per 'iteration'. This means that each gt-state
        model consumes 128x more data points during training.

        NOTE: there is a special case for num_iter=0 which I use for the "zero iteration" baseline.
        Without this, it is impossible to save the model because it hasn't been built yet. However,
        even with this I get the warning:

        WARNING:tensorflow:Skipping full serialization of Keras layer <tensorflow.python.keras.layers.core.Dense object at 0x7fc88c170890>, because it is not built.

        but the loading seems to be OK, so I'm not sure what could be the issue, and given that
        it's for the 0 iteration case, it's unlikely to be worth fully investigating for now.
        """
        if self.model is None:
            self.init_model(dataset)

        if self.USE_MDN:
            loss_criterion = mdn_utils.mdn_loss
        else:
            loss_criterion = tf.keras.losses.MeanSquaredError()

        @tf.function
        def train_step(model, batch_obs, batch_act, loss_criterion):
            with tf.GradientTape() as tape:
                prediction = model(batch_obs)
                loss = loss_criterion(batch_act, prediction)
                grad = tape.gradient(loss, model.trainable_variables)
                self.optim.apply_gradients(zip(grad,
                                               model.trainable_variables))
            return loss

        print_rate = 50
        VALIDATION_RATE = 1000

        # Need this case due to some quirks with saving this type of model. No gradient tupdates.
        if num_iter == 0:
            batch_obs, batch_act, obs, act, info = self.get_data_batch(dataset)
            print(
                'Doing a single forward pass to enable us to save a snapshot for the num_iter == 0 case.'
            )
            prediction = self.model(batch_obs)

        for i in range(num_iter):
            start = time.time()
            batch_obs, batch_act, obs, act, info = self.get_data_batch(dataset)

            # Forward through model, compute training loss, update weights.
            self.metric.reset_states()
            loss = train_step(self.model, batch_obs, batch_act, loss_criterion)
            self.metric(loss)
            with writer.as_default():
                tf.summary.scalar('gt_state_loss',
                                  self.metric.result(),
                                  step=self.total_iter + i)
            if i % print_rate == 0:
                loss = np.float32(loss)
                print(
                    f'Train Iter: {self.total_iter + i} Loss: {loss:.4f} Iter time:',
                    time.time() - start)

            # Compute valid loss only if we have a validation dataset.
            if ((self.total_iter + i) % VALIDATION_RATE
                    == 0) and (validation_dataset is not None):
                print("Validating!")
                tf.keras.backend.set_learning_phase(0)
                self.val_metric.reset_states()
                batch_obs, batch_act, _, _, _ = self.get_data_batch(
                    validation_dataset)
                prediction = self.model(batch_obs)
                loss = loss_criterion(batch_act, prediction)
                self.val_metric(loss)
                with writer.as_default():
                    tf.summary.scalar('validation_gt_state_loss',
                                      self.val_metric.result(),
                                      step=self.total_iter + i)
                tf.keras.backend.set_learning_phase(1)

        self.total_iter += num_iter
        self.save()

    def plot_act_mdn(self, y, mdn_predictions):
        """
        Args:
            y: true "y", shape (batch_size, d_out)
            mdn_predictions: tuple of:
                pi: (batch_size, num_gaussians)
                mu: (batch_size, num_gaussians * d_out)
                var: (batch_size, num_gaussians)
        """
        pi, mu, var = mdn_predictions

        self.ax.cla()
        self.ax.scatter(y[:, 0], y[:, 1])
        mu = tf.reshape(mu, (-1, y.shape[-1]))
        pi = tf.reshape(pi, (-1, ))
        pi = tf.clip_by_value(pi, 0.01, 1.0)

        rgba_colors = np.zeros((len(pi), 4))
        # for red the first column needs to be one
        rgba_colors[:, 0] = 1.0
        # the fourth column needs to be your alphas
        rgba_colors[:, 3] = pi
        self.ax.scatter(mu[:, 0], mu[:, 1], color=rgba_colors)

        plt.draw()
        plt.pause(0.001)

    def act(self, obs, info, goal=None):
        """Run inference and return best action."""
        act = {'camera_config': self.camera_config, 'primitive': None}

        # Get observations and run predictions (second part just for visualization).
        if self.goal_conditioned:
            gt_obs = self.info_to_gt_obs(info, goal=goal)
            gt_act_center = self.info_to_gt_obs(info, goal=goal)
        else:
            gt_obs = self.info_to_gt_obs(info)
            gt_act_center = self.info_to_gt_obs(info)

        prediction = self.model(gt_obs[None, ...])

        if self.USE_MDN:
            mdn_prediction = prediction
            pi, mu, var = mdn_prediction
            #prediction = mdn_utils.pick_max_mean(pi, mu, var)
            prediction = mdn_utils.sample_from_pdf(pi, mu, var)
            prediction = prediction[:, 0, :]

        prediction = prediction[0]  # unbatch

        # Just go exactly to objects, predicted. Daniel: adding 1 rotation inference case.
        p0_position = np.hstack((prediction[0:2], 0.02))
        p0_pred_rot = 0.0 if self.one_rot_inf else -prediction[
            2] * self.THETA_SCALE  # idx 2
        p0_rotation = utils.get_pybullet_quaternion_from_rot(
            (0, 0, p0_pred_rot))
        p1_position = np.hstack((prediction[3:5], 0.02))
        p1_pred_rot = 0.0 if self.one_rot_inf else -prediction[
            5] * self.THETA_SCALE  # idx 5
        p1_rotation = utils.get_pybullet_quaternion_from_rot(
            (0, 0, p1_pred_rot))

        # Select task-specific motion primitive.
        act['primitive'] = 'pick_place'
        if self.task == 'sweeping':
            act['primitive'] = 'sweep'
        elif self.task == 'pushing':
            act['primitive'] = 'push'
        params = {
            'pose0': (p0_position, p0_rotation),
            'pose1': (p1_position, p1_rotation)
        }
        act['params'] = params

        # Daniel: like transporters, determine the task stage if applicable. (AND if loading only)
        if self.task in ['bag-items-easy', 'bag-items-hard', 'bag-color-goal']:
            self._determine_task_stage(p0_position, p1_position)

        return act

    #-------------------------------------------------------------------------
    # Helper Functions. Since we're subclassing self.model, rather than directly
    # using self.model = tf.keras.Model(), we need to change saving/loading a bit.
    # Use tf save format (not h5) and use `keras.models.load_model()``.
    # NOTE: requires TensorFlow 2.2 or later.
    #-------------------------------------------------------------------------

    def load(self, num_iter):
        """Load parameters.

        For this, set compile=False because we're not retraining the model.
        Also, this requires the max_obs_vector_length restored, as that isn't
        in the `__init__()`.
        """
        model_fname = 'gt-state-ckpt-%d' % num_iter
        model_fname = os.path.join(self.models_dir, model_fname)
        self.model = keras.models.load_model(model_fname, compile=False)
        self.total_iter = num_iter

        # Load other data we need for proper usage of the model.
        data_fname = os.path.join(self.models_dir, 'misc_data.pkl')
        with open(data_fname, 'rb') as fh:
            data = pickle.load(fh)
        self.max_obs_vector_length = data['max_obs_vector_length']

        # Note: we cannot call self.model methods directly, other than __call__.
        # Actually, I'm not even sure if this works, but we don't normalize
        # with the current model. TODO(daniel) does this work?
        self.model.obs_train_mean = data['obs_train_mean']
        self.model.obs_train_std = data['obs_train_std']

    def save(self):
        """Save models."""
        if not os.path.exists(self.models_dir):
            os.makedirs(self.models_dir)
        model_fname = 'gt-state-ckpt-%d' % self.total_iter
        model_fname = os.path.join(self.models_dir, model_fname)
        self.model.save(model_fname, save_format='tf')
        print(f'Saving model: {model_fname}')

        # Save other data we need for proper usage of the model.
        data_fname = os.path.join(self.models_dir, 'misc_data.pkl')
        data = dict(max_obs_vector_length=self.max_obs_vector_length,
                    obs_train_mean=self.obs_train_mean,
                    obs_train_std=self.obs_train_std)
        with open(data_fname, 'wb') as fh:
            pickle.dump(data, fh)

    def remove_nonint_keys(self, info):
        return {k: info[k] for k in info if isinstance(k, int)}

    def _determine_task_stage(self, p0_position, p1_position):
        """Determines task stage for the bag-items tasks, for gt_state and gt_state_2_step.

        See agents/transporter.py for details. The ONLY difference here is that we have
        the positions and need to do a position to pixel conversion, which is trivial with our
        utility file. Otherwise, we follow the same procedure as in the transporter class, with
        the same set of drawbacks / caveats.
        """
        p0_pixel = utils.position_to_pixel(p0_position,
                                           bounds=self.bounds,
                                           pixel_size=self.pixel_size)
        p1_pixel = utils.position_to_pixel(p1_position,
                                           bounds=self.bounds,
                                           pixel_size=self.pixel_size)

        # Daniel: hack to get things in the bounds.
        p0_x = min(max(p0_pixel[0], 0), 319)
        p0_y = min(max(p0_pixel[1], 0), 159)
        p0_pixel = (int(p0_x), int(p0_y))

        real_task = self.real_task  # assume we assigned this.
        colormap, heightmap, object_mask = real_task.get_object_masks(
            real_task.env)

        if False:
            nb = len([x for x in os.listdir('.') if '.png' in x])
            mask = np.array(object_mask / np.max(object_mask) * 255).astype(
                np.uint8)
            mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)  # debugging
            p0 = (p0_pixel[1], p0_pixel[0])
            p1 = (p1_pixel[1], p1_pixel[0])
            cv2.circle(mask, p0, radius=3, color=(255, 0, 255), thickness=-1)
            cv2.circle(mask, p1, radius=3, color=(255, 255, 0), thickness=-1)
            cv2.imwrite(f'mask_{nb}.png', mask)

        # Copied from ravens/agents/transporter.py.
        if self.task in ['bag-items-easy', 'bag-items-hard']:
            if object_mask[p0_pixel] in [38, 39]:
                real_task.task_stage = 2
            elif real_task.task_stage == 2:
                real_task.task_stage = 3
        elif self.task in ['bag-color-goal']:
            if object_mask[p0_pixel] == real_task.single_block_ID:
                real_task.task_stage = 2
            else:
                real_task.task_stage = 1
        else:
            raise NotImplementedError(self.task)