Ejemplo n.º 1
0
 def get_trajectories(self, full_states):
     """Assemble head (and, unless the tail is "Action", tail) data for a rollout.

     Args:
         full_states: iterable of indexable items. Only element ``[1]`` of
             each item is used; it is passed to ``get_individual_data`` as
             the object dump for that step.

     Returns:
         ``pytorch_model.wrap(data).cuda()``, where ``data`` is the head
         data alone when ``self.tail[0] == "Action"``, and otherwise the
         head data joined with the data for ``self.tail[0]`` along axis 1.
     """
     dumps = []
     for full_state in full_states:
         dumps.append(full_state[1])

     data = get_individual_data(self.head, dumps, pos_val_hash=1)

     # TODO: automatically determine if correlate pos_val_hash is 1 or 2
     # TODO: multiple tail support
     # TODO: Separation of Interference and Contingent objects
     tail_name = self.tail[0]
     if tail_name != "Action":
         # Non-action tails contribute their own columns next to the head's.
         tail_data = get_individual_data(tail_name, dumps, pos_val_hash=1)
         data = np.concatenate([data, tail_data], axis=1)

     # The result is always moved to the GPU by the trailing .cuda() call.
     return pytorch_model.wrap(data).cuda()
Ejemplo n.º 2
0
 def get_state(self, state):
     """Single-state counterpart of ``get_trajectories``.

     Args:
         state: indexable item; only ``state[1]`` is used, wrapped in a
             one-element list and passed to ``get_individual_data``.

     Returns:
         A pair ``(merged, [head_len, tail_len])``. When
         ``self.tail[0] == "Action"``, ``merged`` is the head state alone
         and ``tail_len`` is 0. Otherwise ``merged`` is the head state
         concatenated with the state for ``self.tail[0]``, and the two
         lengths are those of the respective parts.
     """
     # TODO: make not hard coded
     head_hash = 3 if self.head == "Block" else 1
     dump = state[1]
     hstate = get_individual_data(self.head, [dump], pos_val_hash=head_hash)[0]

     # TODO: automatically determine if correlate pos_val_hash is 1 or 2
     # TODO: multiple tail support
     # TODO: Separation of Interference and Contingent objects
     tail_name = self.tail[0]
     if tail_name == "Action":
         # Action tails add nothing to the state, so the tail length is 0.
         return hstate, [len(hstate), 0]

     corr_state = get_individual_data(tail_name, [dump], pos_val_hash=1)[0]
     combined = np.concatenate([hstate, corr_state])
     return combined, [len(hstate), len(corr_state)]
Ejemplo n.º 3
0
    # python get_reward.py --record-rollouts data/ataripaddle/ --changepoint-dir data/atarigraph/ --train-edge "Paddle->Ball" --transforms WProx --determiner prox --reward-form changepoint --num-stack 1 --focus-dumps-name focus_dumps.txt --dp-gmm atariball --period 5
    # python get_reward.py --record-rollouts data/pusherrandom/ --changepoint-dir data/fullpusher/ --train-edge "Action->Gripper" --transforms SVel SCorAvg --determiner overlap --reward-form markov --segment --train --num-stack 2 --gpu 1
    # python get_reward.py --record-rollouts data/extragripper/ --changepoint-dir data/pushergraphvec/ --train-edge "Gripper->Block" --transforms SProxVel --determiner merged --reward-form changepoint --segment --num-stack 2 --gpu 1 --cluster-model FDPGMM --period 9 --dp-gmm block --min-cluster 5
    # python get_reward.py --record-rollouts data/pusherrandom/ --changepoint-dir data/fullpusher/ --train-edge "Action->Gripper" --transforms SVel SCorAvg --determiner overlap --reward-form markov --segment --train --num-stack 2 --gpu 1 > pusher/reward_training.txt
    dataset_path = args.record_rollouts
    changepoints_path = args.record_rollouts  # these are the same for creating rewards
    head, tail = get_edge(args.train_edge)
    cp_dict = load_from_pickle(
        os.path.join(changepoints_path, "changepoints-" + head + ".pkl"))
    changepoints, models = get_cp_models_from_dict(cp_dict)
    obj_dumps = read_obj_dumps(dataset_path,
                               i=-1,
                               rng=args.num_iters,
                               filename=args.focus_dumps_name)

    trajectory = get_individual_data(head, obj_dumps, pos_val_hash=1)
    # TODO: automatically determine if correlate pos_val_hash is 1 or 2
    # TODO: multiple tail support
    if tail[0] == "Action":
        correlate_trajectory = get_individual_data(tail[0],
                                                   obj_dumps,
                                                   pos_val_hash=2)
        new_ct = np.zeros(
            (len(correlate_trajectory), int(np.max(correlate_trajectory)) + 1))
        hot_idxes = np.array(list(range(
            len(correlate_trajectory)))), correlate_trajectory.astype(int)
        new_ct[np.array(list(range(len(correlate_trajectory)))),
               np.squeeze(correlate_trajectory.astype(int))] = 1
        correlate_trajectory = new_ct
    else:
        correlate_trajectory = get_individual_data(tail[0],