def evaluation(args, r, high_level_planning):
    velocity_tracking = np.empty(
        (6, args.num_iters, args.num_latent_action_per_iteration))
    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    for i in range(0, args.num_iters):

        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', i + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        for j in range(args.num_latent_action_per_iteration):
            pre_com_state = state
            if not j % 5:
                target = target_velocity_test[int((j + 1) / 5)] / 1.5 * 2

            # take current state and plan for next z_action and sent to daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                   post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            velocity_tracking[0][i][j] = target[0]
            velocity_tracking[1][i][j] = target[1]
            velocity_tracking[2][i][j] = predict_state[4]
            velocity_tracking[3][i][j] = predict_state[5]
            velocity_tracking[4][i][j] = high_level_delta_obs[4]
            velocity_tracking[5][i][j] = high_level_delta_obs[5]

        np.save(
            save_dir + '/' + args.high_level_policy_type +
            '_velocity_tracking_test.npy', velocity_tracking)
        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

    exp_variables = ru.get_variables(r)
    velocity_record = exp_variables['record_vel']
    np.save(
        save_dir + '/' + args.high_level_policy_type + '_velocity_record.npy',
        np.array(velocity_record))
    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
def evaluation(args, r, high_level_planning):
    position_tracking = [
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
        [],
    ]
    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    cost_record = []

    for iter in range(0, args.num_iters):

        position_tracking = [[], [], [], []]
        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', iter + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)
        target = target_velocity

        # position_tracking[2*iter].append(state['base_pos_x'][0])
        # position_tracking[2*iter+1].append(state['base_pos_y'][0])

        j = 0
        total_cost = 0
        # while True:
        for i in range(args.num_latent_action_per_iteration):
            pre_com_state = state

            # take current state and plan for next z_action and sent to daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                   post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            # position_tracking[2*iter].append(state['base_pos_x'][0])
            # position_tracking[2*iter+1].append(state['base_pos_y'][0])
            print([state['base_pos_x'][0], state['base_pos_y'][0]],
                  high_level_delta_obs[0:2], predict_state[0:2])

            j += 1

            # if np.linalg.norm(target[0:2] - np.array([state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.25 :
            #     print("Reach One Goal !!!!")
            # np.save(save_dir +'/' + args.high_level_policy_type +'_multi_tgt_test_'+str(iter)+'.npy', np.array(position_tracking) )

            # break

            # if j>30:
            #     print('Did not reach goal')
            #     break

        cost_record.append(total_cost)
        print(cost_record)
        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)
        input('wait for pos')
        exp_variables = ru.get_variables(r)
        # state_record = exp_variables['record_pos']
        # velocity_record = exp_variables['record_vel']
        # np.save(save_dir +'/' + args.high_level_policy_type +'_velocity_record_'+ str(cost_index) + '_' + str(iter), np.array(velocity_record))
        # np.save(save_dir +'/' + args.high_level_policy_type +'_state_record_'+ str(cost_index) + '_' + str(iter), np.array(state_record))
        np.save(
            save_dir + '/' + args.high_level_policy_type + '_' +
            args.low_level_policy_type + '_different_cost_test' +
            str(cost_index) + '.npy', np.array(cost_record))
        # np.save(save_dir + '/'+args.high_level_policy_type+'_' +args.low_level_policy_type + str(iter)+'_com.npy', np.array(position_tracking) )
    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
def collect_data_client(args, r, high_level_planning, HL_replay_buffer):
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)
    for i in range(17, args.num_iters):

        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', i + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)

        for k in range(args.num_latent_action_per_iteration):
            pre_com_state = state

            q = k % 5
            if q == 0:
                target = np.clip(0.2 * np.random.randn(3), -0.25, 0.25)
                latent_action = np.array(
                    latent_all[i]) + 0.1 * np.random.randn(args.z_dim)
                # latent_action = high_level_planning.sample_latent_action(target)
            if q == 3 or q == 4:
                target = np.zeros(3)
                latent_action = np.array(
                    [0.2068, 0.1836], ) + 0.1 * np.random.randn(args.z_dim)

            # take current state and plan for next z_action and sent to daisy
            t_start = datetime.datetime.now()
            # if args.test:
            #     latent_action = high_level_planning.plan_latent_action(pre_com_state, target)
            # else:
            #     latent_action = high_level_planning.sample_latent_action(target)

            t_end = datetime.datetime.now()

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state

            if utils.check_data_useful(post_com_state):
                high_level_obs, high_level_delta_obs = utils.HL_obs(
                    pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                       post_com_state)
                HL_replay_buffer.add(high_level_obs, latent_action, 0,
                                     high_level_delta_obs, 1)

            if utils.check_robot_dead(post_com_state):
                break

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)

        if (i + 1) % 1 == 0:
            model_save_dir = utils.make_dir(
                os.path.join(args.save_dir + '/trial_%s' %
                             str(args.seed))) if args.save else None
            HL_replay_buffer.save_buffer(model_save_dir)

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)
def run_LLTG_IK(env, args, r):
    # initialize robot
    a = input('Select way to initialize the robot')
    if str(a) == '1':  #start from ground
        state = motion_library.demo_standing(env,
                                             shoulder=SHOULDER,
                                             elbow=ELBOW)
    else:
        state = motion_library.hold_position(env,
                                             shoulder=SHOULDER,
                                             elbow=ELBOW)
    print('Robot initialized, press key on client!')

    velocity_stack = [[], []]
    velocity_record = [[], []]
    postion_record = [[], [], []]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    low_level_TG = LLTG.low_level_TG(
        device=device,
        z_dim=args.z_dim,
        a_dim=args.a_dim,
        num_timestep_per_footstep=args.num_timestep_per_footstep,
        batch_size=args.batch_size,
        low_level_policy_type=args.low_level_policy_type,
        update_low_level_policy=args.update_low_level_policy,
        update_low_level_policy_lr=args.update_low_level_policy_lr,
        init_state=state,
    )
    des_height = state['base_pos_z'][0]

    exp_variables = ru.get_variables(r)
    exp_variables['finish_one_step'] = [1]
    velocity_stack[0].append(0)
    velocity_stack[1].append(0)
    exp_variables = ru.set_state(r, state, exp_variables)
    state = motion_library.hold_position(env, shoulder=SHOULDER, elbow=ELBOW)
    print('Start operation!')

    while True:
        # update swing/stance leg
        low_level_TG.policy.update_swing_stance()

        # do IK
        for step in range(1, args.num_timestep_per_footstep + 1):
            t_start = datetime.datetime.now()
            # check if footstep update/set a key stuff
            exp_variables = ru.get_variables(r)
            if exp_variables['update_z_action'][0]:
                z_action = np.array(exp_variables['z_action'])
                low_level_TG.policy.update_latent_action_params(
                    state, z_action)
                exp_variables['update_z_action'] = [0]
                ru.set_variables(r, exp_variables)

            action = low_level_TG.get_action(state, step)

            state = env.step(action)

            velocity_stack[0].append(state['base_velocity'][0])
            velocity_stack[1].append(state['base_velocity'][1])
            velocity_record[0].append(state['base_velocity'][0])
            velocity_record[1].append(state['base_velocity'][1])
            postion_record[0].append((state['base_pos_x'][0]))
            postion_record[1].append((state['base_pos_y'][0]))
            postion_record[2].append((state['base_ori_euler'][2]))
            if len(velocity_stack[0]) > STACK_LENGTH:
                velocity_stack[0].pop(0)
                velocity_stack[1].pop(0)

            t_end = datetime.datetime.now()
            t_diff = (t_end - t_start).total_seconds()
            time.sleep(max(0, 1.0 / args.control_frequency - t_diff))

        # finish one step and update to high level
        exp_variables['finish_one_step'] = [1]
        state['base_velocity'][0] = mean(velocity_stack[0])
        state['base_velocity'][1] = mean(velocity_stack[1])
        ru.set_state(r, state, exp_variables)
        exp_variables = ru.get_variables(r)

        if not exp_variables['not_finish_one_iter'][0]:
            exp_variables['not_finish_one_iter'][0] = [1]
            exp_variables['record_vel'] = velocity_record
            exp_variables['record_pos'] = postion_record
            ru.set_state(r, state, exp_variables)
            break
Esempio n. 5
0
def evaluation(args, r, high_level_planning):

    save_dir = utils.make_dir(
        os.path.join(args.save_dir +
                     '/trial_%s' % str(args.seed))) if args.save else None
    exp_variables = {
        'finish_exp': [0],
        'not_finish_one_iter': [1],
        'finish_one_step': [0],
        'update_z_action': [0],
    }
    ru.set_variables(r, exp_variables)

    for iter in range(args.num_iters):

        ru.wait_for_key(r, 'not_finish_one_iter', change=False)
        print('Iteration: ', iter + 1)
        input('Press any key after initialized robot')
        ru.wait_for_key(r, 'finish_one_step')
        state, exp_variables = ru.get_state(r)
        position_tracking[0].append(state['base_pos_x'][0])
        position_tracking[1].append(state['base_pos_y'][0])
        position_tracking[2].append(state['base_ori_euler'][2])

        j = 0
        target_index = 0
        total_cost = 0
        while True:
            target = square_circle_test[target_index]
            pre_com_state = state

            # take current state and plan for next z_action and sent to daisy
            latent_action = high_level_planning.plan_latent_action(
                pre_com_state, target)

            exp_variables['z_action'] = latent_action.tolist()
            exp_variables['update_z_action'] = [1]
            ru.set_variables(r, exp_variables)

            # check if finish one step
            ru.wait_for_key(r, 'finish_one_step')
            state, exp_variables = ru.get_state(r)
            post_com_state = state
            cost = utils.easy_cost(target, pre_com_state, post_com_state)
            total_cost += cost

            # if utils.check_data_useful(post_com_state):
            high_level_obs, high_level_delta_obs = utils.HL_obs(
                pre_com_state), utils.HL_delta_obs(pre_com_state,
                                                   post_com_state)
            predict_state = high_level_planning.model_predict(
                high_level_obs, latent_action)

            position_tracking[0].append(state['base_pos_x'][0])
            position_tracking[1].append(state['base_pos_y'][0])
            position_tracking[2].append(state['base_ori_euler'][2])
            print(state['base_pos_x'][0], state['base_pos_y'][0])

            j += 1

            if np.linalg.norm(target[0:2] - np.array(
                [state['base_pos_x'][0], state['base_pos_y'][0]])) < 0.2:
                print("Reach Goal %s!!!!" % str(target_index))
                np.save(
                    save_dir + '/' + args.high_level_policy_type +
                    '_square_test' + str(test_index) + '.npy',
                    np.array(position_tracking))
                target_index += 1
                if target_index == np.shape(square_circle_test)[0]:
                    np.save(
                        save_dir + '/' + args.high_level_policy_type +
                        '_square_test_cost_' + str(test_index) + '.npy',
                        np.array([total_cost]))
                    break

            if j > 500:
                print('Did not reach goal')
                break

        exp_variables['not_finish_one_iter'] = [0]
        ru.set_variables(r, exp_variables)
        input('wait for pos')
        exp_variables = ru.get_variables(r)
        state_record = exp_variables['record_pos']
        np.save(
            save_dir + '/' + 'state' + '_square_test' + str(test_index) +
            '.npy', np.array(state_record))

    # experiment ends
    exp_variables['finish_exp'] = [1]
    ru.set_variables(r, exp_variables)