Example #1
def specify_throws(args, d, which):
    """ 
    The user manually specifies how the throws should be done. Drag from the
    BACK to the FRONT, for intuition. The second click is where we want the
    needle tip to ENTER the phantom, and the first click is used to get the
    correct angle at which we want the needle to enter.

    Then we save the points, reset the list of points (that's why we need to
    fetch them with `global`) and do the same with the other camera image.

    BTW, if the user does this with a piece of paper that has lines, then we
    should just pull the piece of paper out once we have the correct desired
    points (if the paper is thin enough, the dvrk camera won't know about the
    height difference).
    """
    global BACK_POINTS, FRONT_POINTS, image
    if which == 'left':
        image = np.copy(d.left['raw'])
    elif which == 'right':
        image = np.copy(d.right['raw'])
    imdir = args.imdir + which
    num_throws = 0

    while num_throws < args.test:
        window_name = "Currently have {} throws out of {} planned. "+\
                "Click and drag the NEXT direction, NOW. Or press ESC to "+\
                "terminate the program.".format(num_throws, args.test)
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.setMouseCallback(window_name,
                             click)  # Record clicks to this window!
        cv2.resizeWindow(window_name, 1800, 2600)
        U.call_wait_key(cv2.imshow(window_name, image))

        # Now save the image with the next available index.
        index = len(os.listdir(imdir))
        cv2.imwrite(imdir + "/point_" + str(index).zfill(2) + ".png", image)
        cv2.destroyAllWindows()
        num_throws += 1

    assert len(BACK_POINTS) == len(FRONT_POINTS) == num_throws
    info = {
        'num_throws': num_throws,
        'back': BACK_POINTS,
        'front': FRONT_POINTS
    }
    BACK_POINTS = []
    FRONT_POINTS = []
    return info
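The `click` mouse callback registered above (and the BACK_POINTS/FRONT_POINTS globals it fills) is not shown in this snippet. A minimal sketch of what it might look like, assuming the drag-from-BACK-to-FRONT convention described in the docstring; this is an illustration, not the repo's actual callback:

import cv2

BACK_POINTS = []
FRONT_POINTS = []
# `image` is set by specify_throws() before the callback is registered.

def click(event, x, y, flags, param):
    global BACK_POINTS, FRONT_POINTS, image
    if event == cv2.EVENT_LBUTTONDOWN:
        # Press: the BACK point, used only to define the desired entry angle.
        BACK_POINTS.append((x, y))
    elif event == cv2.EVENT_LBUTTONUP:
        # Release: the FRONT point, where the needle tip should enter the phantom.
        FRONT_POINTS.append((x, y))
        # Draw the drag so the saved image records the planned throw.
        cv2.arrowedLine(image, BACK_POINTS[-1], FRONT_POINTS[-1], (0, 0, 255), 2)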
Example #2
def test(arm1, fname, test_method):
    """ Roll out the open loop policy. No human intervention. 
    
    test_method
    -----------
        0: roll entirely open-loop, no human intervention.
        1: roll each time step, then human provides correction (saves new file).
    """
    positions = U.load_pickle_to_list(fname)
    print("loaded {} positions from {}".format(len(positions), fname))
    if test_method == 0:
        print("We're going to run entirely open-loop.")
    elif test_method == 1:
        print("NOTE! You'll have to provide corrections after each movement.")
        revised_positions = []
    arm1.close_gripper()

    for i, (pos, rot) in enumerate(positions):
        U.move(arm1, pos, rot)
        arm1.close_gripper()
        real_pos, real_rot = U.get_pos_rot_from_arm(arm1, nparrays=True)
        print("\n({}) Target position: {},{}".format(i, pos, rot))
        print("    Actual position: {},{}".format(real_pos, real_rot))

        if test_method == 1:
            string = "Now correct the position as needed, then press any key,"+ \
                    " other than ESC (which will terminate the entire program)."
            U.call_wait_key(cv2.imshow(string, d.left_image), exit=True)
            revised_pos, revised_rot = U.get_pos_rot_from_arm(arm1,
                                                              nparrays=True)
            revised_positions.append((revised_pos, revised_rot))
            print("    Revised position: {},{}".format(revised_pos,
                                                       revised_rot))
        else:
            time.sleep(2)

    if test_method == 1:
        new_fname = fname[:-2] + '_revised.p'
        print("Storing {} positions in file {}".format(len(revised_positions),
                                                       new_fname))
        U.store_pickle(new_fname, revised_positions)
Example #3
def get_data(arm1, fname):
    """ Have the human press key to continue, escape to terminate. """
    key = -1
    positions = []
    stored = 0

    while True:
        key = U.call_wait_key(cv2.imshow(
            "ESC to exit and skip this; stored {} so far.".format(stored),
            d.left_image),
                              exit=False)
        if key in U.ESC_KEYS:
            break
        pos, rot = U.get_pos_rot_from_arm(arm1, nparrays=True)
        positions.append((pos, rot))
        print("Appended: {}".format((pos, rot)))
        stored += 1

    if stored > 0:
        print("Storing {} positions in file {}".format(stored, fname))
        U.store_pickle(fname, positions)
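Several of these examples lean on small pickle helpers from the `U` utilities module that are not shown (`U.store_pickle`, `U.load_pickle_to_list`, `U.get_len_of_pickle`). A rough sketch of one plausible implementation, assuming mode 'a' appends one pickled object per call and loading keeps unpickling until EOF; the actual helpers in the repo may differ in details (e.g., how lists are unrolled):

import pickle

def store_pickle(fname, info, mode='w'):
    # 'w' overwrites the file, 'a' appends one more pickled object to it.
    assert mode in ('w', 'a'), mode
    with open(fname, mode + 'b') as fh:
        pickle.dump(info, fh)

def load_pickle_to_list(fname):
    # Keep unpickling until EOF so objects appended across sessions accumulate.
    data = []
    with open(fname, 'rb') as fh:
        while True:
            try:
                data.append(pickle.load(fh))
            except EOFError:
                break
    return data

def get_len_of_pickle(fname):
    return len(load_pickle_to_list(fname))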
Example #4
def run(args, p, img_shape, save_path):
    """Run one episode, record statistics, etc.
    """
    stats = defaultdict(list)
    # Use 15 for VisMPC and, to be fair, also 15 for DAgger.
    MAX_EP_LENGTH = 15
    if args.special:
        COVERAGE_SUCCESS = 1.50
    else:
        COVERAGE_SUCCESS = 0.92
    SS_THRESH = 0.95
    dumb_correction = False
    freq = 0

    for i in range(MAX_EP_LENGTH):
        print('\n*************************************')
        print('ON TIME STEP (I.E., ACTION) NUMBER {}'.format(i + 1))
        print('*************************************\n')

        # ----------------------------------------------------------------------
        # STEP 1: move to the tab with the camera, click ENTER. This will save
        # images, and also call the neural network code to produce an action.
        # Then, load the action.
        # ----------------------------------------------------------------------
        results = U.get_net_results()  # Results from the neural network.
        print('Waiting for one more result beyond the {} we have so far'.format(
            len(results)))
        while len(results) == i:
            time.sleep(1)
            results = U.get_net_results()
        assert len(results) >= i
        assert len(results) == i + 1, '{} vs {}, {}'.format(
            i, results, len(results))

        # ----------------------------------------------------------------------
        # STEP 2: load those images, using same code as in the network loading
        # code. If coverage is high enough, exit now. Note: we load 56x56 for
        # training but use 100x100 for computing coverage as we have several
        # values that are heavily tuned for that.
        # ----------------------------------------------------------------------
        c_path_100x100 = join(C.DVRK_IMG_PATH,
                              '{}-c_img_crop_proc.png'.format(str(i).zfill(3)))
        c_path = join(C.DVRK_IMG_PATH,
                      '{}-c_img_crop_proc_56.png'.format(str(i).zfill(3)))
        d_path = join(C.DVRK_IMG_PATH,
                      '{}-d_img_crop_proc_56.png'.format(str(i).zfill(3)))
        c_img_100x100 = cv2.imread(c_path_100x100)
        coverage = U.calculate_coverage(c_img_100x100)  # tuned for 100x100
        c_img = cv2.imread(c_path)
        d_img = cv2.imread(d_path)
        U.single_means(c_img, depth=False)
        U.single_means(d_img, depth=True)
        assert c_img.shape == d_img.shape == img_shape
        assert args.use_rgbd
        # img = np.dstack( (c_img, d_img[:,:,0]) )  # we don't call net code here

        # Ensures we save the final image in case we exit and get high coverage.
        # Make sure it happens BEFORE the `break` command below so we get final imgs.
        stats['coverage'].append(coverage)
        stats['c_img_100x100'].append(c_img_100x100)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)

        if coverage >= COVERAGE_SUCCESS:
            print('\nCOVERAGE SUCCESS: {:.3f} >= {:.3f}, exiting ...\n'.format(
                coverage, COVERAGE_SUCCESS))
            break
        else:
            print('\ncurrent coverage: {:.3f}\n'.format(coverage))
        # Before Jan 2020, action selection happened later. Now it happens earlier.
        #print('  now wait a few seconds for network to run')
        #time.sleep(5)

        # ----------------------------------------------------------------------
        # STEP 3: show the output of the action to the human. HUGE ASSUMPTION:
        # that we can just look at the last item of `results` list.
        # ----------------------------------------------------------------------
        action = np.loadtxt(results[-1])
        print('neural net says: {}'.format(action))
        stats['actions'].append(action)

        # ----------------------------------------------------------------------
        # STEP 3.5, only if we're not on the first action, if current image is
        # too similar to the old one, move the target points closer towards the
        # center of the cloth plane. An approximation but likely 'good enough'.
        # It does assume the net would predict a similar action, though ...
        # ----------------------------------------------------------------------
        similar = False
        if i > 0:
            # AH! Go to -2 because I modified code to append (c_img,d_img) above.
            prev_c = stats['c_img'][-2]
            prev_d = stats['d_img'][-2]
            diff_l2_c = np.linalg.norm(c_img - prev_c) / np.prod(c_img.shape)
            diff_l2_d = np.linalg.norm(d_img - prev_d) / np.prod(d_img.shape)
            diff_ss_c = compare_ssim(c_img, prev_c, multichannel=True)
            diff_ss_d = compare_ssim(d_img[:, :, 0], prev_d[:, :, 0])
            print('\n  (c) diff L2: {:.3f}'.format(diff_l2_c))
            print('  (d) diff L2: {:.3f}'.format(diff_l2_d))
            print('  (c) diff SS: {:.3f}'.format(diff_ss_c))
            print('  (d) diff SS: {:.3f}\n'.format(diff_ss_d))
            stats['diff_l2_c'].append(diff_l2_c)
            stats['diff_l2_d'].append(diff_l2_d)
            stats['diff_ss_c'].append(diff_ss_c)
            stats['diff_ss_d'].append(diff_ss_d)

            # Apply action 'compression'? A 0.95 cutoff empirically works well.
            if diff_ss_c > SS_THRESH:
                freq += 1
                print(
                    'NOTE structural similarity exceeds {}'.format(SS_THRESH))
                similar = True
            #    if dumb_correction:
            #        action[0] = action[0] * (0.9 ** freq)
            #        action[1] = action[1] * (0.9 ** freq)
            #        print('revised action after \'dumb compression\': {}, freq {}'.format(
            #                action, freq))
            #    else:
            #        action = action_correction(action, freq, c_img_100x100)
            #else:
            #    freq = 0

        # The similarity check above is now only used for recording statistics; always pass the action through action_correction.
        action = action_correction(action,
                                   freq,
                                   c_img_100x100,
                                   similar=similar)

        # ----------------------------------------------------------------------
        # STEP 4. If the output would result in a dangerous position, human
        # stops by hitting ESC key. Otherwise, press any other key to continue.
        # The human should NOT normally be using this !!
        # ----------------------------------------------------------------------
        title = '{} -- ESC TO CANCEL (Or if episode done)'.format(action)
        if args.use_rgbd:
            stacked_img = np.hstack((c_img, d_img))
            exit = U.call_wait_key(cv2.imshow(title, stacked_img))
        elif args.use_color:
            exit = U.call_wait_key(cv2.imshow(title, c_img))
        else:
            exit = U.call_wait_key(cv2.imshow(title, d_img))
        cv2.destroyAllWindows()
        if exit:
            print('Warning: why are we exiting here?')
            print(
                'It should exit naturally due to (a) coverage or (b) time limits.'
            )
            print(
                'Make sure I clear any results that I do not want to record.')
            break

        # ----------------------------------------------------------------------
        # STEP 5: Watch the robot do its action. Terminate the script if the
        # resulting action makes things fail spectacularly.
        # ----------------------------------------------------------------------
        x = action[0]
        y = action[1]
        dx = action[2]
        dy = action[3]
        start_t = time.time()
        U.move_p_from_net_output(x,
                                 y,
                                 dx,
                                 dy,
                                 row_board=C.ROW_BOARD,
                                 col_board=C.COL_BOARD,
                                 data_square=C.DATA_SQUARE,
                                 p=p)
        elapsed_t = time.time() - start_t
        stats['act_time'].append(elapsed_t)
        print('Finished executing action in {:.2f} seconds.'.format(elapsed_t))

    # If we ended up using all actions above, we really need one more image.
    # Edit: need to handle the case where we exceeded coverage after the 9th action.
    if len(stats['c_img']) == MAX_EP_LENGTH and coverage < COVERAGE_SUCCESS:
        assert len(stats['coverage']) == MAX_EP_LENGTH, len(stats['coverage'])
        i = MAX_EP_LENGTH

        # Results from the neural network -- still use to check if we get a NEW image.
        results = U.get_net_results()
        print('Just get the very last image we need! (Have {} so far)'.format(
            len(results)))
        while len(results) == i:
            time.sleep(1)
            results = U.get_net_results()
        assert len(results) >= i
        assert len(results) == i + 1, '{} vs {}, {}'.format(
            i, results, len(results))

        # Similar loading as earlier.
        c_path_100x100 = join(C.DVRK_IMG_PATH,
                              '{}-c_img_crop_proc.png'.format(str(i).zfill(3)))
        c_path = join(C.DVRK_IMG_PATH,
                      '{}-c_img_crop_proc_56.png'.format(str(i).zfill(3)))
        d_path = join(C.DVRK_IMG_PATH,
                      '{}-d_img_crop_proc_56.png'.format(str(i).zfill(3)))
        c_img_100x100 = cv2.imread(c_path_100x100)
        coverage = U.calculate_coverage(c_img_100x100)  # tuned for 100x100
        c_img = cv2.imread(c_path)
        d_img = cv2.imread(d_path)
        assert c_img.shape == d_img.shape == img_shape

        # Record final stats.
        stats['coverage'].append(coverage)
        stats['c_img_100x100'].append(c_img_100x100)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)
        print('(for full length episode) final coverage: {:.3f}'.format(
            coverage))

    # Final book-keeping and return statistics.
    stats['coverage'] = np.array(stats['coverage'])
    print('\nEPISODE DONE!')
    print('  coverage: {}'.format(stats['coverage']))
    print('  len(coverage): {}'.format(len(stats['coverage'])))
    print('  len(c_img): {}'.format(len(stats['c_img'])))
    print('  len(d_img): {}'.format(len(stats['d_img'])))
    print('  len(actions): {}'.format(len(stats['actions'])))
    print('All done with episode! Saving stats to: {}'.format(save_path))
    with open(save_path, 'wb') as fh:
        pickle.dump(stats, fh)
    return stats
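The `compare_ssim` calls above assume an import that is not shown here. Older scikit-image releases expose it under `skimage.measure`, while newer ones renamed it, so a small compatibility shim (an assumption about the missing import, not part of the original file) would be:

try:
    from skimage.measure import compare_ssim            # scikit-image < 0.18
except ImportError:
    # Renamed in newer scikit-image releases.
    from skimage.metrics import structural_similarity as compare_ssim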
Example #5
def evaluate(arm, d, args, wrist_map_c2l, wrist_map_c2r, wrist_map_l2r):
    """ Stage 2. 
    
    Testing stage. At a high level, move the dVRK end-effector to various
    locations and once again explicitly record needle tips. Steps:

    1. Start with the dVRK gripping a needle. Try different orientations, to
    ensure that performance is orientation-invariant. This time, we should have
    the suturing phantom there! For now we still have the click interface to
    accurately compute psi.

    2. To get psi, images pop up from left and right cameras. Press ESC to
    terminate the program. If we want to proceed, then click and drag boxes of
    target locations for the needle tip.
    
    3. This time, we let the needle move. We do NOT explicitly compute an offset
    vector because we do not know where the dVRK's end-effector goes.
    """
    global CENTERS, image
    which = 'left' if args.arm==1 else 'right'
    pname = args.path_offsets+'/offsets_{}_arm_data.p'.format(which)
    if which == 'left':
        R = wrist_map_c2l
    else:
        R = wrist_map_c2r
    offsets = U.load_pickle_to_list(pname)
    print("loaded offsets, length: {}".format(len(offsets)))

    # Do stuff here just to grip the needle and get set up for computing phi/psi.
    arm.open_gripper(50)
    name = "Left image for evaluation stage. MOVE DVRK, then press any key"
    U.call_wait_key( cv2.imshow(name, d.left['raw']) )
    arm.close_gripper()
    pos,rot = U.pos_rot_arm(arm, nparrays=True)
    pos[2] += args.z_height
    U.move(arm, pos, rot)
    time.sleep(2)

    # The user now clicks stuff. LEFT CAMERA, then RIGHT CAMERA.
    click_stuff('left')
    click_stuff('right')
    assert len(CENTERS) == 4, "Error, len(CENTERS): {}".format(len(CENTERS))
    assert len(POINTS) == 8, "Error, len(POINTS): {}".format(len(POINTS))

    # New to this method: move the dVRK to a different location.
    print("Not moving to a new spot, for now.")

    # Now we can actually get phi/psi.
    pos_g,rot_g = U.pos_rot_arm(arm, nparrays=True)
    camera_tip  = U.camera_pixels_to_camera_coords(CENTERS[-4], CENTERS[-2], nparrays=True)
    camera_grip = U.camera_pixels_to_camera_coords(CENTERS[-3], CENTERS[-1], nparrays=True)
    ct_h        = np.concatenate( (camera_tip,np.ones(1)) )
    cg_h        = np.concatenate( (camera_grip,np.ones(1)) )
    base_t      = R.dot(ct_h)
    base_g      = R.dot(cg_h)
    camera_dist = np.linalg.norm( camera_tip-camera_grip )

    # Distance based on the click interface (should be the same as the camera) and
    # based on the dVRK, using its cartesian position function (this will differ).
    # Unfortunately the dVRK one is the easiest to use in real applications.
    base_dist_click = np.linalg.norm( base_t-base_g )
    base_dist_dvrk  = np.linalg.norm( base_t-pos_g )
    assert np.abs(base_dist_click - camera_dist) < 1e-5

    # Compute phi and psi based on the STARTING configuration.
    phi_c, psi_c = compute_phi_and_psi(d=base_dist_click)
    phi_d, psi_d = compute_phi_and_psi(d=base_dist_dvrk)

    # --------------------------------------------------------------------------
    # Compute offset vector wrt base. I don't think the order of subtraction
    # matters as long as in real applications, we are consistent with usage, so
    # in application we'd do (pos_n_tip_wrt_base)-(offset). 
    #
    # NOTE: this is only the offset vector wrt the starting position that we
    # have, but there are two hypotheses. 
    #
    #   1. That we can avoid computing these if we pre-compute them beforehand
    #   (as we have code for), so maybe we don't need a click interface.
    #   2. To get the needle to go somewhere, it's a matter of doing the
    #   arithmetic as specified earlier in the comments! 
    #
    # For now we'll compute the two offsets wrt base because we might as well
    # use it to simplify the task (but this is only simplifying the detection of
    # the needle tip and where the dVRK grips it).
    # --------------------------------------------------------------------------
    offset_wrt_base_click = base_t - base_g
    offset_wrt_base_dvrk  = base_t - pos_g
    offset_saved =  get_offset(offsets, psi=psi_c, kind='click', which=which, debug=True)

    print("offset_wrt_base_click: {}".format(offset_wrt_base_click))
    print("offset_wrt_base_dvrk:  {}".format(offset_wrt_base_dvrk))
    print("offset_saved:          {}".format(offset_saved))
Example #6
def offsets(arm, d, args, wrist_map_c2l, wrist_map_c2r, wrist_map_l2r):
    """ Stage 1. 
    
    1. Before running the python script, get the setup as shown in the image in
    the README where the needle is on the foam and the dVRK (w/SNAP) is about to
    grip it. 
    
    2. Then, this code waits until the user has manipulated the master tools to
    pick up the needle at some location. EDIT: actually it's easier to just move
    the arm directly and then assume we translate up by a few mm. I'm getting no
    response from the master tools, for some reason, so just move it explicitly.
    
    3. Images will pop up. Press ESC to terminate the program. Otherwise, we
    click and crop: (a) the end-point of the needle AND (b) the place where the
    dVRK grasps the needle. The latter is mainly to see if this coincides with
    the dVRK's actual position. MUST BE DONE IN ORDER!
    
    4. This saves into a pickle file by appending, so in theory we continually
    add data points. (The main catch is to check that if we change the setup,
    we're NOT counting those older points.)
    
    Repeat the process by moving the master tools again, and go to step 2.
    """
    global CENTERS, image
    which = 'left' if args.arm==1 else 'right'
    pname = args.path_offsets+'/offsets_{}_arm_data.p'.format(which)
    if which == 'left':
        R = wrist_map_c2l
    else:
        R = wrist_map_c2r
    num_offsets = 0

    while True:
        # Just an overall view before grabbing needle. OK to only see the left
        # image. And BTW, this is when we change the dVRK to get more data.
        arm.open_gripper(50)
        print("\nNow starting new offset with {} offsets so far...".format(num_offsets))
        name = "Left image w/{} offsets so far. MOVE DVRK, press any key".format(num_offsets)
        U.call_wait_key( cv2.imshow(name, d.left['raw']) )
        arm.close_gripper()
        pos,rot = U.pos_rot_arm(arm, nparrays=True)
        pos[2] += args.z_height
        U.move(arm, pos, rot)
        time.sleep(2)

        # The user now clicks stuff. LEFT CAMERA, then RIGHT CAMERA.
        click_stuff('left')
        click_stuff('right')

        # Compute data to store. We stored left tip, left grip, right tip, right grip.
        pos_g,rot_g = U.pos_rot_arm(arm, nparrays=True)
        assert len(CENTERS) % 4 == 0, "Error, len(CENTERS): {}".format(len(CENTERS))
        camera_tip  = U.camera_pixels_to_camera_coords(CENTERS[-4], CENTERS[-2], nparrays=True)
        camera_grip = U.camera_pixels_to_camera_coords(CENTERS[-3], CENTERS[-1], nparrays=True)

        # Map stuff to stuff. The `h` in `xyz_h` refers to homogeneous coordinates.
        ct_h = np.concatenate( (camera_tip,np.ones(1)) )
        cg_h = np.concatenate( (camera_grip,np.ones(1)) )
        base_t = R.dot(ct_h)
        base_g = R.dot(cg_h)
        camera_dist = np.linalg.norm( camera_tip-camera_grip )

        # Distance based on the click interface (should be the same as the camera) and
        # based on the dVRK, using its cartesian position function (this will differ).
        # Unfortunately the dVRK one is the easiest to use in real applications.
        base_dist_click = np.linalg.norm( base_t-base_g )
        base_dist_dvrk  = np.linalg.norm( base_t-pos_g )
        assert np.abs(base_dist_click - camera_dist) < 1e-5

        # Compute offset vector wrt base frame. I don't think the order of
        # subtraction matters as long as in real applications, we are consistent
        # with usage, so in application we'd do (pos_n_tip_wrt_base)-(offset).
        offset_wrt_base_click = base_t - base_g
        offset_wrt_base_dvrk  = base_t - pos_g

        phi_c, psi_c = compute_phi_and_psi(d=base_dist_click)
        phi_d, psi_d = compute_phi_and_psi(d=base_dist_dvrk)

        # Bells and whistles.
        base = 'base_'+which+'_'
        info = {}
        info['pos_g_dvrk'] = pos_g
        info['rot_g_dvrk'] = rot_g
        info['camera_tip'] = camera_tip
        info['camera_grip'] = camera_grip
        info[base+'tip'] = base_t
        info[base+'grip'] = base_g
        info['camera_dist'] = camera_dist 
        info[base+'dist_click'] = base_dist_click
        info[base+'dist_dvrk'] = base_dist_dvrk
        info[base+'offset_click'] = offset_wrt_base_click
        info[base+'offset_dvrk'] = offset_wrt_base_dvrk
        info['phi_click_deg'] = phi_c
        info['psi_click_mm'] = psi_c
        info['phi_dvrk_deg'] = phi_d
        info['psi_dvrk_mm'] = psi_d

        num_offsets += 1
        U.store_pickle(fname=pname, info=info, mode='a')
        num_before_this = U.get_len_of_pickle(pname)

        print("Computed and saved {} offset vectors in this session".format(num_offsets))
        print("We have {} items total (including prior sessions)".format(num_before_this))
        print("  pos: {}\n  rot: {}".format(pos_g, rot_g))
        print("  for tip, CENTER coords (left,right): {}, {}".format(CENTERS[-4], CENTERS[-2]))
        print("  for grip, CENTER coords (left,right): {}, {}".format(CENTERS[-3], CENTERS[-1]))
        print("  camera_tip:  {}".format(camera_tip))
        print("  camera_grip: {}".format(camera_grip))
        print("  base_{}_tip:  {}".format(which, base_t))
        print("  base_{}_grip: {}".format(which, base_g))
        print("  camera_dist (mm):      {:.2f}".format(camera_dist*1000.))
        print("  base_dist_camera (mm): {:.2f}".format(base_dist_click*1000.))
        print("  base_dist_dvrk (mm):   {:.2f}".format(base_dist_dvrk*1000.))
        print("  camera, phi: {:.2f}, psi: {:.2f}".format(phi_c, psi_c))
        print("  base,   phi: {:.2f}, psi: {:.2f}".format(phi_d, psi_d))
Example #7
def collect_guidelines(args, arm, d):
    """ Gather statistics about the workspace on how safe we can set things.
    Save things in a pickle file specified by the `directory` parameter. Click
    the ESC key to exit the program and restart if I've made an error. BTW, the
    four poses we collect will be the four "home" positions that I use later,
    though with more z-coordinate offset.

    Some information:

        `yaw` must be limited in [-180,180]  # But actually, [-90,90] is OK.
        `pitch` must be limited in [-50,50]  # I _think_ ...
        `roll` must be limited in [-180,180] # I think ...

    Remember, if I change the numbers, it doesn't impact the code until
    re-building `guidelines.p`!!
    """
    # Add stuff we should already know, particularly the rotation ranges.
    info = {}
    info['min_yaw'] = 30
    info['max_yaw'] = 150
    info['min_pitch'] = 40
    info['max_pitch'] = 80

    # Roll is annoying because of the values the dVRK provides.
    info['roll_neg_ubound'] = -140  # (-180, roll_neg_ubound)
    info['roll_pos_lbound'] = 140  # (roll_pos_lbound, 180)
    info['min_pos_roll'] = 40
    info['max_pos_roll'] = 180
    info['min_neg_roll'] = -180
    info['max_neg_roll'] = -150

    # Move arm to positions to determine approximately safe ranges for x,y,z
    # values. All the `pos_{lr,ll,ul,ur}` are in robot coordinates.
    U.call_wait_key(
        cv2.imshow("Left camera (move to lower right corner now!)",
                   d.left['raw']))
    info['pos_lr'], info['rot_lr'] = U.pos_rot_arm(arm)
    U.call_wait_key(
        cv2.imshow("Left camera (move to lower left corner now!)",
                   d.left['raw']))
    info['pos_ll'], info['rot_ll'] = U.pos_rot_arm(arm)
    U.call_wait_key(
        cv2.imshow("Left camera (move to upper left corner now!)",
                   d.left['raw']))
    info['pos_ul'], info['rot_ul'] = U.pos_rot_arm(arm)
    U.call_wait_key(
        cv2.imshow("Left camera (move to upper right corner now!)",
                   d.left['raw']))
    info['pos_ur'], info['rot_ur'] = U.pos_rot_arm(arm)

    # So P[:,0] is a vector of the x's, P[:,1] vector of y's, P[:,2] vector of z's.
    p_lr = np.squeeze(np.array(info['pos_lr']))
    p_ll = np.squeeze(np.array(info['pos_ll']))
    p_ul = np.squeeze(np.array(info['pos_ul']))
    p_ur = np.squeeze(np.array(info['pos_ur']))
    P = np.vstack((p_lr, p_ll, p_ul, p_ur))

    # Get ranges. This is a bit of a heuristic but generally good I think.
    info['min_x'] = np.min([p_lr[0], p_ll[0], p_ul[0], p_ur[0]])
    info['max_x'] = np.max([p_lr[0], p_ll[0], p_ul[0], p_ur[0]])
    info['min_y'] = np.min([p_lr[1], p_ll[1], p_ul[1], p_ur[1]])
    info['max_y'] = np.max([p_lr[1], p_ll[1], p_ul[1], p_ur[1]])

    # For z, we fit a plane. See https://stackoverflow.com/a/1400338/3287820
    # Find (alpha, beta, gamma) s.t. f(x,y) = alpha*x + beta*y + gamma = z.
    A = np.zeros((3, 3))  # Must be symmetric!
    A[0, 0] = np.sum(P[:, 0] * P[:, 0])
    A[0, 1] = np.sum(P[:, 0] * P[:, 1])
    A[0, 2] = np.sum(P[:, 0])
    A[1, 0] = np.sum(P[:, 0] * P[:, 1])
    A[1, 1] = np.sum(P[:, 1] * P[:, 1])
    A[1, 2] = np.sum(P[:, 1])
    A[2, 0] = np.sum(P[:, 0])
    A[2, 1] = np.sum(P[:, 1])
    A[2, 2] = P.shape[0]

    b = np.array([
        np.sum(P[:, 0] * P[:, 2]),
        np.sum(P[:, 1] * P[:, 2]),
        np.sum(P[:, 2])
    ])

    x = np.linalg.inv(A).dot(b)
    info['z_alpha'] = x[0]
    info['z_beta'] = x[1]
    info['z_gamma'] = x[2]

    # Sanity checks before saving stuff.
    assert info['min_x'] < info['max_x']
    assert info['min_y'] < info['max_y']
    assert P.shape == (4, 3)

    print("\nThe key/val pairings for {}.".format(args.directory))
    keys = sorted(info.keys())
    for key in keys:
        print("{:20} ==> {}".format(key, info[key]))
    print("")
    print("P:\n{}".format(P))
    print("A:\n{}".format(A))
    print("x:\n{}".format(x))
    print("b:\n{}".format(b))
    U.store_pickle(fname=args.directory, info=info, mode='w')
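For reference, the hand-built normal equations above can be cross-checked with a direct least-squares solve; a short sketch (not in the original code) using the same 4x3 array P of corner positions:

import numpy as np

def fit_plane(P):
    """Return (alpha, beta, gamma) such that alpha*x + beta*y + gamma ~= z for each row of P."""
    A_ls = np.column_stack((P[:, 0], P[:, 1], np.ones(P.shape[0])))
    coeffs, _, _, _ = np.linalg.lstsq(A_ls, P[:, 2], rcond=None)
    return coeffs  # should agree with (info['z_alpha'], info['z_beta'], info['z_gamma'])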
Example #8
def run(args, cam, p):
    """Run one episode, record statistics, etc."""
    stats = defaultdict(list)
    COVERAGE_SUCCESS = 0.92
    exponent = 0

    for i in range(args.max_ep_length):
        print('\n*************************************')
        print('ON TIME STEP (I.E., ACTION) NUMBER {}'.format(i + 1))
        print('*************************************\n')

        # ----------------------------------------------------------------------
        # STEP 1: query the image from the camera class using `cam`. To avoid
        # the flashing strobe light, you have to move to the tab with the camera.
        # ----------------------------------------------------------------------
        c_img_raw = None
        d_img_raw = None
        print(
            'Waiting for c_img, & d_img; please press ENTER in the appropriate tab'
        )
        while c_img_raw is None:
            c_img_raw = cam.read_color_data()
        while d_img_raw is None:
            d_img_raw = cam.read_depth_data()
        print('  obtained the (raw) c_img and d_img')

        # ----------------------------------------------------------------------
        # STEP 2: process image and save as a 100x100 png, see `camera.py` for some
        # tests. Image must be saved in specified DVRK_IMG_PATH for the net to see.
        # Also, if coverage is high enough, EXIT NOW!
        # ----------------------------------------------------------------------
        c_img, d_img = _process_images(c_img_raw, d_img_raw, args)
        assert c_img.shape == (100, 100, 3), c_img.shape
        assert d_img.shape == (100, 100, 3), d_img.shape
        if args.use_color:
            c_tail = "c_img_{}.png".format(str(i).zfill(2))
            img_path = join(C.DVRK_IMG_PATH, c_tail)
            cv2.imwrite(img_path, c_img)
        else:
            d_tail = "d_img_{}.png".format(str(i).zfill(2))
            img_path = join(C.DVRK_IMG_PATH, d_tail)
            cv2.imwrite(img_path, d_img)
        print('just saved to: {}\n'.format(img_path))
        U.single_means(c_img, depth=False)
        U.single_means(d_img, depth=True)

        coverage = U.calculate_coverage(c_img)

        # Ensures we save the final image in case we exit and get high coverage.
        # Make sure it happens BEFORE the `break` command below so we get final imgs.
        stats['coverage'].append(coverage)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)

        if coverage > COVERAGE_SUCCESS:
            print('\nCOVERAGE SUCCESS: {:.3f} > {:.3f}, exiting ...\n'.format(
                coverage, COVERAGE_SUCCESS))
            break
        else:
            print('\ncurrent coverage: {:.3f}\n'.format(coverage))
        print('  now wait a few seconds for network to run')
        time.sleep(5)

        # ----------------------------------------------------------------------
        # STEP 3: get the output from the neural network loading class (you did
        # run it in a separate terminal tab, right?) and then show it to a human.
        # HUGE ASSUMPTION: that the last text file indicates the action we want.
        # ----------------------------------------------------------------------
        dvrk_action_paths = sorted(
                [join(C.DVRK_IMG_PATH,x) for x in os.listdir(C.DVRK_IMG_PATH) \
                    if x[-4:]=='.txt']
        )
        assert len(dvrk_action_paths) > 0, 'Did you run the neural net code??'
        action = np.loadtxt(dvrk_action_paths[-1])
        print('neural net says: {}'.format(action))
        stats['actions'].append(action)

        # ----------------------------------------------------------------------
        # STEP 3.5, only if we're not on the first action, if current image is
        # too similar to the old one, move the target points closer towards the
        # center of the cloth plane. An approximation but likely 'good enough'.
        # It does assume the net would predict a similar action, though ...
        # ----------------------------------------------------------------------
        if i > 0:
            # AH! Go to -2 because I modified code to append (c_img,d_img) above.
            prev_c = stats['c_img'][-2]
            prev_d = stats['d_img'][-2]
            diff_l2_c = np.linalg.norm(c_img - prev_c) / np.prod(c_img.shape)
            diff_l2_d = np.linalg.norm(d_img - prev_d) / np.prod(d_img.shape)
            diff_ss_c = compare_ssim(c_img, prev_c, multichannel=True)
            diff_ss_d = compare_ssim(d_img[:, :, 0], prev_d[:, :, 0])
            print('\n  (c) diff L2: {:.3f}'.format(diff_l2_c))
            print('  (d) diff L2: {:.3f}'.format(diff_l2_d))
            print('  (c) diff SS: {:.3f}'.format(diff_ss_c))
            print('  (d) diff SS: {:.3f}\n'.format(diff_ss_d))
            stats['diff_l2_c'].append(diff_l2_c)
            stats['diff_l2_d'].append(diff_l2_d)
            stats['diff_ss_c'].append(diff_ss_c)
            stats['diff_ss_d'].append(diff_ss_d)

            # Apply action 'compression'? A 0.95 cutoff empirically works well.
            ss_thresh = 0.95
            if diff_ss_c > ss_thresh:
                exponent += 1
                print(
                    'NOTE structural similarity exceeds {}'.format(ss_thresh))
                action[0] = action[0] * (0.9**exponent)
                action[1] = action[1] * (0.9**exponent)
                print('revised action after \'compression\': {} w/exponent {}'.
                      format(action, exponent))
            else:
                exponent = 0

        # ----------------------------------------------------------------------
        # STEP 4. If the output would result in a dangerous position, human
        # stops by hitting ESC key. Otherwise, press any other key to continue.
        # The human should NOT normally be using this !!
        # ----------------------------------------------------------------------
        title = '{} -- ESC TO CANCEL (Or if episode done)'.format(action)
        if args.use_color:
            exit = U.call_wait_key(cv2.imshow(title, c_img))
        else:
            exit = U.call_wait_key(cv2.imshow(title, d_img))
        cv2.destroyAllWindows()
        if exit:
            print('Warning: why are we exiting here?')
            print(
                'It should exit naturally due to (a) coverage or (b) time limits.'
            )
            break

        # ----------------------------------------------------------------------
        # STEP 5: Watch the robot do its action. Terminate the script if the
        # resulting action makes things fail spectacularly.
        # ----------------------------------------------------------------------
        x = action[0]
        y = action[1]
        dx = action[2]
        dy = action[3]
        U.move_p_from_net_output(x,
                                 y,
                                 dx,
                                 dy,
                                 row_board=C.ROW_BOARD,
                                 col_board=C.COL_BOARD,
                                 data_square=C.DATA_SQUARE,
                                 p=p)

        # ----------------------------------------------------------------------
        # STEP 6. Record statistics. Sleep just in case, also reset images.
        # Don't save raw images -- causes file sizes to blow up.
        # ----------------------------------------------------------------------
        cam.set_color_none()
        cam.set_depth_none()
        print('Reset color/depth in camera class, waiting a few seconds ...')
        time.sleep(3)

    # If we ended up using all actions above, we really need one more image.
    if len(stats['c_img']) == args.max_ep_length:
        assert len(stats['coverage']) == args.max_ep_length, len(
            stats['coverage'])
        c_img_raw = None
        d_img_raw = None
        print(
            'Waiting for FINAL c_img, & d_img; please press ENTER in the appropriate tab'
        )
        while c_img_raw is None:
            c_img_raw = cam.read_color_data()
        while d_img_raw is None:
            d_img_raw = cam.read_depth_data()
        c_img, d_img = _process_images(c_img_raw, d_img_raw, args)
        coverage = U.calculate_coverage(c_img)
        stats['coverage'].append(coverage)
        stats['c_img'].append(c_img)
        stats['d_img'].append(d_img)
        print('(for full length episode) final coverage: {:.3f}'.format(
            coverage))

    # Final book-keeping and return statistics.
    print('\nEPISODE DONE!')
    print('  coverage: {}'.format(stats['coverage']))
    print('  len(coverage): {}'.format(len(stats['coverage'])))
    print('  len(c_img): {}'.format(len(stats['c_img'])))
    print('  len(d_img): {}'.format(len(stats['d_img'])))
    print('  len(actions): {}'.format(len(stats['actions'])))

    # File path shenanigans.
    if args.use_color:
        if args.use_other_color:
            save_path = join('results',
                             'tier{}_color_yellowcloth'.format(args.tier))
        else:
            save_path = join('results', 'tier{}_color'.format(args.tier))
    else:
        if args.use_other_color:
            save_path = join('results',
                             'tier{}_depth_yellowcloth'.format(args.tier))
        else:
            save_path = join('results', 'tier{}_depth'.format(args.tier))
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    count = len(
        [x for x in os.listdir(save_path) if 'ep_' in x and '.pkl' in x])
    save_path = join(save_path,
                     'ep_{}_{}.pkl'.format(str(count).zfill(3), U.get_date()))
    print('All done with episode! Saving stats to: {}'.format(save_path))
    with open(save_path, 'wb') as fh:
        pickle.dump(stats, fh)
    return stats
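A short usage sketch (not part of the original) for reading back the per-episode statistics this function pickles; the path is whatever `ep_*.pkl` file the code above printed:

import pickle

with open(save_path, 'rb') as fh:   # e.g., 'results/tier1_color/ep_000_<date>.pkl'
    stats = pickle.load(fh)
print('final coverage: {:.3f}'.format(stats['coverage'][-1]))
print('number of actions: {}'.format(len(stats['actions'])))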
Example #9
def collect_boundaries(p, cam, data_path):
    """Gather statistics about the workspace on how safe we can set things.

    Save things in a pickle file specified by the `directory` parameter. Click
    the ESC key to exit the program and restart. BTW, the four poses we collect
    will be the four "home" positions that I use later, though with more
    z-coordinate offset.

    Some information:

        `yaw` must be limited in [-180,180]  # But [-90,90] is all we need.
        `pitch` must be limited in [-50,50]  # I _think_ ...
        `roll` must be limited in [-180,180] # I think ...

    Remember, if I change the numbers, it doesn't impact the code until
    re-building `guidelines.p`!!
    """
    # First, add stuff that we should already know. (TODO: IS THIS STILL TRUE??)
    info = {}
    info['min_yaw'] = -90
    info['max_yaw'] = 90
    info['min_pitch'] = -15
    info['max_pitch'] = 25
    info['roll_neg_ubound'] = -150  # (-180, roll_neg_ubound)
    info['roll_pos_lbound'] = 150  # (roll_pos_lbound, 180)
    info['min_roll'] = -180
    info['max_roll'] = -150

    # Be careful about this ordering! Orientation wrt us looking at workspace
    # from behind computers, so (for example) 'lower' means closer to us.
    ordering = ['lower_right', 'lower_left', 'upper_left', 'upper_right']
    boundary_poses = []

    for order in ordering:
        # Get images, refresh to None each time we need to move.
        c_img_raw = None
        d_img_raw = None
        while c_img_raw is None:
            c_img_raw = cam.read_color_data()
        while d_img_raw is None:
            d_img_raw = cam.read_depth_data()

        # Move arm to positions to determine approx. safe ranges for x,y,z values.
        txt = "Move to {} corner now!!".format(order)
        U.call_wait_key(cv2.imshow(txt, c_img_raw))
        pose = p.arm.get_current_pose(unit='rad')
        boundary_poses.append(pose)
        cv2.destroyAllWindows()
    bp = boundary_poses

    # Save these so that we can use them for `home` positions. These are POSES
    # so they are TUPLES, of (pos,rot).
    info['home_LR_pose'] = bp[0]
    info['home_LL_pose'] = bp[1]
    info['home_UL_pose'] = bp[2]
    info['home_UR_pose'] = bp[3]

    # So P[:,0] is a vector of the x's, P[:,1] vector of y's, P[:,2] vector of z's.
    # For the k-th item, bp[k][0] = its position, and bp[k][1] = orientation.
    p_lr = np.squeeze(np.array((bp[0])[0][:3]))
    p_ll = np.squeeze(np.array((bp[1])[0][:3]))
    p_ul = np.squeeze(np.array((bp[2])[0][:3]))
    p_ur = np.squeeze(np.array((bp[3])[0][:3]))
    P = np.vstack((p_lr, p_ll, p_ul, p_ur))
    print("P (shape: {}):\n{}".format(P.shape, P))

    # Get ranges. This is a bit of a heuristic, just taking min/max of the four.
    # Not sure how good this will be? TODO: check this, I wanted to get a rough
    # sense of the max and min coord ranges we should enforce.
    #info['min_x'] = np.min( [p_lr[0], p_ll[0], p_ul[0], p_ur[0]] )
    #info['max_x'] = np.max( [p_lr[0], p_ll[0], p_ul[0], p_ur[0]] )
    #info['min_y'] = np.min( [p_lr[1], p_ll[1], p_ul[1], p_ur[1]] )
    #info['max_y'] = np.max( [p_lr[1], p_ll[1], p_ul[1], p_ur[1]] )
    #assert info['min_x'] < info['max_x']
    #assert info['min_y'] < info['max_y']

    # For z, we fit a plane. See https://stackoverflow.com/a/1400338/3287820
    # Find (alpha, beta, gamma) s.t. f(x,y) = alpha*x + beta*y + gamma = z.
    A = np.zeros((3, 3))  # Must be symmetric!
    A[0, 0] = np.sum(P[:, 0] * P[:, 0])
    A[0, 1] = np.sum(P[:, 0] * P[:, 1])
    A[0, 2] = np.sum(P[:, 0])
    A[1, 0] = np.sum(P[:, 0] * P[:, 1])
    A[1, 1] = np.sum(P[:, 1] * P[:, 1])
    A[1, 2] = np.sum(P[:, 1])
    A[2, 0] = np.sum(P[:, 0])
    A[2, 1] = np.sum(P[:, 1])
    A[2, 2] = P.shape[0]

    b = np.array([
        np.sum(P[:, 0] * P[:, 2]),
        np.sum(P[:, 1] * P[:, 2]),
        np.sum(P[:, 2])
    ])

    x = np.linalg.inv(A).dot(b)
    info['z_alpha'] = x[0]
    info['z_beta'] = x[1]
    info['z_gamma'] = x[2]

    # Sanity checks before saving stuff.
    assert P.shape == (4, 3)
    print("A:\n{}".format(A))
    print("P:\n{}".format(P))
    print("x:\n{}".format(x))
    print("b:\n{}\n".format(b))

    # Log output and save! Json is a bit inconvenient so use pickle.
    for key in sorted(info.keys()):
        print('\n{}'.format(key))
        print(info[key])
    with open(data_path, 'wb') as fh:
        pickle.dump(info, fh)
    print('Just saved: {}'.format(data_path))
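A brief usage sketch (an assumption, not in the original): the saved coefficients define the fitted table plane, so a reasonable z for any (x, y) in the workspace can be recovered later from the pickle written above.

import pickle

with open(data_path, 'rb') as fh:    # the same `data_path` passed to collect_boundaries
    info = pickle.load(fh)

def z_on_plane(x, y, info):
    # f(x, y) = alpha*x + beta*y + gamma, the plane fit computed above.
    return info['z_alpha'] * x + info['z_beta'] * y + info['z_gamma']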