Ejemplo n.º 1
0
def worker(input_q, output_q):
    """Consume frames from ``input_q`` and publish predictions to ``output_q``.

    Builds a ``ColorHandPose3DNetwork`` inference graph for one 240x320
    three-channel image, then loops forever: take a frame from ``input_q``,
    resize and normalise it, run the network for the 3D keypoint coordinates
    and put the result of ``predict_by_geometry`` on ``output_q``.

    Args:
        input_q: Queue-like object whose ``get()`` returns an image array.
        output_q: Queue-like object whose ``put()`` receives each prediction.

    The loop has no exit condition of its own. It only ends when an exception
    (for example ``KeyboardInterrupt``) propagates; the ``finally`` clause then
    stops the FPS counter and closes the TensorFlow session, which the
    previous version could never reach.
    """
    fps = FPS().start()
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0,
                                 1.0]])  # Both left and right hands included
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,\
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    try:
        net.init(sess)
        while True:
            fps.update()
            frame = input_q.get()
            # NOTE(review): an earlier revision converted ``frame`` from BGR
            # to RGB here but never used the result; the frame is fed to the
            # network in whatever channel order the producer supplies.
            # Confirm the producer already delivers RGB.
            image_raw = scipy.misc.imresize(frame, (240, 320))
            # Scale pixel values to [-0.5, 0.5] and add the batch dimension.
            image_v = np.expand_dims(
                (image_raw.astype('float') / 255.0) - 0.5, 0)
            keypoint_coord3d_v = sess.run(keypoint_coord3d_tf,
                                          feed_dict={image_tf: image_v})
            output_q.put(
                predict_by_geometry(keypoint_coord3d_v, known_finger_poses,
                                    0.45))
    finally:
        # Runs when the loop is interrupted, so resources are always freed.
        fps.stop()
        sess.close()
Ejemplo n.º 2
0
def main(args):
    """Run live hand-keypoint detection on a webcam stream and display it.

    Args:
        args: Command-line style argument list. ``args[1]``, when present, is
            parsed as the integer webcam id; an unparsable value falls back
            to ``0``.

    Returns:
        ``1`` if the camera cannot be opened, otherwise ``0`` once the
        capture loop ends (a frame could not be read, or ESC was pressed in
        the preview window).
    """
    webcamId = 0
    try:
        if len(args) > 1:
            webcamId = int(args[1])
    except ValueError:
        print("Invalid webcam id. Fall back to default value '" + str(webcamId) + "'.")

    # stream creation
    inputStream = cv2.VideoCapture(webcamId)
    if not inputStream.isOpened():
        print("Can not use camera with id " + str(webcamId) + ".")
        return 1

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
    keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    try:
        # initialize network
        net.init(session)

        while True:
            grabbed, image_raw = inputStream.read()
            if not grabbed:
                # read() yields no usable image on failure; stop instead of
                # crashing inside cv2.resize.
                print("Can not read frame from camera with id " + str(webcamId) + ".")
                break

            image_resized = cv2.resize(image_raw, (320, 240))
            image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)

            # Scale pixel values to [-0.5, 0.5] and add the batch dimension.
            image_v = np.expand_dims((image_rgb.astype('float') / 255.0) - 0.5, 0)

            # All six outputs are fetched so the reported time covers the
            # whole network, as before; only three are used below.
            start_time = time.time()
            _, _, scale_v, center_v, keypoints_scoremap_v, _ = session.run(
                [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                 keypoints_scoremap_tf, keypoint_coord3d_tf],
                feed_dict={image_tf: image_v})
            delta_time = time.time() - start_time
            print("Inference time: " + str(delta_time))

            # post processing
            keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
            coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
            coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

            plot_hand_cv2(image_resized, coord_hw)
            image_fullsize = cv2.resize(image_resized, (1600, 1200))

            cv2.imshow('result', image_fullsize)
            # ESC (key code 27) leaves the loop so the cleanup below can run.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # Previously unreachable and calling a non-existent
        # cv2.releaseAllWindows(); release everything that was acquired.
        inputStream.release()
        cv2.destroyAllWindows()
        session.close()

    return 0
Ejemplo n.º 3
0
def prepare_network():
    """Build the hand-pose inference graph and return an initialised session.

    Returns:
        A tuple ``(sess, image_tf, keypoint_coord3d_tf, scale_tf, center_tf,
        keypoints_scoremap_tf)``: the TensorFlow session followed by the input
        placeholder and the four network output tensors callers feed/fetch.
    """
    # Network inputs: one 240x320 three-channel image per run.
    input_shape = (1, 240, 320, 3)
    image_tf = tf.placeholder(tf.float32, shape=input_shape)
    both_hands = [[1.0, 1.0]]  # both left and right hands included
    hand_side_tf = tf.constant(both_hands)
    evaluation = tf.placeholder_with_default(True, shape=())

    # Build the network; only four of its six outputs are handed back.
    net = ColorHandPose3DNetwork()
    outputs = net.inference(image_tf, hand_side_tf, evaluation)
    (_hand_scoremap_tf, _image_crop_tf, scale_tf, center_tf,
     keypoints_scoremap_tf, keypoint_coord3d_tf) = outputs

    # Start TensorFlow with the GPU memory fraction capped at 0.8.
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.8))
    sess = tf.Session(config=session_config)

    # Initialise the network inside the new session.
    net.init(sess)

    return (sess, image_tf, keypoint_coord3d_tf, scale_tf, center_tf,
            keypoints_scoremap_tf)
Ejemplo n.º 4
0
def prepare_network():
    """Create the hand-pose CNN graph plus a session with the network initialised.

    Returns:
        ``(sess, image_tf, keypoint_coord3d_tf, scale_tf, center_tf,
        keypoints_scoremap_tf)`` -- the session, the image placeholder and
        the output tensors selected from ``net.inference``.
    """
    # Input to the CNN.
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    # Right and left hand are both included.
    hand_side_tf = tf.constant([[1.0, 1.0]])
    evaluation = tf.placeholder_with_default(True, shape=())

    # Network construction.
    net = ColorHandPose3DNetwork()
    inference_outputs = net.inference(image_tf, hand_side_tf, evaluation)
    _, _, scale_tf, center_tf, keypoints_scoremap_tf, keypoint_coord3d_tf = \
        inference_outputs

    # Initialise TensorFlow.
    memory_cap = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    tf_config = tf.ConfigProto(gpu_options=memory_cap)
    sess = tf.Session(config=tf_config)

    # Initialise the network.
    net.init(sess)

    result = (sess, image_tf, keypoint_coord3d_tf, scale_tf, center_tf,
              keypoints_scoremap_tf)
    return result
    def __init__(self, 
            model_weight_files=['./utilities/hand3d/weights/handsegnet-rhd.pickle', 
            './utilities/hand3d/weights/posenet3d-rhd-stb-slr-finetuned.pickle'], 
            visualize=False, 
            visualize_save_loc='visualize/handpose_estimation',
            cache_loc='cache/handpose_estimation', image_extension='.jpg',
            overwrite=False):
        """Store the configuration, create output folders and build the network.

        Args:
            model_weight_files: Weight file paths handed to ``net.init``.
            visualize: When true, ``visualize_save_loc`` is created if missing.
            visualize_save_loc: Directory path stored for visualisations.
            cache_loc: Directory path for cached results; created if missing.
            image_extension: Only its length is kept (``extension_length``).
            overwrite: Stored as ``self.overwrite``.
        """
        # Plain configuration.
        self.extension_length = len(image_extension)
        self.model_weight_files = model_weight_files
        self.visualize = visualize
        self.visualize_save_loc = visualize_save_loc

        # The visualisation folder is only needed when visualising.
        if self.visualize and not os.path.exists(self.visualize_save_loc):
            os.makedirs(self.visualize_save_loc, exist_ok=True)

        self.cache_loc = cache_loc
        cache_missing = not os.path.exists(self.cache_loc)
        if cache_missing:
            os.makedirs(self.cache_loc, exist_ok=True)
        self.overwrite = overwrite

        # Input placeholders.
        self.image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
        # left hand (true for all samples provided)
        self.hand_side_tf = tf.constant([[1.0, 0.0]])
        self.evaluation = tf.placeholder_with_default(True, shape=())

        # Build the network and keep every output tensor as an attribute.
        self.net = ColorHandPose3DNetwork()
        outputs = self.net.inference(self.image_tf, self.hand_side_tf,
                                     self.evaluation)
        (self.hand_scoremap_tf, self.image_crop_tf, self.scale_tf,
         self.center_tf, self.keypoints_scoremap_tf,
         self.keypoint_coord3d_tf) = outputs

        # Start TensorFlow with the GPU memory fraction capped at 0.8.
        self.gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        session_config = tf.ConfigProto(gpu_options=self.gpu_options)
        self.sess = tf.Session(config=session_config)

        # Load the configured weights into the session.
        self.net.init(self.sess, weight_files=self.model_weight_files)
Ejemplo n.º 6
0
    'snapshot_dir': 'snapshots_handsegnet'
}

# get dataset
dataset = BinaryDbReader(mode='training',
                         batch_size=4,
                         shuffle=True,
                         hue_aug=True,
                         random_crop_to_size=True)

# build network graph
data = dataset.get()

# build network
evaluation = tf.placeholder_with_default(True, shape=())
net = ColorHandPose3DNetwork()
hand_mask_pred = net.inference_detection(data['image'], train=True)

# Start TF
gpu_options = tf.GPUOptions(allow_growth=True, )
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
tf.train.start_queue_runners(sess=sess)

# Loss: softmax cross-entropy between every predicted mask and the ground
# truth, with the first three dimensions flattened into one.
loss = 0.0
s = data['hand_mask'].get_shape().as_list()
flat_rows = s[0] * s[1] * s[2]
# The ground truth does not depend on the loop variable, so reshape it once.
gt = tf.reshape(data['hand_mask'], [flat_rows, -1])
for i, pred_item in enumerate(hand_mask_pred):
    # Reshape the current prediction. The previous code reshaped the whole
    # ``hand_mask_pred`` collection on every iteration, which is only correct
    # when it holds exactly one element.
    pred = tf.reshape(pred_item, [flat_rows, -1])
    loss += tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=gt))
Ejemplo n.º 7
0
def main():
    """Crop the detected hand region from every ``.jpg`` frame and log the boxes.

    Reads the module-level settings ``frames_path``, ``save_roi_path``,
    ``input_size``, ``channel``, ``width`` and ``height``. For each image a
    horizontally centred strip is resized to the network input size, the
    network's ``scale``/``center`` outputs are turned into a bounding box in
    full-frame coordinates, the crop is written to ``save_roi_path`` and one
    row is appended to ``test_images.csv`` inside ``frames_path``.

    Raises:
        NotADirectoryError: If ``frames_path`` is not a directory.
    """
    if not os.path.isdir(save_roi_path):
        os.mkdir(save_roi_path)

    if os.path.isdir(frames_path):
        image_list = [
            os.path.join(frames_path, file) for file in os.listdir(frames_path)
            if file.endswith('.jpg')
        ]
        print("Record {} image completely from  {} ".format(
            len(image_list), frames_path))
    else:
        raise NotADirectoryError(frames_path + " isn't a directory.")

    # network input
    image_tf = tf.placeholder(tf.float32,
                              shape=(1, input_size, input_size,
                                     channel))  # h, w, c
    hand_side_tf = tf.constant([[1.0, 0.0]
                                ])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
    keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(sess)

    start = time.time()
    value_list = []

    column_name = [
        'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'
    ]

    # The crop window is the same for every frame, so compute it once.
    # NOTE(review): the strip is (width - height) pixels wide, not a square of
    # side ``height`` -- confirm this is the intended centre crop.
    dist = (width - height) // 2
    x1, x2, y1, y2 = width // 2 - dist, width // 2 + dist, 0, height

    for num, image_name in enumerate(image_list, 1):
        filename = os.path.basename(image_name)  # extract the filename
        name, ext = os.path.splitext(filename)
        label = which_label(name)
        image_bgr = cv2.imread(image_name)
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

        if num % 100 == 1:
            print("Process the image : " + name +
                  " ... {}/{}".format(num, len(image_list)))

        image_v = image_rgb[y1:y2, x1:x2]  # centre strip used for labelling
        # ``interpolation`` must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, so the flag was never applied.
        image_v = cv2.resize(image_v, (input_size, input_size),
                             interpolation=cv2.INTER_AREA)
        # Scale pixel values to [-0.5, 0.5] and add the batch dimension.
        image_v = np.expand_dims(
            (np.array(image_v).astype('float') / 255.0) - 0.5, 0)

        # Only scale and centre are needed for the box, so fetch just those.
        scale_v, center_v = sess.run([scale_tf, center_tf],
                                     feed_dict={image_tf: image_v})

        # left top right bottom
        y_c, x_c = np.squeeze(center_v)
        # Reduce the scale to a plain float so int() below operates on
        # scalars rather than on one-element arrays.
        half_side_len = 128.0 / float(np.squeeze(scale_v))
        x = max(x1 + int(x_c - half_side_len), 0)
        y = max(y1 + int(y_c - half_side_len), 0)
        xmax = min(x1 + int(x_c + half_side_len), width)
        ymax = min(y1 + int(y_c + half_side_len), height)

        image_crop = image_bgr[y:ymax, x:xmax]
        res_path = os.path.join(save_roi_path, filename)
        cv2.imwrite(res_path, image_crop)

        value_list.append(
            (filename, width, height, label, x, y, xmax, ymax))

    xml_df = pd.DataFrame(value_list, columns=column_name)
    xml_df.to_csv(os.path.join(frames_path, 'test_images.csv'), index=False)
    endt = time.time()
    print("Handle all images completely and elapsed_time is {:.2f}s.".format(
        endt - start))
def main(argv=None):
    """Evaluate a PoseNet snapshot on GANerate data and export a frozen graph.

    Restores ``./snapshots_posenet/model-42``, writes the frozen graph next
    to it as ``model-42.pb``, then runs 100 evaluation batches. For each
    batch the per-keypoint pixel error between the heatmap arg-max and the
    ground-truth coordinates is computed and a four-panel figure is saved
    under ``./snapshots_posenet/baseline/image/``. The mean pixel error is
    printed, followed by the mean heatmap loss.

    Args:
        argv: Unused; kept for the conventional entry-point signature.
    """
    import os  # local import: only needed to create the figure directory

    num_batches = 100

    # NOTE: an earlier revision opened a first tf.Session here (GPU memory
    # fraction 0.95) that was never used or closed. It was removed so that
    # the ``allow_growth`` session below is the first session created here.

    # get dataset
    dataset_GANerate = GANerate(batchnum=32)
    image_crop_eval, keypoint_uv21_eval, keypoint_uv_heatmap_eval, keypoint_xyz21_normed_eval = dataset_GANerate.get_batch_data_eval

    # build network
    evaluation = tf.placeholder_with_default(True, shape=())
    net = ColorHandPose3DNetwork()

    # Adding 0 gives the input tensor a stable, well-known node name.
    image_crop_eval = tf.add(image_crop_eval,
                             0,
                             name='input_node_representations')
    keypoints_scoremap_eval = net.inference_pose2d(image_crop_eval, train=True)
    s = keypoint_uv_heatmap_eval.get_shape().as_list()
    keypoints_scoremap_eval = [
        tf.image.resize_images(x, (s[1], s[2]))
        for x in keypoints_scoremap_eval
    ]

    # Loss: summed RMS heatmap error over every network stage.
    loss_eval = 0.0
    for pred_item in keypoints_scoremap_eval:
        loss_eval += tf.reduce_sum(
            tf.sqrt(
                tf.reduce_mean(tf.square(pred_item - keypoint_uv_heatmap_eval),
                               [1, 2])))
    # Only the last stage is exported and visualised.
    keypoints_scoremap_eval = keypoints_scoremap_eval[-1]
    keypoints_scoremap_eval = tf.add(keypoints_scoremap_eval,
                                     0,
                                     name='final_output_node_representations')
    init = tf.global_variables_initializer()
    config = tf.ConfigProto()
    # occupy gpu gracefully
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init.run()
        checkpoint_path = './snapshots_posenet'
        model_name = 'model-42'
        if checkpoint_path:
            saver = tf.train.Saver(max_to_keep=10)
            saver.restore(sess, checkpoint_path + '/' + model_name)
            print("restore from " + checkpoint_path + '/' + model_name)
        create_pb = True
        if create_pb:
            input_graph_def = sess.graph.as_graph_def()
            variable_names = [v.name for v in input_graph_def.node]
            print(
                '==================Model Analysis Report variable_names======================'
            )
            print(variable_names)
            print(
                '==================Model Analysis Report operations======================'
            )
            for op in sess.graph.get_operations():
                print(str(op.name))
            stats_graph(sess.graph)
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess,  # The session
                input_graph_def,  # input_graph_def is useful for retrieving the nodes
                ['final_output_node_representations'])

            with tf.gfile.FastGFile(checkpoint_path + '/' + model_name + ".pb",
                                    "wb") as f:
                f.write(output_graph_def.SerializeToString())

        print("Start testing...")
        path = './snapshots_posenet/baseline'
        image_dir = path + '/image/'
        # savefig does not create missing folders.
        os.makedirs(image_dir, exist_ok=True)

        loss_eval_sum = 0.0
        loss_piex_save = 0.0
        for one_epoch in tqdm(range(num_batches)):
            image, heatmap, heatmap_pre, keypoint_uv21, loss_eval_v = sess.run(
                [
                    image_crop_eval, keypoint_uv_heatmap_eval,
                    keypoints_scoremap_eval, keypoint_uv21_eval, loss_eval
                ])
            # Accumulate; the old code overwrote the value every batch and
            # then divided only the last batch's loss by the batch count.
            loss_eval_sum += loss_eval_v

            # Undo the [-0.5, 0.5] normalisation for display.
            image = (image + 0.5) * 255
            image = image.astype(np.int16)

            # Find the arg-max position of every predicted heatmap channel.
            keypoint_uv21_pre = np.zeros_like(keypoint_uv21)
            for i in range(heatmap_pre.shape[0]):
                for j in range(heatmap_pre.shape[-1]):
                    heatmap_pre_tmp = heatmap_pre[i, :, :, j]
                    cor_tmp = unravel_index(heatmap_pre_tmp.argmax(),
                                            heatmap_pre_tmp.shape)
                    # unravel_index yields (row, col); stored as (col, row).
                    keypoint_uv21_pre[i, j, 0] = cor_tmp[1]
                    keypoint_uv21_pre[i, j, 1] = cor_tmp[0]

            # Euclidean pixel distance per sample and keypoint.
            loss_piex = keypoint_uv21_pre - keypoint_uv21
            loss_piex = np.sqrt(
                np.square(loss_piex[:, :, 0]) + np.square(loss_piex[:, :, 1]))
            loss_piex_save = loss_piex_save + np.mean(loss_piex)

            # visualize (first sample of the batch only)
            fig = plt.figure(1)
            plt.clf()
            ax1 = fig.add_subplot(221)
            ax1.imshow(image[0])
            plot_hand(keypoint_uv21[0], ax1)

            ax3 = fig.add_subplot(223)
            ax3.imshow(image[0])
            ax3.set_title(str(loss_piex[0, :].astype(np.int32)), fontsize=5)
            plot_hand(keypoint_uv21_pre[0], ax3)
            plot_hand(keypoint_uv21[0], ax3)

            ax2 = fig.add_subplot(222)
            ax4 = fig.add_subplot(224)
            # Ground-truth heatmaps of the first batch entry, channels summed.
            ax2.imshow(np.sum(heatmap[0], axis=-1))
            ax2.scatter(keypoint_uv21[0, :, 0],
                        keypoint_uv21[0, :, 1],
                        s=10,
                        c='k',
                        marker='.')
            # Predicted heatmaps of the first batch entry, channels summed.
            ax4.imshow(np.sum(heatmap_pre[0], axis=-1))
            ax4.scatter(keypoint_uv21_pre[0, :, 0],
                        keypoint_uv21_pre[0, :, 1],
                        s=10,
                        c='k',
                        marker='.')

            plt.savefig(image_dir + str(one_epoch).zfill(5) + '.png')

        loss_eval_v = loss_eval_sum / num_batches
        loss_piex_save = loss_piex_save / num_batches
        print(loss_piex_save)  # 4.472415127649567 in a recorded run
        print("mean heatmap loss: " + str(loss_eval_v))
Ejemplo n.º 9
0
def cashbox():
    """Monitor a camera stream for cash-box open/close and hand events.

    Every frame is cropped to a fixed region and thresholded with two colour
    ranges (box and hand). Roughly every two seconds the non-zero pixel
    counts of both masks are compared with fixed thresholds to update the
    box/hand state, print status messages and, when a hand is detected while
    the box is open, call ``doHandPoseEstimate``. The loop ends when ESC is
    pressed or when the stream stops delivering frames; the capture, the
    windows and the TensorFlow session are released in every case.
    """
    ## initialize variables
    arr = [True, False, False, False]
    LOWERB = np.array([0, 0, 0])
    UPPERB = np.array([35, 35, 35])
    LOWERB_HAND = np.array([40, 60, 100])
    UPPERB_HAND = np.array([80, 100, 140])
    # SECURITY NOTE(review): the stream URL embeds a user name and password
    # in source code; move the credentials to configuration.
    cap = cv2.VideoCapture(
        'http://192.168.1.38:15490/videostream.cgi?user=admin&pwd=A2345678901')
    t = time.time()
    boxOpen = False
    handUp = False
    threshold = 85000
    thresholdHand = 2000
    startCount = False
    start = time.time()

    ## for handpose usage
    image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]
                                ])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    ## build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,\
    keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    ## Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    ## pass argument
    args = {
        'image_tf': image_tf,
        'hand_side_tf': hand_side_tf,
        'evaluation': evaluation,
        'net': net,
        'hand_scoremap_tf': hand_scoremap_tf,
        'image_crop_tf': image_crop_tf,
        'scale_tf': scale_tf,
        'center_tf': center_tf,
        'keypoints_scoremap_tf': keypoints_scoremap_tf,
        'keypoint_coord3d_tf': keypoint_coord3d_tf
    }

    try:
        ## initialize network
        net.init(sess)

        ## main script
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of crashing on
                # frame.copy().
                print('Failed to read frame from video stream.')
                break

            frame_cpy = frame.copy()
            frame = frame[90:190, 90:270]
            mask = cv2.inRange(frame, LOWERB, UPPERB)
            maskHand = cv2.inRange(frame, LOWERB_HAND, UPPERB_HAND)

            cv2.imshow('mask', mask)
            cv2.imshow('mask_hand', maskHand)
            cv2.imshow('large frame', frame_cpy)
            cv2.imshow('small frame', frame_cpy[90:190, 90:270])

            if time.time() - t > 2:
                t = time.time()
                # The masks do not change within one tick, so count once.
                hand_pixels = np.count_nonzero(maskHand)
                box_score = np.count_nonzero(mask) * 10
                hand_score = hand_pixels * 10
                print('box: ', box_score)
                print('hand: ', hand_score)

                # Uses boxOpen/handUp as left by the previous tick.
                if boxOpen and handUp and box_score < threshold:
                    start = time.time()
                    timenow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    print(timenow, 'CASH BOX CLOSED!', '*ref: ', box_score)
                    print('-----------------------------------------')
                    print('-----------------------------------------')
                    print('----------TRANSACTION ENDED--------------')
                    print('-----------------------------------------')
                    print('-----------------------------------------')
                    arr[3] = True
                    startCount = False
                else:
                    arr[3] = False

                if boxOpen and hand_score > thresholdHand:
                    timenow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    # The reference printed here is the raw pixel count.
                    print(timenow, 'HAND DETECTED.', '*ref: ', hand_pixels)
                    doHandPoseEstimate(frame, sess, args)
                    arr[2] = True
                    handUp = True
                    startCount = False
                else:
                    arr[2] = False
                    handUp = False

                if box_score > threshold:
                    if not startCount:
                        start = time.time()
                        startCount = True
                    timenow = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    print(timenow, 'CASH BOX OPENED!', '*ref: ', box_score)
                    arr[1] = True
                    boxOpen = True
                else:
                    arr[1] = False
                    boxOpen = False

                print(time.time() - start, startCount)
                if (time.time() - start > 10) and startCount:
                    print('Warning : Cash Box opened for more than 10 secs!')
                print('Current Status : ' + str([x * 1 for x in arr]))
                print('--------------------------------------------')

            # ESC (key code 27) ends monitoring.
            if cv2.waitKey(1) == 27:
                break
    finally:
        # Runs on normal exit and on errors alike.
        cap.release()
        cv2.destroyAllWindows()
        sess.close()
Ejemplo n.º 10
0
def main():
    """Detect the hand box in every ``.jpg`` image and export the boxes as CSV.

    Reads the module-level settings ``image_path``, ``save_path``,
    ``root_path``, ``width``, ``height`` and ``channel``. Each image is
    resized to ``(width, height)``, run through the network, annotated with
    the box derived from the ``scale``/``center`` outputs and saved at half
    size to ``save_path`` with a ``_v`` suffix. All boxes are written to
    ``<root_path>/<dirname>.csv``.

    Raises:
        FileNotFoundError: If ``image_path`` is not a directory.
    """
    if not os.path.isdir(save_path):
        os.mkdir(save_path)
    # image to be read
    image_list = list()
    # NOTE(review): taking element [-2] presumably relies on ``image_path``
    # ending with a path separator -- confirm against how it is configured.
    dirname = image_path.split(os.sep)[-2]
    if os.path.isdir(image_path):
        for file in os.listdir(image_path):
            if file.endswith('.jpg'):
                image_list.append(os.path.join(image_path, file))
        print("Record all video completely from " + image_path)
    else:
        raise FileNotFoundError(image_path + " doesn't exist.")

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, height, width, channel))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
    keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # initialize network
    net.init(sess)

    start = time.time()
    value_list = []
    for num, image_name in enumerate(image_list, 1):
        filename = os.path.basename(image_name)  # extract the filename
        name, ext = os.path.splitext(filename)
        if num % 200 == 1:
            print("Process the image : " + name + " ... {}/{}".format(num, len(image_list)))
        image_raw = cv2.imread(image_name)
        # ``interpolation`` must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, so the flag was never applied.
        image_raw = cv2.resize(image_raw, (width, height),
                               interpolation=cv2.INTER_AREA)
        image_raw = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
        # Scale pixel values to [-0.5, 0.5] and add the batch dimension.
        image_v = np.expand_dims((np.array(image_raw).astype('float') / 255.0) - 0.5, 0)

        # Only scale and centre are needed for the box, so fetch just those.
        scale_v, center_v = sess.run([scale_tf, center_tf],
                                     feed_dict={image_tf: image_v})

        # left top right bottom
        y_c, x_c = np.squeeze(center_v)
        # Reduce the scale to a plain float so int() below operates on
        # scalars rather than on one-element arrays.
        half_side_len = 128.0 / float(np.squeeze(scale_v))
        # The box is narrowed horizontally to 90 % of the half side length.
        x, y = int(x_c - half_side_len * 0.9), int(y_c - half_side_len)
        xmax, ymax = int(x_c + half_side_len * 0.9), int(y_c + half_side_len)
        # Clamp the box to the image bounds.
        x, y, xmax, ymax = max(x, 0), max(y, 0), min(xmax, width), min(ymax, height)
        cv2.rectangle(image_raw, (x, y), (xmax, ymax), (77, 255, 9), 1, 1)
        res_img = name + '_v' + ext
        res_roi = cv2.cvtColor(image_raw, cv2.COLOR_RGB2BGR)
        preview = cv2.resize(res_roi, (width // 2, height // 2),
                             interpolation=cv2.INTER_AREA)
        cv2.imwrite(os.path.join(save_path, res_img), preview)
        value_list.append(
            (filename, width, height, 'hand', x, y, xmax, ymax))

    endt = time.time()
    print("Handle all images completely and elapsed_time is {:.2f}s.".format(endt-start))
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(value_list, columns=column_name)
    xml_df.to_csv(os.path.join(root_path, dirname + '.csv'), index=False)
    print('Successfully converted image to csv. --- ' + dirname)
Ejemplo n.º 11
0
 def __init__(self):
     """Set the fixed configuration values and create the hand-pose network."""
     # Sizes of the fully connected part.
     self.fully_connected_layers_size = 32
     self.n_fully_connected_layers = 2
     # Keypoint and class counts.
     self.n_classes = 5
     self.num_kp = 21
     # Network instance, created last as before.
     self.color_hand_pose_net = ColorHandPose3DNetwork()
Ejemplo n.º 12
0
def get_pic(image_list):
    """Run hand-pose estimation on each image and return the rendered figures.

    Every image is shrunk with ``make_it_small.small``, has its first and
    third channel swapped in place, and is passed through the network. A
    four-panel figure (input with keypoints, cropped hand, hand score map,
    3D pose) titled with ``description_of_hand_position`` is saved as
    ``imgs/<index>.png`` and read back.

    Args:
        image_list: Iterable of image arrays.

    Returns:
        List with one image (as loaded by ``imageio.imread``) per input.
    """
    import os  # local import: only needed to create the output directory

    # network input
    image_tf = tf.placeholder(tf.float32, shape=(1, 320, 240, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,\
    keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(image_tf, hand_side_tf, evaluation)

    # Start TF (default session configuration)
    sess = tf.Session()

    final = list()
    try:
        # initialize network
        net.init(sess)

        # savefig does not create missing folders.
        os.makedirs("imgs", exist_ok=True)

        # Feed image list through network
        for i, image in enumerate(tqdm(image_list)):
            image_raw = make_it_small.small(image)

            # Swap channels 0 and 2 in place. Fancy indexing on the right
            # copies first, so this is a true swap, replacing the former
            # per-pixel Python loop.
            image_raw[:, :, [0, 2]] = image_raw[:, :, [2, 0]]

            # Scale pixel values to [-0.5, 0.5] and add the batch dimension.
            image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

            hand_scoremap_v, image_crop_v, scale_v, center_v,\
            keypoints_scoremap_v, keypoint_coord3d_v = sess.run([hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                                                                 keypoints_scoremap_tf, keypoint_coord3d_tf],
                                                                feed_dict={image_tf: image_v})

            # The description is computed before the batch axis is removed.
            img_angle = description_of_hand_position(keypoint_coord3d_v)
            hand_scoremap_v = np.squeeze(hand_scoremap_v)
            image_crop_v = np.squeeze(image_crop_v)
            keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
            keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

            # post processing
            image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
            coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
            coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

            # visualize
            fig = plt.figure(1)
            ax1 = fig.add_subplot(221)
            ax2 = fig.add_subplot(222)
            ax3 = fig.add_subplot(223)
            ax4 = fig.add_subplot(224, projection='3d')
            ax1.imshow(image_raw)
            plot_hand(coord_hw, ax1)
            ax2.imshow(image_crop_v)
            plot_hand(coord_hw_crop, ax2)
            ax3.imshow(np.argmax(hand_scoremap_v, 2))
            plot_hand_3d(keypoint_coord3d_v, ax4)
            ax4.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
            ax4.set_xlim([-3, 3])
            ax4.set_ylim([-3, 1])
            ax4.set_zlim([-3, 3])
            fig.suptitle(img_angle, fontsize=10)

            # Round-trip through a PNG to obtain the rendered figure as an
            # image array.
            out_path = "imgs/{}.png".format(str(i))
            plt.savefig(out_path)
            plt.close(fig)
            final.append(imageio.imread(out_path))
    finally:
        # The session was previously left open after the function returned.
        sess.close()
    return final