Example #1
    def __init__(self, target_ip):

        self.CWD_PATH = os.getcwd()
        self.CWD_PATH = os.path.abspath(os.path.join(self.CWD_PATH, os.pardir))
        self.CWD_PATH = os.path.join(self.CWD_PATH, '3_BRobot')

        # Path to frozen detection graph. This is the actual model that is used for the object detection.
        MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
        PATH_TO_CKPT = os.path.join(self.CWD_PATH, 'object_detection',
                                    MODEL_NAME, 'frozen_inference_graph.pb')

        # List of strings used to add the correct label to each box.
        PATH_TO_LABELS = os.path.join(self.CWD_PATH, 'object_detection',
                                      'data', 'mscoco_label_map.pbtxt')

        NUM_CLASSES = 90

        # Loading label map
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.right_clicks = []

        # self.right_clicks = [[375, 41], [1000, 709]]
        # mouse callback function
        def mouse_callback(event, x, y, flags, params):
            # right-click (cv2.EVENT_RBUTTONDOWN == 2)
            if event == cv2.EVENT_RBUTTONDOWN:
                if len(self.right_clicks) < 2:
                    self.right_clicks.append([x, y])
                else:
                    self.right_clicks = [[x, y]]

                print(self.right_clicks)

        CAM_ID = 1

        self.cam = cv2.VideoCapture(int(CAM_ID))

        self.window_name = 'Cam' + str(CAM_ID)
        cv2.namedWindow(self.window_name)
        cv2.setMouseCallback(self.window_name, mouse_callback)

        self.prevTime = 0
        self.window_size = (1312, 736)

        if not self.cam.isOpened():
            print("Can't open the CAM(%d)" % CAM_ID)
            exit()

        self.face_queue = Queue()
        self.gender_queue = Queue()
        self.age_queue = Queue()

        self.process_gender = Process(target=gender_estimate,
                                      args=(self.face_queue,
                                            self.gender_queue))
        self.process_gender.start()

        self.process_age = Process(target=age_estimate,
                                   args=(self.face_queue, self.age_queue))
        self.process_age.start()

        self.w = self.window_size[0]
        self.h = self.window_size[1]
        self.e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                 target_size=(self.w, self.h))
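
    # Hedged sketch (not part of the original class): one way the capture and the
    # pose estimator configured above could be used on a single frame.
    def estimate_frame(self):
        ret_val, frame = self.cam.read()
        if not ret_val:
            return None
        frame = cv2.resize(frame, self.window_size)
        humans = self.e.inference(frame, resize_to_default=True,
                                  upsample_size=4.0)
        frame = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        cv2.imshow(self.window_name, frame)
        return humans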
Example #2
import glob
import cv2
import numpy as np
import errno
from DNN import DNN
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path
from functions import draw_landmarks, convert_landscape_potrait, write_text_image

import matplotlib.pyplot as plt
model = "mobilenet_thin"
landmark_color = [0, 255, 0]
pose_classifier = DNN()
pose_classifier.load('/home/hrishi/workspace/repo/PoseEstimation2/model/pose_classifier.h5')
width = 640
height = 480
e = TfPoseEstimator(get_graph_path(model), target_size=(width, height))
landmarks_count = 18
required_landmarks_count = 8  # We only need 8 landmarks for our model
frame_counter = 0
frame_per_clip = 10
significant_frame_counter = 0
files = glob.glob("Sample/*")
estimator = TfPoseEstimator(get_graph_path(model), target_size=(width, height))
expected_landmarks = 18
frame_counter = 0
boundary_frames_seperator = 15
frame_interval = 1
for i in range(len(files)):
    file = files[i]
    boundary_frames = 0
    try:
Example #3
class SkeletonDetector(object):
    # This class is mainly copied from https://github.com/ildoonet/tf-pose-estimation

    def __init__(self, model="cmu", image_size="432x368"):
        ''' Arguments:
            model {str}: "cmu" or "mobilenet_thin".
            image_size {str}: resize input images before they are processed.
                Recommended sizes: 432x368, 336x288, 304x240, 656x368.
        '''
        # -- Check input
        assert(model in ["mobilenet_thin", "cmu"])
        self._w, self._h = _get_input_img_size_from_string(image_size)
        
        # -- Set up openpose model
        self._model = model
        self._resize_out_ratio = 4.0 # Resize heatmaps before they are post-processed. If image_size is small, this should be large.
        self._config = _set_config()
        self._tf_pose_estimator = TfPoseEstimator(
            get_graph_path(self._model), 
            target_size=(self._w, self._h),
            tf_config=self._config)
        self._prev_t = time.time()
        self._cnt_image = 0
        
        # -- Set logger
        self._logger = _set_logger()
        

    def detect(self, image):
        ''' Detect human skeleton from image.
        Arguments:
            image: RGB image with arbitrary size. It will be resized to (self._w, self._h).
        Returns:
            humans {list of class Human}: 
                `class Human` is defined in 
                "src/githubs/tf-pose-estimation/tf_pose/estimator.py"
                
                The variable `humans` is returned by the function
                `TfPoseEstimator.inference` which is defined in
                `src/githubs/tf-pose-estimation/tf_pose/estimator.py`.

                I've written a function `self.humans_to_skels_list` to 
                extract the skeleton from this `class Human`. 
        '''

        self._cnt_image += 1
        if self._cnt_image == 1:
            self._image_h = image.shape[0]
            self._image_w = image.shape[1]
            self._scale_h = 1.0 * self._image_h / self._image_w
        t = time.time()

        # Do inference
        humans = self._tf_pose_estimator.inference(
            image, resize_to_default=(self._w > 0 and self._h > 0),
            upsample_size=self._resize_out_ratio)

        # Print result and time cost
        elapsed = time.time() - t
        self._logger.info('inference image in %.4f seconds.' % (elapsed))

        return humans
    
    def draw(self, img_disp, humans):
        ''' Draw human skeleton on img_disp inplace.
        Argument:
            img_disp {RGB image}
            humans {a class returned by self.detect}
        '''
        img_disp = TfPoseEstimator.draw_humans(img_disp, humans, imgcopy=False)
        if IS_DRAW_FPS:
            cv2.putText(img_disp,
                        "fps = {:.1f}".format( (1.0 / (time.time() - self._prev_t) )),
                        (10, 30),  cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 0, 255), 2)
        self._prev_t = time.time()

    def humans_to_skels_list(self, humans, scale_h = None): 
        ''' Get skeleton data of (x, y * scale_h) from humans.
        Arguments:
            humans {a class returned by self.detect}
            scale_h {float}: scale each skeleton's y coordinate (height) value.
                Default: (image_height / image_width).
        Returns:
            skeletons {list of list}: a list of skeleton.
                Each skeleton is also a list with a length of 36 (18 joints * 2 coord values).
            scale_h {float}: the resulting height (y coordinate) range.
                The x coordinate is between [0, 1].
                The y coordinate is between [0, scale_h]
        '''
        if scale_h is None:
            scale_h = self._scale_h
        skeletons = []
        NaN = 0  # placeholder for joints that were not detected
        for human in humans:
            skeleton = [NaN]*(18*2)
            for i, body_part in human.body_parts.items(): # iterate dict
                idx = body_part.part_idx
                skeleton[2*idx]=body_part.x
                skeleton[2*idx+1]=body_part.y * scale_h
            skeletons.append(skeleton)
        return skeletons, scale_h
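
A minimal usage sketch for SkeletonDetector (not part of the original file; the camera index is an assumption, and the module-level helpers _set_config, _set_logger and IS_DRAW_FPS are assumed to be defined as in the original source):

if __name__ == '__main__':
    detector = SkeletonDetector(model="mobilenet_thin", image_size="432x368")
    cam = cv2.VideoCapture(0)
    ret_val, img = cam.read()
    humans = detector.detect(img)
    skeletons, scale_h = detector.humans_to_skels_list(humans)
    detector.draw(img, humans)
    cv2.imshow("skeleton", img)
    cv2.waitKey(0)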
Example #4
#from tf_pose import common
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
from tracking_3 import tracking_function
from search_weld import search_weld

# Path of the video to read
movie_file = 'test.mp4'
output_file = 'test_result7.mp4'
output_csv1 = "id_list_add_weld7.csv"

# Prepare tf-pose
model = 'cmu'
w, h = model_wh('432x368')
if w == 0 or h == 0:
    e = TfPoseEstimator(get_graph_path(model), target_size=(432, 368))
else:
    e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))

# Open the target video
vc = cv2.VideoCapture(movie_file)

# Prepare the output writer
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
fps = int(vc.get(cv2.CAP_PROP_FPS))
size = (
    int(vc.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
)
vw = cv2.VideoWriter(output_file, fourcc, fps,  size)
Example #5
    parser.add_argument('--frame',
                        type=float,
                        default=1,
                        help='the frame percentage of the total frame count')
    parser.add_argument('--frameterm', type=int, default=1, help='frame term')

    args = parser.parse_args()
    movie = cv2.VideoCapture(args.movie)

    # get total frame count
    count = movie.get(cv2.CAP_PROP_FRAME_COUNT)

    w, h = model_wh(args.resize)

    if w == 0 or h == 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # get 2d estimation result
    dfs = pd.DataFrame(index=[])
    columns = ['flame', 'human', 'point', 'x', 'y']

    # per frame
    for i in range(0, int(args.frame * count)):
        _, frame = movie.read()

        # only process every frameterm-th frame
        if i % int(args.frameterm) != 0:
            continue
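
        # Hedged sketch (the original loop body is truncated here): run inference on
        # this frame and record each detected keypoint with the columns defined above.
        humans = e.inference(frame,
                             resize_to_default=(w > 0 and h > 0),
                             upsample_size=4.0)
        rows = [[i, human_idx, body_part.part_idx, body_part.x, body_part.y]
                for human_idx, human in enumerate(humans)
                for body_part in human.body_parts.values()]
        dfs = pd.concat([dfs, pd.DataFrame(rows, columns=columns)],
                        ignore_index=True)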
Example #6
     "Neck x, Neck y, Neck score, " +
     "RShoulder x, RShoulder y, RShoulder score, " +
     "RElbow x, RElbow y, RElbow score, " +
     "RWrist x, RWrist y, RWrist score, " +
     "LShoulder x, LShoulder y, LShoulder score, " +
     "LElbow x, LElbow y, LElbow score, " +
     "LWrist x, LWrist y, LWrist score, " + "RHip x, RHip y, RHip score, " +
     "RKnee x, RKnee y, RKnee score, " +
     "RAnkle x, RAnkle y, RAnkle score, " + "LHip x, LHip y, LHip score, " +
     "LKnee x, LKnee y, LKnee score, " +
     "LAnkle x, LAnkle y, LAnkle score, " + "REye x, REye y, REye score, " +
     "LEye x, LEye y, LEye score, " + "REar x, REar y, REar score, " +
     "LEar x, LEar y, LEar score, " +
     "Background x, Background y, Background score\n")
 if FLIP:
     e = TfPoseEstimator(get_graph_path("mobilenet_thin"),
                         target_size=(368, 432))
 else:
     e = TfPoseEstimator(get_graph_path("mobilenet_thin"),
                         target_size=(432, 368))
 for is_variable, data_type in enumerate(data_types):
     files = os.listdir(path + data_type)
     files.sort()
     for image_name in files:
         t = time.time()
         split_image_name = image_name.split(".")
         if split_image_name[-1] != "jpg":
             continue
         index = int(image_name.split(".")[0])
         img = common.read_imgfile(path + data_type + "/" + image_name,
                                   None, None)
         humans = e.inference(img,
Example #7
        type=bool,
        default=False,
        help='for debug purpose, if enabled, speed for inference is dropped.')

    parser.add_argument('--tensorrt',
                        type=str,
                        default="False",
                        help='for tensorrt process.')
    args = parser.parse_args()

    logger.debug('initialization %s : %s' %
                 (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resize)
    if w > 0 and h > 0:
        e = TfPoseEstimator(get_graph_path(args.model),
                            target_size=(w, h),
                            trt_bool=str2bool(args.tensorrt))
    else:
        e = TfPoseEstimator(get_graph_path(args.model),
                            target_size=(432, 368),
                            trt_bool=str2bool(args.tensorrt))
    logger.debug('cam read+')
    cam = cv2.VideoCapture(args.video)
    ret_val, image = cam.read()
    logger.info('cam image=%dx%d' % (image.shape[1], image.shape[0]))

    while True:
        ret_val, image = cam.read()

        logger.debug('image process+')
        humans = e.inference(image,
Example #8
logger = logging.getLogger('TfPoseEstimatorRun')
logger.handlers.clear()
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

if __name__ == '__main__':

    w, h = model_wh('432x368')
    if w == 0 or h == 0:
        e = TfPoseEstimator(get_graph_path(
            '/content/myWorkspace/tf-pose-estimation/models/graph/cmu'),
                            target_size=(432, 368))
    else:
        e = TfPoseEstimator(get_graph_path('cmu'), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile('images/handsup1.jpg')

    t = time.time()
    r = 4.0
    humans = e.inference(image,
                         resize_to_default=(w > 0 and h > 0),
                         upsample_size=r)
    elapsed = time.time() - t

    #logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))
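    # Hedged continuation sketch (not in the original excerpt; assumes cv2 is
    # imported and the output filename is an assumption): draw the detected
    # skeletons and write the result to disk.
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    cv2.imwrite('images/handsup1_result.png', image)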
Example #9
class Terrain(object):
    def __init__(self):
        """
        Initialize the graphics window and mesh surface
        """

        # setup the view window
        self.app = QtGui.QApplication(sys.argv)
        self.window = gl.GLViewWidget()
        self.window.setWindowTitle('Terrain')
        self.window.setGeometry(0, 110, 1920, 1080)
        self.window.setCameraPosition(distance=30, elevation=12)
        self.window.show()

        gx = gl.GLGridItem()
        gy = gl.GLGridItem()
        gz = gl.GLGridItem()
        gx.rotate(90, 0, 1, 0)
        gy.rotate(90, 1, 0, 0)
        gx.translate(-10, 0, 0)
        gy.translate(0, -10, 0)
        gz.translate(0, 0, -10)
        self.window.addItem(gx)
        self.window.addItem(gy)
        self.window.addItem(gz)

        model = 'mobilenet_thin'
        camera = 0

        self.lines = {}
        self.connection = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6],
                           [0, 7], [7, 8], [8, 9], [9, 10], [8, 11], [11, 12],
                           [12, 13], [8, 14], [14, 15], [15, 16]]

        w, h = model_wh('432x368')
        self.e = TfPoseEstimator(get_graph_path(model), target_size=(w, h))
        self.cam = cv2.VideoCapture(camera)
        ret_val, image = self.cam.read()
        matlabfile = os.path.join(os.getcwd(), 'tf-pose-estimation',
                                  'prob_model_params.mat')
        self.poseLifting = Prob3dPose(matlabfile)
        keypoints = self.mesh(image)

        self.points = gl.GLScatterPlotItem(pos=keypoints,
                                           color=pg.glColor((0, 255, 0)),
                                           size=15)
        self.window.addItem(self.points)

        for n, pts in enumerate(self.connection):
            self.lines[n] = gl.GLLinePlotItem(pos=np.array(
                [keypoints[p] for p in pts]),
                                              color=pg.glColor((0, 0, 255)),
                                              width=3,
                                              antialias=True)
            self.window.addItem(self.lines[n])

    def mesh(self, image):
        image_h, image_w = image.shape[:2]
        width = 640
        height = 480
        pose_2d_mpiis = []
        visibilities = []

        humans = self.e.inference(image, upsample_size=4.0)

        for human in humans:
            pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
            pose_2d_mpiis.append([(int(x * width + 0.5), int(y * height + 0.5))
                                  for x, y in pose_2d_mpii])
            visibilities.append(visibility)

        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)

        transformed_pose2d, weights = self.poseLifting.transform_joints(
            pose_2d_mpiis, visibilities)
        pose_3d = self.poseLifting.compute_3d(transformed_pose2d, weights)
        keypoints = pose_3d[0].transpose()

        return keypoints / 80

    def update(self):
        """
        update the mesh and shift the noise each time
        """
        ret_val, image = self.cam.read()
        try:
            keypoints = self.mesh(image)
        except AssertionError:
            print('body not in image')
        else:
            self.points.setData(pos=keypoints)

            for n, pts in enumerate(self.connection):
                self.lines[n].setData(pos=np.array([keypoints[p]
                                                    for p in pts]))

    def start(self):
        """
        get the graphics window open and setup
        """
        if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
            QtGui.QApplication.instance().exec_()

    def animation(self, frametime=10):
        """
        calls the update method to run in a loop
        """
        timer = QtCore.QTimer()
        timer.timeout.connect(self.update)
        timer.start(frametime)
        self.start()
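
A short usage sketch (the entry point is an assumption, not shown in the excerpt):

if __name__ == '__main__':
    t = Terrain()
    t.animation(frametime=10)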
Example #10
        default='0x0',
        help='if provided, resize images before they are processed. '
        'default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ')
    parser.add_argument(
        '--resize-out-ratio',
        type=float,
        default=4.0,
        help=
        'if provided, resize heatmaps before they are post-processed. default=1.0'
    )

    args = parser.parse_args()

    w, h = model_wh(args.resize)
    if w == 0 or h == 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))
    else:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    if image is None:
        logger.error('Image can not be read, path=%s' % args.image)
        sys.exit(-1)

    t = time.time()
    humans = e.inference(image,
                         resize_to_default=(w > 0 and h > 0),
                         upsample_size=args.resize_out_ratio)
    elapsed = time.time() - t
Example #11
if __name__ == '__main__':

    w = 432
    h = 368
    live_x_dict = {}
    live_y_dict = {}
    live_x_arr = []
    live_y_arr = []
    orig_x_dict = {}
    orig_y_dict = {}
    orig_x_arr = []
    orig_y_arr = []

    e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                        target_size=(432, 368),
                        trt_bool=False)
    logger.debug('Image read 1 +')

    ##############################################  LIVE ########################################

    image1 = cv2.imread('INPUTS/input1.jpg')
    humans1 = e.inference(image1,
                          resize_to_default=(w > 0 and h > 0),
                          upsample_size=4.0)

    for human in humans1:
        dict = human.body_parts
        for k, v in dict.items():
            live_x_dict[v.part_idx] = round(v.x, 2)
            live_y_dict[v.part_idx] = round(v.y, 2)
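
    # Hedged sketch (the "ORIGINAL" half of this example is not shown): assuming
    # orig_x_dict / orig_y_dict are filled the same way from a reference image,
    # the two keypoint sets could be compared joint by joint.
    common_joints = set(live_x_dict) & set(orig_x_dict)
    if common_joints:
        mean_dist = sum(
            ((live_x_dict[j] - orig_x_dict[j]) ** 2 +
             (live_y_dict[j] - orig_y_dict[j]) ** 2) ** 0.5
            for j in common_joints) / len(common_joints)
        logger.debug('mean joint distance: %.3f' % mean_dist)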

Example #12
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='tf-pose-estimation run by folder')
    parser.add_argument('--folder', type=str, default=r'.\images')
    parser.add_argument('--resolution', type=str, default='432x368', help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='cmu', help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    parser.add_argument('--resize-out-ratio', type=float, default=4.0,
                        help='if provided, resize heatmaps before they are post-processed. default=1.0')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    ## Dataset preparation
    data_folder = '..\\traffic_pose\\src\\'
    poses = ['go_straight', 'park_right', 'stop', 'turn_right']
    resize_data(data_folder)

    ## Pose estimation
    #data_folder = '..\\traffic_pose\\%s_new\\' %(pose)
    print("___Extracting figures form source foder: %s___" % data_folder)
    pose_estimate(data_folder, args)


    # extract keypoints' x, y coordinates
    src = "../traffic_pose/keypoint_data/"
    data_reshape(src, poses)
Example #13
        ### Loading Person Detector ###
        person_image_tensor, person_tensor_dict = load_tf_ssd_detection_graph(
            PATH_TO_PERSON_DETECTION, input_graph=None)
        main_sess = tf.Session()

        ### Loading the SVM Classifier for Face ID classification ###
        with open(CLASSIFIER_PATH_SVM, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        ### Loading the KNN Classifier for Face Recognition Classifier ###
        with open(CLASSIFIER_PATH_KNN, 'rb') as infile:
            knn_model = pickle.load(infile)

        ### Loading the TF Pose Estimator ###
        w, h = model_wh('432x368')
        e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                            target_size=(w, h))

        with main_sess.as_default():
            ### Creating and Loading MTCNN ###
            pnet, rnet, onet = create_mtcnn(main_sess, None)
            ### Creating and Loading the Facenet Graph ###
            images_placeholder, embeddings, phase_train_placeholder = load_tf_facenet_graph(
                FACENET_MODEL_PATH)

        ### 0 here means start streaming video from webcam
        cap = cv2.VideoCapture(0)
        if cap.isOpened() is False:
            print("Error opening video stream or file")

        while cap.isOpened():
            _, image = cap.read()
Example #14
#         break
#     except ValueError as e:
#         print("Please input a number.")

n_workers = 2
print("Using 2 workers for thread pool.")
futures_q = Queue(maxsize=n_workers)
worker_mgr = None
th_signal = threading.Event()
process_th = None
send_th = None
exc_info = None
exc_thrown = False

estimator = TfPoseEstimator(
    get_graph_path("mobilenet_thin"),
    target_size=(432, 368),
    tf_config=tf.ConfigProto(log_device_placement=True))

w, h = model_wh("432x368")

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

HOST = '0.0.0.0'
PORT = 8089
conn = None
addr = None
connected = False


def wait_for_connection():
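    # Hedged continuation sketch (the original body is truncated here): accept a
    # client on the listening socket configured above.
    global conn, addr, connected
    s.bind((HOST, PORT))
    s.listen(1)
    print("waiting for a connection on %s:%d" % (HOST, PORT))
    conn, addr = s.accept()
    connected = True
    print("connected by", addr)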
Example #15
        help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
    parser.add_argument(
        '--show-process',
        type=bool,
        default=False,
        help='for debug purpose, if enabled, speed for inference is dropped.')
    parser.add_argument('--showBG',
                        type=bool,
                        default=True,
                        help='False to show skeleton only.')
    args = parser.parse_args()

    logger.debug('initialization %s : %s' %
                 (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    cap = cv2.VideoCapture(args.video)

    if cap.isOpened() is False:
        print("Error opening video stream or file")

    i = 0
    while cap.isOpened():
        ret_val, image = cap.read()

        #humans = e.inference(image)
        humans = e.inference(image, resize_to_default=True, upsample_size=4.0)
        if not args.showBG:
            image = np.zeros(image.shape)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
Example #16
def abn():
    print("abn")
    speak.Speak('action detection started')

    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    # Path to frozen detection graph .pb file, which contains the model that is used
    # for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph.pb')

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')

    # Number of classes the object detector can identify
    NUM_CLASSES = 10

    ## Load the label map.
    # Label maps map indices to category names, so that when our convolution
    # network predicts `5`, we know that this corresponds to `king`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier

    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Initialize webcam feed
    video = cv2.VideoCapture(0)
    ret = video.set(3, 1280)
    ret = video.set(4, 720)

    start_time = time.time()
    fps_time = time.time()  # initialise before the loop so the first FPS readout works

    w, h = model_wh(args.resize)
    if w > 0 and h > 0:
        e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    else:
        e = TfPoseEstimator(get_graph_path(args.model),
                            target_size=(1280, 720))

    while True:

        # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
        # i.e. a single-column array, where each item in the column has the pixel RGB value
        ret, frame = video.read()
        frame_expanded = np.expand_dims(frame, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes,
         num) = sess.run([
             detection_boxes, detection_scores, detection_classes,
             num_detections
         ],
                         feed_dict={image_tensor: frame_expanded})

        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.60)
        #print(np.squeeze(classes),np.squeeze(boxes))

        #pose
        humans = e.inference(frame,
                             resize_to_default=(w > 0 and h > 0),
                             upsample_size=args.resize_out_ratio)
        image = TfPoseEstimator.draw_humans(frame, humans, imgcopy=False)
        cv2.putText(frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        if (time.time() - start_time) > 10:
            break

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('ProjectInt', frame)
        fps_time = time.time()

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    video.release()
    cv2.destroyAllWindows()
Example #17
def draw_image(image, humans):
    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imgDebug = Image.fromarray(img)
    return imgDebug
Example #18
    def analyze(self):
        args = parserSetup.parserSetup()
        w, h = model_wh(args.resize)
        e = model(w, h, args)
        pos = position()
        gripCheckFailed = 0
        elbowCheckFailed = 0

        #cam = cv2.VideoCapture("C:/Users/Eamonn/Programming/2020-ca400-template-repo/src/GymVisionDesktop/Videos/OverheadGood.mp4")
        cam = cv2.VideoCapture(args.camera)
        ret_val, image = cam.read()
        orange_color = (0, 140, 255)

        while True:
            ret_val, image = cam.read()

            humans = e.inference(image,
                                 resize_to_default=(w > 0 and h > 0),
                                 upsample_size=args.resize_out_ratio)
            pose = humans
            image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

            if len(pose) > 0:
                # distance calculations
                for human in humans:
                    for i in range(len(humans)):

                        try:

                            pos.getPositions(human, image)

                            if gripCheckFailed < 5:
                                if not self.gripWidthCheck(human, image, pos):
                                    gripCheckFailed += 1

                            if gripCheckFailed == 5:
                                cv2.putText(image,
                                            "Grip is too wide/too close",
                                            (5, 90), cv2.FONT_HERSHEY_SIMPLEX,
                                            0.5, (0, 0, 255), 2)
                            else:
                                cv2.putText(image, "Good grip width!", (5, 90),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                            (0, 255, 0), 2)

                            if elbowCheckFailed < 5:
                                if not self.elbowCheck(human, image, pos):
                                    elbowCheckFailed += 1
                            if elbowCheckFailed == 5:
                                cv2.putText(
                                    image,
                                    "Starting position forearms not vertical",
                                    (5, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 0, 255), 2)
                            else:
                                cv2.putText(image,
                                            "Elbows and Wrists aligned, Good",
                                            (5, 70), cv2.FONT_HERSHEY_SIMPLEX,
                                            0.5, (0, 255, 0), 2)

                        except Exception as exs:
                            print(exs)
                            pass

            cv2.imshow('tf-pose-estimation result', image)

            if cv2.waitKey(1) == 27:
                break

        cv2.destroyAllWindows()
Example #19
def start_game(config, params):
    cam = cv2.VideoCapture(0)
    ret, named_window = cam.read()

    # hp(health point) attributes
    hp_x = config.imWidth // 2 + 400
    hp_y = config.imHeight // 2 - 345
    hp_yy = config.imHeight // 2 - 300
    hp_w = 50
    hp_h = 42
    hp_image = cv2.imread('images/heart.png')
    score_img = cv2.imread('images/score.png')

    w = 432
    h = 368
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                        target_size=(w, h),
                        trt_bool=str2bool("False"))

    while True:
        params["restart"] = False
        hp = 10
        cur_order = 0
        score = 0

        game_patterns = []

        for i in params["patterns"]:
            list = []
            time1 = i[0] - 3
            time2 = i[0] + 1
            list.extend([i[0], time1, time2, False, i[10]])

            for j in range(1, 10):  # j = 1 ~ 9
                if i[j]:
                    list.append(tuple([j - 1, i[j] - 1]))
            game_patterns.append(list)

        match_list = []  # sets to be checked for scoring; reset each frame

        start_time = time.time()
        play_music(params["song"], 0)

        while True:  # game play
            ret, named_window = cam.read()
            config.named_window = cv2.resize(named_window,
                                             dsize=(1312, 736),
                                             interpolation=cv2.INTER_AREA)
            config.named_window = cv2.flip(config.named_window, 1)
            print(named_window.shape)
            humans = e.inference(named_window,
                                 resize_to_default=(w > 0 and h > 0),
                                 upsample_size=4.0)
            if not humans:
                continue

            human = humans[0]

            image_h, image_w = config.named_window.shape[:2]
            centers = []
            for i in range(common.CocoPart.Background.value):
                if i not in human.body_parts.keys():
                    centers.append((0, 0))
                else:
                    body_part = human.body_parts[i]
                    center = (image_w - int(body_part.x * image_w + 0.5),
                              int(body_part.y * image_h + 0.5))
                    centers.append(center)

            play_time = time.time() - start_time
            pattern = game_patterns[cur_order]

            if game_patterns[cur_order][1] < play_time and game_patterns[
                    cur_order] not in match_list:
                match_list.append(game_patterns[cur_order])
                cur_order += 1
                if cur_order > len(game_patterns) - 1:
                    cur_order = len(game_patterns) - 1
            if match_list:
                match_list = match(config, match_list, centers, hp, play_time,
                                   score)
            if match_list and match_list[0][2] < play_time:  # if it's still there
                hp -= 1
                del match_list[0]

            cv2.putText(config.named_window, 'score:',
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 300)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)
            cv2.putText(config.named_window, '%d' % score,
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 250)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)

            if cur_order == len(game_patterns):
                config.named_window = score_img
                clear_menu(params, score)

            if cv2.waitKey(1) & 0xFF == ord('p'):
                params["exit"] = True

            if hp <= 0 or play_time > game_patterns[len(game_patterns) -
                                                    1][2] + 5:
                mixer.music.stop()
                death_menu(params)

            if params["exit"] == True:
                break
            if params["restart"] == True:
                break
            if params["menu"] == True:
                break

            for i in range(hp):
                if i < 5:
                    show_hp(config.named_window, hp_image, hp_x + i * hp_w,
                            hp_y, hp_w, hp_h)
                if i >= 5:
                    show_hp(config.named_window, hp_image,
                            hp_x + (i - 5) * hp_w, hp_yy, hp_w, hp_h)

            cv2.imshow('McgBcg', config.named_window)

        if params["exit"] == True:
            break
        if params["menu"] == True:
            break
Example #20
class PoseEstimator(object):
    """
    The PoseEstimator class manages all operations related
    to pose estimation. It acts as a wrapper on top of the
    TfPoseEstimator implemented inside the openpose model.
    The class supplies human pose coordinates to
    requestor objects.
    """
    resize_out_ratio = 4.0  # of no relevance, kept for the sake of completeness

    def __init__(self, resize='0x0', model='mobilenet_thin'):

        self.humans = None  # list of humans with pose info
        self.image = None
        self.bboxes = []  # list of bbox [x1, y1, x2, y2]
        """
        Two available models are cmu & mobilenet_thin
        if running in CPU mode only, then mobilenet_thin
        is recommended. Default fetches mobilenet_thin
        """
        self.model = model
        """
        if resize value is provided, it will resize images
        before they are processed. default=0x0, Recommends:
        432x368 or 656x368 or 1312x736
        """
        self.w, self.h = model_wh(resize)
        self.loadModel()

    def loadModel(self):
        """
        Loads the cmu or mobilenet model in memory
        """
        try:
            if self.w == 0 or self.h == 0:
                self.e = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(432, 368))
            else:
                self.e = TfPoseEstimator(get_graph_path(self.model),
                                         target_size=(self.w, self.h))
        except MemoryError:
            print("couldn't load model into memory...")

    def infer(self, image):
        """
        calls the inference API inside tf_pose (openpose)
        returning the poses of humans and drawing the skeleton
        on image frame
        """
        self.image = image
        if self.image is None:
            raise Exception('The image is not valid. check your image')

        self.humans = self.e.inference(self.image,
                                       resize_to_default=(self.w > 0
                                                          and self.h > 0),
                                       upsample_size=self.resize_out_ratio)
        self.image = TfPoseEstimator.draw_humans(self.image,
                                                 self.humans,
                                                 imgcopy=False)
        return self.image

    def getHumans(self):
        return self.humans

    def getImage(self):
        return self.image

    def _normalize_values(self, width, height):
        if self.w == 0:
            width = width * 432
        else:
            width = width * self.w
        if self.h == 0:
            height = height * 368
        else:
            height = height * self.h

        return width, height

    def getBboxes(self):
        return self.bboxes

    def getKeypoints(self):
        """
        Returns a list of keypoints of all
        the persons in a frame
        keypt_list = [keypts1, keypts2]
        keypts = [x1, y1, score, ...]
        """
        keypt_list = []
        for human in self.humans:
            keypts = []
            for key, values in human.body_parts.items():
                # print (key, 'x val %.2f' % values.x, 'y val %.2f' % values.y)
                # print ('values.part_idx, values.uidx ',
                #           values.part_idx, values.uidx)
                x, y = self._normalize_values(values.x, values.y)
                keypts.extend([x, y, values.score])
            keypt_list.append(keypts)
        #print (keypt_list)
        return keypt_list

    def _updateBboxes(self):
        self.bboxes = []
        for human in self.humans:
            min_x, min_y = math.inf, math.inf
            max_x, max_y = -1, -1
            bbox = [min_x, min_y, max_x, max_y]
            for key, values in human.body_parts.items():
                if values.x < min_x:
                    min_x = values.x
                if values.y < min_y:
                    min_y = values.y
                if values.x > max_x:
                    max_x = values.x
                if values.y > max_y:
                    max_y = values.y
            bbox = [min_x, min_y, max_x, max_y]
            self.bboxes.append(bbox)

    def showResults(self):
        """
        utility method for debug purposes,
        not called from anywhere
        """
        #print (self.humans)

        cv2.imshow('tf-pose-estimation result', self.image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
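
A brief usage sketch of the wrapper (the image path is an assumption):

if __name__ == '__main__':
    pose_estimator = PoseEstimator(resize='432x368', model='mobilenet_thin')
    frame = cv2.imread('sample.jpg')
    result = pose_estimator.infer(frame)
    print('keypoints per person:', pose_estimator.getKeypoints())
    pose_estimator.showResults()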
Example #21
# import sys
# sys.path.insert(0, './action')

# from tf_pose import common
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh

w, h = 432, 368
parts = ['nose', 'neck', 'r_shoulder', 'r_elbow', 'r_wrist', 'l_shoulder', 'l_elbow', 'l_wrist', 'r_hip', 'r_knee',
         'r_ankle', 'l_hip', 'l_knee', 'l_ankle', 'r_eye', 'l_eye', 'r_ear', 'l_ear']

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75, allow_growth=True)

tf_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)

e = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(w, h), tf_config=tf_config)

def action_classifie(img):
    img_shape = img.shape
    humans = e.inference(img, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)

    bodys_pos = []

    for human in humans:
        temp = {}
        for i in range(len(parts)):
            if i not in human.body_parts.keys():
                continue

            body_part = human.body_parts[i]
            temp[parts[i]] = body_part
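        # Hedged continuation sketch (the original excerpt is truncated here): keep
        # each human's named keypoints, scaled to pixel coordinates.
        bodys_pos.append({name: (int(part.x * img_shape[1]),
                                 int(part.y * img_shape[0]))
                          for name, part in temp.items()})
    return bodys_pos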
Example #22
    #Number of the frames in the video:
    length = int(cap.get(cv.CAP_PROP_FRAME_COUNT))

    #Check for the right model, change to its directory and import the required files:
    print("CHOOSING THE  MODEL")
    if args.model == 'mobilenet_thin' or args.model == 'cmu' or args.model == 'mobilenet_fast' or args.model == 'mobilenet_accurate':
        sys.path.insert(0, '../tf-openpose')
        from tf_pose.estimator import TfPoseEstimator
        from tf_pose.networks import get_graph_path, model_wh
        logger.debug('initialization %s : %s' %
                     (args.model, get_graph_path(args.model)))
        w, h = model_wh(args.resolution)
        #e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
        if w == 0 or h == 0:
            e = TfPoseEstimator(get_graph_path(args.model),
                                target_size=(432, 368))
        else:
            e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
    elif args.model == 'caffe':
        start_load = time.time()
        sys.path.append(
            '/home/y17bendo/stage2018/Realtime_Multi-Person_Pose_Estimation/testing/python'
        )
        import pylab as plt
        from PIL import Image
        import processing_image
        from processing_image import import_param, processing

        #Importing the parameters of the net, and loading the net:
        param, model, net = import_param()
        print("loading the net took : %f" % (time.time() - start_load))
Example #23
import logging
import time
import ast

from tf_pose import common
import cv2
import numpy as np
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh

from tf_pose.lifting.prob_model import Prob3dPose
from tf_pose.lifting.draw import plot_pose

#Dataset
#import scipy.io
#dataset = scipy.io.loadmat('joints.mat')

# model
e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(432, 368))
# images
image = common.read_imgfile('./dataset_images/im0001.jpg', None, None)
# inference
t = time.time()
w = 0
h = 0
humans = e.inference(image,
                     resize_to_default=(w > 0 and h > 0),
                     upsample_size=1.0)
elapsed = time.time() - t
print('inference image: %s in %.4f seconds.' % ('im0001', elapsed))
#print(humans)
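
Prob3dPose and plot_pose are imported above but never used in this excerpt; the following is a hedged sketch (the prob_model_params.mat path is an assumption) of how the 2D result could be lifted to 3D, mirroring the pattern used in the Terrain.mesh method earlier.

# Hedged sketch: lift the 2D pose to 3D and plot it.
import matplotlib.pyplot as plt

poseLifting = Prob3dPose('./tf_pose/lifting/models/prob_model_params.mat')
standard_w, standard_h = 640, 480
pose_2d_mpiis = []
visibilities = []
for human in humans:
    pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
    pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5))
                          for x, y in pose_2d_mpii])
    visibilities.append(visibility)
pose_2d_mpiis = np.array(pose_2d_mpiis)
visibilities = np.array(visibilities)
transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis, visibilities)
pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)
for single_3d in pose_3d:
    plot_pose(single_3d)
plt.show()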
Example #24
class Tracker:
    """
    attributes:
    
    people = dict of people, key is their uuid
    names = dict of names, key is uuid

    frame_count = count of frames tracked
    scan_every_n_frames = # of frames between face scans for a person
    maximum_difference_to_match = the maximum average difference between points
                                    above which a pose is not matched to the same person

    estimator = TfPoseEstimator

    poses = [] of poses from TfPoseEstimator from current frame
    faces = [] positions of faces from current frame
    save_faces_to - None if disabled, a string for an existing dir if enabled
    """
    def __init__(self):
        self.frame_count = 0
        self.scan_every_n_frames = 120
        self.max_face_scans = 5
        self.maximum_difference_to_match = 0.08

        self.names = {}
        self.people = {}

        self.estimator = TfPoseEstimator(get_graph_path("mobilenet_thin"),
                                         target_size=(432, 368))
        self.encodings = {}
        self.save_faces_to = None

    def load_encodings(self, filepath):
        self.encodings = pickle.loads(open(filepath, "rb").read())

    def save_encodings(self, filepath):
        encoding_file = open(filepath, "wb")
        encoding_file.write(pickle.dumps(self.encodings))
        encoding_file.close()

    def create_encodings(self, faces_directory):
        facedirs = [
            filename for filename in os.listdir(faces_directory)
            if not isfile(join(faces_directory, filename))
        ]
        faces = dict.fromkeys(facedirs, [])
        encodings = {}

        for name in facedirs:
            faces[name] = []

            for filepath in [
                    filename
                    for filename in os.listdir(join(faces_directory, name))
                    if isfile(join(faces_directory, name, filename))
            ]:
                faces[name].append(join(faces_directory, name, filepath))

        for name in faces:
            encodings[name] = []

            for filepath in faces[name]:
                try:
                    image = cv2.imread(filepath)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    encoding = face_recognition.face_encodings(
                        image, [(0, 0, image.shape[0], image.shape[1])])
                    encodings[name].append(encoding[0])
                except:
                    continue

        self.encodings = encodings

    def get_pose(self, image):
        w = 432
        h = 368
        return self.estimator.inference(image,
                                        resize_to_default=(w > 0 and h > 0),
                                        upsample_size=4.0)

    def draw_output(self,
                    image,
                    draw_body=True,
                    draw_face=True,
                    draw_label=True):
        if draw_body:
            poses = []
            for person in self.people:
                if person.is_visible:
                    poses.append(person.pose)

            TfPoseEstimator.draw_humans(image, poses, imgcopy=False)

        for person in self.people:
            if not self.people[person].is_visible:
                continue

            if draw_face:
                top, left, bottom, right = self.people[person].face

                top = math.floor(top * image.shape[0])
                bottom = math.floor(bottom * image.shape[0])
                left = math.floor(left * image.shape[1])
                right = math.floor(right * image.shape[1])

                cv2.rectangle(image, (left, top), (right, bottom), (0, 0, 255),
                              2, 0)

                if draw_label:
                    cv2.putText(image, self.people[person].id, (left, top),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))

        return image

    def scan_face(self, image, person):
        if not person.is_visible:
            return

        top, left, bottom, right = person.face

        top = math.floor(top * image.shape[0])
        left = math.floor(left * image.shape[1])
        bottom = math.floor(bottom * image.shape[0])
        right = math.floor(right * image.shape[1])

        encoding = face_recognition.face_encodings(
            image, [(top, right, bottom, left)])

        if len(encoding) <= 0:
            return

        encoding = encoding[0]
        person.set_encoding(encoding)

    def compare_known_faces(self, person):
        if len(self.encodings) == 0:
            return

        name_key = []
        encodings = []
        counts = {}
        for name in self.encodings:
            results = face_recognition.compare_faces(self.encodings[name],
                                                     person.encodings[-1])

            match_count = results.count(True)
            counts[name] = match_count

            if match_count / len(results) >= 0.75:
                break

        biggest_match = max(counts, key=counts.get)

        if (counts[biggest_match] <= 3):
            return None
        else:
            return biggest_match

    # Handed a frame to process for tracking
    def process_frame(self, image):
        self.frame_count += 1

        #1 - Generate all the poses
        self.poses = self.get_pose(image)

        #3 - Tick each person
        for person in self.people:
            self.people[person].tick()

        #2 - see if the pose is someone we've seen in our people,
        # or if it's someone new to create a new person object for
        new_people = []

        for pose in self.poses:
            handled = False
            for person in self.people:
                difference = self.people[person].distance_from_pose(pose)
                if difference < self.maximum_difference_to_match:
                    self.people[person].update(pose)

                    handled = True
                    break

            if handled:
                continue
            else:
                #Create a new person
                person = Person()
                person.update(pose)
                new_people.append(person)

        for person in new_people:
            self.people[person] = person

        #4 - Now that we've generated the people, "tock" through all people
        # in order to have their decay occur
        for person in self.people:
            #scan the face of all new people
            if person in new_people:
                self.scan_face(image, person)

            #Scan the face of everyone else that hasnt been scanned for
            #self.scan_every_n_frames

            if person.is_visible and person.last_face_scan % self.scan_every_n_frames == 0 and len(
                    person.encodings) < self.max_face_scans:
                self.scan_face(image, person)

            if person.last_face_scan == 0:
                id = self.compare_known_faces(person)

                older_person = self.people.get(id)
                if older_person is not None:
                    person[id] = person

                if id is not None:
                    person.id = id
                elif self.save_faces_to:
                    person.save_face(image, self.save_faces_to)

            self.people[person].tock()
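
A hedged usage sketch of the Tracker (the camera index and encodings path are assumptions):

if __name__ == '__main__':
    tracker = Tracker()
    tracker.load_encodings('./encodings.p')
    cam = cv2.VideoCapture(0)
    while True:
        ret_val, frame = cam.read()
        if not ret_val:
            break
        tracker.process_frame(frame)
        cv2.imshow('tracking', tracker.draw_output(frame))
        if cv2.waitKey(1) == 27:
            break
    cam.release()
    cv2.destroyAllWindows()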
Example #25
parser.add_argument('--resize', type=str, default='0x0',
                    help='if provided, resize images before they are processed. default=0x0, Recommends : 432x368 or 656x368 or 1312x736 ')
parser.add_argument('--resize-out-ratio', type=float, default=4.0,
                    help='if provided, resize heatmaps before they are post-processed. default=1.0')
parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
parser.add_argument('--show-process', type=bool, default=False,
                    help='for debug purpose, if enabled, speed for inference is dropped.')
parser.add_argument('--tensorrt', type=str, default="False",
                    help='for tensorrt process.')
args = parser.parse_args()

#args= {'resize':'432x368', 'model':'mobilenet_thin','camera':0,'resize_out_ratio':4.0,'video':"output.avi"}
w, h = model_wh(args.resize)

if w > 0 and h > 0:
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
else:
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))

out = False
cam = cv2.VideoCapture(args.camera)
ret_val, image = cam.read()
cam_store = cv2.VideoCapture(args.video)
ret_val_store, image_store = cam_store.read()

while cam_store.isOpened():
    dist = True 
    ret_val_store, image_store = cam_store.read()
    print("-------Next Pose Applied-------")
    while dist:
        ret_val, image = cam.read()
Example #26
        help='for debug purpose, if enabled, speed for inference is dropped.')
    parser.add_argument('--showBG',
                        type=bool,
                        default=True,
                        help='False to show skeleton only.')
    parser.add_argument('--tensorrt',
                        type=str,
                        default="False",
                        help='for tensorrt process.')
    args = parser.parse_args()

    logger.debug('initialization %s : %s' %
                 (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model),
                        target_size=(w, h),
                        trt_bool=str2bool(args.tensorrt))
    cap = cv2.VideoCapture(args.video)
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out_video = cv2.VideoWriter('/tmp/output.mp4', fourcc,
                                cap.get(cv2.CAP_PROP_FPS), (640, 480))
    count = 0
    t_netfps_time = 0
    t_fps_time = 0

    if cap.isOpened() is False:
        print("Error opening video stream or file")

    try:
        while cap.isOpened():
            fps_time = time.time()
Example #27
def start_game(config, params):
    # Initialize the variables needed before the game starts
    cam = cv2.VideoCapture(0)
    ret, named_window = cam.read()

    # Silhouette matching: turn on the camera, (show an image, wait until 4 points
    # fall inside the target area) x 3, then leave the loop
    # sil = ["1.png", "2.png", "3.png"]  # something like this

    # Game start: ends with one of clear_menu, pause_menu, or death_menu
    pause_img = cv2.imread('images/pause.png')
    score_img = cv2.imread('images/score.png')
    gameover_img = cv2.imread('images/gameover.png')

    # Variables related to lives (HP)
    hp_x = config.imWidth // 2 + 400
    hp_y = config.imHeight // 2 - 345
    hp_yy = config.imHeight // 2 - 300
    hp_w = 50
    hp_h = 42
    hp_image = cv2.imread('images/heart.png')

    w = 432
    h = 368
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                        target_size=(w, h),
                        trt_bool=str2bool("False"))

    global score
    while True:  # on restart we come back here (no need to redo the silhouette)
        params["restart"] = False
        hp = 10  # lives before death (miss 10 times and death_menu is shown)
        cur_order = 0
        # params

        score = 0

        game_patterns = []  # list rebuilt from the loaded patterns

        # Values loaded from the Excel sheet
        for i in params[
                "patterns"]:  # e.g. i = [4.0, 0, 0, 0, 3, 0, 0, 12, 0, 0, 0]
            list = []
            if i[10]:  # i[10] truthy means there is a pose, so give time to strike it
                time1 = i[0] - 6.6
                time2 = i[0]
            else:  # no pose -> give a time window for a body point to touch the circle
                time1 = i[0] - 3
                time2 = i[0] + 1
            # Instead of the single time i[0], use the range time1~time2.
            list.extend([i[0], time1, time2, False, i[10]])
            # For each of the 9 zones, append a (zone, body part) tuple to the list.
            for j in range(1, 10):  # j = 1 ~ 9
                if i[j]:  # nonzero means a circle should appear in that zone
                    # Zone numbers are offset by one from the Excel sheet because of
                    # the time column -> j - 1; i[j] - 1 corrects a mis-entered value.
                    list.append(tuple([j - 1, i[j] - 1]))
            game_patterns.append(list)

        # params["patterns"][0] = [4,0, 0, 0, 3, 0, 0, 12, 0, 0, 0]
        #   -> game_patterns[0] = [4.0, 3.5, 4.2, False, (2, 2), (5, 11)]  (구역번호, 부위번호)
        match_list = []  # 주어진 시간 안에 해당되는, match 해볼 규칙들

        #a = input('Press...')

        start_time = time.time()
        resume_time = 0.0
        resume_start = 0.0
        play_music(params["song"], 0)
        while True:  # game play

            ret, named_window = cam.read()
            config.named_window = cv2.resize(named_window,
                                             dsize=(1312, 736),
                                             interpolation=cv2.INTER_AREA)
            config.named_window = cv2.flip(config.named_window, 1)
            print(named_window.shape)
            humans = e.inference(named_window,
                                 resize_to_default=(w > 0 and h > 0),
                                 upsample_size=4.0)  # 4 / 1 ??
            if not humans:
                continue

            human = humans[0]

            image_h, image_w = config.named_window.shape[:2]

            # The Human class's add_pair function (estimator.py, line 62) identifies the points,
            # and the detected coordinates are stored in the centers list
            # -> key points from head to toe are shown on screen.
            centers = []
            for i in range(common.CocoPart.Background.value):  # 18 parts
                if i not in human.body_parts.keys():
                    centers.append((0, 0))
                else:
                    body_part = human.body_parts[i]
                    center = (image_w - int(body_part.x * image_w + 0.5),
                              int(body_part.y * image_h + 0.5))
                    centers.append(center)  # collect the person's keypoints for on-screen display

            # Silhouette
            play_time = time.time() - start_time  # measure elapsed play time
            pattern = game_patterns[cur_order]

            # once a rule passes time1 and is not yet in match_list (to avoid duplicate appends while the first condition keeps holding)
            # game_patterns[cur_order][1] is the lower bound of the time window to hit && not yet in match_list...
            if game_patterns[cur_order][1] < play_time and game_patterns[
                    cur_order] not in match_list:
                match_list.append(game_patterns[cur_order])
                # cur_pattern = Pattern()
                cur_order += 1
                if cur_order > len(game_patterns) - 1:
                    # this condition means the game is over -> pin cur_order so the game can finish
                    cur_order = len(game_patterns) - 1
            if match_list:  # if match_list has any element, hand the arguments below to match()
                # centers resize, flip      i = [4.0, 3.5, 4.2, F, 0 or PATH, (2, 3), (5, 12)]
                match_list = match(config, match_list, centers, hp,
                                   play_time)  # => see the match() function above
            if match_list and match_list[0][2] < play_time:
                # the head entry is still pending and its touch window has passed -> lose one life
                hp -= 1
                # expired entries are always at the front, so index [0] is deleted
                del match_list[0]
                # match_list.remove(game_patterns[cur_order]) would also work

            cv2.putText(config.named_window, 'score:',
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 300)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)  # show the score in real time
            cv2.putText(config.named_window, '%d' % score,
                        (int(config.imWidth / 2 - 600),
                         int(config.imHeight / 2 - 250)),
                        cv2.FONT_HERSHEY_PLAIN, 4, (255, 255, 255), 7,
                        cv2.LINE_8)

            if cur_order == len(game_patterns):
                # the (rebuilt) pattern list is finished -> show clear_menu
                config.named_window = score_img
                clear_menu(params, score)

            if cv2.waitKey(1) & 0xFF == ord('p'):
                params["exit"] = True

            if hp <= 0 or play_time > game_patterns[-1][2] + 5:
                # 5 seconds after the touch window of the last pattern has passed
                mixer.music.stop()
                death_menu(params)  # death

            if params["exit"] == True:
                break
            if params["restart"] == True:  # 같은 게임 다시 시작
                break
            if params["menu"] == True:
                break

            for i in range(hp):
                if i < 5:  # composite the HP icons onto the live window
                    show_hp(config.named_window, hp_image, hp_x + i * hp_w,
                            hp_y, hp_w, hp_h)
                if i >= 5:  # drawn as a second row
                    show_hp(config.named_window, hp_image,
                            hp_x + (i - 5) * hp_w, hp_yy, hp_w, hp_h)

            cv2.imshow('McgBcg', config.named_window)  #image_h, image_w

        if params["exit"] == True:
            break
        if params["menu"] == True:
            break
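The match() call above is defined elsewhere in the project and is not shown in this snippet. The following is a hypothetical sketch only, illustrating the logic the comments describe: while play_time is inside an entry's [time1, time2] window, check whether each required body part sits in the required screen zone (assumed here to be a 3x3 grid over the window), bump the global score on a hit, and keep unmatched entries pending so the caller can expire them. The grid layout, the scoring rule, and the handling of pose-only entries (entry[4]) are all assumptions.

# hypothetical sketch -- not the project's real match() implementation
def match(config, match_list, centers, hp, play_time):
    global score
    zone_w = config.imWidth // 3   # assumed 3x3 grid of touch zones
    zone_h = config.imHeight // 3
    still_pending = []
    for entry in match_list:
        _, time1, time2, _, _ = entry[:5]
        targets = entry[5:]                     # (zone, body part) tuples
        hit = bool(targets) and time1 <= play_time <= time2
        for zone, part in targets:
            x, y = centers[part]
            if (x, y) == (0, 0):                # part not detected this frame
                hit = False
                continue
            col = min(x // zone_w, 2)
            row = min(y // zone_h, 2)
            if col + 3 * row != zone:
                hit = False
        if hit:
            score += 1                          # assumed scoring rule
        else:
            still_pending.append(entry)         # caller expires these via time2
    return still_pending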
Example #28
    if not image_topic:
        rospy.logerr('Parameter \'camera\' is not provided.')
        sys.exit(-1)

    try:
        w, h = model_wh(resolution)
        graph_path = get_graph_path(model)

        rospack = rospkg.RosPack()
        graph_path = os.path.join(rospack.get_path('tfpose_ros'), graph_path)
    except Exception as e:
        rospy.logerr('invalid model: %s, e=%s' % (model, e))
        sys.exit(-1)

    pose_estimator = TfPoseEstimator(graph_path, target_size=(w, h))
    cv_bridge = CvBridge()

    rospy.Subscriber(image_topic,
                     Image,
                     callback_image,
                     queue_size=1,
                     buff_size=2**24)
    pubs = {
        j: rospy.Publisher("/fyp/pose/{}".format(j), Point2D, queue_size=1)
        for j in PARTS
    }

    rospy.loginfo('start+')
    rospy.spin()
    rospy.loginfo('finished')
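callback_image, PARTS and Point2D are referenced but not defined in the snippet above. As a hedged sketch, a callback for this node would typically convert the ROS image with cv_bridge, run the estimator, and publish the normalized coordinates of the first detected person; PARTS is assumed to be a list of part names in COCO index order and Point2D is assumed to carry x/y fields, neither of which is confirmed by the snippet.

# hedged sketch of the unseen callback
def callback_image(msg):
    try:
        image = cv_bridge.imgmsg_to_cv2(msg, 'bgr8')
    except Exception as err:
        rospy.logerr('cv_bridge conversion failed: %s' % err)
        return

    humans = pose_estimator.inference(image,
                                      resize_to_default=True,
                                      upsample_size=4.0)
    if not humans:
        return

    human = humans[0]
    for idx, name in enumerate(PARTS):          # assumed: names in COCO order
        if idx in human.body_parts:
            part = human.body_parts[idx]
            # BodyPart.x / .y are normalized to [0, 1]
            pubs[name].publish(Point2D(x=part.x, y=part.y))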
Example #29
     '--resize-out-ratio',
     type=float,
     default=4.0,
     help=
     'if provided, resize heatmaps before they are post-processed. default=1.0'
 )
 args = parser.parse_args()
 (sess, accuracy, pred, optimizer) = initialize_variables()
 print(pred)
 logger.debug('initialization %s : %s' %
              (args.model, get_graph_path(args.model)))
 #w, h = model_wh(args.resolution)
 #e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
 w, h = model_wh(args.resize)
 if w > 0 and h > 0:
     e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))
 else:
     e = TfPoseEstimator(get_graph_path(args.model), target_size=(432, 368))
 cap = cv2.VideoCapture(args.video)
 frame_number = 0
 sequence_arr = []
 if cap.isOpened() is False:
     print("Error opening video stream or file")
 while cap.isOpened():
     ret_val, image = cap.read()
     humans = e.inference(image,
                          resize_to_default=(w > 0 and h > 0),
                          upsample_size=args.resize_out_ratio)
     #humans = e.inference(image)
     #print("@@@@", humans)
     if not args.showBG:
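The snippet is truncated before the skeletons reach the classifier, but a session, pred, and sequence_arr are set up above. A hedged sketch of the usual next step is to flatten each detected skeleton into a fixed-length vector and collect those per-frame vectors in sequence_arr; the 18-part COCO layout and the zero-filling for missing parts are assumptions, not taken from the original file.

# hedged sketch -- feature extraction for the classifier set up above
def human_to_features(human, num_parts=18):
    features = []
    for idx in range(num_parts):
        if idx in human.body_parts:
            part = human.body_parts[idx]
            features.extend([part.x, part.y])   # normalized coordinates
        else:
            features.extend([0.0, 0.0])         # part not detected
    return features

# inside the while-loop, after e.inference(...):
#     if humans:
#         sequence_arr.append(human_to_features(humans[0]))
#     frame_number += 1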
Example #30
class Detector():
    def __init__(self, target_ip):

        self.CWD_PATH = os.getcwd()
        self.CWD_PATH = os.path.abspath(os.path.join(self.CWD_PATH, os.pardir))
        self.CWD_PATH = os.path.join(self.CWD_PATH, '3_BRobot')

        # Path to frozen detection graph. This is the actual model that is used for the object detection.
        MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
        PATH_TO_CKPT = os.path.join(self.CWD_PATH, 'object_detection',
                                    MODEL_NAME, 'frozen_inference_graph.pb')

        # List of the strings that is used to add correct label for each box.
        PATH_TO_LABELS = os.path.join(self.CWD_PATH, 'object_detection',
                                      'data', 'mscoco_label_map.pbtxt')

        NUM_CLASSES = 90

        # Loading label map
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.right_clicks = []

        # self.right_clicks = [[375, 41], [1000, 709]]
        # mouse callback function
        def mouse_callback(event, x, y, flags, params):
            #right-click event value is 2
            if event == 2:
                if len(self.right_clicks) < 2:
                    self.right_clicks.append([x, y])
                else:
                    self.right_clicks = [[x, y]]

                print(self.right_clicks)

        CAM_ID = 1

        self.cam = cv2.VideoCapture(int(CAM_ID))

        self.window_name = 'Cam' + str(CAM_ID)
        cv2.namedWindow(self.window_name)
        cv2.setMouseCallback(self.window_name, mouse_callback)

        self.prevTime = 0
        self.window_size = (1312, 736)

        if self.cam.isOpened() == False:
            print('Can\'t open the CAM(%d)' % (CAM_ID))
            exit()

        self.face_queue = Queue()
        self.gender_queue = Queue()
        self.age_queue = Queue()

        self.process_gender = Process(target=gender_estimate,
                                      args=(self.face_queue,
                                            self.gender_queue))
        self.process_gender.start()

        self.process_age = Process(target=age_estimate,
                                   args=(self.face_queue, self.age_queue))
        self.process_age.start()

        self.w = self.window_size[0]
        self.h = self.window_size[1]
        self.e = TfPoseEstimator(get_graph_path('mobilenet_thin'),
                                 target_size=(self.w, self.h))

    def detect_objects(self, image_np, sess, detection_graph, mot_tracker,
                       img_to_color, face_detect, face_queue, gender_queue,
                       age_queue):
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

        # Each box represents a part of the image where a particular object was detected.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

        # Each score represent how level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # Actual detection.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})

        trackers = mot_tracker.update(boxes[0])

        person_ids = [i for i, e in enumerate(classes[0]) if e == 1]

        person_attr = {'age': 'NA', 'gender': 'NA', 'color': 'NA'}

        if len(person_ids) > 0:
            selected_person_id = person_ids[0]

            person_box = boxes[0][selected_person_id]
            person_score = scores[0][selected_person_id]
            person_tracker = trackers[selected_person_id]

            if person_score > 0.6:

                def get_color(q, img):
                    try:
                        start_time = time.monotonic()

                        c = img_to_color.get(img)
                        q.put({"flag": "color", "value": c})

                        elapsed_time = time.monotonic() - start_time
                        print("Color", elapsed_time)
                    except:
                        q.put({"flag": "color", "value": False})

                def detect_face(q, img, face_detect, face_queue, gender_queue,
                                age_queue):

                    start_time = time.monotonic()
                    # your code

                    files = []

                    faces, face_files, rectangles, tgtdir = face_detect.run(
                        img)
                    # put the crop info on face_queue twice: the gender and
                    # age processes each consume one item from this queue
                    face_queue.put([face_files, img, tgtdir])
                    face_queue.put([face_files, img, tgtdir])

                    person_gender = gender_queue.get()
                    person_age = age_queue.get()
                    print("gender rcvd", person_gender)
                    print("Age rcvd", person_age)

                    q.put({"flag": "gender", "value": person_gender})
                    q.put({"flag": "age", "value": person_age})

                    elapsed_time = time.monotonic() - start_time
                    print("Age/Gender", elapsed_time)

                person_img = crop_img(image_np, person_box)

                q = Queue()
                procs = []

                process_color = Process(target=get_color,
                                        args=(
                                            q,
                                            person_img,
                                        ))
                procs.append(process_color)

                process_face = Process(target=detect_face,
                                       args=(q, person_img, face_detect,
                                             face_queue, gender_queue,
                                             age_queue))
                procs.append(process_face)

                for proc in procs:
                    proc.start()

                # three results come back in total: one colour value from
                # get_color plus gender and age from detect_face
                results = []
                for proc in procs:
                    results.append(q.get())
                results.append(q.get())

                for proc in procs:
                    proc.join()

                for result in results:
                    person_attr[result['flag']] = result['value']

                # print(person_attr)
                # override boxes
                boxes = np.expand_dims(person_box, axis=0)
                classes = [1]
                scores = np.expand_dims(person_score, axis=0)
                trackers = np.expand_dims(person_tracker, axis=0)
                person_attr = [person_attr]

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    boxes,
                    classes,
                    scores,
                    trackers,
                    person_attr,
                    self.category_index,
                    use_normalized_coordinates=True,
                    line_thickness=3)

        return image_np, person_attr

    def detect_start(self):

        with self.detection_graph.as_default():
            with tf.Session(graph=self.detection_graph) as sess:
                # Load modules
                mot_tracker = Sort()

                npz = np.load('./bin/color_extractor/color_names.npz')
                img_to_color = ImageToColor(npz['samples'], npz['labels'])

                face_detect = face_detection_model(
                    'dlib',
                    './bin/age_gender/Model/shape_predictor_68_face_landmarks.dat'
                )
                person_attr = False

                while (True):
                    ret, frame = self.cam.read()

                    # Detection
                    if len(self.right_clicks) == 2:
                        print(self.right_clicks)
                        _y, _x, _d = frame.shape
                        [_c1, _c2] = self.right_clicks
                        crop_box = [
                            _c1[0] / _x,
                            _c1[1] / _y,
                            _c2[0] / _x,
                            _c2[1] / _y,
                        ]
                        cropped_img = crop_img(frame, crop_box)

                        try:
                            image_process, person_attr = self.detect_objects(
                                cropped_img, sess, self.detection_graph,
                                mot_tracker, img_to_color, face_detect,
                                self.face_queue, self.gender_queue,
                                self.age_queue)
                            print("####", person_attr)
                            if isinstance(person_attr, list):
                                if person_attr[0][
                                        'gender'] != 'NA' and person_attr[0][
                                            'gender'] != False:
                                    break
                            else:
                                if person_attr[
                                        'gender'] != 'NA' and person_attr[
                                            'gender'] != False:
                                    break

                        except Exception as e:
                            print(e)
                            pass

                    curTime = time.time()
                    sec = curTime - self.prevTime
                    self.prevTime = curTime
                    fps = 1 / (sec)

                    str1 = "FPS : %0.1f" % fps
                    str2 = "Testing . . ."
                    cv2.putText(frame, str1, (5, 20), cv2.FONT_HERSHEY_PLAIN,
                                1, (0, 255, 0))
                    cv2.putText(frame, str2, (100, 20), cv2.FONT_HERSHEY_PLAIN,
                                1, (0, 255, 0))
                    cv2.imshow(self.window_name, frame)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        self.detect_stop()
                        break

                    # plt.figure(figsize=IMAGE_SIZE)
                    # plt.imshow(image_process)
                    # plt.show()

                if person_attr:
                    return person_attr
                else:
                    return False

    def detect_stop(self):
        self.cam.release()
        # cv2.destroyWindow(self.window_name)

        cv2.destroyAllWindows()
        # self.process_gender.join()
        # self.process_age.join()
        print("Detect Stop")
        return True

    def pose_start(self):
        print("Pose Start")
        result = False
        while (True):
            ret, frame = self.cam.read()

            cropped_img = None
            if len(self.right_clicks) == 2:

                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # print(self.right_clicks)
                _y, _x, _d = frame.shape
                [_c1, _c2] = self.right_clicks
                crop_box = [
                    _c1[0] / _x,
                    _c1[1] / _y,
                    _c2[0] / _x,
                    _c2[1] / _y,
                ]
                cropped_img = crop_img(frame, crop_box)

                humans = self.e.inference(frame,
                                          resize_to_default=(self.w > 0
                                                             and self.h > 0),
                                          upsample_size=4.0)
                if len(humans) > 0:
                    # COCO part indices 4 and 7 are the right and left wrists
                    if 7 in humans[0].body_parts or 4 in humans[0].body_parts:
                        print("Hands Detected")
                        result = 1
                        break

                image = TfPoseEstimator.draw_humans(frame,
                                                    humans,
                                                    imgcopy=False)

                cv2.imshow('tf-pose-estimation result', image)

            cv2.imshow(self.window_name, cv2.cvtColor(frame,
                                                      cv2.COLOR_RGB2BGR))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        return result

    def hands_detect_start(self):
        print("Hands detection start")
        result = False

        im_width = 320
        im_height = 180

        self.cam.set(cv2.CAP_PROP_FRAME_WIDTH, im_width)
        self.cam.set(cv2.CAP_PROP_FRAME_HEIGHT, im_height)

        im_width, im_height = (self.cam.get(3), self.cam.get(4))

        score_thresh = 0.2

        # max number of hands we want to detect/track
        num_hands_detect = 2

        while True:
            ret, frame = self.cam.read()

            cropped_img = None
            if len(self.right_clicks) == 2:
                # resized_frame = cv2.resize(frame, (im_width, im_height))
                # print(self.right_clicks)
                _y, _x, _d = frame.shape
                [_c1, _c2] = self.right_clicks
                crop_box = [
                    _c1[0] / _x,
                    _c1[1] / _y,
                    _c2[0] / _x,
                    _c2[1] / _y,
                ]
                cropped_img = crop_img(frame, crop_box)

                # actual detection
                boxes, scores = detector_utils.detect_objects(
                    frame, self.hands_detection_graph,
                    self.hands_detection_sess)

                # Check the position of the hands

                # if the hands enter the steering-wheel area, return and start game play
                # break

                # draw bounding boxes
                detector_utils.draw_box_on_image(num_hands_detect,
                                                 score_thresh, scores, boxes,
                                                 im_width, im_height, frame)

                cv2.imshow('Hands Detection', frame)

            cv2.imshow(self.window_name, frame)

            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

        return result
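The translated comments in hands_detect_start leave a TODO: decide whether a confidently detected hand overlaps a target "steering wheel" region and, if so, return so the game can start. The sketch below is one hedged way to do that; it assumes detector_utils returns boxes as normalized (top, left, bottom, right) tuples, the usual TF detection layout, and the region coordinates in the usage comment are purely illustrative.

# hedged sketch -- not part of the original class
def hand_in_region(boxes, scores, region, score_thresh=0.2, max_hands=2):
    """region is (top, left, bottom, right) in normalized coordinates."""
    r_top, r_left, r_bottom, r_right = region
    for box, score in zip(boxes[:max_hands], scores[:max_hands]):
        if score < score_thresh:
            continue
        top, left, bottom, right = box
        # axis-aligned overlap test between the hand box and the target region
        if left < r_right and right > r_left and top < r_bottom and bottom > r_top:
            return True
    return False

# Inside the while-loop of hands_detect_start, something along these lines
# could set result and break once a hand reaches the (hypothetical) region:
#     if hand_in_region(boxes, scores, (0.4, 0.2, 0.9, 0.8),
#                       score_thresh, num_hands_detect):
#         result = True
#         break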