def calculate_and_write_pose_maps(image_paths, output_directory, mode):
    cfg = load_config("demo/pose_cfg.yaml")

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    for i, path in enumerate(image_paths):
        sys.stdout.write('\r >> Evaluating path %d of %d' %
                         (i + 1, len(image_paths)))
        sys.stdout.flush()

        # Read image from file
        image = imread(path, mode='RGB')

        target_width = image.shape[1] * TARGET_HEIGHT / image.shape[0]

        scaled_image = imresize(image, (int(TARGET_HEIGHT), int(target_width)),
                                'cubic')
        image_batch = data_to_input(scaled_image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        write_scmap(output_directory, ntpath.basename(path), scmap, mode,
                    image.shape)

    print('\nFinished generating pose maps...')
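
A helper like write_scmap is not shown in this snippet. Below is a minimal, hypothetical sketch of what it might do, assuming score maps are stored as .npy files named after the source image; the metadata file and naming scheme are illustrative only.

import os
import numpy as np

def write_scmap(output_directory, image_name, scmap, mode, image_shape):
    # Hypothetical helper: persist the raw score map plus enough metadata to
    # map score-map coordinates back to the original image.
    base = os.path.splitext(image_name)[0]
    os.makedirs(output_directory, exist_ok=True)
    np.save(os.path.join(output_directory, base + '_scmap.npy'), scmap)
    with open(os.path.join(output_directory, base + '_meta.txt'), 'w') as f:
        f.write('mode={}\noriginal_shape={}\n'.format(mode, image_shape))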
Example #2
def main():

    # paths to setup
    annotation_path = '/home/babybrain/Escritorio/300145_via.json'
    frames_path = '/home/babybrain/Escritorio/300145'

    # get the x-y annotations for each frame
    annotations = load_annotations(annotation_path)

    # get x, y positions for a certain part
    part_id_index = 4  # we'll use elbows; we need both left and right, since the model does not distinguish them
    file_anno, x_anno_r, y_anno_r = get_xy_for('r-elbow', annotations)
    _, x_anno_l, y_anno_l = get_xy_for('l-elbow', annotations)

    # get the x,y model prediction for each frame annotated
    cfg = load_config(
        "/home/babybrain/PycharmProjects/pose-tensorflow/demo/pose_cfg_babybrain.yaml"
    )
    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # run session for each frame image annotated
    x_model = np.empty(len(file_anno))
    y_model = np.empty(len(file_anno))
    for index, an_image in enumerate(file_anno):
        infile = "{path}/{name}".format(path=frames_path, name=an_image)
        image = imread(infile, mode='RGB')
        image_batch = data_to_input(image)

        # Compute prediction with CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        x_model[index] = pose[part_id_index, 0]
        y_model[index] = pose[part_id_index, 1]

    # now calculate distances
    distances_r = calculate_distances(x_model, y_model, x_anno_r, y_anno_r)
    distances_l = calculate_distances(x_model, y_model, x_anno_l, y_anno_l)

    # merge the best distance results
    distances = [min(xr, xl) for xr, xl in zip(distances_r, distances_l)]
    distances = np.array(distances)

    distance_steps, rates = detection_rate(distances, nsteps=50)
    rates = rates * 100

    # finally plot the graph
    fig, ax = plt.subplots()
    ax.plot(distance_steps, rates)

    ax.set_xlabel('Normalized Distance')
    ax.set_ylabel('Detection %')
    ax.set_title('Distance threshold vs Detection Ratio')
    ax.set_xlim([0, 0.5])

    plt.show()
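
calculate_distances and detection_rate are project helpers that are not part of this snippet. A minimal sketch under simple assumptions (plain Euclidean distance per frame; the "Normalized Distance" axis label suggests the real code also divides by a reference length):

import numpy as np

def calculate_distances(x_model, y_model, x_anno, y_anno):
    # Euclidean distance between predicted and annotated positions, per frame.
    return np.sqrt((np.asarray(x_model) - np.asarray(x_anno)) ** 2 +
                   (np.asarray(y_model) - np.asarray(y_anno)) ** 2)

def detection_rate(distances, nsteps=50):
    # Fraction of frames whose distance falls below each threshold step.
    steps = np.linspace(0, distances.max(), nsteps)
    rates = np.array([np.mean(distances <= s) for s in steps])
    return steps, rates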
Example #3
def dlc_setupTF(options):
    dlc_config_settings = load_yaml(options['cfg_dlc'])
    cfg = load_config(dlc_config_settings['dlc_network_posecfg'])
    cfg['init_weights'] = dlc_config_settings['dlc_network_snapshot']
    scorer = dlc_config_settings['scorer']
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    return {'scorer': scorer, 'sess': sess, 'inputs': inputs, 'outputs': outputs, 'cfg': cfg}
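
A hypothetical usage sketch for the dictionary returned above, assuming an options dict pointing at a DLC config and a single RGB frame. Depending on the fork, extract_cnn_output may return two or three values; the two-value form used by DeepLabCut is shown here.

import numpy as np

tf_state = dlc_setupTF({'cfg_dlc': 'config_dlc.yaml'})  # illustrative path
frame = np.zeros((480, 640, 3), dtype=np.uint8)         # stand-in for a real RGB frame

image_batch = data_to_input(frame)
outputs_np = tf_state['sess'].run(tf_state['outputs'],
                                  feed_dict={tf_state['inputs']: image_batch})
scmap, locref = predict.extract_cnn_output(outputs_np, tf_state['cfg'])
pose = predict.argmax_pose_predict(scmap, locref, tf_state['cfg']['stride'])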
Example #4
File: utils.py Project: Saduras/DanceNN
def preprocess(video_name, duration):
    source_path = f'./data/video/{video_name}.mp4'

    csv_base_path = './data/poses/'
    if not os.path.exists(csv_base_path):
        os.makedirs(csv_base_path)
    csv_path = f'{csv_base_path}{video_name}_poses.csv'

    audio_base_path = './data/audio/'
    if not os.path.exists(audio_base_path):
        os.makedirs(audio_base_path)
    audio_path = f'{audio_base_path}{video_name}.mp3'

    start_time = datetime.now()

    video = mpe.VideoFileClip(source_path)
    if duration < 0:
        duration = video.duration

    frame_count = int(video.fps * duration)
    frame_length = 1 / video.fps
    print(
        f'video length: {video.duration}s fps: {video.fps} frame count: {frame_count}'
    )

    # Load and setup CNN part detector
    cfg = load_config('./pose_cfg.yaml')
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    print('pose model loaded')

    poses = []
    times = []
    for i in range(frame_count):
        t = i * frame_length
        frame = video.get_frame(t)

        image_batch = data_to_input(frame)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        poses.append(pose)
        times.append(t)

        if i % 100 == 0:
            print(
                f'processed frame: {i}/{frame_count} elapsed time: {datetime.now() - start_time}',
                end='\r')

    sess.close()
    print(f'saving poses at {csv_path}')
    save_poses(np.array(poses), times, cfg, csv_path)
    print(f'saving audio at {audio_path}')
    video.audio.write_audiofile(audio_path)
    print(f'total time: {datetime.now() - start_time}')
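
save_poses is defined elsewhere in utils.py. A minimal sketch under the assumption that each pose row holds (x, y, score) per joint and that the joint names come from cfg's all_joints_names; the column layout is illustrative.

import csv

def save_poses(poses, times, cfg, csv_path):
    # Hypothetical CSV writer: one row per frame with the timestamp followed by
    # x, y, score for every joint.
    joint_names = cfg['all_joints_names']
    header = ['time'] + ['{}_{}'.format(name, axis) for name in joint_names
                         for axis in ('x', 'y', 'score')]
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for t, pose in zip(times, poses):
            writer.writerow([t] + pose.flatten().tolist())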
Example #5
def run_inference(cfg, input_data, tf_var=None):
    if tf_var is None:
        sess, batch_inputs, outputs = setup_pose_prediction(cfg)
    else:
        sess = tf_var[0]
        batch_inputs = tf_var[1]
        outputs = tf_var[2]
    pose = run_pose_prediction(cfg, input_data, sess, batch_inputs, outputs)    
    return pose
def main(option):
    start_time = time.time()
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    fps_time = 0
    # Read image from file
    slopes = {}
    k = 0
    cap = cv2.VideoCapture("http://192.168.43.31:8081")
    cap_user = cv2.VideoCapture('/dev/video0')
    cap = cap_user

    i = 0
    while (True):
        ret, orig_frame = cap.read()
        ret2, orig_frame_user = cap_user.read()
        if i % 25 == 0:
            #frame=orig_frame
            frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
            user_frame = cv2.resize(orig_frame_user, (0, 0), fx=0.50, fy=0.50)
            co1 = run_predict(frame, sess, outputs, inputs, cfg, dataset, sm,
                              draw_multi)
            print("CO1            ", co1)
            user_co1 = run_predict(user_frame, sess, outputs, inputs, cfg,
                                   dataset, sm, draw_multi)
            print("USER_CO1            ", user_co1)
            print("CO1            ", co1)
            k = None
            try:
                slope_reqd, slope_user = slope_calc(co1, user_co1)
                k, s = compare_images(slope_reqd, slope_user, 0.75)
            except IndexError:
                #if len(co1)!=len(user_co1):
                print("Except condition")
                pass
            vibrate(k)
            frame = cv2.resize(frame, (0, 0), fx=2.0, fy=2.0)
            user_frame = cv2.resize(user_frame, (0, 0), fx=2.0, fy=2.0)
            cv2.putText(user_frame,
                        "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow('user_frame', user_frame)
            cv2.imshow('frame', frame)
            fps_time = time.time()
            #visualize.waitforbuttonpress()
            if cv2.waitKey(10) == ord('q'):
                break
        i += 1  # advance the frame counter so only every 25th frame is processed
    elapsed = time.time() - start_time
    cap.release()
    cap_user.release()
    cv2.destroyAllWindows()
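
slope_calc, compare_images and vibrate are project-specific helpers that are not included here. A heavily simplified, hypothetical sketch of the comparison idea, assuming each pose is a sequence of (x, y) keypoints and that two poses "match" when the slopes of corresponding segments agree within a tolerance:

import numpy as np

def slope_calc(reference_pose, user_pose):
    # Hypothetical: slope of each consecutive keypoint pair, for both poses.
    def slopes(points):
        pts = np.asarray(points, dtype=float)
        deltas = np.diff(pts, axis=0)
        return deltas[:, 1] / (deltas[:, 0] + 1e-6)
    return slopes(reference_pose), slopes(user_pose)

def compare_images(slope_reqd, slope_user, tolerance):
    # Hypothetical: True when all corresponding slopes agree within the tolerance,
    # plus the mean slope difference as a score.
    diff = np.abs(np.asarray(slope_reqd) - np.asarray(slope_user))
    return bool(np.all(diff <= tolerance)), float(diff.mean())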
Example #7
File: test.py Project: nichtsen/pose-reg
def test_net(visualise, cache_scoremaps):
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)
    dataset.set_test_mode(True)

    sess, inputs, outputs = setup_pose_prediction(cfg)

    if cache_scoremaps:
        out_dir = cfg.scoremap_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    num_images = dataset.num_images
    predictions = np.zeros((num_images, ), dtype=np.object)

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        batch = dataset.next_batch()

        outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

        scmap, locref, pairwise_diff = extract_cnn_output(outputs_np, cfg)

        pose = argmax_pose_predict(scmap, locref, cfg.stride)

        pose_refscale = np.copy(pose)
        pose_refscale[:, 0:2] /= cfg.global_scale
        predictions[k] = pose_refscale

        if visualise:
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            visualize.show_heatmaps(cfg, img, scmap, pose)
            visualize.waitforbuttonpress()

        if cache_scoremaps:
            base = os.path.basename(batch[Batch.data_item].im_path)
            raw_name = os.path.splitext(base)[0]
            out_fn = os.path.join(out_dir, raw_name + '.mat')
            scipy.io.savemat(out_fn,
                             mdict={'scoremaps': scmap.astype('float32')})

            out_fn = os.path.join(out_dir, raw_name + '_locreg' + '.mat')
            if cfg.location_refinement:
                scipy.io.savemat(
                    out_fn, mdict={'locreg_pred': locref.astype('float32')})

    scipy.io.savemat('predictions.mat', mdict={'joints': predictions})

    sess.close()
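
For reference, argmax_pose_predict recovers image coordinates from the score-map grid roughly as follows; this is a sketch of the DeeperCut convention (stride times the argmax cell, plus half a stride, plus the location-refinement offset), and the exact offset handling may differ slightly between forks.

import numpy as np

def argmax_pose_sketch(scmap, locref, stride):
    # scmap: (h, w, n_joints) score maps; locref: (h, w, n_joints, 2) offsets or None.
    num_joints = scmap.shape[2]
    pose = []
    for j in range(num_joints):
        plane = scmap[:, :, j]
        maxloc = np.unravel_index(np.argmax(plane), plane.shape)  # (row, col)
        offset = np.array(locref[maxloc][j])[::-1] if locref is not None else 0
        pos = np.array(maxloc).astype('float') * stride + 0.5 * stride + offset
        pose.append(np.hstack((pos[::-1], [plane[maxloc]])))      # (x, y, confidence)
    return np.array(pose)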
Example #9
def run_dataset():
    cfg = deepcopy(load_config())

    if not os.path.exists(cfg.dir_json_pred):
        os.makedirs(cfg.dir_json_pred)

    sess, batch_inputs, outputs = setup_pose_prediction(cfg)
    tf_var = [sess, batch_inputs, outputs]

    if cfg.dataset_type == "posetrack_v1":
        from dataset.posetrack_v1 import run_posetrack_v1
        run_posetrack_v1(cfg, tf_var)
    if cfg.dataset_type == "posetrack_v2":
        from dataset.posetrack_v2 import run_posetrack_v2
        run_posetrack_v2(cfg, tf_var)
def main(option):
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    fps_time = 0
    # Read image from file
    cap = cv2.VideoCapture('msgifs/icon4.gif')
    cap_user = cv2.VideoCapture('user.mp4')
    i = 0
    while (True):
        ret, orig_frame = cap.read()
        ret2, orig_frame_user = cap_user.read()
        if i % 25 == 0:

            frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
            user_frame = cv2.resize(orig_frame_user, (0, 0), fx=0.50, fy=0.50)
            co1 = run_predict(frame, sess, inputs, outputs, cfg, dataset, sm,
                              draw_multi)
            user_co1 = run_predict(user_frame, sess, inputs, outputs, cfg,
                                   dataset, sm, draw_multi)
            try:
                slope_reqd = slope_calc(co1)
                slope_user = slope_calc(user_co1)
                compare_images(slope_reqd, slope_user, 0.1)
            except IndexError:
                #if len(co1)!=len(user_co1):
                #messagebox.showinfo("Title", "Please adjust camera to show your keypoints")
                pass
            #frame = cv2.resize(frame, (0, 0), fx=2.0, fy=2.0)
            #user_frame = cv2.resize(user_frame, (0, 0), fx=2.0, fy=2.0)
            cv2.putText(user_frame,
                        "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            cv2.imshow('user_frame', user_frame)
            cv2.imshow('frame', frame)
            fps_time = time.time()
            #visualize.waitforbuttonpress()
            if cv2.waitKey(10) == ord('q'):
                break
        i += 1  # advance the frame counter so only every 25th frame is processed
    cap.release()
    cap_user.release()
    cv2.destroyAllWindows()
Example #11
File: utils.py Project: Saduras/DanceNN
def predict_frame(video, t):
    frame_count = int(video.fps * video.duration)
    frame_length = 1 / video.fps

    # Load and setup CNN part detector
    cfg = load_config('./pose_cfg.yaml')
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    frame = video.get_frame(t)

    image_batch = data_to_input(frame)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

    # Extract maximum scoring location from the heatmap, assume 1 person
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

    return pose
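
A hypothetical usage sketch for predict_frame. Note that it reloads the config and builds a new TensorFlow session on every call, so for many frames the preprocess function above is the cheaper path; moviepy and the video path are assumptions here.

import moviepy.editor as mpe

video = mpe.VideoFileClip('./data/video/example.mp4')  # illustrative path
pose = predict_frame(video, 1.5)  # pose at t = 1.5 seconds
for joint_idx, (x, y, score) in enumerate(pose):
    print(f'joint {joint_idx}: ({x:.1f}, {y:.1f}) score={score:.2f}')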
Example #12
def main():
    start_time = time.time()
    print("main hai")
    tf.reset_default_graph()
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    draw_multi = PersonDraw()
    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # Read image from file
    dir = os.listdir("stick")
    k = 0
    cap = cv2.VideoCapture(0)
    i = 0
    while (cap.isOpened()):
            if i%20 == 0:                   
                ret, orig_frame = cap.read()
                if ret:
                    frame = cv2.resize(orig_frame, (0, 0), fx=0.30, fy=0.30)
                    image = frame
                    sse = 0
                    mse = 0
                    
                    image_batch = data_to_input(frame)

                    # Compute prediction with the CNN
                    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})

                    scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

                    detections = extract_detections(cfg, scmap, locref, pairwise_diff)

                    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)

                    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)
                    img = np.copy(image)
                    #coor = PersonDraw.draw()
                    visim_multi = img.copy()
                    co1=draw_multi.draw(visim_multi, dataset, person_conf_multi)
                    plt.imshow(visim_multi)
                    plt.show()
                    visualize.waitforbuttonpress()
                    #print("this is draw : ", co1)
                    if k==1:
                        qwr = np.zeros((1920,1080,3), np.uint8)

                        cv2.line(qwr, co1[5][0], co1[5][1],(255,0,0),3)
                        cv2.line(qwr, co1[7][0], co1[7][1],(255,0,0),3)
                        cv2.line(qwr, co1[6][0], co1[6][1],(255,0,0),3)
                        cv2.line(qwr, co1[4][0], co1[4][1],(255,0,0),3)

                        cv2.line(qwr, co1[9][0], co1[9][1],(255,0,0),3)
                        cv2.line(qwr, co1[11][0], co1[11][1],(255,0,0),3)
                        cv2.line(qwr, co1[8][0], co1[8][1],(255,0,0),3)
                        cv2.line(qwr, co1[10][0], co1[10][1],(255,0,0),3)
                        # In[9]:
                        cv2.imshow('r',qwr)
                        qwr2="stick/frame"+str(k)+".jpg"
                        qw1 = cv2.cvtColor(qwr, cv2.COLOR_BGR2GRAY)
                        qw2= cv2.cvtColor(qwr2, cv2.COLOR_BGR2GRAY)

                        fig = plt.figure("Images")
                        images = ("Original", qw1), ("Contrast", qw2)
                        for (i, (name, image)) in enumerate(images):
                            ax = fig.add_subplot(1, 3, i + 1)
                            ax.set_title(name)
                            plt.imshow(image, cmap=plt.cm.gray)
                        # compare the images
                        s,m=compare_images(qw1, qw2, "Image1 vs Image2")
                        k+=1
                        sse=s
                        mse=m

                else:
                    break
    elapsed = time.time() - start_time
    #print("sse score : ", sse)
    print("Elapsed time (s): ", elapsed)
    cap.release()
    cv2.destroyAllWindows()
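
compare_images is not included in this snippet. A minimal sketch matching the call above, assuming it returns a sum-of-squared-errors and mean-squared-error pair for two equally sized grayscale images; the title argument is presumably only used for display in the real project.

import numpy as np

def compare_images(image_a, image_b, title):
    # Hypothetical: SSE and MSE between two grayscale images of identical shape.
    diff = image_a.astype('float') - image_b.astype('float')
    sse = float(np.sum(diff ** 2))
    mse = sse / float(image_a.shape[0] * image_a.shape[1])
    return sse, mse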
Example #13
def video2posevideo(video_name):
    time_start = time.clock()

    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw, ImageFont
    font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 24)

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps) ## duration: second / fps: frame per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10)) ## ex. 720 -> 3

    pose_frame_list = []

    point_r = 3 # radius of drawn points
    point_min = 10 # point threshold - a detection with at least point_min points is treated as a real person
    part_min = 3 # part threshold - a detection with at least part_min parts (head, arms, legs) is treated as a real person
    point_num = 17 # there are 17 points per person

    def ellipse_set(person_conf_multi, people_i, point_i):
        return (person_conf_multi[people_i][point_i][0] - point_r, person_conf_multi[people_i][point_i][1] - point_r, person_conf_multi[people_i][point_i][0] + point_r, person_conf_multi[people_i][point_i][1] + point_r)

    def line_set(person_conf_multi, people_i, point_i, point_j):
        return (person_conf_multi[people_i][point_i][0], person_conf_multi[people_i][point_i][1], person_conf_multi[people_i][point_j][0], person_conf_multi[people_i][point_j][1])

    def draw_ellipse_and_line(draw, person_conf_multi, people_i, a, b, c, point_color):
        draw.ellipse(ellipse_set(person_conf_multi, people_i, a), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, b), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, c), fill=point_color)
        draw.line(line_set(person_conf_multi, people_i, a, b), fill=point_color, width=5)
        draw.line(line_set(person_conf_multi, people_i, b, c), fill=point_color, width=5)

    for i in range(0, video_frame_number):
        image = video.get_frame(i/video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        # print('person_conf_multi: ')
        # print(type(person_conf_multi))
        # print(person_conf_multi)

        # Add library to save image
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        people_real_num = 0
        people_part_num = 0

        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ' + str(people_num))

        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_list = []
            point_count = 0
            point_i = 0 # index of points
            part_count = 0 # count of parts in THAT person

            # To find rectangle which include that people - list of points x, y coordinates
            people_x = []
            people_y = []

            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0: # a point at (0, 0) means no detection, so skip it
                    point_count = point_count + 1
                    point_list.append(point_i)

            # Draw each parts
            if (5 in point_list) and (7 in point_list) and (9 in point_list): # Draw left arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 5, 7, 9, point_color)
                part_count = part_count + 1
            if (6 in point_list) and (8 in point_list) and (10 in point_list): # Draw right arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 6, 8, 10, point_color)
                part_count = part_count + 1
            if (11 in point_list) and (13 in point_list) and (15 in point_list): # Draw left leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 11, 13, 15, point_color)
                part_count = part_count + 1
            if (12 in point_list) and (14 in point_list) and (16 in point_list): # Draw right leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 12, 14, 16, point_color)
                part_count = part_count + 1
            if point_count >= point_min:
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0: # a point at (0, 0) means no detection, so skip it
                        draw.ellipse(ellipse_set(person_conf_multi, people_i, point_i), fill=point_color)
                        people_x.append(person_conf_multi[people_i][point_i][0])
                        people_y.append(person_conf_multi[people_i][point_i][1])
                # Draw rectangle which include that people
                draw.rectangle([min(people_x), min(people_y), max(people_x), max(people_y)], fill=point_color, outline=5)


            if part_count >= part_min:
                people_part_num = people_part_num + 1

        draw.text((0, 0), 'People(by point): ' + str(people_real_num) + ' (threshold = ' + str(point_min) + ')', (0,0,0), font=font)
        draw.text((0, 32), 'People(by line): ' + str(people_part_num) + ' (threshold = ' + str(part_min) + ')', (0,0,0), font=font)
        draw.text((0, 64), 'Frame: ' + str(i) + '/' + str(video_frame_number), (0,0,0), font=font)
        draw.text((0, 96), 'Total time required: ' + str(round(time.clock() - time_start, 1)) + 'sec', (0,0,0))

        print('people_real_num: ' + str(people_real_num))
        print('people_part_num: ' + str(people_part_num))
        print('frame: ' + str(i))

        image_img_numpy = np.asarray(image_img)

        pose_frame_list.append(image_img_numpy)

    video_pose = ImageSequenceClip(pose_frame_list, fps=video.fps)
    video_pose.write_videofile("testset/" + video_name + "_pose.mp4", fps=video.fps)

    print("Time(s): " + str(time.clock() - time_start))
Example #14
import sys, os
from PIL import Image

# Disabling cuDNN autotuning only takes effect when set as an environment variable before TensorFlow is imported
os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'

sys.path.insert(1, 'pose_tensorflow')

from util.config import load_config
from nnet import predict
from util import visualize
from dataset.pose_dataset import data_to_input

os.chdir("pose_tensorflow")
cfg = {}
cfg['cfg'] = load_config("demo/pose_cfg.yaml")
cfg['sess'], cfg['inputs'], cfg['outputs'] = predict.setup_pose_prediction(
    cfg['cfg'])
os.chdir("..")


def resize_image(img: Image):
    basewidth = 300

    wpercent = basewidth / img.size[0]
    hsize = int(img.size[1] * wpercent)
    img = img.resize((basewidth, hsize), Image.ANTIALIAS)

    return img


def get_pose(image, d=cfg):
    image = resize_image(image)
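
The snippet is cut off at this point. A plausible continuation of get_pose, assuming it follows the same single-person pattern as the other examples and uses the globals bundled in the cfg dictionary above; the three-value unpacking of extract_cnn_output matches Example #1 but may differ in other forks.

import numpy as np

def get_pose_sketch(image, d=cfg):
    # Hypothetical completion: resize, run the detector, return (x, y, score) per joint.
    image = resize_image(image)
    image_batch = data_to_input(np.array(image))
    outputs_np = d['sess'].run(d['outputs'], feed_dict={d['inputs']: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, d['cfg'])
    return predict.argmax_pose_predict(scmap, locref, d['cfg'].stride)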
Example #15
def video2poseframe(video_name):
    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps) ## duration: second / fps: frame per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10)) ## ex. 720 -> 3

    if not os.path.exists('testset/' + video_name):
        os.makedirs('testset/' + video_name)

    for i in range(0, video_frame_number):
        image = video.get_frame(i/video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        print('person_conf_multi: ')
        print(type(person_conf_multi))
        print(person_conf_multi)

        # Add library to save image
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        point_num = 17
        print('person_conf_multi.size: ')
        print(person_conf_multi.size)
        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ')
        print(people_num)

        point_i = 0 # index of points
        point_r = 5 # radius of points

        people_real_num = 0
        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_count = 0
            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0: # a point at (0, 0) means no detection, so skip it
                    point_count = point_count + 1
            if point_count > 5: # if a person has more than 5 detected points, treat them as a real person
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    draw.ellipse((person_conf_multi[people_i][point_i][0] - point_r, person_conf_multi[people_i][point_i][1] - point_r, person_conf_multi[people_i][point_i][0] + point_r, person_conf_multi[people_i][point_i][1] + point_r), fill=point_color)

        print('people_real_num: ')
        print(people_real_num)

        video_name_result = 'testset/' + video_name + '/frame_pose_' + str(i).zfill(video_frame_ciphers) + '.jpg'
        image_img.save(video_name_result, "JPEG")  # Pillow expects "JPEG", not "JPG"
Example #16
File: main.py Project: justiniansiah/Aegis
        def poser():
            global state
            global points
            global reps

            import os
            import sys
            import cv2
            import time
            import numpy as np

            sys.path.append(os.path.dirname(__file__) + "/../")

            from scipy.misc import imread

            from config import load_config
            from nnet import predict
            from util import visualize
            from dataset.pose_dataset import data_to_input
            cfg = load_config("demo/pose_cfg.yaml")

            # Load and setup CNN part detector
            sess2, inputs, outputs = predict.setup_pose_prediction(cfg)

            camera = cv2.VideoCapture(0)
            # Read image from file

            prevPoints = -1

            while self.running:
                r, image = camera.read()

                image_batch = data_to_input(image)

                # Compute prediction with the CNN
                outputs_np = sess2.run(outputs,
                                       feed_dict={inputs: image_batch})
                scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

                # Extract maximum scoring location from the heatmap, assume 1 person
                pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

                # Visualise
                data = visualize.visualize_joints(image, pose)
                frame = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)  # 4 is cv2.COLOR_BGR2RGB
                img = QtGui.QImage(frame, frame.shape[1], frame.shape[0],
                                   QtGui.QImage.Format_RGB888)
                pix = QtGui.QPixmap.fromImage(img)
                try:
                    self.lblVideo.setPixmap(pix)
                except:
                    return

                arr = []
                for i in range(14):
                    arr += pose[i].tolist()[0:2]

                predictedPose = sess.run(prediction, feed_dict={X: [arr]})
                processPose(predictedPose)

                if prevPoints != points:
                    print("Current points: " + str(points))
                    prevPoints = points
from multiperson.visualize import PersonDraw, visualize_detections

import matplotlib.pyplot as plt


cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

# Read image from file
file_name_ext = sys.argv[1] ## example: test_single_03.png
file_name = file_name_ext.split('.')[0] ## example: test_single_03
file_name_input = 'testset/' + file_name_ext
image = imread(file_name_input, mode='RGB')

image_batch = data_to_input(image)

# Compute prediction with the CNN
outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

detections = extract_detections(cfg, scmap, locref, pairwise_diff)
unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
Example #18
def AnalyzeVideosTrial(video_file):
    """
    DeepLabCut Toolbox
    https://github.com/AlexEMG/DeepLabCut

    A Mathis, [email protected]
    M Mathis, [email protected]

    This script analyzes videos based on a trained network (as specified in myconfig_analysis.py)

    You need tensorflow for evaluation. Run by:

    python3 AnalyzeVideosTrial.py video_file

    Functionalized by Adam S. Lowet, 10/25/19
    """

    ####################################################
    # Dependencies
    ####################################################

    import os.path
    import sys
    subfolder = os.getcwd().split('analysis-tools')[0]
    sys.path.append(subfolder)
    # add parent directory: (where nnet & config are!)
    sys.path.append(os.path.join(subfolder, "pose-tensorflow"))
    sys.path.append(os.path.join(subfolder, "config"))

    from myconfig_analysis import cropping, Task, date, \
        trainingsFraction, resnet, snapshotindex, shuffle,x1, x2, y1, y2, videotype, storedata_as_csv

    # Deep-cut dependencies
    from config import load_config
    from nnet import predict
    from dataset.pose_dataset import data_to_input

    # Dependencies for video:
    import pickle
    # import matplotlib.pyplot as plt
    import imageio
    from skimage.util import img_as_ubyte
    from moviepy.editor import VideoFileClip
    import skimage
    import skimage.color
    import time
    import pandas as pd
    import numpy as np
    import os
    from tqdm import tqdm


    def getpose(image, cfg, outputs, outall=False):
        ''' Adapted from DeeperCut, see pose-tensorflow folder'''
        image_batch = data_to_input(skimage.color.gray2rgb(image))
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref = predict.extract_cnn_output(outputs_np, cfg)
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        if outall:
            return scmap, locref, pose
        else:
            return pose


    ####################################################
    # Loading data, and defining model folder
    ####################################################

    basefolder = os.path.join('..','..','pose-tensorflow','models')
    modelfolder = os.path.join(basefolder, Task + str(date) + '-trainset' +
                   str(int(trainingsFraction * 100)) + 'shuffle' + str(shuffle))

    cfg = load_config(os.path.join(modelfolder , 'test' ,"pose_cfg.yaml"))

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split('.')[0]
        for fn in os.listdir(os.path.join(modelfolder , 'train'))
        if "index" in fn
    ])
    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    print(modelfolder)
    print(Snapshots)

    ##################################################
    # Compute predictions over images
    ##################################################

    # Check if data already was generated:
    cfg['init_weights'] = os.path.join(modelfolder , 'train', Snapshots[snapshotindex])

    # Name for scorer:
    trainingsiterations = (cfg['init_weights'].split('/')[-1]).split('-')[-1]

    # Name for scorer:
    scorer = 'DeepCut' + "_resnet" + str(resnet) + "_" + Task + str(
        date) + 'shuffle' + str(shuffle) + '_' + str(trainingsiterations)


    cfg['init_weights'] = os.path.join(modelfolder , 'train', Snapshots[snapshotindex])
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    pdindex = pd.MultiIndex.from_product(
        [[scorer], cfg['all_joints_names'], ['x', 'y', 'likelihood']],
        names=['scorer', 'bodyparts', 'coords'])

    ##################################################
    # Datafolder
    ##################################################

    # video_dir='../videos/' #where your folder with videos is.
    frame_buffer = 10

    #os.chdir(video_dir)
    #videos = np.sort([fn for fn in os.listdir(os.curdir) if (videotype in fn)])
    #print("Starting ", video_dir, videos)
    #for video in videos:
    video = video_file
    dataname = video.split('.')[0] + scorer + '.h5'
    try:
        # Attempt to load data...
        pd.read_hdf(dataname)
        print("Video already analyzed!", dataname)
    except FileNotFoundError:
        print("Loading ", video)
        clip = VideoFileClip(video)
        ny, nx = clip.size  # dimensions of frame (height, width)
        fps = clip.fps
        #nframes = np.sum(1 for j in clip.iter_frames()) #this is slow (but accurate)
        nframes_approx = int(np.ceil(clip.duration * clip.fps) + frame_buffer)
        # this will overestimate the number of frames (see https://github.com/AlexEMG/DeepLabCut/issues/9). This is especially a problem
        # for high frame rates and long durations due to rounding errors (as Rich Warren found). Later we crop the result (line 187)
        
        if cropping:
            clip = clip.crop(
                y1=y1, y2=y2, x1=x1, x2=x2)  # one might want to adjust

        print("Duration of video [s]: ", clip.duration, ", recorded with ", fps,
              "fps!")
        print("Overall # of frames: ", nframes_approx,"with cropped frame dimensions: ", clip.size)

        start = time.time()
        PredicteData = np.zeros((nframes_approx, 3 * len(cfg['all_joints_names'])))
        clip.reader.initialize()
        print("Starting to extract posture")
        for index in tqdm(range(nframes_approx)):
            #image = img_as_ubyte(clip.get_frame(index * 1. / fps))
            image = img_as_ubyte(clip.reader.read_frame())
            # Thanks to Rick Warren for the following snippet:
            # if close to end of video, start checking whether two adjacent frames are identical
            # this should only happen when moviepy has reached the final frame
            # if two adjacent frames are identical, terminate the loop
            if index==int(nframes_approx-frame_buffer*2):
                last_image = image
            elif index>int(nframes_approx-frame_buffer*2):
                if (image==last_image).all():
                    nframes = index
                    print("Detected frames: ", nframes)
                    break
                else:
                    last_image = image
            pose = getpose(image, cfg, outputs)
            PredicteData[index, :] = pose.flatten()  # NOTE: thereby cfg['all_joints_names'] should be same order as bodyparts!

        stop = time.time()

        dictionary = {
            "start": start,
            "stop": stop,
            "run_duration": stop - start,
            "Scorer": scorer,
            "config file": cfg,
            "fps": fps,
            "frame_dimensions": (ny, nx),
            "nframes": nframes
        }
        metadata = {'data': dictionary}

        print("Saving results...")
        DataMachine = pd.DataFrame(PredicteData[:nframes,:], columns=pdindex, index=range(nframes)) #slice pose data to have same # as # of frames.
        DataMachine.to_hdf(dataname, 'df_with_missing', format='table', mode='w')
        
        if storedata_as_csv:
            DataMachine.to_csv(video.split('.')[0] + scorer+'.csv')
        
        with open(dataname.split('.')[0] + 'includingmetadata.pickle',
                  'wb') as f:
            pickle.dump(metadata, f, pickle.HIGHEST_PROTOCOL)
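
Once the analysis has run, the saved HDF file can be read back with pandas. A short usage sketch; the video name and scorer string are illustrative and would normally be built exactly as above.

import pandas as pd

dataname = 'myvideo' + 'DeepCut_resnet50_TaskJan1shuffle1_500000' + '.h5'
df = pd.read_hdf(dataname, 'df_with_missing')
print(df.head())  # MultiIndex columns: (scorer, bodypart, x / y / likelihood)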
Example #19
def analyse():
    basefolder = '../pose-tensorflow/models/'  # for cfg file & ckpt!
    modelfolder = (basefolder + Task + str(date) + '-trainset' +
                   str(int(trainingsFraction * 100)) + 'shuffle' +
                   str(shuffle))
    cfg = load_config(modelfolder + '/test/' + "pose_cfg.yaml")

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split('.')[0] for fn in os.listdir(modelfolder + '/train/')
        if "index" in fn
    ])
    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    print(modelfolder)
    print(Snapshots)

    ##################################################
    # Compute predictions over images
    ##################################################

    # Check if data already was generated:
    cfg['init_weights'] = modelfolder + '/train/' + Snapshots[snapshotindex]

    # Name for scorer:
    trainingsiterations = (cfg['init_weights'].split('/')[-1]).split('-')[-1]

    # Name for scorer:
    scorer = 'DeepCut' + "_resnet" + str(resnet) + "_" + Task + str(
        date) + 'shuffle' + str(shuffle) + '_' + str(trainingsiterations)
    cfg['init_weights'] = modelfolder + '/train/' + Snapshots[snapshotindex]
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    pdindex = pd.MultiIndex.from_product(
        [[scorer], cfg['all_joints_names'], ['x', 'y', 'likelihood']],
        names=['scorer', 'bodyparts', 'coords'])

    ##################################################
    # Datafolder
    ##################################################

    # Folder where your tiffstacks are:
    os.chdir(videofolder)
    videos = np.sort([fn for fn in os.listdir(os.curdir) if (".tif" in fn)])

    print("Starting ", videofolder, videos)
    for tiffstack in videos:
        dataname = tiffstack.split('.')[0] + scorer + '.h5'
        try:
            # Attempt to load data...
            pd.read_hdf(dataname)
            print("tiffstack already analyzed!", dataname)
        except:
            print("Loading ", tiffstack)

            im = io.imread(tiffstack)
            nframes = np.shape(
                im
            )[0]  # Assuming: numframes x width x height [otherwise consider changing this!]

            start = time.time()
            PredicteData = np.zeros(
                (nframes, 3 * len(cfg['all_joints_names'])))

            print("Starting to extract posture")
            for index in tqdm(range(nframes)):
                image = img_as_ubyte(im[index])
                pose = getpose(image, cfg, outputs)
                PredicteData[index, :] = pose.flatten(
                )  # NOTE: thereby cfg['all_joints_names'] should be same order as bodyparts!

            stop = time.time()

            dictionary = {
                "start": start,
                "stop": stop,
                "run_duration": stop - start,
                "Scorer": scorer,
                "config file": cfg,
                "nframes": nframes
            }
            metadata = {'data': dictionary}

            print("Saving results...")
            DataMachine = pd.DataFrame(PredicteData,
                                       columns=pdindex,
                                       index=range(nframes))
            DataMachine.to_hdf(dataname,
                               'df_with_missing',
                               format='table',
                               mode='w')
            if storedata_as_csv:
                DataMachine.to_csv(tiffstack.split('.')[0] + scorer + '.csv')
            with open(
                    dataname.split('.')[0] + 'includingmetadata.pickle',
                    'wb') as f:
                pickle.dump(metadata, f, pickle.HIGHEST_PROTOCOL)
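
This function calls getpose without defining it. A sketch that mirrors the getpose helper from Example #18 above, under the same assumptions (sess and inputs come from setup_pose_prediction, and this fork's extract_cnn_output returns two values):

import skimage.color
from dataset.pose_dataset import data_to_input

def getpose(image, cfg, outputs, outall=False):
    # Mirrors the helper in Example #18; relies on the enclosing sess and inputs.
    image_batch = data_to_input(skimage.color.gray2rgb(image))
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref = predict.extract_cnn_output(outputs_np, cfg)
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
    return (scmap, locref, pose) if outall else pose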
Example #20
# import video_pose

####################

cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

##########
## Get the source of video

parser = ap.ArgumentParser()
parser.add_argument('-f', "--videoFile", help="Path to Video File")
parser.add_argument('-w', "--videoWidth", help="Width of Output Video")
parser.add_argument('-o', "--videoType", help="Extension of Output Video")

args = vars(parser.parse_args())

if args["videoFile"] is not None:
    video_name = args["videoFile"]
else:
    print("You have to input videoFile name")
Example #21
def test_net(visualise, cache_scoremaps):
    # Enable Python logging
    logging.basicConfig(level=logging.INFO)

    # Load the configuration file
    cfg = load_config()
    # Create a dataset reader instance from the information in the config file
    dataset = create_dataset(cfg)
    # Do not shuffle the data
    dataset.set_shuffle(False)
    # Tell the dataset reader there are no labels, i.e. we are in test mode
    dataset.set_test_mode(True)

    # This function returns the session, the input ops, and the output ops
    sess, inputs, outputs = setup_pose_prediction(cfg)

    # Whether to cache the heatmaps produced during testing
    if cache_scoremaps:
        # Directory where the heatmaps are saved
        out_dir = cfg.scoremap_dir
        # Create the directory if it does not exist
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # Number of images
    num_images = dataset.num_images
    # All predicted joint coordinates are stored here
    predictions = np.zeros((num_images, ), dtype=np.object)

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        # Get a batch of data
        batch = dataset.next_batch()

        # Run the prediction
        outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

        # Get the heatmaps and the location-refinement maps
        scmap, locref = extract_cnn_output(outputs_np, cfg)

        # Get the final joint coordinates
        '''
        pose = [ [ pos_f8[::-1], [scmap[maxloc][joint_idx]] ], ... ]
        In other words, the structure is:
        pose = [ [joint coordinates, confidence of those coordinates], ... ]
        '''
        pose = argmax_pose_predict(scmap, locref, cfg.stride)

        pose_refscale = np.copy(pose)
        # Dividing by the scale maps the coordinates back onto the original, unscaled image
        # Note that the slice 0:2 only covers indices 0 and 1
        pose_refscale[:, 0:2] /= cfg.global_scale
        predictions[k] = pose_refscale

        if visualise:
            # 获取图片
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            # Show the heatmaps
            visualize.show_heatmaps(cfg, img, scmap, pose)
            # Wait for a button press
            visualize.waitforbuttonpress()

        if cache_scoremaps:
            # Save the heatmaps
            base = os.path.basename(batch[Batch.data_item].im_path)
            raw_name = os.path.splitext(base)[0]
            out_fn = os.path.join(out_dir, raw_name + '.mat')
            scipy.io.savemat(out_fn,
                             mdict={'scoremaps': scmap.astype('float32')})

            # Save the location-refinement heatmaps
            out_fn = os.path.join(out_dir, raw_name + '_locreg' + '.mat')
            if cfg.location_refinement:
                scipy.io.savemat(
                    out_fn, mdict={'locreg_pred': locref.astype('float32')})

    # Save the final predicted joint coordinates
    scipy.io.savemat('predictions.mat', mdict={'joints': predictions})

    sess.close()