Example #1
def main(args):
    weight_file = args.weight
    process_speed = args.process_speed
    resize_fac = args.resize_factor

    print('start processing...')

    # Video input & output
    data_dir = args.dir
    json_path = args.coco

    # load model
    print('[*]Loading model...')
    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_file))
    model = torch.nn.DataParallel(model).cuda()
    model.float()
    model.eval()
    print('Model Ready!')

    # Video reader
    t0 = time.time()
    acc_count = 0
    data = get_data(data_dir, json_path)
    data_count = len(data)
    for i, (input_path, keypoints) in enumerate(data):
        input_image = cv2.imread(input_path)
        t1 = time.time()
        # generate image with body parts
        resized_image = cv2.resize(input_image, (0, 0),
                                   fx=1 * resize_fac,
                                   fy=1 * resize_fac,
                                   interpolation=cv2.INTER_CUBIC)
        to_plot, canvas, joint_list, person_to_joint_assoc = process(
            model, resized_image, process_speed)
        kp_count = 0
        for c, kps in keypoints:
            kp_count += c
        if len(person_to_joint_assoc) == len(keypoints):  # detected human count matches ground truth
            acc_count += 1
        if args.verb:
            cv2.imshow('preview', to_plot)
            cv2.waitKey(1)
        t2 = time.time()
        processBar(
            i,
            data_count,
            '[{}/{}]found {} keypoints in {} humans, ground truth is {} kps in {} humans. acc:{} process time:{:.3f}, total time:{:.3f}'
            .format(i, data_count,
                    len(joint_list), len(person_to_joint_assoc), kp_count,
                    len(keypoints), acc_count / (i + 1), (t2 - t1), (t2 - t0)),
            length=20,
            end="\n")
    cv2.destroyAllWindows()
    processBar(data_count,
               data_count,
               '{}/{}, acc:{} total time:{:.3f}'.format(
                   data_count, data_count, acc_count / data_count,
                   (time.time() - t0)),
               length=20)
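
The processBar progress helper is called throughout these examples but never defined in any of them. A minimal sketch compatible with the call sites above (positional current/total, a message string, and optional length/end keywords) might look like this:

import sys

def processBar(current, total, message='', length=20, end='\r'):
    # Minimal sketch of the progress helper used by these examples;
    # the real implementation is not shown in any snippet.
    filled = int(length * current / max(total, 1))
    bar = '#' * filled + '-' * (length - filled)
    sys.stdout.write('\r[{}] {}{}'.format(bar, message, end))
    sys.stdout.flush()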
Example #2
def load_openpose_model(weights=os.path.join(dir_name, '../PoseEstimation/network/weight/pose_model.pth'),
                        model_type='vgg19'):

    model = get_model(model_type)
    model.load_state_dict(torch.load(weights))
    model = torch.nn.DataParallel(model).to(device)
    model.float()
    model.eval()

    return model
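
load_openpose_model relies on module-level dir_name and device names that are not shown in this snippet. A hedged usage sketch that defines them for illustration:

import os
import torch

# dir_name and device must already exist at module level when
# load_openpose_model is defined; these definitions are illustrative only.
dir_name = os.path.dirname(os.path.abspath(__file__))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = load_openpose_model()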
Example #3
def __init__(self, max_size=600):
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    weight_name = os.path.join(cur_dir, 'pose_model_scratch.pth')
    assert os.path.exists(
        weight_name), 'open pose model not found at {}'.format(weight_name)
    self.model = get_model('vgg19')
    state_dict = torch.load(weight_name)
    # remove the 'module.' prefix left by DataParallel checkpoints
    state_dict = {k[7:]: v for k, v in state_dict.items()}
    self.model.load_state_dict(state_dict)
    self.model = torch.nn.DataParallel(self.model).cuda()
    self.model.float()
    self.model.eval()
    self.max_size = max_size
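
Checkpoints saved from a DataParallel-wrapped model prefix every state-dict key with 'module.', which is what the k[7:] slice above strips. A slightly more defensive sketch removes the prefix only where it is actually present:

# Strip the 'module.' prefix only from keys that carry it.
state_dict = {
    (k[len('module.'):] if k.startswith('module.') else k): v
    for k, v in state_dict.items()
}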
Example #4
def create_pose_estimation_model(pretrained,
                                 dataset,
                                 arch,
                                 load_vgg19=None,
                                 parallel=True,
                                 device_ids=None):
    # noinspection PyGlobalUndefined
    global msglogger

    model = None
    dataset = dataset.lower()

    if dataset == 'coco':
        if arch == 'shufflenetv2':
            model = rtpose_shufflenetV2.Network(width_multiplier=1.0)
            if pretrained:
                msglogger.info(
                    'No pretrained ShuffleNetV2 model available. Init randomly.'
                )
        elif arch == 'vgg19':
            model = rtpose_vgg.get_model(trunk='vgg19')
            if pretrained:
                model_dir = Path('./pretrained')
                model_dir.mkdir(exist_ok=True)
                rtpose_vgg.use_vgg(model, model_dir, 'vgg19')
            if load_vgg19:
                model.load_state_dict(torch.load(load_vgg19))
        elif arch == 'hourglass':
            model = rtpose_hourglass.hg(num_stacks=8,
                                        num_blocks=1,
                                        paf_classes=38,
                                        ht_classes=19)
            if pretrained:
                msglogger.info(
                    'No pretrained Hourglass model available. Init randomly.')
    else:
        raise ValueError('Could not recognize dataset {}'.format(dataset))

    msglogger.info("=> creating a %s%s model with the %s dataset" %
                   ('pretrained ' if pretrained else '', arch, dataset))

    if torch.cuda.is_available() and device_ids != -1:
        device = 'cuda'
        if parallel:
            print('Data parallel: device_ids =', device_ids)
            model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        device = 'cpu'

    return model.to(device)
Example #5
def pose_model():
    # Pose estimation (OpenPose)
    openpose_dir = Path('../src/pytorch_Realtime_Multi-Person_Pose_Estimation/')

    sys.path.append(str(openpose_dir))

    # get_ipython().run_line_magic('load_ext', 'autoreload')
    # get_ipython().run_line_magic('autoreload', '2')

    # openpose
    from network.rtpose_vgg import get_model

    weight_name = openpose_dir.joinpath('network/weight/pose_model.pth')
    # weight_name.mkdir(exist_ok=True)

    model = get_model('vgg19')
    model.load_state_dict(torch.load(str(weight_name)))
    model = torch.nn.DataParallel(model).cuda()
    # model.float()
    # model.eval()

    return model
Example #6
# validation data
valid_data = get_loader(args.json_path,
                        args.data_dir,
                        args.mask_dir,
                        368,
                        8,
                        preprocess='vgg',
                        training=False,
                        batch_size=args.batch_size,
                        params_transform=params_transform,
                        shuffle=False,
                        num_workers=4)
print('val dataset len: {}'.format(len(valid_data.dataset)))

# model
model = get_model(trunk='vgg19')
#model = encoding.nn.DataParallelModel(model, device_ids=args.gpu_ids)
model = torch.nn.DataParallel(model).cuda()
# load pretrained
use_vgg(model, args.model_path, 'vgg19')

# Freeze the VGG trunk weights first; they can be released later in training
for i in range(20):
    for param in model.module.model0[i].parameters():
        param.requires_grad = False

trainable_vars = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(trainable_vars,
                            lr=args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)
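
The loop above freezes the first 20 sub-modules of model0 (the VGG19 trunk). Per the comment, those weights can be released later in training; a sketch of that release step, with the schedule left as an assumption:

def unfreeze_trunk(model):
    # Re-enable gradients for the VGG trunk once warm-up is done;
    # when and whether to call this is an assumption, not shown above.
    for i in range(20):
        for param in model.module.model0[i].parameters():
            param.requires_grad = True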
Example #7
def _train(class_name, path_to_data_dir, path_to_logs_dir, batch_size, epochs,
           restore):

    # create tensorboard
    writer = SummaryWriter(path_to_logs_dir)

    # dataloader
    train_dataset = Dataset(class_name=class_name,
                            path_to_data=path_to_data_dir)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size,
                                  shuffle=True,
                                  num_workers=0,
                                  drop_last=True)
    val_dataset = Dataset(path_to_data=path_to_data_dir,
                          class_name=class_name,
                          split='val')
    val_dataloader = DataLoader(val_dataset,
                                batch_size,
                                shuffle=False,
                                num_workers=0,
                                drop_last=True)

    # load model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')

    # restore model
    if restore:
        model.load_state_dict(torch.load(restore))

    model.train()

    # freeze low-level layer
    for i in range(20):
        for param in model.model0[i].parameters():
            param.requires_grad = False
    trainable_vars = [
        param for param in model.parameters() if param.requires_grad
    ]
    optimizer = torch.optim.Adam(trainable_vars, lr=0.0001)

    epoch = 0
    step = 1
    best_mse = 1.0

    while epoch != epochs:
        for batch_index, (images, heatmaps_target, pafs_target, _,
                          _) in enumerate(train_dataloader):
            images = images.cuda()
            _, saved_for_loss = model(images)
            loss, heatmaps_losses, pafs_losses = _loss(saved_for_loss,
                                                       heatmaps_target.cuda(),
                                                       pafs_target.cuda())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % 10 == 0:
                print('Epoch: {}, Step: {}, Loss: {}'.format(
                    epoch, step, loss.data.item()))
            writer.add_scalar('train_total_loss/loss', loss, step)
            for stage, (heatmaps_loss, pafs_loss) in enumerate(
                    zip(heatmaps_losses, pafs_losses)):
                writer.add_scalar(
                    'train_heatmaps_loss/stage_{}'.format(str(stage)),
                    heatmaps_loss, step)
                writer.add_scalar(
                    'train_pafs_loss/stage_{}'.format(str(stage)), pafs_loss,
                    step)
            if step % 1000 == 0:
                pafs_loss, heatmaps_loss = _validate(model, val_dataloader)
                total_loss = pafs_loss + heatmaps_loss
                print('Validation Paf MSE: {} Heatmap MSE: {} Total MSE: {}'.
                      format(pafs_loss, heatmaps_loss, total_loss))
                writer.add_scalar('val/heatmaps_loss', heatmaps_loss, step)
                writer.add_scalar('val/pafs_loss', pafs_loss, step)
                writer.add_scalar('val/total_loss', total_loss, step)
                if total_loss < best_mse:
                    print('Save checkpoint')
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            path_to_logs_dir,
                            '{}-checkpoint-best.pth'.format(class_name)))
                    best_mse = total_loss
                    print('Best MSE: {}'.format(total_loss))
                model.train()
            step += 1
        epoch += 1
    print('Save checkpoint')
    torch.save(
        model.state_dict(),
        os.path.join(path_to_logs_dir,
                     '{}-checkpoint-last.pth'.format(class_name)))
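
A hypothetical invocation of _train; every argument value below is an assumption for illustration, not taken from the original project:

_train(class_name='cracker',
       path_to_data_dir='./data',
       path_to_logs_dir='./logs',
       batch_size=8,
       epochs=50,
       restore=None)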
Example #8
def main(args):
    input_data = args.input
    weight_file = args.weight
    frame_rate_ratio = args.frame_ratio
    process_speed = args.process_speed
    resize_fac = args.resize_factor
    output_dir = args.output
    output_format = '.h5'
    save_demo = args.verb

    print('start processing...')

    # Video input & output

    io_paths = organize_1to1_io_paths(input_data, VIDEO_EXT, output_dir,
                                      output_format)

    # load model
    print('[*]Loading model...')
    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_file))
    model = torch.nn.DataParallel(model).cuda()
    model.float()
    model.eval()

    # Video reader
    for input_path, output_path in zip(io_paths["input"], io_paths["output"]):
        print('[*]Process video {} into {}'.format(input_path, output_path))
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # input video info
        cap = cv2.VideoCapture(input_path)
        input_fps = cap.get(cv2.CAP_PROP_FPS)
        height = int(resize_fac * cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(resize_fac * cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        ending_frame = args.out_length
        if ending_frame is None:
            ending_frame = video_length

        out_h5 = h5py.File(output_path, mode="w")
        out_h5["height"] = height
        out_h5["width"] = width
        if save_demo:  # Video writer
            demo_path = os.path.splitext(output_path)[0] + ".mp4"
            output_fps = input_fps / frame_rate_ratio
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out_demo = cv2.VideoWriter(demo_path, fourcc, output_fps,
                                       (width, height))
        i = 0  # frame counter
        t0 = time.time()
        while (cap.isOpened()) and i < ending_frame:
            ret_val, input_image = cap.read()
            if not ret_val:
                break
            if i % frame_rate_ratio == 0:
                t1 = time.time()
                # generate image with body parts
                resized_image = cv2.resize(input_image, (0, 0),
                                           fx=1 * resize_fac,
                                           fy=1 * resize_fac,
                                           interpolation=cv2.INTER_CUBIC)
                to_plot, canvas, joint_list, person_to_joint_assoc = process(
                    model, resized_image, process_speed)
                frame_h5 = out_h5.create_group("frame%d" % i)
                frame_h5.create_dataset("joint_list", data=joint_list)
                frame_h5.create_dataset("person_to_joint_assoc",
                                        data=person_to_joint_assoc)
                if save_demo:
                    out_demo.write(canvas)
                t2 = time.time()
                processBar(
                    i,
                    ending_frame,
                    '{}/{}, process time:{:.3f}, total time:{:.3f}'.format(
                        i, ending_frame, (t2 - t1), (t2 - t0)),
                    length=20)
            i += 1
        if save_demo:
            out_demo.release()
        out_h5.close()
        processBar(ending_frame,
                   ending_frame,
                   '{}/{}, total time:{:.3f}'.format(i, ending_frame,
                                                     (time.time() - t0)),
                   length=45)
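
Several of these examples serialize results to HDF5 with scalar height/width datasets and one frame<N> group per processed frame. A sketch of reading such a file back (the path is a placeholder):

import h5py

with h5py.File('output.h5', 'r') as f:  # placeholder path
    height, width = int(f['height'][()]), int(f['width'][()])
    for name in sorted(k for k in f.keys() if k.startswith('frame')):
        joint_list = f[name]['joint_list'][()]
        assoc = f[name]['person_to_joint_assoc'][()]
        print(name, joint_list.shape, assoc.shape)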
Example #9
    13-'left_ankle'   14-'right_eye'   15-'left_eye'   16-'right_ear'
    17-'left_ear' )
    '''
    orderCOCO = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10]

    mid_1 = [1, 8, 9, 1, 11, 12, 1, 2, 3, 2, 1, 5, 6, 5, 1, 0, 0, 14, 15]

    mid_2 = [8, 9, 10, 11, 12, 13, 2, 3, 4, 16, 5, 6, 7, 17, 0, 14, 15, 16, 17]

    # This txt file can be obtained from the caffe_rtpose repository:
    # https://github.com/CMU-Perceptual-Computing-Lab/caffe_rtpose/blob/master/image_info_val2014_1k.txt

    image_dir = '/data/coco/val2014/'
    save_dir = '/data/coco/val2014_features/'

    model = get_model('vgg19')
    model = torch.nn.DataParallel(model).cuda()
    #    model = get_ying_model(stages=5, have_bn=True, have_bias=False)
    #    Load our model
    weight_name = './network/weight/pose_model_scratch.pth'
    model.load_state_dict(torch.load(weight_name))
    model = model.module
    #    model.load_state_dict(torch.load('pose_model.pth'))
    #    model.load_state_dict(torch.load('../caffe_model/dilated3_5stage_merged.pth'))
    #    model.load_state_dict(torch.load('../caffe_model/dilated3_remove_stage1_test.pth'))

    model.eval()
    model.float()
    model.cuda()

    feature_extractor = FeatureExtractor(model.model0)
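
FeatureExtractor is not defined in this snippet; one plausible minimal version, assuming it simply runs the wrapped sub-module (model.model0, the VGG trunk) in inference mode:

import torch

class FeatureExtractor:
    # Speculative minimal sketch of the undefined FeatureExtractor class.
    def __init__(self, module):
        self.module = module

    def __call__(self, x):
        with torch.no_grad():
            return self.module(x)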
Example #10
def main(args):
    output_dir = args.output_dir
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    path_to_data_dir = args.path_to_data_dir
    if not os.path.exists(path_to_data_dir):
        raise FileNotFoundError(path_to_data_dir)
    path_to_checkpoint = args.checkpoint
    if not os.path.exists(path_to_checkpoint):
        raise FileNotFoundError(path_to_checkpoint)
    class_name = args.class_name
    fps = args.fps
    img_prefix = args.img_prefix

    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameter of object size for pnp solver
    print("=> Load {} object size".format(class_name))
    path_to_object_settings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_settings):
        raise FileNotFoundError(path_to_object_settings)
    object_list = json.load(open(path_to_object_settings))['exported_objects']
    object_size = None
    for obj in object_list:
        if obj['class'].find(class_name) != -1:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameter of camera for pnp solver
    path_to_camera_settings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_settings):
        raise FileNotFoundError(path_to_camera_settings)
    intrinsic_settings = json.load(open(
        path_to_camera_settings))['camera_settings'][0]['intrinsic_settings']
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[1, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[2, 2] = 1
    try:
        dist_coeffs = np.array(
            json.load(open(path_to_camera_settings))['camera_settings'][0]
            ["distortion_coefficients"])
    except KeyError:
        dist_coeffs = np.zeros((4, 1))
    path_to_sequences = sorted(
        glob.glob(os.path.join(path_to_data_dir, '*.{}'.format(img_prefix))))

    for img_path in path_to_sequences:
        original_img = crop(cv2.imread(img_path))
        ratio = max(original_img.shape[:2]) / Config.crop_size
        img = cv2.resize(original_img, (Config.crop_size, Config.crop_size))
        img = preprocess(img).float()
        img = torch.unsqueeze(img, 0)
        out, _ = model(img.cuda())
        line, vertex = out[0].squeeze(), out[1].squeeze()
        objects, peaks = find_objects(vertex, line)
        original_img = cv2.putText(original_img,
                                   "Class name: {}".format(class_name),
                                   (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1,
                                   (255, 255, 255), 2)

        if len(objects) > 0:
            for object in objects:
                cuboid2d_points = object[1] + [
                    (object[0][0] * 8, object[0][1] * 8)
                ]
                cuboid3d_points = np.array(cuboid3d_points)
                location = None
                quaternion = None
                obj_2d_points = []
                obj_3d_points = []

                for i in range(8):
                    check_point_2d = cuboid2d_points[i]
                    # Ignore invalid points
                    if check_point_2d is None:
                        continue
                    elif (check_point_2d[0] < 0 or check_point_2d[1] < 0 or
                          check_point_2d[0] >= Config.crop_size / Config.stride or
                          check_point_2d[1] >= Config.crop_size / Config.stride):
                        continue
                    else:
                        check_point_2d = (check_point_2d[0] * Config.stride *
                                          ratio, check_point_2d[1] *
                                          Config.stride * ratio)
                    obj_2d_points.append(check_point_2d)
                    obj_3d_points.append(cuboid3d_points[i])
                centroid = tuple([
                    int(point * Config.stride * ratio) for point in object[0]
                ])
                # draw the predicted centroid as a filled circle (white is an assumed color choice)
                original_img = cv2.circle(original_img, centroid, 5,
                                          (255, 255, 255), -1)
                obj_2d_points = np.array(obj_2d_points, dtype=float)
                obj_3d_points = np.array(obj_3d_points, dtype=float)
                valid_point_count = len(obj_2d_points)
                if valid_point_count >= 5:
                    ret, rvec, tvec = cv2.solvePnP(
                        obj_3d_points,
                        obj_2d_points,
                        matrix_camera,
                        dist_coeffs,
                        flags=cv2.SOLVEPNP_ITERATIVE)
                    if ret:
                        location = list(x[0] for x in tvec)
                        quaternion = convert_rvec_to_quaternion(rvec)

                        projected_points, _ = cv2.projectPoints(
                            cuboid3d_points, rvec, tvec, matrix_camera,
                            dist_coeffs)
                        projected_points = np.squeeze(projected_points)
                        # If location.Z is negative, the object is behind the camera, so flip both location and rotation
                        x, y, z = location
                        original_img = cv2.putText(
                            original_img,
                            "Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                            .format(x / 10, y / 10, z / 10), (50, 150),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
                        print(
                            "Location Prediction: x: {:.2f} y: {:.2f} z: {:.2f}"
                            .format(x / 10, y / 10, z / 10))
                        if z < 0:
                            # Get the opposite location
                            location = [-x, -y, -z]

                            # Change the rotation by 180 degree
                            rotate_angle = np.pi
                            rotate_quaternion = Quaternion.from_axis_rotation(
                                location, rotate_angle)
                            quaternion = rotate_quaternion.cross(quaternion)
                        vertexes = [tuple(p) for p in projected_points]
                        plot(original_img, vertexes)
            if args.save:
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                output_path = os.path.join(output_dir, img_path.split('/')[-1])
                print('=> Save {}'.format(output_path))
                cv2.imwrite(output_path, original_img)
            if args.plot:
                original_img = cv2.resize(original_img, (600, 600))
                cv2.imshow('prediction', original_img)
                cv2.waitKey(int(1000 / fps))
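
convert_rvec_to_quaternion is referenced above but not defined in the snippet. A sketch consistent with the pyrr-style Quaternion.from_axis_rotation calls used here, treating the OpenCV Rodrigues vector as axis-angle:

import numpy as np
from pyrr import Quaternion  # assumed to be the Quaternion used above

def convert_rvec_to_quaternion(rvec):
    # Interpret the Rodrigues vector as an axis-angle rotation.
    rvec = np.asarray(rvec, dtype=float).reshape(3)
    theta = np.linalg.norm(rvec)
    if theta < 1e-8:
        return Quaternion([0.0, 0.0, 0.0, 1.0])  # identity rotation
    return Quaternion.from_axis_rotation(rvec / theta, theta)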
Example #11
def main(args):
    input_data = args.input
    weight_file = args.weight
    process_speed = args.process_speed
    resize_fac = args.resize_factor
    output_dir = args.output
    output_format = '.h5'
    save_demo = args.verb

    print('start processing...')

    # Video input & output
    if args.input_type == 'serial':
        io_paths = organize_Nto1_io_paths(input_data, IMAGE_EXT, output_dir,
                                          output_format)
    else:
        io_paths = organize_1to1_io_paths(input_data, IMAGE_EXT, output_dir,
                                          output_format)
    data_count = len(io_paths["input"])

    # load model
    print('[*]Loading model...')
    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_file))
    model = torch.nn.DataParallel(model).cuda()
    model.float()
    model.eval()
    print('Model Ready!')

    # Video reader
    t0 = time.time()
    for i, (input_path, output_path) in enumerate(
            zip(io_paths["input"], io_paths["output"])):
        if io_paths["type"] == "1to1":
            print('[*]Process {} into {}'.format(input_path, output_path))
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            input_image = cv2.imread(input_path)
            out_h5 = h5py.File(output_path, mode="w")
            out_h5["height"] = input_image.shape[0]
            out_h5["width"] = input_image.shape[1]
            t1 = time.time()
            # generate image with body parts
            resized_image = cv2.resize(input_image, (0, 0),
                                       fx=1 * resize_fac,
                                       fy=1 * resize_fac,
                                       interpolation=cv2.INTER_CUBIC)
            to_plot, canvas, joint_list, person_to_joint_assoc = process(
                model, resized_image, process_speed)
            frame_h5 = out_h5.create_group("frame0")
            frame_h5.create_dataset("joint_list", data=joint_list)
            frame_h5.create_dataset("person_to_joint_assoc",
                                    data=person_to_joint_assoc)
            if save_demo:
                demo_path = os.path.splitext(output_path)[0] + ".jpg"
                cv2.imwrite(demo_path, canvas)
            t2 = time.time()
            processBar(i,
                       data_count,
                       '{}/{}, process time:{:.3f}, total time:{:.3f}'.format(
                           i, data_count, (t2 - t1), (t2 - t0)),
                       length=20)
        elif len(input_path[0]) > 0:
            print('[*]Process {} into {}'.format(
                os.path.dirname(input_path[0]), output_path))
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            input_image = cv2.imread(input_path[0])
            height = input_image.shape[0]
            width = input_image.shape[1]
            out_h5 = h5py.File(output_path, mode="w")
            out_h5["height"] = height
            out_h5["width"] = width
            if save_demo:  # Video writer
                demo_path = os.path.splitext(output_path)[0] + ".mp4"
                output_fps = 15
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out_demo = cv2.VideoWriter(demo_path, fourcc, output_fps,
                                           (width, height))
            t0 = time.time()
            for j, path in enumerate(input_path):
                input_image = cv2.imread(path)
                t1 = time.time()
                # generate image with body parts
                resized_image = cv2.resize(input_image, (0, 0),
                                           fx=1 * resize_fac,
                                           fy=1 * resize_fac,
                                           interpolation=cv2.INTER_CUBIC)
                to_plot, canvas, joint_list, person_to_joint_assoc = process(
                    model, resized_image, process_speed)
                frame_h5 = out_h5.create_group("frame%d" % j)
                frame_h5.create_dataset("joint_list", data=joint_list)
                frame_h5.create_dataset("person_to_joint_assoc",
                                        data=person_to_joint_assoc)
                if save_demo:
                    out_demo.write(canvas)
                t2 = time.time()
                processBar(
                    j,
                    len(input_path),
                    '{}/{}, process time:{:.3f}, total time:{:.3f}'.format(
                        j, len(input_path), (t2 - t1), (t2 - t0)),
                    length=20)
            if save_demo:
                out_demo.release()
            out_h5.close()
            processBar(len(input_path),
                       len(input_path),
                       '{}/{}, total time:{:.3f}'.format(
                           j, len(input_path), (time.time() - t0)),
                       length=45)

        cv2.destroyAllWindows()
        processBar(data_count,
                   data_count,
                   '{}/{}, total time:{:.3f}'.format(i, data_count,
                                                     (time.time() - t0)),
                   length=45)
Example #12
vis_dir = './result_vis'

# ------------------------------------------------------------------------------
tic = datetime.datetime.now()

if vis_dir is not None:
    if not os.path.exists(vis_dir):
        os.mkdir(vis_dir)
        print('mkdir:', vis_dir)
print('save vis images to:', vis_dir)

with torch.autograd.no_grad():

    if model_name == 'vgg19':  # --- VGG19
        model = get_model(trunk='vgg19',
                          numkeypoints=CF.NUM_KEYPOINTS,
                          numlims=CF.NUM_LIMBS)
        preprocess = 'vgg'
    elif model_name == 'shufflenet':  # --- ShuffleNet
        model = rtpose_shufflenetV2.Network(width_multiplier=1.0,
                                            numkeypoints=CF.NUM_KEYPOINTS,
                                            numlims=CF.NUM_LIMBS,
                                            multistage=multistage)
        preprocess = 'rtpose'
    else:
        print('Please check the model name.')
        exit(0)
    print('Network backbone:{}'.format(model_name))

    # this path is with respect to the root of the project
    state_dict = torch.load(weight_name)
Example #13
# validation data
valid_data = None
if params.val_nbatch > 0:
    valid_data = get_loader(json_path,
                            data_dir,
                            mask_dir,
                            inp_size,
                            feat_stride,
                            preprocess='vgg',
                            training=False,
                            batch_size=params.batch_size,
                            shuffle=True)
    print('val dataset len: {}'.format(len(valid_data.dataset)))

# model
model = get_model(trunk=trunk)

# load pretrained
if params.ckpt is None:
    use_vgg(model, model_path, trunk)

# Freeze the VGG trunk weights first; they can be released later in training
for i in range(20):
    for param in model.model0[i].parameters():
        param.requires_grad = False

trainable_vars = [param for param in model.parameters() if param.requires_grad]
params.optimizer = torch.optim.SGD(trainable_vars,
                                   lr=params.init_lr,
                                   momentum=momentum,
                                   weight_decay=weight_decay)
Example #14
def extract_pose_main(video_path, source_path):
    #if __name__ == "__main__":
    '''
    :param video_path: video whose frame size is 256*256
    :param source_path: image size must be 256*176 and .jpg
    :return:
    '''
    os.environ["CUDA_VISIBLE_DEVICES"] = '7'

    model = get_model('vgg19')
    model.load_state_dict(torch.load(weight_name))
    model.cuda()
    model.float()
    model.eval()

    video = video_path
    print("video path is ", video_path)
    source_img = cv2.imread(source_path)
    video_capture = cv2.VideoCapture(video)

    pairLst = "./demo_data/demo-resize-pairs-test.csv"
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    print("fps is ", fps)
    a = 0

    while video_capture.isOpened():
        if a == 0:
            result_file = open("./demo_data/demo-resize-annotation-test.csv",
                               'w')
            print("name:keypoints_y:keypoints_x", file=result_file)
            result_file1 = open("./demo_data/demo-resize-pairs-test.csv", 'w')
            writer = csv.writer(result_file1)
            writer.writerow(["from", "to"])
            extract_pose(
                video_path.split("/")[-1][:-4], source_img,
                source_path.split("/")[-1][:-4], result_file, model)
        ret, oriImg = video_capture.read()
        if not ret:  # stop when the stream ends
            break
        print(oriImg.shape)
        #oriImg = oriImg[27:710,430:900,:]
        oriImg = oriImg[40:808, 8:536, :]
        oriImg = cv2.copyMakeBorder(oriImg,
                                    0,
                                    0,
                                    120,
                                    120,
                                    cv2.BORDER_CONSTANT,
                                    value=[255, 255, 255])
        # cv2.imwrite('../demo_data/test/{}.jpg'.format(a), oriImg)
        # break
        oriImg = cv2.resize(oriImg, (256, 256), interpolation=cv2.INTER_LINEAR)
        shape_dst = np.min(oriImg.shape[0:2])

        extract_pose(
            video_path.split("/")[-1][:-4], oriImg, a, result_file, model)

        # pairLst
        writer.writerow([source_path.split("/")[-1], str(a) + ".jpg"])
        print("finished {} pics".format(a))

        # cv2.imshow('Video', to_plot)
        a = a + 1
        if a > 100:
            break

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    cv2.destroyAllWindows()
    torch.cuda.empty_cache()
Example #15
def _eval(class_name, path_to_data_dir, path_to_checkpoint, img_prefix):

    # load pre-trained model
    model = get_model(trunk='vgg19')
    model = model.cuda()
    use_vgg(model, './model', 'vgg19')
    print("=> Load pre-trained model from {}".format(path_to_checkpoint))
    model.load_state_dict(torch.load(path_to_checkpoint))
    model.eval()

    # parameter of object size for pnp solver
    print("=> Load {} object size".format(class_name))
    path_to_object_settings = os.path.join(path_to_data_dir,
                                           '_object_settings.json')
    if not os.path.exists(path_to_object_settings):
        raise FileNotFoundError(path_to_object_settings)
    object_list = json.load(open(path_to_object_settings))['exported_objects']
    object_size = None
    for obj in object_list:
        if obj['class'].find(class_name) != -1:
            object_size = obj['cuboid_dimensions']
    if not object_size:
        raise ValueError("Object size is none")
    _cuboid3d = Cuboid3d(object_size)
    cuboid3d_points = np.array(_cuboid3d.get_vertices())

    # parameter of camera for pnp solver
    path_to_camera_settings = os.path.join(path_to_data_dir,
                                           '_camera_settings.json')
    if not os.path.exists(path_to_camera_settings):
        raise FileNotFoundError(path_to_camera_settings)
    intrinsic_settings = json.load(open(
        path_to_camera_settings))['camera_settings'][0]['intrinsic_settings']
    matrix_camera = np.zeros((3, 3))
    matrix_camera[0, 0] = intrinsic_settings['fx']
    matrix_camera[1, 1] = intrinsic_settings['fy']
    matrix_camera[0, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[1, 2] = max(intrinsic_settings['cx'],
                              intrinsic_settings['cy'])
    matrix_camera[2, 2] = 1

    try:
        dist_coeffs = np.array(
            json.load(open(path_to_camera_settings))['camera_settings'][0]
            ["distortion_coefficients"])
    except KeyError:
        dist_coeffs = np.zeros((4, 1))

    # dataloader
    val_dataset = Dataset(path_to_data=path_to_data_dir,
                          class_name=class_name,
                          split='val',
                          img_prefix=img_prefix)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=1,
                                shuffle=False,
                                num_workers=0,
                                drop_last=False)

    correct = 0
    wrong = 0
    # set threshold (cm)
    threshold = 3.0

    for batch_index, (images, _, _, location_targets,
                      ratio) in tqdm(enumerate(val_dataloader)):
        images = images.cuda()
        output, _ = model(images)
        line, vertex = output[0], output[1]
        line, vertex = line.squeeze(), vertex.squeeze()
        objects, peaks = find_objects(vertex, line)
        location_predictions = []
        if len(objects) > 0:
            for object in objects:
                cuboid2d_points = object[1] + [
                    (object[0][0] * 8, object[0][1] * 8)
                ]
                cuboid3d_points = np.array(cuboid3d_points)
                location = None
                quaternion = None
                obj_2d_points = []
                obj_3d_points = []

                for i in range(8):
                    check_point_2d = cuboid2d_points[i]
                    # Ignore invalid points
                    if check_point_2d is None:
                        continue
                    elif (check_point_2d[0] < 0 or check_point_2d[1] < 0 or
                          check_point_2d[0] >= Config.crop_size / Config.stride or
                          check_point_2d[1] >= Config.crop_size / Config.stride):
                        continue
                    else:
                        check_point_2d = (check_point_2d[0] * Config.stride *
                                          ratio, check_point_2d[1] *
                                          Config.stride * ratio)
                    obj_2d_points.append(check_point_2d)
                    obj_3d_points.append(cuboid3d_points[i])
                projected_points = object[1]
                vertexes = projected_points.copy()
                centroid = tuple([
                    int(point * Config.stride * ratio) for point in object[0]
                ])
                obj_2d_points = np.array(obj_2d_points, dtype=np.float32)
                obj_3d_points = np.array(obj_3d_points, dtype=np.float32)
                valid_point_count = len(obj_2d_points)
                if valid_point_count >= 4:
                    ret, rvec, tvec = cv2.solvePnP(
                        obj_3d_points,
                        obj_2d_points,
                        matrix_camera,
                        dist_coeffs,
                        flags=cv2.SOLVEPNP_ITERATIVE)
                    if ret:
                        location = list(x[0] for x in tvec)
                        quaternion = convert_rvec_to_quaternion(rvec)

                        projected_points, _ = cv2.projectPoints(
                            cuboid3d_points, rvec, tvec, matrix_camera,
                            dist_coeffs)
                        projected_points = np.squeeze(projected_points)
                        # If location.Z is negative, the object is behind the camera, so flip both location and rotation
                        x, y, z = location
                        if z < 0:
                            # Get the opposite location
                            location = [-x, -y, -z]
                            # Change the rotation by 180 degree
                            rotate_angle = np.pi
                            rotate_quaternion = Quaternion.from_axis_rotation(
                                location, rotate_angle)
                            quaternion = rotate_quaternion.cross(quaternion)
                        vertexes = [tuple(p) for p in projected_points]
                    location_predictions.append(location)
        location_predictions = np.array(location_predictions)
        if len(location_targets) == 0:
            wrong += len(location_predictions)
        else:
            location_targets = location_targets.cpu().data.numpy()[0]
            for location_target in location_targets:
                distances = [
                    np.sqrt(
                        np.sum(
                            np.square(location_target -
                                      location_prediction / 10.0)))
                    for location_prediction in location_predictions
                ]
                if len(distances) == 0:
                    wrong += 1
                elif min(distances) > threshold:
                    wrong += 1
                else:
                    correct += 1

    print('Object: {} Accuracy: {}%'.format(
        class_name, correct / (wrong + correct) * 100.0))
Example #16
def main(args):
    input_data = args.input_dir
    input_type = args.input_type  # choose from ["image", "video"]
    output_dir = args.output_dir
    weight_file = args.weight
    input_ext = args.input_ext
    output_ext = args.out_ext
    frame_rate_ratio = args.frame_ratio  # analyze every [n] frames
    process_speed = args.process_speed  # int, 1 (fastest, lowest quality) to 4 (slowest, highest quality)
    resize_fac = args.resize_factor  # minification factor
    output_length = args.out_length  # int, frame count for output, None for input length
    show_visualize_process = args.verb  # show canvas in an OpenCV window
    rebuild_exist_file = args.rebuild

    ## Load Model
    model = get_model('vgg19')
    model.load_state_dict(t.load(weight_file))
    model = t.nn.DataParallel(model)
    model.cuda()
    model.float()
    model.eval()
    print("Model Ready!")

    ## Init I/O Paths
    _input_ext_ = IMAGE_EXT if input_ext == "image" \
        else VIDEO_EXT if input_ext == "video" \
        else input_ext if isinstance(input_ext, list) \
        else [input_ext]
    if input_type == "1to1":
        io_paths = organize_1to1_io_paths(input_data, _input_ext_, output_dir,
                                          output_ext)
    else:
        io_paths = organize_Nto1_io_paths(input_data, _input_ext_, output_dir,
                                          output_ext)
    total_item = len(io_paths["input"])
    print("Items count: ", total_item)

    ignore_item = 0
    for i, (input_dir, output_path) in enumerate(
            zip(io_paths["input"], io_paths["output"])):
        if os.path.isfile(output_path):
            if rebuild_exist_file:
                title = '[{}/{}]Rebuild {} from {}'
            else:
                print('[{}/{}]{} already exists, skipping'.format(
                    i, total_item, output_path))
                ignore_item += 1
                continue
        else:
            title = '[{}/{}]Build {} from {}'
        if isinstance(input_dir, str):  # process video
            source_position = input_dir
            loader = load_video_frames(input_dir, output_length,
                                       frame_rate_ratio)
            length, h, w = get_video_size(input_dir, output_length)
        elif isinstance(input_dir, list):  # process images
            source_position = os.path.dirname(input_dir[0])
            loader = load_images_list(input_dir, output_length,
                                      frame_rate_ratio)
            length, h, w = get_images_size(input_dir, output_length)
        else:
            raise TypeError("Expected string or list(string), but got %s" %
                            type(input_dir))
        print(title.format(i, total_item, output_path, source_position))
        # Video writer
        try:
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            output_fps = 15
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            height = int(resize_fac * h)
            width = int(resize_fac * w)
            print("source:{}x{}  target:{}x{}".format(h, w, height, width))
            out = cv2.VideoWriter(output_path, fourcc, output_fps,
                                  (width, height))
            out_h5 = h5py.File(output_path + ".h5", mode="w")
            out_h5["height"] = height
            out_h5["width"] = width
            t0 = time.time()
            for frame_idx, frame in enumerate(loader):  # avoid shadowing the outer item index
                t1 = time.time()
                # generate image with body parts
                resized_image = cv2.resize(frame, (0, 0),
                                           fx=1 * resize_fac,
                                           fy=1 * resize_fac,
                                           interpolation=cv2.INTER_CUBIC)
                to_plot, canvas, joint_list, person_to_joint_assoc = process(
                    model, resized_image, process_speed)
                # save outputs
                out.write(canvas)
                frame_h5 = out_h5.create_group("frame%d" % i)
                frame_h5.create_dataset("joint_list", data=joint_list)
                frame_h5.create_dataset("person_to_joint_assoc",
                                        data=person_to_joint_assoc)
                t2 = time.time()
                # print messages
                print(
                    '{}[{}/{}] process time:{:.3f}s total time:{:.3f}s'.format(
                        time.strftime('%H:%M:%S'), frame_idx, length, (t2 - t1),
                        (t2 - t0)))
                if show_visualize_process:
                    cv2.imshow(os.path.basename(output_path), to_plot)
                    cv2.waitKey(1)
        finally:
            out.release()
            out_h5.close()
            cv2.destroyAllWindows()
    print("Prosessed {} items, ignore {} existing items. Saved into {}".format(
        total_item - ignore_item, ignore_item, output_dir))
    print("All work are Finished!")