Example 1
def inference_video(video_path, detector_2d):
    """
    Do image -> 2d points -> 3d points to video.
    :param detector_2d: used 2d joints detector. Can be {alpha_pose, hr_pose}
    :param video_path: relative to outputs
    :return: None
    """
    args = parse_args()

    args.detector_2d = detector_2d
    dir_name = os.path.dirname(video_path)
    dir_name_split = dir_name[:dir_name.rfind('/')]  # parent of the video's directory
    new_dir_name = os.path.join(dir_name_split, 'outputvideo')

    basename = os.path.basename(video_path)
    video_name = basename[:basename.rfind('.')]

    args.viz_video = video_path
    #args.viz_output = f'{dir_name}/{args.detector_2d}_{video_name}.mp4'
    args.viz_output = f'{new_dir_name}/{args.detector_2d}_{video_name}.mp4'

    # args.viz_limit = 20
    # args.input_npz = 'outputs/alpha_pose_dance/dance.npz'

    args.evaluate = 'pretrained_h36m_detectron_coco.bin'

    with Timer(video_path):
        main(args)
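A minimal usage sketch; the path and detector name below are placeholders, not files from the repo:

# Hypothetical call; assumes the outputs/ layout this module expects.
inference_video('outputs/dance/dance.mp4', detector_2d='alpha_pose')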
Example 2
def sgdRMSPropNestorov(w0, x, y, f, grad, learning_rate=0.01,
                       batch_size=100, max_epochs=1000,
                       alpha=0.9, delta=1e-6, ro=0.9, eps=1e-6,
                       shuffle=False, rng=None):
    """Mini-batch SGD with RMSProp gradient scaling and Nesterov momentum."""
    tm = Timer()
    n = x.shape[0]
    n_batches = get_num_batches(n, batch_size)
    w = np.copy(w0)
    v = np.zeros(len(w0), dtype=w0.dtype)  # velocity
    r = np.zeros(len(w0), dtype=w0.dtype)  # gradient accumulation variable
    epoch_losses = np.zeros(max_epochs, dtype=float)
    epoch = 0
    w_best = np.copy(w0)
    loss_best = np.inf
    if n <= batch_size:
        # no need to shuffle since all instances will be used up in one batch
        shuffle = False
    if shuffle:
        shuffled_idxs = np.arange(n)
        if rng is None:
            np.random.shuffle(shuffled_idxs)
        else:
            rng.shuffle(shuffled_idxs)
    else:
        shuffled_idxs = None
    prev_loss = np.inf
    while epoch < max_epochs:
        losses = np.zeros(n_batches, dtype=float)
        for i in range(n_batches):
            xi, yi = get_sgd_batch(x, y, i, batch_size, shuffled_idxs=shuffled_idxs)
            tw = w + alpha * v  # Nesterov look-ahead point
            g = grad(tw, xi, yi)  # gradient evaluated at the look-ahead point
            r[:] = ro * r + (1 - ro) * np.multiply(g, g)  # accumulate squared gradients
            dw_scale = (learning_rate / (np.sqrt(delta + r)))
            v = alpha * v - np.multiply(dw_scale, g)  # velocity update with RMSProp scaling
            w[:] = w + v
            losses[i] = f(w, xi, yi)
        loss = np.mean(losses)
        if np.isnan(loss):
            logger.debug("loss is nan")
            logger.debug("|w|=%f" % w.dot(w))
            raise ArithmeticError("loss is nan in sgd")
        epoch_losses[epoch] = loss
        if loss < loss_best:
            # pocket algorithm
            np.copyto(w_best, w)
            loss_best = loss
        epoch += 1
        if (loss < eps or np.abs(loss - prev_loss) < eps or
            avg_loss_check(epoch_losses, epoch, n=20, eps=eps)):
            break
        prev_loss = loss
    debug_log_sgd_losses("sgdRMSPropNestorov", epoch_losses, epoch, n=20, timer=tm)
    # logger.debug("epochs: %d" % epoch)
    # logger.debug("net losses:")
    # logger.debug("epoch losses:\n%s" % str(epoch_losses[0:epoch]))
    # logger.debug("best loss: %f" % loss_best)
    return w_best
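For reference, the per-batch update implemented above, in LaTeX notation with learning rate $\eta$ (learning_rate), momentum $\alpha$ (alpha), decay $\rho$ (ro), and stabilizer $\delta$ (delta):

$\tilde{w} = w + \alpha v, \qquad g = \nabla L(\tilde{w}; x_i, y_i)$
$r \leftarrow \rho\, r + (1 - \rho)\, g \odot g$
$v \leftarrow \alpha v - \frac{\eta}{\sqrt{\delta + r}} \odot g, \qquad w \leftarrow w + v$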
Example 3
def sgd(w0, x, y, f, grad, learning_rate=0.01,
        batch_size=100, max_epochs=1000, eps=1e-6, shuffle=False, rng=None):
    """Plain mini-batch stochastic gradient descent with a pocket (best-so-far) weight vector."""
    tm = Timer()
    n = x.shape[0]
    n_batches = get_num_batches(n, batch_size)
    w = np.copy(w0)
    epoch_losses = np.zeros(max_epochs, dtype=float)
    epoch = 0
    w_best = np.copy(w0)
    loss_best = np.inf
    if n <= batch_size:
        shuffle = False  # no need to shuffle since all instances will be used up in one batch
    if shuffle:
        shuffled_idxs = np.arange(n)
        if rng is None:
            np.random.shuffle(shuffled_idxs)
        else:
            rng.shuffle(shuffled_idxs)
    else:
        shuffled_idxs = None
    while epoch < max_epochs:
        losses = np.zeros(n_batches, dtype=float)
        for i in range(n_batches):
            xi, yi = get_sgd_batch(x, y, i, batch_size, shuffled_idxs=shuffled_idxs)
            if xi.shape[0] == 0:
                raise ValueError("Batch size of 0")
            g = grad(w, xi, yi)
            w -= learning_rate * g
            losses[i] = f(w, xi, yi)
            # Disabled debug check for exploding/NaN gradients; enable when diagnosing divergence.
            # g_norm = g.dot(g)
            # if np.isnan(g_norm) or np.isinf(g_norm):
            #     logger.debug("|grad|=%f, i=%d/%d, epoch:%d" % (g_norm, i + 1, n_batches, epoch))
            #     logger.debug("|w0|=%f" % w0.dot(w0))
            #     raise ArithmeticError("grad is nan/inf in sgd")
        loss = np.mean(losses)
        if np.isnan(loss):
            logger.debug("loss is nan")
            logger.debug("|w|=%f" % w.dot(w))
            raise ArithmeticError("loss is nan in sgd")
        epoch_losses[epoch] = loss
        if loss < loss_best:
            # pocket algorithm
            np.copyto(w_best, w)
            loss_best = loss
        epoch += 1
        if loss < eps:
            break
    debug_log_sgd_losses("sgd", epoch_losses, epoch, n=20, timer=tm)
    # logger.debug("epochs: %d" % epoch)
    # logger.debug("net losses:")
    # logger.debug("epoch losses:\n%s" % str(epoch_losses[0:epoch]))
    # logger.debug("best loss: %f" % loss_best)
    return w_best
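A toy least-squares run as a usage sketch; it assumes sgd and its helpers (get_num_batches, get_sgd_batch, Timer, logger, debug_log_sgd_losses) are importable from this module:

import numpy as np

# Recover a known weight vector from noisy linear observations.
rng = np.random.RandomState(42)
x = rng.randn(500, 3)
w_true = np.array([1.5, -2.0, 0.5])
y = x.dot(w_true) + 0.01 * rng.randn(500)

def f(w, xi, yi):
    # mean squared error on the mini-batch
    return np.mean((xi.dot(w) - yi) ** 2)

def grad(w, xi, yi):
    # gradient of the mean squared error w.r.t. w
    return 2.0 * xi.T.dot(xi.dot(w) - yi) / xi.shape[0]

w_hat = sgd(np.zeros(3), x, y, f, grad, learning_rate=0.01,
            batch_size=50, max_epochs=500, shuffle=True, rng=rng)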
Example 4
def inference_video(video_path):
    """
    Do image -> 2d points -> 3d points to video.
    :param video_path: relative to outputs
    """
    args = parse_args()
    dir_name = os.path.dirname(video_path)
    basename = os.path.basename(video_path)
    video_name = basename[:basename.rfind('.')]
    args.viz_video = video_path
    args.viz_output = '{0}/o_{1}.mp4'.format(dir_name, video_name)
    args.basename = video_name
    args.evaluate = 'pretrained_h36m_detectron_coco.bin'

    with Timer(video_path):
        main(args)
Example 5
def cal_pose_iou_dm_speed_up(all_cors, pose1, pose2, num, mag):
    # with Timer('Matrix calculation'): 0.0006s
    poses_iou = []
    # Build a box of half-size `mag` around each keypoint:
    # [x, y, x, y] -> [x, x, y, y] -> [x - mag, x + mag, y - mag, y + mag]
    mag_matrix = [-mag, mag, -mag, mag]
    pose1_boxes = np.hstack((pose1, pose1))
    pose2_boxes = np.hstack((pose2, pose2))

    pose1_boxes[:, [2, 1]] = pose1_boxes[:, [1, 2]]
    pose2_boxes[:, [2, 1]] = pose2_boxes[:, [1, 2]]

    pose1_boxes += mag_matrix
    pose2_boxes += mag_matrix

    with Timer('find two pose box iou', show=False):
        for pose1_box, pose2_box in zip(pose1_boxes, pose2_boxes):
            poses_iou.append(find_two_pose_box_iou(pose1_box, pose2_box, all_cors))

    # average IoU over the `num` best-matching keypoint boxes
    return np.mean(heapq.nlargest(num, poses_iou))
Example 6
def cal_one_matching(all_cors, all_pids_fff, all_pids_info, cost_matrix, mag, num, pid1, track_vid_next_fid, weights, weights_fff):
    box1_pos = all_pids_info[pid1]['box_pos']
    box1_region_ids = find_region_cors_last(box1_pos, all_cors)
    box1_score = all_pids_info[pid1]['box_score']
    box1_pose = all_pids_info[pid1]['box_pose_pos']
    box1_fff = all_pids_fff[pid1]

    row = np.zeros(cost_matrix.shape[1])
    # print(f"Inner for loop :{track_vid_next_fid['num_boxes']}", end=' ')
    # with Timer(f"Inner for loop: {track_vid_next_fid['num_boxes']}"):
    for pid2 in range(1, track_vid_next_fid['num_boxes'] + 1):
        box2_pos = track_vid_next_fid[pid2]['box_pos']

        # with Timer('find_region_cors_next'):
        box2_region_ids = find_region_cors_next(box2_pos, all_cors)

        box2_score = track_vid_next_fid[pid2]['box_score']
        box2_pose = track_vid_next_fid[pid2]['box_pose_pos']

        # with Timer('Outer calculate'):
        inter = box1_region_ids & box2_region_ids
        union = box1_region_ids | box2_region_ids
        dm_iou = len(inter) / (len(union) + 0.00001)  # small epsilon guards against an empty union

        # with Timer('cal_bbox_iou'):
        box_iou = cal_bbox_iou(box1_pos, box2_pos)

        with Timer('cal_pose_iou_dm', show=False):
            pose_iou_dm = cal_pose_iou_dm_speed_up(all_cors, box1_pose, box2_pose, num, mag)

        # with Timer('cal_pose_iou'):
        pose_iou = cal_pose_iou(box1_pose, box2_pose, num, mag)

        # with Timer('cal_grade'):
        if box1_fff:
            grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score], weights)
        else:
            grade = cal_grade([dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score], weights_fff)

        row[pid2 - 1] = grade

    return row
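cal_grade is not shown in these examples; given the six similarity terms and the weight vectors defined in main below, a weighted sum is a plausible reading. A sketch under that assumption:

def cal_grade(scores, weights):
    # Assumed implementation: weighted sum of the six similarity terms
    # [dm_iou, box_iou, pose_iou_dm, pose_iou, box1_score, box2_score].
    return sum(s * w for s, w in zip(scores, weights))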
Example 7
def sgdAdam(w0, x, y, f, grad, learning_rate=0.01,
            batch_size=100, max_epochs=1000, delta=1e-8,
            ro1=0.9, ro2=0.999, eps=1e-6,
            shuffle=False, rng=None):
    """Mini-batch SGD with the Adam update rule (bias-corrected first and second moments)."""
    tm = Timer()
    n = x.shape[0]
    n_batches = get_num_batches(n, batch_size)
    w = np.copy(w0)
    s = np.zeros(len(w0), dtype=w0.dtype)  # first moment variable
    s_hat = np.zeros(len(w0), dtype=w0.dtype)  # first moment corrected for bias
    r = np.zeros(len(w0), dtype=w0.dtype)  # second moment variable
    r_hat = np.zeros(len(w0), dtype=w0.dtype)  # second moment corrected for bias
    t = 0  # time step
    epoch_losses = np.zeros(max_epochs, dtype=float)
    epoch = 0
    w_best = np.copy(w0)
    loss_best = np.inf
    if n <= batch_size:
        # no need to shuffle since all instances will be used up in one batch
        shuffle = False
    if shuffle:
        shuffled_idxs = np.arange(n)
        if rng is None:
            np.random.shuffle(shuffled_idxs)
        else:
            rng.shuffle(shuffled_idxs)
    else:
        shuffled_idxs = None
    prev_loss = np.inf
    while epoch < max_epochs:
        losses = np.zeros(n_batches, dtype=float)
        for i in range(n_batches):
            xi, yi = get_sgd_batch(x, y, i, batch_size, shuffled_idxs=shuffled_idxs)
            g = grad(w, xi, yi)
            t += 1
            s[:] = ro1 * s + (1 - ro1) * g
            r[:] = ro2 * r + (1 - ro2) * np.multiply(g, g)
            # correct bias in first moment
            s_hat[:] = (1./(1 - ro1 ** t)) * s
            # correct bias in second moment
            r_hat[:] = (1./(1 - ro2 ** t)) * r
            dw_scale = (learning_rate / (np.sqrt(delta + r_hat)))
            dw = np.multiply(dw_scale, s_hat)
            w[:] = w - dw
            losses[i] = f(w, xi, yi)
        loss = np.mean(losses)
        if np.isnan(loss):
            logger.debug("loss is nan")
            logger.debug("|w|=%f" % w.dot(w))
            raise ArithmeticError("loss is nan in sgd")
        epoch_losses[epoch] = loss
        if loss < loss_best:
            # pocket algorithm
            np.copyto(w_best, w)
            loss_best = loss
        epoch += 1
        if (loss < eps or np.abs(loss - prev_loss) < eps or
            avg_loss_check(epoch_losses, epoch, n=20, eps=eps)):
            break
        prev_loss = loss
    debug_log_sgd_losses("sgdAdam", epoch_losses, epoch, n=20, timer=tm)
    # logger.debug("epochs: %d" % epoch)
    # logger.debug("net losses:")
    # logger.debug("epoch losses:\n%s" % str(epoch_losses[0:epoch]))
    # logger.debug("best loss: %f" % loss_best)
    return w_best
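The corresponding per-batch Adam update, with decay rates $\rho_1$ (ro1), $\rho_2$ (ro2), stabilizer $\delta$ (delta), and time step $t$:

$s \leftarrow \rho_1 s + (1 - \rho_1)\, g, \qquad r \leftarrow \rho_2 r + (1 - \rho_2)\, g \odot g$
$\hat{s} = \frac{s}{1 - \rho_1^{t}}, \qquad \hat{r} = \frac{r}{1 - \rho_2^{t}}$
$w \leftarrow w - \frac{\eta}{\sqrt{\delta + \hat{r}}} \odot \hat{s}$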
Example 8
class Base:  # enclosing class inferred from the logger name below
    def __init__(self):
        self.log = get_logger("Base")
        self.timer = Timer(self.log)
Example 9
def main(args):
    """
    See function track for the args information.
    """

    link_len = args.link
    weights = [1, 2, 1, 2, 0, 0]
    weights_fff = [0, 1, 0, 1, 0, 0]
    drop = args.drop
    num = args.num
    mag = args.mag
    match_thres = args.match

    notrack_json = args.in_json
    tracked_json = args.out_json
    image_dir = args.imgdir
    vis_dir = args.visdir

    # if the json format differs from "alphapose-forvis.json" (pytorch version)
    if "forvis" not in notrack_json:
        results_forvis = {}
        last_image_name = ' '

        with open(notrack_json) as f:
            results = json.load(f)
            results = remove_irrelevant(results, 1)
            for i in range(len(results)):
                imgpath = results[i]['image_id']
                if last_image_name != imgpath:
                    results_forvis[imgpath] = []
                results_forvis[imgpath].append({
                    'keypoints': results[i]['keypoints'],
                    'scores': results[i]['score']
                })
                last_image_name = imgpath
        notrack_json = os.path.join(os.path.dirname(notrack_json),
                                    "alphapose-results-forvis.json")
        with open(notrack_json, 'w') as json_file:
            json_file.write(json.dumps(results_forvis))

    notrack = {}
    track = {}
    num_persons = 0

    # load json file without tracking information
    print("Start loading json file...\n")
    with open(notrack_json, 'r') as f:
        notrack = json.load(f)
        for img_name in tqdm(sorted(notrack.keys())):
            track[img_name] = {'num_boxes': len(notrack[img_name])}
            for bid in range(len(notrack[img_name])):
                track[img_name][bid + 1] = {}
                track[img_name][bid + 1]['box_score'] = notrack[img_name][bid]['scores']
                track[img_name][bid + 1]['box_pos'] = get_box(
                    notrack[img_name][bid]['keypoints'],
                    os.path.join(image_dir, img_name))
                keypoints = np.array(notrack[img_name][bid]['keypoints']).reshape(-1, 3)
                track[img_name][bid + 1]['box_pose_pos'] = keypoints[:, 0:2]
                track[img_name][bid + 1]['box_pose_score'] = keypoints[:, -1]

    np.save(f'{args.result_dir}/notrack-bl.npy', track)
    # track = np.load(f'{args.result_dir}/notrack-bl.npy', allow_pickle=True).item()

    # tracking process
    max_pid_id = 0
    frame_list = sorted(list(track.keys()))

    print("Start pose tracking...\n")
    for idx, frame_name in enumerate(tqdm(frame_list[:-1])):
        frame_id = frame_name.split(".")[0]

        next_frame_name = frame_list[idx + 1]
        next_frame_id = next_frame_name.split(".")[0]

        # init tracking info of the first frame in one video
        if idx == 0:
            for pid in range(1, track[frame_name]['num_boxes'] + 1):
                track[frame_name][pid]['new_pid'] = pid
                track[frame_name][pid]['match_score'] = 0

        max_pid_id = max(max_pid_id, track[frame_name]['num_boxes'])
        cor_file = os.path.join(
            image_dir, "".join([frame_id, '_', next_frame_id, '_orb.txt']))

        # regenerate the missing pair-matching txt
        if not os.path.exists(cor_file) or os.stat(cor_file).st_size < 200:
            img1_path = os.path.join(image_dir, frame_name)
            img2_path = os.path.join(image_dir, next_frame_name)
            orb_matching(img1_path, img2_path, image_dir, frame_id,
                         next_frame_id)

        all_cors = np.loadtxt(cor_file)

        # if there are no people in this frame, copy the info from the previous frame
        if track[next_frame_name]['num_boxes'] == 0:
            track[next_frame_name] = copy.deepcopy(track[frame_name])
            continue
        cur_all_pids, cur_all_pids_fff = stack_all_pids(
            track, frame_list[:-1], idx, max_pid_id, link_len)

        with Timer('best_matching_hungarian'):
            match_indexes, match_scores = best_matching_hungarian(
                all_cors, cur_all_pids, cur_all_pids_fff,
                track[next_frame_name], weights, weights_fff, num, mag)

        for pid1, pid2 in match_indexes:
            if match_scores[pid1][pid2] > match_thres:
                track[next_frame_name][
                    pid2 + 1]['new_pid'] = cur_all_pids[pid1]['new_pid']
                max_pid_id = max(max_pid_id,
                                 track[next_frame_name][pid2 + 1]['new_pid'])
                track[next_frame_name][
                    pid2 + 1]['match_score'] = match_scores[pid1][pid2]

        # add the untracked new person
        for next_pid in range(1, track[next_frame_name]['num_boxes'] + 1):
            if 'new_pid' not in track[next_frame_name][next_pid]:
                max_pid_id += 1
                track[next_frame_name][next_pid]['new_pid'] = max_pid_id
                track[next_frame_name][next_pid]['match_score'] = 0

    np.save(f'{args.result_dir}/track-bl.npy', track)
    # track = np.load(f'{args.result_dir}/track-bl.npy').item()

    # calculate number of people
    num_persons = 0
    for fid, frame_name in enumerate(frame_list):
        for pid in range(1, track[frame_name]['num_boxes'] + 1):
            num_persons = max(num_persons, track[frame_name][pid]['new_pid'])
    print("This video contains %d people." % (num_persons))

    # export tracking result into notrack json files
    print("Export tracking results to json...\n")
    for fid, frame_name in enumerate(tqdm(frame_list)):
        for pid in range(track[frame_name]['num_boxes']):
            notrack[frame_name][pid]['idx'] = track[frame_name][pid + 1]['new_pid']

    with open(tracked_json, 'w') as json_file:
        json_file.write(json.dumps(notrack))

    if len(args.visdir) > 0:
        cmap = plt.cm.get_cmap("hsv", num_persons)
        display_pose(image_dir, vis_dir, notrack, cmap)
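best_matching_hungarian is not shown here; it presumably assembles a grade matrix via cal_one_matching and solves the assignment with the Hungarian algorithm. A minimal sketch of that step using scipy (an assumption, not the repo's actual implementation):

import numpy as np
from scipy.optimize import linear_sum_assignment

def hungarian_match(grade_matrix):
    # Maximize the total matching grade between current pids (rows)
    # and next-frame boxes (columns); returns (pid1, pid2) index pairs.
    rows, cols = linear_sum_assignment(grade_matrix, maximize=True)
    return list(zip(rows, cols))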
def track(video_name):  # reconstructed def; the __main__ block below calls track('kobe.mp4')
    # super parameters
    # 1. look-ahead LINK_LEN frames to find tracked human bboxes
    # 2. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score
    # 3. bbox_IoU(deepmatching), bbox_IoU(general), pose_IoU(deepmatching), pose_IoU(general), box1_score, box2_score (non-DeepMatching)
    # 4. drop low-score (< DROP) keypoints
    # 5. pick high-score (top NUM) keypoints when computing pose_IoU
    # 6. box width/height around keypoints for computing pose IoU
    # 7. match threshold in Hungarian matching
    args = parse_args()  # assumed: args are built by parse_args(), as in the other examples

    # user-specific parameters
    video_name = os.path.basename(video_name)
    video_filename = video_name[:video_name.rfind('.')]
    args.imgdir = f'outputs/alpha_pose_{video_filename}/split_image'
    args.result_dir = f'outputs/alpha_pose_{video_filename}'
    args.in_json = f'{args.result_dir}/alphapose-results.json'
    args.out_json = f'{args.result_dir}/poseflow-results.json'
    args.visdir = f'{args.result_dir}/poseflow-vis'

    main(args)


if __name__ == '__main__':
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"

    os.chdir('../..')

    with Timer('Track'):
        track('kobe.mp4')