Example #1
0
def eval4():
    """Evaluate the impact of the sampling-distance threshold.

    Runs the learning loop ``num`` times per threshold for both targets
    (sandeep and james) with the distance sampler, then plots accuracy
    over rounds grouped by threshold.  Writes eval4_plot.pkl/pdf to
    RESULT_OUT_PATH.
    """
    print("Evaluation 4: evaluate the impact of threshold")
    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    d0_s, d0_j = tutl.load_pkl(DATA0_SANDEEP), tutl.load_pkl(DATA0_JAMES)
    ldata_s, ldata_j = tutl.load_pkl(LEARNING_DATA_SANDEEP), tutl.load_pkl(
        LEARNING_DATA_JAMES)

    num = 20  # repetitions per (target, threshold) configuration
    thresholds = [0.15, 0.20, 0.30, 0.50]
    # One arg tuple per run: all sandeep runs first, then all james runs,
    # each group ordered by ascending threshold.  This ordering must match
    # the `tags` arrays built below.
    args = []
    for target, d0, ldata in ((target_s, d0_s, ldata_s),
                              (target_j, d0_j, ldata_j)):
        for th in thresholds:
            args.extend((target, d0, shuffle_data(ldata), 10, th, 'DIST',
                         None, None) for _ in range(num))

    pool = multiprocessing.Pool(num_cpus)
    result = pool.map(wrapper, args)
    pool.close()

    add_overall_accuracy(result)

    # Tag every accuracy sample with its threshold and target so the plot
    # can group on them; repeat counts mirror the `args` layout above.
    tags = dict(threshold=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10) for tag in [
            't=0.15', 't=0.2', 't=0.3', 't=0.5', 't=0.15', 't=0.2', 't=0.3',
            't=0.5'
        ]
    ]),
                target=np.hstack([
                    np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10 * 4)
                    for tag in ['sandeep', 'james']
                ]))
    # Keep only rows where the accuracy label matches the learned target.
    plot_data = gen_data_frame(result, tags).query(
        '(label == "sandeep" and target == "sandeep") or (label == "james" and target == "james")'
    )

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval4_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval4_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='threshold',
                              style='target',
                              dashes=True,
                              ylim=ylim,
                              xticks=xticks)
Example #2
0
def eval3():
    """Compare the intelligent (distance), random and oracle samplers.

    Each of the six configurations (three samplers x two targets) is run
    ``num`` times; results are plotted as accuracy over rounds, hued by
    sampler and styled by target.
    """
    print("Evaluation 3: comparison of intelligent, random and oracle sampler")
    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    d0_s, d0_j = tutl.load_pkl(DATA0_SANDEEP), tutl.load_pkl(DATA0_JAMES)
    # learning data
    ldata_s, ldata_j = tutl.load_pkl(LEARNING_DATA_SANDEEP), tutl.load_pkl(
        LEARNING_DATA_JAMES)
    # learning data for oracle
    oracle_s, oracle_j = tutl.load_pkl(ORACLE_SANDEEP), tutl.load_pkl(
        ORACLE_JAMES)

    num = 20
    # (target, initial data, learning pool, sampler mode) — the 'oracle'
    # sampler is a random sampler drawing from the oracle-labelled pool.
    configs = (
        (target_s, d0_s, ldata_s, 'RAND'),
        (target_s, d0_s, ldata_s, 'DIST'),
        (target_s, d0_s, oracle_s, 'RAND'),
        (target_j, d0_j, ldata_j, 'RAND'),
        (target_j, d0_j, ldata_j, 'DIST'),
        (target_j, d0_j, oracle_j, 'RAND'),
    )
    run_args = [(tgt, d0, shuffle_data(pool_data), 10, 0.2, mode, None, None)
                for tgt, d0, pool_data, mode in configs
                for _ in range(num)]

    pool = multiprocessing.Pool(num_cpus)
    result = pool.map(wrapper, run_args)
    pool.close()

    add_overall_accuracy(result)

    tags = dict(
        target=np.hstack([
            np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10 * 3)
            for tag in ['sandeep', 'james']
        ]),
        sampler=np.hstack([
            np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10) for tag in
            ['random', 'distance', 'oracle', 'random', 'distance', 'oracle']
        ]))
    # Keep only rows where the accuracy label matches the learned target.
    plot_data = gen_data_frame(result, tags).query(
        '(label == "sandeep" and target == "sandeep") or (label == "james" and target == "james")'
    )

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval3_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval3_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='sampler',
                              style='target',
                              dashes=True,
                              ylim=ylim,
                              xticks=xticks)
Example #3
0
def main():
    """Entry point: optionally load a source pickle, generate a dataset,
    and write it to the requested output file."""
    opts = setup_args()

    source = None
    if opts['data']:
        print("[INFO] loading data source...")
        source = tutl.load_pkl(opts['data'])

    dataset = gen_data(source, opts['data_size'], opts['label'],
                       opts['exclude_label'], opts['shuffle'],
                       opts['balanced'])

    print("[INFO] create pickle file...")
    tutl.write_pkl(opts['output_file'], dataset)
Example #4
0
def eval5():
    """Learn two targets simultaneously under different priority weights."""
    print("Evaluation 5: Learning two targets with priority")

    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    # Concatenate the two single-target datasets into one combined target.
    target = {
        key: target_s[key] + target_j[key]
        for key in ('names', 'embeddings', 'video', 'frame')
    }
    d0 = tutl.load_pkl(DATA0_2TARGETs)
    learningset = tutl.load_pkl(LEARNING_DATA_BOTH)

    num = 20

    # Three weighting schemes: equal, sandeep-prioritised, james-prioritised.
    weight_schemes = (dict(sandeep=1, james=1),
                      dict(sandeep=10, james=1),
                      dict(sandeep=1, james=10))
    run_args = [(target, d0, shuffle_data(learningset), 10, 0.2, 'WEIGHT',
                 weights, None)
                for weights in weight_schemes
                for _ in range(num)]

    pool = multiprocessing.Pool(num_cpus)
    result = pool.map(wrapper, run_args)
    pool.close()

    add_overall_accuracy(result)

    tags = dict(weight=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10)
        for tag in ['S1:J1', 'S10:J1', 'S1:J10']
    ]))
    plot_data = gen_data_frame(
        result, tags).query('label in ["sandeep", "james", "overall"]')

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval5_plot.pkl'), plot_data)

    plot_file = os.path.join(RESULT_OUT_PATH, 'eval5_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='label',
                              style='weight',
                              dashes=True,
                              ylim=ylim,
                              xticks=xticks)
Example #5
0
def eval2():
    """Accuracy distribution over rounds for all labels (100 random runs)."""
    print("Evaluation 2: Accuracy distribution for all labels")
    dummy_target = tutl.load_pkl(TARGET_RSS)

    # 100 independent runs with the random sampler, starting from `empty`.
    run_args = [(dummy_target, empty, shuffle_data(ldata), 10, 0.2, 'RAND',
                 None, None) for _ in range(100)]
    pool = multiprocessing.Pool(num_cpus)
    result = pool.map(wrapper, run_args)
    pool.close()

    add_overall_accuracy(result)

    plot_data = gen_data_frame(result)
    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval2_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval2_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='label',
                              ylim=ylim,
                              xticks=xticks)
Example #6
0
def eval6():
    """Compare how the target image set's generation method affects accuracy.

    Five generation strategies — oracle, manual, random, ssim(quantile) and
    ssim(diversity) — are evaluated ``num`` times for each of the two
    targets.  Results are plotted as accuracy over rounds, hued by strategy
    and styled by target.
    """
    print("Evaluation 6: comparison of impact of target imageset")
    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    d0_s, d0_j = tutl.load_pkl(DATA0_SANDEEP), tutl.load_pkl(DATA0_JAMES)
    ldata_s, ldata_j = tutl.load_pkl(LEARNING_DATA_SANDEEP), tutl.load_pkl(
        LEARNING_DATA_JAMES)
    oracle_s, oracle_j = tutl.load_pkl(ORACLE_SANDEEP), tutl.load_pkl(
        ORACLE_JAMES)

    db = tutl.load_pkl(os.path.join(DATA_PATH, 'face_db.pkl'))

    def preprocess(img, size=(100, 100)):
        """Convert a face crop to a fixed-size grayscale image for SSIM."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return cv2.resize(gray, size)

    def face_pixels(oracle):
        """Preprocessed face crops for every (video, frame) in `oracle`."""
        return [
            preprocess(db[video][frame][0]['face_pixels'])
            for video, frame in zip(oracle['video'], oracle['frame'])
        ]

    def subset(oracle, idxs):
        """Project the oracle dataset onto the rows selected by `idxs`."""
        return {
            key: [oracle[key][idx] for idx in idxs]
            for key in ('names', 'embeddings', 'video', 'frame')
        }

    extr_s = TargetExtractor(face_pixels(oracle_s))
    extr_j = TargetExtractor(face_pixels(oracle_j))

    num = 20  # repetitions per configuration

    # SSIM-diversity target sets (one per repetition).
    ssim_targets_s = [
        subset(oracle_s, extr_s.diversity(min_tile=0, rank_by='mean'))
        for i in range(num)
    ]
    ssim_targets_j = [
        subset(oracle_j, extr_j.diversity(min_tile=0, rank_by='mean'))
        for i in range(num)
    ]

    # SSIM-quantile target sets.
    ssimq_targets_s = [subset(oracle_s, extr_s.quantile()) for i in range(num)]
    ssimq_targets_j = [subset(oracle_j, extr_j.quantile()) for i in range(num)]

    # Randomly sampled 18-image target sets.
    rand_targets_s = [gen_data(oracle_s, 18) for i in range(num)]
    rand_targets_j = [gen_data(oracle_j, 18) for i in range(num)]

    arg1 = [(target_s, d0_s, shuffle_data(oracle_s), 10, 0.3, 'RAND', None,
             None) for i in range(num)]
    arg2 = [(target_s, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None,
             None) for i in range(num)]
    arg3 = [(target, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None, None)
            for target in rand_targets_s]
    arg4 = [(target, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None, None)
            for target in ssimq_targets_s]
    arg5 = [(target, d0_s, shuffle_data(ldata_s), 10, 0.3, 'DIST', None, None)
            for target in ssim_targets_s]
    arg6 = [(target_j, d0_j, shuffle_data(oracle_j), 10, 0.3, 'RAND', None,
             None) for i in range(num)]
    arg7 = [(target_j, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None,
             None) for i in range(num)]
    arg8 = [(target, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None, None)
            for target in rand_targets_j]
    arg9 = [(target, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None, None)
            for target in ssimq_targets_j]
    arg10 = [(target, d0_j, shuffle_data(ldata_j), 10, 0.3, 'DIST', None, None)
             for target in ssim_targets_j]

    pool = multiprocessing.Pool(num_cpus)
    result = pool.map(
        wrapper,
        arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8 + arg9 + arg10)
    pool.close()

    add_overall_accuracy(result)

    # Tag order must mirror the arg1..arg10 concatenation above.
    tags = dict(target_generated_by=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10) for tag in [
            'oracle', 'manual', 'random', 'ssim(quantile)', 'ssim(diversity)',
            'oracle', 'manual', 'random', 'ssim(quantile)', 'ssim(diversity)'
        ]
    ]),
                target=np.hstack([
                    np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10 * 5)
                    for tag in ['sandeep', 'james']
                ]))
    # Keep only rows where the accuracy label matches the learned target.
    plot_data = gen_data_frame(result, tags).query(
        '(label == "sandeep" and target == "sandeep") or (label == "james" and target == "james")'
    )

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval6_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval6_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='target_generated_by',
                              style='target',
                              ylim=ylim,
                              xticks=xticks)
Example #7
0
def eval0():
    """Oracle evaluation: random vs static-distance vs dynamic-distance
    sampling on SSIM-selected targets for the sandeep dataset."""
    print("Evaluation 0: Oracle")
    target_s, target_j = tutl.load_pkl(TARGET_MANUAL), tutl.load_pkl(
        TARGET_MANUAL_JAMES)
    d0_s, d0_j = tutl.load_pkl(DATA0_SANDEEP), tutl.load_pkl(DATA0_JAMES)
    ldata_s, ldata_j = tutl.load_pkl(LEARNING_DATA_SANDEEP), tutl.load_pkl(
        LEARNING_DATA_JAMES)
    oracle_s, oracle_j = tutl.load_pkl(ORACLE_SANDEEP), tutl.load_pkl(
        ORACLE_JAMES)

    db = tutl.load_pkl(os.path.join(DATA_PATH, 'face_db.pkl'))

    def preprocess(img, size=(100, 100)):
        """Grayscale + resize a face crop for SSIM comparison."""
        return cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), size)

    pixels = [
        preprocess(db[vid][frm][0]['face_pixels'])
        for vid, frm in zip(oracle_s['video'], oracle_s['frame'])
    ]
    extr = TargetExtractor(pixels)
    # Shallow copy of the oracle set, augmented with the raw pixel crops
    # (needed by the dynamic-distance sampler).
    oracle_s_wp = copy.copy(oracle_s)
    oracle_s_wp['pixels'] = pixels

    num = 20

    ssim_idxes = [
        extr.diversity(min_tile=0, rank_by='mean') for _ in range(num)
    ]
    # Project the oracle data onto each SSIM-selected index set.
    ssim_targets = [{
        key: [oracle_s[key][idx] for idx in idxs]
        for key in ('names', 'embeddings', 'video', 'frame')
    } for idxs in ssim_idxes]

    arg1 = [(target, d0_s, shuffle_data(oracle_s), 10, 0.2, 'RAND', None, None)
            for target in ssim_targets]
    arg2 = [(target, d0_s, shuffle_data(oracle_s), 10, 0.2, 'DIST', None, None)
            for target in ssim_targets]
    # Passing `extr` enables the dynamic (extractor-driven) distance sampler.
    arg3 = [(target, d0_s, shuffle_data(oracle_s_wp), 10, 0.2, 'DIST', None,
             extr) for target in ssim_targets]

    pool = multiprocessing.Pool(num_cpus)
    result = pool.map(wrapper, arg1 + arg2 + arg3)
    pool.close()
    add_overall_accuracy(result)

    tags = dict(sampler=np.hstack([
        np.repeat(tag, num * FirstSampler._MAX_RETRAIN * 10)
        for tag in ['random', 'distance(static)', 'distance(dynamic)']
    ]))
    plot_data = gen_data_frame(result, tags).query('label == "sandeep"')

    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval0_plot.pkl'), plot_data)
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval0_plot.pdf')
    ts_plot_with_distribution(plot_data,
                              plot_file,
                              'round#',
                              'accuracy',
                              hue='sampler',
                              ylim=ylim,
                              xticks=xticks)
Example #8
0
    # NOTE(review): fragment of an evaluation routine (it writes
    # eval1_plot.*); the enclosing `def` lies above this excerpt.
    eval_cases = ['sandeep', 'james', 'overall']
    dummy_target = tutl.load_pkl(TARGET_RSS)

    # Single run: random sampler, threshold 0.2, starting from `empty`.
    pool = multiprocessing.Pool(num_cpus)
    result = pool.map(
        wrapper, [(dummy_target, empty, ldata, 10, 0.2, 'RAND', None, None)])
    pool.close()

    add_overall_accuracy(result)

    # Per-case accuracy time series extracted from the single run.
    # NOTE(review): the loop variable `round` shadows the builtin of the
    # same name (harmless inside this comprehension).
    plot_data = {
        case: [round[case] for round in result[0]]
        for case in eval_cases
    }
    #print(plot_data)
    tutl.write_pkl(os.path.join(RESULT_OUT_PATH, 'eval1_plot.pkl'), plot_data)

    # One overlaid time-series curve per evaluation case.
    ts_dict = {leg: dict(ts_vector=plot_data[leg]) for leg in eval_cases}
    plot_file = os.path.join(RESULT_OUT_PATH, 'eval1_plot.pdf')
    overlaid_ts(normalized_ts_dict=ts_dict,
                plot_file=plot_file,
                ylabel=ylabel,
                xlabel=xlabel,
                fontsize=18,
                xticks=xticks,
                ylim=ylim,
                DEFAULT_ALPHA=1.0,
                legend_present=True,
                DEFAULT_MARKERSIZE=15,
                delete_yticks=False,
                xlim=None)
Example #9
0
        os.sep)[-1].split('.')[0].split('_embeddings_detections')[0]
    out_emb[prefix] = {}

    # Label each frame's first detection by nearest-neighbour vote against
    # the reference samples in `sample_emb`, write the crop to a per-label
    # directory, and collect the (possibly relabelled) detections per frame.
    # NOTE(review): fragment — the enclosing loop header is above this excerpt.
    frame_num = 0
    while frame_num < len(embeddings):
        if len(embeddings[frame_num]) > 0:
            # Distance from this face embedding to every reference sample.
            distances = np.array([
                cutl.distance(embeddings[frame_num][0]["embedding"], sample)
                for sample in sample_emb["embeddings"]
            ])
            indexes = np.argsort(distances)
            top10labels = np.array(
                [sample_emb["names"][i] for i in indexes[:10]])
            # Majority vote among the 10 nearest neighbours; if even those
            # are on average too far (mean distance >= 0.2), mark unknown.
            # NOTE(review): stats.mode(...)[0][0] relies on the pre-1.9
            # scipy return shape — verify against the installed scipy.
            if np.mean(np.sort(distances)[:10]) < 0.2:
                majority = stats.mode(top10labels)[0][0]
            else:
                majority = 'unknown'
            embeddings[frame_num][0]["label"] = majority

            #Write jpg file.
            if not os.path.exists(os.path.join(OUTPUT_PIC_DIR, majority)):
                os.makedirs(os.path.join(OUTPUT_PIC_DIR, majority))
            cv2.imwrite(
                os.path.join(OUTPUT_PIC_DIR, majority,
                             prefix + str(frame_num) + '.jpg'),
                embeddings[frame_num][0]["face_pixels"])
        out_emb[prefix][frame_num] = embeddings[frame_num]
        frame_num += 1

# Persist all per-video, per-frame detections in one pickle.
tutl.write_pkl(OUTPUT_PKL_PATH, out_emb)
Example #10
0
print("[INFO] shuffle data...")
# Group the embeddings by label, shuffling each label's group independently.
emb_dict = {}
for lbl in LABEL_LIST:
    grouped = [
        emb for idx, emb in enumerate(orig_embeddings) if lbl == label_str[idx]
    ]
    emb_dict[lbl] = shuffle(grouped)

print("[INFO] select embeddings...")
# Per-label sample count: either the requested size, or the size of the
# smallest label group (so every class contributes equally).
if args['data_size']:
    num = int(args['data_size'])
else:
    num = np.min([len(group) for group in emb_dict.values()])
embeddings = [emb for lbl in LABEL_LIST for emb in emb_dict[lbl][:num]]

print("[INFO] encoding labels...")
# `num` copies of each label, flattened to align with `embeddings`.
le = LabelEncoder()
label_matrix = np.array([[lbl] * num for lbl in LABEL_LIST])
labels = le.fit_transform(label_matrix.flatten())

print("[INFO] training model...")
recognizer = SVC(C=1.0, kernel="linear", probability=True)
recognizer.fit(embeddings, labels)

print("[INFO] save model...")
# The default path (and its env-var lookup) is only evaluated when no
# explicit recognizer path was given.
if args["recognizer"]:
    model_path = args["recognizer"]
else:
    model_path = os.path.join(os.environ["HARVESTNET_ROOT_DIR"], 'tmp',
                              'models', 'face_recg_' + str(num) + '.pkl')
tutl.write_pkl(model_path, (recognizer, le))