def process_df_frames_train_set(item, method='surf', histogram_norm='l1'):
    """Build bag-of-words training samples from one group of frame rows.

    A group is only processed when the ``frame_index`` of its first row
    satisfies ``frame_index % 100 == 24``; any other group (or an empty one)
    yields an empty list.

    Args:
        item: ``(filename, df_frames)`` pair. ``filename`` is not used;
            ``df_frames`` must expose a ``frame_index`` column and is handed
            to ``Preprocessing.extract_box_img``.
        method: Feature extractor name passed to
            ``FeatureExtraction.feature_extraction``; it also selects the
            k-means file ``kmeans_{method}_128.pkl``.
        histogram_norm: Normalisation name forwarded to
            ``BOW.compute_histogram`` as ``norm``.

    Returns:
        list: ``(histogram, category)`` tuples, one for every box for which
        descriptors were found. ``category`` is the key under which the box
        was returned by ``Preprocessing.extract_box_img``.
    """
    _, df_frames = item

    # An empty group has no first row to inspect; treat it like a skipped one
    # instead of failing with IndexError on ``.iloc[0]``.
    if df_frames.empty:
        return []
    if df_frames['frame_index'].iloc[0] % 100 != 24:
        return []

    # Imported only once we know there is work to do for this group.
    import FeatureExtraction

    boxes_dict = Preprocessing.extract_box_img(df_frames)
    kmeans = BOW.load_kmeans(f'kmeans_{method}_128.pkl')

    train_set = []
    for category, boxes in boxes_dict.items():
        for box in boxes:
            _, des = FeatureExtraction.feature_extraction(box, method)
            if des is None:
                # No descriptors for this box, so nothing to build a
                # histogram from.
                continue
            histogram = BOW.compute_histogram(des,
                                              kmeans,
                                              method=method,
                                              norm=histogram_norm)
            train_set.append((histogram, category))

    return train_set
def _window_pyramid(start_window, scale, max_h, max_w):
    """Return the successive (height, width) window sizes that fit the image."""
    sizes = []
    window = start_window
    while window[0] <= max_h and window[1] <= max_w:
        sizes.append(window)
        grown = (int(window[0] * scale), int(window[1] * scale))
        if grown == window:
            # int() truncation swallowed the growth (very small windows);
            # stop rather than loop forever on the same size.
            break
        window = grown
    return sizes


def _empty_detections():
    """Return an empty result frame with the same columns as a normal result."""
    return pd.DataFrame({
        'box': [],
        'histogram': [],
        'high_prob': [],
        'high_category': []
    })


def clfs_rolling_windows(img,
                         clf,
                         stride=8,
                         padding=0,
                         ratios=((1, 1), (3, 1), (1, 3)),
                         scale=1.5,
                         prob_threshold=0.75,
                         min_side_length=16,
                         method='sift',
                         kmeans_path=None,
                         histogram_norm='l0'):
    """Classify multi-scale sliding windows using bag-of-words histograms.

    Keypoints and descriptors are extracted once for the whole image. For
    every aspect ratio a window is slid over the (optionally padded) image
    and grown by ``scale`` until it no longer fits. The descriptors of the
    keypoints lying strictly inside each window are turned into a histogram
    and all histograms are classified in a single ``predict_proba`` call.

    Args:
        img: Image array; only ``img.shape[:2]`` is read here, the array
            itself is handed to ``FeatureExtraction.feature_extraction``.
        clf: Classifier exposing ``predict_proba``.
        stride: Step in pixels at the smallest window size; it is multiplied
            by ``sqrt(scale)`` (truncated to int) after every size.
        padding: Virtual border in pixels added on every side. Returned boxes
            are expressed in padded coordinates.
        ratios: Iterable of ``(height, width)`` aspect ratios.
        scale: Growth factor between successive window sizes; must be > 1.
        prob_threshold: Windows whose best probability is not above this
            value are dropped.
        min_side_length: Length in pixels of the shorter side of the smallest
            window.
        method: ``'sift'`` or ``'surf'``.
        kmeans_path: Path given to ``BOW.load_kmeans``; defaults to
            ``kmeans_{method}_128.pkl``.
        histogram_norm: Normalisation name forwarded to
            ``BOW.compute_histogram`` as ``norm``.
            NOTE(review): the default ``'l0'`` is kept from the original code;
            confirm it matches the norm used to build the training set.

    Returns:
        pandas.DataFrame: Columns ``box`` (``(top, left, bottom, right)``),
        ``histogram``, ``high_prob`` (largest class probability) and
        ``high_category`` (column index of that probability in the
        ``predict_proba`` output). Rows with ``high_category == 0``
        (presumably the background class -- confirm) or with
        ``high_prob <= prob_threshold`` are removed. The frame is empty when
        no window produced a histogram.

    Raises:
        ValueError: If ``method`` is unsupported, ``scale <= 1`` or
            ``stride < 1``.
    """
    if method not in ('sift', 'surf'):
        raise ValueError("method param should be sift or surf")
    if scale <= 1:
        # A non-growing window would never leave the size loop.
        raise ValueError("scale param should be greater than 1")
    if stride < 1:
        raise ValueError("stride param should be at least 1")
    if kmeans_path is None:
        kmeans_path = f'kmeans_{method}_128.pkl'

    # Only the padded extent is needed, so no padded copy of the image is
    # allocated.
    h = img.shape[0] + padding * 2
    w = img.shape[1] + padding * 2

    kp, des = FeatureExtraction.feature_extraction(img, method)
    if des is None or len(kp) == 0:
        # Nothing to describe, hence nothing to classify.
        return _empty_detections()
    kmeans = BOW.load_kmeans(kmeans_path)

    # Keypoints are found on the unpadded image; shift them so they share the
    # padded coordinate system used by the windows.
    kp_x = np.array([point.pt[0] for point in kp]) + padding
    kp_y = np.array([point.pt[1] for point in kp]) + padding

    boxes = []
    histograms = []
    for ratio in ratios:
        print(f'ratio {ratio}')
        ratio_h, ratio_w = ratio
        # Windows are (height, width); the shorter side starts at
        # min_side_length.
        if ratio_h < ratio_w:
            start_window = (min_side_length,
                            int(min_side_length * ratio_w / ratio_h))
        else:
            start_window = (int(min_side_length * ratio_h / ratio_w),
                            min_side_length)

        windows = _window_pyramid(start_window, scale, h, w)
        this_stride = stride
        with tqdm(total=len(windows)) as pbar:
            for win_h, win_w in windows:
                # "+ 1" so that a window flush with the bottom/right edge
                # (including one as large as the image) is visited too.
                for i in range(0, h - win_h + 1, this_stride):
                    in_rows = (i < kp_y) & (kp_y < i + win_h)
                    if not in_rows.any():
                        continue
                    for j in range(0, w - win_w + 1, this_stride):
                        inside = in_rows & (j < kp_x) & (kp_x < j + win_w)
                        des_this = des[inside]
                        if len(des_this) == 0:
                            continue
                        histogram = BOW.compute_histogram(des_this,
                                                          kmeans,
                                                          method=method,
                                                          norm=histogram_norm)
                        if histogram is None:
                            continue
                        boxes.append((i, j, i + win_h, j + win_w))
                        histograms.append(histogram)
                this_stride = int(this_stride * np.sqrt(scale))
                pbar.update(1)

    if not histograms:
        # predict_proba cannot be called on an empty sample matrix.
        return _empty_detections()

    y_prob = clf.predict_proba(np.array(histograms))
    df_box_hist = pd.DataFrame({
        'box': boxes,
        'histogram': histograms,
        'high_prob': np.max(y_prob, axis=1).ravel().tolist(),
        'high_category': np.argmax(y_prob, axis=1).ravel().tolist()
    })
    keep = ((df_box_hist['high_category'] != 0)
            & (df_box_hist['high_prob'] > prob_threshold))
    return df_box_hist[keep]