コード例 #1
0
ファイル: TokenizingClass.py プロジェクト: vedantdesai13/NLP
    def bag_of_words(self, words, vocab):
        """Count how often each vocabulary entry occurs in ``words``.

        The raw text is run through ``b.rem_special_chars``,
        ``b.convert_to_lowcase``, ``b.rem_stopwords`` and ``b.lemma`` (in that
        order) and is then split into tokens with ``self.text_to_word``.

        Returns a NumPy array with one slot per entry of ``vocab``; slot ``i``
        holds the number of tokens equal to ``vocab[i]``.
        """
        cleaned = b.rem_special_chars(words)
        lowered = b.convert_to_lowcase(cleaned)
        filtered = b.rem_stopwords(lowered)
        lemmatized = b.lemma(filtered)
        tokens = self.text_to_word(lemmatized)

        counts = np.zeros(len(vocab))
        for token in tokens:
            # Every vocabulary position that matches is incremented, so a
            # vocabulary with repeated entries gets a count in each copy.
            matching_slots = [
                slot for slot, term in enumerate(vocab) if term == token
            ]
            for slot in matching_slots:
                counts[slot] += 1
        return np.array(counts)
コード例 #2
0
def read_part_sift_descriptors(rate=0.05):
    """
    Load the SIFT descriptors for a fraction of the images of each class.

    ``rate`` is the fraction of each class's images whose SIFT features are
    used. The descriptors returned correspond to the features selected by
    read_part_sift_features_images(rate). The work is delegated to
    BOW.read_list_sift_features(rate).

    For example,
        Codes:
            part_sift_descriptors = read_part_sift_descriptors()
            print(part_sift_descriptors.shape)
        Outputs:
            (3236336, 128)
    """
    import BOW as bow_module
    descriptors = bow_module.read_list_sift_features(rate)
    return descriptors
def process_df_frames_train_set(item, method='surf', histogram_norm='l1',
                                kmeans_path=None):
    """Build bag-of-words training samples from one group of frames.

    Args:
        item: a ``(filename, df_frames)`` pair. Only ``df_frames`` is used; it
            must provide a ``frame_index`` column.
        method: feature extractor name, forwarded to
            ``FeatureExtraction.feature_extraction`` and
            ``BOW.compute_histogram``.
        histogram_norm: normalisation name forwarded to
            ``BOW.compute_histogram`` as ``norm``.
        kmeans_path: file handed to ``BOW.load_kmeans``. Defaults to
            ``f'kmeans_{method}_128.pkl'``.

    Returns:
        A list of ``(histogram, category)`` tuples, one per box for which
        descriptors were found. The list is empty when ``df_frames`` is empty
        or when its first ``frame_index`` does not satisfy
        ``frame_index % 100 == 24``.
    """
    _filename, df_frames = item

    # An empty group has no first frame to inspect; treat it like a skipped one.
    if df_frames.empty:
        return []
    # Only groups whose first frame index is 24 modulo 100 are sampled.
    if df_frames['frame_index'].iloc[0] % 100 != 24:
        return []

    # Imported only once we know there is work to do.
    import FeatureExtraction

    if kmeans_path is None:
        kmeans_path = f'kmeans_{method}_128.pkl'

    boxes_dict = Preprocessing.extract_box_img(df_frames)
    kmeans = BOW.load_kmeans(kmeans_path)

    train_set = []
    for category, boxes in boxes_dict.items():
        for box in boxes:
            _, des = FeatureExtraction.feature_extraction(box, method)
            if des is None:
                # No descriptors for this box, so there is nothing to histogram.
                continue
            histogram = BOW.compute_histogram(des,
                                              kmeans,
                                              method=method,
                                              norm=histogram_norm)
            train_set.append((histogram, category))

    return train_set
コード例 #4
0
ファイル: DenseTrajBOW.py プロジェクト: sourabhd/ChaLearn
    def __init__(self):
        """Create one BOW object per channel (HOG, HOF, MBFx, MBFy) and set the per-channel sizes."""
        # Four independent BOW instances, created in HOG, HOF, MBFx, MBFy order.
        self.bowHOG, self.bowHOF, self.bowMBFx, self.bowMBFy = (
            BOW(), BOW(), BOW(), BOW())

        # Per-channel "dim" values (presumably the descriptor length of each
        # channel -- confirm against the feature extractor).
        self.dimHOG, self.dimHOF = 96, 108
        self.dimMBFx, self.dimMBFy = 96, 96

        # Every channel uses the same "vocsz" value.
        self.vocszHOG = self.vocszHOF = self.vocszMBFx = self.vocszMBFy = 128
コード例 #5
0
ファイル: DenseTrajBOW.py プロジェクト: sourabhd/ChaLearn
class DenseTrajBOW:
    """Bag-of-words wrapper holding one BOW object per channel (HOG, HOF, MBFx, MBFy).

    Only the HOG channel is used by ``build`` and ``calcFeatures``; the other
    channels are created and configured but not otherwise touched.
    """

    def __init__(self):
        """Create the per-channel BOW objects and their size settings."""
        # Each line sets up one channel: BOW object, "dim" value, "vocsz" value.
        self.bowHOG, self.dimHOG, self.vocszHOG = BOW(), 96, 128
        self.bowHOF, self.dimHOF, self.vocszHOF = BOW(), 108, 128
        self.bowMBFx, self.dimMBFx, self.vocszMBFx = BOW(), 96, 128
        self.bowMBFy, self.dimMBFy, self.vocszMBFy = BOW(), 96, 128

    def build(self,dataHOG,dataHOF,dataMBFx,dataMBFy):
        """Run ``vq`` on the HOG BOW object with ``dataHOG`` and ``vocszHOG``.

        ``dataHOF``, ``dataMBFx`` and ``dataMBFy`` are accepted but not used.
        """
        hog_model = self.bowHOG
        hog_model.vq(data=dataHOG, voc_size=self.vocszHOG, gt_labels=None)

    def calcFeatures(self,dataHOG,dataHOF,dataMBFx,dataMBFy):
        """Compute the BOW representation of ``dataHOG`` and return the HOG object's ``bow``.

        ``dataHOF``, ``dataMBFx`` and ``dataMBFy`` are accepted but not used.
        """
        hog_model = self.bowHOG
        hog_model.calc_bow_representation(fv=dataHOG)
        return hog_model.bow
コード例 #6
0
def read_bowed_labeled_features(use_dl=False, use_cv=False):
    """
    Load features together with their labels.

    use_dl: if True, use deep learning features: the features come from
        read_features(use_dl=True) and the labels are read from
        global_defs.PATH_LABELS, one integer per line. use_cv is ignored in
        this case. If False, BOW features are used.
    use_cv: only consulted when use_dl is False. If True the result of
        sift_bow_cv.read_labeled_BOWed_features_cv() is returned, otherwise
        the result of BOW.read_BOWed_labeled_features().
    :return a list [features, labels] (for the BOW variants the helper's
        return value is passed through unchanged)
    """
    if use_dl:
        features = read_features(use_dl=True)
        # The context manager closes the label file even if a line fails to
        # parse as an integer.
        with open(global_defs.PATH_LABELS, 'r') as label_file:
            labels = np.array([int(line) for line in label_file])
        return [features, labels]

    if use_cv:
        import sift_bow_cv
        return sift_bow_cv.read_labeled_BOWed_features_cv()

    import BOW
    return BOW.read_BOWed_labeled_features()
コード例 #7
0
ファイル: TokenizingClass.py プロジェクト: vedantdesai13/NLP
        words = b.convert_to_lowcase(words)
        words = b.rem_stopwords(words)
        words = b.lemma(words)
        words = self.text_to_word(words)

        bag = np.zeros(len(vocab))
        for w in words:
            for i, x in enumerate(vocab):
                if x == w:
                    bag[i] += 1
        return np.array(bag)


# Read data from a file; the context manager closes it once the text is loaded.
with open('text2.txt', 'r') as f:
    corpus = f.read()

# Apply the same clean-up steps to the corpus that bag_of_words applies to its
# input, so both sides are compared in the same form.
corpus = b.rem_special_chars(corpus)
corpus = b.convert_to_lowcase(corpus)
corpus = b.rem_stopwords(corpus)
corpus = b.lemma(corpus)

t = Tokenizer()
s = t.text_to_word(corpus)
print("word count=", t.word_count(s))
print("number of unique words", t.number_of_unique_words(s))

# The tokenised corpus itself is passed as the vocabulary for the sample input.
inp = "autonomous individuals mutual aid self governance"
print("input = ", inp)
print("bag of words = ", t.bag_of_words(inp, s))
def clfs_rolling_windows(img,
                         clf,
                         stride=8,
                         padding=0,
                         ratios=((1, 1), (3, 1), (1, 3)),
                         scale=1.5,
                         prob_threshold=0.75,
                         min_side_length=16,
                         method='sift',
                         kmeans_path=None):
    """Classify multi-scale sliding windows of ``img`` with a bag-of-words model.

    Keypoints and descriptors are extracted once for the whole image. For
    every aspect ratio in ``ratios`` a window is slid over the zero-padded
    image area and repeatedly enlarged by ``scale``. The descriptors whose
    keypoints lie strictly inside a window are turned into a histogram with
    ``BOW.compute_histogram`` (``norm='l0'``), and all histograms are scored
    with a single ``clf.predict_proba`` call.

    Args:
        img: image array indexed as (row, column, channel); a third axis is
            required because ``img.shape[2]`` is read.
        clf: classifier exposing ``predict_proba``.
        stride: initial step between windows; it is multiplied by
            ``sqrt(scale)`` (truncated to int) each time the window grows.
        padding: width of the zero border added on every side when computing
            the search area.
        ratios: iterable of ``(height, width)`` ratio pairs.
        scale: factor by which both window sides grow between passes. It must
            be large enough that ``int(side * scale) > side``, otherwise the
            window never grows and the loop does not terminate.
        prob_threshold: windows are kept only if their best class probability
            is strictly greater than this value.
        min_side_length: length of the shorter side of the first window.
        method: ``'sift'`` or ``'surf'``.
        kmeans_path: file handed to ``BOW.load_kmeans``; defaults to
            ``f'kmeans_{method}_128.pkl'``.

    Returns:
        A DataFrame with columns ``box`` (``(top, left, bottom, right)``),
        ``histogram``, ``high_prob`` and ``high_category``, restricted to rows
        with ``high_category != 0`` and ``high_prob > prob_threshold``. An
        empty frame with the same columns is returned when the image yields no
        descriptors or no window contains any keypoint.

    Raises:
        ValueError: if ``method`` is neither ``'sift'`` nor ``'surf'``.
    """
    if method not in ('sift', 'surf'):
        raise ValueError("method param should be sift or surf")
    if kmeans_path is None:
        kmeans_path = f'kmeans_{method}_128.pkl'
    h, w = img.shape[:2]
    padded_image = np.zeros((h + padding * 2, w + padding * 2, img.shape[2]),
                            dtype='uint8')
    padded_image[padding:h + padding, padding:w + padding, :] = img
    # From here on h and w describe the padded search area.
    h, w = padded_image.shape[:2]

    box_hist_dict = {
        'box': [],
        'histogram': [],
        'high_prob': [],
        'high_category': []
    }

    kmeans = BOW.load_kmeans(kmeans_path)
    # NOTE(review): features are extracted from the unpadded image, so keypoint
    # coordinates are not shifted by `padding` while the boxes below use padded
    # coordinates -- confirm this is intended when padding > 0.
    kp, des = FeatureExtraction.feature_extraction(img, method)
    if des is None:
        # No descriptors at all: indexing `des` below would fail, and there is
        # nothing to classify.
        return pd.DataFrame(box_hist_dict)
    df_kp = pd.DataFrame({
        'x': [item.pt[0] for item in kp],
        'y': [item.pt[1] for item in kp]
    })
    for ratio in ratios:
        print(f'ratio {ratio}')
        ratio_h, ratio_w = ratio
        # The shorter side of the first window is min_side_length; the longer
        # side follows from the ratio. Windows are stored as (h, w).
        if ratio_h < ratio_w:
            start_window = (min_side_length,
                            int(min_side_length * ratio_w / ratio_h))
        else:
            start_window = (int(min_side_length * ratio_h / ratio_w),
                            min_side_length)
        window = start_window
        this_stride = stride

        # Dry run of the growth schedule to know the progress-bar total.
        tmp = window
        count = 0
        while tmp[0] <= h and tmp[1] <= w:
            count += 1
            tmp = (int(tmp[0] * scale), int(tmp[1] * scale))
        pbar = tqdm(total=count)
        while window[0] <= h and window[1] <= w:
            for i in range(0, h - window[0], this_stride):
                for j in range(0, w - window[1], this_stride):
                    box = (i, j, i + window[0], j + window[1])

                    # Select the descriptors whose keypoint lies strictly
                    # inside the box instead of re-extracting per window.
                    des_this = des[np.logical_and.reduce(
                        (box[0] < df_kp['y'], df_kp['y'] < box[2],
                         box[1] < df_kp['x'], df_kp['x'] < box[3]))]
                    if len(des_this) == 0:
                        continue
                    histogram = BOW.compute_histogram(des_this,
                                                      kmeans,
                                                      method=method,
                                                      norm='l0')
                    if histogram is None:
                        continue
                    box_hist_dict['box'].append(box)
                    box_hist_dict['histogram'].append(histogram)
            window = (int(window[0] * scale), int(window[1] * scale))
            this_stride = int(this_stride * np.sqrt(scale))
            pbar.update(1)
        pbar.close()

    if not box_hist_dict['histogram']:
        # predict_proba cannot be called on an empty sample set.
        return pd.DataFrame(box_hist_dict)

    X = np.array(box_hist_dict['histogram'])
    y_prob = clf.predict_proba(X)
    high_prob = np.max(y_prob, axis=1).ravel().tolist()
    high_category = np.argmax(y_prob, axis=1).ravel().tolist()
    box_hist_dict['high_prob'] = high_prob
    box_hist_dict['high_category'] = high_category
    df_box_hist = pd.DataFrame(box_hist_dict)
    # Drop windows whose best class is index 0 (presumably the background
    # class -- confirm against the classifier's label order), then weak ones.
    df_box_hist = df_box_hist[df_box_hist['high_category'] != 0]
    df_box_hist = df_box_hist[df_box_hist['high_prob'] > prob_threshold]
    return df_box_hist
コード例 #9
0
#B_gt_labels = np.ones(N_B,dtype='int')
#feat  = np.concatenate((A,B), axis=0)
#gt_labels = np.concatenate((A_gt_labels,B_gt_labels), axis=0)

# Test Case 2: Tiny dataset
#
# Eleven hand-written 2-D descriptors with a 0/1 ground-truth label each.
desc = [
    [1, 1],
    [1.5, 1],
    [1, 15],
    [1.5, 1],
    [1, 1.5],
    [1, 1.6],
    [10, 10],
    [12, 10],
    [10, 13],
    [14, 10],
    [10, 15],
]

# One descriptor per row. np.vstack is used rather than the `sp.vstack` alias,
# which newer SciPy releases no longer provide.
feat = np.vstack(desc)
gt_labels = np.array([0,0,1,0,0,0,1,1,1,1,1])


V = BOW(data=feat,voc_size=2,gt_labels=gt_labels)
# Represent a set made of the training descriptors repeated twice.
feat2 = np.concatenate((feat,feat),axis=0)
V.calc_bow_representation(fv=feat2)
# Parenthesised form prints the same value under Python 2 and Python 3.
print(V.bow)