def gridSearchNGram(X, X_train, y_train, learner, ngrams, k=3, return_errors=False, random_state=42, method='kfold'):
    """Grid-search over n-gram sizes for a bag-of-words + learner pipeline.

    For each n-gram size in `ngrams`, fits a BagOfWords on `X`, transforms
    `X_train`, and scores `learner` via the chosen validation `method`
    ('kfold' uses KFold_score with `k` folds; 'five2' uses five2_score).

    Parameters
    ----------
    X : corpus used to fit the BagOfWords vocabulary
    X_train, y_train : training documents and targets used for scoring
    learner : estimator passed to the scoring helpers
    ngrams : iterable of n-gram sizes to try
    k : number of folds for the 'kfold' method
    return_errors : if True, the score array is returned as errors (1 - score)
    random_state : base seed; incremented once per candidate for variety
    method : 'kfold' or 'five2' (any other value silently records no score)

    Returns
    -------
    (best_ngram, scores_or_errors, evaluated_ngrams)
    """
    g_ngrams, scores = [], []
    rs = random_state
    total = len(ngrams)
    # max(1, ...): with fewer than 10 candidates int(total / 10) is 0 and the
    # progress modulo below would raise ZeroDivisionError.
    step = max(1, int(total / 10))
    start = time.time()
    for ng in ngrams:
        rs += 1
        bow = BagOfWords(ng).fit(X)
        XX = bow.transform(X_train)
        if method == 'kfold':
            scores.append(KFold_score(XX, y_train, learner, k=k, random_state=rs))
        elif method == 'five2':
            scores.append(five2_score(XX, y_train, learner, random_state=rs))
        g_ngrams.append(ng)
        # Periodic progress report (~every 10% of candidates).
        if (rs - random_state) % step == 0:
            now = time.time()
            print(' +gridSearch2D : {}% t:{:.2f}'.format(int((rs - random_state) / total * 100), now - start))
            start = now
    best = np.argmax(scores)
    bests = g_ngrams[best]
    if return_errors:
        return bests, (1 - np.array(scores)), g_ngrams
    else:
        return bests, np.array(scores), g_ngrams
def get_top(data, target, p, n, ngram=1):
    """Return the `n` most frequent n-grams among documents of class `p`.

    Fits a BagOfWords on the subset of `data` whose target equals `p` and
    ranks its `freqs_` table by descending frequency.

    Returns
    -------
    (top_keys, top_values) : two aligned numpy arrays of length <= n
    """
    # Keep only the documents belonging to class `p`.
    p_data = [doc for doc, label in zip(data, target) if label == p]
    freqs = BagOfWords(ngram).fit(p_data).freqs_
    keys, values = list(zip(*(freqs.items())))
    # Indices of the n largest frequencies, most frequent first.
    order = np.argsort(values)[::-1][:n]
    return np.array(keys)[order], np.array(values)[order]
def ngramXP(X, y, ngram):
    """Run a 2D grid search over (low, high) cut bounds for one n-gram size.

    Builds a BagOfWords(ngram) + LinearSVC pipeline, grid-searches the cut
    bounds with the 'five2' validation method, saves the grid plot and dumps
    the raw result to 'results-<ngram>.json'.

    Returns the full `gridSearch2D` result tuple.
    """
    print(' +ngram : begin with ngram {}'.format(ngram))
    bow = BagOfWords(ngram).fit(X)
    learner = LinearSVC()
    mixed_learner = BowAndLearner(bow, learner)
    lowBounds = np.arange(0, 30, 1)
    highBounds = np.arange(20, 70, 1)
    ans = gridSearch2D(X, y, mixed_learner, lowBounds, highBounds, condition, method='five2')
    save2DGridSearch(ans[1], ans[2], ans[3], 'borne basse', 'borne haute', ngram)
    # BUG FIX: json.dump writes str, so the file must be opened in text
    # mode ('w'); 'wb' raises TypeError on Python 3.
    with open('results-{}.json'.format(ngram), 'w') as f:
        json.dump('{}'.format(ans), f)
    return ans
def transform_cut(self, X):
    """Apply the parent cut-transform with the stored bounds, then re-encode.

    First filters `X` through the parent class' transform_cut using the
    fitted `low_`/`high_` bounds, then transforms the result with a fresh
    BagOfWords(ngram_) fitted on the stored corpus `X_`.
    """
    filtered = super(BowPreFilter, self).transform_cut(X, self.low_, self.high_)
    refit_bow = BagOfWords(self.ngram_).fit(self.X_)
    return refit_bow.transform_cut(filtered)
if __name__ == '__main__':
    # Guard clause: a data file name is required on the command line.
    if len(sys.argv) < 2:
        print(' >> you need to give data')
        exit()
    file = sys.argv[1]

    # --- simple bow: inspect the most frequent words per class ---
    scores = []
    X, y = load_data(os.path.join(DATA, file))
    y, reverse = target2int(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42)
    low = 18
    high = 53
    X = X_train + X_test
    XX = BagOfWords().fit(X).transform_cut(X, low, high)
    print(get_top(XX, y, 0, 5))
    print(get_top(XX, y, 1, 5))
    print(reverse)
# --- bow with cut bounds: k-fold evaluation on a train/test split ---
scores = []
X, y = load_data(os.path.join(DATA, file))
y, reverse = target2int(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42)
low = 18
high = 53
X = X_train + X_test
XX = BagOfWords().fit(X).transform(X, low, high)
XX_train = XX[:len(X_train)]
XX_test = XX[len(X_train):]
learner = LinearSVC()
scores.append(KFold_score(XX_train, y_train, learner, get_scores=True))

# --- bow ngram: grid-search the n-gram size, then evaluate the best ---
X, y = load_data(os.path.join(DATA, file))
y, reverse = target2int(y)
# NOTE(review): the original source was truncated in the middle of this call;
# the arguments below are reconstructed from the identical splits used
# elsewhere in this file — confirm against the original.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42)
# NOTE(review): the initialisation of ngs/ans (and the reset of scores) fell
# in the truncated span; reconstructed — TODO confirm.
ngs, ans, scores = [], [], []
for ng in range(2, 6):
    ret = ngramXP(X, y, ng)
    ngs.append(ng)
    ans.append(ret[0])
    scores.append(np.max(ret[1]))
max_ = np.argmax(scores)
best_ng = ngs[max_]
best_low, best_high = ans[max_]
print(scores[max_])
print(best_ng)
print(best_low)
print(best_high)
# Refit on the full corpus with the best n-gram size and bounds, then
# report test accuracy.
X = X_train + X_test
bow = BagOfWords(best_ng).fit(X)
XX = bow.transform(X, best_low, best_high)
learner = LinearSVC()
XX_train = XX[:len(X_train)]
XX_test = XX[len(X_train):]
learner = learner.fit(XX_train, y_train)
print((learner.predict(XX_test) == y_test).mean())
if __name__ == '__main__':
    # Guard clause: a data file name is required on the command line.
    if len(sys.argv) < 2:
        print(' >> you need to give data')
        exit()
    file = sys.argv[1]

    # Grid-search the (low, high) cut bounds for a unigram bag-of-words
    # + LinearSVC pipeline and report the best configuration.
    X, y = load_data(os.path.join(DATA, file))
    y, reverse = target2int(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42)
    bow = BagOfWords().fit(X)
    learner = LinearSVC()
    mixed_learner = BowAndLearner(bow, learner)
    lowBounds = np.arange(0, 30, 1)
    highBounds = np.arange(20, 70, 1)
    ans = gridSearch2D(X_train, y_train, mixed_learner, lowBounds, highBounds, condition)
    # display2DGridSearch(ans[1], ans[2], ans[3], 'borne basse', 'borne haute')
    best_low, best_high = ans[0]
    print(ans[0])
    print(np.max(ans[1]))