Example #1
0
def gridSearchNGram(X, X_train, y_train, learner, ngrams, k=3, return_errors=False,
                    random_state=42, method='kfold'):
    """Evaluate one bag-of-words model per n-gram setting and return the best one.

    For every entry ``ng`` of ``ngrams`` a ``BagOfWords(ng)`` is fitted on ``X``,
    used to transform ``X_train``, and the transformed data is scored with
    ``KFold_score`` (``method='kfold'``) or ``five2_score`` (``method='five2'``).
    The entry with the highest score is selected.

    Args:
        X: Data the ``BagOfWords`` model is fitted on.
        X_train: Data that is transformed and passed to the scoring function.
        y_train: Targets passed to the scoring function alongside ``X_train``.
        learner: Estimator forwarded unchanged to the scoring function.
        ngrams: Sized collection of n-gram settings to try; must not be empty.
        k: Forwarded as ``k`` to ``KFold_score``; unused for ``'five2'``.
        return_errors: When true, the second return value is ``1 - scores``
            instead of ``scores``.
        random_state: Base seed. The i-th candidate (1-based) is scored with
            ``random_state + i``.
        method: ``'kfold'`` or ``'five2'``.

    Returns:
        A tuple ``(best, values, tried)`` where ``best`` is the entry of
        ``ngrams`` with the highest score, ``values`` is a NumPy array of the
        scores (or ``1 - scores`` when ``return_errors`` is true), and
        ``tried`` is the list of evaluated settings in evaluation order.

    Raises:
        ValueError: If ``method`` is not a supported name or ``ngrams`` is empty.
    """
    # Fail before any fitting: an unknown method used to run every (expensive)
    # fit/transform and only then crash in argmax on an empty score list.
    if method not in ('kfold', 'five2'):
        raise ValueError(
            "method must be 'kfold' or 'five2', got {!r}".format(method))

    total = len(ngrams)
    if total == 0:
        raise ValueError('ngrams must contain at least one entry')

    # Report progress roughly every 10%. Clamp to 1 so the modulo below cannot
    # divide by zero when fewer than ten candidates are supplied.
    step = max(1, total // 10)

    g_ngrams, scores = [], []
    start = time.time()

    for done, ng in enumerate(ngrams, start=1):
        # Each candidate gets its own seed, offset from the base seed.
        rs = random_state + done

        bow = BagOfWords(ng).fit(X)
        XX = bow.transform(X_train)
        if method == 'kfold':
            score = KFold_score(XX, y_train, learner, k=k, random_state=rs)
        else:
            score = five2_score(XX, y_train, learner, random_state=rs)
        scores.append(score)
        g_ngrams.append(ng)

        if done % step == 0:
            now = time.time()
            print(' +gridSearch2D : {}% t:{:.2f}'.format(int(done / total * 100), now - start))
            # Restart the timer so each report shows the time since the last one.
            start = now

    scores = np.array(scores)
    bests = g_ngrams[np.argmax(scores)]

    if return_errors:
        return bests, 1 - scores, g_ngrams
    return bests, scores, g_ngrams
Example #2
0
    for ng in range(2, 6):
        ret = ngramXP(X, y, ng)
        ngs.append(ng)
        ans.append(ret[0])
        scores.append(np.max(ret[1]))

    max_ = np.argmax(scores)

    best_ng = ngs[max_]
    best_low, best_high = ans[max_]

    print(scores[max_])
    print(best_ng)
    print(best_low)
    print(best_high)

    X = X_train + X_test

    bow = BagOfWords(best_ng).fit(X)
    XX = bow.transform(X, best_low, best_high)

    learner = LinearSVC()

    XX_train = XX[:len(X_train)]
    XX_test = XX[len(X_train):]

    learner = learner.fit(XX_train, y_train)

    print((learner.predict(XX_test) == y_test).mean())