def worker(X_train, y_train, X_test, y_test, pm, rs):
    """Score one preprocessing configuration and record its F1 in ``rs``.

    A ``PreProcessor`` is built from the first four entries of ``pm``, an
    ``SGDClassifier`` is fitted on the transformed training samples, the
    transformed test samples are predicted, and the resulting F1 score is
    stored as ``rs[float(f1)] = p.get_name()``.

    Args:
        X_train: Training samples, handed to ``PreProcessor.fit_training``.
        y_train: Training labels passed to ``SGDClassifier.fit``.
        X_test: Test samples, handed to ``PreProcessor.fit_test``.
        y_test: Reference labels compared against the predictions.
        pm: Indexable whose items 0..3 are forwarded as the ``stop_words``,
            ``tf``, ``idf`` and ``scale`` keyword arguments.
        rs: Mutable mapping that receives the result. It is keyed by the
            F1 value, so two configurations with the same score share one
            key and the later write replaces the earlier one.

    Returns:
        None. The outcome is reported only through ``rs``.
    """
    p = PreProcessor(
        stop_words=pm[0],
        tf=pm[1],
        idf=pm[2],
        scale=pm[3],
    )

    # Single-argument print(...) emits the same "Test <name>" line as the old
    # Python 2 print statement, and is also valid syntax on Python 3.
    print('Test %s' % (p.get_name(),))

    train_features = p.fit_training(X_train)
    m = SGDClassifier()
    m.fit(train_features, y_train)

    test_features = p.fit_test(X_test)
    y_pred = m.predict(test_features)

    f1 = metrics.f1_score(y_test, y_pred)
    rs[float(f1)] = p.get_name()
# Script fragment (original line breaks and indentation were lost; the names
# row, stop_words, category, params and categories are bound outside the
# visible text).
#
# Part 1: converts row[3], row[4] and row[5] to booleans and stores
#   [stop_words, tf, idf, scale] in params[category].
# Part 2: opens RESULT for writing and wraps it in a csv.writer. For every
#   category whose directory under TRAINING_CATEGORIES exists, it loads the
#   training data, builds a PreProcessor from params[category], fits an
#   SGDClassifier, then for each (test_id, text) from test_entries() writes
#   the row [category, test_id, predicted_label].
#
# NOTE(review): bool() of any non-empty string is True, so if `row` comes
#   straight from csv.reader, cells such as "False" or "0" would all become
#   True -- confirm how `row` is produced.
# NOTE(review): RESULT is opened with mode 'w'; the Python 2 csv docs ask for
#   binary mode ('wb') where the platform distinguishes it -- confirm target.
# test_entries() is called again for every category, and each text is
#   transformed and predicted one at a time as a single-element np.array.
tf = bool(row[3]) idf = bool(row[4]) scale = bool(row[5]) params[category] = [stop_words, tf, idf, scale] with open(RESULT, 'w') as out: rows = csv.writer(out) for category in categories: source = os.path.join(TRAINING_CATEGORIES, category) if os.path.exists(source): X_train, y_train = load(source, category) pms = params[category] p = PreProcessor(stop_words=pms[0], tf=pms[1], idf=pms[2], scale=pms[3]) X_train = p.fit_training(X_train) print '.fit()' m = SGDClassifier() m.fit(X_train, y_train) print '.predict()' for test_id, text in test_entries(): text = p.fit_test(np.array([text])) result = [category, test_id, m.predict(text)[0]] rows.writerow(result)