def build_model():
    """Train the binary sentiment model and hand it off for persistence.

    Steps, in order:

    1. Read ``lib/data/train.tsv`` (tab-separated; the path is relative to
       the current working directory).
    2. Keep only rows whose ``Sentiment`` is 0 or 4 and label them in a new
       ``Binary`` column (0 for sentiment 0, 1 otherwise).
    3. Fit the model's vectorizer on the ``Phrase`` column and transform it.
    4. Split into train/test sets, train on the training portion.
    5. Call ``model.pickle_clf()`` and ``model.pickle_vectorizer()``.
    6. Plot the ROC curve on the held-out test portion.

    Returns:
        None. Progress is reported on stdout.
    """
    model = NLPModel()

    with open('lib/data/train.tsv') as f:
        data = pd.read_csv(f, sep='\t')

    # Take an explicit copy: adding a column to a filtered slice of `data`
    # is chained assignment and triggers pandas' SettingWithCopyWarning.
    extremes = (data['Sentiment'] == 0) | (data['Sentiment'] == 4)
    pos_neg = data[extremes].copy()

    # Vectorized equivalent of "0 if Sentiment == 0 else 1".
    pos_neg['Binary'] = (pos_neg['Sentiment'] != 0).astype(int)

    model.vectorizer_fit(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(pos_neg.loc[:, 'Phrase'])
    print('Vectorizer transform complete')
    # Column name must match the one created above ('Binary').
    y = pos_neg.loc[:, 'Binary']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()

    # Evaluate on the held-out split: features and labels must both come
    # from the test portion.
    model.plot_roc(X_test, y_test)
def build_model():
    """Train the binary sentiment model and hand it off for persistence.

    Reads ``lib/data/train.tsv`` (tab-separated; the path is relative to the
    current working directory), keeps only rows whose ``Sentiment`` is 0 or
    4, and labels them in a ``Binary`` column (0 for sentiment 0, 1
    otherwise). The model's vectorizer is fitted on ``Phrase`` and used to
    transform it; the result is split into train/test sets, the model is
    trained on the training portion, ``model.pickle_clf()`` and
    ``model.pickle_vectorizer()`` are called, and a 12x12 ROC plot is drawn
    for the held-out test portion.

    Returns:
        None. Progress is reported on stdout.
    """
    model = NLPModel()

    with open('lib/data/train.tsv') as f:
        data = pd.read_csv(f, sep='\t')

    # Copy the selection so the new column is written to an independent
    # frame rather than to a slice of `data` (avoids SettingWithCopyWarning).
    is_extreme = (data['Sentiment'] == 0) | (data['Sentiment'] == 4)
    pos_neg = data[is_extreme].copy()

    # Vectorized form of "0 if Sentiment == 0 else 1"; unlike a row-wise
    # apply it also yields a proper (empty) column when nothing matches.
    pos_neg['Binary'] = (pos_neg['Sentiment'] != 0).astype(int)

    phrases = pos_neg.loc[:, 'Phrase']
    model.vectorizer_fit(phrases)
    print('Vectorizer fit complete')

    X = model.vectorizer_transform(phrases)
    print('Vectorizer transform complete')
    y = pos_neg.loc[:, 'Binary']

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model.train(X_train, y_train)
    print('Model training complete')

    model.pickle_clf()
    model.pickle_vectorizer()

    model.plot_roc(X_test, y_test, size_x=12, size_y=12)