# NOTE(review): this chunk starts mid-script — `words`, `valence`, `texts`,
# `labels`, `avg_valence` and `np` are defined/imported earlier in the file.
words, valence = np.array(words), np.array(valence)

# Hoist the mean so it is computed once instead of four times
# (the original comment recorded avg = 5.1511713456).
mean_valence = np.average(valence)
above_mean = valence > mean_valence
below_mean = valence < mean_valence
words_pos, valence_pos = words[above_mean], valence[above_mean]
words_neg, valence_neg = words[below_mean], valence[below_mean]

# Average positive / negative valence score per text.
pos = avg_valence(texts, words_pos, valence_pos)
neg = avg_valence(texts, words_neg, valence_neg)

from visualization import draw_scatter_with_color

draw_scatter_with_color(pos, neg, labels, 'pos', 'neg')

# Classify polarity. -1 appears to be a sentinel for "no matching words":
# a missing positive score forces label 0, a missing negative score forces
# label 1; otherwise the larger score wins.
polarity = []
for pos_score, neg_score in zip(pos, neg):
    if pos_score == -1:
        polarity.append(0)
    elif neg_score == -1:
        polarity.append(1)
    else:
        polarity.append(1 if pos_score > neg_score else 0)

from analysis import analysis_result

print(labels)
print(polarity)
analysis_result(labels, polarity)
# Fixed: __author__ was assigned three times in a row; only the last
# assignment ("NLP-PC") ever took effect, so the duplicates are removed.
__author__ = "NLP-PC"

import feature_generating
import classifiers
import analysis
from load_data import load_train_data, load_processed_data
from load_data import load_test_data
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB
from load_data import load_selected_data

print("Start")

# Fit the TF-IDF vocabulary/idf weights on the training texts only, then
# reuse them to transform the test set (transform, not fit_transform).
vectorizer = TFIDF_estimator()
texts, train_labels = load_selected_data(data_type="train")
transformed_train = vectorizer.fit_transform(texts)
testdata, true_labels = load_selected_data(data_type="test")
transformed_test = vectorizer.transform(testdata)

# Multinomial Naive Bayes on the TF-IDF features, then report metrics.
predict = mNB(transformed_train, train_labels, transformed_test)
analysis_result(predict, true_labels)
if count != 0: vec /= count return vec from sklearn.preprocessing import scale train_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_train]) if scaling == True: train_vecs = scale(train_vecs) # Train word2vec on test tweets # imdb_w2v.train(x_test) # Build test tweet vectors then scale test_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_test]) if scaling == True: test_vecs = scale(test_vecs) # scaling to [0, 1] interval min_max_scaler = MinMaxScaler() train_vecs = min_max_scaler.fit_transform(train_vecs) test_vecs = min_max_scaler.fit_transform(test_vecs) # Use classification algorithm (i.e. Stochastic Logistic Regression) on training set, then assess model performance on test set from classifiers import gNB, mNB from analysis import analysis_result pre = mNB(train_vecs, y_train, test_vecs) analysis_result(pre, y_test)
# NOTE(review): Python 2 script fragment — it begins inside a `try:` block
# whose header is above this chunk, and it is truncated mid-call at the end.
    # Evaluate the trained model on the held-out set in a single session run.
    test_predict, outputs_, test_loss = sess.run([pred, outputs, loss],
                                                 feed_dict={
                                                     x_: test_xs,
                                                     y_: test_ys
                                                 })
    # Re-run the loss op on the predictions (reported as RMSE below).
    loss_val = sess.run(loss, feed_dict={y_: test_ys, pred: test_predict})
    print outputs_, 'outputs shape', np.shape(outputs_)
    print("RMSE: {}".format(loss_val))
    # print test_predict
    # Deliberately jump to the result/reporting handler below.
    raise KeyboardInterrupt
except KeyboardInterrupt as kbi:
    print '#### result ###'
    # Undo the normalisation applied during training before reporting.
    pred = test_predict * normalize_factor
    test_ys = test_ys * normalize_factor
    analysis.analysis_result(true=test_ys, pred=pred, error_range_percent=5)
    # NOTE(review): test_ys is rescaled twice (normalize_factor above, then
    # min/max here) while test_predict is not min/max-rescaled for the plot —
    # presumably intentional, but worth confirming.
    test_ys = test_ys * (max_ - min_) + min_
    utils.plot_xy(test_predict=test_predict, test_ys=test_ys,
                  savename='./dynalog_result_last_' + args.save_folder_name + '.png')
    print '########'
    print test_ys[:10]
    print test_predict[:10]
    sess.close()
    print 'start evaluation'
    # NOTE(review): call is truncated here in this chunk — the argument list
    # continues past the visible text.
    eval.eval(x=test_xs, y=test_ys, error_range_percent=5, model_path=os.path.join(
# Fixed: __author__ was assigned three times in a row; only the last
# assignment ('NLP-PC') ever took effect, so the duplicates are removed.
__author__ = 'NLP-PC'

import feature_generating
import classifiers
import analysis
from load_data import load_train_data, load_processed_data
from load_data import load_test_data
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB
from load_data import load_selected_data

print('Start')

# Fit the TF-IDF vocabulary/idf weights on the training texts only, then
# reuse them to transform the test set (transform, not fit_transform).
vectorizer = TFIDF_estimator()
texts, train_labels = load_selected_data(data_type='train')
transformed_train = vectorizer.fit_transform(texts)
testdata, true_labels = load_selected_data(data_type='test')
transformed_test = vectorizer.transform(testdata)

# Multinomial Naive Bayes on the TF-IDF features, then report metrics.
predict = mNB(transformed_train, train_labels, transformed_test)
analysis_result(predict, true_labels)