__author__ = "hs" __author__ = "hs" __author__ = "NLP-PC" import feature_generating import classifiers import analysis from load_data import load_train_data, load_processed_data from load_data import load_test_data from save_data import dump_picle from vectorizers import TFIDF_estimator, anew_estimator from analysis import analysis_result from classifiers import mNB from load_data import load_selected_data print("Start") vectorizer = TFIDF_estimator() texts, train_labels = load_selected_data(data_type="train") transformed_train = vectorizer.fit_transform(texts) testdata, true_labels = load_selected_data(data_type="test") transformed_test = vectorizer.transform(testdata) predict = mNB(transformed_train, train_labels, transformed_test) analysis_result(predict, true_labels)
import numpy as np

# NOTE: this file begins mid-function in the source dump; the signature and
# the per-text counting loop below are reconstructed assumptions, added so
# that the preserved fragment is runnable.
def avg_valence(texts, words, valence):
    """Score each text by the mean valence of the lexicon words it contains."""
    texts_scores = []
    for text in texts:
        # Assumed counting step: occurrences of each lexicon word in the text.
        word_count = [text.count(word) for word in words]
        word_count = np.array(word_count)
        occur_times = (word_count >= 1).sum()
        if occur_times > 0:
            # Average the valence of the lexicon words that appear in the text.
            avg = np.average(np.array(valence)[word_count >= 1])
        else:
            # Sentinel value for texts with no lexicon hits.
            avg = -1
        texts_scores.append(avg)
    return texts_scores


if __name__ == '__main__':
    from load_data import load_selected_data, load_anew

    texts, labels = load_selected_data(data_type='train', stem=False)
    words, valence, _ = load_anew()
    words, valence = np.array(words), np.array(valence)
    # Split the ANEW lexicon at its mean valence (avg = 5.1511713456).
    mean_valence = np.average(valence)
    words_pos, valence_pos = words[valence > mean_valence], valence[valence > mean_valence]
    words_neg, valence_neg = words[valence < mean_valence], valence[valence < mean_valence]
    pos = avg_valence(texts, words_pos, valence_pos)
    neg = avg_valence(texts, words_neg, valence_neg)
    from visualization import draw_scatter_with_color
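# --- Hedged sketch (not part of the original repo): the fragment imports
# draw_scatter_with_color but ends before calling it, and its signature is
# unknown. The intended plot is presumably each text's positive-lexicon
# score against its negative-lexicon score, colored by gold label; a
# matplotlib stand-in under that assumption:
import matplotlib.pyplot as plt


def scatter_pos_neg_sketch(pos, neg, labels):
    # One point per text; color encodes the (assumed numeric) label.
    plt.scatter(pos, neg, c=labels, cmap='coolwarm', s=12)
    plt.xlabel('avg valence over positive ANEW words')
    plt.ylabel('avg valence over negative ANEW words')
    plt.title('Per-text lexicon valence scores')
    plt.show()

# Example usage: scatter_pos_neg_sketch(pos, neg, labels)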