# Log each input path as it is resolved. `corpus_name` is bound before this
# fragment begins.
logger.info("loading corpus from : " + corpus_name)

lexicon_name = get_file_path('lexicon')
# Fixed typo in the message: it used to read "form" instead of "from".
logger.info("loading lexicon from : " + lexicon_name)

expand_name = get_file_path('neural_cand')
logger.info("loading expand_word from : " + expand_name)

mark_name = get_file_path('mark')
logger.info("loading mark from : " + mark_name)

corpus = load_corpus(corpus_name)
lexicon = load_lexicon(lexicon_name)
mark = load_mark(mark_name)

# log_state('use extend lexicon')
# NOTE(review): this rebinding discards the lexicon returned by
# load_lexicon() just above; the earlier call is kept only because its side
# effects are not visible from here. Confirm whether it can be removed.
lexicon = combine_lexicon(lexicon_name, expand_name)

# Run every evaluation strategy in the original order, announcing each one
# through log_state() right before it runs.
for state, evaluate in (
    ('mean', evaluate_mean),
    ('tf_mean', evaluate_tf_mean),
    ('tfidf_mean', evaluate_tfidf_mean),
    ('geo', evaluate_geo),
    ('tfidf_geo', evaluate_tfidf_geo),
    ('tf_geo', evaluate_tf_geo),
):
    log_state(state)
    evaluate(corpus, lexicon, mark)
# NOTE(review): `multivariant`, `X_train`, `X_test`, `Y_train` and `Y_test`
# are not defined in this fragment -- presumably this dispatch is the tail of
# the `cv` function called below; confirm the indentation when restoring the
# file layout.
if multivariant is not False:
    # Fit each multivariate regressor in turn on the same split.
    for estimator_name in (
        "ordinary_least_squares",
        "Ridge_Regression",
        "Bayesian_Regression",
        "SVR",
        "KNN_Reg",
    ):
        linear_regression_multivariant(
            X_train, X_test, Y_train, Y_test, cost_fun=estimator_name
        )
else:
    # Only the literal False selects the single-variable regression.
    linear_regression(X_train, X_test, Y_train, Y_test, plot=False)


if __name__ == "__main__":
    normalize = True

    corpus = load_corpus(get_file_path("cn_corpus"))
    mark = load_mark(get_file_path("mark"))
    # The plain `load_lexicon(get_file_path('lexicon'))` call is disabled;
    # the lexicon is built by combine_lexicon() from the 'lexicon' and
    # 'neural_cand' files instead.
    lexicon = combine_lexicon(
        get_file_path("lexicon"),
        get_file_path("neural_cand"),
    )

    # Debugging aid (disabled): to check which words the corpus and the
    # lexicon have in common, run
    #     from visualization import show_common_term
    #     show_common_term(corpus, lexicon)
    #     exit()

    # First pass: features produced by linear_fusion().
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion(
        corpus, lexicon, mark
    )
    print("start.....")
    for predicted, target in (
        (valence_mean, valence_true),
        (arousal_mean, arousal_true),
    ):
        cv(predicted, target, multivariant=False)
    print("OK")

    # Second pass: features produced by linear_fusion_sqr().
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion_sqr(
        corpus, lexicon, mark
    )
    print("start.....")
    cv(valence_mean, valence_true, multivariant=False)
# NOTE(review): `multivariant`, `X_train`, `X_test`, `Y_train` and `Y_test`
# are not defined in this fragment -- presumably this dispatch is the tail of
# the `cv` function called below; confirm the indentation when restoring the
# file layout.
if multivariant is not False:
    # Fit each multivariate regressor in turn on the same split.
    for estimator_name in (
        'ordinary_least_squares',
        'Ridge_Regression',
        'Bayesian_Regression',
        'SVR',
        'KNN_Reg',
    ):
        linear_regression_multivariant(
            X_train, X_test, Y_train, Y_test, cost_fun=estimator_name
        )
else:
    # Only the literal False selects the single-variable regression.
    linear_regression(X_train, X_test, Y_train, Y_test, plot=False)


if __name__ == '__main__':
    normalize = True

    corpus = load_corpus(get_file_path('cn_corpus'))
    mark = load_mark(get_file_path('mark'))
    # The plain `load_lexicon(get_file_path('lexicon'))` call is disabled;
    # the lexicon is built by combine_lexicon() from the 'lexicon' and
    # 'neural_cand' files instead.
    lexicon = combine_lexicon(
        get_file_path('lexicon'),
        get_file_path('neural_cand'),
    )

    # Debugging aid (disabled): to check which words the corpus and the
    # lexicon have in common, run
    #     from visualization import show_common_term
    #     show_common_term(corpus, lexicon)
    #     exit()

    # First pass: features produced by linear_fusion().
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion(
        corpus, lexicon, mark
    )
    print('start.....')
    for predicted, target in (
        (valence_mean, valence_true),
        (arousal_mean, arousal_true),
    ):
        cv(predicted, target, multivariant=False)
    print('OK')

    # Second pass: features produced by linear_fusion_sqr().
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion_sqr(
        corpus, lexicon, mark
    )
    print('start.....')
    cv(valence_mean, valence_true, multivariant=False)