Ejemplo n.º 1
0
    logger.info(r"loading corpus from : " + corpus_name)

    lexicon_name = get_file_path('lexicon')
    logger.info(r"loading lexicon form : " + lexicon_name)

    expand_name = get_file_path('neural_cand')
    logger.info(r"loading expand_word from : " + expand_name)

    mark_name = get_file_path('mark')
    logger.info(r"loading mark from : " + mark_name)

    corpus = load_corpus(corpus_name)
    lexicon = load_lexicon(lexicon_name)
    mark = load_mark(mark_name)
    # log_state('use extend lexicon')
    lexicon = combine_lexicon(lexicon_name, expand_name)

    log_state('mean')
    evaluate_mean(corpus, lexicon, mark)
    log_state('tf_mean')
    evaluate_tf_mean(corpus, lexicon, mark)
    log_state('tfidf_mean')
    evaluate_tfidf_mean(corpus, lexicon, mark)

    log_state('geo')
    evaluate_geo(corpus, lexicon, mark)
    log_state('tfidf_geo')
    evaluate_tfidf_geo(corpus, lexicon, mark)
    log_state('tf_geo')
    evaluate_tf_geo(corpus, lexicon, mark)
Ejemplo n.º 2
0
    if multivariant is False:
        linear_regression(X_train, X_test, Y_train, Y_test, plot=False)
    else:
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun="ordinary_least_squares")
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun="Ridge_Regression")
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun="Bayesian_Regression")
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun="SVR")
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun="KNN_Reg")


if __name__ == "__main__":
    # Script entry point: load the corpus, mark and lexicon data, then run
    # cv() on the outputs of linear_fusion and linear_fusion_sqr.

    # Module-level flag; nothing in this block reads it.
    # NOTE(review): presumably consumed as a global by code defined elsewhere
    # in the file -- confirm before changing or removing it.
    normalize = True
    corpus = load_corpus(get_file_path("cn_corpus"))
    # Loading the base lexicon on its own is disabled; combine_lexicon below
    # receives the "lexicon" path together with the "neural_cand" path instead.
    # lexicon = load_lexicon(get_file_path('lexicon'))
    mark = load_mark(get_file_path("mark"))
    lexicon = combine_lexicon(get_file_path("lexicon"), get_file_path("neural_cand"))

    # Optional debugging aid (disabled): show the terms that the corpus and
    # the lexicon have in common, then exit.
    # from visualization import show_common_term
    # show_common_term(corpus, lexicon)
    # exit()

    # First run: linear_fusion yields four values, unpacked as a
    # (mean, true) pair for valence and another pair for arousal.
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion(corpus, lexicon, mark)
    print("start.....")
    # Each pair is passed to cv() separately, with multivariant disabled.
    cv(valence_mean, valence_true, multivariant=False)
    cv(arousal_mean, arousal_true, multivariant=False)
    print("OK")

    # Second run: same unpacking, but the values come from linear_fusion_sqr
    # and overwrite the names bound by the first run.
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion_sqr(corpus, lexicon, mark)
    print("start.....")
    # Only the valence call is visible in this excerpt.
    cv(valence_mean, valence_true, multivariant=False)
Ejemplo n.º 3
0
    if multivariant is False:
        linear_regression(X_train, X_test, Y_train, Y_test, plot=False)
    else:
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun='ordinary_least_squares')
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun='Ridge_Regression')
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun='Bayesian_Regression')
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun='SVR')
        linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun='KNN_Reg')


if __name__ == '__main__':
    # Script entry point: load the corpus, mark and lexicon data, then run
    # cv() on the outputs of linear_fusion and linear_fusion_sqr.

    # Module-level flag; nothing in this block reads it.
    # NOTE(review): presumably consumed as a global by code defined elsewhere
    # in the file -- confirm before changing or removing it.
    normalize = True
    corpus = load_corpus(get_file_path('cn_corpus'))
    # Loading the base lexicon on its own is disabled; combine_lexicon below
    # receives the 'lexicon' path together with the 'neural_cand' path instead.
    # lexicon = load_lexicon(get_file_path('lexicon'))
    mark = load_mark(get_file_path('mark'))
    lexicon = combine_lexicon(get_file_path('lexicon'), get_file_path('neural_cand'))

    # Optional debugging aid (disabled): show the terms that the corpus and
    # the lexicon have in common, then exit.
    # from visualization import show_common_term
    # show_common_term(corpus, lexicon)
    # exit()

    # First run: linear_fusion yields four values, unpacked as a
    # (mean, true) pair for valence and another pair for arousal.
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion(corpus, lexicon, mark)
    print('start.....')
    # Each pair is passed to cv() separately, with multivariant disabled.
    cv(valence_mean, valence_true, multivariant=False)
    cv(arousal_mean, arousal_true, multivariant=False)
    print('OK')

    # Second run: same unpacking, but the values come from linear_fusion_sqr
    # and overwrite the names bound by the first run.
    valence_mean, valence_true, arousal_mean, arousal_true = linear_fusion_sqr(corpus, lexicon, mark)
    print('start.....')
    # Only the valence call is visible in this excerpt.
    cv(valence_mean, valence_true, multivariant=False)
Ejemplo n.º 4
0
    logger.info(r"loading corpus from : " + corpus_name)

    lexicon_name = get_file_path('lexicon')
    logger.info(r"loading lexicon form : " + lexicon_name)

    expand_name = get_file_path('neural_cand')
    logger.info(r"loading expand_word from : " + expand_name)

    mark_name = get_file_path('mark')
    logger.info(r"loading mark from : " + mark_name)

    corpus = load_corpus(corpus_name)
    lexicon = load_lexicon(lexicon_name)
    mark = load_mark(mark_name)
    # log_state('use extend lexicon')
    lexicon = combine_lexicon(lexicon_name, expand_name)

    log_state('mean')
    evaluate_mean(corpus, lexicon, mark)
    log_state('tf_mean')
    evaluate_tf_mean(corpus, lexicon, mark)
    log_state('tfidf_mean')
    evaluate_tfidf_mean(corpus, lexicon, mark)

    log_state('geo')
    evaluate_geo(corpus, lexicon, mark)
    log_state('tfidf_geo')
    evaluate_tfidf_geo(corpus, lexicon, mark)
    log_state('tf_geo')
    evaluate_tf_geo(corpus, lexicon, mark)