logging.getLogger("pyhive").setLevel(logging.CRITICAL) # avoid excessive logs logger.setLevel(logging.INFO) if __name__ == "__main__": data = m.Posts() scorer = make_scorer(fbeta_score, beta=2) trans_os = {'translate': [0.9], 'oversample': [0.9]} TARGET_LABELS = [ 'label_discriminating', 'label_inappropriate', 'label_sentimentnegative', 'label_needsmoderation' ] embedding_dict_glove = transformers.load_embedding_vectors( embedding_style='glove', file="./embeddings/glove_vectors.txt") embedding_dict_word2vec = transformers.load_embedding_vectors( embedding_style='word2vec', file="./embeddings/word2vec_vectors.txt") preps = { 'norm': lambda x: cleaning.series_apply_chaining(x, [cleaning.normalize]), 'glove': transformers.MeanEmbeddingVectorizer( embedding_dict=embedding_dict_glove).transform, 'word2vec': transformers.MeanEmbeddingVectorizer( embedding_dict=embedding_dict_word2vec).transform, } vecs = { 'count': CountVectorizer(),
# Continuation of the dict literal opened on the previous source line.
# NOTE(review): list-valued entries here look like augmentation fractions
# (cf. `trans_os` above), not vectorizers — confirm the intended structure;
# this may be two file versions spliced together by extraction.
        'translate': [0.8, 0.9, 1.0], 'oversample': [0.8, 0.9, 1.0]
    }

    # Second, fuller configuration: all eight target label columns.
    TARGET_LABELS = [
        'label_argumentsused', 'label_discriminating',
        'label_inappropriate', 'label_offtopic',
        'label_personalstories', 'label_possiblyfeedback',
        'label_sentimentnegative', 'label_sentimentpositive',
    ]
    # Embedding lookups loaded without explicit file paths — presumably each
    # style's default location inside `transformers`; verify against its API.
    embedding_dict_glove = transformers.load_embedding_vectors(
        embedding_style='glove')
    embedding_dict_word2vec = transformers.load_embedding_vectors(
        embedding_style='word2vec')
    # Preprocessing options: normalization, stemming and lemmatization
    # (the `_germ` suffix suggests German-language helpers — confirm in
    # `cleaning`), plus mean GloVe embeddings. NOTE(review): this dict
    # literal is cut off at the end of this chunk.
    preps = {
        'norm': lambda x: cleaning.series_apply_chaining(x, [cleaning.normalize]),
        'stem': lambda x: cleaning.series_apply_chaining(
            x, [cleaning.normalize, cleaning.stem_germ]),
        'lem': lambda x: cleaning.series_apply_chaining(
            x, [cleaning.normalize, cleaning.lem_germ]),
        'glove': transformers.MeanEmbeddingVectorizer(
            embedding_dict=embedding_dict_glove).transform,