コード例 #1
0
# Raise the threshold of the "pyhive" logger to CRITICAL so that less severe
# records from that logger are dropped.
logging.getLogger("pyhive").setLevel(logging.CRITICAL)  # avoid excessive logs
# Let the `logger` object (created outside this excerpt) emit records of
# level INFO and above.
logger.setLevel(logging.INFO)

# Script entry point: the statements below run only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":

    # NOTE(review): `m.Posts` is defined outside this excerpt; presumably it
    # provides the posts dataset used by the rest of the script -- confirm.
    data = m.Posts()
    # Scorer wrapping `fbeta_score` with beta=2.
    # NOTE(review): assuming these are scikit-learn's `make_scorer` and
    # `fbeta_score`, beta > 1 weights recall more heavily than precision.
    scorer = make_scorer(fbeta_score, beta=2)

    # One candidate value (0.9) for each of the 'translate' and 'oversample'
    # settings. NOTE(review): how this dict is consumed is not visible here.
    trans_os = {'translate': [0.9], 'oversample': [0.9]}

    # The four label names this run works with.
    # NOTE(review): presumably target columns of the dataset -- confirm.
    TARGET_LABELS = [
        'label_discriminating', 'label_inappropriate',
        'label_sentimentnegative', 'label_needsmoderation'
    ]

    # Load the 'glove' and 'word2vec' style embedding vectors from the given
    # text files under ./embeddings/.
    # NOTE(review): the return value is presumably a token -> vector mapping,
    # judging by the `embedding_dict` parameter below -- confirm.
    embedding_dict_glove = transformers.load_embedding_vectors(
        embedding_style='glove', file="./embeddings/glove_vectors.txt")
    embedding_dict_word2vec = transformers.load_embedding_vectors(
        embedding_style='word2vec', file="./embeddings/word2vec_vectors.txt")

    # Named preprocessing callables, keyed by a short identifier:
    #   'norm'     - applies `cleaning.normalize` via `series_apply_chaining`
    #   'glove'    - bound `transform` of a MeanEmbeddingVectorizer built on
    #                the glove embeddings
    #   'word2vec' - bound `transform` of a MeanEmbeddingVectorizer built on
    #                the word2vec embeddings
    preps = {
        'norm':
        lambda x: cleaning.series_apply_chaining(x, [cleaning.normalize]),
        'glove':
        transformers.MeanEmbeddingVectorizer(
            embedding_dict=embedding_dict_glove).transform,
        'word2vec':
        transformers.MeanEmbeddingVectorizer(
            embedding_dict=embedding_dict_word2vec).transform,
    }
    vecs = {
        'count': CountVectorizer(),
コード例 #2
0
        'translate': [0.8, 0.9, 1.0],
        'oversample': [0.8, 0.9, 1.0]
    }

    # The eight label names this run works with.
    # NOTE(review): presumably target columns of the dataset -- confirm.
    TARGET_LABELS = [
        'label_argumentsused',
        'label_discriminating',
        'label_inappropriate',
        'label_offtopic',
        'label_personalstories',
        'label_possiblyfeedback',
        'label_sentimentnegative',
        'label_sentimentpositive',
    ]

    # Load the 'glove' and 'word2vec' style embedding vectors. No `file`
    # argument is passed here.
    # NOTE(review): presumably `load_embedding_vectors` falls back to a
    # default location per style -- confirm against its definition.
    embedding_dict_glove = transformers.load_embedding_vectors(
        embedding_style='glove')
    embedding_dict_word2vec = transformers.load_embedding_vectors(
        embedding_style='word2vec')

    preps = {
        'norm':
        lambda x: cleaning.series_apply_chaining(x, [cleaning.normalize]),
        'stem':
        lambda x: cleaning.series_apply_chaining(
            x, [cleaning.normalize, cleaning.stem_germ]),
        'lem':
        lambda x: cleaning.series_apply_chaining(
            x, [cleaning.normalize, cleaning.lem_germ]),
        'glove':
        transformers.MeanEmbeddingVectorizer(
            embedding_dict=embedding_dict_glove).transform,