コード例 #1
0
def bad_word_count_features_svm(config):
    """Build an SVM pipeline fed by count features and bad-word TF-IDF features.

    Two steps are created: 'svm_multi', which wraps ``LinearSVCMultilabel``
    configured from ``config.svc_multilabel``, and 'svm_output', a ``Dummy``
    step that re-exposes the 'prediction_probability' output of 'svm_multi'
    under the key 'y_pred'.

    Args:
        config: Configuration object. This function reads
            ``config.svc_multilabel`` (unpacked as keyword arguments) and
            ``config.env.cache_dirpath``; it is also forwarded to the
            feature-building helpers.

    Returns:
        The final 'svm_output' step.
    """
    cleaned_input = inference_preprocessing(config)
    count_normalizer = count_features(config)
    label_split = count_normalizer.get_step('xy_split')
    bad_word_vectorizer = bad_word_tfidf(cleaned_input, config)

    # 'X' pairs the listed (step name, output key) sources with
    # sparse_hstack_inputs -- presumably the function that merges them into
    # one feature matrix; confirm against the Step adapter contract.
    feature_sources = [
        ('normalizer', 'X'),
        ('bad_word_tfidf_word_vectorizer', 'features'),
    ]
    classifier_adapter = {
        'X': (feature_sources, sparse_hstack_inputs),
        'y': [('xy_split', 'y')],
    }

    svm_multi = Step(
        name='svm_multi',
        transformer=LinearSVCMultilabel(**config.svc_multilabel),
        input_steps=[label_split, count_normalizer, bad_word_vectorizer],
        adapter=classifier_adapter,
        cache_dirpath=config.env.cache_dirpath,
    )

    return Step(
        name='svm_output',
        transformer=Dummy(),
        input_steps=[svm_multi],
        adapter={'y_pred': [('svm_multi', 'prediction_probability')]},
        cache_dirpath=config.env.cache_dirpath,
    )
コード例 #2
0
def hand_crafted_all_svm(config):
    """Build an SVM pipeline over all hand-crafted feature sets.

    The steps returned by ``hand_crafted_all(config)`` (label split,
    normalizer, char/word TF-IDF vectorizers and the bad-word vectorizer)
    feed an 'svm_multi' step wrapping ``LinearSVCMultilabel``. A ``Dummy``
    'svm_output' step then re-exposes the 'prediction_probability' output of
    'svm_multi' under the key 'y_pred'.

    Args:
        config: Configuration object. This function reads
            ``config.svc_multilabel`` (unpacked as keyword arguments) and
            ``config.env.cache_dirpath``; it is also forwarded to
            ``hand_crafted_all``.

    Returns:
        The final 'svm_output' step.
    """
    xy_split, normalizer, char_vector, word_vector, bad_word_vector = hand_crafted_all(
        config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         xy_split, normalizer, char_vector, word_vector,
                         bad_word_vector
                     ],
                     adapter={
                         # The listed (step name, output key) sources are
                         # paired with sparse_hstack_inputs -- presumably the
                         # function that merges them into one matrix; confirm
                         # against the Step adapter contract.
                         'X': ([('normalizer', 'X'),
                                ('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features'),
                                ('bad_word_tfidf_word_vectorizer', 'features')
                                ], sparse_hstack_inputs),
                         'y': ([('xy_split', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          # Must name the only input step, 'svm_multi'; the
                          # previous 'logreg_multi' key did not match any
                          # step wired into this output.
                          'y_pred':
                          ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
コード例 #3
0
def tfidf_svm(config):
    """Build an SVM pipeline over char- and word-level TF-IDF features.

    The preprocessed input and the two vectorizer steps returned by
    ``tfidf(preprocessed_input, config)`` feed an 'svm_multi' step wrapping
    ``LinearSVCMultilabel``. A ``Dummy`` 'svm_output' step then re-exposes the
    'prediction_probability' output of 'svm_multi' under the key 'y_pred'.

    Args:
        config: Configuration object. This function reads
            ``config.svc_multilabel`` (unpacked as keyword arguments) and
            ``config.env.cache_dirpath``; it is also forwarded to the
            preprocessing and TF-IDF helpers.

    Returns:
        The final 'svm_output' step.
    """
    preprocessed_input = inference_preprocessing(config)
    tfidf_char_vectorizer, tfidf_word_vectorizer = tfidf(
        preprocessed_input, config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         preprocessed_input, tfidf_char_vectorizer,
                         tfidf_word_vectorizer
                     ],
                     adapter={
                         'X': ([('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features')],
                               sparse_hstack_inputs),
                         # NOTE(review): assumes the step returned by
                         # inference_preprocessing is named 'cleaning_output'
                         # and exposes 'y' -- confirm against that helper.
                         'y': ([('cleaning_output', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)
    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          # Must name the only input step, 'svm_multi'; the
                          # previous 'logreg_multi' key did not match any
                          # step wired into this output.
                          'y_pred':
                          ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
コード例 #4
0
def count_features_svm(config):
    """Build an SVM pipeline that uses only the normalized count features.

    An 'svm_multi' step wrapping ``LinearSVCMultilabel`` takes 'X' from the
    'normalizer' step and 'y' from the 'xy_split' step. A ``Dummy``
    'svm_output' step then re-exposes the 'prediction_probability' output of
    'svm_multi' under the key 'y_pred'.

    Args:
        config: Configuration object. This function reads
            ``config.svc_multilabel`` (unpacked as keyword arguments) and
            ``config.env.cache_dirpath``; it is also forwarded to
            ``count_features``.

    Returns:
        The final 'svm_output' step.
    """
    count_normalizer = count_features(config)
    label_split = count_normalizer.get_step('xy_split')

    classifier_adapter = {
        'X': [('normalizer', 'X')],
        'y': [('xy_split', 'y')],
    }
    svm_multi = Step(
        name='svm_multi',
        transformer=LinearSVCMultilabel(**config.svc_multilabel),
        input_steps=[label_split, count_normalizer],
        adapter=classifier_adapter,
        cache_dirpath=config.env.cache_dirpath,
    )

    return Step(
        name='svm_output',
        transformer=Dummy(),
        input_steps=[svm_multi],
        adapter={'y_pred': [('svm_multi', 'prediction_probability')]},
        cache_dirpath=config.env.cache_dirpath,
    )