def bad_word_count_features_svm(config):
    """Build an inference pipeline combining count features with a
    bad-word tf-idf representation, classified by a multilabel linear SVM.

    The count-feature sub-pipeline and the bad-word tf-idf vectorizer are
    horizontally stacked (sparse) into a single feature matrix before the
    SVM step. Returns the terminal ``svm_output`` Step, which exposes the
    SVM's prediction probabilities under the key ``y_pred``.
    """
    preprocessed = inference_preprocessing(config)
    counts = count_features(config)
    split = counts.get_step('xy_split')
    bad_word_features = bad_word_tfidf(preprocessed, config)

    classifier = Step(
        name='svm_multi',
        transformer=LinearSVCMultilabel(**config.svc_multilabel),
        input_steps=[split, counts, bad_word_features],
        adapter={
            # Stack dense count features with sparse bad-word tf-idf features.
            'X': ([('normalizer', 'X'),
                   ('bad_word_tfidf_word_vectorizer', 'features')],
                  sparse_hstack_inputs),
            'y': ([('xy_split', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    output = Step(
        name='svm_output',
        transformer=Dummy(),
        input_steps=[classifier],
        adapter={
            'y_pred': ([('svm_multi', 'prediction_probability')]),
        },
        cache_dirpath=config.env.cache_dirpath)
    return output
def hand_crafted_all_svm(config):
    """Build a pipeline stacking all hand-crafted features (count features,
    char tf-idf, word tf-idf, bad-word tf-idf) into a multilabel linear SVM.

    Returns the terminal ``svm_output`` Step, which exposes the SVM's
    prediction probabilities under the key ``y_pred``.
    """
    xy_split, normalizer, char_vector, word_vector, bad_word_vector = hand_crafted_all(
        config)

    svm_multi = Step(
        name='svm_multi',
        transformer=LinearSVCMultilabel(**config.svc_multilabel),
        input_steps=[
            xy_split, normalizer, char_vector, word_vector, bad_word_vector
        ],
        adapter={
            # Horizontally stack all sparse/dense feature blocks into one matrix.
            'X': ([('normalizer', 'X'),
                   ('tfidf_char_vectorizer', 'features'),
                   ('tfidf_word_vectorizer', 'features'),
                   ('bad_word_tfidf_word_vectorizer', 'features')],
                  sparse_hstack_inputs),
            'y': ([('xy_split', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(
        name='svm_output',
        transformer=Dummy(),
        input_steps=[svm_multi],
        adapter={
            # BUGFIX: adapter previously referenced 'logreg_multi' (a copy-paste
            # leftover from the logistic-regression pipeline); the input step
            # here is named 'svm_multi'.
            'y_pred': ([('svm_multi', 'prediction_probability')]),
        },
        cache_dirpath=config.env.cache_dirpath)
    return svm_output
def tfidf_svm(config):
    """Build a pipeline feeding char-level and word-level tf-idf features
    into a multilabel linear SVM.

    Returns the terminal ``svm_output`` Step, which exposes the SVM's
    prediction probabilities under the key ``y_pred``.
    """
    preprocessed_input = inference_preprocessing(config)
    tfidf_char_vectorizer, tfidf_word_vectorizer = tfidf(
        preprocessed_input, config)

    svm_multi = Step(
        name='svm_multi',
        transformer=LinearSVCMultilabel(**config.svc_multilabel),
        input_steps=[
            preprocessed_input, tfidf_char_vectorizer, tfidf_word_vectorizer
        ],
        adapter={
            # Stack char and word tf-idf sparse matrices side by side.
            'X': ([('tfidf_char_vectorizer', 'features'),
                   ('tfidf_word_vectorizer', 'features')],
                  sparse_hstack_inputs),
            # NOTE(review): assumes the preprocessing step is named
            # 'cleaning_output' — confirm against inference_preprocessing.
            'y': ([('cleaning_output', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(
        name='svm_output',
        transformer=Dummy(),
        input_steps=[svm_multi],
        adapter={
            # BUGFIX: adapter previously referenced 'logreg_multi' (a copy-paste
            # leftover from the logistic-regression pipeline); the input step
            # here is named 'svm_multi'.
            'y_pred': ([('svm_multi', 'prediction_probability')]),
        },
        cache_dirpath=config.env.cache_dirpath)
    return svm_output
def count_features_svm(config):
    """Build a pipeline classifying hand-crafted count features with a
    multilabel linear SVM.

    Returns the terminal ``svm_output`` Step, which exposes the SVM's
    prediction probabilities under the key ``y_pred``.
    """
    counts = count_features(config)
    split = counts.get_step('xy_split')

    classifier = Step(
        name='svm_multi',
        transformer=LinearSVCMultilabel(**config.svc_multilabel),
        input_steps=[split, counts],
        adapter={
            'X': ([('normalizer', 'X')]),
            'y': ([('xy_split', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    output = Step(
        name='svm_output',
        transformer=Dummy(),
        input_steps=[classifier],
        adapter={
            'y_pred': ([('svm_multi', 'prediction_probability')]),
        },
        cache_dirpath=config.env.cache_dirpath)
    return output