def text_tfidf_bigram_lemmatized_bow_1000(lang):
    """Build the bigram TF-IDF pipeline reduced to 1000 SVD components.

    The stages are chained with ``make_pipeline`` in this order:

    1. ``TextExtractor(column='text')``
    2. ``Lemmatizer(lang)``
    3. ``CountVectorizer`` restricted to bigrams (``ngram_range=(2, 2)``),
       with ASCII accent stripping, ``min_df=0.001``, ``max_df=0.8`` and
       the stop words returned by ``PipelineConfig._stop_words(lang)``
    4. ``TfidfTransformer()``
    5. ``TruncatedSVD(n_components=1000)``

    Args:
        lang: Language identifier forwarded to ``Lemmatizer`` and to
            ``PipelineConfig._stop_words``.

    Returns:
        Whatever ``make_pipeline`` returns for the five stages above.
    """
    extractor = TextExtractor(column='text')
    lemmatizer = Lemmatizer(lang)
    bigram_counter = CountVectorizer(
        strip_accents='ascii',
        min_df=0.001,
        max_df=0.8,
        ngram_range=(2, 2),
        stop_words=PipelineConfig._stop_words(lang),
    )
    tfidf = TfidfTransformer()
    reducer = TruncatedSVD(n_components=1000)

    stages = [extractor, lemmatizer, bigram_counter, tfidf, reducer]
    return make_pipeline(*stages)
def text_tfidf_lemmatized_bow_500(lang):
    """Build the TF-IDF bag-of-words pipeline reduced to 500 SVD components.

    The stages are chained with ``make_pipeline`` in this order:

    1. ``TextExtractor(column='text')``
    2. ``Lemmatizer(lang)``
    3. ``CountVectorizer`` with ASCII accent stripping and the stop words
       returned by ``PipelineConfig._stop_words(lang)``; no n-gram range
       or document-frequency bounds are passed, so the vectorizer's own
       defaults apply
    4. ``TfidfTransformer()``
    5. ``TruncatedSVD(n_components=500)``

    Args:
        lang: Language identifier forwarded to ``Lemmatizer`` and to
            ``PipelineConfig._stop_words``.

    Returns:
        Whatever ``make_pipeline`` returns for the five stages above.
    """
    extractor = TextExtractor(column='text')
    lemmatizer = Lemmatizer(lang)
    token_counter = CountVectorizer(
        strip_accents='ascii',
        stop_words=PipelineConfig._stop_words(lang),
    )
    tfidf = TfidfTransformer()
    reducer = TruncatedSVD(n_components=500)

    stages = [extractor, lemmatizer, token_counter, tfidf, reducer]
    return make_pipeline(*stages)