def fit_model(df, method):
    """Fit the chosen text model on the ``content`` column of *df*.

    Args:
        df: DataFrame whose ``'content'`` column holds the documents to
            vectorize or embed.
        method: Name of the model to build. One of ``"TF-IDF"``,
            ``"CountVectorizer"`` or ``"BERT"``.

    Returns:
        A ``(model, X)`` tuple. For ``"TF-IDF"`` and ``"CountVectorizer"``,
        ``model`` is the fitted vectorizer and ``X`` is the result of its
        ``fit_transform``. For ``"BERT"``, ``model`` is a
        ``SentenceTransformer`` built from the ``'camembert-base'``
        transformer with mean-token pooling and ``X`` is the result of
        ``model.encode``.

    Raises:
        ValueError: If *method* is not one of the supported names.
    """
    if method in ("TF-IDF", "CountVectorizer"):
        vectorizer_cls = (
            TfidfVectorizer if method == "TF-IDF" else CountVectorizer
        )
        # Word-level unigrams and bigrams with the project stop-word list.
        # min_df=1 keeps every term (same vocabulary as the former
        # min_df=0); newer scikit-learn releases reject an integer
        # min_df below 1 during parameter validation.
        model = vectorizer_cls(
            analyzer='word',
            ngram_range=(1, 2),
            min_df=1,
            stop_words=STOPS,
        )
        X = model.fit_transform(df['content'])
    elif method == "BERT":
        word_embedding_model = models.Transformer('camembert-base')
        # Average the token embeddings to get one vector per document.
        pooling_model = models.Pooling(
            word_embedding_model.get_word_embedding_dimension(),
            pooling_mode_mean_tokens=True,
            pooling_mode_max_tokens=False,
        )
        model = SentenceTransformer(
            modules=[word_embedding_model, pooling_model])
        # Hand encode() a plain list so its items are addressed by
        # position; a Series with a non-default index would be looked up
        # by label instead.
        X = model.encode(df['content'].tolist(), show_progress_bar=True)
    else:
        # Previously an unknown method surfaced as an UnboundLocalError at
        # the return statement; fail early with an actionable message.
        raise ValueError(
            f"Unknown method {method!r}; expected one of "
            "'TF-IDF', 'CountVectorizer' or 'BERT'."
        )
    return model, X