    def create_session_configs(configs_location=None, delete_old_configs=1):
        if configs_location is None:
            configs_location = ConfigReader.get_configs_location()
        if delete_old_configs:
            Storage.delete_location(configs_location)

        configs = [SessionConfigReader.get_config_template()]

        configs = SessionConfigBuilderCustom1.add_all_config_info(configs)

        n_configs = len(configs)

        SessionLogger.log('Constructed ' + str(n_configs) +
                          ' new session configs from template: \'' +
                          ConfigReader.get_config_template_id() + '\'.')

        config_ids = list()
        idx = 0
        for conf in configs:
            config_id = (configs_location + '/' +
                         SessionConfigBuilderCustom1.config_name +
                         str(idx + 1))
            SessionConfigReader.set_config(conf, config_id)
            config_ids.append(config_id)
            idx = idx + 1

        SessionLogger.log('Stored ' + str(n_configs) +
                          ' session configs in \'' + configs_location + '\'.')

        return config_ids
Example #2
 def remove_stopwords(data_frame,
                      custom_stop_words=None,
                      download_live_stopwords=0,
                      col_name=col_name,
                      storage_level=0,
                      storage_name='',
                      log=1):
     stopwordremover = SessionConfigReader.read_value(
         StopWordRemover.stopwordremover_key)
     if custom_stop_words is None:
         custom_stop_words = list()
     custom_stop_words.extend(
         SessionConfigReader.read_value(StopWordRemover.custom_sw_key))
     if stopwordremover == StopWordRemover.stopwordremover_custom:
         return StopWordRemoverCustom.remove_stopwords(
             data_frame,
             custom_stop_words=custom_stop_words,
             download_live_stopwords=download_live_stopwords,
             col_name=col_name,
             storage_level=storage_level,
             storage_name=storage_name,
             log=log)
     else:
         if log:
             SessionLogger.log(
                 'Tried to remove stopwords from documents. Specified Stopword Remover not supported.',
                 log_type='error')
         return pd.DataFrame()
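
Most of the examples that follow share this dispatch shape: a facade reads an implementation name from the session config, routes the call to the matching implementation, and logs an error with an empty result when the name is unknown. A minimal sketch of the pattern, with illustrative names that are not from the project:

def dispatch(impl_name, registry, *args, **kwargs):
    # registry maps implementation names to callables
    impl = registry.get(impl_name)
    if impl is None:
        raise ValueError('Implementation not supported: ' + str(impl_name))
    return impl(*args, **kwargs)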
Example #3
    def run_classification_test():
        corpus_id = SessionConfigReader.read_value(SetupRunner.corpus_id_key)
        vectorized_df_id = corpus_id + SetupRunner.ext_vectorized
        train_df_id = vectorized_df_id + SetupRunner.ext_train
        test_df_id = vectorized_df_id + SetupRunner.ext_test

        Storage.delete_pd_frame(train_df_id)
        Storage.delete_pd_frame(test_df_id)
        Storage.delete_h5_model(SessionConfigReader.read_value(SetupRunner.keras_nn_model_id_key))

        vectorized_df = Storage.load_pd_frame(vectorized_df_id)
        TrainTestSplitter.split_train_test(identifier=vectorized_df_id, data_frame=vectorized_df)
        train = Storage.load_pd_frame(train_df_id)
        test = Storage.load_pd_frame(test_df_id)

        train_classification_outs = ClassificationInterpreter.create_out_vectors(train)
        Classifier.create_model(train_classification_outs)
        test_classified = Classifier.classify(test)
        test_interpreted = ClassificationInterpreter.interpret_output(test_classified)
        score = ClassificationInterpreter.evaluate_output(test_interpreted)
        EvaluationHandler.add_evaluation(score)

        return test_interpreted
 def split_train_test(identifier=None, data_frame=None):
     if data_frame is None:
         data_frame = Storage.load_pd_frame(identifier)
     split_ratio = SessionConfigReader.read_value(
         TrainTestSplitterCustom1.split_ratio_key)
     if split_ratio > 1:
         split_ratio = 1
     random_state = SessionConfigReader.read_value(
         TrainTestSplitterCustom1.random_state_key)
     if isinstance(random_state, int):
         train = data_frame.sample(frac=split_ratio,
                                   random_state=random_state)
     else:
         train = data_frame.sample(frac=split_ratio)
     test = data_frame.drop(train.index)
     if identifier is None:
         identifier = SessionConfigReader.read_value(
             TrainTestSplitterCustom1.corpus_identifier_key)
     train_name = identifier + TrainTestSplitterCustom1.ext_train
     test_name = identifier + TrainTestSplitterCustom1.ext_test
     Storage.store_pd_frame(train, train_name)
     Storage.store_pd_frame(test, test_name)
     SessionLogger.log('Split \'' + identifier + '\' (' +
                       str(len(data_frame.index)) + ' entries) into \'' +
                       train_name + '\' (' + str(len(train.index)) +
                       ' entries) and \'' + test_name + '\' (' +
                       str(len(test.index)) + ' entries).')
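
The sample/drop idiom above is easy to check in isolation. A toy run with assumed values for the split ratio and random state:

import pandas as pd

df = pd.DataFrame({'text': ['a', 'b', 'c', 'd', 'e'],
                   'label': [0, 1, 0, 1, 0]})
train = df.sample(frac=0.8, random_state=42)  # assumed config values
test = df.drop(train.index)  # every row not sampled into train
print(len(train), len(test))  # 4 1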
 def import_docs(csv_path=None):
     if csv_path is None:
         session_folder = os.path.join(TenKGnadImporter.sessions_folder,
                                       SessionConfigReader.get_session_id())
         corpus_id = SessionConfigReader.read_value(
             TenKGnadImporter.corpus_id_key)
         corpus_id = DiskStorageMisc.get_identifier_path(corpus_id)
         csv_path = os.path.join(session_folder,
                                 corpus_id + TenKGnadImporter.csv_ext)
     df = pd.read_csv(
         csv_path,
         sep=';',
         quotechar='\'',
         quoting=csv.QUOTE_MINIMAL,
         header=None,
         names=[TenKGnadImporter.category_name, TenKGnadImporter.text_name])
     category_list = df[TenKGnadImporter.category_name].tolist()
     df[TenKGnadImporter.category_name] = df.apply(
         lambda x: [x[TenKGnadImporter.category_name]], axis=1)
     head, f_name = os.path.split(csv_path)
     identifier = f_name.split('.')[0]
     Storage.store_pd_frame(df, identifier)
     SessionLogger.log('TenKGnad Corpus (' + str(len(df.index)) +
                       ' entries) has been imported into \'' + identifier +
                       '\' (columns: \'' + TenKGnadImporter.category_name +
                       '\', \'' + TenKGnadImporter.text_name + '\').')
     category_set = set(category_list)
     category_list = list(category_set)
     CategoryListHandler.set_categories(category_list)
     return identifier
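
The read_csv call above expects semicolon-separated rows of category and text. A self-contained sketch using an in-memory CSV in place of the session file path:

import csv
import io
import pandas as pd

raw = 'Sport;Der FC gewinnt das Derby\nKultur;Neues Album erschienen\n'
df = pd.read_csv(io.StringIO(raw), sep=';', quotechar='\'',
                 quoting=csv.QUOTE_MINIMAL, header=None,
                 names=['category', 'text'])
# wrap each category in a list, as the importer does
df['category'] = df.apply(lambda x: [x['category']], axis=1)
print(df)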
 def set_best_performing(eval_session_id=None):
     evals = EvaluationHandler.load_evaluations(session_id=eval_session_id)
     # sort_values returns a new frame; assign it back and reset the index
     # so the .at[0, ...] lookups below address the top-scoring row
     evals = evals.sort_values(by=[EvaluationHandler.score_col],
                               ascending=False).reset_index(drop=True)
     if evals.size > 0:
         session_id = evals.at[0, EvaluationHandler.session_id_col]
         config_id = evals.at[0, EvaluationHandler.config_id_col]
         SessionConfigReader.set_best_performing_by_ids(
             session_id=session_id, config_id=config_id)
     else:
         SessionConfigReader.set_best_performing_by_ids()
    def compare_evaluations(session_ids=None, remove_cols=None, add_cols=None):
        all_evals = pd.DataFrame()

        if session_ids is None:
            all_evals = EvaluationHandler.load_evaluations()
        else:
            for session_id in session_ids:
                # pd.DataFrame has no .concat method; append via pd.concat
                all_evals = pd.concat(
                    [all_evals,
                     EvaluationHandler.load_evaluations(session_id=session_id)],
                    sort=False,
                    ignore_index=True)

        # reset the index so the label-based .at lookups below visit the
        # rows in score order
        all_evals = all_evals.sort_values(
            by=[EvaluationHandler.score_col],
            ascending=False).reset_index(drop=True)

        i = 0
        while i < len(all_evals):
            session_id = all_evals.at[i, EvaluationHandler.session_id_col]
            conf_id = all_evals.at[i, EvaluationHandler.config_id_col]
            conf = SessionConfigReader.get_config(session_id=session_id,
                                                  config_id=conf_id)
            for key in EvaluationHandler.additional_columns:
                if key in conf:
                    value = conf[key][0]
                else:
                    value = ''
                # assign '' first so the column is created with object dtype;
                # list-valued config entries can then be stored via .at
                all_evals.at[i, key] = ''
                all_evals.at[i, key] = value
            i = i + 1

        if remove_cols is not None:
            for key in remove_cols:
                if key in all_evals:
                    all_evals = all_evals.drop(columns=[key])

        if add_cols is not None:
            i = 0
            while i < len(all_evals):
                session_id = all_evals.at[i, EvaluationHandler.session_id_col]
                conf_id = all_evals.at[i, EvaluationHandler.config_id_col]
                conf = SessionConfigReader.get_config(session_id=session_id,
                                                      config_id=conf_id)
                for key in add_cols:
                    if key in conf:
                        value = conf[key][0]
                    else:
                        value = ''
                    # as above: create the column as object dtype first
                    all_evals.at[i, key] = ''
                    all_evals.at[i, key] = value
                i = i + 1

        return all_evals
 def create_out_vectors(data_frame,
                        col_name=col_name_categories,
                        new_col_name=new_col_name_cat_vec,
                        storage_level=0,
                        storage_name=''):
     classification_interpreter = SessionConfigReader.read_value(
         ClassificationInterpreter.classification_interpreter_key)
     if classification_interpreter == ClassificationInterpreter.classification_interpreter_custom1:
         return ClassificationInterpreterCustom1.create_out_vectors(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name)
     elif classification_interpreter == ClassificationInterpreter.classification_interpreter_custom2:
         return ClassificationInterpreterCustom2.create_out_vectors(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name)
     else:
         SessionLogger.log(
             'Tried to create category vectors. Specified ClassificationInterpreter is not supported.',
             log_type='error')
         return pd.DataFrame()
Example #9
 def get_stopwords():
     stopwordremover = SessionConfigReader.read_value(
         StopWordRemover.stopwordremover_key)
     if stopwordremover == StopWordRemover.stopwordremover_custom:
         return StopWordRemoverCustom.get_stopwords()
     else:
         return set()
 def interpret_output(data_frame,
                      col_name=col_name_class_out,
                      new_col_name=col_name_result,
                      storage_level=0,
                      storage_name='',
                      log=1):
     classification_interpreter = SessionConfigReader.read_value(
         ClassificationInterpreter.classification_interpreter_key)
     if classification_interpreter == ClassificationInterpreter.classification_interpreter_custom1:
         return ClassificationInterpreterCustom1.interpret_output(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name,
             log=log)
     elif classification_interpreter == ClassificationInterpreter.classification_interpreter_custom2:
         return ClassificationInterpreterCustom2.interpret_output(
             data_frame,
             col_name=col_name,
             new_col_name=new_col_name,
             storage_level=storage_level,
             storage_name=storage_name,
             log=log)
     else:
         SessionLogger.log(
             'Tried to interpret output vectors. Specified ClassificationInterpreter is not supported.',
             log_type='error')
         return pd.DataFrame()
Example #11
 def normalize(data_frame, col_name=col_name, storage_level=0, storage_name='', log=1):
     lemmatizer = SessionConfigReader.read_value(Lemmatizer.lemmatizer_key)
     if lemmatizer == Lemmatizer.lemmatizer_spacy_german:
         return LemmatizerSpacyGerman.normalize(data_frame, col_name=col_name, storage_level=storage_level, storage_name=storage_name, log=log)
     else:
         if log:
             SessionLogger.log('Tried to lemmatize documents. Specified Lemmatizer not supported.', log_type='error')
         return pd.DataFrame()
Example #12
 def get_word_vectors(words, model_id=''):
     vectorizer_type = SessionConfigReader.read_value(
         Vectorizer.vectorizer_key)
     if vectorizer_type == Vectorizer.vectorizer_gensim_w2v:
         return VectorizerGensimWord2Vec.get_word_vectors(words,
                                                          model_id=model_id)
     else:
         return list()
 def preprocess_texts(data_frame, col_name=col_name, new_col_name=new_col_name, storage_level=0, storage_name='', log=1):
     preprocessor_type = SessionConfigReader.read_value(TextPreprocessor.preprocessor_key)
     if preprocessor_type == TextPreprocessor.preprocessor_custom:
         return TextPreprocessorCustom.preprocess_texts(data_frame, col_name=col_name, new_col_name=new_col_name, storage_level=storage_level, storage_name=storage_name, log=log)
     else:
         if log:
             SessionLogger.log('Tried to preprocess texts. Specified Preprocessor is not supported.', log_type='error')
         return pd.DataFrame()
Example #14
 def read_categories(session_id):
     data_path = DiskStorageMisc.get_session_data_path(session_id)
     file_name = SessionConfigReader.read_value(DiskStorageCategoryListHandler.cat_id_key) + DiskStorageCategoryListHandler.ext_json
     categories_path = os.path.join(data_path, file_name)
     if not os.path.exists(categories_path):
         return list()
     with open(categories_path, encoding='utf8') as json_file:
         file = json.load(json_file)
         return file[DiskStorageCategoryListHandler.cat_list_key]
Example #15
 def get_model_id():
     classifier_type = SessionConfigReader.read_value(
         Classifier.classifier_key)
     if classifier_type == Classifier.classifier_keras_nn:
         return ClassifierKerasNN.get_model_id()
     else:
         SessionLogger.log(
             'Tried to get classifier model id. Specified Classifier is not supported.',
             log_type='error')
         return ''
Example #16
 def import_docs():
     importer_type = SessionConfigReader.read_value(
         CorpusImporter.corpus_importer_key)
     if importer_type == CorpusImporter.tenkgnad_importer:
         return TenKGnadImporter.import_docs()
     else:
         SessionLogger.log(
             'Tried to import corpus. Specified Corpus Importer is not supported.',
             log_type='error')
         return ''
Example #17
 def set_categories(session_id, categories):
     data = {DiskStorageCategoryListHandler.cat_list_key: []}
     for category in categories:
         data[DiskStorageCategoryListHandler.cat_list_key].append(category)
     data_path = DiskStorageMisc.get_session_data_path(session_id)
     file_name = SessionConfigReader.read_value(DiskStorageCategoryListHandler.cat_id_key) + DiskStorageCategoryListHandler.ext_json
     categories_path = os.path.join(data_path, file_name)
     DiskStorageMisc.create_data_folder(session_id)
     with open(categories_path, 'w+', encoding='utf8') as json_file:
         json.dump(data, json_file, ensure_ascii=False)
Example #18
 def split_train_test(identifier=None, data_frame=None):
     tt_splitter_type = SessionConfigReader.read_value(
         TrainTestSplitter.tt_splitter_key)
     if tt_splitter_type == TrainTestSplitter.tt_splitter_custom1:
         TrainTestSplitterCustom1.split_train_test(identifier=identifier,
                                                   data_frame=data_frame)
     else:
         SessionLogger.log(
             'Tried to split \'' + str(identifier) +
             '\' into train and test set. Specified TrainTestSplitter is not supported.',
             log_type='error')
Example #19
 def create_model(data_frame, new_model_id=None, col_name=col_name):
     vectorizer_type = SessionConfigReader.read_value(
         Vectorizer.vectorizer_key)
     if vectorizer_type == Vectorizer.vectorizer_gensim_w2v:
         return VectorizerGensimWord2Vec.create_model(
             data_frame, new_model_id=new_model_id, col_name=col_name)
     else:
         SessionLogger.log(
             'Tried to create vector model. Specified Vectorizer is not supported.',
             log_type='error')
         return ''
Example #20
 def run_setup(run_import=1, run_preprocessing=1, run_vectorization=1, run_classification=1):
     corpus_id = SessionConfigReader.read_value(SetupRunner.corpus_id_key)
     if run_import:
         Storage.delete_session_data()
         SessionLogger.clear()
         identifier = CorpusImporter.import_docs()
         df = Storage.load_pd_frame(identifier)
         StopwordDownloaderNLTK.get_stopwords()
     else:
         df = Storage.load_pd_frame(corpus_id)
     if run_preprocessing:
         df = TextPreprocessor.preprocess_texts(df, storage_level=1, storage_name=corpus_id)
     else:
         df = Storage.load_pd_frame(corpus_id+SetupRunner.ext_preprocessed)
     if run_vectorization:
         Storage.delete_model(SessionConfigReader.read_value(SetupRunner.vec_model_id_key))
         Vectorizer.create_model(df)
         df = Vectorizer.vectorize(df, storage_level=1, storage_name=corpus_id)
     else:
         df = Storage.load_pd_frame(corpus_id+SetupRunner.ext_vectorized)
     if run_classification:
         Storage.delete_h5_model(SessionConfigReader.read_value(SetupRunner.keras_nn_model_id_key))
         df = ClassificationInterpreter.create_out_vectors(df, storage_level=1, storage_name=corpus_id)
         Classifier.create_model(df)
Example #21
 def get_doc_vec(word_vectors):
     # the configured dimension sets both the feature vector size and a cap
     # on how many word vectors are consumed
     n_words = SessionConfigReader.read_value(
         WordVecToDocVecCustom.dimension_key)
     fv = np.zeros(n_words)
     idx = 0
     err = 0
     for vec in word_vectors:
         if vec is None:
             err = err + 1
         else:
             fv = fv + vec
         idx = idx + 1
         if idx == n_words:
             break
     # average over the vectors actually summed, guarding against a
     # division by zero when no valid word vectors were seen
     return fv / max(idx - err, 1)
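
A toy run of the averaging above: three 4-dimensional word vectors, one of them missing, folded into a single document vector (pure numpy, values made up):

import numpy as np

word_vectors = [np.ones(4), None, np.full(4, 3.0)]
fv = np.zeros(4)
valid = 0
for vec in word_vectors:
    if vec is not None:
        fv = fv + vec
        valid = valid + 1
print(fv / max(valid, 1))  # [2. 2. 2. 2.]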
Example #22
 def train_model(data_frame,
                 model_id=None,
                 fv_col_name=fv_col_name,
                 cat_v_col_name=cat_v_col_name):
     classifier_type = SessionConfigReader.read_value(
         Classifier.classifier_key)
     if classifier_type == Classifier.classifier_keras_nn:
         return ClassifierKerasNN.train_model(data_frame,
                                              model_id=model_id,
                                              fv_col_name=fv_col_name,
                                              cat_v_col_name=cat_v_col_name)
     else:
         SessionLogger.log(
             'Tried to train classifier model. Specified Classifier is not supported.',
             log_type='error')
         return ''
Example #23
 def remove_noise(data_frame,
                  col_name=col_name,
                  storage_level=0,
                  storage_name='',
                  log=1):
     noiseremover_type = SessionConfigReader.read_value(
         NoiseRemover.noiseremover_key)
     if noiseremover_type == NoiseRemover.noiseremover_custom:
         return NoiseRemoverCustom.remove_noise(data_frame,
                                                col_name=col_name,
                                                storage_level=storage_level,
                                                storage_name=storage_name,
                                                log=log)
     else:
         if log:
             SessionLogger.log(
                 'Tried to remove noise from documents. Specified Noise Remover not supported.',
                 log_type='error')
         return pd.DataFrame()
 def evaluate_output(data_frame,
                     col_name_categories=col_name_categories,
                     col_name_outputs=col_name_result):
     classification_interpreter = SessionConfigReader.read_value(
         ClassificationInterpreter.classification_interpreter_key)
     if classification_interpreter == ClassificationInterpreter.classification_interpreter_custom1:
         return ClassificationInterpreterCustom1.evaluate_output(
             data_frame,
             col_name_categories=col_name_categories,
             col_name_outputs=col_name_outputs)
     elif classification_interpreter == ClassificationInterpreter.classification_interpreter_custom2:
         return ClassificationInterpreterCustom2.evaluate_output(
             data_frame,
             col_name_categories=col_name_categories,
             col_name_outputs=col_name_outputs)
     else:
         SessionLogger.log(
             'Tried to evaluate classification. Specified ClassificationInterpreter is not supported.',
             log_type='error')
         return 0
Example #25
class DiskStorageStopwordHandler:

    stpw_id_key = 'stopwords_identifier'
    # resolved once at import time from the current session config
    file_name = SessionConfigReader.read_value(stpw_id_key) + '.json'
    sw_list_key = 'stopwords'

    # expects a session id
    # returns a string set of the current stopwords for the specified session
    @staticmethod
    def read_stopwords(session_id):
        data_path = DiskStorageMisc.get_session_data_path(session_id)
        stopwords_path = os.path.join(data_path,
                                      DiskStorageStopwordHandler.file_name)
        if not os.path.exists(stopwords_path):
            return set()
        with open(stopwords_path, encoding='utf8') as json_file:
            file = json.load(json_file)
            return set(file[DiskStorageStopwordHandler.sw_list_key])

    # expects a session id and a string set of stopwords
    # sets the session's stopwords
    @staticmethod
    def set_stopwords(session_id, stopwords):
        data = {DiskStorageStopwordHandler.sw_list_key: []}
        for stopword in stopwords:
            data[DiskStorageStopwordHandler.sw_list_key].append(stopword)
        data_path = DiskStorageMisc.get_session_data_path(session_id)
        stopwords_path = os.path.join(data_path,
                                      DiskStorageStopwordHandler.file_name)
        DiskStorageMisc.create_data_folder(session_id)
        with open(stopwords_path, 'w+', encoding='utf8') as json_file:
            json.dump(data, json_file, ensure_ascii=False)

    # expects a session id and a string set of stopwords
    # adds stopwords to the session's stopwords
    @staticmethod
    def add_stopwords(session_id, stopwords):
        current_sw = DiskStorageStopwordHandler.read_stopwords(session_id)
        new_stopwords = current_sw.union(stopwords)
        DiskStorageStopwordHandler.set_stopwords(session_id, new_stopwords)
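
The JSON layout the handler reads and writes is a single key mapping to a list of strings. A round-trip sketch using a temporary file instead of the session data folder:

import json
import os
import tempfile

data = {'stopwords': ['und', 'oder', 'aber']}
path = os.path.join(tempfile.mkdtemp(), 'stopwords.json')
with open(path, 'w+', encoding='utf8') as json_file:
    json.dump(data, json_file, ensure_ascii=False)
with open(path, encoding='utf8') as json_file:
    print(set(json.load(json_file)['stopwords']))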
 def get_highest_similarity(vec, word_list, word_vec_list):
     vec = vec * 2 - 1  # adjust from softmax codomain
     sim_func = SessionConfigReader.read_value(
         ClassificationInterpreterCustom1.similarity_function_key)
     idx = 0
     sim = 0
     highest_word = ''
     for word in word_list:
         word_vec = word_vec_list[idx]
         new_sim = 0
         if word_vec is not None:
             if sim_func == ClassificationInterpreterCustom1.sim_func_cosine:
                 new_sim = cosine_similarity(np.asarray([word_vec]),
                                             [vec])[0][0]
             elif sim_func == ClassificationInterpreterCustom1.sim_func_eucl_dist:
                 # euclidean distance is a dissimilarity; invert it so that a
                 # larger value still means a closer match
                 new_sim = 1.0 / (1.0 + euclidean_distances([word_vec],
                                                            [vec])[0][0])
             if new_sim < 0:
                 new_sim = new_sim * -1
         if new_sim > sim:
             sim = new_sim
             highest_word = word
         idx = idx + 1
     return highest_word
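
Picking the closest word by cosine similarity can be checked with made-up 3-dimensional vectors:

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

vec = np.array([0.9, 0.1, 0.0])
words = ['sport', 'kultur']
word_vecs = [np.array([1.0, 0.0, 0.0]), np.array([0.0, 1.0, 0.0])]
sims = [cosine_similarity([wv], [vec])[0][0] for wv in word_vecs]
print(words[int(np.argmax(sims))])  # sport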
Example #27
 def classify(data_frame,
              model_id=None,
              col_name=fv_col_name,
              new_col_name=class_out_col_name,
              storage_level=0,
              storage_name='',
              log=1):
     classifier_type = SessionConfigReader.read_value(
         Classifier.classifier_key)
     if classifier_type == Classifier.classifier_keras_nn:
         return ClassifierKerasNN.classify(data_frame,
                                           model_id=model_id,
                                           col_name=col_name,
                                           new_col_name=new_col_name,
                                           storage_level=storage_level,
                                           storage_name=storage_name,
                                           log=log)
     else:
         if log:
             SessionLogger.log(
                 'Tried to classify data. Specified Classifier is not supported.',
                 log_type='error')
         return pd.DataFrame()
Example #28
    def train_model(data_frame,
                    model_id=None,
                    fv_col_name=fv_col_name,
                    cat_v_col_name=cat_v_col_name):

        # read config params
        config_keys = list()
        config_keys.append(ClassifierKerasNN.model_id_key)
        config_keys.append(ClassifierKerasNN.epochs_key)
        config_keys.append(ClassifierKerasNN.batch_size_key)
        config = SessionConfigReader.read_values(config_keys)
        if model_id is None:
            model_id = config[0]
        epochs = config[1]
        batch_size = config[2]

        # extract vector lists from data frame
        doc_vectors = data_frame[fv_col_name].tolist()
        cat_vectors = data_frame[cat_v_col_name].tolist()

        # load the model
        model = Storage.load_h5_model(model_id)
        # train the model
        model.fit(np.asarray(doc_vectors),
                  np.asarray(cat_vectors),
                  epochs=epochs,
                  batch_size=batch_size)
        # store the model
        Storage.store_h5_model(model, model_id)

        # make log entry
        SessionLogger.log('Trained keras neural network \'' + model_id +
                          '\' with ' + str(len(data_frame.index)) +
                          ' new entries.')

        return model_id
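
Storage.load_h5_model is not shown here; as a stand-in, a tiny dense network fit on random document and category vectors mirrors the fit call above (shapes and hyperparameters are assumptions):

import numpy as np
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    keras.layers.Dense(3, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
doc_vectors = np.random.rand(10, 4)
cat_vectors = keras.utils.to_categorical(np.random.randint(3, size=10), 3)
model.fit(np.asarray(doc_vectors), np.asarray(cat_vectors),
          epochs=2, batch_size=5)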
    def interpret_output(data_frame,
                         col_name=col_name_class_out,
                         new_col_name=col_name_result,
                         storage_level=0,
                         storage_name='',
                         log=1):
        category_list = CategoryListHandler.read_categories()
        threshold = SessionConfigReader.read_value(
            ClassificationInterpreterCustom2.threshold_key)
        data_frame[new_col_name] = data_frame.apply(
            lambda x: ClassificationInterpreterCustom2.get_categories_from_vec(
                x[col_name], category_list, threshold),
            axis=1)

        log_text = 'Categories have been determined (' + str(
            len(data_frame.index)) + ' entries).'
        if storage_level >= 1 and storage_name != '':
            storage_name = storage_name + ClassificationInterpreterCustom2.ext_categorized
            Storage.store_pd_frame(data_frame, storage_name)
            log_text = log_text + ' Stored in \'' + storage_name + '\' (column: \'' + new_col_name + '\').'
        if log:
            SessionLogger.log(log_text)

        return data_frame
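
get_categories_from_vec is not included in the snippet; a plausible stand-alone version keeps every category whose output score reaches the threshold (names and values here are assumptions):

def get_categories_from_vec(out_vec, category_list, threshold):
    # one score per category; keep those at or above the threshold
    return [cat for cat, score in zip(category_list, out_vec)
            if score >= threshold]

print(get_categories_from_vec([0.8, 0.2, 0.6],
                              ['sport', 'kultur', 'politik'], 0.5))
# ['sport', 'politik']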
Example #30
 def vectorize(data_frame,
               model_id=None,
               col_name=col_name,
               new_col_name=new_col_name,
               storage_level=0,
               storage_name='',
               log=1):
     vectorizer_type = SessionConfigReader.read_value(
         Vectorizer.vectorizer_key)
     if vectorizer_type == Vectorizer.vectorizer_gensim_w2v:
         return VectorizerGensimWord2Vec.vectorize(
             data_frame,
             model_id,
             col_name,
             new_col_name,
             storage_level=storage_level,
             storage_name=storage_name,
             log=log)
     else:
         if log:
             SessionLogger.log(
                 'Tried to vectorize texts. Specified Vectorizer is not supported.',
                 log_type='error')
         return pd.DataFrame()