コード例 #1
0
def workflow(dataset):
    """Run the full text-preprocessing pipeline over *dataset*.

    Applies, in this fixed order: empty-word removal, tokenization,
    stop-word removal, stemming, lemmatization, and garbage removal.
    Each step receives the output of the previous one.

    Returns the fully preprocessed dataset.
    """
    preprocessing_steps = (
        remove_empty_words_1,
        tokenize_data,
        remove_stop_words,
        stemming,
        lemmatization,
        remove_garbage,
    )
    for step in preprocessing_steps:
        dataset = step(dataset)
    return dataset
コード例 #2
0
def testing_data():
    """Load and preprocess the testing dataset.

    Reads positive and negative samples from ``<PATH>/dataset/pos`` and
    ``<PATH>/dataset/neg``, then runs the standard preprocessing chain
    (empty-word removal, tokenization, stop-word removal, stemming,
    lemmatization, garbage removal).

    Returns the fully preprocessed testing dataset.
    """
    # Renamed from *_train_data_path: this function processes TESTING data,
    # the old names were misleading.
    pos_test_data_path = os.path.join(PATH, "dataset", "pos", "")
    neg_test_data_path = os.path.join(PATH, "dataset", "neg", "")
    dataset = load_data(pos_test_data_path, neg_test_data_path)
    dataset = remove_empty_words_1(dataset)
    dataset = tokenize_data(dataset)
    dataset = remove_stop_words(dataset)
    dataset = stemming(dataset)
    dataset = lemmatization(dataset)
    dataset = remove_garbage(dataset)
    return dataset
コード例 #3
0
def testing_data():
    """Load and preprocess the testing dataset, with debug logging.

    Reads positive and negative samples from ``<PATH>/dataset/pos`` and
    ``<PATH>/dataset/neg``, then runs the standard preprocessing chain
    (empty-word removal, tokenization, stop-word removal, stemming,
    lemmatization, garbage removal). Logs start/finish timestamps at
    DEBUG level.

    Returns the fully preprocessed testing dataset.
    """
    logging.debug('{} - ({})  processing testing data started'.format(
        script_name, str(datetime.datetime.now())))
    # Renamed from *_train_data_path: this function processes TESTING data,
    # the old names were misleading.
    pos_test_data_path = os.path.join(PATH, "dataset", "pos", "")
    neg_test_data_path = os.path.join(PATH, "dataset", "neg", "")
    dataset = load_data(pos_test_data_path, neg_test_data_path)
    dataset = remove_empty_words_1(dataset)
    dataset = tokenize_data(dataset)
    dataset = remove_stop_words(dataset)
    dataset = stemming(dataset)
    dataset = lemmatization(dataset)
    dataset = remove_garbage(dataset)
    # Bug fix: the original message said "training data finished" although
    # this function processes testing data (see the opening message above).
    logging.debug('{} - ({})  processing testing data finished'.format(
        script_name, str(datetime.datetime.now())))
    return dataset