예제 #1
0
파일: spam.py 프로젝트: benigls/spam
    enron_dataset = dataset.get_dataset()

    if CONFIG["dataset"]["output"]:
        print("Exporting the dataset..")
        dataset.to_csv(filepath=CONFIG["dataset"]["filepath"])


# --- Preprocessing stage ---------------------------------------------------
# All settings for this stage live under CONFIG["preprocess"]; grab the
# sub-sections once instead of repeating the nested lookups.
preprocess_cfg = CONFIG["preprocess"]
preprocess_params = preprocess_cfg["params"]

# When `read_csv` is off, the preprocessor is handed the `enron_dataset`
# bound earlier in the script; otherwise it is built from the params alone
# (presumably it then loads the CSV itself -- confirm against Preprocess).
if not preprocess_params["read_csv"]:
    preprocessor = Preprocess(dataset=enron_dataset, **preprocess_params)
else:
    print("Reading the dataset..")
    preprocessor = Preprocess(**preprocess_params)

# Optional cleaning pass over the preprocessor's dataset.
if preprocess_cfg["clean_dataset"]:
    print("Cleaning the dataset..")
    preprocessor.clean_data()

# Optional export of the preprocessor's current dataset to CSV.
if preprocess_cfg["output_csv"]:
    print("Exporting clean dataset..")
    preprocessor.dataset.to_csv(preprocess_cfg["output_csv_filepath"])

# Split using the "body" column as inputs and the "label" column as targets.
print("Spliting the dataset..")
enron_dataset = preprocessor.dataset
bodies = enron_dataset["body"].values
labels = enron_dataset["label"].values
enron_dataset = utils.split_dataset(x=bodies, y=labels)

# Transform the split dataset, then read the vocabulary off the preprocessor
# (read after transform(), matching the original statement order).
print("Transforming dataset into vectors and matrices..")
enron_dataset = preprocessor.transform(dataset=enron_dataset)
vocabulary = preprocessor.vocabulary

# Visual separator between the preprocessing and model-building output.
separator = "-" * 50
print("\n{}\n".format(separator))
print("Building model..")