# Hyperparameters for the stacked (denoising) autoencoder:
# encoder layer sizes and the per-layer noise levels used during pretraining.
hidden_layers = [800, 500, 300]
noise_layers = [0.6, 0.4]

# Unsupervised pretraining settings.
pretr_activ = 'sigmoid'
pretr_opt = 'adadelta'
pretr_loss = 'mse'

# Supervised fine-tuning (classification) settings.
fine_activ = 'softmax'
fine_opt = 'adadelta'
fine_loss = 'categorical_crossentropy'


def clean(words):
    """Return *words* as strings, dropping entries that are plain floats.

    The exact ``type(...) is float`` check is kept from the original:
    presumably it filters pandas NaN placeholders out of token lists —
    TODO confirm against the callers.
    """
    return [str(word) for word in words if type(word) is not float]


print('\n{}\n'.format('-' * 50))

print('Reading the dataset..')
preprocessor = Preprocess(**preprocess_params)

print('Splitting the dataset..')
enron_dataset = preprocessor.dataset
enron_dataset = utils.split_dataset(x=enron_dataset['body'].values,
                                    y=enron_dataset['label'].values)

print('Transforming dataset into vectors and matrices..')
enron_dataset = preprocessor.transform(dataset=enron_dataset)
vocabulary = preprocessor.vocabulary

print('\n{}\n'.format('-' * 50))

print('Building model..')
# Accumulators for the per-layer encoder and noise components built below
# (population happens outside this chunk).
encoders = []
noises = []
print("\n{}\n".format("-" * 50))

# Optionally generate the raw Enron dataset from its source files, and
# optionally export it to CSV (export is nested here because ``dataset``
# only exists inside the generate branch).
if CONFIG["dataset"]["generate"]:
    print("Reading the dataset..")
    dataset = EnronDataset(path=CONFIG["dataset"]["path"])
    enron_dataset = dataset.get_dataset()
    if CONFIG["dataset"]["output"]:
        print("Exporting the dataset..")
        dataset.to_csv(filepath=CONFIG["dataset"]["filepath"])

# Build the preprocessor either from a previously exported CSV or from the
# in-memory dataset generated above.
# NOTE(review): if both ``generate`` and ``read_csv`` are false,
# ``enron_dataset`` is unbound here and the else-branch raises NameError —
# confirm the two flags are meant to be mutually exclusive in CONFIG.
if CONFIG["preprocess"]["params"]["read_csv"]:
    print("Reading the dataset..")
    preprocessor = Preprocess(**CONFIG["preprocess"]["params"])
else:
    preprocessor = Preprocess(dataset=enron_dataset,
                              **CONFIG["preprocess"]["params"])

# Optional cleaning pass; the cleaned result may itself be exported to CSV.
if CONFIG["preprocess"]["clean_dataset"]:
    print("Cleaning the dataset..")
    preprocessor.clean_data()
    if CONFIG["preprocess"]["output_csv"]:
        print("Exporting clean dataset..")
        preprocessor.dataset.to_csv(CONFIG["preprocess"]["output_csv_filepath"])

print("Splitting the dataset..")
enron_dataset = preprocessor.dataset
enron_dataset = utils.split_dataset(x=enron_dataset["body"].values,
                                    y=enron_dataset["label"].values)