# ------------ Hyperparameters start here ------------
EPOCHS = 5
BATCH_SIZE = 10000
# Has to be the same as the GloVe vector dimension (look at file name)
EMBEDDING_DIM = 100
# ------------ Hyperparameters end here --------------

# TODO: Add test files as well and test it on that instead of validation
# TODO: Decouple the GloVe and pre-processing stuff from the model creation
# TODO: Create different model files for Dense Network, CNN and RNN

# Step 1: Get the datasets (already split into training, validation and test sets)
# NOTE(review): FileReader is a project-local class — assumed imported above this chunk.
reader = FileReader()
reader.read_from_file()
all_inputs, all_labels = reader.return_all_data()
training_inputs, training_labels = reader.return_training_sets()
valid_inputs, valid_labels = reader.return_valid_sets()

# Step 2.1: Label pre-processing.
# Fit the encoder on ALL labels so that train and validation splits share a
# single, consistent class-index mapping even if one split is missing a class.
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)
encoded_training_labels = label_encoder.transform(training_labels)
encoded_valid_labels = label_encoder.transform(valid_labels)
# One-hot encode the integer class indices for categorical-crossentropy training.
categorical_training_labels = to_categorical(encoded_training_labels)
categorical_valid_labels = to_categorical(encoded_valid_labels)

print("Fitting the tweets into the following classes: " + str(label_encoder.classes_))
# BUGFIX: was str(len(...)) — the class count must stay an int, since it is
# used as a layer size / number of output units downstream.
num_classes = len(label_encoder.classes_)