# Example #1
# Training hyperparameters
EPOCHS = 5  # number of full passes over the training set
BATCH_SIZE = 10000  # samples per gradient update — unusually large; TODO confirm intentional
# Has to be the same as the GloVe vector dimension (look at file name)
EMBEDDING_DIM = 100

# ------------ Hyperparameters end here -----

# TODO: Add test files as well and test it on that instead of validation
# TODO: Decouple the GloVe and pre-processing stuff from the model creation
# TODO: Create different model files for Dense Network, CNN and RNN
# Step 1: Get the datasets (already split into training, validation and test sets)
# NOTE(review): FileReader is project-local; presumably read_from_file() loads
# the splits that the return_* accessors below expose — confirm in its module.
reader = FileReader()
reader.read_from_file()

# Full corpus (used below so the label encoder sees every class),
# plus the individual training and validation splits.
all_inputs, all_labels = reader.return_all_data()
training_inputs, training_labels = reader.return_training_sets()
valid_inputs, valid_labels = reader.return_valid_sets()

# Step 2.1: Label pre-processing
# Fit the encoder on ALL labels so every class is known even if it happens to
# be absent from one particular split.
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)
# String labels -> integer ids -> one-hot vectors (the form Keras losses
# such as categorical_crossentropy expect).
encoded_training_labels = label_encoder.transform(training_labels)
encoded_valid_labels = label_encoder.transform(valid_labels)
categorical_training_labels = to_categorical(encoded_training_labels)
categorical_valid_labels = to_categorical(encoded_valid_labels)
print("Fitting the tweets into the following classes: " +
      str(label_encoder.classes_))

# Bug fix: this was str(len(...)), which made num_classes a *string*; the
# class count must stay an int so it can size an output layer, e.g.
# Dense(num_classes) — TODO confirm downstream usage.
num_classes = len(label_encoder.classes_)