# Location of the serialized train/test split.
trainTestSerializationFile = ".\\DatasetBuilder\\Output\\train_test_dataset.bin"

# Two stages are handled here, selected by the stage flags:
#   * LOAD_DATASET_FROM_SERIALIZATION_FILE - load the dataset, then either
#     create and save a train/test split or load a previously saved one;
#   * UPDATE_LABELS_FROM_CSV - load the dataset and update its manual labels.
# The first flag takes precedence when both are set.
if LOAD_DATASET_FROM_SERIALIZATION_FILE or UPDATE_LABELS_FROM_CSV:
    # Both stages start the same way: build the DatasetBuilder on top of the
    # dataset serialization file and load the dataset.
    datasetBuilder = DatasetBuilder(
        configFileDatasetBuilder,
        [],
        datasetSerializationFile,
    )
    datasetBuilder.LoadDataset()

    if LOAD_DATASET_FROM_SERIALIZATION_FILE:
        # Form the train/test sets and persist them, or load a saved split.
        # Nothing more happens when neither split flag is set.
        if SPLIT_DATASET_TRAIN_TEST:
            datasetBuilder.SplitTrainTest()
            datasetBuilder.SaveTrainTestDataset(trainTestSerializationFile)
        elif LOAD_TRAIN_TEST:
            datasetBuilder.LoadTrainTestDataset(trainTestSerializationFile)
    else:
        # Update the labels. This should be done separately, once the
        # dataset has been manually labeled.
        datasetBuilder.UpdateManualLabelsFromCSV()
# Location of the serialized train/test split.
trainTestSerializationFile = ".\\DatasetBuilder\\Output\\train_test_dataset.bin"

# XLSX file names (as handed to GetDatasetFromXLSXFile) for the train and
# test sets.
xlsxTrainFileName = ".\\DatasetBuilder\\Input\\train"
xlsxTestFileName = ".\\DatasetBuilder\\Input\\test"

# Build the DatasetBuilder on top of the dataset serialization file.
datasetBuilder = DatasetBuilder(
    configFileDatasetBuilder,
    [],
    datasetSerializationFile,
)

# Loading the serialized dataset is disabled in this stage:
# datasetBuilder.LoadDataset()

# Disabled workflow, kept for reference: update the manual labels from the
# numbered XLSX files, then form the train/test sets.
#
# numFiles = 50
# for i in range(numFiles):
#     print('Updating labels from file ' + xlsxManualLabelsFileName + "_" + str(i + 1) + '...')
#     # This should be done separately when dataset is manually labeled
#     datasetBuilder.UpdateManualLabelsFromXLSXFile(xlsxManualLabelsFileName + "_" + str(i + 1), (i + 1))
#
# # Form or load the train/test sets
# datasetBuilder.SplitTrainTest()

# Read the train set from its XLSX file.
datasetBuilder.trainSet = datasetBuilder.GetDatasetFromXLSXFile(
    xlsxTrainFileName)

# The full dataset is also set from the train file, so that the language
# model is built from train tweets only. The file is read a second time
# rather than reusing the trainSet object.
datasetBuilder.dataSet = datasetBuilder.GetDatasetFromXLSXFile(
    xlsxTrainFileName)

# Read the test set from its XLSX file.
datasetBuilder.testSet = datasetBuilder.GetDatasetFromXLSXFile(
    xlsxTestFileName)