コード例 #1
0
    # Path of the train/test serialization file. It is passed to
    # SaveTrainTestDataset / LoadTrainTestDataset below.
    trainTestSerializationFile = ".\\DatasetBuilder\\Output\\train_test_dataset.bin"

    # Stage selection: the two branches are mutually exclusive (if / elif).
    # This branch runs when the dataset is to be loaded from its
    # serialization file.
    if LOAD_DATASET_FROM_SERIALIZATION_FILE:
        # Initialize the DatasetBuilder from serialization file
        datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [],
                                        datasetSerializationFile)

        # Load the dataset
        datasetBuilder.LoadDataset()

        # Form or load the train/test sets
        if SPLIT_DATASET_TRAIN_TEST:
            # Build a new split, then hand the serialization path to
            # SaveTrainTestDataset.
            datasetBuilder.SplitTrainTest()
            datasetBuilder.SaveTrainTestDataset(trainTestSerializationFile)
        elif LOAD_TRAIN_TEST:
            # Read the split back from the same serialization path.
            datasetBuilder.LoadTrainTestDataset(trainTestSerializationFile)
        # When neither flag is set, no train/test call is made in this branch.

    # Only reached when LOAD_DATASET_FROM_SERIALIZATION_FILE is false.
    elif UPDATE_LABELS_FROM_CSV:
        # Initialize the DatasetBuilder from serialization file
        # (same construction and LoadDataset() call as the branch above).
        datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [],
                                        datasetSerializationFile)

        # Load the dataset
        datasetBuilder.LoadDataset()

        # Update the labels from the CSV source. No arguments are passed, so
        # the CSV location is presumably taken from the builder's
        # configuration -- TODO confirm.
        datasetBuilder.UpdateManualLabelsFromCSV(
        )  # This should be done separately when dataset is manually labeled
コード例 #2
0
# Path of the train/test serialization file. It is not referenced by the
# statements below; it is kept because code outside this fragment may use it.
trainTestSerializationFile = ".\\DatasetBuilder\\Output\\train_test_dataset.bin"

# XLSX file names for the train set and the test set. No extension is given;
# presumably GetDatasetFromXLSXFile appends it -- TODO confirm.
xlsxTrainFileName = ".\\DatasetBuilder\\Input\\train"
xlsxTestFileName = ".\\DatasetBuilder\\Input\\test"

# Initialize the DatasetBuilder with the dataset serialization file.
# LoadDataset() is not called here: every set used below is read from the
# XLSX files instead.
datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [],
                                datasetSerializationFile)

# NOTE: an earlier, now disabled, workflow lived here as a string literal.
# It looped over 50 numbered files (xlsxManualLabelsFileName + "_" + index),
# called UpdateManualLabelsFromXLSXFile(fileName, index) for each one, and
# then formed the sets with SplitTrainTest(). The train and test sets now
# come from the two XLSX files named above.

# Train set, read from the train XLSX file.
datasetBuilder.trainSet = datasetBuilder.GetDatasetFromXLSXFile(
    xlsxTrainFileName)
# Set the dataset to the train set so that the language model is built from
# train tweets only. The train file is read by a second call rather than
# reusing trainSet; whether the two attributes could safely share one object
# is not visible from here, so the separate call is kept.
datasetBuilder.dataSet = datasetBuilder.GetDatasetFromXLSXFile(
    xlsxTrainFileName)
# Test set, read from the test XLSX file.
datasetBuilder.testSet = datasetBuilder.GetDatasetFromXLSXFile(
    xlsxTestFileName)