def createSets(setSizes=None):
    """Read the review IDs, label them, split them and save every subset.

    The IDs are read from ``CORPUS_PATH + REVIEW_IDS_FILENAME``, labelled
    "ironic" or "regular", handed to ``divideData`` together with
    ``setSizes``, and each resulting subset is saved under
    ``CORPUS_PATH + SET_FILENAMES[index]``.

    Args:
        setSizes: Sizes passed on to ``divideData``. When omitted (``None``),
            ``[TRAINING_SET_SIZE, TEST_SET_SIZE, VALIDATION_SET_SIZE]`` is
            used. The list is built anew on every call so that the default
            can never be modified across calls.
    """
    if setSizes is None:
        setSizes = [TRAINING_SET_SIZE, TEST_SET_SIZE, VALIDATION_SET_SIZE]

    reviewPairIDs, reviewIronicIDs, reviewRegularIDs = readIDs(
        CORPUS_PATH + REVIEW_IDS_FILENAME)

    reviewIDs = setToString(createIDLabelSet(reviewIronicIDs, "ironic"))
    reviewIDs += setToString(createIDLabelSet(reviewRegularIDs, "regular"))
    # Every pair is unpacked as (ironic ID, regular ID); each half gets its
    # own label.
    reviewIDs += setToString(
        createIDLabelSet([regular for _, regular in reviewPairIDs], "regular"))
    reviewIDs += setToString(
        createIDLabelSet([ironic for ironic, _ in reviewPairIDs], "ironic"))

    sets = divideData(reviewIDs, setSizes)
    # Index lookup (rather than zip) keeps the IndexError if divideData
    # returns more subsets than there are file names.
    for index, subset in enumerate(sets):
        saveSet(subset, CORPUS_PATH + SET_FILENAMES[index])
def shuffledSet(setFilename=REVIEW_IDS_FILENAME, path=CORPUS_PATH,
                randomSeed=RANDOM_SEED):
    """Label all review IDs, shuffle them with a fixed seed and save them.

    The IDs are read from ``path + setFilename``. After seeding the global
    ``random`` generator with ``randomSeed`` the labelled IDs are shuffled
    in place and written to ``path + "shuffled_set.txt"``.
    """
    pairIDs, ironicIDs, regularIDs = readIDs(path + setFilename)

    labelledIDs = setToString(createIDLabelSet(ironicIDs, "ironic"))

    # Remaining groups in the order they are appended; every pair is
    # unpacked as (ironic ID, regular ID).
    remainingGroups = (
        (regularIDs, "regular"),
        ([regular for _, regular in pairIDs], "regular"),
        ([ironic for ironic, _ in pairIDs], "ironic"),
    )
    for groupIDs, label in remainingGroups:
        labelledIDs += setToString(createIDLabelSet(groupIDs, label))

    # Seeding first makes the shuffle reproducible for a given randomSeed.
    random.seed(randomSeed)
    random.shuffle(labelledIDs)

    saveSet(labelledIDs, path + "shuffled_set.txt")