def createTrainingSupervisedDataSet(self, msrcImages, scale, keepClassDistTrain):
    """Build a ClassificationDataSet from the training part of the MSRC images.

    The images are split via pomio.splitInputDataset_msrcData, features and
    labels are generated for the first element of that split (void pixels
    ignored), and every data point is added to the data set together with a
    one-hot target vector of length numClasses.

    msrcImages         -- image collection handed to pomio for splitting
    scale              -- forwarded to pomio.splitInputDataset_msrcData
    keepClassDistTrain -- forwarded to pomio.splitInputDataset_msrcData

    Returns the populated ClassificationDataSet.
    Raises AssertionError when the number of feature rows and the number of
    labels differ.

    NOTE(review): numClasses and voidClass are not defined in this function;
    presumably they are module-level values -- confirm they are in scope.
    """
    # Single-argument print(...) produces the same output whether print is a
    # statement or a function.
    print("\tSplitting MSRC data into train, test, valid data sets.")
    splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain)

    print("\tNow generating features for each training image.")
    trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True)
    features = trainData[0]
    labels = trainData[1]

    featureShape = np.shape(features)
    numDataPoints = featureShape[0]
    numFeatures = featureShape[1]

    # One label is expected per feature row.
    numLabels = np.size(labels)
    assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!"

    dataSetTrain = ClassificationDataSet(numFeatures, numClasses)

    print("\tNow adding all data points to the ClassificationDataSet...")
    for idx in range(numDataPoints):
        # Cast so the label can be used as an array index even if it is
        # stored as a float.
        classIdx = int(labels[idx])
        # to cope with the removal of void class (idx 13): labels at or above
        # the void class move down by one slot.
        if classIdx >= voidClass:
            classIdx -= 1

        binaryLabels = np.zeros(numClasses)
        binaryLabels[classIdx] = 1
        dataSetTrain.addSample(features[idx], binaryLabels)

    # Report the number of samples actually added; np.size(trainData) measured
    # the (features, labels) container instead.  The double space matches the
    # spacing of the original comma-separated print.
    print("\tAdded %d  labeled data points to DataSet." % numDataPoints)

    return dataSetTrain
def createTrainingSupervisedDataSet(self, msrcImages, scale, keepClassDistTrain):
    """Build a ClassificationDataSet from the training part of the MSRC images.

    The images are split via pomio.splitInputDataset_msrcData, features and
    labels are generated for the first element of that split (void pixels
    ignored), and every data point is added to the data set together with a
    one-hot target vector of length numClasses.

    msrcImages         -- image collection handed to pomio for splitting
    scale              -- forwarded to pomio.splitInputDataset_msrcData
    keepClassDistTrain -- forwarded to pomio.splitInputDataset_msrcData

    Returns the populated ClassificationDataSet.
    Raises AssertionError when the number of feature rows and the number of
    labels differ.

    NOTE(review): numClasses and voidClass are not defined in this function;
    presumably they are module-level values -- confirm they are in scope.
    """
    # Single-argument print(...) produces the same output whether print is a
    # statement or a function.
    print("\tSplitting MSRC data into train, test, valid data sets.")
    splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain)

    print("\tNow generating features for each training image.")
    trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True)
    features = trainData[0]
    labels = trainData[1]

    featureShape = np.shape(features)
    numDataPoints = featureShape[0]
    numFeatures = featureShape[1]

    # One label is expected per feature row.
    numLabels = np.size(labels)
    assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!"

    dataSetTrain = ClassificationDataSet(numFeatures, numClasses)

    print("\tNow adding all data points to the ClassificationDataSet...")
    for idx in range(numDataPoints):
        # Cast so the label can be used as an array index even if it is
        # stored as a float.
        classIdx = int(labels[idx])
        # to cope with the removal of void class (idx 13): labels at or above
        # the void class move down by one slot.
        if classIdx >= voidClass:
            classIdx -= 1

        binaryLabels = np.zeros(numClasses)
        binaryLabels[classIdx] = 1
        dataSetTrain.addSample(features[idx], binaryLabels)

    # Report the number of samples actually added; np.size(trainData) measured
    # the (features, labels) container instead.  The double space matches the
    # spacing of the original comma-separated print.
    print("\tAdded %d  labeled data points to DataSet." % numDataPoints)

    return dataSetTrain
def getSuperPixelTrainingData(msrcDataDirectory, nbSuperPixels, superPixelCompactness, scale,
                              trainSplit=0.6, validationSplit=0.2, testSplit=0.2):
    """Load the MSRC images and return superpixel data for the training split.

    msrcDataDirectory     -- location handed to pomio.msrc_loadImages
    nbSuperPixels         -- forwarded to SuperPixelClassifier.getSuperPixelData
    superPixelCompactness -- forwarded to SuperPixelClassifier.getSuperPixelData
    scale                 -- dataset scale for the split; None selects 0.05
    trainSplit, validationSplit, testSplit
                          -- split fractions forwarded to
                             pomio.splitInputDataset_msrcData

    Returns whatever SuperPixelClassifier.getSuperPixelData produces for the
    first element of the split (the training images).
    """
    # Default to 5% of the data when the caller gives no scale.
    if scale is None:
        scale = 0.05

    msrcData = pomio.msrc_loadImages(msrcDataDirectory, None)

    print("Now generating superpixel classifier for MSRC data")

    # Positional arguments: `scale` is the dataset scale and `True` asks pomio
    # to keep the class distribution for training.
    splitData = pomio.splitInputDataset_msrcData(
        msrcData, scale, True, trainSplit, validationSplit, testSplit)

    # Only the training part of the split is used here.
    trainingMsrcImages = splitData[0]

    # Reuse SuperPixelClassifier.getSuperPixelData for features and labels.
    return SuperPixelClassifier.getSuperPixelData(
        trainingMsrcImages, nbSuperPixels, superPixelCompactness)
def getSuperPixelTrainingData(msrcDataDirectory, nbSuperPixels, superPixelCompactness, scale,
                              trainSplit=0.6, validationSplit=0.2, testSplit=0.2):
    """Load the MSRC images and return superpixel data for the training split.

    msrcDataDirectory     -- location handed to pomio.msrc_loadImages
    nbSuperPixels         -- forwarded to SuperPixelClassifier.getSuperPixelData
    superPixelCompactness -- forwarded to SuperPixelClassifier.getSuperPixelData
    scale                 -- dataset scale for the split; None selects 0.05
    trainSplit, validationSplit, testSplit
                          -- split fractions forwarded to
                             pomio.splitInputDataset_msrcData

    Returns whatever SuperPixelClassifier.getSuperPixelData produces for the
    first element of the split (the training images).
    """
    # Default to 5% of the data when the caller gives no scale.
    if scale is None:
        scale = 0.05

    msrcData = pomio.msrc_loadImages(msrcDataDirectory, None)

    print("Now generating superpixel classifier for MSRC data")

    # Positional arguments: `scale` is the dataset scale and `True` asks pomio
    # to keep the class distribution for training.
    splitData = pomio.splitInputDataset_msrcData(
        msrcData, scale, True, trainSplit, validationSplit, testSplit)

    # Only the training part of the split is used here.
    trainingMsrcImages = splitData[0]

    # Reuse SuperPixelClassifier.getSuperPixelData for features and labels.
    return SuperPixelClassifier.getSuperPixelData(
        trainingMsrcImages, nbSuperPixels, superPixelCompactness)
else: # we get here if we have non-default splits writeDataType = True # Do the expected thing if we have a non-default split if args.v: print "Loading data" if writeDataType == True: # Get train, test and cv datasets print "\nSplitting data into sets: train =", trainSplit, "test =", testSplit, "cvSplit =", cvSplit keepClassDistForTraining = False #!! [trainData, cvData, testData] = pomio.splitInputDataset_msrcData( pomio.msrc_loadImages(msrcDataDirectory, subset=None), scaleFrac, keepClassDistForTraining, trainSplit, cvSplit, testSplit, ) assert trainData != None, "Training data object is null" assert len(trainData) > 0, "Training data contains no data" if testData == None or len(testData) == 0: print "WARNING: Testing data contains no data" print "Creating training set from %d images, and test set from %d images" % (len(trainData), len(testData)) # Process and persist feature and labels for superpixels in image dataset print "Create & save training feature and label superpixel data" createAndSaveFeatureLabelData(
'Images/9_2_s.bmp']) elif subsetType == 2: msrcImages = pomio.msrc_loadImages(msrcData, ['Images/7_3_s.bmp'] ) else: # Load all images msrcImages = pomio.msrc_loadImages(msrcData) if doVal or doTest: scale = 0.1 # Generate data from images and save to file print "\nProcessing " + str(scale*100) + \ "% of MSRC data on a 60/20/20 split serialised for easier file IO" splitData = pomio.splitInputDataset_msrcData( msrcImages, datasetScale=scale, keepClassDistForTraining=True, trainSplit=0.6, validationSplit=0.2, testSplit=0.2 ) validationDataset = splitData[1] testDataset = splitData[2] if doVal: print "Processing validation data::" validationData = FeatureGenerator.processLabeledImageData(validationDataset, ignoreVoid=True) if doTest: print "Processing test data::" testingData = FeatureGenerator.processLabeledImageData(testDataset, ignoreVoid=True)
'Images/9_2_s.bmp']) elif subsetType == 2: msrcImages = pomio.msrc_loadImages(msrcData, ['Images/7_3_s.bmp']) else: # Load all images msrcImages = pomio.msrc_loadImages(msrcData) if doVal or doTest: scale = 0.1 # Generate data from images and save to file print "\nProcessing " + str(scale*100) + \ "% of MSRC data on a 60/20/20 split serialised for easier file IO" splitData = pomio.splitInputDataset_msrcData( msrcImages, datasetScale=scale, keepClassDistForTraining=True, trainSplit=0.6, validationSplit=0.2, testSplit=0.2) validationDataset = splitData[1] testDataset = splitData[2] if doVal: print "Processing validation data::" validationData = FeatureGenerator.processLabeledImageData( validationDataset, ignoreVoid=True) if doTest: print "Processing test data::" testingData = FeatureGenerator.processLabeledImageData(
if (trainSplit == 1.0) and (cvSplit == 0.0 and testSplit == 0.0): writeDataType = False else: # we get here if we have non-default splits writeDataType = True # Do the expected thing if we have a non-default split if args.v: print 'Loading data' if writeDataType == True: # Get train, test and cv datasets print "\nSplitting data into sets: train =", trainSplit, "test =", testSplit, "cvSplit =", cvSplit keepClassDistForTraining = False #!! [trainData, cvData, testData] = pomio.splitInputDataset_msrcData(\ pomio.msrc_loadImages(msrcDataDirectory, subset=None), scaleFrac, keepClassDistForTraining, trainSplit , cvSplit , testSplit) assert trainData != None, "Training data object is null" assert len(trainData) > 0, "Training data contains no data" if testData == None or len(testData) == 0: print 'WARNING: Testing data contains no data' print "Creating training set from %d images, and test set from %d images" % ( len(trainData), len(testData)) # Process and persist feature and labels for superpixels in image dataset print "Create & save training feature and label superpixel data" createAndSaveFeatureLabelData(trainData, outfileBase, "train", outfileType, numberSuperPixels, superPixelCompactness,