def makeWindowGenerator(multispectralImage, panchromaticImage, windowGeoLength, scanRatio, frame=None):
    """Yield Window objects scanning both images over a square geographic grid.

    multispectralImage, panchromaticImage: georeferenced images exposing
        pixel/geo conversion methods and width/height in pixels
    windowGeoLength: side length of each square window, in geographic units
    scanRatio: overlap factor; the step between windows is windowSize / scanRatio
    frame: optional (left, top, right, bottom) pixel frame in multispectral
        coordinates restricting the scan; None scans the whole image
    """
    # Convert the square geographic window into pixel dimensions for each image
    windowGeoDimensions = windowGeoLength, windowGeoLength
    multispectralWindowWidth, multispectralWindowHeight = multispectralImage.convertGeoDimensionsToPixelDimensions(*windowGeoDimensions)
    panchromaticWindowWidth, panchromaticWindowHeight = panchromaticImage.convertGeoDimensionsToPixelDimensions(*windowGeoDimensions)
    # Clamp the scan area so every window fits inside the multispectral image
    maxRight = multispectralImage.width - multispectralWindowWidth
    maxBottom = multispectralImage.height - multispectralWindowHeight
    # Use frame; test identity, not truthiness: the original `if frame:`
    # wrongly ignored a legitimate all-zero frame (0, 0, 0, 0)
    if frame is not None:
        left, top, right, bottom = frame
        # Include overlap outside frame
        right = min(right, maxRight)
        bottom = min(bottom, maxBottom)
    else:
        left, top, right, bottom = 0, 0, maxRight, maxBottom
    # Guard against a zero xrange step when scanRatio exceeds the window size
    xStep = max(1, int(multispectralWindowWidth / scanRatio))
    yStep = max(1, int(multispectralWindowHeight / scanRatio))
    # Make multispectralWindowLocations
    multispectralWindowLocations = [(x, y) for x in xrange(left, right, xStep) for y in xrange(top, bottom, yStep)]
    # Map each multispectral location to the matching panchromatic location via geo space
    windowGeoLocations = multispectralImage.convertPixelLocationsToGeoLocations(multispectralWindowLocations)
    panchromaticWindowLocations = panchromaticImage.convertGeoLocationsToPixelLocations(windowGeoLocations)
    # Count
    locationCount = len(windowGeoLocations)
    # For each window,
    for locationIndex, multispectralWindowLocation, panchromaticWindowLocation in itertools.izip(itertools.count(), multispectralWindowLocations, panchromaticWindowLocations):
        # Make window
        multispectralPack = multispectralImage, multispectralWindowLocation, (multispectralWindowWidth, multispectralWindowHeight)
        panchromaticPack = panchromaticImage, panchromaticWindowLocation, (panchromaticWindowWidth, panchromaticWindowHeight)
        yield Window(multispectralPack, panchromaticPack)
        # Show feedback
        if locationIndex % 10 == 0:
            view.printPercentUpdate(locationIndex + 1, locationCount)
    view.printPercentFinal(locationCount)
def compareLocations(geoDiameter, rawActualLocations, rawPredictedLocations, regionGeoFrames):
    """Compare actual against predicted locations, region by region.

    geoDiameter: match tolerance passed to point_process.extractBadLocations
    rawActualLocations, rawPredictedLocations: full location lists
    regionGeoFrames: geographic frames partitioning the area of interest

    Returns (heap, summary): heap maps category names to sets of locations;
    summary maps human-readable category names to their counts.
    """
    # Build point machines for fast point-in-frame queries
    actualPointMachine = point_process.PointMachine(rawActualLocations, 'REAL')
    predictedPointMachine = point_process.PointMachine(rawPredictedLocations, 'REAL')
    categoryNames = 'actual', 'predicted', 'actualNotPredicted', 'predictedNotActual'
    heap = dict((categoryName, set()) for categoryName in categoryNames)
    regionCount = len(regionGeoFrames)
    # For each regionGeoFrame,
    for regionIndex, regionGeoFrame in enumerate(regionGeoFrames):
        # Restrict both point sets to the current region
        actualLocations = actualPointMachine.getPointsInsideFrame(regionGeoFrame)
        predictedLocations = predictedPointMachine.getPointsInsideFrame(regionGeoFrame)
        # Accumulate hits and, within geoDiameter, the unmatched locations
        heap['actual'].update(actualLocations)
        heap['predicted'].update(predictedLocations)
        heap['actualNotPredicted'].update(point_process.extractBadLocations(geoDiameter, actualLocations, rawPredictedLocations))
        heap['predictedNotActual'].update(point_process.extractBadLocations(geoDiameter, predictedLocations, rawActualLocations))
        # Show feedback
        view.printPercentUpdate(regionIndex, regionCount)
    # Show feedback
    view.printPercentFinal(regionCount)
    summary = {
        'actual count': len(heap['actual']),
        'predicted count': len(heap['predicted']),
        'actual not predicted count': len(heap['actualNotPredicted']),
        'predicted not actual count': len(heap['predictedNotActual']),
    }
    return heap, summary
def weightSamples(lushDatasetPath, sampleWeights): # Compute sampleMultipliers # sampleMultipliers = computeSampleMultipliers_multiplyBySampleCount(sampleWeights) sampleMultipliers = computeSampleMultipliers_divideByMinimum(sampleWeights) print 'max(sampleMultipliers) = %s' % max(sampleMultipliers) print 'sum(sampleMultipliers) = %s' % sum(sampleMultipliers) # Set paths temporaryFolderPath = tempfile.mkdtemp() weightedTrainingPath = os.path.join(temporaryFolderPath, 'training') sampleFile, labelFile = [open(x, 'wt') for x in classifier.makeSampleLabelPaths(weightedTrainingPath)] # Write header sampleCount = sum(sampleMultipliers) firstSample = classifier.makeSampleGeneratorFromLushDataset(lushDatasetPath).next() firstLabel = classifier.makeLabelGeneratorFromLushDataset(lushDatasetPath).next() sampleFile.write(classifier.makeLushMatrixHeaderFromPart(firstSample, sampleCount)) labelFile.write(classifier.makeLushMatrixHeaderFromPart(firstLabel, sampleCount)) # For each sample and label, for sampleIndex, sampleMultiplier, sample, label in itertools.izip(itertools.count(1), sampleMultipliers, classifier.makeSampleGeneratorFromLushDataset(lushDatasetPath), classifier.makeLabelGeneratorFromLushDataset(lushDatasetPath)): for index in xrange(sampleMultiplier): sampleFile.write(classifier.makeLushMatrixContent(sample)) labelFile.write(classifier.makeLushMatrixContent(label)) if sampleIndex % 100 == 0: view.printPercentUpdate(sampleIndex, sampleCount) view.printPercentFinal(sampleCount) # Close labelFile.close() sampleFile.close() # Return return weightedTrainingPath, sampleMultipliers
def grapeCluster(vectors, iterationCountPerBurst, maximumPixelDiameter, minimumPixelDiameter):
    """Split vectors into clusters by repeated k-means bursting and return
    the weighted mean of each cluster of acceptable size.

    Clusters judged too large are burst in two and requeued; clusters
    judged too small are discarded.
    """
    # Nothing to cluster
    if not vectors:
        return []
    # Seed the stack with everything in a single cluster
    pendingClusters = [numpy.array(vectors)]
    totalCount = len(vectors)
    acceptedMeans = []
    while pendingClusters:
        cluster = pendingClusters.pop()
        # sizeCategory > 0: too big; == 0: acceptable; < 0: too small (dropped)
        sizeCategory = measureClusterSize(cluster, maximumPixelDiameter, minimumPixelDiameter)
        if sizeCategory > 0:
            # Burst into two clusters with k-means and requeue both halves
            assignments = Pycluster.kcluster(cluster, npass=iterationCountPerBurst)[0]
            inFirstHalf = numpy.array(assignments) > 0
            pendingClusters.append(cluster[inFirstHalf])
            pendingClusters.append(cluster[~inFirstHalf])
        elif sizeCategory == 0:
            # Acceptable size: keep its weighted mean
            acceptedMeans.append(computeWeightedMean(cluster))
        # Show feedback
        view.printPercentUpdate(totalCount - len(pendingClusters), totalCount)
    view.printPercentFinal(totalCount)
    return acceptedMeans
def extract(self, hasRoof, geoCenters, multispectralImage, panchromaticImage):
    """Store a window pair sample, labeled hasRoof, for each geoCenter.

    A window of self.windowGeoDimensions is cut from both images centered
    on each geoCenter; centers whose window does not fit inside both
    images are skipped.
    """
    windowCount = len(geoCenters)
    # For each geoCenter,
    for windowIndex, geoCenter in enumerate(geoCenters):
        multispectralWindow = multispectralImage.extractCenteredGeoWindow(geoCenter, *self.windowGeoDimensions)
        panchromaticWindow = panchromaticImage.extractCenteredGeoWindow(geoCenter, *self.windowGeoDimensions)
        # Keep only centers where both extractions succeeded
        if multispectralWindow and panchromaticWindow:
            self.sampleDatabase.addSample(hasRoof, geoCenter, multispectralWindow, panchromaticWindow)
        # Show feedback
        if windowIndex % 100 == 0:
            view.printPercentUpdate(windowIndex + 1, windowCount)
    view.printPercentFinal(windowCount)
def save(targetPath, datasetSampleIDs):
    """Copy the identified samples into a new Store at targetPath.

    datasetSampleIDs: sequence of (sourceDataset, sampleID) pairs
    Returns the populated target Store.
    """
    targetDataset = Store(targetPath)
    sampleCount = len(datasetSampleIDs)
    # Copy each sample's full record into the target store
    for sampleIndex, pair in enumerate(datasetSampleIDs):
        sourceDataset, sampleID = pair
        targetDataset.addSample(*sourceDataset.getSample(sampleID))
        # Show feedback
        if not sampleIndex % 100:
            view.printPercentUpdate(sampleIndex + 1, sampleCount)
    view.printPercentFinal(sampleCount)
    return targetDataset
def extract(targetDatasetPath, geoCenters, label, windowGeoLength, multispectralImage, panchromaticImage):
    """Create a sample dataset of labeled, centered window pairs.

    For each geoCenter, a square window of windowGeoLength per side is cut
    from both images; centers whose window does not fit inside both images
    are skipped. Returns the new dataset.
    """
    dataset = sample_store.create(targetDatasetPath)
    windowCount = len(geoCenters)
    # For each geoCenter,
    for windowIndex, geoCenter in enumerate(geoCenters):
        multispectralWindow = multispectralImage.extractCenteredGeoWindow(geoCenter, windowGeoLength, windowGeoLength)
        panchromaticWindow = panchromaticImage.extractCenteredGeoWindow(geoCenter, windowGeoLength, windowGeoLength)
        # Keep only centers where both extractions succeeded
        if multispectralWindow and panchromaticWindow:
            dataset.addSample(label, geoCenter, multispectralWindow, panchromaticWindow)
        # Show feedback
        if windowIndex % 100 == 0:
            view.printPercentUpdate(windowIndex + 1, windowCount)
    view.printPercentFinal(windowCount)
    return dataset
def evaluateWindowsByRegions(probabilityPath, regionPixelFrames): # Load scanInformation = probability_store.Information(probabilityPath) # Initialize multispectralImage, panchromaticImage, actualGeoCenters, windowPixelDimensions = scanInformation.getPackage() regionCount = len(regionPixelFrames) regionLabels_actual = numpy.zeros(regionCount) pixelCenters_actual = multispectralImage.convertGeoLocationsToPixelLocations(actualGeoCenters) regionLabels_predicted = numpy.zeros(len(regionPixelFrames)) pixelCenters_predicted = probability_store.loadPredictedPixelCenters(probabilityPath, windowPixelDimensions) # Build machines pointMachine_actual = point_process.PointMachine(pixelCenters_actual, 'INTEGER') pointMachine_predicted = point_process.PointMachine(pixelCenters_predicted, 'INTEGER') # For each region, print 'Evaluating windows by region...' for regionIndex, regionPixelFrame in enumerate(regionPixelFrames): # Determine actual label regionLabels_actual[regionIndex] = 1 if pointMachine_actual.getPointsInsideFrame(regionPixelFrame) else 0 # Determine predicted label regionLabels_predicted[regionIndex] = 1 if pointMachine_predicted.getPointsInsideFrame(regionPixelFrame) else 0 # Show feedback if regionIndex % 100 == 0: view.printPercentUpdate(regionIndex, len(regionPixelFrames)) # Show feedback view.printPercentFinal(len(regionPixelFrames)) # Compute truePositive = sum((regionLabels_actual == 1) * (regionLabels_predicted == 1)) trueNegative = sum((regionLabels_actual == 0) * (regionLabels_predicted == 0)) falsePositive = sum((regionLabels_actual == 0) * (regionLabels_predicted == 1)) falseNegative = sum((regionLabels_actual == 1) * (regionLabels_predicted == 0)) actualPositive = sum(regionLabels_actual == 1) actualNegative = sum(regionLabels_actual == 0) # Count actualCount = sum(regionLabels_actual) predictedCount = sum(regionLabels_predicted) # Return return { 'region count': regionCount, 'true positive count': truePositive, 'true positive rate': truePositive / 
float(actualPositive) if actualPositive else None, 'true negative count': trueNegative, 'false positive count': falsePositive, 'false positive rate': falsePositive / float(actualNegative) if actualNegative else None, 'false negative count': falseNegative, 'actual positive count': actualCount, 'predicted positive count': predictedCount, 'precision': truePositive / float(predictedCount) if predictedCount else None, 'recall': truePositive / float(actualCount) if actualCount else None, }
def extract(targetDatasetPath, label, windowGeoLength, multispectralImage, panchromaticImage, geoCenters):
    """Build a sample dataset of centered window pairs, one per geoCenter.

    Each sample is a square window of windowGeoLength per side cut from
    both images and stored with the given label; centers whose window does
    not fit inside both images are skipped. Returns the new dataset.
    """
    dataset = sample_store.create(targetDatasetPath)
    windowCount = len(geoCenters)
    geoDimensions = windowGeoLength, windowGeoLength
    # For each geoCenter,
    for windowIndex, geoCenter in enumerate(geoCenters):
        windows = [sourceImage.extractCenteredGeoWindow(geoCenter, *geoDimensions) for sourceImage in (multispectralImage, panchromaticImage)]
        # Keep only centers where both extractions succeeded
        if all(windows):
            dataset.addSample(label, geoCenter, *windows)
        # Show feedback
        if windowIndex % 100 == 0:
            view.printPercentUpdate(windowIndex + 1, windowCount)
    view.printPercentFinal(windowCount)
    return dataset
def saveSamples(sampleDataset, sampleIDs, featureSet): # Initialize sampleCount = len(sampleIDs) sampleDatasetPath = sampleDataset.getDatasetPath() sampleInformation = { 'source dataset': { 'path': sampleDatasetPath, 'sample ids': ' '.join(str(x) for x in sampleIDs), }, 'feature': { 'module name': featureSet.__module__, 'class name': featureSet.__class__.__name__, } } targetSampleName = '%s-count%s-min%s' % (folder_store.getFolderName(sampleDatasetPath), sampleCount, min(sampleIDs)) targetSamplePath = os.path.join(store.makeFolderSafely(os.path.join(store.temporaryPath, 'cnn_datasets')), targetSampleName) # If targetDatasetPath exists, return if store.loadInformation(targetSamplePath) == sampleInformation: print 'Using existing samples...\n\ttargetSamplePath = ' + targetSamplePath return targetSamplePath # Save print 'Saving samples...\n\ttargetSamplePath = ' + targetSamplePath sampleGenerator = makeSampleLabelGeneratorFromSampleDataset(sampleDataset, sampleIDs, featureSet) sampleFile, labelFile = [open(x, 'wt') for x in makeSampleLabelPaths(targetSamplePath)] for sampleIndex, (sample, label) in enumerate(sampleGenerator): # If we are starting, write header if sampleIndex == 0: sampleFile.write(makeLushMatrixHeaderFromPart(sample, sampleCount)) labelFile.write(makeLushMatrixHeaderFromPart(label, sampleCount)) # Write content sampleFile.write(makeLushMatrixContent(sample)) labelFile.write(makeLushMatrixContent(label)) if sampleIndex % 100 == 0: view.printPercentUpdate(sampleIndex + 1, sampleCount) view.printPercentFinal(sampleCount) # Return labelFile.close(); sampleFile.close() store.saveInformation(targetSamplePath, sampleInformation) return targetSamplePath