#outlier_image = summary_data["outlierImage"]

# Consume the key/value records from the mapper input on stdin.  Three kinds
# of record are handled: "metadata", "mask", and anything else, which is
# stored as a band under its own key.
metadata = None
ny = 0
nx = 0
for key, value in binaryhadoop.mapperInput(sys.stdin):
    if key == "mask":
        # The rolled mask selects the pixels kept from every later band.
        mask = utilities.rollMask(value > 0)
        ny, nx = mask.shape
        continue

    if key != "metadata":
        # Band record: keep only the masked pixels, as float64.
        bands[key] = numpy.array(value[mask], dtype=numpy.float64)
        continue

    # Metadata record: remember it, pick the image key and reset the bands.
    metadata = value
    try:
        thisImageKey = metadata['originalDirName']
    except KeyError:
        # No original directory name recorded; use the output file instead.
        thisImageKey = metadata['outputFile']
    bands = {}

if metadata is not None:
    if 'HSI' not in metadata:
        # Tables are supplied directly by the metadata.
        wavelengths = metadata["bandWavelength"]
        multipliers = metadata["bandMultiplier"]
    else:
        # Build the tables from the HSI wavelength list: bands are named
        # "B000", "B001", ... by position and every multiplier is 1.
        wavelengths = {}
        multipliers = {}
        for w, wave in enumerate(metadata["HSI"]["wavelength"]):
            wavelengths["B%03d" % w] = float(wave)
            multipliers["B%03d" % w] = 1
def process(self, tup):
    """Compute summary statistics for one image and emit them as JSON.

    ``tup.values[0]`` is taken as the local file name and ``tup.values[1]``
    as the HDFS file name.  The HDFS file is read record by record: the
    "metadata" record is stored, the "mask" record is rolled into a pixel
    mask, and every other record is kept as a masked band.

    When metadata was found, the method derives the band covariance, its
    eigen-decomposition (sorted by descending eigenvalue), the share of
    variance carried by the leading components, the "rogue" bands reported
    by ``self.checkpca``, and a histogram/peak summary of the summed squared
    bands.  The results are added to the ``imageData`` dictionary and
    emitted on the "summaryStatistics" stream as
    ``[localFileName, hdfsFileName, <json>]``.

    Nothing is emitted when the file contains no "metadata" record.
    """
    localFileName = tup.values[0]
    hdfsFileName = tup.values[1]

    imageData = {}
    imageData["metadata"] = None
    storm.log("start processing %s %s" % (localFileName, hdfsFileName))

    # NOTE(review): this loop relies on the "metadata" record arriving
    # first and the "mask" record arriving before any band; otherwise
    # `bands` or `mask` is unbound -- confirm against the file writer.
    for key, sorter, interpretedValue in binaryhadoop.readFromHDFSiter(hdfsFileName):
        if key == "metadata":
            imageData["metadata"] = interpretedValue
            bands = {}
            storm.log(" read metadata")
        elif key == "mask":
            mask = utilities.rollMask(interpretedValue > 0)
            numPixels = numpy.nonzero(mask)[0].size
            storm.log(" read mask")
        else:
            bands[key] = interpretedValue[mask]
            storm.log(" read band %s" % key)

    if imageData["metadata"] is not None:
        wavelengths = imageData["metadata"]["bandWavelength"]
        multipliers = imageData["metadata"]["bandMultiplier"]

        storm.log("making imageArray 1")
        imageList = utilities.preprocessImage(bands, multipliers, wavelengths, imageData)

        # Covariance of the image bands.
        storm.log("making covariance")
        imageCov = self.makeCovariance(imageList, numPixels)

        # Principal components (eigenvalues / eigenvectors), ordered from
        # the largest eigenvalue to the smallest.
        # NOTE(review): numpy.linalg.eig may return complex values; if
        # imageCov is guaranteed symmetric, numpy.linalg.eigh is probably
        # the better fit -- confirm before switching.
        storm.log("making principle components 1")
        imgV, imgP = numpy.linalg.eig(imageCov)
        # storm.log("making principle components 2")
        indexList = numpy.argsort(-imgV)
        imgV = imgV[indexList]
        imgP = imgP[:, indexList]

        storm.log("making variance percentage")
        xVarianceComponents = 5
        # The eigenvalue total is computed once instead of once per element.
        totalVariance = numpy.sum(imgV)
        variancePercentage = [x / totalVariance for x in imgV[:xVarianceComponents]]

        storm.log("making rogue bands")
        rogueBands = self.checkpca(imgP.T, xVarianceComponents)

        # Per-pixel sum of squared band values.
        storm.log("making gray bands")
        bandGray = numpy.zeros(len(imageList[0]))
        for band in imageList:
            bandGray += (numpy.array(band)) ** 2
        bandGray = numpy.sort(bandGray)

        bandPercent = 1
        # The 99th percentile is removed to avoid skewing the mean
        bandGray = bandGray[bandGray < numpy.percentile(bandGray, 100 - bandPercent)]

        # Histogram is created
        storm.log("making gray band histogram")
        [hist, bin_edges] = numpy.histogram(bandGray, bins=100)

        # Locate the peaks on the histogram (evaluated at the bin centres)
        storm.log("making peaks and valleys")
        peaks, valleys = self.findpeaks(hist, 3, (bin_edges[:-1] + bin_edges[1:]) / 2)

        # Find mean and standard deviation of all pixels
        bandMean = numpy.mean(bandGray)
        bandSigma = numpy.std(bandGray)

        storm.log("making JSON output")
        imageData["numPixels"] = int(numPixels)
        imageData["grayBandMean"] = float(bandMean)
        imageData["grayBandSigma"] = float(bandSigma)

        # Report percentage of total pixels which lie beyond one standard
        # deviation from mean.  Builtin float replaces the numpy.float alias,
        # which was removed from NumPy 1.24 and was only ever builtin float.
        pixelTotal = float(numPixels)
        imageData["grayBandPlusOneSigma"] = float(numpy.sum(bandGray > (bandMean + bandSigma)) / pixelTotal)
        imageData["grayBandMinusOneSigma"] = float(numpy.sum(bandGray < (bandMean - bandSigma)) / pixelTotal)

        imageData["grayBandHistPeaks"] = [[float(x), int(y)] for x, y in peaks]
        #imageData["grayBandHistValleys"] = [[float(x), int(y)] for x, y in valleys]

        # PCA analysis and sum of first 5 principal components
        imageData["grayBandExplainedVariance"] = [float(x) for x in variancePercentage]

        # Report bands that have high leave-one-out loading variance
        imageData["grayBandRogueBands"] = [str(x) for x in rogueBands]

        # Report histogram
        imageData["grayBandHistogram"] = [[float(x) for x in bin_edges], [int(x) for x in hist]]

        # emit the final statistics
        storm.log("emiting Storm tuple")
        storm.emit([localFileName, hdfsFileName, json.dumps(imageData)], stream="summaryStatistics")

    storm.log("done with %s %s" % (localFileName, hdfsFileName))
['B067','B081','B083'], ['B114','B113','B115'], ['B148','B147','B149'], ] metadata = None for key, value in binaryhadoop.mapperInput(sys.stdin): if key == "metadata": sys.stderr.write('Available Metadata:\n') for k in value.keys(): sys.stderr.write('%s\n'%repr(k)) metadata = value bands = {} validBandsPresent = value["bandNames"] elif key == "mask": mask = utilities.rollMask(value == 0) else: if mask is None: mask = utilities.rollMask(value == 0) bands[key] = numpy.ma.masked_array(value, mask, dtype=numpy.float64) if config.blobspectra.E01: emptylimit = 0.85 else: emptylimit = .0005 if metadata is not None: # remove bands that have more than .05% of pixels with no calculable local standard deviation. #TEMPORARY REMOVED TO SPEED UP TESTING #for bandName, bandValue in sorted(bands.items(), key=lambda x: x[0]): # if removeBand(bandValue, emptylimit, 1):
imageData = {}
imageData["metadata"] = None

# Read the noise flag from the tab-separated "analyticconfig" file.  The
# file is opened in a `with` block so the handle is closed as soon as the
# scan finishes (the original left it open for the life of the process).
# NOTE(review): noiseFlag stays unbound when the file has no
# "contourclusters.noiseFlag" line -- confirm that is acceptable.
with open("analyticconfig", "r") as parameterFile:
    for line in parameterFile:
        line = line.rstrip().split("\t")
        if line[0] == "contourclusters.noiseFlag":
            noiseFlag = line[1]

# Consume the key/value records from the mapper input on stdin: "metadata"
# is stored, "mask" is rolled into the pixel mask, and every other record
# is kept as a masked float64 band.
for key, value in binaryhadoop.mapperInput(sys.stdin):
    if key == "metadata":
        imageData["metadata"] = value
        bands = {}
    elif key == "mask":
        mask = utilities.rollMask(value > 0)
        imageData["numPixels"] = numpy.nonzero(mask)[0].size
    else:
        bands[key] = numpy.array(value[mask], dtype=numpy.float64)

if imageData["metadata"] is not None:
    if 'HSI' in imageData["metadata"]:
        # Build the tables from the HSI wavelength list: bands are named
        # "B000", "B001", ... by position and every multiplier is 1.
        # Plain string keys are used here, matching the membership test
        # above; the lookup is the same on Python 2 and does not depend on
        # the Python-2-only `unicode` builtin.
        wavelengths = {}
        multipliers = {}
        for w, wave in enumerate(imageData["metadata"]["HSI"]["wavelength"]):
            wavelengths["B" + "%03d" % w] = float(wave)
            multipliers["B" + "%03d" % w] = 1
    else:
        # Tables are supplied directly by the metadata.
        wavelengths = imageData["metadata"]["bandWavelength"]
        multipliers = imageData["metadata"]["bandMultiplier"]
pass sys.stderr.write("This is pflag: %r, meandist: %r, minsize: %r\n" % (postprocessflag, meandist, clusterminsize)) sys.stderr.write("This is maxsize: %r, col: %r, row: %r, snr: %r\n" % (clustermaxsize, mincol, minrow, snr)) for key, value in binaryhadoop.mapperInput(sys.stdin): if key == "metadata": metadata = value bands = {} bandCount = 0 elif key == "mask": if 'HSI' in metadata.keys(): mask = (value > 0) mask[:,0] = False mask[:,-1] = False # This is a local copy of else: mask = utilities.rollMask(value > 0) # the roll mask function, similar to ny, nx = mask.shape # the one in utilities.py, but NOT identical else: bands[key] = numpy.array((value.T)[mask.T],dtype=numpy.float64) bandCount += 1 sys.stderr.write("READ IN BANDS\n") if metadata is not None: if 'HSI' in metadata.keys(): wavelengths = {} multipliers = {} for w,wave in enumerate(metadata[unicode("HSI")][unicode("wavelength")]): wavelengths["B" + "%03d" % w] = float(wave) multipliers["B" + "%03d" % w] = 1 else: