#outlier_image = summary_data["outlierImage"]

    metadata = None
    ny = 0
    nx = 0

    for key, value in binaryhadoop.mapperInput(sys.stdin):
        if key == "metadata":
            metadata = value
            try:
                thisImageKey = metadata['originalDirName']
            except KeyError:
                thisImageKey = metadata['outputFile']
            bands = {}
        elif key == "mask":
            mask = utilities.rollMask(value > 0)
            ny,nx = mask.shape
        else:
            bands[key] = numpy.array(value[mask], dtype=numpy.float64)

    if metadata is not None:
        if 'HSI' in metadata.keys():
            wavelengths = {}
            multipliers = {}
            for w,wave in enumerate(metadata["HSI"]["wavelength"]):
                wavelengths["B" + "%03d" % w] = float(wave)
                multipliers["B" + "%03d" % w] = 1
        else:
            wavelengths = metadata["bandWavelength"]
            multipliers = metadata["bandMultiplier"]
    def process(self, tup):
        """Compute summary statistics for one image and emit them as JSON.

        ``tup.values[0]`` is the local file name and ``tup.values[1]`` is the
        HDFS file name.  Records are read from the HDFS file with
        ``binaryhadoop.readFromHDFSiter``; each record is keyed ``"metadata"``,
        ``"mask"``, or (any other key) a band name.

        The record order matters: the ``bands`` dict is created when the
        metadata record is seen and every band is indexed by ``mask``, so both
        the metadata and the mask record must precede the first band record.

        If a metadata record was read, one tuple
        ``[localFileName, hdfsFileName, <JSON string>]`` is emitted on the
        ``"summaryStatistics"`` stream.  If no metadata record was read,
        nothing is computed or emitted.
        """
        localFileName = tup.values[0]
        hdfsFileName = tup.values[1]
        imageData = {}
        imageData["metadata"] = None

        storm.log("start processing %s %s" % (localFileName, hdfsFileName))

        for key, sorter, interpretedValue in binaryhadoop.readFromHDFSiter(hdfsFileName):
            if key == "metadata":
                imageData["metadata"] = interpretedValue
                # A metadata record starts a fresh set of bands.
                bands = {}
                storm.log("    read metadata")
            elif key == "mask":
                mask = utilities.rollMask(interpretedValue > 0)
                # Number of pixels selected by the mask.
                numPixels = numpy.nonzero(mask)[0].size
                storm.log("    read mask")
            else:
                # Keep only the pixels selected by the mask.
                bands[key] = interpretedValue[mask]
                storm.log("    read band %s" % key)

        if imageData["metadata"] is not None:
            wavelengths = imageData["metadata"]["bandWavelength"]
            multipliers = imageData["metadata"]["bandMultiplier"]

            storm.log("making imageArray 1")
            imageList = utilities.preprocessImage(bands, multipliers, wavelengths, imageData)

            # Find the covariance of the image bands.
            storm.log("making covariance")
            imageCov = self.makeCovariance(imageList, numPixels)

            # Find the principal components (eigenvalues / eigenvectors).
            # NOTE(review): if imageCov is guaranteed symmetric,
            # numpy.linalg.eigh would guarantee real eigenvalues -- confirm
            # before switching, since eigenvector signs may differ.
            storm.log("making principle components 1")
            imgV, imgP = numpy.linalg.eig(imageCov)

            # Order the components by decreasing eigenvalue.
            storm.log("making principle components 2")
            indexList = numpy.argsort(-imgV)
            imgV = imgV[indexList]
            imgP = imgP[:, indexList]

            storm.log("making variance percentage")
            xVarianceComponents = 5
            # Fraction of the total variance carried by each leading
            # component; the total is computed once rather than per element.
            totalVariance = numpy.sum(imgV)
            variancePercentage = [x / totalVariance for x in imgV[:xVarianceComponents]]

            storm.log("making rogue bands")
            rogueBands = self.checkpca(imgP.T, xVarianceComponents)

            # The "gray band" is the per-pixel sum of squared band values.
            storm.log("making gray bands")
            bandGray = numpy.zeros(len(imageList[0]))
            for band in imageList:
                bandGray += numpy.asarray(band) ** 2
            bandGray = numpy.sort(bandGray)

            bandPercent = 1
            # Values at or above the (100 - bandPercent)th percentile are
            # dropped to avoid skewing the mean.
            bandGray = bandGray[bandGray < numpy.percentile(bandGray, 100 - bandPercent)]

            # Histogram of the remaining gray-band values.
            storm.log("making gray band histogram")
            hist, bin_edges = numpy.histogram(bandGray, bins=100)

            # Locate the peaks on the histogram, using the bin centres as
            # the x coordinates.
            storm.log("making peaks and valleys")
            binCenters = (bin_edges[:-1] + bin_edges[1:]) / 2
            peaks, valleys = self.findpeaks(hist, 3, binCenters)

            # Mean and standard deviation of the remaining gray-band values.
            bandMean = numpy.mean(bandGray)
            bandSigma = numpy.std(bandGray)

            storm.log("making JSON output")
            imageData["numPixels"] = int(numPixels)

            imageData["grayBandMean"] = float(bandMean)
            imageData["grayBandSigma"] = float(bandSigma)

            # Fraction of pixels lying beyond one standard deviation from the
            # mean.  The builtin float replaces numpy.float, an alias of the
            # builtin that was removed in NumPy 1.24.
            # NOTE(review): the counts come from the trimmed bandGray while
            # the denominator is the untrimmed numPixels -- confirm intended.
            totalPixels = float(numPixels)
            imageData["grayBandPlusOneSigma"] = float(
                numpy.sum(bandGray > (bandMean + bandSigma)) / totalPixels)
            imageData["grayBandMinusOneSigma"] = float(
                numpy.sum(bandGray < (bandMean - bandSigma)) / totalPixels)

            imageData["grayBandHistPeaks"] = [[float(x), int(y)] for x, y in peaks]
            #imageData["grayBandHistValleys"] = [[float(x), int(y)] for x, y in valleys]

            # Explained-variance fractions of the first 5 principal components.
            imageData["grayBandExplainedVariance"] = [float(x) for x in variancePercentage]

            # Bands reported by checkpca.
            imageData["grayBandRogueBands"] = [str(x) for x in rogueBands]

            # Histogram as [bin edges, counts].
            imageData["grayBandHistogram"] = [[float(x) for x in bin_edges], [int(x) for x in hist]]

            # Emit the final statistics.
            storm.log("emiting Storm tuple")
            storm.emit([localFileName, hdfsFileName, json.dumps(imageData)], stream="summaryStatistics")

            storm.log("done with %s %s" % (localFileName, hdfsFileName))
Ejemplo n.º 3
0
                     ['B067','B081','B083'],
                     ['B114','B113','B115'],
                     ['B148','B147','B149'],
                     ]

    metadata = None
    for key, value in binaryhadoop.mapperInput(sys.stdin):
        if key == "metadata":
            sys.stderr.write('Available Metadata:\n')
            for k in value.keys():
              sys.stderr.write('%s\n'%repr(k))
            metadata = value
            bands = {}
            validBandsPresent = value["bandNames"]
        elif key == "mask":
            mask = utilities.rollMask(value == 0)
        else:
            if mask is None:
              mask = utilities.rollMask(value == 0)
            bands[key] = numpy.ma.masked_array(value, mask, dtype=numpy.float64)
             

    if config.blobspectra.E01:
      emptylimit = 0.85
    else:
      emptylimit = .0005
    if metadata is not None:
        # remove bands that have more than .05% of pixels with no calculable local standard deviation.
        #TEMPORARY REMOVED TO SPEED UP TESTING
        #for bandName, bandValue in sorted(bands.items(), key=lambda x: x[0]):
        #    if removeBand(bandValue, emptylimit, 1):
Ejemplo n.º 4
0
    imageData = {}
    imageData["metadata"] = None

    parameterFile = open("analyticconfig","r")
    for line in parameterFile.readlines():
        line = line.rstrip().split("\t")
        if line[0]=="contourclusters.noiseFlag":
            noiseFlag = line[1]

    for key, value in binaryhadoop.mapperInput(sys.stdin):
        if key == "metadata":
            imageData["metadata"] = value 
            bands = {}
        elif key == "mask":
            mask = utilities.rollMask(value > 0)
            imageData["numPixels"] = numpy.nonzero(mask)[0].size
        else:
            bands[key] = numpy.array(value[mask],dtype=numpy.float64)

    if imageData["metadata"] is not None:
        if 'HSI' in imageData["metadata"].keys():
            wavelengths = {}
            multipliers = {}
            for w,wave in enumerate(imageData["metadata"][unicode("HSI")][unicode("wavelength")]):
                wavelengths["B" + "%03d" % w] = float(wave)
                multipliers["B" + "%03d" % w] = 1
        else:
            wavelengths = imageData["metadata"]["bandWavelength"]
            multipliers = imageData["metadata"]["bandMultiplier"]        
        
Ejemplo n.º 5
0
        pass
    sys.stderr.write("This is pflag: %r, meandist: %r, minsize: %r\n" % (postprocessflag, meandist, clusterminsize))
    sys.stderr.write("This is maxsize: %r, col: %r, row: %r, snr: %r\n" % (clustermaxsize, mincol, minrow, snr))

    for key, value in binaryhadoop.mapperInput(sys.stdin):
        if key == "metadata":
            metadata = value
            bands = {}
            bandCount = 0
        elif key == "mask":
            if 'HSI' in metadata.keys():
                mask = (value > 0)
                mask[:,0] = False
                mask[:,-1] = False   # This is a local copy of 
            else:
                mask = utilities.rollMask(value > 0)                             # the roll mask function, similar to
            ny, nx = mask.shape          # the one in utilities.py, but NOT identical
        else:
            bands[key] = numpy.array((value.T)[mask.T],dtype=numpy.float64)
            bandCount += 1

    sys.stderr.write("READ IN BANDS\n")
    if metadata is not None:

        if 'HSI' in metadata.keys():
            wavelengths = {}
            multipliers = {}
            for w,wave in enumerate(metadata[unicode("HSI")][unicode("wavelength")]):
                wavelengths["B" + "%03d" % w] = float(wave)
                multipliers["B" + "%03d" % w] = 1
        else: