def main(inputFile, classificationType=1, dataType=1, programType=1, Train=False):
    """Classify one serialized picture and save the class image as tmpClass.png.

    * inputFile: full path + file name of the serialized picture, as a string.
    * classificationType: 1 = support vector machine, 2 = naive Bayes, 3 = trees.
    * dataType: 1 = magnitudes of band reflectivity, 2 = ratios of band reflectivity.
    * programType: 1 = R, 2 = Python (any value other than 1 selects the Python module).
    * Train: if true, rebuild the training set with trainingSet.main before classifying.

    Returns the GeoPicture produced by the classification module's newBand function.
    """
    if Train:
        # The first input of trainingSet.main is a string containing the full path to training image data.
        # The second input is a list of names corresponding to land cover types, which
        # need to be the names of sub-directories in the training image data directory.
        trainingSet.main('/Users/ryanmiller/amazon/mnt2/training_serialized', ['cloud', 'land_dry', 'water', 'land_veg'])

    # Close the input file as soon as the picture has been deserialized.
    with open(inputFile) as serializedFile:
        geoPicture = GeoPictureSerializer.deserialize(serializedFile)

    if programType == 1:
        # install the required R package
        if classificationType == 3:
            robjects.r('install.packages("rpart",repos="http://rweb.quant.ku.edu/cran/")')
        else:
            robjects.r('install.packages("e1071",repos="http://rweb.quant.ku.edu/cran/")')
        # load module
        modules = ['flood_detection_ThreeBand_R.py']
    else:
        # load module
        modules = ['flood_detection_ThreeBand_PY.py']

    # construct the newBand module: each module file is executed in its own
    # namespace and must define a callable named "newBand"
    loadedModules = []
    for module in modules:
        globalVars = {}
        with open(module) as moduleFile:
            moduleSource = moduleFile.read()
        exec(compile(moduleSource, module, "exec"), globalVars)
        loadedModules.append(globalVars["newBand"])

    for newBand in loadedModules:
        geoPicture = newBand(geoPicture, classificationType, dataType)

    # image processing: keep only the last 3 bands so the result can be saved as an RGB image
    imageArrayClass = numpy.array(geoPicture.picture[:, :, -3:] * 255, dtype=numpy.uint8)
    imageClass = Image.fromarray(imageArrayClass)
    # NOTE(review): Pillow's documented PNG flag is `optimize=True`; `option="optimize"`
    # is presumably ignored by the writer. Left unchanged to keep output bytes identical -- confirm.
    imageClass.save("tmpClass" + ".png", "PNG", option="optimize")

    return geoPicture
def map_bright_pixels(inputStream, outputStream, band, threshold, modules=None):
    """Emit one "world" key-value line for every pixel of `band` that exceeds `threshold`.

    * inputStream: file-like object yielding one serialized picture per line.
    * outputStream: file-like object receiving the output lines; None falls back to sys.stdout.
    * band: name of the band to inspect (must be present in each picture's band list).
    * threshold: a number, or a string such as "95%" meaning that percentile of each picture.
    * modules: optional list of Python files, each defining newBand(geoPicture); they are
      applied to every picture before the band is read.

    Each output line is "world<TAB>longitude latitude timestamp value".
    Lines that fail to deserialize with IOError are skipped.
    """
    if outputStream is None:
        outputStream = sys.stdout

    loadedModules = []
    if modules is not None:
        for module in modules:
            globalVars = {}
            with open(module) as moduleFile:
                moduleSource = moduleFile.read()
            # NOTE(review): this executes arbitrary code from `module`; only pass trusted files.
            exec(compile(moduleSource, module, "exec"), globalVars)
            loadedModules.append(globalVars["newBand"])

    while True:
        # load a picture
        line = inputStream.readline()
        if not line: break
        try:
            geoPicture = GeoPictureSerializer.deserialize(line)
        except IOError:
            continue

        # generate new bands
        for newBand in loadedModules:
            geoPicture = newBand(geoPicture)

        # prepare a transform function for this picture
        tlx, weres, werot, tly, nsrot, nsres = json.loads(geoPicture.metadata["GeoTransform"])
        spatialReference = osr.SpatialReference()
        spatialReference.ImportFromWkt(geoPicture.metadata["Projection"])
        coordinateTransform = osr.CoordinateTransformation(spatialReference, spatialReference.CloneGeogCS())

        # get the relevant band
        picture = geoPicture.picture[:, :, geoPicture.bands.index(band)]

        # A percentage threshold is resolved per picture.  The caller's `threshold` must not
        # be overwritten, otherwise the first picture's percentile would be reused for all
        # following pictures.
        if isinstance(threshold, basestring) and threshold[-1] == "%":
            cut = numpy.percentile(picture, float(threshold[:-1]))
        else:
            cut = threshold

        brightPixels = numpy.argwhere(picture > cut)
        if len(brightPixels) == 0:
            continue

        # the timestamp is the same for every pixel of a picture, so parse it only once
        startTime = json.loads(geoPicture.metadata["L1T"])["PRODUCT_METADATA"]["START_TIME"]
        timestamp = time.mktime(datetime.datetime.strptime(startTime, "%Y %j %H:%M:%S").timetuple())

        # loop over indices that are above threshold
        for i, j in brightPixels:
            longitude, latitude, altitude = coordinateTransform.TransformPoint(tlx + j*weres, tly + i*nsres)

            # ultimately, longitude/latitude points should map to different keys, one key per reducer
            # for now, there is only one key: world
            outputStream.write("world\t%g %g %d %g\n" % (longitude, latitude, timestamp, picture[i, j]))
def inputOneLine(inputStream):
    """Read a single line from inputStream and return it deserialized as a picture.

    Raises IOError("No input") when the stream yields no more data.
    """
    serialized = inputStream.readline()
    if serialized:
        return GeoPictureSerializer.deserialize(serialized)
    raise IOError("No input")
def inputSequenceFile(inputStream, incomingBandRestriction):
    """Assemble one multi-band picture from SequenceFile-style "name<TAB>value" lines.

    The stream must start with three header entries, in this order: "metadata" (a JSON
    object), "bands" (a JSON list of band names) and "shape" (a JSON list).  After that,
    one line per band follows, each holding the band name and a serialized one-band picture.

    * inputStream: file-like object to read from (usually sys.stdin).
    * incomingBandRestriction: None to load every band, or a set of band names; only the
      intersection with the bands in the file is loaded (in sorted order).

    Returns a GeoPictureSerializer.GeoPicture holding the requested bands.
    Raises IOError when the header order is wrong, an unexpected band appears, a band has
    the wrong shape, or an announced band is missing.
    """
    # enforce a structure on SequenceFile entries to be sure that Hadoop isn't splitting it up among multiple mappers
    name, metadata = inputStream.readline().rstrip().split("\t")
    if name != "metadata":
        raise IOError('First entry in the SequenceFile is "%s" rather than metadata' % name)
    # force plain str keys and values (json.loads returns unicode on Python 2)
    metadata = dict((str(key), str(value)) for key, value in json.loads(metadata).items())

    name, bands = inputStream.readline().rstrip().split("\t")
    if name != "bands":
        raise IOError('Second entry in the SequenceFile is "%s" rather than bands' % name)
    bands = [str(b) for b in json.loads(bands)]

    name, shape = inputStream.readline().rstrip().split("\t")
    if name != "shape":
        raise IOError('Third entry in the SequenceFile is "%s" rather than shape' % name)
    shape = json.loads(shape)

    if incomingBandRestriction is not None:
        # drop undesired bands
        onlyload = sorted(incomingBandRestriction.intersection(bands))
        shape = (shape[0], shape[1], len(onlyload))
    else:
        onlyload = bands
        shape = tuple(shape)

    # make a master image to fill
    geoPicture = GeoPictureSerializer.GeoPicture()
    geoPicture.metadata = metadata
    geoPicture.bands = onlyload
    # builtin float is what the removed numpy.float alias referred to
    geoPicture.picture = numpy.empty(shape, dtype=float)

    # load individual bands from the SequenceFile and add them to the master image, if desired
    bandsSeen = []
    while True:
        line = inputStream.readline()
        if not line: break

        band, data = line.rstrip().split("\t")
        bandsSeen.append(band)

        if band not in bands:
            raise IOError('SequenceFile contains "%s" when it should only have %s bands' % (band, str(bands)))

        if band in onlyload:
            index = onlyload.index(band)
            oneBandPicture = GeoPictureSerializer.deserialize(data)

            if oneBandPicture.picture.shape[0:2] != geoPicture.picture.shape[0:2]:
                raise IOError(
                    'SequenceFile band "%s" has shape %s instead of %d by %d by 1'
                    % (band, oneBandPicture.picture.shape, shape[0], shape[1])
                )

            geoPicture.picture[:, :, index] = oneBandPicture.picture[:, :, 0]

        # stop as soon as every announced band has been read; do not consume further lines
        if len(bandsSeen) == len(bands): break

    for band in bands:
        if band not in bandsSeen:
            raise IOError('SequenceFile does not contain "%s" when it should have %s' % (band, str(bands)))

    return geoPicture
def inputOneLine(inputStream):
    """Report progress on stderr, then read one line and return it deserialized as a picture.

    The status message uses the Hadoop streaming "reporter:status:" prefix.  It is
    terminated with a newline so that it forms a complete line on stderr instead of
    running into whatever is written there next.

    Raises IOError("No input") when the stream yields no more data.
    """
    sys.stderr.write("reporter:status:inputOneLine, calling readline\n")
    line = inputStream.readline()
    if not line:
        raise IOError("No input")
    return GeoPictureSerializer.deserialize(line)
def __init__(self, inputStream):
    """Load a picture from a serialized stream.

    inputStream may be a file-like object or a serialized string, but not a file name.
    The original author notes that loading takes about 30 seconds.
    """
    loadedPicture = GeoPictureSerializer.deserialize(inputStream)
    self._geoPicture = loadedPicture
def _percentileExtreme(bandPictures, percent, choose):
    """Return choose() (min or max) of the per-band percentiles, preferring pixels above 10."""
    candidates = []
    for b in bandPictures:
        bb = b[b > 10.]
        if len(bb) > 0:
            candidates.append(numpy.percentile(bb, percent))
    if len(candidates) == 0:
        # no band has any pixel above 10: fall back to percentiles over all pixels
        candidates = [numpy.percentile(b, percent) for b in bandPictures]
    return choose(candidates)


def _scaleToByteRange(picture, minRadiance, maxRadiance):
    """Linearly map [minRadiance, maxRadiance] onto [0, 255], clamping values outside."""
    return numpy.minimum(numpy.maximum((picture - minRadiance) / (maxRadiance - minRadiance) * 255, 0), 255)


def reduce_tiles(tiles, inputStream, outputDirectory=None, outputAccumulo=None, configuration=[{"layer": "RGB", "bands": ["B029", "B023", "B016"], "outputType": "RGB", "minRadiance": 0., "maxRadiance": "sun"}]):
    """Performs the part of the reducing step of the Hadoop map-reduce job that depends on key-value input.

    Reduce key: tile coordinate and timestamp
    Reduce value: transformed picture
    Actions: overlay images in time-order to produce one image per tile coordinate.

        * tiles: dictionary of tiles built up by this procedure (this function adds to tiles);
          keys are (depth, longIndex, latIndex, layer), values are (red, green, blue, mask) uint8 arrays
        * inputStream: usually sys.stdin; should be key-value pairs separated by a tab, one per line.
          The key is parsed as "T<depth>-<longIndex>-<latIndex>-<timestamp>".
        * outputDirectory: local filesystem directory for debugging output (usually the output goes to Accumulo)
        * outputAccumulo: Java virtual machine containing AccumuloInterface classes
        * configuration: list of layer configurations with the following format
                         {"layer": layerName, "bands", [band1, band2, band3], "outputType": "RGB", "yellow", etc.,
                          "minRadiance": number or string percentage, "maxRadiance": number or string percentage}
                         "maxRadiance" may also be "sun" for "RGB" layers.  The default list is never
                         modified here.  If configuration is incorrectly formatted, the behavior is undefined.

    Values that fail to deserialize with IOError are skipped.  Raises NotImplementedError
    for an outputType other than "RGB" or "yellow".
    """
    while True:
        line = inputStream.readline()
        if not line: break

        if not configuration:
            # nothing to render: just drain the stream
            continue

        # The key and the picture do not depend on the layer, so parse them once per
        # line rather than once per layer configuration.
        tabPosition = line.index("\t")
        key = line[:tabPosition]
        # strip only the line terminator, so a final line without a newline keeps its last character
        value = line[tabPosition+1:].rstrip("\n")

        depth, longIndex, latIndex, timestamp = map(int, key.lstrip("T").split("-"))

        try:
            geoPicture = GeoPictureSerializer.deserialize(value)
        except IOError:
            continue

        for config in configuration:
            layer = config["layer"]
            bands = config["bands"]
            outputType = config["outputType"]
            minRadiance = config["minRadiance"]
            maxRadiance = config["maxRadiance"]

            # a string such as "5%" requests a percentile; None marks "compute it from the picture"
            if isinstance(minRadiance, basestring) and minRadiance[-1] == "%":
                try:
                    minPercent = float(minRadiance[:-1])
                except ValueError:
                    minPercent = 5.
                minRadiance = None

            if isinstance(maxRadiance, basestring) and maxRadiance[-1] == "%":
                try:
                    maxPercent = float(maxRadiance[:-1])
                except ValueError:
                    maxPercent = 95.
                maxRadiance = None

            if (depth, longIndex, latIndex, layer) not in tiles:
                shape = geoPicture.picture.shape[:2]
                outputRed = numpy.zeros(shape, dtype=numpy.uint8)
                outputGreen = numpy.zeros(shape, dtype=numpy.uint8)
                outputBlue = numpy.zeros(shape, dtype=numpy.uint8)
                outputMask = numpy.zeros(shape, dtype=numpy.uint8)
                tiles[depth, longIndex, latIndex, layer] = (outputRed, outputGreen, outputBlue, outputMask)

            outputRed, outputGreen, outputBlue, outputMask = tiles[depth, longIndex, latIndex, layer]

            if outputType == "RGB":
                red = geoPicture.picture[:,:,geoPicture.bands.index(bands[0])]
                green = geoPicture.picture[:,:,geoPicture.bands.index(bands[1])]
                blue = geoPicture.picture[:,:,geoPicture.bands.index(bands[2])]

                if maxRadiance == "sun":
                    # scale the upper bound with the sine of the sun elevation, but never below 10
                    l1t = json.loads(geoPicture.metadata["L1T"])
                    sunAngle = math.sin(float(l1t["PRODUCT_PARAMETERS"]["SUN_ELEVATION"]) * math.pi/180.)
                    maxRadiance = sunAngle * 500.
                    if maxRadiance < 10.:
                        maxRadiance = 10.

                if minRadiance is None:
                    minRadiance = _percentileExtreme((red, green, blue), minPercent, min)

                if maxRadiance is None:
                    maxRadiance = _percentileExtreme((red, green, blue), maxPercent, max)

                # avoid a zero-width range: first try the full data range, then fall back to [0, 1]
                if maxRadiance == minRadiance:
                    minRadiance = min(red.min(), green.min(), blue.min())
                    maxRadiance = max(red.max(), green.max(), blue.max())
                if maxRadiance == minRadiance:
                    minRadiance, maxRadiance = 0., 1.

                red = _scaleToByteRange(red, minRadiance, maxRadiance)
                green = _scaleToByteRange(green, minRadiance, maxRadiance)
                blue = _scaleToByteRange(blue, minRadiance, maxRadiance)
                mask = numpy.minimum(numpy.maximum(geoPicture.picture[:,:,geoPicture.bands.index("MASK")] * 255, 0), 255)

                # only pixels covered by this picture overwrite what is already in the tile
                condition = (mask > 0.5)
                outputRed[condition] = red[condition]
                outputGreen[condition] = green[condition]
                outputBlue[condition] = blue[condition]
                outputMask[condition] = mask[condition]

            else:
                b = geoPicture.picture[:,:,geoPicture.bands.index(bands[0])]

                if minRadiance is None:
                    minRadiance = _percentileExtreme((b,), minPercent, min)

                if maxRadiance is None:
                    maxRadiance = _percentileExtreme((b,), maxPercent, max)

                if maxRadiance == minRadiance:
                    minRadiance = b.min()
                    maxRadiance = b.max()
                if maxRadiance == minRadiance:
                    minRadiance, maxRadiance = 0., 1.

                b = _scaleToByteRange(b, minRadiance, maxRadiance)
                mask = numpy.minimum(numpy.maximum(geoPicture.picture[:,:,geoPicture.bands.index("MASK")] * 255, 0), 255)

                condition = (mask > 0.5)

                if outputType == "yellow":
                    # yellow = red + green; the alpha channel carries the band intensity
                    outputRed[condition] = b[condition]
                    outputGreen[condition] = b[condition]
                    outputMask[condition] = b[condition]
                else:
                    raise NotImplementedError

            image = Image.fromarray(numpy.dstack((outputRed, outputGreen, outputBlue, outputMask)))
            # NOTE(review): Pillow's documented PNG flag is `optimize=True`; `options="optimize"`
            # is presumably ignored by the writer. Left unchanged to keep output bytes identical -- confirm.
            if outputDirectory is not None:
                image.save("%s/%s.png" % (outputDirectory, tileName(depth, longIndex, latIndex)), "PNG", options="optimize")
            if outputAccumulo is not None:
                buff = BytesIO()
                image.save(buff, "PNG", options="optimize")
                outputAccumulo.write("%s-%s" % (tileName(depth, longIndex, latIndex), layer), "{}", buff.getvalue())
def main(trainingDirectory, landCoverList, band_list=None, band_weight_list=None):
    """Build trainingSet.txt from serialized pictures sorted into land-cover sub-directories.

    * trainingDirectory: directory containing one sub-directory per land cover type.
    * landCoverList: names of those sub-directories.  The classification digit written to
      the last column is tied to the order of this list: digit = position + 1.
    * band_list: band grouping list; if None or empty, the default from
      FloodClassUtils.bandList() is used.
    * band_weight_list: optional weights; if given, it should correspond with band_list, and it
      should be known a priori that all serialized files contain the required bands.

    For every land cover type the rows (one per pixel that passes the mask) are shuffled and
    at most sampleLength of them are written to trainingSet.txt in the current directory.
    """
    # None defaults avoid sharing one list object between calls; an explicit [] still means "use defaults".
    if band_list is None or band_list == []:
        band_list = FloodClassUtils.bandList()
    if band_weight_list is None:
        band_weight_list = []

    landCoverTypesListLength = 10 ** 50
    landCoverTypes = {}
    train = None

    for landName in landCoverList:
        firstFile = True
        landDirectory = trainingDirectory + "/" + landName

        for serialFile in os.listdir(landDirectory):
            # read in and deserialize, closing the file right away (there may be many of them)
            with open(landDirectory + "/" + serialFile) as serializedFile:
                geoPicture = GeoPictureSerializer.deserialize(serializedFile)

            # do radiance correction on bands
            geoPicture = FloodClassUtils.radianceCorrection(geoPicture)

            # find bands common to training set and requested set
            finalBandList, bandsPresent = FloodClassUtils.commonBand(geoPicture, band_list)

            # Combine bands
            geoPicture.picture = FloodClassUtils.bandSynth(geoPicture, finalBandList, band_weight_list)

            # The band list of common bands are added to the geoPicture to replace the original
            geoPicture.bands = bandsPresent

            # Report against the bands that were actually requested (band_list), group by group.
            # NOTE(review): assumes commonBand returns one group per entry of band_list -- confirm.
            missingBandList = [
                list(set(requested) - set(found)) for requested, found in zip(band_list, finalBandList)
            ]
            sys.stderr.write(
                "\nThe following requested bands are missing from serialized file "
                + str(serialFile)
                + ":\n"
                + str(missingBandList)
                + "\n"
            )

            # Need to create a mask of locations where all pixels are nonzero.
            # NOTE(review): the first band is tested with "> 0", the others by truthiness
            # (any nonzero value, including negatives) -- confirm this is intended.
            alphaBand = geoPicture.picture[:, :, geoPicture.bands.index(geoPicture.bands[0])] > 0
            for band in geoPicture.bands[1:]:
                numpy.logical_and(alphaBand, geoPicture.picture[:, :, geoPicture.bands.index(band)], alphaBand)

            # Make bands into 2-d array
            # Find size of the image array
            rasterXsize = geoPicture.picture.shape[1]
            rasterYsize = geoPicture.picture.shape[0]
            rasterDepth = geoPicture.picture.shape[2]

            # Reshape all arrays: one row per pixel, one column per band; keep masked-in pixels only
            geoPicture.picture = geoPicture.picture.reshape(rasterXsize * rasterYsize, rasterDepth)
            alphaBand = alphaBand.reshape(rasterXsize * rasterYsize)
            geoPicture.picture = geoPicture.picture[alphaBand, :]

            if firstFile:
                landCoverTypes[landName] = geoPicture.picture
                firstFile = False
            else:
                landCoverTypes[landName] = numpy.concatenate((landCoverTypes[landName], geoPicture.picture), axis=0)

        # Note that the classification digit is tied to the order of the landCoverList,
        # i.e., classification digit = landCoverList_position + 1
        landCoverTypes[landName] = numpy.concatenate(
            (
                landCoverTypes[landName],
                (landCoverList.index(landName) + 1) * numpy.ones((landCoverTypes[landName].shape[0], 1), dtype=int),
            ),
            axis=1,
        )

        # shuffle the rows so that the sample below is random
        landCoverTypes[landName] = landCoverTypes[landName][
            numpy.random.permutation(landCoverTypes[landName].shape[0]), :
        ]

        landCoverTypesListLength = min(landCoverTypes[landName].shape[0], landCoverTypesListLength)

        if train is None:
            # the sample length is fixed by the first land cover type (capped at 5000 rows)
            sampleLength = numpy.minimum(5000, landCoverTypesListLength)
            train = landCoverTypes[landName][:sampleLength, :]
        else:
            train = numpy.concatenate((train, landCoverTypes[landName][:sampleLength, :]), axis=0)

    # one fixed-width float column per band, followed by the integer classification digit
    numpy.savetxt("trainingSet.txt", train, fmt=rasterDepth * "%-12.5f " + "%-d")
import math
import random
import time

import numpy
from PIL import Image
from cassius import *

import GeoPictureSerializer

# Name of the serialized picture to analyze; switch by (un)commenting one of the alternatives.
pictureName = "Australia_Karijini_bigger"
# pictureName = "fresh_salt_water"
# pictureName = "PotassiumChloridePlant_topQuarter"
# pictureName = "NamibiaFloods-EO1A1800722011074110KF"
# pictureName = "GobiDesert01"

# Load the picture once at import time; the file is closed as soon as it has been deserialized.
with open("/home/pivarski/NOBACKUP/matsu_serialized/%s.serialized" % pictureName) as serializedFile:
    picture = GeoPictureSerializer.deserialize(serializedFile)

numberOfClusters = 20
seedSigmas = 2.5


def wavelength(bandNumber):
    """Convert a band number to a wavelength using two linear calibrations.

    Band numbers below 70.5 use one slope/offset pair, the rest use another.
    The result is presumably in nanometres (incidentSolar scales its argument by 1e-9) -- confirm.
    """
    if bandNumber < 70.5:
        return (bandNumber - 10.) * 10.213 + 446.
    else:
        return (bandNumber - 79.) * 10.110 + 930.


def incidentSolar(wavelength):
    """Return a Planck-law-shaped intensity for the given wavelength.

    The argument is multiplied by 1e-9 before use.  The constants look like hc/k
    (0.0143878 m*K) and a solar temperature of 5778 K, with 2.5e-28 as an overall
    normalization -- NOTE(review): confirm the intended units and normalization.
    """
    return 2.5e-28/(math.exp(0.0143878/(wavelength*1e-9)/(5778)) - 1.)/(wavelength*1e-9)**5

################################################
def map_to_tiles(inputStream, outputStream, depth=10, longpixels=512, latpixels=256, numLatitudeSections=1, splineOrder=3, modules=None):
    """Performs the mapping step of the Hadoop map-reduce job.

    Map: read L1G, possibly split by latitude, split by tile, transform pictures into tile coordinates, and output (tile coordinate and timestamp, transformed picture) key-value pairs.

        * inputStream: usually sys.stdin; should be a serialized L1G picture (one per line).
        * outputStream: usually sys.stdout; keys and values are separated by a tab, key-value pairs are separated by a newline.
        * depth: logarithmic scale of the tile; 10 is the limit of Hyperion's resolution
        * longpixels, latpixels: number of pixels in the output tiles
        * numLatitudeSections: number of latitude stripes to cut before splitting into tiles (reduces error due to Earth's curvature)
        * splineOrder: order of the spline used to calculate the affine_transformation (see SciPy docs); must be between 0 and 5
        * modules: a list of external Python files to evaluate as analytics; each must define newBand(geoPicture)

    Lines that fail to deserialize with IOError are skipped.  If serializing an output
    picture raises IOError, the literal text "BROKEN" is written in place of the value.
    """

    # Each module file is executed in its own namespace and contributes its "newBand" callable.
    # NOTE(review): this executes arbitrary code from the listed files (and the file handle is
    # never explicitly closed); only pass trusted paths.
    loadedModules = []
    if modules is not None:
        for module in modules:
            globalVars = {}
            exec(compile(open(module).read(), module, "exec"), globalVars)
            loadedModules.append(globalVars["newBand"])

    while True:
        line = inputStream.readline()
        if not line: break

        # get the Level-1 image
        try:
            geoPicture = GeoPictureSerializer.deserialize(line)
        except IOError:
            continue

        # remember the bands present before the analytics run; only these define the input mask below
        inputBands = geoPicture.bands[:]

        # run the external analytics
        for newBand in loadedModules:
            geoPicture = newBand(geoPicture)

        # convert GeoTIFF coordinates into degrees
        tlx, weres, werot, tly, nsrot, nsres = json.loads(geoPicture.metadata["GeoTransform"])
        spatialReference = osr.SpatialReference()
        spatialReference.ImportFromWkt(geoPicture.metadata["Projection"])
        coordinateTransform = osr.CoordinateTransformation(spatialReference, spatialReference.CloneGeogCS())
        rasterXSize = geoPicture.picture.shape[1]
        rasterYSize = geoPicture.picture.shape[0]
        rasterDepth = geoPicture.picture.shape[2]

        # get the timestamp to use as part of the key
        # NOTE(review): time.mktime interprets the time tuple as local time -- confirm that
        # START_TIME is not meant to be treated as UTC.
        timestamp = time.mktime(datetime.datetime.strptime(json.loads(geoPicture.metadata["L1T"])["PRODUCT_METADATA"]["START_TIME"], "%Y %j %H:%M:%S").timetuple())

        for section in xrange(numLatitudeSections):
            # fractional positions (0..1 along the picture's rows) of this latitude stripe
            bottom = (section + 0.0)/numLatitudeSections
            middle = (section + 0.5)/numLatitudeSections
            thetop = (section + 1.0)/numLatitudeSections

            # find the corners to determine which tile(s) this section belongs in
            corner1Long, corner1Lat, altitude = coordinateTransform.TransformPoint(tlx + 0.0*weres*rasterXSize, tly + bottom*nsres*rasterYSize)
            corner2Long, corner2Lat, altitude = coordinateTransform.TransformPoint(tlx + 0.0*weres*rasterXSize, tly + thetop*nsres*rasterYSize)
            corner3Long, corner3Lat, altitude = coordinateTransform.TransformPoint(tlx + 1.0*weres*rasterXSize, tly + bottom*nsres*rasterYSize)
            corner4Long, corner4Lat, altitude = coordinateTransform.TransformPoint(tlx + 1.0*weres*rasterXSize, tly + thetop*nsres*rasterYSize)

            longIndexes = []
            latIndexes = []
            for ti in tileIndex(depth, corner1Long, corner1Lat), tileIndex(depth, corner2Long, corner2Lat), tileIndex(depth, corner3Long, corner3Lat), tileIndex(depth, corner4Long, corner4Lat):
                longIndexes.append(ti[1])
                latIndexes.append(ti[2])

            # visit every tile in the index rectangle spanned by the four corners
            for ti in [(depth, x, y) for x in xrange(min(longIndexes), max(longIndexes)+1) for y in xrange(min(latIndexes), max(latIndexes)+1)]:
                longmin, longmax, latmin, latmax = tileCorners(*ti)

                # find the origin and orientation of the image (not always exactly north-south-east-west)
                # (originLong/originLat are computed but not used below)
                cornerLong, cornerLat, altitude = coordinateTransform.TransformPoint(tlx, tly)
                originLong, originLat, altitude = coordinateTransform.TransformPoint(tlx + 0.5*weres*rasterXSize, tly + middle*nsres*rasterYSize)
                leftLong, leftLat, altitude = coordinateTransform.TransformPoint(tlx + 0.0*weres*rasterXSize, tly + middle*nsres*rasterYSize)
                rightLong, rightLat, altitude = coordinateTransform.TransformPoint(tlx + 1.0*weres*rasterXSize, tly + middle*nsres*rasterYSize)
                upLong, upLat, altitude = coordinateTransform.TransformPoint(tlx + 0.5*weres*rasterXSize, tly + bottom*nsres*rasterYSize)
                downLong, downLat, altitude = coordinateTransform.TransformPoint(tlx + 0.5*weres*rasterXSize, tly + thetop*nsres*rasterYSize)

                # do some linear algebra to convert coordinates
                # (2x2 matrices act on (latitude, longitude) / (row, column) column vectors)
                L2PNG_to_geo_trans = numpy.matrix([[(latmin - latmax)/float(latpixels), 0.], [0., (longmax - longmin)/float(longpixels)]])
                L1TIFF_to_geo_trans = numpy.matrix([[(downLat - upLat)/((thetop - bottom)*rasterYSize), (rightLat - leftLat)/rasterXSize], [(downLong - upLong)/((thetop - bottom)*rasterYSize), (rightLong - leftLong)/rasterXSize]])
                geo_to_L1TIFF_trans = L1TIFF_to_geo_trans.I

                # output-tile pixel -> degrees -> input-picture pixel
                trans = geo_to_L1TIFF_trans * L2PNG_to_geo_trans

                offset_in_deg = numpy.matrix([[latmax - cornerLat], [longmin - cornerLong]], dtype=numpy.double)

                # correct for the bottom != 0. case (only if section > 0)
                truncate_correction = L1TIFF_to_geo_trans * numpy.matrix([[int(floor(bottom*rasterYSize))], [0.]], dtype=numpy.double)

                # correct for the curvature of the Earth between the top of the section and the bottom of the section (that's why we cut into latitude sections)
                curvature_correction = L1TIFF_to_geo_trans * (geo_to_L1TIFF_trans * numpy.matrix([[leftLat - cornerLat], [leftLong - cornerLong]], dtype=numpy.double) - numpy.matrix([[(middle*rasterYSize)], [0.]], dtype=numpy.double))

                offset = L1TIFF_to_geo_trans.I * (offset_in_deg - truncate_correction - curvature_correction)
                offset = offset[0,0], offset[1,0]

                # lay the GeoTIFF into the output image array
                # (only the rows belonging to this latitude section are used)
                inputPicture = geoPicture.picture[int(floor(bottom*rasterYSize)):int(ceil(thetop*rasterYSize)),:,:]

                # the input mask is true where every original input band is strictly positive
                # NOTE(review): inputMask stays None if no original band survives the analytics -- confirm this cannot happen
                inputMask = None
                for band in set(inputBands).intersection(geoPicture.bands):
                    if inputMask is None:
                        inputMask = (inputPicture[:,:,geoPicture.bands.index(band)] > 0.)
                    else:
                        numpy.logical_and(inputMask, (inputPicture[:,:,geoPicture.bands.index(band)] > 0.), inputMask)

                # transform the mask first: it is cheap and tells whether this tile receives any data at all
                outputMask = numpy.zeros((latpixels, longpixels), dtype=geoPicture.picture.dtype)
                affine_transform(inputMask, trans, offset, (latpixels, longpixels), outputMask, splineOrder)
                if numpy.count_nonzero(outputMask > 0.5) == 0: continue

                # extend the 2-D transform with an identity on the band axis
                offset = offset[0], offset[1], 0.
                trans = numpy.matrix([[trans[0,0], trans[0,1], 0.], [trans[1,0], trans[1,1], 0.], [0., 0., 1.]])

                outputPicture = numpy.zeros((latpixels, longpixels, rasterDepth), dtype=geoPicture.picture.dtype)
                affine_transform(inputPicture, trans, offset, (latpixels, longpixels, rasterDepth), outputPicture, splineOrder)

                # suppress regions that should be zero but might not be because of numerical error in affine_transform
                # this will make more of the picture eligible for zero-suppression (which checks for pixels exactly equal to zero)
                cutMask = (outputMask < 0.01)
                outputBands = []
                for i in xrange(rasterDepth):
                    outputBands.append(outputPicture[:,:,i])
                    outputBands[-1][cutMask] = 0.

                # the transformed mask becomes an extra band named "MASK"
                outputBands.append(outputMask)
                outputBands[-1][cutMask] = 0.

                outputGeoPicture = GeoPictureSerializer.GeoPicture()
                outputGeoPicture.picture = numpy.dstack(outputBands)
                outputGeoPicture.metadata = geoPicture.metadata
                outputGeoPicture.bands = geoPicture.bands + ["MASK"]

                # key: tile name, a dash, and the timestamp zero-padded to 10 digits
                outputStream.write("%s-%010d\t" % (tileName(*ti), timestamp))
                try:
                    outputGeoPicture.serialize(outputStream)
                except IOError:
                    # keep the key-value line well-formed even when serialization fails
                    outputStream.write("BROKEN")
                outputStream.write("\n")