#The functions below assume module-level imports along these lines; the
#project-local helpers (Timer, OptParse, Stats, Segment, GdalIO, glb,
#ArrayConvert, Plot, OpenDataSet, create_output, segment_image,
#_gdal_to_ctypes, _gdal_to_numpy) live elsewhere in this repository.
#  import gc
#  import sys
#  import multiprocessing as mp    #the older versions below use the full name
#  import numpy as np              #the older versions below use the full name
#  import gdal                     #or: from osgeo import gdal

def main(args):
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files; we help
    #alleviate misses by setting a larger than normal cache (1GB).
    gdal.SetCacheMax(1073741824)

    #Get stretch type
    stretch = OptParse.argget_stretch(args)

    #Get some info about the machine for mp
    cores = args['ncores']
    if cores is None:
        cores = mp.cpu_count()

    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = OpenDataSet(args['input'])
    raster = dataset.load()
    xsize, ysize, nbands, projection, geotransform = dataset.info(raster)

    #Get band information
    bands = [raster.GetRasterBand(b) for b in range(1, nbands + 1)]
    bandstats = [Stats.get_band_stats(b) for b in bands]
    b = bands[0]
    banddtype = b.DataType
    blocksize = b.GetBlockSize()
    xblocksize = blocksize[0]
    yblocksize = blocksize[1]

    output = create_output(args['outputformat'], args['output'], xsize, ysize,
                           len(bands), projection, geotransform,
                           gdal.GetDataTypeByName(args['dtype']))

    #Intelligently segment the image based upon the number of cores and the
    #intrinsic block size.
    if args['byline'] is True:
        segments = segment_image(xsize, ysize, 1, ysize)
        args['statsper'] = True
    elif args['bycolumn'] is True:
        segments = segment_image(xsize, ysize, xsize, 1)
        args['statsper'] = True
    elif args['horizontal_segments'] is not None or args['vertical_segments'] is not None:
        #The user is defining the segmentation
        segments = segment_image(xsize, ysize, args['vertical_segments'],
                                 args['horizontal_segments'])
    else:
        segments = [(0, 0, xsize, ysize)]

    carray_dtype = _gdal_to_ctypes[banddtype]

    #Preallocate a sharedmem array of the correct size
    ctypesxsize, ctypesysize = segments[0][2:]
    if args['byline'] is True:
        ctypesysize = cores
    elif args['bycolumn'] is True:
        ctypesxsize = cores
    carray = mp.RawArray(carray_dtype, ctypesxsize * ctypesysize)
    glb.sharedarray = np.frombuffer(
        carray, dtype=_gdal_to_numpy[banddtype]).reshape(ctypesysize,
                                                         ctypesxsize)
    pool = mp.Pool(processes=cores,
                   initializer=glb.init,
                   initargs=(glb.sharedarray, ))

    #A conscious decision to iterate over the bands in serial - it is an I/O
    #bottleneck anyway.
    for j, band in enumerate(bands):
        stats = bandstats[j]
        bandmin = stats['minimum']
        bandmax = stats['maximum']
        ndv = stats['ndv']
        userndv = args['ndv']
        args.update(stats)

        if args['byline'] is True:
            for y in range(0, ysize, cores):
                xstart, ystart, intervalx, intervaly = 0, y, xsize, cores
                if ystart + intervaly > ysize:
                    intervaly = ysize - ystart
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] is not None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                #if args['statsper'] is True:
                #    args.update(Stats.get_array_stats(glb.sharedarray, stretch))
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i + 1), args))
                if args['ndv'] is not None:
                    glb.sharedarray[glb.sharedarray == ndv] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(float(userndv))
                if args['scale'] is not None:
                    #Scale the data before writing to disk
                    scale(args['scale'][0], args['scale'][1], bandmin, bandmax)
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

                if not args['quiet']:
                    print "Processed {} of {} lines \r".format(y, ysize),
                    sys.stdout.flush()

        elif args['bycolumn'] is True:
            for x in range(0, xsize, cores):
                xstart, ystart, intervalx, intervaly = x, 0, cores, ysize
                if xstart + intervalx > xsize:
                    intervalx = xsize - xstart
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] is not None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray, stretch))
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i + 1), args))
                if args['ndv'] is not None:
                    glb.sharedarray[glb.sharedarray == ndv] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(float(userndv))
                if args['scale'] is not None:
                    scale(args['scale'][0], args['scale'][1], bandmin, bandmax)
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

                if not args['quiet']:
                    print "Processed {} of {} columns \r".format(x, xsize),
                    sys.stdout.flush()

        #If not processing line by line, distribute each block over the
        #available cores.
        else:
            for i, chunk in enumerate(segments):
                xstart, ystart, intervalx, intervaly = chunk
                #Read the array into the shared buffer
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] is not None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray, stretch))

                #Determine the row decomposition for each core
                step = intervaly // cores
                if step == 0:
                    step = 1
                starts = range(0, intervaly, step)
                stops = starts[1:]
                stops.append(intervaly)
                offsets = zip(starts, stops)
                for o in offsets:
                    res = pool.apply(stretch, args=(slice(o[0], o[1]), args))

                if args['ndv'] is not None:
                    glb.sharedarray[glb.sharedarray == ndv] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(float(userndv))
                if args['scale'] is not None:
                    #Scale the data before writing to disk
                    scale(args['scale'][0], args['scale'][1], bandmin, bandmax)
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

    Timer.totaltime(starttime)

    #Close up
    dataset = None
    output = None
    pool.close()
    pool.join()
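#---------------------------------------------------------------------------
#Illustrative sketch (not part of the original module) of the shared-memory
#pattern the function above relies on: a multiprocessing.RawArray wrapped as
#a NumPy view and handed to pool workers through an initializer, so workers
#mutate the buffer in place instead of receiving pickled copies. The helper
#names (_init_worker, _double_rows, _demo_shared_pool) are hypothetical.
import multiprocessing as mp
import numpy as np

def _init_worker(shared):
    #Runs once per worker; stashes the shared view in the worker's globals.
    global _shared
    _shared = shared

def _double_rows(start, stop):
    #Stand-in for a stretch function: operate in place on a row range.
    _shared[start:stop] *= 2.0

def _demo_shared_pool():
    ysize, xsize = 8, 4
    carray = mp.RawArray('f', xsize * ysize)   #float32 backing store
    shared = np.frombuffer(carray, dtype=np.float32).reshape(ysize, xsize)
    shared[:] = 1.0
    pool = mp.Pool(processes=2, initializer=_init_worker, initargs=(shared, ))
    for y in range(0, ysize, 2):
        pool.apply(_double_rows, args=(y, y + 2))
    pool.close()
    pool.join()
    return shared.sum()   #64.0 on fork-based platforms; spawn copies the view

#if __name__ == '__main__': print(_demo_shared_pool())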
def main(args):
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files; we help
    #alleviate misses by setting a larger than normal cache (1GB).
    gdal.SetCacheMax(1073741824)

    #Get stretch type
    stretch = OptParse.argget_stretch(args)

    #Get some info about the machine for mp
    cores = args['ncores']
    if cores is None:
        cores = mp.cpu_count()

    print "Loading the input dataset..."
    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = OpenDataSet(args['input'])
    raster = dataset.load()
    xsize, ysize, nbands, projection, geotransform = dataset.info(raster)

    #Get band information
    print "Computing band statistics..."
    bands = [raster.GetRasterBand(b) for b in range(1, nbands + 1)]
    bandstats = [Stats.get_band_stats(b) for b in bands]
    b = bands[0]
    banddtype = b.DataType
    blocksize = b.GetBlockSize()
    xblocksize = blocksize[0]
    yblocksize = blocksize[1]

    print "Creating output file with correct geotransformation (where applicable)..."
    output = create_output(args['outputformat'], args['output'], xsize, ysize,
                           len(bands), projection, geotransform,
                           gdal.GetDataTypeByName(args['dtype']))

    print "Computing image block offsets..."
    #Intelligently segment the image based upon the number of cores and the
    #intrinsic block size.
    if args['byline'] is True:
        segments = segment_image(xsize, ysize, 1, ysize)
        args['statsper'] = True
    elif args['bycolumn'] is True:
        segments = segment_image(xsize, ysize, xsize, 1)
        args['statsper'] = True
    elif args['horizontal_segments'] is not None or args['vertical_segments'] is not None:
        #The user is defining the segmentation
        segments = segment_image(xsize, ysize, args['vertical_segments'],
                                 args['horizontal_segments'])
    else:
        segments = [(0, 0, xsize, ysize)]

    carray_dtype = _gdal_to_ctypes[banddtype]

    print "Allocating a shared memory space for processing..."
    #Preallocate a sharedmem array of the correct size
    ctypesxsize, ctypesysize = segments[0][2:]
    if args['byline'] is True:
        ctypesysize = cores
    elif args['bycolumn'] is True:
        ctypesxsize = cores
    carray = mp.RawArray(carray_dtype, ctypesxsize * ctypesysize)
    glb.sharedarray = np.frombuffer(
        carray, dtype=_gdal_to_numpy[banddtype]).reshape(ctypesysize,
                                                         ctypesxsize)
    pool = mp.Pool(processes=cores,
                   initializer=glb.init,
                   initargs=(glb.sharedarray, ))

    #A conscious decision to iterate over the bands in serial - it is an I/O
    #bottleneck anyway.
    for j, band in enumerate(bands):
        stats = bandstats[j]
        args.update(stats)

        if args['byline'] is True:
            for y in range(0, ysize, cores):
                xstart, ystart, intervalx, intervaly = 0, y, xsize, cores
                if ystart + intervaly > ysize:
                    intervaly = ysize - ystart
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] is not None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                #if args['statsper'] is True:
                #    args.update(Stats.get_array_stats(glb.sharedarray, stretch))
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i + 1), args))
                if args['ndv'] is not None:
                    #glb.sharedarray[mask] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(
                        float(args['ndv']))
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

                if not args['quiet']:
                    print "Processed {} of {} lines \r".format(y, ysize),
                    sys.stdout.flush()

        elif args['bycolumn'] is True:
            for x in range(0, xsize, cores):
                xstart, ystart, intervalx, intervaly = x, 0, cores, ysize
                if xstart + intervalx > xsize:
                    intervalx = xsize - xstart
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] is not None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray, stretch))
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i + 1), args))
                if args['ndv'] is not None:
                    glb.sharedarray[mask] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(
                        float(args['ndv']))
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

                if not args['quiet']:
                    print "Processed {} of {} columns \r".format(x, xsize),
                    sys.stdout.flush()

        #If not processing line by line, distribute each block over the
        #available cores.
        else:
            for i, chunk in enumerate(segments):
                print "Processing segment {} of {}".format(i + 1, len(segments))
                xstart, ystart, intervalx, intervaly = chunk
                #Read the array into the shared buffer
                bandslice = band.ReadAsArray(xstart, ystart, intervalx,
                                             intervaly)
                glb.sharedarray[:intervaly, :intervalx] = bandslice
                #If the input has an NDV - mask it, and skip chunks that are
                #entirely NDV.
                if stats['ndv'] is not None:
                    if len(np.where(bandslice != stats['ndv'])[0]) == 0:
                        continue
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray, stretch))

                #Determine the row decomposition for each core
                step = intervaly // cores
                if step == 0:
                    step = 1
                starts = range(0, intervaly, step)
                stops = starts[1:]
                stops.append(intervaly)
                offsets = zip(starts, stops)
                for o in offsets:
                    res = pool.apply(stretch, args=(slice(o[0], o[1]), args))

                if args['ndv'] is not None:
                    glb.sharedarray[mask] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(
                        float(args['ndv']))
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

    Timer.totaltime(starttime)

    #Close up
    dataset = None
    output = None
    pool.close()
    pool.join()
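#---------------------------------------------------------------------------
#Illustrative sketch (not part of the original module): the per-core row
#decomposition used in the block-processing branch above, including the
#step == 0 guard for segments with fewer rows than cores. The helper name
#(_row_offsets) is hypothetical.
def _row_offsets(intervaly, cores):
    #Split intervaly rows into contiguous (start, stop) pairs, one per task.
    step = intervaly // cores
    if step == 0:
        step = 1
    starts = list(range(0, intervaly, step))
    stops = starts[1:] + [intervaly]
    return list(zip(starts, stops))

#e.g. _row_offsets(10, 4) -> [(0, 2), (2, 4), (4, 6), (6, 8), (8, 10)]
#and  _row_offsets(2, 4)  -> [(0, 1), (1, 2)]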
def main(options, args):
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files; we help
    #alleviate misses by setting a larger than normal cache (1GB).
    gdal.SetCacheMax(1073741824)

    #Check for input
    if not args:
        print "\nERROR: You must supply an input data set.\n"
        sys.exit(1)

    #Get stretch type
    stretch = OptParse.get_stretch(options)

    #Get some info about the machine for multiprocessing
    cores = multiprocessing.cpu_count()
    cores *= 2
    print "Processing on %i cores." % cores

    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = GdalIO.GdalIO(args[0])
    raster = dataset.load()

    #Default is None, unless the user specified an output datatype.
    if options['dtype'] is None:
        dtype = gdal.GetDataTypeName(raster.GetRasterBand(1).DataType)
    else:
        dtype = options['dtype']

    #Create an output if the stretch is written to disk
    xsize, ysize, bands, projection, geotransform = dataset.info(raster)
    output = dataset.create_output("", options['output'], xsize, ysize, bands,
                                   projection, geotransform,
                                   gdal.GetDataTypeByName(dtype))

    #Segment the image to handle either RAM constraints or selective processing
    segments = Segment.segment_image(xsize, ysize, options['vint'],
                                     options['hint'])

    for b in xrange(bands):
        band = raster.GetRasterBand(b + 1)
        bandstats = Stats.get_band_stats(band)
        for key in bandstats.iterkeys():
            options[key] = bandstats[key]

        #Get the size of the segments to be manipulated
        piecenumber = 1
        for chunk in segments:
            print "Image segmented. Processing segment %i of %i" % (
                piecenumber, len(segments))
            piecenumber += 1
            (xstart, ystart, intervalx, intervaly) = chunk

            array = band.ReadAsArray(xstart, ystart, intervalx,
                                     intervaly).astype(numpy.float32)

            if options['ndv_band'] is not None:
                array = numpy.ma.masked_values(array, options['ndv_band'],
                                               copy=False)
            elif options['ndv'] is not None:
                array = numpy.ma.masked_values(array, options['ndv'],
                                               copy=False)

            if 'stretch' in stretch.__name__:
                array = Stats.normalize(array, options['bandmin'],
                                        options['bandmax'], dtype)

            #If the user wants to calc stats per segment:
            if options['segment'] is True:
                stats = Stats.get_array_stats(array, stretch)
                for key in stats.iterkeys():
                    options[key] = stats[key]
            #Otherwise use the stats per band for each segment
            else:
                options['mean'] = options['bandmean']
                options['maximum'] = options['bandmax']
                options['minimum'] = options['bandmin']
                options['standard_deviation'] = options['bandstd']

            y, x = array.shape

            #Calculate the hist and cdf if we need it. This way we do not
            #calc it per core.
            if options['histequ_stretch'] is True:
                cdf, bins = Stats.gethist_cdf(array, options['num_bins'])
                options['cdf'] = cdf
                options['bins'] = bins

            #Fill the masked values with NaN to get to a shared array
            if options['ndv'] is not None:
                array = array.filled(numpy.nan)

            #Create a ctypes-backed shared array
            init(ArrayConvert.SharedMemArray(array))

            step = y // cores
            jobs = []
            if step != 0:
                for i in range(0, y, step):
                    p = multiprocessing.Process(target=stretch,
                                                args=(shared_arr,
                                                      slice(i, i + step)),
                                                kwargs=options)
                    jobs.append(p)
            for job in jobs:
                job.start()
            for job in jobs:
                job.join()

            #Return the array to the proper data range and write it out,
            #scaling if that is what the user wants.
            if options['histequ_stretch'] or options['gamma_stretch']:
                pass
            elif 'filter' in stretch.__name__:
                pass
            else:
                Stats.denorm(shared_arr.asarray(), dtype, kwargs=options)

            if options['scale'] is not None:
                Stats.scale(shared_arr.asarray(), kwargs=options)

            #If there are NaNs in the array, replace them with the dataset
            #no data value.
            Stats.setnodata(shared_arr, options['ndv'])

            #Write the output
            output.GetRasterBand(b + 1).WriteArray(shared_arr.asarray(),
                                                   xstart, ystart)

            #Visualize before the shared array is torn down below.
            if options['visualize'] is True:
                Plot.show_hist(shared_arr.asarray())

            #Manually clean up to stop memory leaks.
            del array, jobs, shared_arr.data
            try:
                del stats
            except NameError:
                #stats is only bound when per-segment statistics were computed
                pass
            del globals()['shared_arr']
            gc.collect()

        if options['ndv'] is not None:
            output.GetRasterBand(b + 1).SetNoDataValue(float(options['ndv']))
        elif options['ndv_band'] is not None:
            output.GetRasterBand(b + 1).SetNoDataValue(
                float(options['ndv_band']))

    Timer.totaltime(starttime)

    #Close up
    dataset = None
    output = None
    gc.collect()
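#---------------------------------------------------------------------------
#Illustrative sketch (not part of the original module) of the no-data
#handling used above: mask the NDV with numpy.ma, then fill masked cells
#with NaN so the marker survives the trip through shared memory. The sample
#values are hypothetical, with 0 standing in for the NDV.
import numpy

pixels = numpy.array([0., 5., 0., 9.], dtype=numpy.float32)
masked = numpy.ma.masked_values(pixels, 0., copy=False)   #mask the NDV
filled = masked.filled(numpy.nan)                         #NaN marks no-data
#filled -> array([nan,  5., nan,  9.], dtype=float32)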
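#---------------------------------------------------------------------------
#Illustrative sketch (not part of the original module): how main(options,
#args) above might be invoked. The key names come from the lookups in the
#function; the values are hypothetical, and the band-statistics keys
#(bandmin, bandmax, bandmean, bandstd) are filled in by the function itself.
example_options = {
    'dtype': None,              #None -> inherit the input band's data type
    'output': 'stretched.tif',  #destination raster
    'vint': None,               #vertical / horizontal segmentation intervals
    'hint': None,
    'ndv': None,                #user-supplied no data value
    'ndv_band': None,           #no data value read from the band
    'segment': False,           #recompute statistics per segment
    'histequ_stretch': False,
    'gamma_stretch': False,
    'num_bins': 256,            #histogram bins for histogram equalization
    'scale': None,
    'visualize': False,         #show a histogram of each processed segment
}
#main(example_options, ['input.tif'])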