Python Stats Exemples, pystretch.core.Stats Python Exemples

Exemple #1

0

Afficher le fichier

def main(args):
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files
    # we help alleviate misses by setting a larger than normal cache.  1GB

    gdal.SetCacheMax(1073741824)

    #Get stretch type
    stretch = OptParse.argget_stretch(args)

    #Get some info about the machine for mp
    cores = args['ncores']
    if cores is None:
        cores = mp.cpu_count()

    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = OpenDataSet(args['input'])
    raster = dataset.load()
    xsize, ysize, nbands, projection, geotransform = dataset.info(raster)

    #Get band information
    bands = [raster.GetRasterBand(b) for b in range(1, nbands + 1)]
    bandstats = [Stats.get_band_stats(b) for b in bands]
    b = bands[0]
    banddtype = b.DataType
    blocksize = b.GetBlockSize()
    xblocksize = blocksize[0]
    yblocksize = blocksize[1]

    output = create_output(args['outputformat'], args['output'], xsize, ysize,
                           len(bands), projection, geotransform,
                           gdal.GetDataTypeByName(args['dtype']))

    #Intelligently segment the image based upon number of cores and intrinsic block size
    if args['byline'] is True:
        segments = segment_image(xsize, ysize, 1, ysize)
        args['statsper'] = True
    elif args['bycolumn'] is True:
        segments = segment_image(xsize, ysize, xsize, 1)
        args['statsper'] = True
    elif args['horizontal_segments'] is not None or args[
            'vertical_segments'] is not None:
        #The user is defining the segmentation
        segments = segment_image(xsize, ysize, args['vertical_segments'],
                                 args['horizontal_segments'])
    else:
        segments = [(0, 0, xsize, ysize)]

    carray_dtype = _gdal_to_ctypes[banddtype]

    #Preallocate a sharedmem array of the correct size
    ctypesxsize, ctypesysize = segments[0][2:]
    if args['byline'] is True:
        ctypesysize = cores
    elif args['bycolumn'] is True:
        ctypesxsize = cores
    carray = mp.RawArray(carray_dtype, ctypesxsize * ctypesysize)
    glb.sharedarray = np.frombuffer(carray,
                                    dtype=_gdal_to_numpy[banddtype]).reshape(
                                        ctypesysize, ctypesxsize)

    pool = mp.Pool(processes=cores,
                   initializer=glb.init,
                   initargs=(glb.sharedarray, ))

    #A conscious decision to iterate over the bands in serial - a IO bottleneck anyway
    for j, band in enumerate(bands):

        stats = bandstats[j]
        bandmin = stats['minimum']
        bandmax = stats['maximum']
        ndv = stats['ndv']
        userndv = args['ndv']
        args.update(stats)

        if args['byline'] is True:
            for y in range(0, ysize, cores):
                xstart, ystart, intervalx, intervaly = 0, y, xsize, cores
                if ystart + intervaly > ysize:
                    intervaly = ysize - ystart
                #print ystart, ystart + intervaly
                #print y, ystart, ystart+ intervaly, intervaly
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] != None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                #if args['statsper'] is True:
                #args.update(Stats.get_array_stats(glb.sharedarray, stretch))
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i + 1), args))

                if args['ndv'] != None:
                    glb.sharedarray[glb.sharedarray == ndv] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(float(userndv))
                if args['scale'] is not None:
                    #Scale the data before writing to disk
                    scale(args['scale'][0], args['scale'][1], bandmin, bandmax)
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

                if args['quiet']:
                    print "Processed {} or {} lines \r".format(y, ysize),
                    sys.stdout.flush()
        elif args['bycolumn'] is True:
            for x in range(0, xsize, cores):
                xstart, ystart, intervalx, intervaly = x, 0, cores, ysize
                if xstart + intervalx > xsize:
                    intervalx = xsize - xstart

                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] != None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray,
                                                      stretch))
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i + 1), args))

                if args['ndv'] != None:
                    glb.sharedarray[glb.sharedarray == ndv] = args['ndv']
                    output.GetRasterBand(j + 1).SetNoDataValue(float(userndv))
                if args['scale'] is not None:
                    scale(args['scale'][0], args['scale'][1], bandmin, bandmax)
                output.GetRasterBand(j + 1).WriteArray(
                    glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

                if args['quiet']:
                    print "Processed {} or {} lines \r".format(x, xsize),
                    sys.stdout.flush()
        #If not processing line by line, distirbuted the block over availabel cores
        else:
            for i, chunk in enumerate(segments):
                xstart, ystart, intervalx, intervaly = chunk
                #Read the array into the buffer
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(
                    xstart, ystart, intervalx, intervaly)

                #If the input has an NDV - mask it.
                if stats['ndv'] != None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray,
                                                         stats['ndv'],
                                                         copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray,
                                                      stretch))

                #Determine the decomposition for each core

                step = intervaly // cores

                starts = range(0, intervaly + 1, step)
                stops = starts[1:]
                stops.append(intervaly + 1)
                offsets = zip(starts, stops)
                for o in offsets:
                    res = pool.apply(stretch, args=(slice(o[0], o[1]), args))

            if args['ndv'] != None:
                glb.sharedarray[glb.sharedarray == ndv] = args['ndv']
                output.GetRasterBand(j + 1).SetNoDataValue(float(userndv))
            if args['scale'] is not None:
                #Scale the data before writing to disk
                scale(args['scale'][0], args['scale'][1], bandmin, bandmax)
            output.GetRasterBand(j + 1).WriteArray(
                glb.sharedarray[:intervaly, :intervalx], xstart, ystart)

    Timer.totaltime(starttime)

    #Close up
    dataset = None
    output = None
    pool.close()
    pool.join()

Exemple #2

0

Afficher le fichier

Fichier : pystretcher.py Projet : jlaura/pysat

def main(args):
    """Stretch every band of the input raster and write it to the output.

    ``args`` is the parsed option mapping.  Keys read here: ``ncores``,
    ``input``, ``outputformat``, ``output``, ``dtype``, ``byline``,
    ``bycolumn``, ``horizontal_segments``, ``vertical_segments``,
    ``statsper``, ``ndv`` and ``quiet``.

    The mapping is mutated: ``statsper`` is forced to True in the by-line and
    by-column modes, and per-band (and optionally per-segment) statistics are
    merged into it before it is passed to the stretch function.

    Returns None; results are written through the GDAL output dataset.
    """
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files
    # we help alleviate misses by setting a larger than normal cache.  1GB

    gdal.SetCacheMax(1073741824)

    #Get stretch type
    stretch = OptParse.argget_stretch(args)
    #Get some info about the machine for mp
    cores = args['ncores']
    if cores is None:
        cores = mp.cpu_count()

    print "Loading the input dataset..."
    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = OpenDataSet(args['input'])
    raster = dataset.load()
    xsize, ysize, nbands, projection, geotransform = dataset.info(raster)


    #Get band information
    print "Computing band statistics..."
    bands = [raster.GetRasterBand(b) for b in range(1, nbands + 1)]
    bandstats = [Stats.get_band_stats(b) for b in bands]
    #The first band's data type is used to size the shared buffer below.
    b = bands[0]
    banddtype = b.DataType
    #NOTE(review): the block size values are not used anywhere in this function.
    blocksize = b.GetBlockSize()
    xblocksize = blocksize[0]
    yblocksize = blocksize[1]

    print "Creating output file with correct geotransformation (where applicable)..."
    output = create_output(args['outputformat'],args['output'],
                        xsize, ysize, len(bands), projection,
                        geotransform, gdal.GetDataTypeByName(args['dtype']))

    print "Computing image block offsets..."
    #Segment the image according to the requested processing mode
    if args['byline'] is True:
        segments = segment_image(xsize, ysize, 1, ysize)
        args['statsper'] = True
    elif args['bycolumn'] is True:
        segments = segment_image(xsize, ysize, xsize, 1)
        args['statsper'] = True
    elif args['horizontal_segments'] is not None or args['vertical_segments'] is not None:
        #The user is defining the segmentation
        segments = segment_image(xsize, ysize, args['vertical_segments'],args['horizontal_segments'])

    else:
        #No segmentation requested: process the whole image as one segment
        segments = [(0,0,xsize, ysize)]

    carray_dtype = _gdal_to_ctypes[banddtype]

    print "Allocating a shared memory space for processing..."
    #Preallocate a sharedmem array sized from the first segment; in the
    # by-line / by-column modes one row / column per core is buffered instead.
    ctypesxsize, ctypesysize= segments[0][2:]
    if args['byline'] is True:
        ctypesysize = cores
    elif args['bycolumn'] is True:
        ctypesxsize = cores
    carray = mp.RawArray(carray_dtype, ctypesxsize * ctypesysize)
    glb.sharedarray = np.frombuffer(carray,dtype=_gdal_to_numpy[banddtype]).reshape(ctypesysize, ctypesxsize)

    #Each worker is initialised with the shared array via glb.init
    pool = mp.Pool(processes=cores, initializer=glb.init, initargs=(glb.sharedarray, ))

    #A conscious decision to iterate over the bands in serial - a IO bottleneck anyway
    for j,band in enumerate(bands):
        stats = bandstats[j]
        #Merges every key of the band statistics into args.  stats carries an
        # 'ndv' key (read below), so args['ndv'] is replaced by the band value.
        args.update(stats)

        if args['byline'] is True:
            #Read `cores` rows at a time; one row is handed to each task
            for y in range(0, ysize, cores):
                xstart, ystart, intervalx, intervaly = 0, y, xsize, cores
                #Clip the final chunk to the image extent
                if ystart + intervaly > ysize:
                    intervaly = ysize - ystart
                #print ystart, ystart + intervaly
                #print y, ystart, ystart+ intervaly, intervaly
                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] != None:
                    #copy=False: the masked array is built without copying the buffer
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray, stats['ndv'], copy=False)
                    #NOTE(review): mask is not used in this branch (the restore below is commented out)
                    mask = np.ma.getmask(glb.sharedarray)
                #if args['statsper'] is True:
                    #args.update(Stats.get_array_stats(glb.sharedarray, stretch))
                #pool.apply blocks until the task finishes, so rows run one after another
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i+1), args))


                if args['ndv'] != None:
                    #glb.sharedarray[mask] = args['ndv']
                    output.GetRasterBand(j+1).SetNoDataValue(float(args['ndv']))
                output.GetRasterBand(j+1).WriteArray(glb.sharedarray[:intervaly, :intervalx], xstart,ystart)

                #NOTE(review): progress is printed when 'quiet' is truthy - confirm the flag's polarity
                if args['quiet']:
                    print "Processed {} or {} lines \r".format(y, ysize),
                    sys.stdout.flush()
        elif args['bycolumn'] is True:
            #Read `cores` columns at a time
            for x in range(0, xsize, cores):
                xstart, ystart, intervalx, intervaly = x, 0, cores, ysize
                #Clip the final chunk to the image extent
                if xstart + intervalx > xsize:
                    intervalx = xsize - xstart

                glb.sharedarray[:intervaly, :intervalx] = band.ReadAsArray(xstart, ystart, intervalx, intervaly)
                #If the input has an NDV - mask it.
                if stats['ndv'] != None:
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray, stats['ndv'], copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                #Recompute statistics for this chunk and merge them into args
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray, stretch))
                #NOTE(review): tasks receive row slices i..i+1 here although the
                # buffer holds columns per core - confirm against the stretch functions
                for i in range(cores):
                    res = pool.apply(stretch, args=(slice(i, i+1), args))

                #Put the NDV back where the input was masked before writing
                if args['ndv'] != None:
                    glb.sharedarray[mask] = args['ndv']
                    output.GetRasterBand(j+1).SetNoDataValue(float(args['ndv']))

                output.GetRasterBand(j+1).WriteArray(glb.sharedarray[:intervaly, :intervalx], xstart,ystart)

                if args['quiet']:
                    print "Processed {} or {} lines \r".format(x, xsize),
                    sys.stdout.flush()
        #If not processing line by line, distribute the block over available cores
        else:
            for i, chunk in enumerate(segments):
                print i, len(segments)
                xstart, ystart, intervalx, intervaly = chunk
                #Read the array into the buffer
                bandslice = band.ReadAsArray(xstart, ystart, intervalx, intervaly)
                glb.sharedarray[:intervaly, :intervalx] = bandslice

                #If the input has an NDV - mask it.
                if stats['ndv'] != None:

                    #Skip segments that contain nothing but NDV.
                    #NOTE(review): such segments are never written to the output - confirm this is intended
                    if len(np.where(bandslice != stats['ndv'])[0]) == 0:
                        continue
                    glb.sharedarray = np.ma.masked_equal(glb.sharedarray, stats['ndv'], copy=False)
                    mask = np.ma.getmask(glb.sharedarray)
                if args['statsper'] is True:
                    args.update(Stats.get_array_stats(glb.sharedarray, stretch))

                #Determine the decomposition for each core
                step = intervaly // cores
                #Fewer rows than cores: fall back to one row per task
                if step == 0:
                    step = 1

                #Build (start, stop) row offsets; the last stop runs one past the segment
                starts = range(0, intervaly+1, step)
                stops = starts[1:]
                stops.append(intervaly+1)
                offsets = zip(starts, stops)
                for o in offsets:
                    res = pool.apply(stretch, args=(slice(o[0], o[1]), args))

                #Put the NDV back where the input was masked before writing
                if args['ndv'] != None:
                    glb.sharedarray[mask] = args['ndv']
                    output.GetRasterBand(j+1).SetNoDataValue(float(args['ndv']))

                output.GetRasterBand(j+1).WriteArray(glb.sharedarray[:intervaly, :intervalx], xstart,ystart)

    Timer.totaltime(starttime)

    #Close up: drop the dataset references, then shut the worker pool down
    dataset = None
    output = None
    pool.close()
    pool.join()

Exemple #3

0

Afficher le fichier

Fichier : pystretcher.py Projet : jlaura/PyStretch

def main(options, args):
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files, we help alleviate misses by setting a larger than normal cache.  1GB
    gdal.SetCacheMax(1073741824)
    
    #Check for input
    if not args:
        print "\nERROR: You must supply an input data set.\n"
        sys.exit(0)
    
    #Get stretch type
    stretch = OptParse.get_stretch(options)
    
    #Get some info about the machine for multiprocessing
    cores = multiprocessing.cpu_count()
    cores *= 2
    print "Processing on %i cores." %cores
    
    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = GdalIO.GdalIO(args[0])
    raster = dataset.load()

    #Default is none, unless user specified
    if options['dtype'] == None:
        dtype = gdal.GetDataTypeName(raster.GetRasterBand(1).DataType)
    else:
        dtype=options['dtype']
    
    #Create an output if the stretch is written to disk
    xsize, ysize, bands, projection, geotransform = dataset.info(raster)
    output = dataset.create_output("",options['output'],xsize,ysize,bands,projection, geotransform, gdal.GetDataTypeByName(dtype))

    #Segment the image to handle either RAM constraints or selective processing
    segments = Segment.segment_image(xsize,ysize,options['vint'], options['hint'])
    
    for b in xrange(bands):
        band = raster.GetRasterBand(b+1)
        bandstats = Stats.get_band_stats(band)
        for key in bandstats.iterkeys():
            options[key] = bandstats[key]            
        
        #Get the size of the segments to be manipulated
        piecenumber = 1
        for chunk in segments:
            
            print "Image segmented.  Processing segment %i of %i" %(piecenumber, len(segments))
            piecenumber += 1
            (xstart, ystart, intervalx, intervaly) = chunk
            
            array = band.ReadAsArray(xstart, ystart, intervalx, intervaly).astype(numpy.float32)
            
            if options['ndv_band'] != None:
                array = numpy.ma.masked_values(array, options['ndv_band'], copy=False)
            elif options['ndv'] != None:
                array = numpy.ma.masked_values(array, options['ndv'], copy=False)
            
            if 'stretch' in stretch.__name__:
                array = Stats.normalize(array, options['bandmin'], options['bandmax'], dtype)
    
            #If the user wants to calc stats per segment:
            if options['segment'] == True:  
                stats = Stats.get_array_stats(array, stretch) 
                for key in stats.iterkeys():
                    options[key] = stats[key]
            #Otherwise use the stats per band for each segment
            else:
                options['mean'] = options['bandmean']
                options['maximum'] = options['bandmax']
                options['minimum'] = options['bandmin']
                options['standard_deviation'] = options['bandstd']
            
            y,x = array.shape
            
            #Calculate the hist and cdf if we need it.  This way we do not calc it per core.
            if options['histequ_stretch'] == True:
                cdf, bins = Stats.gethist_cdf(array,options['num_bins'])
                options['cdf'] = cdf
                options['bins'] = bins
            

            #Fill the masked values with NaN to get to a shared array
            if options['ndv'] != None:
                array = array.filled(numpy.nan)
            
            #Create an ctypes array
            init(ArrayConvert.SharedMemArray(array))
            
            step = y // cores
            jobs = []
            if step != 0:
                for i in range(0,y,step):        
                    p = multiprocessing.Process(target=stretch,args= (shared_arr,slice(i, i+step)),kwargs=options)
                    jobs.append(p)
                    
                for job in jobs:
                    job.start()
                    del job
                for job in jobs:
                    job.join()
                    del job
            
            #Return the array to the proper data range and write it out.  Scale if that is what the user wants
            if options['histequ_stretch'] or options['gamma_stretch']== True:
                pass
            elif 'filter' in stretch.__name__:
                pass
            else:
                Stats.denorm(shared_arr.asarray(), dtype, kwargs=options)

            if options['scale'] != None:
                Stats.scale(shared_arr.asarray(), kwargs=options)
                
            #If their are NaN in the array replace them with the dataset no data value
            Stats.setnodata(shared_arr, options['ndv'])

            #Write the output
            output.GetRasterBand(b+1).WriteArray(shared_arr.asarray(), xstart,ystart)            

            #Manually cleanup to stop memory leaks.
            del array, jobs, shared_arr.data
            try: 
                del stats
            except:
                pass
            del globals()['shared_arr']
            gc.collect()
            
            if options['ndv'] != None:
                output.GetRasterBand(b+1).SetNoDataValue(float(options['ndv']))
            elif options['ndv_band'] != None:
                output.GetRasterBand(b+1).SetNoDataValue(float(options['ndv_band']))
                
                
    if options['visualize'] == True:
        Plot.show_hist(shared_arr.asarray())
    
    Timer.totaltime(starttime)
    
    #Close up
    dataset = None
    output = None
    gc.collect()

Exemple #4

0

Afficher le fichier

Fichier : pystretcher.py Projet : Sandy4321/PyStretch

def main(options, args):
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files, we help alleviate misses by setting a larger than normal cache.  1GB
    gdal.SetCacheMax(1073741824)

    #Check for input
    if not args:
        print "\nERROR: You must supply an input data set.\n"
        sys.exit(0)

    #Get stretch type
    stretch = OptParse.get_stretch(options)

    #Get some info about the machine for multiprocessing
    cores = multiprocessing.cpu_count()
    cores *= 2
    print "Processing on %i cores." % cores

    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = GdalIO.GdalIO(args[0])
    raster = dataset.load()

    #Default is none, unless user specified
    if options['dtype'] == None:
        dtype = gdal.GetDataTypeName(raster.GetRasterBand(1).DataType)
    else:
        dtype = options['dtype']

    #Create an output if the stretch is written to disk
    xsize, ysize, bands, projection, geotransform = dataset.info(raster)
    output = dataset.create_output("", options['output'], xsize, ysize, bands,
                                   projection, geotransform,
                                   gdal.GetDataTypeByName(dtype))

    #Segment the image to handle either RAM constraints or selective processing
    segments = Segment.segment_image(xsize, ysize, options['vint'],
                                     options['hint'])

    for b in xrange(bands):
        band = raster.GetRasterBand(b + 1)
        bandstats = Stats.get_band_stats(band)
        for key in bandstats.iterkeys():
            options[key] = bandstats[key]

        #Get the size of the segments to be manipulated
        piecenumber = 1
        for chunk in segments:

            print "Image segmented.  Processing segment %i of %i" % (
                piecenumber, len(segments))
            piecenumber += 1
            (xstart, ystart, intervalx, intervaly) = chunk

            array = band.ReadAsArray(xstart, ystart, intervalx,
                                     intervaly).astype(numpy.float32)

            if options['ndv_band'] != None:
                array = numpy.ma.masked_values(array,
                                               options['ndv_band'],
                                               copy=False)
            elif options['ndv'] != None:
                array = numpy.ma.masked_values(array,
                                               options['ndv'],
                                               copy=False)

            if 'stretch' in stretch.__name__:
                array = Stats.normalize(array, options['bandmin'],
                                        options['bandmax'], dtype)

            #If the user wants to calc stats per segment:
            if options['segment'] == True:
                stats = Stats.get_array_stats(array, stretch)
                for key in stats.iterkeys():
                    options[key] = stats[key]
            #Otherwise use the stats per band for each segment
            else:
                options['mean'] = options['bandmean']
                options['maximum'] = options['bandmax']
                options['minimum'] = options['bandmin']
                options['standard_deviation'] = options['bandstd']

            y, x = array.shape

            #Calculate the hist and cdf if we need it.  This way we do not calc it per core.
            if options['histequ_stretch'] == True:
                cdf, bins = Stats.gethist_cdf(array, options['num_bins'])
                options['cdf'] = cdf
                options['bins'] = bins

            #Fill the masked values with NaN to get to a shared array
            if options['ndv'] != None:
                array = array.filled(numpy.nan)

            #Create an ctypes array
            init(ArrayConvert.SharedMemArray(array))

            step = y // cores
            jobs = []
            if step != 0:
                for i in range(0, y, step):
                    p = multiprocessing.Process(target=stretch,
                                                args=(shared_arr,
                                                      slice(i, i + step)),
                                                kwargs=options)
                    jobs.append(p)

                for job in jobs:
                    job.start()
                    del job
                for job in jobs:
                    job.join()
                    del job

            #Return the array to the proper data range and write it out.  Scale if that is what the user wants
            if options['histequ_stretch'] or options['gamma_stretch'] == True:
                pass
            elif 'filter' in stretch.__name__:
                pass
            else:
                Stats.denorm(shared_arr.asarray(), dtype, kwargs=options)

            if options['scale'] != None:
                Stats.scale(shared_arr.asarray(), kwargs=options)

            #If their are NaN in the array replace them with the dataset no data value
            Stats.setnodata(shared_arr, options['ndv'])

            #Write the output
            output.GetRasterBand(b + 1).WriteArray(shared_arr.asarray(),
                                                   xstart, ystart)

            #Manually cleanup to stop memory leaks.
            del array, jobs, shared_arr.data
            try:
                del stats
            except:
                pass
            del globals()['shared_arr']
            gc.collect()

            if options['ndv'] != None:
                output.GetRasterBand(b + 1).SetNoDataValue(
                    float(options['ndv']))
            elif options['ndv_band'] != None:
                output.GetRasterBand(b + 1).SetNoDataValue(
                    float(options['ndv_band']))

    if options['visualize'] == True:
        Plot.show_hist(shared_arr.asarray())

    Timer.totaltime(starttime)

    #Close up
    dataset = None
    output = None
    gc.collect()