def Add_idx( src_dsn ):
    '''Build an rtree index for an existing shapefile fileset.

    One index entry is inserted per feature, keyed by the feature id and
    bounded by the envelope of the feature's geometry.

    :param src_dsn: (str) path to input shapefile, no extensions
    :return: None
    '''
    # Source layer, and the index opened on the same base path
    src_ds, src_lyr = Ogr_open(src_dsn)
    index = FastRtree(src_dsn, interleaved=True)

    n_features = float(src_lyr.GetFeatureCount())
    progress_bar = Countdown(n_features)

    for fid in range(0, int(n_features)):
        # Keep the feature bound to a name while its geometry is in use
        feature = src_lyr.GetFeature(fid)
        shape = feature.GetGeometryRef()
        # OGR envelopes come back as (xmin, xmax, ymin, ymax); the index
        # is fed (xmin, ymin, xmax, ymax)
        left, right, bottom, top = shape.GetEnvelope()
        index.insert(fid, (left, bottom, right, top))
        progress_bar.check(fid)

    progress_bar.flush()
    index.close()
def Fill_lc_hdf( hdfp ): '''Populate a Landcover hdf5 array file. There is no mosaicing or subsetting process here, since the landcover IS the AOI. Additionally, there are probably not as many landcover fields as there are MODIS or ERA days to aggregate. Because of this simplicity, the hdf5 population for landcover doesn't have the same multiprocessing complexity. It just loops through the landcover tifs, reads in the array, and copies it to the appropriate part of the hdf5 array file. :param hdfp: (dict) array parameters :return: None ''' print 'Continuing lc',hdfp['h5f'] numfields = len(hdfp['fields']) progress_bar = Countdown(numfields) for i in range(numfields): with h5py.File(hdfp['h5f'], "a") as hdf: ras = gdal.Open(hdfp['fields'].values()[i]) r = ras.GetRasterBand(1) a = r.ReadAsArray() hdf['lc'][i,:,:] = a progress_bar.check(i) progress_bar.flush() print 'Landcover hdf5 FINISHED!'
def Lcmod_manager( project, lcm_sz, mod_ind_list ): if 'lcm_thresh' in project.keys(): try: lcm_thresh = int(project['lcm_thresh']) except: 'lcm_thresh not an integer (INPUT.txt)' lcm_thresh = 20000 else: lcm_thresh = 20000 if 'mod_per_region' in project.keys(): mod_per_candidate = project['mod_per_region'] try: mod_per_reg = float(mod_per_candidate) except: mod_default = 50. print 'mod_per_region not an integer (INPUT.txt)' print 'assigning default %s modis IDs per region' % mod_default mod_per_reg = float(mod_default) else: mod_default = 50. mod_per_reg = float(mod_default) if lcm_sz > lcm_thresh: # How many times does mod_per_reg go into len(mod_id) ? num_reg = int(math.ceil(len(mod_ind_list)/mod_per_reg)) modr = (lcm_sz,num_reg,int(mod_per_reg)) print 'Splitting %s lcm features into %s regions, %s MODIS cells each' % modr # For each region, export one set of CSV files # for the modis ids within that region progress_bar = Countdown(num_reg, update_interval=.005) for region in range(num_reg): s = int(region * mod_per_reg) e = int((region+1) * mod_per_reg) if e>=len(mod_ind_list): e=-1 region_mod_ind = mod_ind_list[s:e] Land2csv(project, region_mod_ind, region=region) for mod_type in project['modis'].keys(): for modis_sds in project['modis'][mod_type].values(): Mod2csv(project, modis_sds, region_mod_ind, region=region) progress_bar.check(region) progress_bar.flush() else: print 'Writing %s lcm features to 1 CSV per dataset:' % lcm_sz print 'lc.csv: ' Land2csv(project, mod_ind_list, region=None) for mod_type in project['modis'].keys(): for modis_sds in project['modis'][mod_type].values(): sys.stdout.write("%s.csv . . " % modis_sds) sys.stdout.flush() Mod2csv(project, modis_sds, mod_ind_list) sys.stdout.write("Done \n") sys.stdout.flush()
def Vlc2csv(project, mod_ind_list, region=None):
    '''Write landcover (lcm) features to CSV, one row per feature.

    For every MODIS id in mod_ind_list, the lcm shapefile
    (<shp_dir>/<prj_name>_lcm.shp) is queried for the features with that
    mod_id, and each feature is written as one CSV row.

    Output path:
      * region is None: <csv_dir>/<prj_name>_lc.csv, with a progress bar
      * otherwise:      <csv_dir>/lc/<prj_name>_lc_<region>.csv
        (the 'lc' directory is created if needed; no progress bar)

    :param project: (dict) project parameters; reads 'prj_name',
                    'shp_dir', 'csv_dir' and 'lc' (maps CSV column name
                    to the suffix of the shapefile field 'lc_<suffix>')
    :param mod_ind_list: (list) sequences whose first item is a MODIS id
    :param region: (int or None) region number, None for a single file
    :return: None
    '''
    lcm_dsn = project['prj_name']+'_lcm'
    lcm_path = os.path.join(project['shp_dir'],lcm_dsn)
    lcm_ds = ogr.Open(lcm_path+'.shp',gdalconst.GA_ReadOnly)

    single_file = region is None
    if single_file:
        lc_csv_fn = os.path.join(project['csv_dir'],
                                 project['prj_name']+'_lc.csv')
    else:
        out_dir = os.path.join(project['csv_dir'],'lc')
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
        lc_csv_fn = os.path.join(out_dir,
                                 project['prj_name']+'_lc_'+str(region)+'.csv')

    # Do we want cell-center coordinates for modis? era? lc?
    # Which projections?
    hdr = ['id','area','lc_id','modis_id','era_id']
    base_fields = ['id','area','lc_id','mod_id','era_id']
    # Landcover attribute fields, resolved once instead of per feature
    lc_fields = []
    for attrib in project['lc'].keys():
        hdr.append(attrib)
        lc_fields.append('lc_'+project['lc'][attrib])

    lc_sql_ = 'SELECT '+','.join(base_fields+lc_fields)+' FROM '+lcm_dsn+\
              ' WHERE mod_id=%i'

    progress_bar = None
    if single_file:
        progress_bar = Countdown(len(mod_ind_list))

    # 'with' closes the CSV even if a query fails part-way through
    with open(lc_csv_fn,'wt') as lc_csv_f:
        lc_csv = csv.writer(lc_csv_f)
        lc_csv.writerow(hdr)
        for i, mod_entry in enumerate(mod_ind_list):
            lc_lyr = lcm_ds.ExecuteSQL(lc_sql_ % mod_entry[0])
            try:
                lc_feat = lc_lyr.GetNextFeature()
                while lc_feat:
                    out = [lc_feat.GetField(name) for name in base_fields]
                    out.extend([lc_feat.GetField(name)
                                for name in lc_fields if len(name)>0])
                    lc_csv.writerow(out)
                    lc_feat = lc_lyr.GetNextFeature()
            finally:
                # Result layers from ExecuteSQL must be handed back to the
                # datasource; otherwise one accumulates per MODIS id.
                if lc_lyr is not None:
                    lcm_ds.ReleaseResultSet(lc_lyr)
            if progress_bar is not None:
                progress_bar.check(i)

    del lcm_ds
    if progress_bar is not None:
        progress_bar.flush()
def Continue_hdf( hdfp ): print 'Continuing', hdfp['sds'], hdfp['h5f'] start_end = Gen_appendexes( hdfp ) if start_end: progress_bar = Countdown(len(start_end)) for i,s_e in enumerate(start_end): p = multiprocessing.Process(target=Append_to_hdf, \ args=( hdfp, s_e[0], s_e[1])) p.start() p.join() # main script waits for this child to grow up progress_bar.check(i) progress_bar.flush() print hdfp['sds'],'FINISHED!'
def Continue_modis_hdf( project, hdfp ): '''Continue population of a MODIS hdf5 array file. If the file was just created with Mk_hdf, it will be populated from the beginning. If the process was previously interrupted at timestep <i>, it will be populated from <i-1>, ensuring that all resulting output timesteps are complete. This function uses multiprocessing, it is not recommended to exploit this by calling more than one child at a time. Every child process would try to modify the same file, which might adversely affect the integrity of the resulting file. Multiprocessing is used to make sure the RAM buffers are cleared periodically while looping through the potentially memory-intensive task of aggregating all MODIS tiles to a single hdf5 array file. :param project: (dict) py-stint project parameters :param hdfp: (dict) conversion parameters :return: None ''' print 'Continuing',hdfp['sds'],hdfp['h5f'] start_end = Gen_appendexes( hdfp ) if start_end: progress_bar = Countdown(len(start_end)) for i,s_e in enumerate(start_end): arrrrrgs = ( project, hdfp, s_e[0], s_e[1] ) p = multiprocessing.Process(target=Append_to_hdf, args=arrrrrgs) p.start() p.join() # main script waits for this child to grow up progress_bar.check(i) progress_bar.flush() print hdfp['sds'],'FINISHED!'
def Gen_ssarV2_tiles(project, out_dsn):
    '''Write a polygon shapefile of 30x30-MODIS-cell tile outlines.

    The MODIS raster extent is walked row by row from the upper-left
    corner in steps of 30 cells. Every tile bounding box is transformed
    with the module-level `sin2utm33n` transformation and stored with an
    integer 'id' and a 'tile_name' of the form '<row>_<col>'.

    :param project: (dict) project parameters; reads
                    project['paths']['modis_fn']
    :param out_dsn: (str) path to output shapefile, no extensions
    :return: None
    '''
    # Define blocks of 30x30 MODIS cells (cells MUST NOT repeat!!)
    mod_params = Parse_extents(project['paths']['modis_fn'])
    mod_xmin = float(mod_params['xmin'])
    mod_xmax = float(mod_params['xmax'])
    mod_ymin = float(mod_params['ymin'])
    mod_ymax = float(mod_params['ymax'])
    tile_dx = mod_params['dx'] * 30
    tile_dy = mod_params['dy'] * 30

    Mk_proj( utm33n_string,out_dsn )

    # Define shapefile for the tile bounds, with feature type polygon
    driver = ogr.GetDriverByName('Esri Shapefile')
    tiles_out_ds = driver.CreateDataSource(out_dsn+'.shp')
    tiles_out_layer = tiles_out_ds.CreateLayer('',None,ogr.wkbPolygon)
    tiles_out_layer.CreateField(ogr.FieldDefn('id',ogr.OFTInteger))
    # Text field: pass the configured definition itself. Passing a fresh
    # FieldDefn here discards the width set on the line above.
    field_name = ogr.FieldDefn("tile_name", ogr.OFTString)
    field_name.SetWidth(24)
    tiles_out_layer.CreateField(field_name)
    defn = tiles_out_layer.GetLayerDefn()

    idVar = 0
    count = 0
    count_max = int(math.ceil((mod_ymax-mod_ymin) / tile_dy))
    progress_bar = Countdown(count_max, update_interval=.01)

    tile_uly = mod_ymax
    tile_y_ind = 0
    # Rounding to 3 decimals keeps accumulated float error from adding
    # or dropping a row/column at the far edge.
    while round(tile_uly,3) > round(mod_ymin,3):
        tile_ulx = mod_xmin
        tile_x_ind = 0
        while round(tile_ulx,3) < round(mod_xmax,3):
            tile_bbox_utm33 = Mk_bbox(tile_ulx, tile_uly - tile_dy,
                                      tile_ulx + tile_dx, tile_uly)
            tile_bbox_utm33.Transform(sin2utm33n)
            tile_id = '{0}_{1}'.format(tile_y_ind,tile_x_ind)
            feat = ogr.Feature(defn)
            feat.SetField('id',idVar)
            feat.SetField('tile_name',tile_id)
            feat.SetGeometry(tile_bbox_utm33)
            tiles_out_layer.CreateFeature(feat)
            feat = None
            idVar += 1
            tile_ulx += tile_dx
            tile_x_ind += 1
        tile_uly -= tile_dy
        tile_y_ind += 1
        count += 1
        progress_bar.check(count)

    # Save and close everything: drop the layer-level references first,
    # then the datasource, so the shapefile is flushed to disk.
    defn = None
    tiles_out_layer = None
    tiles_out_ds = None
    progress_bar.flush()
def Isect_mod_clim_ssar(project):
    '''Intersect MODIS cells, climate cells and ssarV1 landcover, tile by tile.

    project['paths'] is read for these keys:
        'climate_fn'   : climate raster (extent source)
        'climate_dsn'  : passed through to Mk_mod_clim_tile
        'modis_fn'     : MODIS raster (extent source)
        'ssarV1_dir'   : directory holding ss_ar_<tile_id>.shp files,
                         e.g. '/space/wib_data/LANDCOVER/ss_ar_shp/'
        'ssarV1_tiles' : tile-bounds shapefile (plus rtree index), no extension

    Workflow:
      * define blocks of 30x30 MODIS cells (cells MUST NOT repeat!!)
      * for each block flagged in the MODIS mask:
          - find the ssarV1 tiles whose bounds intersect the block
          - intersect every landcover feature of those tiles with the
            block's modis/climate intersection features
          - write one landcover row per intersection to
            <csv_dir>/lc/ssarV2_lc_<row>_<col>.csv
          - hand the MODIS and climate cells that had hits to
            Write_modis_tile / Write_climate_tile

    Side effect: project['climate_hdr'] is set to the climate CSV header
    (ids plus one '%Y%j' column per day in the modis_days range).

    If project contains 'restart', the MODIS id counter and mask are
    taken from it instead of being rebuilt.

    :param project: (dict) project parameters
    :return: None
    '''
    # get ssarV1 attributes
    ssar_attribs = Get_lc_attribs(project)
    ssar_hdr = ['area','modis_area','modis_id','climate_id']
    for attrib in ssar_attribs:
        ssar_hdr.append(attrib)
    # Make climate csv header: one column per calendar day between the
    # first and last entry of project['modis_days'] (inclusive)
    start_date = dt.datetime.strptime(str(project['modis_days'][0]), '%Y%j').date()
    end_date = dt.datetime.strptime(str(project['modis_days'][-1]), '%Y%j').date()
    numdays = (end_date-start_date).days + 1
    daterange = [(start_date + dt.timedelta(days=x)).strftime('%Y%j') for x in range(0, numdays)]
    project['climate_hdr'] = ['climate_id', 'x_ind', 'y_ind'] + daterange
    # load climate bbox
    climate_params = Parse_extents(project['paths']['climate_fn'])
    climate_bbox = Mk_bbox(float(climate_params['xmin']), float(climate_params['ymin']), float(climate_params['xmax']), float(climate_params['ymax']))
    # Define blocks of 30x30 MODIS cells (cells MUST NOT repeat!!)
    mod_params = Parse_extents(project['paths']['modis_fn'])
    mod_xmin = float(mod_params['xmin'])
    mod_xmax = float(mod_params['xmax'])
    mod_ymin = float(mod_params['ymin'])
    mod_ymax = float(mod_params['ymax'])
    mod_dx = mod_params['dx']
    mod_dy = mod_params['dy']
    mod_prj = mod_params['srs']
    tile_dx = mod_dx * 30
    tile_dy = mod_dy * 30
    # load ssarV1 tile bounds
    ssarV1_ds, ssarV1_lyr = Ogr_open(project['paths']['ssarV1_tiles'])
    ssarV1_r = FastRtree(project['paths']['ssarV1_tiles'])
    # Resume from saved state if available, otherwise start from scratch
    if 'restart' in project.keys():
        modis_idVar = project['restart']['modis_idVar']
        modis_nanmask = project['restart']['modis_nanmask']
    else:
        # Make a mask for MODIS cells which are worth processing (contain any non-nan values from a cloud-free image)
        modis_nanmask = Mk_modis_nanmask(project)
        modis_idVar = 0
    # idVar counts processed tiles and drives the progress bar
    idVar = 0
    count_max = modis_nanmask.sum()
    progress_bar = Countdown(count_max, update_interval=.01)
    tile_uly = mod_ymax
    tile_y_ind = 0
    # Walk the MODIS extent tile by tile from the upper-left corner;
    # comparisons are rounded to 3 decimals
    while round(tile_uly,3) > round(mod_ymin,3):
        tile_ulx = mod_xmin
        tile_x_ind = 0
        while round(tile_ulx,3) < round(mod_xmax,3):
            # The mask is indexed per tile as [row, column]
            if modis_nanmask[tile_y_ind,tile_x_ind]:
                idVar += 1
                # new_tile: modis/climate features for this tile not built yet
                # new_ssar_csv: landcover csv for this tile not opened yet
                new_tile = True
                new_ssar_csv = True
                tile_xmin = tile_ulx
                tile_ymin = tile_uly - tile_dy
                tile_xmax = tile_ulx + tile_dx
                tile_ymax = tile_uly
                # if creating from scratch:
                # tile_y_ind = 83; tile_x_ind = 34
                #tile_ymin = tile_uly - tile_y_ind*tile_dy
                #tile_xmax = tile_ulx + tile_x_ind*tile_dx
                tile_bbox_utm33 = Mk_bbox(tile_xmin, tile_ymin, tile_xmax, tile_ymax)
                tile_bbox_utm33.Transform(sin2utm33n)
                txmin,txmax,tymin,tymax = tile_bbox_utm33.GetEnvelope()
                modis_rows_to_write = [] # no list if no intersections
                climate_rows_to_write = [] # no list if no intersections
                if tile_bbox_utm33.Intersects(climate_bbox):
                    hits = ssarV1_r.intersection((txmin,tymin,txmax,tymax)) # (gxmin,gymin,gxmax,gymax)
                    for hit_fid in hits:
                        # Build the tile's modis/climate features only once,
                        # and only when the index reports at least one hit
                        if new_tile == True:
                            tile_id = '{0}_{1}'.format(tile_y_ind,tile_x_ind)
                            # {0} is filled in later with the dataset name
                            tile_out_fmt = os.path.join(project['csv_dir'],
                                                        '{0}/ssarV2_{0}_'+tile_id+'.csv') # .format(sds)
                            modis_rows_to_write = set()
                            climate_rows_to_write = set()
                            # generate modis features in tile
                            # - transform to utm33
                            # - get areas
                            # generate climate features intersecting tile
                            modis = [(tile_xmin, tile_ymin,tile_xmax,tile_ymax), mod_params, modis_idVar]
                            climate = project['paths']['climate_dsn']# [(txmin, tymin, txmax, tymax), climate_params]
                            mod_clim_isect, modis_idVar = Mk_mod_clim_tile(modis, climate, tile_x_ind, tile_y_ind)
                            mod_clim_isect_r = mod_clim_isect['idx']
                            new_tile = False
                        # Index hits are candidates only; confirm with the
                        # actual tile geometry
                        ssarV1_tile = ssarV1_lyr.GetFeature(hit_fid)
                        geom2 = ssarV1_tile.GetGeometryRef()
                        if tile_bbox_utm33.Intersects(geom2):
                            ssarV1_tile_id = ssarV1_tile.GetField('tile_id')
                            # load tile
                            ssarV1_tile_dsn = os.path.join(project['paths']['ssarV1_dir'], 'ss_ar_'+str(ssarV1_tile_id))
                            if os.path.isfile(ssarV1_tile_dsn+'.shp'):
                                ssarV1_tile_ds, ssarV1_tile_lyr = Ogr_open(ssarV1_tile_dsn)
                                for fid1 in range(0,ssarV1_tile_lyr.GetFeatureCount()):
                                    ssarV1_feat = ssarV1_tile_lyr.GetFeature(fid1)
                                    ssar_geom = ssarV1_feat.GetGeometryRef()
                                    fxmin,fxmax,fymin,fymax = ssar_geom.GetEnvelope()
                                    # get attribute fields
                                    ssar_row_proto = [ssarV1_feat.GetField(attrib) for attrib in ssar_attribs]
                                    final_hits = mod_clim_isect_r.intersection((fxmin,fymin,fxmax,fymax))
                                    for mod_clim_id in final_hits:
                                        mod_clim_feat = mod_clim_isect['geom'][mod_clim_id]
                                        if ssar_geom.Intersects(mod_clim_feat):
                                            isect = ssar_geom.Intersection(mod_clim_feat)
                                            isect_area = isect.GetArea()
                                            climate_id = mod_clim_isect['climate_id'][mod_clim_id]
                                            climate_x_ind = mod_clim_isect['climate_x_ind'][mod_clim_id]
                                            climate_y_ind = mod_clim_isect['climate_y_ind'][mod_clim_id]
                                            modis_id = mod_clim_isect['modis_id'][mod_clim_id]
                                            modis_area = mod_clim_isect['modis_area'][mod_clim_id]
                                            modis_ctr_x = mod_clim_isect['modis_ctr_x'][mod_clim_id]
                                            modis_ctr_y = mod_clim_isect['modis_ctr_y'][mod_clim_id]
                                            modis_x_ind = mod_clim_isect['modis_x_ind'][mod_clim_id]
                                            modis_y_ind = mod_clim_isect['modis_y_ind'][mod_clim_id]
                                            # Sets collapse repeated cells to one row each
                                            climate_rows_to_write.add((climate_id, climate_x_ind, climate_y_ind))
                                            modis_rows_to_write.add((modis_id, modis_area, modis_ctr_x, modis_ctr_y, modis_x_ind, modis_y_ind))
                                            # The landcover csv is opened on the first
                                            # actual intersection, so tiles without any
                                            # produce no file
                                            if new_ssar_csv==True:
                                                # open ssar csv, write header
                                                ssar_fn = tile_out_fmt.format('lc')
                                                ssar_f = open(ssar_fn,'wt')
                                                ssar_csv = csv.writer(ssar_f)
                                                ssar_csv.writerow(ssar_hdr)
                                                new_ssar_csv = False
                                            ssar_row = [isect_area,modis_area,modis_id,climate_id] + ssar_row_proto
                                            ssar_csv.writerow(ssar_row)
                            else:
                                print ssarV1_tile_dsn, 'absent; moving on'
                    # Close the landcover csv only if it was opened for this tile
                    if new_ssar_csv==False:
                        ssar_f.close()
                    # Write modis and climate datasets to CSVs, for all cells which had hits
                    if len(modis_rows_to_write)>0:
                        Write_modis_tile(project, modis_rows_to_write, tile_out_fmt)
                    if len(climate_rows_to_write)>0:
                        Write_climate_tile(project, climate_rows_to_write, tile_out_fmt)
            tile_ulx += tile_dx
            tile_x_ind+=1
        tile_uly -= tile_dy
        tile_y_ind+=1
        progress_bar.check(idVar)
    progress_bar.flush()
def Mk_polygrid(params):
    '''Write a polygon shapefile with one rectangle per raster cell.

    Call as Mk_polygrid(params) after params = Parse_extents(rasterfile).
    Follows the QGIS fTools approach (VectorGrid tool), with the QGIS
    parts replaced by OGR and the local extent-grabbing functions.

    Cells are generated row by row from the upper-left corner. Each
    feature carries 'id', the cell centre ('ctr_x', 'ctr_y') and its
    column/row indices ('x_ind', 'y_ind'). If params contains the key
    'idx', an rtree index is written alongside the shapefile.

    :param params: (dict) reads 'xmin', 'xmax', 'ymin', 'ymax', 'dx',
                   'dy', 'srs', 'outf' (output path, no extension) and
                   optionally 'idx'
    :return: None
    '''
    left = float(params['xmin'])
    right = float(params['xmax'])
    bottom = float(params['ymin'])
    top = float(params['ymax'])
    dx = params['dx']
    dy = params['dy']
    srs = params['srs']
    outf = params['outf']

    build_index = 'idx' in params.keys()
    if build_index:
        index = FastRtree(outf,interleaved=True)

    # Output shapefile with polygon features; the extra fields link each
    # polygon back to the raster cell it came from
    driver = ogr.GetDriverByName('Esri Shapefile')
    shp_ds = driver.CreateDataSource(outf+'.shp')
    grid_lyr = shp_ds.CreateLayer('',None,ogr.wkbPolygon)
    field_specs = [('id',ogr.OFTInteger),
                   ('ctr_x',ogr.OFTReal),
                   ('ctr_y',ogr.OFTReal),
                   ('x_ind',ogr.OFTInteger),
                   ('y_ind',ogr.OFTInteger)]
    for spec_name, spec_type in field_specs:
        grid_lyr.CreateField(ogr.FieldDefn(spec_name,spec_type))
    defn = grid_lyr.GetLayerDefn()

    cell_id = 0
    rows_done = 0
    progress_bar = Countdown((top-bottom) / dy)

    y = top
    y_ind = 0
    # Bounds are compared after rounding to 3 decimals
    while round(y,3) > round(bottom,3):
        x = left
        x_ind = 0
        while round(x,3) < round(right,3):
            # Closed ring: upper-left, upper-right, lower-right,
            # lower-left, and back to upper-left
            ring = ogr.Geometry(ogr.wkbLinearRing)
            for corner_x, corner_y in ((x, y),
                                       (x + dx, y),
                                       (x + dx, y - dy),
                                       (x, y - dy),
                                       (x, y)):
                ring.AddPoint(corner_x, corner_y)
            poly = ogr.Geometry(ogr.wkbPolygon)
            poly.AddGeometry(ring)

            cell_feat = ogr.Feature(defn)
            cell_feat.SetField('id',cell_id)
            cell_feat.SetField('ctr_x',x+0.5*dx)
            cell_feat.SetField('ctr_y',y-0.5*dy)
            cell_feat.SetField('x_ind',x_ind)
            cell_feat.SetField('y_ind',y_ind)
            cell_feat.SetGeometry(poly)
            grid_lyr.CreateFeature(cell_feat)
            if build_index:
                index.insert(cell_id,(x, y - dy, x + dx, y))
            cell_feat = None

            cell_id += 1
            x += dx
            x_ind += 1
        y -= dy
        y_ind += 1
        rows_done += 1
        progress_bar.check(rows_done)

    # Save and close everything
    shp_ds = grid_lyr = cell_feat = ring = poly = None
    if build_index:
        index.close()
    progress_bar.flush()
    Mk_proj( srs.ExportToWkt(), params['outf'] )
def Reprj_and_idx( src_dsn, dst_dsn, dst_srs, fields, area=False ): ''' Reprj_and_idx(src_dsn,dst_dsn,dst_srs,fields_to_preserve) in_shp = 'path/to/in' # no extension dst_dsn = 'path/to/out' # no extension dst_srs = osr.SpatialReference(), initialized to output ref system :param area: (bool) True if 'area' attribute should be output ''' # Load source dataset src_ds = ogr.Open(src_dsn+'.shp',gdalconst.GA_ReadOnly) src_lyr = src_ds.GetLayer(0) src_srs = src_lyr.GetSpatialRef() # Open index and prepare coordinate transformation r = FastRtree(dst_dsn,interleaved=True) t = osr.CoordinateTransformation(src_srs,dst_srs) #src_srs,dst_srs # if CoordinateTransformation fails, it will return null: if t == None: print '[ERROR] Could not reproject between given reference systems' sys.exit( 1 ) # Prepare output dataset driver = ogr.GetDriverByName('Esri Shapefile') dst_ds = driver.CreateDataSource(dst_dsn+'.shp') dst_lyr = dst_ds.CreateLayer('',None,ogr.wkbPolygon) dst_lyr.CreateField(ogr.FieldDefn('id',ogr.OFTInteger)) # Define extra fields linking shapefile to raster&dataset src_lyr_defn = src_lyr.GetLayerDefn() for ind,field_name in enumerate(fields): try: field_i = src_lyr_defn.GetFieldIndex(field_name) field_defn = src_lyr_defn.GetFieldDefn(field_i) dst_lyr.CreateField(field_defn) except: print 'Source dataset has no field named',field_name jnk = fields.pop(ind) if area == True: dst_lyr.CreateField(ogr.FieldDefn('area',ogr.OFTReal)) defn = dst_lyr.GetLayerDefn() # Initialize progress updater count_max = float(src_lyr.GetFeatureCount()) # count_update = count_max * 0.05 # print progress every 5%! 
progress_bar = Countdown(count_max) for fid in range(0,src_lyr.GetFeatureCount()): src_feat = src_lyr.GetFeature(fid) geom = src_feat.GetGeometryRef() geom.Transform(t) # Insert to index xmin,xmax,ymin,ymax = geom.GetEnvelope() r.insert(fid,(xmin,ymin,xmax,ymax)) # Write to output shapefile dst_feat = ogr.Feature(defn) dst_feat.SetField('id',fid) for field_name in fields: field_val = src_feat.GetField(field_name) dst_feat.SetField(field_name,field_val) if area == True: dst_feat.SetField('area',geom.GetArea()) dst_feat.SetGeometry(geom) dst_lyr.CreateFeature(dst_feat) dst_feat = geom = None # Print progress: progress_bar.check(fid) #if int( math.fmod( fid, count_update ) ) == 0: # prog = int( fid / count_max * 100 ) # report = '%s%% . . ' % prog # sys.stdout.write( report ) # sys.stdout.flush() progress_bar.flush() #sys.stdout.write("\n") dst_prj = dst_srs.ExportToWkt() Mk_proj(dst_prj,dst_dsn) # Close, flush, save output files r.close() dst_ds = dst_lyr = dst_feat = geom = defn = None
def _insert_isect_rows(cursor, rows):
    '''Insert (fid, px, py, area) rows into the isect table with bound parameters.'''
    # Pixel indexes derived from numpy index values are numpy scalars,
    # which sqlite3 may refuse to bind ("Error binding parameter 1 -
    # probably unsupported type"). Casting to native int/float lets the
    # parameterized form work and stores areas at full precision.
    cursor.executemany('INSERT INTO isect VALUES (?,?,?,?)',
                       [(int(fid), int(px), int(py), float(cell_area))
                        for fid, px, py, cell_area in rows])


def Isect_ras_poly(ras_fn,poly_dsn,dst_fn):
    '''Intersect the cells of a raster with the features of a polygon layer.

    The raster path carries its extension; poly_dsn has none. Raster and
    polygons should already be in the same projection.

    For every polygon feature, the raster cells wholly within it and the
    cells crossing its border are found. The output format follows the
    extension of dst_fn:

      * '.db': rows (fid, px, py, area) are inserted into the table
        'isect' of an sqlite database, which is indexed on fid at the
        end. Use this for larger regions or finer resolutions: the data
        can later be read step by step, which scales better.
      * anything else: a pickle of {fid: [within, intersecting]}, where
        within is a structured array (x, y) and intersecting a
        structured array (x, y, area), or an empty list when there are
        no such cells. Features without any cell are omitted. Use this
        when the dataset is smallish (less than millions of output
        features), since the whole dict is held in memory.

    px/py are raster coordinates (column, row).

    :param ras_fn: (str) path to raster file, with extension
    :param poly_dsn: (str) path to polygon shapefile, no extension
    :param dst_fn: (str) path to output file ('.db' selects sqlite)
    :return: None
    '''
    ras = Parse_extents(ras_fn)
    poly_ds = ogr.Open(poly_dsn+'.shp',gdalconst.GA_ReadOnly)
    poly_lyr = poly_ds.GetLayer(0)
    # Cell origins: left edges in x, and y values descending from ymax
    ras_x = np.arange(ras['xmin'],ras['xmax'],ras['dx'])
    ras_y = np.arange(ras['ymax'],ras['ymin'],-1*ras['dy'])
    cell_size = np.abs(ras['dy']) * np.abs(ras['dx'])
    # NOTE(review): this box spans cell origins only, so it looks like
    # the last column and the bottom row fall outside it -- confirm that
    # features touching only those cells are meant to be skipped.
    ras_box = Mk_bbox(min(ras_x),min(ras_y),max(ras_x),max(ras_y))

    feature_count = poly_lyr.GetFeatureCount()
    progress_bar = Countdown(float(feature_count))

    to_db = dst_fn.split('.')[-1]=='db'   # anything else: pickle
    out = {}
    if to_db:
        conn = sqlite3.connect(dst_fn)
        c = conn.cursor()
        c.execute('''CREATE TABLE isect
                     (fid integer, px integer, py integer, area real)''')
        conn.commit()

    within_dtype = [('x',np.int64),('y',np.int64)]
    isect_dtype = [('x',np.int64),('y',np.int64),('area',np.float64)]

    for fid in range(0,feature_count):
        feat = poly_lyr.GetFeature(fid)
        geom = feat.GetGeometryRef()
        if geom.Intersects(ras_box):
            within = []        # (px, py) of cells wholly inside
            intersecting = []  # (px, py, area) of cells on the border
            bounds = geom.GetEnvelope()
            x_s,x_e,y_s,y_e = Get_spatial_indexes(ras_x,ras_y,bounds)
            for i,cx in enumerate(ras_x[x_s:x_e]):
                px = x_s+i
                for j,cy in enumerate(ras_y[y_s:y_e]):
                    py = y_s+j
                    # xmin,ymin,xmax,ymax:
                    ras_cell = Mk_bbox(cx,cy-ras['dy'],cx+ras['dx'],cy)
                    if ras_cell.Intersects(geom):
                        if ras_cell.Within(geom):
                            within.append((px,py))
                        else:
                            isect = ras_cell.Intersection(geom)
                            intersecting.append((px,py,isect.Area()))

            if to_db:
                # Whole cells carry the full cell area; nothing is kept
                # in memory once the rows are committed.
                _insert_isect_rows(c, [(fid,px,py,cell_size)
                                       for px,py in within])
                _insert_isect_rows(c, [(fid,px,py,cell_area)
                                       for px,py,cell_area in intersecting])
                conn.commit()
            elif within or intersecting:
                # Structured arrays keep the pickle smaller than lists of
                # tuples; a side without cells stays an empty list.
                entry = [[],[]]
                if within:
                    entry[0] = np.array(within,within_dtype)
                if intersecting:
                    entry[1] = np.array(intersecting,isect_dtype)
                out[fid] = entry
        progress_bar.check(fid)
    progress_bar.flush()

    if to_db:
        ### CREATE INDEX
        c.execute('CREATE INDEX IF NOT EXISTS fid_idx ON isect (fid)')
        ### CLOSE CONNECTION
        conn.close()
    else:
        # Close the pickle file explicitly instead of leaving it to
        # garbage collection
        with open(dst_fn,'w') as pickle_f:
            cPickle.dump(out,pickle_f)
def Isect_poly_idx( src1_dsn, src1_pre, src1_id, src1_fields, area, src2_dsn, src2_pre, src2_id, src2_fields, dst_dsn ): '''Intersect two shapefiles, assuming both datasets: * contain only polygon geometries * consist of a complete set of shapefile and rtree idx extensions * are already in the same projection or spatial reference system :param src1_dsn: (str) path to first input shapefile, no extensions :param src1_pre: (str) characters to prepend to output fields preserved from 1st src dsn :param src1_id: (str) characters to prepend to id field in output dataset :param src1_fields: (list) list of field names to preserve from 1st input dataset :param area: (bool) True if 'area' attribute should be output :param src2_dsn: (str) path to second input shapefile, no extensions :param src2_pre: (str) characters to prepend to output fields preserved from 2nd src dsn :param src2_id: (str) characters to prepend to id field in output dataset :param src2_fields: (list) list of field names to preserve from 2nd input dataset :param dst_dsn: (str) path to output shapefile, no extensions :return: None ''' # Load first dataset, and projection src_ds1, src_lyr1 = Ogr_open(src1_dsn) srs = src_lyr1.GetSpatialRef() # Load second source dataset, plus index src_ds2, src_lyr2 = Ogr_open(src2_dsn) src_r2 = FastRtree(src2_dsn) # Prepare output dataset driver = ogr.GetDriverByName('Esri Shapefile') dst_ds = driver.CreateDataSource(dst_dsn+'.shp') dst_lyr = dst_ds.CreateLayer('',None,ogr.wkbPolygon) # Basic id fields dst_lyr.CreateField(ogr.FieldDefn('id',ogr.OFTInteger)) dst_lyr.CreateField(ogr.FieldDefn(src1_id+'id',ogr.OFTInteger)) dst_lyr.CreateField(ogr.FieldDefn(src2_id+'id',ogr.OFTInteger)) # Additional fields from src1 src_lyr1_defn = src_lyr1.GetLayerDefn() for ind,field_name in enumerate(src1_fields): try: field_i = src_lyr1_defn.GetFieldIndex(field_name) field_defn = src_lyr1_defn.GetFieldDefn(field_i) field_defn.SetName(src1_pre+field_name) dst_lyr.CreateField(field_defn) except: 
print 'First source dataset has no field named',field_name jnk = src1_fields.pop(ind) # Additional fields from src2 src_lyr2_defn = src_lyr2.GetLayerDefn() for ind,field_name in enumerate(src2_fields): try: field_i = src_lyr2_defn.GetFieldIndex(field_name) field_defn = src_lyr2_defn.GetFieldDefn(field_i) field_defn.SetName(src2_pre+field_name) dst_lyr.CreateField(field_defn) except: print 'First source dataset has no field named',field_name jnk = src2_fields.pop(ind) if area == True: dst_lyr.CreateField(ogr.FieldDefn('area',ogr.OFTReal)) defn = dst_lyr.GetLayerDefn() # Open output index dst_r = FastRtree(dst_dsn,interleaved=True) # ID counter for output shapefile idVar = 0 # Initialize progress updater count_max = float(src_lyr1.GetFeatureCount()) # count_update = count_max * 0.05 # print progress every 5%! progress_bar = Countdown(count_max) # Loop through features in src1 for fid1 in range(0,src_lyr1.GetFeatureCount()): src_feat1 = src_lyr1.GetFeature(fid1) geom1 = src_feat1.GetGeometryRef() gxmin,gxmax,gymin,gymax = geom1.GetEnvelope() # use src2 index to find intersections hits = src_r2.intersection((gxmin,gymin,gxmax,gymax)) for hit_fid in hits: src_feat2 = src_lyr2.GetFeature(hit_fid) geom2 = src_feat2.GetGeometryRef() if geom1.Intersects(geom2): isect = geom1.Intersection(geom2) # Insert to index xmin,xmax,ymin,ymax = isect.GetEnvelope() dst_r.insert(idVar,(xmin,ymin,xmax,ymax)) # Write to output shapefile dst_feat = ogr.Feature(defn) dst_feat.SetField('id',idVar) dst_feat.SetField(src1_id+'id',fid1) dst_feat.SetField(src2_id+'id',hit_fid) for field_name in src1_fields: field_val = src_feat1.GetField(src1_pre+field_name) dst_feat.SetField(src1_pre+field_name,field_val) for field_name in src2_fields: field_val = src_feat2.GetField(src2_pre+field_name) dst_feat.SetField(src2_pre+field_name,field_val) if area == True: dst_feat.SetField('area',isect.GetArea()) dst_feat.SetGeometry(isect) dst_lyr.CreateFeature(dst_feat) dst_feat = isect = None idVar+=1 
progress_bar.check(fid1) progress_bar.flush() dst_r.close() dst_r = dst_ds = dst_lyr = dst_feat = isect = defn = None dst_prj = srs.ExportToWkt() Mk_proj(dst_prj,dst_dsn)
def Veclc2csv( project ): '''Workflow for projects with shapefile aoi (project['lc_type']=='shp') Open prj_lcm.shp *Collect unique values for field era_id Write output file with era timeseries, 1 line/cell, *scale+off Count unique values for attribute field mod_id Determine number of output tables numfiles, such that each file reports max n mod_id (n=50?) Write output files for MODIS and landcover: for i in range(numfiles): for mod_id in that outnum: write fileA: forestry/lc attrs + mod_id,era_id: 1 line/feat write fileB1, B2, ...: modis timeseries: 1 line/cell, *scale+off ''' ## Define some paths lcm_dsn = project['prj_name']+'_lcm' lcm_path = os.path.join(project['shp_dir'],lcm_dsn) mod_dsn = project['prj_name']+'_modis_reprj' mod_path= os.path.join(project['shp_dir'],mod_dsn) ## Get ERA id and hdf5 indices from lcm shapefile lcm_ds = ogr.Open(lcm_path+'.shp',gdalconst.GA_ReadOnly) era_id_list = Unique_values( lcm_path,"era_id") era_ind_list= [] ind_sql_ = 'SELECT era_x_ind,era_y_ind FROM %s WHERE era_id=%i' print 'Summarizing %s ERA cells from lcm' % len(era_id_list) for era_id in era_id_list: ind_sql = ind_sql_ % (lcm_dsn, era_id) ind_lyr = lcm_ds.ExecuteSQL(ind_sql) ind_feat= ind_lyr.GetNextFeature() era_x_ind = ind_feat.GetField(0) era_y_ind = ind_feat.GetField(1) era_ind_list.append((era_id,era_x_ind,era_y_ind)) del lcm_ds,ind_lyr ## Write each ERA dataset to CSV, one row per cell within aoi for era_sds in project['era'].keys(): Era2csv(project, era_sds, era_ind_list) ## Get MODIS id and hdf5 indices from lcm shapefile lcm_ds = ogr.Open(lcm_path+'.shp',gdalconst.GA_ReadOnly) mod_ds = ogr.Open(mod_path+'.shp',gdalconst.GA_ReadOnly) mod_id_list = Unique_values( lcm_path,"mod_id") mod_ind_list= [] ind_sql_ = 'SELECT mod_x_ind,mod_y_ind FROM %s WHERE mod_id=%i' mod_sql_ = 'SELECT ctr_x,ctr_y FROM %s WHERE id=%i' print 'Summarizing %s MODIS cells from lcm' % len(mod_id_list) progress_bar = Countdown(len(mod_id_list)) for i in range(len(mod_id_list)): mod_id = 
mod_id_list[i] ind_sql = ind_sql_ % (lcm_dsn, mod_id) mod_sql = mod_sql_ % (mod_dsn, mod_id) ind_lyr = lcm_ds.ExecuteSQL(ind_sql) ind_feat= ind_lyr.GetNextFeature() mod_x_ind = ind_feat.GetField(0) mod_y_ind = ind_feat.GetField(1) mod_lyr = mod_ds.ExecuteSQL(mod_sql) mod_feat = mod_lyr.GetNextFeature() geom = mod_feat.GetGeometryRef() mod_area = geom.Area() mod_ind_list.append((mod_id,mod_x_ind,mod_y_ind,mod_area)) progress_bar.check(i) progress_bar.flush() del lcm_ds,ind_lyr ## Check for number of landcover features lcm_ds = ogr.Open(lcm_path+'.shp',gdalconst.GA_ReadOnly) lcm_lyr = lcm_ds.GetLayer(0) lcm_sz = lcm_lyr.GetFeatureCount() del lcm_ds,lcm_lyr # Export Landcover/Modis datasets to csv: # use lcm_sz to control regionification Lcmod_manager( project, lcm_sz, mod_ind_list)