def import_from_shape(upload, start_row=0, max_rows=200000, create_int_style_cols=True): """ a shapeUpload object max_rows - any more than this is ignored centroid - if it's a (multi)polygon, should we also create a geometry_centroid field """ upload.status = 2 #set this right away so it doesn't get reprocessed upload.save() ds = DataSource(upload.shapefile) layer = ds[0] fields = layer.fields num_features = len(layer) #set max # of _style features max_distinct_style_vals = max(min(num_features / 100, 50),10) print 'there are %d features' % num_features upload.total_rows = num_features if not num_features: print 'no rows, returning' upload.status = 6 upload.save() return rows = [] #get field types field_map = { 'OFTString':'STRING', 'OFTReal':'NUMBER', 'OFTInteger':'NUMBER', 'OFTDate':'DATETIME' } field_types = [field_map[f.__name__] for f in layer.field_types] field_layers = layer.fields #insert geometry layers first field_layers.insert(0,'geometry') field_types.insert(0,'LOCATION') field_layers.insert(1,'geometry_vertex_count') field_types.insert(1,'NUMBER') if upload.create_simplify: field_layers.insert(0,'geometry_simplified') field_types.insert(0,'LOCATION') field_layers.insert(1,'geometry_simplified_vertex_count') field_types.insert(1,'NUMBER') #use sorted dict so we can ensure table has geom columns upfront field_dict = SortedDict(zip(field_layers, field_types)) #set up extra fields if creating int/style cols if create_int_style_cols: int_style_dict = {} for field,field_type in field_dict.items(): if field_type == 'STRING': field_dict[field + '_ft_style'] = 'NUMBER' int_style_dict[field] = {} print field_dict #add some custom import fields field_dict['import_notes'] = 'STRING' print 'FIELD DICT', field_dict print 'starting to process' for i, feat in enumerate(layer): if i > max_rows: continue if start_row and i < start_row: continue upload.rows_processed = i + 1 if not i % ((num_features / 50) or 5): print upload.rows_processed,'rp' upload.save() upload.save() rd = {} #geom = fromstr(feat.geom.wkt,srid=srid) if layer.srs: try: geom = OGRGeometry(feat.geom.wkt, layer.srs.proj4) geom.transform(4326) except Exception, e: print 'FAIL GEOM' print e, geom = None else: geom = OGRGeometry(feat.geom.wkt) if geom: geom = fromstr(geom.wkt) #create optional centroid for polys if upload.create_centroid and 'oly' in geom.geom_type: field_dict['geometry_pos'] = 'LOCATION' rd['geometry_pos'] = geom.point_on_surface.kml if upload.create_centroid_poly and 'oly' in geom.geom_type: field_dict['geometry_pos_poly_2'] = 'LOCATION' field_dict['geometry_pos_poly_3'] = 'LOCATION' rd['geometry_pos_poly_2'] = geom.point_on_surface.buffer(.0001,10).kml rd['geometry_pos_poly_3'] = geom.point_on_surface.buffer(.0005,10).kml #if it's > 1M characters, we need to simplify it for FT simplify_tolerance = .0001 while len(geom.kml) > 1000000: geom = geom.simplify(simplify_tolerance) print 'simplified to %f' % simplify_tolerance rd['import_notes'] = 'simplified to %d DD' % simplify_tolerance simplify_tolerance = simplify_tolerance * 1.5 if not geom.valid: rd['import_notes'] = '<br>Geometry not valid' kml = geom.kml rd['geometry'] = kml rd['geometry_vertex_count'] = geom.num_coords if upload.create_simplify and not 'oint' in geom.geom_type: amt = .002 if 'oly' in geom.geom_type: buffer_geom = geom.buffer(amt) buffer_geom = buffer_geom.buffer(amt * -1) simple_geom = buffer_geom.simplify(amt) else: simple_geom = geom.simplify(amt) rd['geometry_simplified'] = simple_geom.kml rd['geometry_simplified_vertex_count'] = simple_geom.num_coords for f in fields: val = feat.get(f) #make sure we have proper null type for diff fields if val == '<Null>': continue if not val: continue if field_dict[f] == 'DATETIME': val = val.isoformat().split('T')[0] if field_dict[f] == 'STRING' \ and create_int_style_cols \ and field_dict.has_key(f + '_ft_style'): #check to see if we have a number for this yet try: rd[f + '_ft_style'] = int_style_dict[f][val] except: int_style_dict[f][val] = len(int_style_dict[f]) rd[f + '_ft_style'] = int_style_dict[f][val] #however if we have too many distinct vals, let's just not do this anymore if len(int_style_dict[f]) > max_distinct_style_vals: print 'DELETING FD %s' % f del field_dict[f + '_ft_style'] del rd[f + '_ft_style'] #sucks, but now we should just remove all these fields from previous rows for srow in rows: try:del srow[f + '_ft_style'] except: pass #probably this was a null value? rd[f] = val rows.append(rd) #let's process 10k rows at a time.. not keep everything in memory if len(rows) > 10000: uploadRows(upload, field_dict, rows) rows = []
def import_from_shape(upload, start_row=0, max_rows=200000, create_int_style_cols=True): """ a shapeUpload object max_rows - any more than this is ignored centroid - if it's a (multi)polygon, should we also create a geometry_centroid field """ upload.status = 2 #set this right away so it doesn't get reprocessed upload.save() ds = DataSource(upload.shapefile) layer = ds[0] fields = layer.fields num_features = len(layer) #set max # of _style features max_distinct_style_vals = max(min(num_features / 100, 50), 10) print 'there are %d features' % num_features upload.total_rows = num_features if not num_features: print 'no rows, returning' upload.status = 6 upload.save() return rows = [] #get field types field_map = { 'OFTString': 'STRING', 'OFTReal': 'NUMBER', 'OFTInteger': 'NUMBER', 'OFTDate': 'DATETIME' } field_types = [field_map[f.__name__] for f in layer.field_types] field_layers = layer.fields #insert geometry layers first field_layers.insert(0, 'geometry') field_types.insert(0, 'LOCATION') field_layers.insert(1, 'geometry_vertex_count') field_types.insert(1, 'NUMBER') if upload.create_simplify: field_layers.insert(0, 'geometry_simplified') field_types.insert(0, 'LOCATION') field_layers.insert(1, 'geometry_simplified_vertex_count') field_types.insert(1, 'NUMBER') #use sorted dict so we can ensure table has geom columns upfront field_dict = SortedDict(zip(field_layers, field_types)) #set up extra fields if creating int/style cols if create_int_style_cols: int_style_dict = {} for field, field_type in field_dict.items(): if field_type == 'STRING': field_dict[field + '_ft_style'] = 'NUMBER' int_style_dict[field] = {} print field_dict #add some custom import fields field_dict['import_notes'] = 'STRING' print 'FIELD DICT', field_dict print 'starting to process' for i, feat in enumerate(layer): if i > max_rows: continue if start_row and i < start_row: continue upload.rows_processed = i + 1 if not i % ((num_features / 50) or 5): print upload.rows_processed, 'rp' upload.save() upload.save() rd = {} #geom = fromstr(feat.geom.wkt,srid=srid) if layer.srs: try: geom = OGRGeometry(feat.geom.wkt, layer.srs.proj4) geom.transform(4326) except Exception, e: print 'FAIL GEOM' print e, geom = None else: geom = OGRGeometry(feat.geom.wkt) if geom: geom = fromstr(geom.wkt) #create optional centroid for polys if upload.create_centroid and 'oly' in geom.geom_type: field_dict['geometry_pos'] = 'LOCATION' rd['geometry_pos'] = geom.point_on_surface.kml if upload.create_centroid_poly and 'oly' in geom.geom_type: field_dict['geometry_pos_poly_2'] = 'LOCATION' field_dict['geometry_pos_poly_3'] = 'LOCATION' rd['geometry_pos_poly_2'] = geom.point_on_surface.buffer( .0001, 10).kml rd['geometry_pos_poly_3'] = geom.point_on_surface.buffer( .0005, 10).kml #if it's > 1M characters, we need to simplify it for FT simplify_tolerance = .0001 while len(geom.kml) > 1000000: geom = geom.simplify(simplify_tolerance) print 'simplified to %f' % simplify_tolerance rd['import_notes'] = 'simplified to %d DD' % simplify_tolerance simplify_tolerance = simplify_tolerance * 1.5 if not geom.valid: rd['import_notes'] = '<br>Geometry not valid' kml = geom.kml rd['geometry'] = kml rd['geometry_vertex_count'] = geom.num_coords if upload.create_simplify and not 'oint' in geom.geom_type: amt = .002 if 'oly' in geom.geom_type: buffer_geom = geom.buffer(amt) buffer_geom = buffer_geom.buffer(amt * -1) simple_geom = buffer_geom.simplify(amt) else: simple_geom = geom.simplify(amt) rd['geometry_simplified'] = simple_geom.kml rd['geometry_simplified_vertex_count'] = simple_geom.num_coords for f in fields: val = feat.get(f) #make sure we have proper null type for diff fields if val == '<Null>': continue if not val: continue if field_dict[f] == 'DATETIME': val = val.isoformat().split('T')[0] if field_dict[f] == 'STRING' \ and create_int_style_cols \ and field_dict.has_key(f + '_ft_style'): #check to see if we have a number for this yet try: rd[f + '_ft_style'] = int_style_dict[f][val] except: int_style_dict[f][val] = len(int_style_dict[f]) rd[f + '_ft_style'] = int_style_dict[f][val] #however if we have too many distinct vals, let's just not do this anymore if len(int_style_dict[f]) > max_distinct_style_vals: print 'DELETING FD %s' % f del field_dict[f + '_ft_style'] del rd[f + '_ft_style'] #sucks, but now we should just remove all these fields from previous rows for srow in rows: try: del srow[f + '_ft_style'] except: pass #probably this was a null value? rd[f] = val rows.append(rd) #let's process 10k rows at a time.. not keep everything in memory if len(rows) > 10000: uploadRows(upload, field_dict, rows) rows = []