def _remove_if_exists(path):
    """Delete `path` if it exists; do nothing otherwise."""
    if os.path.exists(path):
        os.remove(path)


def _shapefile_features(collection_dir):
    """Return the GeoJSON features of every shapefile in `collection_dir`.

    Each '*.shp' file is converted with ogr2ogr into a sibling '.json' file,
    reprojected to EPSG:4326 (WGS 84), and the 'features' list of that file
    is appended to the result. A shapefile that cannot be converted or
    parsed is logged and skipped; the remaining shapefiles are still
    processed.
    """
    features = []
    for shapefile in glob.glob(os.path.join(collection_dir, '*.shp')):
        # The "name" of a shapefile is its path without the extension.
        base = os.path.splitext(shapefile)[0]

        # Step 2.1. Convert this shapefile into a GeoJSON file, projected to
        # EPSG 4326 (WGS 84).
        json_filename = '%s.json' % base

        # Delete any stale GeoJSON file first so the conversion starts clean.
        _remove_if_exists(json_filename)

        command = [ogr2ogr_path(),
                   '-f', 'GeoJSON',
                   '-t_srs', 'EPSG:4326',
                   json_filename,
                   shapefile]
        try:
            # subprocess.call() does not raise on a non-zero exit status, so
            # the return code has to be checked explicitly.
            status = subprocess.call(command)
        except Exception:
            status = None
        if status != 0:
            logging.warning('Unable to convert %s to GeoJSON - %s'
                            % (base, command))
            _remove_if_exists(json_filename)
            continue

        # Step 2.2. Load that GeoJSON file.
        try:
            with codecs.open(json_filename, encoding='utf-8') as handle:
                geojson = simplejson.loads(handle.read(), encoding='utf-8')
            # extend(), not assignment: features from every shapefile in the
            # collection must be kept, not just those of the last one.
            features.extend(geojson['features'])
        except Exception:
            logging.error('Unable to open or process %s' % json_filename)
            continue

    return features


def _csv_features(csv_path):
    """Return one GeoJSON Point feature per usable row of the CSV file.

    The columns 'Latitude' and 'Longitude' are read from every row; rows
    where either is empty or not parseable as a float are logged and
    skipped. All columns of the row become the feature's properties.

    TODO: We assume latitude and longitude are present in the columns
    'Latitude' and 'Longitude' respectively.
    IMPORTANT TODO: at the moment, we assume the incoming coordinates are
    already in WGS84! THIS MIGHT NOT BE TRUE!
    """
    features = []
    with open(csv_path, "r") as csvfile:
        for feature_index, entry in enumerate(UnicodeDictReader(csvfile), 1):
            latitude = entry['Latitude']
            if not latitude:
                logging.warning("Feature %d has no latitude, ignoring."
                                % feature_index)
                continue

            longitude = entry['Longitude']
            if not longitude:
                logging.warning("Feature %d has no longitude, ignoring."
                                % feature_index)
                continue

            try:
                # GeoJSON positions are ordered [longitude, latitude].
                coordinates = [float(longitude), float(latitude)]
            except ValueError:
                logging.warning(
                    "Feature %d has non-numeric coordinates, ignoring."
                    % feature_index)
                continue

            # As per the spec at http://geojson.org/geojson-spec.html
            features.append({
                'type': 'Feature',
                'properties': entry,
                'geometry': {'type': 'Point', 'coordinates': coordinates},
            })
    return features


def _map_feature_properties(collection, feature, row_number):
    """Replace feature['properties'] with the collection's mapped fields.

    Starts from collection.default_fields() and adds every
    (key, value) pair returned by collection.map_field() whose value is not
    None, converted to unicode. The feature is modified in place.
    """
    new_properties = collection.default_fields()
    for key, value in feature['properties'].items():
        new_key, new_value = collection.map_field(row_number, key, value)
        if new_value is not None:
            new_properties[new_key] = unicode(new_value)

    # NOTE(review): the dbfname-keyed dictionary is computed but not used;
    # features are uploaded with the unconverted field names. The conversion
    # call is kept in case fieldname_to_dbfname() also validates field
    # names -- confirm and remove if it does not.
    dbf_properties = {}
    for fieldname in new_properties.keys():
        dbf_properties[ProviderConfig.fieldname_to_dbfname(fieldname)] = \
            new_properties[fieldname]

    feature['properties'] = new_properties


def convertToJSON(provider_dir):
    """Convert a provider directory into one GeoJSON FeatureCollection.

    The provider's 'config.yaml' is loaded and validated, and each collection
    it lists is read either from a directory of shapefiles (converted with
    ogr2ogr) or from a '.csv' file with 'Latitude'/'Longitude' columns.
    Every feature has its properties mapped through the collection, is
    uploaded with uploadGeoJSONEntry(), and is written to
    '<provider_dir>/mol_source_this.json'. An existing file of that name is
    overwritten.

    The process's working directory is changed to `provider_dir` while the
    function runs and is always restored afterwards, even on error.
    """
    original_dir = os.path.abspath(os.path.curdir)
    os.chdir(provider_dir)

    # We wrap this processing in a try-finally so that, no matter what
    # happens, we change back to the original directory before we leave this
    # subroutine.
    try:
        logging.info("Now in directory '%s'." % provider_dir)

        # TODO: how?
        # filename = "mol_source_%s.json" % provider_dir
        filename = "mol_source_this.json"
        _remove_if_exists(filename)

        with codecs.open(filename, encoding='utf-8', mode="w") as all_json:
            all_json.write("""{ "type": "FeatureCollection", "features": [""")

            # Step 1. Load and validate the config.yaml file.
            config = ProviderConfig("config.yaml",
                                    os.path.basename(provider_dir))
            config.validate()

            # Tracks whether a separator is needed before the next batch, so
            # the features array never gets a leading or trailing comma.
            wrote_feature = False

            # Step 2. For each collection, gather its features.
            for collection in config.collections():
                name = collection.getname()
                logging.info("Switching to collection '%s'." % name)

                if os.path.isdir(name):
                    # A directory of shapefiles.
                    features = _shapefile_features(name)
                elif os.path.isfile(name) and name.lower().endswith('.csv'):
                    # This is a .csv file!
                    features = _csv_features(name)
                else:
                    features = []

                # Step 2.3. For every feature: map, upload and serialise.
                features_json = []
                for row_count, feature in enumerate(features, 1):
                    _map_feature_properties(collection, feature, row_count)

                    # Upload to CartoDB.
                    uploadGeoJSONEntry(feature, _getoptions().table_name)

                    try:
                        features_json.append(
                            simplejson.dumps(feature, ensure_ascii=False))
                    except Exception:
                        logging.warning(
                            'Unable to convert feature to JSON: %s' % feature)

                if features_json:
                    if wrote_feature:
                        all_json.write(',')
                    all_json.write(','.join(features_json))
                    wrote_feature = True
                all_json.flush()

                logging.info('%s converted to GeoJSON' % name)

            all_json.write("""]}""")

        # TODO: zipping the GeoJSON document fails for big files (4GB).
        logging.info("%s written successfully." % filename)
    finally:
        os.chdir(original_dir)

    logging.info("Processing of directory '%s' completed." % provider_dir)