Example #1
0
def convertToJSON(provider_dir):
    """Convert a provider directory into a single GeoJSON FeatureCollection.

    The provider's 'config.yaml' is loaded and validated, then every
    collection it lists is processed in turn:

    * a collection that is a directory has each '*.shp' file inside it
      converted to GeoJSON (EPSG:4326) with ogr2ogr;
    * a collection that is a '.csv' file has each row turned into a Point
      feature built from its 'Latitude' and 'Longitude' columns.

    Every feature has its properties remapped through the collection's
    field mapping, is uploaded with uploadGeoJSONEntry(), and is appended to
    'mol_source_this.json' inside provider_dir. An existing file of that
    name is overwritten.

    The process working directory is changed to provider_dir while this
    runs and is always restored before returning, even on error.

    Args:
        provider_dir: path of the provider directory to process.
    """
    original_dir = os.path.abspath(os.path.curdir)

    # normpath() strips a trailing separator, which would otherwise make
    # basename() return an empty provider name.
    provider_name = os.path.basename(os.path.normpath(provider_dir))

    os.chdir(provider_dir)
    logging.info("Now in directory '%s'." % provider_dir)

    # TODO: derive this from the provider name instead of hard-coding it.
    filename = "mol_source_this.json"

    # The outer try/finally guarantees that we change back to the original
    # directory no matter what happens; the inner one guarantees the output
    # file handle is released.
    try:
        # Mode "w" truncates any previous output.
        all_json = codecs.open(filename, encoding='utf-8', mode="w")
        try:
            all_json.write("""{
  "type": "FeatureCollection",
  "features": [""")

            # Step 1. Load and validate the config.yaml file.
            config = ProviderConfig("config.yaml", provider_name)
            config.validate()

            # Tracks whether a feature has already been written, so commas
            # go *between* features only and the document stays valid JSON.
            wrote_feature = False

            # Step 2. For each collection, gather its raw features.
            for collection in config.collections():
                name = collection.getname()
                logging.info("Switching to collection '%s'." % name)

                if os.path.isdir(name):
                    features = _collection_shapefile_features(name)
                elif os.path.isfile(name) and name.lower().endswith('.csv'):
                    features = _collection_csv_features(name)
                else:
                    logging.warn(
                        "Collection '%s' is neither a directory nor a .csv "
                        "file, nothing to convert." % name)
                    features = []

                # Step 3. Remap, upload and serialize every feature.
                features_json = []
                row_count = 0
                for feature in features:
                    row_count += 1

                    # GeoJSON permits a null 'properties' member.
                    properties = feature.get('properties') or {}
                    new_properties = collection.default_fields()
                    for key, value in properties.items():
                        (new_key, new_value) = collection.map_field(
                            row_count, key, value)
                        if new_value is not None:
                            new_properties[new_key] = unicode(new_value)

                    # Field names are deliberately NOT converted to DBF
                    # names: the upload to CartoDB uses the mapped names.
                    feature['properties'] = new_properties

                    # Upload to CartoDB.
                    uploadGeoJSONEntry(feature, _getoptions().table_name)

                    try:
                        features_json.append(
                            simplejson.dumps(feature, ensure_ascii=False))
                    except (TypeError, ValueError):
                        logging.info(
                            'Unable to convert feature to JSON: %s' % feature)

                if features_json:
                    if wrote_feature:
                        all_json.write(',')
                    all_json.write(','.join(features_json))
                    wrote_feature = True
                all_json.flush()

                logging.info('%s converted to GeoJSON' % name)

            all_json.write("""]}""")
        finally:
            all_json.close()

        # TODO: zip up the GeoJSON document. Disabled because ZipFile fails
        # for big files (4GB).

        logging.info("%s written successfully." % filename)

    finally:
        os.chdir(original_dir)

    logging.info("Processing of directory '%s' completed." % provider_dir)


def _collection_shapefile_features(collection_dir):
    """Return the GeoJSON features of every '*.shp' file in collection_dir.

    Each shapefile is converted by ogr2ogr into a sibling '<name>.json' file
    reprojected to EPSG:4326 (WGS 84), which is then parsed. Shapefiles that
    cannot be converted or parsed are logged and skipped.
    """
    features = []

    for shapefile in glob.glob(os.path.join(collection_dir, '*.shp')):
        # splitext() only strips the final extension, so a name that
        # contains '.shp' more than once is handled correctly.
        base_path = os.path.splitext(shapefile)[0]
        name = os.path.basename(base_path)
        json_filename = '%s.json' % base_path

        # Remove output left over from a previous run so that this run
        # starts from a clean file.
        if os.path.exists(json_filename):
            os.remove(json_filename)

        command = [ogr2ogr_path(),
            '-f', 'GeoJSON',
            '-t_srs', 'EPSG:4326',
            json_filename,
            shapefile
        ]

        # subprocess.call() reports a failed conversion through its return
        # value; it only raises when the program cannot be started at all.
        try:
            returncode = subprocess.call(command)
        except (OSError, ValueError):
            returncode = None

        if returncode != 0:
            logging.warn('Unable to convert %s to GeoJSON - %s' % (name, command))
            if os.path.exists(json_filename):
                os.remove(json_filename)
            continue

        try:
            json_file = codecs.open(json_filename, encoding='utf-8')
            try:
                geojson = simplejson.loads(json_file.read(), encoding='utf-8')
            finally:
                json_file.close()
        except (IOError, OSError, ValueError):
            logging.error('Unable to open or process %s' % json_filename)
            continue

        # Accumulate, so that every shapefile contributes its features.
        features.extend(geojson.get('features') or [])

    return features


def _collection_csv_features(csv_path):
    """Return one GeoJSON Point feature per usable row of the CSV file.

    Rows with an empty or non-numeric 'Latitude' or 'Longitude' are logged
    and skipped. The complete row is kept as the feature's properties.

    TODO: We assume latitude and longitude are present in the columns
    'Latitude' and 'Longitude' respectively.

    IMPORTANT TODO: at the moment, we assume the incoming coordinates are
    already in WGS84! THIS MIGHT NOT BE TRUE!
    """
    features = []

    csvfile = open(csv_path, "r")
    try:
        feature_index = 0
        for entry in UnicodeDictReader(csvfile):
            feature_index += 1

            latitude = entry['Latitude']
            if not latitude:
                logging.warn("Feature %d has no latitude, ignoring." % feature_index)
                continue
            longitude = entry['Longitude']
            if not longitude:
                logging.warn("Feature %d has no longitude, ignoring." % feature_index)
                continue

            try:
                # GeoJSON positions are ordered longitude, latitude.
                coordinates = [float(longitude), float(latitude)]
            except (TypeError, ValueError):
                logging.warn(
                    "Feature %d has invalid coordinates (%r, %r), ignoring."
                    % (feature_index, latitude, longitude))
                continue

            # As per the spec at http://geojson.org/geojson-spec.html
            features.append({
                'type': 'Feature',
                'properties': entry,
                'geometry': {'type': 'Point', 'coordinates': coordinates},
            })
    finally:
        csvfile.close()

    return features