Exemple #1
0
    def load_shapefile(self):
        """Validate the shapefile saved on disk and load it into the db.

        Extracts ``self.source_file`` into a temporary directory, requires
        exactly one ``.shp`` file with exactly one layer and a spatial
        reference, stores the layer's field names in ``self.data_fields`` and
        creates one entry in ``self.polygons`` per feature. The status is set
        to PROCESSING before work starts and to COMPLETE on success.

        Any ``Exception`` raised while loading is caught here: its message is
        stored under ``self.errors['message']``, a UUID suffix is appended to
        ``self.label`` and the status is set to ERROR. The temporary directory
        is removed in every case.
        """
        self.status = self.StatusTypes.PROCESSING
        self.save()

        # Bound before the try block so the cleanup in ``finally`` cannot hit
        # an unbound name when extraction itself fails.
        temp_dir = None
        try:
            temp_dir = extract_zip_to_temp_dir(self.source_file)
            shapefiles = get_shapefiles_in_dir(temp_dir)

            if len(shapefiles) != 1:
                raise ValueError('Exactly one shapefile (.shp) required')

            shapefile_path = os.path.join(temp_dir, shapefiles[0])
            shape_datasource = GDALDataSource(shapefile_path)
            # Reject zero layers as well as several, so an empty datasource
            # gets this message rather than an IndexError from the lookup below.
            if len(shape_datasource) != 1:
                raise ValueError('Shapefile must have exactly one layer')

            boundary_layer = shape_datasource[0]
            if boundary_layer.srs is None:
                raise ValueError('Shapefile must include a .prj file')
            self.data_fields = boundary_layer.fields
            for feature in boundary_layer:
                # Reproject in place to the configured SRID before wrapping.
                feature.geom.transform(settings.GROUT['SRID'])
                geometry = make_multipolygon(feature.geom)
                data = {
                    field: feature.get(field)
                    for field in self.data_fields
                }
                self.polygons.create(geom=geometry, data=data)

            self.status = self.StatusTypes.COMPLETE
            self.save()
        except Exception as e:
            if self.errors is None:
                self.errors = {}
            self.errors['message'] = str(e)
            # Relabel geography to allow saving a valid shapefile in this namespace
            self.label = self.label + '_' + str(uuid.uuid4())
            self.status = self.StatusTypes.ERROR
            self.save()
        finally:
            if temp_dir is not None:
                shutil.rmtree(temp_dir, ignore_errors=True)
def run_load_shapefile_data(demographicdata_id, pop1_field, pop2_field,
                            dest1_field):
    """Load data from DemographicDataSource into DemographicDataFeature objects.

    Opens the Shapefile associated with demographicdata_id and reads data from the fields
    whose names are given in pop1_field, pop2_field, and dest1_field. Generates a
    series of DemographicDataFeature objects containing the geometries in the shapefile,
    as well as the data from the three specified fields associated with each geometry.

    Existing features of the datasource are deleted first. A feature that raises
    ValueError is recorded as a warning and skipped; any other Exception is recorded
    as an error and the datasource status is set back to WAITING_USER_INPUT.

    Params:
        :demographicdata_id: Primary key of the DemographicDataSource to load.
        :pop1_field: The name of the field in the Shapefile from which to import data that
        will be placed in the population_metric_1 field of each generated
        DemographicDataFeature object. A falsy value stores None instead.
        :pop2_field: Same as pop1_field, but data goes into the population_metric_2 field
        of the generated DemographicDataFeature objects.
        :dest1_field: Same as pop1_field and pop2_field, but data from this field of the
        Shapefile will end up in the destination_metric_1 field of each
        DemographicDataFeature object.
    """
    # We can assume that the shapefile is valid because get_shapefile_fields
    # has been run, so jump straight to getting the data.
    demog_data = DemographicDataSource.objects.get(pk=demographicdata_id)
    demog_data.status = DemographicDataSource.Statuses.IMPORTING
    demog_data.save()

    # Get rid of any existing data--need to do a full reload if user changes
    # the configuration.
    # demog_data should have its is_loaded attribute set to False by the view
    # which launches this job.
    demog_data.demographicdatafeature_set.all().delete()
    error_factory = ErrorFactory(DemographicDataSourceProblem, demog_data,
                                 'datasource')
    # Bound before the try block so the cleanup in ``finally`` cannot hit an
    # unbound name when extraction itself fails.
    temp_dir = None
    try:
        temp_dir = extract_zip_to_temp_dir(demog_data.source_file)
        shapefile = os.path.join(temp_dir, get_shapefiles_in_dir(temp_dir)[0])
        data_layer = GDALDataSource(shapefile)[0]

        for feature in data_layer:
            try:
                pop1_val = feature.get(str(pop1_field)) if pop1_field else None
                pop2_val = feature.get(str(pop2_field)) if pop2_field else None
                dest1_val = feature.get(
                    str(dest1_field)) if dest1_field else None
                # If this is too slow we can maybe speed it up with bulk_create
                DemographicDataFeature.objects.get_or_create(
                    population_metric_1=pop1_val,
                    population_metric_2=pop2_val,
                    destination_metric_1=dest1_val,
                    geom=make_multipolygon(
                        feature.geom.transform(settings.DJANGO_SRID,
                                               clone=True)),
                    datasource=demog_data)
            except ValueError as e:
                error_factory.warn('Could not import 1 feature.', str(e))

        # make raw SQL query to execute function that processes demographic data, first
        # clipping it to region boundary, then turning it into a regular point grid
        with connection.cursor() as c:
            c.execute('SELECT ClipDemographics();')
            c.execute('SELECT CreateGrid();')

        demog_data.status = DemographicDataSource.Statuses.COMPLETE
        demog_data.save()
        num_loaded_features = DemographicDataFeature.objects.filter(
            datasource=demog_data).count()
        if num_loaded_features < demog_data.num_features:
            error_factory.warn(
                'Only %s out of %s features loaded; some features may be invalid.'
                % (num_loaded_features, demog_data.num_features))
    except Exception as e:
        error_factory.error('Unexpected error loading shapefile.', str(e))
        demog_data.status = DemographicDataSource.Statuses.WAITING_USER_INPUT
        demog_data.save()
    finally:
        # No ``return`` here: returning from ``finally`` would silently discard
        # any exception still in flight (e.g. KeyboardInterrupt or a failure
        # inside the error handler above).
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
def run_shapefile_to_boundary(boundary_id):
    """Populate a boundary's geom field from a shapefile.

    Marks the Boundary as IMPORTING, validates the uploaded archive (a
    completed GTFS feed must exist; exactly one shapefile; at most one layer;
    a spatial reference must be present), unions all feature geometries,
    transforms the union to settings.DJANGO_SRID and stores it as the
    boundary's geom with status COMPLETE.

    Every validation failure, and any unexpected Exception, is recorded
    through the error factory and sets the boundary status to ERROR.

    Params:
        :boundary_id: Primary key of the Boundary to process.
    """
    # Get the boundary object we're processing, note that we're processing, and
    # prepare to store any errors encountered.
    boundary = Boundary.objects.get(pk=boundary_id)
    boundary.status = Boundary.Statuses.IMPORTING
    boundary.save()
    error_factory = ErrorFactory(BoundaryProblem, boundary, 'boundary')

    def handle_error(title, description):
        """Helper method to handle shapefile errors."""
        error_factory.error(title, description)
        boundary.status = Boundary.Statuses.ERROR
        boundary.save()

    # Bound before the try block: the GTFS check below returns before any
    # directory is created, and the cleanup in ``finally`` still runs.
    temp_dir = None
    try:
        # There must be valid GTFS data in order to upload a boundary,
        # otherwise UTM projection may not work.
        # TODO: Refactor Shapefile validation and avoid repetition.
        if GTFSFeed.objects.filter(
                status=GTFSFeed.Statuses.COMPLETE).count() < 1:
            handle_error(
                'No valid GTFS feed.',
                'Please upload a valid GTFS feed before adding boundary data.')
            return
        # Set up temporary directory and unzip to there.
        temp_dir = extract_zip_to_temp_dir(boundary.source_file)
        shapefiles = get_shapefiles_in_dir(temp_dir)

        if len(shapefiles) > 1:
            handle_error('Multiple shapefiles found.',
                         'Upload only one shapefile at a time.')
            return
        elif len(shapefiles) < 1:
            handle_error(
                'No shapefile found.',
                'The zip archive must include exactly one shapefile.')
            return

        shapefile_path = os.path.join(temp_dir, shapefiles[0])
        shape_datasource = GDALDataSource(shapefile_path)
        if len(shape_datasource) > 1:
            handle_error('Multiple layers in shapefile.',
                         'The boundary shapefile must have only one layer.')
            return

        boundary_layer = shape_datasource[0]
        if boundary_layer.srs is None:
            handle_error('Missing .prj file.',
                         'Boundary shapefile must include a .prj file.')
            return

        # Since this will become a boundary for a city / region, attempt to flatten
        # all features into one feature.
        try:
            union = get_union([feature.geom for feature in boundary_layer])
        except ValueError as e:
            handle_error('Could not create geometry union.', str(e))
            return

        # Transform to our internal database SRID
        union.transform(settings.DJANGO_SRID)

        # Wrap in a MultiPolygon if necessary
        geometry = make_multipolygon(union)

        # Write out the data and save
        boundary.geom = geometry
        boundary.status = Boundary.Statuses.COMPLETE
        boundary.save()
    except Exception as e:
        handle_error('Unexpected error processing shapefile.', str(e))
    finally:
        # No ``return`` here: returning from ``finally`` would silently discard
        # any exception still in flight (e.g. KeyboardInterrupt or a failure
        # inside handle_error).
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
def run_get_shapefile_fields(demographicdata_id):
    """Get the column field names from a shapefile.

    Opens the Shapefile associated with demographicdata_id, validates it, and extracts the field
    names available in the Shapefile. Saves them as DemographicDataFieldName objects.

    On success the datasource's num_features is set to the layer's feature count and
    its status to WAITING_USER_INPUT. Every validation failure, and any unexpected
    Exception, is recorded through the error factory and sets the status to ERROR.

    Params:
        :demographicdata_id: ID of a DemographicDataSource from which to get field names.
    """
    # Note that we're processing, and
    # prepare to store any errors encountered.
    demog_data = DemographicDataSource.objects.get(pk=demographicdata_id)
    demog_data.status = DemographicDataSource.Statuses.PROCESSING
    demog_data.save()
    error_factory = ErrorFactory(DemographicDataSourceProblem, demog_data,
                                 'datasource')

    def handle_error(title, description):
        """Helper method to handle shapefile errors."""
        error_factory.error(title, description)
        demog_data.status = DemographicDataSource.Statuses.ERROR
        demog_data.save()

    # Bound before the try block so the cleanup in ``finally`` cannot hit an
    # unbound name when extraction itself fails.
    temp_dir = None
    try:
        # Set up temporary directory and unzip to there.
        temp_dir = extract_zip_to_temp_dir(demog_data.source_file)
        shapefiles = get_shapefiles_in_dir(temp_dir)

        # There must be valid GTFS data in order to load demographic data
        # otherwise UTM projection may not work.
        if GTFSFeed.objects.filter(
                status=GTFSFeed.Statuses.COMPLETE).count() < 1:
            # Go through handle_error like every other failure so the
            # datasource does not stay in PROCESSING.
            handle_error(
                'No valid GTFS feed.',
                'Please upload a valid GTFS feed before trying again.')
            return

        if len(shapefiles) > 1:
            handle_error('Multiple shapefiles found.',
                         'Upload only one shapefile at a time.')
            return
        elif len(shapefiles) < 1:
            handle_error(
                'No shapefile found.',
                'The zip archive must include exactly one shapefile.')
            return

        shapefile_path = os.path.join(temp_dir, shapefiles[0])
        shape_datasource = GDALDataSource(shapefile_path)
        if len(shape_datasource) > 1:
            handle_error('Multiple layers in shapefile.',
                         'The demographic shapefile must have only one layer.')
            return
        demographic_layer = shape_datasource[0]
        if demographic_layer.srs is None:
            handle_error('Missing .prj file.',
                         'Demographic shapefile must include a .prj file.')
            return

        for field_name in demographic_layer.fields:
            DemographicDataFieldName.objects.get_or_create(
                datasource=demog_data, name=field_name)

        demog_data.num_features = len(demographic_layer)
        demog_data.status = DemographicDataSource.Statuses.WAITING_USER_INPUT
        demog_data.save()

    except Exception as e:
        handle_error('Unexpected error processing shapefile.', str(e))
    finally:
        # No ``return`` here: returning from ``finally`` would silently discard
        # any exception still in flight (e.g. KeyboardInterrupt or a failure
        # inside handle_error).
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)