Ejemplo n.º 1
0
def extract_tags(df, extract_tags):
    """Pull selected hstore tag values out of the non-highway rows of ``df``.

    Rows without an ``other_tags`` value, or with a non-null ``highway``, are
    discarded first. The remainder is narrowed to rows whose ``other_tags``
    text matches at least one requested tag, and only those rows are parsed
    with SQLAlchemy's HSTORE result processor.

    :param df: DataFrame providing ``osm_id``, ``highway`` and ``other_tags``
        columns.
    :param extract_tags: re-iterable collection of tag keys to extract. Each
        key is also handed to ``Series.str.contains`` for the pre-filter,
        which interprets it as a regular expression by default.
    :return: ``(rows, errors)``. Each entry of ``rows`` is
        ``[osm_id, value_for_tag_1, value_for_tag_2, ...]`` with ``None`` for
        tags absent from that record. Each entry of ``errors`` is a
        ``(row, exception)`` tuple for a record whose parsing raised
        ``TypeError``.
    """
    tags = list(extract_tags)
    if not tags:
        # Nothing was requested, so there is nothing to extract.
        return ([], [])

    # Prune the dataset to just the records that have the tags we want
    # before getting to the more expensive operation of extracting the tags.
    # This should reduce the dataset from 24M rows to less than 6M.
    t = df.dropna(subset=['other_tags'])
    t = t[t.highway.isnull()]
    if t.empty:
        # An empty mask list would be read by pandas as "select no columns",
        # which then breaks the set_index('osm_id') call further down.
        return ([], [])

    flags = [t.other_tags.str.contains(tag) for tag in tags]
    comb_flags = [any(row_flags) for row_flags in zip(*flags)]

    t = t[comb_flags]

    # Imported only once there are rows to parse; the pruning above needs
    # nothing but pandas.
    from sqlalchemy.dialects.postgresql import HSTORE

    parse_hstore = HSTORE().result_processor(None, None)

    rows = []
    errors = []
    for idx, r in t.set_index('osm_id')[['other_tags']].iterrows():
        try:
            d = parse_hstore(r.other_tags)
            rows.append([idx] + [d.get(tag) for tag in tags])
        except TypeError as e:
            # list.append takes exactly one argument, so record the failing
            # row and its exception together as a tuple.
            errors.append((r, e))

    return (rows, errors)
Ejemplo n.º 2
0
def transform(source: str, input_filename: str, schema_filename: str, output_filename: str):
    """
    Read a roads layer, normalise its attribute names, reproject it and write it out.

    :param source: "cod" or "osm"
    :param input_filename: path of the roads dataset to read. For "cod" it is
        opened through the ``zip://`` scheme.
    :param schema_filename: only echoed in the start-up print for now; the
        validation step that would consume it is commented out below.
    :param output_filename: path the transformed layer is written to.
    :raises ValueError: if ``source`` is neither "osm" nor "cod".
    """
    print(source, input_filename, schema_filename, output_filename)
    config = parse_yaml('config.yml')

    if source == "osm":
        df_roads = gpd.read_file(input_filename)
        # df_roads = convert_osm_to_gpkg(input_filename, 'osm_roads.gpkg', 'lines')
        schema_mapping = {
            'name:en': 'name_en',
            'name': 'name_loc',
            'highway': 'fclass',
        }
        # GDAL converts OSM to GPKG, tags are written as hstore key-value in attribute 'other_tags'
        # method to convert hstore string to dictionary from SqlAlchemy
        hstore = HSTORE.result_processor(None, None, 'string')
        df_roads['other_tags'] = df_roads['other_tags'].apply(hstore)

        for key, value in schema_mapping.items():
            if key in df_roads.columns:
                # Input attribute is present: rename it to the schema name.
                df_roads = df_roads.rename(columns={key: value})
            elif value not in df_roads.columns:
                # Neither the input nor the output attribute exists, so pull
                # the value from the parsed 'other_tags' dictionary. Entries
                # that are not dictionaries are passed through unchanged.
                df_roads[value] = df_roads['other_tags'].apply(
                    lambda x: x.get(key) if isinstance(x, dict) else x)

        # now remove columns which aren't in schema:
        schema_to_keep = list(schema_mapping.values())
        # add geometry to schema
        schema_to_keep.append('geometry')
        df_roads = df_roads.filter(schema_to_keep)

    elif source == "cod":
        df_roads = gpd.read_file(f'zip://{input_filename}')

        # COD data has some NAs
        df_roads = df_roads[df_roads['geometry'].notna()]
        schema_mapping = {'TYPE': 'fclass'}
        # Rename columns using schema_mapping
        df_roads = df_roads.rename(columns=schema_mapping)

    else:
        # Without this, an unknown source surfaced later as an
        # UnboundLocalError on df_roads.
        raise ValueError(f'Unsupported source {source!r}: expected "osm" or "cod"')

    # TODO need to convert from XML to GPKG rather than OSM to GPKG
    # Change CRS
    df_roads = df_roads.to_crs(config['constants']['crs'])
    # Make columns needed for validation
    ### df_roads['geometry_type'] = df_roads['geometry'].apply(lambda x: x.geom_type)
    ### df_roads['crs'] = df_roads.crs
    # Validate
    ### validate(instance=df_roads.to_dict('list'), schema=parse_yaml(schema_filename))
    # Write to output
    df_roads.to_file(output_filename, encoding='utf8')
Ejemplo n.º 3
0
def transform_osm(input_filename, schema_mapping):
    """Load an OSM-derived roads layer and reduce it to the schema's columns.

    :param input_filename: path of the dataset read with ``gpd.read_file``.
    :param schema_mapping: dict mapping input attribute names to output
        column names.
    :return: the frame filtered to the mapped output columns plus
        ``geometry``.
    """
    df_roads = gpd.read_file(input_filename)
    # df_roads = convert_osm_to_gpkg(input_filename, 'osm_roads.gpkg', 'lines')

    # GDAL converts OSM to GPKG, tags are written as hstore key-value in attribute 'other_tags'
    # method to convert hstore string to dictionary from SqlAlchemy
    hstore = HSTORE.result_processor(None, None, 'string')
    df_roads['other_tags'] = df_roads['other_tags'].apply(hstore)

    for key, value in schema_mapping.items():
        if key in df_roads.columns:
            # Input attribute is present: rename it to the schema name.
            df_roads = df_roads.rename(columns={key: value})
        elif value not in df_roads.columns:
            # Neither the input nor the output attribute exists, so pull the
            # value from the parsed 'other_tags' dictionary. Entries that are
            # not dictionaries are passed through unchanged.
            df_roads[value] = df_roads['other_tags'].apply(
                lambda x: x.get(key) if isinstance(x, dict) else x)

    # now remove columns which aren't in schema:
    schema_to_keep = list(schema_mapping.values())
    # add geometry to schema
    schema_to_keep.append('geometry')
    return df_roads.filter(schema_to_keep)
Ejemplo n.º 4
0
    def test_postgresql_hstore_subtypes(self):
        """Check how HSTORE is rendered for several ``text_type`` choices."""
        # Each HSTORE variant is paired with the source text expected from
        # the autogenerate renderer.
        expectations = (
            (HSTORE(), "postgresql.HSTORE(text_type=sa.Text())"),
            (
                HSTORE(text_type=String()),
                "postgresql.HSTORE(text_type=sa.String())",
            ),
            (
                HSTORE(text_type=BYTEA()),
                "postgresql.HSTORE(text_type=postgresql.BYTEA())",
            ),
        )
        for hstore_type, rendered in expectations:
            eq_ignore_whitespace(
                autogenerate.render._repr_type(
                    hstore_type, self.autogen_context),
                rendered)

        # After rendering, the dialect import must be among the collected imports.
        required_import = 'from sqlalchemy.dialects import postgresql'
        assert required_import in self.autogen_context.imports
0
def hstore2dict(str):
    """Convert an HSTORE-formatted string into a Python dictionary.

    GDAL uses this format to store the key-value pairs from the OSM XML
    in a single string attribute.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept as-is
    # because it is part of the function's signature.
    return HSTORE.result_processor(None, None, 'string')(str)