コード例 #1
0
ファイル: cli.py プロジェクト: sunlightpolicy/sql4housing
def get_connection(source):
    '''
    Get a DB connection from the CLI args or defaults to postgres:///mydb

    Attaches `engine` and `session` to `source` as side effects, creates
    the database if it does not exist, and records PostGIS availability
    on `source.geo`.
    '''
    source.engine = create_engine(source.db_name)
    ui.header('Connecting to database %s' % source.db_name)

    if not database_exists(source.engine.url):
        create_database(source.engine.url)
        ui.item("Creating database %s" % source.db_name)

    Session = sessionmaker()
    Session.configure(bind=source.engine)

    source.session = Session()

    gis_q = 'SELECT PostGIS_version();'
    # Check for PostGIS support. A failed statement leaves the
    # transaction aborted (notably on PostgreSQL), so roll back on
    # failure instead of committing — committing an aborted transaction
    # would raise a second error.
    try:
        source.session.execute(gis_q)
        source.geo = True
        source.session.commit()
    except (OperationalError, ProgrammingError):
        source.geo = False
        source.session.rollback()

    if source.geo:
        ui.item('PostGIS is installed. Geometries will be imported '
                'as PostGIS geoms.')
コード例 #2
0
    def __get_socrata_data(self, page_size=5000):
        '''
        Iterate over a dataset's pages using the Socrata API.

        :param page_size: number of records requested per API call.
        :yields: one list of records per page; stops after the first
            short page (fewer than `page_size` records).
        '''
        ui.item("Gathering data (this can take a bit for large datasets).")
        page_num = 0
        more_pages = True

        while more_pages:
            try:

                api_data = self.client.get(
                    self.dataset_id,
                    limit=page_size,
                    offset=page_size * page_num,
                )

                # A short page signals the end of the dataset.
                if len(api_data) < page_size:
                    more_pages = False
                page_num += 1
                yield api_data

            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit can still break out of the retry loop; any API
            # error triggers a best-effort sleep-and-retry as before.
            except Exception:
                ui.item("Sleeping for 10 seconds to avoid timeout")
                time.sleep(10)
コード例 #3
0
def create_metadata(data, mappings):
    '''
    Given a dictionary of data, maps python types of each value to
    SQLAlchemy types.

    Column types are inferred from the first record holding a non-empty
    value; a 'geometry' column always maps to a generic PostGIS geometry.
    '''
    ui.item("Gathering metadata")
    print()
    metadata = []
    for col_name in data[0].keys():
        # Geometry columns don't need inspection of record values.
        if col_name == 'geometry':
            metadata.append(
                (col_name, Geometry(geometry_type='GEOMETRY', srid=4326)))
            continue
        for record in data:
            if not record[col_name]:
                continue
            py_type = type(record[col_name])
            print(col_name, ":", py_type)
            try:
                metadata.append((col_name, mappings[py_type]))
            except KeyError:
                # Unmappable types are reported and the column skipped.
                warnings.warn(
                    'Unable to map "%s" to a SQL type.' % col_name)
            break
    return metadata
コード例 #4
0
 def __get_metadata(self):
     '''
     Maps provided esriFieldTypes to sqlalchemy types.

     :returns: list of (column_name, sqlalchemy_type) tuples, with a
         trailing PostGIS geometry column using this source's SRID.
     '''
     ui.item("Gathering metadata")
     print()
     metadata = []
     for col in self.data_info['fields']:
         col_name = col['name'].lower().replace(" ", "_")
         print(col_name, ": ", col['type'])
         try:
             metadata.append((col_name, self.col_mappings[col['type']]))
         except KeyError:
             # Warn and skip on an unmapped esri type instead of crashing
             # the whole import — consistent with the other metadata
             # helpers in this project.
             warnings.warn('Unable to map "%s" to a SQL type.' % col_name)
             continue
     metadata.append(('geometry',
                      Geometry(geometry_type='GEOMETRY', srid=self.srid)))
     return metadata
コード例 #5
0
def geojson_data(geojson):
    '''
    Parses ['features'] within geojson data and reformats all variable names.

    Property keys are lowercased with spaces replaced by underscores;
    each feature's geometry is carried along under a 'geometry' key.
    '''
    ui.item(
        "Gathering data (this can take a bit for large datasets).")
    reformatted = []
    for feature in geojson['features']:
        record = {}
        for key, value in feature['properties'].items():
            record[key.lower().replace(" ", "_")] = value
        record['geometry'] = feature['geometry']
        reformatted.append(record)
    return reformatted
コード例 #6
0
def spreadsheet_metadata(spreadsheet):
    '''
    Given a spreadsheet object, maps column types as interpreted by pandas into
    SQLAlchemy types.

    Columns with dtypes absent from the mapping are reported and skipped.
    '''
    ui.item("Gathering metadata")
    print()
    metadata = []
    dtype_by_col = dict(spreadsheet.df.dtypes)
    for col_name, col_type in dtype_by_col.items():
        print(col_name, ":", col_type)
        if col_type not in spreadsheet.col_mappings:
            warnings.warn('Unable to map "%s" to a SQL type.' % col_name)
            continue
        metadata.append((col_name, spreadsheet.col_mappings[col_type]))
    return metadata
コード例 #7
0
 def __get_metadata(self):
     '''
     Uses provided metadata to map column types to SQLAlchemy.

     Columns whose dataTypeName has no mapping are reported and skipped.
     '''
     ui.item("Gathering metadata")
     print()
     columns = self.client.get_metadata(self.dataset_id)['columns']
     metadata = []
     for column in columns:
         field = column['fieldName']
         type_name = column['dataTypeName']
         print(field, ":", type_name)
         try:
             sql_type = self.col_mappings[type_name]
         except KeyError:
             warnings.warn('Unable to map "%s" to a SQL type.' % field)
             continue
         metadata.append((field, sql_type))
     return metadata
コード例 #8
0
ファイル: cli.py プロジェクト: sunlightpolicy/sql4housing
def get_binding(source):
    '''
    Translate the source's metadata into a SQLAlchemy binding

    This looks at each column type in the metadata and creates a
    SQLAlchemy binding with columns to match. For now it fails loudly if it
    encounters a column type we've yet to map to its SQLAlchemy type.
    '''

    record_fields = {
        '__tablename__': source.tbl_name,
        '_pk_': Column(Integer, primary_key=True)
    }

    ui.header('Setting up new table, "%s", from %s source fields' %
              (source.tbl_name, source.name))

    for col_name, col_type in source.metadata:

        # Geometry columns need PostGIS; try to enable it on the fly.
        # (isinstance against Geometry directly — type(Geometry()) is
        # just Geometry.)
        if isinstance(col_type, Geometry) and not source.geo:
            try:
                source.session.execute("CREATE EXTENSION POSTGIS;")
                ui.item(
                    "Adding PostGIS extension to support %s column." \
                    % col_name)
                source.session.commit()
                source.geo = True
            except Exception:
                # Narrowed from a bare `except:`; also roll back so the
                # session isn't stuck in an aborted transaction after the
                # failed CREATE EXTENSION.
                source.session.rollback()
                msg = (
                    '"%s" is a %s column but your database doesn\'t support '
                    'PostGIS so it\'ll be skipped.') % (
                        col_name,
                        col_type,
                    )
                ui.item(msg)
                continue

        if col_name.startswith(':@computed'):
            ui.item('Ignoring computed column "%s".' % col_name)

            continue

        try:

            col_name = utils.clean_string(col_name)

            # Fails loudly (AssertionError) on an unmapped type, as the
            # docstring promises.
            assert (col_type
                    ), 'Unable to map %s type to a SQL type.' % (source.name)
            record_fields[col_name] = Column(col_type)

        except NotImplementedError as e:
            ui.item('%s' % str(e))

    source.binding = type('DataRecord', (declarative_base(), ), record_fields)
コード例 #9
0
ファイル: cli.py プロジェクト: sunlightpolicy/sql4housing
def insert_source(source):
    '''
    Gets the connection and binding and inserts data.

    CenPy sources insert via their own DataFrame path and therefore
    skip the binding / table create-drop steps.
    '''

    get_connection(source)

    needs_binding = not isinstance(source, sc.CenPy)

    if needs_binding:
        get_binding(source)

    # Replace any pre-existing destination table.
    if source.engine.dialect.has_table(source.engine, source.tbl_name):
        print()
        warnings.warn(("Destination table already exists. Current table " +
                       "will be dropped and replaced."))
        print()
        if needs_binding:
            source.binding.__table__.drop(source.engine)

    try:
        if needs_binding:
            source.binding.__table__.create(source.engine)
    except ProgrammingError as e:
        raise CLIError('Error creating destination table: %s' % str(e))

    circle_bar = FillingCirclesBar('  ▶ Loading from source',
                                   max=source.num_rows)
    source.insert(circle_bar)
    circle_bar.finish()

    ui.item('Committing rows (this can take a bit for large datasets).')
    source.session.commit()

    ui.header('Successfully imported %s rows.' % (source.num_rows),
              color='\033[92m')

    # Socrata keeps an HTTP client open; release it once done.
    if source.name == "Socrata" and source.client:
        source.client.close()

    return
コード例 #10
0
ファイル: cli.py プロジェクト: sunlightpolicy/sql4housing
    def parse_items(output_dict):
        # Load every dataset listed under one section of the yaml config.
        # Any failure skips the whole section with a diagnostic message
        # (best-effort bulk loading).
        try:
            for dataset in output[output_dict]:
                if not dataset:
                    continue
                location, tbl_name = list(dataset.items())[0]
                source = source_mapper[output_dict](location)
                if tbl_name:
                    source.tbl_name = tbl_name
                if db_name:
                    source.db_name = db_name
                insert_source(source)
        except Exception as e:
            ui.item(("Skipping %s load due to error: \"%s\". Double check " +
                     "formatting of bulk_load.yaml if this was " +
                     "unintentional.") % (output_dict, e))
            print()
コード例 #11
0
    def __extract_file(self):
        '''
        Extracts data from zip files if a hyperlink is provided and reads
        the saved shp file. Creates the default table name using a sanitized
        version of the file's name.

        :returns: (table_name, geo_interface) for the shapefile.
        '''
        try:
            z = zipfile.ZipFile(io.BytesIO(
                requests.get(self.location).content))
            ui.item("Extracting shapefile to folder")
            z.extractall()
            # Members matching the shapefile extensions sort as
            # dbf, prj, shp, shx, so index 2 is the .shp member —
            # assumes the archive holds exactly one such set (TODO
            # confirm for multi-layer archives).
            shp = [y for y in sorted(z.namelist())
                   for ending in ['dbf', 'prj', 'shp', 'shx']
                   if y.endswith(ending)][2]

        # Narrowed from a bare `except:` so Ctrl-C isn't swallowed.
        # Any download/unzip failure falls back to treating
        # self.location as a local shapefile path, as before.
        except Exception:
            shp = self.location

        ui.item("Reading shapefile")
        # set default table name: file stem, lowercased and sanitized
        tbl_name = shp[shp.rfind("/") + 1:-4].lower()
        tbl_name = utils.clean_string(tbl_name)
        return tbl_name, shapefile.Reader(shp).__geo_interface__
コード例 #12
0
    def create_df(self, product, year, place_type, place, level, variables):
        '''
        Build a census DataFrame with cleaned column names.

        :param product: 'Decennial2010' or 'ACS'.
        :param year: ACS year; falsy falls back to cenpy's default ACS.
        :param place_type: one of 'msa', 'csa', 'county', 'state',
            'placename' — selects the cenpy lookup method.
        :param place: place identifier passed to the lookup.
        :param level: geographic aggregation level.
        :param variables: census variables to retrieve.
        :raises ValueError: if `product` is not a supported product name.
        '''
        if product == 'Decennial2010':
            cen_prod = products.Decennial2010()
        elif product == 'ACS' and year:
            cen_prod = products.ACS(year)
        elif product == 'ACS':
            cen_prod = products.ACS()
        else:
            # Previously fell through with cen_prod unbound, crashing
            # below with a confusing NameError.
            raise ValueError('Unsupported census product: %r' % product)

        place_mapper = {
            'msa': cen_prod.from_msa,
            'csa': cen_prod.from_csa,
            'county': cen_prod.from_county,
            'state': cen_prod.from_state,
            'placename': cen_prod.from_place
        }
        ui.item(("Retrieving variables %s for all %ss in %s. " +
            "This can take some time for large datasets.") % \
            (variables, level, place))
        print()
        print(place, level, variables)
        df = place_mapper[place_type](place, level=level, variables=variables)
        df.columns = [utils.clean_string(x) for x in df.columns]
        return df
コード例 #13
0
ファイル: cli.py プロジェクト: sunlightpolicy/sql4housing
def load_yaml():
    '''
    Bulk-load every source declared in bulk_load.yaml.

    Reads the DATABASE name plus per-source-type sections (geojsons,
    shapefiles, csvs, excels, HUD tables, Socrata, Census) and calls
    insert_source() for each dataset. Every section is best-effort: a
    failure is reported and that section skipped rather than aborting
    the whole run.
    '''
    # NOTE(review): bulk_load.yaml is assumed to be trusted local
    # config; if it could ever come from an untrusted source, switch to
    # yaml.safe_load. The `with` block fixes a leaked file handle.
    with open('bulk_load.yaml') as yaml_file:
        output = yaml.load(yaml_file, Loader=Loader)

    db_name = output['DATABASE']

    source_mapper = {
        'GEOJSONS': sc.GeoJson,
        'SHAPEFILES': sc.Shape,
        'CSVS': sc.Csv,
        'EXCELS': sc.Excel,
        'HUD_TABLES': sc.HudPortal
    }

    def parse_items(output_dict):
        # Load every dataset under one section; skip the section with a
        # diagnostic on any error.
        try:
            for dataset in output[output_dict]:
                if not dataset:
                    continue
                location, tbl_name = list(dataset.items())[0]
                source = source_mapper[output_dict](location)
                if tbl_name:
                    source.tbl_name = tbl_name
                if db_name:
                    source.db_name = db_name
                insert_source(source)
        except Exception as e:
            ui.item(("Skipping %s load due to error: \"%s\". Double check " +
                     "formatting of bulk_load.yaml if this was " +
                     "unintentional.") % (output_dict, e))
            print()

    for output_dict in source_mapper.keys():
        parse_items(output_dict)

    # Socrata section: one SocrataPortal source per (site, dataset).
    try:
        socrata_sites = output.get('SOCRATA').get('sites')
        app_token = output.get('SOCRATA').get('app_token')
        if socrata_sites:
            for site in socrata_sites:
                url = site['url']
                for dataset in site['datasets']:
                    dataset_id, tbl_name = list(dataset.items())[0]
                    source = sc.SocrataPortal(url, dataset_id, app_token,
                                              tbl_name)
                    if db_name:
                        source.db_name = db_name
                    if tbl_name:
                        source.tbl_name = tbl_name
                    insert_source(source)
    except Exception as e:
        # Fixed "if this is was" typo in the user-facing message.
        ui.item(("Skipping Socrata load due to error: \"%s\". Double check " +
                 "formatting of bulk_load.yaml if this was " +
                 "unintentional.") % e)
        print()

    # Census section: one CenPy source per declared dataset.
    try:
        place_type = output['CENSUS'].get('place_type')
        place_name = output['CENSUS'].get('place_name')
        level = output['CENSUS'].get('level')
        for dataset in output['CENSUS']['datasets']:
            if dataset.get('ACS'):
                product = 'ACS'
            if dataset.get('DECENNIAL2010'):
                product = 'Decennial2010'
            year = dataset[product].get('year')
            tbl_name = dataset[product]['tbl_name']
            variables = dataset[product]['variables']
            source = sc.CenPy(product, year, place_type, place_name, level,
                              variables)
            if db_name:
                source.db_name = db_name
            if tbl_name:
                source.tbl_name = tbl_name
            insert_source(source)
    except Exception as e:
        ui.item(
            ("Skipping Census load due to error: \"%s\". Double check " +
             "formatting of bulk_load.yaml if this was unintentional.") % e)
        print()
コード例 #14
0
 def insert(self, circle_bar):
     '''
     Write self.df into the destination PostGIS table, replacing it.

     `circle_bar` is accepted for interface parity with other sources'
     insert() methods but is not used here.
     '''
     ui.item("Inserting into PostGIS.")
     frame = self.df
     frame.postgis.to_postgis(
         con=self.engine,
         table_name=self.tbl_name,
         geometry='geometry',
         if_exists='replace')