def get_connection(source):
    '''Get a DB connection from the CLI args or defaults to postgres:///mydb'''
    source.engine = create_engine(source.db_name)
    ui.header('Connecting to database %s' % source.db_name)

    if not database_exists(source.engine.url):
        create_database(source.engine.url)
        ui.item("Creating database %s" % source.db_name)

    Session = sessionmaker()
    Session.configure(bind=source.engine)
    source.session = Session()

    # Check for PostGIS support
    gis_q = 'SELECT PostGIS_version();'
    try:
        source.session.execute(gis_q)
        source.geo = True
    except (OperationalError, ProgrammingError):
        source.geo = False
    source.session.commit()

    if source.geo:
        ui.item(
            'PostGIS is installed. Geometries will be imported '
            'as PostGIS geoms.')
def get_binding(source):
    '''Translate the source's metadata into a SQLAlchemy binding

    This looks at each column type in the metadata and creates a SQLAlchemy
    binding with columns to match. For now it fails loudly if it encounters a
    column type we've yet to map to its SQLAlchemy type.
    '''
    record_fields = {
        '__tablename__': source.tbl_name,
        '_pk_': Column(Integer, primary_key=True)
    }

    ui.header(
        'Setting up new table, "%s", from %s source fields' % (
            source.tbl_name, source.name))

    for col_name, col_type in source.metadata:
        if isinstance(col_type, type(Geometry())) and not source.geo:
            # Try to enable PostGIS on the fly so geometry columns can load
            try:
                source.session.execute("CREATE EXTENSION POSTGIS;")
                ui.item(
                    "Adding PostGIS extension to support %s column."
                    % col_name)
                source.session.commit()
                source.geo = True
            except Exception:
                msg = (
                    '"%s" is a %s column but your database doesn\'t support '
                    'PostGIS so it\'ll be skipped.') % (col_name, col_type)
                ui.item(msg)
                continue

        if col_name.startswith(':@computed'):
            ui.item('Ignoring computed column "%s".' % col_name)
            continue

        try:
            assert col_type, \
                'Unable to map %s type to a SQL type.' % (source.name)
            record_fields[col_name] = Column(col_type)
        except NotImplementedError as e:
            ui.item('%s' % str(e))

    source.binding = type('DataRecord', (declarative_base(),), record_fields)
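# A minimal, illustrative sketch of the contract get_binding() relies on: an
# object exposing name, tbl_name, geo, session, and metadata, where metadata
# is an iterable of (column name, SQLAlchemy/GeoAlchemy type instance) pairs.
# The SimpleNamespace stand-in, column names, and table name below are
# hypothetical; they are not part of the loader itself.
def _example_get_binding():
    from types import SimpleNamespace
    from sqlalchemy import Text, DateTime

    source = SimpleNamespace(
        name='Csv',
        tbl_name='permits',
        geo=True,      # pretend PostGIS is already available
        session=None,  # only needed when a geometry column forces CREATE EXTENSION
        metadata=[
            ('permit_number', Text()),
            ('issued_date', DateTime()),
            ('units', Integer()),
        ],
    )
    get_binding(source)
    # source.binding is now a declarative model for the "permits" table,
    # with an auto-added _pk_ primary key column.
    return source.binding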
def insert_source(source):
    '''Gets the connection and binding and inserts data.'''
    get_connection(source)
    if not isinstance(source, sc.CenPy):
        get_binding(source)

    if source.engine.dialect.has_table(source.engine, source.tbl_name):
        print()
        warnings.warn(
            "Destination table already exists. Current table "
            "will be dropped and replaced.")
        print()
        if not isinstance(source, sc.CenPy):
            source.binding.__table__.drop(source.engine)

    try:
        if not isinstance(source, sc.CenPy):
            source.binding.__table__.create(source.engine)
    except ProgrammingError as e:
        raise CLIError('Error creating destination table: %s' % str(e))

    circle_bar = FillingCirclesBar(
        ' ▶ Loading from source', max=source.num_rows)
    source.insert(circle_bar)
    circle_bar.finish()

    ui.item('Committing rows (this can take a bit for large datasets).')
    source.session.commit()

    success = 'Successfully imported %s rows.' % (source.num_rows)
    ui.header(success, color='\033[92m')

    if source.name == "Socrata" and source.client:
        source.client.close()

    return
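# Illustrative call sequence for insert_source() with one of the file-based
# source classes. The path and table name are placeholders; the source object
# is expected to provide db_name, tbl_name, name, metadata, num_rows, and an
# insert(progress_bar) method, as used above.
#
#     source = sc.Csv('./data/permits.csv')
#     source.tbl_name = 'permits'
#     insert_source(source)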
def get_binding(dataset_metadata, geo, dest, source):
    """Translate the Socrata API metadata into a SQLAlchemy binding

    This looks at each column type in the Socrata API response and creates a
    SQLAlchemy binding with columns to match. For now it fails loudly if it
    encounters a column type we've yet to map to its SQLAlchemy type.
    """
    if dest:
        table_name = dest
    elif source == "Socrata":
        table_name = get_table_name(dataset_metadata['name'])

    record_fields = {
        '__tablename__': table_name,
        '_pk_': Column(Integer, primary_key=True)
    }

    ui.header(
        'Setting up new table, "%s", from %s source fields' % (
            table_name, source))

    geo_types = ('location', 'point', 'multipolygon', 'esriFieldTypeGeometry')

    for col in dataset_metadata:
        if source == "Socrata":
            col_name = col['fieldName'].lower()
            col_type = col['dataTypeName']
        elif source == "HUD":
            col_name = col['name'].lower()
            col_type = col['type']

        if col_type in geo_types and geo is False:
            msg = (
                '"%s" is a %s column but your database doesn\'t support '
                'PostGIS so it\'ll be skipped.') % (col_name, col_type)
            ui.item(msg)
            continue

        if col_name.startswith(':@computed'):
            ui.item('Ignoring computed column "%s".' % col_name)
            continue

        try:
            print(col_name, ": ", col_type)
            record_fields[col_name] = get_sql_col(col_type, source)
        except NotImplementedError as e:
            ui.item('%s' % str(e))
            continue

    return type('SocrataRecord', (declarative_base(),), record_fields)
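# For reference, the two metadata shapes this get_binding() iterates over. The
# keys (fieldName/dataTypeName and name/type) come straight from the code
# above; the example field names and type values are hypothetical.
#
#     Socrata (client.get_metadata(dataset_id)['columns']):
#         [{'fieldName': 'Permit_Number', 'dataTypeName': 'text'},
#          {'fieldName': 'Location', 'dataTypeName': 'point'}, ...]
#
#     HUD (the 'fields' key of the HUD JSON response):
#         [{'name': 'OBJECTID', 'type': 'esriFieldTypeOID'},
#          {'name': 'SHAPE', 'type': 'esriFieldTypeGeometry'}, ...]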
def get_connection(db_str, dataset_metadata, source):
    """Get a DB connection from the CLI args and Socrata API metadata

    Uses the DB URL passed in by the user to generate a database connection.
    By default, returns a local SQLite database.
    """
    if db_str:
        engine = create_engine(db_str)
        ui.header('Connecting to database')
    else:
        default = default_db_str(source)
        ui.header('Connecting to database')
        engine = create_engine(default)
        ui.item('Using default SQLite database "%s".' % default)

    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()

    # Check for PostGIS support
    gis_q = 'SELECT PostGIS_version();'
    try:
        session.execute(gis_q)
        geo_enabled = True
    except (OperationalError, ProgrammingError):
        geo_enabled = False
    finally:
        session.commit()

    if geo_enabled:
        ui.item(
            'PostGIS is installed. Geometries will be imported '
            'as PostGIS geoms.')
    else:
        ui.item('Query "%s" failed. Geometry columns will be skipped.' % gis_q)

    return engine, session, geo_enabled
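# Hedged usage sketch: connect with an explicit DB URL, or pass a falsy db_str
# to fall back to the SQLite default. The URL and `metadata` variable below
# are placeholders.
#
#     engine, session, geo_enabled = get_connection(
#         'postgresql:///mydb', metadata, 'Socrata')
#     # geo_enabled tells get_binding() whether geometry columns can be kept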
def load_yaml():
    output = yaml.load(open('bulk_load.yaml'), Loader=Loader)
    db_name = output['DATABASE']
    source_mapper = {
        'GEOJSONS': sc.GeoJson,
        'SHAPEFILES': sc.Shape,
        'CSVS': sc.Csv,
        'EXCELS': sc.Excel,
        'HUD_TABLES': sc.HudPortal
    }

    def parse_items(output_dict):
        try:
            for dataset in output[output_dict]:
                location, tbl_name = list(dataset.items())[0]
                source = source_mapper[output_dict](location)
                if tbl_name:
                    source.tbl_name = tbl_name
                insert_source(source)
        except Exception as e:
            ui.item(
                "Skipping %s load due to error: \"%s\". Double check "
                "formatting of bulk_load.yaml if this was unintentional."
                % (output_dict, e))
            print()

    for output_dict in source_mapper.keys():
        parse_items(output_dict)

    try:
        socrata_sites = output.get('SOCRATA').get('sites')
        app_token = output.get('SOCRATA').get('app_token')
        if socrata_sites:
            for site in socrata_sites:
                url = site['url']
                for dataset in site['datasets']:
                    dataset_id, tbl_name = list(dataset.items())[0]
                    source = sc.SocrataPortal(
                        url, dataset_id, app_token, tbl_name)
                    insert_source(source)
    except Exception as e:
        ui.item(
            "Skipping Socrata load due to error: \"%s\". Double check "
            "formatting of bulk_load.yaml if this was unintentional." % e)
        print()

    try:
        place_type = output['CENSUS'].get('place_type')
        place_name = output['CENSUS'].get('place_name')
        level = output['CENSUS'].get('level')
        for dataset in output['CENSUS']['datasets']:
            if dataset.get('ACS'):
                key, product = 'ACS', 'ACS'
            if dataset.get('DECENNIAL2010'):
                key, product = 'DECENNIAL2010', 'Decennial2010'
            year = dataset[key].get('year')
            tbl_name = dataset[key]['tbl_name']
            variables = dataset[key]['variables']
            source = sc.CenPy(
                product, year, place_type, place_name, level, variables)
            insert_source(source)
    except Exception as e:
        ui.item(
            "Skipping Census load due to error: \"%s\". Double check "
            "formatting of bulk_load.yaml if this was unintentional." % e)
        print()
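# Sketch of the bulk_load.yaml layout load_yaml() reads, inferred from the keys
# accessed above. Every path, URL, dataset ID, token, table name, and Census
# value shown is a placeholder.
#
#     DATABASE: postgresql:///mydb
#     CSVS:
#       - ./data/permits.csv: permits
#     GEOJSONS:
#       - ./data/parcels.geojson: parcels
#     SHAPEFILES:
#       - ./data/zoning.shp: zoning
#     EXCELS:
#       - ./data/budget.xlsx: budget
#     HUD_TABLES:
#       - https://example.com/hud/query?f=json: hud_table
#     SOCRATA:
#       app_token: YOUR_APP_TOKEN
#       sites:
#         - url: data.example.gov
#           datasets:
#             - abcd-1234: permits_socrata
#     CENSUS:
#       place_type: county
#       place_name: Example County
#       level: tract
#       datasets:
#         - ACS:
#             year: 2017
#             tbl_name: acs_tract
#             variables: ['B01001_001E']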
def main():
    arguments = docopt(__doc__)
    site = arguments['<site>']

    if arguments['--HUD']:
        source = "HUD"
        dataset_id = site
        client = None
    if arguments['--Socrata']:
        source = "Socrata"
        client = Socrata(site, arguments.get('-a'))

    try:
        if arguments.get('ls'):
            datasets = list_datasets(client, site)
            print(tabulate(datasets, headers='keys', tablefmt='psql'))
        elif arguments.get('insert'):
            if source == "Socrata":
                dataset_id = arguments['<dataset_id>']
                metadata = client.get_metadata(dataset_id)['columns']
            if source == "HUD":
                metadata = json.loads(
                    urllib.request.urlopen(site).read())['fields']

            engine, session, geo = get_connection(
                arguments['-d'], metadata, source)

            if arguments['-t']:
                Binding = get_binding(metadata, geo, arguments['-t'], source)
            else:
                Binding = get_binding(metadata, geo, dataset_id, source)

            # Create the table
            try:
                Binding.__table__.create(engine)
            except ProgrammingError as e:
                # Catch these here because this is our first attempt to
                # actually use the DB
                if 'already exists' in str(e):
                    raise CLIError(
                        'Destination table already exists. Specify a new '
                        'table name with -t.')
                raise CLIError(
                    'Error creating destination table: %s' % str(e))

            num_rows, data = get_data(source, dataset_id, client)
            bar = FillingCirclesBar(' ▶ Loading from source', max=num_rows)

            # Iterate the dataset and INSERT each page
            if source == "Socrata":
                for page in data:
                    insert_data(page, session, bar, Binding)
            if source == "HUD":
                insert_data(data, session, bar, Binding)

            bar.finish()

            ui.item(
                'Committing rows (this can take a bit for large datasets).')
            session.commit()

            success = 'Successfully imported %s rows.' % (num_rows)
            ui.header(success, color='\033[92m')

        if client:
            client.close()

    except CLIError as e:
        ui.header(str(e), color='\033[91m')
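# Hypothetical command-line examples for main(). The executable name
# ("portal_loader") and every site, dataset ID, token, and table name are
# placeholders inferred from the docopt arguments used above
# (--Socrata/--HUD, ls/insert, -a, -d, -t); the real usage string lives in
# the module docstring.
#
#     portal_loader --Socrata data.example.gov ls
#     portal_loader --Socrata data.example.gov insert abcd-1234 \
#         -a YOUR_APP_TOKEN -d postgresql:///mydb -t permits
#     portal_loader --HUD "https://example.com/hud/query?f=json" insert \
#         -d postgresql:///mydb -t hud_table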