Example #1
import subprocess

import datum

# Note: config is assumed to be ais.app.config, as in the later examples.


def startup():
    """Startup fixture: make database connections and define tables to ignore"""
    new_db_map = {
        'ais-api-broad': 'engine_broad',
        'ais-api-market': 'engine_market',
    }
    proc = subprocess.Popen(
        ['bash', '-c', '. ../../../bin/eb_env_utils.sh; get_prod_env'],
        stdout=subprocess.PIPE)
    output = proc.stdout.read()
    old_prod_env = output.rstrip().decode('utf-8')
    old_db = datum.connect(config['DATABASES'][new_db_map[old_prod_env]])
    new_db = datum.connect(config['DATABASES']['engine'])
    unused_tables = ('spatial_ref_sys', 'alembic_version', 'multiple_seg_line',
                     'service_area_diff', 'address_zip', 'zip_range')
    changed_tables = ()
    ignore_tables = unused_tables + changed_tables

    return {
        'new_db': new_db,
        'old_db': old_db,
        'unused_tables': unused_tables,
        'changed_tables': changed_tables,
        'ignore_tables': ignore_tables
    }
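
As a sketch of how a fixture like this might be consumed (pytest is assumed here; the test body is illustrative, not from the original project):

import pytest


@pytest.fixture(scope='module')
def dbs():
    # Reuse the startup() helper above so all tests share the connections.
    return startup()


def test_connections_and_ignore_list(dbs):
    # Illustrative assertions only: both connections exist, and the
    # ignore list is the union of unused and changed tables.
    assert dbs['new_db'] is not None
    assert dbs['old_db'] is not None
    assert set(dbs['ignore_tables']) == (
        set(dbs['unused_tables']) | set(dbs['changed_tables']))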
Example #2
def startup():
    """Startup fixture: make database connections and define tables to ignore"""
    new_db_map = {
        'ais-api-broad':     'engine_broad',
        'ais-api-market':    'engine_market',
    }
    proc = subprocess.Popen(
        ['bash', '-c', '. ../../../bin/eb_env_utils.sh; get_prod_env'],
        stdout=subprocess.PIPE)
    output = proc.stdout.read()
    old_prod_env = output.rstrip().decode('utf-8')
    old_db = datum.connect(config['DATABASES'][new_db_map[old_prod_env]])
    new_db = datum.connect(config['DATABASES']['engine'])
    unused_tables = ('spatial_ref_sys', 'alembic_version', 'multiple_seg_line',
                     'service_area_diff', 'address_zip', 'zip_range',
                     'dor_parcel_address_analysis')
    changed_tables = ()
    ignore_tables = unused_tables + changed_tables

    return {
        'new_db': new_db,
        'old_db': old_db,
        'unused_tables': unused_tables,
        'changed_tables': changed_tables,
        'ignore_tables': ignore_tables
    }
Example #3
    def get_conn(self):
        if self.conn is None:
            params = self.get_connection(self.db_conn_id)

            if params.conn_type not in self.SCHEMAS:
                raise AirflowException(
                    'Could not create Datum connection for connection '
                    'type {}'.format(params.conn_type))

            logging.info('Establishing connection to {}'.format(self.db_conn_id))
            conn_string = self.get_conn_str()
            self.conn = datum.connect(conn_string)
        return self.conn
Example #4
    def get_conn(self):
        if self.conn is None:
            params = self.get_connection(self.db_conn_id)

            if params.conn_type not in self.SCHEMAS:
                raise AirflowException(
                    'Could not create Datum connection for connection '
                    'type {}'.format(params.conn_type))

            logging.info('Establishing connection to {}'.format(
                self.db_conn_id))
            conn_string = self.get_conn_str()
            self.conn = datum.connect(conn_string)
        return self.conn
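
For context, a hook method like this is normally called lazily from an operator's execute(). A minimal sketch under that assumption (the DatumHook class name and the operator are hypothetical; only get_conn() and conn.execute() are taken from the examples on this page):

from airflow.models import BaseOperator


class DatumQueryOperator(BaseOperator):
    """Hypothetical operator that runs one statement through the hook."""

    def __init__(self, db_conn_id, sql, **kwargs):
        super().__init__(**kwargs)
        self.db_conn_id = db_conn_id
        self.sql = sql

    def execute(self, context):
        hook = DatumHook(db_conn_id=self.db_conn_id)  # hypothetical hook class
        conn = hook.get_conn()  # opens on first use, cached thereafter
        return conn.execute(self.sql)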
Example #5
from datetime import datetime

import datum
from ais import app
from ais.models import Address
from ais.util import parity_for_num, parity_for_range
from passyunk.parser import PassyunkParser
# DEV
# import traceback
# from pprint import pprint

print('Starting...')
start = datetime.now()

config = app.config
Parser = config['PARSER']

parser_tags = config['ADDRESSES']['parser_tags']
sources = config['ADDRESSES']['sources']
db = datum.connect(config['DATABASES']['engine'])
address_table = db['address']
address_tag_table = db['address_tag']
source_address_table = db['source_address']
address_link_table = db['address_link']
street_segment_table = db['street_segment']
address_street_table = db['address_street']
true_range_view_name = 'true_range'

# TODO: something more elegant here.
true_range_select_stmt = '''
    select
         coalesce(r.seg_id, l.seg_id) as seg_id,
         r.low as true_right_from,
         r.high as true_right_to,
         l.low as true_left_from,
Example #6
from datetime import datetime

from passyunk.data import DIRS_STD, SUFFIXES_STD
import datum
from ais.models import Address
from ais.util import parity_for_num, parity_for_range
from ais import app
# DEV
from pprint import pprint
import traceback

start = datetime.now()
print('Starting...')

"""SET UP"""

config = app.config
db = datum.connect(config['DATABASES']['engine'])

source_def = config['BASE_DATA_SOURCES']['parcels']['dor']
source_db_name = source_def['db']
source_db_url = config['DATABASES'][source_db_name]
source_db = datum.connect(source_db_url)
source_field_map = source_def['field_map']
source_table_name = source_def['table']
source_table = source_db[source_table_name]
source_geom_field = source_table.geom_field
field_map = source_def['field_map']

street_table = db['street_segment']
parcel_table = db['dor_parcel']
parcel_error_table = db['dor_parcel_error']
parcel_error_polygon_table = db['dor_parcel_error_polygon']
Example #8
import sys
import datum
from ais import app
# DEV
import traceback
from pprint import pprint
"""SET UP"""

config = app.config
source_def = config['BASE_DATA_SOURCES']['curbs']
source_db = datum.connect(config['DATABASES'][source_def['db']])
source_table = source_db[source_def['table']]
field_map = source_def['field_map']
db = datum.connect(config['DATABASES']['engine'])
curb_table = db['curb']
parcel_curb_table = db['parcel_curb']
"""MAIN"""

# print('Dropping parcel-curb view...')
# db.drop_mview('parcel_curb')

print('Dropping indexes...')
curb_table.drop_index('curb_id')
parcel_curb_table.drop_index('curb_id')
parcel_curb_table.drop_index('parcel_source', 'parcel_row_id')

print('Deleting existing curbs...')
curb_table.delete()

print('Reading curbs from source...')
source_rows = source_table.read()
Example #9
def sync(date, alerts, verbose):
    status = 'ERROR'

    with warnings.catch_warnings(record=True) as w:
        try:
            if verbose:
                console_handler = logging.StreamHandler()
                console_handler.setLevel(logging.DEBUG)
                console_handler.setFormatter(formatter)
                logger.addHandler(console_handler)

            logger.info('Starting...')
            start = arrow.now()

            # Connect to Salesforce
            sf = Salesforce(username=SF_USER,
                            password=SF_PASSWORD,
                            security_token=SF_TOKEN)

            # Connect to database
            dest_db = datum.connect(DEST_DB_DSN)
            dest_tbl = dest_db[DEST_TABLE]
            tmp_tbl = dest_db[DEST_TEMP_TABLE]

            logger.info('Truncating temp table...')
            tmp_tbl.delete()

            sf_query = SF_QUERY

            # If a start date was passed in, handle it.
            if date:
                warnings.warn('Fetched records for {} only'.format(date))
                try:
                    date_comps = [int(x) for x in date.split('-')]
                    start_date = arrow.get(date_obj(*date_comps), 'US/Eastern')\
                                      .to('Etc/UTC')
                except ValueError:
                    raise HandledError('Date parameter is invalid')
                # Newer arrow releases use shift(); replace(days=1) only
                # worked on old versions of the library.
                end_date = start_date.shift(days=1)

                sf_query += ' AND (LastModifiedDate >= {})'.format(start_date)
                sf_query += ' AND (LastModifiedDate < {})'.format(end_date)

            # Otherwise, grab the last updated date from the DB.
            else:
                logger.info('Getting last updated date...')
                start_date_str = dest_db.execute(
                    'select max({}) from {}'.format(
                        DEST_UPDATED_FIELD, DEST_TABLE))[0]
                start_date = arrow.get(start_date_str,
                                       'US/Eastern').to('Etc/UTC')
                sf_query += ' AND (LastModifiedDate > {})'.format(
                    start_date.isoformat())

            logger.info('Fetching new records from Salesforce...')
            try:
                sf_rows = sf.query_all(sf_query)['records']
            except SalesforceMalformedRequest:
                raise HandledError('Could not query Salesforce')

            logger.info('Processing rows...')
            rows = [process_row(sf_row, FIELD_MAP) for sf_row in sf_rows]

            logger.info('Writing to temp table...')
            tmp_tbl.write(rows)

            logger.info('Deleting updated records...')
            update_count = dest_db.execute(DEL_STMT)
            add_count = len(rows) - update_count

            logger.info('Appending new records...')
            dest_tbl.write(rows)

            # We should have added and updated at least 1 record
            if add_count == 0:
                warnings.warn('No records added')
            if update_count == 0:
                warnings.warn('No records updated')

            # TODO this check was causing an obscure httplib error
            # (essentially, timing out) so disabling it for now

            # Check count against Salesforce
            # sf_count = sf.query_all(SF_COUNT_QUERY)['totalSize']
            # db_count = dest_tbl.count()
            # if sf_count != db_count:
            #     warnings.warn('Salesforce has {} rows, database has {}'\
            #                             .format(sf_count, db_count))

            # If we got here, it was successful.
            status = 'SUCCESS'
            logger.info('Ran successfully. Added {}, updated {}.'.format(
                add_count, update_count))

        except Exception:
            logger.exception('Unhandled error')

        finally:
            if alerts:
                msg = '[311] {} - {}'.format(__file__, status)
                if status == 'SUCCESS':
                    msg += ' - {} added, {} updated'.format(
                        add_count, update_count)
                if len(w) > 0:
                    msg += ' - {}.'.format('; '.join(
                        [str(x.message) for x in w]))

                # Try to post to Slack
                try:
                    slack = Slacker(SLACK_TOKEN)
                    slack.chat.post_message(SLACK_CHANNEL, msg)
                except Exception as e:
                    logger.error('Could not post to Slack. '
                                 'The message was:\n\n{}\n\n'
                                 'The error was:\n\n{}'.format(msg, e))
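
DEL_STMT above is imported from the script's config module and isn't shown. Given the write-temp-then-delete-then-append pattern, its likely shape is something along these lines (a sketch only; the table and key names are hypothetical):

# Hypothetical shape of DEL_STMT: remove destination rows that are
# about to be re-written from the temp table. Names are illustrative.
DEL_STMT = '''
    delete from dest_table
    where service_request_id in
        (select service_request_id from dest_temp_table)
'''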
Example #10
from datetime import datetime
import datum
from ais import app

start = datetime.now()
print('Starting...')

'''
SET UP
'''

config = app.config
Parser = config['PARSER']
parser = Parser()
db = datum.connect(config['DATABASES']['engine'])
WRITE_OUT = True
geocode_table = db['geocode']
address_tag_table = db['address_tag']
geocode_tag_map = {
    'pwd_parcel_id': (1, 3, 7),
    'dor_parcel_id': (2, 4, 8)
}
new_geocode_rows = []

print('Reading geocode rows...')
geocode_map = {}
geocode_rows = geocode_table.read()
print('Mapping geocode rows...')
for geocode_row in geocode_rows:
    street_address = geocode_row['street_address']
    if street_address not in geocode_map:
Example #11
File: load_curbs.py  Project: mjumbewu/ais
import sys
import datum
from ais import app
# DEV
import traceback
from pprint import pprint


"""SET UP"""

config = app.config
source_def = config['BASE_DATA_SOURCES']['curbs']
source_db = datum.connect(config['DATABASES'][source_def['db']])
source_table = source_db[source_def['table']]
field_map = source_def['field_map']
db = datum.connect(config['DATABASES']['engine'])
curb_table = db['curb']
parcel_curb_table = db['parcel_curb']


"""MAIN"""

# print('Dropping parcel-curb view...')
# db.drop_mview('parcel_curb')

print('Dropping indexes...')
curb_table.drop_index('curb_id')
parcel_curb_table.drop_index('curb_id')
parcel_curb_table.drop_index('parcel_source', 'parcel_row_id')

print('Deleting existing curbs...')
Example #12
import sys
import csv
from datetime import datetime
# import logging
import datum
from common import process_row
from config import *
from pprint import pprint

start = datetime.now()
print('Starting...')

dest_db = datum.connect(DEST_DB_DSN)
dest_table = dest_db[DEST_TABLE]

print('Dropping existing rows...')
dest_table.delete()

file_path = sys.argv[1]

with open(file_path) as f:
    reader = csv.DictReader(f)

    reader_rows = list(reader)

    print('Reading...')
    # dest_rows = [process_row(row, FIELD_MAP) for row in reader_rows[5:7]]
    dest_rows = [process_row(row, FIELD_MAP) for row in reader_rows]
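
process_row and FIELD_MAP are imported from the script's common and config modules and aren't shown. A plausible sketch of a field-map-driven row processor, with an illustrative mapping (none of these names come from the original project):

# Illustrative mapping: destination column -> source CSV column.
FIELD_MAP = {
    'service_request_id': 'SERVICE_REQUEST_ID',
    'status': 'STATUS',
    'requested_datetime': 'REQUESTED_DATETIME',
}


def process_row(row, field_map):
    # Rename source columns to destination columns; drop everything else.
    return {dest: row.get(src) for dest, src in field_map.items()}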
Example #13
def db_conn(db_conn_string):
    """Generator-style fixture: yield an open connection, then commit and close."""
    db = datum.connect(db_conn_string)
    yield db
    db.save()
    db.close()
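
A generator like this is typically registered as a pytest fixture. A minimal usage sketch under that assumption (the DSN and test body are illustrative, not from the original project):

import pytest

TEST_DB_DSN = 'postgresql://user:pass@localhost/ais_test'  # hypothetical


@pytest.fixture
def db():
    # Delegate to the generator above: setup runs up to the yield,
    # teardown (save + close) runs after the test finishes.
    yield from db_conn(TEST_DB_DSN)


def test_address_table_lookup(db):
    # Tables are addressed by name, as in the other examples here.
    assert db['address'] is not None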
Example #14
from datetime import datetime

import datum
from ais import app
from ais.models import Address
# DEV
import traceback
from pprint import pprint

print('Starting...')
start = datetime.now()

# TODO: This should probably make a DB query for each address, rather than chunking
# into street names. Getting hard to manage.
"""SET UP"""

config = app.config
db = datum.connect(config['DATABASES']['engine'])
tag_fields = config['ADDRESS_SUMMARY']['tag_fields']
geocode_table = db['geocode']
address_table = db['address']
max_values = config['ADDRESS_SUMMARY']['max_values']
geocode_types = config['ADDRESS_SUMMARY']['geocode_types']
geocode_priority_map = config['ADDRESS_SUMMARY']['geocode_priority']
#geocode_types_on_curb = config['ADDRESS_SUMMARY']['geocode_types_on_curb']
geocode_types_in_street = config['ADDRESS_SUMMARY']['geocode_types_in_street']

tag_table = db['address_tag']
link_table = db['address_link']
address_summary_table = db['address_summary']

# DEV
WRITE_OUT = True
Example #15
def sync(date, alerts, verbose):
    status = 'ERROR'

    with warnings.catch_warnings(record=True) as w:
        try:
            if verbose:
                console_handler = logging.StreamHandler()
                console_handler.setLevel(logging.DEBUG)
                console_handler.setFormatter(formatter)
                logger.addHandler(console_handler)

            logger.info('Starting...')
            start = arrow.now()

            # Connect to Salesforce
            sf = Salesforce(username=SF_USER,
                            password=SF_PASSWORD,
                            security_token=SF_TOKEN)

            # Connect to database
            dest_db = datum.connect(DEST_DB_DSN)
            dest_tbl = dest_db[DEST_TABLE]
            tmp_tbl = dest_db[DEST_TEMP_TABLE]

            logger.info('Truncating temp table...')
            tmp_tbl.delete()

            sf_query = SF_QUERY

            # If a start date was passed in, handle it.
            if date:
                warnings.warn('Fetched records for {} only'.format(date))
                try:
                    date_comps = [int(x) for x in date.split('-')]
                    start_date = arrow.get(date_obj(*date_comps), 'US/Eastern')\
                                      .to('Etc/UTC')
                except ValueError:
                    raise HandledError('Date parameter is invalid')
                # Newer arrow releases use shift(); replace(days=1) only
                # worked on old versions of the library.
                end_date = start_date.shift(days=1)

                sf_query += ' AND (LastModifiedDate >= {})'.format(start_date)
                sf_query += ' AND (LastModifiedDate < {})'.format(end_date)

            # Otherwise, grab the last updated date from the DB.
            else:
                logger.info('Getting last updated date...')
                start_date_str = dest_db.execute(
                    'select max({}) from {}'.format(
                        DEST_UPDATED_FIELD, DEST_TABLE))[0]
                start_date = arrow.get(start_date_str, 'US/Eastern').to('Etc/UTC')
                sf_query += ' AND (LastModifiedDate > {})'.format(start_date.isoformat())

            logger.info('Fetching new records from Salesforce...')
            try:
                sf_rows = sf.query_all(sf_query)['records']
            except SalesforceMalformedRequest:
                raise HandledError('Could not query Salesforce')

            logger.info('Processing rows...')
            rows = [process_row(sf_row, FIELD_MAP) for sf_row in sf_rows]

            logger.info('Writing to temp table...')
            tmp_tbl.write(rows)

            logger.info('Deleting updated records...')
            update_count = dest_db.execute(DEL_STMT)
            add_count = len(rows) - update_count

            logger.info('Appending new records...')
            dest_tbl.write(rows)

            # We should have added and updated at least 1 record
            if add_count == 0:
                warnings.warn('No records added')
            if update_count == 0:
                warnings.warn('No records updated')

            # TODO this check was causing an obscure httplib error
            # (essentially, timing out) so disabling it for now

            # Check count against Salesforce
            # sf_count = sf.query_all(SF_COUNT_QUERY)['totalSize']
            # db_count = dest_tbl.count()
            # if sf_count != db_count:
            #     warnings.warn('Salesforce has {} rows, database has {}'\
            #                             .format(sf_count, db_count))

            # If we got here, it was successful.
            status = 'SUCCESS'
            logger.info('Ran successfully. Added {}, updated {}.'.format(
                add_count, update_count))

        except Exception:
            logger.exception('Unhandled error')

        finally:
            if alerts:
                msg = '[311] {} - {}'.format(__file__, status)
                if status == 'SUCCESS':
                    msg += ' - {} added, {} updated'.format(
                        add_count, update_count)
                if len(w) > 0:
                    msg += ' - {}.'.format('; '.join([str(x.message) for x in w]))

                # Try to post to Slack
                try:
                    slack = Slacker(SLACK_TOKEN)
                    slack.chat.post_message(SLACK_CHANNEL, msg)
                except Exception as e:
                    logger.error(
                        'Could not post to Slack. '
                        'The message was:\n\n{}\n\n'
                        'The error was:\n\n{}'.format(msg, e)
                    )
Example #16
from datetime import datetime

import datum
from ais import app
from ais.models import Address
# DEV
from pprint import pprint
import traceback


start = datetime.now()
print('Starting...')


"""SET UP"""

config = app.config
db = datum.connect(config['DATABASES']['engine'])
parcel_table = db['pwd_parcel']
parcel_geom_field = parcel_table.geom_field


source_def = config['BASE_DATA_SOURCES']['parcels']['pwd']
source_db_name = source_def['db']
source_db_url = config['DATABASES'][source_db_name]
source_db = datum.connect(source_db_url)
source_field_map = source_def['field_map']
source_table_name = source_def['table']
source_table = source_db[source_table_name]
source_geom_field = source_table.geom_field

# Read in OPA account nums and addresses
opa_source_def = config['BASE_DATA_SOURCES']['opa_owners']
Example #17
import csv
from copy import deepcopy
from datetime import datetime
import datum
from ais import app
from ais.models import Address
# DEV
import traceback
from pprint import pprint

start = datetime.now()

"""SET UP"""

config = app.config
db = datum.connect(config['DATABASES']['engine'])
source_db = datum.connect(config['DATABASES']['gis'])
# source_table = source_db['usps_zip4s']
source_table = source_db['vw_usps_zip4s_ais']
field_map = {
    'usps_id':          'updatekey',
    'address_low':      'addrlow',
    'address_high':     'addrhigh',
    'address_oeb':      'addroeb',
    'street_predir':    'streetpre',
    'street_name':      'streetname',
    'street_suffix':    'streetsuff',
    'street_postdir':   'streetpost',
    'unit_type':        'addrsecondaryabbr',
    'unit_low':         'addrsecondarylow',
    'unit_high':        'addrsecondaryhigh',