Example #1
0
def main():
    """Load station CSV files for every data source except ``'indego'``.

    For each key in ``cfg['datasources']`` other than ``'indego'``, every
    ``*.csv`` file in ``<data_directory>/<source>/stations`` is read with
    ``CSVSource``. Each row is passed through ``fix_mappings`` with the
    source's ``station_fields`` mapping; rows that then contain a
    ``short_name`` key get a ``capacity`` of ``-1`` when none is present,
    ``system_name``/``system_id`` values, the module ``DEFAULTS`` (via
    ``setdefaults``) and are handed to ``insert_station_dimensions``.
    Rows without ``short_name`` are skipped. Sources whose stations
    directory does not exist are skipped silently.
    """
    cfg = settings.get_config()
    data_dir = os.path.join(cfg['ingestion_settings']['data_directory'])

    for source in cfg['datasources']:
        # Compare by value. The previous ``is not 'indego'`` tested object
        # identity, which is not guaranteed to hold for equal strings, so
        # the 'indego' source was not reliably excluded.
        if source == 'indego':
            continue

        stations_dir = os.path.join(data_dir, source, 'stations')
        if not os.path.isdir(stations_dir):
            continue

        mappings = cfg['datasources'][source]['station_fields']

        for filename in os.listdir(stations_dir):
            if not filename.endswith('.csv'):
                continue

            print('Processing ' + source + ' ' + filename)

            with open(os.path.join(stations_dir, filename), 'r') as fh:
                for row in CSVSource(fh):
                    fix_mappings(row, mappings)
                    if 'short_name' not in row:
                        continue

                    # -1 marks a station whose capacity is not in the file.
                    if 'capacity' not in row:
                        row['capacity'] = -1
                    row['system_name'] = source
                    row['system_id'] = dw.system_dimension.ensure(row)
                    setdefaults(row, DEFAULTS)
                    insert_station_dimensions(row)
Example #2
0
def main():
    """Update staged station names from the ``'indego'`` station CSV files.

    Reads every ``*.csv`` file in ``<data_directory>/indego/stations``,
    passes each row through ``fix_mappings`` with the source's
    ``station_fields`` mapping, and collects ``(name, short_name)`` pairs.
    The pairs are then used as parameters for
    ``QUERY_UPDATE_START_STATIONS`` and ``QUERY_UPDATE_END_STATIONS`` and
    the changes are committed.

    The database connection is always closed before returning, including
    when the stations directory is missing or an error is raised.

    Raises:
        KeyError: if a row has no ``name`` or ``short_name`` key after
            ``fix_mappings`` has been applied.
    """
    cfg = settings.get_config()
    db_conn = util.get_database_connection()
    try:
        data_dir = os.path.join(cfg['ingestion_settings']['data_directory'])

        source = 'indego'

        stations_dir = os.path.join(data_dir, source, 'stations')
        if not os.path.isdir(stations_dir):
            return

        mappings = cfg['datasources'][source]['station_fields']
        names = []

        for filename in os.listdir(stations_dir):
            if not filename.endswith('.csv'):
                continue

            print('Processing ' + source + ' ' + filename)

            with open(os.path.join(stations_dir, filename), 'r') as fh:
                for row in csv.DictReader(fh):
                    fix_mappings(row, mappings)
                    names.append((row['name'], row['short_name']))

        # Run the updates once, after all files are read. Previously the
        # connection was closed inside the loop after the first CSV file,
        # so any further file failed on a closed connection.
        if names:
            db_conn.executemany(QUERY_UPDATE_START_STATIONS, names)
            db_conn.executemany(QUERY_UPDATE_END_STATIONS, names)
            db_conn.commit()
    finally:
        db_conn.close()
Example #3
0
#!/usr/bin/env python
"""
    Ingest trip data into the staging database.
"""

import csv
import logging
import os
import time
from dateutil import parser
from pygrametl.datasources import CSVSource
import pygrametl
from model.DW import DW
from settings import settings

# Module-level state created at import time: the loaded configuration and a
# DW instance (imported from model.DW).
cfg = settings.get_config()
dw = DW()

# (field name, default value) pairs for trip rows.
# NOTE(review): presumably applied to rows that lack these fields by a
# setdefaults-style helper elsewhere in this file -- confirm.
# NOTE(review): 'Round Trip' as the trip_category default stands out next to
# the 'unspecified' defaults used for the other text fields -- confirm it is
# intended.
DEFAULTS = [
    ('customer_gender', 'unspecified'), ('customer_birthyear', -1),
    ('customer_type', 'unspecified'), ('trip_category', 'Round Trip'),
    ('start_station_name', 'unspecified'), ('start_station_latitude', None),
    ('start_station_longitude', None), ('start_station_capacity', None),
    ('end_station_name', 'unspecified'), ('end_station_latitude', None),
    ('end_station_longitude', None), ('end_station_capacity', None)
]

# Configure the root logger at DEBUG level with a timestamped format.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(levelname)-8s %(message)s')
# getLogger() with no name returns the root logger.
logger = logging.getLogger()
# NOTE(review): this handler is not attached to any logger in the lines
# visible here. basicConfig() normally installs a stream handler on the root
# logger already, so also adding ``ch`` to ``logger`` would emit each record
# twice -- confirm how ``ch`` is used further down.
ch = logging.StreamHandler()
Example #4
0
File: util.py Project: j6r/IST402
def get_database_connection():
    """Open and return a SQLite connection to the staging database.

    The database path is read from the ``staging_db_location`` entry of the
    ``ingestion_settings`` section of the configuration. The caller owns the
    returned connection and is responsible for closing it.
    """
    ingestion_settings = settings.get_config()['ingestion_settings']
    db_path = ingestion_settings['staging_db_location']
    connection = sqlite3.connect(db_path)
    return connection
Example #5
0
from googlemaps import Client
from googlemaps.elevation import elevation
from settings import settings
from util import util

# API key read from the ``google_api`` section of the configuration at import
# time.
# NOTE(review): presumably passed to the googlemaps ``Client`` imported above
# -- confirm against the code that uses it.
API_KEY = settings.get_config()['google_api']['api_key']

# Selects at most 500 start stations that still have no elevation
# (start_station_elevation IS NULL) and whose latitude and longitude are
# neither '#N/A' nor the empty string.
GET_START_STATIONS_QUERY = """
SELECT start_station_id, start_station_latitude, start_station_longitude
    FROM start_station
    WHERE start_station_latitude NOT IN('#N/A', '')
        AND start_station_longitude  NOT IN ('#N/A', '')
        AND start_station_elevation IS NULL
    LIMIT 500
"""

# Same selection as GET_START_STATIONS_QUERY, applied to the end_station
# table and its end_station_* columns.
GET_END_STATIONS_QUERY = """
SELECT end_station_id, end_station_latitude, end_station_longitude
    FROM end_station
    WHERE end_station_latitude NOT IN ('#N/A', '') 
        AND end_station_longitude NOT IN ('#N/A', '')
        AND end_station_elevation IS NULL
    LIMIT 500
"""

# Sets the elevation of every start_station row matching a latitude/longitude
# pair. Positional parameters, in order: elevation, latitude, longitude.
# NOTE(review): rows are matched by coordinates rather than by the
# start_station_id that the SELECT above returns -- confirm this is intended.
UPDATE_START_STATION_QUERY = """
    UPDATE start_station
    SET start_station_elevation = ?
    WHERE start_station_latitude = ? AND start_station_longitude = ?
"""
Example #6
0
def set_config(application):
    """Store the result of ``get_config()`` on *application* under ``'config'``.

    *application* must support item assignment; it is modified in place and
    nothing is returned.
    """
    loaded_config = get_config()
    application['config'] = loaded_config
Example #7
0
                        default=False,
                        help="Start as webservice")
    args = parser.parse_args()


def cli():
    """Run the requested action on the given URL and print the result.

    Reads ``action`` and ``url`` from the module-level ``args`` object. The
    action is looked up in a table mapping ``"minify"`` and ``"deminify"`` to
    the corresponding methods of a ``Minifyer`` built with a
    ``Base64EncoderStrategy``.

    Raises:
        KeyError: if ``args.action`` is neither ``"minify"`` nor
            ``"deminify"``.
    """
    minifier = Minifyer(Base64EncoderStrategy())

    # Dispatch table: action name -> bound method implementing it.
    handlers = dict(minify=minifier.minify, deminify=minifier.deminify)

    handler = handlers[args.action]
    print(handler(args.url))


def start_web_server():
    """Print a startup message to stdout, then call ``start()``.

    NOTE(review): ``start`` is defined outside this block; presumably it
    launches the Flask application named in the message -- confirm.
    """
    print("Start Flask")
    start()


if __name__ == "__main__":
    # Script entry point: parse the command line, load the configuration
    # file named by ``args.config``, then run in exactly one mode.
    # NOTE(review): relies on get_arguments() publishing a module-level
    # ``args`` object -- confirm against its definition.
    get_arguments()
    get_config(args.config)

    # Command-line mode is the default; the web server runs only when the
    # ``http`` option is set.
    if not args.http:
        cli()
    else:
        start_web_server()