Beispiel #1
0
 def test_get_datafile_path_with_zip_path(self):
     """Check path resolution when only an explicit ``zip_path`` is supplied.

     The URL argument is ``None``; the expected result is ``file1.txt``
     located directly inside the destination directory.
     """
     # The context manager removes the temporary directory even when the
     # helper or the assertion raises, which a trailing cleanup() would not.
     with tempfile.TemporaryDirectory() as destination:
         zip_path = os.path.join(destination, "foo.zip")
         # presumably writes an archive whose first member is file1.txt;
         # verify against make_test_zip
         self.make_test_zip(zip_path)
         path_to_first_file = get_datafile_path(None, destination, zip_path=zip_path)
         self.assertEqual(os.path.join(destination, "file1.txt"), path_to_first_file)
Beispiel #2
0
def run(url, report, destination=None, download=True):
    """Download IL data, extract it, load the CSV, and compute simple stats.

    Args:
        url: Forwarded to ``download_and_unzip_data`` and
            ``get_datafile_path``.
        report: Forwarded unchanged to ``analyze``.
        destination: Forwarded to ``download_and_unzip_data``; the value
            that helper returns is used for the remaining steps.
        download: Accepted but not referenced in this function body.
    """
    logger.info('*** IL Data Analysis Started ***')
    data_dir = download_and_unzip_data(url, destination)
    stops = load_csv(get_datafile_path(url, data_dir))
    analyze(stops, report)
def run(url, report, destination=None, download=True):
    """Fetch the IL dataset, read its CSV, and hand the stops to ``analyze``.

    Args:
        url: Passed to both ``download_and_unzip_data`` and
            ``get_datafile_path``.
        report: Passed through to ``analyze`` as its second argument.
        destination: Passed to ``download_and_unzip_data``; subsequent steps
            use whatever that helper returns.
        download: Present in the signature but unused by this body.
    """
    logger.info('*** IL Data Analysis Started ***')
    extracted_to = download_and_unzip_data(url, destination)
    datafile = get_datafile_path(url, extracted_to)
    analyze(load_csv(datafile), report)
Beispiel #4
0
def run(url, report, destination=None, download=True):
    """Download MD data, extract it, load the XLS, and compute simple stats.

    Args:
        url: Forwarded to ``download_and_unzip_data`` and
            ``get_datafile_path``.
        report: Forwarded unchanged to ``analyze``.
        destination: Forwarded to ``download_and_unzip_data``; the value
            that helper returns is used for the remaining steps.
        download: Accepted but not referenced in this function body.
    """
    logger.info('*** MD Data Analysis Started ***')
    data_dir = download_and_unzip_data(url, destination)
    raw_stops = load_xls(get_datafile_path(url, data_dir))
    # An empty ``to_drop`` is passed explicitly to the processing step.
    processed_stops = process_raw_data(raw_stops, to_drop=())
    analyze(processed_stops, report)
Beispiel #5
0
 def test_get_datafile_path_with_url(self):
     """Check path resolution when only a URL is supplied.

     A test zip named ``foo.zip`` is created in the destination directory
     and the URL ends in ``foo.zip``; the expected result is ``file1.txt``
     located directly inside the destination directory.
     """
     # The context manager removes the temporary directory even when the
     # helper or the assertion raises, which a trailing cleanup() would not.
     with tempfile.TemporaryDirectory() as destination:
         zip_path = os.path.join(destination, 'foo.zip')
         # presumably writes an archive whose first member is file1.txt;
         # verify against make_test_zip
         self.make_test_zip(zip_path)
         path_to_first_file = get_datafile_path('http://example.com/foo.zip',
                                                destination)
         self.assertEqual(os.path.join(destination, 'file1.txt'),
                          path_to_first_file)
 def test_get_datafile_path_with_zip_path(self):
     """Check path resolution when only an explicit ``zip_path`` is supplied.

     The URL argument is ``None``; the expected result is ``file1.txt``
     located directly inside the destination directory.
     """
     # The context manager removes the temporary directory even when the
     # helper or the assertion raises, which a trailing cleanup() would not.
     with tempfile.TemporaryDirectory() as destination:
         zip_path = os.path.join(destination, 'foo.zip')
         # presumably writes an archive whose first member is file1.txt;
         # verify against make_test_zip
         self.make_test_zip(zip_path)
         path_to_first_file = get_datafile_path(None, destination, zip_path=zip_path)
         self.assertEqual(
             os.path.join(destination, 'file1.txt'),
             path_to_first_file
         )
def run(url, destination=None, download=True):
    """Download MD data, extract it, convert to CSV, and scan for issues.

    Args:
        url: Forwarded to ``download_and_unzip_data``,
            ``get_datafile_path`` and ``get_csv_path``.
        destination: Forwarded to ``download_and_unzip_data``; the value
            that helper returns is used for the remaining steps.
        download: Accepted but not referenced in this function body.
    """
    logger.info('*** MD Data Scan Started ***')
    data_dir = download_and_unzip_data(url, destination)

    # XLS -> CSV, reusing a CSV that is already on disk.
    source_xls = get_datafile_path(url, data_dir)
    target_csv = get_csv_path(url, data_dir)
    if os.path.exists(target_csv):
        logger.info("{} exists, skipping XLS->CSV conversion".format(target_csv))
    else:
        xls_to_csv(source_xls, target_csv)

    row_total = line_count(target_csv)
    logger.debug('Rows: {}'.format(row_total))
    scan([target_csv])
def run(url, destination=None, download=True):
    """Download IL data, extract it, and load it into PostgreSQL.

    Args:
        url: Forwarded to ``download_and_unzip_data``,
            ``get_datafile_path`` and ``get_csv_path``.
        destination: Forwarded to ``download_and_unzip_data``; the value
            that helper returns is used for the remaining steps.
        download: Accepted but not referenced in this function body.
    """
    logger.info('*** IL Data Import Started ***')
    data_dir = download_and_unzip_data(url, destination)

    # Raw CSV -> processed CSV, reusing a processed file already on disk.
    source_csv = get_datafile_path(url, data_dir)
    cleaned_csv = get_csv_path(url, data_dir)
    if os.path.exists(cleaned_csv):
        logger.info("{} exists, skipping cleanup".format(cleaned_csv))
    else:
        raw_to_processed(source_csv, cleaned_csv)

    row_total = line_count(cleaned_csv)
    logger.debug('Rows: {}'.format(row_total))

    # Constraints and indexes are dropped before the bulk load.
    il_cursor = connections['traffic_stops_il'].cursor()
    drop_constraints_and_indexes(il_cursor)

    # COPY is used to load the CSV file as quickly as possible.
    copy_from(cleaned_csv)

    # Clear the query cache.
    flush_memcached()
Beispiel #9
0
def run(url, destination=None, download=True):
    """Fetch the IL dataset, prepare its CSV, and bulk-load it into PostgreSQL.

    Args:
        url: Passed to ``download_and_unzip_data``, ``get_datafile_path``
            and ``get_csv_path``.
        destination: Passed to ``download_and_unzip_data``; subsequent steps
            use whatever that helper returns.
        download: Present in the signature but unused by this body.
    """
    logger.info('*** IL Data Import Started ***')
    extracted_to = download_and_unzip_data(url, destination)

    # Produce the processed CSV unless one already exists on disk.
    raw_path = get_datafile_path(url, extracted_to)
    processed_path = get_csv_path(url, extracted_to)
    already_processed = os.path.exists(processed_path)
    if already_processed:
        logger.info("{} exists, skipping cleanup".format(processed_path))
    else:
        raw_to_processed(raw_path, processed_path)

    logger.debug('Rows: {}'.format(line_count(processed_path)))

    # Drop constraints/indexes ahead of the load.
    db_cursor = connections['traffic_stops_il'].cursor()
    drop_constraints_and_indexes(db_cursor)

    # Load the CSV with COPY for speed.
    copy_from(processed_path)

    # Clear the query cache.
    flush_memcached()
Beispiel #10
0
def run(url, destination=None, download=True):
    """Download MD data, extract it, convert to CSV, and load into PostgreSQL.

    Args:
        url: Forwarded to ``download_and_unzip_data``,
            ``get_datafile_path`` and ``get_csv_path``.
        destination: Forwarded to ``download_and_unzip_data``; the value
            that helper returns is used for the remaining steps.
        download: Accepted but not referenced in this function body.
    """
    logger.info('*** MD Data Import Started ***')
    data_dir = download_and_unzip_data(url, destination)

    # XLS -> CSV, reusing a CSV that is already on disk.
    source_xls = get_datafile_path(url, data_dir)
    target_csv = get_csv_path(url, data_dir)
    if os.path.exists(target_csv):
        logger.info("{} exists, skipping XLS->CSV conversion".format(target_csv))
    else:
        xls_to_csv(source_xls, target_csv)

    row_total = line_count(target_csv)
    logger.debug('Rows: {}'.format(row_total))

    # Constraints and indexes are dropped before the bulk load.
    md_cursor = connections['traffic_stops_md'].cursor()
    drop_constraints_and_indexes(md_cursor)

    # COPY is used to load the CSV file as quickly as possible.
    copy_from(target_csv)

    # Clear the query cache.
    flush_memcached()
Beispiel #11
0
 def test_get_datafile_path_special(self):
     """Check that invalid url/zip_path combinations raise ``ValueError``."""
     # Neither a URL nor a zip_path is given.
     self.assertRaises(ValueError, get_datafile_path, None, 'anything')
     # Both a URL and a zip_path are given.
     self.assertRaises(
         ValueError,
         get_datafile_path, 'something', 'anything', zip_path='anything',
     )
Beispiel #12
0
 def test_get_datafile_path_special(self):
     """Check that invalid url/zip_path combinations raise ``ValueError``."""
     # Neither a URL nor a zip_path is given.
     self.assertRaises(ValueError, get_datafile_path, None, "anything")
     # Both a URL and a zip_path are given.
     self.assertRaises(
         ValueError,
         get_datafile_path, "something", "anything", zip_path="anything",
     )
 def test_get_datafile_path_special(self):
     """Verify ``ValueError`` for missing or conflicting source arguments."""
     # Case 1: url is None and no zip_path is passed.
     self.assertRaises(ValueError, get_datafile_path, None, 'anything')
     # Case 2: url and zip_path are passed together.
     self.assertRaises(
         ValueError,
         get_datafile_path, 'something', 'anything', zip_path='anything',
     )