def test_strip_newlines_local_custom_mini(self):
        """Every row of the bad-newlines CSV sample must carry a numeric 'id'.

        Rows are read from ``sample_with_bad_newlines.csv`` using the first
        table definition in ``TEST_TABLE_SPEC``; each row's ``id`` value must
        satisfy ``str.isnumeric()``.
        """
        spec = TEST_TABLE_SPEC['tables'][0]
        sample_path = './tap_spreadsheets_anywhere/test/sample_with_bad_newlines.csv'

        for record in get_row_iterator(spec, sample_path):
            id_value = record['id']
            looks_numeric = id_value.isnumeric()
            self.assertTrue(
                looks_numeric,
                "Parsed ID is not a number for: {}".format(id_value))
    def test_handle_newlines_local_json(self):
        """Every row of the local JSON sample must carry a numeric 'id'.

        Rows are read from ``sample.json`` using the table definition at
        index 3 of ``TEST_TABLE_SPEC``; each row's ``id`` must be an ``int``
        or a ``float``.
        """
        spec = TEST_TABLE_SPEC['tables'][3]
        sample_path = './tap_spreadsheets_anywhere/test/sample.json'

        for record in get_row_iterator(spec, sample_path):
            id_value = record['id']
            is_number = isinstance(id_value, (float, int))
            self.assertTrue(
                is_number,
                "Parsed ID is not a number for: {}".format(id_value))
    # Example no. 3
    # 0
    def test_renamed_https_object(self):
        """Check that table spec 6 matches one object and yields a readable row.

        Looks up the objects matching the spec that were modified since its
        ``start_date`` and expects exactly one. The row iterator is then
        opened on ``path + '/' + pattern`` (the spec's own pattern, not the
        key of the matched object), and the first row must contain more than
        one entry.
        """
        table_spec = TEST_TABLE_SPEC['tables'][6]
        modified_since = dateutil.parser.parse(table_spec['start_date'])
        target_files = file_utils.get_matching_objects(table_spec,
                                                       modified_since)
        # Use a unittest assertion rather than a bare ``assert`` so the check
        # still runs under ``python -O`` and reports the actual count.
        self.assertEqual(len(target_files), 1)

        target_uri = table_spec['path'] + '/' + table_spec['pattern']
        iterator = get_row_iterator(table_spec, target_uri)

        row = next(iterator)
        self.assertGreater(len(row), 1, "Not able to read a row.")
    def test_https_bucket(self):
        """Check that table spec 4 matches one object whose first row has a positive id.

        Looks up the objects matching the spec that were modified since its
        ``start_date`` and expects exactly one. The row iterator is opened on
        ``path + '/' + <matched key>``, and the first row's ``id`` must
        convert to an integer greater than zero.
        """
        table_spec = TEST_TABLE_SPEC['tables'][4]
        modified_since = dateutil.parser.parse(table_spec['start_date'])
        target_files = file_utils.get_matching_objects(table_spec,
                                                       modified_since)
        # Use a unittest assertion rather than a bare ``assert`` so the check
        # still runs under ``python -O`` and reports the actual count.
        self.assertEqual(len(target_files), 1)

        target_uri = table_spec['path'] + '/' + target_files[0]["key"]
        iterator = get_row_iterator(table_spec, target_uri)

        row = next(iterator)
        # The message is built eagerly, so format() is used instead of ``+``:
        # concatenation would raise TypeError for a non-string id even when
        # the assertion itself would have passed.
        self.assertGreater(int(row['id']), 0,
                           "{} was not positive".format(row['id']))
    # Example no. 5
    # 0
    def test_indirect_https_bucket(self):
        """Check that table spec 5 matches one object and yields the expected row.

        Looks up the objects matching the spec that were modified since its
        ``start_date`` and expects exactly one. The row iterator is then
        opened on ``path + '/' + pattern`` (the spec's own pattern, not the
        key of the matched object), and the first row must map the key
        ``'1976'`` to the string ``'1976'``.
        """
        table_spec = TEST_TABLE_SPEC['tables'][5]
        modified_since = dateutil.parser.parse(table_spec['start_date'])
        target_files = file_utils.get_matching_objects(table_spec,
                                                       modified_since)
        # Use a unittest assertion rather than a bare ``assert`` so the check
        # still runs under ``python -O`` and reports the actual count.
        self.assertEqual(len(target_files), 1)

        target_uri = table_spec['path'] + '/' + table_spec['pattern']
        iterator = get_row_iterator(table_spec, target_uri)

        row = next(iterator)
        # assertEqual shows both values on failure, unlike assertTrue(a == b).
        self.assertEqual(row['1976'], '1976',
                         "Row did not contain expected data")