def test_native_iterator():
    # Rows produced by a generator function
    def generator():
        yield ['id', 'name']
        yield ['1', 'english']
        yield ['2', '中国人']
    # Passing an instantiated generator (instead of the callable) must fail
    with pytest.raises(exceptions.ParsingError) as excinfo:
        iterator = generator()
        topen(iterator)
    assert 'callable' in str(excinfo.value)
def test_storage():
    # Load schemas and data for both resources
    articles_schema = json.load(io.open('data/articles.json', encoding='utf-8'))
    comments_schema = json.load(io.open('data/comments.json', encoding='utf-8'))
    articles_data = topen('data/articles.csv', with_headers=True).read()
    comments_data = topen('data/comments.csv', with_headers=True).read()

    # Build storage over a database engine
    engine = create_engine(os.environ['DATABASE_URL'])
    storage = Storage(engine=engine, prefix='prefix_')

    # Start from a clean state
    for table in reversed(storage.tables):
        storage.delete(table)

    # Create tables and populate them
    storage.create(['articles', 'comments'], [articles_schema, comments_schema])
    storage.write('articles', articles_data)
    storage.write('comments', comments_data)

    # Re-instantiate storage so its view comes from reflection only
    storage = Storage(engine=engine, prefix='prefix_')

    # Creating an already existent table must fail
    with pytest.raises(RuntimeError):
        storage.create('articles', articles_schema)

    # Representation, table list, schemas and data round-trip
    assert repr(storage).startswith('Storage')
    assert storage.tables == ['articles', 'comments']
    assert storage.describe('articles') == convert_schema(articles_schema)
    assert storage.describe('comments') == convert_schema(comments_schema)
    assert list(storage.read('articles')) == convert_data(articles_schema, articles_data)
    assert list(storage.read('comments')) == convert_data(comments_schema, comments_data)

    # Clean up; deleting a non existent table must fail
    for table in reversed(storage.tables):
        storage.delete(table)
    with pytest.raises(RuntimeError):
        storage.delete('articles')
def test_native_iterator():
    # Rows produced by a generator function
    def generator():
        yield ['id', 'name']
        yield ['1', 'english']
        yield ['2', '中国人']
    # An already-instantiated generator is rejected as a source
    with pytest.raises(exceptions.SourceError) as excinfo:
        iterator = generator()
        topen(iterator)
    assert 'callable' in str(excinfo.value)
def test_save_csv(tmpdir):
    # Save the table into a temporary location
    target = str(tmpdir.join('table.csv'))
    table = topen('data/table.csv', headers=1)
    table.save(target)
    # Re-open the saved copy and verify its contents
    table = topen(target, headers=1)
    assert table.headers == ['id', 'name']
    assert table.read(extended=True) == [
        (2, ['id', 'name'], ['1', 'english']),
        (3, ['id', 'name'], ['2', '中国人'])]
def test_save_csv(tmpdir):
    # Write the table out to a temporary file
    destination = str(tmpdir.join('table.csv'))
    table = topen('data/table.csv', headers=1)
    table.save(destination)
    # Read the saved file back and check it round-trips
    table = topen(destination, headers=1)
    assert table.headers == ['id', 'name']
    assert table.read(extended=True) == [
        (2, ['id', 'name'], ['1', 'english']),
        (3, ['id', 'name'], ['2', '中国人'])]
def test_save_csv(tmpdir):
    # Persist the table to disk
    path = str(tmpdir.join('table.csv'))
    table = topen('data/table.csv', headers=1)
    table.save(path)
    # Open the persisted table again
    table = topen(path, headers=1)
    # Headers and extended rows must match the original data
    assert table.headers == ['id', 'name']
    assert table.read(extended=True) == [
        (2, ['id', 'name'], ['1', 'english']),
        (3, ['id', 'name'], ['2', '中国人'])]
def test_file_csv_with_bom():
    # Explicit utf-8 encoding: the BOM must be stripped
    table = topen('data/special/bom.csv', encoding='utf-8')
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    # Auto-detected encoding: the BOM must be stripped as well
    table = topen('data/special/bom.csv')
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_file_csv_with_bom():
    # With an explicit encoding the BOM should not leak into the data
    table = topen('data/special/bom.csv', encoding='utf-8')
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    # Same expectation when the encoding is detected automatically
    table = topen('data/special/bom.csv')
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_html_content():
    # An HTML page served where a CSV is expected must raise
    source = 'https://github.com/frictionlessdata/tabulator-py/blob/master/data/table.csv'
    with pytest.raises(exceptions.TabulatorException) as excinfo:
        table = topen(source, headers='row1')
    assert 'HTML' in str(excinfo.value)
def test_processors_chain():
    # Post-parse processors applied to the extended-row stream
    def skip_commented_rows(extended_rows):
        for number, headers, row in extended_rows:
            if row and hasattr(row[0], 'startswith') and row[0].startswith('#'):
                continue
            yield (number, headers, row)

    def skip_blank_rows(extended_rows):
        for number, headers, row in extended_rows:
            if not row:
                continue
            yield (number, headers, row)

    def cast_rows(extended_rows):
        for number, headers, row in extended_rows:
            crow = []
            for value in row:
                try:
                    # Best-effort cast; leave the value untouched on failure
                    if isinstance(value, six.string_types):
                        value = ast.literal_eval(value)
                except Exception:
                    pass
                crow.append(value)
            yield (number, headers, crow)

    # Open the table with the chain applied in order
    source = [['id', 'name'], ['#1', 'english'], [], ['2', '中国人']]
    table = topen(source, headers='row1',
                  post_parse=[skip_commented_rows, skip_blank_rows, cast_rows])
    assert table.headers == ['id', 'name']
def test_reset_and_sample_size():
    # Open with a small sample size
    table = topen('data/special/long.csv', headers=1, sample_size=3)
    # A full read returns every row exactly once
    assert table.read(extended=True) == [
        (2, ['id', 'name'], ['1', 'a']),
        (3, ['id', 'name'], ['2', 'b']),
        (4, ['id', 'name'], ['3', 'c']),
        (5, ['id', 'name'], ['4', 'd']),
        (6, ['id', 'name'], ['5', 'e']),
        (7, ['id', 'name'], ['6', 'f']),
    ]
    # The sample holds only the pre-read rows; the stream is exhausted
    assert table.sample == [['1', 'a'], ['2', 'b']]
    assert table.read() == []
    # After a reset the rows can be read again
    table.reset()
    assert table.read(extended=True, limit=3) == [
        (2, ['id', 'name'], ['1', 'a']),
        (3, ['id', 'name'], ['2', 'b']),
        (4, ['id', 'name'], ['3', 'c']),
    ]
    assert table.sample == [['1', 'a'], ['2', 'b']]
    # The remaining rows continue where the limited read stopped
    assert table.read(extended=True) == [
        (5, ['id', 'name'], ['4', 'd']),
        (6, ['id', 'name'], ['5', 'e']),
        (7, ['id', 'name'], ['6', 'f']),
    ]
def test_html_content():
    # HTML served in place of CSV must raise a format error
    source = 'https://github.com/frictionlessdata/tabulator-py/blob/master/data/table.csv'
    with pytest.raises(exceptions.FormatError) as excinfo:
        table = topen(source, headers='row1')
    assert 'HTML' in str(excinfo.value)
def test_reset_and_sample_size():
    # Open a long table keeping only three rows in the sample
    table = topen('data/special/long.csv', headers=1, sample_size=3)
    # Full read yields every data row once
    assert table.read(extended=True) == [
        (2, ['id', 'name'], ['1', 'a']),
        (3, ['id', 'name'], ['2', 'b']),
        (4, ['id', 'name'], ['3', 'c']),
        (5, ['id', 'name'], ['4', 'd']),
        (6, ['id', 'name'], ['5', 'e']),
        (7, ['id', 'name'], ['6', 'f'])]
    assert table.sample == [['1', 'a'], ['2', 'b']]
    # Stream is now exhausted
    assert table.read() == []
    # Reset rewinds the stream
    table.reset()
    assert table.read(extended=True, limit=3) == [
        (2, ['id', 'name'], ['1', 'a']),
        (3, ['id', 'name'], ['2', 'b']),
        (4, ['id', 'name'], ['3', 'c'])]
    assert table.sample == [['1', 'a'], ['2', 'b']]
    # Subsequent read picks up after the limit
    assert table.read(extended=True) == [
        (5, ['id', 'name'], ['4', 'd']),
        (6, ['id', 'name'], ['5', 'e']),
        (7, ['id', 'name'], ['6', 'f'])]
def test_reset_and_sample_size():
    # Sample only the first three rows
    table = topen('data/special/long.csv', headers=1, sample_size=3)
    # Reading everything returns all six data rows
    assert table.read(extended=True) == [
        (2, ['id', 'name'], ['1', 'a']),
        (3, ['id', 'name'], ['2', 'b']),
        (4, ['id', 'name'], ['3', 'c']),
        (5, ['id', 'name'], ['4', 'd']),
        (6, ['id', 'name'], ['5', 'e']),
        (7, ['id', 'name'], ['6', 'f'])]
    assert table.sample == [['1', 'a'], ['2', 'b']]
    assert table.read() == []
    # Rewind and read again in two chunks
    table.reset()
    assert table.read(extended=True, limit=3) == [
        (2, ['id', 'name'], ['1', 'a']),
        (3, ['id', 'name'], ['2', 'b']),
        (4, ['id', 'name'], ['3', 'c'])]
    assert table.sample == [['1', 'a'], ['2', 'b']]
    assert table.read(extended=True) == [
        (5, ['id', 'name'], ['4', 'd']),
        (6, ['id', 'name'], ['5', 'e']),
        (7, ['id', 'name'], ['6', 'f'])]
def import_resource(storage, table, schema, data):
    """Import a JSONTableSchema resource into a storage table.

    Parameters
    ----------
    storage: object
        Storage object.
    table: str
        Table name.
    schema: str
        Path to schema file.
    data: str
        Path to data file.
    """
    # Convert the schema descriptor without rebinding the `schema`
    # parameter (the original shadowed both `schema` and `data`)
    model = SchemaModel(schema)
    descriptor = model.as_python
    # Recreate the table from scratch
    if storage.check(table):
        storage.delete(table)
    storage.create(table, descriptor)
    # Stream the data file into the table
    with topen(data, with_headers=True) as rows:
        storage.write(table, rows)
def test_processors_chain():
    # Processors applied after parsing, in chain order
    def skip_commented_rows(extended_rows):
        for number, headers, row in extended_rows:
            is_comment = (row and hasattr(row[0], 'startswith')
                          and row[0].startswith('#'))
            if is_comment:
                continue
            yield (number, headers, row)

    def skip_blank_rows(extended_rows):
        for number, headers, row in extended_rows:
            if not row:
                continue
            yield (number, headers, row)

    def cast_rows(extended_rows):
        for number, headers, row in extended_rows:
            crow = []
            for value in row:
                try:
                    # Literal-eval strings; keep the raw value on failure
                    if isinstance(value, six.string_types):
                        value = ast.literal_eval(value)
                except Exception:
                    pass
                crow.append(value)
            yield (number, headers, crow)

    # Open a table with commented and blank rows present
    source = [['id', 'name'], ['#1', 'english'], [], ['2', '中国人']]
    table = topen(source, headers='row1', post_parse=[
        skip_commented_rows, skip_blank_rows, cast_rows])
    assert table.headers == ['id', 'name']
def test_web_csv_non_ascii_url():
    # Non-ascii characters in the URL must be handled
    url = ('http://data.defra.gov.uk/ops/government_procurement_card/'
           'over_£500_GPC_apr_2013.csv')
    table = topen(url)
    assert table.sample[0] == [
        'Entity', 'Transaction Posting Date', 'Merchant Name',
        'Amount', 'Description']
def test_file_csv_parser_class():
    # Open with an explicitly supplied parser class
    table = topen('data/table.csv', parser_class=CSVParser)
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_file_xls():
    # Excel numbers come back as floats
    table = topen('data/table.xls')
    assert table.headers is None
    assert table.read() == [['id', 'name'], [1.0, 'english'], [2.0, '中国人']]
def test_file_json_lists():
    # JSON array-of-arrays preserves native types
    table = topen('data/table-lists.json')
    assert table.headers is None
    assert table.read() == [['id', 'name'], [1, 'english'], [2, '中国人']]
def test_web_excel():
    # Open a remote Excel file
    table = topen(BASE_URL % 'data/table.xls')
    assert table.headers is None
    assert table.read() == [['id', 'name'], [1.0, 'english'], [2.0, '中国人']]
def test_headers_with_headers_argument():
    # The first row is promoted to headers
    table = topen('data/table.csv', with_headers=True)
    assert table.headers == ['id', 'name']
    # Keyed iteration maps each row onto the headers
    expected = [{'id': '1', 'name': 'english'}, {'id': '2', 'name': '中国人'}]
    assert list(table.iter(keyed=True)) == expected
def test_file_csv_parser_options():
    # Supply the parser constructor through parser options
    table = topen('data/table.csv', parser_options={'constructor': CSVParser})
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_web_csv():
    # Open a remote CSV file
    table = topen(BASE_URL % 'data/table.csv')
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_file_csv_parser_class():
    # Force the CSV parser explicitly
    table = topen('data/table.csv', parser_class=parsers.CSV)
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_file_csv_parser_options():
    # The parser class can be injected via parser_options
    table = topen('data/table.csv', parser_options={'constructor': CSVParser})
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_file_json_lists():
    # A local JSON file of row lists keeps native types
    table = topen('data/table-lists.json')
    assert table.headers is None
    assert table.read() == [['id', 'name'], [1, 'english'], [2, '中国人']]
def test_file_csv_parser_class():
    # Open using an explicit parser class argument
    table = topen('data/table.csv', parser_class=CSVParser)
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_file_xls():
    # Numeric cells in xls are read as floats
    table = topen('data/table.xls')
    assert table.headers is None
    assert table.read() == [['id', 'name'], [1.0, 'english'], [2.0, '中国人']]
def test_web_json_dicts():
    # Remote JSON of dicts: values only, no headers exposed
    table = topen(BASE_URL % 'data/table-dicts.json')
    assert table.headers is None
    assert table.read() == [[1, 'english'], [2, '中国人']]
def test_file_xls(self):
    # Rows come back as tuples in this implementation
    table = topen(FPATH % 'table.xls')
    assert table.headers is None
    assert table.read() == [('id', 'name'), (1.0, 'english'), (2.0, '中国人')]
def test_file_json_lists(self):
    # JSON lists are read as tuples with native types
    table = topen(FPATH % 'table-lists.json')
    assert table.headers is None
    assert table.read() == [('id', 'name'), (1, 'english'), (2, '中国人')]
def test_file_csv(self):
    # CSV cells are read as strings, rows as tuples
    table = topen(FPATH % 'table.csv')
    assert table.headers is None
    assert table.read() == [('id', 'name'), ('1', 'english'), ('2', '中国人')]
def test_web_json_dicts(self):
    # Remote keyed JSON yields value tuples without headers
    table = topen(WPATH % 'table-dicts.json')
    assert table.headers is None
    assert table.read() == [(1, 'english'), (2, '中国人')]
def test_native_iterator():
    # A plain iterator is accepted as a source
    source = iter([['id', 'name'], ['1', 'english'], ['2', '中国人']])
    table = topen(source)
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_text_json_lists():
    # JSON array-of-arrays passed as inline text
    source = '[["id", "name"], [1, "english"], [2, "中国人"]]'
    table = topen(source, scheme='text', format='json')
    assert table.headers is None
    assert table.read() == [['id', 'name'], [1, 'english'], [2, '中国人']]
def test_text_json_dicts():
    # Keyed JSON passed as inline text yields only the values
    source = '[{"id": 1, "name": "english" }, {"id": 2, "name": "中国人" }]'
    table = topen(source, scheme='text', format='json')
    assert table.headers is None
    assert table.read() == [[1, 'english'], [2, '中国人']]
def test_text_csv():
    # Inline CSV via the text scheme prefix
    source = 'text://id,name\n1,english\n2,中国人\n'
    table = topen(source, format='csv')
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def iter(self):
    '''Lazily-iterates over rows in data.

    This method is useful when you don't want to load all data in
    memory at once.

    Returns:
        iter: An iterator that yields each row in this resource.

    Raises:
        ValueError: If the data isn't tabular, if the resource has no
            data, or if its specified encoding is incorrect
        IOError: If there was some problem opening the data file
            (e.g. it doesn't exist or we don't have permissions to
            read it).
    '''
    result = None
    inline_data = self.metadata.get('data')
    # Prefer a local file when it actually exists, else fall back to
    # the remote location
    if self.local_data_path and os.path.isfile(self.local_data_path):
        data_path_or_url = self.local_data_path
    else:
        data_path_or_url = self.remote_data_path
    if inline_data:
        # Inline data takes precedence over any path/URL
        inline_data = self._parse_inline_data()
        result = iter(inline_data)
    elif data_path_or_url:
        # Translate CSV dialect metadata into tabulator parser options;
        # a non-empty dialect forces the CSV parser so the options apply
        dialect = self.metadata.get('dialect', {})
        parser_options = {}
        parser_class = None
        if 'delimiter' in dialect:
            parser_options['delimiter'] = dialect['delimiter']
        if 'lineTerminator' in dialect:
            parser_options['lineterminator'] = dialect['lineTerminator']
        if len(dialect) > 0:
            parser_class = tabulator.parsers.CSV
        try:
            table = tabulator.topen(data_path_or_url,
                                    with_headers=True,
                                    encoding=self.metadata.get('encoding'),
                                    parser_class=parser_class,
                                    parser_options=parser_options)
            result = TabulatorIterator(table)
        except tabulator.errors.Error as e:
            # Re-raise as ValueError, chaining the original cause
            msg = 'Data at \'{0}\' isn\'t in a known tabular data format'
            six.raise_from(ValueError(msg.format(data_path_or_url)), e)
    if result is None:
        if self.metadata.get('path'):
            # FIXME: This is a hack to throw an IOError when local data
            # exists but couldn't be loaded for some reason. If "path"
            # existed and there were no issues opening it, "result" would
            # never be None.
            raise IOError('Resource\'s data couldn\'t be loaded.')
        raise ValueError('Resource has no data')
    return result
def test_native_keyed():
    # Keyed rows (list of dicts) via the explicit native scheme/format
    source = [{'id': '1', 'name': 'english'}, {'id': '2', 'name': '中国人'}]
    table = topen(source, scheme='native', format='native')
    assert table.headers is None
    assert table.read() == [['1', 'english'], ['2', '中国人']]
def test_stream_xlsx():
    # A binary file object works as an xlsx source
    stream = io.open('data/table.xlsx', mode='rb')
    table = topen(stream, format='xlsx')
    assert table.headers is None
    assert table.read() == [['id', 'name'], [1.0, 'english'], [2.0, '中国人']]
def test_stream_csv():
    # A binary file object works as a csv source
    stream = io.open('data/table.csv', mode='rb')
    table = topen(stream, format='csv')
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_sample():
    # The sample exposes the pre-read rows after the header row
    source = [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    table = topen(source, headers='row1')
    assert table.headers == ['id', 'name']
    assert table.sample == [['1', 'english'], ['2', '中国人']]
def test_headers_native_keyed_headers_is_none():
    # With headers=None no headers are exposed for keyed rows
    source = [{'id': '1', 'name': 'english'}, {'id': '2', 'name': '中国人'}]
    table = topen(source, headers=None)
    # PEP 8: compare to None with identity, not equality
    assert table.headers is None
    assert list(table.iter(extended=True)) == [
        (1, None, ['1', 'english']),
        (2, None, ['2', '中国人'])]
def test_save_xls(tmpdir):
    # Saving to xls is not supported and must raise
    path = str(tmpdir.join('table.xls'))
    table = topen('data/table.csv', headers=1)
    with pytest.raises(exceptions.WritingError) as excinfo:
        table.save(path)
    assert 'xls' in str(excinfo.value)
def test_headers_native():
    # Headers can be taken from a row other than the first
    source = [[], ['id', 'name'], ['1', 'english'], ['2', '中国人']]
    table = topen(source, headers='row2')
    assert table.headers == ['id', 'name']
    assert table.read(extended=True) == [
        (3, ['id', 'name'], ['1', 'english']),
        (4, ['id', 'name'], ['2', '中国人'])]
def test_headers_stream_context_manager():
    # topen works as a context manager over a binary stream
    stream = io.open('data/table.csv', mode='rb')
    with topen(stream, headers='row1', format='csv') as table:
        assert table.headers == ['id', 'name']
        assert table.read(extended=True) == [
            (2, ['id', 'name'], ['1', 'english']),
            (3, ['id', 'name'], ['2', '中国人'])]
def test_web_csv_non_ascii_url():
    # URLs containing non-ascii characters must open correctly
    table = topen(
        'http://data.defra.gov.uk/ops/government_procurement_card/over_£500_GPC_apr_2013.csv'
    )
    expected_header = [
        'Entity', 'Transaction Posting Date', 'Merchant Name',
        'Amount', 'Description']
    assert table.sample[0] == expected_header
def test_native_generator():
    # A generator function (the callable itself) is a valid source
    def generator():
        yield ['id', 'name']
        yield ['1', 'english']
        yield ['2', '中国人']
    table = topen(generator)
    assert table.headers is None
    assert table.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
def test_processors_sample():
    # A post-parse processor that keeps only the very first row
    def only_first_row(extended_rows):
        for number, header, row in extended_rows:
            if number == 1:
                yield (number, header, row)
    # The sample reflects the processed stream
    table = topen('data/table.csv', post_parse=[only_first_row])
    assert table.sample == [['id', 'name']]
def test_processors_chain():
    # Chain the built-in processors over a source with noise rows
    source = [['id', 'name'], ['#1', 'english'], [], ['2', '中国人']]
    table = topen(source, headers='row1', post_parse=[
        processors.skip_commented_rows,
        processors.skip_blank_rows,
        processors.convert_rows,
    ])
    # Only the clean, converted row survives
    assert table.headers == ['id', 'name']
    assert table.read() == [[2, '中国人']]
def test_headers_with_headers_argument():
    # with_headers promotes the first row to headers
    table = topen('data/table.csv', with_headers=True)
    assert table.headers == ['id', 'name']
    # Keyed iteration zips headers with each row's values
    assert list(table.iter(keyed=True)) == [
        {'id': '1', 'name': 'english'},
        {'id': '2', 'name': '中国人'},
    ]