def test_iter_no_row_metadata(self):
    """Iterate a CSV where neither the file header nor ``headers`` has row metadata.

    The complete iteration result is compared with the expected rows.
    Pairing the rows with ``zip`` would stop at the shorter sequence, so a
    table that yields no rows (or too few rows) would pass unnoticed.
    """
    data = ("col1,col2\n"
            "1,2\n"
            "2,1\n")
    cols = as_table_columns(StringIOContextManager(data))
    headers = [SelectColumn.from_column(col) for col in cols]
    # The path is fake: the table is created and iterated inside the patch so
    # that any io.open call receives the in-memory CSV text instead.
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        expected_rows = [[1, 2], [2, 1]]
        assert expected_rows == list(table)
def test_iter_with_table_row_metadata(self):
    """Iterate a CSV that has ROW_ID/ROW_VERSION columns while ``headers`` does not.

    The expected rows contain only the ``col`` value, i.e. the row metadata
    columns of the file are not part of the yielded rows.  The complete
    iteration result is compared, because ``zip`` would silently stop at the
    shorter sequence and let a table with missing rows pass.
    """
    data = ("ROW_ID,ROW_VERSION,col\n"
            "1,2,\"I like trains\"\n"
            "5,1,\"weeeeeeeeeeee\"\n")
    cols = as_table_columns(StringIOContextManager(data))
    headers = [SelectColumn.from_column(col) for col in cols]
    # The path is fake: the table is created and iterated inside the patch so
    # that any io.open call receives the in-memory CSV text instead.
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        expected_rows = [["I like trains"], ["weeeeeeeeeeee"]]
        assert expected_rows == list(table)
def test_iter_with_mismatch_row_metadata(self):
    """Iteration raises ValueError when file and ``headers`` row metadata differ.

    The CSV header line carries ROW_ID, ROW_VERSION and ROW_ETAG, while
    ``headers`` only declares ROW_ID and ROW_VERSION.  Fetching the first row
    is expected to raise ``ValueError``.
    """
    data = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
            "1,2,etag1,\"I like trains\"\n"
            "5,1,etag2,\"weeeeeeeeeeee\"\n")
    cols = as_table_columns(StringIOContextManager(data))
    row_metadata_headers = [SelectColumn(name="ROW_ID", columnType="STRING"),
                            SelectColumn(name="ROW_VERSION", columnType="STRING")]
    headers = row_metadata_headers + [SelectColumn.from_column(col) for col in cols]
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # Obtain the iterator outside of pytest.raises: only advancing it is
        # expected to fail.  The local name avoids shadowing the builtin iter().
        row_iter = iter(table)
        with pytest.raises(ValueError):
            next(row_iter)
def test_iter_row_metadata_mismatch_in_headers(self):
    """Iteration raises ValueError when only ``headers`` declares row metadata.

    The CSV has plain ``col1,col2`` columns, while ``headers`` additionally
    declares ROW_ID and ROW_VERSION.  Fetching the first row is expected to
    raise ``ValueError``.
    """
    data = ("col1,col2\n"
            "1,2\n"
            "2,1\n")
    cols = as_table_columns(StringIOContextManager(data))
    row_metadata_headers = [SelectColumn(name="ROW_ID", columnType="STRING"),
                            SelectColumn(name="ROW_VERSION", columnType="STRING")]
    headers = row_metadata_headers + [SelectColumn.from_column(col) for col in cols]
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # Obtain the iterator outside of pytest.raises: only advancing it is
        # expected to fail.  The local name avoids shadowing the builtin iter().
        row_iter = iter(table)
        with pytest.raises(ValueError):
            next(row_iter)
def test_iter_with_file_view_row_metadata(self):
    """Iterate a CSV whose row metadata columns match those in ``headers``.

    Both the file and ``headers`` declare ROW_ID, ROW_VERSION and ROW_ETAG, so
    the expected rows include the metadata values (as strings, matching the
    STRING column type given for them).  The complete iteration result is
    compared, because ``zip`` would silently stop at the shorter sequence and
    let a table with missing rows pass.
    """
    data = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
            "1,2,etag1,\"I like trains\"\n"
            "5,1,etag2,\"weeeeeeeeeeee\"\n")
    cols = as_table_columns(StringIOContextManager(data))
    row_metadata_headers = [SelectColumn(name="ROW_ID", columnType="STRING"),
                            SelectColumn(name="ROW_VERSION", columnType="STRING"),
                            SelectColumn(name="ROW_ETAG", columnType="STRING")]
    headers = row_metadata_headers + [SelectColumn.from_column(col) for col in cols]
    # The path is fake: the table is created and iterated inside the patch so
    # that any io.open call receives the in-memory CSV text instead.
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        expected_rows = [['1', '2', "etag1", "I like trains"],
                         ['5', '1', "etag2", "weeeeeeeeeeee"]]
        assert expected_rows == list(table)
def test_iter_with_no_headers_in_csv(self):
    """Iteration raises ValueError for a table created with ``header=False``.

    The CSV text has no header line and the table is told so; fetching the
    first row is expected to raise ``ValueError``.
    """
    string_io = StringIOContextManager("1,2,etag1,\"I like trains\"\n"
                                       "5,1,etag2,\"weeeeeeeeeeee\"\n")
    with patch.object(io, "open", return_value=string_io):
        table = CsvFileTable("syn123", "/fake/file/path", header=False)
        # Obtain the iterator outside of pytest.raises: only advancing it is
        # expected to fail.  The local name avoids shadowing the builtin iter().
        row_iter = iter(table)
        # pytest.raises matches the sibling row-metadata mismatch tests.
        with pytest.raises(ValueError):
            next(row_iter)
def test_iter_with_no_headers(self):
    """Iteration raises ValueError when the table is built without ``headers``.

    The CSV itself has a header line, but no ``headers`` argument is passed to
    ``CsvFileTable``; fetching the first row is expected to raise
    ``ValueError``.
    """
    string_io = StringIOContextManager("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
                                       "1,2,etag1,\"I like trains\"\n"
                                       "5,1,etag2,\"weeeeeeeeeeee\"\n")
    with patch.object(io, "open", return_value=string_io):
        table = CsvFileTable("syn123", "/fake/file/path")
        # Obtain the iterator outside of pytest.raises: only advancing it is
        # expected to fail.  The local name avoids shadowing the builtin iter().
        row_iter = iter(table)
        # pytest.raises matches the sibling row-metadata mismatch tests.
        with pytest.raises(ValueError):
            next(row_iter)
def test_iter_metadata__no_etag(self):
    """``iter_row_metadata`` yields ``(id, version, None)`` without a ROW_ETAG column.

    The CSV only has ROW_ID and ROW_VERSION metadata columns, so the third
    element of each expected metadata tuple is ``None``.
    """
    string_io = StringIOContextManager("ROW_ID,ROW_VERSION,asdf\n"
                                       "1,2,\"I like trains\"\n"
                                       "5,1,\"weeeeeeeeeeee\"\n")
    with patch.object(io, "open", return_value=string_io):
        csv_file_table = CsvFileTable("syn123", "/fake/file/path")
        metadata = list(csv_file_table.iter_row_metadata())
        # One list comparison checks both the number of entries and each
        # entry; plain assert matches the sibling "has_etag" test.
        assert [(1, 2, None), (5, 1, None)] == metadata
def test_iter_metadata__has_etag(self):
    """``iter_row_metadata`` yields ``(id, version, etag)`` when ROW_ETAG is present."""
    csv_text = ("ROW_ID,ROW_VERSION,ROW_ETAG,asdf\n"
                "1,2,etag1,\"I like trains\"\n"
                "5,1,etag2,\"weeeeeeeeeeee\"\n")
    string_io = StringIOContextManager(csv_text)
    with patch.object(io, "open", return_value=string_io):
        csv_file_table = CsvFileTable("syn123", "/fake/file/path")
        # Collect everything while io.open is still patched.
        metadata = list(csv_file_table.iter_row_metadata())
    # Exactly two entries are expected, in file order.
    assert [(1, 2, "etag1"), (5, 1, "etag2")] == metadata
def test_as_table_columns__with_csv_file():
    """``as_table_columns`` infers column names and types from CSV content.

    The first returned column is expected to be ``Name``, i.e. the ROW_ID and
    ROW_VERSION columns of the file are not expected at the front of the
    result.  ``Living`` is expected to be reported as STRING.
    """
    string_io = StringIOContextManager(
        'ROW_ID,ROW_VERSION,Name,Born,Hipness,Living\n'
        '"1", "1", "John Coltrane", 1926, 8.65, False\n'
        '"2", "1", "Miles Davis", 1926, 9.87, False')
    cols = as_table_columns(string_io)

    # (name, columnType) expected at each position of the result.
    expected_columns = [
        ('Name', 'STRING'),
        ('Born', 'INTEGER'),
        ('Hipness', 'DOUBLE'),
        ('Living', 'STRING'),
    ]
    for position, (name, column_type) in enumerate(expected_columns):
        assert cols[position]['name'] == name
        assert cols[position]['columnType'] == column_type
def test_build_table__with_csv():
    """``build_table`` on a CSV file name yields typed rows and the column headers.

    ``as_table_columns`` is patched to return an INTEGER column ``a`` and a
    STRING column ``b``, and ``io.open`` is patched to serve the CSV text from
    memory.  The test checks each row's values, the table length, and that
    ``table.headers`` equals the name/type pairs of those columns.
    """
    string_io = StringIOContextManager('a,b\n'
                                       '1,c\n'
                                       '2,d\n'
                                       '3,e')
    patched_columns = [Column(name="a", columnType="INTEGER"),
                       Column(name="b", columnType="STRING")]
    with patch.object(synapseclient.table, "as_table_columns", return_value=patched_columns), \
            patch.object(io, "open", return_value=string_io):
        table = build_table("test", "syn123", "some_file_name")

        expected_b_values = ["c", "d", "e"]
        # Column "a" counts 1, 2, 3, so it equals the zero-based row index + 1.
        for row_index, row in enumerate(table):
            assert row[0] == row_index + 1
            assert row[1] == expected_b_values[row_index]
        assert len(table) == 3

        expected_headers = [
            {'name': 'a', 'columnType': 'INTEGER'},
            {'name': 'b', 'columnType': 'STRING'},
        ]
        assert expected_headers == table.headers