def test_list_of_rows_table():
    """A Table built from a list of rows must round-trip through iteration,
    asRowSet(), and asDataFrame() without altering the data."""
    jazz_rows = [["John Coltrane", 1926, 8.65, False],
                 ["Miles Davis", 1926, 9.87, False],
                 ["Bill Evans", 1929, 7.65, False],
                 ["Paul Chambers", 1935, 5.14, False],
                 ["Jimmy Cobb", 1929, 5.78, True],
                 ["Scott LaFaro", 1936, 4.21, False],
                 ["Sonny Rollins", 1930, 8.99, True],
                 ["Kenny Burrel", 1931, 4.37, True]]

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    table = Table(schema1, jazz_rows,
                  headers=[SelectColumn.from_column(col) for col in cols])

    # iterating the table yields the original rows unchanged
    for actual_row, expected_row in zip(table, jazz_rows):
        assert actual_row == expected_row

    # asRowSet() preserves the values of every row
    for actual_row, expected_row in zip(table.asRowSet().rows, jazz_rows):
        assert actual_row['values'] == expected_row

    table.columns = cols

    # the DataFrame's Name column matches the first field of each row
    df = table.asDataFrame()
    assert list(df['Name']) == [r[0] for r in jazz_rows]
def test_rowset_asDataFrame__with_ROW_ETAG_column(syn):
    """When a rowset query returns etags, asDataFrame() must fold the etag into
    the row index (rowId_version_etag) rather than expose a ROW_ETAG column."""
    first_page = {
        'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
        'maxRowsPerPage': 6990,
        'selectColumns': [
            {'id': '61770', 'columnType': 'STRING', 'name': 'annotationColumn1'},
            {'id': '61771', 'columnType': 'STRING', 'name': 'annotationColumn2'}
        ],
        'queryCount': 1,
        'queryResult': {
            'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
            'nextPageToken': 'sometoken',
            'queryResults': {
                'headers': [
                    {'id': '61770', 'columnType': 'STRING', 'name': 'annotationColumn1'},
                    {'id': '61771', 'columnType': 'STRING', 'name': 'annotationColumn2'}],
                'concreteType': 'org.sagebionetworks.repo.model.table.RowSet',
                'etag': 'DEFAULT',
                'tableId': 'syn11363411',
                'rows': [{'values': ['initial_value1', 'initial_value2'],
                          'etag': '7de0f326-9ef7-4fde-9e4a-ac0babca73f6',
                          'rowId': 123,
                          'versionNumber': 456}]
            }
        }
    }
    second_page = {
        'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
        'queryResults': {
            'etag': 'DEFAULT',
            'headers': [
                {'id': '61770', 'columnType': 'STRING', 'name': 'annotationColumn1'},
                {'id': '61771', 'columnType': 'STRING', 'name': 'annotationColumn2'}],
            'rows': [{'values': ['initial_value3', 'initial_value4'],
                      'etag': '7de0f326-9ef7-4fde-9e4a-ac0babca73f7',
                      'rowId': 789,
                      'versionNumber': 101112}],
            'tableId': 'syn11363411'}}

    with patch.object(syn, "_queryTable", return_value=first_page), \
            patch.object(syn, "_queryTableNext", return_value=second_page):
        table = syn.tableQuery("select something from syn123", resultsAs='rowset')
        dataframe = table.asDataFrame()

        # etag is folded into the index, not kept as a column
        assert "ROW_ETAG" not in dataframe.columns
        expected_indicies = ['123_456_7de0f326-9ef7-4fde-9e4a-ac0babca73f6',
                             '789_101112_7de0f326-9ef7-4fde-9e4a-ac0babca73f7']
        assert expected_indicies == dataframe.index.values.tolist()
def test_RowSetTable():
    """A RowSet parsed from JSON and wrapped in a Table must preserve etag,
    tableId, headers, rows, and convert cleanly to a DataFrame."""
    payload = {
        'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
        'headers': [
            {'columnType': 'STRING', 'id': '353', 'name': 'name'},
            {'columnType': 'DOUBLE', 'id': '355', 'name': 'x'},
            {'columnType': 'DOUBLE', 'id': '3020', 'name': 'y'},
            {'columnType': 'INTEGER', 'id': '891', 'name': 'n'}],
        'rows': [
            {'rowId': 5, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 3},
            {'rowId': 6, 'values': ['bar', '1.34', '2.4', '101'], 'versionNumber': 3},
            {'rowId': 7, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 4},
            {'rowId': 8, 'values': ['qux', '1.23', '2.2', '102'], 'versionNumber': 3}],
        'tableId': 'syn2976298'}

    rowset = RowSet.from_json(payload)

    # JSON fields survive parsing
    assert rowset.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert rowset.tableId == 'syn2976298'
    assert len(rowset.headers) == 4
    assert len(rowset.rows) == 4

    schema = Schema(id="syn2976298", name="Bogus Schema",
                    columns=[353, 355, 3020, 891], parent="syn1000001")
    table = Table(schema, rowset)

    # wrapping in a Table keeps the same metadata and row count
    assert table.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert table.tableId == 'syn2976298'
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    # DataFrame conversion keeps shape and column order
    df = table.asDataFrame()
    assert df.shape == (4, 4)
    assert list(df['name']) == ['foo', 'bar', 'foo', 'qux']
def test_csv_table():
    """Round-trip a table through a CSV file: write rows with ROW_ID/ROW_VERSION
    columns, load them via Table(schema, filename), and verify iteration,
    asRowSet(), and asDataFrame() reproduce the data.

    Maybe not truly a unit test, but here because it doesn't do network IO to
    synapse.

    Fixes vs. previous version:
      * cleanup now runs in ``finally`` — the old code only removed the temp
        file on the exception path, leaking it whenever the test passed;
      * nose-style ``assert_equals``/``assert_is_instance`` replaced with plain
        ``assert``, consistent with the other tests in this file.
    """
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN')
    ]
    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    filename = None
    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file; NamedTemporaryFile only reserves the name, the real
        # write happens below with explicit utf-8 encoding and newline control
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")]
            + [SelectColumn.from_column(col) for col in cols])

        # test iterator: rows come back including ROW_ID / ROW_VERSION
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        # test asRowSet: ROW_ID / ROW_VERSION split out of the value list
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        # test asDataFrame: columns by name, index built as "rowId_version"
        df = table.asDataFrame()
        assert list(df['Name']) == [row[2] for row in data]
        assert list(df['Born']) == [row[3] for row in data]
        assert list(df['Living']) == [row[5] for row in data]
        assert list(df.index) == ['%s_%s' % tuple(row[0:2]) for row in data]
        assert df.shape == (8, 4)
    finally:
        # best-effort cleanup on BOTH success and failure (previously the file
        # leaked when the test passed)
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)