def test_build_table_download_file_handle_list__repeated_file_handles():
    """Repeated FILEHANDLEID values should be de-duplicated in the download list."""
    syn = synapseclient.client.Synapse(debug=True, skip_checks=True)

    # Patch the cache so we don't look there in case FileHandle ids actually exist there.
    # BUG FIX: the original called patch.object(...) without entering/starting the
    # patcher, so the cache was never actually patched; use it as a context manager.
    with patch.object(syn.cache, "get", return_value=None):
        cols = [
            Column(name='Name', columnType='STRING', maximumSize=50),
            Column(name='filehandle', columnType='FILEHANDLEID'),
        ]
        schema = Schema(name='FileHandleTest', columns=cols, parent='syn420')

        # using some large filehandle numbers so they are unlikely to be real
        data = [["ayy lmao", 5318008],
                ["large numberino", 0x5f3759df],
                ["repeated file handle", 5318008],
                ["repeated file handle also", 0x5f3759df]]

        # need columns to do cast_values w/o storing
        table = Table(schema, data,
                      headers=[SelectColumn.from_column(col) for col in cols])

        file_handle_associations, file_handle_to_path_map = \
            syn._build_table_download_file_handle_list(table, ['filehandle'])

    # verify only 2 file_handles are added (repeats were ignored)
    assert_equals(2, len(file_handle_associations))
    assert_equals(0, len(file_handle_to_path_map))  # might as well check anyways
def test_list_of_rows_table():
    """Round-trip a list-of-lists through Table: iteration, RowSet and DataFrame."""
    expected = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]
    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # headers are required so values can be cast without storing the table
    table = Table(schema1, expected,
                  headers=[SelectColumn.from_column(c) for c in cols])

    # iterating the table yields the original rows unchanged
    for got, want in zip(table, expected):
        assert got == want

    # conversion to a RowSet preserves the row values
    for rs_row, want in zip(table.asRowSet().rows, expected):
        assert rs_row['values'] == want

    table.columns = cols
    frame = table.asDataFrame()
    assert list(frame['Name']) == [row[0] for row in expected]
def test_pandas_to_table():
    """Build Tables from DataFrames with and without row id/version information."""
    pd = _try_import_pandas('test_pandas_to_table')

    frame = pd.DataFrame(dict(a=[1, 2, 3], b=["c", "d", "e"]))
    schema = Schema(name="Baz", parent="syn12345", columns=as_table_columns(frame))
    print("\n", frame, "\n\n")

    letters = ["c", "d", "e"]

    ## A dataframe with no row id and version
    table = Table(schema, frame)
    for idx, row in enumerate(table):
        print(row)
        assert row[0] == idx + 1
        assert row[1] == letters[idx]
    assert len(table) == 3

    ## If includeRowIdAndRowVersion=True, include empty row id an versions
    ## ROW_ID,ROW_VERSION,a,b
    ## ,,1,c
    ## ,,2,d
    ## ,,3,e
    table = Table(schema, frame, includeRowIdAndRowVersion=True)
    for idx, row in enumerate(table):
        print(row)
        assert row[0] is None
        assert row[1] is None
        assert row[2] == idx + 1

    ## A dataframe with no row id and version
    frame = pd.DataFrame(index=["1_7", "2_7", "3_8"],
                         data=dict(a=[100, 200, 300], b=["c", "d", "e"]))
    print("\n", frame, "\n\n")

    # row id / version are parsed out of the "<id>_<version>" index labels
    table = Table(schema, frame)
    for idx, row in enumerate(table):
        print(row)
        assert row[0] == ["1", "2", "3"][idx]
        assert row[1] == ["7", "7", "8"][idx]
        assert row[2] == (idx + 1) * 100
        assert row[3] == letters[idx]

    ## A dataframe with row id and version in columns
    frame = pd.DataFrame(dict(ROW_ID=["0", "1", "2"],
                              ROW_VERSION=["8", "9", "9"],
                              a=[100, 200, 300],
                              b=["c", "d", "e"]))
    print("\n", frame, "\n\n")

    table = Table(schema, frame)
    for idx, row in enumerate(table):
        print(row)
        assert row[0] == ["0", "1", "2"][idx]
        assert row[1] == ["8", "9", "9"][idx]
        assert row[2] == (idx + 1) * 100
        assert row[3] == letters[idx]
def test_RowSetTable_len():
    """len() of a RowSetTable reflects the number of rows in its RowSet."""
    schema = Schema(parentId="syn123", id='syn456',
                    columns=[Column(name='column_name', id='123')])
    rows = [Row(['first row']), Row(['second row'])]
    row_set_table = RowSetTable(schema, RowSet(schema=schema, rows=rows))
    assert_equals(2, len(row_set_table))
def test_dict_to_table():
    """A plain dict passed to Table() is converted to a DataFrame internally."""
    source = dict(a=[1, 2, 3], b=["c", "d", "e"])
    expected_df = pd.DataFrame(source)
    schema = Schema(name="Baz", parent="syn12345", columns=as_table_columns(expected_df))

    with patch.object(CsvFileTable, "from_data_frame") as mocked_from_data_frame:
        Table(schema, source)

    # call_args is (positional_args, kwargs); the DataFrame built from the
    # dict is the second positional argument
    positional_args = mocked_from_data_frame.call_args[0]
    assert positional_args[1].equals(expected_df)
def test_pandas_to_table():
    """Build Tables from DataFrames, covering the row id/version variants."""
    frame = pd.DataFrame(dict(a=[1, 2, 3], b=["c", "d", "e"]))
    schema = Schema(name="Baz", parent="syn12345", columns=as_table_columns(frame))

    letters = ["c", "d", "e"]

    # A dataframe with no row id and version
    table = Table(schema, frame)
    for idx, row in enumerate(table):
        assert_equals(row[0], idx + 1)
        assert_equals(row[1], letters[idx])
    assert_equals(len(table), 3)

    # If includeRowIdAndRowVersion=True, include empty row id an versions
    # ROW_ID,ROW_VERSION,a,b
    # ,,1,c
    # ,,2,d
    # ,,3,e
    table = Table(schema, frame, includeRowIdAndRowVersion=True)
    for idx, row in enumerate(table):
        assert_is_none(row[0])
        assert_is_none(row[1])
        assert_equals(row[2], idx + 1)

    # A dataframe with no row id and version
    # (row id / version are parsed out of the "<id>_<version>" index labels)
    frame = pd.DataFrame(index=["1_7", "2_7", "3_8"],
                         data=dict(a=[100, 200, 300], b=["c", "d", "e"]))
    table = Table(schema, frame)
    for idx, row in enumerate(table):
        assert_equals(row[0], ["1", "2", "3"][idx])
        assert_equals(row[1], ["7", "7", "8"][idx])
        assert_equals(row[2], (idx + 1) * 100)
        assert_equals(row[3], letters[idx])

    # A dataframe with row id and version in columns
    frame = pd.DataFrame(dict(ROW_ID=["0", "1", "2"],
                              ROW_VERSION=["8", "9", "9"],
                              a=[100, 200, 300],
                              b=["c", "d", "e"]))
    table = Table(schema, frame)
    for idx, row in enumerate(table):
        assert_equals(row[0], ["0", "1", "2"][idx])
        assert_equals(row[1], ["8", "9", "9"][idx])
        assert_equals(row[2], (idx + 1) * 100)
        assert_equals(row[3], letters[idx])
def test_list_of_rows_table():
    """Round-trip a list-of-lists through Table; DataFrame check is pandas-optional."""
    expected = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]
    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    ## need columns to do cast_values w/o storing
    table = Table(schema1, expected,
                  headers=[SelectColumn.from_column(c) for c in cols])

    # iterating the table yields the original rows unchanged
    for got, want in zip(table, expected):
        assert got == want

    # conversion to a RowSet preserves the row values
    for rs_row, want in zip(table.asRowSet().rows, expected):
        assert rs_row['values'] == want

    table.columns = cols

    ## test asDataFrame (skipped gracefully when pandas is unavailable)
    try:
        import pandas as pd
        frame = table.asDataFrame()
        assert all(frame['Name'] == [row[0] for row in expected])
    except ImportError as e1:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of test_list_of_rows_table.\n\n'
        )
def test_RowSetTable():
    """Parse a RowSet from JSON and wrap it as a Table backed by a schema."""
    expected_etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    row_set_json = {
        'etag': expected_etag,
        'headers': [
            {'columnType': 'STRING', 'id': '353', 'name': 'name'},
            {'columnType': 'DOUBLE', 'id': '355', 'name': 'x'},
            {'columnType': 'DOUBLE', 'id': '3020', 'name': 'y'},
            {'columnType': 'INTEGER', 'id': '891', 'name': 'n'},
        ],
        'rows': [
            {'rowId': 5, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 3},
            {'rowId': 6, 'values': ['bar', '1.34', '2.4', '101'], 'versionNumber': 3},
            {'rowId': 7, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 4},
            {'rowId': 8, 'values': ['qux', '1.23', '2.2', '102'], 'versionNumber': 3},
        ],
        'tableId': 'syn2976298',
    }

    row_set = RowSet.from_json(row_set_json)
    assert row_set.etag == expected_etag
    assert row_set.tableId == 'syn2976298'
    assert len(row_set.headers) == 4
    assert len(row_set.rows) == 4

    schema = Schema(id="syn2976298", name="Bogus Schema",
                    columns=[353, 355, 3020, 891], parent="syn1000001")

    # wrapping the RowSet in a Table exposes the same metadata and rows
    table = Table(schema, row_set)
    assert table.etag == expected_etag
    assert table.tableId == 'syn2976298'
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    frame = table.asDataFrame()
    assert frame.shape == (4, 4)
    assert list(frame['name']) == ['foo', 'bar', 'foo', 'qux']
def test_schema():
    """Exercise adding and removing columns on a Schema, by id and by value."""
    schema = Schema(name='My Table', parent="syn1000001")
    assert not schema.has_columns()

    # a column added by id shows up in the stored columnIds
    schema.addColumn(Column(id='1', name='Name', columnType='STRING'))
    assert schema.has_columns()
    assert schema.properties.columnIds == ['1']

    schema.removeColumn('1')
    assert not schema.has_columns()
    assert schema.properties.columnIds == []

    # columns without ids accumulate in columns_to_store until the schema is stored
    schema = Schema(name='Another Table', parent="syn1000001")
    specs = [('Name', 'STRING'), ('Born', 'INTEGER'),
             ('Hipness', 'DOUBLE'), ('Living', 'BOOLEAN')]
    schema.addColumns([Column(name=n, columnType=t) for n, t in specs])
    assert schema.has_columns()
    assert len(schema.columns_to_store) == 4
    for n, t in specs:
        assert Column(name=n, columnType=t) in schema.columns_to_store

    schema.removeColumn(Column(name='Living', columnType='BOOLEAN'))
    assert schema.has_columns()
    assert len(schema.columns_to_store) == 3
    assert Column(name='Living', columnType='BOOLEAN') not in schema.columns_to_store
    assert Column(name='Hipness', columnType='DOUBLE') in schema.columns_to_store
def test_schema():
    """Verify Schema column bookkeeping: add/remove by id and by Column value."""
    schema = Schema(name='My Table', parent="syn1000001")
    assert not schema.has_columns()

    # add one column by id, then remove it again
    name_col = Column(id='1', name='Name', columnType='STRING')
    schema.addColumn(name_col)
    assert schema.has_columns()
    assert schema.properties.columnIds == ['1']
    schema.removeColumn('1')
    assert not schema.has_columns()
    assert schema.properties.columnIds == []

    # id-less columns are queued in columns_to_store until the schema is stored
    schema = Schema(name='Another Table', parent="syn1000001")
    born = Column(name='Born', columnType='INTEGER')
    hipness = Column(name='Hipness', columnType='DOUBLE')
    living = Column(name='Living', columnType='BOOLEAN')
    schema.addColumns([Column(name='Name', columnType='STRING'), born, hipness, living])
    assert schema.has_columns()
    assert len(schema.columns_to_store) == 4
    assert Column(name='Name', columnType='STRING') in schema.columns_to_store
    assert born in schema.columns_to_store
    assert hipness in schema.columns_to_store
    assert living in schema.columns_to_store

    # removing by value drops only the matching column
    schema.removeColumn(Column(name='Living', columnType='BOOLEAN'))
    assert schema.has_columns()
    assert len(schema.columns_to_store) == 3
    assert living not in schema.columns_to_store
    assert hipness in schema.columns_to_store
def test_Schema__max_column_check():
    """Storing a schema with more than the maximum column count must raise."""
    table = Schema(name="someName", parent="idk")
    too_many = synapseclient.table.MAX_NUM_TABLE_COLUMNS + 1
    table.addColumns(
        Column(name="colNum%s" % i, columnType="STRING") for i in range(too_many))
    assert_raises(ValueError, syn.store, table)
def test_csv_table():
    """Write table data to a CSV file and read it back through CsvFileTable.

    Maybe not truly a unit test, but here because it doesn't do network IO
    to synapse.
    """
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        ## create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        ## need to set column headers to read a CSV file
        table.setColumnHeaders([
            SelectColumn(name="ROW_ID", columnType="STRING"),
            SelectColumn(name="ROW_VERSION", columnType="STRING")
        ] + [SelectColumn.from_column(col) for col in cols])

        ## test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        ## test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        ## test asDataFrame
        try:
            import pandas as pd

            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s' % tuple(row[0:2]) for row in data])
            assert df.shape == (8, 4)
        except ImportError as e1:
            sys.stderr.write(
                'Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n'
            )
    finally:
        # BUG FIX: cleanup used to live in an `except Exception` branch, so the
        # temp file leaked on every successful run; `finally` removes it always
        # while still letting any test failure propagate.
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
def test_RowSetTable():
    """Parse a RowSet from JSON and wrap it as a Table; DataFrame check is pandas-optional."""
    expected_etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    row_set_json = {
        'etag': expected_etag,
        'headers': [
            {'columnType': 'STRING', 'id': '353', 'name': 'name'},
            {'columnType': 'DOUBLE', 'id': '355', 'name': 'x'},
            {'columnType': 'DOUBLE', 'id': '3020', 'name': 'y'},
            {'columnType': 'INTEGER', 'id': '891', 'name': 'n'},
        ],
        'rows': [
            {'rowId': 5, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 3},
            {'rowId': 6, 'values': ['bar', '1.34', '2.4', '101'], 'versionNumber': 3},
            {'rowId': 7, 'values': ['foo', '1.23', '2.2', '101'], 'versionNumber': 4},
            {'rowId': 8, 'values': ['qux', '1.23', '2.2', '102'], 'versionNumber': 3},
        ],
        'tableId': 'syn2976298',
    }

    row_set = RowSet.from_json(row_set_json)
    assert row_set.etag == expected_etag
    assert row_set.tableId == 'syn2976298'
    assert len(row_set.headers) == 4
    assert len(row_set.rows) == 4

    schema = Schema(id="syn2976298", name="Bogus Schema",
                    columns=[353, 355, 3020, 891], parent="syn1000001")

    # wrapping the RowSet in a Table exposes the same metadata and rows
    table = Table(schema, row_set)
    assert table.etag == expected_etag
    assert table.tableId == 'syn2976298'
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    # DataFrame conversion is skipped gracefully when pandas is unavailable
    try:
        import pandas as pd

        frame = table.asDataFrame()
        assert frame.shape == (4, 4)
        assert all(frame['name'] == ['foo', 'bar', 'foo', 'qux'])
    except ImportError as e1:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping part of test_RowSetTable.\n\n'
        )
def test_schema():
    """Exercise Schema column add/remove paths using the nose assertion helpers."""
    schema = Schema(name='My Table', parent="syn1000001")
    assert_false(schema.has_columns())

    # adding a column with an id records it in columnIds
    schema.addColumn(Column(id='1', name='Name', columnType='STRING'))
    assert_true(schema.has_columns())
    assert_equals(schema.properties.columnIds, ['1'])

    schema.removeColumn('1')
    assert_false(schema.has_columns())
    assert_equals(schema.properties.columnIds, [])

    # id-less columns are queued in columns_to_store
    schema = Schema(name='Another Table', parent="syn1000001")
    specs = [('Name', 'STRING'), ('Born', 'INTEGER'),
             ('Hipness', 'DOUBLE'), ('Living', 'BOOLEAN')]
    schema.addColumns([Column(name=n, columnType=t) for n, t in specs])
    assert_true(schema.has_columns())
    assert_equals(len(schema.columns_to_store), 4)
    for n, t in specs:
        assert_in(Column(name=n, columnType=t), schema.columns_to_store)

    schema.removeColumn(Column(name='Living', columnType='BOOLEAN'))
    assert_true(schema.has_columns())
    assert_equals(len(schema.columns_to_store), 3)
    assert_not_in(Column(name='Living', columnType='BOOLEAN'), schema.columns_to_store)
    assert_in(Column(name='Hipness', columnType='DOUBLE'), schema.columns_to_store)
def test_Schema__max_column_check():
    """syn.store must reject a schema that exceeds the table column limit."""
    table = Schema(name="someName", parent="idk")
    over_limit = synapseclient.table.MAX_NUM_TABLE_COLUMNS + 1
    columns = (Column(name="colNum%s" % idx, columnType="STRING")
               for idx in range(over_limit))
    table.addColumns(columns)
    assert_raises(ValueError, syn.store, table)
def test_schema():
    """Check Schema column bookkeeping (nose-style asserts): add/remove by id and value."""
    schema = Schema(name='My Table', parent="syn1000001")
    assert_false(schema.has_columns())

    # add a column by id, confirm it registers, then remove it
    name_col = Column(id='1', name='Name', columnType='STRING')
    schema.addColumn(name_col)
    assert_true(schema.has_columns())
    assert_equals(schema.properties.columnIds, ['1'])
    schema.removeColumn('1')
    assert_false(schema.has_columns())
    assert_equals(schema.properties.columnIds, [])

    # columns without ids are held in columns_to_store until stored
    schema = Schema(name='Another Table', parent="syn1000001")
    born = Column(name='Born', columnType='INTEGER')
    hipness = Column(name='Hipness', columnType='DOUBLE')
    living = Column(name='Living', columnType='BOOLEAN')
    schema.addColumns([Column(name='Name', columnType='STRING'), born, hipness, living])
    assert_true(schema.has_columns())
    assert_equals(len(schema.columns_to_store), 4)
    assert_in(Column(name='Name', columnType='STRING'), schema.columns_to_store)
    assert_in(born, schema.columns_to_store)
    assert_in(hipness, schema.columns_to_store)
    assert_in(living, schema.columns_to_store)

    # removing by value drops only the matching column
    schema.removeColumn(Column(name='Living', columnType='BOOLEAN'))
    assert_true(schema.has_columns())
    assert_equals(len(schema.columns_to_store), 3)
    assert_not_in(living, schema.columns_to_store)
    assert_in(hipness, schema.columns_to_store)