def test_list_of_rows_table():
    """Round-trip a list-of-rows Table through iteration, asRowSet, and asDataFrame."""
    expected = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]
    columns = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]
    schema1 = Schema(name='Jazz Guys', columns=columns, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    table = Table(schema1, expected, headers=[SelectColumn.from_column(c) for c in columns])

    # iterating the Table yields the original rows
    for actual_row, expected_row in zip(table, expected):
        assert_equals(actual_row, expected_row)

    # converting to a RowSet preserves the values
    rowset = table.asRowSet()
    for actual_row, expected_row in zip(rowset.rows, expected):
        assert_equals(actual_row['values'], expected_row)

    # converting to a DataFrame preserves the Name column
    table.columns = columns
    df = table.asDataFrame()
    assert_equals(list(df['Name']), [row[0] for row in expected])
def test_build_table_download_file_handle_list__repeated_file_handles():
    """Repeated FILEHANDLEID values should only be added once to the download list."""
    syn = synapseclient.client.Synapse(debug=True, skip_checks=True)

    cols = [
        Column(name='Name', columnType='STRING', maximumSize=50),
        Column(name='filehandle', columnType='FILEHANDLEID'),
    ]
    schema = Schema(name='FileHandleTest', columns=cols, parent='syn420')

    # using some large filehandle numbers so i don't collide with anything real
    data = [["ayy lmao", 5318008],
            ["large numberino", 0x5f3759df],
            ["repeated file handle", 5318008],
            ["repeated file handle also", 0x5f3759df]]

    # need columns to do cast_values w/o storing
    table = Table(schema, data, headers=[SelectColumn.from_column(col) for col in cols])

    # BUG FIX: the original called patch.object(...) without entering/starting it,
    # so the cache was never actually patched. Use it as a context manager so we
    # don't look in the cache in case FileHandle ids actually exist there.
    with patch.object(syn.cache, "get", return_value=None):
        file_handle_associations, file_handle_to_path_map = \
            syn._build_table_download_file_handle_list(table, ['filehandle'])

    # verify only 2 file_handles are added (repeats were ignored)
    assert_equals(2, len(file_handle_associations))
    assert_equals(0, len(file_handle_to_path_map))
def test_list_of_rows_table():
    """Verify iteration, asRowSet conversion, and asDataFrame for a list-of-rows Table."""
    musicians = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]
    # (id, name, type) specs expanded into Column objects
    col_specs = [('1', 'Name', 'STRING'),
                 ('2', 'Born', 'INTEGER'),
                 ('3', 'Hipness', 'DOUBLE'),
                 ('4', 'Living', 'BOOLEAN')]
    cols = [Column(id=cid, name=cname, columnType=ctype) for cid, cname, ctype in col_specs]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    table = Table(schema1, musicians, headers=[SelectColumn.from_column(c) for c in cols])

    for got, want in zip(table, musicians):
        assert got == want

    rowset = table.asRowSet()
    for got, want in zip(rowset.rows, musicians):
        assert got['values'] == want

    table.columns = cols
    df = table.asDataFrame()
    assert list(df['Name']) == [row[0] for row in musicians]
def test_build_table_download_file_handle_list__repeated_file_handles():
    """De-duplication check: repeated file handle ids appear once in the association list."""
    syn = synapseclient.client.Synapse(debug=True, skip_checks=True)

    cols = [
        Column(name='Name', columnType='STRING', maximumSize=50),
        Column(name='filehandle', columnType='FILEHANDLEID'),
    ]
    schema = Schema(name='FileHandleTest', columns=cols, parent='syn420')

    # using some large filehandle numbers so i don't collide with anything real
    data = [["ayy lmao", 5318008],
            ["large numberino", 0x5f3759df],
            ["repeated file handle", 5318008],
            ["repeated file handle also", 0x5f3759df]]

    # need columns to do cast_values w/o storing
    table = Table(schema, data, headers=[SelectColumn.from_column(col) for col in cols])

    # BUG FIX: patch.object(...) was previously constructed but never applied
    # (no .start() and no `with`), so the real cache was still consulted. Patch
    # the cache so we don't look there in case FileHandle ids actually exist there.
    with patch.object(syn.cache, "get", return_value=None):
        file_handle_associations, file_handle_to_path_map = \
            syn._build_table_download_file_handle_list(table, ['filehandle'])

    # verify only 2 file_handles are added (repeats were ignored)
    assert_equals(2, len(file_handle_associations))
    assert_equals(0, len(file_handle_to_path_map))  # might as well check anyways
def test_iter_with_table_row_metadata(self):
    """Iterate a CsvFileTable whose csv carries row metadata while self.headers does not."""
    # csv file has row metadata, self.headers does not
    raw = ("ROW_ID,ROW_VERSION,col\n"
           "1,2,\"I like trains\"\n"
           "5,1,\"weeeeeeeeeeee\"\n")
    headers = [SelectColumn.from_column(c)
               for c in as_table_columns(StringIOContextManager(raw))]
    with patch.object(io, "open", return_value=StringIOContextManager(raw)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # metadata columns are stripped; only the data column remains
        for want, got in zip([["I like trains"], ["weeeeeeeeeeee"]], table):
            assert_equals(want, got)
def test_iter_with_table_row_metadata(self):
    """Rows iterate without their metadata when self.headers lacks metadata columns."""
    # csv file has row metadata, self.headers does not
    csv_text = ("ROW_ID,ROW_VERSION,col\n"
                "1,2,\"I like trains\"\n"
                "5,1,\"weeeeeeeeeeee\"\n")
    inferred = as_table_columns(StringIOContextManager(csv_text))
    select_cols = [SelectColumn.from_column(column) for column in inferred]
    with patch.object(io, "open", return_value=StringIOContextManager(csv_text)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=select_cols)
        wanted = [["I like trains"], ["weeeeeeeeeeee"]]
        for expected, produced in zip(wanted, table):
            assert expected == produced
def test_iter_no_row_metadata(self):
    """Iterate a CsvFileTable where neither csv headers nor self.headers carry row metadata."""
    # both csv headers and self.headers do not contains row metadata
    raw = ("col1,col2\n"
           "1,2\n"
           "2,1\n")
    headers = [SelectColumn.from_column(c)
               for c in as_table_columns(StringIOContextManager(raw))]
    with patch.object(io, "open", return_value=StringIOContextManager(raw)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        for want, got in zip([[1, 2], [2, 1]], table):
            assert_equals(want, got)
def test_iter_no_row_metadata(self):
    """Plain data rows round-trip when no row metadata is present anywhere."""
    # both csv headers and self.headers do not contains row metadata
    csv_text = ("col1,col2\n"
                "1,2\n"
                "2,1\n")
    inferred = as_table_columns(StringIOContextManager(csv_text))
    select_cols = [SelectColumn.from_column(column) for column in inferred]
    with patch.object(io, "open", return_value=StringIOContextManager(csv_text)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=select_cols)
        wanted = [[1, 2], [2, 1]]
        for expected, produced in zip(wanted, table):
            assert expected == produced
def test_iter_with_mismatch_row_metadata(self):
    """Iteration raises ValueError when csv row metadata disagrees with self.headers."""
    # self.headers and csv file headers contains mismatch row metadata
    data = "ROW_ID,ROW_VERSION,ROW_ETAG,col\n" \
           "1,2,etag1,\"I like trains\"\n" \
           "5,1,etag2,\"weeeeeeeeeeee\"\n"
    cols = as_table_columns(StringIOContextManager(data))
    headers = [SelectColumn(name="ROW_ID", columnType="STRING"),
               SelectColumn(name="ROW_VERSION", columnType="STRING")] + \
              [SelectColumn.from_column(col) for col in cols]
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # FIX: don't shadow the builtin `iter` with a local name
        row_iter = table.__iter__()
        pytest.raises(ValueError, next, row_iter)
def test_iter_with_mismatch_row_metadata(self):
    """Iteration raises ValueError when csv row metadata disagrees with self.headers."""
    # self.headers and csv file headers contains mismatch row metadata
    data = "ROW_ID,ROW_VERSION,ROW_ETAG,col\n" \
           "1,2,etag1,\"I like trains\"\n" \
           "5,1,etag2,\"weeeeeeeeeeee\"\n"
    cols = as_table_columns(StringIOContextManager(data))
    headers = [SelectColumn(name="ROW_ID", columnType="STRING"),
               SelectColumn(name="ROW_VERSION", columnType="STRING")] + \
              [SelectColumn.from_column(col) for col in cols]
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # FIX: renamed local `iter` -> `row_iter` to avoid shadowing the builtin
        row_iter = table.__iter__()
        assert_raises(ValueError, next, row_iter)
def test_iter_row_metadata_mismatch_in_headers(self):
    """Iteration raises ValueError when self.headers expect metadata the csv lacks."""
    # csv file does not contain row metadata, self.headers does
    data = "col1,col2\n" \
           "1,2\n" \
           "2,1\n"
    cols = as_table_columns(StringIOContextManager(data))
    headers = [SelectColumn(name="ROW_ID", columnType="STRING"),
               SelectColumn(name="ROW_VERSION", columnType="STRING")] + \
              [SelectColumn.from_column(col) for col in cols]
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # FIX: renamed local `iter` -> `row_iter` to avoid shadowing the builtin
        row_iter = table.__iter__()
        assert_raises(ValueError, next, row_iter)
def test_iter_row_metadata_mismatch_in_headers(self):
    """Iteration raises ValueError when self.headers expect metadata the csv lacks."""
    # csv file does not contain row metadata, self.headers does
    data = "col1,col2\n" \
           "1,2\n" \
           "2,1\n"
    cols = as_table_columns(StringIOContextManager(data))
    headers = [SelectColumn(name="ROW_ID", columnType="STRING"),
               SelectColumn(name="ROW_VERSION", columnType="STRING")] + \
              [SelectColumn.from_column(col) for col in cols]
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # FIX: don't shadow the builtin `iter` with a local name
        row_iter = table.__iter__()
        pytest.raises(ValueError, next, row_iter)
def test_iter_with_file_view_row_metadata(self):
    """Iterate a CsvFileTable whose csv and self.headers both carry matching row metadata."""
    # csv file and self.headers contain matching row metadata
    raw = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
           "1,2,etag1,\"I like trains\"\n"
           "5,1,etag2,\"weeeeeeeeeeee\"\n")
    metadata_headers = [SelectColumn(name=n, columnType="STRING")
                        for n in ("ROW_ID", "ROW_VERSION", "ROW_ETAG")]
    data_headers = [SelectColumn.from_column(c)
                    for c in as_table_columns(StringIOContextManager(raw))]
    with patch.object(io, "open", return_value=StringIOContextManager(raw)):
        table = CsvFileTable("syn123", "/fake/file/path",
                             headers=metadata_headers + data_headers)
        wanted = [['1', '2', "etag1", "I like trains"],
                  ['5', '1', "etag2", "weeeeeeeeeeee"]]
        for expected, produced in zip(wanted, table):
            assert expected == produced
def test_iter_with_file_view_row_metadata(self):
    """Rows include ROW_ID/ROW_VERSION/ROW_ETAG when both sides declare them."""
    # csv file and self.headers contain matching row metadata
    csv_text = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
                "1,2,etag1,\"I like trains\"\n"
                "5,1,etag2,\"weeeeeeeeeeee\"\n")
    meta = [SelectColumn(name="ROW_ID", columnType="STRING"),
            SelectColumn(name="ROW_VERSION", columnType="STRING"),
            SelectColumn(name="ROW_ETAG", columnType="STRING")]
    inferred = [SelectColumn.from_column(column)
                for column in as_table_columns(StringIOContextManager(csv_text))]
    with patch.object(io, "open", return_value=StringIOContextManager(csv_text)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=meta + inferred)
        for want, got in zip([['1', '2', "etag1", "I like trains"],
                              ['5', '1', "etag2", "weeeeeeeeeeee"]], table):
            assert_equals(want, got)
def test_list_of_rows_table():
    """Round-trip a list-of-rows Table through iteration, asRowSet, and asDataFrame."""
    data = [["John Coltrane", 1926, 8.65, False],
            ["Miles Davis", 1926, 9.87, False],
            ["Bill Evans", 1929, 7.65, False],
            ["Paul Chambers", 1935, 5.14, False],
            ["Jimmy Cobb", 1929, 5.78, True],
            ["Scott LaFaro", 1936, 4.21, False],
            ["Sonny Rollins", 1930, 8.99, True],
            ["Kenny Burrel", 1931, 4.37, True]]

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    table = Table(schema1, data, headers=[SelectColumn.from_column(col) for col in cols])

    for table_row, expected_row in zip(table, data):
        assert table_row == expected_row

    rowset = table.asRowSet()
    for rowset_row, expected_row in zip(rowset.rows, data):
        assert rowset_row['values'] == expected_row

    table.columns = cols

    # test asDataFrame; skipped entirely when pandas isn't available
    try:
        # FIX: import without the unused `pd` alias and drop the unused bound
        # exception variable -- the import exists only to detect availability
        import pandas  # noqa: F401
        df = table.asDataFrame()
        assert all(df['Name'] == [r[0] for r in data])
    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of test_list_of_rows_table.\n\n'
        )
def test_list_of_rows_table():
    """Round-trip a list-of-rows Table through iteration, asRowSet, and asDataFrame."""
    data = [["John Coltrane", 1926, 8.65, False],
            ["Miles Davis", 1926, 9.87, False],
            ["Bill Evans", 1929, 7.65, False],
            ["Paul Chambers", 1935, 5.14, False],
            ["Jimmy Cobb", 1929, 5.78, True],
            ["Scott LaFaro", 1936, 4.21, False],
            ["Sonny Rollins", 1930, 8.99, True],
            ["Kenny Burrel", 1931, 4.37, True]]

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    table = Table(schema1, data, headers=[SelectColumn.from_column(col) for col in cols])

    # FIX: `izip` is Python-2-only (itertools.izip); the builtin `zip` is the
    # equivalent lazy iterator on Python 3 and matches the sibling tests here
    for table_row, expected_row in zip(table, data):
        assert table_row == expected_row

    rowset = table.asRowSet()
    for rowset_row, expected_row in zip(rowset.rows, data):
        assert rowset_row['values'] == expected_row

    table.columns = cols

    # test asDataFrame; skipped entirely when pandas isn't available
    try:
        import pandas  # noqa: F401 -- imported only to detect availability
        df = table.asDataFrame()
        assert all(df['Name'] == [r[0] for r in data])
    except ImportError:
        sys.stderr.write('Pandas is apparently not installed, skipping asDataFrame portion of '
                         'test_list_of_rows_table.\n\n')
def test_csv_table():
    """Round-trip a Table backed by a CSV file: iteration, asRowSet, and asDataFrame.

    Maybe not truly a unit test, but here because it doesn't do network IO to
    synapse.
    """
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]
    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file -- FIX: open in text mode with newline='' as the csv
        # module requires on Python 3 (the original wrote through the default
        # binary handle, which only worked on Python 2)
        with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False) as temp:
            filename = temp.name
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=os.linesep)
            writer.writerow(['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols])
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        # test iterator -- FIX: `izip` is Python-2-only; use the builtin `zip`
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        # test asDataFrame; skipped entirely when pandas isn't available
        try:
            import pandas  # noqa: F401 -- imported only to detect availability
            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s' % tuple(row[0:2]) for row in data])
            assert df.shape == (8, 4)
        except ImportError:
            sys.stderr.write('Pandas is apparently not installed, skipping asDataFrame portion of '
                             'test_csv_table.\n\n')
    finally:
        # FIX: the original cleaned up only in an `except` clause (with the
        # py2-only `print ex` statement), leaking the temp file on success;
        # always remove it and let any exception propagate naturally
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
def test_csv_table():
    """Round-trip a Table backed by a CSV file: iteration, asRowSet, and asDataFrame.

    Maybe not truly a unit test, but here because it doesn't do network IO to
    synapse.
    """
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]
    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file: NamedTemporaryFile just reserves the name; the data
        # is written through an explicit utf-8 text handle
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders([
            SelectColumn(name="ROW_ID", columnType="STRING"),
            SelectColumn(name="ROW_VERSION", columnType="STRING")
        ] + [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        # test asDataFrame; skipped entirely when pandas isn't available
        try:
            import pandas  # noqa: F401 -- imported only to detect availability
            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s' % tuple(row[0:2]) for row in data])
            assert df.shape == (8, 4)
        except ImportError:
            sys.stderr.write(
                'Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n'
            )
    finally:
        # FIX: the original removed the temp file only inside an `except`
        # clause, leaking it on a successful run; always clean up and let any
        # exception propagate naturally
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
def test_csv_table():
    """Round-trip a Table backed by a CSV file: iteration, asRowSet, and asDataFrame.

    Maybe not truly a unit test, but here because it doesn't do network IO to
    synapse.
    """
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]
    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file: NamedTemporaryFile just reserves the name; the data
        # is written through an explicit utf-8 text handle
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert_is_instance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert_equals(table_row, expected_row)

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert_equals(rowset_row['values'], expected_row[2:])
            assert_equals(rowset_row['rowId'], expected_row[0])
            assert_equals(rowset_row['versionNumber'], expected_row[1])

        # test asDataFrame
        df = table.asDataFrame()
        assert_equals(list(df['Name']), [row[2] for row in data])
        assert_equals(list(df['Born']), [row[3] for row in data])
        assert_equals(list(df['Living']), [row[5] for row in data])
        assert_equals(list(df.index), ['%s_%s' % tuple(row[0:2]) for row in data])
        assert_equals(df.shape, (8, 4))
    finally:
        # FIX: the original removed the temp file only inside an `except`
        # clause, leaking it on a successful run; always clean up and let any
        # exception propagate naturally
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)