def test_iter_metadata__no_etag(self):
    """Check ``iter_row_metadata`` output when the rows carry no etag.

    ``syn._queryTable`` is patched to hand back ``self.query_result_dict``.
    Two metadata tuples are expected, each ending in ``None``; the
    ``has_etag`` sibling test shows that last slot holds the row etag.
    """
    query_patch = patch.object(syn, "_queryTable", return_value=self.query_result_dict)
    with query_patch:
        table = TableQueryResult(syn, self.query_string)
        row_metadata = list(table.iter_row_metadata())

        assert_equals(2, len(row_metadata))
        assert_equals((1, 2, None), row_metadata[0])
        assert_equals((5, 1, None), row_metadata[1])
def test_iter_metadata__no_etag(self):
    """Rows without an etag must yield metadata tuples whose last item is None.

    ``self.syn._queryTable`` is replaced by a mock returning
    ``self.query_result_dict`` for the duration of the test.
    """
    # NOTE(review): the first two tuple items are presumably row id and
    # version number -- confirm against iter_row_metadata.
    expected_first = (1, 2, None)
    expected_second = (5, 1, None)

    with patch.object(self.syn, "_queryTable", return_value=self.query_result_dict):
        result_table = TableQueryResult(self.syn, self.query_string)
        collected = list(result_table.iter_row_metadata())

        assert 2 == len(collected)
        assert expected_first == collected[0]
        assert expected_second == collected[1]
def test_iter_metadata__no_etag(self):
    """Verify ``iter_row_metadata`` for query results that contain no etags.

    ``syn._queryTable`` is patched so that constructing the
    ``TableQueryResult`` receives ``self.query_result_dict`` instead of
    contacting a server.  The test expects exactly two metadata tuples, both
    with ``None`` in the final position.
    """
    # The mock object itself is never inspected here, so it is not bound to a
    # name.
    with patch.object(syn, "_queryTable", return_value=self.query_result_dict):
        query_result_table = TableQueryResult(syn, self.query_string)
        metadata = list(query_result_table.iter_row_metadata())

        assert_equals(2, len(metadata))
        assert_equals((1, 2, None), metadata[0])
        assert_equals((5, 1, None), metadata[1])
def test_iter_metadata__has_etag(self):
    """Check ``iter_row_metadata`` output when each row carries an etag.

    The two fixture rows in ``self.rows`` are given ``'etag'`` entries before
    ``syn._queryTable`` is patched to return ``self.query_result_dict``; the
    etag is expected as the last item of each metadata tuple.
    """
    for index, etag in enumerate(('etag1', 'etag2')):
        self.rows[index].update({'etag': etag})

    query_patch = patch.object(syn, "_queryTable", return_value=self.query_result_dict)
    with query_patch:
        table = TableQueryResult(syn, self.query_string)
        row_metadata = list(table.iter_row_metadata())

        assert_equals(2, len(row_metadata))
        assert_equals((1, 2, 'etag1'), row_metadata[0])
        assert_equals((5, 1, 'etag2'), row_metadata[1])
def test_iter_metadata__has_etag(self):
    """Rows that carry an etag must expose it as the last metadata item.

    ``self.syn._queryTable`` is replaced by a mock returning
    ``self.query_result_dict`` after both fixture rows receive an etag.
    """
    first_row, second_row = self.rows[0], self.rows[1]
    first_row.update({'etag': 'etag1'})
    second_row.update({'etag': 'etag2'})

    expected_first = (1, 2, 'etag1')
    expected_second = (5, 1, 'etag2')

    with patch.object(self.syn, "_queryTable", return_value=self.query_result_dict):
        result_table = TableQueryResult(self.syn, self.query_string)
        collected = list(result_table.iter_row_metadata())

        assert 2 == len(collected)
        assert expected_first == collected[0]
        assert expected_second == collected[1]
def test_iter_metadata__has_etag(self):
    """Verify ``iter_row_metadata`` for query results whose rows have etags.

    Both fixture rows get an ``'etag'`` entry, then ``syn._queryTable`` is
    patched so the ``TableQueryResult`` is built from
    ``self.query_result_dict``.  The test expects two metadata tuples with the
    matching etag in the final position.
    """
    self.rows[0].update({'etag': 'etag1'})
    self.rows[1].update({'etag': 'etag2'})

    # The mock object itself is never inspected here, so it is not bound to a
    # name.
    with patch.object(syn, "_queryTable", return_value=self.query_result_dict):
        query_result_table = TableQueryResult(syn, self.query_string)
        metadata = list(query_result_table.iter_row_metadata())

        assert_equals(2, len(metadata))
        assert_equals((1, 2, 'etag1'), metadata[0])
        assert_equals((5, 1, 'etag2'), metadata[1])
def test_TableQueryResult_len():
    """Verify ``len()`` of a ``TableQueryResult`` and the query it issues.

    ``syn._queryTable`` is patched to return a canned bundle holding two rows.
    The test checks that the patched call received the query string through
    the ``query`` keyword argument and that the result reports a length of 2.
    """
    query_result_dict = {
        'queryResult': {
            'queryResults': {
                'headers': [
                    {'columnType': 'STRING', 'name': 'col_name'},
                ],
                'rows': [
                    {'values': ['first_row']},
                    {'values': ['second_row']},
                ],
                'tableId': 'syn123',
            },
        },
        'selectColumns': [
            {'columnType': 'STRING', 'id': '1337', 'name': 'col_name'},
        ],
    }
    query_string = "SELECT whatever FROM some_table WHERE sky=blue"

    with patch.object(syn, "_queryTable", return_value=query_result_dict) as mocked_table_query:
        query_result_table = TableQueryResult(syn, query_string)

        # Only the keyword arguments of the recorded call are of interest.
        _, kwargs = mocked_table_query.call_args
        assert_equals(query_string, kwargs['query'])
        assert_equals(2, len(query_result_table))
def test_len(self):
    """Check the query forwarded to ``_queryTable`` and the result length.

    ``syn._queryTable`` is patched to return ``self.query_result_dict``; the
    recorded call must carry ``self.query_string`` as its ``query`` keyword
    and the resulting table must report two rows.
    """
    query_patch = patch.object(syn, "_queryTable", return_value=self.query_result_dict)
    with query_patch as query_mock:
        table = TableQueryResult(syn, self.query_string)

        _, call_kwargs = query_mock.call_args
        assert_equals(self.query_string, call_kwargs['query'])
        assert_equals(2, len(table))
def test_aggregate_query_result_to_data_frame():
    """Check row-set and DataFrame conversion of a paged aggregate query.

    A stub Synapse client serves two rows from ``_queryTable`` (together with
    a ``nextPageToken``) and two more rows from ``_queryTableNext``.  The test
    expects ``TableQueryResult`` to expose all four rows through both
    ``asRowSet()`` and ``asDataFrame()``.

    If pandas cannot be imported a notice is written to stderr and the test
    returns without failing.
    """
    # NOTE(review): the ``try`` wraps the whole body, so without pandas the
    # asRowSet checks are skipped too, although the message only mentions the
    # asDataFrame portion -- consider narrowing.
    try:
        import pandas as pd  # noqa: F401 -- imported only to detect availability

        class MockSynapse(object):
            def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
                # First page: two rows plus a token announcing a further page.
                return {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                    'maxRowsPerPage': 2,
                    'queryCount': 4,
                    'queryResult': {
                        'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                        'nextPageToken': 'aaaaaaaa',
                        'queryResults': {
                            'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                            'headers': [
                                {'columnType': 'STRING', 'name': 'State'},
                                {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                                {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                                {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'},
                            ],
                            'rows': [
                                {'values': ['PA', '1935', '2', '1.1']},
                                {'values': ['MO', '1928', '3', '2.38']},
                            ],
                            'tableId': 'syn2757980',
                        },
                    },
                    'selectColumns': [
                        {'columnType': 'STRING', 'id': '1387', 'name': 'State'},
                    ],
                }

            def _queryTableNext(self, nextPageToken, tableId):
                # Second page: two rows and no further page token.
                return {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'queryResults': {
                        'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                        'headers': [
                            {'columnType': 'STRING', 'name': 'State'},
                            {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                            {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                            {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'},
                        ],
                        'rows': [
                            {'values': ['DC', '1929', '1', '3.14']},
                            {'values': ['NC', '1926', '1', '4.38']},
                        ],
                        'tableId': 'syn2757980',
                    },
                }

        result = TableQueryResult(
            synapse=MockSynapse(),
            query="select State, min(Born), count(State), avg(Hipness) from syn2757980 group by Living")

        assert result.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
        assert result.tableId == 'syn2757980'
        assert len(result.headers) == 4

        rs = result.asRowSet()
        assert len(rs.rows) == 4

        # A fresh result is built for the DataFrame check.
        # NOTE(review): presumably because asRowSet() consumed the first one
        # -- confirm.
        result = TableQueryResult(
            synapse=MockSynapse(),
            query="select State, min(Born), count(State), avg(Hipness) from syn2757980 group by Living")
        df = result.asDataFrame()

        assert df.shape == (4, 4)
        assert all(df['State'].values == ['PA', 'MO', 'DC', 'NC'])

        # check integer, double and boolean types after PLFM-3073 is fixed
        # ``str`` replaces the Python-2-only ``unicode`` so a failure reports
        # the values instead of raising NameError on Python 3.
        assert all(df['MIN(Born)'].values == [1935, 1928, 1929, 1926]), \
            "Unexpected values" + str(df['MIN(Born)'].values)
        assert all(df['COUNT(State)'].values == [2, 3, 1, 1])
        assert all(df['AVG(Hipness)'].values == [1.1, 2.38, 3.14, 4.38])

    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of '
            'test_aggregate_query_result_to_data_frame.\n\n')
def test_aggregate_query_result_to_data_frame():
    """Exercise ``asRowSet`` and ``asDataFrame`` on a two-page aggregate query.

    The stub client below returns two rows from ``_queryTable`` (with a
    ``nextPageToken``) and two more from ``_queryTableNext``; four rows are
    expected in total.  When pandas is missing, a notice goes to stderr and
    nothing is asserted.
    """
    try:
        import pandas as pd

        etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
        table_id = 'syn2757980'
        sql = ("select State, min(Born), count(State), avg(Hipness) "
               "from syn2757980 group by Living")

        def build_query_results(*value_rows):
            # Builds a brand-new structure on every call so the two pages
            # never share mutable objects.
            return {
                'etag': etag,
                'headers': [
                    {'columnType': 'STRING', 'name': 'State'},
                    {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                    {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                    {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'},
                ],
                'rows': [{'values': list(values)} for values in value_rows],
                'tableId': table_id,
            }

        class MockSynapse(object):
            def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
                return {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                    'maxRowsPerPage': 2,
                    'queryCount': 4,
                    'queryResult': {
                        'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                        'nextPageToken': 'aaaaaaaa',
                        'queryResults': build_query_results(
                            ['PA', '1935', '2', '1.1'],
                            ['MO', '1928', '3', '2.38'],
                        ),
                    },
                    'selectColumns': [
                        {'columnType': 'STRING', 'id': '1387', 'name': 'State'},
                    ],
                }

            def _queryTableNext(self, nextPageToken, tableId):
                return {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'queryResults': build_query_results(
                        ['DC', '1929', '1', '3.14'],
                        ['NC', '1926', '1', '4.38'],
                    ),
                }

        result = TableQueryResult(synapse=MockSynapse(), query=sql)
        assert result.etag == etag
        assert result.tableId == table_id
        assert len(result.headers) == 4

        row_set = result.asRowSet()
        assert len(row_set.rows) == 4

        # The DataFrame check runs against a freshly constructed result.
        result = TableQueryResult(synapse=MockSynapse(), query=sql)
        df = result.asDataFrame()
        assert df.shape == (4, 4)

        states = df['State'].values
        assert all(states == ['PA', 'MO', 'DC', 'NC'])

        # check integer, double and boolean types after PLFM-3073 is fixed
        assert all(df['MIN(Born)'].values == [1935, 1928, 1929, 1926]), \
            "Unexpected values" + str(df['MIN(Born)'].values)

        counts = df['COUNT(State)'].values
        assert all(counts == [2, 3, 1, 1])

        averages = df['AVG(Hipness)'].values
        assert all(averages == [1.1, 2.38, 3.14, 4.38])

    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of '
            'test_aggregate_query_result_to_data_frame.\n\n')
def test_aggregate_query_result_to_data_frame():
    """Exercise ``asRowSet`` and ``asDataFrame`` on a two-page aggregate query.

    A stub client serves two rows from ``_queryTable`` (with a
    ``nextPageToken``) and two more from ``_queryTableNext``.  Both
    conversions are expected to expose all four rows, and the DataFrame
    columns are compared with their expected values.
    """
    etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    table_id = 'syn2757980'
    sql = ("select State, min(Born), count(State), avg(Hipness) "
           "from syn2757980 group by Living")
    # (columnType, name) pairs describing the four result columns.
    columns = (
        ('STRING', 'State'),
        ('INTEGER', 'MIN(Born)'),
        ('INTEGER', 'COUNT(State)'),
        ('DOUBLE', 'AVG(Hipness)'),
    )

    def query_results(*value_rows):
        # A new dict/list tree is produced per call; nothing is shared
        # between the first and the second page.
        return {
            'etag': etag,
            'headers': [
                {'columnType': column_type, 'name': name}
                for column_type, name in columns
            ],
            'rows': [{'values': list(values)} for values in value_rows],
            'tableId': table_id,
        }

    class MockSynapse(object):
        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
                'queryResult': {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'nextPageToken': 'aaaaaaaa',
                    'queryResults': query_results(
                        ['PA', '1935', '2', '1.1'],
                        ['MO', '1928', '3', '2.38'],
                    ),
                },
                'selectColumns': [
                    {'columnType': 'STRING', 'id': '1387', 'name': 'State'},
                ],
            }

        def _queryTableNext(self, nextPageToken, tableId):
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'queryResults': query_results(
                    ['DC', '1929', '1', '3.14'],
                    ['NC', '1926', '1', '4.38'],
                ),
            }

    result = TableQueryResult(synapse=MockSynapse(), query=sql)
    assert_equals(result.etag, etag)
    assert_equals(result.tableId, table_id)
    assert_equals(len(result.headers), 4)

    row_set = result.asRowSet()
    assert_equals(len(row_set.rows), 4)

    # The DataFrame check runs against a freshly constructed result.
    result = TableQueryResult(synapse=MockSynapse(), query=sql)
    df = result.asDataFrame()
    assert_equals(df.shape, (4, 4))
    assert_equals(list(df['State'].values), ['PA', 'MO', 'DC', 'NC'])

    # check integer, double and boolean types after PLFM-3073 is fixed
    born_message = "Unexpected values" + str(df['MIN(Born)'].values)
    assert_equals(list(df['MIN(Born)'].values), [1935, 1928, 1929, 1926], born_message)
    assert_equals(list(df['COUNT(State)'].values), [2, 3, 1, 1])
    assert_equals(list(df['AVG(Hipness)'].values), [1.1, 2.38, 3.14, 4.38])
def test_aggregate_query_result_to_data_frame():
    """Check that a paged aggregate query converts to a row set and DataFrame.

    The stub client's ``_queryTable`` returns the first two rows and a
    ``nextPageToken``; ``_queryTableNext`` returns the remaining two.  The
    test expects four rows from ``asRowSet()`` and a 4x4 frame from
    ``asDataFrame()`` with the listed column values.
    """
    shared_etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    shared_table_id = 'syn2757980'
    aggregate_query = ("select State, min(Born), count(State), avg(Hipness) "
                       "from syn2757980 group by Living")

    def make_headers():
        # Fresh list per call so the two pages never alias each other.
        return [
            {'columnType': 'STRING', 'name': 'State'},
            {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
            {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
            {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'},
        ]

    def make_page(first_values, second_values):
        return {
            'etag': shared_etag,
            'headers': make_headers(),
            'rows': [{'values': first_values}, {'values': second_values}],
            'tableId': shared_table_id,
        }

    class MockSynapse(object):
        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            first_page = make_page(['PA', '1935', '2', '1.1'],
                                   ['MO', '1928', '3', '2.38'])
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
                'queryResult': {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'nextPageToken': 'aaaaaaaa',
                    'queryResults': first_page,
                },
                'selectColumns': [
                    {'columnType': 'STRING', 'id': '1387', 'name': 'State'},
                ],
            }

        def _queryTableNext(self, nextPageToken, tableId):
            second_page = make_page(['DC', '1929', '1', '3.14'],
                                    ['NC', '1926', '1', '4.38'])
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'queryResults': second_page,
            }

    result = TableQueryResult(synapse=MockSynapse(), query=aggregate_query)
    assert_equals(result.etag, shared_etag)
    assert_equals(result.tableId, shared_table_id)
    assert_equals(len(result.headers), 4)

    rows_as_set = result.asRowSet()
    assert_equals(len(rows_as_set.rows), 4)

    # The DataFrame check runs against a freshly constructed result.
    result = TableQueryResult(synapse=MockSynapse(), query=aggregate_query)
    frame = result.asDataFrame()
    assert_equals(frame.shape, (4, 4))
    assert_equals(list(frame['State'].values), ['PA', 'MO', 'DC', 'NC'])

    # check integer, double and boolean types after PLFM-3073 is fixed
    assert_equals(
        list(frame['MIN(Born)'].values),
        [1935, 1928, 1929, 1926],
        "Unexpected values" + str(frame['MIN(Born)'].values),
    )
    for column_name, expected_values in (
            ('COUNT(State)', [2, 3, 1, 1]),
            ('AVG(Hipness)', [1.1, 2.38, 3.14, 4.38])):
        assert_equals(list(frame[column_name].values), expected_values)