def test_aggregate_query_result_to_data_frame():
    """Verify a two-page aggregate query result as a RowSet and as a DataFrame.

    A stub Synapse client serves four aggregate rows split over two pages:
    two rows from ``_queryTable`` and two more from ``_queryTableNext``. The
    test asserts that ``TableQueryResult`` exposes the expected etag, table id
    and four headers, and that both ``asRowSet()`` and ``asDataFrame()`` yield
    all four rows.

    The RowSet assertions always run. Only the DataFrame assertions are
    skipped (with a note on stderr) when pandas cannot be imported.
    """

    def aggregate_headers():
        # Built fresh on every call so the two pages never share a list.
        return [
            {'columnType': 'STRING', 'name': 'State'},
            {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
            {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
            {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'}]

    class MockSynapse(object):
        """Stub client that returns canned query pages."""

        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            # First page: carries a nextPageToken; the second page below has none.
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
                'queryResult': {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'nextPageToken': 'aaaaaaaa',
                    'queryResults': {
                        'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                        'headers': aggregate_headers(),
                        'rows': [
                            {'values': ['PA', '1935', '2', '1.1']},
                            {'values': ['MO', '1928', '3', '2.38']}],
                        'tableId': 'syn2757980'}},
                'selectColumns': [{
                    'columnType': 'STRING',
                    'id': '1387',
                    'name': 'State'}]}

        def _queryTableNext(self, nextPageToken, tableId):
            # Second (final) page.
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'queryResults': {
                    'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                    'headers': aggregate_headers(),
                    'rows': [
                        {'values': ['DC', '1929', '1', '3.14']},
                        {'values': ['NC', '1926', '1', '4.38']}],
                    'tableId': 'syn2757980'}}

    query = ("select State, min(Born), count(State), avg(Hipness) "
             "from syn2757980 group by Living")

    # RowSet portion. NOTE(review): presumably needs no pandas (the skip
    # message below only mentions asDataFrame) -- confirm against TableQueryResult.
    result = TableQueryResult(synapse=MockSynapse(), query=query)

    assert result.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert result.tableId == 'syn2757980'
    assert len(result.headers) == 4

    rs = result.asRowSet()
    assert len(rs.rows) == 4

    # Guard only the import, so an ImportError raised anywhere else is not
    # mistaken for "pandas is missing" and silently swallowed.
    try:
        import pandas  # noqa: F401 -- availability probe only
    except ImportError:
        sys.stderr.write('Pandas is apparently not installed, skipping asDataFrame portion of test_aggregate_query_result_to_data_frame.\n\n')
        return

    # A fresh result object is used for the DataFrame conversion.
    result = TableQueryResult(synapse=MockSynapse(), query=query)
    df = result.asDataFrame()

    assert df.shape == (4, 4)
    # Compare as plain lists: a length mismatch then fails the assertion
    # instead of raising TypeError from all(<scalar bool>).
    assert list(df['State'].values) == ['PA', 'MO', 'DC', 'NC']

    # check integer, double and boolean types after PLFM-3073 is fixed
    # str() instead of unicode(): the latter does not exist on Python 3.
    assert list(df['MIN(Born)'].values) == [1935, 1928, 1929, 1926], \
        "Unexpected values" + str(df['MIN(Born)'].values)
    assert list(df['COUNT(State)'].values) == [2, 3, 1, 1]
    assert list(df['AVG(Hipness)'].values) == [1.1, 2.38, 3.14, 4.38]
def test_aggregate_query_result_to_data_frame():
    """Exercise TableQueryResult on a paged aggregate query served by a stub client.

    The stub returns two rows from ``_queryTable`` and two more from
    ``_queryTableNext``; the assertions expect all four rows to surface both
    through ``asRowSet()`` and through ``asDataFrame()``.
    """
    etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    table_id = 'syn2757980'
    sql = ("select State, min(Born), count(State), avg(Hipness) "
           "from syn2757980 group by Living")

    def page(*value_rows):
        # Assemble one 'queryResults' payload; new containers on each call.
        return {
            'etag': etag,
            'headers': [
                {'columnType': 'STRING', 'name': 'State'},
                {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'}],
            'rows': [{'values': list(values)} for values in value_rows],
            'tableId': table_id}

    class MockSynapse(object):
        """Stub client handing back canned first and follow-up pages."""

        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            first_page = {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'nextPageToken': 'aaaaaaaa',
                'queryResults': page(['PA', '1935', '2', '1.1'],
                                     ['MO', '1928', '3', '2.38'])}
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
                'queryResult': first_page,
                'selectColumns': [{
                    'columnType': 'STRING',
                    'id': '1387',
                    'name': 'State'}]}

        def _queryTableNext(self, nextPageToken, tableId):
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'queryResults': page(['DC', '1929', '1', '3.14'],
                                     ['NC', '1926', '1', '4.38'])}

    # Metadata and row-set view.
    query_result = TableQueryResult(synapse=MockSynapse(), query=sql)
    assert_equals(query_result.etag, etag)
    assert_equals(query_result.tableId, table_id)
    assert_equals(len(query_result.headers), 4)

    row_set = query_result.asRowSet()
    assert_equals(len(row_set.rows), 4)

    # Data-frame view, taken from a fresh result object.
    query_result = TableQueryResult(synapse=MockSynapse(), query=sql)
    frame = query_result.asDataFrame()
    assert_equals(frame.shape, (4, 4))
    assert_equals(list(frame['State'].values), ['PA', 'MO', 'DC', 'NC'])

    # check integer, double and boolean types after PLFM-3073 is fixed
    born = frame['MIN(Born)'].values
    assert_equals(list(born), [1935, 1928, 1929, 1926],
                  "Unexpected values" + str(born))
    assert_equals(list(frame['COUNT(State)'].values), [2, 3, 1, 1])
    assert_equals(list(frame['AVG(Hipness)'].values), [1.1, 2.38, 3.14, 4.38])
def test_aggregate_query_result_to_data_frame():
    """Check RowSet and DataFrame views of a paged aggregate query result.

    ``MockSynapse`` stands in for the real client: ``_queryTable`` returns the
    first two rows together with a ``nextPageToken``, and ``_queryTableNext``
    returns the remaining two rows. The test expects ``TableQueryResult`` to
    report four headers and to deliver four rows through ``asRowSet()`` and a
    4x4 frame through ``asDataFrame()``.

    When pandas is not importable, the RowSet checks still run and only the
    DataFrame checks are skipped, with a notice written to stderr.
    """

    def query_results(rows):
        # Inner 'queryResults' payload shared by both pages; rebuilt per call
        # so no mutable object is shared between responses.
        return {
            'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
            'headers': [
                {'columnType': 'STRING', 'name': 'State'},
                {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'},
            ],
            'rows': [{'values': values} for values in rows],
            'tableId': 'syn2757980',
        }

    class MockSynapse(object):
        """Stub client that serves two canned result pages."""

        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            # Page one of two (maxRowsPerPage=2, queryCount=4).
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
                'queryResult': {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'nextPageToken': 'aaaaaaaa',
                    'queryResults': query_results([
                        ['PA', '1935', '2', '1.1'],
                        ['MO', '1928', '3', '2.38'],
                    ]),
                },
                'selectColumns': [{
                    'columnType': 'STRING',
                    'id': '1387',
                    'name': 'State',
                }],
            }

        def _queryTableNext(self, nextPageToken, tableId):
            # Page two; no nextPageToken in this response.
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'queryResults': query_results([
                    ['DC', '1929', '1', '3.14'],
                    ['NC', '1926', '1', '4.38'],
                ]),
            }

    query = "select State, min(Born), count(State), avg(Hipness) from syn2757980 group by Living"

    # These checks sit outside the pandas guard so they are never skipped.
    # NOTE(review): assumes TableQueryResult/asRowSet work without pandas,
    # as the wording of the skip message suggests -- confirm.
    result = TableQueryResult(synapse=MockSynapse(), query=query)
    assert result.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert result.tableId == 'syn2757980'
    assert len(result.headers) == 4

    rs = result.asRowSet()
    assert len(rs.rows) == 4

    # Keep the try body minimal: only a missing pandas should trigger the
    # skip, not an ImportError raised by the code under test.
    try:
        import pandas  # noqa: F401 -- availability probe only
    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of test_aggregate_query_result_to_data_frame.\n\n'
        )
        return

    result = TableQueryResult(synapse=MockSynapse(), query=query)
    df = result.asDataFrame()

    assert df.shape == (4, 4)
    # List equality avoids all(<scalar>) raising TypeError when numpy cannot
    # compare element-wise (e.g. on a length mismatch).
    assert list(df['State'].values) == ['PA', 'MO', 'DC', 'NC']

    # check integer, double and boolean types after PLFM-3073 is fixed
    assert list(df['MIN(Born)'].values) == [1935, 1928, 1929, 1926], \
        "Unexpected values" + str(df['MIN(Born)'].values)
    assert list(df['COUNT(State)'].values) == [2, 3, 1, 1]
    assert list(df['AVG(Hipness)'].values) == [1.1, 2.38, 3.14, 4.38]
def test_aggregate_query_result_to_data_frame():
    """Confirm that a two-page aggregate query is fully read by TableQueryResult.

    A stub client supplies the pages; the test then checks the result's
    metadata, its row set (four rows) and its data frame (four rows by four
    columns with the expected values per column).
    """
    column_specs = (
        ('STRING', 'State'),
        ('INTEGER', 'MIN(Born)'),
        ('INTEGER', 'COUNT(State)'),
        ('DOUBLE', 'AVG(Hipness)'),
    )

    def build_query_results(row_values):
        # Fresh header/row containers on every call, like the literal dicts
        # they replace.
        header_dicts = [{'columnType': col_type, 'name': col_name}
                        for col_type, col_name in column_specs]
        row_dicts = [{'values': list(values)} for values in row_values]
        return {
            'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
            'headers': header_dicts,
            'rows': row_dicts,
            'tableId': 'syn2757980',
        }

    class MockSynapse(object):
        """Stub client: one bundle for the first page, one plain result after."""

        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            bundle = {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
            }
            bundle['queryResult'] = {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'nextPageToken': 'aaaaaaaa',
                'queryResults': build_query_results((
                    ('PA', '1935', '2', '1.1'),
                    ('MO', '1928', '3', '2.38'),
                )),
            }
            bundle['selectColumns'] = [{
                'columnType': 'STRING',
                'id': '1387',
                'name': 'State',
            }]
            return bundle

        def _queryTableNext(self, nextPageToken, tableId):
            next_page = {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
            }
            next_page['queryResults'] = build_query_results((
                ('DC', '1929', '1', '3.14'),
                ('NC', '1926', '1', '4.38'),
            ))
            return next_page

    aggregate_sql = "select State, min(Born), count(State), avg(Hipness) from syn2757980 group by Living"

    # First pass: metadata plus RowSet conversion.
    first_result = TableQueryResult(synapse=MockSynapse(), query=aggregate_sql)
    assert_equals(first_result.etag, 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
    assert_equals(first_result.tableId, 'syn2757980')
    assert_equals(len(first_result.headers), 4)
    assert_equals(len(first_result.asRowSet().rows), 4)

    # Second pass: DataFrame conversion from a brand-new result object.
    second_result = TableQueryResult(synapse=MockSynapse(), query=aggregate_sql)
    data_frame = second_result.asDataFrame()
    assert_equals(data_frame.shape, (4, 4))
    assert_equals(list(data_frame['State'].values), ['PA', 'MO', 'DC', 'NC'])

    # check integer, double and boolean types after PLFM-3073 is fixed
    min_born = data_frame['MIN(Born)'].values
    failure_note = "Unexpected values" + str(min_born)
    assert_equals(list(min_born), [1935, 1928, 1929, 1926], failure_note)
    assert_equals(list(data_frame['COUNT(State)'].values), [2, 3, 1, 1])
    assert_equals(list(data_frame['AVG(Hipness)'].values), [1.1, 2.38, 3.14, 4.38])