def test_aggregate_query_result_to_data_frame():
    """Verify RowSet and DataFrame conversion of a paged aggregate query result.

    A stub client returns the first two rows from ``_queryTable`` (together
    with a ``nextPageToken``) and the remaining two from ``_queryTableNext``.
    The assertions expect ``TableQueryResult`` to expose the etag, table id
    and four headers, and expect both ``asRowSet()`` and ``asDataFrame()``
    to yield all four rows.

    If pandas cannot be imported, a note is written to stderr and only the
    ``asDataFrame`` assertions are skipped; the RowSet assertions still run.
    """

    class MockSynapse(object):
        """Stand-in for the Synapse client that serves canned query pages."""

        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            # First page: two of the four rows plus a token for the next page.
            return {'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                    'maxRowsPerPage': 2,
                    'queryCount': 4,
                    'queryResult': {
                        'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                        'nextPageToken': 'aaaaaaaa',
                        'queryResults': {
                            'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                            'headers': [
                                {'columnType': 'STRING', 'name': 'State'},
                                {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                                {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                                {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'}],
                            'rows': [
                                {'values': ['PA', '1935', '2', '1.1']},
                                {'values': ['MO', '1928', '3', '2.38']}],
                            'tableId': 'syn2757980'}},
                    'selectColumns': [{
                        'columnType': 'STRING',
                        'id': '1387',
                        'name': 'State'}]}

        def _queryTableNext(self, nextPageToken, tableId):
            # Second (last) page: the remaining two rows, no further token.
            return {'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'queryResults': {
                        'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                        'headers': [
                            {'columnType': 'STRING', 'name': 'State'},
                            {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                            {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                            {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'}],
                        'rows': [
                            {'values': ['DC', '1929', '1', '3.14']},
                            {'values': ['NC', '1926', '1', '4.38']}],
                        'tableId': 'syn2757980'}}

    result = TableQueryResult(synapse=MockSynapse(), query="select State, min(Born), count(State), avg(Hipness) from syn2757980 group by Living")

    assert result.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert result.tableId == 'syn2757980'
    assert len(result.headers) == 4

    rs = result.asRowSet()
    assert len(rs.rows) == 4

    # Probe for pandas only now, so that a missing pandas skips just the
    # DataFrame checks and an unrelated ImportError above is never hidden.
    try:
        import pandas  # noqa: F401 -- availability check only
    except ImportError:
        sys.stderr.write('Pandas is apparently not installed, skipping asDataFrame portion of test_aggregate_query_result_to_data_frame.\n\n')
        return

    # A fresh result object is used for the DataFrame conversion.
    result = TableQueryResult(synapse=MockSynapse(), query="select State, min(Born), count(State), avg(Hipness) from syn2757980 group by Living")
    df = result.asDataFrame()

    assert df.shape == (4, 4)
    # Compare as lists: a length mismatch then fails the assertion cleanly
    # instead of making all() choke on a scalar comparison result.
    assert list(df['State'].values) == ['PA', 'MO', 'DC', 'NC']

    # check integer, double and boolean types after PLFM-3073 is fixed
    # str() rather than unicode(): the latter does not exist on Python 3.
    assert list(df['MIN(Born)'].values) == [1935, 1928, 1929, 1926], "Unexpected values" + str(df['MIN(Born)'].values)
    assert list(df['COUNT(State)'].values) == [2, 3, 1, 1]
    assert list(df['AVG(Hipness)'].values) == [1.1, 2.38, 3.14, 4.38]
def test_aggregate_query_result_to_data_frame():
    """Exercise TableQueryResult against a stubbed, two-page aggregate query.

    The stub hands back two rows from ``_queryTable`` and two more from
    ``_queryTableNext``. The test asserts the etag, table id and header
    count, then expects ``asRowSet()`` to hold four rows and
    ``asDataFrame()`` to be a 4x4 frame with the listed column values.
    """
    etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    table_id = 'syn2757980'
    column_specs = (('STRING', 'State'),
                    ('INTEGER', 'MIN(Born)'),
                    ('INTEGER', 'COUNT(State)'),
                    ('DOUBLE', 'AVG(Hipness)'))

    def build_row_set(*value_lists):
        # Assemble a brand-new structure on every call so that the two
        # pages (and repeated queries) never share mutable objects.
        return {'etag': etag,
                'headers': [{'columnType': kind, 'name': label}
                            for kind, label in column_specs],
                'rows': [{'values': list(values)} for values in value_lists],
                'tableId': table_id}

    class FakeSynapse(object):
        """Minimal client stub exposing the two paging calls used here."""

        def _queryTable(self, query, limit=None, offset=None, isConsistent=True, partMask=None):
            # Initial page, including the token that points at the next one.
            return {'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                    'maxRowsPerPage': 2,
                    'queryCount': 4,
                    'queryResult': {
                        'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                        'nextPageToken': 'aaaaaaaa',
                        'queryResults': build_row_set(['PA', '1935', '2', '1.1'],
                                                      ['MO', '1928', '3', '2.38'])},
                    'selectColumns': [{'columnType': 'STRING',
                                       'id': '1387',
                                       'name': 'State'}]}

        def _queryTableNext(self, nextPageToken, tableId):
            # Follow-up page; carries no further page token.
            return {'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'queryResults': build_row_set(['DC', '1929', '1', '3.14'],
                                                  ['NC', '1926', '1', '4.38'])}

    sql = ("select State, min(Born), count(State), avg(Hipness) from syn2757980"
           " group by Living")

    # --- RowSet path -------------------------------------------------------
    paged = TableQueryResult(synapse=FakeSynapse(), query=sql)

    assert_equals(paged.etag, 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
    assert_equals(paged.tableId, 'syn2757980')
    assert_equals(len(paged.headers), 4)

    row_set = paged.asRowSet()
    assert_equals(len(row_set.rows), 4)

    # --- DataFrame path (fresh result object) --------------------------------
    paged = TableQueryResult(synapse=FakeSynapse(), query=sql)
    frame = paged.asDataFrame()

    assert_equals(frame.shape, (4, 4))
    states = list(frame['State'].values)
    assert_equals(states, ['PA', 'MO', 'DC', 'NC'])

    # check integer, double and boolean types after PLFM-3073 is fixed
    born = list(frame['MIN(Born)'].values)
    assert_equals(born, [1935, 1928, 1929, 1926],
                  "Unexpected values" + str(frame['MIN(Born)'].values))
    counts = list(frame['COUNT(State)'].values)
    assert_equals(counts, [2, 3, 1, 1])
    hipness = list(frame['AVG(Hipness)'].values)
    assert_equals(hipness, [1.1, 2.38, 3.14, 4.38])
# Example #3
# 0
def test_aggregate_query_result_to_data_frame():
    """Check a two-page aggregate query result as both RowSet and DataFrame.

    ``MockSynapse`` serves two rows from ``_queryTable`` (with a
    ``nextPageToken``) and two more from ``_queryTableNext``. The test
    asserts the etag, table id and header count reported by
    ``TableQueryResult``, that ``asRowSet()`` holds four rows, and that
    ``asDataFrame()`` produces a 4x4 frame with the expected column values.

    The pandas-dependent part is skipped, with a message on stderr, when
    pandas cannot be imported; everything before it is always executed.
    """

    class MockSynapse(object):
        """Canned replacement for the Synapse client's table-query calls."""

        def _queryTable(self,
                        query,
                        limit=None,
                        offset=None,
                        isConsistent=True,
                        partMask=None):
            # Page one of two; 'nextPageToken' signals that more rows follow.
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
                'queryResult': {
                    'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                    'nextPageToken': 'aaaaaaaa',
                    'queryResults': {
                        'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                        'headers': [
                            {'columnType': 'STRING', 'name': 'State'},
                            {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                            {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                            {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'},
                        ],
                        'rows': [
                            {'values': ['PA', '1935', '2', '1.1']},
                            {'values': ['MO', '1928', '3', '2.38']},
                        ],
                        'tableId': 'syn2757980',
                    },
                },
                'selectColumns': [{
                    'columnType': 'STRING',
                    'id': '1387',
                    'name': 'State',
                }],
            }

        def _queryTableNext(self, nextPageToken, tableId):
            # Page two of two; no token, so paging stops here.
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'queryResults': {
                    'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
                    'headers': [
                        {'columnType': 'STRING', 'name': 'State'},
                        {'columnType': 'INTEGER', 'name': 'MIN(Born)'},
                        {'columnType': 'INTEGER', 'name': 'COUNT(State)'},
                        {'columnType': 'DOUBLE', 'name': 'AVG(Hipness)'},
                    ],
                    'rows': [
                        {'values': ['DC', '1929', '1', '3.14']},
                        {'values': ['NC', '1926', '1', '4.38']},
                    ],
                    'tableId': 'syn2757980',
                },
            }

    query = "select State, min(Born), count(State), avg(Hipness) from syn2757980 group by Living"

    result = TableQueryResult(synapse=MockSynapse(), query=query)

    assert result.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert result.tableId == 'syn2757980'
    assert len(result.headers) == 4

    rs = result.asRowSet()
    assert len(rs.rows) == 4

    # Keep the try body to the import alone: only a missing pandas should
    # trigger the skip, and only for the DataFrame checks that follow.
    try:
        import pandas  # noqa: F401 -- availability check only
    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of test_aggregate_query_result_to_data_frame.\n\n'
        )
        return

    # Start from a fresh result object for the DataFrame conversion.
    result = TableQueryResult(synapse=MockSynapse(), query=query)
    df = result.asDataFrame()

    assert df.shape == (4, 4)
    # List equality gives a plain assertion failure on a length mismatch,
    # whereas all(array == list) would raise TypeError in that case.
    assert list(df['State'].values) == ['PA', 'MO', 'DC', 'NC']

    # check integer, double and boolean types after PLFM-3073 is fixed
    assert list(df['MIN(Born)'].values) == [1935, 1928, 1929, 1926], \
        "Unexpected values" + str(df['MIN(Born)'].values)
    assert list(df['COUNT(State)'].values) == [2, 3, 1, 1]
    assert list(df['AVG(Hipness)'].values) == [1.1, 2.38, 3.14, 4.38]
# Example #4
# 0
def test_aggregate_query_result_to_data_frame():
    """Run an aggregate query through a stub client and inspect the results.

    The stub splits four result rows across ``_queryTable`` and
    ``_queryTableNext``. The assertions cover the etag, table id and number
    of headers, the row count of ``asRowSet()``, and the shape and column
    contents of ``asDataFrame()``.
    """

    def _row_set_payload(rows):
        # Returns a newly built payload each time it is called.
        header_list = []
        for column_type, column_name in (('STRING', 'State'),
                                         ('INTEGER', 'MIN(Born)'),
                                         ('INTEGER', 'COUNT(State)'),
                                         ('DOUBLE', 'AVG(Hipness)')):
            header_list.append({'columnType': column_type, 'name': column_name})
        return {
            'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
            'headers': header_list,
            'rows': [{'values': cells} for cells in rows],
            'tableId': 'syn2757980',
        }

    class MockSynapse(object):
        """Stub offering only the two query methods this test relies on."""

        def _queryTable(self, query, limit=None, offset=None,
                        isConsistent=True, partMask=None):
            first_page = _row_set_payload([['PA', '1935', '2', '1.1'],
                                           ['MO', '1928', '3', '2.38']])
            query_result = {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'nextPageToken': 'aaaaaaaa',
                'queryResults': first_page,
            }
            state_column = {'columnType': 'STRING', 'id': '1387', 'name': 'State'}
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
                'maxRowsPerPage': 2,
                'queryCount': 4,
                'queryResult': query_result,
                'selectColumns': [state_column],
            }

        def _queryTableNext(self, nextPageToken, tableId):
            second_page = _row_set_payload([['DC', '1929', '1', '3.14'],
                                            ['NC', '1926', '1', '4.38']])
            return {
                'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                'queryResults': second_page,
            }

    aggregate_query = ("select State, min(Born), count(State), avg(Hipness) "
                       "from syn2757980 group by Living")

    # Metadata and RowSet conversion.
    query_result = TableQueryResult(synapse=MockSynapse(), query=aggregate_query)
    assert_equals(query_result.etag, 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
    assert_equals(query_result.tableId, 'syn2757980')
    assert_equals(len(query_result.headers), 4)
    assert_equals(len(query_result.asRowSet().rows), 4)

    # DataFrame conversion, starting over with a new result object.
    data = TableQueryResult(synapse=MockSynapse(),
                            query=aggregate_query).asDataFrame()
    assert_equals(data.shape, (4, 4))
    assert_equals(list(data['State'].values), ['PA', 'MO', 'DC', 'NC'])

    # check integer, double and boolean types after PLFM-3073 is fixed
    min_born = data['MIN(Born)']
    assert_equals(list(min_born.values), [1935, 1928, 1929, 1926],
                  "Unexpected values" + str(data['MIN(Born)'].values))
    assert_equals(list(data['COUNT(State)'].values), [2, 3, 1, 1])
    assert_equals(list(data['AVG(Hipness)'].values), [1.1, 2.38, 3.14, 4.38])