# imports needed to run these examples; the table classes live in synapseclient.table
import csv
import io
import os
import shutil
import tempfile
from unittest.mock import patch

from synapseclient.table import (Column, CsvFileTable, RowSet, Schema,
                                 SelectColumn, Table)


def test_list_of_rows_table():
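    # round-trip a list of rows through Table: iteration, asRowSet() and asDataFrame()
    # should all reproduce the original data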
    data = [["John Coltrane",  1926, 8.65, False],
            ["Miles Davis",    1926, 9.87, False],
            ["Bill Evans",     1929, 7.65, False],
            ["Paul Chambers",  1935, 5.14, False],
            ["Jimmy Cobb",     1929, 5.78, True],
            ["Scott LaFaro",   1936, 4.21, False],
            ["Sonny Rollins",  1930, 8.99, True],
            ["Kenny Burrel",   1931, 4.37, True]]

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # explicit headers are needed to cast values without first storing the table
    table = Table(schema1, data, headers=[SelectColumn.from_column(col) for col in cols])

    for table_row, expected_row in zip(table, data):
        assert table_row == expected_row

    rowset = table.asRowSet()
    for rowset_row, expected_row in zip(rowset.rows, data):
        assert rowset_row['values'] == expected_row

    table.columns = cols
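    # with the Column definitions attached, asDataFrame() can cast each value to its declared type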

    df = table.asDataFrame()
    assert list(df['Name']) == [r[0] for r in data]


def test_rowset_asDataFrame__with_ROW_ETAG_column(syn):
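    # a paged rowset query: asDataFrame() should stitch the pages together and fold each
    # row's etag into the index rather than adding a ROW_ETAG column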
    query_result = {
        'concreteType': 'org.sagebionetworks.repo.model.table.QueryResultBundle',
        'maxRowsPerPage': 6990,
        'selectColumns': [
            {'id': '61770', 'columnType': 'STRING', 'name': 'annotationColumn1'},
            {'id': '61771', 'columnType': 'STRING', 'name': 'annotationColumn2'}
        ],
        'queryCount': 1,
        'queryResult': {
            'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
            'nextPageToken': 'sometoken',
            'queryResults': {
                'headers': [
                    {'id': '61770', 'columnType': 'STRING', 'name': 'annotationColumn1'},
                    {'id': '61771', 'columnType': 'STRING', 'name': 'annotationColumn2'}],
                'concreteType': 'org.sagebionetworks.repo.model.table.RowSet',
                'etag': 'DEFAULT',
                'tableId': 'syn11363411',
                'rows': [{'values': ['initial_value1', 'initial_value2'],
                          'etag': '7de0f326-9ef7-4fde-9e4a-ac0babca73f6',
                          'rowId': 123,
                          'versionNumber': 456}]
            }
        }
    }
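    # second page of results, returned when the client follows 'nextPageToken' from the first page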
    query_result_next_page = {'concreteType': 'org.sagebionetworks.repo.model.table.QueryResult',
                              'queryResults': {
                                  'etag': 'DEFAULT',
                                  'headers': [
                                      {'id': '61770', 'columnType': 'STRING', 'name': 'annotationColumn1'},
                                      {'id': '61771', 'columnType': 'STRING', 'name': 'annotationColumn2'}],
                                  'rows': [{'values': ['initial_value3', 'initial_value4'],
                                            'etag': '7de0f326-9ef7-4fde-9e4a-ac0babca73f7',
                                            'rowId': 789,
                                            'versionNumber': 101112}],
                                  'tableId': 'syn11363411'}}

    with patch.object(syn, "_queryTable", return_value=query_result),\
            patch.object(syn, "_queryTableNext", return_value=query_result_next_page):
        table = syn.tableQuery("select something from syn123", resultsAs='rowset')
        dataframe = table.asDataFrame()
        assert "ROW_ETAG" not in dataframe.columns
        expected_indices = ['123_456_7de0f326-9ef7-4fde-9e4a-ac0babca73f6',
                            '789_101112_7de0f326-9ef7-4fde-9e4a-ac0babca73f7']
        assert expected_indices == dataframe.index.values.tolist()


def test_RowSetTable():
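    # deserialize a RowSet from its JSON form and wrap it in a Table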
    row_set_json = {
        'etag': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
        'headers': [
            {'columnType': 'STRING', 'id': '353', 'name': 'name'},
            {'columnType': 'DOUBLE', 'id': '355', 'name': 'x'},
            {'columnType': 'DOUBLE', 'id': '3020', 'name': 'y'},
            {'columnType': 'INTEGER', 'id': '891', 'name': 'n'}],
        'rows': [{
            'rowId': 5,
            'values': ['foo', '1.23', '2.2', '101'],
            'versionNumber': 3},
            {'rowId': 6,
             'values': ['bar', '1.34', '2.4', '101'],
             'versionNumber': 3},
            {'rowId': 7,
             'values': ['foo', '1.23', '2.2', '101'],
             'versionNumber': 4},
            {'rowId': 8,
             'values': ['qux', '1.23', '2.2', '102'],
             'versionNumber': 3}],
        'tableId': 'syn2976298'}

    row_set = RowSet.from_json(row_set_json)

    assert row_set.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert row_set.tableId == 'syn2976298'
    assert len(row_set.headers) == 4
    assert len(row_set.rows) == 4

    schema = Schema(id="syn2976298", name="Bogus Schema", columns=[353, 355, 3020, 891], parent="syn1000001")

    table = Table(schema, row_set)
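    # the table wrapping the RowSet should expose its etag, tableId, headers and rows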

    assert table.etag == 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    assert table.tableId == 'syn2976298'
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    df = table.asDataFrame()
    assert df.shape == (4, 4)
    assert list(df['name']) == ['foo', 'bar', 'foo', 'qux']


def test_csv_table():
    # not strictly a unit test, but it lives here because it does no network I/O to Synapse
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN')
    ]

    schema1 = Schema(id='syn1234',
                     name='Jazz Guys',
                     columns=cols,
                     parent="syn1000001")

    # TODO: use io.StringIO(data) rather than writing files
    try:
        # create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp,
                                quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders([
            SelectColumn(name="ROW_ID", columnType="STRING"),
            SelectColumn(name="ROW_VERSION", columnType="STRING")
        ] + [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        df = table.asDataFrame()
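        # ROW_ID/ROW_VERSION move into the index as '<ROW_ID>_<ROW_VERSION>', leaving the four data columns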
        assert list(df['Name']) == [row[2] for row in data]
        assert list(df['Born']) == [row[3] for row in data]
        assert list(df['Living']) == [row[5] for row in data]
        assert list(df.index) == ['%s_%s' % tuple(row[0:2]) for row in data]
        assert df.shape == (8, 4)

    finally:
        # always clean up the temporary CSV file, even when an assertion fails
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)