Exemple #1
0
def test_data_schema():
    """Check the schema produced for the two Sacramento CSV-backed tables.

    Both CSV files are loaded from ``RESOURCES``, registered in a
    ``jupytab.Tables`` container, and the resulting schema is verified twice:
    structurally (id, alias and column count of the first entry) and as the
    exact JSON text returned by ``render_schema``.
    """
    crime_frame = pd.read_csv(os.path.join(RESOURCES, 'sacramento_crime.csv'))
    realestate_frame = pd.read_csv(os.path.join(RESOURCES, 'sacramento_realestate.csv'))

    tables = jupytab.Tables()
    tables['sacramento_crime'] = jupytab.DataFrameTable(
        'Sacramento Crime', dataframe=crime_frame)
    tables['sacramento_realestate'] = jupytab.DataFrameTable(
        'Sacramento RealEstate', dataframe=realestate_frame)

    # Structural check on the first schema entry (the crime table).
    crime_schema = tables.schema()[0]
    assert crime_schema['id'] == 'sacramento_crime'
    assert crime_schema['alias'] == 'Sacramento Crime'
    assert len(crime_schema['columns']) == 9

    # Exact rendered text, written one column descriptor per line; the
    # adjacent literals concatenate into a single string with no newlines.
    expected_schema = (
        '[{"id": "sacramento_crime", "alias": "Sacramento Crime", "columns": ['
        '{"id": "cdatetime", "dataType": "string"}, '
        '{"id": "address", "dataType": "string"}, '
        '{"id": "district", "dataType": "int"}, '
        '{"id": "beat", "dataType": "string"}, '
        '{"id": "grid", "dataType": "int"}, '
        '{"id": "crimedescr", "dataType": "string"}, '
        '{"id": "ucr_ncic_code", "dataType": "int"}, '
        '{"id": "latitude", "dataType": "float"}, '
        '{"id": "longitude", "dataType": "float"}]}, '
        '{"id": "sacramento_realestate", "alias": "Sacramento RealEstate", "columns": ['
        '{"id": "street", "dataType": "string"}, '
        '{"id": "city", "dataType": "string"}, '
        '{"id": "zip", "dataType": "int"}, '
        '{"id": "state", "dataType": "string"}, '
        '{"id": "beds", "dataType": "int"}, '
        '{"id": "baths", "dataType": "int"}, '
        '{"id": "sq__ft", "dataType": "int"}, '
        '{"id": "type", "dataType": "string"}, '
        '{"id": "sale_date", "dataType": "string"}, '
        '{"id": "price", "dataType": "int"}, '
        '{"id": "latitude", "dataType": "float"}, '
        '{"id": "longitude", "dataType": "float"}]}]'
    )

    assert expected_schema == tables.render_schema(do_print=False)
Exemple #2
0
def test_data_schema():
    """Check schema rendering for a DataFrame with awkward multi-index labels.

    A square DataFrame is built whose rows and columns share the same
    two-level MultiIndex, mixing strings, integers, accented letters and
    punctuation. It is registered twice (without and with
    ``include_index=True``) under table names that themselves contain special
    characters, and the rendered schema is compared with the exact expected
    JSON text.
    """
    first_level = ['A', 'A', 'a', 'a', 0, 0, 'a$_!#àz', 'a$_!#àz']
    second_level = [
        'A', 'A', 0, 1, 'z$_"_àéça"', 'z_èà[|]a',
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
        'abcdefghijklmnopqrstuvwxyz0123456789',
    ]
    index = pd.MultiIndex.from_tuples(list(zip(first_level, second_level)),
                                      names=['first', 'second'])

    # The cell values are random; only the schema is asserted below.
    side = len(index)
    complex_df = pd.DataFrame(np.random.randn(side, side), index=index, columns=index)

    tables = jupytab.Tables()
    tables['complex_df_no_index_{}[]#!'] = jupytab.DataFrameTable(
        'A multi-index Dataframe ({}[]#!)',
        dataframe=complex_df)
    tables['complex_df_with_index_{}[]#!'] = jupytab.DataFrameTable(
        'A multi-index Dataframe ({}[]#!)',
        dataframe=complex_df,
        include_index=True)

    without_index, with_index = tables.schema()[0], tables.schema()[1]

    assert without_index['id'] == 'complex_df_no_index_{}[]#!'
    assert without_index['alias'] == 'A multi-index Dataframe ({}[]#!)'
    assert with_index['id'] == 'complex_df_with_index_{}[]#!'
    assert with_index['alias'] == 'A multi-index Dataframe ({}[]#!)'

    # Exact rendered text, one column descriptor per line; the adjacent
    # literals concatenate into a single string with no newlines.
    expected_schema = (
        '[{"id": "complex_df_no_index_{}[]#!", "alias": "A multi-index Dataframe ({}[]#!)", '
        '"columns": ['
        '{"id": "A_A_1", "dataType": "float"}, '
        '{"id": "A_A_2", "dataType": "float"}, '
        '{"id": "a_0", "dataType": "float"}, '
        '{"id": "a_1", "dataType": "float"}, '
        '{"id": "0_z____aeca_", "dataType": "float"}, '
        '{"id": "0_z_ea_a", "dataType": "float"}, '
        '{"id": "a___az_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", "dataType": "float"}, '
        '{"id": "a___az_abcdefghijklmnopqrstuvwxyz0123456789", "dataType": "float"}]}, '
        '{"id": "complex_df_with_index_{}[]#!", "alias": "A multi-index Dataframe ({}[]#!)", '
        '"columns": ['
        '{"id": "first_", "dataType": "string"}, '
        '{"id": "second_", "dataType": "string"}, '
        '{"id": "A_A_1", "dataType": "float"}, '
        '{"id": "A_A_2", "dataType": "float"}, '
        '{"id": "a_0", "dataType": "float"}, '
        '{"id": "a_1", "dataType": "float"}, '
        '{"id": "0_z____aeca_", "dataType": "float"}, '
        '{"id": "0_z_ea_a", "dataType": "float"}, '
        '{"id": "a___az_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", "dataType": "float"}, '
        '{"id": "a___az_abcdefghijklmnopqrstuvwxyz0123456789", "dataType": "float"}]}]'
    )

    assert expected_schema == tables.render_schema(do_print=False)
Exemple #3
0
def test_large_data_content():
    """Check that a small slice of a very large table is served quickly and exactly.

    A 1,000,000 x 10 DataFrame of seeded random values is registered, then a
    JSON request for the range ``from=5100`` / ``to=5102`` is rendered. The
    call must complete in under 0.1 seconds and return the exact expected
    JSON text (two records).
    """
    # Fixed seed so the values asserted at the end are reproducible.
    np.random.seed(0)
    n_rows, n_cols = 1_000_000, 10

    tables = jupytab.Tables()
    tables['large_df'] = jupytab.DataFrameTable(
        'A very large Dataframe',
        dataframe=pd.DataFrame(np.random.randn(n_rows, n_cols)))

    request_args = {
        'table_name': ['large_df'],
        'format': ['json'],
        'from': [5100],
        'to': [5102],
    }
    request = json.dumps({'args': request_args})

    # Time only the rendering call itself.
    started = timer()
    payload = tables.render_data(request, do_print=False)
    elapsed = timer() - started

    print(f"Elapsed time in second to retrieve one row in a large dataframe : {elapsed} s")

    assert elapsed < 0.1

    print(payload)

    # Expected payload, two source lines per record; the adjacent literals
    # concatenate into a single string with no whitespace in between.
    expected_payload = (
        '[{"0":0.2307805099,"1":0.7823326556,"2":0.9507107694,"3":1.4595805778,"4":0.6798091111,'
        '"5":-0.8676077457,"6":0.3908489554,"7":1.0838125793,"8":0.6227587338,"9":0.0919146565},'
        '{"0":0.6267312321,"1":0.7369835911,"2":-0.4665488934,"3":1.5379716957,"4":-1.0313145219,'
        '"5":1.0398963231,"6":0.8687854819,"7":0.2055855947,"8":-1.7716643336,"9":0.2428264886}]'
    )

    assert payload == expected_payload