Example #1
0
def add_split_dataset(session):
    """
    Store one dummy timeseries split across two entries, group them as a
    'Split dataset' and verify the full series can be recovered via the
    group checksum of one member's result.
    """
    # create dummy data: 350 normally distributed values at 15min resolution
    data = pd.DataFrame(
        data={
            'value': np.random.normal(10, 1, size=350),
            'tstamp': pd.date_range('201309241100', periods=350, freq='15min')
        })
    data.set_index('tstamp', inplace=True)

    # add two entries as split datasets, authored by the KIT person
    kit = api.find_person(session, organisation_abbrev='KIT')[0]
    historical_entry = api.add_entry(session,
                                     title='Historical data',
                                     abstract='Long descirption',
                                     location=(4.2, 42),
                                     variable=1,
                                     license=6,
                                     author=kit.id)
    # FIX: was `api.add.add_entry` (AttributeError) — every other call in
    # this file uses `api.add_entry` directly
    recent_entry = api.add_entry(
        session,
        title='Recent data',
        abstract='something bad happended that needs description',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id)

    # create a timeseries datasource for each entry
    historical_entry.create_datasource(type=1,
                                       path='timeseries',
                                       datatype='timeseries')
    recent_entry.create_datasource(type=1,
                                   path='timeseries',
                                   datatype='timeseries')

    # split the data: first 300 rows historical, last 50 recent
    historical_entry.import_data(data=data.iloc[:300, :])
    recent_entry.import_data(data=data.iloc[300:, :])

    full_dataset = api.add_group(session, 'Split dataset',
                                 [historical_entry.id, recent_entry.id])

    # checkout one member of the group
    result = api.find_entry(session, id=recent_entry.id, as_result=True)[0]

    # recover data
    db_data = result.get_data()

    # search for checksum - result.checksum is a checksum of member checksum, which is only one here
    assert len(result.checksums) == 1
    checksum = result.checksums[0]
    assert checksum in db_data

    # the recovered series must match the full original, not just the split
    recovered_data = db_data[checksum].values
    assert_array_almost_equal(data.values, recovered_data)

    return True
Example #2
0
def auto_force_data_names(session, df_1D_wind, df_3D_prec):
    """
    If len(data_columns) != len(entry.variable.column_names) force_data_names
    should automatically become True and the column names of the imported data
    should be saved in datasource.data_names.
    To test this, we add 1D wind data (with 3D precision) to the 3D wind
    variable with variable.column_names=['u', 'v', 'w'].
    """
    # look up the 3D wind variable and the previously added KIT author
    wind_variable = api.find_variable(session, name='3D-wind')[0]
    author = api.find_person(session, organisation_abbrev='KIT')[0]

    # register the entry against the (mismatching) 3D variable
    entry = api.add_entry(
        session,
        title='1-dimensional windspeed data, precision',
        abstract='1-dimensional windspeed data',
        location=(8, 52),
        variable=wind_variable.id,
        comment='after double rotation',
        license=6,
        author=author.id,
        embargo=False,
        is_partial=False)

    # attach a timeseries datasource and a 30min temporal scale
    entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry.datasource.create_scale(
        resolution='30min',
        extent=(df_1D_wind.index[0], df_1D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal')

    # import 1 data column against 3 variable columns; force_data_names=False
    # should be auto-promoted to True by the column-count mismatch
    entry.import_data(data=df_1D_wind,
                      precision=df_3D_prec,
                      force_data_names=False)

    # read the data back
    loaded = entry.get_data()

    # the imported column names (not the variable's) must have been kept
    expected = ['u_ms', 'precision1', 'precision2', 'precision3']
    assert loaded.columns.tolist() == expected
    assert loaded['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
Example #3
0
def precision_test(session, df_3D_wind, df_3D_prec):
    """
    Test if precision columns are handled correctly.
    We use the 3D eddy wind data with 3 precision columns for this.
    """
    # look up the 3D wind variable and the previously added KIT author
    variable = api.find_variable(session, name='3D-wind')[0]
    author = api.find_person(session, organisation_abbrev='KIT')[0]

    # register the entry
    entry = api.add_entry(
        session,
        title='3-dimensional windspeed data, precision',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=variable.id,
        comment='after double rotation',
        license=6,
        author=author.id,
        embargo=False,
        is_partial=False)

    # attach a timeseries datasource and a 30min temporal scale
    entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry.datasource.create_scale(
        resolution='30min',
        extent=(df_3D_wind.index[0], df_3D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal')

    # import data together with its precision columns
    entry.import_data(data=df_3D_wind,
                      precision=df_3D_prec,
                      force_data_names=False)

    # read the data back
    result = entry.get_data()

    # precision columns are renamed to precision1..3 (input was 'precision_1')
    expected = ['u', 'v', 'w', 'precision1', 'precision2', 'precision3']
    assert result.columns.tolist() == expected
    assert result['u'].mean() == pytest.approx(3.1, 0.05)

    return True
Example #4
0
def force_data_names_true(session, df_3D_wind):
    """
    Test force_data_names=True when loading the data into the database.
    In this case, datasource.data_names will be overwritten with the column
    names of the imported data, and those names are displayed on export.
    We use the 3D eddy wind data for this again.
    """
    # look up the 3D wind variable and the previously added KIT author
    variable = api.find_variable(session, name='3D-wind')[0]
    author = api.find_person(session, organisation_abbrev='KIT')[0]

    # register the entry
    entry = api.add_entry(
        session,
        title='3-dimensional windspeed data, force_data_names',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=variable.id,
        comment='after double rotation',
        license=6,
        author=author.id,
        embargo=False,
        is_partial=False)

    # attach a timeseries datasource and a 30min temporal scale
    entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry.datasource.create_scale(
        resolution='30min',
        extent=(df_3D_wind.index[0], df_3D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal')

    # import with force_data_names=True: data_names come from the dataframe
    entry.import_data(df_3D_wind, force_data_names=True)

    # read the data back
    result = entry.get_data()

    # the dataframe's own column names must be used, not the variable's
    assert result.columns.tolist() == ['u_ms', 'v_ms', 'w_ms']
    assert result['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
Example #5
0
def one_dim_data(session, df_1D_wind):
    """
    Do the same as above, but with one-dimensional data instead.
    """
    # add the 1D wind variable with a single data column
    var_1D_wind = api.add_variable(session,
                                   name='1D-wind',
                                   symbol='u',
                                   column_names=['u'],
                                   unit=107)

    # find the previously added author
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_1D_wind = api.add_entry(
        session,
        title='1-dimensional windspeed data',
        abstract='1-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_1D_wind.id,
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False)

    # create datasource and scale
    entry_1D_wind.create_datasource(type=1,
                                    path='timeseries',
                                    datatype='timeseries')

    entry_1D_wind.datasource.create_scale(resolution='30min',
                                          extent=(df_1D_wind.index[0],
                                                  df_1D_wind.index[-1]),
                                          support=1.0,
                                          scale_dimension='temporal')

    # add data
    entry_1D_wind.import_data(df_1D_wind)

    # read data
    dat = entry_1D_wind.get_data()

    # FIX: `dat.columns == 'u'` is an element-wise comparison returning a
    # boolean array; asserting on it only works by accident for a single
    # column. Compare the column list explicitly, like the other tests do.
    assert dat.columns.tolist() == ['u']
    assert dat['u'].mean() == pytest.approx(3.1, 0.05)

    return True
Example #6
0
def add_data(session):
    """
    Populate the database with the PERSONS and ENTRIES fixtures.

    Returns True when all records were added without raising.
    """
    # add all persons; a plain loop (not a comprehension bound to an
    # unused variable) since we only need the side effect
    p_df = read_to_df(PERSONS)
    for p in p_df.to_dict(orient='records'):
        api.add_person(session, **p)

    # entries: fold the x/y columns into a single location tuple,
    # convert NaN to None for the API, then drop the raw coordinates
    e_df = read_to_df(ENTRIES)
    e_df['location'] = [(t[0], t[1]) for t in e_df[['x', 'y']].values]
    e_df = e_df.where(e_df.notnull(), None)
    e_df.drop(['x', 'y'], axis=1, inplace=True)

    for e in e_df.to_dict(orient='records'):
        api.add_entry(session, **e)

    return True
Example #7
0
def add_entries(session):
    """
    Add a few entries from the ENTRIES fixture and spot-check the result.
    """
    frame = read_to_df(ENTRIES)

    # merge the x/y coordinate columns into a single location tuple
    frame['location'] = [(row[0], row[1]) for row in frame[['x', 'y']].values]

    # replace NaN with None so the API receives proper nulls,
    # then drop the now-redundant coordinate columns
    frame = frame.where(frame.notnull(), None)
    frame.drop(['x', 'y'], axis=1, inplace=True)

    # add every record as an entry
    entries = [
        api.add_entry(session, **record)
        for record in frame.to_dict(orient='records')
    ]

    # spot-check a few known fixture values
    assert entries[0].contributors[0].person.first_name == 'Keanu'
    assert entries[1].abstract == frame.loc[1].abstract
    assert entries[2].external_id == 'foobar2'

    return True
Example #8
0
def add_3D_entry(session):
    """
    Add an entry for the eddy wind data.
    """
    # register the 3D wind variable with its three component columns
    wind = api.add_variable(session,
                            name='3D-wind',
                            symbol='uvw',
                            column_names=['u', 'v', 'w'],
                            unit=107)

    # register KIT as an organisation-only author (no personal names)
    author = api.add_person(
        session,
        first_name=None,
        last_name=None,
        organisation_name='Karlsruhe Institute of Technology (KIT)',
        organisation_abbrev='KIT')

    # create the entry referencing variable and author
    api.add_entry(
        session,
        title='3-dimensional windspeed data',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=wind.id,
        comment='after double rotation',
        license=6,
        author=author.id,
        embargo=False,
        is_partial=False)

    # the variable must keep its component column names
    assert wind.column_names == ['u', 'v', 'w']

    return True