def add_split_dataset(session):
    """
    Add a dataset that is split over two entries and read it back.

    350 random values on a 15 minute time index are created. The first
    300 rows are imported into a 'Historical data' entry, the remaining
    50 rows into a 'Recent data' entry, and both entries are put into
    one group called 'Split dataset'. The recent entry is then loaded
    as a result set and its data is compared to the generated frame.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.

    Returns
    -------
    bool
        Always True; failures surface as exceptions or AssertionError.
    """
    # create dummy data
    data = pd.DataFrame(
        data={
            'value': np.random.normal(10, 1, size=350),
            'tstamp': pd.date_range('201309241100', periods=350, freq='15min')
        })
    data.set_index('tstamp', inplace=True)

    # add two entries as split datasets
    kit = api.find_person(session, organisation_abbrev='KIT')[0]
    historical_entry = api.add_entry(
        session,
        title='Historical data',
        abstract='Long descirption',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id)
    # use the same api.add_entry entry point as for the historical entry
    recent_entry = api.add_entry(
        session,
        title='Recent data',
        abstract='something bad happended that needs description',
        location=(4.2, 42),
        variable=1,
        license=6,
        author=kit.id)

    # create datasource
    historical_entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    recent_entry.create_datasource(type=1, path='timeseries', datatype='timeseries')

    # split the data
    historical_entry.import_data(data=data.iloc[:300, :])
    recent_entry.import_data(data=data.iloc[300:, :])

    # group both entries; the returned group object is not needed here
    api.add_group(session, 'Split dataset', [historical_entry.id, recent_entry.id])

    # checkout
    result = api.find_entry(session, id=recent_entry.id, as_result=True)[0]

    # recover data
    db_data = result.get_data()

    # search for checksum - result.checksum is a checksum of member
    # checksums, which is only one here
    assert len(result.checksums) == 1
    checksum = result.checksums[0]
    assert checksum in db_data

    # NOTE(review): the comparison is against the full 350-row frame, so
    # the result set presumably returns the merged split dataset - confirm
    recovered_data = db_data[checksum].values
    assert_array_almost_equal(data.values, recovered_data)

    return True
def auto_force_data_names(session, df_1D_wind, df_3D_prec):
    """
    Check the automatic fallback to the column names of the imported data.

    The '3D-wind' variable is looked up and an entry is created for it.
    One-dimensional wind data plus three precision columns is imported
    with ``force_data_names=False``. The test expects that, because the
    number of data columns differs from the number of column names of
    the variable, the names of the imported data ('u_ms') show up when
    the data is read back.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.
    df_1D_wind
        Wind data to import; its index supplies the temporal extent.
    df_3D_prec
        Precision columns imported alongside the data.

    Returns
    -------
    bool
        Always True; failures surface as AssertionError.
    """
    # look up the variable and the previously added person
    wind_variable = api.find_variable(session, name='3D-wind')[0]
    author = api.find_person(session, organisation_abbrev='KIT')[0]

    # metadata of the new entry
    entry_kwargs = dict(
        title='1-dimensional windspeed data, precision',
        abstract='1-dimensional windspeed data',
        location=(8, 52),
        variable=wind_variable.id,
        comment='after double rotation',
        license=6,
        author=author.id,
        embargo=False,
        is_partial=False,
    )
    entry = api.add_entry(session, **entry_kwargs)

    # datasource and temporal scale spanning the whole index
    entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    first_step = df_1D_wind.index[0]
    last_step = df_1D_wind.index[-1]
    entry.datasource.create_scale(
        resolution='30min',
        extent=(first_step, last_step),
        support=1.0,
        scale_dimension='temporal',
    )

    # import without explicitly forcing the data names
    entry.import_data(data=df_1D_wind, precision=df_3D_prec, force_data_names=False)

    # read back and verify column names and the mean wind speed
    loaded = entry.get_data()
    expected_columns = ['u_ms', 'precision1', 'precision2', 'precision3']
    assert loaded.columns.tolist() == expected_columns
    assert loaded['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
def precision_test(session, df_3D_wind, df_3D_prec):
    """
    Check that precision columns survive an import / export round trip.

    The 3D wind data is imported together with three precision columns
    into a new entry of the '3D-wind' variable. After reading the data
    back, the columns are expected to be 'u', 'v', 'w' followed by
    'precision1' .. 'precision3'.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.
    df_3D_wind
        Wind data to import; its index supplies the temporal extent.
    df_3D_prec
        Precision columns imported alongside the data.

    Returns
    -------
    bool
        Always True; failures surface as AssertionError.
    """
    # look up the variable and the previously added person
    wind_variable = api.find_variable(session, name='3D-wind')[0]
    author = api.find_person(session, organisation_abbrev='KIT')[0]

    # create the entry
    entry = api.add_entry(
        session,
        title='3-dimensional windspeed data, precision',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=wind_variable.id,
        comment='after double rotation',
        license=6,
        author=author.id,
        embargo=False,
        is_partial=False,
    )

    # datasource and temporal scale spanning the whole index
    entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    time_extent = (df_3D_wind.index[0], df_3D_wind.index[-1])
    entry.datasource.create_scale(
        resolution='30min',
        extent=time_extent,
        support=1.0,
        scale_dimension='temporal',
    )

    # import data and precision
    entry.import_data(data=df_3D_wind, precision=df_3D_prec, force_data_names=False)

    # read back
    loaded = entry.get_data()

    # note from the original test: the input column was named 'precision_1'
    expected_columns = ['u', 'v', 'w', 'precision1', 'precision2', 'precision3']
    assert loaded.columns.tolist() == expected_columns
    assert loaded['u'].mean() == pytest.approx(3.1, 0.05)

    return True
def force_data_names_true(session, df_3D_wind):
    """
    Check the import with ``force_data_names=True``.

    The 3D wind data is imported into a new entry of the '3D-wind'
    variable with ``force_data_names=True``. The test expects the
    column names of the imported data ('u_ms', 'v_ms', 'w_ms') to be
    used when the data is exported again.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.
    df_3D_wind
        Wind data to import; its index supplies the temporal extent.

    Returns
    -------
    bool
        Always True; failures surface as AssertionError.
    """
    # look up the variable and the previously added author
    wind_variable = api.find_variable(session, name='3D-wind')[0]
    author = api.find_person(session, organisation_abbrev='KIT')[0]

    # metadata of the new entry
    entry_kwargs = {
        'title': '3-dimensional windspeed data, force_data_names',
        'abstract': '3-dimensional windspeed data from the Fendt data set',
        'location': (8, 52),
        'variable': wind_variable.id,
        'comment': 'after double rotation',
        'license': 6,
        'author': author.id,
        'embargo': False,
        'is_partial': False,
    }
    entry = api.add_entry(session, **entry_kwargs)

    # datasource and temporal scale spanning the whole index
    entry.create_datasource(type=1, path='timeseries', datatype='timeseries')
    start, end = df_3D_wind.index[0], df_3D_wind.index[-1]
    entry.datasource.create_scale(
        resolution='30min',
        extent=(start, end),
        support=1.0,
        scale_dimension='temporal',
    )

    # import with forced data names
    entry.import_data(df_3D_wind, force_data_names=True)

    # read back and verify
    loaded = entry.get_data()
    assert loaded.columns.tolist() == ['u_ms', 'v_ms', 'w_ms']
    assert loaded['u_ms'].mean() == pytest.approx(3.1, 0.05)

    return True
def one_dim_data(session, df_1D_wind):
    """
    Import one-dimensional wind data and read it back.

    A new variable '1D-wind' with the single column name 'u' is added,
    an entry is created for it, and the data is imported without any
    extra options. The exported data is expected to have exactly one
    column named 'u'.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.
    df_1D_wind
        Wind data to import; its index supplies the temporal extent.

    Returns
    -------
    bool
        Always True; failures surface as AssertionError.
    """
    # add the variable
    var_1D_wind = api.add_variable(
        session,
        name='1D-wind',
        symbol='u',
        column_names=['u'],
        unit=107)

    # find the previously added author
    kit = api.find_person(session, organisation_abbrev='KIT')[0]

    # add the entry
    entry_1D_wind = api.add_entry(
        session,
        title='1-dimensional windspeed data',
        abstract='1-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=var_1D_wind.id,
        license=6,
        author=kit.id,
        embargo=False,
        is_partial=False)

    # create datasource and scale
    entry_1D_wind.create_datasource(type=1, path='timeseries', datatype='timeseries')
    entry_1D_wind.datasource.create_scale(
        resolution='30min',
        extent=(df_1D_wind.index[0], df_1D_wind.index[-1]),
        support=1.0,
        scale_dimension='temporal')

    # add data
    entry_1D_wind.import_data(df_1D_wind)

    # read data
    dat = entry_1D_wind.get_data()

    # compare the column list as a whole: `dat.columns == 'u'` is an
    # element-wise comparison whose truth value is only defined for
    # exactly one column and raises instead of failing the assertion
    # otherwise
    assert dat.columns.tolist() == ['u']
    assert dat['u'].mean() == pytest.approx(3.1, 0.05)

    return True
def add_data(session):
    """
    Load the person and entry fixtures and add them via the api.

    Persons are read from ``PERSONS`` and added one by one. Entries are
    read from ``ENTRIES``; their 'x' and 'y' columns are combined into a
    'location' tuple, missing values are replaced by None, and the
    'x' / 'y' columns are dropped before each record is added.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.

    Returns
    -------
    bool
        Always True; failures surface as exceptions.
    """
    # persons first
    person_records = read_to_df(PERSONS).to_dict(orient='records')
    for person_kwargs in person_records:
        api.add_person(session, **person_kwargs)

    # build the location tuple from the coordinate columns
    entry_frame = read_to_df(ENTRIES)
    entry_frame['location'] = [
        (row[0], row[1]) for row in entry_frame[['x', 'y']].values
    ]

    # keep non-null values, replace everything else by None
    entry_frame = entry_frame.where(entry_frame.notnull(), None)
    entry_frame = entry_frame.drop(columns=['x', 'y'])

    # add the entries
    for entry_kwargs in entry_frame.to_dict(orient='records'):
        api.add_entry(session, **entry_kwargs)

    return True
def add_entries(session):
    """
    Add the entries from the ``ENTRIES`` fixture and spot-check them.

    The 'x' and 'y' columns are combined into a 'location' tuple,
    missing values are replaced by None, and the coordinate columns are
    dropped. Each remaining record is passed to ``api.add_entry``.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.

    Returns
    -------
    bool
        Always True; failures surface as AssertionError.
    """
    frame = read_to_df(ENTRIES)

    # combine the coordinate columns into one tuple per row
    frame['location'] = [(row[0], row[1]) for row in frame[['x', 'y']].values]

    # keep non-null values, replace everything else by None
    frame = frame.where(frame.notnull(), None)
    frame = frame.drop(columns=['x', 'y'])

    # add the entries one by one, keeping the returned objects
    entries = []
    for record in frame.to_dict(orient='records'):
        entries.append(api.add_entry(session, **record))

    # spot checks on the first three entries
    first_contributor = entries[0].contributors[0]
    assert first_contributor.person.first_name == 'Keanu'
    assert entries[1].abstract == frame.loc[1]['abstract']
    assert entries[2].external_id == 'foobar2'

    return True
def add_3D_entry(session):
    """
    Add the variable, the author and an entry for the 3D wind data.

    A '3D-wind' variable with the column names 'u', 'v', 'w' is added,
    followed by a person that only carries organisation information
    (KIT), and an entry that references both.

    Parameters
    ----------
    session
        Database session handed through to all ``api`` calls.

    Returns
    -------
    bool
        Always True; failures surface as AssertionError.
    """
    column_names = ['u', 'v', 'w']

    # add the variable
    wind_variable = api.add_variable(
        session,
        name='3D-wind',
        symbol='uvw',
        column_names=column_names,
        unit=107,
    )

    # add an author without first / last name, only the organisation
    author = api.add_person(
        session,
        first_name=None,
        last_name=None,
        organisation_name='Karlsruhe Institute of Technology (KIT)',
        organisation_abbrev='KIT',
    )

    # add the entry; the returned object is not needed for the check below
    entry_kwargs = dict(
        title='3-dimensional windspeed data',
        abstract='3-dimensional windspeed data from the Fendt data set',
        location=(8, 52),
        variable=wind_variable.id,
        comment='after double rotation',
        license=6,
        author=author.id,
        embargo=False,
        is_partial=False,
    )
    api.add_entry(session, **entry_kwargs)

    # the variable must report the column names it was created with
    assert wind_variable.column_names == ['u', 'v', 'w']

    return True