def test_slicing(self, dhdl):
    """Check that ``slicing`` keeps the ``attrs`` metadata of its input.

    A ``u_nk`` frame is extracted from the first Coulomb entry of the
    benzene dataset with a temperature argument of 310, passed through
    ``slicing`` with default arguments, and the ``temperature`` and
    ``energy_unit`` entries of the result's ``attrs`` are checked.

    The ``dhdl`` fixture is requested by the signature but is not used in
    the body.
    """
    coulomb_file = load_benzene()['data']['Coulomb'][0]
    sliced = slicing(extract_u_nk(coulomb_file, 310))

    attrs = sliced.attrs
    assert attrs['temperature'] == 310
    assert attrs['energy_unit'] == 'kT'
def test_slicing_inefficiency_equivalence(
    self, dataloader, lower, upper, conservative, request
):
    """Slicing first and subsampling second must equal bounded subsampling.

    Two routes are compared:

    1. call ``slicing`` with ``lower``/``upper`` and hand the result to
       ``self.slicer`` without bounds;
    2. hand the untouched frame to ``self.slicer`` together with the same
       ``lower``/``upper`` bounds.

    In both routes the series passed to ``self.slicer`` is the row-wise sum
    of the frame being subsampled, and ``conservative`` is forwarded
    unchanged.
    """
    # Resolve the fixture named by ``dataloader`` into the actual data.
    frame = request.getfixturevalue(dataloader)

    # Guard: make sure the input is suitable for this comparison.
    _check_data_is_outside_bounds(frame, lower, upper)

    # Route 1: slice explicitly, then subsample the already-sliced frame.
    pre_sliced = slicing(frame, lower=lower, upper=upper)
    via_slicing = self.slicer(
        pre_sliced,
        series=pre_sliced.sum(axis=1),
        conservative=conservative,
    )

    # Route 2: let the subsampler apply the bounds itself.
    direct = self.slicer(
        frame,
        series=frame.sum(axis=1),
        lower=lower,
        upper=upper,
        conservative=conservative,
    )

    assert (via_slicing == direct).all(axis=None)
def _dHdl_from_xvg_delayed(sim, lower=None, upper=None, step=None):
    """Build the lazy XVG -> concat -> slice pipeline for ``sim``.

    One ``delayed(get_dHdl_XVG_delayed)`` task is created per ``*.xvg`` file
    found under ``sim['WORK/dhdl/']``; the tasks are combined with a delayed
    ``pd.concat`` and passed to a delayed ``slicing_delayed`` together with
    the slicing bounds. Nothing is computed here; the delayed object is
    returned as-is.
    """
    xvg_tasks = [
        delayed(get_dHdl_XVG_delayed)(xvg)
        for xvg in sim['WORK/dhdl/'].glob('*.xvg')
    ]
    return delayed(slicing_delayed)(
        delayed(pd.concat)(xvg_tasks), lower=lower, upper=upper, step=step
    )


def get_dHdl(sim, lower=None, upper=None, step=None):
    """Return the sliced dHdl data of ``sim``, preferring the stored copy.

    The function first asks ``sim.data.retrieve('dHdl')`` for stored data.

    * If something is returned, the read is timed and logged, the frame is
      sorted along its index and sliced with ``slicing``.
    * If ``None`` is returned, or if anything in the stored-data path raises
      an ``Exception``, a delayed pipeline that parses the XVG files under
      ``sim['WORK/dhdl/']`` is returned instead.

    Parameters
    ----------
    sim
        Object exposing ``name``, ``data.retrieve(key)`` and item access
        returning something with ``glob``.
    lower, upper, step
        Forwarded unchanged to ``slicing`` / ``slicing_delayed``.

    Returns
    -------
    Either the result of ``slicing`` (stored-data path) or a delayed object
    (XVG path); callers must be prepared for both.
    """
    try:
        s = time.time()
        dHdl = sim.data.retrieve('dHdl')
        e = time.time()
        if dHdl is None:
            return _dHdl_from_xvg_delayed(
                sim, lower=lower, upper=upper, step=step
            )
        logging.info(
            "get_dHdl,hdf5_read,{},{},{},{}".format(sim.name, e - s, s, e)
        )
        # ``axis`` must be passed by keyword: positional arguments to
        # ``sort_index`` are rejected by pandas >= 2.0, and the resulting
        # TypeError used to be swallowed by the fallback below.
        return slicing(
            dHdl.sort_index(axis=0), lower=lower, upper=upper, step=step
        )
    except Exception:
        # Only ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate; the failure is logged instead of being hidden.
        logging.warning(
            "get_dHdl: stored dHdl unavailable for %s; falling back to XVG files",
            getattr(sim, 'name', sim),
            exc_info=True,
        )
        # THIS WILL NOT STORE THE VALUE FOR LATER USE SO YOU SHOULD REALLY
        # CONTINUOUSLY UPDATE THE dHdl DATA IN THE SIMS
        return _dHdl_from_xvg_delayed(
            sim, lower=lower, upper=upper, step=step
        )
def test_no_series(self, data):
    """Without a ``series`` argument ``self.slicer`` must match ``slicing``.

    Both callables receive the same frame and the same
    ``lower=200, upper=5000, step=2`` window; their outputs are compared
    element-wise.
    """
    window = dict(lower=200, upper=5000, step=2)

    subsampled = self.slicer(data, **window)
    reference = slicing(data, **window)

    elementwise_equal = subsampled == reference
    assert np.all(elementwise_equal)
def slicer(self, *args, **kwargs):
    """Forward all positional and keyword arguments to ``slicing``.

    ``self`` is not used; the return value of ``slicing`` is handed back
    unchanged.
    """
    sliced = slicing(*args, **kwargs)
    return sliced
def slicing_delayed(dhdls, lower=None, upper=None, step=None):
    """Sort ``dhdls`` along its index, then slice it with ``slicing``.

    Parameters
    ----------
    dhdls
        Object providing ``sort_index`` (presumably a pandas DataFrame of
        concatenated dHdl data; confirm against the caller).
    lower, upper, step
        Forwarded unchanged to ``slicing``.

    Returns
    -------
    Whatever ``slicing`` returns for the sorted input.
    """
    # ``axis`` is passed by keyword because pandas >= 2.0 made the arguments
    # of ``sort_index`` keyword-only; the positional form raises TypeError.
    ordered = dhdls.sort_index(axis=0)
    return slicing(ordered, lower=lower, upper=upper, step=step)