def from_hdf(fpath, dbg=None):
    """
    Restores a Results object from HDF5 as created by the to_hdf method.

    The dataframe is read from the 'data' key via pandas; the metadata is
    read from the attributes of the 'metadata' node via h5py.

    Args:
        fpath: path of the HDF5 file to read
        dbg: debug logger called as dbg(level, message); a new Dbg() is
            created when None

    Returns:
        Results: object built from the stored dataframe and metadata
    """
    if dbg is None:
        dbg = Dbg()

    import h5py

    data = pd.read_hdf(fpath, 'data')

    metadata = {}
    # Open read-only and inside a context manager so the file handle is
    # always released, even if reading an attribute raises.
    with h5py.File(fpath, 'r') as hfile:
        md = hfile['metadata']
        # Names of the attributes that have to be turned back into Timestamps
        datetime_conversions = set(
            md.attrs.get('_datetime_attributes', set()))

        for key, value in md.attrs.items():
            # Bookkeeping attribute, not part of the actual metadata
            if key == '_datetime_attributes':
                continue
            dbg(3, 'from_hdf: retrieving metadata {}'.format(key))
            if key in datetime_conversions:
                dbg(3, ' -> converting to Timestamp')
                value = pd.Timestamp(value)
            metadata[key] = value

    return Results(data, metadata)
def __init__(self, df, metadata=None, dbg=None):
    """
    Want to be able to create results from just a single dataframe.

    Args:
        df: dataframe holding the results; stored as self.df
        metadata (dict): metadata to attach; stored by reference as
            self.metadata, and its 'version', 'errors' and 'warnings'
            entries are overwritten. A fresh empty dict is used when None.
        dbg: debug logger; a new Dbg() is created when this is falsy
    """
    # A literal {} default would be shared by every instance created
    # without metadata, so create a new dict per call instead.
    if metadata is None:
        metadata = {}

    self.df = df
    self.metadata = metadata
    self.metadata['version'] = __version__
    self.metadata['errors'] = {}
    self.metadata['warnings'] = {}
    self.dbg = dbg or Dbg()
def setUp(self):
    """
    Build the fixture shared by all StatisticsTestCases.

    The random seed is fixed before the data is generated so that
    randomized algorithms show deterministic behaviour.
    """
    np.random.seed(0)

    generated = generate_random_data()
    debugger = Dbg(dbg_lvl=5)
    experiment = Experiment('B', *generated, dbg=debugger)
    self.data = experiment

    # Create time column. TODO: Do this nicer
    start_times = experiment.features['treatment_start_time']
    experiment.kpis['time_since_treatment'] = start_times

    # Make time part of index
    experiment.kpis.set_index(
        'time_since_treatment', append=True, inplace=True)

    # Metadata as generated by generate_random_data() for later checks
    self.testmetadata = dict(
        primary_KPI='normal_shifted',
        source='simulated',
        experiment='random_data_generation',
    )
def __init__(self, baseline_variant, metrics_or_kpis, metadata=None,
             features='default', dbg=None):
    """
    Create an experiment and record which variant is the baseline.

    Args:
        baseline_variant: label of the baseline variant; it has to occur
            in the 'variant' index level of the KPIs or of the features
        metrics_or_kpis: passed on unchanged to the parent constructor
        metadata (dict): passed on to the parent constructor; a fresh
            empty dict is used when None
        features: passed on unchanged to the parent constructor
        dbg: debug logger; a new Dbg() is created when this is falsy

    Raises:
        KeyError: if baseline_variant is found neither in the KPIs nor in
            the features
    """
    # A literal {} default would be one dict shared across all calls;
    # build a new one per call instead.
    if metadata is None:
        metadata = {}

    # Call constructor of super class
    super(Experiment, self).__init__(metrics_or_kpis, metadata, features)

    # Position of the 'variant' level within the primary indices
    variant_level = self.primary_indices.index('variant')

    # If no baseline variant is found. The features are only inspected
    # when the KPIs do not contain the variant (short-circuit `and`).
    if (baseline_variant not in self.kpis.index.levels[variant_level]
            and baseline_variant
            not in self.features.index.levels[variant_level]):
        raise KeyError(
            'baseline_variant ({}) not present in KPIs or features.'.format(
                baseline_variant))

    # Add baseline to metadata
    self.metadata['baseline_variant'] = baseline_variant
    self.dbg = dbg or Dbg()
def __init__(self, df, metadata=None, dbg=None):
    """
    Want to be able to create results from just a single dataframe.

    Args:
        df (pandas.DataFrame): input dataframe; stored as self.df
        metadata (dict): input metadata; stored by reference as
            self.metadata, and its 'version', 'errors' and 'warnings'
            entries are overwritten. A fresh empty dict is used when None.
        dbg: debug logger; a new Dbg() is created when this is falsy
    """
    # A literal {} default would be shared by every instance created
    # without metadata, so create a new dict per call instead.
    if metadata is None:
        metadata = {}

    self.df = df
    self.metadata = metadata
    self.metadata['version'] = __version__
    self.metadata['errors'] = {}
    self.metadata['warnings'] = {}
    self.dbg = dbg or Dbg()
def setUp(self):
    """
    Build the fixture shared by all StatisticsTestCases.

    The random seed is fixed before the data is generated so that
    randomized algorithms show deterministic behaviour.
    """
    np.random.seed(0)

    generated = generate_random_data()
    debugger = Dbg(dbg_lvl=5)
    experiment = Experiment('B', *generated, dbg=debugger)
    self.data = experiment

    # Create time column. TODO: Do this nicer
    start_times = experiment.features['treatment_start_time']
    experiment.kpis['time_since_treatment'] = start_times

    # Make time part of index
    experiment.kpis.set_index(
        'time_since_treatment', append=True, inplace=True)