Example #1
0
def from_hdf(fpath, dbg=None):
    """
    Restore a Results object from an HDF5 file as created by the to_hdf method.

    The dataframe is read from the 'data' key with pandas. The metadata is
    rebuilt from the attributes of the 'metadata' node; attributes whose names
    are listed in the '_datetime_attributes' attribute are converted to
    pandas Timestamps, and '_datetime_attributes' itself is not copied.

    Args:
        fpath: path of the HDF5 file to read
        dbg: debug logger, called as dbg(level, message); a new Dbg() is
            created when None

    Returns:
        Results: built from the stored dataframe and the restored metadata
    """
    if dbg is None:
        dbg = Dbg()

    import h5py

    data = pd.read_hdf(fpath, 'data')

    metadata = {}
    # Open read-only and inside a context manager so the file handle is
    # released even if reading or converting an attribute fails.
    with h5py.File(fpath, 'r') as hfile:
        attrs = hfile['metadata'].attrs
        datetime_conversions = set(attrs.get('_datetime_attributes', set()))
        for k, v in attrs.items():
            if k == '_datetime_attributes':
                # Bookkeeping entry only, not part of the user metadata.
                continue
            dbg(3, 'from_hdf: retrieving metadata {}'.format(k))
            if k in datetime_conversions:
                dbg(3, ' -> converting to Timestamp')
                v = pd.Timestamp(v)
            metadata[k] = v

    return Results(data, metadata)
Example #2
0
    def __init__(self, df, metadata=None, dbg=None):
        """
        Create results from just a single dataframe.

        Args:
            df (pandas.DataFrame): input dataframe
            metadata (dict): input metadata; stored by reference and extended
                in place with the 'version', 'errors' and 'warnings' entries.
                A new empty dict is created when omitted.
            dbg: debug logger; a new Dbg() is used when omitted or falsy
        """
        self.df = df
        # Use a fresh dict per instance: a literal {} default is created once
        # at definition time and would be shared (and mutated below) by every
        # instance constructed without explicit metadata.
        self.metadata = {} if metadata is None else metadata
        self.metadata['version'] = __version__
        self.metadata['errors'] = {}
        self.metadata['warnings'] = {}

        self.dbg = dbg or Dbg()
Example #3
0
	def setUp(self):
		"""
		Prepare the fixture shared by the test cases.

		Seeds numpy's random generator so that randomized algorithms behave
		deterministically, builds an Experiment with baseline variant 'B'
		from generate_random_data(), and appends the treatment start time
		to the KPI index as 'time_since_treatment'.
		"""
		np.random.seed(0)

		generated = generate_random_data()
		debugger = Dbg(dbg_lvl=5)
		self.data = Experiment('B', *generated, dbg=debugger)

		# Create time column. TODO: Do this nicer
		time_column = 'time_since_treatment'
		self.data.kpis[time_column] = (
			self.data.features['treatment_start_time']
		)
		# Make time part of index
		self.data.kpis.set_index(time_column, inplace=True, append=True)

		# Metadata as generated by generate_random_data() for later checks
		self.testmetadata = dict(
			primary_KPI='normal_shifted',
			source='simulated',
			experiment='random_data_generation',
		)
Example #4
0
	def __init__(self, baseline_variant, metrics_or_kpis, metadata=None,
				 features='default', dbg=None):
		"""
		Initialise the experiment and record its baseline variant.

		Args:
			baseline_variant: name of the baseline variant; must occur in the
				'variant' index level of the KPIs or of the features
			metrics_or_kpis: forwarded to the superclass constructor
			metadata (dict): forwarded to the superclass constructor; a new
				empty dict is used when omitted
			features: forwarded to the superclass constructor
			dbg: debug logger; a new Dbg() is used when omitted or falsy

		Raises:
			KeyError: if baseline_variant is present in neither the KPIs nor
				the features
		"""
		# Resolve the default here instead of using a literal {} default,
		# which would be a single dict object shared across all calls.
		if metadata is None:
			metadata = {}

		# Call constructor of super class
		super(Experiment, self).__init__(metrics_or_kpis, metadata, features)

		# Position of the 'variant' level, shared by both index lookups below
		variant_level = self.primary_indices.index('variant')

		# If no baseline variant is found
		if (baseline_variant not in self.kpis.index.levels[variant_level]
				and baseline_variant not in self.features.index.levels[variant_level]):
			raise KeyError('baseline_variant ({}) not present in KPIs or features.'.format(
				baseline_variant))
		# Add baseline to metadata
		self.metadata['baseline_variant'] = baseline_variant

		self.dbg = dbg or Dbg()
Example #5
0
	def __init__(self, df, metadata=None, dbg=None):
		"""
		Create results from just a single dataframe.

		Args:
			df (pandas.DataFrame): input dataframe
			metadata (dict): input metadata; stored by reference and extended
				in place with the 'version', 'errors' and 'warnings' entries.
				A new empty dict is created when omitted.
			dbg: debug logger; a new Dbg() is used when omitted or falsy
		"""
		self.df = df
		# Use a fresh dict per instance: a literal {} default is created once
		# at definition time and would be shared (and mutated below) by every
		# instance constructed without explicit metadata.
		self.metadata = {} if metadata is None else metadata
		self.metadata['version'] = __version__
		self.metadata['errors'] = {}
		self.metadata['warnings'] = {}

		self.dbg = dbg or Dbg()
Example #6
0
 def setUp(self):
     """
     Prepare the fixture shared by the test cases.

     Seeds numpy's random generator so that randomized algorithms behave
     deterministically, builds an Experiment with baseline variant 'B'
     from generate_random_data(), and appends the treatment start time
     to the KPI index as 'time_since_treatment'.
     """
     np.random.seed(0)

     generated = generate_random_data()
     debugger = Dbg(dbg_lvl=5)
     self.data = Experiment('B', *generated, dbg=debugger)

     # Create time column. TODO: Do this nicer
     time_column = 'time_since_treatment'
     self.data.kpis[time_column] = (
         self.data.features['treatment_start_time']
     )
     # Make time part of index
     self.data.kpis.set_index(time_column, inplace=True, append=True)