class Experiment(Operation):
    """
    Example operation whose result is the product of its two numeric fields.
    """
    # NOTE(review): the integer passed to NumericField looks like the positional
    # index of the field -- confirm against NumericField's signature
    some_parameter = NumericField(0)
    new_parameter = NumericField(1)

    def apply(self, runner):
        """
        Announces the run on stdout and computes the result.

        :param runner: The runner executing this operation (not used here)
        :return: ``some_parameter * new_parameter``
        """
        # A parenthesised single-argument print emits the same text on
        # Python 2 and Python 3, whereas the bare print statement is a
        # SyntaxError on Python 3
        print("Running {}...".format(self))
        # new_parameter's value == 2 reproduces previous experiment
        return self.some_parameter * self.new_parameter
class SpecA(Spec):
    """
    Spec declaring two fields that appear in its repr, a ``func`` field that
    defaults to ``general_append`` and a ``verbose`` flag.
    """
    # NOTE(review): the leading integers look like positional indexes of the
    # fields -- confirm against the field classes
    field1 = NumericField(0)
    field2 = PrimitiveField(1, default=None)
    func = PrimitiveField(default=general_append)
    # Declared with serialize=False
    verbose = PrimitiveField(default=False, serialize=False)

    def __repr__(self):
        """
        :return: ``A(field1=..., field2=...)`` filled with both field values
        """
        shown_values = (self.field1, self.field2)
        return "A(field1={}, field2={})".format(*shown_values)
class GetNumber(Numeric):
    """
    Numeric operation that evaluates to its ``input`` field plus one.
    """
    input = NumericField(0)

    def apply(self, runner):
        """
        Runs the parent's apply and then computes this operation's value.

        :param runner: The runner executing this operation, forwarded to the parent
        :return: ``input + 1``
        """
        parent = super(GetNumber, self)
        # The parent's return value is intentionally discarded
        parent.apply(runner)
        incremented = self.input + 1
        return incremented

    def __repr__(self):
        """
        :return: The ``input`` field formatted as a string
        """
        rendered = "{}".format(self.input)
        return rendered
class BaseDataStore(OperationRunner):
    """
    Base class for all data stores, to implement a backend you need to implement
    _get, save and iteritems methods

    The _get is the actual get procedure, the caching strategy is part of the DataStore implementation
    """

    get_cache_size = NumericField(default=0)
    verbose = PrimitiveField(default=False, serialize=False)

    def __init__(self, *args, **kwargs):
        """
        Instantiates the data store.

        All arguments are forwarded to the parent constructor.

        :param get_cache_size: Size of the FIFO cache used by `get`.
            A value that is not positive disables the cache
        """
        super(BaseDataStore, self).__init__(*args, **kwargs)
        if self.get_cache_size > 0:
            self.get_cache = FifoCache(self.get_cache_size)
        else:
            self.get_cache = None

    @classmethod
    def get_key(cls, spec):
        """
        Computes the key of a spec.

        :param spec: Either a `Spec` instance or its dict form
        :return: `spec.key` for a `Spec`, otherwise `Spec._dict2key(spec)`
        """
        if isinstance(spec, Spec):
            return spec.key
        else:
            assert isinstance(spec, dict)
            return Spec._dict2key(spec)

    def get(self, spec):
        """
        Gets an operation from this data store.

        When `get_cache` is enabled it is looked up first, and populated on a miss.

        :param spec: The spec to look for
        :return: Whatever `_get` returns for that spec
        :raises KeyError: If `_get` raises it and no interactive rehash
            took place for this spec
        """
        def fetch():
            try:
                return self._get(spec)
            except KeyError:
                # TODO: I don't like puting RehashUI.ignored_specs here
                # The rehash is only offered when it has been enabled and this
                # spec has not been processed already
                if (config.interactive_rehash
                        and spec not in RehashUI.ignored_specs
                        and self.interactive_rehash(spec)):
                    # If we did an interactive rehash, retry the get
                    return self.get(spec)
                # Bare raise keeps the traceback of the original KeyError
                raise

        if self.get_cache is None:
            return fetch()

        try:
            return self.get_cache[spec]
        except KeyError:
            res = fetch()
            self.get_cache.set(spec, res)
            return res
class BaseDataStore(OperationRunner):
    """
    Base class for all data stores, to implement a backend you need to implement
    _get, save and iteritems methods

    The _get is the actual get procedure, the caching strategy is part of the DataStore implementation
    """

    get_cache_size = NumericField(default=0)
    verbose = PrimitiveField(default=False, serialize=False)

    def __init__(self, *args, **kwargs):
        """
        Instantiates the data store.

        All arguments are forwarded to the parent constructor.

        :param get_cache_size: Size of the FIFO cache used by `get`.
            A value that is not positive disables the cache
        """
        super(BaseDataStore, self).__init__(*args, **kwargs)
        if self.get_cache_size > 0:
            self.get_cache = FifoCache(self.get_cache_size)
        else:
            self.get_cache = None

    def get(self, spec):
        """
        Gets an operation from this data store.

        When `get_cache` is enabled it is looked up first, and populated on a miss.

        NOTE(review): this docstring used to say that a string argument is assumed
        to be a `Get`; nothing in this method performs that conversion, so it would
        have to happen inside `_get` -- confirm

        :param spec: The spec to look for
        :return: Whatever `_get` returns for that spec
        """
        if self.get_cache is None:
            return self._get(spec)

        try:
            return self.get_cache[spec]
        except KeyError:
            res = self._get(spec)
            self.get_cache.set(spec, res)
            return res

    def _get(self, spec):
        """
        Abstract method, actual implementation of the fetch from the data_store
        """
        raise NotImplementedError()

    def get_by_id(self, id):
        """
        Fetches the value given some id.
        The id is implementation specific
        """
        raise NotImplementedError()

    def save(self, spec, object):
        """
        Actual implementation that saves an object associated with the id or operation
        """
        raise NotImplementedError()

    def iteritems(self):
        """
        Iterates over the datastore
        :return: An iterator over (operation, object) pairs
        """
        raise NotImplementedError()

    def iterkeys(self, raw=False):
        """
        Iterates over the keys of the data store
        :param raw: Whether to return raw documents or specs
        """
        raise NotImplementedError()

    def __getitem__(self, spec):
        """
        Same as `get`
        """
        return self.get(spec)

    def __setitem__(self, spec, object):
        """
        Same as `save`
        """
        self.save(spec, object)

    def get_or_none(self, spec):
        """
        Same as `get`, but returns None instead of raising KeyError
        """
        try:
            return self.get(spec)
        except KeyError:
            return None

    def __contains__(self, spec):
        """
        Whether `get_or_none` finds something for the spec.
        A spec whose stored value is None is therefore reported as missing
        """
        return self.get_or_none(spec) is not None

    def autosave(self, *args, **kwargs):
        """
        Builds an `AutosavedFunction` whose `cache_on` argument is this data store.
        Every other argument is forwarded untouched
        """
        kwargs['cache_on'] = self
        return AutosavedFunction(*args, **kwargs)

    def refactor(self, refactor_operation, out_data_store, permissive=False):
        """
        Applies a refactor operation to every raw document of this data store and
        stores the corresponding values in another data store.

        :param refactor_operation: Operation bound to each document through
            `bind(doc=doc)` and then executed to obtain the refactored document
        :param out_data_store: Data store receiving the refactored spec and its value
        :param permissive: Whether a failing document is reported as a warning
            instead of interrupting the refactor
        """
        # TODO: rewrite iterkeys, it's horrible!
        for doc_id, doc in self.iterkeys(raw=True):
            try:
                refactored_doc = refactor_operation.bind(doc=doc).execute()
                spec = Spec.dict2spec(refactored_doc)
                out_data_store[spec] = self.get_by_id(doc_id)
            except Exception as e:
                if not permissive:
                    # Bare raise keeps the traceback of the original error
                    raise
                # Exception args are not necessarily strings (e.g. KeyError(3)),
                # joining them raw would raise a TypeError hiding the real problem.
                # format() leaves text untouched and stringifies everything else
                warnings.warn(' '.join(format(arg) for arg in e.args))
class OperationRunner(Spec):
    """
    Spec that knows how to execute operations, optionally remembering results in
    an in-memory FifoCache and in the operation's output data store.
    """
    execute_cache_size = NumericField(default=0)
    verbose = PrimitiveField(default=False)

    # Whether to force execution and ignore caches
    # Helps encapsulate the behaviour so the Operation.apply remains simple
    force = PrimitiveField(serialize=False, default=False)

    def __init__(self, *args, **kwargs):
        """
        Builds the runner, forwarding every argument to the parent constructor.

        An `execute_cache_size` of 0 means no execute cache is created
        """
        super(OperationRunner, self).__init__(*args, **kwargs)
        cache_size = self.execute_cache_size
        self.execute_cache = FifoCache(cache_size, self.verbose) if cache_size != 0 else None

    def alias(self, **kwargs):
        """
        Same as self.replace, but keeps the same execute_cache
        """
        clone = self.replace(**kwargs)
        if clone.execute_cache is None:
            return clone
        # Share this runner's cache instead of the one the clone came with
        clone.execute_cache = self.execute_cache
        return clone

    # TODO: The FifoCache can be casted into a FifoDataStore, and make this function an @autosave
    def execute(self, operation, force=False):
        """
        Executes an operation using this data store as input
        If this data store was configured to use an execute cache, it will be used

        The result is written to the execute cache (when there is one) and to the
        operation's output data store (when there is one), wherever it came from.

        :param operation: The operation to execute
        :param force: Whether to ignore the current cached value of this operation
        :return: The cached value if one was found, otherwise the outcome of
            `operation.apply`
        """
        force = force or self.force

        # Caches are consulted in order, unless the execution is forced
        lookups = () if force else (self._get_memory_cache, self._get_data_store_cache)

        res = None
        for lookup in lookups:
            res = lookup(operation)
            if res is not None:
                break

        if res is None:
            # Nothing cached (or forced): actually run the operation
            res = operation.apply(self.alias(force=force))

        if self.execute_cache is not None:
            self.execute_cache.set(operation, res)

        out_data_store = operation.get_out_data_store()
        if out_data_store is not None:
            out_data_store[operation] = res

        return res

    def _get_memory_cache(self, operation):
        """
        Looks the operation up in the execute cache; None when there is no cache
        """
        cache = self.execute_cache
        return None if cache is None else cache.get(operation)

    def _get_data_store_cache(self, operation):
        """
        Looks the operation up in its output data store; None when it has none
        """
        store = operation.get_out_data_store()
        return None if store is None else store.get_or_none(operation)
class Experiment(Operation):
    """
    Example operation whose result is twice its numeric field.
    """
    # NOTE(review): the integer passed to NumericField looks like the positional
    # index of the field -- confirm against NumericField's signature
    some_parameter = NumericField(0)

    def apply(self, runner):
        """
        Announces the run on stdout and computes the result.

        :param runner: The runner executing this operation (not used here)
        :return: ``some_parameter * 2``
        """
        # A parenthesised single-argument print emits the same text on
        # Python 2 and Python 3, whereas the bare print statement is a
        # SyntaxError on Python 3
        print("Running {}...".format(self))
        return self.some_parameter * 2