Esempio n. 1
0
class Word2Vec(Model):
    """
    Model operation that builds and trains a `GensimWord2Vec` model.

    The model is constructed from the keyword arguments returned by `to_kwargs()`; the vocabulary is built
    from `sentences` and training runs over `train_iterator` (or over `sentences` when it is not given).
    """
    sentences = CollectionField(0, serialize=False)
    size = ModelParameter(1, default=100)
    alpha = ModelParameter(2, default=0.025)
    window = ModelParameter(3, default=5)
    min_count = ModelParameter(4, default=5)
    max_vocab_size = ModelParameter(5, default=None)
    sample = ModelParameter(6, default=0.001)
    seed = ModelParameter(7, default=1)
    workers = PrimitiveField(8, default=3, serialize=False)
    train_iterator = PrimitiveField(
        default=None,
        help=
        'When sentences is not a rewindable iterator, you must specify another copy of it here',
        serialize=False)

    def apply(self, runner):
        """
        Builds the vocabulary and trains the model.

        :param runner: The runner executing this operation (not used here)
        :return: The trained `GensimWord2Vec` instance
        """
        kwargs = self.to_kwargs()
        model = GensimWord2Vec(**kwargs)

        # Resolve the fallback in a local variable: the data is traversed twice (vocabulary, then training)
        # and the operation itself must not be modified as a side effect of running it.
        train_iterator = self.train_iterator
        if train_iterator is None:
            train_iterator = self.sentences

        model.build_vocab(self.sentences)
        model.train(train_iterator)
        return model
Esempio n. 2
0
class RenameField(FilteredStorageRefactor):
    """
    Storage refactor that moves the value stored under the key `source` to the key `target`
    on every document accepted by `matches`.
    """
    source = PrimitiveField(1)
    target = PrimitiveField(2)

    def transformation(self, doc):
        """
        :param doc: Document to transform
        :return: `doc` itself when it does not match, otherwise a shallow copy with the key renamed
        """
        if not self.matches(doc):
            return doc

        renamed = doc.copy()
        # pop raises KeyError when the source key is absent, exactly like a plain lookup would
        renamed[self.target] = renamed.pop(self.source)
        return renamed
Esempio n. 3
0
class AddField(FilteredStorageRefactor):
    """
    Storage refactor that sets the key `field_name` to `default_value` on every document
    accepted by `matches`.
    """
    field_name = PrimitiveField(1)
    default_value = PrimitiveField(2)

    def transformation(self, doc):
        """
        :param doc: Document to transform
        :return: `doc` itself when it does not match, otherwise a shallow copy with the field set
        """
        if not self.matches(doc):
            return doc

        extended = doc.copy()
        extended[self.field_name] = self.default_value
        return extended
Esempio n. 4
0
class as_operation(GenericDecorator):
    """
    Creates an operation from a callable
    :param out_type: Base class of the operation to be built. Defaults to `Operation`
    :param out_name: Name of the class to be built, defaults to the decorated function name.
    :param cache_on: Forwarded to `operation_from_func` as `cache_on`. Defaults to None
    :param args_specifications: Remaining keyword arguments, forwarded to `operation_from_func`
        as the per-argument field specifications
    """
    out_type = PrimitiveField(default=Operation)
    out_name = PrimitiveField(default=None)
    cache_on = SpecField(default=None)
    args_specifications = KwargsField()

    def create_decorated(self,
                         to_wrap,
                         func_to_execute,
                         f_spec=None,
                         first_arg=None):
        """
        Builds the operation class for the decorated callable.

        :param to_wrap: Original wrapped function
        :param func_to_execute: Function that the operation will actually run
        :param f_spec: The argspec of `to_wrap`; computed with `inspect.getargspec` when None
        :param first_arg: Passed through unchanged to `operation_from_func`
        :return: The class returned by `operation_from_func`
        """
        f_spec = f_spec or inspect.getargspec(to_wrap)
        OperationClass = operation_from_func(
            to_wrap=to_wrap,
            func_to_execute=func_to_execute,
            out_type=self.out_type,
            out_name=self.out_name,
            args_specifications=self.args_specifications,
            f_spec=f_spec,
            method_type=self.method_type,
            first_arg=first_arg,
            cache_on=self.cache_on)

        return OperationClass

    @staticmethod
    def get_current_operation():
        """
        Should be called inside a function decorated with as_operation

        :return: The `Operation` instance found as `self` two frames up the call stack
        :raises RuntimeError: when no such operation can be found
        """
        # The first f_back brings you to the calling function, the second f_back brings you to the
        # apply method of the dynamically created operation
        frame = inspect.currentframe()
        try:
            # Walk the stack defensively: a missing frame or a missing `self` local means we were not
            # called from inside an operation, which must be reported with the RuntimeError below
            # rather than with an incidental AttributeError/KeyError.
            caller_frame = frame.f_back if frame is not None else None
            apply_frame = caller_frame.f_back if caller_frame is not None else None
            res = apply_frame.f_locals.get('self') if apply_frame is not None else None
            if not isinstance(res, Operation):
                raise RuntimeError(
                    "This function should be called inside an operation created with the as_operation decorator"
                )
            return res
        finally:
            # Avoid reference cycle
            del frame
Esempio n. 5
0
class ProjectedRefactor(StorageRefactor):
    """
    This class handles a different semantic for the storage_refactor field
    It only propagates on doc[field]

    `field` may be a dotted path ('a.b.c'), in which case the refactor is applied to doc['a']['b']['c'].
    """
    field = PrimitiveField(0)

    def chain_transformations(self, doc):
        """
        Applies only this refactor's own `transformation` to `doc`.
        """
        return self.transformation(doc)

    def transformation(self, doc):
        """
        Applies `self.storage_refactor` to the value found at the path `self.field` inside `doc`.

        :param doc: Anything; values that are not dicts are returned untouched
        :return: A copy of `doc` in which every dict along the path has been copied, so the input document
            is never modified. When the path cannot be followed the copy is returned unchanged.
        """
        # Everything should be able to receive anything. I don't like that
        if not isinstance(doc, dict):
            return doc

        doc = doc.copy()
        subfields = self.field.split('.')

        subdoc = doc
        for field in subfields[:-1]:
            child = subdoc.get(field)
            if not isinstance(child, dict):
                # Missing key, or a value we cannot descend into: nothing to refactor
                return doc

            # dict.copy() is shallow: copy each level we are about to write through, otherwise the final
            # assignment below would modify the nested dicts of the caller's original document.
            child = child.copy()
            subdoc[field] = child
            subdoc = child

        last_field = subfields[-1]
        if last_field in subdoc:
            subdoc[last_field] = self.storage_refactor.chain_transformations(subdoc[last_field])
        return doc
Esempio n. 6
0
class RemoveField(FilteredStorageRefactor):
    """
    Storage refactor that drops the key `field_name` from every document accepted by `matches`.
    """
    field_name = PrimitiveField(1)

    def transformation(self, doc):
        """
        :param doc: Document to transform
        :return: `doc` itself when it does not match, otherwise a shallow copy without the field
        """
        if not self.matches(doc):
            return doc

        trimmed = doc.copy()
        # A default is supplied so that an already-absent field is not an error
        trimmed.pop(self.field_name, None)
        return trimmed
Esempio n. 7
0
class GenericDecorator(Spec):
    """
    Abstracts all the boilerplate required to build a decorator that works on functions, instance methods and class methods


    :param method_type: if is None, the decorated function is assumed to be a function, otherwise it is assumed
        to be a method. If method_type == 'instance' the function is assumed to be an instance method otherwise a
        classmethod
    """
    method_type = PrimitiveField(0, default=None)

    def __get__(self, instance, owner):
        """
        Descriptor access: builds the decorated object with the first argument already bound.

        Returns the decorator itself when there is nothing to bind to (an instance method looked up
        without an instance, or a class method looked up without an owner).
        """
        if self.method_type == 'instance':
            if instance is None:
                return self
            first_arg = instance
        else:
            if self.method_type == 'class' and owner is None:
                return self
            first_arg = owner

        @wraps(self.func)
        def bound_func(*args, **kwargs):
            # self.func is looked up at call time, first_arg was fixed at access time
            return self.func(first_arg, *args, **kwargs)

        owning_cls = owner if instance is None else type(instance)
        assert owning_cls is not None
        return self.create_decorated(self.func,
                                     bound_func,
                                     inspect.getargspec(self.func),
                                     first_arg=first_arg)

    def __call__(self, func):
        """
        Decorates `func`. Plain functions are decorated right away; for methods the function is stored
        and the decoration is deferred to `__get__`, where the first argument is known.
        """
        if not self.method_type:
            return self.create_decorated(func, func)

        self.func = func
        return self

    def create_decorated(self,
                         to_wrap,
                         func_to_execute,
                         f_spec=None,
                         first_arg=None):
        """
        Abstract method that should be implemented in order to build a decorator

        The difference between `to_wrap` and `func_to_execute` is the fact that in the case of instance methods
        and class methods, `func_to_execute` has the first argument already bound.
        If `to_wrap` is just a function, then `to_wrap == func_to_execute`

        :param to_wrap: Original wrapped function
        :param func_to_execute: You should execute this function
        :param f_spec: The argspec of the function to be decorated, if None, it should be computed from to_wrap (TODO: remove this argument)
        :param first_arg: `self` if it's an instance method, `cls` if it's a classmethod, None otherwise

        """
        raise NotImplementedError()
Esempio n. 8
0
class ChangeType(FilteredStorageRefactor):
    """
    Storage refactor that rewrites the 'type' entry of matching documents to the import path of `new_type`.
    """
    new_type = PrimitiveField(1)

    def __init__(self, *args, **kwargs):
        """
        Forwards all arguments to the parent constructor and checks that `new_type` is a `Spec` subclass.
        """
        super(ChangeType, self).__init__(*args, **kwargs)
        assert issubclass(self.new_type, Spec)

    def transformation(self, doc):
        """
        :param doc: Document to transform
        :return: `doc` itself when it does not match, otherwise a shallow copy with the new 'type'
        """
        if not self.matches(doc):
            return doc

        retyped = doc.copy()
        retyped['type'] = get_import_path(self.new_type)
        return retyped
Esempio n. 9
0
class FilteredStorageRefactor(StorageRefactor):
    """
    Storage refactor restricted to documents whose 'type' entry equals a given type.

    `field_type` is either the type string itself or an object whose import path is used.
    """
    field_type = PrimitiveField(0)

    def matches(self, doc):
        """
        :param doc: Anything; only dicts carrying a 'type' key can match
        :return: True when `doc` is a dict whose 'type' equals `get_field_type_string()`
        """
        # Dicts without a 'type' key simply do not match; indexing them directly would raise KeyError
        return (isinstance(doc, dict) and 'type' in doc
                and doc['type'] == self.get_field_type_string())

    def get_field_type_string(self):
        """
        :return: `field_type` when it is already a string, otherwise `get_import_path(field_type)`
        """
        if isinstance(self.field_type, basestring):
            return self.field_type
        else:
            return get_import_path(self.field_type)
Esempio n. 10
0
class FilteredStorageRefactor(StorageRefactor):
    """
    Storage refactor restricted to documents whose 'type' entry equals a given spec type.

    `spec_type` is either the type string itself or an object whose import path is used.
    """
    spec_type = PrimitiveField(0)

    def matches(self, doc):
        """
        :param doc: Anything; only dicts carrying a 'type' key can match
        :return: Whether `doc` is a dict whose 'type' equals `get_spec_type_string()`
        """
        # Everything should be able to receive anything. I don't like that
        if not isinstance(doc, dict):
            return False
        if 'type' not in doc:
            return False
        return doc['type'] == self.get_spec_type_string()

    def get_spec_type_string(self):
        """
        :return: `spec_type` when it is already a string, otherwise `get_import_path(spec_type)`
        """
        if not isinstance(self.spec_type, basestring):
            return get_import_path(self.spec_type)
        return self.spec_type
Esempio n. 11
0
class AutosavedFunction(as_operation):
    """
    Variant of `as_operation` that returns a callable wrapper: calling it builds the operation from the
    call arguments and executes it through `cache_on`.
    """
    cache_on = PrimitiveField()  # make cache_on a required parameter

    def create_decorated(self,
                         to_wrap,
                         func_to_execute,
                         f_spec=None,
                         first_arg=None):
        """
        :param to_wrap: Original wrapped function
        :param func_to_execute: Function that the operation will actually run
        :param f_spec: Forwarded to the parent implementation
        :param first_arg: Forwarded to the parent implementation; also decides how the 'type' path is built
        :return: A `FunctionWrapper` instance exposing `wrapped_function`, `operation_class` and `__call__`
        """
        parent = super(AutosavedFunction, self)
        BaseOperation = parent.create_decorated(to_wrap,
                                                func_to_execute,
                                                f_spec=f_spec,
                                                first_arg=first_arg)

        class AutosavedOperation(BaseOperation):
            def to_dict(self, include_all=False):
                serialized = super(AutosavedOperation,
                                   self).to_dict(include_all=include_all)

                # The 'type' path goes through the wrapper's `operation_class` attribute
                if first_arg is None:
                    type_path = get_import_path(func_to_execute,
                                                'operation_class')
                else:
                    type_path = get_import_path(first_arg,
                                                func_to_execute.__name__,
                                                'operation_class')
                serialized['type'] = type_path

                return serialized

            def __repr__(self):
                # Represent the operation as the plain (non-autosaved) operation class would
                fields = self.to_dict(include_all=True)
                fields.pop('type')
                return BaseOperation(**fields).__repr__()

        class FunctionWrapper(object):
            @property
            def wrapped_function(self):
                return to_wrap

            @property
            def operation_class(self):
                return AutosavedOperation

            @wraps(to_wrap)
            def __call__(_, *args, **kwargs):
                # `force_run` is consumed here and never reaches the operation's own arguments
                force_run = kwargs.pop('force_run', False)
                operation = AutosavedOperation(*args, **kwargs)
                return self.cache_on.execute(operation, force=force_run)

        return FunctionWrapper()
Esempio n. 12
0
class ChangeField(FilteredStorageRefactor):
    """
    Storage refactor that replaces the value of `field_name` with `new_value` on matching documents
    whose current value equals `old_value`.
    """
    field_name = PrimitiveField(1)
    old_value = PrimitiveField(2)
    new_value = PrimitiveField(3)

    def matches(self, doc):
        """
        On top of the parent's check, requires `field_name` to be present in `doc` with value `old_value`.
        """
        return (super(ChangeField, self).matches(doc)
                and self.field_name in doc
                and doc[self.field_name] == self.old_value)

    def transformation(self, doc):
        """
        :param doc: Document to transform
        :return: `doc` itself when it does not match, otherwise a shallow copy carrying `new_value`
        """
        if not self.matches(doc):
            return doc

        updated = doc.copy()
        updated[self.field_name] = self.new_value
        return updated

    @property
    def recurse_first(self):
        """
        True only when the changed field is 'type'.
        """
        # we want to apply this transformation last
        return 'type' == self.field_name
Esempio n. 13
0
class MemoryObject(Operation):
    """
    Operation that wraps an in-memory object and returns it when applied.
    """
    obj = PrimitiveField(0)

    def apply(self, runner):
        """
        :param runner: Not used
        :return: The wrapped object
        """
        return self.obj

    def to_dict(self, include_all=False):
        """
        Serializes like the parent class, except that 'obj' holds `id(self.obj)` instead of the object.
        """
        serialized = super(MemoryObject, self).to_dict(include_all=include_all)
        serialized['obj'] = id(self.obj)
        return serialized

    @classmethod
    def _from_dict(cls, kwargs, path=None):
        """
        Builds the instance through the parent class, then replaces `obj` with `load_object(obj)`.
        """
        instance = super(MemoryObject, cls)._from_dict(kwargs, path=path)
        instance.obj = load_object(instance.obj)
        return instance
Esempio n. 14
0
def operation_from_func(to_wrap,
                        func_to_execute,
                        out_type,
                        out_name,
                        args_specifications,
                        f_spec=None,
                        method_type=None,
                        first_arg=None,
                        cache_on=None):
    """
    Builds (or updates) an operation class whose fields mirror the arguments of a callable.

    In the case of methods, to_wrap is not the same as func_to_execute
    :param to_wrap: See `GenericDecorator.create_decorated` for an explanation
    :param func_to_execute: See `GenericDecorator.create_decorated` for an explanation
    :param out_type: Base class of the class to be built
    :param out_name: Name of the class to be built; falls back to `to_wrap.__name__`
    :param args_specifications: Mapping from argument name to a field, a field class/factory or a `Spec`
        subclass; arguments not listed become `PrimitiveField`s
    :param f_spec: Argspec of `to_wrap`; computed with `inspect.getargspec` when not given
    :param method_type: None for plain functions, 'instance' or 'class' for methods
    :param first_arg: The bound first argument for methods
    :param cache_on: A data store onto which the operation should be cached
    :return: The operation class
    """
    if not f_spec:
        f_spec = inspect.getargspec(to_wrap)
    if not out_name:
        out_name = to_wrap.__name__

    # TODO: find the first_arg where the method was defined
    if method_type == 'instance' and not isinstance(first_arg, Spec):
        # Only when it's an instance of Spec we can identify
        out_name = '{}@{}'.format(out_name, id(first_arg))

    default_values = get_default_values(f_spec)

    # (method_type, argument name) pairs that denote the implicit first argument of a method
    implicit_first_args = (('instance', 'self'), ('class', 'cls'))

    attrs = {}
    bound_pos = 0
    unbound_pos = 0
    for arg in f_spec.args:
        if (method_type, arg) in implicit_first_args:
            continue

        if arg not in args_specifications:
            spec = PrimitiveField(bound_pos)
            bound_pos += 1
        else:
            spec = args_specifications[arg]
            if inspect.isclass(spec) and issubclass(spec, Spec):
                spec = SpecField(base_type=spec)
            # It can be either a class, or the instance itself
            if inspect.isclass(spec) or inspect.isfunction(spec):
                spec = spec()

            # Unbound fields are numbered independently from the other ones
            if isinstance(spec, UnboundField):
                spec.pos = unbound_pos
                unbound_pos += 1
            else:
                spec.pos = bound_pos
                bound_pos += 1

        if arg in default_values:
            spec.default = default_values[arg]
        attrs[arg] = spec

    def get_this_args(self, runner=None):
        collected = {}
        for name, field in attrs.iteritems():
            value = getattr(self, name)
            # Operations held in spec fields are executed first when a runner is available
            if isinstance(field, BaseSpecField) and runner is not None \
                    and isinstance(value, Operation):
                value = runner.execute(value)

            collected[name] = value

        return collected

    def to_dict(self, include_all=False):
        serialized = super(out_type, self).to_dict(include_all=include_all)

        if method_type is None:
            serialized['type'] = get_import_path(func_to_execute)
        else:
            serialized['type'] = get_import_path(first_arg,
                                                 func_to_execute.__name__)

        return serialized

    @property
    def self(self):
        if method_type is not None:
            return first_arg

        raise RuntimeError(
            'Can only be called with an operation created from a method')

    def apply(self, runner):
        return func_to_execute(**self.get_this_args(runner))

    cls_attrs = attrs.copy()
    cls_attrs.update({
        'func': staticmethod(func_to_execute),
        'apply': apply,
        'get_this_args': get_this_args,
        'to_dict': to_dict,
        'self': self,
    })

    cls = Operation.type2spec_class(out_name)
    if cls is not None:
        # the class already exists, update it
        for attr_name, attr_value in cls_attrs.iteritems():
            setattr(cls, attr_name, attr_value)
    else:
        # the class does not exist, create it
        cls = type(out_name, (out_type, ), cls_attrs)

    # None when no data store was given
    cls.default_data_store = cache_on

    cls.__module__ = to_wrap.__module__
    return cls
Esempio n. 15
0
class BaseDataStore(OperationRunner):
    """
    Base class for all data stores, to implement a backend you need to implement
    _get, save and iteritems methods

    The _get is the actual get procedure, the caching strategy is part of the DataStore implementation

    """

    get_cache_size = NumericField(default=0)
    verbose = PrimitiveField(default=False, serialize=False)

    def __init__(self, *args, **kwargs):
        """
        Instances the data store.

        :param get_cache_size: Size of the FIFO cache for serialization
        """
        super(BaseDataStore, self).__init__(*args, **kwargs)
        # The cache is only created when a positive size was requested
        if self.get_cache_size > 0:
            self.get_cache = FifoCache(self.get_cache_size)
        else:
            self.get_cache = None

    @classmethod
    def get_key(cls, spec):
        """
        :param spec: A `Spec` instance or its dict form
        :return: `spec.key` for a `Spec`, otherwise `Spec._dict2key(spec)`
        """
        if isinstance(spec, Spec):
            return spec.key
        else:
            assert isinstance(spec, dict)
            return Spec._dict2key(spec)

    def get(self, spec):
        """
        Gets an operation from this data store.

        The lookup goes through the FIFO cache when one is configured. When the backend raises KeyError
        and interactive rehash is enabled for this spec, the rehash is attempted and the lookup retried.

        :raises KeyError: when the spec cannot be found (and no rehash took place)
        """
        def _get():
            try:
                return self._get(spec)
            except KeyError:
                # TODO: I don't like putting RehashUI.ignored_specs here
                if config.interactive_rehash and spec not in RehashUI.ignored_specs:
                    # Interactive rehash has been enabled and this spec has not been processed
                    # Trigger interactive rehash
                    if self.interactive_rehash(spec):
                        # If we did an interactive rehash, retry the get
                        return self.get(spec)
                # A bare raise re-raises the KeyError with its original traceback
                raise

        if self.get_cache is None:
            return _get()
        else:
            try:
                return self.get_cache[spec]
            except KeyError:
                res = _get()
                self.get_cache.set(spec, res)
                return res
Esempio n. 16
0
class BaseDataStore(OperationRunner):
    """
    Base class for all data stores, to implement a backend you need to implement
    _get, save and iteritems methods

    The _get is the actual get procedure, the caching strategy is part of the DataStore implementation

    """

    get_cache_size = NumericField(default=0)
    verbose = PrimitiveField(default=False, serialize=False)

    def __init__(self, *args, **kwargs):
        """
        Instances the data store.

        :param get_cache_size: Size of the FIFO cache for serialization
        """
        super(BaseDataStore, self).__init__(*args, **kwargs)
        # The cache is only created when a positive size was requested
        if self.get_cache_size > 0:
            self.get_cache = FifoCache(self.get_cache_size)
        else:
            self.get_cache = None

    def get(self, spec):
        """
        Gets an operation from this data store.
        If you provide a string, it is assumed to be a `Get`
        (NOTE(review): the string case is not handled in this method; presumably `_get` deals with it)
        """
        if self.get_cache is None:
            return self._get(spec)
        else:
            try:
                return self.get_cache[spec]
            except KeyError:
                # Cache miss: fetch from the backend and remember the result
                res = self._get(spec)
                self.get_cache.set(spec, res)
                return res

    def _get(self, spec):
        """
        Abstract method, actual implementation of the fetch from the data_store
        """
        raise NotImplementedError()

    def get_by_id(self, id):
        """
        Fetches the value given some id. The id is implementation specific
        """
        raise NotImplementedError()

    def save(self, spec, object):
        """
        Actual implementation that saves an object associated with the id or operation
        """
        raise NotImplementedError()

    def iteritems(self):
        """
        Iterates over the datastore
        :return: An iterator over (operation, object) pairs
        """
        raise NotImplementedError()

    def iterkeys(self, raw=False):
        """
        Iterates over the keys of the data store
        :param raw: Whether to return raw documents or specs
        """
        raise NotImplementedError()

    def __getitem__(self, spec):
        return self.get(spec)

    def __setitem__(self, spec, object):
        self.save(spec, object)

    def get_or_none(self, spec):
        """
        Same as `get`, but returns None instead of raising KeyError
        """
        try:
            return self.get(spec)
        except KeyError:
            return None

    def __contains__(self, spec):
        # NOTE(review): a spec whose stored value is None is reported as not contained
        return self.get_or_none(spec) is not None

    def autosave(self, *args, **kwargs):
        """
        Builds an `AutosavedFunction` decorator that caches on this data store
        """
        kwargs['cache_on'] = self
        return AutosavedFunction(*args, **kwargs)

    def refactor(self, refactor_operation, out_data_store, permissive=False):
        """
        Applies `refactor_operation` to every raw document of this data store and saves the stored
        value into `out_data_store` under the refactored spec.

        :param refactor_operation: Operation that is bound to each document (`doc=...`) and executed
        :param out_data_store: Destination data store
        :param permissive: When True, failures are reported as warnings and the document is skipped;
            otherwise the first failure is re-raised
        """
        # TODO: rewrite iterkeys, it's horrible!
        for doc_id, doc in self.iterkeys(raw=True):
            try:
                refactored_doc = refactor_operation.bind(doc=doc).execute()
                spec = Spec.dict2spec(refactored_doc)
                out_data_store[spec] = self.get_by_id(doc_id)
            except Exception as e:
                if not permissive:
                    # A bare raise keeps the original traceback
                    raise

                # Exception args are not necessarily strings (e.g. KeyError(some_spec)); joining them
                # as-is would raise TypeError and hide the real problem. String args are kept verbatim.
                string_types = (str, type(u''))
                warnings.warn(' '.join(
                    arg if isinstance(arg, string_types) else repr(arg)
                    for arg in e.args))
Esempio n. 17
0
class FieldChange(Spec):
    """
    Pair of values describing a change of a single field: the value before and the value after.
    """
    original_value = PrimitiveField(0)  # value before the change
    new_value = PrimitiveField(1)  # value after the change
Esempio n. 18
0
class Diff(Spec):
    """
    Difference between two dict representations of a spec: added, removed and changed fields.

    A diff can be turned into a storage refactor (`create_refactor`) and rendered as coloured text
    (`__repr__`).
    """
    spec_type = PrimitiveField()
    added_fields = CollectionField()
    removed_fields = CollectionField()
    changed_fields = SpecCollection()
    field = PrimitiveField(
        default=None,
        help='Specifies that this diff only applies to this field')

    @classmethod
    def build(cls, old_dict, new_dict, field=None):
        """
        Computes the diff between `old_dict` and `new_dict`.

        :param old_dict: Dict form of the old spec; its 'type' entry becomes the diff's `spec_type`
        :param new_dict: Dict form of the new spec
        :param field: Dotted path of the field this diff applies to, None for the top level
        :return: A `Diff`, or a `ChainedDiff` (this diff first, then the nested ones) when values that
            are themselves specs differ
        """
        added = {
            name: value
            for name, value in new_dict.iteritems() if name not in old_dict
        }
        removed = [name for name in old_dict if name not in new_dict]

        changed = {}
        nested_diffs = []
        for name in old_dict:
            if name not in new_dict:
                continue

            before = old_dict[name]
            after = new_dict[name]
            if not (before != after):
                continue

            # TODO this fails if a param starts being a spec, or a param stops being a spec
            both_are_specs = (isinstance(before, dict) and 'type' in before
                              and isinstance(after, dict) and 'type' in after)
            if both_are_specs:
                # Recurse, extending the dotted path with the current field name
                if field is not None:
                    nested_field = '{}.{}'.format(field, name)
                else:
                    nested_field = name
                nested_diffs.append(
                    Diff.build(before, after, field=nested_field))
            else:
                changed[name] = FieldChange(before, after)

        diff = Diff(spec_type=old_dict['type'],
                    added_fields=added,
                    removed_fields=removed,
                    changed_fields=changed,
                    field=field)

        if not nested_diffs:
            return diff

        return ChainedDiff([diff] + nested_diffs)

    def create_refactor(self):
        """
        :return: A storage refactor that adds, removes and changes fields as described by this diff,
            projected on `field` when one is set and the refactor is not empty
        """
        refactor = StorageRefactor()
        for name, value in self.added_fields.iteritems():
            refactor = refactor.add_field(self.spec_type, name, value)

        for name in self.removed_fields:
            refactor = refactor.remove_field(self.spec_type, name)

        for name, change in self.changed_fields.iteritems():
            refactor = refactor.change_field(self.spec_type, name,
                                             change.original_value,
                                             change.new_value)

        if self.field and not refactor.empty:
            refactor = refactor.project(self.field)

        return refactor

    def __repr__(self):
        """
        Renders the diff as text with coloured section titles; nested diffs get one more indentation level.
        """
        def colorize(text, color):
            codes = Cmd.colorcodes[color]
            return codes[True] + text + codes[False]

        out = StringIO()
        if self.field is not None:
            out.write("On field '{}'\n".format(self.field))
            template = '\t{}:\n\t\t{}'
            join_template = '\n\t\t'
        else:
            template = '{}:\n\t{}'
            join_template = '\n\t'

        if self.added_fields:
            added_lines = join_template.join(
                '{}:\t{}'.format(name, value)
                for name, value in self.added_fields.iteritems())
            out.write(
                template.format(colorize('Added fields', 'green'),
                                added_lines))

        if self.removed_fields:
            # Separate from whatever has been written so far
            if out.len > 0:
                out.write('\n\n')
            removed_lines = join_template.join(self.removed_fields)
            out.write(
                template.format(colorize('Removed fields', 'red'),
                                removed_lines))

        if self.changed_fields:
            if out.len > 0:
                out.write('\n\n')
            out.write('{}:\n'.format(colorize('Changed fields', 'yellow')))
            out.write('\t{:<15} {:<45} {:<45}'.format('field', 'from value',
                                                      'to value'))
            for name, change in self.changed_fields.iteritems():
                out.write('{}{:<15} {:<45} {:<45}'.format(
                    join_template, name, repr(change.original_value),
                    repr(change.new_value)))

        return out.getvalue()