Example #1
def watch(conf):
    '''Set up file watchers'''
    from . import watcher

    events = {
        'on_modified', 'on_created', 'on_deleted', 'on_moved', 'on_any_event'
    }
    for name, config in conf.items():
        _key = cache_key('watch', config)
        if _key in _cache:
            watcher.watch(name, **_cache[_key])
            continue
        if 'paths' not in config:
            app_log.error('watch:%s has no "paths"', name)
            continue
        if not set(config.keys()) & events:
            app_log.error('watch:%s has no events (on_modified, ...)', name)
            continue
        if not isinstance(config['paths'], (list, set, tuple)):
            config['paths'] = [config['paths']]
        for event in events:
            if event in config:
                if not callable(config[event]):
                    config[event] = locate(config[event],
                                           modules=['gramex.transforms'])
                    if not callable(config[event]):
                        app_log.error('watch:%s.%s is not callable', name,
                                      event)
                        config[event] = lambda event: None
        _cache[_key] = config
        watcher.watch(name, **_cache[_key])
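
A minimal sketch of the kind of ``conf`` dict that ``watch()`` accepts, based on the checks above: every named watcher needs a ``paths`` key and at least one ``on_*`` event. The watcher name and path below are hypothetical; a dotted-string handler would be resolved via ``locate(..., modules=['gramex.transforms'])``.

# Hypothetical configuration for watch(). Callables are used directly;
# strings like 'mymodule.on_change' would be resolved via locate().
conf = {
    'data-files': {
        'paths': 'data/',       # a single path or a list/set/tuple of paths
        'on_modified': lambda event: print(event.src_path),
    },
}
watch(conf)
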
Example #2
    def train(self, data):
        '''
        :arg data DataFrame: data to train / re-train the model with
        :arg model_class str: model class to use (default: ``sklearn.naive_bayes.BernoulliNB``)
        :arg model_kwargs dict: kwargs to pass to model class constructor (defaults: ``{}``)
        :arg output str: output column name (default: last column in training data)
        :arg input list: input column names (default: all columns except ``output``)
        :arg labels list: list of possible output values (default: unique ``output`` in training)

        Notes:
        - If model has already been trained, extend the model. Else create it
        '''
        self.output = vars(self).get('output', data.columns[-1])
        self.input = vars(self).get('input', list(data.columns[:-1]))
        self.model_kwargs = vars(self).get('model_kwargs', {})
        self.labels = vars(self).get('labels', None)
        # If model_kwargs have changed since we trained last, re-train model.
        if not self.trained and hasattr(self, 'model'):
            vars(self).pop('model')
        if not hasattr(self, 'model'):
            # Split it into input (x) and output (y)
            x, y = data[self.input], data[self.output]
            # Transform the data
            from sklearn.preprocessing import StandardScaler
            self.scaler = StandardScaler()
            self.scaler.fit(x)
            # Train the classifier. Partially, if possible
            try:
                clf = locate(self.model_class)(**self.model_kwargs)
            except TypeError:
                raise ValueError('{0} is not a correct model class'.format(
                    self.model_class))
            if self.labels and hasattr(clf, 'partial_fit'):
                try:
                    clf.partial_fit(self.scaler.transform(x),
                                    y,
                                    classes=self.labels)
                except AttributeError:
                    raise ValueError('{0} does not support partial fit'.format(
                        self.model_class))
            else:
                clf.fit(self.scaler.transform(x), y)
            self.model = clf

        # Extend the model
        else:
            x, y = data[self.input], data[self.output]
            classes = set(self.model.classes_)
            classes |= set(y)
            self.model.partial_fit(self.scaler.transform(x), y)
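
A hedged sketch of the data this method expects, following the docstring above: by default the last column is the output and the remaining columns are the inputs. The column names and the ``Classifier`` class name are hypothetical stand-ins for whatever class this method belongs to.

import pandas as pd

# Hypothetical training data: 'label' (the last column) becomes self.output,
# 'weight' and 'height' become self.input.
data = pd.DataFrame({
    'weight': [1.2, 3.4, 2.2, 0.9],
    'height': [10, 30, 21, 9],
    'label': ['small', 'large', 'large', 'small'],
})
# clf = Classifier(model_class='sklearn.naive_bayes.BernoulliNB')  # hypothetical
# clf.train(data)
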
Example #3
def build_transform(conf,
                    vars=None,
                    filename='transform',
                    cache=False,
                    iter=True):
    '''
    Converts an expression into a callable function. For e.g.::

        function: json.dumps("x", separators=[",", ":"])

    translates to::

        fn = build_transform(conf={
            'function': 'json.dumps("x", separators=[",", ":"])',
        })

    which becomes::

        def transform(_val):
            import json
            result = json.dumps("x", separators=[",", ":"])
            return result if isinstance(result, GeneratorType) else (result,)

    The same can also be achieved via::

        function: json.dumps
        args: ["x"]
        kwargs:
            separators: [",", ":"]

    Any Python expression is also allowed. The following are valid functions::

        function: 1                 # returns 1
        function: _val.type         # Returns _val.type
        function: _val + 1          # Increments the input parameter by 1
        function: json.dumps(_val)  # Returns the input as a string
        function: json.dumps        # This is the same as json.dumps(_val)

    ``build_transform`` also takes an optional ``filename=`` parameter that sets
    the "filename" of the returned function. This is useful for log messages.

    It takes an optional ``cache=True`` that permanently caches the transform.
    The default, ``cache=False``, re-imports the function's module if it has changed.

    The returned function takes a single argument called ``_val`` by default. You
    can change the arguments it accepts using ``vars``. For example::

        fn = build_transform(..., vars={'x': None, 'y': 1})

    creates::

        def transform(x=None, y=1):
            ...

    Or pass ``vars={}`` for a function that does not accept any parameters.

    The returned function returns an iterable containing the values. If the
    function returns a single value, you can get it on the first iteration. If
    the function returns a generator object, that is returned as-is.

    But if ``iter=False`` is passed, the returned function returns the value
    as-is -- not wrapped in a list.

    In the ``conf`` parameter, ``args`` and ``kwargs`` values are interpreted
    literally. But values starting with ``=`` like ``=args`` are treated as
    variables. (Start with ``==`` to represent a string that begins with ``=``.) For
    example, when this is called with ``vars={"handler": None}``::

        function: json.dumps
        args: =handler
        kwargs:
            key: abc
            name: =handler.name

    becomes::

        def transform(handler=None):
            return json.dumps(handler, key="abc", name=handler.name)
    '''
    # Ensure that the transform is a dict. A common mistake is forgetting the
    # pattern: prefix
    if not hasattr(conf, 'items'):
        raise ValueError('%s: needs {function: name}. Got %s' %
                         (filename, repr(conf)))

    conf = {
        key: val
        for key, val in conf.items() if key in {'function', 'args', 'kwargs'}
    }

    # The returned function takes a single argument by default
    if vars is None:
        vars = {'_val': None}

    if 'function' not in conf or not conf['function']:
        raise KeyError('%s: No function in conf %s' % (filename, conf))

    # Get the name of the function in case it's specified as a function call
    # expr is the full function / expression, e.g. six.text_type("abc")
    # tree is the ast result
    expr = conf['function']
    tree = ast.parse(expr)
    if len(tree.body) != 1 or not isinstance(tree.body[0], ast.Expr):
        raise ValueError(
            '%s: function: must be a Python function or expression, not %s' %
            (filename, expr))

    # Check whether to use the expression as is, or construct the expression
    # If expr is like "x" or "module.x", construct it if it's callable
    # Else, use the expression as-is
    function_name = _full_name(tree.body[0].value)
    module_name = function_name.split('.')[0] if isinstance(
        function_name, str) else None
    # If the module or function is one of the vars themselves, return it as-is
    # _val.type will be used as-is, then, rather than looking for an "_val" module
    if module_name in vars:
        expr = function_name
    elif function_name is not None:
        function = locate(function_name, modules=['gramex.transforms'])
        if function is None:
            app_log.error('%s: Cannot load function %s' %
                          (filename, function_name))
        # This section converts the function into an expression.
        # We do this only if the original expression was a *callable* function.
        # But if we can't load the original function (e.g. SyntaxError),
        # treat that as a function as well, allowing users to correct it later.
        if callable(function) or function is None:
            if 'args' in conf:
                # If args is not a list, convert to a list with that value
                args = conf['args'] if isinstance(conf['args'],
                                                  list) else [conf['args']]
            else:
                # If args is not specified, use vars' keys as args
                args = ['=%s' % var for var in vars.keys()]
            # Add the function, arguments, and kwargs
            expr = function_name + '('
            for arg in args:
                expr += '%s, ' % _arg_repr(arg)
            for key, val in conf.get('kwargs', {}).items():
                expr += '%s=%s, ' % (key, _arg_repr(val))
            expr += ')'

    # Create the code
    modules = module_names(tree, vars)
    modulestr = ', '.join(sorted(modules))
    body = [
        'def transform(',
        ', '.join('{:s}={!r:}'.format(k, v) for k, v in vars.items()),
        '):\n',
        '\timport %s\n' % modulestr if modulestr else '',
        '\treload_module(%s)\n' % modulestr if modulestr and not cache else '',
        '\tresult = %s\n' % expr,
        # If the result is a generator object, return it. Else, create a list and
        # return that. This ensures that the returned value is always an iterable
        '\treturn result if isinstance(result, GeneratorType) else [result,]'
        if iter else '\treturn result',
    ]

    # Compile the function with context variables
    import gramex.transforms
    from gramex.cache import reload_module
    context = dict(reload_module=reload_module,
                   GeneratorType=GeneratorType,
                   Return=tornado.gen.Return,
                   AttrDict=AttrDict,
                   **{
                       key: getattr(gramex.transforms, key)
                       for key in gramex.transforms.__all__
                   })
    code = compile(''.join(body), filename=filename, mode='exec')
    exec(code, context)  # nosec - OK to run arbitrary Python code in YAML

    # Return the transformed function
    function = context['transform']
    function.__name__ = str(function_name or filename)
    function.__doc__ = str(function.__doc__)

    return function
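
A short usage sketch, assuming the behaviour documented in the docstring above; the ``filename`` value is an arbitrary label used only in log messages.

# Build a transform that JSON-encodes its single argument compactly.
fn = build_transform(
    conf={'function': 'json.dumps', 'kwargs': {'separators': [',', ':']}},
    vars={'data': None},
    filename='url:example',
)
# With the default iter=True the result is an iterable whose first element
# should be the JSON string '{"x":1}'.
result = fn({'x': 1})
print(list(result)[0])
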
Example #4
def url(conf):
    '''Set up the tornado web app URL handlers'''
    handlers = []
    # Sort the handlers in descending order of priority
    specs = sorted(conf.items(), key=_sort_url_patterns, reverse=True)
    for name, spec in specs:
        _key = cache_key('url', spec)
        if _key in _cache:
            handlers.append(_cache[_key])
            continue
        if 'handler' not in spec:
            app_log.error('url: %s: no handler specified', name)
            continue
        app_log.debug('url: %s (%s) %s', name, spec.handler,
                      spec.get('priority', ''))
        urlspec = AttrDict(spec)
        handler = locate(spec.handler, modules=['gramex.handlers'])
        if handler is None:
            app_log.error('url: %s: ignoring missing handler %s', name,
                          spec.handler)
            continue

        # Create a subclass of the handler with additional attributes.
        class_vars = {'name': name, 'conf': spec}
        # If there's a cache section, get the cache method for use by BaseHandler
        if 'cache' in urlspec:
            class_vars['cache'] = _cache_generator(urlspec['cache'], name=name)
        else:
            class_vars['cache'] = None
        # PY27 type() requires the class name to be a string, not unicode
        urlspec.handler = type(str(spec.handler), (handler, ), class_vars)

        # If there's a setup method, call it to initialize the class
        kwargs = urlspec.get('kwargs', {})
        if hasattr(handler, 'setup'):
            try:
                urlspec.handler.setup_default_kwargs()
                urlspec.handler.setup(**kwargs)
            except Exception:
                app_log.exception('url: %s: setup exception in handler %s',
                                  name, spec.handler)
                # Since we can't set up the handler, all requests must report the error instead
                class_vars['exc_info'] = sys.exc_info()
                error_handler = locate('SetupFailedHandler',
                                       modules=['gramex.handlers'])
                urlspec.handler = type(str(spec.handler), (error_handler, ),
                                       class_vars)
                urlspec.handler.setup(**kwargs)

        try:
            handler_entry = tornado.web.URLSpec(
                name=name,
                pattern=_url_normalize(urlspec.pattern),
                handler=urlspec.handler,
                kwargs=kwargs,
            )
        except re.error:
            app_log.error('url: %s: pattern: %s is invalid', name,
                          urlspec.pattern)
            continue
        except Exception:
            app_log.exception('url: %s: invalid', name)
            continue
        _cache[_key] = handler_entry
        handlers.append(handler_entry)

    info.app.clear_handlers()
    info.app.add_handlers('.*$', handlers)
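
A hedged sketch of a ``conf`` entry that ``url()`` could accept. ``AttrDict`` is used because the loop reads ``spec.handler`` as an attribute; the name, pattern, and kwargs below are hypothetical, with ``FileHandler`` standing in for any handler in ``gramex.handlers``.

# Hypothetical URL configuration: one named spec with a pattern and handler.
conf = AttrDict()
conf['home'] = AttrDict(
    pattern='/$',                     # regular expression the URL must match
    handler='FileHandler',            # resolved via locate(..., modules=['gramex.handlers'])
    kwargs={'path': 'index.html'},    # passed to the handler's setup()
    priority=1,                       # higher-priority specs are added first
)
url(conf)
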
Example #5
    'sklearn.naive_bayes',
]
TRANSFORMS = {
    'include': [],
    'exclude': [],
    'dropna': True,
    'drop_duplicates': True,
    'pipeline': True,
    'nums': [],
    'cats': [],
    'target_col': None,
}
ACTIONS = ['predict', 'score', 'append', 'train', 'retrain']
DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler',
                           'template.html')
search_modelclass = lambda x: locate(x, MLCLASS_MODULES)  # NOQA: E731


def _fit(model, x, y, path=None, name=None):
    app_log.info('Starting training...')
    getattr(model, 'partial_fit', model.fit)(x, y)
    app_log.info('Done training...')
    joblib.dump(model, path)
    app_log.info(f'{name}: Model saved at {path}.')
    return model
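
A small usage sketch for ``_fit``, assuming any scikit-learn estimator; the toy data, path, and name are hypothetical. Estimators without ``partial_fit`` (such as ``LogisticRegression``) fall back to ``fit``.

from sklearn.linear_model import LogisticRegression
import numpy as np

# Hypothetical toy data: two features, two classes.
x = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])
y = np.array([0, 1, 1, 0])
model = _fit(LogisticRegression(), x, y, path='model.pkl', name='demo')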


def is_categorical(s, num_treshold=0.1):
    """Check if a series contains a categorical variable.

    Parameters