def watch(conf):
    '''
    Register a file watcher for every entry in ``conf``.

    ``conf`` maps a watcher name to its configuration. Each configuration needs
    a ``paths`` key and at least one event handler key (``on_modified``,
    ``on_created``, ``on_deleted``, ``on_moved`` or ``on_any_event``). Entries
    missing either are logged as errors and skipped.

    Handlers that are not callable are resolved via ``locate``. If that does
    not yield a callable either, an error is logged and a do-nothing handler is
    used instead. Processed configurations are stored in ``_cache`` so that an
    unchanged configuration is re-registered without being re-validated.
    '''
    from . import watcher

    events = {'on_modified', 'on_created', 'on_deleted', 'on_moved', 'on_any_event'}
    for name, config in conf.items():
        _key = cache_key('watch', config)

        # An already-processed configuration is registered as-is
        if _key in _cache:
            watcher.watch(name, **_cache[_key])
            continue

        # Validate: paths and at least one event handler are mandatory
        if 'paths' not in config:
            app_log.error('watch:%s has no "paths"', name)
            continue
        if events.isdisjoint(config.keys()):
            app_log.error('watch:%s has no events (on_modified, ...)', name)
            continue

        # A single path is treated as a list with one path
        paths = config['paths']
        if not isinstance(paths, (list, set, tuple)):
            config['paths'] = [paths]

        # Replace each non-callable handler with the function it names
        for event in events:
            if event not in config or callable(config[event]):
                continue
            config[event] = locate(config[event], modules=['gramex.transforms'])
            if callable(config[event]):
                continue
            app_log.error('watch:%s.%s is not callable', name, event)
            config[event] = lambda event: None

        _cache[_key] = config
        watcher.watch(name, **_cache[_key])
def train(self, data):
    '''
    Train the model on ``data``, or extend an existing model with it.

    :arg data DataFrame: data to train / re-train the model with

    Training is configured by these instance attributes. Except ``model_class``
    and ``trained`` (which must already be set on the instance), missing ones
    are given the defaults below and stored back on the instance:

    - ``model_class`` (str): model class to use, resolved via ``locate``
    - ``model_kwargs`` (dict): kwargs to pass to model class constructor
      (default: ``{}``)
    - ``output`` (str): output column name (default: last column in ``data``)
    - ``input`` (list): input column names (default: all columns except the last)
    - ``labels`` (list): possible output values, passed as ``classes=`` to
      ``partial_fit`` (default: ``None``)

    Notes:

    - If ``self.model`` exists and ``self.trained`` is truthy, the model is
      extended via ``partial_fit`` using the existing ``self.scaler``.
      Otherwise a new scaler and model are created and fitted.

    :raises ValueError: if ``model_class`` cannot be instantiated with
        ``model_kwargs``, or if partial fitting raises an ``AttributeError``.
    '''
    self.output = vars(self).get('output', data.columns[-1])
    self.input = vars(self).get('input', list(data.columns[:-1]))
    self.model_kwargs = vars(self).get('model_kwargs', {})
    self.labels = vars(self).get('labels', None)

    # A model that is not marked as trained is discarded and rebuilt from scratch.
    # NOTE(review): presumably ``trained`` is reset elsewhere when the model
    # settings change -- confirm against the rest of the class.
    if not self.trained and hasattr(self, 'model'):
        vars(self).pop('model')

    # Split it into input (x) and output (y)
    x, y = data[self.input], data[self.output]

    # Extend the model. This needs nothing but partial_fit from the model, so
    # incremental models without a ``classes_`` attribute work too.
    if hasattr(self, 'model'):
        self.model.partial_fit(self.scaler.transform(x), y)
        return

    # Transform the data
    from sklearn.preprocessing import StandardScaler
    self.scaler = StandardScaler()
    self.scaler.fit(x)

    # Train the classifier. Partially, if possible
    try:
        clf = locate(self.model_class)(**self.model_kwargs)
    except TypeError:
        raise ValueError('{0} is not a correct model class'.format(
            self.model_class))
    if self.labels and hasattr(clf, 'partial_fit'):
        try:
            clf.partial_fit(self.scaler.transform(x), y, classes=self.labels)
        except AttributeError:
            raise ValueError('{0} does not support partial fit'.format(
                self.model_class))
    else:
        clf.fit(self.scaler.transform(x), y)
    self.model = clf
def build_transform(conf, vars=None, filename='transform', cache=False, iter=True):
    '''
    Converts an expression into a callable function. For e.g.::

        function: json.dumps("x", separators=[",", ":"])

    translates to::

        fn = build_transform(conf={
            'function': 'json.dumps("x", separators=[",", ":"])',
        })

    which becomes::

        def transform(_val=None):
            import json
            result = json.dumps("x", separators=[",", ":"])
            return result if isinstance(result, GeneratorType) else [result,]

    The same can also be achieved via::

        function: json.dumps
        args: ["x"]
        kwargs:
            separators: [",", ":"]

    Any Python expression is also allowed. The following are valid functions::

        function: 1                 # returns 1
        function: _val.type         # Returns _val.type
        function: _val + 1          # Increments the input parameter by 1
        function: json.dumps(_val)  # Returns the input as a string
        function: json.dumps        # This is the same as json.dumps(_val)

    ``build_transform`` also takes an optional ``filename=`` parameter that sets
    the "filename" of the returned function. This is useful for log messages.

    It takes an optional ``cache=True`` that permanently caches the transform.
    The default is ``False`` that re-imports the function's module if changed.

    The returned function takes a single argument called ``_val`` by default. You
    can change the arguments it accepts using ``vars``. For example::

        fn = build_transform(..., vars={'x': None, 'y': 1})

    creates::

        def transform(x=None, y=1):
            ...

    Or pass ``vars={}`` for function that does not accept any parameters.

    The returned function returns an iterable containing the values. If the
    function returns a single value, you can get it on the first iteration. If
    the function returns a generator object, that is returned as-is.

    But if ``iter=False`` is passed, the returned function just contains the
    returned value as-is -- not as a list.

    In the ``conf`` parameter, ``args`` and ``kwargs`` values are interpreted
    literally. But values starting with ``=`` like ``=args`` are treated as
    variables. (Start ``==`` to represent a string that begins with ``=``.)
    For example, when this is called with ``vars={"handler": None}``::

        function: json.dumps
        args: =handler
        kwargs:
            key: abc
            name: =handler.name

    becomes::

        def transform(handler=None):
            return json.dumps(handler, key="abc", name=handler.name)

    :raises ValueError: if ``conf`` is not dict-like, or if ``function`` is not
        a single Python expression.
    :raises KeyError: if ``conf`` has no (or an empty) ``function``.
    '''
    # Ensure that the transform is a dict. This is a common mistake. We forget
    # the pattern: prefix
    if not hasattr(conf, 'items'):
        raise ValueError('%s: needs {function: name}. Got %s' % (filename, repr(conf)))

    conf = {key: val for key, val in conf.items() if key in {'function', 'args', 'kwargs'}}

    # The returned function takes a single argument by default
    if vars is None:
        vars = {'_val': None}

    if 'function' not in conf or not conf['function']:
        raise KeyError('%s: No function in conf %s' % (filename, conf))

    # Get the name of the function in case it's specified as a function call
    # expr is the full function / expression, e.g. six.text_type("abc")
    # tree is the ast result
    expr = conf['function']
    tree = ast.parse(expr)
    if len(tree.body) != 1 or not isinstance(tree.body[0], ast.Expr):
        # Interpolate with %. Passing the values as a second argument would leave
        # the placeholders unformatted in the exception message.
        raise ValueError(
            '%s: function: must be an Python function or expression, not %s'
            % (filename, expr))

    # Check whether to use the expression as is, or construct the expression
    # If expr is like "x" or "module.x", construct it if it's callable
    # Else, use the expression as-is
    function_name = _full_name(tree.body[0].value)
    module_name = function_name.split('.')[0] if isinstance(function_name, str) else None
    # If the module or function is one of the vars themselves, return it as-is
    # _val.type will be used as-is, then, rather than looking for an "_val" module
    if module_name in vars:
        expr = function_name
    elif function_name is not None:
        function = locate(function_name, modules=['gramex.transforms'])
        if function is None:
            app_log.error('%s: Cannot load function %s', filename, function_name)
        # This section converts the function into an expression.
        # We do this only if the original expression was a *callable* function.
        # But if we can't load the original function (e.g. SyntaxError),
        # treat that as a function as well, allowing users to correct it later.
        if callable(function) or function is None:
            if 'args' in conf:
                # If args is not a list, convert to a list with that value
                args = conf['args'] if isinstance(conf['args'], list) else [conf['args']]
            else:
                # If args is not specified, use vars' keys as args
                args = ['=%s' % var for var in vars.keys()]
            # Add the function, arguments, and kwargs
            parts = ['%s, ' % _arg_repr(arg) for arg in args]
            parts.extend(
                '%s=%s, ' % (key, _arg_repr(val))
                for key, val in conf.get('kwargs', {}).items())
            expr = function_name + '(' + ''.join(parts) + ')'

    # Create the code
    modules = module_names(tree, vars)
    modulestr = ', '.join(sorted(modules))
    body = [
        'def transform(', ', '.join('{:s}={!r:}'.format(k, v) for k, v in vars.items()), '):\n',
        '\timport %s\n' % modulestr if modulestr else '',
        '\treload_module(%s)\n' % modulestr if modulestr and not cache else '',
        '\tresult = %s\n' % expr,
        # If the result is a generator object, return it. Else, create a list and
        # return that. This ensures that the returned value is always an iterable
        '\treturn result if isinstance(result, GeneratorType) else [result,]' if iter else
        '\treturn result',
    ]

    # Compile the function with context variables
    import gramex.transforms
    from gramex.cache import reload_module
    context = dict(
        reload_module=reload_module,
        GeneratorType=GeneratorType,
        Return=tornado.gen.Return,
        AttrDict=AttrDict,
        **{key: getattr(gramex.transforms, key) for key in gramex.transforms.__all__}
    )
    code = compile(''.join(body), filename=filename, mode='exec')
    exec(code, context)     # nosec - OK to run arbitrary Python code in YAML

    # Return the transformed function
    function = context['transform']
    function.__name__ = str(function_name or filename)
    function.__doc__ = str(function.__doc__)

    return function
def url(conf):
    '''
    Set up the tornado web app URL handlers.

    ``conf`` maps a URL name to its spec. Specs are processed in descending
    order of ``_sort_url_patterns``. For each spec:

    - If an entry for the same spec is already in ``_cache``, it is re-used.
    - Specs without a ``handler``, or whose handler cannot be located, are
      logged as errors and skipped.
    - The handler is subclassed with ``name``, ``conf`` and ``cache`` class
      attributes. If the handler has a ``setup`` method, the subclass is set up
      with the spec's ``kwargs``.
    - If setup fails, the exception is logged and a ``SetupFailedHandler``
      subclass holding the ``exc_info`` is used in its place.
    - Specs whose ``tornado.web.URLSpec`` cannot be built are logged and
      skipped.

    Finally, the app's handlers are cleared and replaced with the new list.
    '''
    handlers = []
    # Sort the handlers in descending order of priority
    specs = sorted(conf.items(), key=_sort_url_patterns, reverse=True)
    for name, spec in specs:
        _key = cache_key('url', spec)
        if _key in _cache:
            handlers.append(_cache[_key])
            continue
        if 'handler' not in spec:
            # The message has a %s placeholder, so the name must be passed along
            app_log.error('url: %s: no handler specified', name)
            continue
        app_log.debug('url: %s (%s) %s', name, spec.handler, spec.get('priority', ''))
        urlspec = AttrDict(spec)
        handler = locate(spec.handler, modules=['gramex.handlers'])
        if handler is None:
            app_log.error('url: %s: ignoring missing handler %s', name, spec.handler)
            continue

        # Create a subclass of the handler with additional attributes.
        # If there's a cache section, get the cache method for use by BaseHandler
        class_vars = {
            'name': name,
            'conf': spec,
            'cache': (
                _cache_generator(urlspec['cache'], name=name)
                if 'cache' in urlspec else None
            ),
        }
        # PY27 type() requires the class name to be a string, not unicode
        urlspec.handler = type(str(spec.handler), (handler, ), class_vars)

        # If there's a setup method, call it to initialize the class
        kwargs = urlspec.get('kwargs', {})
        if hasattr(handler, 'setup'):
            try:
                urlspec.handler.setup_default_kwargs()
                urlspec.handler.setup(**kwargs)
            except Exception:
                app_log.exception('url: %s: setup exception in handler %s', name, spec.handler)
                # Since we can't set up the handler, all requests must report the error instead
                class_vars['exc_info'] = sys.exc_info()
                error_handler = locate('SetupFailedHandler', modules=['gramex.handlers'])
                urlspec.handler = type(str(spec.handler), (error_handler, ), class_vars)
                urlspec.handler.setup(**kwargs)

        try:
            handler_entry = tornado.web.URLSpec(
                name=name,
                pattern=_url_normalize(urlspec.pattern),
                handler=urlspec.handler,
                kwargs=kwargs,
            )
        except re.error:
            app_log.error('url: %s: pattern: %s is invalid', name, urlspec.pattern)
            continue
        except Exception:
            app_log.exception('url: %s: invalid', name)
            continue
        _cache[_key] = handler_entry
        handlers.append(handler_entry)

    info.app.clear_handlers()
    info.app.add_handlers('.*$', handlers)
'sklearn.naive_bayes', ] TRANSFORMS = { 'include': [], 'exclude': [], 'dropna': True, 'drop_duplicates': True, 'pipeline': True, 'nums': [], 'cats': [], 'target_col': None, } ACTIONS = ['predict', 'score', 'append', 'train', 'retrain'] DEFAULT_TEMPLATE = op.join(op.dirname(__file__), '..', 'apps', 'mlhandler', 'template.html') search_modelclass = lambda x: locate(x, MLCLASS_MODULES) # NOQA: E731 def _fit(model, x, y, path=None, name=None): app_log.info('Starting training...') getattr(model, 'partial_fit', model.fit)(x, y) app_log.info('Done training...') joblib.dump(model, path) app_log.info(f'{name}: Model saved at {path}.') return model def is_categorical(s, num_treshold=0.1): """Check if a series contains a categorical variable. Parameters