def view(handler, table="charts", pk="chart_id"):
    """Return the decoded image of one row as a PNG attachment.

    The row is looked up in ``table`` where column ``pk`` equals the first
    path argument of the request. The row's ``image`` value is split on
    commas and the second piece is decoded as URL-safe base64
    (presumably the value is a ``data:`` URI -- confirm against the writer).

    Args:
        handler: request handler exposing ``set_header`` and ``path_args``.
        table: name of the table to query.
        pk: name of the key column matched against ``handler.path_args[0]``.

    Returns:
        The decoded image bytes.
    """
    response_headers = (
        ("Content-Type", "image/png"),
        ("Content-Disposition", "attachment; filename=image.png"),
    )
    for header_name, header_value in response_headers:
        handler.set_header(header_name, header_value)

    rows = gdata.filter(
        variables["COARSE_LABELS"],
        table=table,
        args={pk: [handler.path_args[0]]},
    )
    # Keep only the payload that follows the first comma of the stored value.
    encoded = rows.iloc[0]["image"].split(",")[1]
    return urlsafe_b64decode(encoded)
def setup(cls, data=None, model=None, config_dir='', **kwargs):
    """Prepare the handler class: config store, training data and model.

    Args:
        data: keyword arguments for ``gdata.filter`` describing the training
            data. If unpacking or filtering raises ``TypeError`` (as it does
            for ``None``), a warning is logged and no model is trained.
        model: optional mapping of model options. Keys read here are
            ``path``, every key in ``TRANSFORMS``, ``class`` and ``params``.
            The mapping is copied first, so the caller's object is never
            mutated.
        config_dir: directory for ``config.json``, ``data.h5`` and the default
            model pickle. When empty, ``<GRAMEXDATA>/apps/mlhandler/<slug>``
            is used, with ``GRAMEXDATA`` taken from ``gramex.config.variables``.
        **kwargs: ``template`` is popped (default ``DEFAULT_TEMPLATE``); the
            rest is forwarded to the parent ``setup``.
    """
    # A ``{}`` default would be shared across calls, and ``pop('path')`` below
    # would strip the key from the caller's configuration. Use a private copy.
    model = dict(model) if model else {}

    cls.slug = slugify(cls.name)

    # Create the config store directory
    if not config_dir:
        config_dir = op.join(gramex.config.variables['GRAMEXDATA'], 'apps', 'mlhandler',
                             cls.slug)
    _mkdir(config_dir)
    cls.config_dir = config_dir
    cls.config_store = cache.JSONStore(op.join(cls.config_dir, 'config.json'), flush=None)
    cls.data_store = op.join(cls.config_dir, 'data.h5')

    cls.template = kwargs.pop('template', DEFAULT_TEMPLATE)
    super(MLHandler, cls).setup(**kwargs)

    try:
        # ``**None`` raises TypeError, which is how "no data configured" is detected.
        data = gdata.filter(**data)
        cls.store_data(data)
    except TypeError:
        app_log.warning('MLHandler could not find training data.')
        data = None

    default_model_path = op.join(cls.config_dir, cls.slug + '.pkl')
    cls.model_path = model.pop('path', default_model_path)

    # Store the model kwargs from gramex.yaml into the store; keys that are
    # not configured keep whatever value the store already holds.
    for key in TRANSFORMS:
        cls.set_opt(key, model.get(key, cls.get_opt(key)))
    cls.set_opt('class', model.get('class'))
    cls.set_opt('params', model.get('params', {}))
    target_col = cls.get_opt('target_col')

    if op.exists(cls.model_path):
        # If the pkl exists, load it.
        # NOTE(review): joblib.load unpickles the file; only point it at trusted paths.
        cls.model = joblib.load(cls.model_path)
    elif data is not None:
        mclass = cls.get_opt('class', model.get('class', False))
        params = cls.get_opt('params', {})
        data = cls._filtercols(data)
        data = cls._filterrows(data)
        cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params)
        # Train the model on the thread pool, using every column except the target.
        target = data[target_col]
        train = data[[c for c in data if c != target_col]]
        gramex.service.threadpool.submit(
            _fit, cls.model, train, target, cls.model_path, cls.name)
    cls.config_store.flush()
def populate_annotations():
    """Segment every chart image and walk the bounding boxes of its regions.

    TODO
    1. for each row in charts table
    2. get the image
    3. update x, y, height, width columns in annotations table

    Only steps 1 and 2 are implemented: the bounding box of each labelled
    region is computed but not yet written anywhere.
    """
    # Local imports keep this block self-contained.
    from base64 import urlsafe_b64decode
    from io import BytesIO

    vgg = _cache_model(variables["model_path"])
    df = gdata.filter(variables["COARSE_LABELS"], table="charts")
    for _, row in df.iterrows():
        # The text after the first comma is base64; it must be decoded to raw
        # bytes before it can be wrapped in BytesIO and read as an image.
        payload = row["image"].split(",")[1]
        x = imread(BytesIO(urlsafe_b64decode(payload)))
        _pred, mask = seg.segment_image(x, vgg, blocksize=(224, 224), plot=False)
        # Label connected components of the boolean mask, then measure each.
        labeled = label(mask.astype(bool))
        for region in regionprops(labeled):
            minrow, mincol, maxrow, maxcol = region.bbox
            # TODO: store x, y, height, width for this region in the annotations table.
    # NOTE(review): the original text ended here with an unterminated
    # triple-quoted string; whatever it contained is not visible in the source.
def setup(cls, data=None, model=None, config_dir='', **kwargs):
    """Prepare the handler class: directories, config store, data and model.

    NOTE(review): the source text of this function was whitespace-collapsed;
    the nesting below is reconstructed from syntax -- confirm against the
    original file.

    Args:
        data: a ``str`` (opened with ``cache.open``), a ``dict`` (unpacked
            into ``gdata.filter``), or anything else, which is treated as
            "no data".
        model: optional dict of model options. Keys read here: ``path``
            (popped from the dict), every key in ``TRAINING_DEFAULTS``,
            ``target_col``, ``class``, ``params``, ``pipeline`` and ``async``.
        config_dir: directory for ``config.json``, ``data.h5`` and
            ``uploads/``. If it is not an existing directory it is replaced
            by ``<GRAMEXDATA>/apps/mlhandler/<slug>``.
        **kwargs: ``template`` is popped (default ``True``); the rest is
            forwarded to the parent ``setup``.
    """
    cls.slug = slugify(cls.name)
    # A config_dir that is not an existing directory (including the default
    # '') is discarded in favour of the per-handler default location.
    if not op.isdir(config_dir):
        config_dir = op.join(gramex.config.variables['GRAMEXDATA'], 'apps', 'mlhandler',
                             cls.slug)
        _mkdir(config_dir)
    cls.config_dir = config_dir
    cls.uploads_dir = op.join(config_dir, 'uploads')
    _mkdir(cls.uploads_dir)
    cls.config_store = cache.JSONStore(op.join(cls.config_dir, 'config.json'), flush=None)
    cls.data_store = op.join(cls.config_dir, 'data.h5')
    cls.template = kwargs.pop('template', True)
    super(MLHandler, cls).setup(**kwargs)
    # Resolve the training data according to the type of ``data``.
    if isinstance(data, str):
        data = cache.open(data)
    elif isinstance(data, dict):
        data = gdata.filter(**data)
    else:
        data = None
    if data is not None:
        cls.store_data(data)
    # parse model kwargs
    if model is None:
        model = {}
    default_model_path = op.join(gramex.config.variables['GRAMEXDATA'], 'apps', 'mlhandler',
                                 slugify(cls.name) + '.pkl')
    # NOTE(review): pop() removes 'path' from the dict the caller passed in.
    model_path = model.pop('path', default_model_path)
    # store the model kwargs from gramex.yaml into the store, but only for
    # keys that have no truthy value in the store yet
    for key in TRAINING_DEFAULTS:
        kwarg = model.get(key, False)
        if not cls.get_opt(key, False) and kwarg:
            cls.set_opt(key, kwarg)
    if op.exists(model_path):  # If the pkl exists, load it
        # NOTE(review): joblib.load unpickles the file; only use trusted paths.
        cls.model = joblib.load(model_path)
        cls.model_path = model_path
        # A configured target_col overrides the stored one.
        target_col = model.get('target_col', False)
        if target_col:
            cls.set_opt('target_col', target_col)
        else:
            target_col = cls.get_opt('target_col')
    else:  # build the model
        mclass = cls.get_opt('class', model.get('class', False))
        # Stored params win; fall back to the configured ones when empty.
        params = cls.get_opt('params', {})
        if not params:
            params = model.get('params', {})
        if mclass:
            cls.model = search_modelclass(mclass)(**params)
            cls.set_opt('class', mclass)
        else:
            cls.model = None
        # Params MUST come after class, or they will be ignored.
        cls.set_opt('params', params)
        if model_path:  # if a path is specified, use it to store the model
            cls.model_path = model_path
        else:  # or create our own path
            cls.model_path = default_model_path
            _mkdir(op.dirname(cls.model_path))
        # train the model
        target_col = model.get('target_col', False)
        if target_col:
            cls.set_opt('target_col', target_col)
        else:
            target_col = cls.get_opt('target_col', False)
        if cls.model is not None and not target_col:
            app_log.warning('Target column not defined. Nothing to do.')
        else:
            # Training needs both a model and data; otherwise nothing happens.
            if cls.model is not None:
                if data is not None:
                    # filter columns
                    data = cls._filtercols(data)
                    # filter rows
                    data = cls._filterrows(data)
                    # assemble the pipeline, or re-create the bare estimator
                    # when 'pipeline' is disabled
                    if model.get('pipeline', True):
                        cls.model = cls._get_pipeline(data)
                    else:
                        cls.model = search_modelclass(mclass)(**params)
                    # train the model on every column except the target
                    target = data[target_col]
                    train = data[[c for c in data if c != target_col]]
                    # 'async' (default True) fits on the thread pool;
                    # otherwise the fit runs inline.
                    if model.get('async', True):
                        gramex.service.threadpool.submit(
                            _fit, cls.model, train, target, cls.model_path, cls.name)
                    else:
                        _fit(cls.model, train, target, cls.model_path, cls.name)
    cls.config_store.flush()
def setup(cls, data=None, model=None, config_dir='', **kwargs):
    """Prepare the handler class: config store, training data and model.

    Args:
        data: keyword arguments for ``gdata.filter`` describing the training
            data. A ``transform`` entry, if present, is compiled with
            ``build_transform`` and also kept as ``cls._built_transform``.
            If any of this raises ``TypeError`` (as it does for ``None``), a
            warning is logged, no model is trained and the transform falls
            back to the identity function.
        model: optional mapping of model options. Keys read here are
            ``path``, every key in ``TRANSFORMS``, ``class`` and ``params``.
            The mapping is copied first, so the caller's object is never
            mutated.
        config_dir: directory for ``config.json``, ``data.h5`` and the default
            model pickle. When empty, ``<GRAMEXDATA>/apps/mlhandler/<slug>``
            is used, with ``GRAMEXDATA`` taken from ``gramex.config.variables``.
        **kwargs: ``template`` is popped (default ``DEFAULT_TEMPLATE``); the
            rest is forwarded to the parent ``setup``.
    """
    # A ``{}`` default would be shared across calls, and ``pop('path')`` below
    # would strip the key from the caller's configuration. Use a private copy.
    model = dict(model) if model else {}

    cls.slug = slugify(cls.name)

    # Create the config store directory
    if not config_dir:
        config_dir = op.join(gramex.config.variables['GRAMEXDATA'], 'apps', 'mlhandler',
                             cls.slug)
    _mkdir(config_dir)
    cls.config_dir = config_dir
    cls.config_store = cache.JSONStore(op.join(cls.config_dir, 'config.json'), flush=None)
    cls.data_store = op.join(cls.config_dir, 'data.h5')

    cls.template = kwargs.pop('template', DEFAULT_TEMPLATE)
    super(MLHandler, cls).setup(**kwargs)

    try:
        # ``'transform' in None`` raises TypeError, which is how "no data
        # configured" is detected.
        if 'transform' in data:
            data['transform'] = build_transform(
                {'function': data['transform']},
                vars={'data': None, 'handler': None},
                filename='MLHandler:data', iter=False)
            cls._built_transform = staticmethod(data['transform'])
        else:
            cls._built_transform = staticmethod(lambda x: x)
        data = gdata.filter(**data)
        cls.store_data(data)
    except TypeError:
        app_log.warning('MLHandler could not find training data.')
        data = None
        # Reset even if a transform was built before the failure.
        cls._built_transform = staticmethod(lambda x: x)

    default_model_path = op.join(cls.config_dir, cls.slug + '.pkl')
    cls.model_path = model.pop('path', default_model_path)

    # Store the model kwargs from gramex.yaml into the store; keys that are
    # not configured keep whatever value the store already holds.
    for key in TRANSFORMS:
        cls.set_opt(key, model.get(key, cls.get_opt(key)))
    # Remove target_col if it appears anywhere in cats or nums
    target_col = cls.get_opt('target_col')
    cls.set_opt('cats', list(set(cls.get_opt('cats')) - {target_col}))
    cls.set_opt('nums', list(set(cls.get_opt('nums')) - {target_col}))

    cls.set_opt('class', model.get('class'))
    cls.set_opt('params', model.get('params', {}))

    if op.exists(cls.model_path):
        # If the pkl exists, load it.
        # NOTE(review): joblib.load unpickles the file; only point it at trusted paths.
        cls.model = joblib.load(cls.model_path)
    elif data is not None:
        mclass = cls.get_opt('class', model.get('class', False))
        params = cls.get_opt('params', {})
        data = cls._filtercols(data)
        data = cls._filterrows(data)
        cls.model = cls._assemble_pipeline(data, mclass=mclass, params=params)
        # Train the model using every column except the target.
        target = data[target_col]
        train = data[[c for c in data if c != target_col]]
        # cross validation
        cls.cross_validation(train, target)
        gramex.service.threadpool.submit(
            _fit, cls.model, train, target, cls.model_path, cls.name)
    cls.config_store.flush()