Esempio n. 1
0
def view(handler, table="charts", pk="chart_id"):
    """Return the decoded image of one chart, flagged as a PNG download.

    The chart row is looked up in ``table`` of the ``COARSE_LABELS`` data
    source by matching column ``pk`` against the first path argument of the
    request.

    Args:
        handler: request handler providing ``set_header`` and ``path_args``.
        table: name of the table to query.
        pk: name of the column that identifies the chart.

    Returns:
        The bytes produced by base64-decoding the second comma-separated
        field of the first matching row's ``image`` value (presumably a
        ``data:`` URI whose payload follows the comma -- verify against the
        code that writes this column).
    """
    # Headers are set before the lookup, exactly as callers have come to
    # expect: they are in place even if the lookup below fails.
    response_headers = (
        ("Content-Type", "image/png"),
        ("Content-Disposition", "attachment; filename=image.png"),
    )
    for header_name, header_value in response_headers:
        handler.set_header(header_name, header_value)

    source = variables["COARSE_LABELS"]
    criteria = {pk: [handler.path_args[0]]}
    matches = gdata.filter(source, table=table, args=criteria)

    # Only the first matching row is used.
    first_row = matches.iloc[0]
    encoded = first_row["image"].split(",")[1]
    return urlsafe_b64decode(encoded)
Esempio n. 2
0
    def setup(cls, data=None, model=None, config_dir='', **kwargs):
        """Configure storage, options and the model for this handler class.

        Args:
            data: keyword arguments for ``gdata.filter`` describing the
                training data. If they cannot be expanded (for example when
                ``data`` is ``None``), a warning is logged and setup carries
                on without training data.
            model: model configuration mapping. ``path`` is the pickle
                location (default: ``<config_dir>/<slug>.pkl``); the keys
                listed in ``TRANSFORMS`` plus ``class`` and ``params`` are
                saved as options. The mapping is copied, never modified.
            config_dir: directory holding ``config.json`` and ``data.h5``.
                Defaults to ``$GRAMEXDATA/apps/mlhandler/<slug>``.
            **kwargs: ``template`` is stored on the class; everything else
                is forwarded to the parent class's ``setup``.
        """
        # Work on a private copy: ``path`` is popped below, and popping from
        # the caller's mapping (or from a shared mutable default) would
        # remove the key from the caller's configuration.
        model = dict(model) if model else {}

        cls.slug = slugify(cls.name)
        # Create the config store directory
        if not config_dir:
            config_dir = op.join(gramex.config.variables['GRAMEXDATA'], 'apps',
                                 'mlhandler', cls.slug)
        _mkdir(config_dir)
        cls.config_dir = config_dir
        cls.config_store = cache.JSONStore(op.join(cls.config_dir,
                                                   'config.json'),
                                           flush=None)
        cls.data_store = op.join(cls.config_dir, 'data.h5')

        cls.template = kwargs.pop('template', DEFAULT_TEMPLATE)
        super(MLHandler, cls).setup(**kwargs)
        try:
            # ``**data`` raises TypeError when ``data`` is not a mapping
            # (e.g. the default ``None``); that is how "no data configured"
            # reaches the warning below.
            data = gdata.filter(**data)
            cls.store_data(data)
        except TypeError:
            app_log.warning('MLHandler could not find training data.')
            data = None

        default_model_path = op.join(cls.config_dir,
                                     slugify(cls.name) + '.pkl')
        cls.model_path = model.pop('path', default_model_path)

        # store the model kwargs from gramex.yaml into the store
        for key in TRANSFORMS:
            cls.set_opt(key, model.get(key, cls.get_opt(key)))
        cls.set_opt('class', model.get('class'))
        cls.set_opt('params', model.get('params', {}))
        target_col = cls.get_opt('target_col')

        if op.exists(cls.model_path):  # If the pkl exists, load it
            cls.model = joblib.load(cls.model_path)
        elif data is not None:
            mclass = cls.get_opt('class', model.get('class', False))
            params = cls.get_opt('params', {})
            data = cls._filtercols(data)
            data = cls._filterrows(data)
            cls.model = cls._assemble_pipeline(data,
                                               mclass=mclass,
                                               params=params)

            # train the model in the background: everything except the
            # target column is used as input
            target = data[target_col]
            train = data[[c for c in data if c != target_col]]
            gramex.service.threadpool.submit(_fit, cls.model, train, target,
                                             cls.model_path, cls.name)
        cls.config_store.flush()
Esempio n. 3
0
def populate_annotations():
    """Segment the images of the ``charts`` table and walk the found regions.

    Work in progress (see the TODO note in the body): in the visible part of
    this function nothing is written back to any table and nothing is
    returned.
    """
    # Model object handed to seg.segment_image for every image below.
    vgg = _cache_model(variables["model_path"])
    """
    TODO

    1. for each row in charts table
    2. get the image
    3. update x, y, height, width columns in annotations table
    """
    df = gdata.filter(variables["COARSE_LABELS"], table="charts")
    for _, row in df.iterrows():
        # Takes the second comma-separated field of the "image" value.
        # NOTE(review): split(",") with a str separator implies the value is
        # a str, and on Python 3 BytesIO does not accept str -- it looks like
        # a decoding step (presumably base64) is missing here; confirm.
        x = imread(BytesIO(row["image"].split(",")[1]))
        pred, mask = seg.segment_image(x,
                                       vgg,
                                       blocksize=(224, 224),
                                       plot=False)
    # NOTE(review): everything below runs after the loop, so only the mask
    # of the last row is used, and `mask` is undefined if the table yields no
    # rows -- presumably this was meant to be inside the loop; confirm.
    # This first regionprops result is overwritten three lines further down.
    rp = regionprops(mask.astype(int))
    mask = mask.astype(bool)
    labeled = label(mask)
    rp = regionprops(labeled)

    for region in rp:
        # Bounding box of the region; the unpacked values are not used yet.
        minrow, mincol, maxrow, maxcol = region.bbox
    """
Esempio n. 4
0
    def setup(cls, data=None, model=None, config_dir='', **kwargs):
        """Configure storage, options and the model for this handler class.

        Args:
            data: training data. A string is opened with ``cache.open``; a
                dict is expanded into ``gdata.filter``; anything else is
                ignored.
            model: model configuration mapping. ``path`` is the pickle
                location; keys listed in ``TRAINING_DEFAULTS`` are saved as
                options unless an option is already stored; ``class``,
                ``params``, ``target_col``, ``pipeline`` and ``async`` steer
                model creation and training. The mapping is copied, never
                modified.
            config_dir: existing directory for ``config.json``, ``data.h5``
                and ``uploads``. Anything that is not an existing directory
                is replaced by ``$GRAMEXDATA/apps/mlhandler/<slug>``.
            **kwargs: ``template`` is stored on the class; everything else
                is forwarded to the parent class's ``setup``.
        """
        cls.slug = slugify(cls.name)
        # NOTE(review): a config_dir that is given but does not exist yet is
        # replaced by the default instead of being created -- confirm that
        # this is intended.
        if not op.isdir(config_dir):
            config_dir = op.join(gramex.config.variables['GRAMEXDATA'], 'apps',
                                 'mlhandler', cls.slug)
            _mkdir(config_dir)
        cls.config_dir = config_dir
        cls.uploads_dir = op.join(config_dir, 'uploads')
        _mkdir(cls.uploads_dir)
        cls.config_store = cache.JSONStore(op.join(cls.config_dir,
                                                   'config.json'),
                                           flush=None)
        cls.data_store = op.join(cls.config_dir, 'data.h5')
        cls.template = kwargs.pop('template', True)
        super(MLHandler, cls).setup(**kwargs)

        # load the training data, if any was configured
        if isinstance(data, str):
            data = cache.open(data)
        elif isinstance(data, dict):
            data = gdata.filter(**data)
        else:
            data = None
        if data is not None:
            cls.store_data(data)

        # parse model kwargs. Work on a private copy: ``path`` is popped
        # below, and popping from the caller's mapping would remove the key
        # from the caller's configuration.
        model = dict(model) if model is not None else {}

        default_model_path = op.join(gramex.config.variables['GRAMEXDATA'],
                                     'apps', 'mlhandler',
                                     slugify(cls.name) + '.pkl')
        model_path = model.pop('path', default_model_path)

        # store the model kwargs from gramex.yaml into the store, without
        # overriding options that are already stored
        for key in TRAINING_DEFAULTS:
            kwarg = model.get(key, False)
            if not cls.get_opt(key, False) and kwarg:
                cls.set_opt(key, kwarg)

        if op.exists(model_path):  # If the pkl exists, load it
            cls.model = joblib.load(model_path)
            cls.model_path = model_path
            target_col = model.get('target_col', False)
            if target_col:
                cls.set_opt('target_col', target_col)
        else:  # build the model
            mclass = cls.get_opt('class', model.get('class', False))
            # stored params win; fall back to the configured ones
            params = cls.get_opt('params', {}) or model.get('params', {})
            if mclass:
                cls.model = search_modelclass(mclass)(**params)
                cls.set_opt('class', mclass)
            else:
                cls.model = None
            # Params MUST come after class, or they will be ignored.
            cls.set_opt('params', params)

            if model_path:  # if a path is specified, use it to store the model
                cls.model_path = model_path
            else:  # or create our own path
                cls.model_path = default_model_path
                _mkdir(op.dirname(cls.model_path))

            # the configured target column wins over the stored one
            target_col = model.get('target_col', False)
            if target_col:
                cls.set_opt('target_col', target_col)
            else:
                target_col = cls.get_opt('target_col', False)

            if cls.model is None:
                pass  # no model class configured: nothing to train
            elif not target_col:
                app_log.warning('Target column not defined. Nothing to do.')
            elif data is not None:
                data = cls._filtercols(data)
                data = cls._filterrows(data)

                # assemble the pipeline, or start from a fresh bare model
                if model.get('pipeline', True):
                    cls.model = cls._get_pipeline(data)
                else:
                    cls.model = search_modelclass(mclass)(**params)

                # train the model on everything except the target column
                target = data[target_col]
                train = data[[c for c in data if c != target_col]]
                fit_args = (cls.model, train, target, cls.model_path,
                            cls.name)
                if model.get('async', True):
                    gramex.service.threadpool.submit(_fit, *fit_args)
                else:
                    _fit(*fit_args)
        cls.config_store.flush()
Esempio n. 5
0
    def setup(cls, data=None, model=None, config_dir='', **kwargs):
        """Configure storage, options and the model for this handler class.

        Args:
            data: keyword arguments for ``gdata.filter`` describing the
                training data. A ``transform`` entry is compiled with
                ``build_transform`` and kept as ``cls._built_transform``.
                If ``data`` cannot be used (for example when it is
                ``None``), a warning is logged and setup carries on without
                training data. The mapping is not modified.
            model: model configuration mapping. ``path`` is the pickle
                location (default: ``<config_dir>/<slug>.pkl``); the keys
                listed in ``TRANSFORMS`` plus ``class`` and ``params`` are
                saved as options. The mapping is copied, never modified.
            config_dir: directory holding ``config.json`` and ``data.h5``.
                Defaults to ``$GRAMEXDATA/apps/mlhandler/<slug>``.
            **kwargs: ``template`` is stored on the class; everything else
                is forwarded to the parent class's ``setup``.
        """
        # Work on a private copy: ``path`` is popped below, and popping from
        # the caller's mapping (or from a shared mutable default) would
        # remove the key from the caller's configuration.
        model = dict(model) if model else {}

        cls.slug = slugify(cls.name)
        # Create the config store directory
        if not config_dir:
            config_dir = op.join(gramex.config.variables['GRAMEXDATA'], 'apps',
                                 'mlhandler', cls.slug)
        _mkdir(config_dir)
        cls.config_dir = config_dir
        cls.config_store = cache.JSONStore(op.join(cls.config_dir,
                                                   'config.json'),
                                           flush=None)
        cls.data_store = op.join(cls.config_dir, 'data.h5')

        cls.template = kwargs.pop('template', DEFAULT_TEMPLATE)
        super(MLHandler, cls).setup(**kwargs)
        try:
            # ``data=None`` raises TypeError on the membership test below;
            # that is how "no data configured" reaches the except branch.
            if 'transform' in data:
                transform = build_transform(
                    {'function': data['transform']},
                    vars={
                        'data': None,
                        'handler': None
                    },
                    filename='MLHandler:data',
                    iter=False)
                # Put the compiled function into a copy, so the caller's
                # configuration keeps its original ``transform`` entry.
                data = dict(data, transform=transform)
                cls._built_transform = staticmethod(transform)
            else:
                cls._built_transform = staticmethod(lambda x: x)
            data = gdata.filter(**data)
            cls.store_data(data)
        except TypeError:
            app_log.warning('MLHandler could not find training data.')
            data = None
            cls._built_transform = staticmethod(lambda x: x)

        default_model_path = op.join(cls.config_dir,
                                     slugify(cls.name) + '.pkl')
        cls.model_path = model.pop('path', default_model_path)

        # store the model kwargs from gramex.yaml into the store
        for key in TRANSFORMS:
            cls.set_opt(key, model.get(key, cls.get_opt(key)))
        # Remove target_col if it appears anywhere in cats or nums
        target_col = cls.get_opt('target_col')
        cls.set_opt('cats', list(set(cls.get_opt('cats')) - {target_col}))
        cls.set_opt('nums', list(set(cls.get_opt('nums')) - {target_col}))

        cls.set_opt('class', model.get('class'))
        cls.set_opt('params', model.get('params', {}))
        if op.exists(cls.model_path):  # If the pkl exists, load it
            cls.model = joblib.load(cls.model_path)
        elif data is not None:
            mclass = cls.get_opt('class', model.get('class', False))
            params = cls.get_opt('params', {})
            data = cls._filtercols(data)
            data = cls._filterrows(data)
            cls.model = cls._assemble_pipeline(data,
                                               mclass=mclass,
                                               params=params)
            # train the model on everything except the target column
            target = data[target_col]
            train = data[[c for c in data if c != target_col]]
            # cross validation runs before the background fit is queued
            cls.cross_validation(train, target)
            gramex.service.threadpool.submit(_fit, cls.model, train, target,
                                             cls.model_path, cls.name)
        cls.config_store.flush()