Example #1
def start(config, initial=False):
    if not initial:
        print('\n\nWarning, this process should not have been started... nothing is "wrong", but it needlessly ate away a tiny bit of precious compute!\n\n')
    config = Config(config)
    run_server(config)
Example #2
def start(verbose, no_studio):
    config = Config()

    initialize_log(config, 'http', wrap_print=True)

    # start static initialization in a separate thread
    init_static_thread = None
    if not no_studio:
        init_static_thread = threading.Thread(target=initialize_static)
        init_static_thread.start()

    app, api = initialize_flask(config, init_static_thread, no_studio)
    Compress(app)
    initialize_interfaces(app)

    static_root = config['paths']['static']
    if os.path.isabs(static_root) is False:
        static_root = os.path.join(os.getcwd(), static_root)
    static_root = Path(static_root)

    @app.route('/', defaults={'path': ''}, methods=['GET'])
    @app.route('/<path:path>', methods=['GET'])
    def root_index(path):
        if path.startswith('api/'):
            return {'message': 'wrong query'}, 400
        if static_root.joinpath(path).is_file():
            return send_from_directory(static_root, path)
        else:
            return send_from_directory(static_root, 'index.html')

    api.add_namespace(predictor_ns)
    api.add_namespace(datasource_ns)
    api.add_namespace(utils_ns)
    api.add_namespace(conf_ns)
    api.add_namespace(stream_ns)

    @api.errorhandler(Exception)
    def handle_exception(e):
        get_log('http').error(f'http exception: {e}')
        # pass through HTTP errors
        if isinstance(e, HTTPException):
            return {'message': str(e)}, e.code, e.get_response().headers
        name = getattr(type(e), '__name__') or 'Unknown error'
        return {'message': f'{name}: {str(e)}'}, 500

    @app.teardown_appcontext
    def remove_session(*args, **kwargs):
        session.close()

    @app.before_request
    def before_request():
        company_id = request.headers.get('company-id')

        if company_id is not None:
            try:
                company_id = int(company_id)
            except Exception as e:
                get_log('http').error(
                    f'Could not parse company id: {company_id} | exception: {e}'
                )
                company_id = None

        request.company_id = company_id

        request.default_store = WithKWArgsWrapper(
            current_app.original_data_store, company_id=company_id)

        request.model_interface = WithKWArgsWrapper(
            current_app.original_model_interface, company_id=company_id)

        request.datasource_interface = WithKWArgsWrapper(
            current_app.original_datasource_interface, company_id=company_id)

    port = config['api']['http']['port']
    host = config['api']['http']['host']

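    # pick the WSGI server; MINDSDB_DEFAULT_SERVER overrides the default of waitress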
    server = os.environ.get('MINDSDB_DEFAULT_SERVER', 'waitress')

    # wait for static initialization to finish
    if not no_studio:
        init_static_thread.join()
    if server.lower() == 'waitress':
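        # both branches allow request bodies and input buffering of up to 10 GiB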
        if host in ('', '0.0.0.0'):
            serve(app,
                  port=port,
                  host='*',
                  max_request_body_size=1073741824 * 10,
                  inbuf_overflow=1073741824 * 10)
        else:
            serve(app,
                  port=port,
                  host=host,
                  max_request_body_size=1073741824 * 10,
                  inbuf_overflow=1073741824 * 10)
    elif server.lower() == 'flask':
        # this disables werkzeug's access log in the console
        log = logging.getLogger('werkzeug')
        log.setLevel(logging.WARNING)

        app.run(debug=False, port=port, host=host)
    elif server.lower() == 'gunicorn':
        try:
            from mindsdb.api.http.gunicorn_wrapper import StandaloneApplication
        except ImportError:
            print(
                "Gunicorn server is not available by default. If you wish to use it, please install 'gunicorn'"
            )
            return

        options = {
            'bind': f'{host}:{port}',
            'workers': min(max(mp.cpu_count(), 2), 3),
            'timeout': 600,
            'reuse_port': True,
            'threads': 4
        }
        StandaloneApplication(app, options).run()
Example #3
    def add_db_integration(self, name, data, company_id=None):
        if 'database_name' not in data:
            data['database_name'] = name
        if 'publish' not in data:
            data['publish'] = True

        bundle_path = data.get('secure_connect_bundle')
        if data.get('type') in (
                'cassandra', 'scylla') and self._is_not_empty_str(bundle_path):
            if os.path.isfile(bundle_path) is False:
                raise Exception(f'Can not get access to file: {bundle_path}')
            integrations_dir = Config()['paths']['integrations']

            p = Path(bundle_path)
            data['secure_connect_bundle'] = p.name

            integration_record = Integration(name=name,
                                             data=data,
                                             company_id=company_id)
            session.add(integration_record)
            session.commit()
            integration_id = integration_record.id

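            # copy the bundle into a per-integration directory and sync it to the shared file store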
            folder_name = f'integration_files_{company_id}_{integration_id}'
            integration_dir = os.path.join(integrations_dir, folder_name)
            create_directory(integration_dir)
            shutil.copyfile(bundle_path, os.path.join(integration_dir, p.name))

            FsStore().put(folder_name, integration_dir, integrations_dir)
        elif data.get('type') in ('mysql', 'mariadb'):
            ssl = data.get('ssl')
            files = {}
            temp_dir = None
            if ssl is True:
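                # ssl_ca, ssl_cert and ssl_key may be file paths or inline certs; inline values are written to temp files first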
                for key in ['ssl_ca', 'ssl_cert', 'ssl_key']:
                    if key not in data:
                        continue
                    if os.path.isfile(data[key]) is False:
                        if self._is_not_empty_str(data[key]) is False:
                            raise Exception(
                                "'ssl_ca', 'ssl_cert' and 'ssl_key' must be paths or inline certs"
                            )
                        if temp_dir is None:
                            temp_dir = tempfile.mkdtemp(
                                prefix='integration_files_')
                        cert_file_name = data.get(f'{key}_name', f'{key}.pem')
                        cert_file_path = os.path.join(temp_dir, cert_file_name)
                        with open(cert_file_path, 'wt') as f:
                            f.write(data[key])
                        data[key] = cert_file_path
                    files[key] = data[key]
                    p = Path(data[key])
                    data[key] = p.name
            integration_record = Integration(name=name,
                                             data=data,
                                             company_id=company_id)
            session.add(integration_record)
            session.commit()
            integration_id = integration_record.id

            if len(files) > 0:
                integrations_dir = Config()['paths']['integrations']
                folder_name = f'integration_files_{company_id}_{integration_id}'
                integration_dir = os.path.join(integrations_dir, folder_name)
                create_directory(integration_dir)
                for file_path in files.values():
                    p = Path(file_path)
                    shutil.copyfile(file_path,
                                    os.path.join(integration_dir, p.name))
                FsStore().put(folder_name, integration_dir, integrations_dir)
        else:
            integration_record = Integration(name=name,
                                             data=data,
                                             company_id=company_id)
            session.add(integration_record)
            session.commit()
Example #4
def initialize_interfaces(app):
    app.original_data_store = DataStore()
    app.original_model_interface = ModelInterface()
    config = Config()
    app.config_obj = config
Example #5
    def __init__(self, model_interface, data_store, integration_controller):
        self.config = Config()
        self.model_interface = model_interface
        self.data_store = data_store
        self.integration_controller = integration_controller
Example #6
    else:
        root_storage_dir = get_or_create_data_dir()
        os.environ['MINDSDB_STORAGE_DIR'] = root_storage_dir

    if os.path.isdir(root_storage_dir) is False:
        os.makedirs(root_storage_dir)

    if 'storage_db' in user_config:
        os.environ['MINDSDB_DB_CON'] = user_config['storage_db']
    elif os.environ.get('MINDSDB_DB_CON', '') == '':
        os.environ['MINDSDB_DB_CON'] = 'sqlite:///' + os.path.join(
            os.environ['MINDSDB_STORAGE_DIR'],
            'mindsdb.sqlite3.db') + '?check_same_thread=False&timeout=30'

    from mindsdb.utilities.config import Config
    mindsdb_config = Config()
    create_dirs_recursive(mindsdb_config['paths'])

    os.environ['DEFAULT_LOG_LEVEL'] = os.environ.get('DEFAULT_LOG_LEVEL',
                                                     'ERROR')
    os.environ['LIGHTWOOD_LOG_LEVEL'] = os.environ.get('LIGHTWOOD_LOG_LEVEL',
                                                       'ERROR')
    os.environ['MINDSDB_STORAGE_PATH'] = mindsdb_config['paths']['predictors']

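    # a telemetry opt-out file on disk takes precedence; otherwise CHECK_FOR_UPDATES or the cloud flag can disable telemetry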
    if telemetry_file_exists(mindsdb_config['storage_dir']):
        os.environ['CHECK_FOR_UPDATES'] = '0'
        print('\n x telemetry disabled! \n')
    elif os.getenv('CHECK_FOR_UPDATES', '1').lower() in [
            '0', 'false', 'False'
    ] or mindsdb_config.get('cloud', False):
        disable_telemetry(mindsdb_config['storage_dir'])
Example #7
from mindsdb.utilities.wizards import cli_config
from mindsdb.utilities.config import Config
from mindsdb.utilities.functions import args_parse

config_dir, storage_dir = get_or_create_dir_struct()

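# create a default config.json on first run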
config_path = os.path.join(config_dir, 'config.json')
if not os.path.exists(config_path):
    _ = cli_config(None, None, storage_dir, config_dir, use_default=True)

args = args_parse()
if args.config is not None:
    config_path = args.config

try:
    config = Config(config_path)
except Exception as e:
    print(str(e))
    sys.exit(1)

paths = config.paths
create_directory(paths['datasources'])
create_directory(paths['predictors'])
create_directory(paths['static'])
create_directory(paths['tmp'])

os.environ['MINDSDB_STORAGE_PATH'] = paths['predictors']

from mindsdb_native import *
# Figure out how to add this as a module
import lightwood
Example #8
    def __init__(self, company_id):
        self.config = Config()
        self.company_id = company_id
        self.integration_controller = WithKWArgsWrapper(
            IntegrationController(), company_id=company_id
        )
Example #9
class ModelController():
    config: Config
    fs_store: FsStore
    predictor_cache: Dict[str, Dict[str, Union[Any]]]
    ray_based: bool

    def __init__(self, ray_based: bool) -> None:
        self.config = Config()
        self.fs_store = FsStore()
        self.predictor_cache = {}
        self.ray_based = ray_based

    def _invalidate_cached_predictors(self) -> None:
        # @TODO: The cache can become stale if the respective ModelInterface is never invoked while predictors stay cached, no matter where it is invoked. In practice this shouldn't be a big issue.
        for predictor_name in list(self.predictor_cache.keys()):
            if (datetime.datetime.now() - self.predictor_cache[predictor_name]['created']).total_seconds() > 1200:
                del self.predictor_cache[predictor_name]

    def _lock_predictor(self, id: int, mode: str) -> None:
        from mindsdb.interfaces.storage.db import session, Semaphor

        while True:
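            # spin until an existing 'read' lock is compatible with this request or we manage to insert our own Semaphor row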
            semaphor_record = session.query(Semaphor).filter_by(entity_id=id, entity_type='predictor').first()
            if semaphor_record is not None:
                if mode == 'read' and semaphor_record.action == 'read':
                    return True
            try:
                semaphor_record = Semaphor(entity_id=id, entity_type='predictor', action=mode)
                session.add(semaphor_record)
                session.commit()
                return True
            except Exception:
                pass
            time.sleep(1)

    def _unlock_predictor(self, id: int) -> None:
        from mindsdb.interfaces.storage.db import session, Semaphor
        semaphor_record = session.query(Semaphor).filter_by(entity_id=id, entity_type='predictor').first()
        if semaphor_record is not None:
            session.delete(semaphor_record)
            session.commit()

    @contextmanager
    def _lock_context(self, id, mode: str):
        try:
            self._lock_predictor(id, mode)
            yield True
        finally:
            self._unlock_predictor(id)

    def _get_from_data_df(self, from_data: dict) -> DataFrame:
        if from_data['class'] == 'QueryDS':
            ds = QueryDS(*from_data['args'], **from_data['kwargs'])
        else:
            ds_cls = getattr(mindsdb_datasources, from_data['class'])
            ds = ds_cls(*from_data['args'], **from_data['kwargs'])
        return ds.df

    def _unpack_old_args(
        self, from_data: dict, kwargs: dict, to_predict: Optional[Union[str, list]] = None
    ) -> Tuple[pd.DataFrame, dict, bool, dict]:
        problem_definition = kwargs or {}
        if isinstance(to_predict, str):
            problem_definition['target'] = to_predict
        elif isinstance(to_predict, list) and len(to_predict) == 1:
            problem_definition['target'] = to_predict[0]
        elif problem_definition.get('target') is None:
            raise Exception(
                f"Predict target must be 'str' or 'list' with 1 element. Got: {to_predict}"
            )

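        # expand dotted keys such as 'a.b.c' in kwargs into nested dictionaries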
        while '.' in str(list(kwargs.keys())):
            for k in list(kwargs.keys()):
                if '.' in k:
                    nks = k.split('.')
                    obj = kwargs
                    for nk in nks[:-1]:
                        if nk not in obj:
                            obj[nk] = {}
                        obj = obj[nk]
                    obj[nks[-1]] = kwargs[k]
                    del kwargs[k]

        join_learn_process = kwargs.get('join_learn_process', False)
        if 'join_learn_process' in kwargs:
            del kwargs['join_learn_process']

        # Adapt kwargs to problem definition
        if 'timeseries_settings' in kwargs:
            problem_definition['timeseries_settings'] = kwargs['timeseries_settings']

        if 'stop_training_in_x_seconds' in kwargs:
            problem_definition['time_aim'] = kwargs['stop_training_in_x_seconds']

        if kwargs.get('ignore_columns') is not None:
            problem_definition['ignore_features'] = kwargs['ignore_columns']

        json_ai_override = {}
        json_ai_keys = list(lightwood.JsonAI.__dict__['__annotations__'].keys())
        for k in kwargs:
            if k in json_ai_keys:
                json_ai_override[k] = kwargs[k]

        if (
            problem_definition.get('ignore_features') is not None and isinstance(problem_definition['ignore_features'], list) is False
        ):
            problem_definition['ignore_features'] = [problem_definition['ignore_features']]

        if from_data is not None:
            df = self._get_from_data_df(from_data)
        else:
            df = None

        return df, problem_definition, join_learn_process, json_ai_override

    @mark_process(name='learn')
    def learn(self, name: str, from_data: dict, to_predict: str, dataset_id: int, kwargs: dict,
              company_id: int, delete_ds_on_fail: Optional[bool] = False) -> None:
        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=name).first()
        if predictor_record is not None:
            raise Exception('Predictor name must be unique.')

        df, problem_definition, join_learn_process, json_ai_override = self._unpack_old_args(from_data, kwargs, to_predict)

        if 'url' in problem_definition:
            train_url = problem_definition['url'].get('train', None)
            predict_url = problem_definition['url'].get('predict', None)
            com_format = problem_definition['format']

            predictor_record = db.Predictor(
                company_id=company_id,
                name=name,
                dataset_id=dataset_id,
                mindsdb_version=mindsdb_version,
                lightwood_version=lightwood_version,
                to_predict=problem_definition['target'],
                learn_args=ProblemDefinition.from_dict(problem_definition).to_dict(),
                data={'name': name, 'train_url': train_url, 'predict_url': predict_url, 'format': com_format,
                      'status': 'complete' if train_url is None else 'training'},
                is_custom=True,
                # @TODO: For testing purposes, remove afterwards!
                dtype_dict=json_ai_override['dtype_dict'],
            )

            db.session.add(predictor_record)
            db.session.commit()
            if train_url is not None:
                p = LearnRemoteProcess(df, predictor_record.id)
                p.start()
                if join_learn_process:
                    p.join()
                    if not IS_PY36:
                        p.close()
                db.session.refresh(predictor_record)
            return

        problem_definition = ProblemDefinition.from_dict(problem_definition)

        predictor_record = db.Predictor(
            company_id=company_id,
            name=name,
            dataset_id=dataset_id,
            mindsdb_version=mindsdb_version,
            lightwood_version=lightwood_version,
            to_predict=problem_definition.target,
            learn_args=problem_definition.to_dict(),
            data={'name': name},
        )

        db.session.add(predictor_record)
        db.session.commit()
        predictor_id = predictor_record.id

        p = LearnProcess(df, problem_definition, predictor_id, delete_ds_on_fail, json_ai_override)
        p.start()
        if join_learn_process:
            p.join()
            if not IS_PY36:
                p.close()
        db.session.refresh(predictor_record)


    @mark_process(name='predict')
    def predict(self, name: str, when_data: Union[dict, list, pd.DataFrame], pred_format: str, company_id: int):
        original_name = name
        name = f'{company_id}@@@@@{name}'

        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=original_name).first()
        assert predictor_record is not None
        predictor_data = self.get_model_data(name, company_id)

        if isinstance(when_data, dict) and 'kwargs' in when_data and 'args' in when_data:
            ds_cls = getattr(mindsdb_datasources, when_data['class'])
            df = ds_cls(*when_data['args'], **when_data['kwargs']).df
        else:
            if isinstance(when_data, dict):
                when_data = [when_data]
            df = pd.DataFrame(when_data)

        if predictor_record.is_custom:
            if predictor_data['format'] == 'mlflow':
                resp = requests.post(predictor_data['predict_url'],
                                     data=df.to_json(orient='records'),
                                     headers={'content-type': 'application/json; format=pandas-records'})
                answer: List[object] = resp.json()

                predictions = pd.DataFrame({
                    'prediction': answer
                })

            elif predictor_data['format'] == 'ray_server':
                serialized_df = json.dumps(df.to_dict())
                resp = requests.post(predictor_data['predict_url'], json={'df': serialized_df})
                predictions = pd.DataFrame(resp.json())

        else:
            fs_name = f'predictor_{company_id}_{predictor_record.id}'

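            # invalidate the cached predictor if the stored record has been updated since it was cached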
            if (
                name in self.predictor_cache
                and self.predictor_cache[name]['updated_at'] != predictor_record.updated_at
            ):
                del self.predictor_cache[name]

            if name not in self.predictor_cache:
                # Clear the cache entirely if we have less than 1.2 GB left
                if psutil.virtual_memory().available < 1.2 * pow(10, 9):
                    self.predictor_cache = {}

                if predictor_data['status'] == 'complete':
                    self.fs_store.get(fs_name, fs_name, self.config['paths']['predictors'])
                    self.predictor_cache[name] = {
                        'predictor': lightwood.predictor_from_state(
                            os.path.join(self.config['paths']['predictors'], fs_name),
                            predictor_record.code
                        ),
                        'updated_at': predictor_record.updated_at,
                        'created': datetime.datetime.now(),
                        'code': predictor_record.code,
                        'pickle': str(os.path.join(self.config['paths']['predictors'], fs_name))
                    }
                else:
                    raise Exception(
                        f'Trying to predict using predictor {original_name} with status: {predictor_data["status"]}. Error is: {predictor_data.get("error", "unknown")}'
                    )
            predictions = self.predictor_cache[name]['predictor'].predict(df)
            # Below is useful for debugging caching and storage issues
            # del self.predictor_cache[name]

        predictions = predictions.to_dict(orient='records')
        target = predictor_record.to_predict[0]
        if pred_format in ('explain', 'dict', 'dict&explain'):
            explain_arr = []
            dict_arr = []
            for i, row in enumerate(predictions):
                obj = {
                    target: {
                        'predicted_value': row['prediction'],
                        'confidence': row.get('confidence', None),
                        'anomaly': row.get('anomaly', None),
                        'truth': row.get('truth', None)
                    }
                }
                if 'lower' in row:
                    obj[target]['confidence_lower_bound'] = row.get('lower', None)
                    obj[target]['confidence_upper_bound'] = row.get('upper', None)
                    
                explain_arr.append(obj)

                td = {'predicted_value': row['prediction']}
                for col in df.columns:
                    if col in row:
                        td[col] = row[col]
                    elif f'order_{col}' in row:
                        td[col] = row[f'order_{col}']
                    elif f'group_{col}' in row:
                        td[col] = row[f'group_{col}']
                    else:
                        original_index = row.get('original_index')
                        if original_index is None:
                            log.warning('original_index is None')
                            original_index = i
                        td[col] = df.iloc[original_index][col]
                dict_arr.append({target: td})
            if pred_format == 'explain':
                return explain_arr
            elif pred_format == 'dict':
                return dict_arr
            elif pred_format == 'dict&explain':
                return dict_arr, explain_arr
        # New format -- Try switching to this in 2-3 months for speed, for now above is ok
        else:
            return predictions

    @mark_process(name='analyse')
    def analyse_dataset(self, ds: dict, company_id: int) -> lightwood.DataAnalysis:
        ds_cls = getattr(mindsdb_datasources, ds['class'])
        df = ds_cls(*ds['args'], **ds['kwargs']).df
        analysis = lightwood.analyze_dataset(df)
        return analysis.to_dict()  # type: ignore

    def get_model_data(self, name, company_id: int):
        if '@@@@@' in name:
            sn = name.split('@@@@@')
            assert len(sn) < 3  # security
            name = sn[1]

        original_name = name
        name = f'{company_id}@@@@@{name}'

        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=original_name).first()
        assert predictor_record is not None

        linked_dataset = db.session.query(db.Dataset).get(predictor_record.dataset_id)

        data = deepcopy(predictor_record.data)
        data['dtype_dict'] = predictor_record.dtype_dict
        data['created_at'] = str(parse_datetime(str(predictor_record.created_at).split('.')[0]))
        data['updated_at'] = str(parse_datetime(str(predictor_record.updated_at).split('.')[0]))
        data['predict'] = predictor_record.to_predict[0]
        data['update'] = predictor_record.update_status
        data['mindsdb_version'] = predictor_record.mindsdb_version
        data['name'] = predictor_record.name
        data['code'] = predictor_record.code
        data['json_ai'] = predictor_record.json_ai
        data['data_source_name'] = linked_dataset.name if linked_dataset else None
        data['problem_definition'] = predictor_record.learn_args

        # assume older models are complete; this is only temporary
        if 'status' in predictor_record.data:
            data['status'] = predictor_record.data['status']
        elif 'error' in predictor_record.data:
            data['status'] = 'error'
        elif predictor_record.update_status == 'available':
            data['status'] = 'complete'
        elif predictor_record.json_ai is None and predictor_record.code is None:
            data['status'] = 'generating'
        elif predictor_record.data is None:
            data['status'] = 'editable'
        elif 'training_log' in predictor_record.data:
            data['status'] = 'training'
        elif 'error' not in predictor_record.data:
            data['status'] = 'complete'
        else:
            data['status'] = 'error'

        if data.get('accuracies', None) is not None:
            if len(data['accuracies']) > 0:
                data['accuracy'] = float(np.mean(list(data['accuracies'].values())))
        return data

    def get_model_description(self, name: str, company_id: int):
        """
        Similar to `get_model_data` but meant to be seen directly by the user, rather than parsed by something like the Studio predictor view.

        Uses `get_model_data` to compose this, but in the future we might want to make it independent if we deprecate `get_model_data`.

        :returns: Dictionary of the analysis (meant to be formatted by the APIs and displayed as json/yml/whatever)
        """ # noqa
        model_description = {}
        model_data = self.get_model_data(name, company_id)

        model_description['accuracies'] = model_data['accuracies']
        model_description['column_importances'] = model_data['column_importances']
        model_description['outputs'] = [model_data['predict']]
        model_description['inputs'] = [col for col in model_data['dtype_dict'] if col not in model_description['outputs']]
        model_description['datasource'] = model_data['data_source_name']
        model_description['model'] = ' --> '.join(str(k) for k in model_data['json_ai'])

        return model_description

    def get_models(self, company_id: int):
        models = []
        for db_p in db.session.query(db.Predictor).filter_by(company_id=company_id):
            model_data = self.get_model_data(db_p.name, company_id=company_id)
            reduced_model_data = {}

            for k in ['name', 'version', 'is_active', 'predict', 'status',
                      'current_phase', 'accuracy', 'data_source', 'update',
                      'data_source_name', 'mindsdb_version', 'error']:
                reduced_model_data[k] = model_data.get(k, None)

            for k in ['train_end_at', 'updated_at', 'created_at']:
                reduced_model_data[k] = model_data.get(k, None)
                if reduced_model_data[k] is not None:
                    try:
                        reduced_model_data[k] = parse_datetime(str(reduced_model_data[k]).split('.')[0])
                    except Exception as e:
                        # @TODO Does this ever happen?
                        log.error(f'Date parsing exception while parsing: {k} in get_models: {e}')
                        reduced_model_data[k] = parse_datetime(str(reduced_model_data[k]))

            models.append(reduced_model_data)
        return models

    def delete_model(self, name, company_id: int):
        original_name = name
        name = f'{company_id}@@@@@{name}'

        db_p = db.session.query(db.Predictor).filter_by(company_id=company_id, name=original_name).first()
        if db_p is None:
            raise Exception(f"Predictor '{name}' does not exist")
        db.session.delete(db_p)
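        # also try to remove the linked datasource, unless it was created from an uploaded file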
        if db_p.dataset_id is not None:
            try:
                dataset_record = db.Datasource.query.get(db_p.dataset_id)
                if (
                    isinstance(dataset_record.data, str)
                    and json.loads(dataset_record.data).get('source_type') != 'file'
                ):
                    DataStore().delete_datasource(dataset_record.name, company_id)
            except Exception:
                pass
        db.session.commit()

        DatabaseWrapper(company_id).unregister_predictor(name)

        # delete from s3
        self.fs_store.delete(f'predictor_{company_id}_{db_p.id}')

        return 0

    def rename_model(self, old_name, new_name, company_id: int):
        db_p = db.session.query(db.Predictor).filter_by(company_id=company_id, name=old_name).first()
        db_p.name = new_name
        db.session.commit()
        dbw = DatabaseWrapper(company_id)
        dbw.unregister_predictor(old_name)
        dbw.register_predictors([self.get_model_data(new_name, company_id)])

    @mark_process(name='learn')
    def update_model(self, name: str, company_id: int):
        # TODO: Add version check here once we're done debugging
        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=name).first()
        assert predictor_record is not None
        predictor_record.update_status = 'updating'
        db.session.commit()

        p = UpdateProcess(name, company_id)
        p.start()
        return 'Update in progress'

    @mark_process(name='learn')
    def generate_predictor(self, name: str, from_data: dict, dataset_id, problem_definition_dict: dict,
                           join_learn_process: bool, company_id: int):
        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=name).first()
        if predictor_record is not None:
            raise Exception('Predictor name must be unique.')

        df, problem_definition, _, _ = self._unpack_old_args(from_data, problem_definition_dict)

        problem_definition = ProblemDefinition.from_dict(problem_definition)

        predictor_record = db.Predictor(
            company_id=company_id,
            name=name,
            dataset_id=dataset_id,
            mindsdb_version=mindsdb_version,
            lightwood_version=lightwood_version,
            to_predict=problem_definition.target,
            learn_args=problem_definition.to_dict(),
            data={'name': name}
        )

        db.session.add(predictor_record)
        db.session.commit()
        predictor_id = predictor_record.id

        p = GenerateProcess(df, problem_definition, predictor_id)
        p.start()
        if join_learn_process:
            p.join()
            if not IS_PY36:
                p.close()
        db.session.refresh(predictor_record)

    def edit_json_ai(self, name: str, json_ai: dict, company_id=None):
        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=name).first()
        assert predictor_record is not None

        json_ai = lightwood.JsonAI.from_dict(json_ai)
        predictor_record.code = lightwood.code_from_json_ai(json_ai)   
        predictor_record.json_ai = json_ai.to_dict()
        db.session.commit()

    def code_from_json_ai(self, json_ai: dict, company_id=None):
        json_ai = lightwood.JsonAI.from_dict(json_ai)
        code = lightwood.code_from_json_ai(json_ai)
        return code

    def edit_code(self, name: str, code: str, company_id=None):
        """Edit an existing predictor's code"""
        if self.config.get('cloud', False):
            raise Exception('Code editing prohibited on cloud')

        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=name).first()
        assert predictor_record is not None

        lightwood.predictor_from_code(code)
        predictor_record.code = code
        predictor_record.json_ai = None
        db.session.commit()

    @mark_process(name='learn')
    def fit_predictor(self, name: str, from_data: dict, join_learn_process: bool, company_id: int) -> None:
        predictor_record = db.session.query(db.Predictor).filter_by(company_id=company_id, name=name).first()
        assert predictor_record is not None

        df = self._get_from_data_df(from_data)
        p = FitProcess(predictor_record.id, df)
        p.start()
        if join_learn_process:
            p.join()
            if not IS_PY36:
                p.close()
Example #10
    def __init__(self):
        self.config = Config()
        self.fs_store = FsStore()
        self.dir = self.config['paths']['datasources']
        self.model_interface = ModelInterface()
Example #11
            sys.stdout.flush()
            process.terminate()
            process.join()
            sys.stdout.flush()
        if ray_based:
            os.system('ray stop --force')
    except KeyboardInterrupt:
        sys.exit(0)
    except psutil.NoSuchProcess:
        pass


if __name__ == '__main__':
    mp.freeze_support()
    args = args_parse()
    config = Config()

    if args.verbose is True:
        # Figure this one out later
        pass

    os.environ['DEFAULT_LOG_LEVEL'] = config['log']['level']['console']
    os.environ['LIGHTWOOD_LOG_LEVEL'] = config['log']['level']['console']

    # Switch to this once the native interface has its own thread :/
    ctx = mp.get_context('spawn')

    from mindsdb.__about__ import __version__ as mindsdb_version
    print(f'Version {mindsdb_version}')

    print(f'Configuration file:\n   {config.config_path}')
Example #12
    def __init__(self, config):
        self.config = Config()
        self.mindsdb_native = NativeInterface()
        self.custom_models = CustomModels()
        self.ai_table = AITable_store()
        self.default_store = DataStore()
Example #13
def start(verbose, no_studio):
    config = Config()
    if verbose:
        config.set(['log', 'level', 'console'], 'DEBUG')

    initialize_log(config, 'http', wrap_print=True)

    # start static initialization in a separate thread
    init_static_thread = None
    if not no_studio:
        init_static_thread = threading.Thread(target=initialize_static,
                                              args=(config, ))
        init_static_thread.start()

    app, api = initialize_flask(config, init_static_thread, no_studio)
    initialize_interfaces(app)

    static_root = Path(config.paths['static'])

    @app.route('/', defaults={'path': ''}, methods=['GET'])
    @app.route('/<path:path>', methods=['GET'])
    def root_index(path):
        if path.startswith('api/'):
            return {'message': 'wrong query'}, 400
        if static_root.joinpath(path).is_file():
            return send_from_directory(config.paths['static'], path)
        else:
            return send_from_directory(config.paths['static'], 'index.html')

    api.add_namespace(predictor_ns)
    api.add_namespace(datasource_ns)
    api.add_namespace(utils_ns)
    api.add_namespace(conf_ns)

    @api.errorhandler(Exception)
    def handle_exception(e):
        get_log('http').error(f'http exception: {e}')
        # pass through HTTP errors
        if isinstance(e, HTTPException):
            return {'message': str(e)}, e.code, e.get_response().headers
        name = getattr(type(e), '__name__') or 'Unknown error'
        return {'message': f'{name}: {str(e)}'}, 500

    @app.teardown_appcontext
    def remove_session(*args, **kwargs):
        session.close()

    port = config['api']['http']['port']
    host = config['api']['http']['host']

    server = os.environ.get('MINDSDB_DEFAULT_SERVER', 'waitress')

    # wait for static initialization to finish
    if not no_studio:
        init_static_thread.join()
    if server.lower() == 'waitress':
        if host in ('', '0.0.0.0'):
            serve(app, port=port, host='*')
        else:
            serve(app, port=port, host=host)
    elif server.lower() == 'flask':
        # this disables werkzeug's access log in the console
        log = logging.getLogger('werkzeug')
        log.setLevel(logging.WARNING)

        app.run(debug=False, port=port, host=host)
    elif server.lower() == 'gunicorn':
        try:
            from mindsdb.api.http.gunicorn_wrapper import StandaloneApplication
        except ImportError:
            print(
                "Gunicorn server is not available by default. If you wish to use it, please install 'gunicorn'"
            )
            return

        options = {
            'bind': f'{host}:{port}',
            'workers': min(max(multiprocessing.cpu_count(), 2), 3)
        }
        StandaloneApplication(app, options).run()
Example #14
class ModelController():
    config: Config
    fs_store: FsStore
    predictor_cache: Dict[str, Dict[str, Union[Any]]]
    ray_based: bool

    def __init__(self, ray_based: bool) -> None:
        self.config = Config()
        self.fs_store = FsStore()
        self.predictor_cache = {}
        self.ray_based = ray_based

    def _invalidate_cached_predictors(self) -> None:
        # @TODO: The cache can become stale if the respective ModelInterface is never invoked while predictors stay cached, no matter where it is invoked. In practice this shouldn't be a big issue.
        for predictor_name in list(self.predictor_cache.keys()):
            if (datetime.datetime.now() -
                    self.predictor_cache[predictor_name]['created']
                ).total_seconds() > 1200:
                del self.predictor_cache[predictor_name]

    def _lock_predictor(self, id: int, mode: str) -> None:
        from mindsdb.interfaces.storage.db import session, Semaphor

        while True:
            semaphor_record = session.query(Semaphor).filter_by(
                entity_id=id, entity_type='predictor').first()
            if semaphor_record is not None:
                if mode == 'read' and semaphor_record.action == 'read':
                    return True
            try:
                semaphor_record = Semaphor(entity_id=id,
                                           entity_type='predictor',
                                           action=mode)
                session.add(semaphor_record)
                session.commit()
                return True
            except Exception:
                pass
            time.sleep(1)

    def _unlock_predictor(self, id: int) -> None:
        from mindsdb.interfaces.storage.db import session, Semaphor
        semaphor_record = session.query(Semaphor).filter_by(
            entity_id=id, entity_type='predictor').first()
        if semaphor_record is not None:
            session.delete(semaphor_record)
            session.commit()

    @contextmanager
    def _lock_context(self, id, mode: str):
        try:
            self._lock_predictor(id, mode)
            yield True
        finally:
            self._unlock_predictor(id)

    def _unpack_old_args(
        self,
        from_data: dict,
        kwargs: dict,
        to_predict: Optional[Union[str, list]] = None
    ) -> Tuple[pd.DataFrame, dict, bool]:
        if to_predict is not None:
            problem_definition = {
                'target':
                to_predict if isinstance(to_predict, str) else to_predict[0]
            }
        else:
            problem_definition = kwargs

        join_learn_process = kwargs.get('join_learn_process', False)
        if 'join_learn_process' in kwargs:
            del kwargs['join_learn_process']

        # Adapt kwargs to problem definition
        if 'timeseries_settings' in kwargs:
            problem_definition['timeseries_settings'] = kwargs[
                'timeseries_settings']

        if 'stop_training_in_x_seconds' in kwargs:
            problem_definition['time_aim'] = kwargs[
                'stop_training_in_x_seconds']

        ds_cls = getattr(mindsdb_datasources, from_data['class'])
        ds = ds_cls(*from_data['args'], **from_data['kwargs'])
        df = ds.df

        return df, problem_definition, join_learn_process

    @mark_process(name='learn')
    def learn(self, name: str, from_data: dict, to_predict: str,
              datasource_id: int, kwargs: dict, company_id: int) -> None:
        df, problem_definition, join_learn_process = self._unpack_old_args(
            from_data, kwargs, to_predict)
        p = LearnProcess(df, ProblemDefinition.from_dict(problem_definition),
                         name, company_id, datasource_id)
        p.start()
        if join_learn_process:
            p.join()
            if not IS_PY36:
                p.close()

    @mark_process(name='predict')
    def predict(self, name: str, when_data: Union[dict, list, pd.DataFrame],
                pred_format: str, company_id: int):
        original_name = name
        name = f'{company_id}@@@@@{name}'

        predictor_record = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=original_name).first()
        assert predictor_record is not None
        predictor_data = self.get_model_data(name, company_id)
        fs_name = f'predictor_{company_id}_{predictor_record.id}'

        if name not in self.predictor_cache:
            # Clear the cache entirely if we have less than 1.2 GB left
            if psutil.virtual_memory().available < 1.2 * pow(10, 9):
                self.predictor_cache = {}

            if predictor_data['status'] == 'complete':
                self.fs_store.get(fs_name, fs_name,
                                  self.config['paths']['predictors'])
                self.predictor_cache[name] = {
                    'predictor':
                    lightwood.predictor_from_state(
                        os.path.join(self.config['paths']['predictors'],
                                     fs_name), predictor_record.code),
                    'created':
                    datetime.datetime.now(),
                    'code':
                    predictor_record.code,
                    'pickle':
                    str(
                        os.path.join(self.config['paths']['predictors'],
                                     fs_name))
                }
            else:
                raise Exception(
                    f'Trying to predict using predictor {original_name} with status: {predictor_data["status"]}'
                )

        if isinstance(when_data,
                      dict) and 'kwargs' in when_data and 'args' in when_data:
            ds_cls = getattr(mindsdb_datasources, when_data['class'])
            df = ds_cls(*when_data['args'], **when_data['kwargs']).df
        else:
            if isinstance(when_data, dict):
                when_data = [when_data]
            df = pd.DataFrame(when_data)

        predictions = self.predictor_cache[name]['predictor'].predict(df)
        predictions = predictions.to_dict(orient='records')
        # Below is useful for debugging caching and storage issues
        # del self.predictor_cache[name]

        target = predictor_record.to_predict[0]
        if pred_format in ('explain', 'dict', 'dict&explain'):
            explain_arr = []
            dict_arr = []
            for i, row in enumerate(predictions):
                explain_arr.append({
                    target: {
                        'predicted_value': row['prediction'],
                        'confidence': row.get('confidence', None),
                        'confidence_lower_bound': row.get('lower', None),
                        'confidence_upper_bound': row.get('upper', None),
                        'anomaly': row.get('anomaly', None),
                        'truth': row.get('truth', None)
                    }
                })

                td = {'predicted_value': row['prediction']}
                for col in df.columns:
                    if col in row:
                        td[col] = row[col]
                    elif f'order_{col}' in row:
                        td[col] = row[f'order_{col}']
                    elif f'group_{col}' in row:
                        td[col] = row[f'group_{col}']
                    else:
                        td[col] = df.iloc[i][col]
                dict_arr.append({target: td})
            if pred_format == 'explain':
                return explain_arr
            elif pred_format == 'dict':
                return dict_arr
            elif pred_format == 'dict&explain':
                return dict_arr, explain_arr
        # New format -- Try switching to this in 2-3 months for speed, for now above is ok
        else:
            return predictions

    @mark_process(name='analyse')
    def analyse_dataset(self, ds: dict,
                        company_id: int) -> lightwood.DataAnalysis:
        ds_cls = getattr(mindsdb_datasources, ds['class'])
        df = ds_cls(*ds['args'], **ds['kwargs']).df
        analysis = lightwood.analyze_dataset(df)
        return analysis.to_dict()  # type: ignore

    def get_model_data(self, name, company_id: int):
        if '@@@@@' in name:
            sn = name.split('@@@@@')
            assert len(sn) < 3  # security
            name = sn[1]

        original_name = name
        name = f'{company_id}@@@@@{name}'

        predictor_record = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=original_name).first()
        assert predictor_record is not None

        linked_db_ds = db.session.query(db.Datasource).filter_by(
            company_id=company_id, id=predictor_record.datasource_id).first()

        # check update availability
        if version.parse(predictor_record.mindsdb_version) < version.parse(
                mindsdb_version):
            predictor_record.update_status = 'available'
            db.session.commit()

        data = deepcopy(predictor_record.data)
        data['dtype_dict'] = predictor_record.dtype_dict
        data['created_at'] = str(
            parse_datetime(str(predictor_record.created_at).split('.')[0]))
        data['updated_at'] = str(
            parse_datetime(str(predictor_record.updated_at).split('.')[0]))
        data['predict'] = predictor_record.to_predict[0]
        data['update'] = predictor_record.update_status
        data['name'] = predictor_record.name
        data['code'] = predictor_record.code
        data['json_ai'] = predictor_record.json_ai
        data['data_source_name'] = linked_db_ds.name if linked_db_ds else None
        data['problem_definition'] = predictor_record.learn_args

        # assume older models are complete; this is only temporary
        if predictor_record.update_status == 'available':
            data['status'] = 'complete'
        elif predictor_record.json_ai is None and predictor_record.code is None:
            data['status'] = 'generating'
        elif predictor_record.data is None:
            data['status'] = 'editable'
        elif 'training_log' in predictor_record.data:
            data['status'] = 'training'
        elif 'error' not in predictor_record.data:
            data['status'] = 'complete'
        else:
            data['status'] = 'error'

        if data.get('accuracies', None) is not None:
            if len(data['accuracies']) > 0:
                data['accuracy'] = float(
                    np.mean(list(data['accuracies'].values())))
        return data

    def get_models(self, company_id: int):
        models = []
        for db_p in db.session.query(
                db.Predictor).filter_by(company_id=company_id):
            model_data = self.get_model_data(db_p.name, company_id=company_id)
            reduced_model_data = {}

            for k in [
                    'name', 'version', 'is_active', 'predict', 'status',
                    'current_phase', 'accuracy', 'data_source', 'update',
                    'data_source_name'
            ]:
                reduced_model_data[k] = model_data.get(k, None)

            for k in ['train_end_at', 'updated_at', 'created_at']:
                reduced_model_data[k] = model_data.get(k, None)
                if reduced_model_data[k] is not None:
                    try:
                        reduced_model_data[k] = parse_datetime(
                            str(reduced_model_data[k]).split('.')[0])
                    except Exception as e:
                        # @TODO Does this ever happen?
                        log.error(
                            f'Date parsing exception while parsing: {k} in get_models: {e}')
                        reduced_model_data[k] = parse_datetime(
                            str(reduced_model_data[k]))

            models.append(reduced_model_data)
        return models

    def delete_model(self, name, company_id: int):
        original_name = name
        name = f'{company_id}@@@@@{name}'

        db_p = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=original_name).first()
        db.session.delete(db_p)
        db.session.commit()

        DatabaseWrapper(company_id).unregister_predictor(name)

        # delete from s3
        self.fs_store.delete(f'predictor_{company_id}_{db_p.id}')

        return 0

    def update_model(self, name: str, company_id: int):
        # TODO: Add version check here once we're done debugging
        p = UpdateProcess(name, company_id)
        p.start()
        return 'Update in progress'

    @mark_process(name='learn')
    def generate_predictor(self, name: str, from_data: dict, datasource_id,
                           problem_definition_dict: dict,
                           join_learn_process: bool, company_id: int):
        df, problem_definition, _ = self._unpack_old_args(
            from_data, problem_definition_dict)
        p = GenerateProcess(df,
                            ProblemDefinition.from_dict(problem_definition),
                            name, company_id, datasource_id)
        p.start()
        if join_learn_process:
            p.join()
            if not IS_PY36:
                p.close()

    def edit_json_ai(self, name: str, json_ai: dict, company_id=None):
        predictor_record = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=name).first()
        assert predictor_record is not None

        json_ai = lightwood.JsonAI.from_dict(json_ai)
        predictor_record.code = lightwood.code_from_json_ai(json_ai)
        predictor_record.json_ai = json_ai.to_dict()
        db.session.commit()

    def code_from_json_ai(self, json_ai: dict, company_id=None):
        json_ai = lightwood.JsonAI.from_dict(json_ai)
        code = lightwood.code_from_json_ai(json_ai)
        return code

    def edit_code(self, name: str, code: str, company_id=None):
        """Edit an existing predictor's code"""
        if self.config.get('cloud', False):
            raise Exception('Code editing prohibited on cloud')

        predictor_record = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=name).first()
        assert predictor_record is not None

        lightwood.predictor_from_code(code)
        predictor_record.code = code
        predictor_record.json_ai = None
        db.session.commit()

    @mark_process(name='learn')
    def fit_predictor(self, name: str, from_data: dict,
                      join_learn_process: bool, company_id: int) -> None:
        predictor_record = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=name).first()
        assert predictor_record is not None

        df, _, _ = self._unpack_old_args(from_data, {}, None)
        p = FitProcess(predictor_record.id, df)
        p.start()
        if join_learn_process:
            p.join()
            if not IS_PY36:
                p.close()
Example #15
    if os.path.isdir(root_storage_dir) is False:
        os.makedirs(root_storage_dir)

    if 'storage_db' in user_config:
        os.environ['MINDSDB_DB_CON'] = user_config['storage_db']
    elif os.environ.get('MINDSDB_DB_CON', '') == '':
        os.environ['MINDSDB_DB_CON'] = 'sqlite:///' + os.path.join(
            os.environ['MINDSDB_STORAGE_DIR'],
            'mindsdb.sqlite3.db') + '?check_same_thread=False'

    if 'company_id' in user_config:
        os.environ['MINDSDB_COMPANY_ID'] = user_config['company_id']

    from mindsdb.utilities.config import Config
    mindsdb_config = Config()
    create_dirs_recursive(mindsdb_config['paths'])

    os.environ['DEFAULT_LOG_LEVEL'] = os.environ.get('DEFAULT_LOG_LEVEL',
                                                     'ERROR')
    os.environ['LIGHTWOOD_LOG_LEVEL'] = os.environ.get('LIGHTWOOD_LOG_LEVEL',
                                                       'ERROR')
    os.environ['MINDSDB_STORAGE_PATH'] = mindsdb_config['paths']['predictors']

    if telemetry_file_exists(mindsdb_config['storage_dir']):
        os.environ['CHECK_FOR_UPDATES'] = '0'
        print('\n x telemetry disabled! \n')
    elif os.getenv('CHECK_FOR_UPDATES',
                   '1').lower() in ['0', 'false', 'False']:
        disable_telemetry(mindsdb_config['storage_dir'])
        print('\n x telemetry disabled \n')
Example #16
    def __init__(self, ray_based: bool) -> None:
        self.config = Config()
        self.fs_store = FsStore()
        self.predictor_cache = {}
        self.ray_based = ray_based
Example #17
import os
import shelve
import json
from abc import ABC, abstractmethod

import walrus
from mindsdb.utilities.config import Config

CONFIG = Config()


class BaseCache(ABC):
    def __init__(self):
        self.config = Config()

    @abstractmethod
    def delete(self):
        pass

    @abstractmethod
    def __getitem__(self, key):
        pass

    @abstractmethod
    def __setitem__(self, key, value):
        pass


class LocalCache(BaseCache):
    def __init__(self, name, *args, **kwargs):
        super().__init__()
Example #18
    def __init__(self, company_id):
        self.config = Config()
        self.company_id = company_id
Example #19
            process = api['process']
            childs = get_child_pids(process.pid)
            for p in childs:
                os.kill(p, signal.SIGTERM)
            sys.stdout.flush()
            process.terminate()
            process.join()
            sys.stdout.flush()
    except KeyboardInterrupt:
        sys.exit(0)


if __name__ == '__main__':
    mp.freeze_support()
    args = args_parse()
    config = Config()

    if args.verbose is True:
        config.set(['log', 'level', 'console'], 'DEBUG')

    os.environ['DEFAULT_LOG_LEVEL'] = config['log']['level']['console']
    os.environ['LIGHTWOOD_LOG_LEVEL'] = config['log']['level']['console']
    config.set(['mindsdb_last_started_at'], str(datetime.datetime.now()))
    
    from lightwood.__about__ import __version__ as lightwood_version
    from mindsdb_native.__about__ import __version__ as mindsdb_native_version
    from mindsdb.__about__ import __version__ as mindsdb_version
    print('Versions:')
    print(f' - lightwood {lightwood_version}')
    print(f' - MindsDB_native {mindsdb_native_version}')
    print(f' - MindsDB {mindsdb_version}')
Example #20
import os
import sys
import logging
import traceback

from mindsdb.interfaces.storage.db import session, Log
from mindsdb.utilities.config import Config

global_config = Config().get_all()
telemetry_enabled = os.getenv('CHECK_FOR_UPDATES',
                              '1').lower() not in ['0', 'false', 'False']

if telemetry_enabled:
    import sentry_sdk
    from sentry_sdk import capture_exception, capture_message, add_breadcrumb
    sentry_sdk.init(
        "https://[email protected]/5633566",
        traces_sample_rate=0  #Set to `1` to experiment with performance metrics
    )


class LoggerWrapper(object):
    def __init__(self, writer_arr, default_writer_pos):
        self._writer_arr = writer_arr
        self.default_writer_pos = default_writer_pos

    def write(self, message):
        if len(message.strip(' \n')) == 0:
            return
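        # messages tagged with DEBUG are routed to the first writer in the array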
        if 'DEBUG:' in message:
            self._writer_arr[0](message)
Example #21
    def __init__(self):
        self.config = Config()
Example #22
    def __init__(self):
        self.config = Config()
        self.fs_store = FsSotre()
        self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        self.dbw = DatabaseWrapper()
        self.predictor_cache = {}
Example #23
from mindsdb.utilities.wizards import cli_config
from mindsdb.utilities.config import Config
from mindsdb.utilities.functions import args_parse

config_dir, storage_dir = get_or_create_dir_struct()

config_path = os.path.join(config_dir, 'config.json')
if not os.path.exists(config_path):
    _ = cli_config(None, None, storage_dir, config_dir, use_default=True)

args = args_parse()
if args.config is not None:
    config_path = args.config

try:
    mindsdb_config = Config(config_path)
except Exception as e:
    print(str(e))
    sys.exit(1)

paths = mindsdb_config.paths
for path in paths.values():
    create_directory(path)

os.environ['MINDSDB_STORAGE_PATH'] = paths['predictors']
os.environ['DEFAULT_LOG_LEVEL'] = 'ERROR'
os.environ['LIGHTWOOD_LOG_LEVEL'] = 'ERROR'

from mindsdb_native import *
# Figure out how to add this as a module
import lightwood
Example #24
    def run(self):
        '''
        run in a subprocess because of:
        ValueError: signal only works in main thread

        would a celery worker be a better fit here?
        '''
        import sys
        import mindsdb_native

        from mindsdb.utilities.config import Config

        name, from_data, to_predict, kwargs, config, trx_type = self._args
        config = Config(config)

        mdb = mindsdb_native.Predictor(name=name)

        if trx_type == 'learn':
            data_source = getattr(mindsdb_native,
                                  from_data['class'])(*from_data['args'],
                                                      **from_data['kwargs'])

            kwargs['use_gpu'] = config.get('use_gpu', None)
            mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)

            stats = mdb.get_model_data()['data_analysis_v2']

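            # register the freshly trained predictor with the default ClickHouse and MariaDB integrations when they are enabled; failures are silently ignored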
            try:
                assert (config['integrations']['default_clickhouse']['enabled']
                        == True)
                from mindsdb.interfaces.clickhouse.clickhouse import Clickhouse
                clickhouse = Clickhouse(config)
                clickhouse.register_predictor(name, stats)
            except:
                pass

            try:
                assert (config['integrations']['default_mariadb']['enabled'] ==
                        True)
                from mindsdb.interfaces.mariadb.mariadb import Mariadb
                mariadb = Mariadb(config)
                mariadb.register_predictor(name, stats)
            except:
                pass

        if trx_type == 'predict':
            if isinstance(from_data, dict):
                when = from_data
                when_data = None
            else:
                when_data = getattr(mindsdb_native,
                                    from_data['class'])(*from_data['args'],
                                                        **from_data['kwargs'])
                when = None

            kwargs['use_gpu'] = config.get('use_gpu', None)

            predictions = mdb.predict(when=when,
                                      when_data=when_data,
                                      run_confidence_variation_analysis=True,
                                      **kwargs)

            return predictions
Example #25
import inspect
import subprocess

import MySQLdb

from mindsdb.interfaces.native.mindsdb import MindsdbNative
from mindsdb.utilities.config import Config


TEST_CONFIG = '/home/maxs/dev/mdb/venv/sources/mindsdb/test_config.json'

test_csv = 'tests/home_rentals.csv'
test_data_table = 'home_rentals_400'
test_predictor_name = 'test_predictor_400'

config = Config(TEST_CONFIG)

def query_ch(query):
    if 'CREATE ' not in query.upper() and 'INSERT ' not in query.upper():
        query += ' FORMAT JSON'

    user = config['integrations']['default_clickhouse']['user']
    password = config['integrations']['default_clickhouse']['password']

    connect_string = 'http://{}:{}'.format(
        'localhost',
        8123
    )

    params = {}
Example #26
    def __init__(self, company_id):
        self.config = Config()
        self.company_id = company_id
        self.datasource_interface = WithKWArgsWrapper(DatasourceController(),
                                                      company_id=company_id)