Exemple #1
0
 def _get_integrations(self, publish=False):
     """Return the integration objects available to this company.

     Args:
         publish: when exactly ``True``, only integrations whose stored
             record has ``publish`` set to ``True`` are considered;
             for any other value every stored integration is considered.

     Returns:
         A list with the result of ``self._get_integration`` for each
         selected integration name, excluding results that are the
         ``True`` or ``False`` singletons.
     """
     # Fetch the stored records once and reuse them for either branch.
     records = get_db_integrations(self.company_id)
     if publish is True:
         names = [
             name for name, record in records.items()
             if record.get('publish') is True
         ]
     else:
         names = list(records)

     candidates = [self._get_integration(name) for name in names]
     # Bare booleans returned by _get_integration are not usable
     # integration objects, so they are filtered out by identity.
     return [x for x in candidates if x is not True and x is not False]
Exemple #2
0
 def get(self):
     """Return the stored integrations of the requesting company.

     The literal ``False`` is forwarded as the second positional
     argument of ``get_db_integrations`` (presumably its
     ``sensitive_info`` flag - verify against that helper).
     """
     return get_db_integrations(request.company_id, False)
Exemple #3
0
 def get(self):
     """Return the names of the requesting company's integrations.

     Returns:
         A dict with a single ``'integrations'`` key holding the list
         obtained by iterating the result of ``get_db_integrations``.
     """
     company_integrations = get_db_integrations(request.company_id, False)
     return {'integrations': list(company_integrations)}
Exemple #4
0
    def result(self, query, request_env, mindsdb_env, session):
        """Build the cursor response for a ``find`` query.

        ``query['find']`` selects what is returned:

        * ``'predictors'`` - one row per model reported by
          ``mindsdb_native.get_models()``.
        * the name of an existing model - predictions made with that
          model. The input is ``query['filter']`` itself, the result of
          its ``select_data_query`` (stored as a temporary datasource),
          or the datasource named by its ``external_datasource``.
        * anything else - an empty batch.

        If ``query['projection']`` is present it is applied to the rows.

        Args:
            query: the command document; reads ``find`` and the optional
                ``filter`` and ``projection`` keys.
            request_env: not used by this method.
            mindsdb_env: mapping providing ``mindsdb_native``,
                ``data_store``, ``company_id`` and ``config``.
            session: not used by this method.

        Returns:
            ``{'cursor': {...}, 'ok': 1}`` where the cursor carries the
            rows under ``firstBatch``.

        Raises:
            Exception: if no connection can be found for
                ``select_data_query`` or the requested external
                datasource does not exist.
        """
        native = mindsdb_env['mindsdb_native']
        models = native.get_models()
        model_names = [x['name'] for x in models]
        table = query['find']
        where_data = query.get('filter', {})

        if table == 'predictors':
            data = [{
                'name': x['name'],
                'status': x['status'],
                'accuracy': (
                    str(x['accuracy']) if x['accuracy'] is not None else None
                ),
                'predict': ', '.join(
                    x['predict'] if isinstance(x['predict'], list)
                    else [x['predict']]
                ),
                'select_data_query': '',
                'external_datasource': '',
                'training_options': ''
            } for x in models]
        elif table in model_names:
            # prediction
            model = native.get_model_data(name=table)

            predicted_columns = model['predict']
            if not isinstance(predicted_columns, list):
                predicted_columns = [predicted_columns]

            # Predicted columns with a numeric dtype also get *_min / *_max
            # values. Resolved before any datasource is created so that a
            # missing dtype entry fails early.
            min_max_keys = [
                col for col in predicted_columns
                if model['dtype_dict'][col] in (dtype.integer, dtype.float)
            ]

            datasource = where_data
            # Name of the temporary datasource created for
            # 'select_data_query'. Kept separate from any external
            # datasource name so that cleanup never removes user data.
            temp_ds_name = None
            try:
                if 'select_data_query' in where_data:
                    integrations = get_db_integrations(
                        mindsdb_env['company_id']).keys()
                    connection = where_data.get('connection')
                    if connection is None:
                        if 'default_mongodb' in integrations:
                            connection = 'default_mongodb'
                        else:
                            # Fall back to the first 'mongodb_*' integration.
                            connection = next(
                                (name for name in integrations
                                 if name.startswith('mongodb_')),
                                None)

                    if connection is None:
                        raise Exception(
                            "Can't find connection from which fetch data")

                    vacant_name = mindsdb_env['data_store'].get_vacant_name(
                        'temp')
                    mindsdb_env['data_store'].save_datasource(
                        name=vacant_name,
                        source_type=connection,
                        source=where_data['select_data_query'])
                    temp_ds_name = vacant_name
                    datasource = mindsdb_env['data_store'].get_datasource_obj(
                        temp_ds_name, raw=True)

                if 'external_datasource' in where_data:
                    ds_name = where_data['external_datasource']
                    if mindsdb_env['data_store'].get_datasource(
                            ds_name) is None:
                        raise Exception(f"Datasource {ds_name} not exists")
                    datasource = mindsdb_env['data_store'].get_datasource_obj(
                        ds_name, raw=True)

                if isinstance(datasource, OrderedDict):
                    datasource = dict(datasource)

                pred_dict_arr, explanations = native.predict(
                    table, datasource, 'dict&explain')
            finally:
                # Always drop the temporary datasource, including when the
                # lookup or the prediction above fails.
                if temp_ds_name is not None:
                    mindsdb_env['data_store'].delete_datasource(temp_ds_name)

            all_columns = list(model['dtype_dict'].keys())

            data = []
            for i, prediction in enumerate(pred_dict_arr):
                explanation = explanations[i]

                row = {}
                # Merge the per-column payloads, then replace each column
                # entry with its predicted value.
                for value in prediction.values():
                    row.update(value)
                row.pop('predicted_value', None)
                for key in prediction:
                    row[key] = prediction[key]['predicted_value']
                # Columns known to the model but absent from the output
                # are reported as None.
                for key in all_columns:
                    row.setdefault(key, None)

                for key in predicted_columns:
                    row[key + '_confidence'] = explanation[key]['confidence']
                    row[key + '_explain'] = explanation[key]
                for key in min_max_keys:
                    row[key + '_min'] = (
                        explanation[key]['confidence_lower_bound'])
                    row[key + '_max'] = (
                        explanation[key]['confidence_upper_bound'])
                data.append(row)
        else:
            # probably wrong table name. Mongo in this case returns empty data
            data = []

        if 'projection' in query and len(data) > 0:
            true_filter = []
            false_filter = []
            for key, value in query['projection'].items():
                if helpers.is_true(value):
                    true_filter.append(key)
                else:
                    false_filter.append(key)

            # The key set of the first row is used for every row.
            keys = list(data[0].keys())
            del_id = '_id' in false_filter
            if len(true_filter) > 0:
                # Inclusion mode: keep only the requested keys; '_id' is
                # kept unless it was explicitly excluded.
                for row in data:
                    for key in keys:
                        if key != '_id':
                            if key not in true_filter:
                                del row[key]
                        elif del_id:
                            del row[key]
            else:
                # Exclusion mode: drop only the keys that were switched off.
                for row in data:
                    for key in false_filter:
                        if key in row:
                            del row[key]

        db = mindsdb_env['config']['api']['mongodb']['database']

        cursor = {
            'id': Int64(0),
            'ns': f"{db}.$cmd.{query['find']}",
            'firstBatch': data
        }
        return {'cursor': cursor, 'ok': 1}
Exemple #5
0
    dbw = DatabaseWrapper(COMPANY_ID)
    model_interface = ModelInterfaceWrapper(ModelInterface(), COMPANY_ID)
    raw_model_data_arr = model_interface.get_models()
    model_data_arr = []
    for model in raw_model_data_arr:
        if model['status'] == 'complete':
            x = model_interface.get_model_data(model['name'])
            try:
                model_data_arr.append(
                    model_interface.get_model_data(model['name']))
            except Exception:
                pass

    is_cloud = config.get('cloud', False)
    if not is_cloud:
        for integration_name in get_db_integrations(COMPANY_ID,
                                                    sensitive_info=True):
            print(f"Setting up integration: {integration_name}")
            if get_db_integration(integration_name,
                                  COMPANY_ID).get('publish', False):
                # do setup and register only if it is 'publish' integration
                dbw.setup_integration(integration_name)
                dbw.register_predictors(model_data_arr,
                                        integration_name=integration_name)

        for integration_name in config.get('integrations', {}):
            try:
                it = get_db_integration(integration_name, None)
                if it is not None:
                    remove_db_integration(integration_name, None)
                print(f'Adding: {integration_name}')
                add_db_integration(
Exemple #6
0
    def _result(self, query, request_env, mindsdb_env):
        """Handle an ``insert`` into ``predictors`` by training a predictor.

        Exactly one document is accepted. It must carry ``name`` and
        ``predict`` plus either ``external_datasource`` (a string naming
        an existing datasource) or ``select_data_query`` (a dict saved
        as a new datasource through a MongoDB integration). An ``_id``
        key, if present, is removed from the document.

        Args:
            query: the command document; reads ``insert`` and
                ``documents``.
            request_env: not used by this method.
            mindsdb_env: mapping providing ``mindsdb_native``,
                ``data_store`` and ``company_id``.

        Returns:
            ``{"n": <number of documents>, "ok": 1}``.

        Raises:
            Exception: on any validation failure: wrong table, wrong
                number of documents, unknown or missing fields, a
                duplicate predictor name, no usable connection, an
                unknown datasource, or a ``predict`` column that the
                datasource does not contain.
        """
        table = query['insert']
        if table != 'predictors':
            raise Exception("Only insert to 'predictors' table allowed")

        predictors_columns = [
            'name', 'status', 'accuracy', 'predict', 'select_data_query',
            'external_datasource', 'training_options', 'connection'
        ]

        models = mindsdb_env['mindsdb_native'].get_models()

        if len(query['documents']) != 1:
            raise Exception("Must be inserted just one predictor at time")

        for doc in query['documents']:
            if '_id' in doc:
                del doc['_id']

            bad_columns = [x for x in doc if x not in predictors_columns]
            if bad_columns:
                raise Exception(
                    f"Is no possible insert this columns to 'predictors' collection: {', '.join(bad_columns)}"
                )

            if 'name' not in doc:
                raise Exception("Please, specify 'name' field")

            if 'predict' not in doc:
                raise Exception("Please, specify 'predict' field")

            if any(x['name'] == doc['name'] for x in models):
                raise Exception(
                    f"Predictor with name '{doc['name']}' already exists")

            is_external_datasource = 'external_datasource' in doc and isinstance(
                doc['external_datasource'], str)
            is_select_data_query = 'select_data_query' in doc and isinstance(
                doc['select_data_query'], dict)

            if is_external_datasource and is_select_data_query:
                raise Exception(
                    "'external_datasource' and 'select_data_query' should not be used in one query"
                )
            if not (is_external_datasource or is_select_data_query):
                raise Exception(
                    "in query should be 'external_datasource' or 'select_data_query'"
                )

            kwargs = doc.get('training_options', {})
            data_store = mindsdb_env['data_store']

            if is_select_data_query:
                integrations = get_db_integrations(
                    mindsdb_env['company_id']).keys()
                connection = doc.get('connection')
                if connection is None:
                    if 'default_mongodb' in integrations:
                        connection = 'default_mongodb'
                    else:
                        # Fall back to the first 'mongodb_*' integration.
                        connection = next(
                            (name for name in integrations
                             if name.startswith('mongodb_')),
                            None)

                if connection is None:
                    raise Exception("Can't find connection for data source")

                ds_name = data_store.get_vacant_name(doc['name'])
                data_store.save_datasource(
                    name=ds_name,
                    source_type=connection,
                    source=dict(doc['select_data_query']))
            else:
                # The validation above guarantees this is the
                # external-datasource case.
                ds_name = doc['external_datasource']

            predict = doc['predict']
            if not isinstance(predict, list):
                predict = [x.strip() for x in predict.split(',')]

            datasource_info = data_store.get_datasource(ds_name)
            if datasource_info is None:
                # An unknown datasource name would otherwise surface as an
                # opaque TypeError on the subscript below.
                raise Exception(f"Datasource {ds_name} not exists")

            ds_columns = [x['name'] for x in datasource_info['columns']]
            for col in predict:
                if col not in ds_columns:
                    # Do not leave behind the datasource created just for
                    # this insert.
                    if is_select_data_query:
                        data_store.delete_datasource(ds_name)
                    raise Exception(f"Column '{col}' not exists")

            datasource_record = session.query(Datasource).filter_by(
                company_id=mindsdb_env['company_id'], name=ds_name).first()
            mindsdb_env['mindsdb_native'].learn(
                doc['name'],
                data_store.get_datasource_obj(ds_name, raw=True),
                predict,
                datasource_record.id,
                kwargs=dict(kwargs))

        result = {"n": len(query['documents']), "ok": 1}

        return result