def _get_integration(self, db_alias):
    if self.config['integrations'][db_alias]['publish']:
        db_type = self.config['integrations'][db_alias]['type']
        if db_type == 'clickhouse':
            return Clickhouse(self.config, db_alias)
        elif db_type == 'mariadb':
            return Mariadb(self.config, db_alias)
        elif db_type == 'mysql':
            return MySQL(self.config, db_alias)
        elif db_type == 'postgres':
            return PostgreSQL(self.config, db_alias)
        elif db_type == 'mssql':
            return MSSQL(self.config, db_alias)
        elif db_type == 'mongodb':
            return MongoDB(self.config, db_alias)
        else:
            logger.warning(f'Unknown integration type: {db_type} for database called: {db_alias}')
            return False
    return True
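# Illustrative only: the shape of `config['integrations']` that
# `_get_integration` expects. The alias, host and port below are made up.
#
#     config = {
#         'integrations': {
#             'my_clickhouse': {
#                 'publish': True,
#                 'enabled': True,
#                 'type': 'clickhouse',
#                 'host': '127.0.0.1',
#                 'port': 8123
#             }
#         }
#     }
#
# With that config, `self._get_integration('my_clickhouse')` returns a
# `Clickhouse` instance; an unrecognised `type` logs a warning and returns
# False, and a non-published integration falls through to `return True`.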
def _get_integrations(self):
    # @TODO Once we have persistent state sorted out, this should be simplified
    # so as not to refresh the existing integrations every single time
    integration_arr = []
    for db_alias in self.config['integrations']:
        if self.config['integrations'][db_alias]['enabled']:
            db_type = self.config['integrations'][db_alias]['type']
            if db_type == 'clickhouse':
                integration_arr.append(Clickhouse(self.config, db_alias))
            elif db_type == 'mariadb':
                integration_arr.append(Mariadb(self.config, db_alias))
            elif db_type == 'mysql':
                integration_arr.append(MySQL(self.config, db_alias))
            elif db_type == 'postgres':
                integration_arr.append(PostgreSQL(self.config, db_alias))
            else:
                print(f'Unknown integration type: {db_type} for database called: {db_alias}')
    return integration_arr
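# Sketch, not part of the codebase: per the @TODO above, the if/elif
# dispatch duplicated between _get_integration and _get_integrations could
# be collapsed into a single type -> class mapping (`INTEGRATION_CLASSES`
# is a hypothetical name):
#
#     INTEGRATION_CLASSES = {
#         'clickhouse': Clickhouse,
#         'mariadb': Mariadb,
#         'mysql': MySQL,
#         'postgres': PostgreSQL
#     }
#
#     klass = INTEGRATION_CLASSES.get(db_type)
#     if klass is None:
#         print(f'Unknown integration type: {db_type} for database called: {db_alias}')
#     else:
#         integration_arr.append(klass(self.config, db_alias))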
def select(self, table, columns=None, where=None, where_data=None, order_by=None, group_by=None,
           integration_name=None, integration_type=None):
    ''' NOTE WHERE statements can be just $eq joined with 'and' '''
    if table == 'predictors':
        return self._select_predictors()
    if table == 'commands':
        return []
    if table == 'datasources':
        return self._select_datasources()
    if self.ai_table.get_ai_table(table):
        return self._select_from_ai_table(table, columns, where)

    original_when_data = None
    if 'when_data' in where_data:
        if len(where_data) > 1:
            raise ValueError("No other keys should be used in 'where' when 'when_data' is used")
        try:
            original_when_data = where_data['when_data']
            where_data = json.loads(where_data['when_data'])
            if isinstance(where_data, list) is False:
                where_data = [where_data]
        except Exception:
            raise ValueError(f'''Error while parsing 'when_data'="{where_data}"''')

    select_data_query = None
    if integration_name is not None and 'select_data_query' in where_data:
        select_data_query = where_data['select_data_query']
        del where_data['select_data_query']

        integration_data = self.datasource_interface.get_db_integration(integration_name)
        if integration_type == 'clickhouse':
            ch = Clickhouse(self.config, integration_name, integration_data)
            res = ch._query(select_data_query.strip(' ;\n') + ' FORMAT JSON')
            data = res.json()['data']
        elif integration_type == 'mariadb':
            maria = Mariadb(self.config, integration_name, integration_data)
            data = maria._query(select_data_query)
        elif integration_type == 'mysql':
            mysql = MySQL(self.config, integration_name, integration_data)
            data = mysql._query(select_data_query)
        elif integration_type == 'postgres':
            postgres = PostgreSQL(self.config, integration_name, integration_data)
            data = postgres._query(select_data_query)
        elif integration_type == 'mssql':
            mssql = MSSQL(self.config, integration_name, integration_data)
            data = mssql._query(select_data_query, fetch=True)
        else:
            raise Exception(f'Unknown database type: {integration_type}')

        where_data = data

    new_where = {}
    if where_data is None:
        # no literal rows were supplied, so build a single row
        # from the '$eq' conditions in 'where'
        for key, value in where.items():
            if isinstance(value, dict) is False or len(value.keys()) != 1 or list(value.keys())[0] != '$eq':
                # TODO value should be just string or number
                raise Exception()
            new_where[key] = value['$eq']
        if len(new_where) == 0:
            return []
        where_data = [new_where]

    model = self.model_interface.get_model_data(name=table)
    columns = list(model['dtype_dict'].keys())

    predicted_columns = model['predict']
    if not isinstance(predicted_columns, list):
        predicted_columns = [predicted_columns]

    original_target_values = {}
    for col in predicted_columns:
        if where_data is not None:
            if col in where_data:
                original_target_values[col + '_original'] = list(where_data[col])
            else:
                original_target_values[col + '_original'] = [None] * len(where_data)
        else:
            original_target_values[col + '_original'] = [None]

    pred_dicts, explanations = self.model_interface.predict(table, where_data, 'dict&explain')

    # transform predictions to a more convenient view
    new_pred_dicts = []
    for row in pred_dicts:
        new_row = {}
        for key in row:
            new_row.update(row[key])
            new_row[key] = new_row['predicted_value']
            del new_row['predicted_value']
        new_pred_dicts.append(new_row)
    pred_dicts = new_pred_dicts

    timeseries_settings = model['problem_definition']['timeseries_settings']
    if timeseries_settings['is_timeseries'] is True:
        __mdb_make_predictions = set(
            [row.get('__mdb_make_predictions', True) for row in where_data]
        ) == {True}

        predict = model['predict']
        group_by = timeseries_settings['group_by']
        order_by_column = timeseries_settings['order_by'][0]
        nr_predictions = timeseries_settings['nr_predictions']

        groups = set()
        for row in pred_dicts:
            groups.add(tuple([row[x] for x in group_by]))

        # split rows by groups
        rows_by_groups = {}
        for group in groups:
            rows_by_groups[group] = {'rows': [], 'explanations': []}
            for row_index, row in enumerate(pred_dicts):
                is_wrong_group = False
                for i, group_by_key in enumerate(group_by):
                    if row[group_by_key] != group[i]:
                        is_wrong_group = True
                        break
                if not is_wrong_group:
                    rows_by_groups[group]['rows'].append(row)
                    rows_by_groups[group]['explanations'].append(explanations[row_index])

        for group, data in rows_by_groups.items():
            rows = data['rows']
            explanations = data['explanations']

            if len(rows) == 0:
                break

            for row in rows:
                predictions = row[predict]
                if isinstance(predictions, list) is False:
                    predictions = [predictions]

                date_values = row[order_by_column]
                if isinstance(date_values, list) is False:
                    date_values = [date_values]

            # every row except the last keeps only its first forecast step
            for i in range(len(rows) - 1):
                rows[i][predict] = rows[i][predict][0]
                rows[i][order_by_column] = rows[i][order_by_column][0]
                for col in ('predicted_value', 'confidence', 'confidence_lower_bound', 'confidence_upper_bound'):
                    explanations[i][predict][col] = explanations[i][predict][col][0]

            # the last row is unrolled into nr_predictions rows, one per forecast step
            last_row = rows.pop()
            last_explanation = explanations.pop()
            for i in range(nr_predictions):
                new_row = copy.deepcopy(last_row)
                if nr_predictions == 1:
                    new_row[predict] = new_row[predict]
                    new_row[order_by_column] = new_row[order_by_column]
                else:
                    new_row[predict] = new_row[predict][i]
                    new_row[order_by_column] = new_row[order_by_column][i]
                if '__mindsdb_row_id' in new_row and (i > 0 or __mdb_make_predictions is False):
                    new_row['__mindsdb_row_id'] = None
                rows.append(new_row)

                new_explanation = copy.deepcopy(last_explanation)
                for col in ('predicted_value', 'confidence', 'confidence_lower_bound', 'confidence_upper_bound'):
                    if nr_predictions == 1:
                        new_explanation[predict][col] = new_explanation[predict][col]
                    else:
                        new_explanation[predict][col] = new_explanation[predict][col][i]
                if i != 0:
                    new_explanation[predict]['anomaly'] = None
                    new_explanation[predict]['truth'] = None
                explanations.append(new_explanation)

        pred_dicts = []
        explanations = []
        for group, data in rows_by_groups.items():
            pred_dicts.extend(data['rows'])
            explanations.extend(data['explanations'])

        original_target_values[f'{predict}_original'] = []
        for i in range(len(pred_dicts)):
            original_target_values[f'{predict}_original'].append(
                explanations[i][predict].get('truth', None)
            )

        if model['dtypes'][order_by_column] == dtype.date:
            for row in pred_dicts:
                if isinstance(row[order_by_column], (int, float)):
                    row[order_by_column] = str(datetime.fromtimestamp(row[order_by_column]).date())
        elif model['dtypes'][order_by_column] == dtype.datetime:
            for row in pred_dicts:
                if isinstance(row[order_by_column], (int, float)):
                    row[order_by_column] = str(datetime.fromtimestamp(row[order_by_column]))

    keys = [x for x in pred_dicts[0] if x in columns]
    min_max_keys = []
    for col in predicted_columns:
        if model['dtype_dict'][col] in (dtype.integer, dtype.float):
            min_max_keys.append(col)

    data = []
    explains = []
    keys_to_save = [*keys, '__mindsdb_row_id', 'select_data_query', 'when_data']
    for i, el in enumerate(pred_dicts):
        data.append({key: el.get(key) for key in keys_to_save})
        explains.append(explanations[i])

    for i, row in enumerate(data):
        cast_row_types(row, model['dtype_dict'])

        row['select_data_query'] = select_data_query
        row['when_data'] = original_when_data

        for k in original_target_values:
            try:
                row[k] = original_target_values[k][i]
            except Exception:
                row[k] = None

        for column_name in columns:
            if column_name not in row:
                row[column_name] = None

        explanation = explains[i]
        for key in predicted_columns:
            row[key + '_confidence'] = explanation[key]['confidence']
            row[key + '_explain'] = json.dumps(explanation[key], cls=NumpyJSONEncoder, ensure_ascii=False)
            if 'anomaly' in explanation[key]:
                row[key + '_anomaly'] = explanation[key]['anomaly']
        for key in min_max_keys:
            row[key + '_min'] = explanation[key]['confidence_lower_bound']
            row[key + '_max'] = explanation[key]['confidence_upper_bound']

    return data
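# Illustrative call, with made-up predictor and column names: passing
# literal rows through 'when_data'. The JSON string is parsed into a list
# of dicts, handed to model_interface.predict(..., 'dict&explain'), and
# each returned row carries <target>_confidence / _explain / _min / _max
# companion fields.
#
#     rows = datanode.select(
#         table='home_rentals_model',
#         where_data={'when_data': '{"sqft": 900, "location": "great"}'}
#     )
#     rows[0]['rental_price'], rows[0]['rental_price_confidence']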
def select(self, table, columns=None, where=None, where_data=None, order_by=None, group_by=None, came_from=None):
    ''' NOTE WHERE statements can be just $eq joined with 'and' '''
    if table == 'predictors':
        return self._select_predictors()
    if table == 'commands':
        return []

    original_when_data = None
    if 'when_data' in where:
        if len(where) > 1:
            raise ValueError("No other keys should be used in 'where' when 'when_data' is used")
        try:
            original_when_data = where['when_data']['$eq']
            where_data = json.loads(where['when_data']['$eq'])
            if isinstance(where_data, list) is False:
                where_data = [where_data]
        except Exception:
            raise ValueError(f'''Error while parsing 'when_data'="{where_data}"''')

    external_datasource = None
    if 'external_datasource' in where:
        external_datasource = where['external_datasource']['$eq']
        del where['external_datasource']

    select_data_query = None
    if came_from is not None and 'select_data_query' in where:
        select_data_query = where['select_data_query']['$eq']
        del where['select_data_query']

        dbtype = self.config['integrations'][came_from]['type']
        if dbtype == 'clickhouse':
            ch = Clickhouse(self.config, came_from)
            res = ch._query(select_data_query.strip(' ;\n') + ' FORMAT JSON')
            data = res.json()['data']
        elif dbtype == 'mariadb':
            maria = Mariadb(self.config, came_from)
            data = maria._query(select_data_query)
        elif dbtype == 'mysql':
            mysql = MySQL(self.config, came_from)
            data = mysql._query(select_data_query)
        elif dbtype == 'postgres':
            postgres = PostgreSQL(self.config, came_from)
            data = postgres._query(select_data_query)
        elif dbtype == 'mssql':
            mssql = MSSQL(self.config, came_from)
            data = mssql._query(select_data_query, fetch=True)
        else:
            raise Exception(f'Unknown database type: {dbtype}')

        if where_data is None:
            where_data = data
        else:
            where_data += data

    new_where = {}
    if where_data is not None:
        where_data = pandas.DataFrame(where_data)
    else:
        for key, value in where.items():
            if isinstance(value, dict) is False or len(value.keys()) != 1 or list(value.keys())[0] != '$eq':
                # TODO value should be just string or number
                raise Exception()
            new_where[key] = value['$eq']
        if len(new_where) == 0:
            return []
        where_data = [new_where]

    try:
        model = self.custom_models.get_model_data(name=table)
    except Exception:
        model = self.mindsdb_native.get_model_data(name=table)

    predicted_columns = model['predict']

    original_target_values = {}
    for col in predicted_columns:
        if where_data is not None:
            if col in where_data:
                original_target_values[col + '_original'] = list(where_data[col])
            else:
                original_target_values[col + '_original'] = [None] * len(where_data)
        else:
            original_target_values[col + '_original'] = [None]

    if table in [x['name'] for x in self.custom_models.get_models()]:
        res = self.custom_models.predict(name=table, when_data=where_data)

        data = []
        fields = model['data_analysis_v2']['columns']
        for i, ele in enumerate(res):
            row = {}
            row['select_data_query'] = select_data_query
            row['external_datasource'] = external_datasource
            row['when_data'] = original_when_data

            for key in ele:
                row[key] = ele[key]['predicted_value']
                # FIXME prefer to get an int from mindsdb_native in this case
                if model['data_analysis_v2'][key]['typing']['data_subtype'] == 'Int':
                    row[key] = int(row[key])

            for k in fields:
                if k not in ele:
                    if isinstance(where_data, list):
                        if k in where_data[i]:
                            row[k] = where_data[i][k]
                        else:
                            row[k] = None
                    elif k in where_data.columns:
                        row[k] = where_data[k].iloc[i]
                    else:
                        row[k] = None

            for k in original_target_values:
                row[k] = original_target_values[k][i]

            data.append(row)

        field_types = {
            f: model['data_analysis_v2'][f]['typing']['data_subtype']
            for f in fields if 'typing' in model['data_analysis_v2'][f]
        }
        for row in data:
            cast_row_types(row, field_types)

        return data
    else:
        res = self.mindsdb_native.predict(name=table, when_data=where_data)

        keys = [x for x in list(res._data.keys()) if x in columns]
        min_max_keys = []
        for col in predicted_columns:
            if model['data_analysis_v2'][col]['typing']['data_type'] == 'Numeric':
                min_max_keys.append(col)

        data = []
        explains = []
        for i, el in enumerate(res):
            data.append({key: el[key] for key in keys})
            explains.append(el.explain())

        field_types = {
            f: model['data_analysis_v2'][f]['typing']['data_subtype']
            for f in model['data_analysis_v2']['columns']
            if 'typing' in model['data_analysis_v2'][f]
        }

        for i, row in enumerate(data):
            cast_row_types(row, field_types)

            row['select_data_query'] = select_data_query
            row['external_datasource'] = external_datasource
            row['when_data'] = original_when_data

            for k in original_target_values:
                row[k] = original_target_values[k][i]

            explanation = explains[i]
            for key in predicted_columns:
                row[key + '_confidence'] = explanation[key]['confidence']
                row[key + '_explain'] = json.dumps(explanation[key], cls=NumpyJSONEncoder)
            for key in min_max_keys:
                row[key + '_min'] = min(explanation[key]['confidence_interval'])
                row[key + '_max'] = max(explanation[key]['confidence_interval'])

        return data
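# Illustrative 'where' shape for the note in the docstring: each condition
# must be a single '$eq', implicitly AND-ed. E.g. the SQL
#
#     SELECT * FROM home_rentals WHERE sqft = 900 AND location = 'great'
#
# (made-up predictor and columns) reaches this method as
#
#     where = {'sqft': {'$eq': 900}, 'location': {'$eq': 'great'}}
#
# which the `new_where` loop flattens to {'sqft': 900, 'location': 'great'}.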
def select(self, table, columns=None, where=None, where_data=None, order_by=None, group_by=None, came_from=None):
    ''' NOTE WHERE statements can be just $eq joined with 'and' '''
    if table == 'predictors':
        return self._select_predictors()
    if table == 'commands':
        return []

    original_when_data = None
    if 'when_data' in where:
        if len(where) > 1:
            raise ValueError("No other keys should be used in 'where' when 'when_data' is used")
        try:
            original_when_data = where['when_data']['$eq']
            where_data = json.loads(where['when_data']['$eq'])
            if isinstance(where_data, list) is False:
                where_data = [where_data]
        except Exception:
            raise ValueError(f'''Error while parsing 'when_data'="{where_data}"''')

    external_datasource = None
    if 'external_datasource' in where:
        external_datasource = where['external_datasource']['$eq']
        del where['external_datasource']

    select_data_query = None
    if came_from is not None and 'select_data_query' in where:
        select_data_query = where['select_data_query']['$eq']
        del where['select_data_query']

        dbtype = self.config['integrations'][came_from]['type']
        if dbtype == 'clickhouse':
            ch = Clickhouse(self.config, came_from)
            res = ch._query(select_data_query.strip(' ;\n') + ' FORMAT JSON')
            data = res.json()['data']
        elif dbtype == 'mariadb':
            maria = Mariadb(self.config, came_from)
            data = maria._query(select_data_query)
        elif dbtype == 'mysql':
            mysql = MySQL(self.config, came_from)
            data = mysql._query(select_data_query)
        elif dbtype == 'postgres':
            postgres = PostgreSQL(self.config, came_from)
            data = postgres._query(select_data_query)
        elif dbtype == 'mssql':
            mssql = MSSQL(self.config, came_from)
            data = mssql._query(select_data_query, fetch=True)
        else:
            raise Exception(f'Unknown database type: {dbtype}')

        if where_data is None:
            where_data = data
        else:
            where_data += data

    new_where = {}
    if where_data is not None:
        where_data = pandas.DataFrame(where_data)
    else:
        for key, value in where.items():
            if isinstance(value, dict) is False or len(value.keys()) != 1 or list(value.keys())[0] != '$eq':
                # TODO value should be just string or number
                raise Exception()
            new_where[key] = value['$eq']
        if len(new_where) == 0:
            return []
        where_data = [new_where]

    model = self.mindsdb_native.get_model_data(name=table)
    predicted_columns = model['predict']

    original_target_values = {}
    for col in predicted_columns:
        if where_data is not None:
            if col in where_data:
                original_target_values[col + '_original'] = list(where_data[col])
            else:
                original_target_values[col + '_original'] = [None] * len(where_data)
        else:
            original_target_values[col + '_original'] = [None]

    res = self.mindsdb_native.predict(name=table, when_data=where_data)

    data = []
    keys = [x for x in list(res._data.keys()) if x in columns]
    min_max_keys = []
    for col in predicted_columns:
        if model['data_analysis_v2'][col]['typing']['data_type'] == 'Numeric':
            min_max_keys.append(col)

    length = len(res._data[predicted_columns[0]])
    for i in range(length):
        row = {}
        explanation = res[i].explain()
        for key in keys:
            row[key] = res._data[key][i]

            # +++ FIXME workaround until issue https://github.com/mindsdb/mindsdb/issues/591 is resolved
            typing = None
            if key in model['data_analysis_v2']:
                typing = model['data_analysis_v2'][key]['typing']['data_subtype']
            if typing == 'Timestamp' and row[key] is not None:
                timestamp = datetime.datetime.utcfromtimestamp(row[key])
                row[key] = timestamp.strftime('%Y-%m-%d %H:%M:%S')
            elif typing == 'Date':
                timestamp = datetime.datetime.utcfromtimestamp(row[key])
                row[key] = timestamp.strftime('%Y-%m-%d')
            # ---

        for key in predicted_columns:
            row[key + '_confidence'] = explanation[key]['confidence']
            row[key + '_explain'] = json.dumps(explanation[key])
        for key in min_max_keys:
            row[key + '_min'] = min(explanation[key]['confidence_interval'])
            row[key + '_max'] = max(explanation[key]['confidence_interval'])

        row['select_data_query'] = select_data_query
        row['external_datasource'] = external_datasource
        row['when_data'] = original_when_data

        for k in original_target_values:
            row[k] = original_target_values[k][i]

        data.append(row)

    return data
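# Illustrative output row (all values made up) for a predictor whose target
# is `rental_price`: alongside the source columns, each row gains the
# companion fields built above.
#
#     {
#         'sqft': 900,
#         'rental_price': 2631,
#         'rental_price_confidence': 0.99,
#         'rental_price_explain': '{"predicted_value": 2631, ...}',
#         'rental_price_min': 2478,
#         'rental_price_max': 2784,
#         'rental_price_original': None,
#         'select_data_query': None,
#         'external_datasource': None,
#         'when_data': '{"sqft": 900}'
#     }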