def _execute_learn(self):
    """Run the full learn pipeline: extract data, generate stats, train the
    model backend, analyze the model, then persist light/heavy metadata.

    :return: None
    """
    self._call_phase_module('DataExtractor')
    # Bail out early if the query produced no rows or no columns.
    if len(self.input_data.data_array) <= 0 or len(self.input_data.data_array[0]) <= 0:
        self.type = TRANSACTION_BAD_QUERY
        self.errorMsg = "No results for this query."
        return

    try:
        # start populating data
        self.lmd['current_phase'] = MODEL_STATUS_ANALYZING
        self.lmd['columns'] = self.input_data.columns  # this is populated by data extractor

        self._call_phase_module('StatsGenerator', input_data=self.input_data, modify_light_metadata=True)

        self.lmd['current_phase'] = MODEL_STATUS_TRAINING

        if self.lmd['model_backend'] == 'ludwig':
            self.lmd['is_active'] = True
            self.model_backend = LudwigBackend(self)
            self.model_backend.train()
            self.lmd['is_active'] = False

        self.lmd['train_end_at'] = str(datetime.datetime.now())

        self._call_phase_module('ModelAnalyzer')

        with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.lmd['name'] + '_light_model_metadata.pickle'), 'wb') as fp:
            self.lmd['updated_at'] = str(datetime.datetime.now())
            pickle.dump(self.lmd, fp)

        with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.hmd['name'] + '_heavy_model_metadata.pickle'), 'wb') as fp:
            # Don't save data for now
            self.hmd['from_data'] = None
            self.hmd['test_from_data'] = None
            pickle.dump(self.hmd, fp)
        return
    except Exception as e:
        self.lmd['is_active'] = False
        self.lmd['current_phase'] = MODEL_STATUS_ERROR
        # BUG FIX: traceback.print_exc() prints to stderr and returns None,
        # so the stored error_msg was always None. format_exc() returns the
        # traceback as a string, which is what we want to persist.
        self.lmd['error_msg'] = traceback.format_exc()
        self.log.error(str(e))
        raise e
def run(self, mode='train'):
    """Train or predict with the ludwig backend, if it is the configured one.

    :param mode: either 'train' or 'predict'
    """
    transaction = self.transaction
    # Only the ludwig backend is handled here; anything else is a no-op.
    if transaction.lmd['model_backend'] != 'ludwig':
        return

    if mode == 'train':
        transaction.lmd['is_active'] = True
        transaction.model_backend = LudwigBackend(transaction)
        transaction.model_backend.train()
        transaction.lmd['is_active'] = False
        transaction.lmd['train_end_at'] = str(datetime.datetime.now())
    elif mode == 'predict':
        transaction.model_backend = LudwigBackend(transaction)
        transaction.hmd['predictions'] = transaction.model_backend.predict()
def run(self, mode='train'):
    """Instantiate the configured model backend and run train or predict.

    :param mode: either 'train' or 'predict'
    """
    # Backends are imported lazily so that a missing optional dependency
    # only produces a warning instead of breaking the other backend.
    try:
        from mindsdb.libs.backends.ludwig import LudwigBackend
    except ImportError as e:
        self.transaction.log.warning(e)

    try:
        from mindsdb.libs.backends.lightwood import LightwoodBackend
    except ImportError as e:
        self.transaction.log.warning(e)

    phase_name = PHASE_MODEL_INTERFACE

    # NOTE(review): this variant reads the backend from hmd, while sibling
    # implementations read lmd['model_backend'] — confirm which is intended.
    backend_choice = self.transaction.hmd['model_backend']
    if backend_choice == 'ludwig':
        backend = LudwigBackend(self.transaction)
    elif backend_choice == 'lightwood':
        backend = LightwoodBackend(self.transaction)
    else:
        # Any other value is assumed to already be a backend object.
        backend = backend_choice
    self.transaction.model_backend = backend

    if hasattr(backend, 'set_transaction'):
        backend.set_transaction(self.transaction)

    if mode == 'train':
        backend.train()
        self.transaction.lmd['train_end_at'] = str(datetime.datetime.now())
    elif mode == 'predict':
        self.transaction.hmd['predictions'] = backend.predict()
def _execute_learn(self):
    """Run the learn pipeline (prepare -> extract -> analyze -> train ->
    model analysis), persisting light metadata after each phase.

    :return: None
    """
    self.lmd['current_phase'] = MODEL_STATUS_PREPARING
    self.save_metadata()

    self._call_phase_module(clean_exit=True, module_name='DataExtractor')

    try:
        # start populating data
        self.lmd['columns'] = self.input_data.columns  # this is populated by data extractor
        self.save_metadata()

        self.lmd['current_phase'] = MODEL_STATUS_DATA_ANALYSIS
        self._call_phase_module(clean_exit=True, module_name='StatsGenerator', input_data=self.input_data, modify_light_metadata=True, hmd=self.hmd)

        self.lmd['current_phase'] = MODEL_STATUS_TRAINING
        self.save_metadata()

        if self.lmd['model_backend'] == 'ludwig':
            self.lmd['is_active'] = True
            self.model_backend = LudwigBackend(self)
            self.model_backend.train()
            self.lmd['is_active'] = False

        self.lmd['train_end_at'] = str(datetime.datetime.now())
        self.save_metadata()

        self.lmd['current_phase'] = MODEL_STATUS_ANALYZING
        self._call_phase_module(clean_exit=True, module_name='ModelAnalyzer')

        self.lmd['current_phase'] = MODEL_STATUS_TRAINED
        self.save_metadata()
        return
    except Exception as e:
        self.lmd['is_active'] = False
        self.lmd['current_phase'] = MODEL_STATUS_ERROR
        # BUG FIX: traceback.print_exc() prints to stderr and returns None,
        # so error_msg was always None. format_exc() returns the traceback
        # string we actually want to store.
        self.lmd['error_msg'] = traceback.format_exc()
        self.log.error(str(e))
        raise e
def _execute_learn(self):
    """Run the learn pipeline, persisting light metadata between phases.

    :return: None
    """
    self.lmd['current_phase'] = MODEL_STATUS_PREPARING
    self.save_metadata()

    self._call_phase_module('DataExtractor')
    # Bail out early when the query produced no rows or no columns.
    if len(self.input_data.data_array) <= 0 or len(self.input_data.data_array[0]) <= 0:
        self.type = TRANSACTION_BAD_QUERY
        self.errorMsg = "No results for this query."
        return

    try:
        # start populating data
        self.lmd['current_phase'] = MODEL_STATUS_ANALYZING
        self.lmd['columns'] = self.input_data.columns  # this is populated by data extractor
        self.save_metadata()

        self._call_phase_module('StatsGenerator', input_data=self.input_data, modify_light_metadata=True, hmd=self.hmd)

        self.lmd['current_phase'] = MODEL_STATUS_TRAINING
        self.save_metadata()

        if self.lmd['model_backend'] == 'ludwig':
            self.lmd['is_active'] = True
            self.model_backend = LudwigBackend(self)
            self.model_backend.train()
            self.lmd['is_active'] = False

        self.lmd['train_end_at'] = str(datetime.datetime.now())
        self.save_metadata()

        self._call_phase_module('ModelAnalyzer')
        self.save_metadata()
        return
    except Exception as e:
        self.lmd['is_active'] = False
        self.lmd['current_phase'] = MODEL_STATUS_ERROR
        # BUG FIX: traceback.print_exc() prints and returns None;
        # format_exc() returns the traceback string for persistence.
        self.lmd['error_msg'] = traceback.format_exc()
        self.log.error(str(e))
        raise e
def run(self, mode='train'):
    """Select the configured backend, then train or predict with it.

    :param mode: either 'train' or 'predict'
    """
    backend_name = self.transaction.lmd['model_backend']

    if backend_name == 'ludwig':
        self.transaction.model_backend = LudwigBackend(self.transaction)
    if backend_name == 'lightwood':
        self.transaction.model_backend = LightwoodBackend(self.transaction)

    if mode == 'train':
        self.transaction.model_backend.train()
        self.transaction.lmd['train_end_at'] = str(datetime.datetime.now())
    elif mode == 'predict':
        self.transaction.hmd['predictions'] = self.transaction.model_backend.predict()
class Transaction:

    def __init__(self, session, light_transaction_metadata, heavy_transaction_metadata, logger=log, breakpoint=PHASE_END):
        """
        A transaction is the interface to start some MindsDB operation within a session

        :param session:
        :type session: utils.controllers.session_controller.SessionController
        :param light_transaction_metadata:
        :type light_transaction_metadata: dict
        :param heavy_transaction_metadata:
        :type heavy_transaction_metadata: dict
        :param breakpoint:
        """
        self.breakpoint = breakpoint
        self.session = session
        self.lmd = light_transaction_metadata
        self.lmd['created_at'] = str(datetime.datetime.now())
        self.hmd = heavy_transaction_metadata

        # variables to de defined by setup
        self.error = None
        self.errorMsg = None

        self.input_data = TransactionData()
        self.output_data = TrainTransactionOutputData()

        # variables that can be persisted
        self.log = logger

        self.run()

    # @TODO Make it more generic, move to general helpers, use inside predictor instead of linline loading
    def load_metadata(self):
        """Reload light and heavy model metadata from their pickles on disk."""
        with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.lmd['name'] + '_light_model_metadata.pickle'), 'rb') as fp:
            self.lmd = pickle.load(fp)
        with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.hmd['name'] + '_heavy_model_metadata.pickle'), 'rb') as fp:
            self.hmd = pickle.load(fp)

    # @TODO Make it more generic, move to general helpers
    def save_metadata(self):
        """Persist light and heavy metadata, omitting raw data from the heavy pickle."""
        with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.lmd['name'] + '_light_model_metadata.pickle'), 'wb') as fp:
            self.lmd['updated_at'] = str(datetime.datetime.now())
            pickle.dump(self.lmd, fp)

        with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.hmd['name'] + '_heavy_model_metadata.pickle'), 'wb') as fp:
            # Don't save data for now: copy hmd so the in-memory dict is not
            # mutated, nulling out the data-holding fields in the copy only.
            save_hmd = {}
            null_out_fields = ['test_from_data', 'from_data']
            for k in null_out_fields:
                save_hmd[k] = None
            for k in self.hmd:
                if k not in null_out_fields:
                    save_hmd[k] = self.hmd[k]
            pickle.dump(save_hmd, fp)

    def _call_phase_module(self, module_name, **kwargs):
        """
        Loads the module and runs it

        :param module_name:
        :return:
        """
        self.lmd['is_active'] = True
        module_path = convert_cammelcase_to_snake_string(module_name)
        module_full_path = 'mindsdb.libs.phases.{module_path}.{module_path}'.format(module_path=module_path)
        try:
            main_module = importlib.import_module(module_full_path)
            module = getattr(main_module, module_name)
            return module(self.session, self)(**kwargs)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed and re-labelled as module-load failures.
            error = 'Could not load module {module_name}'.format(module_name=module_name)
            self.log.error(error)
            self.log.error(traceback.format_exc())
            raise ValueError(error)
        finally:
            self.lmd['is_active'] = False

    def _execute_learn(self):
        """Run the learn pipeline, persisting light metadata between phases.

        :return: None
        """
        self.lmd['current_phase'] = MODEL_STATUS_PREPARING
        self.save_metadata()

        self._call_phase_module('DataExtractor')
        # Bail out early when the query produced no rows or no columns.
        if len(self.input_data.data_array) <= 0 or len(self.input_data.data_array[0]) <= 0:
            self.type = TRANSACTION_BAD_QUERY
            self.errorMsg = "No results for this query."
            return

        try:
            # start populating data
            self.lmd['current_phase'] = MODEL_STATUS_ANALYZING
            self.lmd['columns'] = self.input_data.columns  # this is populated by data extractor
            self.save_metadata()

            self._call_phase_module('StatsGenerator', input_data=self.input_data, modify_light_metadata=True, hmd=self.hmd)

            self.lmd['current_phase'] = MODEL_STATUS_TRAINING
            self.save_metadata()

            if self.lmd['model_backend'] == 'ludwig':
                self.lmd['is_active'] = True
                self.model_backend = LudwigBackend(self)
                self.model_backend.train()
                self.lmd['is_active'] = False

            self.lmd['train_end_at'] = str(datetime.datetime.now())
            self.save_metadata()

            self._call_phase_module('ModelAnalyzer')
            self.save_metadata()
            return
        except Exception as e:
            self.lmd['is_active'] = False
            self.lmd['current_phase'] = MODEL_STATUS_ERROR
            # BUG FIX: traceback.print_exc() prints and returns None;
            # format_exc() returns the traceback string for persistence.
            self.lmd['error_msg'] = traceback.format_exc()
            self.log.error(str(e))
            raise e

    def _execute_drop_model(self):
        """
        Make sure that we remove all previous data about this model

        :return:
        """
        self.output_data.data_array = [['Model ' + self.lmd['name'] + ' deleted.']]
        self.output_data.columns = ['Status']
        return

    def _execute_predict(self):
        """Load persisted metadata, run the backend's predict, and assemble
        output rows with per-prediction confidence evaluations.

        :return: None
        """
        # Snapshot the in-memory metadata so explicitly-set (non-None) values
        # survive the reload from disk below.
        old_lmd = {}
        for k in self.lmd:
            old_lmd[k] = self.lmd[k]

        old_hmd = {}
        for k in self.hmd:
            old_hmd[k] = self.hmd[k]

        self.load_metadata()

        # Merge: in-memory non-None values win over the loaded ones; keys
        # only present in the snapshot are backfilled with None.
        for k in old_lmd:
            if old_lmd[k] is not None:
                self.lmd[k] = old_lmd[k]
            else:
                if k not in self.lmd:
                    self.lmd[k] = None

        for k in old_hmd:
            if old_hmd[k] is not None:
                self.hmd[k] = old_hmd[k]
            else:
                if k not in self.hmd:
                    self.hmd[k] = None

        if self.lmd is None:
            self.log.error('No metadata found for this model')
            return

        self._call_phase_module('DataExtractor')
        self.save_metadata()

        if len(self.input_data.data_array[0]) <= 0:
            self.output_data = self.input_data
            return

        self.output_data = PredictTransactionOutputData(transaction=self)

        if self.lmd['model_backend'] == 'ludwig':
            self.model_backend = LudwigBackend(self)
            predictions = self.model_backend.predict()
        self.save_metadata()

        # self.transaction.lmd['predict_columns']
        self.output_data.data = {col: [] for i, col in enumerate(self.input_data.columns)}
        input_columns = [col for col in self.input_data.columns if col not in self.lmd['predict_columns']]

        # Transpose input rows into per-column output lists.
        for row in self.input_data.data_array:
            for index, cell in enumerate(row):
                col = self.input_data.columns[index]
                self.output_data.data[col].append(cell)

        for predicted_col in self.lmd['predict_columns']:
            probabilistic_validator = unpickle_obj(self.hmd['probabilistic_validators'][predicted_col])

            predicted_values = predictions[predicted_col]
            self.output_data.data[predicted_col] = predicted_values
            confidence_column_name = "{col}_confidence".format(col=predicted_col)
            self.output_data.data[confidence_column_name] = [None] * len(predicted_values)
            self.output_data.evaluations[predicted_col] = [None] * len(predicted_values)

            for row_number, predicted_value in enumerate(predicted_values):
                features_existance_vector = [False if self.output_data.data[col][row_number] is None else True for col in input_columns]
                prediction_evaluation = probabilistic_validator.evaluate_prediction_accuracy(features_existence=features_existance_vector, predicted_value=predicted_value)
                self.output_data.data[confidence_column_name][row_number] = prediction_evaluation
                #output_data[col][row_number] = prediction_evaluation.most_likely_value Huh, is this correct, are we replacing the predicted value with the most likely one ? Seems... wrong
                self.output_data.evaluations[predicted_col][row_number] = prediction_evaluation

        self.save_metadata()
        return

    def run(self):
        """Dispatch the transaction based on its type (learn, predict, drop, select).

        :return: None
        """
        if self.lmd['type'] == TRANSACTION_BAD_QUERY:
            self.log.error(self.errorMsg)
            self.error = True
            return

        if self.lmd['type'] == TRANSACTION_DROP_MODEL:
            self._execute_drop_model()
            return

        if self.lmd['type'] == TRANSACTION_LEARN:
            self.output_data.data_array = [['Model ' + self.lmd['name'] + ' training.']]
            self.output_data.columns = ['Status']
            if CONFIG.EXEC_LEARN_IN_THREAD == False:
                self._execute_learn()
            else:
                _thread.start_new_thread(self._execute_learn, ())
            return
        elif self.lmd['type'] == TRANSACTION_PREDICT:
            self._execute_predict()
        elif self.lmd['type'] == TRANSACTION_NORMAL_SELECT:
            self._execute_normal_select()
def _execute_predict(self):
    """Load persisted model metadata, run the backend's predict, and build
    output columns with per-row confidence evaluations.

    :return: None
    """
    # Snapshot in-memory metadata so explicitly-set (non-None) values
    # survive the reload from disk below.
    old_lmd = {}
    for k in self.lmd:
        old_lmd[k] = self.lmd[k]

    old_hmd = {}
    for k in self.hmd:
        old_hmd[k] = self.hmd[k]

    self.load_metadata()

    # Merge: snapshot non-None values win over loaded ones; keys only
    # present in the snapshot are backfilled with None.
    for k in old_lmd:
        if old_lmd[k] is not None:
            self.lmd[k] = old_lmd[k]
        else:
            if k not in self.lmd:
                self.lmd[k] = None

    for k in old_hmd:
        if old_hmd[k] is not None:
            self.hmd[k] = old_hmd[k]
        else:
            if k not in self.hmd:
                self.hmd[k] = None

    if self.lmd is None:
        self.log.error('No metadata found for this model')
        return

    self._call_phase_module('DataExtractor')
    self.save_metadata()

    # Empty first row means the extractor found nothing to predict on.
    if len(self.input_data.data_array[0]) <= 0:
        self.output_data = self.input_data
        return

    self.output_data = PredictTransactionOutputData(transaction=self)

    # NOTE(review): `predictions` is only assigned on the ludwig branch;
    # any other backend value would raise NameError below — confirm that
    # 'ludwig' is the only supported backend here.
    if self.lmd['model_backend'] == 'ludwig':
        self.model_backend = LudwigBackend(self)
        predictions = self.model_backend.predict()
    self.save_metadata()

    # self.transaction.lmd['predict_columns']
    self.output_data.data = {col: [] for i, col in enumerate(self.input_data.columns)}
    input_columns = [col for col in self.input_data.columns if col not in self.lmd['predict_columns']]

    # Transpose input rows into per-column output lists.
    for row in self.input_data.data_array:
        for index, cell in enumerate(row):
            col = self.input_data.columns[index]
            self.output_data.data[col].append(cell)

    for predicted_col in self.lmd['predict_columns']:
        probabilistic_validator = unpickle_obj(self.hmd['probabilistic_validators'][predicted_col])

        predicted_values = predictions[predicted_col]
        self.output_data.data[predicted_col] = predicted_values
        confidence_column_name = "{col}_confidence".format(col=predicted_col)
        self.output_data.data[confidence_column_name] = [None] * len(predicted_values)
        self.output_data.evaluations[predicted_col] = [None] * len(predicted_values)

        for row_number, predicted_value in enumerate(predicted_values):
            # True for each non-predicted column that has a value in this row.
            features_existance_vector = [False if self.output_data.data[col][row_number] is None else True for col in input_columns]
            prediction_evaluation = probabilistic_validator.evaluate_prediction_accuracy(features_existence=features_existance_vector, predicted_value=predicted_value)
            self.output_data.data[confidence_column_name][row_number] = prediction_evaluation
            #output_data[col][row_number] = prediction_evaluation.most_likely_value Huh, is this correct, are we replacing the predicted value with the most likely one ? Seems... wrong
            self.output_data.evaluations[predicted_col][row_number] = prediction_evaluation

    self.save_metadata()
    return
def _execute_predict(self):
    """Load persisted model metadata inline, run the backend's predict, build
    output columns with per-row confidence evaluations, then re-persist
    light/heavy metadata.

    :return: None
    """
    # Snapshot in-memory metadata so explicitly-set (non-None) values
    # survive the reload from disk below.
    old_lmd = {}
    for k in self.lmd:
        old_lmd[k] = self.lmd[k]

    old_hmd = {}
    for k in self.hmd:
        old_hmd[k] = self.hmd[k]

    with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.lmd['name'] + '_light_model_metadata.pickle'), 'rb') as fp:
        self.lmd = pickle.load(fp)
    with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.hmd['name'] + '_heavy_model_metadata.pickle'), 'rb') as fp:
        self.hmd = pickle.load(fp)

    # Merge: snapshot non-None values win over loaded ones; keys only
    # present in the snapshot are backfilled with None.
    for k in old_lmd:
        if old_lmd[k] is not None:
            self.lmd[k] = old_lmd[k]
        else:
            if k not in self.lmd:
                self.lmd[k] = None

    for k in old_hmd:
        if old_hmd[k] is not None:
            self.hmd[k] = old_hmd[k]
        else:
            if k not in self.hmd:
                self.hmd[k] = None

    if self.lmd is None:
        self.log.error('No metadata found for this model')
        return

    self._call_phase_module('DataExtractor')

    # Empty first row means the extractor found nothing to predict on.
    if len(self.input_data.data_array[0]) <= 0:
        self.output_data = self.input_data
        return

    self.output_data = PredictTransactionOutputData(transaction=self)

    # NOTE(review): `predictions` is only assigned on the ludwig branch;
    # any other backend value would raise NameError below — confirm that
    # 'ludwig' is the only supported backend here.
    if self.lmd['model_backend'] == 'ludwig':
        self.model_backend = LudwigBackend(self)
        predictions = self.model_backend.predict()

    # self.transaction.lmd['predict_columns']
    self.output_data.data = {col: [] for i, col in enumerate(self.input_data.columns)}
    input_columns = [col for col in self.input_data.columns if col not in self.lmd['predict_columns']]

    # Transpose input rows into per-column output lists.
    for row in self.input_data.data_array:
        for index, cell in enumerate(row):
            col = self.input_data.columns[index]
            self.output_data.data[col].append(cell)

    for predicted_col in self.lmd['predict_columns']:
        probabilistic_validator = unpickle_obj(self.hmd['probabilistic_validators'][predicted_col])

        predicted_values = predictions[predicted_col]
        self.output_data.data[predicted_col] = predicted_values
        confidence_column_name = "{col}_confidence".format(col=predicted_col)
        self.output_data.data[confidence_column_name] = [None] * len(predicted_values)
        self.output_data.evaluations[predicted_col] = [None] * len(predicted_values)

        for row_number, predicted_value in enumerate(predicted_values):
            # True for each non-predicted column that has a value in this row.
            features_existance_vector = [False if self.output_data.data[col][row_number] is None else True for col in input_columns]
            prediction_evaluation = probabilistic_validator.evaluate_prediction_accuracy(features_existence=features_existance_vector, predicted_value=predicted_value)
            self.output_data.data[confidence_column_name][row_number] = prediction_evaluation
            #output_data[col][row_number] = prediction_evaluation.most_likely_value Huh, is this correct, are we replacing the predicted value with the most likely one ? Seems... wrong
            self.output_data.evaluations[predicted_col][row_number] = prediction_evaluation

    with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.lmd['name'] + '_light_model_metadata.pickle'), 'wb') as fp:
        self.lmd['updated_at'] = str(datetime.datetime.now())
        pickle.dump(self.lmd, fp)

    with open(os.path.join(CONFIG.MINDSDB_STORAGE_PATH, self.hmd['name'] + '_heavy_model_metadata.pickle'), 'wb') as fp:
        # Don't save data for now
        # NOTE(review): unlike save_metadata(), this mutates self.hmd in
        # place before dumping, so from_data/test_from_data are lost from
        # the live object too — confirm this is intended.
        self.hmd['from_data'] = None
        self.hmd['test_from_data'] = None
        pickle.dump(self.hmd, fp)
    return