def run(self, conf_data):
    """Executed on cluster run.

    :param conf_data: workflow configuration dict (reads 'node_id',
        'wf_ver', 'nn_id')
    :return: dict of stored result info, including 'accuracy'
    """
    try:
        # Resolve the upstream network-config and preprocess nodes.
        net_node = self.get_prev_node(grp='netconf')
        data_node = self.get_prev_node(grp='preprocess')
        self._init_node_parm(conf_data['node_id'])

        # Prepare the summary container for this evaluation run.
        summary = TrainSummaryInfo(type=self.eval_result_type)
        summary.set_nn_wf_ver_id(conf_data['wf_ver'])
        summary.set_nn_id(conf_data['nn_id'])

        # Delegate the actual evaluation to the first network node,
        # feeding it the first preprocess node as data source.
        summary = net_node[0].eval(conf_data['node_id'], conf_data,
                                   data=data_node[0], result=summary)

        # Persist the result and attach the accuracy for the caller.
        stored = TrainSummaryInfo.save_result_info(self, summary)
        stored['accuracy'] = summary.get_accuracy()
        return stored
    except Exception as e:
        logging.error(e)
        raise Exception(e)
def run(self, conf_data):
    """Executed on cluster run.

    Evaluates the upstream network node, stores the summary result and
    marks the workflow version as Finished ("3") on success or Error
    ("4") on failure.

    :param conf_data: workflow configuration dict (reads 'node_id',
        'wf_ver', 'nn_id')
    :return: dict of stored result info including 'accuracy', or {}
        when the eval produced no result
    :raises Exception: wraps and chains any underlying failure
    """
    try:
        # get related nodes
        net_node = self.get_prev_node(grp='netconf')
        data_node = self.get_prev_node(grp='preprocess')
        self._init_node_parm(conf_data['node_id'])

        # set result info cls
        result = TrainSummaryInfo(type=self.eval_result_type)
        result.set_nn_wf_ver_id(conf_data['wf_ver'])
        result.set_nn_id(conf_data['nn_id'])

        # run eval for each network
        result = net_node[0].eval(conf_data['node_id'], conf_data,
                                  data=data_node[0], result=result)
        if result is None or result == '':
            return {}

        # set parms for db store
        input_data = TrainSummaryInfo.save_result_info(self, result)
        input_data['accuracy'] = result.get_accuracy()

        # Net Version update: 1 Pending, 2 Progress, 3 Finish, 4 Error
        condition_data = {'nn_wf_ver_id': conf_data['wf_ver'],
                          'condition': "3"}
        NNCommonManager().update_nn_wf_info(conf_data['nn_id'], condition_data)
        return input_data
    except Exception as e:
        # Flag the workflow version as errored before re-raising.
        # 1 Pending, 2 Progress, 3 Finish, 4 Error
        condition_data = {'nn_wf_ver_id': conf_data['wf_ver'],
                          'condition': "4"}
        NNCommonManager().update_nn_wf_info(conf_data['nn_id'], condition_data)
        # logging.exception records the traceback, not just the message.
        logging.exception(e)
        # Explicit chaining preserves the original exception context.
        raise Exception(e) from e
def run(self, conf_data):
    """Executed on cluster run.

    :param conf_data: workflow configuration dict (reads 'node_id',
        'wf_ver', 'nn_id')
    :return: dict of stored result info including 'accuracy', or {}
        when no result was produced
    """
    try:
        # Upstream nodes: network config and preprocessed data.
        prev_net = self.get_prev_node(grp='netconf')
        prev_data = self.get_prev_node(grp='preprocess')
        self._init_node_parm(conf_data['node_id'])

        # Summary container for this evaluation.
        summary = TrainSummaryInfo(type=self.eval_result_type)
        summary.set_nn_wf_ver_id(conf_data['wf_ver'])
        summary.set_nn_id(conf_data['nn_id'])

        # Delegate the evaluation to the network node.
        summary = prev_net[0].eval(conf_data['node_id'], conf_data,
                                   data=prev_data[0], result=summary)
        if summary is None:
            return {}

        # Store the result and decorate it for the caller.
        stored = TrainSummaryInfo.save_result_info(self, summary)
        stored['accuracy'] = summary.get_accuracy()

        # Mark workflow version Finished: 1 Pending, 2 Progress, 3 Finish, 4 Error
        NNCommonManager().update_nn_wf_info(
            conf_data['nn_id'],
            {'nn_wf_ver_id': conf_data['wf_ver'], 'condition': "3"})
        return stored
    except Exception as e:
        # Mark workflow version Error: 1 Pending, 2 Progress, 3 Finish, 4 Error
        NNCommonManager().update_nn_wf_info(
            conf_data['nn_id'],
            {'nn_wf_ver_id': conf_data['wf_ver'], 'condition': "4"})
        logging.error(e)
        raise Exception(e)
def eval_run(self, input_data):
    """Evaluate the trained model over *input_data* batch by batch.

    Counts per-label true/false predictions, records each row into a
    TrainSummaryInfo, optionally logs rows according to the
    ``predictlog`` flag ("T"/"F"/"A" or "TT"/"FF"/"AA" in eval mode),
    then stores the summary and prints the per-label counts.

    :param input_data: iterable batch source with pointer/has_next/next
        protocol — assumed to yield (image, label, filename) sets; TODO
        confirm against get_batch_img_data
    :return: None (results are persisted via save_result_info)
    """
    self.batch_size = self.netconf["param"]["batch_size"]
    labels = self.netconf["labels"]
    pred_cnt = self.netconf["param"]["predictcnt"]
    try:
        predlog = self.netconf["param"]["predictlog"]
    except KeyError:
        # "N" disables per-row prediction logging.
        predlog = "N"

    # Per-label true/false counters, indexed like `labels`.
    t_cnt_arr = [0] * len(labels)
    f_cnt_arr = [0] * len(labels)

    input_data.pointer = 0

    # eval summary container
    config = {"type": self.netconf["config"]["eval_type"],
              "labels": self.netconf["labels"],
              "nn_id": self.nn_id,
              "nn_wf_ver_id": self.wf_ver,
              "nn_batch_ver_id": self.batch}
    self.eval_data = TrainSummaryInfo(conf=config)

    def _log_row(prefix, true_name, file_name, return_data):
        # One row of the prediction log: verdict, truth, file, top-k keys/values.
        logging.info(prefix + true_name + " FileName=" + file_name)
        logging.info(return_data["key"])
        logging.info(return_data["val"])

    while input_data.has_next():
        data_set = input_data[0:input_data.data_size()]
        x_batch, y_batch, n_batch = self.get_batch_img_data(data_set, "E")
        try:
            logits = self.model.predict(x_batch)
            for i in range(len(logits)):
                true_name = y_batch[i]
                file_name = n_batch[i]
                logit = [logits[i]]
                # BUG FIX: `idx` was referenced below but its assignment
                # had been commented out, causing a NameError at runtime.
                idx = labels.index(true_name)
                return_data = self.set_predict_return_cnn_img(labels, logit,
                                                              pred_cnt)
                pred_name = return_data["key"][0]

                if self.eval_flag == "E":
                    # Strict mode: only the top-1 prediction counts.
                    if true_name == pred_name:
                        t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                        strLog = "[True] : "
                        if predlog == "TT":
                            _log_row(strLog, true_name, file_name, return_data)
                    else:
                        f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                        strLog = "[False] : "
                        if predlog == "FF":
                            _log_row(strLog, true_name, file_name, return_data)
                    if predlog == "AA":
                        _log_row(strLog, true_name, file_name, return_data)
                else:
                    # Lenient mode: correct if the truth appears anywhere
                    # in the top-k; list.index raises ValueError if absent.
                    try:
                        return_data["key"].index(true_name)
                        t_cnt_arr[idx] = t_cnt_arr[idx] + 1
                        strLog = "[True] : "
                        if predlog == "T":
                            _log_row(strLog, true_name, file_name, return_data)
                    except ValueError:
                        f_cnt_arr[idx] = f_cnt_arr[idx] + 1
                        strLog = "[False] : "
                        if predlog == "F":
                            _log_row(strLog, true_name, file_name, return_data)
                    if predlog == "A":
                        _log_row(strLog, true_name, file_name, return_data)

                self.eval_data.set_result_info(true_name, pred_name)
        except Exception as e:
            logging.info(e)
            logging.info(
                "None to restore checkpoint. Initializing variables instead."
            )
        input_data.next()

    # set parms for db store
    input_data = TrainSummaryInfo.save_result_info(self, self.eval_data)
    self.eval_print(labels, t_cnt_arr, f_cnt_arr)
def eval(self, node_id, conf_data, data=None, result=None):
    '''
    eval run init — evaluate the image-classification model over *data*
    and collect per-row results into a TrainSummaryInfo.

    :param node_id: workflow node id (not read directly in this body)
    :param conf_data: cluster run configuration (not read directly here)
    :param data: batch data source with reset_pointer/has_next/next;
        when None, an empty TrainSummaryInfo is returned immediately
    :param result: unused here; kept for caller compatibility
    :return: the populated TrainSummaryInfo (self.eval_data), or None
        if an exception was caught and logged
    '''
    try :
        logging.info("run NeuralNetNodeImage eval")
        pred_cnt = self.netconf["param"]["predictcnt"]
        eval_type = self.netconf["config"]["eval_type"]

        # eval result container keyed by net/workflow/batch identifiers
        config = {"type": eval_type, "labels": self.labels, "nn_id": self.nn_id,
                  "nn_wf_ver_id": self.nn_wf_ver_id, "nn_batch_ver_id": self.train_batch}
        self.eval_data = TrainSummaryInfo(conf=config)

        # No data to evaluate: hand back the empty summary.
        if data is None:
            return self.eval_data

        data.reset_pointer()

        while (data.has_next()):
            data_set = data[0:data.data_size()]
            # data_set[0] is assumed to hold raw image data — TODO confirm
            x_batch = self.get_convert_img_x(data_set[0], self.x_size, self.y_size, self.channel)  # img_data_batch

            # # Normalize data.
            # x_batch = x_batch.astype('float32') / 255

            # # If subtract pixel mean is enabled
            # if self.subtract_pixel_mean:
            #     x_train_mean = np.mean(x_batch, axis=0)
            #     x_batch -= x_train_mean

            logits = self.model.predict(x_batch)
            y_batch = self.get_convert_img_y_eval(data_set[1])
            n_batch = self.get_convert_img_y_eval(data_set[2])  # File Name

            for i in range(len(logits)):
                true_name = y_batch[i]
                logit = []
                logit.append(logits[i])
                retrun_data = self.set_predict_return_cnn_img(self.labels, logit, pred_cnt)
                pred_name = retrun_data["key"]
                pred_value = retrun_data["val"]

                # Predictions come back as an array (top-k); the answer
                # counts as correct if any entry matches the truth.
                t_pred_name = pred_name[0]
                for p_cnt in range(pred_cnt):
                    if pred_name[p_cnt] == true_name:
                        t_pred_name = pred_name[p_cnt]

                # eval result
                self.eval_data.set_result_info(true_name, t_pred_name)

                # Called to write the per-row log entry.
                file_name = n_batch[i]
                self.eval_data.set_tf_log(file_name, true_name, pred_name, pred_value)

            data.next()

        # eval result — persisted only after at least one training step.
        if self.train_cnt != 0:
            TrainSummaryInfo.save_result_info(self, self.eval_data)

        return self.eval_data
    except Exception as e :
        # NOTE(review): errors are swallowed and only logged at info
        # level; callers receive None in that case — confirm intended.
        logging.info("===Error on Eval : {0}".format(e))
def run(self, conf_data):
    """Wide and Deep Network Training (XGBoost node).

    Trains an XGBoost model on the configured input data, saves the
    model binary, records the rmse history as acc/loss info and stores
    the evaluation summary.

    :param conf_data: workflow configuration dict
    :return: dict of stored result info including 'accuracy'
    :raises Exception: wraps any underlying failure
    """
    logging.info("NeuralNetNode Xgboost Run called")
    # node_id is required by _init_train_parm
    try:
        self._init_train_parm(conf_data)
        train, test = self.get_input_data()

        # Load label metadata from the project-level preprocess module.
        # NOTE(review): hard-coded absolute path — consider making it
        # configurable; confirm the file exists on every cluster node.
        spec = importlib.util.spec_from_file_location(
            "data_preprocess", "/hoya_src_root/data_preprocess.py")
        foo = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(foo)
        _label, _label_info, _label_values = foo.label_info()

        y_train = train[_label].ravel()
        x_train = train.drop([_label, "id"], axis=1)
        y_test = test[_label].ravel()
        x_test = test.drop([_label, "id"], axis=1)

        # Fetch the batch whose eval flag is 'Y', and the train batch.
        self.load_batch = self.get_eval_batch(self.node_id)
        self.train_batch, self.batch = self.make_batch(self.node_id)
        logging.info("Xgboost Train get batch -> {0}".format(self.batch))
        logging.info("Xgboost Train get batch -> {0}".format(self.load_batch))

        # `is None` instead of `== None`; plain concatenation instead of
        # the original single-element ''.join([...]).
        batch_name = self.batch if self.train_batch is None else self.train_batch
        self.model_train_path = self.model_path + '/' + batch_name + '.bin'

        xgb_params = self.get_xgboost_paramter()
        num_rounds = self.conf.get("epoch")

        dtrain = xgb.DMatrix(x_train, y_train)  # training data
        dvalid = xgb.DMatrix(x_test, y_test)  # validation data
        eval_history = {}
        gbm = xgb.train(xgb_params, dtrain, num_rounds,
                        [(dtrain, 'train'), (dvalid, "test")],
                        evals_result=eval_history)
        gbm.save_model(self.model_train_path)

        # BUG FIX: the original called gbm.predict(dvalid) twice and
        # discarded the first result ("predictions" was never used).
        train_prediction = gbm.predict(dvalid)

        # Todo: read the eval flag instead of always using dvalid.
        # Record the per-round test rmse as both loss and acc history.
        config = {"nn_id": self.nn_id,
                  "nn_wf_ver_id": self.wf_ver, "nn_batch_ver_id": self.batch}
        acc_result = TrainSummaryAccLossInfo(config)
        acc_result.loss_info["loss"].extend(eval_history['test']['rmse'])
        acc_result.acc_info["acc"].extend(eval_history['test']['rmse'])
        self.save_accloss_info(acc_result)

        # Store the evaluation summary for this batch version.
        config = {"type": self.model_type, "labels": _label_values,
                  "nn_id": self.nn_id, "nn_wf_ver_id": self.wf_ver}
        eval_summary = TrainSummaryInfo(conf=config)
        eval_summary.set_nn_batch_ver_id(self.batch)
        eval_summary.set_result_info(y_test, train_prediction)

        input_data = TrainSummaryInfo.save_result_info(self, eval_summary)
        input_data['accuracy'] = eval_summary.get_accuracy()
        return input_data
    except Exception as e:
        logging.info("NeuralNetNodeXgboost Run Exception : {0}".format(e))
        raise Exception(e)