def __call__(self):
    try:
        self.state = State.PRED_RESERVED
        self.sync_state()

        # This guarantees the state information returns immediately.
        self.updated = True

        PredictionThread.semaphore.acquire(self.stop_event)
        if self.stop_event.is_set():
            # Watch the stop event.
            self.updated = True
            return

        self.state = State.PRED_STARTED
        self._prepare_params()
        self._prepare_model()
        release_mem_pool()
        self.running_state = RunningState.STARTING
        assert self.model is not None
        self.sync_state()
        self.run()
    except Exception as e:
        traceback.print_exc()
        self.error_msg = e
        self.model = None
    finally:
        release_mem_pool()
        PredictionThread.semaphore.release()
        self.state = State.STOPPED
        self.running_state = RunningState.STOPPING
        self.sync_state()
def run(self, f, *args, **kwargs):
    with self.gpu_resource:
        self.active_gpu.id = self.gpus.pop()
        try:
            set_cuda_active(True)
            with use_device(self.active_gpu.id):
                return f(*args, **kwargs)
        finally:
            self.gpus.add(self.active_gpu.id)
            release_mem_pool()
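# A minimal, self-contained sketch of the GPU-pool pattern used by run() above.
# The class and variable names here (SimpleGpuPool, device_ids) are illustrative
# only and are not part of the surrounding code base; the real implementation
# additionally calls set_cuda_active/use_device/release_mem_pool around the task.
import threading

class SimpleGpuPool:
    def __init__(self, device_ids):
        self.gpus = set(device_ids)                        # currently free devices
        self.gpu_resource = threading.Semaphore(len(device_ids))

    def run(self, f, *args, **kwargs):
        with self.gpu_resource:                            # wait until a device is free
            gpu_id = self.gpus.pop()
            try:
                return f(gpu_id, *args, **kwargs)
            finally:
                self.gpus.add(gpu_id)                      # hand the device back

# pool = SimpleGpuPool([0, 1])
# pool.run(lambda dev, x: x * 2, 21)                       # -> 42, run while holding device 0 or 1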
def export_csv(model_id):
    try:
        model = storage.fetch_model(model_id)
        prediction = model["last_prediction_result"]
        task_id = model["task_id"]
        ret = []
        if task_id == Task.CLASSIFICATION.value:
            img_path = prediction["img"]
            sizes = prediction["size"]
            prediction = prediction["prediction"]
            for img, size, pred in zip(img_path, sizes, prediction):
                ret.append({
                    'path': img,
                    'size': size,
                    'predictions': pred["class"]
                })
        elif task_id in (Task.DETECTION.value, Task.SEGMENTATION.value):
            img_path = prediction["img"]
            sizes = prediction["size"]
            prediction = prediction["prediction"]
            for img, size, pred in zip(img_path, sizes, prediction):
                ret.append({
                    'path': img,
                    'size': size,
                    'predictions': pred
                })
        else:
            raise Exception("Task id {} is not supported.".format(task_id))
        df = pd.DataFrame.from_dict(json_normalize(ret), orient='columns')
        df.to_csv('prediction.csv')
        return static_file("prediction.csv", root='.', download=True)
    except Exception as e:
        release_mem_pool()
        traceback.print_exc()
        body = json.dumps({"error_msg": "{}: {}".format(type(e).__name__, str(e))})
        return create_response(body, 500)
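# A small, self-contained illustration of what export_csv above writes: the list
# of row dicts is flattened by json_normalize into a DataFrame and saved as CSV.
# The sample values below are made up and only illustrate the shape of one row.
import pandas as pd
from pandas.io.json import json_normalize  # on newer pandas: pd.json_normalize

rows = [{'path': 'img_0.jpg',
         'size': [300, 400],
         'predictions': [{'class': 0, 'box': [0.1, 0.2, 0.3, 0.4], 'score': 0.9}]}]
df = pd.DataFrame.from_dict(json_normalize(rows), orient='columns')
df.to_csv('prediction.csv')  # columns: path, size, predictions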
def __call__(self):
    # This function runs as a thread.
    batch_size = self.batch_size
    try:
        set_cuda_active(True)
        release_mem_pool()
        self.model.set_models(inference=True)
        result = []

        # Prediction
        self.running_state = RUN_STATE_PREDICTING
        if self.is_stopped():
            return
        self.total_batch = int(np.ceil(len(self.predict_files) / float(batch_size)))
        for i in range(self.total_batch):
            self.nth_batch = i
            batch = self.predict_files[i * batch_size:(i + 1) * batch_size]
            batch_result = self.model.predict(batch)
            result.extend(batch_result)

        # Set result.
        predict_list = []
        for img_path in self.predict_files:
            img = Image.open(img_path)
            width, height = img.size
            predict_list.append({
                "path": img_path,
                "height": height,
                "width": width,
            })
        self.predict_results = {
            "prediction_file_list": predict_list,
            "bbox_list": result
        }
        self.save_predict_result_to_csv()
        self.save_predict_result_to_xml()
    except Exception as e:
        traceback.print_exc()
        self.error_msg = str(e)
        self.model = None
        release_mem_pool()
def train_model(model_id):
    model = db.session().query(db.Model).get(model_id)
    if not model:
        return create_response({}, 404, err='model not found')
    # Only start training if the model has not been trained yet.
    if not model.best_epoch_r2:
        taskstate = train_task.TaskState.add_task(model)
        executor = Executor()
        submit_task(executor, train_task.train, taskstate, model.id)
        if renom.cuda.has_cuda():
            release_mem_pool()
    return create_response({'result': 'ok'})
def run_model(project_id, model_id):
    """
    Create a thread (Future object) and submit it to the executor.
    The thread is stored in train_thread_pool as a pair of thread_id and thread.
    """
    try:
        fields = 'hyper_parameters,algorithm,algorithm_params,dataset_def_id'
        data = storage.fetch_model(project_id, model_id, fields=fields)
        thread_id = "{}_{}".format(project_id, model_id)
        th = TrainThread(thread_id, project_id, model_id,
                         data['dataset_def_id'], data["hyper_parameters"],
                         data['algorithm'], data['algorithm_params'])
        ft = executor.submit(th)
        train_thread_pool[thread_id] = [ft, th]
        try:
            # This will wait for the end of the thread.
            ft.result()
            ft.cancel()
        except CancelledError:
            # If the model is deleted or stopped, the program reaches here.
            pass
        error_msg = th.error_msg
        del train_thread_pool[thread_id]
        ft = None
        th = None
        model = storage.fetch_model(project_id, model_id, fields='state')
        if model['state'] != STATE_DELETED:
            storage.update_model_state(model_id, STATE_FINISHED)
        release_mem_pool()
        if error_msg is not None:
            body = json.dumps({"error_msg": error_msg})
            return create_response(body)
        body = json.dumps({"dummy": ""})
        return create_response(body)
    except Exception as e:
        release_mem_pool()
        traceback.print_exc()
        body = json.dumps({"error_msg": "{}: {}".format(type(e).__name__, str(e))})
        return create_response(body, 500)
def wrapped(*args, **kwargs):
    global respopnse_cache
    try:
        ret = func(*args, **kwargs)
        if ret is None:
            ret = {}
        # NOTE: "and False" deliberately disables the 204 cache path for now.
        if respopnse_cache.get(func.__name__, None) == ret and False:
            # If the server would return the same value as the last response, return 204.
            body = json.dumps({}, ignore_nan=True, default=json_encoder)
            return create_response(body, 204)
        else:
            assert isinstance(ret, dict), \
                "The returned object of the API '{}' is not a dictionary.".format(func.__name__)
            respopnse_cache[func.__name__] = ret
            body = json.dumps(ret, ignore_nan=True, default=json_encoder)
            return create_response(body)
    except Exception as e:
        release_mem_pool()
        traceback.print_exc()
        body = json.dumps({"error_msg": "{}: {}".format(type(e).__name__, str(e))})
        return create_response(body, 500)
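# A minimal sketch of how the wrapped() closure above is typically produced and
# applied. The decorator name json_api and the handler get_models are hypothetical;
# only the wrapping behaviour (dict in, JSON response out, 204 on repeated results,
# 500 on errors) mirrors the code above.
import functools

def json_api(func):
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        ...  # body as defined above
    return wrapped

# @route("/api/models")
# @json_api
# def get_models():
#     return {"models": [...]}   # handlers must return a dict (or None for an empty body)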
def _exec(self):
    # This function runs as a thread.
    try:
        # Algorithm and model preparation.
        # Pretrained weights must be prepared beforehand.
        # This has to be done inside the thread.
        if self.algorithm == ALG_YOLOV1:
            cell_size = int(self.algorithm_params["cells"])
            num_bbox = int(self.algorithm_params["bounding_box"])
            path = self.download_weight(Yolov1.WEIGHT_URL, Yolov1.__name__ + '.h5')
            self.model = Yolov1(self.class_map, cell_size, num_bbox,
                                imsize=self.imsize,
                                load_pretrained_weight=path,
                                train_whole_network=self.train_whole_network)
            train_target_builder = self.model.build_data()
            valid_target_builder = self.model.build_data()
        elif self.algorithm == ALG_YOLOV2:
            anchor = int(self.algorithm_params["anchor"])
            path = self.download_weight(Yolov2.WEIGHT_URL, Yolov2.__name__ + '.h5')
            annotations = self.train_dist.annotation_list
            self.model = Yolov2(self.class_map,
                                create_anchor(annotations, anchor, base_size=self.imsize),
                                imsize=self.imsize,
                                load_pretrained_weight=path,
                                train_whole_network=self.train_whole_network)
            train_target_builder = self.model.build_data(
                imsize_list=[(i * 32, i * 32) for i in range(9, 20)])
            valid_target_builder = self.model.build_data()
        else:
            self.error_msg = "{} is not a supported algorithm id.".format(self.algorithm)
            return

        i = 0
        self.model.set_gpu(self._gpu)
        release_mem_pool()
        filename = '{}.h5'.format(int(time.time()))
        epoch = self.total_epoch
        batch_size = self.batch_size
        best_valid_loss = np.Inf
        valid_annotation_list = self.valid_dist.get_resized_annotation_list(self.imsize)
        storage.update_model_state(self.model_id, STATE_RUNNING)

        for e in range(epoch):
            epoch_id = storage.register_epoch(
                model_id=self.model_id,
                nth_epoch=e
            )

            # Train
            self.nth_epoch = e
            self.running_state = RUN_STATE_TRAINING
            if self.is_stopped():
                return
            display_loss = 0
            batch_gen = self.train_dist.batch(batch_size, target_builder=train_target_builder)
            self.total_batch = int(np.ceil(len(self.train_dist) / batch_size))
            for i, (train_x, train_y) in enumerate(batch_gen):
                self.nth_batch = i
                if self.is_stopped():
                    return
                self.model.set_models(inference=False)
                with self.model.train():
                    loss = self.model.loss(self.model(train_x), train_y)
                    reg_loss = loss + self.model.regularize()
                reg_loss.grad().update(self.model.get_optimizer(e, epoch, i, self.total_batch))
                try:
                    loss = loss.as_ndarray()[0]
                except Exception:
                    loss = loss.as_ndarray()
                display_loss += float(loss)
                self.last_batch_loss = float(loss)
            avg_train_loss = display_loss / (i + 1)

            # Validation
            self.running_state = RUN_STATE_VALIDATING
            if self.is_stopped():
                return
            valid_predict_box = []
            display_loss = 0
            batch_gen = self.valid_dist.batch(batch_size, valid_target_builder, shuffle=False)
            self.model.set_models(inference=True)
            for i, (valid_x, valid_y) in enumerate(batch_gen):
                if self.is_stopped():
                    return
                valid_z = self.model(valid_x)
                valid_predict_box.extend(self.model.get_bbox(valid_z))
                loss = self.model.loss(valid_z, valid_y)
                try:
                    loss = loss.as_ndarray()[0]
                except Exception:
                    loss = loss.as_ndarray()
                display_loss += float(loss)

            if self.is_stopped():
                return
            prec, recl, _, iou = get_prec_rec_iou(valid_predict_box, valid_annotation_list)
            _, mAP = get_ap_and_map(prec, recl)
            mAP = float(0 if np.isnan(mAP) else mAP)
            iou = float(0 if np.isnan(iou) else iou)
            if self.is_stopped():
                return
            avg_valid_loss = display_loss / (i + 1)
            self.valid_predict_box.append(valid_predict_box)
            self.train_loss_list.append(avg_train_loss)
            self.valid_loss_list.append(avg_valid_loss)
            self.valid_iou_list.append(iou)
            self.valid_map_list.append(mAP)

            # Store epoch data to DB.
            storage.update_model_loss_list(
                model_id=self.model_id,
                train_loss_list=self.train_loss_list,
                validation_loss_list=self.valid_loss_list,
            )
            if best_valid_loss > avg_valid_loss:
                # Save the model weights and record the new best validation loss.
                best_valid_loss = avg_valid_loss
                self.model.save(os.path.join(DB_DIR_TRAINED_WEIGHT, filename))
                storage.update_model_best_epoch(self.model_id, e, iou, mAP,
                                                filename, valid_predict_box)
            storage.update_epoch(
                epoch_id=epoch_id,
                train_loss=avg_train_loss,
                validation_loss=avg_valid_loss,
                epoch_iou=iou,
                epoch_map=mAP)
    except Exception as e:
        traceback.print_exc()
        self.error_msg = str(e)
        self.model = None
        release_mem_pool()
def test_segmentation_model_implementation(algo):
    release_mem_pool()
    # 1. Check whether the model can be instantiated without any arguments.
    try:
        model = algo()
    except Exception:
        # The model has to be initializable without arguments so that it can be
        # used with trained weights.
        raise Exception("The model has to be initializable without arguments.")

    methods = {k: v for k, v in inspect.getmembers(model) if inspect.ismethod(v)}

    # 2. Check function names and their arguments.
    method_list = {
        "__init__": [["class_map", type(None)], ["imsize", tuple],
                     ["load_pretrained_weight", bool], ["train_whole_network", bool]],
        "loss": ["x", "y"],
        "fit": [
            "train_img_path_list",
            "train_annotation_list",
            ["valid_img_path_list", type(None)],
            ["valid_annotation_list", type(None)],
            ["epoch", int],
            ["batch_size", int],
            ["augmentation", type(None)],
            ["callback_end_epoch", type(None)]
        ],
        "predict": ["img_list"],
        "get_optimizer": [
            ["current_loss", type(None)],
            ["current_epoch", type(None)],
            ["total_epoch", type(None)],
            ["current_batch", type(None)],
            ["total_batch", type(None)],
            ["avg_valid_loss_list", type(None)],
        ],
        "preprocess": ["x"],
        "regularize": [],
    }

    for k, v in method_list.items():
        last_checked_index = -1
        assert k in methods
        args = inspect.getargspec(getattr(model, k))
        for i, a in enumerate(v):
            if isinstance(a, list):
                try:
                    index = args.args.index(a[0])
                except ValueError:
                    raise ValueError("Argument '{}' is not implemented.".format(a[0]))
                assert a[1] == type(args.defaults[index - (len(args.args) - len(args.defaults))]), \
                    "Default argument type mismatched."
            else:
                try:
                    index = args.args.index(a)
                except ValueError:
                    raise ValueError("Argument '{}' is not implemented.".format(a))
                assert index > last_checked_index, \
                    "The order of arguments is not correct."
                last_checked_index = index

    # 3. Check serializable attributes.
    serializables = ["class_map", "imsize", "num_class"]
    for s in serializables:
        assert s in algo.SERIALIZED

    # 4. Check the fit function.
    test_imgs = ["voc.jpg", "voc.jpg"]
    test_annotation = ["segmentation_target.png", "segmentation_target.png"]
    class_map = ["car"]
    model = algo(class_map)
    model.fit(test_imgs, test_annotation, test_imgs, test_annotation, batch_size=2, epoch=2)

    # Predict
    model.predict(test_imgs)
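# A hedged sketch of driving the implementation test above with pytest. The FCN
# import below is an assumption about where segmentation models live in renom_img;
# swap in whichever segmentation classes the package actually exposes.
import pytest
from renom_img.api.segmentation.fcn import FCN16s, FCN32s  # assumed import path

@pytest.mark.parametrize("algo", [FCN16s, FCN32s])
def test_segmentation_models(algo):
    test_segmentation_model_implementation(algo)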
def run(self):
    model = self.model
    self.state = State.STARTED
    self.running_state = RunningState.TRAINING
    if self.task_id == Task.DETECTION.value:
        valid_target = self.valid_dist.get_resized_annotation_list(self.imsize)
    if self.stop_event.is_set():
        # Watch the stop event.
        self.updated = True
        return

    for e in range(self.total_epoch):
        release_mem_pool()
        self.nth_epoch = e
        if self.stop_event.is_set():
            # Watch the stop event.
            self.updated = True
            return

        model.set_models(inference=False)
        temp_train_batch_loss_list = []
        self.running_state = RunningState.TRAINING
        self.sync_state()
        for b, (train_x, train_y) in enumerate(self.train_dist.batch(self.batch_size), 1):
            # Periodically release the GPU memory pool while training Yolov2.
            if isinstance(self.model, Yolov2) and (b - 1) % 10 == 0 and (b - 1):
                release_mem_pool()
            self.nth_batch = b
            if self.stop_event.is_set():
                # Watch the stop event.
                self.updated = True
                return
            if len(train_x) > 0:
                with model.train():
                    loss = model.loss(model(train_x), train_y)
                    reg_loss = loss + model.regularize()
                try:
                    loss = loss.as_ndarray()[0]
                except Exception:
                    loss = loss.as_ndarray()
                loss = float(loss)
                temp_train_batch_loss_list.append(loss)
                self.last_batch_loss = loss
                self.sync_batch_result()
                if self.stop_event.is_set():
                    # Watch the stop event.
                    self.updated = True
                    return
                reg_loss.grad().update(
                    model.get_optimizer(
                        current_loss=loss,
                        current_epoch=e,
                        total_epoch=self.total_epoch,
                        current_batch=b - 1,
                        total_batch=self.total_batch,
                        avg_valid_loss_list=self.valid_loss_list))

            # Thread value changed.
            self.updated = True

        self.train_loss_list.append(np.mean(temp_train_batch_loss_list))
        self.sync_train_loss()
        self.updated = True

        release_mem_pool()
        self.running_state = RunningState.VALIDATING
        self.sync_state()

        if self.task_id != Task.DETECTION.value:
            valid_target = []
        valid_prediction = []
        temp_valid_batch_loss_list = []
        model.set_models(inference=True)
        for b, (valid_x, valid_y) in enumerate(
                self.valid_dist.batch(self.batch_size, shuffle=False)):
            if self.stop_event.is_set():
                # Watch the stop event.
                self.updated = True
                return
            valid_prediction_in_batch = model(valid_x)
            loss = model.loss(valid_prediction_in_batch, valid_y)
            if self.task_id == Task.CLASSIFICATION.value:
                valid_prediction.append(rm.softmax(valid_prediction_in_batch).as_ndarray())
            else:
                valid_prediction.append(valid_prediction_in_batch.as_ndarray())
            if self.task_id != Task.DETECTION.value:
                valid_target.append(valid_y)
            try:
                loss = loss.as_ndarray()[0]
            except Exception:
                loss = loss.as_ndarray()
            loss = float(loss)
            temp_valid_batch_loss_list.append(loss)

        self.valid_loss_list.append(np.mean(temp_valid_batch_loss_list))
        self.sync_valid_loss()
        if self.stop_event.is_set():
            # Watch the stop event.
            self.updated = True
            return

        valid_prediction = np.concatenate(valid_prediction, axis=0)
        if self.task_id != Task.DETECTION.value:
            valid_target = np.concatenate(valid_target, axis=0)
        n_valid = min(len(valid_prediction), len(valid_target))

        # The rest depends on each task.
        loss = self.valid_loss_list[-1]
        if self.task_id == Task.CLASSIFICATION.value:
            pred = np.argmax(valid_prediction, axis=1)
            targ = np.argmax(valid_target, axis=1)
            _, pr, _, rc, _, f1 = precision_recall_f1_score(pred, targ)
            prediction = [{
                "score": [float(vc) for vc in v],
                "class": float(p)
            } for v, p in zip(valid_prediction, pred)]
            # Update the best result when there is none yet or the F1 score improved.
            if not self.best_epoch_valid_result or self.best_epoch_valid_result["f1"] <= f1:
                self.best_valid_changed = True
                self.save_best_model()
                self.best_epoch_valid_result = {
                    "nth_epoch": e,
                    "prediction": prediction,
                    "recall": float(rc),
                    "precision": float(pr),
                    "f1": float(f1),
                    "loss": float(loss)
                }
            self.sync_best_valid_result()
        elif self.task_id == Task.DETECTION.value:
            prediction_box = model.get_bbox(valid_prediction[:n_valid])
            prec, rec, _, iou = get_prec_rec_iou(prediction_box, valid_target[:n_valid])
            _, mAP = get_ap_and_map(prec, rec)
            # Update the best result when there is none yet or the mAP improved.
            if not self.best_epoch_valid_result or self.best_epoch_valid_result["mAP"] <= mAP:
                self.best_valid_changed = True
                self.save_best_model()
                self.best_epoch_valid_result = {
                    "nth_epoch": e,
                    "prediction": prediction_box,
                    "mAP": float(mAP),
                    "IOU": float(iou),
                    "loss": float(loss)
                }
            self.sync_best_valid_result()
        elif self.task_id == Task.SEGMENTATION.value:
            pred = np.argmax(valid_prediction, axis=1)
            targ = np.argmax(valid_target, axis=1)
            _, pr, _, rc, _, f1, _, _, _, _ = \
                get_segmentation_metrics(pred, targ, n_class=len(self.class_map))
            prediction = []
            for p, t in zip(pred, targ):
                lep, lemp, ler, lemr, _, _, _, _, _, _ = get_segmentation_metrics(
                    p[None], t[None], n_class=len(self.class_map))
                prediction.append({
                    "class": p.astype(np.int).tolist(),
                    "recall": {k: float(v) for k, v in ler.items()},
                    "precision": {k: float(v) for k, v in lep.items()},
                })
            # Update the best result when there is none yet or the F1 score improved.
            if not self.best_epoch_valid_result or self.best_epoch_valid_result["f1"] <= f1:
                self.best_valid_changed = True
                self.save_best_model()
                self.best_epoch_valid_result = {
                    "nth_epoch": e,
                    "prediction": prediction,
                    "recall": float(rc),
                    "precision": float(pr),
                    "f1": float(f1),
                    "loss": float(loss)
                }
            self.sync_best_valid_result()

        # Thread value changed.
        self.save_last_model()
        self.updated = True
def predict_model(model_id):
    explanatory_column = request.params.explanatory_column
    target_column = request.params.target_column
    explanatory_column_ids = json.loads(request.params.explanatory_column_ids)
    model = db.session().query(db.Model).get(model_id)
    if not model:
        return create_response({}, 404, err='model not found')

    try:
        with open(os.path.join(DATASRC_DIR, 'prediction_set', 'pred.pickle'), mode='rb') as f:
            p_all_data = pickle.load(f)
        p_X = split_target(p_all_data, pickle.loads(model.dataset.labels), explanatory_column_ids)
        n_X = np.array(p_X)
        selected_scaling = model.dataset.selected_scaling
        if selected_scaling != 1:
            filename_X = model.dataset.filename_X
            n_X_scaling = scaling_again(n_X, filename_X)
        else:
            n_X_scaling = n_X
    except Exception as e:
        traceback.print_exc()
        return create_response({}, 404, err=str(e))

    executor = Executor()
    f = submit_task(executor, pred_task.prediction, model.id, n_X_scaling)
    try:
        result = f.result()
        if selected_scaling != 1:
            filename_y = model.dataset.filename_y
            result = re_scaling(result, filename_y)

        CSV_DIR = os.path.join(DATASRC_PREDICTION_OUT, 'csv')
        if not os.path.isdir(CSV_DIR):
            os.makedirs(CSV_DIR)
        now = datetime.datetime.now()
        filename = 'model' + str(model_id) + '_{0:%Y%m%d%H%M%S}'.format(now) + '.csv'
        filepath = os.path.join(CSV_DIR, filename)

        target_labels = target_column.split(',')
        explanatory_labels = explanatory_column.split(',')
        labels = target_labels + explanatory_labels
        np_xy = np.round(np.c_[result, n_X], 3)
        pred_x_y = pd.DataFrame(np_xy)
        pred_x_y.columns = labels
        pred_x_y.to_csv(filepath, index=False)

        SAMPLING_SIZE = 1000
        sampled_n_X = []
        sampled_result = []
        p = np.random.permutation(n_X.shape[0])
        n_X = n_X.tolist()
        result = result.tolist()
        sampled_p = p[:SAMPLING_SIZE]
        for sp_n in sampled_p:
            sampled_n_X.append(n_X[sp_n])
            sampled_result.append(result[sp_n])

        body = {
            'pred_x': n_X,
            'pred_y': result,
            'sampled_pred_x': sampled_n_X,
            'sampled_pred_y': sampled_result,
            'pred_csv': filename
        }
        return create_response(body)
    except Exception as e:
        traceback.print_exc()
        return create_response({}, 404, err=str(e))
    finally:
        if renom.cuda.has_cuda():
            release_mem_pool()
def fit(self, train_img_path_list, train_annotation_list,
        valid_img_path_list=None, valid_annotation_list=None,
        epoch=160, batch_size=16, imsize_list=None,
        augmentation=None, callback_end_epoch=None):
    """
    This function performs training with the given data and hyperparameters.
    Yolov2 is trained using multiple image scales, so this function accepts a
    list of image sizes. If it is not given, the model will be trained with a
    fixed image size.

    Args:
        train_img_path_list(list): List of image paths.
        train_annotation_list(list): List of annotations.
        valid_img_path_list(list): List of image paths for validation.
        valid_annotation_list(list): List of annotations for validation.
        epoch(int): Number of training epochs.
        batch_size(int): Batch size.
        imsize_list(list): List of image sizes.
        augmentation(Augmentation): Augmentation object.
        callback_end_epoch(function): Given function will be called at the end of each epoch.

    Returns:
        (tuple): Training loss list and validation loss list.

    Example:
        >>> from renom_img.api.detection.yolo_v2 import Yolov2
        >>> train_img_path_list, train_annot_list = ... # Define own data.
        >>> valid_img_path_list, valid_annot_list = ...
        >>> model = Yolov2()
        >>> model.fit(
        ...     # Feeds image and annotation data.
        ...     train_img_path_list,
        ...     train_annot_list,
        ...     valid_img_path_list,
        ...     valid_annot_list,
        ...     epoch=8,
        ...     batch_size=8)
        >>>

    The following arguments will be given to the function ``callback_end_epoch``.

    - **epoch** (int) - Number of the current epoch.
    - **model** (Model) - Yolov2 object.
    - **avg_train_loss_list** (list) - List of average train loss of each epoch.
    - **avg_valid_loss_list** (list) - List of average valid loss of each epoch.
    """
    if imsize_list is None:
        imsize_list = [self.imsize]
    else:
        for ims in imsize_list:
            assert (ims[0] / 32.) % 1 == 0 and (ims[1] / 32.) % 1 == 0, \
                "Yolo v2 only accepts an 'imsize' argument that is a list of multiples of 32, " \
                "e.g. imsize=[(288, 288), (320, 320)]."

    train_dist = ImageDistributor(
        train_img_path_list, train_annotation_list, augmentation=augmentation, num_worker=8)
    if valid_img_path_list is not None and valid_annotation_list is not None:
        valid_dist = ImageDistributor(valid_img_path_list, valid_annotation_list)
    else:
        valid_dist = None

    batch_loop = int(np.ceil(len(train_dist) / batch_size))
    avg_train_loss_list = []
    avg_valid_loss_list = []
    for e in range(epoch):
        bar = tqdm(range(batch_loop))
        display_loss = 0
        for i, (train_x, train_y) in enumerate(
                train_dist.batch(batch_size, shuffle=True,
                                 target_builder=self.build_data(imsize_list))):
            # This avoids a memory overflow when CUDA is active.
            if is_cuda_active() and i % 10 == 0:
                release_mem_pool()
            self.set_models(inference=False)
            with self.train():
                loss = self.loss(self(train_x), train_y)
                reg_loss = loss + self.regularize()
            reg_loss.grad().update(self.get_optimizer(loss.as_ndarray(), e, epoch, i, batch_loop))
            try:
                loss = float(loss.as_ndarray()[0])
            except Exception:
                loss = float(loss.as_ndarray())
            display_loss += loss
            bar.set_description("Epoch:{:03d} Train Loss:{:5.3f}".format(e, loss))
            bar.update(1)
        avg_train_loss = display_loss / (i + 1)
        avg_train_loss_list.append(avg_train_loss)

        if valid_dist is not None:
            if is_cuda_active():
                release_mem_pool()
            bar.n = 0
            bar.total = int(np.ceil(len(valid_dist) / batch_size))
            display_loss = 0
            for i, (valid_x, valid_y) in enumerate(
                    valid_dist.batch(batch_size, shuffle=False,
                                     target_builder=self.build_data())):
                self.set_models(inference=True)
                loss = self.loss(self(valid_x), valid_y)
                try:
                    loss = float(loss.as_ndarray()[0])
                except Exception:
                    loss = float(loss.as_ndarray())
                display_loss += loss
                bar.set_description("Epoch:{:03d} Valid Loss:{:5.3f}".format(e, loss))
                bar.update(1)
            avg_valid_loss = display_loss / (i + 1)
            avg_valid_loss_list.append(avg_valid_loss)
            bar.set_description("Epoch:{:03d} Avg Train Loss:{:5.3f} Avg Valid Loss:{:5.3f}".format(
                e, avg_train_loss, avg_valid_loss))
        else:
            bar.set_description("Epoch:{:03d} Avg Train Loss:{:5.3f}".format(e, avg_train_loss))
        bar.close()
        if callback_end_epoch is not None:
            callback_end_epoch(e, self, avg_train_loss_list, avg_valid_loss_list)
    return avg_train_loss_list, avg_valid_loss_list
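# A minimal sketch of a callback_end_epoch function for fit() above. The checkpoint
# filename and the improvement check are illustrative assumptions; only the callback
# signature (epoch, model, avg_train_loss_list, avg_valid_loss_list) comes from the
# docstring above, and model.save() is used the same way as elsewhere in this code.
def save_best_weights(epoch, model, avg_train_loss_list, avg_valid_loss_list):
    # Save the weights whenever the latest validation loss is the best so far.
    if avg_valid_loss_list and avg_valid_loss_list[-1] == min(avg_valid_loss_list):
        model.save("yolov2_best_epoch{:03d}.h5".format(epoch))

# model.fit(train_img_path_list, train_annot_list,
#           valid_img_path_list, valid_annot_list,
#           callback_end_epoch=save_best_weights)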