def detect_mask(frame, mask_model):
    """Run the mask detector on a single frame and return the largest detection."""
    cfg = config.cfg
    img_height, img_width, _ = frame.shape

    # pad input image to avoid unmatched shape problem
    img, pad_params = pad_input_image(frame, max_steps=max(cfg['steps']))
    img = img / 255.0 - 0.5

    priors, _ = priors_box(cfg, image_sizes=(img.shape[0], img.shape[1]))
    priors = tf.cast(priors, tf.float32)

    predictions = mask_model.predict(img[np.newaxis, ...])
    boxes, classes, scores = parse_predict(predictions, priors, cfg)
    print(f"scores:{scores}")

    # recover padding effect
    boxes = recover_pad_output(boxes, pad_params)

    locs, areas, mask_classes, scores_ = [], [], [], []
    for prior_index in range(len(boxes)):
        x1 = int(boxes[prior_index][0] * img_width)
        y1 = int(boxes[prior_index][1] * img_height)
        x2 = int(boxes[prior_index][2] * img_width)
        y2 = int(boxes[prior_index][3] * img_height)
        locs.append((x1, y1, x2, y2))
        areas.append((y2 - y1) * (x2 - x1))
        scores_.append(scores[prior_index])
        mask_classes.append(cfg['labels_list'][classes[prior_index]])

    if not areas:  # no detections in this frame
        return None, None, None

    # keep only the detection with the largest bounding box
    max_area_idx = areas.index(max(areas))
    return locs[max_area_idx], mask_classes[max_area_idx], scores_[max_area_idx]
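# A minimal usage sketch (not from the original source): driving detect_mask from
# a webcam loop. `load_detector()` is a hypothetical helper standing in for
# however the SlimModel weights are actually loaded in this repo.
def run_mask_demo():
    mask_model = load_detector()  # hypothetical: returns a loaded SlimModel
    capture = cv2.VideoCapture(0)
    while True:
        ret, frame = capture.read()
        if not ret or frame is None:
            break
        loc, label, score = detect_mask(frame, mask_model)
        if loc is not None:
            x1, y1, x2, y2 = loc
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{label}: {score:.2f}", (x1, y1 - 5),
                        cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 0), 1)
        cv2.imshow('mask demo', frame)
        if cv2.waitKey(1) == ord('q'):
            break
    capture.release()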
def main(_):
    global model
    cfg = config.cfg
    min_sizes = cfg['min_sizes']
    num_cell = [len(min_sizes[k]) for k in range(len(cfg['steps']))]

    try:
        model = SlimModel(cfg=cfg, num_cell=num_cell, training=False)
        paths = [os.path.join(FLAGS.model_path, path)
                 for path in os.listdir(FLAGS.model_path)]
        latest = sorted(paths, key=os.path.getmtime)[-1]  # most recent checkpoint
        model.load_weights(latest)
        print(f"model path : {latest}")
        # model.save('final.h5')  # to convert to tflite via model.save, a fixed input size must be set
        # model.summary()
    except (AttributeError, IndexError):
        print(f'Please make sure there is at least one weights file at {FLAGS.model_path}')
        return

    if not FLAGS.camera:
        if not os.path.exists(FLAGS.img_path):
            print(f"Cannot find image path from {FLAGS.img_path}")
            exit()

        print("[*] Predict {} image.. ".format(FLAGS.img_path))
        img_raw = cv2.imread(FLAGS.img_path)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))
        img = img / 255.0 - 0.5

        priors, _ = priors_box(cfg, image_sizes=(img.shape[0], img.shape[1]))
        priors = tf.cast(priors, tf.float32)

        predictions = model.predict(img[np.newaxis, ...])
        boxes, classes, scores = parse_predict(predictions, priors, cfg)
        print(f"scores:{scores}")

        # recover padding effect
        boxes = recover_pad_output(boxes, pad_params)

        # draw and save results
        save_img_path = os.path.join('assets', 'out_' + os.path.basename(FLAGS.img_path))
        for prior_index in range(len(boxes)):
            show_image(img_raw, boxes, classes, scores, img_height_raw,
                       img_width_raw, prior_index, cfg['labels_list'])

        cv2.imwrite(save_img_path, img_raw)
        cv2.imshow('results', img_raw)
        if cv2.waitKey(0) == ord('q'):
            exit(0)
    else:
        capture = cv2.VideoCapture(0)
        capture.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
        capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)

        # priors can be computed once since the frame size is fixed
        priors, _ = priors_box(cfg, image_sizes=(240, 320))
        priors = tf.cast(priors, tf.float32)

        start = time.time()
        while True:
            _, frame = capture.read()
            if frame is None:
                print('No camera found')
                break  # the original fell through here and crashed on a None frame

            h, w, _ = frame.shape
            img = np.float32(frame.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img / 255.0 - 0.5

            predictions = model(img[np.newaxis, ...])
            boxes, classes, scores = parse_predict(predictions, priors, cfg)

            for prior_index in range(len(classes)):
                show_image(frame, boxes, classes, scores, h, w, prior_index,
                           cfg['labels_list'])

            # calculate fps
            fps_str = "FPS: %.2f" % (1 / (time.time() - start))
            start = time.time()
            cv2.putText(frame, fps_str, (25, 25), cv2.FONT_HERSHEY_DUPLEX,
                        0.75, (0, 255, 0), 2)

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                exit()
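# A hedged sketch (not from the original source) of what pad_input_image and
# recover_pad_output are assumed to do: pad the bottom/right edges so both
# dimensions become multiples of max(cfg['steps']), and undo the resulting
# normalized-coordinate shift on the predicted boxes. The repo's real helpers
# may differ in detail.
import math

def pad_input_image_sketch(img, max_steps):
    img_h, img_w, _ = img.shape
    pad_h = int(math.ceil(img_h / max_steps) * max_steps) - img_h
    pad_w = int(math.ceil(img_w / max_steps) * max_steps) - img_w
    # replicate the border so the padding does not create hard edges
    padded = np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)), mode='edge')
    return padded, (img_h, img_w, pad_h, pad_w)

def recover_pad_output_sketch(boxes, pad_params):
    # boxes are normalized to the padded image; rescale to the original image
    img_h, img_w, pad_h, pad_w = pad_params
    scale = np.array([(img_w + pad_w) / img_w, (img_h + pad_h) / img_h] * 2)
    return boxes * scale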
def main(_):
    global load_t1
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu  # CPU: '-1'

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    weights_dir = 'checkpoints/'
    if not os.path.exists(weights_dir):
        os.mkdir(weights_dir)
    # if os.path.exists('logs'):
    #     shutil.rmtree('logs')

    logging.info("Load configuration...")
    cfg = config.cfg
    label_classes = cfg['labels_list']
    logging.info(f"Total image samples: {cfg['dataset_len']}, total class count: "
                 f"{len(label_classes)}, class list: {label_classes}")

    logging.info("Compute prior boxes...")
    priors, num_cell = priors_box(cfg)
    logging.info(f"Prior box count: {len(priors)}, "
                 f"default anchor boxes per feature-map cell: {num_cell}")

    logging.info("Loading dataset...")
    train_dataset = load_dataset(cfg, priors, shuffle=True, train=True)
    # val_dataset = load_dataset(cfg, priors, shuffle=False, train=False)

    logging.info("Create Model...")
    try:
        model = SlimModel(cfg=cfg, num_cell=num_cell, training=True)
        model.summary()
        tf.keras.utils.plot_model(model,
                                  to_file=os.path.join(os.getcwd(), 'model.png'),
                                  show_shapes=True, show_layer_names=True)
    except Exception as e:
        logging.error(e)
        logging.info("Create network failed.")
        sys.exit()

    if cfg['breaktraing']:  # (key name sic) resume training from the latest checkpoint
        paths = [os.path.join(weights_dir, path)
                 for path in os.listdir(weights_dir)]
        latest = sorted(paths, key=os.path.getmtime)[-1]
        model.load_weights(latest)
        # checkpoint names end in a 3-digit epoch number, e.g. weights_epoch_010.h5
        init_epoch = int(os.path.splitext(latest)[0][-3:])
    else:  # training from scratch
        init_epoch = -1

    steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    # val_steps_per_epoch = cfg['val_len'] // cfg['batch_size']
    logging.info(f"steps_per_epoch: {steps_per_epoch}")

    logging.info("Define optimizer, loss computation and so on...")
    learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-3, decay_steps=20000, decay_rate=0.96)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    multi_loss = MultiBoxLoss(num_class=len(label_classes), neg_pos_ratio=3)

    train_log_dir = 'logs/train'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)

    @tf.function
    def train_step(inputs, labels):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)
            losses = {}
            # regularization term is currently unused; kept in case the network
            # is redefined with weight regularization
            losses['reg'] = tf.reduce_sum(model.losses)
            losses['loc'], losses['class'] = multi_loss(labels, predictions)
            total_loss = tf.add_n([l for l in losses.values()])

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return total_loss, losses

    for epoch in range(init_epoch + 1, cfg['epoch']):
        try:
            start = time.time()
            avg_loss = 0.0
            for step, (inputs, labels) in enumerate(
                    train_dataset.take(steps_per_epoch)):
                load_t0 = time.time()
                total_loss, losses = train_step(inputs, labels)
                avg_loss = (avg_loss * step + total_loss.numpy()) / (step + 1)
                load_t1 = time.time()
                batch_time = load_t1 - load_t0

                steps = steps_per_epoch * epoch + step
                with train_summary_writer.as_default():
                    tf.summary.scalar('loss/total_loss', total_loss, step=steps)
                    for k, l in losses.items():
                        tf.summary.scalar('loss/{}'.format(k), l, step=steps)
                    tf.summary.scalar('learning_rate', optimizer.lr(steps), step=steps)

                print(f"\rEpoch: {epoch + 1}/{cfg['epoch']} | "
                      f"Batch {step + 1}/{steps_per_epoch} | "
                      f"Batch time {batch_time:.3f} || Loss: {total_loss:.6f} | "
                      f"loc loss: {losses['loc']:.6f} | "
                      f"class loss: {losses['class']:.6f} ",
                      end='', flush=True)

            print(f"\nEpoch: {epoch + 1}/{cfg['epoch']} | "
                  f"Epoch time {(load_t1 - start):.3f} || Average Loss: {avg_loss:.6f}")

            with train_summary_writer.as_default():
                tf.summary.scalar('loss/avg_loss', avg_loss, step=epoch)

            if (epoch + 1) % cfg['save_freq'] == 0:
                filepath = os.path.join(weights_dir,
                                        f'weights_epoch_{(epoch + 1):03d}.h5')
                model.save_weights(filepath)
                if os.path.exists(filepath):
                    print(f">>>>>>>>>>Save weights file at {filepath}<<<<<<<<<<")
        except KeyboardInterrupt:
            print('interrupted')
            # filepath = os.path.join(weights_dir, 'weights_last.h5')
            # model.save_weights(filepath)
            # print(f'model saved into: {filepath}')
            exit(0)
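# A hedged sketch (not from the original source) of the shape of a MultiBoxLoss
# with hard negative mining at neg_pos_ratio=3: keep all positive anchors plus
# the 3*num_pos highest-loss negatives, so the class loss is not swamped by easy
# background anchors. The label/prediction tensor layout here is an assumption,
# not the repo's actual format.
def multibox_loss_sketch(cls_true, cls_pred, loc_true, loc_pred, neg_pos_ratio=3):
    # cls_true: [batch, anchors] int labels, 0 = background
    # cls_pred: [batch, anchors, num_class] logits
    # loc_true / loc_pred: [batch, anchors, 4] box offsets
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=cls_true, logits=cls_pred)                      # [batch, anchors]
    pos_mask = tf.cast(cls_true > 0, tf.float32)
    num_pos = tf.maximum(tf.reduce_sum(pos_mask, axis=1), 1.0)

    # hard negative mining: rank background anchors by loss, keep the hardest
    neg_ce = ce * (1.0 - pos_mask)
    num_neg = tf.cast(neg_pos_ratio * num_pos, tf.int32)
    rank = tf.argsort(tf.argsort(neg_ce, axis=1, direction='DESCENDING'), axis=1)
    neg_mask = tf.cast(rank < num_neg[:, None], tf.float32) * (1.0 - pos_mask)

    class_loss = tf.reduce_sum(ce * (pos_mask + neg_mask), axis=1) / num_pos

    # smooth-L1-style localization loss over positive anchors only
    huber = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)
    loc_l1 = huber(loc_true, loc_pred)                         # [batch, anchors]
    loc_loss = tf.reduce_sum(loc_l1 * pos_mask, axis=1) / num_pos
    return tf.reduce_mean(loc_loss), tf.reduce_mean(class_loss)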
def main(_):
    dataset_path = FLAGS.dataset_path
    if not os.path.isdir(dataset_path):
        logging.info('Please define a valid dataset path.')
    else:
        logging.info('Loading {}'.format(dataset_path))

    # clear previous detection results
    detect_result_dir = 'mAP/detection-results/'
    if not os.path.exists(detect_result_dir):
        os.makedirs(detect_result_dir)
    for file in os.listdir(detect_result_dir):
        path_file = os.path.join(detect_result_dir, file)
        if os.path.isfile(path_file):
            os.remove(path_file)

    # clear previous ground-truth files
    ground_truth_dir = 'mAP/ground-truth/'
    if not os.path.exists(ground_truth_dir):
        os.makedirs(ground_truth_dir)
    for file in os.listdir(ground_truth_dir):
        path_file = os.path.join(ground_truth_dir, file)
        if os.path.isfile(path_file):
            os.remove(path_file)

    logging.info('Reading configuration...')
    cfg = config.cfg
    class_list = cfg['labels_list']
    image_size = tuple(FLAGS.image_size)
    logging.info("Class dictionary loaded: %s", class_list)

    priors, num_cell = priors_box(cfg, image_size)
    priors = tf.cast(priors, tf.float32)

    try:
        model = load_model(FLAGS.model_path)
    except Exception:
        # model_path is a checkpoint directory rather than a saved model
        model = SlimModel(cfg=cfg, num_cell=num_cell, training=False)
        paths = [os.path.join(FLAGS.model_path, path)
                 for path in os.listdir(FLAGS.model_path)]
        latest = sorted(paths, key=os.path.getmtime)[-1]
        model.load_weights(latest)
        print(f"model path : {latest}")

    img_list = open(
        os.path.join(FLAGS.dataset_path, 'ImageSets', 'Main',
                     '%s.txt' % FLAGS.split)).read().splitlines()
    logging.info("Image list loaded: %d", len(img_list))

    for image in tqdm.tqdm(img_list):
        image_file = os.path.join(FLAGS.dataset_path, 'JPEGImages', '%s.jpg' % image)
        annot_file = os.path.join(FLAGS.dataset_path, 'Annotations', '%s.xml' % image)

        # detect image
        img_raw = cv2.imread(image_file)
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())
        img = cv2.resize(img, (image_size[1], image_size[0]))  # cv2.resize takes (w, h)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img / 255.0 - 0.5  # normalize to [-0.5, 0.5]

        predictions = model.predict(img[np.newaxis, ...])
        boxes, classes, scores = parse_predict(predictions, priors, cfg)

        # write detections as "<class> <score> <left> <top> <right> <bottom>"
        with open(detect_result_dir + f'{image}.txt', "a") as new_f:
            for prior_index in range(len(boxes)):
                x1 = boxes[prior_index][0] * img_width_raw
                y1 = boxes[prior_index][1] * img_height_raw
                x2 = boxes[prior_index][2] * img_width_raw
                y2 = boxes[prior_index][3] * img_height_raw
                top = max(0, np.floor(y1 + 0.5).astype('int32'))
                left = max(0, np.floor(x1 + 0.5).astype('int32'))
                # clamp bottom/right against the matching dimension
                # (the original swapped width and height here)
                bottom = min(img_height_raw, np.floor(y2 + 0.5).astype('int32'))
                right = min(img_width_raw, np.floor(x2 + 0.5).astype('int32'))

                class_name = class_list[classes[prior_index]]
                score = "{:.2f}".format(scores[prior_index])
                label = '{} {}'.format(class_name, score)
                new_f.write("%s %s %s %s %s\n" % (label, left, top, right, bottom))

        # ground truth from the Pascal VOC XML annotation
        with open(ground_truth_dir + f'{image}.txt', 'a') as gt_f:
            tree = ET.parse(annot_file)
            root = tree.getroot()
            for obj in root.iter('object'):
                difficult = obj.find('difficult')
                # note: `if not difficult` is wrong for ElementTree, since an
                # element with no children is falsy; test against None instead
                difficult = '0' if difficult is None else difficult.text
                cls = obj.find('name').text
                xmlbox = obj.find('bndbox')
                bbox = (int(xmlbox.find('xmin').text),
                        int(xmlbox.find('ymin').text),
                        int(xmlbox.find('xmax').text),
                        int(xmlbox.find('ymax').text))
                gt_f.write(cls + ' ' + " ".join(str(a) for a in bbox) + '\n')
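# The two directories written above appear to follow the Cartucho/mAP text
# format: detections as "<class> <score> <left> <top> <right> <bottom>" and
# ground truth as "<class> <left> <top> <right> <bottom>". A matcher then pairs
# detections with ground truth by IoU. A minimal IoU helper for that pixel
# format, as an illustration (not part of the original source):
def iou(box_a, box_b):
    # boxes are (left, top, right, bottom) in pixels
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter) if inter else 0.0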
def main(_):
    global model
    cfg = config.cfg
    min_sizes = cfg['min_sizes']
    num_cell = [len(min_sizes[k]) for k in range(len(cfg['steps']))]

    try:
        model = SlimModel(cfg=cfg, num_cell=num_cell, training=False)
        paths = [os.path.join(FLAGS.model_path, path)
                 for path in os.listdir(FLAGS.model_path)]
        latest = sorted(paths, key=os.path.getmtime)[-1]
        model.load_weights(latest)
        print(f"model path : {latest}")

        # new_input = tf.keras.Input(shape=(224, 224, 3))
        # x = model(new_input)
        # m = tf.keras.Model(inputs=new_input, outputs=x)
        model.save('final.h5')
        # converter = tf.lite.TFLiteConverter.from_keras_model(m)
        # tflite_model = converter.convert()
        # with open('model.tflite', 'wb') as f:
        #     f.write(tflite_model)
        # model.summary()
    except (AttributeError, IndexError):
        print(f'Please make sure there is at least one weights file at {FLAGS.model_path}')
        return

    # IP-camera stream (DroidCam-style URL); replace with 0 for a local webcam
    capture = cv2.VideoCapture("http://192.168.0.8:4747/video")
    # capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    # capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    prev_time = 0
    FPS = 16

    priors, _ = priors_box(cfg, image_sizes=(480, 640))
    priors = tf.cast(priors, tf.float32)

    b, g, r, a = 0, 0, 255, 0  # red, in the BGR(A) order OpenCV frames use
    fontpath = "malgun.ttf"  # a font that can render Hangul
    font = ImageFont.truetype(fontpath, 30)
    sound_time = time.time()

    while True:
        ret, frame = capture.read()
        if frame is None:
            print('No camera found')
            break  # the original fell through here and crashed on a None frame

        current_time = time.time() - prev_time
        if ret and current_time > 1.0 / FPS:  # throttle inference to ~FPS
            prev_time = time.time()
            h, w, _ = frame.shape
            img = np.float32(frame.copy())
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img / 255.0 - 0.5

            predictions = model(img[np.newaxis, ...])
            boxes, classes, scores = parse_predict(predictions, priors, cfg)

            # onmask == 1 triggers the warning: class 2 forces it on,
            # class 1 switches it off
            onmask = 1
            for i in classes:
                if i == 1:
                    onmask = 0
                if i == 2:
                    onmask = 1
                    break
            print(onmask)

            for prior_index in range(len(classes)):
                show_image(frame, boxes, classes, scores, h, w, prior_index,
                           cfg['labels_list'])

            # calculate fps
            # fps_str = "FPS: %.2f" % (1 / (time.time() - start))

            # the original `onmask == 1 & len(classes) > 0` was an operator
            # precedence bug; `and` is what was intended
            if onmask == 1 and len(classes) > 0:
                # draw the Korean warning ("Please wear a mask") via PIL,
                # since cv2.putText cannot render Hangul
                img_pil = Image.fromarray(frame)
                draw = ImageDraw.Draw(img_pil)
                draw.text((25, 25), "마스크를 착용해주세요", font=font,
                          fill=(b, g, r, a))
                frame = np.array(img_pil)
                # play the warning sound at most once every 5 seconds
                if time.time() - sound_time > 5:
                    winsound.PlaySound("warning.wav",
                                       winsound.SND_FILENAME | winsound.SND_ASYNC)
                    sound_time = time.time()

            # show frame
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) == ord('q'):
                raise KeyboardInterrupt
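# winsound is Windows-only, so the demo above will not run elsewhere as written.
# A hedged, portable fallback sketch (not from the original source); the player
# binaries are platform assumptions:
import platform
import subprocess

def play_warning(path="warning.wav"):
    system = platform.system()
    if system == "Windows":
        import winsound
        winsound.PlaySound(path, winsound.SND_FILENAME | winsound.SND_ASYNC)
    elif system == "Darwin":
        subprocess.Popen(["afplay", path])  # macOS built-in player
    else:
        subprocess.Popen(["aplay", path])   # ALSA player on most Linux distros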
        if cv2.waitKey(0) == ord('q'):
            exit()

    print("data fps: {:.2f}".format(num_samples / (time.time() - start_time)))


if __name__ == '__main__':
    # for test dataset
    cfg = config.cfg
    class_list = cfg['labels_list']
    print(f"class:{class_list}")
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

    batch_size = 1
    priors, num_cell = priors_box(cfg)

    visualization = True  # False for time-cost estimation
    using_encoding = True  # batch size should be 1 when False
    using_normalizing = True  # image normalized to [-0.5, 0.5]
    variances = [0.1, 0.2]
    match_thresh = 0.45
    ignore_thresh = 0.3

    num_samples = cfg['dataset_len']
    tfrecord_name = rootPath + '/dataset/train_mask.tfrecord'
    # num_samples = cfg['val_len']
    # tfrecord_name = rootPath + '/dataset/trainval_mask.tfrecord'

    data_visulization()
    exit()
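# A hedged sketch (not from the original source) of what priors_box is assumed
# to compute: one square anchor per entry of cfg['min_sizes'] at every
# feature-map cell, strided by cfg['steps'], in normalized (cx, cy, w, h) form.
# This shows the shapes involved, not the repo's exact math.
import itertools

def priors_box_sketch(cfg, image_sizes=(240, 320)):
    img_h, img_w = image_sizes
    anchors = []
    for k, step in enumerate(cfg['steps']):
        fm_h, fm_w = int(np.ceil(img_h / step)), int(np.ceil(img_w / step))
        for i, j in itertools.product(range(fm_h), range(fm_w)):
            for min_size in cfg['min_sizes'][k]:
                cx, cy = (j + 0.5) * step / img_w, (i + 0.5) * step / img_h
                anchors.append([cx, cy, min_size / img_w, min_size / img_h])
    num_cell = [len(sizes) for sizes in cfg['min_sizes']]
    return np.array(anchors, dtype=np.float32), num_cell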
"""loading the detection model variables for the detector object""" import tensorflow as tf import numpy as np from TrackEverything.tool_box import DetectionVars from components import config from components.prior_box import priors_box from components.utils import decode_bbox_tf, compute_nms from network.network import SlimModel #initial variables cfg = config.cfg image_size = (240, 320) priors, num_cell = priors_box(cfg, image_size) priors = tf.cast(priors, tf.float32) #detection variables def get_model(det_model_path): """Get the model obj Args: det_model_path (tf.model): path to model Returns: [type]: [description] """ #loading the detection model print("loading head detection model...") det_model = SlimModel(cfg=cfg, num_cell=num_cell, training=False) det_model.load_weights(det_model_path) print("detection model loaded!") return det_model