def emotion_init(**params):
    # Load driver
    emotion_model = params.get('emotion_model')
    if not emotion_model:
        return

    LOG.info('------------------------')
    LOG.info('Loading emotion model at %s...' % emotion_model)
    drv = driver.load_driver('openvino')
    # Instantiate driver
    global emotion_serving
    emotion_serving = drv()
    emotion_serving.load_model(
        emotion_model,
        device='CPU',
        flexible_batch_size=True,
    )
    LOG.info('Loaded.')
    LOG.info('------------------------')
def __init__(self, model_path, use_tensor_rt=False):
    self._model_path = model_path
    drv = driver.load_driver('tensorflow')
    self.serving = drv()
    _model = 'opencv_face_detector_uint8.pb'
    if use_tensor_rt:
        _model = 'opencv_face_detector_uint8_rt_fp16.pb'
    self.serving.load_model(os.path.join(self._model_path, _model),
                            inputs='data:0',
                            outputs='mbox_loc:0,mbox_conf_flatten:0')
    configFile = self._model_path + "/detector.pbtxt"
    self.net = cv2.dnn.readNetFromTensorflow(None, configFile)
    self.prior = np.fromfile(self._model_path + '/mbox_priorbox.np', np.float32)
    self.prior = np.reshape(self.prior, (1, 2, 35568))
    self.threshold = 0.5
    # Dry run to warm up the model
    self.bboxes(np.zeros((300, 300, 3), np.uint8))
def _load_driver(self):
    if self.serving is None:
        driver_name = 'openvino'
        if '_edgetpu' in self.face_detection_path and '.tflite' in self.face_detection_path:
            driver_name = 'edgetpu'
        drv = driver.load_driver(driver_name)
        # Instantiate driver
        self.serving = drv()
        self.serving.load_model(
            self.face_detection_path,
            # device=self.device,
            flexible_batch_size=True,
        )
        self.input_name = list(self.serving.inputs.keys())[0]
        if driver_name == 'openvino':
            # OpenVINO input shape is NCHW: take (W, H) from the last two dims
            self.input_size = tuple(
                list(self.serving.inputs.values())[0][:-3:-1])
        else:
            # TFLite/EdgeTPU input shape is NHWC: take (W, H) from dims 1 and 2
            self.input_size = tuple(
                list(self.serving.inputs.values())[0][-2:-4:-1])
        self.output_name = list(self.serving.outputs.keys())[0]
def main():
    args = parse_args()
    face_driver = driver.load_driver('openvino')()
    face_driver.load_model(args.face_model)

    train_a = sorted(glob.glob(os.path.join(args.data_dir, '*-1.tiff')))
    train_b = sorted(glob.glob(os.path.join(args.data_dir, '*-2.tiff')))
    output_a = os.path.join(args.output_dir, 'trainA')
    output_b = os.path.join(args.output_dir, 'trainB')
    os.makedirs(output_a, exist_ok=True)
    os.makedirs(output_b, exist_ok=True)

    print('Processing images...')
    for img_a_path, img_b_path in zip(train_a, train_b):
        img_a = cv2.imread(img_a_path)
        img_b = cv2.imread(img_b_path)
        base_a, _ = os.path.splitext(os.path.basename(img_a_path))
        base_b, _ = os.path.splitext(os.path.basename(img_b_path))

        boxes_a = hook.get_boxes(face_driver, img_a, threshold=0.2)
        boxes_b = hook.get_boxes(face_driver, img_b, threshold=0.2)
        if len(boxes_a) != 1 or len(boxes_b) != 1:
            print(f'Found {len(boxes_a)} boxes: {img_a_path}')
            print(f'Found {len(boxes_b)} boxes: {img_b_path}')
            continue

        img_a = hook.crop_by_box(img_a, boxes_a[0], margin=0.05)
        img_b = hook.crop_by_box(img_b, boxes_b[0], margin=0.05)

        cv2.imwrite(os.path.join(output_a, base_a + '.jpg'), img_a)
        cv2.imwrite(os.path.join(output_b, base_b + '.jpg'), img_b)
        print('.', end='')
        sys.stdout.flush()

    print()
    print(f'Done. Processed images are saved in {output_a} and {output_b}')
def face_init(**params):
    threshold = params.get('face_threshold')
    if threshold:
        PARAMS['face_threshold'] = float(threshold)

    # Load driver
    face_model = params.get('face_model')
    if not face_model:
        return

    LOG.info('------------------------')
    LOG.info('Loading face model at %s...' % face_model)
    drv = driver.load_driver('openvino')
    # Instantiate driver
    global face_serving
    face_serving = drv()
    face_serving.load_model(
        face_model,
        device='CPU',
        flexible_batch_size=True,
    )
    LOG.info('Loaded.')
    LOG.info('------------------------')
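A minimal sketch of how a model loaded by face_init might then be queried. The 300x300 NCHW input and the [-1, 7] detection output layout mirror the OpenVINO face-detection code in the alignment script further below; the helper name detect_faces and the default threshold are assumptions for illustration (cv2/np imports as in the other snippets here):

def detect_faces(img, threshold=0.5):
    # Hypothetical helper; assumes the SSD-style face model used elsewhere here.
    input_name = list(face_serving.inputs.keys())[0]
    output_name = list(face_serving.outputs.keys())[0]
    inp = cv2.resize(img, (300, 300), interpolation=cv2.INTER_AREA)
    inp = np.transpose(inp, [2, 0, 1]).reshape([1, 3, 300, 300])
    # Each output row: class_id, label, confidence, x_min, y_min, x_max, y_max
    raw = face_serving.predict({input_name: inp})[output_name].reshape([-1, 7])
    boxes = raw[raw[:, 2] > threshold][:, 3:7]
    # Scale normalized coordinates to pixels
    boxes *= [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]
    return boxes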
from ml_serving.drivers import driver
import cv2
import os
import numpy as np

drv = driver.load_driver("model")()
drv.load_model('kuberlab-demo/person-mask:1.82.85')

video = cv2.VideoCapture(0)
while True:
    _, frame = video.read()
    if frame is None:
        break

    # BGR -> RGB, resize to the model input size, normalize to [0, 1]
    serv_img = cv2.resize(frame[:, :, ::-1], (160, 160))
    serv_img = serv_img.astype(np.float32) / 255

    result = drv.predict({'image': np.expand_dims(serv_img, axis=0)})
    mask = result['output']
    mask = mask[0] * 255
    # mask[mask < 10] = 0
    mask = mask.astype(np.uint8)
    mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]))
    mask = mask.astype(np.float32) / 255

    frame = frame.astype(np.float32) * np.expand_dims(mask, axis=2)
    frame = frame.astype(np.uint8)

    cv2.imshow('Video', frame)
    key = cv2.waitKey(1)
    if key in [ord('q'), 202, 27]:
        break
def main(args):
    # Get the paths for the corresponding images
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            utils.print_fun('Do not use mlboard.')
        else:
            utils.print_fun('Use mlboard parameters logging.')
            use_mlboard = True

    image_size = args.image_size
    driver_name = 'openvino'
    if os.path.isdir(args.model) and os.path.exists(
            os.path.join(args.model, 'saved_model.pb')):
        driver_name = 'tensorflow'
        image_size = 112

    data = {
        'image_size': image_size,
        'driver_name': driver_name,
        'model_path': args.model,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    img_paths, actual_issame = load_dataset(args.data_dir)

    drv = driver.load_driver(driver_name)
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device='CPU',
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    utils.print_fun('Running forward pass on dataset images')

    # Enqueue one epoch of image paths and labels
    nrof_images = len(img_paths)
    data = {
        'num_images': nrof_images,
        'num_classes': nrof_images // 4,
    }
    update_data(data, use_mlboard, mlboard)

    embedding_size = list(serving.outputs.values())[0][-1]
    nrof_batches = int(np.ceil(float(nrof_images) / args.batch_size))
    emb_array = np.zeros((nrof_images, embedding_size))
    # TODO(nmakhotkin): cache embeddings by image paths (because image pairs
    #  are duplicated and no need to do inference on them)
    for i in range(nrof_batches):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = img_paths[start_index:end_index]
        probe_imgs = dataset.load_data(paths_batch, image_size,
                                       normalization=args.normalization)
        emb = _predict(serving, probe_imgs)
        emb_array[start_index:end_index, :] = emb
        if i % 5 == 4:
            utils.print_fun('{}/{}'.format(i + 1, nrof_batches))
            sys.stdout.flush()
    utils.print_fun('')

    embeddings = emb_array

    tpr, fpr, accuracy, val, val_std, far = helpers.evaluate(
        embeddings, actual_issame,
        nrof_folds=args.lfw_nrof_folds,
        distance_metric=args.distance_metric,
        subtract_mean=args.subtract_mean)
    rpt = report(tpr, fpr, accuracy, val, val_std, far)
    with open('report.html', 'w') as f:
        f.write(rpt)
    update_data({'#documents.report.html': rpt}, use_mlboard, mlboard)
def eval_video(**kwargs):
    logger.setLevel(logging.INFO)

    cap = cv2.VideoCapture(kwargs['video_source'])
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')  # int(cap.get(cv2.CAP_PROP_FOURCC))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    frame_count = -1
    iter_count = 0
    each_frame = kwargs['each_frame']
    save_dir = kwargs['save_dir']
    frames_limit = kwargs['frames_limit']

    video_writer = None
    video_output = kwargs['video_output']
    if video_output is not None:
        logger.info(
            f'Write video to {video_output} ({width}x{height}, '
            f'{fps / each_frame} fps) ...'
        )
        video_writer = cv2.VideoWriter(video_output, fourcc, fps / each_frame,
                                       frameSize=(width, height))

    write_report_to = None
    data = {}
    if kwargs['report_output']:
        write_report_to = kwargs['report_output']

    tracker = OnlineTracker(**kwargs)
    timer = Timer()
    results = []
    wait_time = 1

    drv = driver.load_driver('tensorflow')
    logger.info('init person detection driver...')
    person_detect_driver = drv()
    person_detect_model = kwargs['person_detect_model']
    logger.info(f'loading person detection model {person_detect_model}...')
    person_detect_driver.load_model(person_detect_model)
    logger.info(f'person detection model {person_detect_model} loaded')

    try:
        while True:
            frame_count += 1
            if frames_limit is not None and frame_count > frames_limit:
                logger.warning('frames limit {} reached'.format(frames_limit))
                break

            # read each X-th BGR frame
            frame = cap.read()
            if frame_count % each_frame > 0:
                continue

            if isinstance(frame, tuple):
                frame = frame[1]
            if frame is None:
                logger.warning('video capturing finished')
                break

            if iter_count % 20 == 0:
                logger.info(
                    'Processing frame {} (iteration {}) ({:.2f} fps)'.format(
                        frame_count, iter_count,
                        1. / max(1e-5, timer.average_time)))

            det_tlwhs, det_scores = detect_persons_tf(person_detect_driver,
                                                      frame, threshold=.5)

            # run tracking
            timer.tic()
            online_targets = tracker.update(frame, det_tlwhs, None)
            online_tlwhs = []
            online_ids = []
            for t in online_targets:
                online_tlwhs.append(t.tlwh)
                online_ids.append(t.track_id)
            timer.toc()

            if write_report_to:
                for i, id in enumerate(online_ids):
                    if id not in data:
                        data[id] = {
                            'intervals': [],
                            'images': [],
                            'last_image': None,
                        }
                    di = data[id]['intervals']
                    if len(di) == 0 or di[-1][1] < frame_count - each_frame:
                        if len(di) > 0 and di[-1][0] == di[-1][1]:
                            di = di[:-1]
                        di.append([frame_count, frame_count])
                    else:
                        di[-1][1] = frame_count

                    if not data[id]['last_image'] or \
                            data[id]['last_image'] < frame_count - fps * 10:
                        data[id]['last_image'] = frame_count
                        tlwh = [max(0, int(o)) for o in online_tlwhs[i]]
                        pers_img = frame[tlwh[1]:tlwh[1] + tlwh[3],
                                         tlwh[0]:tlwh[0] + tlwh[2]].copy()
                        if max(pers_img.shape[0], pers_img.shape[1]) > 100:
                            coef = max(pers_img.shape[0], pers_img.shape[1]) / 100
                            pers_img = cv2.resize(
                                pers_img, (int(pers_img.shape[1] / coef),
                                           int(pers_img.shape[0] / coef)))
                        _, pers_img = cv2.imencode('.jpeg', pers_img)
                        data[id]['images'].append(
                            base64.b64encode(pers_img).decode())

            # save results
            frame_id = frame_count  # or make it incremental?
            results.append((frame_id + 1, online_tlwhs, online_ids))

            online_im = vis.plot_tracking(frame, online_tlwhs, online_ids,
                                          frame_id=frame_id,
                                          fps=1. / timer.average_time)

            for tlwh in det_tlwhs:
                cv2.rectangle(
                    online_im,
                    (tlwh[0], tlwh[1]),  # (left, top)
                    (tlwh[0] + tlwh[2], tlwh[1] + tlwh[3]),  # (right, bottom)
                    (0, 255, 0),
                    1,
                )

            if kwargs['show_image']:
                cv2.imshow('online_im', online_im)
            if save_dir is not None:
                save_to = os.path.join(save_dir, '{:05d}.jpg'.format(frame_id))
                cv2.imwrite(save_to, online_im)
            if video_writer is not None:
                video_writer.write(cv2.resize(online_im, (width, height)))

            key = cv2.waitKey(wait_time)
            # Check raw key codes before the chr() conversion:
            # 'q' or Esc or 'q' in russian layout
            if key in [ord('q'), 202, 27]:
                exit(0)
            key = chr(key % 128).lower()
            if key == 'p':
                cv2.waitKey(0)
            elif key == 'a':
                wait_time = int(not wait_time)

            iter_count += 1
    except (KeyboardInterrupt, SystemExit) as e:
        logger.info('Caught %s: %s' % (e.__class__.__name__, e))
    finally:
        cv2.destroyAllWindows()
        if video_writer is not None:
            logger.info('Written video to %s.' % video_output)
            video_writer.release()

    if write_report_to:
        for i in data:
            di = data[i]
            di['index'] = i
            di['duration'] = sum([i[1] - i[0] for i in di['intervals']])
            di['duration_sec'] = '{:.2f}'.format(di['duration'] / fps)
            di['intervals_str'] = ', '.join([
                '{:.2f}-{:.2f}'.format(i[0] / fps, i[1] / fps)
                for i in di['intervals']
            ])

        data = data.values()
        data = sorted(data, key=lambda x: x['duration'], reverse=True)

        # prepare html
        tpl = jinja2.Template(template)
        html = tpl.render(data=data)
        with open(write_report_to, 'w') as f:
            f.write(html)

        update_data({'#documents.persons.html': html}, use_mlboard, mlboard)
def main(args):
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            print('Do not use mlboard.')
        else:
            print('Use mlboard parameters logging.')
            use_mlboard = True

    if args.use_split_dataset:
        dataset_tmp = facenet.get_dataset(args.data_dir)
        train_set, test_set = split_dataset(dataset_tmp,
                                            args.min_nrof_images_per_class,
                                            args.nrof_train_images_per_class)
        if args.mode == 'TRAIN':
            dataset = train_set
        elif args.mode == 'CLASSIFY':
            dataset = test_set
    else:
        dataset = facenet.get_dataset(args.data_dir)

    update_data({'mode': args.mode}, use_mlboard, mlboard)

    # Check that there is at least one training image per class
    for cls in dataset:
        assert len(cls.image_paths) > 0, \
            'There must be at least one image for each class in the dataset'

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))
    data = {
        'num_classes': len(dataset),
        'num_images': len(paths),
        'model_path': args.model,
        'image_size': args.image_size,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    # Load the model
    print('Loading feature extraction model')

    # Load driver
    drv = driver.load_driver(args.driver)
    # Instantiate driver
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
    )

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    nrof_images = len(paths)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / args.batch_size))
    emb_array = np.zeros((nrof_images, 512))
    for i in range(nrof_batches_per_epoch):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = paths[start_index:end_index]
        for j in range(end_index - start_index):
            print('Batch {} <-> {}'.format(paths_batch[j],
                                           labels[start_index + j]))
        images = facenet.load_data(paths_batch, False, False, args.image_size)

        if serving.driver_name == 'tensorflow':
            feed_dict = {'input:0': images, 'phase_train:0': False}
        elif serving.driver_name == 'openvino':
            input_name = list(serving.inputs.keys())[0]
            # Transpose image for channels-first format
            images = images.transpose([0, 3, 1, 2])
            feed_dict = {input_name: images}
        else:
            raise RuntimeError('Driver %s currently not supported' %
                               serving.driver_name)
        outputs = serving.predict(feed_dict)
        emb_array[start_index:end_index, :] = list(outputs.values())[0]

    classifier_filename_exp = os.path.expanduser(args.classifier_filename)
    if args.mode == 'TRAIN':
        # Train classifier
        print('Training classifier')
        model = svm.SVC(kernel='linear', probability=True)
        model.fit(emb_array, labels)

        # Create a list of class names
        class_names = [cls.name.replace('_', ' ') for cls in dataset]
        print('Classes:')
        print(class_names)

        # Saving classifier model
        with open(classifier_filename_exp, 'wb') as outfile:
            pickle.dump((model, class_names), outfile, protocol=2)
        print('Saved classifier model to file "%s"' % classifier_filename_exp)
    elif args.mode == 'CLASSIFY':
        # Classify images
        print('Testing classifier')
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        print('Loaded classifier model from file "%s"' %
              classifier_filename_exp)

        predictions = model.predict_proba(emb_array)
        best_class_indices = np.argmax(predictions, axis=1)
        best_class_probabilities = predictions[
            np.arange(len(best_class_indices)), best_class_indices]

        for i in range(len(best_class_indices)):
            print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]],
                                    best_class_probabilities[i]))

        accuracy = np.mean(np.equal(best_class_indices, labels))
        update_data({'accuracy': accuracy}, use_mlboard, mlboard)
        print('Accuracy: %.3f' % accuracy)

        if args.upload_model and accuracy >= args.upload_threshold:
            timestamp = datetime.datetime.now().strftime('%s')
            model_name = 'facenet-classifier'
            version = '1.0.0-%s-%s' % (args.driver, timestamp)

            print('Uploading model as %s:%s' % (model_name, version))
            upload_model(use_mlboard, mlboard, classifier_filename_exp,
                         model_name, version)
def main(args):
    output_dir = os.path.expanduser(args.output_dir)
    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes.txt')
    align_filename = os.path.join(output_dir, 'align.pkl')

    align_data_args = dict(vars(args))
    # The next arguments can be changed w/o changing aligned images
    del align_data_args['complementary']
    del align_data_args['input_dir']
    del align_data_args['output_dir']

    align_data = {}
    clear_output_dir = True
    if args.complementary:
        if os.path.isfile(align_filename):
            print_fun("Check previous align data")
            with open(align_filename, 'rb') as infile:
                (align_data_args_loaded, align_data_loaded) = pickle.load(infile)
                if align_data_args == align_data_args_loaded:
                    print_fun("Loaded data about %d aligned classes" %
                              len(align_data_loaded))
                    align_data = align_data_loaded
                    clear_output_dir = False
                else:
                    print_fun("Previous align data is for other arguments, skipped")

    if clear_output_dir:
        print_fun("Clearing output dir")
        shutil.rmtree(output_dir, ignore_errors=True)

    if not os.path.isdir(output_dir):
        print_fun("Creating output dir")
        os.makedirs(output_dir)

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    # facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))

    dataset = facenet.get_dataset(args.input_dir)

    print_fun('Creating networks and loading parameters')

    # Load driver
    drv = driver.load_driver("openvino")
    # Instantiate driver
    serving = drv()
    serving.load_model(
        args.face_detection_path,
        device="CPU",
        flexible_batch_size=True,
    )

    bg_rm_drv = bg_remove.get_driver(args.bg_remove_path)

    input_name = list(serving.inputs.keys())[0]
    output_name = list(serving.outputs.keys())[0]

    threshold = 0.5
    min_face_area = args.min_face_size ** 2

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            output_class_dir_created = False
            if cls.name in align_data:
                align_data_class = align_data[cls.name]
            else:
                align_data_class = {}
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')

                if not os.path.exists(output_filename):
                    try:
                        img = cv2.imread(image_path,
                                         cv2.IMREAD_COLOR).astype(np.float32)
                    except Exception as e:
                        error_message = '{}: {}'.format(image_path, e)
                        print_fun('ERROR: %s' % error_message)
                        continue

                    img_hash = hashlib.sha1(img.tostring()).hexdigest()
                    if image_path in align_data_class and \
                            align_data_class[image_path] == img_hash:
                        print_fun("%s - cached" % image_path)
                        continue
                    align_data_class[image_path] = img_hash

                    print_fun(image_path)

                    if len(img.shape) <= 2:
                        print_fun('WARNING: Unable to align "%s", shape %s' %
                                  (image_path, img.shape))
                        text_file.write('%s\n' % output_filename)
                        continue

                    if bg_rm_drv is not None:
                        img = bg_rm_drv.apply_mask(img)

                    serving_img = cv2.resize(img, (300, 300),
                                             interpolation=cv2.INTER_AREA)
                    serving_img = np.transpose(serving_img, [2, 0, 1]).reshape(
                        [1, 3, 300, 300])
                    raw = serving.predict(
                        {input_name: serving_img})[output_name].reshape([-1, 7])
                    # 7 values:
                    # class_id, label, confidence, x_min, y_min, x_max, y_max
                    # Select boxes where confidence > threshold,
                    # then scale normalized coordinates to pixels
                    bboxes_raw = raw[raw[:, 2] > threshold]
                    bboxes_raw[:, 3] = bboxes_raw[:, 3] * img.shape[1]
                    bboxes_raw[:, 5] = bboxes_raw[:, 5] * img.shape[1]
                    bboxes_raw[:, 4] = bboxes_raw[:, 4] * img.shape[0]
                    bboxes_raw[:, 6] = bboxes_raw[:, 6] * img.shape[0]

                    bounding_boxes = np.zeros([len(bboxes_raw), 5])
                    bounding_boxes[:, 0:4] = bboxes_raw[:, 3:7]
                    bounding_boxes[:, 4] = bboxes_raw[:, 2]

                    # Get the biggest box: find the box with the largest area:
                    # (y1 - y0) * (x1 - x0) - size of box.
                    bbs = bounding_boxes
                    area = (bbs[:, 3] - bbs[:, 1]) * (bbs[:, 2] - bbs[:, 0])

                    if len(area) < 1:
                        print_fun('WARNING: Unable to align "%s", n_faces=%s' %
                                  (image_path, len(area)))
                        text_file.write('%s\n' % output_filename)
                        continue

                    num = np.argmax(area)
                    if area[num] < min_face_area:
                        print_fun(
                            'WARNING: Face found but too small - about {}px '
                            'width against required minimum of {}px. Try'
                            ' adjusting parameter --min-face-size'.format(
                                int(np.sqrt(area[num])), args.min_face_size))
                        continue

                    bounding_boxes = np.stack([bbs[num]])

                    imgs = openvino_detection.get_images(
                        img,
                        bounding_boxes,
                        face_crop_size=args.image_size,
                        face_crop_margin=args.margin,
                        prewhiten=False,
                    )
                    for i, cropped in enumerate(imgs):
                        nrof_successfully_aligned += 1
                        bb = bounding_boxes[i]
                        filename_base, file_extension = os.path.splitext(
                            output_filename)
                        output_filename_n = "{}_{}{}".format(
                            filename_base, i, file_extension)

                        text_file.write('%s %d %d %d %d\n' %
                                        (output_filename_n, bb[0], bb[1],
                                         bb[2], bb[3]))
                        if not output_class_dir_created:
                            output_class_dir_created = True
                            if not os.path.exists(output_class_dir):
                                os.makedirs(output_class_dir)
                        cv2.imwrite(output_filename_n, cropped)

            align_data[cls.name] = align_data_class

    with open(align_filename, 'wb') as align_file:
        pickle.dump((align_data_args, align_data), align_file, protocol=2)

    print_fun('Total number of images: %d' % nrof_images_total)
    print_fun('Number of successfully aligned images: %d' %
              nrof_successfully_aligned)

    build_id = os.environ.get('BUILD_ID', None)
    if os.environ.get('PROJECT_ID', None) and (build_id is not None):
        from mlboardclient.api import client
        client.update_task_info({'aligned_location': output_dir})
def main(args):
    algorithms = ["kNN", "SVM"]
    use_mlboard = False
    mlboard = None
    if client:
        mlboard = client.Client()
        try:
            mlboard.apps.get()
        except Exception:
            mlboard = None
            print_fun('Do not use mlboard.')
        else:
            print_fun('Use mlboard parameters logging.')
            use_mlboard = True

    if args.use_split_dataset:
        dataset_tmp = facenet.get_dataset(args.data_dir)
        train_set, test_set = split_dataset(dataset_tmp,
                                            args.min_nrof_images_per_class,
                                            args.nrof_train_images_per_class)
        if args.mode == 'TRAIN':
            dataset = train_set
        elif args.mode == 'CLASSIFY':
            dataset = test_set
    else:
        dataset = facenet.get_dataset(args.data_dir)

    update_data({'mode': args.mode}, use_mlboard, mlboard)

    # Check that there is at least one training image per class
    for cls in dataset:
        if len(cls.image_paths) == 0:
            print_fun('WARNING: %s: There are no aligned images in this class.' % cls)

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print_fun('Number of classes: %d' % len(dataset))
    print_fun('Number of images: %d' % len(paths))
    data = {
        'num_classes': len(dataset),
        'num_images': len(paths),
        'model_path': args.model,
        'image_size': args.image_size,
        'data_dir': args.data_dir,
        'batch_size': args.batch_size,
    }
    update_data(data, use_mlboard, mlboard)

    # Load the model
    print_fun('Loading feature extraction model')

    # Load and instantiate driver
    drv = driver.load_driver(args.driver)
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    print_fun('Calculating features for images')

    noise_count = max(0, args.noise_count) if args.noise else 0
    emb_args = {
        'model': args.model,
        'use_split_dataset': args.use_split_dataset,
        'noise': noise_count > 0,
        'noise_count': noise_count,
        'flip': args.flip,
        'image_size': args.image_size,
        'min_nrof_images_per_class': args.min_nrof_images_per_class,
        'nrof_train_images_per_class': args.nrof_train_images_per_class,
    }

    stored_embeddings = {}
    if args.mode == 'TRAIN':
        embeddings_filename = os.path.join(
            args.data_dir,
            "embeddings-%s.pkl" % hashlib.md5(
                json.dumps(emb_args, sort_keys=True).encode()).hexdigest(),
        )
        if os.path.isfile(embeddings_filename):
            print_fun("Found stored embeddings data, loading...")
            with open(embeddings_filename, 'rb') as embeddings_file:
                stored_embeddings = pickle.load(embeddings_file)

    total_time = 0.

    nrof_images = len(paths)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / args.batch_size))

    epp = embeddings_per_path(noise_count, args.flip)
    embeddings_size = nrof_images * epp

    emb_array = np.zeros((embeddings_size, 512))
    fit_labels = []

    emb_index = 0
    for i in range(nrof_batches_per_epoch):
        start_index = i * args.batch_size
        end_index = min((i + 1) * args.batch_size, nrof_images)
        paths_batch = paths[start_index:end_index]
        labels_batch = labels[start_index:end_index]

        # has_not_stored_embeddings = False
        paths_batch_load, labels_batch_load = [], []

        for j in range(end_index - start_index):
            # print_fun(os.path.split(paths_batch[j]))
            cls_name = dataset[labels_batch[j]].name
            cached = True
            if cls_name not in stored_embeddings or \
                    paths_batch[j] not in stored_embeddings[cls_name]:
                # has_not_stored_embeddings = True
                cached = False
                paths_batch_load.append(paths_batch[j])
                labels_batch_load.append(labels_batch[j])
            else:
                embeddings = stored_embeddings[cls_name][paths_batch[j]]
                emb_array[emb_index:emb_index + len(embeddings), :] = embeddings
                fit_labels.extend([labels_batch[j]] * len(embeddings))
                emb_index += len(embeddings)

            print_fun('Batch {} <-> {} {} {}'.format(
                paths_batch[j], labels_batch[j], cls_name,
                "cached" if cached else "",
            ))

        if len(paths_batch_load) == 0:
            continue

        images = load_data(paths_batch_load, labels_batch_load,
                           args.image_size, noise_count, args.flip)

        if serving.driver_name == 'tensorflow':
            feed_dict = {'input:0': images, 'phase_train:0': False}
        elif serving.driver_name == 'openvino':
            input_name = list(serving.inputs.keys())[0]
            # Transpose image for channels-first format
            images = images.transpose([0, 3, 1, 2])
            feed_dict = {input_name: images}
        else:
            raise RuntimeError('Driver %s currently not supported' %
                               serving.driver_name)

        t = time.time()
        outputs = serving.predict(feed_dict)
        total_time += time.time() - t

        emb_outputs = list(outputs.values())[0]
        if args.mode == "TRAIN":
            for n, e in enumerate(emb_outputs):
                cls_name = dataset[labels_batch_load[n]].name
                if cls_name not in stored_embeddings:
                    stored_embeddings[cls_name] = {}
                path = paths_batch_load[n]
                if path not in stored_embeddings[cls_name]:
                    stored_embeddings[cls_name][path] = []
                stored_embeddings[cls_name][path].append(e)

        emb_array[emb_index:emb_index + len(images), :] = emb_outputs
        fit_labels.extend(labels_batch_load)
        emb_index += len(images)

    # average_time = total_time / embeddings_size * 1000
    # print_fun('Average time: %.3fms' % average_time)

    classifiers_path = os.path.expanduser(args.classifiers_path)

    if args.mode == 'TRAIN':
        # Save embeddings
        with open(embeddings_filename, 'wb') as embeddings_file:
            pickle.dump(stored_embeddings, embeddings_file, protocol=2)

        # Clear (or create) classifiers directory
        try:
            shutil.rmtree(classifiers_path, ignore_errors=True)
        except:
            pass
        os.makedirs(classifiers_path)

        # Create a list of class names
        dataset_class_names = [cls.name for cls in dataset]
        class_names = [cls.replace('_', ' ') for cls in dataset_class_names]
        print_fun('Classes:')
        print_fun(class_names)

        class_stats = [{} for _ in range(len(dataset_class_names))]
        for cls in stored_embeddings:
            class_stats[dataset_class_names.index(cls)] = {
                'images': len(stored_embeddings[cls]),
                'embeddings': sum(len(e) for e in stored_embeddings[cls].values()),
            }

        # Train classifiers
        for algorithm in algorithms:
            if args.only_algorithm is not None and algorithm != args.only_algorithm:
                continue

            print_fun('Classifier algorithm %s' % algorithm)
            # update_data({'classifier_algorithm': args.algorithm}, use_mlboard, mlboard)
            if algorithm == 'SVM':
                model = svm.SVC(kernel='linear', probability=True)
            elif algorithm == 'kNN':
                # n_neighbors = int(round(np.sqrt(len(emb_array))))
                model = neighbors.KNeighborsClassifier(
                    n_neighbors=args.knn_neighbors, weights='distance')
            else:
                raise RuntimeError("Classifier algorithm %s not supported" % algorithm)

            model.fit(emb_array, fit_labels)

            # Saving classifier model
            classifier_filename = get_classifier_path(classifiers_path, algorithm)
            with open(classifier_filename, 'wb') as outfile:
                pickle.dump((model, class_names, class_stats), outfile, protocol=2)
            print_fun('Saved classifier model to file "%s"' % classifier_filename)
            # update_data({'average_time_%s': '%.3fms' % average_time}, use_mlboard, mlboard)
    elif args.mode == 'CLASSIFY':
        summary_accuracy = 1
        # Classify images
        for algorithm in algorithms:
            print_fun('Testing classifier %s' % algorithm)
            classifier_filename = get_classifier_path(classifiers_path, algorithm)
            with open(classifier_filename, 'rb') as infile:
                (model, class_names, class_stats) = pickle.load(infile)

            print_fun('Loaded classifier model from file "%s"' % classifier_filename)

            predictions = model.predict_proba(emb_array)
            best_class_indices = np.argmax(predictions, axis=1)

            if isinstance(model, neighbors.KNeighborsClassifier):
                param_name = 'distance'
                # clf_name = "knn"
                (closest_distances, _) = model.kneighbors(emb_array)
                eval_values = closest_distances[:, 0]
            elif isinstance(model, svm.SVC):
                param_name = 'probability'
                # clf_name = "svm"
                eval_values = predictions[np.arange(len(best_class_indices)),
                                          best_class_indices]
            else:
                raise RuntimeError("Unsupported classifier type: %s" % type(model))

            for i in range(len(best_class_indices)):
                predicted = best_class_indices[i]
                if predicted == labels[i]:
                    print_fun('%4d %s: %s %.3f' % (
                        i, class_names[predicted], param_name, eval_values[i],
                    ))
                else:
                    print_fun('%4d %s: %s %.3f, WRONG! Should be %s.' % (
                        i, class_names[predicted], param_name, eval_values[i],
                        class_names[labels[i]],
                    ))

            accuracy = np.mean(np.equal(best_class_indices, labels))
            summary_accuracy = min(summary_accuracy, accuracy)

            rpt = confusion(labels, best_class_indices, class_names,
                            use_mlboard and not args.skip_draw_confusion_matrix)
            data = {
                'accuracy': accuracy,
                # 'average_time': '%.3fms' % average_time
            }
            if not args.skip_draw_confusion_matrix:
                data['#documents.confusion_matrix.html'] = rpt
            update_data(data, use_mlboard, mlboard)
            print_fun('Accuracy for %s: %.3f' % (algorithm, accuracy))

        if args.upload_model and summary_accuracy >= args.upload_threshold:
            timestamp = datetime.datetime.now().strftime('%s')
            model_name = 'facenet-classifier'
            if args.device == 'MYRIAD':
                model_name = model_name + "-movidius"
            version = '1.0.0-%s-%s' % (args.driver, timestamp)

            print_fun('Uploading model as %s:%s' % (model_name, version))
            upload_model(use_mlboard, mlboard, classifiers_path,
                         model_name, version)
def __init__(self, bg_remove_path):
    utils.print_fun('Load BG_REMOVE model')
    drv = sdrv.load_driver('tensorflow')
    self.drv = drv()
    self.drv.load_model(bg_remove_path)
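For context, a hedged sketch of what an apply_mask method on this wrapper might look like (the alignment script above calls bg_rm_drv.apply_mask). It follows the 160x160 {'image': ...} input and 'output' mask convention used by the other background-removal snippets here; the method body is an assumption, not the project's actual implementation:

def apply_mask(self, frame):
    # Hypothetical method: predict a foreground mask and zero out the background.
    inp = cv2.resize(frame[:, :, ::-1], (160, 160)).astype(np.float32) / 255
    outputs = self.drv.predict({'image': np.expand_dims(inp, axis=0)})
    mask = outputs['output'][0]
    mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]))
    return frame.astype(np.float32) * np.expand_dims(mask, axis=2)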
def process():
    size = 1024
    charset, _ = read_charset()
    global chrset_index
    chrset_index = charset

    names = fuzzyset.FuzzySet()
    names.add('stas khirman')
    names.add('khirman stas')
    names.add('stas')
    names.add('khirman')

    drv1 = driver.load_driver('tensorflow')
    serving1 = drv1()
    serving1.load_model('./m1')

    drv2 = driver.load_driver('tensorflow')
    serving2 = drv2()
    serving2.load_model('./m2')

    global to_process
    i_name = 1
    while runned:
        lock.acquire(blocking=True)
        frame = to_process
        if frame is None:
            lock.release()
            continue
        print('start frame')
        to_process = None

        # Resize the longer side down to `size`, keeping the aspect ratio,
        # then round both sides to a multiple of 32
        w = frame.shape[1]
        h = frame.shape[0]
        if w > h:
            if w > size:
                ratio = size / float(w)
                h = int(float(h) * ratio)
                w = size
        else:
            if h > size:
                ratio = size / float(h)
                w = int(float(w) * ratio)
                h = size
        w = fix_length(w, 32)
        h = fix_length(h, 32)

        original = frame[:, :, ::-1].copy()
        image = cv2.resize(original, (w, h))
        image = image.astype(np.float32) / 255.0
        image = np.expand_dims(image, 0)

        # Text detection: pixel classification and link scores
        outputs = serving1.predict({'image': image})
        cls = outputs['pixel_pos_scores'][0]
        links = outputs['link_pos_scores'][0]
        mask = decodeImageByJoin(cls, links, 0.5, 0.1)
        bboxes = maskToBoxes(mask, (original.shape[1], original.shape[0]))

        found_name = None
        candidates = []
        for i in range(len(bboxes)):
            box = np.int0(cv2.boxPoints(bboxes[i]))
            maxp = np.max(box, axis=0) + 2
            minp = np.min(box, axis=0) - 2
            y1 = max(0, minp[1])
            y2 = min(original.shape[0], maxp[1])
            x1 = max(0, minp[0])
            x2 = min(original.shape[1], maxp[0])
            text_img = original[y1:y2, x1:x2, :]
            if text_img.shape[0] < 4 or text_img.shape[1] < 4:
                continue
            # if bboxes[i][1][0] > bboxes[i][1][1]:
            #     angle = -1 * bboxes[i][2]
            # else:
            #     angle = -1 * (90 + bboxes[i][2])
            # if angle != 0:
            #     text_img = rotate_bound(text_img, angle)

            # Text recognition on the cropped region
            text_img = norm_image_for_text_prediction(text_img, 32, 320)
            text_img = np.expand_dims(text_img, 0)
            text = serving2.predict({'images': text_img})
            text = text['output'][0]
            text = get_text(text)
            if len(text) > 2:
                print('text: {}'.format(text))
                found = names.get(text)
                if (found is not None) and (len(found) > 0):
                    print(found[0])
                    if found[0][0] > 0.7:
                        text = found[0][1]
                        if ' ' in text:
                            found_name = (found[0][0], text)
                            candidates = []
                            break
                        else:
                            candidates.append(text)

        if (found_name is None) and len(candidates) > 0:
            found_name = choose_one(names, candidates)

        for i in bboxes:
            box = cv2.boxPoints(i)
            box = np.int0(box)
            original = cv2.drawContours(original, [box], 0, (255, 0, 0), 2)

        frame = np.ascontiguousarray(original[:, :, ::-1], np.uint8)
        if found_name is not None:
            add_overlays(frame, found_name[0], found_name[1])
        cv2.imwrite('results/result_{}.jpg'.format(i_name), frame)
        global result
        result = frame
        i_name += 1
        global last_processed
        last_processed = frame
        lock.release()
        print('stop frame')
def main(args):
    # Create a context object. This object owns the
    # handles to all connected realsense devices
    drv = driver.load_driver('tensorflow')
    serving = drv()
    serving.load_model(args.model)

    gray = 55
    offset = 500
    back = None

    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_device_from_file(args.input, repeat_playback=True)
    # Configure the pipeline to stream the depth stream
    config.enable_stream(rs.stream.depth)
    config.enable_stream(rs.stream.color)  # , 640, 480, rs.format.rgb8, 30)
    profile = pipeline.start(config)

    # Getting the depth sensor's depth scale (see rs-align example for explanation)
    depth_sensor = profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()
    print("Depth Scale is: ", depth_scale)

    align_to = rs.stream.color
    align = rs.align(align_to)

    # Create opencv window to render image in
    cv2.namedWindow("Video", cv2.WINDOW_AUTOSIZE)

    use_realsense = False
    while True:
        frames = pipeline.wait_for_frames()
        # Align the depth frame to color frame
        aligned_frames = align.process(frames)

        # Get aligned frames
        depth_frame = aligned_frames.get_depth_frame()  # a 640x480 depth image
        color_frame = aligned_frames.get_color_frame()
        depth_frame = np.asanyarray(depth_frame.get_data())
        # depth_color_image = cv2.applyColorMap(
        #     cv2.convertScaleAbs(depth_frame, alpha=0.08), cv2.COLORMAP_JET
        # )
        color_frame = np.asanyarray(color_frame.get_data())
        color_frame = color_frame[:, :, ::-1]

        if back is None:
            back = np.full([color_frame.shape[0], color_frame.shape[1], 1], gray)

        show_frame = process_frame(serving, color_frame, depth_frame,
                                   offset, back, use_realsense=use_realsense)
        images = np.vstack((color_frame, show_frame))

        # Render image in opencv window
        cv2.imshow("Video", images)
        key = cv2.waitKey(1)
        # if pressed escape exit program
        if key == 27:
            cv2.destroyAllWindows()
            break
        if key == 32:
            use_realsense = not use_realsense
        if key in {ord('+'), ord('=')}:
            offset += 50
            print(offset)
        if key in {ord('-'), ord('_')}:
            offset -= 50
            print(offset)
    # Alpha blend rectangular patches
    img_rect = (1.0 - alpha) * warp_image1 + alpha * warp_image2

    # Copy triangular region of the rectangular patch to the output image
    img_morph[r[1]:r[1] + r[3], r[0]:r[0] + r[2]] = \
        img_morph[r[1]:r[1] + r[3], r[0]:r[0] + r[2]] * (1 - mask) + img_rect * mask

    return img_morph


if __name__ == '__main__':
    args = parse_args()

    input_img = cv2.imread(args.input)
    avg_img = cv2.imread(args.avg)

    drv = driver.load_driver('openvino')
    face_driver = drv()
    face_driver.load_model(args.face_model)
    landmarks_driver = drv()
    landmarks_driver.load_model(args.landmarks_model)

    face_boxes = get_boxes(face_driver, input_img)
    # avg_box = get_boxes(face_driver, avg_img, threshold=0.5)[0]
    # avg_face = crop_by_box(avg_img, avg_box)
    avg_face = avg_img
    face = crop_by_box(input_img, face_boxes[0])

    cv2.namedWindow("Image")
    cv2.imshow("Image", avg_face)
    cv2.waitKey(0)
    os.makedirs(args.output, 0o755)
else:
    inputs = [args.video]

graph_path = get_graph_path(args.model, models_dir=args.modelsDir)
logger.debug('initialization %s : %s' % (args.model, graph_path))
w, h = model_wh(args.resolution)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
e = TfPoseEstimator(
    graph_path,
    target_size=(w, h),
    tf_config=config,
)
drv = driver.load_driver("tensorflow")
d = drv()
d.load_model(args.modelObjectDetection)

for inp in inputs:
    logger.info(f"processing video {inp}")
    cap = cv2.VideoCapture(inp)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    if args.rotate == "cw" or args.rotate == "ccw":
        width, height = height, width

    video_writer = None
    if args.output:
        if process_dir:
def main(args):
    dataset = facenet.get_dataset(args.data_dir)

    # Check that there is at least one training image per class
    for cls in dataset:
        assert len(cls.image_paths) > 0, \
            'There must be at least one image for each class in the dataset'

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))

    # Load the model
    print('Loading feature extraction model')

    # Load driver
    drv = driver.load_driver(args.driver)
    # Instantiate driver
    serving = drv()
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    nrof_images = len(paths)
    nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / args.batch_size))
    embeddings_size = nrof_images
    emb_array = np.zeros((embeddings_size, 512))

    start_time = time.time()
    # Run 100 passes over the dataset to measure sustained throughput
    for j in range(100):
        for i in range(nrof_batches_per_epoch):
            start_index = i * args.batch_size
            end_index = min((i + 1) * args.batch_size, nrof_images)
            paths_batch = paths[start_index:end_index]
            images = facenet.load_data(paths_batch, False, False, args.image_size)

            if serving.driver_name == 'tensorflow':
                feed_dict = {'input:0': images, 'phase_train:0': False}
            elif serving.driver_name == 'openvino':
                input_name = list(serving.inputs.keys())[0]
                # Transpose image for channels-first format
                images = images.transpose([0, 3, 1, 2])
                feed_dict = {input_name: images}
            else:
                raise RuntimeError('Driver %s currently not supported' %
                                   serving.driver_name)
            outputs = serving.predict(feed_dict)
    end_time = time.time()

    nrof_batches_per_epoch *= 100
    print("Duration: {} sec/sample batch count:{}".format(
        (end_time - start_time) / nrof_batches_per_epoch,
        nrof_batches_per_epoch))
    print("Speed: {} sample/sec batch count:{}".format(
        nrof_batches_per_epoch / (end_time - start_time),
        nrof_batches_per_epoch))
def main():
    frame_interval = 2  # Number of frames after which to run face detection
    fps_display_interval = 5  # seconds
    frame_rate = 0
    frame_count = 0
    start_time = time.time()

    parser = get_parser()
    args = parser.parse_args()

    drv = driver.load_driver('tensorflow')
    serving = drv()
    serving.load_model('./model')

    if args.camera:
        video_capture = cv2.VideoCapture(args.camera)
    else:
        video_capture = cv2.VideoCapture(0)

    width, height = get_size(args.size)
    back = cv2.imread('./newback.jpg')[:, :, ::-1]
    back = cv2.resize(back, (width, height))
    # back = np.full([height, width, 1], 100)
    back = back.astype(np.float32)

    try:
        while True:
            _, frame = video_capture.read()
            # print("Original {}".format(frame.shape))
            frame = imresample(frame, height, width)
            if (frame_count % frame_interval) == 0:
                # BGR -> RGB
                frame = frame[:, :, ::-1]
                frame = frame.astype(np.float32)
                input = cv2.resize(frame, (160, 160))
                input = np.asarray(input, np.float32) / 255.0
                outputs = serving.predict(
                    {'image': np.expand_dims(input, axis=0)})
                mask = outputs['output'][0]
                mask = cv2.resize(mask, (width, height))
                mask = np.expand_dims(mask, 2)
                # Show the original frame and the composited one side by side
                frame = np.concatenate(
                    [frame, frame * mask + back * (1 - mask)], axis=1)
                # print('rgb_frame {}'.format(rgb_frame.shape))
                # rgb_frame = rgb_frame.astype(np.uint8)
                frame = np.ascontiguousarray(frame[:, :, ::-1], np.uint8)

            # Check our current fps
            end_time = time.time()
            if (end_time - start_time) > fps_display_interval:
                frame_rate = int(frame_count / (end_time - start_time))
                start_time = time.time()
                frame_count = 0

            add_overlays(frame, frame_rate / 2)
            cv2.imshow('Video', frame)
            frame_count += 1

            key = cv2.waitKey(1)
            # Wait for 'q' or Esc
            if key == ord('q') or key == 27:
                break
    except (KeyboardInterrupt, SystemExit) as e:
        print('Caught %s: %s' % (e.__class__.__name__, e))

    # When everything is done, release the capture
    video_capture.release()
    cv2.destroyAllWindows()
    print('Finished')
def main(args):
    drv = driver.load_driver('tensorflow')
    serving = drv()
    serving.load_model(args.model)

    color_frame = np.load(args.color)
    depth_frame = np.load(args.depth)
    width, height = color_frame.shape[1], color_frame.shape[0]

    gray = 55
    back = np.full([height, width, 1], gray)

    frame = color_frame
    frame = frame.astype(np.float32)
    inputs = cv2.resize(frame, (160, 160))
    inputs = np.asarray(inputs, np.float32) / 255.0
    outputs = serving.predict({'image': np.expand_dims(inputs, axis=0)})
    mask = outputs['output'][0]
    mask = cv2.resize(mask, (width, height))
    mask = np.expand_dims(mask, 2)

    threshold = args.threshold
    use_realsense = False
    while True:
        if use_realsense:
            mask_2d = np.copy(mask).reshape(mask.shape[0], mask.shape[1])
            center = np.round(
                ndimage.measurements.center_of_mass(mask_2d)).astype(int)
            x = center[0]
            y = center[1]
            depth = depth_frame[x][y]
            max_depth = depth * threshold
            # Drop pixels whose depth exceeds foreground * threshold
            mask_2d[depth_frame >= max_depth] = 0
            mask_3d = mask_2d.reshape(mask.shape[0], mask.shape[1], 1)

            show_frame = np.concatenate(
                [frame, frame * mask_3d + back * (1 - mask_3d)], axis=1)
            show_frame = np.ascontiguousarray(show_frame[:, :, ::-1], np.uint8)
        else:
            show_frame = np.concatenate(
                [frame, frame * mask + back * (1 - mask)], axis=1)
            show_frame = np.ascontiguousarray(show_frame[:, :, ::-1], np.uint8)

        cv2.imshow('Video', show_frame)

        key = cv2.waitKey(1)
        # Wait for 'q' or Esc
        if key == ord('q') or key == 27:
            break
        if key == 32:
            use_realsense = not use_realsense
        if key in {ord('+'), ord('=')}:
            threshold += 0.025
            print(threshold)
        if key in {ord('-'), ord('_')}:
            threshold -= 0.025
            print(threshold)

    cv2.destroyAllWindows()
    print('Finished')
def main(args):
    dataset = facenet.get_dataset(args.data_dir)

    # Check that there is at least one training image per class
    for cls in dataset:
        assert len(cls.image_paths) > 0, \
            'There must be at least one image for each class in the dataset'

    paths, labels = facenet.get_image_paths_and_labels(dataset)

    print('Number of classes: %d' % len(dataset))
    print('Number of images: %d' % len(paths))

    # Load the model
    print('Loading feature extraction model')

    # Load driver
    drv = driver.load_driver(args.driver)
    # Instantiate driver with serving hooks
    serving = drv(
        preprocess=serving_hook.preprocess,
        postprocess=serving_hook.postprocess,
        init_hook=serving_hook.init_hook,
        classifier=args.classifier,
        use_tf='False',
        use_face_detection='True',
        face_detection_path=args.face_detection_path,
    )
    serving.load_model(
        args.model,
        inputs='input:0,phase_train:0',
        outputs='embeddings:0',
        device=args.device,
        flexible_batch_size=True,
    )

    # Run forward pass to calculate embeddings
    print('Calculating features for images')
    time_requests = 0.0
    epochs = 2

    start_time = time.time()
    for j in range(epochs):
        for path in paths:
            print('Processing %s...' % path)
            with open(path, 'rb') as f:
                data = f.read()

            t = time.time()
            feed_dict = {'input': np.array(data)}
            outputs = serving.predict_hooks(feed_dict, context=Context())
            delta = (time.time() - t) * 1000
            time_requests += delta

    duration = float(time.time() - start_time)
    print()
    print('Total time: %.3fs' % duration)
    per_request_ms = float(time_requests) / epochs / len(paths)
    print('Time per request: %.3fms' % per_request_ms)
    speed = 1 / (per_request_ms / 1000)
    print('Speed: {} sample/sec'.format(speed))
from ml_serving.drivers import driver
import cv2
import os
from scipy import ndimage
import numpy as np
import json
import shutil
import logging
import argparse

LOG = logging.getLogger(__name__)

face_drv = driver.load_driver("model")()
mat_drv = driver.load_driver("model")()
face_input_name = ''
face_input_shape = None
face_output_name = ''
g_mean = np.array(([126.88, 120.24, 112.19])).reshape([1, 1, 3])
unknown_code = 128


def generate_trimap(alpha):
    trimap = np.copy(alpha)
    k_size = 20
    # Mark the band where grey dilation and erosion disagree as "unknown"
    trimap[np.where(
        (ndimage.grey_dilation(alpha[:, :], size=(k_size, k_size)) -
         ndimage.grey_erosion(alpha[:, :], size=(k_size, k_size))) != 0
    )] = unknown_code
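A quick usage sketch for generate_trimap, assuming a single-channel uint8 alpha matte and that the function goes on to return trimap (the snippet above is cut off before its return statement); the input mask here is synthetic:

# Synthetic example: a white square on a black background as the alpha matte.
alpha = np.zeros((256, 256), np.uint8)
alpha[64:192, 64:192] = 255

trimap = generate_trimap(alpha)
# Pixels stay 0 (background) or 255 (foreground) except for a ~20px band
# around the square's edges, which is set to unknown_code (128).
print(np.unique(trimap))  # -> [  0 128 255]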