def video_process(video_path, pm_model, save_video_flag):
    video_thread = VideoThread(video_path, 1280, 960, 1, 'video thread')
    video_thread.start()
    serial_thread = SerialThread('serial thread')
    serial_thread.start()

    init_flag = True
    while True:
        frame_read = video_thread.get_image()
        if frame_read is None:
            print('Failed to get video frame!')
            break

        # if init_flag and save_video_flag:
        #     # in video mode, write the detection results to a video file
        #     save_name = 'save_video.avi'
        #     print('Saving video to ' + save_name)
        #     out_video = cv2.VideoWriter(save_name, cv2.VideoWriter_fourcc(*"MJPG"), 10.0,
        #                                 (frame_read.shape[1], frame_read.shape[0]))
        #     init_flag = False

        if init_flag:
            init_flag = False
            continue

        # [class id, confidence, center point, top-left corner, bottom-right corner]
        boxes = pm_model.predict(frame_read)
        print_results(boxes, pm_model.label_names, init_flag)
        draw_results(frame_read, boxes, pm_model.colors, pm_model.label_names, False)
        serial_thread.set_data(boxes)
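# Usage sketch (assumption): a minimal entry point for video_process() above.
# The model wrapper class name (PMModel) and its constructor are hypothetical;
# the loop only requires an object exposing predict(), label_names, and colors.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--video', required=True, help='path to the input video file')
    parser.add_argument('--save-video', action='store_true', help='save the annotated output video')
    args = parser.parse_args()

    pm_model = PMModel()  # hypothetical wrapper with predict() / label_names / colors
    video_process(args.video, pm_model, args.save_video)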
def train(self):
    # initialize
    self.sess.run(tf.global_variables_initializer())

    # The graph saved here is only an empty shell; it has to be combined with a
    # checkpoint via freeze_graph before it can be used.
    tf.train.write_graph(self.sess.graph_def,
                         logdir=os.path.join(self.graph_dir, self.model_dir()),
                         name='empty_graph.pbtxt', as_text=True)
    writer = tf.summary.FileWriter(
        os.path.join(self.log_dir, self.model_dir()), self.sess.graph)
    self.saver = tf.train.Saver(max_to_keep=3)

    # dataset
    dataset = Dataset(self.input_height, self.input_width, self.batch_size,
                      self.data_dir)

    # imgaug
    aug_seq = iaa.SomeOf((1, 2), [
        iaa.OneOf([
            iaa.Affine(rotate=(-30, 30), name="Rotate"),
            iaa.Affine(scale=(0.3, 1.0), name="Scale")
        ]),
        iaa.OneOf([
            iaa.Multiply((0.5, 1.5), name="Multiply"),
            iaa.GaussianBlur((0, 3.0), name="GaussianBlur"),
            iaa.CoarseDropout((0.05, 0.2), size_percent=(0.01, 0.1),
                              name="CoarseDropout")
        ])
    ])

    # deactivate certain augmenters for binmasks:
    # on binmasks we only want to execute crop, horizontal flip, and affine transformations
    def activator_binmasks(images, augmenter, parents, default):
        if augmenter.name in ["Multiply", "GaussianBlur", "CoarseDropout"]:
            return False
        else:
            return default

    hooks_binmasks = ia.HooksImages(activator=activator_binmasks)

    # load checkpoint
    counter = 0
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
        counter = checkpoint_counter
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    # list for saving past IOUs
    val_ious = []

    # training
    start_time = time.time()
    for epoch_i in range(self.epoch):
        dataset.reset_batch_pointer()

        for batch_i in range(dataset.num_batches_in_epoch()):
            self.sess.run(self.net.running_vars_initializer)

            batch_inputs, batch_targets = dataset.next_batch()
            #                  dtype    scale    shape
            # batch_inputs:    uint8    0~255    (N, 224, 224, 3)
            # batch_targets:   uint8    0, 255   (N, 224, 224, 1)
            batch_inputs = np.reshape(
                batch_inputs,
                (self.batch_size, self.input_height, self.input_width, 3))
            batch_targets = np.reshape(
                batch_targets,
                (self.batch_size, self.input_height, self.input_width, 1))

            # augmentation (same deterministic sequence for images and masks)
            aug_seq_det = aug_seq.to_deterministic()
            batch_inputs = aug_seq_det.augment_images(batch_inputs)
            batch_targets = aug_seq_det.augment_images(batch_targets,
                                                       hooks=hooks_binmasks)

            # per image standardization
            _batch_inputs = batch_inputs.astype(np.float32)
            _batch_targets = np.multiply(batch_targets, 1.0 / 255).astype(np.int32)

            feed_dict = {
                self.net.inputs: _batch_inputs,
                self.net.targets: _batch_targets
            }

            _, step_loss = self.sess.run([self.train_op, self.loss_op],
                                         feed_dict=feed_dict)
            self.sess.run(self.net.iou_op, feed_dict=feed_dict)
            step_iou = self.sess.run(self.net.iou, feed_dict=feed_dict)
            step_summary = self.sess.run(self.summary_op, feed_dict=feed_dict)

            counter += 1
            print("Epoch: [%2d/%2d] [%4d/%4d] time: %5.2f, Loss: %.5f, IOU: %.5f"
                  % (epoch_i, self.epoch, batch_i, dataset.num_batches_in_epoch(),
                     time.time() - start_time, step_loss, step_iou))

            writer.add_summary(step_summary, global_step=counter)

            # Validation
            if batch_i % self.validation_step == 0:
                self.sess.run(self.net.running_vars_initializer)

                val_inputs, val_targets = dataset.val_set()
                #                dtype    scale    shape
                # val_inputs:    uint8    0~255    (N, 224, 224, 3)
                # val_targets:   uint8    0, 255   (N, 224, 224, 1)
                val_inputs = np.reshape(
                    val_inputs, (-1, self.input_height, self.input_width, 3))
                val_targets = np.reshape(
                    val_targets, (-1, self.input_height, self.input_width, 1))

                _val_inputs = val_inputs.astype(np.float32)
                _val_targets = np.multiply(val_targets, 1.0 / 255).astype(np.int32)

                feed_dict = {
                    self.net.inputs: _val_inputs,
                    self.net.targets: _val_targets
                }
                self.sess.run(self.net.iou_op, feed_dict=feed_dict)
                val_iou = self.sess.run(self.net.iou, feed_dict=feed_dict)

                # matplotlib
                val_preds = self.sess.run(tf.squeeze(self.net.preds, axis=3),
                                          feed_dict=feed_dict)
                draw_results(val_iou, val_inputs, np.squeeze(val_targets, axis=3),
                             val_preds, counter, self.sample_dir,
                             self.model_dir(), num_samples=10)

                print()
                print("=====================================================================")
                val_ious.append(val_iou)
                max_iou = max(val_ious)
                print("IOUs in time:", val_ious)
                print("Best IOU: %.4f" % max_iou)
                print("Validation IOU: %.4f" % val_iou)

                if val_iou >= max_iou:
                    self.save(self.checkpoint_dir, counter)
                    print("Validation IOU exceeded the current best. Saved checkpoints!!!")
                else:
                    print("IOU hasn't increased. Did not save checkpoints...")
                print("=====================================================================")
                print()
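# Sketch of the freeze step mentioned in the comment inside train(): the saved
# empty_graph.pbtxt is only a shell and has to be combined with a checkpoint via
# freeze_graph. All paths and the output node name ('preds') below are assumptions.
import tensorflow as tf
from tensorflow.python.tools import freeze_graph

freeze_graph.freeze_graph(
    input_graph='graph/model_dir/empty_graph.pbtxt',                   # written by train()
    input_saver='',
    input_binary=False,                                                # pbtxt, not binary
    input_checkpoint=tf.train.latest_checkpoint('checkpoint/model_dir'),
    output_node_names='preds',                                         # assumed output node
    restore_op_name='save/restore_all',
    filename_tensor_name='save/Const:0',
    output_graph='graph/model_dir/frozen_graph.pb',
    clear_devices=True,
    initializer_nodes='')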
def local_train(training_dataset):
    """Train on a single GPU using the TensorFlow Dataset API."""
    iterator = training_dataset.make_one_shot_iterator()
    one_element = iterator.get_next()
    net, total_loss, log_tensors = make_model(*one_element, is_train=True, reuse=False)
    x_ = net.img                # net input
    last_conf = net.last_conf   # net output
    last_paf = net.last_paf     # net output
    confs_ = net.confs          # GT
    pafs_ = net.pafs            # GT
    mask = net.m1               # mask1, GT
    # net.m2 = m2               # mask2, GT
    stage_losses = net.stage_losses
    l2_loss = net.l2_loss

    global_step = tf.Variable(1, trainable=False)
    print('Start - n_step: {} batch_size: {} lr_init: {} lr_decay_every_step: {}'.format(
        n_step, batch_size, lr_init, lr_decay_every_step))
    with tf.variable_scope('learning_rate'):
        lr_v = tf.Variable(lr_init, trainable=False)

    opt = tf.train.MomentumOptimizer(lr_v, 0.9)
    train_op = opt.minimize(total_loss, global_step=global_step)
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)

    # start training
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        # restore pretrained weights
        try:
            # tl.files.load_and_assign_npz(sess, os.path.join(model_path, 'pose.npz'), net)
            tl.files.load_and_assign_npz_dict(sess=sess,
                                              name=os.path.join(model_path, 'pose.npz'))
        except Exception:
            print("no pretrained model")

        # train until the end
        sess.run(tf.assign(lr_v, lr_init))
        while True:
            tic = time.time()
            step = sess.run(global_step)
            if step != 0 and (step % lr_decay_every_step == 0):
                new_lr_decay = lr_decay_factor ** (step // lr_decay_every_step)
                sess.run(tf.assign(lr_v, lr_init * new_lr_decay))

            [_, _loss, _stage_losses, _l2, conf_result, paf_result] = \
                sess.run([train_op, total_loss, stage_losses, l2_loss, last_conf, last_paf])

            # tstring = time.strftime('%d-%m %H:%M:%S', time.localtime(time.time()))
            lr = sess.run(lr_v)
            print('Total Loss at iteration {} / {} is: {} Learning rate {:10e} l2_loss {:10e} Took: {}s'.format(
                step, n_step, _loss, lr, _l2, time.time() - tic))
            for ix, ll in enumerate(_stage_losses):
                print('Network#', ix, 'For Branch', ix % 2 + 1, 'Loss:', ll)

            # save intermediate results and model
            if (step != 0) and (step % save_interval == 0):
                # save some results
                [img_out, confs_ground, pafs_ground, conf_result, paf_result, mask_out] = \
                    sess.run([x_, confs_, pafs_, last_conf, last_paf, mask])
                draw_results(img_out, confs_ground, conf_result, pafs_ground,
                             paf_result, mask_out, 'train_%d_' % step)

                # save model
                # tl.files.save_npz(net.all_params, os.path.join(model_path, 'pose' + str(step) + '.npz'), sess=sess)
                # tl.files.save_npz(net.all_params, os.path.join(model_path, 'pose.npz'), sess=sess)
                tl.files.save_npz_dict(net.all_params,
                                       os.path.join(model_path, 'pose' + str(step) + '.npz'),
                                       sess=sess)
                tl.files.save_npz_dict(net.all_params,
                                       os.path.join(model_path, 'pose.npz'), sess=sess)

            if step == n_step:  # training finished
                break
                feed_dict={
                    x: x_,
                    confs: confs_,
                    pafs: pafs_,
                    img_mask1: mask1,
                    img_mask2: mask2
                })

            # tstring = time.strftime('%d-%m %H:%M:%S', time.localtime(time.time()))
            lr = sess.run(lr_v)
            print('Total Loss at iteration {} / {} is: {} Learning rate {:10e} weight_norm {:10e} Took: {}s'.format(
                step, n_step, the_loss, lr, weight_norm, time.time() - tic))
            for ix, ll in enumerate(loss_ll):
                print('Network#', ix, 'For Branch', ix % 2 + 1, 'Loss:', ll)

            # save intermediate results and model
            if (step != 0) and (step % save_interval == 0):
                draw_results(x_, confs_, conf_result, pafs_, paf_result, mask,
                             'train_%d_' % step)
                tl.files.save_npz_dict(
                    net.all_params,
                    os.path.join(model_path, 'pose' + str(step) + '.npz'),
                    sess=sess)
                tl.files.save_npz_dict(net.all_params,
                                       os.path.join(model_path, 'pose.npz'),
                                       sess=sess)

            if step == n_step:  # training finished
                break

elif config.TRAIN.train_mode == 'datasetapi':  # TODO
    # Training with the TensorFlow Dataset API is usually faster than with placeholders.
    raise Exception("TODO")

elif config.TRAIN.train_mode == 'distributed':  # TODO
    # Train in distributed mode.
    raise Exception("TODO tl.distributed.Trainer")
def infer(self):
    cap = cv2.VideoCapture(self.video_path)
    if self.save:
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        out = cv2.VideoWriter(f'{self.dataset_path}/processed_video.mp4',
                              cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

    # [face detection and recognition, person detection and tracking, mapping faces with persons]
    time = [[], [], []]
    initial_time = datetime.now()
    frames = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Got an error while trying to retrieve a new frame, or the video has ended.")
            break

        frames += 1
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        start_time = datetime.now().microsecond
        faces, idx, verif_scores = self.face_identificator(frame)
        delta = datetime.now().microsecond - start_time
        if delta > 0:
            time[0].append(delta)

        start_time = datetime.now().microsecond
        bboxes, identities = self.tracker(frame)
        delta = datetime.now().microsecond - start_time
        if delta > 0:
            time[1].append(delta)

        # TODO: fix the drop in track stability
        # # show tracked bboxes
        # for i, bbox in enumerate(bboxes):
        #     frame = draw_results(frame, bbox, identities[i], '1')
        # cv2.imshow('Frame', frame)
        # if cv2.waitKey(1) & 0xFF == ord('q'):
        #     break
        # continue

        # Create ident info for new tracks
        if is_proper_predictions(bboxes):
            for ident in identities:
                if ident not in list(self.mapped_tracks.keys()):
                    self.mapped_tracks[ident] = [
                        'Undefined', self.face_identificator.threshold
                    ]

        if is_proper_predictions(faces):
            start_time = datetime.now().microsecond
            mapped_bboxes, identities, scores = self.map_faces_with_persons(
                bboxes, faces, idx, identities, verif_scores)

            # Update mapping
            self.update_relations(identities, scores)
            delta = datetime.now().microsecond - start_time
            if delta > 0:
                time[2].append(delta)

        if self.show_results:
            tracks_numbers = list(self.mapped_tracks.keys())
            if len(tracks_numbers) != 0:
                for i, box in enumerate(bboxes):
                    frame = draw_results(
                        frame, box,
                        self.mapped_tracks[tracks_numbers[i]][0],
                        self.mapped_tracks[tracks_numbers[i]][1])

            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imshow("Verified", frame)
            if self.save:
                out.write(frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    if self.save:
        out.release()
    cv2.destroyAllWindows()

    avg_fps = float(frames) / (datetime.now() - initial_time).seconds
    print('\n\nRetinaFace + FaceNet: ', int(average(time[0])),
          '| YOLOv3 + DeepSort: ', int(average(time[1])),
          '| Update Relations: ', int(average(time[2])), '\n')
    print('Average FPS: ', avg_fps)
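# The per-stage timing in infer() above relies on datetime.now().microsecond,
# which wraps around every second (hence the `delta > 0` guards). A small helper
# like this one, built on time.perf_counter, measures the same intervals without
# the wrap; it is a sketch, not part of the original code.
import time
from contextlib import contextmanager

@contextmanager
def stage_timer(bucket):
    """Append the elapsed wall-clock time of the wrapped block (in ms) to bucket."""
    start = time.perf_counter()
    try:
        yield
    finally:
        bucket.append((time.perf_counter() - start) * 1000.0)

# usage, e.g. inside the frame loop:
#     with stage_timer(time[0]):
#         faces, idx, verif_scores = self.face_identificator(frame)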
            print('Total Loss at iteration {} / {} is: {} Learning rate {:10e} weight_norm {:10e} Took: {}s'.format(
                step, n_step, the_loss, lr, weight_norm, time.time() - tic))
            for ix, ll in enumerate(loss_ll):
                print('Network#', ix, 'For Branch', ix % 2 + 1, 'Loss:', ll)

            # save intermediate results and model
            if (step != 0) and (step % save_interval == 0):
                img_out = tran_batch[0]
                confs_ground = tran_batch[1][:, :, :, :n_pos]
                pafs_ground = tran_batch[1][:, :, :, n_pos:]
                mask_out = tran_batch[2]
                draw_results(img_out, confs_ground, conf_result, pafs_ground,
                             paf_result, mask_out, 'train_%d_' % step)

                # tl.files.save_npz(net.all_params, os.path.join(model_path, 'pose' + str(step) + '.npz'), sess=sess)
                # tl.files.save_npz(net.all_params, os.path.join(model_path, 'pose.npz'), sess=sess)
                tl.files.save_npz_dict(net.all_params,
                                       os.path.join(model_path, 'pose' + str(step) + '.npz'),
                                       sess=sess)
                tl.files.save_npz_dict(net.all_params,
                                       os.path.join(model_path, 'pose.npz'),
                                       sess=sess)

            if step == n_step:  # training finished
                break
def inference(input_files):
    n_pos = config.MODEL.n_pos
    model_path = config.MODEL.model_path

    # define model
    x = tf.placeholder(tf.float32, [None, image_height, image_width, 3], "image")
    _, _, _, net = model(x, n_pos, None, None, False, False)

    # get output from network
    conf_tensor = tl.layers.get_layers_with_name(net, 'model/cpm/stage6/branch1/conf')[0]
    pafs_tensor = tl.layers.get_layers_with_name(net, 'model/cpm/stage6/branch2/pafs')[0]

    def get_peak(pafs_tensor):
        from inference.smoother import Smoother
        smoother = Smoother({'data': pafs_tensor}, 25, 3.0)
        gaussian_heatMat = smoother.get_output()
        max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat,
                                          window_shape=(3, 3),
                                          pooling_type='MAX',
                                          padding='SAME')
        tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor),
                                gaussian_heatMat,
                                tf.zeros_like(gaussian_heatMat))
        return tensor_peaks

    peak_tensor = get_peak(pafs_tensor)

    # restore model parameters
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    if model_file:
        tl.files.load_and_assign_npz_dict(os.path.join(model_path, model_file), sess)

    images = [load_image(f) for f in input_files]

    # inference
    # the 1st run needs extra time to compile
    # _, _ = sess.run([conf_tensor, pafs_tensor], feed_dict={x: [im]})
    st = time.time()
    conf, pafs, peak = sess.run([conf_tensor, pafs_tensor, peak_tensor],
                                feed_dict={x: images})
    t = time.time() - st
    print("get maps took {}s i.e. {} FPS".format(t, 1. / t))
    # print(conf.shape, pafs.shape, peak.shape)

    # get keypoint coordinates from the conf, paf, and peak maps;
    # this part uses OpenPose's official C++ post-processing (pafprocess)
    from inference.estimator import Human

    def estimate_paf(peaks, heat_mat, paf_mat):
        pafprocess.process_paf(peaks, heat_mat, paf_mat)  # C++

        humans = []
        for human_id in range(pafprocess.get_num_humans()):
            human = Human([])
            is_added = False

            for part_idx in range(18):
                c_idx = int(pafprocess.get_part_cid(human_id, part_idx))
                if c_idx < 0:
                    continue

                is_added = True
                human.body_parts[part_idx] = BodyPart(
                    '%d-%d' % (human_id, part_idx), part_idx,
                    float(pafprocess.get_part_x(c_idx)) / heat_mat.shape[1],
                    float(pafprocess.get_part_y(c_idx)) / heat_mat.shape[0],
                    pafprocess.get_part_score(c_idx))

            if is_added:
                score = pafprocess.get_score(human_id)
                human.score = score
                humans.append(human)
        return humans

    for a, b, c in zip(peak, conf, pafs):
        humans = estimate_paf(a, b, c)
        print(humans)

    # draw maps
    draw_results(images, None, conf, None, pafs, None, 'inference')
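# Imports assumed by inference() above; the exact module paths are an assumption
# based on the openpose-plus / tf-pose-estimation layout and may differ in other
# checkouts.
from inference.estimator import BodyPart       # used next to Human inside estimate_paf()
from inference.pafprocess import pafprocess    # SWIG-wrapped C++ PAF post-processing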
def parallel_train(training_dataset):
    hvd.init()  # Horovod

    ds = training_dataset.shuffle(buffer_size=4096)
    ds = ds.shard(num_shards=hvd.size(), index=hvd.rank())
    ds = ds.repeat(n_epoch)
    ds = ds.map(_map_fn, num_parallel_calls=4)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=1)

    iterator = ds.make_one_shot_iterator()
    one_element = iterator.get_next()
    net, total_loss, log_tensors = make_model(*one_element, is_train=True, reuse=False)
    x_ = net.img                # net input
    last_conf = net.last_conf   # net output
    last_paf = net.last_paf     # net output
    confs_ = net.confs          # GT
    pafs_ = net.pafs            # GT
    mask = net.m1               # mask1, GT
    # net.m2 = m2               # mask2, GT
    stage_losses = net.stage_losses
    l2_loss = net.l2_loss

    global_step = tf.Variable(1, trainable=False)
    scaled_lr = lr_init * hvd.size()  # Horovod: scale the learning rate linearly
    with tf.variable_scope('learning_rate'):
        lr_v = tf.Variable(scaled_lr, trainable=False)

    opt = tf.train.MomentumOptimizer(lr_v, 0.9)
    opt = hvd.DistributedOptimizer(opt)  # Horovod
    train_op = opt.minimize(total_loss, global_step=global_step)
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True  # Horovod
    config.gpu_options.visible_device_list = str(hvd.local_rank())  # Horovod

    # Add variable initializer.
    init = tf.global_variables_initializer()

    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    bcast = hvd.broadcast_global_variables(0)  # Horovod

    # Horovod: adjust the number of steps based on the number of GPUs.
    global n_step, lr_decay_every_step
    n_step = n_step // hvd.size() + 1  # Horovod
    lr_decay_every_step = lr_decay_every_step // hvd.size() + 1  # Horovod

    # Start training
    with tf.Session(config=config) as sess:
        init.run()
        bcast.run()  # Horovod
        print('Worker{}: Initialized'.format(hvd.rank()))
        print('Worker{}: Start - n_step: {} batch_size: {} lr_init: {} lr_decay_every_step: {}'.format(
            hvd.rank(), n_step, batch_size, lr_init, lr_decay_every_step))

        # restore pre-trained weights
        try:
            # tl.files.load_and_assign_npz(sess, os.path.join(model_path, 'pose.npz'), net)
            tl.files.load_and_assign_npz_dict(sess=sess, name=os.path.join(model_path, 'pose.npz'))
        except Exception:
            print("no pre-trained model")

        # train until the end
        while True:
            step = sess.run(global_step)
            if step == n_step:
                break

            tic = time.time()
            if step != 0 and (step % lr_decay_every_step == 0):
                new_lr_decay = lr_decay_factor ** (step // lr_decay_every_step)
                sess.run(tf.assign(lr_v, scaled_lr * new_lr_decay))

            [_, _loss, _stage_losses, _l2, conf_result, paf_result] = \
                sess.run([train_op, total_loss, stage_losses, l2_loss, last_conf, last_paf])

            # tstring = time.strftime('%d-%m %H:%M:%S', time.localtime(time.time()))
            lr = sess.run(lr_v)
            print('Worker{}: Total Loss at iteration {} / {} is: {} Learning rate {:10e} l2_loss {:10e} Took: {}s'.format(
                hvd.rank(), step, n_step, _loss, lr, _l2, time.time() - tic))
            for ix, ll in enumerate(_stage_losses):
                print('Worker{}:'.format(hvd.rank()), 'Network#', ix, 'For Branch', ix % 2 + 1, 'Loss:', ll)

            # save intermediate results and model
            if hvd.rank() == 0:  # Horovod
                if (step != 0) and (step % save_interval == 0):
                    # save some results
                    [img_out, confs_ground, pafs_ground, conf_result, paf_result, mask_out] = \
                        sess.run([x_, confs_, pafs_, last_conf, last_paf, mask])
                    draw_results(img_out, confs_ground, conf_result, pafs_ground,
                                 paf_result, mask_out, 'train_%d_' % step)

                    # save model
                    # tl.files.save_npz(net.all_params, os.path.join(model_path, 'pose' + str(step) + '.npz'), sess=sess)
                    # tl.files.save_npz(net.all_params, os.path.join(model_path, 'pose.npz'), sess=sess)
                    tl.files.save_npz_dict(net.all_params,
                                           os.path.join(model_path, 'pose' + str(step) + '.npz'),
                                           sess=sess)
                    tl.files.save_npz_dict(net.all_params,
                                           os.path.join(model_path, 'pose.npz'),
                                           sess=sess)
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to the network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    mqtt_client = connect_mqtt()

    infer_network = Network(args.model)
    infer_network.load_model(args.device, args.cpu_extension)
    n, c, h, w = infer_network.get_input_shape()

    input_validated, single_image_mode = utils.validate_input(args.input)
    cap = cv2.VideoCapture(input_validated)
    if not cap.isOpened():
        exit("Error: couldn't open input file")

    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    input_width = int(cap.get(3))
    input_height = int(cap.get(4))
    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    frame_count = 0

    # stats vars
    current_people_before = 0
    current_people_now = 0
    current_people_buffer = 0
    total_people_count = 0
    time_in_frame = 0.0      # time the currently detected person has stayed so far [sec]
    total_times = [0.0]      # list of total time in frame for all people detected so far
    average_time = 0.0       # average time in frame for all people detected so far
    new_person_detected = False

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        start_time = time.time()
        processed_frame = utils.process_input(frame, h, w)
        input_dict = infer_network.get_inputs(processed_frame, h, w, SCALE)
        request_handle = infer_network.exec_inference(input_dict, 0)
        infer_network.wait(request_handle)
        output = infer_network.get_output(request_handle)
        boxes = utils.process_output(output, args.prob_threshold, input_width, input_height)
        inference_time = int((time.time() - start_time) * 1000.0)

        frame_count = frame_count + 1
        current_people_now = len(boxes)

        if not single_image_mode:
            # working with video
            if current_people_now != current_people_before:
                current_people_buffer = current_people_buffer + 1
                new_person_detected = False
                if current_people_buffer == FILTER_COUNT:
                    current_people_before = current_people_now
                    current_people_buffer = 0
                    if current_people_now != 0:
                        # a new person was detected
                        total_people_count = total_people_count + 1
                        mqtt_client.publish("person", json.dumps({"count": current_people_before}))
                        # mqtt_client.publish("person", json.dumps({"total": total_people_count}))  # removed because the UI calculates it
                        new_person_detected = True
                    else:
                        # no detections on the frame anymore, store the time the person was in frame
                        total_times.append(time_in_frame)
                        mqtt_client.publish("person/duration", json.dumps({"duration": time_in_frame}))
                        mqtt_client.publish("person", json.dumps({"count": 0}))
                        average_time = sum(total_times) / total_people_count
                        time_in_frame = 0

            if new_person_detected:
                time_in_frame = time_in_frame + 1 / frame_rate

            utils.draw_results(frame, boxes, current_people_before, total_people_count,
                               time_in_frame, average_time, inference_time)
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()
        else:
            # working with a single image
            utils.draw_results(frame, boxes, current_people_now, -1, -1, -1, inference_time)
            cv2.imwrite('output.jpg', frame)

    cap.release()
    client.loop_stop()
    client.disconnect()
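# Wiring sketch (assumption): one way infer_on_stream() might be invoked. The
# broker host/port and the build_argparser() helper referenced in the docstring
# are assumptions, not taken from the snippet above.
import paho.mqtt.client as mqtt

MQTT_HOST = 'localhost'          # assumed broker address
MQTT_PORT = 3001                 # assumed broker port
MQTT_KEEPALIVE_INTERVAL = 60

def connect_mqtt():
    # connect to the MQTT broker used for publishing "person" and "person/duration" stats
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    client.loop_start()
    return client

def main():
    args = build_argparser().parse_args()  # assumed to provide model, device, input, prob_threshold, cpu_extension
    client = connect_mqtt()
    infer_on_stream(args, client)

if __name__ == '__main__':
    main()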