        Q.append(frame)

        # format data to torch: stack the queued frames into a single clip tensor
        imgs = []
        for img in Q:
            img = transform(img)
            imgs.append(torch.unsqueeze(img, 0))
        data = torch.cat(imgs)               # (T, C, H, W)
        data = data.permute(1, 0, 2, 3)      # (C, T, H, W)
        data = data[None, :, :, :, :]        # (1, C, T, H, W): add batch dimension

        target = torch.tensor([2])
        data = data.to(device)

        model.eval()  # set model to eval mode
        output = model(data)
        # send logits through a softmax layer to get class probabilities
        output = torch.nn.functional.softmax(output, dim=1)

        # top-5 predictions with their probabilities
        k = 5
        ts, pred = output.detach().cpu().topk(k, 1, True, True)
        top5 = [gesture_dict[pred[0][i].item()] for i in range(k)]
        pi = [pred[0][i].item() for i in range(k)]
        ps = [ts[0][i].item() for i in range(k)]
        # fall back to the default class when the best score is below the threshold
        top1 = top5[0] if ps[0] > threshold else gesture_dict[0]

        hist = {}
        for i in range(num_classes):
            hist[gesture_dict[i]] = 0  # assumed initialisation; the source snippet is truncated here
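# --- Sketch: checking the clip-tensor layout built above ---
# A minimal, self-contained example (hypothetical shapes: 18 RGB frames of
# 84x84, matching the CenterCrop(84) transform used in this code) showing that
# the unsqueeze / cat / permute sequence yields the (1, C, T, H, W) layout the
# 3D conv model expects.
import torch

frames = [torch.rand(3, 84, 84) for _ in range(18)]    # T frames, each (C, H, W)
clip = torch.cat([f.unsqueeze(0) for f in frames])     # (T, C, H, W)
clip = clip.permute(1, 0, 2, 3)                        # (C, T, H, W)
clip = clip[None, :, :, :, :]                          # (1, C, T, H, W)
assert clip.shape == (1, 3, 18, 84, 84)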
import argparse
import json
import os
from collections import OrderedDict

import torch
from PIL import Image
from torchvision.transforms import CenterCrop, Compose, Normalize, ToTensor

# ConvColumn, get_frame_names and accuracy are project-local helpers
# imported elsewhere in this repository.


def FrameCapture(path):
    str2bool = lambda x: (str(x).lower() == 'true')
    parser = argparse.ArgumentParser(
        description='PyTorch Jester Training using JPEG')
    parser.add_argument('--use_gpu', default=False, type=str2bool,
                        help="flag to use gpu or not.")
    parser.add_argument('--config', '-c', help='json config file path')
    parser.add_argument('--resume', '-r', default=False, type=str2bool,
                        help="resume training from given checkpoint.")
    parser.add_argument('--gpus', '-g', help="gpu ids for use.")
    args = parser.parse_args()

    device = torch.device(
        "cuda" if args.use_gpu and torch.cuda.is_available() else "cpu")
    if args.use_gpu:
        gpus = [int(i) for i in args.gpus.split(',')]
        print("=> active GPUs: {}".format(args.gpus))

    # note: the --config flag above is parsed, but this hard-coded path is used
    with open("configs/config.json") as data_file:
        config = json.load(data_file)

    transform = Compose([
        CenterCrop(84),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406],
                  std=[0.229, 0.224, 0.225])
    ])

    model = ConvColumn(8)
    if args.use_gpu:
        model = torch.nn.DataParallel(model, device_ids=gpus).to(device)

    if os.path.isfile(config['checkpoint']):
        print("=> loading checkpoint '{}'".format(config['checkpoint']))
        checkpoint = torch.load(config['checkpoint'], map_location='cpu')
        # state dicts saved from a DataParallel model carry a 'module.' prefix;
        # strip it so the keys match the model's parameter names
        if 'state_dict' in checkpoint:
            new_state_dict = OrderedDict()
            for name, val in checkpoint['state_dict'].items():
                new_state_dict[name[7:]] = val  # remove `module.`
            checkpoint['state_dict'] = new_state_dict
        args.start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            config['checkpoint'], checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(config['checkpoint']))

    # load the test frames and apply the same transform as in training
    img_paths = get_frame_names('test_img')
    imgs = []
    for img_path in img_paths:
        img = Image.open(img_path).convert('RGB')
        img = transform(img)
        imgs.append(torch.unsqueeze(img, 0))

    # format data to torch: (T, C, H, W) -> (1, C, T, H, W)
    data = torch.cat(imgs)
    data = data.permute(1, 0, 2, 3)
    data = data[None, :, :, :, :]
    target = torch.tensor([2])
    data = data.to(device)

    model.eval()
    output = model(data)
    print("\nOutput values for all 8 classes:")
    print(output.detach())
    gesture_label_int = accuracy(output.detach(), target.detach().cpu(),
                                 topk=(1, 5))
    return gesture_label_int
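# --- Sketch: a reusable helper for the checkpoint-key remapping done above ---
# Both FrameCapture() and GestureDetectorThread below strip the 'module.'
# prefix that torch.nn.DataParallel adds to parameter names when a model is
# saved. A minimal, hypothetical helper (not part of the original code) for
# that recurring pattern:
from collections import OrderedDict

def strip_module_prefix(state_dict):
    """Return a copy of state_dict with any leading 'module.' removed from keys."""
    return OrderedDict(
        (k[len('module.'):] if k.startswith('module.') else k, v)
        for k, v in state_dict.items())

# usage: model.load_state_dict(strip_module_prefix(checkpoint['state_dict']))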
import numpy as np
import torch
from torchvision.transforms import CenterCrop, Compose, Normalize, ToTensor

transform = Compose([
    CenterCrop(84),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406],
              std=[0.229, 0.224, 0.225])
])

# init model: create the model, wrap for multi-GPU, and load trained weights
model = ConvColumn(config['num_classes'])
model = torch.nn.DataParallel(model).to(device)
checkpoint = torch.load(config['checkpoint'], map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
model.eval()


def model_caculate(input):
    # run the model on one clip tensor and return the highest-scoring label
    input = input.to(device)
    out = model(input)
    label_number = np.argmax(out.detach().cpu().numpy())
    label = label_dict[label_number]
    print(label_number)
    print(label)
    return label, label_number


# takes a stack of input frames and returns the highest-scoring gesture label
def recognize(array_img):
    # normalize the images
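# --- Sketch: smoke-testing model_caculate() with a dummy clip ---
# Hypothetical, not part of the original code: it assumes the (1, C, T, H, W)
# clip layout used throughout these snippets (18 frames, 84x84 crops) and that
# the globals `model`, `device` and `label_dict` above are already initialised.
import torch

dummy_clip = torch.rand(1, 3, 18, 84, 84)   # one random clip
label, label_number = model_caculate(dummy_clip)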
import errno
import os
import queue
import time
from collections import OrderedDict
from threading import Thread

import cv2 as cv
import torch
from torchvision.transforms import (CenterCrop, Compose, Normalize,
                                    ToPILImage, ToTensor)

# ConvColumn is the project's 3D-CNN model class.


class GestureDetectorThread(Thread):
    SWIPE_LEFT = 'Swiping Left'
    SWIPE_RIGHT = 'Swiping Right'
    SWIPE_UP = 'Swiping Up'
    SWIPE_DOWN = 'Swiping Down'
    THUMB_OK = 'Thumb Up'
    THUMB_NOT = 'Thumb Down'
    NO_GESTURE = 'No gesture'
    OTHER_GESTURE = 'Doing other things'

    def __init__(self, fps=12, width=176, height=100, use_gpu=True,
                 model_data="model_best.pth.tar"):
        Thread.__init__(self)
        self.isRunning = True
        self._capture = cv.VideoCapture(0)
        self._target_frame_size = (width, height)
        self._sleeping_time = 1 / fps
        self._event_queue = queue.Queue()
        self._frame_queue = queue.Queue(maxsize=18)
        self._predict_queue = queue.Queue(maxsize=3)

        self._model = ConvColumn(8)
        if use_gpu:
            self._model.cuda()

        if os.path.isfile(model_data):
            last_checkpoint = torch.load(model_data, map_location='cpu')
            # state dicts saved from a DataParallel model carry a 'module.'
            # prefix; strip it so the keys match this unwrapped model
            if 'state_dict' in last_checkpoint:
                new_state_dict = OrderedDict()
                for name, val in last_checkpoint['state_dict'].items():
                    new_state_dict[name[7:]] = val  # drop 'module.'
                last_checkpoint['state_dict'] = new_state_dict
            self._model.load_state_dict(last_checkpoint['state_dict'])
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                    model_data)

        self._transform = Compose([
            ToPILImage(),
            CenterCrop(84),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225])
        ])
        self._device = torch.device(
            "cuda" if use_gpu and torch.cuda.is_available() else "cpu")
        # two-way mapping: class index <-> gesture name
        self._gestures = {0: GestureDetectorThread.SWIPE_LEFT,
                          1: GestureDetectorThread.SWIPE_RIGHT,
                          2: GestureDetectorThread.SWIPE_DOWN,
                          3: GestureDetectorThread.SWIPE_UP,
                          4: GestureDetectorThread.THUMB_OK,
                          5: GestureDetectorThread.THUMB_NOT,
                          6: GestureDetectorThread.NO_GESTURE,
                          7: GestureDetectorThread.OTHER_GESTURE,
                          GestureDetectorThread.SWIPE_LEFT: 0,
                          GestureDetectorThread.SWIPE_RIGHT: 1,
                          GestureDetectorThread.SWIPE_DOWN: 2,
                          GestureDetectorThread.SWIPE_UP: 3,
                          GestureDetectorThread.THUMB_OK: 4,
                          GestureDetectorThread.THUMB_NOT: 5,
                          GestureDetectorThread.NO_GESTURE: 6,
                          GestureDetectorThread.OTHER_GESTURE: 7}
        self.THRESHOLD = 0.7

    def run(self):
        while self.isRunning:
            start_time = time.time()
            _, frame = self._capture.read()
            frame = cv.resize(frame, self._target_frame_size)
            # keep a rolling window of the most recent frames
            try:
                self._frame_queue.put_nowait(frame)
            except queue.Full:
                _ = self._frame_queue.get()
                self._frame_queue.put_nowait(frame)

            # build the clip tensor: (T, C, H, W) -> (1, C, T, H, W)
            frames = [torch.unsqueeze(self._transform(img), 0)
                      for img in list(self._frame_queue.queue)]
            data = torch.cat(frames)
            data = data.permute(1, 0, 2, 3)
            data = data[None, :, :, :, :]
            data = data.to(self._device)

            self._model.eval()
            nn_output = self._model(data)
            nn_output = torch.nn.functional.softmax(nn_output, dim=1)
            pred, class_index = nn_output.max(1)
            pred = pred.item()
            class_index = class_index.item()
            g = self._gestures[class_index]

            if (pred > self.THRESHOLD
                    and g != GestureDetectorThread.OTHER_GESTURE
                    and g != GestureDetectorThread.NO_GESTURE):
                try:
                    self._predict_queue.put_nowait((pred, g))
                except queue.Full:
                    self._predict_queue.get()
                    self._predict_queue.put_nowait((pred, g))
                    # emit the most confident of the recent predictions
                    predictions = sorted(list(self._predict_queue.queue))
                    print(predictions)
                    g = predictions[-1][1]
                    self._event_queue.put(g)
                    # clear queues and start collecting the next gesture
                    while not self._frame_queue.empty():
                        self._frame_queue.get_nowait()
                    while not self._predict_queue.empty():
                        self._predict_queue.get_nowait()
            else:
                while not self._predict_queue.empty():
                    self._predict_queue.get_nowait()

            # maintain the target frame rate; skip sleeping when we are behind
            time_diff = time.time() - start_time
            remaining = self._sleeping_time - time_diff
            if remaining > 0:
                time.sleep(remaining)

        self._capture.release()

    def get_event(self):
        # return the next detected gesture name, or None if nothing is queued
        try:
            return self._event_queue.get(block=False)
        except queue.Empty:
            return None

    def stop_detector(self):
        self.isRunning = False
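# --- Sketch: driving GestureDetectorThread from a main loop ---
# A minimal, hypothetical consumer of the class above: start the thread,
# poll get_event() for recognized gestures, and shut down cleanly.
import time

detector = GestureDetectorThread(use_gpu=False)
detector.start()
try:
    while True:
        event = detector.get_event()
        if event is not None:
            print("detected:", event)
        time.sleep(0.1)
except KeyboardInterrupt:
    detector.stop_detector()
    detector.join()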