Beispiel #1
0
    Q.append(frame)

    # format data to torch
    imgs = []
    for img in Q:
        img = transform(img)
        imgs.append(torch.unsqueeze(img, 0))

    data = torch.cat(imgs)
    data = data.permute(1, 0, 2, 3)
    data = data[None, :, :, :, :]
    target = [2]
    target = torch.tensor(target)
    data = data.to(device)

    model.eval()  # set model to eval mode
    output = model(data)

    # send to softmax layer
    output = torch.nn.functional.softmax(output, dim=1)

    k = 5
    ts, pred = output.detach().cpu().topk(k, 1, True, True)
    top5 = [gesture_dict[pred[0][i].item()] for i in range(k)]

    pi = [pred[0][i].item() for i in range(k)]
    ps = [ts[0][i].item() for i in range(k)]
    top1 = top5[0] if ps[0] > threshold else gesture_dict[0]

    hist = {}
    for i in range(num_classes):
Beispiel #2
0
def FrameCapture(path):
    """Classify the frames listed by ``get_frame_names('test_img')``.

    Parses the process command line, reads the JSON config, restores the
    checkpoint named by ``config['checkpoint']`` (when that file exists),
    stacks all frames into one clip tensor and runs a single forward pass.

    Args:
        path: Unused. Kept so existing callers keep working; the frames are
            always obtained from ``get_frame_names('test_img')``.

    Returns:
        The value returned by ``accuracy(output, target, topk=(1, 5))`` for
        the model output against the hard-coded target class ``2``.
    """

    def str2bool(x):
        # Only the literal text "true" (any case) enables a flag.
        return str(x).lower() == 'true'

    parser = argparse.ArgumentParser(
        description='PyTorch Jester Training using JPEG')
    parser.add_argument('--use_gpu',
                        default=False,
                        type=str2bool,
                        help="flag to use gpu or not.")
    parser.add_argument('--config', '-c', help='json config file path')
    parser.add_argument('--resume',
                        '-r',
                        default=False,
                        type=str2bool,
                        help="resume training from given checkpoint.")
    parser.add_argument('--gpus', '-g', help="gpu ids for use.")
    args = parser.parse_args()
    device = torch.device(
        "cuda" if args.use_gpu and torch.cuda.is_available() else "cpu")

    # --gpus is optional: device_ids=None lets DataParallel pick every
    # visible device instead of crashing on None.split(',').
    gpus = None
    if args.use_gpu:
        if args.gpus:
            gpus = [int(i) for i in args.gpus.split(',')]
        print("=> active GPUs: {}".format(args.gpus))

    # Honour --config when given; fall back to the historical default path.
    config_path = args.config or "configs/config.json"
    with open(config_path) as data_file:
        config = json.load(data_file)

    transform = Compose([
        CenterCrop(84),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    model = ConvColumn(8)

    checkpoint_path = config['checkpoint']
    if os.path.isfile(checkpoint_path):
        print("=> loading checkpoint '{}'".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path, map_location='cpu')

        # Drop the `module.` prefix that DataParallel adds to parameter
        # names, but only where it is actually present.
        prefix = 'module.'
        new_state_dict = OrderedDict()
        for key, val in checkpoint['state_dict'].items():
            name = key[len(prefix):] if key.startswith(prefix) else key
            new_state_dict[name] = val

        # Load into the bare model *before* any DataParallel wrapping: the
        # wrapper expects `module.`-prefixed keys, which were just removed.
        model.load_state_dict(new_state_dict)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            checkpoint_path, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(checkpoint_path))

    if args.use_gpu:
        model = torch.nn.DataParallel(model, device_ids=gpus).to(device)

    img_paths = get_frame_names('test_img')
    imgs = []
    for img_path in img_paths:
        img = Image.open(img_path).convert('RGB')
        img = transform(img)
        imgs.append(torch.unsqueeze(img, 0))

    # format data to torch
    data = torch.cat(imgs)            # frames stacked along dim 0
    data = data.permute(1, 0, 2, 3)   # swap the first two axes
    data = data[None, :, :, :, :]     # add a leading axis of size 1
    target = torch.tensor([2])
    data = data.to(device)

    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(data)

    print("\nOutput values for all the 8 classes: ")
    print(output.detach())
    gesture_label_int = accuracy(output.detach(),
                                 target.detach().cpu(),
                                 topk=(1, 5))

    return gesture_label_int
Beispiel #3
0
# Preprocessing applied to every frame before it is fed to the network.
transform = Compose([
    CenterCrop(84),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Build the network, wrap it for the multi-GPU setting and move it to `device`.
model = torch.nn.DataParallel(ConvColumn(config['num_classes'])).to(device)

# Restore the weights from the configured checkpoint (tensors are mapped to
# the CPU while loading), then switch the model to evaluation mode.
checkpoint = torch.load(config['checkpoint'], map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
model.eval()

def model_caculate(input):
    """Run the module-level ``model`` on ``input`` and look up the winning label.

    The tensor is moved to ``device``, passed through ``model``, and the index
    of the largest output value (over the flattened output) is used as the key
    into ``label_dict``. Both the index and the label are printed.

    Returns:
        A ``(label, label_number)`` tuple.
    """
    scores = model(input.to(device))
    # argmax without an axis works on the flattened array
    label_number = scores.detach().cpu().numpy().argmax()
    label = label_dict[label_number]
    for value in (label_number, label):
        print(value)
    return label, label_number


# input 18+2 shape()img for three input and we can output the high ;
def recognize(array_img):
    # normalize the img;
Beispiel #4
0
class GestureDetectorThread(Thread):
    """Background thread that turns webcam frames into gesture events.

    Frames from capture device 0 are resized and kept in an 18-slot window.
    Once the window is full, every new frame triggers a forward pass of a
    ``ConvColumn(8)`` model over the window. Predictions above ``TRESHOLD``
    that are not "No gesture" / "Doing other things" are buffered (3 slots);
    when a further qualifying prediction arrives while that buffer is full,
    the label of the highest-scoring buffered prediction is published and
    both buffers are cleared. Consumers poll with :meth:`get_event`.
    """

    SWIPE_LEFT = 'Swiping Left'
    SWIPE_RIGHT = 'Swiping Right'
    SWIPE_UP = 'Swiping Up'
    SWIPE_DOWN = 'Swiping Down'
    THUMB_OK = 'Thumb Up'
    THUMB_NOT = 'Thumb Down'

    NO_GESTURE = 'No gesture'
    OTHER_GESTURE = 'Doing other things'

    def __init__(self, fps=12, width=176, height=100, use_gpu=True, model_data="model_best.pth.tar"):
        """Load the model and open the camera.

        Args:
            fps: Target loop rate; the loop sleeps to keep ``1/fps`` seconds
                per iteration.
            width: Width frames are resized to.
            height: Height frames are resized to.
            use_gpu: Use CUDA when it is available; otherwise fall back to CPU.
            model_data: Path of the checkpoint file holding ``'state_dict'``.

        Raises:
            FileNotFoundError: If ``model_data`` is not an existing file.
        """
        super().__init__()
        self.isRunning = True

        # Fail before touching the camera so no capture handle is left open.
        if not os.path.isfile(model_data):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), model_data)

        self._capture = cv.VideoCapture(0)
        self._target_frame_size = (width, height)
        self._sleeping_time = 1/fps

        self._event_queue = queue.Queue()
        self._frame_queue = queue.Queue(maxsize=18)
        self._predict_queue = queue.Queue(maxsize=3)

        # One device decision shared by the model and its inputs, so a
        # missing CUDA runtime degrades to CPU instead of raising.
        self._device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")

        last_checkpoint = torch.load(model_data, map_location='cpu')
        self._model = ConvColumn(8)
        self._model.load_state_dict(
            self._strip_module_prefix(last_checkpoint['state_dict']))
        self._model.to(self._device)

        # NOTE(review): OpenCV captures are usually BGR while these look like
        # RGB ImageNet statistics -- confirm the channel order used in training.
        self._transform = Compose([
            ToPILImage(),
            CenterCrop(84),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
        ])

        # Two-way lookup: class index -> label and label -> class index.
        self._gestures = {0: GestureDetectorThread.SWIPE_LEFT,
                          1: GestureDetectorThread.SWIPE_RIGHT,
                          2: GestureDetectorThread.SWIPE_DOWN,
                          3: GestureDetectorThread.SWIPE_UP,
                          4: GestureDetectorThread.THUMB_OK,
                          5: GestureDetectorThread.THUMB_NOT,
                          6: GestureDetectorThread.NO_GESTURE,
                          7: GestureDetectorThread.OTHER_GESTURE,
                          GestureDetectorThread.SWIPE_LEFT: 0,
                          GestureDetectorThread.SWIPE_RIGHT: 1,
                          GestureDetectorThread.SWIPE_DOWN: 2,
                          GestureDetectorThread.SWIPE_UP: 3,
                          GestureDetectorThread.THUMB_OK: 4,
                          GestureDetectorThread.THUMB_NOT: 5,
                          GestureDetectorThread.NO_GESTURE: 6,
                          GestureDetectorThread.OTHER_GESTURE: 7}

        # Minimum softmax probability for a prediction to count.
        self.TRESHOLD = 0.7

    @staticmethod
    def _strip_module_prefix(state_dict):
        """Return a copy of ``state_dict`` without leading ``module.`` in keys."""
        prefix = 'module.'
        cleaned = OrderedDict()
        for key, value in state_dict.items():
            # Keys lacking the prefix are kept verbatim rather than truncated.
            cleaned[key[len(prefix):] if key.startswith(prefix) else key] = value
        return cleaned

    @staticmethod
    def _drain(q):
        """Remove every item currently held in queue ``q``."""
        while not q.empty():
            q.get_nowait()

    def _push_frame(self, frame):
        """Add ``frame`` to the window; return True if the window was already full.

        When full, the oldest frame is dropped to make room, so the window
        always holds the most recent frames.
        """
        try:
            self._frame_queue.put_nowait(frame)
        except queue.Full:
            self._frame_queue.get_nowait()
            self._frame_queue.put_nowait(frame)
            return True
        return False

    def _classify_window(self):
        """Run the model on the current frame window and update the buffers."""
        frames = [torch.unsqueeze(self._transform(img), 0) for img in list(self._frame_queue.queue)]

        data = torch.cat(frames)          # frames stacked along dim 0
        data = data.permute(1, 0, 2, 3)   # swap the first two axes
        data = data[None, :, :, :, :]     # add a leading axis of size 1
        data = data.to(self._device)

        # Inference only: no autograd graph needed.
        with torch.no_grad():
            nn_output = self._model(data)
            nn_output = torch.nn.functional.softmax(nn_output, dim=1)
        pred, class_index = nn_output.max(1)
        pred = pred.item()
        class_index = class_index.item()

        g = self._gestures[class_index]
        ignored = (GestureDetectorThread.OTHER_GESTURE, GestureDetectorThread.NO_GESTURE)
        if not (pred > self.TRESHOLD and g not in ignored):
            # A weak or non-gesture prediction breaks the streak.
            self._drain(self._predict_queue)
            return

        try:
            self._predict_queue.put_nowait((pred, g))
        except queue.Full:
            # Buffer was already full: replace the oldest entry, publish the
            # label with the highest score, then start over.
            self._predict_queue.get_nowait()
            self._predict_queue.put_nowait((pred, g))

            predictions = sorted(self._predict_queue.queue)
            print(predictions)
            self._event_queue.put(predictions[-1][1])

            self._drain(self._frame_queue)
            self._drain(self._predict_queue)

    def run(self):
        """Capture/classify loop; runs until :meth:`stop_detector` is called."""
        self._model.eval()
        try:
            while self.isRunning:
                start_time = time.time()
                ok, frame = self._capture.read()
                # A failed read yields no usable frame; skip it instead of
                # crashing in cv.resize.
                if ok and frame is not None:
                    frame = cv.resize(frame, self._target_frame_size)
                    if self._push_frame(frame):
                        self._classify_window()

                # Sleep only for what is left of this iteration's time slot.
                remaining = self._sleeping_time - (time.time() - start_time)
                if remaining > 0:
                    time.sleep(remaining)
        finally:
            # Release the camera even if the loop dies with an exception.
            self._capture.release()

    def get_event(self):
        """Return the next published gesture label, or ``None`` if none is pending."""
        try:
            return self._event_queue.get(block=False)
        except queue.Empty:
            return None

    def stop_detector(self):
        """Ask the loop in :meth:`run` to finish after its current iteration."""
        self.isRunning = False