Beispiel #1
0
def main():
    """Build or restore the EyeNet model and its optimizer, then train.

    When ``args.start_from`` is set, the checkpoint stored at that path
    provides the network hyper-parameters, the best validation loss so far
    and the model/optimizer state. Otherwise a fresh model is created from
    the hyper-parameters found on ``args``.

    Raises:
        Exception: if no checkpoint is given and ``args.out`` already exists.
    """
    learning_rate = 4 * 1e-4

    # Step 1: decide where the hyper-parameters come from.
    resume_state = None
    if args.start_from:
        resume_state = torch.load(args.start_from, map_location=device)
        nstack = resume_state['nstack']
        nfeatures = resume_state['nfeatures']
        nlandmarks = resume_state['nlandmarks']
        best_val_loss = resume_state['best_val_loss']
    elif os.path.exists(args.out):
        raise Exception(f'Out file {args.out} already exists.')
    else:
        nstack = args.nstack
        nfeatures = args.nfeatures
        nlandmarks = args.nlandmarks
        best_val_loss = float('inf')

    # Step 2: the network and optimizer are built the same way in both cases.
    eyenet = EyeNet(nstack=nstack,
                    nfeatures=nfeatures,
                    nlandmarks=nlandmarks).to(device)
    optimizer = torch.optim.Adam(eyenet.parameters(), lr=learning_rate)

    # Step 3: when resuming, overwrite the fresh state with the saved one.
    if resume_state is not None:
        eyenet.load_state_dict(resume_state['model_state_dict'])
        optimizer.load_state_dict(resume_state['optimizer_state_dict'])

    train(eyenet=eyenet,
          optimizer=optimizer,
          nepochs=args.nepochs,
          best_val_loss=best_val_loss,
          checkpoint_fn=args.out)
Beispiel #2
0
def validate(eyenet: EyeNet, val_loader: DataLoader) -> float:
    """Return the mean combined loss of ``eyenet`` over ``val_loader``.

    The per-batch loss is ``1000 * heatmaps_loss + landmarks_loss +
    gaze_loss``, the same weighting used during training.

    Args:
        eyenet: the model to evaluate. It is switched to evaluation mode for
            the duration of the call and put back into whatever mode it was
            in before.
        val_loader: yields dict batches with the keys ``'img'``,
            ``'heatmaps'``, ``'landmarks'`` and ``'gaze'``.

    Returns:
        The mean of the per-batch losses as a plain Python ``float``, or NaN
        when ``val_loader`` yields no batches.
    """
    # This function is called in the middle of training. Without eval mode,
    # layers such as BatchNorm/Dropout would keep their training behaviour
    # (and BatchNorm would update its running statistics from validation
    # data) even though gradients are disabled.
    was_training = eyenet.training
    eyenet.eval()

    val_losses = []
    try:
        with torch.no_grad():
            for val_batch in val_loader:
                val_imgs = val_batch['img'].float().to(device)
                heatmaps = val_batch['heatmaps'].to(device)
                # Cast to float32 like the training loop does, so both code
                # paths feed calc_loss the same dtypes.
                landmarks = val_batch['landmarks'].float().to(device)
                gaze = val_batch['gaze'].float().to(device)

                heatmaps_pred, landmarks_pred, gaze_pred = eyenet.forward(
                    val_imgs)
                heatmaps_loss, landmarks_loss, gaze_loss = eyenet.calc_loss(
                    heatmaps_pred, heatmaps, landmarks_pred, landmarks,
                    gaze_pred, gaze)

                loss = 1000 * heatmaps_loss + landmarks_loss + gaze_loss
                val_losses.append(loss.item())
    finally:
        # Restore the caller's mode even if a batch raised.
        eyenet.train(was_training)

    if not val_losses:
        # np.mean([]) also yields NaN but emits a RuntimeWarning first.
        return float('nan')

    # Convert the NumPy scalar to a built-in float: the caller stores this
    # value inside the saved checkpoint, and a plain float pickles without
    # any NumPy types.
    return float(np.mean(val_losses))
def init_model(transform):
    """Load the detectors and the EyeNet weights into module-level globals.

    Sets ``face_cascade``, ``landmarks_detector``, ``checkpoint``,
    ``nstack``, ``nfeatures``, ``nlandmarks`` and ``eyenet``. The detector
    files are looked up under ``pathToProject``; the checkpoint is read from
    ``'checkpoint.pt'`` relative to the current working directory.

    Args:
        transform: accepted but not used by this function.

    Returns:
        The tuple ``(None, None)``.
    """
    global face_cascade, landmarks_detector, checkpoint, nstack, nfeatures, nlandmarks, eyenet

    # Prefer the first CUDA device when one is available.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print(device)

    dirname = pathToProject

    cascade_path = os.path.join(dirname,
                                'lbpcascade_frontalface_improved.xml')
    face_cascade = cv2.CascadeClassifier(cascade_path)

    predictor_path = os.path.join(dirname,
                                  'shape_predictor_5_face_landmarks.dat')
    landmarks_detector = dlib.shape_predictor(predictor_path)

    # The checkpoint carries the hyper-parameters needed to rebuild the net.
    checkpoint = torch.load('checkpoint.pt', map_location=device)
    nstack = checkpoint['nstack']
    nfeatures = checkpoint['nfeatures']
    nlandmarks = checkpoint['nlandmarks']

    network = EyeNet(nstack=nstack,
                     nfeatures=nfeatures,
                     nlandmarks=nlandmarks)
    eyenet = network.to(device)
    eyenet.load_state_dict(checkpoint['model_state_dict'])

    return None, None
from datasets.mpii_gaze import MPIIGaze
from models.eyenet import EyeNet
import os
import numpy as np
import cv2
from util.preprocess import gaussian_2d
from matplotlib import pyplot as plt
import util.gaze

# Evaluation script: rebuild EyeNet from a checkpoint and run it over every
# sample of the MPIIGaze dataset.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dataset = MPIIGaze()
# The checkpoint path is relative to the current working directory.
checkpoint = torch.load('checkpoint.pt', map_location=device)
# The checkpoint stores the hyper-parameters needed to rebuild the network.
nstack = checkpoint['nstack']
nfeatures = checkpoint['nfeatures']
nlandmarks = checkpoint['nlandmarks']
eyenet = EyeNet(nstack=nstack, nfeatures=nfeatures,
                nlandmarks=nlandmarks).to(device)
eyenet.load_state_dict(checkpoint['model_state_dict'])

# Inference only, so gradient tracking is disabled for the whole loop.
with torch.no_grad():
    # NOTE(review): nothing is appended to `errors` in the lines visible
    # here; presumably the loop body continues past this excerpt - confirm.
    errors = []

    print('N', len(dataset))
    for i, sample in enumerate(dataset):
        print(i)
        # Wrapping the image in a list adds a leading dimension of size 1.
        x = torch.tensor([sample['img']]).float().to(device)

        heatmaps_pred, landmarks_pred, gaze_pred = eyenet.forward(x)

        # Reference gaze reshaped to (1, 2); prediction copied to CPU and
        # converted to a NumPy array.
        gaze = sample['gaze'].reshape((1, 2))
        gaze_pred = np.asarray(gaze_pred.cpu().numpy())
Beispiel #5
0
def _write_debug_images(heatmaps, heatmaps_pred, imgs):
    """Write the last sample's true/predicted heatmaps and image as JPEGs.

    Heatmap channels 8:16 are averaged into a single image, min-max
    normalised to [0, 1] and scaled by 255. The files ``true.jpg``,
    ``pred.jpg`` and ``eye.jpg`` are written to the current working
    directory and overwritten on every call.
    """
    hm = np.mean(heatmaps[-1, 8:16].cpu().detach().numpy(), axis=0)
    # heatmaps_pred has one extra leading index; the last entry along it is
    # used here.
    hm_pred = np.mean(heatmaps_pred[-1, -1, 8:16].cpu().detach().numpy(),
                      axis=0)
    norm_hm = cv2.normalize(hm,
                            None,
                            alpha=0,
                            beta=1,
                            norm_type=cv2.NORM_MINMAX,
                            dtype=cv2.CV_32F)
    norm_hm_pred = cv2.normalize(hm_pred,
                                 None,
                                 alpha=0,
                                 beta=1,
                                 norm_type=cv2.NORM_MINMAX,
                                 dtype=cv2.CV_32F)

    cv2.imwrite('true.jpg', norm_hm * 255)
    cv2.imwrite('pred.jpg', norm_hm_pred * 255)
    cv2.imwrite('eye.jpg', imgs.numpy()[-1] * 255)


def train_epoch(epoch: int, eyenet: EyeNet, optimizer,
                train_loader: DataLoader, val_loader: DataLoader,
                best_val_loss: float, checkpoint_fn: str,
                writer: SummaryWriter):
    """Train ``eyenet`` for one pass over ``train_loader``.

    Every batch performs one optimizer step on
    ``1000 * heatmaps_loss + landmarks_loss + gaze_loss`` and logs the
    individual and combined losses to ``writer``. Every 20 global steps debug
    images are written to disk, and (except at step 0) the model is
    validated; a checkpoint is saved to ``checkpoint_fn`` whenever the
    validation loss improves.

    Args:
        epoch: zero-based epoch index, used to compute the global step.
        eyenet: the model being trained.
        optimizer: optimizer updating ``eyenet``'s parameters.
        train_loader: yields dict batches with the keys ``'img'``,
            ``'heatmaps'``, ``'landmarks'`` and ``'gaze'``.
        val_loader: loader passed to ``validate``.
        best_val_loss: best validation loss seen before this epoch.
        checkpoint_fn: path the checkpoint is written to.
        writer: scalar logger (``add_scalar`` is the only method used).

    Returns:
        The best validation loss seen so far, including this epoch.
    """
    # Interval, in global steps, for debug images and validation.
    report_every = 20

    N = len(train_loader)
    for i_batch, sample_batched in enumerate(train_loader):
        # Turn the per-epoch batch index into a global step counter.
        i_batch += N * epoch
        imgs = sample_batched['img'].float().to(device)
        heatmaps_pred, landmarks_pred, gaze_pred = eyenet.forward(imgs)

        heatmaps = sample_batched['heatmaps'].to(device)
        landmarks = sample_batched['landmarks'].float().to(device)
        gaze = sample_batched['gaze'].float().to(device)

        heatmaps_loss, landmarks_loss, gaze_loss = eyenet.calc_loss(
            heatmaps_pred, heatmaps, landmarks_pred, landmarks, gaze_pred,
            gaze)

        loss = 1000 * heatmaps_loss + landmarks_loss + gaze_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Only build the debug images on the steps that actually write them;
        # doing it on every batch costs a device-to-host copy and two
        # normalisations whose results are thrown away.
        if i_batch % report_every == 0:
            _write_debug_images(heatmaps, heatmaps_pred,
                                sample_batched['img'])

        writer.add_scalar("Training heatmaps loss", heatmaps_loss.item(),
                          i_batch)
        writer.add_scalar("Training landmarks loss", landmarks_loss.item(),
                          i_batch)
        writer.add_scalar("Training gaze loss", gaze_loss.item(), i_batch)
        writer.add_scalar("Training loss", loss.item(), i_batch)

        if i_batch > 0 and i_batch % report_every == 0:
            val_loss = validate(eyenet=eyenet, val_loader=val_loader)
            writer.add_scalar("validation loss", val_loss, i_batch)
            print('Epoch', epoch, 'Validation loss', val_loss)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # Store the hyper-parameters next to the weights so the
                # network can be rebuilt from the checkpoint alone.
                torch.save(
                    {
                        'nstack': eyenet.nstack,
                        'nfeatures': eyenet.nfeatures,
                        'nlandmarks': eyenet.nlandmarks,
                        'best_val_loss': best_val_loss,
                        'model_state_dict': eyenet.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                    }, checkpoint_fn)

    return best_val_loss
# Capture settings: 960x480 frames, MJPG codec, 60 FPS.
# NOTE(review): `webcam` is created above this excerpt, presumably a
# cv2.VideoCapture - confirm.
webcam.set(cv2.CAP_PROP_FRAME_WIDTH, 960)
webcam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
webcam.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
webcam.set(cv2.CAP_PROP_FPS, 60)

# The detector model files are looked up next to this source file.
dirname = os.path.dirname(__file__)
face_cascade = cv2.CascadeClassifier(
    os.path.join(dirname, 'lbpcascade_frontalface_improved.xml'))
landmarks_detector = dlib.shape_predictor(
    os.path.join(dirname, 'shape_predictor_5_face_landmarks.dat'))

# Unlike the detector files, the checkpoint path is relative to the current
# working directory. It stores the hyper-parameters needed to rebuild EyeNet.
checkpoint = torch.load('checkpoint.pt', map_location=device)
nstack = checkpoint['nstack']
nfeatures = checkpoint['nfeatures']
nlandmarks = checkpoint['nlandmarks']
eyenet = EyeNet(nstack=nstack, nfeatures=nfeatures,
                nlandmarks=nlandmarks).to(device)
eyenet.load_state_dict(checkpoint['model_state_dict'])


def main():
    current_face = None
    landmarks = None
    alpha = 0.95
    left_eye = None
    right_eye = None

    while True:
        _, frame_bgr = webcam.read()
        orig_frame = frame_bgr.copy()
        frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)