Beispiel #1
0
def main():
    """Train ``gest_model.GestModel`` on the gesture image folders and plot the run.

    Loads the ``train`` and ``val`` splits found under ``data_dir`` with
    ``ImageFolder``, marks every model parameter as trainable, and hands the
    model, an SGD optimizer and a ``SummaryWriter`` to ``train_model``
    (``num_epochs=50``, ``checkpoint_path='model/f36'``). The loss/accuracy
    logs returned by ``train_model`` are then plotted, saved to
    ``Model_f36.jpg`` and shown on screen.
    """
    # Both splits receive the same deterministic preprocessing; no data
    # augmentation is enabled. The mean/std values are the ones commonly used
    # with ImageNet-pretrained torchvision models.
    data_transforms = {
        split: transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        for split in ('train', 'val')
    }

    data_dir = '/home/gabriel_carvalho/teste/dataset/Train_Gest_Dataset_Resized'
    image_datasets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=64,
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=True)
        for x in ['train', 'val']
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(image_datasets['train'].class_to_idx)
    print(device)

    base_model = gest_model.GestModel()

    # Train the whole network: no layer is frozen.
    for param in base_model.parameters():
        param.requires_grad = True

    # Epochs used by earlier runs:
    #   m1 - 50, m2 - 100, m3 - 100, m4 - 100, m5 - 1000, m6 - 100

    num_outputs = 5

    base_model.to(device)

    if num_outputs == 1:
        criterion = torch.nn.BCELoss()
    else:
        criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(base_model.parameters(),
                                lr=0.0010,
                                momentum=0.9)
    scheduler = None

    writer = SummaryWriter('model/f36/logs')
    try:
        _, loss_log, acc_log, val_loss_log, val_acc_log, final_epoch = train_model(
            model=base_model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            dataloaders=dataloaders,
            dataset_sizes=dataset_sizes,
            device=device,
            writer=writer,
            last_epoch=-1,
            num_epochs=50,
            checkpoint_path='model/f36',
            num_outputs=num_outputs)
    finally:
        # Flush pending events and release the log file even if training
        # raises part-way through.
        writer.close()

    print('Ultima epoca: ', final_epoch)

    # final_epoch is used as a zero-based index, so the x axis has
    # final_epoch + 1 points.
    N = final_epoch + 1
    epochs = np.arange(0, N)
    plt.style.use("ggplot")
    fig = plt.figure()
    ax = fig.add_subplot(yticks=[])
    plt.plot(epochs, loss_log, label="train_loss")
    plt.plot(epochs, val_loss_log, label="val_loss")
    plt.plot(epochs, acc_log, label="train_acc")
    plt.plot(epochs, val_acc_log, label="val_acc")
    ax.set_yticks(np.arange(0, 1.1, step=0.1))
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig('Model_f36.jpg')
    plt.show()
Beispiel #2
0
def main():
    """Train a gesture classifier and plot its loss/accuracy curves.

    Reads three names that are not defined inside this function and must be
    provided at module level: ``data_transforms`` (per-split transforms),
    ``pretrained`` (selects the MNASNet backbone instead of ``GestModel``)
    and ``num_outputs`` (size of the classifier output).

    The selected model, an SGD optimizer and a ``SummaryWriter`` are handed
    to ``train_model`` (``num_epochs=20``, ``checkpoint_path='model/a15'``).
    The returned logs are plotted, saved to ``Model_a15.jpg`` and shown.
    """
    data_dir = 'dataset/Train_Gest_Dataset_Resized'
    image_datasets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=128,
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=True)
        for x in ['train', 'val']
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(image_datasets['train'].class_to_idx)
    print(device)

    # Kept as an explicit comparison: ``pretrained`` comes from outside this
    # function and may not be a real bool, so plain truthiness could select
    # a different branch than before.
    if pretrained == True:  # noqa: E712
        base_model = torchvision.models.mnasnet1_0(pretrained=True)

        head = [
            torch.nn.Linear(1280, 128),
            torch.nn.Sigmoid(),
            torch.nn.Dropout(),
            torch.nn.Linear(128, num_outputs),
        ]
        if num_outputs == 1:
            # BCELoss needs a probability in [0, 1]; a softmax over a single
            # output would always be exactly 1.
            head.append(torch.nn.Sigmoid())
        # For num_outputs > 1 the head ends in raw logits: CrossEntropyLoss
        # applies log-softmax itself, so a trailing Softmax layer would
        # normalise twice and flatten the gradients.
        base_model.classifier = torch.nn.Sequential(*head)
    else:
        base_model = gest_model.GestModel()

    # Train the whole network: no layer is frozen.
    for param in base_model.parameters():
        param.requires_grad = True

    base_model.to(device)

    if num_outputs == 1:
        criterion = torch.nn.BCELoss()
    else:
        criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(base_model.parameters(),
                                lr=0.001,
                                momentum=0.9)
    scheduler = None

    writer = SummaryWriter('model/a15/logs')
    try:
        # NOTE(review): num_outputs and pretrained are passed as literals
        # here although the model above is built from the module-level
        # values; confirm against train_model that this is intended.
        _, loss_log, acc_log, val_loss_log, val_acc_log, final_epoch = train_model(
            model=base_model,
            IMG_SIZE=56,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            dataloaders=dataloaders,
            dataset_sizes=dataset_sizes,
            device=device,
            writer=writer,
            last_epoch=-1,
            num_epochs=20,
            checkpoint_path='model/a15',
            num_outputs=5,
            pretrained=False)
    finally:
        # Flush pending events and release the log file even if training
        # raises part-way through.
        writer.close()

    print('Ultima epoca: ', final_epoch)

    # final_epoch is used as a zero-based index, so the x axis has
    # final_epoch + 1 points.
    N = final_epoch + 1
    epochs = np.arange(0, N)
    plt.style.use("ggplot")
    fig = plt.figure()
    ax = fig.add_subplot(yticks=[])
    plt.plot(epochs, loss_log, label="train_loss")
    plt.plot(epochs, val_loss_log, label="val_loss")
    plt.plot(epochs, acc_log, label="train_acc")
    plt.plot(epochs, val_acc_log, label="val_acc")
    ax.set_yticks(np.arange(0, 1.1, step=0.1))
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig('Model_a15.jpg')
    plt.show()
Beispiel #3
0
def main():
    """Classify webcam frames with ``GestModel`` and send commands over a socket.

    Loads the weights from ``model/f25/epoch40.pth``, opens capture device 0
    and waits for one client on the module-level listening socket ``sock``.
    Each grabbed frame is resized to 112x112, normalised and classified; when
    the winning softmax probability reaches ``THRESHOLD`` the matching
    command string is printed, drawn on the frame and sent to the client.
    Frames below the threshold produce no command. Pressing ``q`` in the
    preview window sends ``'Parado'`` and ends the loop.

    Raises:
        RuntimeError: if capture device 0 cannot be opened.
    """
    # Class index -> command string; the strings are sent verbatim.
    commands = ('Direita', 'Esquerda', 'Frente', 'Parado', 'Tras')
    # Index used when no class is confident enough; it maps to no command.
    no_command = len(commands)
    # Minimum softmax probability the winning class needs to be acted upon.
    THRESHOLD = 0.95

    model = gest_model.GestModel()
    model.load_state_dict(torch.load('model/f25/epoch40.pth')['state_dict'])
    model.cuda().eval()

    tfs = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    print('Loading video capture')
    video_cap = cv2.VideoCapture(0)
    conn = None
    try:
        if not video_cap.isOpened():
            # Fail loudly instead of spinning forever on ungrabbed frames.
            raise RuntimeError('Could not open video capture device 0')

        # Blocks until the command receiver connects.
        conn, _ = sock.accept()

        cv2.namedWindow('webcam')

        num_frame = 0
        total_frames_time = 0
        while True:
            grabbed, frame = video_cap.read()
            if not grabbed:
                # frame is None here, so it must not be touched.
                print('Frame not grabbed')
                continue
            print(frame.shape)

            num_frame += 1

            # The frame stays in RGB while text is drawn on it and is
            # converted back to BGR just before display.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            tensor = tfs(frame).unsqueeze(0).cuda()

            t0 = time.time()
            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                output = softmax(model(tensor), dim=1)
            # CUDA kernels run asynchronously; wait for them so the measured
            # latency covers the actual computation.
            torch.cuda.synchronize()
            latency = time.time() - t0

            total_frames_time += latency

            # Guard against a zero elapsed time on coarse clocks.
            fps = num_frame / total_frames_time if total_frames_time > 0 else 0.0
            cv2.putText(frame, 'FPS: {:.1f}'.format(fps), (450, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                        cv2.LINE_AA)

            print('Output data', output.data)
            print('Output', output)

            best_prob, best_idx = torch.max(output, 1)
            confidence = float(best_prob[0])
            result = int(best_idx[0])
            scores = ' | '.join(f'{p:.4f}' for p in output[0].tolist())
            print(f'output: [{scores}], result: {result}, '
                  f'latency: {latency*1000:.2f} ms')

            # If the best class is below the threshold, every class is.
            if confidence < THRESHOLD:
                result = no_command

            if result != no_command:
                command = commands[result]
                print(command)
                cv2.putText(frame, command, (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                            cv2.LINE_AA)
                conn.send(command.encode('utf-8'))

            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            cv2.imshow('webcam', frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                # Tell the receiver to stop before leaving.
                conn.send('Parado'.encode('utf-8'))
                break
    finally:
        # Release the client connection, the camera and the preview window
        # on every exit path.
        if conn is not None:
            conn.close()
        video_cap.release()
        cv2.destroyAllWindows()
Beispiel #4
0
def main():
    """Classify webcam frames with ``GestModel`` and show the result on screen.

    Loads the weights from ``model/f25/epoch40.pth`` and opens capture device
    0. Each grabbed frame is resized to 112x112, normalised and classified;
    the label of the winning class is printed and drawn on the preview, or
    ``'Sem comando'`` when no softmax probability reaches ``THRESHOLD``.
    Pressing ``q`` in the preview window ends the loop.

    Raises:
        RuntimeError: if capture device 0 cannot be opened.
    """
    # Class index -> label; the last entry is the "no command" fallback.
    labels = ('Direita', 'Esquerda', 'Frente', 'Parado', 'Tras',
              'Sem comando')
    no_command = len(labels) - 1
    # Minimum softmax probability the winning class needs to be accepted.
    THRESHOLD = 0.95

    model = gest_model.GestModel()
    model.load_state_dict(torch.load('model/f25/epoch40.pth')['state_dict'])
    model.cuda().eval()

    tfs = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    print('Loading video capture')
    video_cap = cv2.VideoCapture(0)
    try:
        if not video_cap.isOpened():
            # Fail loudly instead of spinning forever on ungrabbed frames.
            raise RuntimeError('Could not open video capture device 0')

        num_frame = 0
        total_frames_time = 0

        cv2.namedWindow('webcam')
        while True:
            grabbed, frame = video_cap.read()
            if not grabbed:
                # frame is None here, so it must not be touched.
                print('Frame not grabbed')
                continue
            print(frame.shape)

            num_frame += 1

            # The frame stays in RGB while text is drawn on it and is
            # converted back to BGR just before display.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            tensor = tfs(frame).unsqueeze(0).cuda()

            t0 = time.time()
            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                output = softmax(model(tensor), dim=1)
            # CUDA kernels run asynchronously; wait for them so the measured
            # latency covers the actual computation.
            torch.cuda.synchronize()
            latency = time.time() - t0

            total_frames_time += latency

            # Guard against a zero elapsed time on coarse clocks.
            fps = num_frame / total_frames_time if total_frames_time > 0 else 0.0
            cv2.putText(frame, 'FPS: {:.1f}'.format(fps), (450, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                        cv2.LINE_AA)

            print('Output data', output.data)
            print('Output', output)

            best_prob, best_idx = torch.max(output, 1)
            confidence = float(best_prob[0])
            result = int(best_idx[0])
            scores = ' | '.join(f'{p:.4f}' for p in output[0].tolist())
            print(f'output: [{scores}], result: {result}, '
                  f'latency: {latency*1000:.2f} ms')

            # If the best class is below the threshold, every class is.
            if confidence < THRESHOLD:
                result = no_command

            label = labels[result]
            print(label)
            cv2.putText(frame, label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 0, 0), 2, cv2.LINE_AA)

            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            cv2.imshow('webcam', frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        # Release the camera and close the preview on every exit path.
        video_cap.release()
        cv2.destroyAllWindows()
Beispiel #5
0
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
import tqdm
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from torch.nn import Conv2d, Linear, MaxPool2d, ReLU, Sequential
from torch.nn.functional import softmax
from torchvision import transforms

import gest_model

# Presumably the number of gesture classes; not used in the visible code.
num_outputs = 5

# Build the network, restore the trained weights from the checkpoint, then
# move it to the GPU and switch it to inference mode.
model = gest_model.GestModel()
model.load_state_dict(
    torch.load('model/f25/epoch40.pth')['state_dict']
)
model.cuda()
model.eval()

# Preprocessing for array/tensor images: convert to PIL, resize to 112x112,
# convert to a tensor and normalise each channel.
_preprocess_steps = [
    transforms.ToPILImage(),
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
tfs = transforms.Compose(_preprocess_steps)