import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

import gest_model
# train_model is assumed to be imported from this repo's training utilities.


def main():
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize((224, 224)),
            # transforms.Grayscale(num_output_channels=1),
            # transforms.RandomRotation(10),
            # transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
            # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
            # transforms.RandomHorizontalFlip(),
            # transforms.RandomAffine(degrees=20, translate=(0.15, 0.15),
            #                         scale=(1, 1.15), shear=20),
            transforms.ToTensor(),
            # transforms.Normalize((0.5,), (0.5,)),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
            # transforms.Normalize(mean=[0.5, 0.5, 0.5],
            #                      std=[0.5, 0.5, 0.5])
        ]),
        'val': transforms.Compose([
            transforms.Resize((224, 224)),
            # transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),
            # transforms.Normalize((0.5,), (0.5,)),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
            # transforms.Normalize(mean=[0.5, 0.5, 0.5],
            #                      std=[0.5, 0.5, 0.5])
        ])
    }

    # data_dir = 'dataset/Train_Gest_Dataset_Resized'
    data_dir = '/home/gabriel_carvalho/teste/dataset/Train_Gest_Dataset_Resized'

    image_datasets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x], batch_size=64,
                                       shuffle=True, num_workers=4,
                                       pin_memory=True)
        for x in ['train', 'val']
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    print(image_datasets['train'].class_to_idx)
    print(device)

    # base_model = torchvision.models.mobilenet_v2(pretrained=True)
    # base_model = torchvision.models.mnasnet1_0(pretrained=True)
    # base_model = torchvision.models.mnasnet0_5(pretrained=True)
    base_model = gest_model.GestModel()

    # Train every layer (full fine-tuning rather than a frozen backbone).
    for param in base_model.parameters():
        param.requires_grad = True

    '''
    Experiment log:
    m1 - 50 epochs
    m2 - 100 epochs
    m3 - 100 epochs
    m4 - 100 epochs
    m5 - 1000 epochs
    m6 - 100 epochs
    '''
    num_outputs = 5

    # Replacement head used when fine-tuning a torchvision backbone:
    # base_model.classifier = torch.nn.Sequential(
    #     # torch.nn.AvgPool2d((7, 7)),
    #     # torch.nn.Flatten(),
    #     torch.nn.Linear(1280, 128),
    #     torch.nn.Sigmoid(),
    #     torch.nn.Dropout(),
    #     torch.nn.Linear(128, num_outputs),
    #     torch.nn.Softmax(1)
    # )
    # for m in base_model.classifier.modules():
    #     if isinstance(m, torch.nn.Linear):
    #         torch.nn.init.kaiming_normal_(m.weight, mode="fan_out",
    #                                       nonlinearity="sigmoid")
    #         torch.nn.init.zeros_(m.bias)

    base_model.to(device)

    if num_outputs == 1:
        criterion = torch.nn.BCELoss()  # torch.nn.BCEWithLogitsLoss()
    else:
        criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(base_model.parameters(), lr=0.001, momentum=0.9)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9)
    # optimizer = torch.optim.Adam(base_model.parameters(), lr=0.0008,
    #                              weight_decay=0.001/30)
    scheduler = None

    writer = SummaryWriter('model/f36/logs')

    model, loss_log, acc_log, val_loss_log, val_acc_log, final_epoch = train_model(
        model=base_model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        dataloaders=dataloaders,
        dataset_sizes=dataset_sizes,
        device=device,
        writer=writer,
        last_epoch=-1,
        num_epochs=50,
        checkpoint_path='model/f36',
        num_outputs=num_outputs)

    print('Last epoch:', final_epoch)

    # Plot the training curves over all completed epochs.
    N = final_epoch + 1
    plt.style.use("ggplot")
    fig = plt.figure()
    ax = fig.add_subplot(yticks=[])
    plt.plot(np.arange(0, N), loss_log, label="train_loss")
    plt.plot(np.arange(0, N), val_loss_log, label="val_loss")
    plt.plot(np.arange(0, N), acc_log, label="train_acc")
    plt.plot(np.arange(0, N), val_acc_log, label="val_acc")
    ax.set_yticks(np.arange(0, 1.1, step=0.1))
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig('Model_f36.jpg')
    plt.show()
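# Both training mains call train_model, which lives elsewhere in this repo.
# Below is a minimal sketch consistent with the call signature used above;
# the loop body is an assumption, not the actual implementation. The
# checkpoint format (a dict with a 'state_dict' key) matches what the
# inference scripts load.
def train_model(model, criterion, optimizer, scheduler, dataloaders,
                dataset_sizes, device, writer, last_epoch, num_epochs,
                checkpoint_path, num_outputs, **kwargs):
    loss_log, acc_log, val_loss_log, val_acc_log = [], [], [], []
    final_epoch = last_epoch
    for epoch in range(last_epoch + 1, num_epochs):
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss, running_corrects = 0.0, 0
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (outputs.argmax(dim=1) == labels).sum().item()
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            writer.add_scalar(f'{phase}/loss', epoch_loss, epoch)
            writer.add_scalar(f'{phase}/acc', epoch_acc, epoch)
            if phase == 'train':
                loss_log.append(epoch_loss)
                acc_log.append(epoch_acc)
            else:
                val_loss_log.append(epoch_loss)
                val_acc_log.append(epoch_acc)
        if scheduler is not None:
            scheduler.step()
        # Save in the format the inference scripts expect.
        torch.save({'state_dict': model.state_dict()},
                   os.path.join(checkpoint_path, f'epoch{epoch}.pth'))
        final_epoch = epoch
    return model, loss_log, acc_log, val_loss_log, val_acc_log, final_epoch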
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

import gest_model
# train_model is assumed to be imported from this repo's training utilities.


def main():
    # These names were undefined in the original script; set here so it runs.
    num_outputs = 5
    pretrained = False

    # data_transforms was also undefined here; this mirrors the pipeline of
    # the previous training script (adjust Resize if IMG_SIZE differs).
    data_transforms = {
        x: transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]) for x in ['train', 'val']
    }

    data_dir = 'dataset/Train_Gest_Dataset_Resized'

    image_datasets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x], batch_size=128,
                                       shuffle=True, num_workers=4,
                                       pin_memory=True)
        for x in ['train', 'val']
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    print(image_datasets['train'].class_to_idx)
    print(device)

    # base_model = torchvision.models.mobilenet_v2(pretrained=True)
    # base_model = torchvision.models.mnasnet0_5(pretrained=True)
    base_model = gest_model.GestModel()

    for param in base_model.parameters():
        param.requires_grad = True

    if pretrained:
        # Swap in a pretrained MNASNet backbone with a custom head.
        base_model = torchvision.models.mnasnet1_0(pretrained=True)
        base_model.classifier = torch.nn.Sequential(
            # torch.nn.AvgPool2d((7, 7)),
            # torch.nn.Flatten(),
            torch.nn.Linear(1280, 128),
            torch.nn.Sigmoid(),
            torch.nn.Dropout(),
            torch.nn.Linear(128, num_outputs),
            torch.nn.Softmax(1))
        # for m in base_model.classifier.modules():
        #     if isinstance(m, torch.nn.Linear):
        #         torch.nn.init.kaiming_normal_(m.weight, mode="fan_out",
        #                                       nonlinearity="sigmoid")
        #         torch.nn.init.zeros_(m.bias)

    base_model.to(device)

    if num_outputs == 1:
        criterion = torch.nn.BCELoss()  # torch.nn.BCEWithLogitsLoss()
    else:
        criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(base_model.parameters(), lr=0.001, momentum=0.9)
    scheduler = None

    writer = SummaryWriter('model/a15/logs')

    model, loss_log, acc_log, val_loss_log, val_acc_log, final_epoch = train_model(
        model=base_model,
        IMG_SIZE=56,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        dataloaders=dataloaders,
        dataset_sizes=dataset_sizes,
        device=device,
        writer=writer,
        last_epoch=-1,
        num_epochs=20,
        checkpoint_path='model/a15',
        num_outputs=5,
        pretrained=False)

    print('Last epoch:', final_epoch)

    # Plot the training curves over all completed epochs.
    N = final_epoch + 1
    plt.style.use("ggplot")
    fig = plt.figure()
    ax = fig.add_subplot(yticks=[])
    plt.plot(np.arange(0, N), loss_log, label="train_loss")
    plt.plot(np.arange(0, N), val_loss_log, label="val_loss")
    plt.plot(np.arange(0, N), acc_log, label="train_acc")
    plt.plot(np.arange(0, N), val_acc_log, label="val_acc")
    ax.set_yticks(np.arange(0, 1.1, step=0.1))
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig('Model_a15.jpg')
    plt.show()
import socket
import time

import cv2
import torch
from torch.nn.functional import softmax
from torchvision import transforms

import gest_model

# The original referenced `sock` without defining it; this is an assumed
# TCP server setup (the port is a placeholder, not from the source).
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind(('', 5000))  # hypothetical port
sock.listen(1)


def main():
    num_outputs = 5

    # Earlier backbone experiments, kept for reference:
    # model = torchvision.models.mobilenet_v2()
    # model = torchvision.models.mnasnet1_0()
    # model.classifier = torch.nn.Sequential(
    #     # torch.nn.AvgPool2d((7, 7)),
    #     # torch.nn.Flatten(),
    #     torch.nn.Linear(1280, 128),
    #     torch.nn.Sigmoid(),
    #     torch.nn.Dropout(),
    #     torch.nn.Linear(128, num_outputs),
    #     torch.nn.Sigmoid() if num_outputs == 1 else torch.nn.Softmax(1)
    # )
    model = gest_model.GestModel()
    # model = torchvision.models.mnasnet1_0(pretrained=True)
    model.load_state_dict(torch.load('model/f25/epoch40.pth')['state_dict'])
    model.cuda().eval()

    print('Loading video capture')
    video_cap = cv2.VideoCapture(0)

    tfs = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
        # transforms.Normalize((0.5,), (0.5,))
    ])

    # Block until the command client connects.
    c, a = sock.accept()

    cv2.namedWindow('webcam')
    num_frame = 0
    total_frames_time = 0

    while True:
        grabbed, frame = video_cap.read()
        if not grabbed:
            print('Frame not grabbed')
            continue
        print(frame.shape)
        num_frame += 1

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        tensor = tfs(frame).unsqueeze(0).cuda()

        t0 = time.time()
        # Assumes the model outputs logits; if GestModel already ends in
        # Softmax, this second softmax flattens the probabilities.
        output = model(tensor)  # .cpu().detach().numpy()
        output = softmax(output, dim=1)
        latency = time.time() - t0
        total_frames_time += latency

        cv2.putText(frame, 'FPS: {:.1f}'.format(num_frame / total_frames_time),
                    (450, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                    cv2.LINE_AA)

        THRESHOLD = 0.95
        print('Output data', output.data)
        print('Output', output)
        _, result = torch.max(output.data, 1)
        print(f'output: [{output[0, 0]:.4f} | {output[0, 1]:.4f} | '
              f'{output[0, 2]:.4f} | {output[0, 3]:.4f} | {output[0, 4]:.4f}], '
              f'result: {result[0]}, latency: {latency*1000:.2f} ms')

        # If no class clears the confidence threshold, map to "no command" (5).
        count = 0
        for i in range(0, 5):
            if output.data[0, i] < THRESHOLD:
                count += 1
        if count == 5:
            result = 5

        # Command strings stay in Portuguese because the client expects them:
        # Direita = right, Esquerda = left, Frente = forward,
        # Parado = stopped, Tras = back.
        if result == 0:
            print('Direita')
            cv2.putText(frame, 'Direita', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
            c.send('Direita'.encode('utf-8'))
        if result == 1:
            print('Esquerda')
            cv2.putText(frame, 'Esquerda', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
            c.send('Esquerda'.encode('utf-8'))
        if result == 2:
            print('Frente')
            cv2.putText(frame, 'Frente', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
            c.send('Frente'.encode('utf-8'))
        if result == 3:
            print('Parado')
            cv2.putText(frame, 'Parado', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
            c.send('Parado'.encode('utf-8'))
        if result == 4:
            print('Tras')
            cv2.putText(frame, 'Tras', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
            c.send('Tras'.encode('utf-8'))
        # if result == 5:
        #     print('Sem comando')  # "no command"
        #     cv2.putText(frame, 'Sem comando', (200, 50),
        #                 cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
        #                 cv2.LINE_AA)
        #     c.send('Sem Comando'.encode('utf-8'))

        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow('webcam', frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            # Tell the client to stop before exiting.
            c.send('Parado'.encode('utf-8'))
            break
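# The inference script above pushes each command to a connected client as a
# plain UTF-8 string. Below is a minimal sketch of the receiving side; the
# host and port are hypothetical and must match the assumed server setup.
import socket

HOST, PORT = '192.168.0.10', 5000  # hypothetical address of the inference machine

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect((HOST, PORT))
    while True:
        data = s.recv(1024)
        if not data:
            break  # server closed the connection
        command = data.decode('utf-8')
        # Expected commands: Direita (right), Esquerda (left),
        # Frente (forward), Parado (stop), Tras (back).
        print('Received command:', command)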
import time

import cv2
import torch
from torch.nn.functional import softmax
from torchvision import transforms

import gest_model


def main():
    '''
    Earlier prototype kept for reference: loading a MaskDetector checkpoint
    and converting it to TensorRT.

    print('Loading model')
    model = MaskDetector()
    model.load_state_dict(torch.load('face_mask.ckpt')['state_dict'],
                          strict=False)
    model.cuda().eval()

    # example = torch.rand(1, 3, 100, 100).cuda()
    # traced_script_module = torch.jit.trace(model, example)
    # traced_script_module.save('model.pt')

    print('Converting model to TensorRT')
    x = torch.ones((1, 3, 100, 100)).cuda()
    model = torch2trt.torch2trt(model, [x])

    # print('Saving model')
    # torch.save(model_trt.state_dict(), 'face_mask_trt.ckpt')
    # print('Loading model')
    # model_trt = torch2trt.TRTModule()
    # model_trt.load_state_dict(torch.load('face_mask_trt.ckpt'))
    # model_trt.cuda().eval()
    '''
    num_outputs = 5

    # Earlier backbone experiments, kept for reference:
    # model = torchvision.models.mobilenet_v2()
    # model = torchvision.models.mnasnet1_0(pretrained=True)
    # model.classifier = torch.nn.Sequential(
    #     # torch.nn.AvgPool2d((7, 7)),
    #     # torch.nn.Flatten(),
    #     torch.nn.Linear(1280, 128),
    #     torch.nn.Sigmoid(),
    #     torch.nn.Dropout(),
    #     torch.nn.Linear(128, num_outputs),
    #     torch.nn.Sigmoid() if num_outputs == 1 else torch.nn.Softmax(1)
    # )
    model = gest_model.GestModel()
    model.load_state_dict(torch.load('model/f25/epoch40.pth')['state_dict'])
    model.cuda().eval()

    print('Loading video capture')
    video_cap = cv2.VideoCapture(0)

    tfs = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
        # transforms.Normalize((0.5,), (0.5,))
    ])

    NUM_FRAMES = 30
    num_frame = 0
    total_frames_time = 0
    cv2.namedWindow('webcam')

    while True:
        grabbed, frame = video_cap.read()
        if not grabbed:
            print('Frame not grabbed')
            continue
        print(frame.shape)
        num_frame += 1

        # frame = frame[100:550, 100:550]
        # frame = frame[:, :, ::-1]
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        tensor = tfs(frame).unsqueeze(0).cuda()

        t0 = time.time()
        # Assumes the model outputs logits; if GestModel already ends in
        # Softmax, this second softmax flattens the probabilities.
        output = model(tensor)  # .cpu().detach().numpy()
        output = softmax(output, dim=1)
        latency = time.time() - t0
        total_frames_time += latency

        # if num_frame == NUM_FRAMES:
        cv2.putText(frame, 'FPS: {:.1f}'.format(num_frame / total_frames_time),
                    (450, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                    cv2.LINE_AA)
        #     total_frames_time = 0
        #     num_frame = 0

        THRESHOLD = 0.95
        # Binary variant:
        # if num_outputs == 1:
        #     result = output.cpu().detach().numpy()[0] > 0.5
        #     print(f'output: [{output[0, 0]:.4f}], result: {result}, '
        #           f'latency: {latency*1000:.2f} ms')
        print('Output data', output.data)
        print('Output', output)
        _, result = torch.max(output.data, 1)
        print(f'output: [{output[0, 0]:.4f} | {output[0, 1]:.4f} | '
              f'{output[0, 2]:.4f} | {output[0, 3]:.4f} | {output[0, 4]:.4f}], '
              f'result: {result[0]}, latency: {latency*1000:.2f} ms')

        # If no class clears the confidence threshold, map to "no command" (5).
        count = 0
        for i in range(0, 5):
            if output.data[0, i] < THRESHOLD:
                count += 1
        if count == 5:
            result = 5
        # Equivalent one-liner:
        # if all(x < THRESHOLD for x in output.data[0]):
        #     result = 5

        # Class labels (Portuguese): Direita = right, Esquerda = left,
        # Frente = forward, Parado = stopped, Tras = back.
        if result == 0:
            print('Direita')
            cv2.putText(frame, 'Direita', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
        if result == 1:
            print('Esquerda')
            cv2.putText(frame, 'Esquerda', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
        if result == 2:
            print('Frente')
            cv2.putText(frame, 'Frente', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
        if result == 3:
            print('Parado')
            cv2.putText(frame, 'Parado', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
        if result == 4:
            print('Tras')
            cv2.putText(frame, 'Tras', (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 2, cv2.LINE_AA)
        if result == 5:
            print('Sem comando')  # "no command"
            cv2.putText(frame, 'Sem comando', (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2,
                        cv2.LINE_AA)

        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow('webcam', frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    cv2.destroyAllWindows()
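# The commented-out block at the top of this script experiments with
# torch.jit.trace and torch2trt for deployment. Below is a minimal TorchScript
# export sketch under the same assumptions used here (112x112 RGB input,
# checkpoint dict with a 'state_dict' key); the output filename is
# hypothetical.
import torch

import gest_model

model = gest_model.GestModel()
model.load_state_dict(torch.load('model/f25/epoch40.pth')['state_dict'])
model.cuda().eval()

# Trace with a dummy input shaped like what the webcam loop feeds the model.
example = torch.rand(1, 3, 112, 112).cuda()
traced = torch.jit.trace(model, example)
traced.save('gest_model_traced.pt')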
import torch
from torch.nn import Conv2d, Linear, MaxPool2d, ReLU, Sequential
from torch.nn.functional import softmax
from torchvision import transforms

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tqdm
from sklearn import metrics
from sklearn.metrics import (auc, classification_report, confusion_matrix,
                             roc_auc_score, roc_curve)

import gest_model

num_outputs = 5

model = gest_model.GestModel()
# model = torchvision.models.mnasnet1_0(pretrained=True)
model.load_state_dict(torch.load('model/f25/epoch40.pth')['state_dict'])
model.cuda().eval()

tfs = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((112, 112)),
    # transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
    # transforms.Normalize(mean=[0.5, 0.5, 0.5],
    #                      std=[0.5, 0.5, 0.5])
])
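# The evaluation script breaks off after the preprocessing pipeline. Given its
# imports (classification_report, confusion_matrix, seaborn, tqdm), the rest
# presumably walks a labeled validation set; here is a minimal sketch of such
# a pass. The validation directory is an assumption mirroring the training
# scripts, not a path from the source.
import cv2
from torchvision import datasets

val_dir = 'dataset/Train_Gest_Dataset_Resized/val'  # hypothetical path
dataset = datasets.ImageFolder(val_dir)

y_true, y_pred = [], []
with torch.no_grad():
    for path, label in tqdm.tqdm(dataset.samples):
        img = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
        tensor = tfs(img).unsqueeze(0).cuda()
        probs = softmax(model(tensor), dim=1)
        y_true.append(label)
        y_pred.append(int(probs.argmax(dim=1)))

print(classification_report(y_true, y_pred, target_names=dataset.classes))
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(pd.DataFrame(cm, index=dataset.classes, columns=dataset.classes),
            annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()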