def test_full_image_network(video_path, model_path, output_path,
                            start_frame=0, end_frame=None, cuda=True):
    """
    Reads a video and evaluates a subset of frames with a detection network
    that takes in a full frame. Outputs are only given if a face is present
    and the face is highlighted using dlib.

    NOTE(review): the body visible here ends right after the model is loaded;
    the frame loop described above is not part of this view, so the parameters
    `start_frame`, `end_frame` and `cuda` are not used in the visible portion.

    :param video_path: path to video file
    :param model_path: path to model file (should expect the full sized image)
    :param output_path: path where the output video is stored
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate
    :param cuda: enable cuda
    :return:
    """
    print('Starting: {}'.format(video_path))

    # Read and write
    reader = cv2.VideoCapture(video_path)

    # Output file name: text of the last '/'-separated path component up to
    # its first '.', with an '.avi' extension appended.
    video_fn = video_path.split('/')[-1].split('.')[0]+'.avi'
    os.makedirs(output_path, exist_ok=True)
    # Writer settings are prepared here; `writer` itself stays None in the
    # visible portion of the function.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = reader.get(cv2.CAP_PROP_FPS)
    num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))
    writer = None

    # Face detector
    face_detector = dlib.get_frontal_face_detector()

    # Load model
    # The checkpoint is passed to load_state_dict, so `model_path` is expected
    # to hold a state dict rather than a pickled model object.
    model = model_selection(modelname='xception', num_out_classes=2, dropout=0.5)
    model.load_state_dict(torch.load(model_path))
    # Unwrap the underlying module if the network is a DataParallel wrapper.
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
def __init__(self, args) -> None:
    """Set up the Xception network, loss and data loader from ``args``.

    :param args: object providing the ``batch``, ``max_images`` and
        ``threshold`` attributes; it is also forwarded unchanged to
        ``self.get_dataloader``.
    """
    super(MyNet, self).__init__()

    # model_selection returns several values; only the first one (the
    # network) is kept, and it is moved to the module-level `device`.
    backbone, *_ = model_selection(modelname='xception', num_out_classes=2)
    self.net = backbone.to(device)

    # Run settings copied verbatim from the argument object.
    self.batch = args.batch
    self.max_images = args.max_images
    self.threshold = args.threshold

    self.criterion = nn.CrossEntropyLoss()

    # Built last, after the plain settings above have been assigned.
    self.dataloader = self.get_dataloader(args)
def main():
    """Evaluate a trained Xception classifier on the test list.

    Reads ``test_list``, ``batch_size`` and ``model_path`` from the parsed
    command-line arguments, loads the state dict stored at ``model_path``,
    runs the model over the whole test set on the GPU and prints the accuracy
    of every batch followed by the overall test accuracy.

    :raises ValueError: if the test dataset contains no samples.
    """
    args = parse.parse_args()
    test_list = args.test_list
    batch_size = args.batch_size
    model_path = args.model_path
    torch.backends.cudnn.benchmark = True

    test_dataset = MyDataset(
        txt_path=test_list,
        transform=xception_default_data_transforms['test'])
    test_dataset_size = len(test_dataset)
    if test_dataset_size == 0:
        raise ValueError('Test dataset is empty: {}'.format(test_list))

    # drop_last must be False: the accuracy below is divided by the full
    # dataset size, so dropping the final partial batch would silently count
    # its samples as wrong. Shuffling has no effect on accuracy and is off so
    # that the per-iteration output is reproducible.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              drop_last=False,
                                              num_workers=8)

    model = model_selection(modelname='xception', num_out_classes=2,
                            dropout=0.5)
    model.load_state_dict(torch.load(model_path))
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    model = model.cuda()
    model.eval()

    corrects = 0
    with torch.no_grad():
        for image, labels in test_loader:
            image = image.cuda()
            labels = labels.cuda()
            outputs = model(image)
            _, preds = torch.max(outputs, 1)
            batch_corrects = torch.sum(preds == labels).item()
            corrects += batch_corrects
            # Divide by the real batch length; the last batch may be shorter
            # than `batch_size`.
            print('Iteration Acc {:.4f}'.format(
                batch_corrects / labels.size(0)))

    acc = corrects / test_dataset_size
    print('Test Acc: {:.4f}'.format(acc))
def test_full_image_network(video_path, model_path, output_path,
                            start_frame=0, end_frame=None, cuda=True):
    """
    Reads a video and evaluates a subset of frames with a detection network
    that takes in a full frame. Frames in which dlib finds a face get the
    prediction text and a bounding box drawn on them; every processed frame
    is written to an MJPG ``.avi`` file inside ``output_path``.

    The capture, the writer and the progress bar are released even when an
    error occurs while processing.

    :param video_path: path to video file
    :param model_path: path to a pickled model file (should expect the full
        sized image); if None a freshly initialised model is used
    :param output_path: directory where the output video is stored
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate (falsy means: until the end)
    :param cuda: enable cuda; when False the checkpoint is mapped to the CPU
    :return: None
    """
    print('Starting: {}'.format(video_path))

    # Read and write
    reader = cv2.VideoCapture(video_path)
    writer = None
    pbar = None
    try:
        video_fn = video_path.split('/')[-1].split('.')[0]+'.avi'
        os.makedirs(output_path, exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        fps = reader.get(cv2.CAP_PROP_FPS)
        num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Face detector
        face_detector = dlib.get_frontal_face_detector()

        # Load model
        model, *_ = model_selection(modelname='xception', num_out_classes=2)
        if model_path is not None:
            # NOTE: torch.load unpickles the file; only load checkpoints from
            # a trusted source. Without CUDA the tensors are remapped to the
            # CPU so that GPU-saved checkpoints can still be loaded.
            model = torch.load(model_path,
                               map_location=None if cuda else 'cpu')
            print('Model found in {}'.format(model_path))
        else:
            print('No model found, initializing random model.')
        if cuda:
            model = model.cuda()

        # Text variables
        font_face = cv2.FONT_HERSHEY_SIMPLEX
        thickness = 2
        font_scale = 1

        # Frame numbers and length of output video
        frame_num = 0
        assert start_frame < num_frames - 1
        end_frame = end_frame if end_frame else num_frames
        pbar = tqdm(total=end_frame-start_frame)

        while reader.isOpened():
            _, image = reader.read()
            if image is None:
                break
            frame_num += 1

            if frame_num < start_frame:
                continue
            pbar.update(1)

            # Image size
            height, width = image.shape[:2]

            # Init output writer lazily, once the frame size is known
            if writer is None:
                writer = cv2.VideoWriter(join(output_path, video_fn), fourcc,
                                         fps, (width, height))

            # 2. Detect with dlib
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_detector(gray, 1)
            if len(faces):
                # Only the first face returned by the detector is used
                # (not necessarily the biggest one).
                face = faces[0]

                # --- Prediction -------------------------------------------
                # Face crop with dlib and bounding box scale enlargement
                x, y, size = get_boundingbox(face, width, height)
                cropped_face = image[y:y+size, x:x+size]

                # Actual prediction using our model
                prediction, output = predict_with_model(cropped_face, model,
                                                        cuda=cuda)
                # ----------------------------------------------------------

                # Text and bb
                x = face.left()
                y = face.top()
                w = face.right() - x
                h = face.bottom() - y
                label = 'fake' if prediction == 1 else 'real'
                color = (0, 255, 0) if prediction == 0 else (0, 0, 255)
                output_list = ['{0:.2f}'.format(float(score)) for score in
                               output.detach().cpu().numpy()[0]]
                cv2.putText(image, str(output_list)+'=>'+label, (x, y+h+30),
                            font_face, font_scale,
                            color, thickness, 2)
                # draw box over face
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)

            if frame_num >= end_frame:
                break

            writer.write(image)
    finally:
        # Free the native handles regardless of how the loop ended.
        if pbar is not None:
            pbar.close()
        reader.release()
        if writer is not None:
            writer.release()

    if writer is not None:
        print('Finished! Output saved under {}'.format(output_path))
    else:
        print('Input video file was empty')
# Preprocessing applied to every validation image: resize, convert to a
# tensor, then normalise.
validTransform = transforms.Compose(
    [transforms.Resize(image_size), transforms.ToTensor(), normTransform]
)

# Build the dataset instance
valid_data = Mytest(txt_path=valid_txt_path, transform=validTransform)

# Build the DataLoader
valid_loader = DataLoader(
    dataset=valid_data,
    batch_size=valid_bs,
    num_workers=4,
)

# ------------------------------------ step 2/5 : define the network ------------------------------------
# Only the first value returned by model_selection (the network) is kept.
model, *_ = model_selection(modelname=model_name, num_out_classes=2)

# Eight independent `ourfc` heads, created in order fc1 .. fc8, plus one `ourFC`.
fc1, fc2, fc3, fc4, fc5, fc6, fc7, fc8 = (ourfc(2) for _ in range(8))
FC = ourFC(2)

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
Device = torch.device('cuda') if use_cuda else torch.device('cpu')
def main():
    """Train the Xception classifier and keep the best validation weights.

    Reads the run configuration from the parsed command-line arguments,
    trains for ``epoches`` epochs with Adam and a StepLR schedule, evaluates
    on the validation list after every epoch, saves a checkpoint per epoch
    and finally saves the weights of the epoch with the highest validation
    accuracy as ``best.pkl`` in ``./output/<name>``.

    Reported losses are per-sample means: ``nn.CrossEntropyLoss`` already
    averages over the batch, so each batch loss is weighted by the batch
    length before being divided by the dataset size.
    """
    # Local import: only needed to snapshot the best weights.
    import copy

    args = parse.parse_args()
    name = args.name
    continue_train = args.continue_train
    train_list = args.train_list
    val_list = args.val_list
    epoches = args.epoches
    batch_size = args.batch_size
    model_name = args.model_name
    model_path = args.model_path

    output_path = os.path.join('./output', name)
    # makedirs also creates the parent './output' directory when missing.
    os.makedirs(output_path, exist_ok=True)
    torch.backends.cudnn.benchmark = True

    train_dataset = MyDataset(
        txt_path=train_list,
        transform=xception_default_data_transforms['train'])
    val_dataset = MyDataset(
        txt_path=val_list,
        transform=xception_default_data_transforms['val'])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=False,
                                               num_workers=8)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             drop_last=False,
                                             num_workers=8)
    train_dataset_size = len(train_dataset)
    val_dataset_size = len(val_dataset)

    model = model_selection(modelname='xception', num_out_classes=2,
                            dropout=0.5)
    if continue_train:
        model.load_state_dict(torch.load(model_path))
    model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999),
                           eps=1e-08)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    model = nn.DataParallel(model)

    # state_dict() returns references to the live parameter tensors, so the
    # snapshot has to be a deep copy; otherwise later optimizer steps would
    # overwrite the "best" weights in place.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    iteration = 0

    for epoch in range(epoches):
        print('Epoch {}/{}'.format(epoch + 1, epoches))
        print('-' * 10)

        # ---- training pass ----
        model.train()
        train_loss = 0.0
        train_corrects = 0.0
        for image, labels in train_loader:
            image = image.cuda()
            labels = labels.cuda()
            # Real batch length; the last batch may be shorter.
            n_samples = labels.size(0)

            optimizer.zero_grad()
            outputs = model(image)
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            iter_loss = loss.item()
            iter_corrects = torch.sum(preds == labels.data).item()
            train_loss += iter_loss * n_samples
            train_corrects += iter_corrects
            iteration += 1
            if not (iteration % 20):
                print('iteration {} train loss: {:.4f} Acc: {:.4f}'.format(
                    iteration, iter_loss, iter_corrects / n_samples))
        epoch_loss = train_loss / train_dataset_size
        epoch_acc = train_corrects / train_dataset_size
        print('epoch train loss: {:.4f} Acc: {:.4f}'.format(
            epoch_loss, epoch_acc))

        # ---- validation pass ----
        model.eval()
        val_loss = 0.0
        val_corrects = 0.0
        with torch.no_grad():
            for image, labels in val_loader:
                image = image.cuda()
                labels = labels.cuda()
                outputs = model(image)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * labels.size(0)
                val_corrects += torch.sum(preds == labels.data).item()
        epoch_loss = val_loss / val_dataset_size
        epoch_acc = val_corrects / val_dataset_size
        print('epoch val loss: {:.4f} Acc: {:.4f}'.format(
            epoch_loss, epoch_acc))

        if epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
        scheduler.step()

        # A checkpoint of the unwrapped module is written after every epoch.
        torch.save(model.module.state_dict(),
                   os.path.join(output_path, str(epoch) + '_' + model_name))

    print('Best val Acc: {:.4f}'.format(best_acc))
    model.load_state_dict(best_model_wts)
    torch.save(model.module.state_dict(),
               os.path.join(output_path, "best.pkl"))
# NOTE(review): the four statements below are the tail of a function whose
# header and enclosing blocks lie outside this view (`net`, `inputs`,
# `threshold`, `video_id` and the open file `f` are defined there). Their
# real indentation cannot be recovered from here -- confirm against the
# full file.
outputs = net(inputs)
# Index of the highest-scoring class for every image in the batch.
prediction_imgs = outputs.argmax(1)
# The mean of the per-image class indices is compared with the threshold
# to label the whole video.
prediction = 'fake' if prediction_imgs.float().mean(
) >= threshold else 'real'
# NOTE(review): the format string has no trailing newline, so successive
# writes end up on one line -- confirm this is intended.
f.write("Video ID ending in {}: {}".format(video_id.item(), prediction))


if __name__ == "__main__":
    # Command-line interface; defaults are shown in --help through
    # ArgumentDefaultsHelpFormatter.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--video_dir', '-i', type=str, default='videos')
    parser.add_argument('--image_dir', '-o', type=str, default='images')
    parser.add_argument('--interval', type=int, default=10)
    parser.add_argument('--max_images', type=int, default=200)
    parser.add_argument('--threshold', type=float, default=0.5)
    args = parser.parse_args()

    # Pipeline: create images from the video directory, build a loader over
    # the image directory, load the network weights, then evaluate.
    create_images(args.video_dir, args.image_dir, args.interval)
    dataloader = get_dataloader(args.image_dir)
    # Only the first value returned by model_selection (the network) is kept.
    net, *_ = model_selection(modelname='xception', num_out_classes=2)
    net = net.to(device)
    # The weights file path is hard-coded relative to the working directory.
    net.load_state_dict(torch.load('weights/xception.pth'))
    evaluate(net, dataloader, args.threshold, args.max_images)
def test_full_image_network(video_path, model_path, output_path,
                            start_frame=0, end_frame=None, cuda=True):
    """
    Reads a video, classifies the first detected face of every evaluated
    frame as real or fake and appends a per-video summary line to the text
    file ``<output_path>-scores.txt``. No output video is written.

    When no face is detected in any frame, P(fake) is reported as ``nan``
    instead of raising a ZeroDivisionError.

    :param video_path: path to video file
    :param model_path: path to a pickled model file (should expect the full
        sized image); if None a freshly initialised model is used
    :param output_path: directory that is created if missing; also the prefix
        of the score file name
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate (falsy means: until the end)
    :param cuda: currently ignored, CPU execution is forced (see below)
    :return: None
    """
    # CPU execution is deliberately forced here, so the caller's `cuda`
    # argument has no effect.
    cuda = False
    print('Starting: {}'.format(video_path))

    reader = cv2.VideoCapture(video_path)
    video_fn = video_path.split('/')[-1].split('.')[0] + '.avi'
    os.makedirs(output_path, exist_ok=True)
    print("\n\n\t\t" + video_fn + "\n\n")

    fake_count = 0
    real_count = 0
    pbar = None
    try:
        num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Face detector
        face_detector = dlib.get_frontal_face_detector()

        # Load model
        model, *_ = model_selection(modelname='xception', num_out_classes=2)
        if model_path is not None:
            # NOTE: torch.load unpickles the file; only load checkpoints from
            # a trusted source. The tensors are mapped to the CPU so that
            # GPU-saved checkpoints load on machines without CUDA.
            model = torch.load(model_path,
                               map_location=torch.device('cpu'))
            print('Model found in {}'.format(model_path))
        else:
            print('No model found, initializing random model.')
        if cuda:
            model = model.cuda()

        # Frame numbers and length of the evaluated range
        frame_num = 0
        assert start_frame < num_frames - 1
        end_frame = end_frame if end_frame else num_frames
        pbar = tqdm(total=end_frame - start_frame)

        while reader.isOpened():
            _, image = reader.read()
            if image is None:
                break
            frame_num += 1

            if frame_num < start_frame:
                continue
            pbar.update(1)

            # Image size
            height, width = image.shape[:2]

            # Detect with dlib
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_detector(gray, 1)
            if len(faces):
                # Only the first face returned by the detector is used
                # (not necessarily the biggest one).
                face = faces[0]

                # Face crop with dlib and bounding box scale enlargement
                x, y, size = get_boundingbox(face, width, height)
                cropped_face = image[y:y + size, x:x + size]

                # Actual prediction using our model; only the class is needed
                prediction, _ = predict_with_model(cropped_face, model,
                                                   cuda=cuda)
                if prediction == 1:
                    fake_count += 1
                else:
                    real_count += 1

            if frame_num >= end_frame:
                break
    finally:
        # Free the progress bar and the capture handle however the loop ended.
        if pbar is not None:
            pbar.close()
        reader.release()

    total = real_count + fake_count
    # No detected face means there is no ratio to compute.
    p_fake = fake_count / total if total else float('nan')
    summary = ('\n' + video_fn + ': real frames = ' + str(real_count)
               + ' fake frames = ' + str(fake_count)
               + ' ; total frames = ' + str(total)
               + ' ; P(fake) = ' + str(p_fake))
    print(summary)
    # Append mode: results of several videos accumulate in one file.
    with open(output_path + '-scores.txt', "a") as result_file:
        result_file.write(summary)