Python model_selectionの例、network.models.model_selection Pythonの例

コード例 #1

0

ファイルを表示

def test_full_image_network(video_path, model_path, output_path,
                            start_frame=0, end_frame=None, cuda=True):
    """
    Reads a video and evaluates a subset of frames with a detection network
    that takes in a full frame. Outputs are only given if a face is present
    and the face is highlighted using dlib.
    :param video_path: path to video file
    :param model_path: path to model file (should expect the full sized image)
    :param output_path: path where the output video is stored
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate
    :param cuda: enable cuda
    :return:
    """
    print('Starting: {}'.format(video_path))

    # Read and write
    reader = cv2.VideoCapture(video_path)

    # Output name: last '/'-separated component of the input path, cut at its
    # first '.', with '.avi' appended.
    video_fn = video_path.split('/')[-1].split('.')[0]+'.avi'
    os.makedirs(output_path, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = reader.get(cv2.CAP_PROP_FPS)
    num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))
    writer = None

    # Face detector
    face_detector = dlib.get_frontal_face_detector()

    # Load model
    model = model_selection(modelname='xception', num_out_classes=2, dropout=0.5)
    # The file at model_path is loaded as a state dict into the fresh model.
    # (These lines were tab-indented before, which Python 3 rejects inside a
    # space-indented body.)
    model.load_state_dict(torch.load(model_path))
    if isinstance(model, torch.nn.DataParallel):
        model = model.module

コード例 #2

0

ファイルを表示

    def __init__(self, args) -> None:
        """Set up the Xception network, loss and data loader from ``args``.

        :param args: object providing ``batch``, ``max_images`` and
            ``threshold`` attributes; it is also handed to
            ``self.get_dataloader``
        """
        super(MyNet, self).__init__()

        # Only the first item returned by model_selection is used; the rest
        # is discarded.
        backbone, *_extras = model_selection(modelname='xception',
                                             num_out_classes=2)
        self.net = backbone.to(device)

        # Copy the run settings over unchanged, in the same order as before.
        for setting in ('batch', 'max_images', 'threshold'):
            setattr(self, setting, getattr(args, setting))

        self.criterion = nn.CrossEntropyLoss()
        self.dataloader = self.get_dataloader(args)

コード例 #3

0

ファイルを表示

def main():
    """Evaluate a saved Xception classifier on the test list and print accuracy.

    Reads ``test_list``, ``batch_size`` and ``model_path`` from the parsed
    command-line arguments, loads the weights from ``model_path`` into an
    Xception model with two output classes, runs it on the GPU over the whole
    test set and prints the accuracy of every batch and of the full run.
    """
    args = parse.parse_args()
    test_list = args.test_list
    batch_size = args.batch_size
    model_path = args.model_path
    torch.backends.cudnn.benchmark = True
    test_dataset = MyDataset(
        txt_path=test_list, transform=xception_default_data_transforms['test'])
    # Evaluate every sample exactly once: with drop_last=True the trailing
    # partial batch was skipped while the accuracy was still divided by the
    # full dataset size, which under-reported the result. Shuffling is
    # pointless for evaluation.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              drop_last=False,
                                              num_workers=8)
    model = model_selection(modelname='xception',
                            num_out_classes=2,
                            dropout=0.5)
    model.load_state_dict(torch.load(model_path))
    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    model = model.cuda()
    model.eval()

    corrects = 0.0
    seen = 0
    with torch.no_grad():
        for (image, labels) in test_loader:
            image = image.cuda()
            labels = labels.cuda()
            outputs = model(image)
            _, preds = torch.max(outputs, 1)
            batch_corrects = torch.sum(preds == labels).item()
            # The last batch may be smaller than batch_size, so normalise by
            # the number of labels actually present in this batch.
            batch_count = labels.size(0)
            corrects += batch_corrects
            seen += batch_count
            print('Iteration Acc {:.4f}'.format(batch_corrects / batch_count))
    # Guard against an empty test list instead of dividing by zero.
    acc = corrects / seen if seen else 0.0
    print('Test Acc: {:.4f}'.format(acc))

コード例 #4

0

ファイルを表示

ファイル: detect_from_video.py プロジェクト: zigonk/FaceForensics

def test_full_image_network(video_path, model_path, output_path,
                            start_frame=0, end_frame=None, cuda=True):
    """
    Reads a video and evaluates a subset of frames with a detection network
    that takes in a full frame. Outputs are only given if a face is present
    and the face is highlighted using dlib.
    :param video_path: path to video file
    :param model_path: path to model file (should expect the full sized image);
        if None, the freshly initialized model is used
    :param output_path: directory where the output video is stored (created
        if missing)
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate; a falsy value means the frame
        count reported by the video
    :param cuda: enable cuda
    :return:
    """
    print('Starting: {}'.format(video_path))

    # Read and write
    reader = cv2.VideoCapture(video_path)
    writer = None
    pbar = None
    try:
        video_fn = video_path.split('/')[-1].split('.')[0]+'.avi'
        os.makedirs(output_path, exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        fps = reader.get(cv2.CAP_PROP_FPS)
        num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Face detector
        face_detector = dlib.get_frontal_face_detector()

        # Load model
        model, *_ = model_selection(modelname='xception', num_out_classes=2)
        if model_path is not None:
            # NOTE(review): this loads a whole pickled object, not a state
            # dict -- only use model files from a trusted source.
            # Without CUDA, remap the stored tensors to the CPU so that a
            # model saved on a GPU can still be loaded.
            model = torch.load(model_path,
                               map_location=None if cuda else 'cpu')
            print('Model found in {}'.format(model_path))
        else:
            print('No model found, initializing random model.')
        if cuda:
            model = model.cuda()

        # Text variables
        font_face = cv2.FONT_HERSHEY_SIMPLEX
        thickness = 2
        font_scale = 1

        # Frame numbers and length of output video
        frame_num = 0
        assert start_frame < num_frames - 1
        end_frame = end_frame if end_frame else num_frames
        pbar = tqdm(total=end_frame-start_frame)

        while reader.isOpened():
            _, image = reader.read()
            if image is None:
                break
            frame_num += 1

            if frame_num < start_frame:
                continue
            pbar.update(1)

            # Image size
            height, width = image.shape[:2]

            # Init output writer (lazily, once the frame size is known)
            if writer is None:
                writer = cv2.VideoWriter(join(output_path, video_fn), fourcc,
                                         fps, (height, width)[::-1])

            # 2. Detect with dlib
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_detector(gray, 1)
            if len(faces):
                # Only the first detection returned by dlib is used
                face = faces[0]

                # --- Prediction -----------------------------------------------
                # Face crop with dlib and bounding box scale enlargement
                x, y, size = get_boundingbox(face, width, height)
                cropped_face = image[y:y+size, x:x+size]

                # Actual prediction using our model
                prediction, output = predict_with_model(cropped_face, model,
                                                        cuda=cuda)
                # --------------------------------------------------------------

                # Text and bb (drawn at the raw dlib rectangle, not the crop)
                x = face.left()
                y = face.top()
                w = face.right() - x
                h = face.bottom() - y
                label = 'fake' if prediction == 1 else 'real'
                color = (0, 255, 0) if prediction == 0 else (0, 0, 255)
                output_list = ['{0:.2f}'.format(float(score)) for score in
                               output.detach().cpu().numpy()[0]]
                cv2.putText(image, str(output_list)+'=>'+label, (x, y+h+30),
                            font_face, font_scale,
                            color, thickness, 2)
                # draw box over face
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)

            if frame_num >= end_frame:
                break

            # Show
            # cv2.imshow('test', image)
            # cv2.waitKey(33)     # About 30 fps
            writer.write(image)
    finally:
        # Release the progress bar, the capture handle and the writer even
        # when processing fails part-way through.
        if pbar is not None:
            pbar.close()
        reader.release()
        if writer is not None:
            writer.release()

    if writer is not None:
        print('Finished! Output saved under {}'.format(output_path))
    else:
        print('Input video file was empty')

コード例 #5

0

ファイルを表示

# Validation preprocessing: resize, convert to tensor, then apply normTransform.
validTransform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    normTransform
])

# Build the validation dataset instance

valid_data = Mytest(txt_path=valid_txt_path, transform=validTransform)

# Build the DataLoader

valid_loader = DataLoader(dataset=valid_data, batch_size=valid_bs, num_workers=4)

# ------------------------------------ step 2/5 : define the network ------------------------------------
# Only the first item returned by model_selection is kept.
model, *_ = model_selection(modelname=model_name, num_out_classes=2)
# Eight separate ourfc instances plus one ourFC instance, each built with 2.
# NOTE(review): presumably 2 is the number of output classes -- confirm
# against the ourfc / ourFC definitions.
fc1  = ourfc(2)
#print(model)
fc2  = ourfc(2)
fc3  = ourfc(2)
fc4  = ourfc(2)
fc5  = ourfc(2)
fc6  = ourfc(2)
fc7  = ourfc(2)
fc8  = ourfc(2)


FC  = ourFC(2)

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
Device = torch.device('cuda' if use_cuda else 'cpu')

コード例 #6

0

ファイルを表示

def main():
    """Train an Xception classifier and save per-epoch and best checkpoints.

    Settings come from the parsed command-line arguments: ``name`` (output
    sub-directory under ``./output``), ``continue_train`` / ``model_path``
    (optionally resume from saved weights), ``train_list`` / ``val_list``
    (dataset lists), ``epoches``, ``batch_size`` and ``model_name`` (suffix
    of the per-epoch checkpoint file names).

    After every epoch the current weights are written to
    ``<epoch>_<model_name>``; at the end the weights with the highest
    validation accuracy are written to ``best.pkl``.
    """
    # Function-scope import: only needed to snapshot the best weights.
    import copy

    args = parse.parse_args()
    name = args.name
    continue_train = args.continue_train
    train_list = args.train_list
    val_list = args.val_list
    epoches = args.epoches
    batch_size = args.batch_size
    model_name = args.model_name
    model_path = args.model_path
    output_path = os.path.join('./output', name)
    # makedirs also creates a missing './output' parent, which os.mkdir did
    # not, and tolerates an already existing directory.
    os.makedirs(output_path, exist_ok=True)
    torch.backends.cudnn.benchmark = True
    train_dataset = MyDataset(
        txt_path=train_list,
        transform=xception_default_data_transforms['train'])
    val_dataset = MyDataset(txt_path=val_list,
                            transform=xception_default_data_transforms['val'])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               drop_last=False,
                                               num_workers=8)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             drop_last=False,
                                             num_workers=8)
    train_dataset_size = len(train_dataset)
    val_dataset_size = len(val_dataset)
    model = model_selection(modelname='xception',
                            num_out_classes=2,
                            dropout=0.5)
    if continue_train:
        model.load_state_dict(torch.load(model_path))
    model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=0.001,
                           betas=(0.9, 0.999),
                           eps=1e-08)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
    model = nn.DataParallel(model)
    # state_dict() returns references to the live parameter tensors, so the
    # snapshot must be a deep copy; otherwise later training steps overwrite
    # the "best" weights in place.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    iteration = 0
    for epoch in range(epoches):
        print('Epoch {}/{}'.format(epoch + 1, epoches))
        print('-' * 10)
        model.train()
        train_loss = 0.0
        train_corrects = 0.0
        val_loss = 0.0
        val_corrects = 0.0
        for (image, labels) in train_loader:
            image = image.cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            outputs = model(image)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # The final batch can be smaller than batch_size.
            batch_count = labels.size(0)
            # CrossEntropyLoss averages over the batch by default, so weight
            # it by the batch size before summing; the epoch total divided by
            # the dataset size is then a true per-sample mean.
            iter_loss = loss.item()
            train_loss += iter_loss * batch_count
            iter_corrects = torch.sum(preds == labels).item()
            train_corrects += iter_corrects
            iteration += 1
            if not (iteration % 20):
                print('iteration {} train loss: {:.4f} Acc: {:.4f}'.format(
                    iteration, iter_loss, iter_corrects / batch_count))
        epoch_loss = train_loss / train_dataset_size
        epoch_acc = train_corrects / train_dataset_size
        print('epoch train loss: {:.4f} Acc: {:.4f}'.format(
            epoch_loss, epoch_acc))

        model.eval()
        with torch.no_grad():
            for (image, labels) in val_loader:
                image = image.cuda()
                labels = labels.cuda()
                outputs = model(image)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * labels.size(0)
                val_corrects += torch.sum(preds == labels).item()
            epoch_loss = val_loss / val_dataset_size
            epoch_acc = val_corrects / val_dataset_size
            print('epoch val loss: {:.4f} Acc: {:.4f}'.format(
                epoch_loss, epoch_acc))
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        scheduler.step()
        # Save the unwrapped module so the checkpoint keys carry no
        # DataParallel 'module.' prefix.
        torch.save(model.module.state_dict(),
                   os.path.join(output_path,
                                str(epoch) + '_' + model_name))
    print('Best val Acc: {:.4f}'.format(best_acc))
    model.load_state_dict(best_model_wts)
    torch.save(model.module.state_dict(), os.path.join(output_path,
                                                       "best.pkl"))

コード例 #7

0

ファイルを表示

            outputs = net(inputs)
            prediction_imgs = outputs.argmax(1)

            prediction = 'fake' if prediction_imgs.float().mean(
            ) >= threshold else 'real'

            f.write("Video ID ending in {}: {}".format(video_id.item(),
                                                       prediction))


if __name__ == "__main__":
    # Script entry point: extract images from the videos, build the data
    # loader, load the Xception weights and run the evaluation.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--video_dir', '-i', type=str, default='videos')
    parser.add_argument('--image_dir', '-o', type=str, default='images')
    parser.add_argument('--interval', type=int, default=10)
    parser.add_argument('--max_images', type=int, default=200)
    parser.add_argument('--threshold', type=float, default=0.5)

    args = parser.parse_args()

    create_images(args.video_dir, args.image_dir, args.interval)

    dataloader = get_dataloader(args.image_dir)

    # Only the first item returned by model_selection is used.
    net, *_ = model_selection(modelname='xception', num_out_classes=2)
    net = net.to(device)
    # map_location remaps the stored tensors onto the device in use, so a
    # checkpoint saved on a GPU still loads on a CPU-only machine.
    net.load_state_dict(
        torch.load('weights/xception.pth', map_location=device))

    evaluate(net, dataloader, args.threshold, args.max_images)

コード例 #8

0

ファイルを表示

def test_full_image_network(video_path,
                            model_path,
                            output_path,
                            start_frame=0,
                            end_frame=None,
                            cuda=True):
    """
    Reads a video, classifies the first detected face of each evaluated frame
    and reports how many frames were predicted real or fake.

    This variant writes no output video. It prints a one-line summary and
    appends the same line to the text file ``output_path + '-scores.txt'``.
    :param video_path: path to video file
    :param model_path: path to model file (should expect the full sized image);
        if None, the freshly initialized model is used
    :param output_path: directory that is created if missing; also the prefix
        of the ``-scores.txt`` result file
    :param start_frame: first frame to evaluate
    :param end_frame: last frame to evaluate; a falsy value means the frame
        count reported by the video
    :param cuda: enable cuda (honoured only when CUDA is actually available)
    :return:
    """
    # The flag used to be overwritten with False, which silently ignored the
    # caller's choice. Falling back to the CPU only when CUDA is unavailable
    # keeps CPU-only machines working with the default cuda=True.
    cuda = bool(cuda) and torch.cuda.is_available()
    print('Starting: {}'.format(video_path))

    reader = cv2.VideoCapture(video_path)
    pbar = None
    real_count = 0
    fake_count = 0
    try:
        video_fn = video_path.split('/')[-1].split('.')[0] + '.avi'
        os.makedirs(output_path, exist_ok=True)
        num_frames = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))

        print("\n\n\t\t" + video_fn + "\n\n")

        # Face detector
        face_detector = dlib.get_frontal_face_detector()

        # Load model
        model, *_ = model_selection(modelname='xception', num_out_classes=2)
        if model_path is not None:
            # NOTE(review): this loads a whole pickled object, not a state
            # dict -- only use model files from a trusted source.
            # Always load onto the CPU first so GPU-saved models open on any
            # machine; the model is moved to the GPU below when requested.
            model = torch.load(model_path, map_location=torch.device('cpu'))
            print('Model found in {}'.format(model_path))
        else:
            print('No model found, initializing random model.')
        if cuda:
            model = model.cuda()

        # Frame numbers
        frame_num = 0
        assert start_frame < num_frames - 1
        end_frame = end_frame if end_frame else num_frames
        pbar = tqdm(total=end_frame - start_frame)

        while reader.isOpened():
            _, image = reader.read()
            if image is None:
                break
            frame_num += 1

            if frame_num < start_frame:
                continue
            pbar.update(1)

            # Image size
            height, width = image.shape[:2]

            # Detect with dlib
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faces = face_detector(gray, 1)
            if len(faces):
                # Only the first detection returned by dlib is used
                face = faces[0]

                # Face crop with dlib and bounding box scale enlargement
                x, y, size = get_boundingbox(face, width, height)
                cropped_face = image[y:y + size, x:x + size]

                # Actual prediction using our model; the second return value
                # is not needed here.
                prediction, _ = predict_with_model(cropped_face,
                                                   model,
                                                   cuda=cuda)

                # A prediction of 1 counts as fake, anything else as real.
                if prediction == 1:
                    fake_count += 1
                else:
                    real_count += 1

            if frame_num >= end_frame:
                break
    finally:
        # Release the progress bar and the capture handle even on failure.
        if pbar is not None:
            pbar.close()
        reader.release()

    total = real_count + fake_count
    # With no detected face the ratio is undefined; report NaN instead of
    # crashing with a ZeroDivisionError.
    p_fake = fake_count / total if total else float('nan')
    summary = ('\n' + video_fn + ': real frames = ' + str(real_count) +
               ' fake frames = ' + str(fake_count) +
               ' ; total frames = ' + str(total) +
               ' ; P(fake) = ' + str(p_fake))
    print(summary)
    # Append mode: repeated calls with the same output_path add to the file.
    with open(output_path + '-scores.txt', "a") as result_file:
        result_file.write(summary)