Example no. 1
def main():
    torch.set_grad_enabled(False)
    torch.backends.cudnn.benchmark = True

    test_dir = "./input/deepfake-detection-challenge/test_videos"
    csv_path = "./input/deepfake-detection-challenge/sample_submission.csv"

    face_detector = FaceDetector()
    face_detector.load_checkpoint(
        "./input/dfdc-pretrained-2/RetinaFace-Resnet50-fixed.pth")
    loader = DFDCLoader(test_dir, face_detector, T.ToTensor())

    model1 = xception(num_classes=2, pretrained=False)
    ckpt = torch.load("./input/dfdc-pretrained-2/xception-hg-2.pth",
                      map_location=torch.device('cpu'))
    model1.load_state_dict(ckpt["state_dict"])
    model1 = model1.cpu()
    model1.eval()

    model2 = WSDAN(num_classes=2, M=8, net="xception", pretrained=False).cpu()
    ckpt = torch.load("./input/dfdc-pretrained-2/ckpt_x.pth",
                      map_location=torch.device('cpu'))
    model2.load_state_dict(ckpt["state_dict"])
    model2.eval()

    model3 = WSDAN(num_classes=2, M=8, net="efficientnet",
                   pretrained=False).cpu()
    ckpt = torch.load("./input/dfdc-pretrained-2/ckpt_e.pth",
                      map_location=torch.device('cpu'))
    model3.load_state_dict(ckpt["state_dict"])
    model3.eval()

    zhq_nm_avg = torch.Tensor([.4479, .3744, .3473]).view(1, 3, 1, 1).cpu()
    zhq_nm_std = torch.Tensor([.2537, .2502, .2424]).view(1, 3, 1, 1).cpu()

    for batch in loader:
        batch = batch.cpu()

        i1 = F.interpolate(batch, size=299, mode="bilinear")
        i1.sub_(0.5).mul_(2.0)
        o1 = model1(i1).softmax(-1)[:, 1].cpu().numpy()

        i2 = (batch - zhq_nm_avg) / zhq_nm_std
        o2, _, _ = model2(i2)
        o2 = o2.softmax(-1)[:, 1].cpu().numpy()

        i3 = F.interpolate(i2, size=300, mode="bilinear")
        o3, _, _ = model3(i3)
        o3 = o3.softmax(-1)[:, 1].cpu().numpy()

        out = 0.2 * o1 + 0.7 * o2 + 0.1 * o3
        loader.feedback(out)

    with open(csv_path) as fin, open("submission.csv", "w") as fout:
        fout.write(next(fin))
        for line in fin:
            fname = line.split(",", 1)[0]
            pred = loader.score[fname]
            print("%s,%.6f" % (fname, pred), file=fout)
def test_on_image_directory(path):
    net = SimpleConvNet()
    net.forward(Variable(torch.FloatTensor(1, 1, 28, 28)))
    net.load_state_dict(torch.load(open(MODEL_FILE, "rb")))

    label_regex = re.compile("([0123456789]+)\\..*?")
    correct = 0
    total = 0

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])

    for file_path in glob.glob(os.path.join(path, "*.png")):
        cur_correct = int(label_regex.findall(os.path.basename(file_path))[0])
        image = PIL.Image.open(file_path).convert("L")
        transformed_image = transform(image)
        transformed_image = Variable(transformed_image.view(1, 1, 28, 28))

        cur_predicted = net.forward(transformed_image).data.max(1)[1].item()

        print(os.path.basename(file_path) + " - " + str(cur_predicted))

        total += 1
        if cur_correct == cur_predicted:
            correct += 1

    print "Identified {} of {}, {:.2%}".format(correct, total,
                                               float(correct) / float(total))
def show_saved_net_accuracy():
    train_dataset, test_dataset = load_normalized_datasets()
    test_dataset_loader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE)

    net = SimpleConvNet()
    net.forward(Variable(torch.FloatTensor(1, 1, 28, 28)))
    net.load_state_dict(torch.load(open(MODEL_FILE, "rb")))
    test_training_accuracy(net, test_dataset_loader, 0)
Example no. 4
def single_run(corpus, index, title, overwrite, only_test=False):
    if cfg.BATCH_TYPE == "multi":
        collate_fn = multi_batchify
    else:
        collate_fn = lambda x: \
            (x[0].X, x[0].C, x[0].POS, x[0].REL, x[0].DEP, x[0].Y)

    model_save_path = os.path.join(cfg.MODEL_SAVE_DIR, title + ".m")
    plot_save_path = os.path.join(cfg.PLOT_SAVE_DIR, title + ".png")
    if not only_test:
        the_model = build_model(corpus.train, corpus.dev, corpus.test,
                                collate_fn, corpus.tag_idx, corpus.is_oov,
                                corpus.embedding_matrix, model_save_path,
                                plot_save_path)
    else:
        the_model = torch.load(model_save_path)

    print("Testing ...")
    test_loader = DataLoader(corpus.test,
                             batch_size=cfg.BATCH_SIZE,
                             num_workers=28,
                             collate_fn=collate_fn)
    test_eval, only_ent_eval, pred_list, true_list = test(
        "test", test_loader, corpus.tag_idx, the_model)

    print("Writing Brat File ...")
    bratfile_full = Writer(cfg.CONF_DIR, os.path.join(cfg.BRAT_DIR, title),
                           "full_out", corpus.tag_idx)
    bratfile_inc = Writer(cfg.CONF_DIR, os.path.join(cfg.BRAT_DIR, title),
                          "inc_out", corpus.tag_idx)

    # convert idx to label
    test_eval.print_results()
    only_ent_eval.print_results()
    txt_res_file = os.path.join(cfg.TEXT_RESULT_DIR, title + ".txt")
    csv_res_file = os.path.join(cfg.CSV_RESULT_DIR, title + ".csv")
    test_eval.write_results(txt_res_file, title + "g={0}".format(cfg.LM_GAMMA),
                            overwrite)
    only_ent_eval.write_results(txt_res_file,
                                title + " g={0}".format(cfg.LM_GAMMA),
                                overwrite)
    test_eval.write_csv_results(csv_res_file,
                                title + "g={0}".format(cfg.LM_GAMMA),
                                overwrite)

    test_loader = DataLoader(corpus.test,
                             batch_size=cfg.BATCH_SIZE,
                             num_workers=28,
                             collate_fn=collate_fn)
    sents = [(sent, p) for SENT, X, C, POS, Y, P in test_loader
             for sent, p in zip(SENT, P)]
    bratfile_full.from_labels(sents, true_list, pred_list, doFull=True)
    bratfile_inc.from_labels(sents, true_list, pred_list, doFull=False)

    return test_eval
Example no. 5
    def load_pretrained_rnn(rnn_params, rnn_path):
        # load pretrained rnn
        rnn_net = Text_RNN(*rnn_params)
        rnn_net.load_state_dict(torch.load(rnn_path))
        rnn_net.eval()

        # freeze weights
        for param in rnn_net.parameters():
            param.requires_grad = False

        return rnn_net
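
The freeze pattern above pairs naturally with the requires_grad filter used when building an optimizer (the same filter(...) idiom appears in the build_model examples below). A minimal self-contained sketch, with nn.GRU standing in for Text_RNN and made-up sizes:

import torch.nn as nn
import torch.optim as optim

encoder = nn.GRU(input_size=300, hidden_size=128, batch_first=True)
for p in encoder.parameters():
    p.requires_grad = False          # frozen, exactly as in load_pretrained_rnn

head = nn.Linear(128, 2)             # new, trainable layer on top of the frozen encoder

# the requires_grad filter keeps the frozen weights out of the optimizer
trainable = filter(lambda p: p.requires_grad,
                   list(encoder.parameters()) + list(head.parameters()))
optimizer = optim.Adam(trainable, lr=1e-3)
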
Example no. 6
    def merge_checkpoints(checkpoint_paths, output_path):
        if not checkpoint_paths:
            raise ValueError(
                'Need to specify at least one checkpoint, %d provided.' %
                len(checkpoint_paths or []))

        if len(checkpoint_paths) < 2:
            shutil.copyfile(checkpoint_paths[0], output_path)
            return

        def __sum(source, destination):
            for key, value in source.items():
                if isinstance(value, dict):
                    node = destination.setdefault(key, {})
                    __sum(value, node)
                else:
                    if isinstance(value, torch.FloatTensor):
                        destination[key] = torch.add(destination[key], value)

            return destination

        def __divide(source, denominator):
            for key, value in source.items():
                if isinstance(value, dict):
                    node = source.setdefault(key, {})
                    __divide(node, denominator)
                else:
                    if isinstance(value, torch.FloatTensor):
                        source[key] = torch.div(value, denominator)

            return source

        output_checkpoint = torch.load(checkpoint_paths[0])

        for checkpoint_path in checkpoint_paths[1:]:
            checkpoint = torch.load(checkpoint_path)
            output_checkpoint = __sum(checkpoint, output_checkpoint)

        output_checkpoint = __divide(output_checkpoint, len(checkpoint_paths))

        torch.save(output_checkpoint, output_path)
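
A short usage sketch for the averaging helper above, assuming merge_checkpoints is reachable as a plain function or staticmethod. The file names are hypothetical; all checkpoints must share the same nested dict structure of FloatTensors, since __sum adds values key by key and __divide then divides by the number of checkpoints (plain checkpoint averaging).

paths = ["ckpt_epoch_08.pt", "ckpt_epoch_09.pt", "ckpt_epoch_10.pt"]  # hypothetical files
merge_checkpoints(paths, "ckpt_averaged.pt")
averaged = torch.load("ckpt_averaged.pt")  # loadable like any other checkpoint
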
Example no. 7
def loadModel(optional=True):
    model_exists = os.path.isfile(MODEL_PATH_BEST)
    if model_exists:
        checkpoint = torch.load(MODEL_PATH_BEST)
        net.load_state_dict(checkpoint['state_dict'])
        return "TRAINING AVG LOSS: {}\n" \
               "TRAINING AVG DIFF: {}".format(
            checkpoint["epoch_avg_loss"], checkpoint["epoch_avg_diff"])
    else:
        if optional:
            pass  # model loading was optional, so nothing to do
        else:
            # no saved model was found and loading was not optional
            raise Exception("model couldn't be found:", MODEL_PATH_BEST)
Example no. 8
    def load_checkpoint(self, gpu_arg, checkpoint_file):
        """
        Method to load a checkpoint file and reassign required variables.
        INPUT:
            1. GPU user selection:          <bool>
            2. Checkpoint file:             <string>
        RETURNS:
            1. Selected model definition:   <model object>
            2. Gradient descent def:        <optimizer object>
        """
        # check if the GPU is currently available and set device flag appropriately
        _ = self.gpu_status(gpu_arg)
        
        # load the old model state
        checkpoint = torch.load(checkpoint_file, map_location=self.device_location)
        
        # load a pre-trained network model based on the command line argument, if supplied
        if checkpoint['arch'] == 'vgg13':
            model = models.vgg13(pretrained=True)
        elif checkpoint['arch'] == 'vgg16':
            model = models.vgg16(pretrained=True)
        elif checkpoint['arch'] == 'densenet121':
            model = models.densenet121(pretrained=True)
        else:
            model = models.vgg16(pretrained=True)
            print("Checkpoint model architecture not recoginized or supported. \n"
                  "Using default VGG16 instead. Available architectures: VGG13, \n"
                  "VGG16, and DenseNet121.\n")

        # freeze the network's parameters so no backprop occurs
        for param in model.parameters():
            param.requires_grad = False

        # in case more training is desired, assign needed values
        self.arch = checkpoint['arch']
        self.epochs = checkpoint['epochs']
        self.training_loss = checkpoint['loss']
        model.classifier = checkpoint['classifier']
        #criterion = checkpoint['criterion']

        # prepare for further training with NLLLoss and Adam, using the learning rate stored in the checkpoint
        optimizer = optim.Adam(model.classifier.parameters(), lr=checkpoint['learning_rate'])
        
        # reassign the state dictionaries and label indices
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        model.class_to_idx = checkpoint['class_to_idx']
            
        return model, optimizer
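
A sketch of the checkpoint dictionary this loader expects, based only on the keys read above; model, optimizer, and train_dataset are assumed to come from the original training run, and the concrete values are placeholders.

checkpoint = {
    'arch': 'vgg16',                               # 'vgg13', 'vgg16' or 'densenet121'
    'epochs': 10,
    'loss': 0.35,
    'learning_rate': 0.001,
    'classifier': model.classifier,                # the trained classifier head
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'class_to_idx': train_dataset.class_to_idx,
}
torch.save(checkpoint, 'checkpoint.pth')
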
Example no. 9
def restore_from(model: nn.Module, restore_from: str):
    '''Restore a model from a checkpoint.

    Args:
        model(nn.Module): the model to load weights into
        restore_from(str): path to the checkpoint file

    Return:
        model(nn.Module): the restored model
        epoch(int): the epoch number stored in the checkpoint
    '''
    assert os.path.exists(restore_from), 'Path does not exist! {}'.format(restore_from)

    ckpt = torch.load(restore_from)
    model.load_state_dict(ckpt['model_state_dict'])
    return model, ckpt['epoch']
Example no. 10
def build_model(train_dataset, dev_dataset, test_dataset, collate_fn, tag_idx,
                is_oov, embedding_matrix, model_save_path, plot_save_path):
    # init model
    model = BiLSTM_CRF(embedding_matrix, tag_idx)

    # Turn on cuda
    model = model.cuda()

    # verify model
    print(model)

    # exclude parameters with requires_grad = False from the optimizer

    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=cfg.LEARNING_RATE)
    # optimizer = optim.SGD(model.parameters(), lr=cfg.LEARNING_RATE, momentum=0.9)
    optimizer.zero_grad()
    model.zero_grad()

    # track the best validation result across epochs
    best_res_val_0 = 0.0
    best_epoch = 0
    dev_eval_history = []
    test_eval_history = []
    for epoch in range(cfg.MAX_EPOCH):
        print('-' * 40)
        print("EPOCH = {0}".format(epoch))
        print('-' * 40)

        random.seed(epoch)
        train_loader = DataLoader(train_dataset,
                                  batch_size=cfg.BATCH_SIZE,
                                  shuffle=cfg.RANDOM_TRAIN,
                                  num_workers=28,
                                  collate_fn=collate_fn)

        train_eval, model = train_a_epoch(name="train",
                                          data=train_loader,
                                          tag_idx=tag_idx,
                                          model=model,
                                          optimizer=optimizer)

        dev_loader = DataLoader(dev_dataset,
                                batch_size=cfg.BATCH_SIZE,
                                num_workers=28,
                                collate_fn=collate_fn)
        test_loader = DataLoader(test_dataset,
                                 batch_size=cfg.BATCH_SIZE,
                                 num_workers=28,
                                 collate_fn=collate_fn)

        dev_eval, _, _ = test("dev", dev_loader, tag_idx, model)
        test_eval, _, _ = test("test", test_loader, tag_idx, model)

        dev_eval.verify_results()
        test_eval.verify_results()
        dev_eval_history.append(dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]])
        test_eval_history.append(test_eval.results['test_conll_f'])
        plot_curve(epoch, dev_eval_history, test_eval_history, "epochs",
                   "fscore", "epoch learning curve", plot_save_path)
        pickle.dump((dev_eval_history, test_eval_history),
                    open("plot_data.p", "wb"))
        # pick the best epoch
        if epoch < cfg.MIN_EPOCH_IMP or (
                dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]] > best_res_val_0):
            best_epoch = epoch
            best_res_val_0 = dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]

            torch.save(model, model_save_path)

        print("current dev micro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]))
        print("current dev macro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[1]]))
        print("best dev micro_score: {0}".format(best_res_val_0))
        print("best_epoch: {0}".format(str(best_epoch)))

        # early stopping: no dev improvement for MAX_EPOCH_IMP consecutive epochs
        if 0 < cfg.MAX_EPOCH_IMP <= (epoch - best_epoch):
            break
    print("Loading Best Model ...")

    model = torch.load(model_save_path)
    return model
Example no. 11
def detect(save_img=False):
    out, source, weights, half, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    #device = torch_utils.select_device(opt.device)
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Load model
    '''
    The original method needs exactly the same folder structure and imports that the
    model was trained with; this is a limitation of pickling whole models in torch.
    A state_dict-based alternative is sketched after this function.

    The alternative of loading from GitHub (torch.hub) is not working here.

    Another issue arises when training and detection run on different devices (CPU/GPU).
    '''

    # DB 20201018 = Original method
    #google_utils.attempt_download(weights)
    attempt_download(weights)
    model = torch.load(weights, map_location=device)['model']  # ORIGINAL
    # torch.save(torch.load(weights, map_location=device), weights)  # update model if SourceChangeWarning
    # model.fuse()
    # ATTENTION! This .eval() call belongs with the original
    # model = torch.load(weights, map_location=device)['model'] line above.
    model.to(device).eval()
    #model.to(device).float().eval()  # DB 20201018: detect on CPU using a GPU-trained model

    # DB 20201018: Load from github method
    #model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True).to(device).eval() # DB 20201016 MODEL IMPORT FIX
    #model = torch.hub.load('danfbento/SIB2', 'mod5_test_weight', pretrained=True).to(device).eval() # DB 20201016 MODEL IMPORT FIX

    # Second-stage classifier
    classify = False
    if classify:
        #modelc = torch_utils.load_classifier(name='resnet101', n=2)  # initialize
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.names if hasattr(model, 'names') else model.modules.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        _ = model(img.half() if half else img.float())  # run once to warm up
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        #t1 = torch_utils.time_synchronized()
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        #t2 = torch_utils.time_synchronized()
        t2 = time_synchronized()

        # to float
        if half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   fast=True,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt',
                                  'a') as file:
                            file.write(('%g ' * 5 + '\n') %
                                       (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
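
As noted in the comment inside detect(), torch.load on a whole pickled model requires the original module layout on the import path. A device-agnostic alternative is to save and load only the state_dict; this is a generic sketch (Model(cfg) is a hypothetical constructor), not this repository's own loading code.

import torch

# save side: store only the weights, not the pickled module tree
# torch.save(model.state_dict(), 'weights/best_state.pt')

# load side: rebuild the architecture in code, then load the weights onto any device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model(cfg)                                    # hypothetical constructor
state = torch.load('weights/best_state.pt', map_location=device)
model.load_state_dict(state)
model.to(device).eval()
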
Example no. 12
def build_model(train_dataset, dev_dataset, test_dataset, collate_fn, tag_idx,
                is_oov, embedding_matrix, model_save_path, plot_save_path):
    # init model
    model = MultiBatchSeqNet(embedding_matrix,
                             batch_size=cfg.BATCH_SIZE,
                             isCrossEnt=False,
                             char_level=cfg.CHAR_LEVEL,
                             pos_feat=cfg.POS_FEATURE,
                             dep_rel_feat=cfg.DEP_LABEL_FEATURE,
                             dep_word_feat=cfg.DEP_WORD_FEATURE)

    # Turn on cuda
    model = model.cuda()

    # verify model
    print(model)

    # exclude parameters with requires_grad = False from the optimizer

    optimizer = optim.Adadelta(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=cfg.LEARNING_RATE)
    # optimizer = optim.SGD(model.parameters(), lr=cfg.LEARNING_RATE, momentum=0.9)
    optimizer.zero_grad()
    model.zero_grad()

    # init loss criteria
    seq_criterion = nn.NLLLoss(size_average=False)
    lm_f_criterion = nn.NLLLoss(size_average=False)
    lm_b_criterion = nn.NLLLoss(size_average=False)
    att_loss = nn.CosineEmbeddingLoss(margin=1)
    best_res_val_0 = 0.0
    best_res_val_1 = 0.0
    best_epoch = 0
    dev_eval_history = []
    test_eval_history = []
    for epoch in range(cfg.MAX_EPOCH):
        print('-' * 40)
        print("EPOCH = {0}".format(epoch))
        print('-' * 40)

        random.seed(epoch)
        train_loader = DataLoader(train_dataset,
                                  batch_size=cfg.BATCH_SIZE,
                                  shuffle=cfg.RANDOM_TRAIN,
                                  num_workers=28,
                                  collate_fn=collate_fn)

        train_eval, model = train_a_epoch(name="train",
                                          data=train_loader,
                                          tag_idx=tag_idx,
                                          is_oov=is_oov,
                                          model=model,
                                          optimizer=optimizer,
                                          seq_criterion=seq_criterion,
                                          lm_f_criterion=lm_f_criterion,
                                          lm_b_criterion=lm_b_criterion,
                                          att_loss=att_loss,
                                          gamma=cfg.LM_GAMMA)

        dev_loader = DataLoader(dev_dataset,
                                batch_size=cfg.BATCH_SIZE,
                                num_workers=28,
                                collate_fn=collate_fn)
        test_loader = DataLoader(test_dataset,
                                 batch_size=cfg.BATCH_SIZE,
                                 num_workers=28,
                                 collate_fn=collate_fn)

        dev_eval, _, _, _ = test("dev", dev_loader, tag_idx, model)
        test_eval, _, _, _ = test("test", test_loader, tag_idx, model)

        dev_eval.verify_results()
        test_eval.verify_results()
        dev_eval_history.append(dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]])
        test_eval_history.append(test_eval.results['test_conll_f'])
        plot_curve(epoch, dev_eval_history, test_eval_history, "epochs",
                   "fscore", "epoch learning curve", plot_save_path)
        pickle.dump((dev_eval_history, test_eval_history),
                    open("plot_data.p", "wb"))
        # pick the best epoch
        if epoch < cfg.MIN_EPOCH_IMP or (
                dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]] > best_res_val_0):
            best_epoch = epoch
            best_res_val_0 = dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]

            torch.save(model, model_save_path)

        print("current dev micro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[0]]))
        print("current dev macro_score: {0}".format(
            dev_eval.results[cfg.BEST_MODEL_SELECTOR[1]]))
        print("best dev micro_score: {0}".format(best_res_val_0))
        print("best_epoch: {0}".format(str(best_epoch)))

        # early stopping: no dev improvement for MAX_EPOCH_IMP consecutive epochs
        if 0 < cfg.MAX_EPOCH_IMP <= (epoch - best_epoch):
            break
    print("Loading Best Model ...")

    model = torch.load(model_save_path)
    return model
Example no. 13
    def load_checkpoint(self, path):
        self.net.load_state_dict(torch.load(path))
def main():
##########################################################################################################
    #preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    
    num_classes = 80
    
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    
    assert inp_dim % 32 == 0                   # assert aborts the program when this condition is false
    assert inp_dim > 32

    if CUDA:
        model.cuda()
            
    model.eval()
    
    global confirm
    global person
    
    fps = 0.0
    count = 0
    frame = 0    
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1) 
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    #record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #out = cv2.VideoWriter('output/testwrite_normal.avi',fourcc, 15.0, (640,480),True)

    cap = cv2.VideoCapture(0)

    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []
    while True:
        start = time.time()  
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break
        img, orig_im, dim = prep_image(color_image, inp_dim)
        
        im_dim = torch.FloatTensor(dim).repeat(1,2)             
##########################################################################################################
        #people detection part                
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        time_a = time.time()
        if count_yolo %3 == 0:                                                               #detect people every 3 frames
            output = model(Variable(img), CUDA)                         # feed the prepared image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)         


            if type(output) == int:
                fps  = ( fps + (1./(time.time()-start)) ) / 2
                print("fps= %f"%(fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
        
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim                # clamp the coordinates into a valid range
        
            #im_dim = im_dim.repeat(output.size(0), 1)
            output[:,[1,3]] *= color_image.shape[1]
            output[:,[2,4]] *= color_image.shape[0]
            output = output.cpu().numpy() 
            output = sellect_person(output)                                       # drop detections that are not people, to reduce computation
            output = np.array(output)
            output_update = output
        elif count_yolo %3 != 0:
            output = output_update
        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))                # draw the detections onto the original image
        # columns 1:5 of output are the top-left and bottom-right corners of each box
        detect_time.append(time.time() - time_a)
##########################################################################################################
        time_a = time.time()
        #kalman filter part
        outputs_tlwh = to_tlwh(output)                             ## convert output into the tlwh format expected by the Kalman update
        features = encoder(orig_im,outputs_tlwh)
        detections = [Detection(output_tlwh, 1.0, feature) for output_tlwh, feature in zip(outputs_tlwh, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            box = track.to_tlbr()
            cv2.rectangle(orig_im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),(255,255,255), 2)
            cv2.putText(orig_im, str(track.track_id),(int(box[0]), int(box[1])),0, 5e-3 * 200, (0,255,0),2)  
        
        kalman_time.append(time.time() - time_a)
##########################################################################################################
        #face recognition part
        time_a = time.time()
        if confirm == False:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users',i,'%s.txt'%(i)) for i in name_list]
            total_features = np.empty((128,),np.float32)

            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features,temp))
            total_features = total_features[1:]

            #threshold = 0.30896     # this threshold did not work well, probably due to differences between the training and test sets
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            #use_cuda = torch.cuda.is_available() and True
            #device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")

            # is_cuda_available
            trans = transforms.Compose([
                transforms.Resize((112,112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)                                                                  # detect the faces first

            if len(bboxes) == 0:
                print('detect no people')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()              # crop the detected face box from the image
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))                               # compute the feature embedding of the crop
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if  cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]  
                    # draw the box and the name here
                    orig_im = draw_ch_zn(orig_im,person,font,loc_x_y)                                                                    # draw the name
                    cv2.rectangle(orig_im,(int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255))           # draw the box
            #cv2.imshow("frame", orig_im)

##########################################################################################################
            # confirmation part
            print('confirmation rate: {} %'.format(count*10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count*10), (10,30),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
            if len(bboxes)!=0 and len(output)!=0:
                if bboxes[0,0]>output[0,1] and bboxes[0,1]>output[0,2] and bboxes[0,2]<output[0,3] and bboxes[0,3]<output[0,4] and person:
                    count+=1
            frame+=1
            if count>=10 and frame<=30:
                confirm = True
                print('confirmed that the face belongs to that person')
            elif  frame >= 30:
                print('failed to confirm, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'failed to confirm, starting again', (10,60),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
##########################################################################################################
        recogn_time.append(time.time() - time_a)
        time_a = time.time()
        #show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed', (output[0,1].astype(np.int32)+100,output[0,2].astype(np.int32)+20),
                                     cv2.FONT_HERSHEY_PLAIN, 2, [0,0,255], 2)
        # overlay the recognized person's name
        if confirm:  
            for track in tracker.tracks:
                bbox = track.to_tlbr()
                if track.track_id == 1:
                    cv2.putText(orig_im, person, (int(bbox[0])+100,int(bbox[1])+20),
                                            cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
                
                    #rate.sleep()
        cv2.imshow("frame", orig_im)
        #out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        
        aux_time.append(time.time()-time_a)
        fps  = ( fps + (1./(time.time()-start)) ) / 2
        print("fps= %f"%(fps))
    #calculate how long each part takes
    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1/(avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
Example no. 15
    def __init__(self,
                 text_params,
                 audio_params,
                 fusion_params,
                 paths,
                 p_drop=0.15,
                 post_tfn_subnet=128):
        super(Hierarchy_Attn, self).__init__()

        # define text & audio recurrent subnet
        self.text_rnn = Text_Encoder(*text_params)
        self.audio_rnn = Audio_Encoder(*audio_params)

        self.text_rnn.load_state_dict(torch.load(paths["text"]))
        self.text_rnn.eval()
        for p in self.text_rnn.parameters():
            p.requires_grad = False

        self.audio_rnn.load_state_dict(torch.load(paths["audio"]))
        self.audio_rnn.eval()
        for p in self.audio_rnn.parameters():
            p.requires_grad = False

        # define fusion RNN net
        self.fusion_rnn = Text_RNN(*fusion_params)

        # define cat-fusion attention level
        text_dim_tuple, audio_dim_tuple = \
            self.get_rnn_tuples(text_params[1], audio_params[1])
        self.fusion_net = Attn_Fusion(text_dim_tuple, audio_dim_tuple)

        # define mul-fusion layer
        self.mul_fusion = Mul_Fusion(text_dim_tuple, audio_dim_tuple)

        # get text hidden size and audio hidden size
        H = text_dim_tuple[2]
        D = audio_dim_tuple[2]

        # fused reps dimensionality reduction
        '''
        self.fusion_transform = nn.Sequential(nn.Linear(2*H+D, H),
                                              nn.Dropout(p_drop),
                                              nn.ReLU(),
                                              nn.Linear(H, H//2),
                                              nn.Dropout(p_drop),
                                              nn.ReLU())
        '''

        # deep representations
        self.deep_audio = nn.Sequential(nn.Linear(D, D), nn.Dropout(p_drop),
                                        nn.ReLU(), nn.Linear(D, D),
                                        nn.Dropout(p_drop), nn.ReLU())

        self.deep_text = nn.Sequential(nn.Linear(H, H), nn.ReLU(),
                                       nn.Linear(H, H), nn.ReLU())
        W = fusion_params[1] * 2
        _W = W + H + D
        self.deep_fused = nn.Sequential(nn.Linear(_W, _W), nn.Dropout(p_drop),
                                        nn.ReLU(), nn.Linear(_W, W),
                                        nn.Dropout(p_drop), nn.ReLU(),
                                        nn.Linear(W, W), nn.Dropout(p_drop),
                                        nn.ReLU())

        ################################
        ## deep feature + reps networks
        ###############################
        '''
        self.deep_audio_2 = nn.Sequential(nn.Linear(2*D,D),
                                          nn.Dropout(p_drop),
                                          nn.ReLU())

        self.deep_text_2 = nn.Sequential(nn.Linear(2*H,H),
                                         nn.ReLU())

        self.deep_fusion_2 = nn.Sequential(nn.Linear(2*D,D),
                                           nn.Dropout(p_drop),
                                           nn.ReLU())
        '''

        ###################################
        ### final fusion layer
        ###################################

        self.deep_mul = nn.Sequential(nn.Linear(D, D), nn.Dropout(p_drop),
                                      nn.ReLU(), nn.Linear(D, D),
                                      nn.Dropout(p_drop), nn.ReLU())

        # define dense layers
        cat_size = H + D + W
        self.dense = nn.Sequential(nn.Linear(cat_size, cat_size),
                                   nn.Dropout(p_drop), nn.ReLU(),
                                   nn.Linear(cat_size, cat_size // 2),
                                   nn.Dropout(p_drop), nn.ReLU(),
                                   nn.Linear(cat_size // 2, cat_size // 2),
                                   nn.Dropout(p_drop), nn.ReLU(),
                                   nn.Linear(cat_size // 2, H),
                                   nn.Dropout(p_drop), nn.ReLU(),
                                   nn.Linear(H, D), nn.Dropout(p_drop),
                                   nn.ReLU())

        self.softmax = nn.Softmax(dim=-1)

        # map according to task
        self.fusion_mapping = nn.Linear(D, 1)
        self.audio_mapping = nn.Linear(D, 1)
        self.text_mapping = nn.Linear(H, 1)