예제 #1
0
    def get_accuracy(self):
        """Run the restored GRU over the test loader and print its raw scores.

        Rebuilds the CNN extractor from ``cnn.pkl`` with its parameters frozen,
        wraps it in a GRU restored from ``self.ckpt``, then prints every
        prediction and label together with their lengths.  The visible code
        neither accumulates nor returns an accuracy value.
        """
        # load dataloader (only the third loader returned is used here)
        _, _, t_l = get_loader('../Videos/HV', '../Videos/RV',
                               '../Videos/testRV', 1)

        # build network
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor

        # freeze the extractor: none of its parameters should receive gradients
        for p in self.c2d.parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()
        self.gru.load_state_dict(torch.load(self.ckpt))

        print(self.gru)

        self.gru.eval()
        avg_acc = 0  # NOTE(review): never updated in this block

        for idx, (video, label) in enumerate(t_l):
            acc = 0.  # NOTE(review): never updated in this block

            # forwarding
            test_video = Variable(video).cuda()
            predicted = self.gru(test_video)
            # The GRU's own parameters are not frozen above, so its output is
            # presumably still attached to the autograd graph; numpy() refuses
            # such tensors, hence the detach() first.
            predicted = predicted.detach().cpu().numpy()

            print('Predicted output:',
                  predicted)  # [forwarding score ....., backwarding score]
            print('Predicted output length:', len(predicted))
            print('Actual label:', label)
            print('Actual label length:', len(label))
예제 #2
0
    def build_model(self):
        """Create the frozen CNN feature extractor and the GRU built on it.

        The extractor is moved to the GPU, restored from ``cnn.pkl`` and has
        gradient tracking disabled on every parameter before it is handed to
        the GRU, which is stored on ``self.gru``.
        """
        self.c2d = extractor = CNN().cuda()

        # load pre-trained cnn extractor
        pretrained_weights = torch.load('cnn.pkl')
        extractor.load_state_dict(pretrained_weights)

        # the extractor stays fixed: switch off gradients for all its weights
        for weight in extractor.parameters():
            weight.requires_grad = False

        self.gru = GRU(extractor).cuda()
def run_model():
    """Run the deep learning model selected by ``model`` in config.yaml.

    ``'GRU'``, ``'LSTM'`` and ``'CNN'`` select the class of the same name;
    any other value falls back to ``CNN_LSTM``.
    """
    settings = Config('config.yaml')
    registry = {'GRU': GRU, 'LSTM': LSTM, 'CNN': CNN}
    model_cls = registry.get(settings.model, CNN_LSTM)
    model_cls().run()
예제 #4
0
def load_model(model_path, TEXT=None, LABEL=None):
    """Load a model from a checkpoint path or by pretrained-architecture name.

    Args:
        model_path: Either a path containing ``'.pt'`` (a saved state dict or
            a fully pickled model), or one of the names ``'VGG19'``,
            ``'ResNet50'``, ``'DenseNet161'``.
        TEXT: Text field whose ``vocab`` and ``pad_token`` size the CNN.
            Required only for state-dict checkpoints whose path contains
            ``'tut'``.
        LABEL: Unused; kept so existing callers keep working.

    Returns:
        The model, switched to eval mode and moved to the GPU when
        ``cuda_available()`` is true.

    Raises:
        ValueError: If ``model_path`` names no known architecture, or if a
            ``'tut'`` checkpoint is requested without ``TEXT``.
    """
    # for saved model (.pt)
    if '.pt' in model_path:
        # NOTE(review): torch.load unpickles the file -- only use it on
        # checkpoints from a trusted source.
        # Loaded once and reused below instead of reading the file twice.
        checkpoint = torch.load(model_path)

        # A state dict is a dict (an OrderedDict); testing the type directly
        # avoids depending on the exact string torch.typename produces.
        if isinstance(checkpoint, dict):
            if 'tut' in model_path:
                if TEXT is None:
                    raise ValueError(
                        "TEXT is required to rebuild the CNN for %r" %
                        (model_path,))

                INPUT_DIM = len(TEXT.vocab)
                EMBEDDING_DIM = 100
                N_FILTERS = 100
                FILTER_SIZES = [3, 4, 5]
                OUTPUT_DIM = 1
                DROPOUT = 0.5
                PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

                model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES,
                            OUTPUT_DIM, DROPOUT, PAD_IDX)
            elif 'mnist' in model_path:
                model = Net()
            # The original test `'HELOC' or 'heloc' in model_path` was always
            # true, so every other state dict was silently loaded into an MLP.
            elif 'heloc' in model_path.lower():
                input_size = 22
                model = MLP(input_size)
            else:
                raise ValueError(
                    "cannot infer the architecture for state dict %r "
                    "(expected 'tut', 'mnist' or 'heloc' in the path)" %
                    (model_path,))
            model.load_state_dict(checkpoint)
        else:
            # the file holds a fully pickled model object
            model = checkpoint

    # for pretrained model
    elif model_path == 'VGG19':
        model = models.vgg19(pretrained=True)
    elif model_path == 'ResNet50':
        model = models.resnet50(pretrained=True)
    elif model_path == 'DenseNet161':
        model = models.densenet161(pretrained=True)
    else:
        # previously fell through and crashed on an unbound `model`
        raise ValueError("unknown model_path: %r" % (model_path,))

    model.eval()
    if cuda_available():
        model.cuda()

    return model
예제 #5
0
# Training script prelude: builds the vocabulary, loads and preprocesses the
# training data, constructs the selected network with its loss and optimizer,
# and switches the network to training mode.
# NOTE(review): maxlen, embedding, rnn_hidden, num_layers, bi, output_dim,
# filters, num_filters, lr, cuda and epochs are not defined in this section --
# presumably hyperparameters set earlier in the file; confirm.
model = "cnn"   # 'cnn' or 'rnn'

# Load vocabulary and build the word <-> index lookup tables
vocabs = load_vocab('data/imdb/imdb.vocab')
w2i = {w: i for i, w in enumerate(vocabs)}
i2w = {i: w for i, w in enumerate(vocabs)}
vocab_size = len(vocabs)

# Load the training split, then preprocess it with the word-to-index table
train_x, train_y = load_data('data/', train=True)
train_x, train_y = preprocess(train_x, train_y, w2i, maxlen)

# Build the model. `model` is rebound here: it holds the selector string
# above and the network instance from this point on.
model = RNN(embedding, rnn_hidden, num_layers, bi, output_dim, vocab_size) \
    if model == 'rnn' else CNN(filters, num_filters, maxlen, vocab_size, embedding, output_dim)

# Loss function & Optimizer
criterion = nn.BCELoss()
optim = torch.optim.Adam(model.parameters(), lr)

# Move the model and the whole training set to the GPU when requested
if cuda:
    model.cuda()
    train_x = train_x.cuda()
    train_y = train_y.cuda()

# Training procedure
# model.train() only puts the model into training mode; it does not train anything.
# This matters for modules such as batch norm or dropout, which behave
# differently in training and evaluation.
model.train()
for epoch in range(1, epochs + 1):
예제 #6
0
class TestViewer():
    """Streams a test video and its highlight segments to visdom.

    test_video : filename of a single test video.  The ground-truth highlight
        frames are written in parentheses in the filename, e.g. "(42,120)",
        or "(nohv)" when there is no highlight.
    extracted_hv : filename with the same title as test_video but the .npy
        extension.  It holds a numpy array marking every snippet (48 frames)
        with 0 or 1.
    ckpt : checkpoint path of the GRU used by get_accuracy.

    The expected mask looks like 00000011111111111000 [implemented]; masks
    with several runs such as 0000011100111111100111 are displayed as one
    span from the first to the last marked snippet.
    """

    # Frame arithmetic taken from the original showehv: snippet i starts at
    # frame i * 6 and the last marked snippet contributes 48 more frames.
    SNIPPET_STRIDE = 6
    SNIPPET_LENGTH = 48

    def __init__(self, test_video, extracted_hv, ckpt):
        """Decode every frame of ``test_video`` into ``self.test_raw``.

        Raises:
            ValueError: If no frame could be read from ``test_video``.
        """
        self.test_video = test_video
        self.extracted_hv = extracted_hv
        self.ckpt = ckpt

        # Read the test video frame by frame and keep it as a numpy array.
        cap = cv2.VideoCapture(self.test_video)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # swap the first and third channel (OpenCV decodes as BGR)
                b, g, r = cv2.split(frame)
                frame = cv2.merge([r, g, b])
                # HWC2CHW
                frames.append(frame.transpose(2, 0, 1))
        finally:
            # release the capture even if decoding a frame fails
            cap.release()

        if not frames:
            raise ValueError(
                'no frames could be read from %r' % (self.test_video,))

        test_raw = np.concatenate(frames)
        # NOTE(review): assumes 270x480 frames -- confirm against the dataset
        self.test_raw = test_raw.reshape(-1, 3, 270, 480)

    def show(self, item=-1):
        """Show one view (0: raw, 1: true highlight, 2: extracted) or all (-1).

        Any other ``item`` is ignored.
        """
        if item == -1:
            self.showrv()
            self.showthv()
            self.showehv()
        elif item == 0:
            self.showrv()
        elif item == 1:
            self.showthv()
        elif item == 2:
            self.showehv()

    def _stream_frames(self, viz, start, end, win, title):
        """Send frames ``start`` .. ``end - 1`` of ``test_raw`` to ``viz``."""
        for f in range(start, end):
            viz.image(
                self.test_raw[f, :, :, :],
                win=win,
                opts={'title': title},
            )
            time.sleep(0.01)

    @staticmethod
    def _true_frame_range(filename):
        """Parse "(start,end)" from ``filename``; ``(0, 0)`` without a comma.

        Raises:
            ValueError: If the filename has no parentheses or the two values
                are not integers.
        """
        h_start = filename.index("(")
        h_end = filename.index(")")

        h_frames = filename[h_start + 1:h_end]
        # h_frames = "42, 120" or "nohv"
        if "," not in h_frames:
            return 0, 0
        s, e = h_frames.split(',')
        return int(s), int(e)

    @classmethod
    def _extracted_frame_range(cls, ext):
        """Map a 0/1 snippet mask to the frame span it marks.

        Returns ``(0, 0)`` when no snippet is marked.  A single marked
        snippet is handled as well (the original squeezed the index array
        to 0-d in that case and could not index it).
        """
        ext_idx = np.flatnonzero(ext)
        if ext_idx.size == 0:
            return 0, 0

        first, last = int(ext_idx[0]), int(ext_idx[-1])
        print(first, last)
        # e.g. frames "42, 120" correspond to snippets "7, 12"
        return (first * cls.SNIPPET_STRIDE,
                last * cls.SNIPPET_STRIDE + cls.SNIPPET_LENGTH)

    def showrv(self):
        """Stream the whole raw video."""
        viz0 = visdom.Visdom(use_incoming_socket=False)
        self._stream_frames(viz0, 0, self.test_raw.shape[0], "gt video",
                            'TEST_RAW')

    def showthv(self):
        """Stream the ground-truth highlight frames named in the filename."""
        viz1 = visdom.Visdom(use_incoming_socket=False)
        # Index test_raw with the highlight frames encoded in the filename.
        filename = os.path.split(self.test_video)[-1]
        h_start, h_end = self._true_frame_range(filename)
        # an empty range (no highlight) simply streams nothing
        self._stream_frames(viz1, h_start, h_end, "gt1 video", 'TEST_TRUE_HV')

    def showehv(self):
        """Stream the frames covered by the extracted highlight mask."""
        viz2 = visdom.Visdom(use_incoming_socket=False)
        # Index test_raw with the snippets marked in the extracted mask.
        ext = np.load(self.extracted_hv)
        e_start, e_end = self._extracted_frame_range(ext)
        # TODO: report "no highlight" on visdom when the range is empty
        self._stream_frames(viz2, e_start, e_end, "gt2 video",
                            'TEST_Extracted_HV')

    def get_accuracy(self):
        """Run the restored GRU over the test loader and print its raw scores.

        The visible code neither accumulates nor returns an accuracy value.
        """
        # load dataloader (only the third loader returned is used here)
        _, _, t_l = get_loader('../Videos/HV', '../Videos/RV',
                               '../Videos/testRV', 1)

        # build network
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor

        # freeze the extractor: none of its parameters should receive gradients
        for p in self.c2d.parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()
        self.gru.load_state_dict(torch.load(self.ckpt))

        print(self.gru)

        self.gru.eval()
        avg_acc = 0  # NOTE(review): never updated in this method

        for idx, (video, label) in enumerate(t_l):
            acc = 0.  # NOTE(review): never updated in this method

            # forwarding
            test_video = Variable(video).cuda()
            predicted = self.gru(test_video)
            # numpy() refuses tensors attached to the autograd graph, and the
            # GRU's own parameters are not frozen above -- detach first.
            predicted = predicted.detach().cpu().numpy()

            print('Predicted output:',
                  predicted)  # [forwarding score ....., backwarding score]
            print('Predicted output length:', len(predicted))
            print('Actual label:', label)
            print('Actual label length:', len(label))
예제 #7
0
def train_on_epochs(train_loader: DataLoader, test_loader: DataLoader, opt):
    """Build the model selected by ``opt`` and train/validate it per epoch.

    Args:
        train_loader: Loader handed to ``train``; its ``dataset.labels`` is
            stored in every checkpoint under ``'label_map'``.
        test_loader: Loader handed to ``validation`` once per epoch.
        opt: Options object.  Attributes read here: ``model_type``,
            ``use_gru``, ``net_type``, ``img_size``, ``num_classes``,
            ``restore_from``, ``learning_rate``, ``loss_type``, ``gamma``,
            ``epoch``, ``mode`` and ``save_interval``.

    Side effects:
        Writes checkpoints below ``./checkpoints/``, a summary-writer log
        directory and a ``train_info-*.json`` file in the working directory.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # `models` maps opt.model_type to an architecture name (defined elsewhere)
    model_type = models[opt.model_type]
    if model_type == 'ours':
        model = Baseline(use_gru=opt.use_gru, bi_branch=(opt.net_type == 2))
    elif model_type == 'cRNN':
        model = cRNN()
    elif model_type == 'end2end':
        model = get_resnet_3d()
    elif model_type == 'xception':
        model, *_ = model_selection(modelname='xception', num_out_classes=2)
    elif model_type == 'fwa':
        model = SPPNet(backbone=50)
    elif model_type == 'resvit':
        model = ResNet50ViT(img_dim=opt.img_size,
                            pretrained_resnet=True,
                            blocks=6,
                            num_classes=opt.num_classes,
                            dim_linear_block=256,
                            dim=256)
    elif model_type == 'vit':
        model = ViT(img_dim=opt.img_size,
                    in_channels=3,
                    patch_dim=16,
                    num_classes=opt.num_classes,
                    dim=512)
    elif model_type == 'res50':
        model = ResNet(layers=50)
    elif model_type == 'res101':
        model = ResNet(layers=101)
    elif model_type == 'res152':
        model = ResNet(layers=152)
    else:
        model = CNN()

    model.to(device)

    device_count = torch.cuda.device_count()
    if device_count > 1:
        print('Using {} GPUs'.format(device_count))
        model = nn.DataParallel(model)

    ckpt = {}
    restore_from = opt.restore_from
    if restore_from is not None:
        # 'fwa' checkpoints keep their weights under 'net'; they carry no
        # optimizer state or epoch that is used below.
        if model_type == 'fwa':
            ckpt = torch.load(restore_from)
            model.load_state_dict(ckpt['net'])
        else:
            ckpt = torch.load(restore_from, map_location='cpu')
            model.load_state_dict(ckpt['model_state_dict'])
        print('Model is loaded from %s' % restore_from)

    model_params = model.parameters()
    optimizer = torch.optim.Adam(model_params, lr=opt.learning_rate)

    if restore_from is not None and model_type != 'fwa':
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])

    info = {
        'train_losses': [],
        'train_scores': [],
        'test_losses': [],
        'test_scores': [],
        'test_auc': []
    }

    # resume after the stored epoch, except for 'fwa' which always restarts
    start_ep = ckpt[
        'epoch'] + 1 if 'epoch' in ckpt and model_type != 'fwa' else 0

    save_path = './checkpoints/' + model_type + str(opt.use_gru) + str(
        opt.net_type)
    # makedirs also creates ./checkpoints itself; os.mkdir raised when the
    # parent directory was missing.
    os.makedirs(save_path, exist_ok=True)

    # One tag shared by the log directory, the checkpoint and the info file.
    run_tag = 'model_type:%s-gru:%s-loss:%s-gamma:%s' % (
        model_type, str(opt.use_gru), str(opt.loss_type), str(opt.gamma))

    # Loop-invariant, so built once instead of on every epoch.
    ckpt_path = os.path.join(save_path, run_tag + '.pth')

    writer = SummaryWriter(logdir='./log-' + run_tag)
    try:
        for ep in range(start_ep, opt.epoch):

            # a falsy opt.mode skips training and only validates
            if opt.mode:
                train_losses, train_scores = train(model, train_loader,
                                                   optimizer, writer, device,
                                                   ep, opt)
                info['train_losses'].append(train_losses)
                info['train_scores'].append(train_scores)

            test_loss, test_score, test_auc = validation(
                model, test_loader, writer, device, ep, opt)

            info['test_losses'].append(test_loss)
            info['test_scores'].append(test_score)
            info['test_auc'].append(test_auc)

            # every save overwrites the same file: only the latest is kept
            if (ep + 1) % opt.save_interval == 0:
                torch.save(
                    {
                        'epoch': ep,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'label_map': train_loader.dataset.labels
                    }, ckpt_path)
                print('Model of Epoch %3d has been saved to: %s' %
                      (ep, ckpt_path))
    finally:
        # flush pending events even when an epoch raises
        writer.close()

    with open('./train_info-' + run_tag + '.json', 'w') as f:
        json.dump(info, f)

    print('over!')
예제 #8
0
class Trainer(object):
    """Trains a GRU scorer on top of a frozen, pre-trained CNN extractor.

    Each step feeds one batch from ``h_loader`` with an all-ones target and
    one batch from ``r_loader`` with an all-zeros target.  The model is
    evaluated on ``test_loader`` at checkpoint epochs and saved whenever the
    accuracy improves.
    """

    # Scores at or above this value count as 1., all others as 0.
    SCORE_THRESHOLD = 0.45

    def __init__(self, config, h_loader, r_loader, test_loader):
        """Copy the hyperparameters from ``config`` and build the networks."""
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.test_loader = test_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # in case
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf
        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        """Create the frozen CNN extractor and the GRU that consumes it."""
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor

        # freeze the extractor so the optimizer only updates the GRU
        for p in self.c2d.parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()

    def _fit_batch(self, opt, criterion, video, make_target):
        """Run one optimisation step on ``video`` and return its loss.

        ``make_target`` is ``torch.ones`` or ``torch.zeros``; it builds a
        target with the same shape as the prediction.
        """
        self.gru.zero_grad()

        predicted = self.gru(video)  # predicted snippet's score
        target = make_target(predicted.shape, dtype=torch.float32).cuda()
        loss = criterion(predicted, target)  # compute loss

        loss.backward()
        opt.step()
        return loss

    def train(self):
        """Train for ``n_epochs`` and checkpoint on accuracy improvements."""
        # create optimizers
        cfig = get_config()
        opt = optim.RMSprop(filter(lambda p: p.requires_grad,
                                   self.gru.parameters()),
                            lr=self.lr,
                            weight_decay=self.weight_decay)

        start_time = time.time()
        criterion = nn.BCELoss()

        max_acc = 0.

        # plot titles depend only on the config, so build them once
        hparams = 'lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' % (
            cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay)
        h_title = 'H_LOSS with ' + hparams
        r_title = 'R_LOSS with ' + hparams

        for epoch in range(self.n_epochs):
            self.gru.train()
            epoch_loss = []
            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                h_video = Variable(h).cuda()
                r_video = Variable(r).cuda()

                # h_loader batches are trained towards 1, r_loader towards 0
                h_loss = self._fit_batch(opt, criterion, h_video, torch.ones)
                r_loss = self._fit_batch(opt, criterion, r_video, torch.zeros)

                step_end_time = time.time()

                total_loss = r_loss + h_loss
                epoch_loss.append((total_loss.data).cpu().numpy())

                print(
                    '[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f, r_loss: %.3f, total_loss: %.3f'
                    % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                       step_end_time - start_time, h_loss, r_loss, total_loss))

                self.vis.plot(h_title, (h_loss.data).cpu().numpy())
                self.vis.plot(r_title, (r_loss.data).cpu().numpy())

            self.vis.plot("Avg loss plot", np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.test_loader)

                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(
                        self.gru.state_dict(),
                        './samples/lr_%.4f_chkpoint' % cfig.lr +
                        str(epoch + 1) + '.pth')
                    for f in savelist:
                        np.save("./samples/" + f[0][0] + ".npy", f[1])
                    # NOTE(review): hard-coded debug dump; raises when the
                    # test set does not contain this exact file.
                    print(np.load("./samples/testRV04(198,360).mp4.npy"))
                    print("checkpoint saved")

    @staticmethod
    def _binarize(scores, threshold=0.45):
        """Return ``scores`` with values >= ``threshold`` as 1. and others 0.

        The result keeps the dtype of ``scores``.
        """
        return np.where(scores >= threshold, 1., 0.).astype(scores.dtype)

    def test(self, t_loader):
        """Evaluate the GRU on ``t_loader``.

        Args:
            t_loader: Iterable of ``(video, label, filename)`` batches.

        Returns:
            ``(mean accuracy, savelist)`` where ``savelist`` holds one
            ``[filename, thresholded prediction]`` entry per batch.  The mean
            is ``0.`` when ``t_loader`` yields nothing.
        """
        # Test accuracy
        self.gru.eval()
        test_avg_acc = 0.
        test_cnt = 0
        savelist = []

        # no gradients are needed while evaluating
        with torch.no_grad():
            # iterate the loader that was passed in (the original ignored
            # the argument and always used self.test_loader)
            for video, label, filename in t_loader:
                video = Variable(video).cuda()
                predicted = self.gru(video)  # [number of frames, 1]

                predicted = predicted.view(1, -1)
                predicted = predicted.cpu().detach().numpy()

                predicted = predicted[0]
                label = label.cpu().numpy()

                # scores at the positions the label marks; print non-zero ones
                gt_label_predicted_score = np.atleast_2d(predicted * label)
                for sc in gt_label_predicted_score[0]:
                    if sc != 0.:
                        print("%.3f" % sc, end=' ')

                predicted = self._binarize(predicted, self.SCORE_THRESHOLD)

                acc = (predicted == label).sum().item() / float(
                    len(predicted))
                print("filename: %s accuracy: %.4f" % (filename, acc))
                test_avg_acc += acc
                test_cnt += 1

                savelist.append([filename, predicted])

                print()

        # an empty loader used to raise ZeroDivisionError here
        if test_cnt:
            test_avg_acc = test_avg_acc / test_cnt

        print("Accuracy:", round(test_avg_acc, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, test_avg_acc)

        return test_avg_acc, savelist