def get_accuracy(self):
    """Run the trained GRU over the test loader and print predictions.

    Builds the frozen CNN extractor from ``cnn.pkl``, wraps it in a GRU,
    restores the GRU weights from ``self.ckpt`` and then, for every
    ``(video, label)`` pair of the test loader, prints the predicted scores
    next to the ground-truth label.

    Despite its name this method does not compute or return an accuracy
    value; it only prints and returns ``None``.
    """
    # load dataloader (only the third loader returned is used here)
    _, _, t_l = get_loader('../Videos/HV', '../Videos/RV',
                           '../Videos/testRV', 1)

    # build network
    self.c2d = CNN().cuda()
    self.c2d.load_state_dict(
        torch.load('cnn.pkl'))  # load pre-trained cnn extractor
    for param in self.c2d.parameters():
        param.requires_grad = False

    self.gru = GRU(self.c2d).cuda()
    self.gru.load_state_dict(torch.load(self.ckpt))
    print(self.gru)
    self.gru.eval()

    for video, label in t_l:
        # forwarding
        test_video = Variable(video).cuda()
        predicted = self.gru(test_video)
        # The GRU weights still require grad, so the output is attached to
        # the autograd graph; it must be detached before ``.numpy()``.
        predicted = predicted.detach().cpu().numpy()

        print('Predicted output:',
              predicted)  # [forwarding score ....., backwarding score]
        print('Predicted output length:', len(predicted))
        print('Actual label:', label)
        print('Actual label length:', len(label))
def build_model(self):
    """Create the frozen CNN feature extractor and the GRU built on it.

    The CNN weights are restored from ``cnn.pkl`` and every CNN parameter
    has ``requires_grad`` switched off. Both networks are moved to CUDA and
    stored as ``self.c2d`` and ``self.gru``.
    """
    self.c2d = CNN().cuda()
    extractor = self.c2d

    # Restore the pre-trained CNN extractor weights.
    pretrained_state = torch.load('cnn.pkl')
    extractor.load_state_dict(pretrained_state)

    # Freeze the extractor.
    for _name, weight in extractor.named_parameters():
        weight.requires_grad = False

    self.gru = GRU(extractor).cuda()
def run_model():
    """Run the deep learning model selected by ``model`` in config.yaml.

    Recognised values are 'GRU', 'LSTM' and 'CNN'; any other value selects
    ``CNN_LSTM``.
    """
    config = Config('config.yaml')

    # Dispatch table instead of an if/elif chain; unknown names fall back to
    # CNN_LSTM exactly like the final ``else`` branch did.
    known_models = {
        'GRU': GRU,
        'LSTM': LSTM,
        'CNN': CNN,
    }
    model_cls = known_models.get(config.model, CNN_LSTM)

    model = model_cls()
    model.run()
def load_model(model_path, TEXT=None, LABEL=None):
    """Load a model in eval mode, moving it to CUDA when available.

    Args:
        model_path: Either a path containing ``.pt`` (a saved state dict or
            a fully pickled model) or one of the pretrained model names
            ``'VGG19'``, ``'ResNet50'``, ``'DenseNet161'``.
        TEXT: Text field providing ``vocab`` and ``pad_token``; required
            only for state dicts whose path contains ``'tut'``.
        LABEL: Unused; kept for backward compatibility with callers.

    Returns:
        The loaded model, after ``model.eval()``.

    Raises:
        ValueError: If ``model_path`` is not recognised, if a state-dict
            path matches none of the known architectures, or if ``TEXT`` is
            missing for a ``'tut'`` model.
    """
    # for saved model (.pt)
    if '.pt' in model_path:
        # NOTE(review): torch.load unpickles the file; only use it on
        # checkpoints from a trusted source.
        checkpoint = torch.load(model_path)  # read the file only once

        # A state dict is an OrderedDict (a dict subclass); a fully pickled
        # model is not. isinstance does not depend on how torch.typename
        # formats the module prefix of the type name.
        if isinstance(checkpoint, dict):
            if 'tut' in model_path:
                if TEXT is None:
                    raise ValueError(
                        "TEXT is required to rebuild the CNN for %r"
                        % (model_path,))
                INPUT_DIM = len(TEXT.vocab)
                EMBEDDING_DIM = 100
                N_FILTERS = 100
                FILTER_SIZES = [3, 4, 5]
                OUTPUT_DIM = 1
                DROPOUT = 0.5
                PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
                model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS,
                            FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
            elif 'mnist' in model_path:
                model = Net()
            # The original test `'HELOC' or 'heloc' in model_path` was
            # always truthy, so every other state dict was loaded into the
            # MLP. Test both spellings explicitly.
            elif 'HELOC' in model_path or 'heloc' in model_path:
                input_size = 22
                model = MLP(input_size)
            else:
                raise ValueError(
                    "Cannot infer the architecture for state dict %r; "
                    "expected 'tut', 'mnist', 'HELOC' or 'heloc' in the path"
                    % (model_path,))
            model.load_state_dict(checkpoint)
        else:
            # The file holds a fully pickled model object.
            model = checkpoint

    # for pretrained model
    elif model_path == 'VGG19':
        model = models.vgg19(pretrained=True)
    elif model_path == 'ResNet50':
        model = models.resnet50(pretrained=True)
    elif model_path == 'DenseNet161':
        model = models.densenet161(pretrained=True)
    else:
        # Previously this fell through to an UnboundLocalError on `model`.
        raise ValueError("Unrecognised model_path: %r" % (model_path,))

    model.eval()
    if cuda_available():
        model.cuda()

    return model
model = "cnn" # 'cnn' or 'rnn' # Load vocabulary and make dictionary vocabs = load_vocab('data/imdb/imdb.vocab') w2i = {w: i for i, w in enumerate(vocabs)} i2w = {i: w for i, w in enumerate(vocabs)} vocab_size = len(vocabs) # Load Data train_x, train_y = load_data('data/', train=True) train_x, train_y = preprocess(train_x, train_y, w2i, maxlen) # Build Model & Loss & Optimizer model = RNN(embedding, rnn_hidden, num_layers, bi, output_dim, vocab_size) \ if model == 'rnn' else CNN(filters, num_filters, maxlen, vocab_size, embedding, output_dim) # Loss function & Optimizer criterion = nn.BCELoss() optim = torch.optim.Adam(model.parameters(), lr) if cuda: model.cuda() train_x = train_x.cuda() train_y = train_y.cuda() # Training procedure # model.train() makes model be in training mode. (It is not a real training function) # It is crucial to modules such as batch norm or dropout, which acts different when train or test model.train() for epoch in range(1, epochs + 1):
class TestViewer():
    """Visualise a test video, its true highlight and the extracted one.

    test_video   : filename of a single test video. The ground-truth
                   highlight frame range is written in parentheses at the
                   end of the file name, e.g. ``name(42,120).mp4``.
    extracted_hv : filename of a ``.npy`` file with the same title as
                   ``test_video``. It holds a numpy array with one 0/1 flag
                   per snippet (48 frames each).
    ckpt         : path of the GRU checkpoint used by ``get_accuracy``.

    The expected flag pattern is a single run such as
    ``00000011111111111000`` (handled). Patterns with several runs such as
    ``0000011100111111100111`` are not handled separately: everything
    between the first and the last flagged snippet is shown.
    """

    # Snippet index i is mapped to frames [i * 6, i * 6 + 48) in showehv.
    SNIPPET_STRIDE = 6
    SNIPPET_LENGTH = 48

    def __init__(self, test_video, extracted_hv, ckpt):
        self.test_video = test_video
        self.extracted_hv = extracted_hv
        self.ckpt = ckpt

        # Read the test video frame by frame and keep it as a numpy array.
        cap = cv2.VideoCapture(self.test_video)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV decodes to BGR; reorder the channels to RGB.
                b, g, r = cv2.split(frame)
                frame = cv2.merge([r, g, b])
                # HWC2CHW
                frame = frame.transpose(2, 0, 1)
                frames.append(frame)
        finally:
            # Release the capture even if decoding raises.
            cap.release()

        # Raises ValueError when no frame could be read (empty list).
        test_raw = np.concatenate(frames)
        # NOTE(review): the frame size 270x480 is hard-coded here; videos
        # with another resolution will not reshape correctly.
        self.test_raw = test_raw.reshape(-1, 3, 270, 480)

    @staticmethod
    def _true_hv_range(video_path):
        """Parse the ``(start,end)`` frame range embedded in a file name.

        Returns ``(0, 0)`` when the parenthesised part holds no comma
        (e.g. ``"nohv"``). Raises ValueError if the name has no parentheses.
        """
        filename = os.path.split(video_path)[-1]
        opening = filename.index("(")
        closing = filename.index(")")
        h_frames = filename[opening + 1:closing]  # "42, 120" or "nohv"
        if "," not in h_frames:
            return 0, 0
        s, e = h_frames.split(',')
        return int(s), int(e)

    @staticmethod
    def _extracted_hv_range(ext):
        """Map per-snippet 0/1 flags to a ``(start, end)`` frame range.

        The range spans from the first flagged snippet to the end of the
        last flagged one; ``(0, 0)`` is returned when nothing is flagged.
        """
        # flatnonzero always returns a 1-D index array, so a single hit or
        # no hit at all can still be indexed and tested safely.
        flagged = np.flatnonzero(ext)
        if flagged.size == 0:
            return 0, 0
        start = int(flagged[0]) * TestViewer.SNIPPET_STRIDE
        end = (int(flagged[-1]) * TestViewer.SNIPPET_STRIDE
               + TestViewer.SNIPPET_LENGTH)
        return start, end

    def show(self, item=-1):
        """Show the raw video (0), true hv (1), extracted hv (2) or all (-1).

        Any other value of ``item`` does nothing.
        """
        if item == -1:
            self.showrv()
            self.showthv()
            self.showehv()
        elif item == 0:
            self.showrv()
        elif item == 1:
            self.showthv()
        elif item == 2:
            self.showehv()

    def showrv(self):
        """Stream every frame of the raw test video to visdom."""
        viz0 = visdom.Visdom(use_incoming_socket=False)
        for f in range(self.test_raw.shape[0]):
            viz0.image(
                self.test_raw[f, :, :, :],
                win="gt video",
                opts={'title': 'TEST_RAW'},
            )
            time.sleep(0.01)

    def showthv(self):
        """Stream the ground-truth highlight frames to visdom."""
        viz1 = visdom.Visdom(use_incoming_socket=False)

        # Index the highlight frames out of test_raw using the range that
        # is encoded in the file name. An empty range shows nothing.
        h_start, h_end = self._true_hv_range(self.test_video)
        for f in range(h_start, h_end):
            viz1.image(
                self.test_raw[f, :, :, :],
                win="gt1 video",
                opts={'title': 'TEST_TRUE_HV'},
            )
            time.sleep(0.01)

    def showehv(self):
        """Stream the extracted (predicted) highlight frames to visdom."""
        viz2 = visdom.Visdom(use_incoming_socket=False)

        # Index the highlight frames out of test_raw using the per-snippet
        # flags stored in the extracted_hv file.
        ext = np.load(self.extracted_hv)
        flagged = np.flatnonzero(ext)
        if flagged.size:
            print(flagged[0], flagged[-1])

        # TODO: when nothing is flagged, report "no highlight" on visdom.
        e_start, e_end = self._extracted_hv_range(ext)
        for f in range(e_start, e_end):
            viz2.image(
                self.test_raw[f, :, :, :],
                win="gt2 video",
                opts={'title': 'TEST_Extracted_HV'},
            )
            time.sleep(0.01)

    def get_accuracy(self):
        """Run the trained GRU over the test loader and print predictions.

        Despite its name this method does not compute or return an accuracy
        value; it prints predicted scores and labels and returns ``None``.
        """
        # load dataloader (only the third loader returned is used here)
        _, _, t_l = get_loader('../Videos/HV', '../Videos/RV',
                               '../Videos/testRV', 1)

        # build network
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor
        for param in self.c2d.parameters():
            param.requires_grad = False

        self.gru = GRU(self.c2d).cuda()
        self.gru.load_state_dict(torch.load(self.ckpt))
        print(self.gru)
        self.gru.eval()

        for video, label in t_l:
            # forwarding
            test_video = Variable(video).cuda()
            predicted = self.gru(test_video)
            # The GRU weights still require grad, so the output must be
            # detached before it can be converted with ``.numpy()``.
            predicted = predicted.detach().cpu().numpy()

            print('Predicted output:',
                  predicted)  # [forwarding score ....., backwarding score]
            print('Predicted output length:', len(predicted))
            print('Actual label:', label)
            print('Actual label length:', len(label))
def train_on_epochs(train_loader: DataLoader, test_loader: DataLoader, opt):
    """Train and validate the model selected by ``opt`` for several epochs.

    Builds the model named by ``models[opt.model_type]``, optionally
    restores it (and the optimizer) from ``opt.restore_from``, then for each
    epoch runs ``train`` (only when ``opt.mode`` is truthy) and
    ``validation``. A checkpoint is written every ``opt.save_interval``
    epochs and the collected metrics are dumped to a JSON file.

    Args:
        train_loader: Loader for training data; ``dataset.labels`` is stored
            in each checkpoint as ``label_map``.
        test_loader: Loader passed to ``validation``.
        opt: Options object; the attributes read are ``model_type``,
            ``use_gru``, ``net_type``, ``img_size``, ``num_classes``,
            ``restore_from``, ``learning_rate``, ``loss_type``, ``gamma``,
            ``epoch``, ``mode`` and ``save_interval``.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    model_type = models[opt.model_type]
    if model_type == 'ours':
        model = Baseline(use_gru=opt.use_gru, bi_branch=(opt.net_type == 2))
    elif model_type == 'cRNN':
        model = cRNN()
    elif model_type == 'end2end':
        model = get_resnet_3d()
    elif model_type == 'xception':
        model, *_ = model_selection(modelname='xception', num_out_classes=2)
    elif model_type == 'fwa':
        model = SPPNet(backbone=50)
    elif model_type == 'resvit':
        model = ResNet50ViT(img_dim=opt.img_size,
                            pretrained_resnet=True,
                            blocks=6,
                            num_classes=opt.num_classes,
                            dim_linear_block=256,
                            dim=256)
    elif model_type == 'vit':
        model = ViT(img_dim=opt.img_size,
                    in_channels=3,
                    patch_dim=16,
                    num_classes=opt.num_classes,
                    dim=512)
    elif model_type == 'res50':
        model = ResNet(layers=50)
    elif model_type == 'res101':
        model = ResNet(layers=101)
    elif model_type == 'res152':
        model = ResNet(layers=152)
    else:
        model = CNN()
    model.to(device)

    device_count = torch.cuda.device_count()
    if device_count > 1:
        print('Using {} GPUs'.format(device_count))
        model = nn.DataParallel(model)

    ckpt = {}
    restore_from = opt.restore_from
    if restore_from is not None:
        # 'fwa' checkpoints keep the weights under 'net' and carry no
        # optimizer state or epoch counter that is used here.
        if model_type == 'fwa':
            ckpt = torch.load(restore_from)
            model.load_state_dict(ckpt['net'])
        else:
            ckpt = torch.load(restore_from, map_location='cpu')
            model.load_state_dict(ckpt['model_state_dict'])
        print('Model is loaded from %s' % restore_from)

    model_params = model.parameters()
    optimizer = torch.optim.Adam(model_params, lr=opt.learning_rate)
    if restore_from is not None and model_type != 'fwa':
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])

    info = {
        'train_losses': [],
        'train_scores': [],
        'test_losses': [],
        'test_scores': [],
        'test_auc': []
    }

    start_ep = ckpt[
        'epoch'] + 1 if 'epoch' in ckpt and model_type != 'fwa' else 0

    # The tag shared by the log dir, checkpoint file and info file does not
    # change between epochs, so it is built once.
    run_tag = 'model_type:%s-gru:%s-loss:%s-gamma:%s' % (
        model_type, str(opt.use_gru), str(opt.loss_type), str(opt.gamma))

    save_path = './checkpoints/' + model_type + str(opt.use_gru) + str(
        opt.net_type)
    # makedirs also creates './checkpoints' itself and tolerates an existing
    # directory; the previous exists()/mkdir() pair failed in both cases.
    os.makedirs(save_path, exist_ok=True)
    ckpt_path = os.path.join(save_path, run_tag + '.pth')
    info_path = './train_info-' + run_tag + '.json'

    writer = SummaryWriter(logdir='./log-' + run_tag)
    try:
        for ep in range(start_ep, opt.epoch):
            if opt.mode:
                train_losses, train_scores = train(model, train_loader,
                                                   optimizer, writer, device,
                                                   ep, opt)
                info['train_losses'].append(train_losses)
                info['train_scores'].append(train_scores)

            test_loss, test_score, test_auc = validation(
                model, test_loader, writer, device, ep, opt)
            info['test_losses'].append(test_loss)
            info['test_scores'].append(test_score)
            info['test_auc'].append(test_auc)

            if (ep + 1) % opt.save_interval == 0:
                torch.save(
                    {
                        'epoch': ep,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'label_map': train_loader.dataset.labels
                    }, ckpt_path)
                print('Model of Epoch %3d has been saved to: %s' %
                      (ep, ckpt_path))

            # Rewritten after every epoch so an interrupted run still leaves
            # the metrics collected so far on disk.
            with open(info_path, 'w') as f:
                json.dump(info, f)
    finally:
        # Flush and close the event file even when training raises.
        writer.close()

    print('over!')
class Trainer(object):
    """Train a GRU on top of a frozen, pre-trained CNN feature extractor.

    Highlight videos (``h_loader``) are trained towards a target of all
    ones and raw videos (``r_loader``) towards all zeros, using BCE loss.
    """

    # Scores at or above this value are labelled 1.0, the rest 0.0.
    SCORE_THRESHOLD = 0.45

    def __init__(self, config, h_loader, r_loader, test_loader):
        """Store the loaders and hyper-parameters and build the networks.

        Args:
            config: Object providing ``lr``, ``beta1``, ``beta2``,
                ``weight_decay``, ``n_epochs``, ``n_steps``,
                ``log_interval``, ``checkpoint_step``, ``cuda``, ``outf``.
            h_loader: Loader yielding highlight videos.
            r_loader: Loader yielding raw videos.
            test_loader: Loader yielding ``(video, label, filename)``.
        """
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.test_loader = test_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # in case
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf
        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        """Create the frozen CNN extractor and the GRU that wraps it."""
        self.c2d = CNN().cuda()
        self.c2d.load_state_dict(
            torch.load('cnn.pkl'))  # load pre-trained cnn extractor

        for l, p in self.c2d.named_parameters():
            p.requires_grad = False

        self.gru = GRU(self.c2d).cuda()

    @staticmethod
    def _binarize(scores, threshold=SCORE_THRESHOLD):
        """Return 1.0 where ``scores >= threshold`` and 0.0 elsewhere.

        The result keeps the dtype of ``scores``.
        """
        return (scores >= threshold).astype(scores.dtype)

    def train(self):
        """Run the training loop, testing and checkpointing periodically.

        Every ``checkpoint_step`` epochs the model is evaluated; when the
        accuracy beats the best one so far, the GRU weights and the
        per-video predictions are saved under ``./samples/``.
        """
        # create optimizers
        cfig = get_config()
        opt = optim.RMSprop(filter(lambda p: p.requires_grad,
                                   self.gru.parameters()),
                            lr=self.lr,
                            weight_decay=self.weight_decay)

        start_time = time.time()
        criterion = nn.BCELoss()
        max_acc = 0.

        # The plot titles only depend on the configuration; build them once.
        hyper_params = (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay)
        h_title = ('H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                   % hyper_params)
        r_title = ('R_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f'
                   % hyper_params)

        for epoch in range(self.n_epochs):
            # test() switches to eval mode, so re-enable training mode at
            # the start of every epoch.
            self.gru.train()
            epoch_loss = []

            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                h_video = Variable(h).cuda()
                r_video = Variable(r).cuda()

                # highlight video: target score is 1 everywhere
                self.gru.zero_grad()
                predicted = self.gru(h_video)
                target = torch.ones(predicted.shape,
                                    dtype=torch.float32).cuda()
                h_loss = criterion(predicted, target)  # compute loss
                h_loss.backward()
                opt.step()

                # raw video: target score is 0 everywhere
                self.gru.zero_grad()
                predicted = self.gru(r_video)  # predicted snippet's score
                target = torch.zeros(predicted.shape,
                                     dtype=torch.float32).cuda()
                r_loss = criterion(predicted, target)  # compute loss
                r_loss.backward()
                opt.step()

                step_end_time = time.time()

                total_loss = r_loss + h_loss
                epoch_loss.append((total_loss.data).cpu().numpy())

                print(
                    '[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f, r_loss: %.3f, total_loss: %.3f'
                    % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                       step_end_time - start_time, h_loss, r_loss,
                       total_loss))

                self.vis.plot(h_title, (h_loss.data).cpu().numpy())
                self.vis.plot(r_title, (r_loss.data).cpu().numpy())

            self.vis.plot("Avg loss plot", np.mean(epoch_loss))

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.test_loader)

                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(
                        self.gru.state_dict(),
                        './samples/lr_%.4f_chkpoint' % cfig.lr +
                        str(epoch + 1) + '.pth')
                    # f is [filename batch, thresholded prediction].
                    for f in savelist:
                        np.save("./samples/" + f[0][0] + ".npy", f[1])
                    # A debug np.load of one hard-coded sample file used to
                    # follow here; it crashed whenever that video was not
                    # part of the test set, so it was removed.
                    print("checkpoint saved")

    def test(self, t_loader):
        """Evaluate the GRU on ``t_loader``.

        Args:
            t_loader: Iterable of ``(video, label, filename)`` batches.

        Returns:
            ``(average accuracy, savelist)`` where ``savelist`` holds one
            ``[filename, thresholded prediction]`` entry per batch. The
            average is 0.0 when the loader yields nothing.
        """
        # Test accuracy
        self.gru.eval()
        test_avg_acc = 0.
        test_cnt = 0

        savelist = []

        # No gradients are needed for evaluation.
        with torch.no_grad():
            # Iterate the loader that was passed in; the argument used to
            # be ignored in favour of self.test_loader.
            for video, label, filename in t_loader:
                video = Variable(video).cuda()
                predicted = self.gru(video)  # [number of frames, 1]

                predicted = predicted.view(1, -1)
                predicted = predicted.cpu().detach().numpy()
                predicted = predicted[0]

                label = label.cpu().numpy()

                # Print the raw scores predicted at ground-truth positions.
                # NOTE(review): indexing row 0 presumes label is 2-D with a
                # leading batch axis - confirm against the loader.
                gt_label_predicted_score = predicted * label
                for sc in gt_label_predicted_score[0]:
                    if sc != 0.:
                        print("%.3f" % sc, end=' ')

                predicted = self._binarize(predicted)

                acc = (predicted == label).sum().item() / float(
                    len(predicted))
                print("filename: %s accuracy: %.4f" % (filename, acc))
                test_avg_acc += acc
                test_cnt += 1

                savelist.append([filename, predicted])

                print()

        # Guard against an empty loader instead of dividing by zero.
        if test_cnt:
            test_avg_acc = test_avg_acc / test_cnt
        print("Accuracy:", round(test_avg_acc, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, test_avg_acc)

        return test_avg_acc, savelist