def __init__(self):
    """Build the full training setup: frozen feature extractor, the model
    under training, its optimizer, loss functions, data loader, device
    placement, and checkpoint restoration.

    Attributes set here:
        train_frames: how many frames are used for training
        num_of_vid: number of videos
        extractor: pretrained 3D-ResNeXt-101 used for feature extraction
            (frozen; kept in eval mode)
        G: the model being trained (BaseNet)
        optimizerG: optimizer for G (default: Adam)
        train_data: training data loader
        device: gpu or cpu device (default: gpu)
    """
    self.train_frames = args.train_frames
    self.num_of_vid = args.num_of_vid

    print("===> Building model")

    # --- pretrained feature-extraction model ---
    self.extractor = resnet101(num_classes=400,
                               shortcut_type='B',
                               cardinality=32,
                               sample_size=args.crop_size,
                               sample_duration=args.train_frames)
    # Load pretrained weights. eval() is mandatory here: the extractor is
    # used frozen, and eval mode fixes BatchNorm/Dropout behavior.
    weight = get_pretrain_weight(pretrain_path, self.extractor)
    self.extractor.load_state_dict(weight)
    self.extractor.eval()

    # --- model under training ---
    self.in_channel = args.in_channel
    self.G = BaseNet(self.in_channel * self.num_of_vid,
                     self.train_frames,
                     self.num_of_vid)
    self.num_stage = args.num_stage

    # cuda — move models to the GPU *before* constructing the optimizer,
    # as recommended by torch.optim: parameters should live on their final
    # device when the optimizer is created. (Module.cuda() moves parameters
    # in place, so the Parameter objects seen by the optimizer are the same.)
    if torch.cuda.is_available():
        self.extractor.cuda()
        self.G.cuda()

    # optimizer
    self.optimizerG = optim.Adam(self.G.parameters(), lr=args.lr,
                                 betas=(0.9, 0.999), eps=1e-8)

    # losses
    self.CE_loss = nn.CrossEntropyLoss()
    self.L1_loss = nn.L1Loss()
    self.L2_loss = nn.MSELoss()
    # NOTE(review): nn.Softmax() without an explicit `dim` is deprecated and
    # falls back to legacy implicit-dim behavior (dim depends on input rank).
    # Confirm the intended axis (dim=1 for (N, C) logits) and pass it here.
    self.Softmax = nn.Softmax()

    # data
    self.train_data = train_data_loader
    # self.test_data = test_data_loader

    # training
    self.device = device
    self.epochs = args.epochs
    self.avg_G_loss_arr = []
    self.checkpoint = args.checkpoint
    self.checkpoint_set()
def __init__(self):
    """Build the training setup: frozen feature extractor, the model under
    training, its optimizer, loss functions, data loader, device placement,
    checkpoint paths, and CSV logging.

    Attributes set here:
        train_frames: how many frames are used for training
        extractor: pretrained 3D-ResNeXt-101 used for feature extraction
            (frozen; kept in eval mode)
        G: the model being trained (BaseNet)
        optimizerG: optimizer for G (default: Adam)
        train_data: training data loader
        device: gpu or cpu device (default: gpu)
    """
    self.train_frames = args.train_frames

    print("===> Building model")

    # --- pretrained feature-extraction model ---
    self.extractor = resnet101(num_classes=400,
                               shortcut_type='B',
                               cardinality=32,
                               sample_size=args.crop_size,
                               sample_duration=args.train_frames)
    # Load pretrained weights. eval() is mandatory here: the extractor is
    # used frozen, and eval mode fixes BatchNorm/Dropout behavior.
    weight = get_pretrain_weight(pretrain_path, self.extractor)
    self.extractor.load_state_dict(weight)
    self.extractor.eval()

    # --- model under training ---
    # NOTE(review): 16384 * 4 * 4 * 4 looks like a hard-coded flattened
    # extractor feature size (C*T*H*W) — confirm against the extractor's
    # output shape; consider deriving it instead of hard-coding.
    self.G = BaseNet(16384 * 4 * 4 * 4, 10)

    # cuda — move models to the GPU *before* constructing the optimizer,
    # as recommended by torch.optim: parameters should live on their final
    # device when the optimizer is created. (Module.cuda() moves parameters
    # in place, so the Parameter objects seen by the optimizer are the same.)
    if torch.cuda.is_available():
        self.extractor.cuda()
        self.G.cuda()

    # optimizer
    self.optimizerG = optim.Adam(self.G.parameters(), lr=args.lr,
                                 betas=(0.9, 0.999), eps=1e-8)

    # losses
    self.BCE_loss = nn.BCELoss()
    self.L1_loss = nn.L1Loss()
    self.L2_loss = nn.MSELoss()

    # data
    self.train_data = train_data_loader
    # self.test_data = test_data_loader

    # training
    self.device = device
    self.epochs = args.epochs
    self.avg_G_loss_arr = []
    self.checkpoint = args.checkpoint

    # model save
    self.save_mname = args.save_model_name

    # checkpoint root
    make_dirs(log_path)
    self.log_dir = log_path + f'/{self.save_mname}'
    # self.load_check_dir =
    self.save_check_dir = make_dirs(self.log_dir) + '/' + 'checkpoint.pkl'

    # CSV logging system
    self.CSVlogger = LogCSV(log_dir=self.log_dir + f"/{self.save_mname}_log.csv",
                            header=['epoch', 'avg_G_Loss', 'accuracy'])
fm_use=args.fm_use) # I3D.replace_logits(157) # Charades: 157 only used for load trained model # -----------load pre_train model -------- ------------ Pre_tn_model = './model/pretred_mod/rgb_imagenet.pt' pretrained_dict = torch.load(Pre_tn_model) model_dict = I3D.state_dict() pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} model_dict.update(pretrained_dict) I3D.load_state_dict(model_dict) I3D.replace_logits(157) # used for load Knitices model Model = I3D train_trans = transforms.Compose([videotransform.IMG_resize(args.in_fm_sz, args.in_fm_sz), videotransform.ToTensor()]) # contain normalize # -----------load pre_train model -------------------- elif args.arch == "R3D": R3D = resnext.resnet101(num_classes=400, shortcut_type='B', cardinality=32, sample_size=args.in_fm_sz, sample_duration=args.fm_use, phase='train', Dense_out=False) R3D.replace_logits(157) # -----------load pre_train model -------------------- # Pre_tn_model = './model/pretred_mod/resnext-101-kinetics.pth' Pre_tn_model = './model/pretred_mod/resnext-101-64f-kinetics.pth' # Pre_tn_model = './model/pretred_mod/R3D025_Charades_model.pth' pretrained_dict = torch.load(Pre_tn_model) model_dict = R3D.state_dict() pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} model_dict.update(pretrained_dict) R3D.load_state_dict(model_dict) Model = R3D train_trans = transforms.Compose([videotransform.IMG_resize(args.in_fm_sz, args.in_fm_sz), videotransform.Normalize_R3D([114.7748, 107.7354, 99.4750], [1, 1, 1])]) # -----------load pre_train model --------------------