def get_model(args, get_video_encoder_only=True, logger=None):
    """Build the audio-video model, load checkpoint weights, and optionally
    reduce it to the video encoder for video-only retrieval.

    Args:
        args: namespace providing vid_base_arch, aud_base_arch, pretrained,
            num_clusters, use_mlp, headcount, weights_path and pool_op.
            args.ckpt_epoch is written back when a checkpoint is loaded.
        get_video_encoder_only: when True, return only the video backbone
            (stem + layer1-4 + pool + Flatten) instead of the full model.
        logger: unused; kept for interface compatibility.

    Returns:
        The model in eval mode; wrapped in DataParallel and moved to GPU
        when CUDA is available.

    Raises:
        ValueError: if args.pool_op is neither 'max' nor 'avg'.
    """
    # Build the joint audio-video architecture.
    model = load_model(
        vid_base_arch=args.vid_base_arch,
        aud_base_arch=args.aud_base_arch,
        pretrained=args.pretrained,
        num_classes=args.num_clusters,
        norm_feat=False,
        use_mlp=args.use_mlp,
        headcount=args.headcount,
    )

    # Load model weights from a checkpoint, if one was provided.
    start = time.time()
    # weights_path may arrive as the literal string 'None' (e.g. from argparse).
    if isinstance(args.weights_path, str):
        weight_path_not_none = args.weights_path != 'None'
    else:
        weight_path_not_none = args.weights_path is not None
    if weight_path_not_none:
        print("Loading model weights")
        if os.path.exists(args.weights_path):
            # map_location='cpu' lets CPU-only hosts load GPU-saved
            # checkpoints; the model is moved to GPU below when available.
            ckpt_dict = torch.load(args.weights_path, map_location='cpu')
            model_weights = ckpt_dict["model"]
            args.ckpt_epoch = ckpt_dict['epoch']
            print(f"Epoch checkpoint: {args.ckpt_epoch}", flush=True)
            utils.load_model_parameters(model, model_weights)
    print(f"Time to load model weights: {time.time() - start}")

    # Inference only.
    model.eval()

    # Get video encoder for video-only retrieval.
    if get_video_encoder_only:
        model = model.video_network.base
        if args.pool_op == 'max':
            pool = torch.nn.MaxPool3d((2, 2, 2), stride=(2, 2, 2))
        elif args.pool_op == 'avg':
            pool = torch.nn.AvgPool3d((2, 2, 2), stride=(2, 2, 2))
        else:
            # BUG FIX: the original `assert("...")` asserted a non-empty
            # string, which always passes, leaving `pool` undefined and
            # crashing later with a confusing NameError.
            raise ValueError("Only 'max' and 'avg' pool operations allowed")

        # Re-assemble the backbone up to a pooled, flattened feature vector.
        model = torch.nn.Sequential(*[
            model.stem,
            model.layer1,
            model.layer2,
            model.layer3,
            model.layer4,
            pool,
            Flatten(),
        ])

    if torch.cuda.is_available():
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    return model
def make_basic_cnn(nb_filters=64, nb_classes=10, input_shape=(None, 28, 28, 1)):
    """Assemble a small three-conv CNN classifier wrapped in an MLP container.

    Args:
        nb_filters: channel count of the first conv layer; later convs
            double it.
        nb_classes: number of output classes.
        input_shape: NHWC input shape (batch dimension may be None).

    Returns:
        An MLP wrapping the conv/ReLU stack, a flatten, a linear head
        and a softmax.
    """
    # Strided convolutions do the downsampling; no explicit pooling layers.
    feature_layers = [
        Conv2D(nb_filters, (8, 8), (2, 2), "SAME"), ReLU(),
        Conv2D(nb_filters * 2, (6, 6), (2, 2), "VALID"), ReLU(),
        Conv2D(nb_filters * 2, (5, 5), (1, 1), "VALID"), ReLU(),
    ]
    classifier_layers = [Flatten(), Linear(nb_classes), Softmax()]
    return MLP(nb_classes, feature_layers + classifier_layers, input_shape)
def __init__(self, obs_shape, action_space, hidden_size=512, base_kwargs=None):
    """Intrinsic Curiosity Module: a conv feature encoder plus forward and
    inverse dynamics heads, trained with a dedicated Adam optimizer.

    Args:
        obs_shape: observation shape as (C, H, W).
        action_space: discrete action space (only .n is read).
        hidden_size: width of the encoded feature vector phi(obs).
        base_kwargs: unused; kept for interface compatibility.
    """
    super(ICM, self).__init__()
    self.obs_shape = obs_shape  # C x H x W
    self.action_space = action_space

    channels_in = self.obs_shape[0]
    n_actions = self.action_space.n

    def init_(module):
        # Orthogonal weights, zero bias, ReLU gain.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0),
                    nn.init.calculate_gain('relu'))

    # Feature encoder: phi(obs) -> hidden_size vector.
    self.phi = nn.Sequential(
        init_(nn.Conv2d(channels_in, 32, 8, stride=4)), nn.ReLU(),
        init_(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        init_(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        init_(nn.Linear(32 * 7 * 7, hidden_size)), nn.ReLU())

    # Forward model: f(phi(obs), action) -> predicted phi(obs_next).
    self.forward_dynamic = nn.Sequential(
        nn.Linear(hidden_size + n_actions, 256), nn.ReLU(),
        nn.Linear(256, hidden_size))

    # Inverse model: f(phi(obs), phi(obs_next)) -> action logits.
    self.inverse_dynamic = nn.Sequential(
        nn.Linear(hidden_size + hidden_size, 256), nn.ReLU(),
        nn.Linear(256, n_actions))

    model_params = (list(self.phi.parameters())
                    + list(self.forward_dynamic.parameters())
                    + list(self.inverse_dynamic.parameters()))

    # Per-element forward loss (reduction done by the caller); CE for the
    # inverse action-classification loss.
    self.fwd_loss_func = nn.MSELoss(reduction='none')
    self.inv_loss_func = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(model_params, lr=1e-3)

    self.num_inputs = channels_in
    self.num_outputs = n_actions
def make_madry_ngpu(nb_classes=10, input_shape=(None, 28, 28, 1), **kwargs):
    """
    Create a multi-GPU model similar to Madry et al. (arXiv:1706.06083).
    """
    def conv_stage(n_filters):
        # One Madry-net stage: 5x5 same-padded conv + ReLU + 2x2 max-pool.
        return [Conv2DnGPU(n_filters, (5, 5), (1, 1), "SAME"),
                ReLU(),
                MaxPool((2, 2), (2, 2), "SAME")]

    layers = (conv_stage(32)
              + conv_stage(64)
              + [Flatten(),
                 LinearnGPU(1024), ReLU(),
                 LinearnGPU(nb_classes), Softmax()])
    return MLPnGPU(nb_classes, layers, input_shape)
def __init__(self, batch_size):
    """Three conv/max-pool stages followed by a three-layer fully-connected
    head, using energy-based dropout.

    Args:
        batch_size: batch size recorded on the instance for later use.
    """
    super(CNN_2_EDropout, self).__init__()

    # Convolutional feature extractor: 3 -> 96 -> 128 -> 256 channels,
    # each stage downsampled by a 3x3/stride-2 max pool.
    self.cnn1 = nn.Conv2d(3, 96, kernel_size=5, padding=1, stride=1)
    self.maxpool1 = nn.MaxPool2d(kernel_size=3, padding=0, stride=2)
    self.cnn2 = nn.Conv2d(96, 128, kernel_size=5, padding=2, stride=1)
    self.maxpool2 = nn.MaxPool2d(kernel_size=3, padding=0, stride=2)
    self.cnn3 = nn.Conv2d(128, 256, kernel_size=5, padding=2, stride=1)
    self.maxpool3 = nn.MaxPool2d(kernel_size=3, padding=0, stride=2)

    self.flatten = Flatten()

    # Fully-connected classifier head.
    self.fc1 = nn.Linear(1024, 2048)
    self.fc2 = nn.Linear(2048, 2048)
    self.fc3 = nn.Linear(2048, 10)

    # Energy-based dropout; captures the module's training flag at
    # construction time.
    self.ed = EDropout(p=0.5, train=self.training, inplace=True)
    self.batch_size = batch_size
# nn.Dropout2d(p=0.25), # Flatten(), # nn.Linear(channel_3 * 64 * 64//4, 2) # ) channel_0 = 32 channel_1 = 16 channel_2 = 8 channel_3 = 4 model = nn.Sequential(nn.Conv2d(3, channel_0, (7, 7), padding=3), nn.ReLU(), nn.Dropout2d(p=0.25), nn.Conv2d(channel_0, channel_1, (5, 5), padding=2), nn.ReLU(), nn.MaxPool2d((2, 2)), nn.Conv2d(channel_1, channel_2, (3, 3), padding=1), nn.ReLU(), nn.Dropout2d(p=0.25), nn.Conv2d(channel_2, channel_3, (3, 3), padding=1), nn.ReLU(), nn.MaxPool2d((2, 2)), Flatten(), nn.Linear(channel_3 * 64 * 64 // 16, 2)) model.load_state_dict(load("model_bis0.92.pth", map_location=dvc), strict=False) def predict_image(image): to_pil = ToPILImage() image = to_pil(image).convert('RGB') test_transforms = Compose([ Resize([64, 64]), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) image_tensor = test_transforms(image).float() image_tensor = image_tensor.unsqueeze_(0)