def __init__(self, n_image_features, vocab_size, embedding_dim, hidden_size, bound_idx, max_sentence_length, vl_loss_weight, bound_weight, should_train_cnn, n_rsa_samples, use_gpu, K, use_distractors_in_sender):
    """Set up the communication game: a Sender, a Receiver and (optionally) a trainable CNN.

    The sender's input width is widened to (K+1)*n_image_features when the
    sender also sees the K distractors; otherwise it only sees the target's
    n_image_features. All other arguments are stored as-is for later use.
    """
    super().__init__()
    # Plain configuration attributes, stored unchanged.
    self.n_image_features = n_image_features
    self.vocab_size = vocab_size
    self.max_sentence_length = max_sentence_length
    self.bound_token_idx = bound_idx
    self.vl_loss_weight = vl_loss_weight  # lambda
    self.bound_weight = bound_weight  # alpha
    self.should_train_cnn = should_train_cnn
    self.n_rsa_samples = n_rsa_samples
    self.use_gpu = use_gpu
    self.use_distractors_in_sender = use_distractors_in_sender

    # Only build a CNN when it is meant to be trained end-to-end; otherwise
    # features are presumably precomputed elsewhere (no self.cnn attribute).
    if should_train_cnn:
        self.cnn = CNN(n_image_features)

    # Sender input size: target features alone, or target + K distractors.
    sender_image_features = (K + 1) * n_image_features if use_distractors_in_sender else n_image_features

    self.sender = Sender(
        sender_image_features, vocab_size, embedding_dim, hidden_size,
        bound_idx, max_sentence_length, vl_loss_weight, bound_weight, use_gpu)
    self.receiver = Receiver(
        n_image_features, vocab_size, embedding_dim, hidden_size, use_gpu)
# NOTE(review): this chunk starts mid-expression — the line below is the tail
# of a call whose opening (function name and earlier arguments) is outside
# this view. Left byte-identical; reattach to its call site before editing.
'shapes' if not shapes_dataset is None else 'mscoco', vocab_size)
print("loading pretrained cnn")
# Load pretrained CNN if necessary: only when the visual module is frozen,
# input is real images (not symbolic), and a shapes dataset is configured.
if not should_train_visual and not use_symbolic_input and not shapes_dataset is None:
    # Identify the CNN checkpoint by its file name (last path component).
    cnn_model_id = cnn_model_file_name.split('/')[-1]
    features_folder_name = 'data/shapes/{}_{}'.format(shapes_dataset, cnn_model_id)
    # Check if the features were already extracted with this CNN
    if not os.path.exists(features_folder_name):
        # Load CNN from dumped model. map_location forces all tensors onto
        # CPU regardless of where the checkpoint was saved.
        state = torch.load(cnn_model_file_name, map_location=lambda storage, location: storage)
        # Keep only CNN weights; k[4:] strips a 4-char key prefix
        # (presumably 'cnn.' from a wrapping module — TODO confirm).
        cnn_state = {k[4:]: v for k, v in state.items() if 'cnn' in k}
        trained_cnn = CNN(n_image_features)
        trained_cnn.load_state_dict(cnn_state)
        if use_gpu:
            trained_cnn = trained_cnn.cuda()
        print("=CNN state loaded=")
        print("Extracting features...")
        # Dump the features to then load them
        features_folder_name = save_features(trained_cnn, shapes_dataset, cnn_model_id)
# NOTE(review): "crating" is a typo for "creating" in the runtime string below;
# left untouched here since it is program output, not a comment.
print("crating one hot metadata")
if not shapes_dataset is None:
    # Create onehot metadata if not created yet
    # NOTE(review): the body of this `if` is truncated in this chunk — the
    # remaining statements live outside this view.
class Model(nn.Module):
    """Sender/receiver referential game with optional end-to-end CNN.

    NOTE(review): this class is truncated in the visible chunk — ``forward``
    ends mid-branch (no return statement is visible). Documented as far as
    the code shows; do not infer the missing tail.
    """

    def __init__(self, n_image_features, vocab_size, embedding_dim, hidden_size, bound_idx, max_sentence_length, vl_loss_weight, bound_weight, should_train_cnn, n_rsa_samples, use_gpu, K, use_distractors_in_sender):
        # Stash all game hyper-parameters for later use.
        super().__init__()
        self.use_gpu = use_gpu
        self.bound_token_idx = bound_idx
        self.max_sentence_length = max_sentence_length
        self.vocab_size = vocab_size
        self.vl_loss_weight = vl_loss_weight  # lambda
        self.bound_weight = bound_weight  # alpha
        self.should_train_cnn = should_train_cnn
        self.n_rsa_samples = n_rsa_samples
        self.use_distractors_in_sender = use_distractors_in_sender
        self.n_image_features = n_image_features
        # A CNN is only instantiated when it is trained as part of this model.
        if self.should_train_cnn:
            self.cnn = CNN(n_image_features)
        # Sender sees either the target alone, or target + K distractors
        # concatenated, hence the (K+1) multiplier.
        if self.use_distractors_in_sender:
            sender_image_features = (K+1)*n_image_features
        else:
            sender_image_features = n_image_features
        self.sender = Sender(sender_image_features, vocab_size, embedding_dim, hidden_size, bound_idx, max_sentence_length, vl_loss_weight, bound_weight, use_gpu)
        self.receiver = Receiver(n_image_features, vocab_size, embedding_dim, hidden_size, use_gpu)

    def _pad(self, m, seq_lengths):
        """Force every position at or beyond a message's length to the bound token.

        Training: `m` is treated as per-token distributions — assumes shape
        (batch, max_len, vocab_size) — TODO confirm against the sender's
        output; masked positions are zeroed and their mass moved onto the
        bound-token channel, keeping the operation differentiable.
        Eval: `m` is treated as token ids — assumes (batch, max_len) —
        and masked positions are overwritten in place with the bound index.
        """
        max_len = m.shape[1]
        # mask[i, t] is True while t is inside message i's length.
        mask = torch.arange(max_len)
        if self.use_gpu:
            mask = mask.cuda()
        mask = mask.expand(len(seq_lengths), max_len) < seq_lengths.unsqueeze(1)
        if self.training:
            mask = mask.type(dtype=m.dtype)
            m = m * mask.unsqueeze(2)
            # Re-inject probability 1.0 for the bound token wherever masked.
            m[:, :, self.bound_token_idx] += (mask == 0).float()
        else:
            m = m.masked_fill_(mask == 0, self.bound_token_idx)
        return m

    def _get_word_counts(self, m):
        """Return a vocab-sized tensor of word usage counts for message batch `m`.

        Training path sums over sequence then batch — valid if `m` is one-hot
        per position (presumably; TODO confirm). Eval path counts token-id
        occurrences explicitly.
        """
        if self.training:
            c = m.sum(dim=1).sum(dim=0).detach()  # ToDo: are we sure about this???? Yeah, we are
        else:
            c = torch.zeros([self.vocab_size])
            if self.use_gpu:
                c = c.cuda()
            for w_idx in range(self.vocab_size):
                c[w_idx] = (m == w_idx).sum()
        return c

    def _count_unique_messages(self, m):
        # Number of distinct messages in the batch (row-wise uniqueness).
        return len(np.unique(m.detach().cpu().numpy(), axis=0))

    def grad_cam(self, mode):
        # mode either s_t, r_t, r_d
        # NOTE(review): forward() also checks 's_d', which is not listed
        # here — confirm which mode strings are actually supported.
        self.mode = mode

    def forward(self, target, distractors, word_counts, target_onehot_metadata):
        """Grad-CAM-oriented forward pass.

        The branch selected by self.mode decides which image goes through
        the live CNN (gradients flow) versus a detached weight-copy
        (gradients blocked via a detach/numpy round-trip).
        NOTE(review): truncated — the method body continues past this chunk;
        `word_counts` and `target_onehot_metadata` are unused in the visible
        portion. Requires grad_cam() to have been called first (reads
        self.mode).
        """
        self.receiver.mode = self.mode
        batch_size = target.shape[0]
        if self.use_gpu:
            target = target.cuda()
            distractors = [d.cuda() for d in distractors]
        # Raw images are 4-D (5-D with paired targets); precomputed features
        # drop the spatial dims, hence 3 when the CNN is not trained here.
        n_dim = 5 if self.should_train_cnn else 3
        use_different_targets = len(target.shape) == n_dim
        assert not use_different_targets or target.shape[1] == 2, 'This should only be two targets'
        if self.should_train_cnn:
            if not use_different_targets:
                # Extract features
                cnn_copy = type(self.cnn)(self.n_image_features)  # get a new instance
                cnn_copy.load_state_dict(self.cnn.state_dict())  # copy weights and stuff
                # NOTE(review): unconditional .cuda() here, unlike the
                # self.use_gpu guards elsewhere — likely breaks CPU runs.
                cnn_copy.cuda()
                if self.mode in ['r_d', 's_d']:
                    # Distractors keep gradients (live CNN) in these modes...
                    distractors = [self.cnn(d) for d in distractors]
                else:
                    # ...otherwise run the copy and sever autograd via a
                    # detach -> numpy -> new-Tensor round-trip.
                    distractors = [torch.Tensor(cnn_copy(d).detach().cpu().numpy()).cuda() for d in distractors]
                # self.cnn.zero_grad()
                if self.mode == 's_t':
                    target_out = self.cnn(target)
                else:
                    target_out = torch.Tensor(cnn_copy(target).detach().cpu().numpy()).cuda()
                target_sender = target_out
                target_receiver = target_out
            else:
                cnn_copy = type(self.cnn)(self.n_image_features)  # get a new instance
                cnn_copy.load_state_dict(self.cnn.state_dict())  # copy weights and stuff
                cnn_copy.cuda()
                # Extract features: with paired targets, sender gets image 0
                # and receiver gets image 1; only the image matching the
                # mode goes through the live CNN.
                if self.mode == 's_t':
                    target_sender = self.cnn(target[:, 0, :, :, :])
                    target_receiver = torch.Tensor(cnn_copy(target[:, 1, :, :, :]).detach().cpu().numpy()).cuda()
                elif self.mode == 'r_t':
                    target_sender = torch.Tensor(cnn_copy(target[:, 0, :, :, :]).detach().cpu().numpy()).cuda()
                    target_receiver = self.cnn(target[:, 1, :, :, :])
                # NOTE(review): chunk ends here — remaining branches and the
                # rest of forward() are outside this view.
# Load metadata train_metadata, valid_metadata, test_metadata, noise_metadata = load_shapes_classdata( shapes_dataset) print("loaded metadata") print("loading data") # Load data train_data, valid_data, test_data, noise_data = load_images( 'shapes/{}'.format(shapes_dataset), BATCH_SIZE, K) print("data loaded") # Settings print("creating model") cnnmodel = CNN(n_image_features) import torch.nn as nn class MyModel(nn.Module): def __init__(self, cnn, n_out_features, out_classes): super(MyModel, self).__init__() self.cnn = cnn self.fc = nn.Linear(n_out_features, out_classes) def forward(self, x): x = self.cnn(x) x = self.fc(x) # x = nn.Softmax(x) return x