def __init__(
    self,
    input_size,
    output_size,
    num_hidden_layers,
    hidden_dim,
    flip_gradient=False,
    batchnorm=False,
    drop_prob=0.0,
    activation=torch.nn.ReLU,
):
    super(DNN, self).__init__()
    layers = [
        torch.nn.Linear(input_size, hidden_dim),
        torch.nn.Dropout(drop_prob),
        activation(),
    ]
    if batchnorm:
        raise NotImplementedError
    for i in range(num_hidden_layers):
        layers.append(torch.nn.Linear(hidden_dim, hidden_dim))
        layers.append(torch.nn.Dropout(drop_prob))
        layers.append(activation())
    layers.append(torch.nn.Linear(hidden_dim, output_size))
    if flip_gradient:
        layers.append(RevGrad())
    self._network = torch.nn.Sequential(*layers)
def __init__(self, config):
    super(BLSTM, self).__init__()
    self.config = config
    self.vocab_size = get_vocab_size()
    self.num_mels = config.data.num_mels
    self.hidden_size = config.model.hidden_size
    self.num_layers = config.model.num_layers
    self.batch_first = config.model.batch_first
    self.dropout = config.model.dropout
    self.bidirectional = config.model.bidirectional

    self.lstm = nn.LSTM(input_size=self.num_mels,
                        hidden_size=self.hidden_size,
                        num_layers=self.num_layers,
                        batch_first=self.batch_first,
                        dropout=self.dropout,
                        bidirectional=self.bidirectional)
    self.full1 = nn.Linear(
        in_features=self.hidden_size if not self.bidirectional else self.hidden_size * 2,
        out_features=500)
    self.adv_layer = nn.Sequential(
        RevGrad(),
        nn.Linear(in_features=self.hidden_size if not self.bidirectional else self.hidden_size * 2,
                  out_features=2),
        nn.Softmax(dim=-1))
    self.full2 = nn.Linear(in_features=500, out_features=self.vocab_size)
def __init__(self, embedding_size, hidden_size, cls_num=12, device=torch.device('cuda:1')):
    super(DANNClassifier, self).__init__()
    self.encoderA = Linear(embedding_size, hidden_size)
    self.encoderB = Linear(embedding_size, hidden_size)
    self.classifier = Linear(hidden_size, cls_num)
    self.device = device
    self.rev_grad = RevGrad()
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, dim=512, input_dim=pose_dim, num_classes=226, num_signers=50):
    super().__init__(sign_loss=args.sign_loss,
                     signer_loss=args.signer_loss,
                     signer_loss_patience=args.signer_loss_patience)

    self.batch_norm = torch.nn.BatchNorm1d(num_features=input_dim)
    self.dropout = torch.nn.Dropout(p=0.2)
    self.proj = torch.nn.Linear(in_features=input_dim, out_features=dim)

    heads = args.encoder_heads
    depth = args.encoder_depth

    # self.transformer = Linformer(
    #     dim=dim,
    #     seq_len=seq_len + 1,  # + 1 cls token
    #     depth=depth,
    #     heads=heads,
    #     k=64,
    #     dropout=0.4
    # )

    if args.encoder == "lstm":
        self.encoder = torch.nn.LSTM(input_size=dim,
                                     hidden_size=dim // 2,
                                     num_layers=depth,
                                     batch_first=True,
                                     dropout=0.1,
                                     bidirectional=True)
    else:
        self.encoder = Transformer(dim=dim,
                                   depth=depth,
                                   heads=heads,
                                   dim_head=dim // heads,
                                   mlp_dim=dim,
                                   dropout=0.4)

    self.cls_token = torch.nn.Parameter(torch.randn(1, 1, dim))
    self.pos_embedding = torch.nn.Parameter(torch.randn(1, args.max_seq_size + 1, dim))

    self.head_norm = torch.nn.LayerNorm(dim)
    self.mlp_head = torch.nn.Linear(dim, num_classes)
    self.mlp_signer = torch.nn.Sequential(RevGrad(), torch.nn.Linear(dim, num_signers))
def test_gradients_inverted():
    network = torch.nn.Sequential(torch.nn.Linear(5, 3), torch.nn.Linear(3, 1))
    revnetwork = torch.nn.Sequential(copy.deepcopy(network), RevGrad())

    inp = torch.randn(8, 5)
    outp = torch.randn(8, 1)  # match the (8, 1) network output to avoid MSELoss broadcasting

    criterion = torch.nn.MSELoss()
    criterion(network(inp), outp).backward()
    criterion(revnetwork(inp), outp).backward()
    assert all(
        (p1.grad == -p2.grad).all()
        for p1, p2 in zip(network.parameters(), revnetwork.parameters())
    )
def test_gradients_inverted_alpha(alpha_parameter):
    network = torch.nn.Sequential(torch.nn.Linear(5, 3), torch.nn.Linear(3, 1))
    revnetwork = torch.nn.Sequential(
        copy.deepcopy(network), RevGrad(alpha=alpha_parameter)
    )

    inp = torch.randn(8, 5)
    outp = torch.randn(8, 1)

    criterion = torch.nn.MSELoss()
    criterion(network(inp), outp).backward()
    criterion(revnetwork(inp), outp).backward()
    for p1, p2 in zip(network.parameters(), revnetwork.parameters()):
        assert torch.isclose(p1.grad, -p2.grad / alpha_parameter).all()
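# A minimal sketch of a gradient-reversal layer consistent with the two tests
# above. It is an illustration only, not necessarily the RevGrad implementation
# these snippets import: the forward pass is the identity, and the backward
# pass multiplies the incoming gradient by -alpha.
import torch


class _GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha      # remember the scaling factor for the backward pass
        return x.view_as(x)    # identity in the forward direction

    @staticmethod
    def backward(ctx, grad_output):
        # Flip the sign (and scale) of the gradient flowing to earlier layers;
        # alpha itself receives no gradient, hence the None.
        return -ctx.alpha * grad_output, None


class GradReverseLayer(torch.nn.Module):
    def __init__(self, alpha=1.0):
        super().__init__()
        self.alpha = alpha

    def forward(self, x):
        return _GradReverse.apply(x, self.alpha)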
def __init__(self, model_params):
    super(Discriminator, self).__init__()
    """
    self.emb_dim = 256
    self.dis_hid_dim = 200
    self.dis_layers = 1
    self.dis_input_dropout = 0.2
    self.dis_dropout = 0.2

    layers = []  # [RevGrad()]
    for i in range(self.dis_layers + 1):
        input_dim = self.emb_dim if i == 0 else self.dis_hid_dim
        output_dim = 2 if i == self.dis_layers else self.dis_hid_dim
        layers.append(nn.Linear(input_dim, output_dim))
        if i < self.dis_layers:
            layers.append(nn.LeakyReLU(0.2))
            layers.append(nn.Dropout(self.dis_dropout))
    # layers.append(nn.Sigmoid())
    self.layers = nn.Sequential(*layers)
    """
    self.Classifier = nn.Sequential(RevGrad(), nn.Linear(256, 2))
def __init__(self, device, input_size=768, lstm_hidden_size=500, num_layers=1,
             bidirectional=False, hidden_dimensions=[500], cell_type='GRU',
             causal_layer=None, causal_hidden_dimensions=[30, 20], att_dim=30,
             dropout1=0.2, dropout2=0.2, activation='ReLU', adversarial_out=None,
             task='classification'):
    super(LSTMAttentionClassifier, self).__init__()
    self.task = task
    self.dropout1 = dropout1
    self.dropout2 = dropout2
    self.adversarial_out = adversarial_out
    self.device = device
    self.cell_type = cell_type

    if cell_type == 'GRU':
        self.rnn = nn.GRU(input_size=input_size,
                          hidden_size=lstm_hidden_size,
                          num_layers=num_layers,
                          dropout=0.5,
                          batch_first=True,
                          bidirectional=bidirectional)
        self.hx = None
    elif cell_type == 'LSTM':
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=lstm_hidden_size,
                           num_layers=num_layers,
                           dropout=0.5,
                           batch_first=True,
                           bidirectional=bidirectional)
        self.hx = None
        self.cx = None
    else:
        raise Exception('Invalid RNN type')

    self.bidirectional = bidirectional
    self.lstm_hidden_size = lstm_hidden_size
    self.num_layers = num_layers
    self.hidden_dimensions = hidden_dimensions

    if self.bidirectional:
        self.directions = 2
    else:
        self.directions = 1

    if causal_layer and causal_layer == 'residual':
        layer_input = lstm_hidden_size + 1
        # layer_input = lstm_hidden_size + 768  # + causal_hidden_dimensions[-1]
    else:
        layer_input = lstm_hidden_size * self.directions

    self.fc_layers = nn.ModuleList([])
    for layer_out in hidden_dimensions:
        self.fc_layers.append(nn.Linear(layer_input, layer_out))
        layer_input = layer_out
    self.last_fc = nn.Linear(layer_input, 1)

    self.sigmoid = nn.Sigmoid()
    self.drop = nn.Dropout(self.dropout1)
    if activation == 'ReLU':
        self.activation = nn.ReLU()
    else:
        self.activation = nn.Tanh()

    self.att1 = nn.Linear(self.directions * lstm_hidden_size, att_dim, bias=False)
    self.att2 = nn.Linear(att_dim, 1, bias=False)

    self.causal_layer = causal_layer
    if causal_layer == 'adversarial':
        self.rev = RevGrad()
        self.drop2 = nn.Dropout(self.dropout2)
        layer_input = lstm_hidden_size * self.directions
        self.causal_layers = nn.ModuleList([])
        for layer_out in causal_hidden_dimensions:
            self.causal_layers.append(nn.Linear(layer_input, layer_out))
            layer_input = layer_out
        if not adversarial_out:
            self.causal_last_fc = nn.Linear(layer_input, 10)  # regression as multiclass classification
            self.classes = torch.arange(1, 11).view(-1, 1).to(self.device, dtype=torch.float)  # classes has shape (10, 1)
            self.softmax = nn.Softmax()
        else:
            # adversarial_out is a tuple of (number_of_confounders, ids of confounders with sigmoid)
            self.causal_last_fc = nn.Linear(layer_input, adversarial_out[0])
    elif causal_layer == 'residual':
        self.drop2 = nn.Dropout(self.dropout2)
        if not adversarial_out:
            layer_input = input_size
        else:
            layer_input = adversarial_out[0]
        self.causal_layers = nn.ModuleList([])
        for layer_out in causal_hidden_dimensions:
            self.causal_layers.append(nn.Linear(layer_input, layer_out))
            layer_input = layer_out
        self.causal_last_fc = nn.Linear(layer_input, 1)
def get_classifier():
    return nn.Sequential(RevGrad(),
                         nn.Linear(model_dim, args.hidden),
                         nn.ReLU(),
                         nn.Dropout(0.1),
                         nn.Linear(args.hidden, len(languages))).to(device)
class DANN(nn.Module):

    def __init__(self, learning_rate=0.05, cls_num=2, domain_num=2, input_size=768,
                 hidden_layer_size=25, lambda_adapt=1., maxiter=5000, verbose=False,
                 batch_size=64, use_cuda=True, name=None, cached=False, cpt_path=''):
        """
        Domain-Adversarial Neural Network for classification.

        option "learning_rate" is the learning rate of the neural network.
        option "hidden_layer_size" is the hidden layer size.
        option "lambda_adapt" weights the domain adaptation regularization term.
            If 0, None, or False, no domain adaptation regularization is performed.
        option "maxiter" is the number of training iterations.
        """
        super(DANN, self).__init__()
        self.hidden_layer_size = hidden_layer_size
        self.maxiter = maxiter
        self.lambda_adapt = lambda_adapt if lambda_adapt not in (None, False) else 0.
        self.learning_rate = learning_rate
        self.verbose = verbose
        self.input_size = input_size

        self.feature_extractor = nn.Sequential(
            Linear(self.input_size, self.hidden_layer_size), nn.Sigmoid())
        self.classifier = nn.Linear(self.hidden_layer_size, cls_num)
        self.domain_classifier = nn.Sequential(
            nn.Linear(self.hidden_layer_size, 20), nn.Sigmoid(),
            nn.Linear(20, domain_num))
        self.batch_size = batch_size
        self.rev_grad = RevGrad()
        self.use_cuda = use_cuda
        self.criterion = nn.CrossEntropyLoss(reduction='mean')
        # self.d_optimizer = optim.SGD([{"params": self.classifier.parameters(), 'lr': 1e-3}])
        # self.optimizer = optim.SGD(self.parameters(), lr=0.01, momentum=0.9)
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.print_freq = 100
        self.name = name
        self.cached = cached
        self.checkpoint_path = cpt_path

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x

    def _hidden_representation(self, x):
        x = self.feature_extractor(x)
        return x

    def predict_(self, x):
        # outputs = self(torch.FloatTensor(x))
        x = torch.FloatTensor(x)
        outputs = self(x)
        _, predicted = torch.max(outputs.data, 1)
        return predicted.cpu().numpy()

    def _predict(self, x):
        outputs = self(x.cuda())
        _, predicted = torch.max(outputs.data, 1)
        return predicted.cpu().numpy()

    def predict(self, x):
        x = torch.FloatTensor(x)
        outputs = self(x.cuda())
        _, predicted = torch.max(outputs.data, 1)
        return predicted.cpu().numpy()

    def _predict_domain(self, x):
        outputs = self._hidden_representation(x)
        _, predicted = torch.max(self.domain_classifier(outputs), 1)
        return predicted.cpu().numpy()

    def L_y(self, x, y):
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return self.criterion(x, y)

    def L_d(self, x, domain_y):
        x = self.rev_grad(self.feature_extractor(x))
        x = self.domain_classifier(x)
        return self.criterion(x, domain_y)

    def validate(self, x, y):
        with torch.no_grad():
            preds = self._predict(x)
            acc = np.mean(preds == y)
            return acc

    def validate_domain(self, X, X_adapt):
        with torch.no_grad():
            domain_labels = np.array([0] * X_adapt.size(0) + [1] * X.size(0))
            domain_ds = data_utils.TensorDataset(torch.cat([X_adapt, X], dim=0))
            # shuffle must stay off so predictions line up with domain_labels
            loader = data_utils.DataLoader(domain_ds,
                                           batch_size=1024,
                                           shuffle=False,
                                           pin_memory=True,
                                           num_workers=4,
                                           drop_last=False)
            preds = []
            for x, in loader:
                if self.use_cuda:
                    x = x.cuda()
                preds.extend(self._predict_domain(x))
            acc = np.mean(preds == domain_labels)
            return acc

    def fit(self, X, Y, X_adapt, X_valid=None, Y_valid=None, do_random_init=True):
        """
        Trains the domain-adversarial neural network until it reaches a total
        number of iterations of "self.maxiter" since it was initialized.

        inputs:
            X : source data matrix
            Y : source labels
            X_adapt : target data matrix
            (X_valid, Y_valid) : validation set used for early stopping
            do_random_init : whether to use random initialization
        """
        if self.cached and self.verbose:
            print("Attempt to Load Model from {} ...".format(self.checkpoint_path))
        if self.cached and os.path.exists(self.checkpoint_path):
            self.load_state_dict(torch.load(self.checkpoint_path))
            preds = self.predict_(X)
            correct = np.sum(preds == Y)
            correct = correct / len(Y)
            # print("Source Domain batch Acc.: {:.4f}".format(correct))
            if self.use_cuda:
                self.cuda()
            return correct

        # X = X - np.mean(X, axis=0)
        # X_adapt = X_adapt - np.mean(X_adapt, axis=0)
        X, X_adapt = torch.FloatTensor(X), torch.FloatTensor(X_adapt)
        if self.verbose:
            print("Adaptation size: {}".format(len(X_adapt)))
        X_valid = torch.FloatTensor(X_valid)
        Y_cpu = Y.copy()
        Y = torch.LongTensor(Y)
        # domain_labels = torch.LongTensor([1] * X_adapt.size(0) + [1] * X.size(0))

        domain_ds = data_utils.TensorDataset(X_adapt)
        clf_ds = data_utils.TensorDataset(X, Y)
        domain_loader = data_utils.DataLoader(domain_ds,
                                              batch_size=self.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=4,
                                              drop_last=True)
        clf_loader = data_utils.DataLoader(clf_ds,
                                           batch_size=self.batch_size,
                                           shuffle=True,
                                           pin_memory=True,
                                           num_workers=4,
                                           drop_last=True)
        domain_loader = list(domain_loader)
        clf_loader = list(clf_loader)

        best_acc = 0.0
        avg_acc = []
        print_count = 0
        if self.use_cuda:
            self.cuda()

        running_loss = 0.0
        running_ld = 0.0
        running_ly = 0.0
        batch_counter = 0
        num_steps = (X.size(0) // self.batch_size) * self.maxiter
        for i in tqdm(range(self.maxiter)):
            for x, y in clf_loader:
                # Adaptation param and learning rate schedule as described in the paper
                p = float(batch_counter) / num_steps
                l = 2. / (1. + np.exp(-10. * p)) - 1
                self.rev_grad.set_scale(l)

                self.optimizer.zero_grad()
                # self.d_optimizer.zero_grad()

                # remove the random choosing of the batch data
                domain_x, = domain_loader[batch_counter % len(domain_loader)]
                domain_x = torch.cat([domain_x, x], dim=0)
                domain_y = torch.LongTensor([0] * self.batch_size + [1] * self.batch_size)
                if self.use_cuda:
                    x, y = x.cuda(), y.cuda()
                    domain_x, domain_y = domain_x.cuda(), domain_y.cuda()

                l_y = self.L_y(x, y)
                l_d = self.L_d(domain_x, domain_y)
                loss = l_y + self.lambda_adapt * l_d
                loss.backward()
                # self.d_optimizer.step()
                self.optimizer.step()

                lr = 0.01 / (1. + 10 * p) ** 0.75
                # for g in self.optimizer.param_groups:
                #     g['lr'] = lr
                batch_counter += 1  # update scale

                running_loss += loss.item()
                running_ld += l_d.item()
                running_ly += l_y.item()

            if (i + 1) % self.print_freq == 0:
                if self.verbose:
                    print('Iter {}/{} loss: {:.5f} Ly: {:.5f} Ld: {:5f}'.format(
                        i + 1, self.maxiter,
                        running_loss / self.print_freq,
                        running_ly / self.print_freq,
                        running_ld / self.print_freq))
                    print("p: {:.4f} l: {:.4f} lr: {:.4f}".format(p, l, lr))
                running_loss = 0.0
                running_ld = 0.0
                running_ly = 0.0

                target_acc = self.validate(X_valid, Y_valid)
                avg_acc.append(target_acc)
                if self.verbose:
                    print("Source Domain Acc.: {:.4f}".format(self.validate(X, Y_cpu)))
                    print("Target Domain Acc.: {:.4f}".format(target_acc))
                    print("Domain Clf Acc.: {:.4f}".format(self.validate_domain(X, X_adapt)))
                if target_acc >= best_acc:
                    best_acc = target_acc
                    print_count += 1
                    torch.save(self.state_dict(), self.checkpoint_path)

        print("INFER {} Best ACC in Valid Dataset. {:.4f} Average ACC {}".format(
            self.name, best_acc, avg_acc))
        return best_acc
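# A minimal usage sketch for the DANN class above, using randomly generated
# features in place of real data. The shapes, label counts, iteration budget,
# and checkpoint path are illustrative assumptions; it also assumes a CUDA
# device is available (use_cuda=True), that the module-level imports of the
# original file (Linear, nn, optim, data_utils, tqdm, os) are in scope, and
# that the imported RevGrad exposes the set_scale() method called in fit().
import numpy as np

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X_src = rng.randn(512, 768).astype("float32")   # labeled source features
    y_src = rng.randint(0, 2, size=512)              # source labels
    X_tgt = rng.randn(512, 768).astype("float32")   # unlabeled target features
    X_val = rng.randn(128, 768).astype("float32")   # target validation features
    y_val = rng.randint(0, 2, size=128)              # target validation labels

    model = DANN(cls_num=2, domain_num=2, input_size=768, lambda_adapt=1.,
                 maxiter=200, verbose=True, use_cuda=True,
                 name="demo", cpt_path="dann_demo.pt")
    best_acc = model.fit(X_src, y_src, X_tgt, X_valid=X_val, Y_valid=y_val)
    preds = model.predict(X_val)                     # label predictions for target samples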
def __init__(self, model_params):
    super(Discriminator_Matcher, self).__init__()
    self.Classifier = nn.Sequential(RevGrad(),
                                    nn.Linear(20, 20),
                                    nn.LeakyReLU(0.1),
                                    nn.Linear(20, 2))
def __init__(self, model_params):
    super(Discriminator_Compressor, self).__init__()
    self.Classifier = nn.Sequential(RevGrad(),
                                    nn.Linear(256, 128),
                                    nn.LeakyReLU(0.1),
                                    nn.Linear(128, 2))
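# A hedged sketch of the training-step pattern that RevGrad-prefixed heads like
# Discriminator, Discriminator_Matcher, and Discriminator_Compressor above are
# built for: the domain loss is simply added to the task loss, and a single
# backward() trains the discriminator while the reversed gradient pushes the
# encoder toward domain-invariant features. The encoder, task_head, and tensor
# shapes are illustrative assumptions; RevGrad is assumed importable as in the
# modules above (e.g. from pytorch_revgrad).
import torch
import torch.nn as nn

encoder = nn.Sequential(nn.Linear(300, 256), nn.ReLU())     # assumed feature extractor
task_head = nn.Linear(256, 5)                                # assumed task classifier
domain_head = nn.Sequential(RevGrad(), nn.Linear(256, 2))    # discriminator head, as above

params = list(encoder.parameters()) + list(task_head.parameters()) + list(domain_head.parameters())
optimizer = torch.optim.Adam(params, lr=1e-3)
criterion = nn.CrossEntropyLoss()

x = torch.randn(32, 300)                # one batch of inputs
y_task = torch.randint(0, 5, (32,))     # task labels
y_domain = torch.randint(0, 2, (32,))   # domain labels (e.g. source=0, target=1)

features = encoder(x)
loss = criterion(task_head(features), y_task) + criterion(domain_head(features), y_domain)

optimizer.zero_grad()
loss.backward()                          # gradient from the domain loss is reversed at the encoder
optimizer.step()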