Example #1
 def __init__(self,
              learning_rate=0.05,
              cls_num=2,
              domain_num=2,
              input_size=768,
              hidden_layer_size=25,
              lambda_adapt=1.,
              maxiter=5000,
              verbose=False,
              batch_size=64,
              use_cuda=True,
              name=None,
              cached=False,
              cpt_path=''):
     """
     Domain Adversarial Neural Network for classification
     
     option "learning_rate" is the learning rate of the neural network.
     option "hidden_layer_size" is the hidden layer size.
     option "lambda_adapt" weights the domain adaptation regularization term.
             if 0 or None or False, then no domain adaptation regularization is performed
     option "maxiter" number of training iterations.
     option "epsilon_init" is a term used for initialization.
             if None the weight matrices are weighted by 6/(sqrt(r+c))
             (where r and c are the dimensions of the weight matrix)
     option "adversarial_representation": if False, the adversarial classifier is trained
             but has no impact on the hidden layer representation. The label predictor is
             then the same as a standard neural-network one (see experiments_moon.py figures). 
     option "seed" is the seed of the random number generator.
     """
     super(DANN, self).__init__()
     self.hidden_layer_size = hidden_layer_size
     self.maxiter = maxiter
     self.lambda_adapt = lambda_adapt if lambda_adapt not in (None,
                                                              False) else 0.
     self.learning_rate = learning_rate
     self.verbose = verbose
     self.input_size = input_size
     self.feature_extractor = nn.Sequential(
         Linear(self.input_size, self.hidden_layer_size), nn.Sigmoid())
     self.classifier = nn.Linear(self.hidden_layer_size, cls_num)
     self.domain_classifier = nn.Sequential(
         nn.Linear(self.hidden_layer_size, 20), nn.Sigmoid(),
         nn.Linear(20, domain_num))
     self.batch_size = batch_size
     self.rev_grad = RevGrad()
     self.use_cuda = use_cuda
     self.criterion = nn.CrossEntropyLoss(reduction='mean')
     # self.d_optimizer = optim.SGD([{"params": self.classifier.parameters(), 'lr': 1e-3}])
     # self.optimizer = optim.SGD(self.parameters(), lr = 0.01, momentum = 0.9)
     self.optimizer = optim.Adam(self.parameters(), lr=0.001)
     self.print_freq = 100
     self.name = name
     self.cached = cached
     self.checkpoint_path = cpt_path
Example #2
    def __init__(
        self,
        input_size,
        output_size,
        num_hidden_layers,
        hidden_dim,
        flip_gradient=False,
        batchnorm=False,
        drop_prob=0.0,
        activation=torch.nn.ReLU,
    ):
        super(DNN, self).__init__()

        layers = [
            torch.nn.Linear(input_size, hidden_dim),
            torch.nn.Dropout(drop_prob),
            activation(),
        ]

        if batchnorm:
            raise NotImplementedError

        for i in range(num_hidden_layers):
            layers.append(torch.nn.Linear(hidden_dim, hidden_dim))
            layers.append(torch.nn.Dropout(drop_prob))
            layers.append(activation())

        layers.append(torch.nn.Linear(hidden_dim, output_size))

        if flip_gradient:
            layers.append(RevGrad())

        self._network = torch.nn.Sequential(*layers)
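A hedged construction sketch for the DNN above; the argument values are illustrative, not taken from the original project. With flip_gradient=True, RevGrad is appended as the final layer, so it is an identity on the forward pass and only flips gradient signs on the backward pass:

dnn = DNN(input_size=128, output_size=2, num_hidden_layers=2,
          hidden_dim=64, flip_gradient=True, drop_prob=0.1)
# dnn._network maps a (batch, 128) tensor to (batch, 2). Because RevGrad sits at the end,
# every parameter inside dnn (and in any module feeding it) receives the negated gradient
# of a loss computed on dnn's output.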
Example #3
 def __init__(self, config):
     super(BLSTM, self).__init__()
     self.config = config
     self.vocab_size = get_vocab_size()
     self.num_mels = config.data.num_mels
     self.hidden_size = config.model.hidden_size
     self.num_layers = config.model.num_layers
     self.batch_first = config.model.batch_first
     self.dropout = config.model.dropout
     self.bidirectional = config.model.bidirectional
     self.lstm = nn.LSTM(input_size=self.num_mels,
                         hidden_size=self.hidden_size,
                         num_layers=self.num_layers,
                         batch_first=self.batch_first,
                         dropout=self.dropout,
                         bidirectional=self.bidirectional)
     self.full1 = nn.Linear(
         in_features=self.hidden_size
         if not self.bidirectional else self.hidden_size * 2,
         out_features=500)
     self.adv_layer = nn.Sequential(
         RevGrad(),
         nn.Linear(in_features=self.hidden_size
                   if not self.bidirectional else self.hidden_size * 2,
                   out_features=2),  #21
         nn.Softmax(dim=-1))
     self.full2 = nn.Linear(in_features=500, out_features=self.vocab_size)
Example #4
 def __init__(self, embedding_size, hidden_size, cls_num = 12, device = torch.device('cuda:1')):
     super(DANNClassifier, self).__init__()
     self.encoderA = Linear(embedding_size, hidden_size)
     self.encoderB = Linear(embedding_size, hidden_size)
     self.classifier = Linear(hidden_size, cls_num)
     self.device = device
     self.rev_grad = RevGrad()
     self.criterion = nn.CrossEntropyLoss()
Example #5
    def __init__(self,
                 dim=512,
                 input_dim=pose_dim,
                 num_classes=226,
                 num_signers=50):
        super().__init__(sign_loss=args.sign_loss,
                         signer_loss=args.signer_loss,
                         signer_loss_patience=args.signer_loss_patience)

        self.batch_norm = torch.nn.BatchNorm1d(num_features=input_dim)
        self.dropout = torch.nn.Dropout(p=0.2)

        self.proj = torch.nn.Linear(in_features=input_dim, out_features=dim)

        heads = args.encoder_heads
        depth = args.encoder_depth

        # self.transformer = Linformer(
        #   dim=dim,
        #   seq_len=seq_len + 1,  # + 1 cls token
        #   depth=depth,
        #   heads=heads,
        #   k=64,
        #   dropout=0.4
        # )

        if args.encoder == "lstm":
            self.encoder = torch.nn.LSTM(input_size=dim,
                                         hidden_size=dim // 2,
                                         num_layers=depth,
                                         batch_first=True,
                                         dropout=0.1,
                                         bidirectional=True)
        else:
            self.encoder = Transformer(dim=dim,
                                       depth=depth,
                                       heads=heads,
                                       dim_head=dim // heads,
                                       mlp_dim=dim,
                                       dropout=0.4)

        self.cls_token = torch.nn.Parameter(torch.randn(1, 1, dim))
        self.pos_embedding = torch.nn.Parameter(
            torch.randn(1, args.max_seq_size + 1, dim))

        self.head_norm = torch.nn.LayerNorm(dim)
        self.mlp_head = torch.nn.Linear(dim, num_classes)
        self.mlp_signer = torch.nn.Sequential(
            RevGrad(), torch.nn.Linear(dim, num_signers))
Example #6
# Assumed imports for these test snippets (they appear to come from a pytest suite
# exercising the pytorch-revgrad package):
import copy

import pytest
import torch

from pytorch_revgrad import RevGrad


def test_gradients_inverted():
    network = torch.nn.Sequential(torch.nn.Linear(5, 3), torch.nn.Linear(3, 1))
    revnetwork = torch.nn.Sequential(copy.deepcopy(network), RevGrad())

    inp = torch.randn(8, 5)
    outp = torch.randn(8)

    criterion = torch.nn.MSELoss()

    criterion(network(inp), outp).backward()
    criterion(revnetwork(inp), outp).backward()

    assert all(
        (p1.grad == -p2.grad).all()
        for p1, p2 in zip(network.parameters(), revnetwork.parameters()))


@pytest.mark.parametrize("alpha_parameter", [0.5, 1.0, 2.0])  # illustrative values; presumably supplied by a pytest parametrization in the original suite
def test_gradients_inverted_alpha(alpha_parameter):
    network = torch.nn.Sequential(torch.nn.Linear(5, 3), torch.nn.Linear(3, 1))
    revnetwork = torch.nn.Sequential(
        copy.deepcopy(network), RevGrad(alpha=alpha_parameter)
    )

    inp = torch.randn(8, 5)
    outp = torch.randn(8, 1)

    criterion = torch.nn.MSELoss()

    criterion(network(inp), outp).backward()
    criterion(revnetwork(inp), outp).backward()

    for p1, p2 in zip(network.parameters(), revnetwork.parameters()):
        assert torch.isclose(p1.grad, -p2.grad/alpha_parameter).all()
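These tests verify the core gradient-reversal behaviour: RevGrad acts as the identity on the forward pass and negates (and, with alpha, scales) the gradient on the backward pass. A minimal sketch of that idea, stated here as an assumption rather than the library's actual source:

import torch

class _ReverseGrad(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)  # identity on the forward pass

    @staticmethod
    def backward(ctx, grad_output):
        # negate and scale the incoming gradient; alpha itself gets no gradient
        return -ctx.alpha * grad_output, None

class ReverseGradLayer(torch.nn.Module):
    def __init__(self, alpha=1.0):
        super().__init__()
        self.alpha = alpha

    def forward(self, x):
        return _ReverseGrad.apply(x, self.alpha)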
Example #8
 def __init__(self, model_params):
     super(Discriminator, self).__init__()
     """
     self.emb_dim = 256
     self.dis_hid_dim = 200
     self.dis_layers = 1
     self.dis_input_dropout = 0.2
     self.dis_dropout = 0.2
     layers = []#[RevGrad()]
     for i in range(self.dis_layers + 1):
         input_dim = self.emb_dim if i == 0 else self.dis_hid_dim
         output_dim = 2 if i == self.dis_layers else self.dis_hid_dim
         layers.append(nn.Linear(input_dim, output_dim))
         if i < self.dis_layers:
             layers.append(nn.LeakyReLU(0.2))
             layers.append(nn.Dropout(self.dis_dropout))
     #layers.append(nn.Sigmoid())
     self.layers = nn.Sequential(*layers)
     """
     self.Classifier = nn.Sequential(RevGrad(), nn.Linear(256, 2))
Example #9
    def __init__(self, device, input_size=768, lstm_hidden_size=500, num_layers=1, bidirectional=False, hidden_dimensions=[500],
                 cell_type='GRU', causal_layer=None, causal_hidden_dimensions=[30, 20], att_dim=30, dropout1=0.2,
                 dropout2=0.2, activation='ReLU', adversarial_out=None, task='classification'):
        super(LSTMAttentionClassifier, self).__init__()
        self.task = task
        self.dropout1 = dropout1
        self.dropout2 = dropout2
        self.adversarial_out = adversarial_out
        self.device = device
        self.cell_type = cell_type
        if cell_type == 'GRU':
            self.rnn = nn.GRU(input_size=input_size, hidden_size=lstm_hidden_size, num_layers=num_layers, dropout=0.5,
                              batch_first=True, bidirectional=bidirectional)
            self.hx = None
        elif cell_type == 'LSTM':
            self.rnn = nn.LSTM(input_size=input_size, hidden_size=lstm_hidden_size, num_layers=num_layers, dropout=0.5,
                                batch_first=True, bidirectional=bidirectional)
            self.hx = None
            self.cx = None
        else:
            raise Exception('Invalid RNN type')
        self.bidirectional = bidirectional
        self.lstm_hidden_size = lstm_hidden_size
        self.num_layers = num_layers
        self.hidden_dimensions = hidden_dimensions
        if self.bidirectional:
            self.directions = 2
        else:
            self.directions = 1
        if causal_layer and causal_layer == 'residual':
            layer_input = lstm_hidden_size + 1
            # layer_input = lstm_hidden_size + 768 # + causal_hidden_dimensions[-1]
        else:
            layer_input = lstm_hidden_size * self.directions
        self.fc_layers = nn.ModuleList([])
        for layer_out in hidden_dimensions:
            self.fc_layers.append(nn.Linear(layer_input, layer_out))
            layer_input = layer_out
        self.last_fc = nn.Linear(layer_input, 1)
        self.sigmoid = nn.Sigmoid()
        self.drop = nn.Dropout(self.dropout1)
        if activation == 'ReLU':
            self.activation = nn.ReLU()
        else:
            self.activation = nn.Tanh()

        self.att1 = nn.Linear(self.directions * lstm_hidden_size, att_dim, bias=False)
        self.att2 = nn.Linear(att_dim, 1, bias=False)

        self.causal_layer = causal_layer
        if causal_layer == 'adversarial':
            self.rev = RevGrad()
            self.drop2 = nn.Dropout(self.dropout2)
            layer_input = lstm_hidden_size * self.directions
            self.causal_layers = nn.ModuleList([])
            for layer_out in causal_hidden_dimensions:
                self.causal_layers.append(nn.Linear(layer_input, layer_out))
                layer_input = layer_out
            if not adversarial_out:
                self.causal_last_fc = nn.Linear(layer_input, 10) # regression as multiclass classification
                self.classes = torch.arange(1, 11).view(-1, 1).to(self.device, dtype=torch.float) # classes has shape 10, 1
                self.softmax = nn.Softmax()
            else:
                # adversarial out is a tuple of (number_of_confounders, ids of confounders with sigmoid)
                self.causal_last_fc = nn.Linear(layer_input, adversarial_out[0])

        elif causal_layer == 'residual':
            self.drop2 = nn.Dropout(self.dropout2)
            if not adversarial_out:
                layer_input = input_size
            else:
                layer_input = adversarial_out[0]
            self.causal_layers = nn.ModuleList([])
            for layer_out in causal_hidden_dimensions:
                self.causal_layers.append(nn.Linear(layer_input, layer_out))
                layer_input = layer_out
            self.causal_last_fc = nn.Linear(layer_input, 1)
Example #10
 def get_classifier():
     return nn.Sequential(RevGrad(), nn.Linear(model_dim, args.hidden),
                          nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(args.hidden, len(languages))).to(device)
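The helper above closes over model_dim, args.hidden, languages, and device from its enclosing scope. A self-contained variant with those names made explicit (the sizes are illustrative assumptions, and RevGrad is assumed to be the layer from the pytorch-revgrad package):

import torch.nn as nn
from pytorch_revgrad import RevGrad

def get_classifier(model_dim=512, hidden=256, num_languages=4, device='cpu'):
    # language-discriminator head; RevGrad reverses the gradient it sends back
    # into whatever encoder produces the model_dim-sized features
    return nn.Sequential(RevGrad(), nn.Linear(model_dim, hidden),
                         nn.ReLU(), nn.Dropout(0.1),
                         nn.Linear(hidden, num_languages)).to(device)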
Example #11
class DANN(nn.Module):
    def __init__(self,
                 learning_rate=0.05,
                 cls_num=2,
                 domain_num=2,
                 input_size=768,
                 hidden_layer_size=25,
                 lambda_adapt=1.,
                 maxiter=5000,
                 verbose=False,
                 batch_size=64,
                 use_cuda=True,
                 name=None,
                 cached=False,
                 cpt_path=''):
        """
        Domain Adversarial Neural Network for classification
        
        option "learning_rate" is the learning rate of the neural network.
        option "hidden_layer_size" is the hidden layer size.
        option "lambda_adapt" weights the domain adaptation regularization term.
                if 0 or None or False, then no domain adaptation regularization is performed
        option "maxiter" number of training iterations.
        option "epsilon_init" is a term used for initialization.
                if None the weight matrices are weighted by 6/(sqrt(r+c))
                (where r and c are the dimensions of the weight matrix)
        option "adversarial_representation": if False, the adversarial classifier is trained
                but has no impact on the hidden layer representation. The label predictor is
                then the same as a standard neural-network one (see experiments_moon.py figures). 
        option "seed" is the seed of the random number generator.
        """
        super(DANN, self).__init__()
        self.hidden_layer_size = hidden_layer_size
        self.maxiter = maxiter
        self.lambda_adapt = lambda_adapt if lambda_adapt not in (None,
                                                                 False) else 0.
        self.learning_rate = learning_rate
        self.verbose = verbose
        self.input_size = input_size
        self.feature_extractor = nn.Sequential(
            Linear(self.input_size, self.hidden_layer_size), nn.Sigmoid())
        self.classifier = nn.Linear(self.hidden_layer_size, cls_num)
        self.domain_classifier = nn.Sequential(
            nn.Linear(self.hidden_layer_size, 20), nn.Sigmoid(),
            nn.Linear(20, domain_num))
        self.batch_size = batch_size
        self.rev_grad = RevGrad()
        self.use_cuda = use_cuda
        self.criterion = nn.CrossEntropyLoss(reduction='mean')
        # self.d_optimizer = optim.SGD([{"params": self.classifier.parameters(), 'lr': 1e-3}])
        # self.optimizer = optim.SGD(self.parameters(), lr = 0.01, momentum = 0.9)
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.print_freq = 100
        self.name = name
        self.cached = cached
        self.checkpoint_path = cpt_path

    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x

    def _hidden_representation(self, x):
        x = (self.feature_extractor(x))
        return x

    def predict_(self, x):
        # outputs = self(torch.FloatTensor(x))
        x = torch.FloatTensor(x)
        outputs = self(x)
        _, predicted = torch.max(outputs.data, 1)
        return predicted.cpu().numpy()

    def _predict(self, x):
        outputs = self(x.cuda())
        _, predicted = torch.max(outputs.data, 1)
        return predicted.cpu().numpy()

    def predict(self, x):
        x = torch.FloatTensor(x)
        outputs = self(x.cuda())
        _, predicted = torch.max(outputs.data, 1)
        return predicted.cpu().numpy()

    def _predict_domain(self, x):
        outputs = self._hidden_representation(x)
        _, predicted = torch.max(self.domain_classifier(outputs), 1)
        return predicted.cpu().numpy()

    def L_y(self, x, y):
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return self.criterion(x, y)

    def L_d(self, x, domain_y):
        x = self.rev_grad(self.feature_extractor(x))
        x = self.domain_classifier(x)
        return self.criterion(x, domain_y)

    def validate(self, x, y):
        with torch.no_grad():
            preds = self._predict(x)
            acc = np.mean(preds == y)
        return acc

    def validate_domain(self, X, X_adapt):
        with torch.no_grad():
            domain_labels = np.array([0] * X_adapt.size(0) + [1] * X.size(0))
            domain_ds = data_utils.TensorDataset(
                torch.cat([X_adapt, X], dim=0), )
            # shuffle must be off here: predictions are compared element-wise against
            # domain_labels, which follows the concatenation order built above.
            loader = data_utils.DataLoader(domain_ds,
                                           batch_size=1024,
                                           shuffle=False,
                                           pin_memory=True,
                                           num_workers=4,
                                           drop_last=False)
            preds = []
            for x, in loader:
                if (self.use_cuda):
                    x = x.cuda()
                preds.extend(self._predict_domain(x))
            acc = np.mean(preds == domain_labels)
        return acc

    def fit(self,
            X,
            Y,
            X_adapt,
            X_valid=None,
            Y_valid=None,
            do_random_init=True):
        """         
        Trains the domain-adversarial neural network until it reaches a total number of
        iterations of "self.maxiter" since it was initialized.
        inputs:
              X : Source data matrix
              Y : Source labels
              X_adapt : Target data matrix
              (X_valid, Y_valid) : validation set used for early stopping.
              do_random_init : A boolean indicating whether to use random initialization (currently unused in this implementation).
        """

        if (self.cached and self.verbose):
            print("Attempt to Load Model from {} ...".format(
                self.checkpoint_path))

        if (self.cached and os.path.exists(self.checkpoint_path)):

            self.load_state_dict(torch.load(self.checkpoint_path))
            preds = self.predict_(X)
            correct = np.sum(preds == Y)
            correct = correct / len(Y)
            # print("Source Domain batch Acc.: {:.4f}".format(correct))

            if (self.use_cuda):
                self.cuda()
            return correct

        # X = X - np.mean(X, axis = 0)
        # X_adapt = X_adapt - np.mean(X_adapt, axis = 0)
        # print(X)
        # print(X_adapt)
        X, X_adapt = torch.FloatTensor(X), torch.FloatTensor(X_adapt)
        if (self.verbose):
            print("Adaptation size: {}".format(len(X_adapt)))
        # NOTE: X_valid and Y_valid are required for the early-stopping validation below,
        # even though they default to None in the signature.
        X_valid = torch.FloatTensor(X_valid)
        Y_cpu = Y.copy()
        Y = torch.LongTensor(Y)
        # domain_labels = torch.LongTensor([1]*X_adapt.size(0) + [1]*X.size(0))
        domain_ds = data_utils.TensorDataset(X_adapt, )
        clf_ds = data_utils.TensorDataset(X, Y)
        domain_loader = data_utils.DataLoader(domain_ds,
                                              batch_size=self.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=4,
                                              drop_last=True)
        clf_loader = data_utils.DataLoader(clf_ds,
                                           batch_size=self.batch_size,
                                           shuffle=True,
                                           pin_memory=True,
                                           num_workers=4,
                                           drop_last=True)
        domain_loader = list(domain_loader)
        clf_loader = list(clf_loader)
        best_acc = 0.0
        avg_acc = []
        print_count = 0

        if (self.use_cuda):
            self.cuda()
        running_loss = 0.0
        running_ld = 0.0
        running_ly = 0.0
        batch_counter = 0
        num_steps = (X.size(0) // self.batch_size) * self.maxiter

        for i in tqdm(range(self.maxiter)):
            for x, y in clf_loader:
                p = float(batch_counter) / num_steps
                l = 2. / (1. + np.exp(-10. * p)) - 1
                self.rev_grad.set_scale(l)
                # Adaptation param and learning rate schedule as described in the paper

                self.optimizer.zero_grad()
                # self.d_optimizer.zero_grad()
                # remove the random choicing of the batch data
                domain_x, = domain_loader[batch_counter % len(domain_loader)]
                domain_x = torch.cat([domain_x, x], dim=0)
                domain_y = torch.LongTensor([0] * self.batch_size +
                                            [1] * self.batch_size)
                if (self.use_cuda):
                    x, y = x.cuda(), y.cuda()
                    domain_x, domain_y = domain_x.cuda(), domain_y.cuda()
                l_y = self.L_y(x, y)
                l_d = self.L_d(domain_x, domain_y)
                loss = l_y + self.lambda_adapt * l_d
                loss.backward()
                # self.d_optimizer.step()

                self.optimizer.step()
                lr = 0.01 / (1. + 10 * p)**0.75
                # for g in self.optimizer.param_groups:
                #    g['lr'] = lr
                batch_counter += 1

                # update scale
                #

                running_loss += loss.item()
                running_ld += l_d.item()
                running_ly += l_y.item()
            if ((i + 1) % self.print_freq == 0):

                if self.verbose:
                    print(
                        'Iter {}/{} loss: {:.5f} Ly: {:.5f} Ld: {:5f}'.format(
                            i + 1, self.maxiter,
                            running_loss / self.print_freq,
                            running_ly / self.print_freq,
                            running_ld / self.print_freq))
                    print("p: {:.4f} l: {:.4f} lr: {:.4f}".format(p, l, lr))
                running_loss = 0.0
                running_ld = 0.0
                running_ly = 0.0
                target_acc = self.validate(X_valid, Y_valid)
                avg_acc.append(target_acc)

                if self.verbose:
                    print("Source Domain Acc.: {:.4f}".format(
                        self.validate(X, Y_cpu)))
                    print("Target Domain Acc.: {:.4f}".format(target_acc))
                    print("Domain Clf Acc.: {:.4f}".format(
                        self.validate_domain(
                            X,
                            X_adapt,
                        )))

                if (target_acc >= best_acc):
                    best_acc = target_acc
                    print_count += 1
                    torch.save(self.state_dict(), self.checkpoint_path)
        print(
            "INFER {} Best ACC in Valid Dataset. {:.4f} Average ACC {}".format(
                self.name, best_acc, avg_acc))
        return best_acc
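A hedged usage sketch for the DANN class above; the shapes, sizes, and checkpoint path are illustrative assumptions, and a CUDA device is assumed since predict() moves inputs to the GPU unconditionally:

import numpy as np

model = DANN(input_size=768, cls_num=2, domain_num=2, maxiter=200,
             batch_size=64, use_cuda=True, verbose=True, cpt_path='dann.ckpt')
X_src = np.random.randn(2048, 768).astype(np.float32)   # labelled source features
y_src = np.random.randint(0, 2, size=2048)               # source labels
X_tgt = np.random.randn(2048, 768).astype(np.float32)   # unlabelled target features
y_tgt = np.random.randint(0, 2, size=2048)               # target labels, used only for validation here

best_valid_acc = model.fit(X_src, y_src, X_tgt, X_valid=X_tgt, Y_valid=y_tgt)
preds = model.predict(X_tgt)                              # numpy array of predicted class indices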
Example #12
 def __init__(self, model_params):
     super(Discriminator_Matcher, self).__init__()
     self.Classifier = nn.Sequential(RevGrad(),
                                     nn.Linear(20, 20),
                                     nn.LeakyReLU(0.1),
                                     nn.Linear(20, 2))
Example #13
 def __init__(self, model_params):
     super(Discriminator_Compressor, self).__init__()
     self.Classifier = nn.Sequential(RevGrad(),
                                     nn.Linear(256, 128),
                                     nn.LeakyReLU(0.1),
                                     nn.Linear(128, 2))
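Examples #12 and #13, like most snippets above, only construct the adversarial head. A minimal, hedged sketch of how such a RevGrad head is typically trained jointly with a task head; every name and size below is an illustrative assumption, and RevGrad is assumed to be the layer from the pytorch-revgrad package:

import torch
import torch.nn as nn
from pytorch_revgrad import RevGrad

encoder = nn.Sequential(nn.Linear(768, 256), nn.ReLU())       # shared feature extractor
task_head = nn.Linear(256, 2)                                  # ordinary label classifier
domain_head = nn.Sequential(RevGrad(), nn.Linear(256, 2))      # adversarial domain classifier
optimizer = torch.optim.Adam(
    list(encoder.parameters()) + list(task_head.parameters()) + list(domain_head.parameters()),
    lr=1e-3)
criterion = nn.CrossEntropyLoss()

x = torch.randn(32, 768)            # a batch of input features
y = torch.randint(0, 2, (32,))      # task labels
d = torch.randint(0, 2, (32,))      # domain labels

h = encoder(x)
loss = criterion(task_head(h), y) + criterion(domain_head(h), d)
optimizer.zero_grad()
loss.backward()   # the domain loss reaches the encoder through RevGrad with its sign flipped,
                  # pushing the shared features toward domain invariance
optimizer.step()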