Example #1
# Assumed imports for this snippet (the example listing omits them);
# FP16_Optimizer comes from the legacy NVIDIA apex package, and
# acc(model, loader) is an accuracy helper sketched after the class.
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
try:
    import apex  # only needed when fp16=True
except ImportError:
    apex = None


class NN(nn.Module):
    def __init__(self, input_dim, output_dim, fp16, dev):
        super(NN, self).__init__()
        hidden = 512
        dropout = 0.5
        self.add_module('fc0', nn.Linear(input_dim, hidden))
        self.add_module('bn0', nn.BatchNorm1d(hidden))
        self.add_module('ac0', nn.ReLU())
        self.add_module('d0', nn.Dropout(dropout))
        for i in range(5):
            self.add_module('fc' + str(i + 1), nn.Linear(hidden, hidden))
            self.add_module('bn' + str(i + 1), nn.BatchNorm1d(hidden))
            self.add_module('ac' + str(i + 1), nn.ReLU())
            self.add_module('d' + str(i + 1), nn.Dropout(dropout))
        self.add_module('fc6', nn.Linear(hidden, output_dim))
        # Note: F.cross_entropy applies log-softmax internally, so this final
        # Softmax means training on a double softmax; kept to match the
        # original example.
        self.add_module('ac6', nn.Softmax(dim=1))
        self.train_acc = []
        self.val_acc = []
        self.epochs = [0]
        self.cri = F.cross_entropy
        self.op = Adam(self.parameters(), lr=0.001)
        self.fp16 = fp16
        self.to(dev)  # .to() and .half() modify the module in place
        if self.fp16:
            self.half()
            self.op = apex.fp16_utils.FP16_Optimizer(self.op,
                                                     static_loss_scale=512.0,
                                                     verbose=False)

    def forward(self, x):
        for layer in self.children():
            x = layer(x)
        return x

    def num_layers(self):
        # children() is a generator, so materialize it to count the modules
        return len(list(self.children()))

    def layer_output(self, x, n):
        # Return the activations after the n-th child module.
        # Use n = num_layers() - 1 for the final output, or a smaller n for
        # intermediate features. eval() makes the result reproducible by
        # disabling dropout and using running batch-norm statistics.
        self.eval()
        with torch.no_grad():
            for i, layer in enumerate(self.children()):
                x = layer(x)
                if i == n:
                    break
        self.train()
        return x

    def train_on(self, trainloader, validloader, epochs):
        # +1 on the upper bound so exactly `epochs` passes are run
        # (range's end is exclusive).
        for epoch in range(self.epochs[-1] + 1, self.epochs[-1] + epochs + 1):
            for (x, y) in trainloader:
                y_pred = self(x)
                loss = self.cri(y_pred, y)
                if self.fp16:
                    self.op.backward(loss)
                else:
                    loss.backward()
                self.op.step()
                self.op.zero_grad()
            if epoch % 3 == 0:
                train_acc = acc(self, trainloader)
                val_acc = acc(self, validloader)
                self.train_acc.append(train_acc)
                self.val_acc.append(val_acc)
                self.epochs.append(epoch)
                print('Epoch', epoch, 'acc:', train_acc, 'val_acc:', val_acc)
            if epoch % 10 == 0:
                self.save()
        self.save()

    def save(self):
        os.makedirs('./models', exist_ok=True)  # make sure the target directory exists
        path = './models/NN16.pth' if self.fp16 else './models/NN32.pth'
        torch.save(
            (self.state_dict(), self.epochs, self.train_acc, self.val_acc),
            path)
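
A minimal smoke test for the class above. The acc helper, the 784/10
dimensions, and the dummy tensors are assumptions for illustration; the
original repository supplies its own data pipeline and accuracy function.

import torch
from torch.utils.data import DataLoader, TensorDataset

def acc(model, loader):
    # Plain accuracy helper matching the signature train_on() expects.
    correct, total = 0, 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            pred = model(x).argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.numel()
    model.train()
    return correct / total

dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NN(input_dim=784, output_dim=10, fp16=False, dev=dev)

x = torch.randn(256, 784, device=dev)         # dummy data in place of a real dataset
y = torch.randint(0, 10, (256,), device=dev)
loader = DataLoader(TensorDataset(x, y), batch_size=64)
model.train_on(loader, loader, epochs=3)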
Example #2

# Excerpt from the middle of a knowledge-distillation training step: the
# enclosing batch loop, the `if` branches this code continues, and variables
# such as args, loss_dl, kd_loss, and ce_loss are defined earlier in the
# original file and are not part of the excerpt.
            student_patience = student_patience.transpose(
                0, 1).contiguous().view(n_layer, input_ids.shape[0],
                                        -1).transpose(0, 1)
        pt_loss = args.beta * patience_loss(teacher_patience,
                                            student_patience,
                                            args.normalize_patience)
        loss = loss_dl + pt_loss
    else:
        pt_loss = torch.tensor(0.0)
        loss = loss_dl

    if n_gpu > 1:
        loss = loss.mean()  # mean() to average on multi-gpu.

    if args.fp16:
        optimizer.backward(loss)
    else:
        loss.backward()

    n_sample = input_ids.shape[0]
    tr_loss += loss.item() * n_sample
    if isinstance(kd_loss, float):
        tr_kd_loss += kd_loss * n_sample
    else:
        tr_kd_loss += kd_loss.item() * n_sample
    tr_ce_loss += ce_loss.item() * n_sample
    tr_loss_pt += pt_loss.item() * n_sample  # '+=': the original's '=' reset the running total

    pred_cls = logits_pred_student.data.max(1)[1]
    tr_acc += pred_cls.eq(label_ids).sum().cpu().item()
    nb_tr_examples += n_sample
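
The patience_loss referenced above is not part of the excerpt. In
patient-knowledge-distillation setups it is typically a mean-squared error
between teacher and student intermediate representations, optionally
L2-normalized first; a sketch under that assumption (the repository's actual
implementation may differ):

import torch
import torch.nn.functional as F

def patience_loss(teacher_patience, student_patience, normalized_patience):
    # MSE between teacher and student hidden states; normalizing first makes
    # the loss scale-invariant across layers.
    if normalized_patience:
        teacher_patience = F.normalize(teacher_patience, p=2, dim=2)
        student_patience = F.normalize(student_patience, p=2, dim=2)
    return F.mse_loss(student_patience.float(), teacher_patience.float())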
Example #3
# Assumed imports for this snippet (the example listing omits them). The
# Flatten module is not shown in the original; the standard implementation
# below is assumed (this code predates nn.Flatten). The acc helper is the
# same external accuracy function sketched under Example #1.
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
try:
    import apex  # only needed when fp16=True
except ImportError:
    apex = None


class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)


class CNN(nn.Module):
    def __init__(self, dev, output_dim, fp16):
        super(CNN, self).__init__()
        hidden = 1024
        dropout = 0.5
        hidden_conv = 256

        self.add_module('conv0', nn.Conv2d(1, hidden_conv, 3, padding=1))
        self.add_module('bn0', nn.BatchNorm2d(hidden_conv))
        self.add_module('act0', nn.ReLU())
        self.add_module('d0', nn.Dropout2d(dropout))
        for i in range(3):
            self.add_module('conv' + str(i + 1),
                            nn.Conv2d(hidden_conv, hidden_conv, 3, padding=1))
            self.add_module('bn' + str(i + 1), nn.BatchNorm2d(hidden_conv))
            self.add_module('act' + str(i + 1), nn.ReLU())
            self.add_module('d' + str(i + 1), nn.Dropout2d(dropout))
        self.add_module('conv5', nn.Conv2d(hidden_conv, hidden_conv // 2, 3, padding=1))
        self.add_module('bn5', nn.BatchNorm2d(hidden_conv // 2))
        self.add_module('act5', nn.ReLU())
        self.add_module('d5', nn.Dropout2d(dropout))
        self.add_module('p5', nn.MaxPool2d(2))

        self.add_module('Flatten', Flatten())

        # 7168 = (hidden_conv // 2) * pooled spatial size; it is hard-coded
        # for the expected input resolution and must change if the input does.
        self.add_module('fc6', nn.Linear(7168, hidden))
        self.add_module('bn6', nn.BatchNorm1d(hidden))
        self.add_module('act6', nn.ReLU())
        self.add_module('d7', nn.Dropout(dropout))
        self.add_module('fc8', nn.Linear(hidden, output_dim))
        # Renamed from the original's duplicate 'act6', which silently replaced
        # the ReLU registered above instead of appending the softmax at the end.
        self.add_module('act8', nn.Softmax(dim=1))
        self.train_acc = []
        self.val_acc = []
        self.epochs = [0]
        self.cri = F.cross_entropy
        self.op = Adam(self.parameters(), lr=0.002)
        self.to(dev)  # .to() and .half() modify the module in place
        self.fp16 = fp16
        if self.fp16:
            self.half()
            self.op = apex.fp16_utils.FP16_Optimizer(self.op,
                                                     static_loss_scale=128.0,
                                                     verbose=False)
        
    def forward(self, x):
        # Add a channel dimension: (N, H, W) -> (N, 1, H, W)
        x = x.unsqueeze(1)
        for layer in self.children():
            x = layer(x)
        return x

    def num_layers(self):
        # children() is a generator, so materialize it to count the modules
        return len(list(self.children()))

    def layer_output(self, x, n):
        # Return the activations after the n-th child module.
        # Use n = num_layers() - 1 for the final output, or a smaller n for
        # intermediate features. eval() makes the result reproducible by
        # disabling dropout and using running batch-norm statistics.
        self.eval()
        x = x.unsqueeze(1)  # match forward(): add the channel dimension
        with torch.no_grad():
            for i, layer in enumerate(self.children()):
                x = layer(x)
                if i == n:
                    break
        self.train()
        return x

    def train_on(self, trainloader, validloader, epochs):
        # +1 on the upper bound so exactly `epochs` passes are run
        # (range's end is exclusive).
        for epoch in range(self.epochs[-1] + 1, self.epochs[-1] + epochs + 1):
            for (x, y) in trainloader:
                y_pred = self(x)
                loss = self.cri(y_pred, y)
                if self.fp16:
                    self.op.backward(loss)
                else:
                    loss.backward()
                self.op.step()
                self.op.zero_grad()

            if epoch % 3 == 0:
                train_acc = acc(self, trainloader)
                val_acc = acc(self, validloader)
                self.train_acc.append(train_acc)
                self.val_acc.append(val_acc)
                print('Epoch', epoch, 'acc:', train_acc, 'val_acc:', val_acc)
                self.epochs.append(epoch)
            if epoch % 10 == 0:
                self.save()
        self.save()
    
    def save(self):
        os.makedirs('./models', exist_ok=True)  # make sure the target directory exists
        path = './models/CNN16.pth' if self.fp16 else './models/CNN32.pth'
        torch.save(
            (self.state_dict(), self.epochs, self.train_acc, self.val_acc),
            path)
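
A quick smoke test for the CNN. The 14x16 input size is an assumption
reverse-engineered from the hard-coded 7168 (128 channels * 7 * 8 after the
2x2 pooling); the snippet does not show the original dataset, and train_on()
additionally needs the acc helper sketched under Example #1.

import torch

dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN(dev, output_dim=10, fp16=False)

x = torch.randn(64, 14, 16, device=dev)  # (N, H, W); forward() adds the channel dim
probs = model(x)                         # (64, 10) softmax outputs
feats = model.layer_output(x, model.num_layers() - 3)  # intermediate features
print(probs.shape, feats.shape)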