Example No. 1
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
    ):
        r"""
        labels (``torch.LongTensor`` of shape ``(batch_size, sequence_length)``, `optional`, defaults to :obj:`None`):
            Labels for computing the ELECTRA loss. Input should be a sequence of tokens (see :obj:`input_ids` docstring).
            Indices should be in ``[0, 1]``.
            ``0`` indicates the token is an original token,
            ``1`` indicates the token was replaced.

    Returns:
        :obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.ElectraConfig`) and inputs:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided):
            Total loss of the ELECTRA objective.
        scores (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`):
            Prediction scores of the head (scores for each token before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when :obj:`config.output_hidden_states=True`):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer)
            of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.


    Examples::

        from transformers import ElectraTokenizer, ElectraForPreTraining
        import torch

        tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
        model = ElectraForPreTraining.from_pretrained('google/electra-small-discriminator')

        input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
        outputs = model(input_ids)

        scores = outputs[0]

        """

        discriminator_hidden_states = self.electra(
            input_ids,
            attention_mask,
            token_type_ids,
            position_ids,
            head_mask,
            inputs_embeds,
            output_attentions,
        )
        discriminator_sequence_output = discriminator_hidden_states[0]

        logits = self.discriminator_predictions(discriminator_sequence_output,
                                                attention_mask)

        output = (logits, )

        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            if attention_mask is not None:
                active_loss = attention_mask.view(
                    -1, discriminator_sequence_output.shape[1]) == 1
                active_logits = logits.view(
                    -1, discriminator_sequence_output.shape[1])[active_loss]
                active_labels = labels[active_loss]
                loss = loss_fct(active_logits, active_labels.float())
            else:
                loss = loss_fct(
                    logits.view(-1, discriminator_sequence_output.shape[1]),
                    labels.float())

            output = (loss, ) + output

        output += discriminator_hidden_states[1:]

        return output  # (loss), scores, (hidden_states), (attentions)
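The masked token-level BCE computed in the forward above can be reproduced in isolation; a minimal sketch with random logits and labels (shapes here are only illustrative):

import torch
import torch.nn as nn

batch_size, seq_len = 2, 8
logits = torch.randn(batch_size, seq_len)             # per-token discriminator scores
labels = torch.randint(0, 2, (batch_size, seq_len))   # 0 = original token, 1 = replaced token
attention_mask = torch.ones(batch_size, seq_len)      # 1 = real token, 0 = padding

loss_fct = nn.BCEWithLogitsLoss()
active = attention_mask.view(-1, seq_len) == 1        # keep only non-padding positions
loss = loss_fct(logits.view(-1, seq_len)[active], labels[active].float())
print(loss)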
Example No. 2
method = "ap-perf"              # uncomment if we want to use ap-perf objective 
# method = "bce-loss"           # uncomment if we want to use bce-loss objective

torch.manual_seed(1)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

nvar = X_tr.shape[1]
model = Net(nvar).to(device)

if method == "ap-perf":
    criterion = MetricLayer(f2).to(device)
    lr = 3e-3
    weight_decay = 1e-3
else:
    criterion = nn.BCEWithLogitsLoss().to(device)
    lr = 1e-2
    weight_decay = 1e-3

optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)

for epoch in range(100):

    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        output = model(inputs)

        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
Example No. 3
def train_model_2_class(directory, f, opt):
    filepath = directory + f
    print('Loading data...')
    train_ldrs, test_ldrs = load_data(filepath + '.csv')

    tr_loss = []
    tr_acc = []
    vl_loss = []
    vl_acc = []
    train_sizes = []
    test_sizes = []
    for train_ldr, test_ldr in zip(train_ldrs, test_ldrs):
        train_sizes.append(len(train_ldr.dataset))
        test_sizes.append(len(test_ldr.dataset))
        net = utils.SmallNet(2).to(device)
        net.size = net_size
        net.n_filters = net.size
        criterion = nn.BCEWithLogitsLoss()

        if opt == 'SGD':
            optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9,
                                  weight_decay=0.0005, nesterov=True)
        elif opt == 'Adam':
            optimizer = optim.Adam(net.parameters(), lr=0.001,
                                   betas=(0.9, 0.999), weight_decay=0.0005,
                                   amsgrad=False)
        elif opt == 'RMSprop':
            optimizer = optim.RMSprop(net.parameters(), lr=0.01,
                                      weight_decay=0.0005, momentum=0.9)
        else:
            raise ValueError('Invalid optimizer selected. Choose \'SGD\', '
                             '\'Adam\' or \'RMSprop\'.')
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=n_schedule,
                                                   gamma=0.1)

        print('Training...')
        print('Filters per layer:', net.n_filters)
        print('Criterion:', criterion)
        print(optimizer)

        losses = [[], [100]]
        accs = [[], []]
        early_stopping = 0
        for epoch in range(n_epochs):
            # Training
            net.train()  # setting .training directly does not propagate to submodules
            train_correct = 0
            train_total = 0
            train_loss = 0.0
            for local_batch, local_labels in train_ldr:
                # Transfer to GPU
                local_batch = local_batch.to(device, dtype=torch.float)
                local_labels = local_labels.view(-1, 1).to(device,
                                                           dtype=torch.float)

                # Train
                optimizer.zero_grad()

                # Forward + backward + optimize
                logits = net(local_batch).view(-1, 1)
                loss = criterion(logits, local_labels)
                loss.backward()
                optimizer.step()

                # Tracking
                train_loss += loss.item()
                outputs = torch.sigmoid(logits)
                predicted = (outputs >= 0.5).view(-1).to(device,
                                                         dtype=torch.long)
                local_labels = local_labels.view(-1).to(device, dtype=torch.long)
                train_total += local_labels.size(0)
                train_correct += (predicted == local_labels).sum().item()

            train_acc = train_correct / train_total
            scheduler.step()

            # Validation
            net.eval()
            val_correct = 0
            val_total = 0
            val_loss = 0
            with torch.no_grad():
                for local_batch, local_labels in test_ldr:
                    # Transfer to GPU
                    local_batch = local_batch.to(device, dtype=torch.float)
                    local_labels = local_labels.view(-1, 1).to(device,
                                                               dtype=torch.float)

                    # Test
                    logits = net(local_batch).view(-1, 1)
                    loss = criterion(logits, local_labels)

                    # Tracking
                    val_loss += loss.item()
                    outputs = torch.sigmoid(logits)
                    predicted = (outputs >= 0.5).view(-1).to(device,
                                                             dtype=torch.long)
                    local_labels = local_labels.view(-1).to(device,
                                                            dtype=torch.long)
                    val_total += local_labels.size(0)
                    val_correct += (predicted == local_labels).sum().item()

            val_acc = val_correct / val_total

            losses[0].append(train_loss)
            losses[1].append(val_loss)
            accs[0].append(train_acc)
            accs[1].append(val_acc)

            if val_loss >= losses[1][-2]:
                early_stopping += 1
            elif early_stopping > 0:
                early_stopping -= 1

            early = False
            if early_stopping >= n_early and epoch > min_epochs:
                early = True

            if epoch % 10 == 9 or early:
                print('Epoch:', epoch + 1,
                      '| Train Acc:', round(train_acc, 8),
                      '| Train Loss:', round(train_loss, 8),
                      '| Val Acc:', round(val_acc, 8),
                      '| Val Loss:', round(val_loss, 8),
                      '| Early:', early_stopping)

            if early:
                print('Early stopping.')
                break

        losses[1] = losses[1][1:]

        tr_loss.append(losses[0])
        tr_acc.append(accs[0])
        vl_loss.append(losses[1])
        vl_acc.append(accs[1])

    best = [mean(a[-10:]) for a in vl_acc]
    if plot_:
        # Plot loss and accuracy
        savedir_ = savedir + '\\cnn-2d\\' + f[1:] + '\\'
        plot(savedir_, f, tr_loss, tr_acc, vl_loss, vl_acc, best)

    return best, train_sizes, test_sizes
Example No. 4
def compute_loss(p, targets, model):  # predictions, targets, model
    ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
    lcls, lbox, lobj = ft([0]), ft([0]), ft([0])
    tcls, tbox, indices, anchor_vec = build_targets(model, targets)
    h = model.hyp  # hyperparameters
    arc = model.arc  # (default, uCE, uBCE) detection architectures

    # Define criteria
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([h['cls_pw']]))
    BCEobj = nn.BCEWithLogitsLoss(pos_weight=ft([h['obj_pw']]))
    BCE = nn.BCEWithLogitsLoss()
    CE = nn.CrossEntropyLoss()  # weight=model.class_weights

    if 'F' in arc:  # add focal loss
        g = h['fl_gamma']
        BCEcls, BCEobj, BCE, CE = FocalLoss(BCEcls, g), FocalLoss(
            BCEobj, g), FocalLoss(BCE, g), FocalLoss(CE, g)

    # Compute losses
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(pi[..., 0])  # target obj

        # Compute losses
        nb = len(b)
        if nb:  # number of targets
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
            tobj[b, a, gj, gi] = 1.0  # obj
            # ps[:, 2:4] = torch.sigmoid(ps[:, 2:4])  # wh power loss (uncomment)

            # GIoU
            pxy = torch.sigmoid(ps[:, 0:2])  # pxy = pxy * s - (s - 1) / 2,  s = 1.5  (scale_xy)
            pbox = torch.cat((pxy, torch.exp(ps[:, 2:4]).clamp(max=1E4) * anchor_vec[i]), 1)  # predicted box
            giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True)  # giou computation
            lbox += (1.0 - giou).mean()  # giou loss

            if 'default' in arc and model.nc > 1:  # cls loss (only if multiple classes)
                t = torch.zeros_like(ps[:, 5:])  # targets
                t[range(nb), tcls[i]] = 1.0
                lcls += BCEcls(ps[:, 5:], t)  # BCE
                # lcls += CE(ps[:, 5:], tcls[i])  # CE

                # Instance-class weighting (use with reduction='none')
                # nt = t.sum(0) + 1  # number of targets per class
                # lcls += (BCEcls(ps[:, 5:], t) / nt).mean() * nt.mean()  # v1
                # lcls += (BCEcls(ps[:, 5:], t) / nt[tcls[i]].view(-1,1)).mean() * nt.mean()  # v2

            # Append targets to text file
            # with open('targets.txt', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

        if 'default' in arc:  # separate obj and cls
            lobj += BCEobj(pi[..., 4], tobj)  # obj loss

        elif 'BCE' in arc:  # unified BCE (80 classes)
            t = torch.zeros_like(pi[..., 5:])  # targets
            if nb:
                t[b, a, gj, gi, tcls[i]] = 1.0
            lobj += BCE(pi[..., 5:], t)

        elif 'CE' in arc:  # unified CE (1 background + 80 classes)
            t = torch.zeros_like(pi[..., 0], dtype=torch.long)  # targets
            if nb:
                t[b, a, gj, gi] = tcls[i] + 1
            lcls += CE(pi[..., 4:].view(-1, model.nc + 1), t.view(-1))

    lbox *= h['giou']
    lobj *= h['obj']
    lcls *= h['cls']
    loss = lbox + lobj + lcls
    return loss, torch.cat((lbox, lobj, lcls, loss)).detach()
Example No. 5
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
    model.static_embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.static_embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
else:
    model.non_static_embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
    model.non_static_embedding.weight.data[UNK_IDX] = torch.zeros(
        EMBEDDING_DIM)
    model.non_static_embedding.weight.data[PAD_IDX] = torch.zeros(
        EMBEDDING_DIM)

# setting optimizer
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))

# setting loss function
criterion = nn.BCEWithLogitsLoss()
criterion = criterion.to(device)

# setting some vars
N_EPOCHS = 20
best_valid_loss = float('inf')
last_valid_loss = float('inf')

model = model.to(device)

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss, train_acc = utils.train(model, train_iterator, optimizer,
                                        criterion)
Example No. 6
    def build_model(self):
        """ DataLoader """
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            # resize to +15 for 128 pix image, +30 for 256 pix image
            transforms.Resize((self.img_size + 15, self.img_size + 15)),
            transforms.RandomCrop(self.img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        test_transform = transforms.Compose([
            transforms.Resize((self.img_size, self.img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])

        self.trainA = ImageFolder(
            os.path.join('dataset', self.dataset, 'trainA'), train_transform)
        self.trainB = ImageFolder(
            os.path.join('dataset', self.dataset, 'trainB'), train_transform)
        self.testA = ImageFolder(
            os.path.join('dataset', self.dataset, 'testA'), test_transform)
        self.testB = ImageFolder(
            os.path.join('dataset', self.dataset, 'testB'), test_transform)
        self.trainA_loader = DataLoader(self.trainA,
                                        batch_size=self.batch_size,
                                        shuffle=True)
        self.trainB_loader = DataLoader(self.trainB,
                                        batch_size=self.batch_size,
                                        shuffle=True)
        self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False)
        self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False)
        """ Define Generator, Discriminator """
        self.genA2B = ResnetGenerator(
            input_nc=3,
            output_nc=3,
            ngf=self.ch,
            n_blocks=self.n_res,
            img_size=self.img_size,
            light=self.light,
        ).to(self.device)
        self.genB2A = ResnetGenerator(
            input_nc=3,
            output_nc=3,
            ngf=self.ch,
            n_blocks=self.n_res,
            img_size=self.img_size,
            light=self.light,
        ).to(self.device)
        self.disGA = Discriminator(
            input_nc=3,
            ndf=self.ch,
            n_layers=7,
        ).to(self.device)
        self.disGB = Discriminator(
            input_nc=3,
            ndf=self.ch,
            n_layers=7,
        ).to(self.device)
        self.disLA = Discriminator(
            input_nc=3,
            ndf=self.ch,
            n_layers=5,
        ).to(self.device)
        self.disLB = Discriminator(
            input_nc=3,
            ndf=self.ch,
            n_layers=5,
        ).to(self.device)
        """ Define Loss """
        self.L1_loss = nn.L1Loss().to(self.device)
        self.MSE_loss = nn.MSELoss().to(self.device)
        self.BCE_loss = nn.BCEWithLogitsLoss().to(self.device)
        """ Trainer """
        self.G_optim = torch.optim.Adam(
            itertools.chain(
                self.genA2B.parameters(),
                self.genB2A.parameters(),
            ),
            lr=self.lr,
            betas=(0.5, 0.999),
            weight_decay=self.weight_decay,
        )
        self.D_optim = torch.optim.Adam(
            itertools.chain(
                self.disGA.parameters(),
                self.disGB.parameters(),
                self.disLA.parameters(),
                self.disLB.parameters(),
            ),
            lr=self.lr,
            betas=(0.5, 0.999),
            weight_decay=self.weight_decay,
        )
        """ Define Rho clipper to constraint the value of rho in AdaILN
        and ILN"""
        self.Rho_clipper = RhoClipper(0, 1)
Example No. 7
    def __init__(
        self,
        document_embeddings: flair.embeddings.DocumentEmbeddings,
        label_dictionary: Dictionary,
        label_type: str = None,
        multi_label: bool = None,
        multi_label_threshold: float = 0.5,
        beta: float = 1.0,
        loss_weights: Dict[str, float] = None,
    ):
        """
        Initializes a TextClassifier
        :param document_embeddings: embeddings used to embed each data point
        :param label_dictionary: dictionary of labels you want to predict
        :param multi_label: auto-detected by default, but you can set this to True to force multi-label prediction
        or False to force single-label prediction
        :param multi_label_threshold: If multi-label you can set the threshold to make predictions
        :param beta: Parameter for F-beta score for evaluation and training annealing
        :param loss_weights: Dictionary of weights for labels for the loss function
        (if any label's weight is unspecified it will default to 1.0)
        """

        super(TextClassifier, self).__init__()

        self.document_embeddings: flair.embeddings.DocumentEmbeddings = document_embeddings
        self.label_dictionary: Dictionary = label_dictionary
        self.label_type = label_type

        if multi_label is not None:
            self.multi_label = multi_label
        else:
            self.multi_label = self.label_dictionary.multi_label

        self.multi_label_threshold = multi_label_threshold

        self.beta = beta

        self.weight_dict = loss_weights
        # Initialize the weight tensor
        if loss_weights is not None:
            n_classes = len(self.label_dictionary)
            weight_list = [1. for i in range(n_classes)]
            for i, tag in enumerate(self.label_dictionary.get_items()):
                if tag in loss_weights.keys():
                    weight_list[i] = loss_weights[tag]
            self.loss_weights = torch.FloatTensor(weight_list).to(flair.device)
        else:
            self.loss_weights = None

        self.decoder = nn.Linear(self.document_embeddings.embedding_length,
                                 len(self.label_dictionary))

        nn.init.xavier_uniform_(self.decoder.weight)

        if self.multi_label:
            self.loss_function = nn.BCEWithLogitsLoss(weight=self.loss_weights)
        else:
            self.loss_function = nn.CrossEntropyLoss(weight=self.loss_weights)

        # auto-spawn on GPU if available
        self.to(flair.device)
    def forward(self, teacher_features, features, y_pred, labels):
        consistency_loss = nn.MSELoss()(teacher_features.view(-1), features.view(-1))
        cls_loss = nn.BCEWithLogitsLoss()(y_pred, labels)
        loss = self.weights[0] * consistency_loss + self.weights[1] * cls_loss
        return loss
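The forward above reads self.weights, which this snippet does not define; a minimal sketch of the presumed constructor, assuming weights is simply a two-element sequence (e.g. the CFG.weights passed to CustomLoss further below):

import torch.nn as nn

class CustomLoss(nn.Module):
    def __init__(self, weights=(1.0, 1.0)):
        super().__init__()
        self.weights = weights  # (consistency weight, classification weight)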
def train_loop(folds, fold):

    if CFG.device == 'GPU':
        LOGGER.info(f"========== fold: {fold} training ==========")
    elif CFG.device == 'TPU':
        if CFG.nprocs == 1:
            LOGGER.info(f"========== fold: {fold} training ==========")
        elif CFG.nprocs == 8:
            xm.master_print(f"========== fold: {fold} training ==========")
            
    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    
    train_folds = train_folds[train_folds['StudyInstanceUID'].isin(train_annotations['StudyInstanceUID'].unique())].reset_index(drop=True)
    
    valid_labels = valid_folds[CFG.target_cols].values
    
    train_dataset = TrainDataset(train_folds, train_annotations, use_annot=True,
                                 transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, train_annotations, use_annot=False,
                                 transform=get_transforms(data='valid'))
    
    if CFG.device == 'GPU':
        train_loader = DataLoader(train_dataset, 
                                  batch_size=CFG.batch_size, 
                                  shuffle=True, 
                                  num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
        valid_loader = DataLoader(valid_dataset, 
                                  batch_size=CFG.batch_size * 2, 
                                  shuffle=False, 
                                  num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
        
    elif CFG.device == 'TPU':
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset,
                                                                        num_replicas=xm.xrt_world_size(),
                                                                        rank=xm.get_ordinal(),
                                                                        shuffle=True)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=CFG.batch_size,
                                                   sampler=train_sampler,
                                                   drop_last=True,
                                                   num_workers=CFG.num_workers)
        
        valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset,
                                                                        num_replicas=xm.xrt_world_size(),
                                                                        rank=xm.get_ordinal(),
                                                                        shuffle=False)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=CFG.batch_size * 2,
                                                   sampler=valid_sampler,
                                                   drop_last=False,
                                                   num_workers=CFG.num_workers)
        
    # ====================================================
    # scheduler 
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler
    
    # ====================================================
    # model & optimizer
    # ====================================================
    if CFG.device == 'TPU':
        device = xm.xla_device()
    elif CFG.device == 'GPU':
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
    teacher_model = CustomSeResNet152D(CFG.model_name, pretrained=False)
    teacher_model.to(device)
    state = torch.load(CFG.teacher)
    teacher_model.load_state_dict(state['model'])
    for param in teacher_model.parameters():
        param.requires_grad = False
    teacher_model.eval()
#     teacher_model.to(device)
    
    model = CustomSeResNet152D_WLF(CFG.model_name, pretrained=True)
    model.to(device)
#     state = torch.load(CFG.student)
#     model.load_state_dict(state['model'])

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)
    
    # ====================================================
    # loop
    # ====================================================
    train_criterion = CustomLoss(weights=CFG.weights)
    valid_criterion = nn.BCEWithLogitsLoss()

    best_score = 0.
    best_loss = np.inf
    
    for epoch in range(CFG.epochs):
        
        start_time = time.time()
        
        # train
        if CFG.device == 'TPU':
            if CFG.nprocs == 1:
                avg_loss = train_fn(train_loader, teacher_model, model, train_criterion, optimizer, epoch, scheduler, device)
            elif CFG.nprocs == 8:
                para_train_loader = pl.ParallelLoader(train_loader, [device])
                avg_loss = train_fn(para_train_loader.per_device_loader(device), teacher_model, model, train_criterion, optimizer, epoch, scheduler, device)
        elif CFG.device == 'GPU':
            avg_loss = train_fn(train_loader, teacher_model, model, train_criterion, optimizer, epoch, scheduler, device)
        
        # eval
        if CFG.device == 'TPU':
            if CFG.nprocs == 1:
                avg_val_loss, preds, _ = valid_fn(valid_loader, model, valid_criterion, device)
            elif CFG.nprocs == 8:
                para_valid_loader = pl.ParallelLoader(valid_loader, [device])
                avg_val_loss, preds, valid_labels = valid_fn(para_valid_loader.per_device_loader(device), model, valid_criterion, device)
                preds = idist.all_gather(torch.tensor(preds)).to('cpu').numpy()
                valid_labels = idist.all_gather(torch.tensor(valid_labels)).to('cpu').numpy()
        elif CFG.device == 'GPU':
            avg_val_loss, preds, _ = valid_fn(valid_loader, model, valid_criterion, device)
            
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()
            
        # scoring
        score, scores = get_score(valid_labels, preds)

        elapsed = time.time() - start_time
        
        if CFG.device == 'GPU':
            LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
            LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}')
        elif CFG.device == 'TPU':
            if CFG.nprocs == 1:
                LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
                LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}')
            elif CFG.nprocs == 8:
                xm.master_print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
                xm.master_print(f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}')
                
        if score > best_score:
            best_score = score
            if CFG.device == 'GPU':
                LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
                torch.save({'model': model.state_dict(), 
                            'preds': preds},
                           OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_score.pth')
            elif CFG.device == 'TPU':
                if CFG.nprocs == 1:
                    LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
                elif CFG.nprocs == 8:
                    xm.master_print(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
                xm.save({'model': model, 
                         'preds': preds}, 
                        OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_score.pth')
                
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            if CFG.device == 'GPU':
                LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
                torch.save({'model': model.state_dict(), 
                            'preds': preds},
                           OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_loss.pth')
            elif CFG.device == 'TPU':
                if CFG.nprocs == 1:
                    LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
                elif CFG.nprocs == 8:
                    xm.master_print(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
                xm.save({'model': model, 
                         'preds': preds}, 
                        OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_loss.pth')
                
#         # save every epoch's checkpoint for use at inference time
#         if CFG.device == 'TPU':
#             xm.save({'model': model.state_dict()}, OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_epoch{epoch+1}.pth')
#         elif CFG.device == 'GPU':
#             torch.save({'model': model.state_dict()}, OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_epoch{epoch+1}.pth')
                
        if CFG.nprocs != 8:
            check_point = torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best_score.pth')
            for c in [f'pred_{c}' for c in CFG.target_cols]:
                valid_folds[c] = np.nan
            valid_folds[[f'pred_{c}' for c in CFG.target_cols]] = check_point['preds']

    return valid_folds
Example No. 10
def bce_loss(output, target):
    criterion = nn.BCEWithLogitsLoss(reduction='mean')
    return criterion(output, target)
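A quick check of the wrapper above: BCEWithLogitsLoss takes raw logits and float targets of the same shape (the tensors here are illustrative):

import torch
import torch.nn as nn

output = torch.randn(4, 3)                    # raw logits, no sigmoid applied
target = torch.randint(0, 2, (4, 3)).float()  # binary targets as floats
print(bce_loss(output, target))               # mean loss over all 12 elements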
Example No. 11
    def __init__(self, batch=True):
        super(dice_bce_loss, self).__init__()
        self.batch = batch
        # self.bce_loss = nn.BCELoss()
        self.bce_loss = nn.BCEWithLogitsLoss()
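Only the constructor is shown here; a minimal sketch of how a combined dice + BCE loss is typically computed on logits y_pred and binary masks y_true (this is an assumption, not the original class's forward):

import torch
import torch.nn as nn

def soft_dice(y_pred, y_true, eps=1.0):
    probs = torch.sigmoid(y_pred)                 # convert logits to probabilities
    inter = (probs * y_true).sum()
    return (2.0 * inter + eps) / (probs.sum() + y_true.sum() + eps)

def dice_bce(y_pred, y_true):
    bce = nn.BCEWithLogitsLoss()(y_pred, y_true)  # BCE on raw logits
    return bce + (1.0 - soft_dice(y_pred, y_true))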
Example No. 12
G = VAE_SR(input_dim=3, dim=64, scale_factor=opt.upscale_factor)
D = discriminator(num_channels=3,
                  base_filter=64,
                  image_size=opt.patch_size * opt.upscale_factor)
feat_extractor = VGGFeatureExtractor(feature_layer=34,
                                     use_bn=False,
                                     use_input_norm=True,
                                     device='cuda')

denoiser = torch.nn.DataParallel(denoiser, device_ids=gpus_list)
G = torch.nn.DataParallel(G, device_ids=gpus_list)
D = torch.nn.DataParallel(D, device_ids=gpus_list)
feat_extractor = torch.nn.DataParallel(feat_extractor, device_ids=gpus_list)

L1_loss = nn.L1Loss()
BCE_loss = nn.BCEWithLogitsLoss()

print('---------- Generator architecture -------------')
print_network(G)
print('---------- Discriminator architecture -------------')
print_network(D)
print('----------------------------------------------')

model_denoiser = os.path.join(opt.save_folder + 'VAE_denoiser.pth')
denoiser.load_state_dict(
    torch.load(model_denoiser, map_location=lambda storage, loc: storage))
print('Pre-trained Denoiser model is loaded.')

if opt.pretrained:
    model_G = os.path.join(opt.save_folder + opt.pretrained_sr)
    model_D = os.path.join(opt.save_folder + opt.pretrained_D)
Example No. 13
    def __init__(self, num_hard=0):
        super(Loss, self).__init__()

        self.classify_loss = nn.BCEWithLogitsLoss()
    def train_model(model,
                    tag_name,
                    target_cols_now,
                    fine_tune_scheduler=None):

        print(f'=={tag_name}==')

        train_losses = list()
        valid_losses = list()

        x_train, y_train = train_df[feature_cols].values, train_df[
            target_cols_now].values
        x_valid, y_valid = valid_df[feature_cols].values, valid_df[
            target_cols_now].values

        train_dataset = MoADataset(x_train, y_train)
        valid_dataset = MoADataset(x_valid, y_valid)

        trainloader = torch.utils.data.DataLoader(train_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True)
        validloader = torch.utils.data.DataLoader(valid_dataset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=False)

        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=LEARNING_RATE,
                                     weight_decay=WEIGHT_DECAY[tag_name])
        scheduler = optim.lr_scheduler.OneCycleLR(
            optimizer=optimizer,
            steps_per_epoch=len(trainloader),
            pct_start=PCT_START,
            div_factor=DIV_FACTOR[tag_name],
            max_lr=MAX_LR[tag_name],
            epochs=EPOCHS)

        loss_fn = nn.BCEWithLogitsLoss()
        loss_tr = SmoothCrossEntropyLoss(smoothing=smoothing)

        oof = np.zeros((len(train), len(target_cols_now)))
        best_loss = np.inf

        #         for epoch in range(EPOCHS):

        #             if fine_tune_scheduler is not None:
        #                 fine_tune_scheduler.step(epoch, model)

        #             print(f'-----EPOCH{epoch+1}-----')

        #             train_loss, train_metric = train_fn(model, optimizer, scheduler, loss_tr, loss_fn, trainloader, DEVICE)
        #             print(f'train_loss: {train_loss:.5f}, train_metric: {train_metric:.5f}')
        #             train_losses.append(train_loss)
        #             valid_loss, valid_metric, valid_preds = valid_fn(model, loss_tr, loss_fn, validloader, DEVICE)
        #             print(f'valid_loss: {valid_loss:.5f}, valid_metric: {valid_metric:.5f}')
        #             valid_losses.append(valid_loss)

        #             if valid_loss < best_loss:

        #                 best_loss = valid_loss
        #                 oof[val_idx] = valid_preds
        #                 torch.save(model.state_dict(), f'Simple_Deep_FOLD{fold+1}_SEED{seed}.pth')

        #         plt.plot(train_losses, label='train_losses')
        #         plt.plot(valid_losses, label='valid_losses')
        #         plt.xlabel('epochs')
        #         plt.ylabel('loss')
        #         if tag_name == 'ALL_TARGETS':
        #             plt.ylim([6e-3, 1.5e-2])
        #         else:
        #             plt.ylim([1e-2, 2e-2])
        #         plt.title(f'fold{fold+1} losses')
        #         plt.show()

        valid_loss, valid_metric, valid_preds = valid_fn(
            model, loss_tr, loss_fn, validloader, DEVICE)
        oof[val_idx] = valid_preds

        return oof
Example No. 15
def main():
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    if args.cuda:
        cudnn.benchmark = True

    noise_list = ['airport_', 'babble_', 'car_', 'destroyerengine_', 'F16_cockpit_', 'factory_', 'machinegun_',
                  'street_', 'train_', 'volvo_']
    SNR_list = ['SNR-5', 'SNR0', 'SNR5', 'SNR10']
    DB_list = []

    for i in range(len(noise_list)):
        for j in range(len(SNR_list)):
            DB_list.append(noise_list[i] + SNR_list[j])

    LOG_DIR = args.log_dir + str(
        args.seed) + '/Padding-{}/Atype-{}_Loss-{}_gamma-{}'.format(args.padding_time, args.attention_type, args.loss, args.gamma)
    print(LOG_DIR)

    input_size = c.FILTER_BANK
    model = Model(rnn_model=args.RNN_model, input_size=input_size, rnn_hidden_size=args.hidden_size,
                  num_layers=args.num_layers, dnn_hidden_size=c.P_DNN_HIDDEN_SIZE, seq_len=args.seq_len,
                  attention_type=args.attention_type)

    test_DB = read_DB_structure(os.path.join(c.MFB_DIR + '_' + str(1.0), 'test_folder'), 'test')

    device_num = 'cuda:' + args.gpu_id
    device = torch.device(device_num)

    if args.cuda:
        model.to(device)

    print('=> loading checkpoint: CP_NUM = ' + str(args.cp_num))
    checkpoint = torch.load(LOG_DIR + '/checkpoint ' + str(args.cp_num) + '.pth')

    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    criterion = nn.BCEWithLogitsLoss()

    snr_files = np.zeros(4)
    snr_AUC = np.zeros(4)
    five_files = np.zeros(4)
    five_noises_auc = np.zeros(4)

    for i in range(len(DB_list)):
        selected_DB = select_test_DB(test_DB, DB_list[i])
        print(DB_list[i])
        m_Acc, m_AUC, m_EER, m_cost, temp_AUC, n_files = test(model, selected_DB, criterion)
        snr = DB_list[i].split('_')[-1][3:]

        if snr == '-5':
            snr_files[0] = snr_files[0] + n_files
            snr_AUC[0] = snr_AUC[0] + temp_AUC
            if DB_list[i].split('_')[0] == 'babble' or DB_list[i].split('_')[0] == 'destroyerengine' or \
                    DB_list[i].split('_')[0] == 'F16_cockpit' or DB_list[i].split('_')[0] == 'factory' or DB_list[i].split('_')[0] == 'street':
                five_noises_auc[0] = five_noises_auc[0] + temp_AUC
                five_files[0] = five_files[0] + n_files

        elif snr == '0':
            snr_files[1] = snr_files[1] + n_files
            snr_AUC[1] = snr_AUC[1] + temp_AUC
            if DB_list[i].split('_')[0] == 'babble' or DB_list[i].split('_')[0] == 'destroyerengine' or \
                    DB_list[i].split('_')[0] == 'F16_cockpit' or DB_list[i].split('_')[0] == 'factory' or DB_list[i].split('_')[0] == 'street':
                five_noises_auc[1] = five_noises_auc[1] + temp_AUC
                five_files[1] = five_files[1] + n_files

        elif snr == '5':
            snr_files[2] = snr_files[2] + n_files
            snr_AUC[2] = snr_AUC[2] + temp_AUC
            if DB_list[i].split('_')[0] == 'babble' or DB_list[i].split('_')[0] == 'destroyerengine' or \
                    DB_list[i].split('_')[0] == 'F16_cockpit' or DB_list[i].split('_')[0] == 'factory' or DB_list[i].split('_')[0] == 'street':
                five_noises_auc[2] = five_noises_auc[2] + temp_AUC
                five_files[2] = five_files[2] + n_files

        elif snr == '10':
            snr_files[3] = snr_files[3] + n_files
            snr_AUC[3] = snr_AUC[3] + temp_AUC
            if DB_list[i].split('_')[0] == 'babble' or DB_list[i].split('_')[0] == 'destroyerengine' or \
                    DB_list[i].split('_')[0] == 'F16_cockpit' or DB_list[i].split('_')[0] == 'factory' or DB_list[i].split('_')[0] == 'street':
                five_noises_auc[3] = five_noises_auc[3] + temp_AUC
                five_files[3] = five_files[3] + n_files

    print('-'*7 + 'All Noises' + '-'*7)
    print(tabulate([['-5dB AUC', 100*(snr_AUC[0] / snr_files[0])], [' 0dB AUC', 100*(snr_AUC[1] / snr_files[1])],
                    [' 5dB AUC', 100*(snr_AUC[2] / snr_files[2])], ['10dB AUC', 100*(snr_AUC[3] / snr_files[3])],
                    ['-5,0dB AVG', 100*((snr_AUC[0]/snr_files[0] + snr_AUC[1]/snr_files[1])/2)],
                    ['Total AVG', 100*((snr_AUC[0]/snr_files[0] + snr_AUC[1]/snr_files[1] + snr_AUC[2]/snr_files[2] +
                                        snr_AUC[3]/snr_files[3])/4)]],
                   tablefmt='grid'))
    print('-' * 7 + '5 Noises' + '-' * 7)
    print(tabulate([['-5dB AUC', 100 * (five_noises_auc[0] / five_files[0])], [' 0dB AUC', 100 * (five_noises_auc[1] / five_files[1])],
                    [' 5dB AUC', 100 * (five_noises_auc[2] / five_files[2])], ['10dB AUC', 100 * (five_noises_auc[3] / five_files[3])],
                    ['-5,0dB AVG', 100*((five_noises_auc[0]/five_files[0] + five_noises_auc[1]/five_files[1])/2)],
                    ['Total AVG', 100*((five_noises_auc[0]/five_files[0] + five_noises_auc[1]/five_files[1] +
                                        five_noises_auc[2]/five_files[2] + five_noises_auc[3]/five_files[3])/4)]],
                   tablefmt='grid'))
Example No. 16

train_dataset = SleepDataset('/beegfs/ga4493/projects/groupb/data/training/RECORDS', '/beegfs/ga4493/projects/groupb/data/training/', 100, 150)

train_loaders = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=1,
                                           shuffle=True)

model_v1 = Model_V1(window_size, hanning_window, window_size)

if torch.cuda.is_available():
    print('using cuda')
    model_v1.cuda()
# sum the per-element losses instead of averaging them
criterion = nn.BCEWithLogitsLoss(reduction='sum')  # reduction='sum' replaces the deprecated size_average=False

optimizer = torch.optim.Adam(model_v1.parameters(), lr=learning_rate)#, momentum=0.9)#, weight_decay=1e-3)  
sig = nn.Sigmoid()
test_dataset = SleepDatasetTest('/beegfs/ga4493/projects/groupb/data/training/RECORDS', '/beegfs/ga4493/projects/groupb/data/training/', 0, 10)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=1, 
                                          shuffle=False)

# i, ((data, cent), v_l) = next(enumerate(test_loader))
losses = []
v_losses = []
l = None
for epoch in range(50):
    loss_t = 0.0
Example No. 17
def train_model(X_train,
                y_train,
                subject_indices,
                model="binary",
                criterion="BCE",
                optimizer="SGD",
                smote=False,
                activation_function="relu",
                batch_size=128,
                hidden_layer_dims=[150],
                epochs=1000,
                learning_rate=0.0001,
                dropout=0.5,
                weight_decay=0.5,
                split_val=0.33,
                verbose=True,
                random_state=SEED):
    """
    Modular Function for initializing and training a model.

    Args:
        X_train (np.array): training samples
        y_train (np.array): training labels
        subject_indices (np.array): indices of distinct subjects
        model (string): type of model; currently implemented: ["binary"]
        criterion (string): loss function; currently implemented: ["BCE"]
        optimizer (string): optimizer; currently implemented: ["Adam"]
        smote (bool): whether to perform SMOTE class equalization
        activation_function (string): which activation function; currently
                                            implemented: ["relu"]
        batch_size (int): size of training batches
        hidden_layer_dims (list of ints): the dimensions (and amount) of hidden
                                            layer in the neural network. Ordered
                                            from input to output (without input
                                            and output layer sizes)
        epochs (int): max number of training epochs
        learning_rate (float): learning rate for the model
        dropout (float): dropout rate
        weight_decay (float): weight decay rate (i.e. regularization)
        split_val (float): validation dataset size
        verbose (bool): print information
        random_state (int): seed for random functions
    Returns:
        model (nn.Module): A trained model
    """

    # split them into train and test according to the groups
    gss = GroupShuffleSplit(n_splits=1,
                            train_size=1 - split_val,
                            random_state=random_state)
    # since we only split once, use this command to get the
    # corresponding train and test indices
    for train_idx, val_idx in gss.split(X_train, y_train, subject_indices):
        continue

    X_val = X_train[val_idx]
    y_val = y_train[val_idx]
    X_train = X_train[train_idx]
    y_train = y_train[train_idx]

    # apply class imbalance equalization using SMOTE
    if smote:
        oversample = SMOTE(random_state=random_state)
        X_train, y_train = oversample.fit_resample(X_train, y_train)
        y_train = y_train.reshape((-1, 1))

    # create torch data loaders for training and validation
    train_loader = get_data_loader(X_train, y_train, batch_size)
    val_loader = get_data_loader(X_val, y_val, X_val.shape[0])

    # initiate the correct model

    assert model in "binary", "Model not implemented yet!"

    if model == "binary":
        model = BinaryClassification([X_train.shape[1], *hidden_layer_dims, 1],
                                     dropout=dropout,
                                     activation_function=activation_function)
    # cpu or gpu, depending on setup
    model.to(device)

    # print model summary
    if verbose:
        print(model)

    # initiate the correct criterion/loss function
    if criterion == "BCE":
        criterion = nn.BCEWithLogitsLoss()

    # initiate the correct optimizer
    if optimizer == "Adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=weight_decay)
    elif optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=learning_rate,
                                    weight_decay=weight_decay,
                                    momentum=0.5)

    # saves validation losses over all epochs
    val_losses_epochs = []
    val_acc_epochs = []
    val_auc_epochs = []

    # used to store weights of the best model
    best_model = None
    # indicates best index of the epoch that produced the best model
    best_epoch = -1

    for epoch in range(1, epochs + 1):
        # set pytorch model into training mode (relevant for batch norm, dropout, ..)
        model.train()

        # go through all batches
        for X_batch, y_batch in train_loader:
            # map them to cpu/gpu tensors
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            # set up optimizer
            optimizer.zero_grad()

            # make a prediction with the model
            y_pred = model(X_batch)

            # taking care of dims
            if y_pred.ndim == 2:
                y_pred = y_pred[:, 0]
            if y_batch.ndim == 2:
                y_batch = y_batch[:, 0]

            # calculate the loss of the prediction
            loss = criterion(y_pred, y_batch)

            # perform backpropagation
            loss.backward()
            # use the optimizer to update the weights for this batch
            optimizer.step()

        # put model into evaluation mode
        model.eval()

        # validation run
        for X_batch, y_batch in val_loader:
            # make a prediction
            y_pred = model(X_batch)

            # taking care of dims
            if y_pred.ndim == 2:
                y_pred = y_pred[:, 0]
            if y_batch.ndim == 2:
                y_batch = y_batch[:, 0]
            # print(y_pred)
            # print(y_batch)
            loss = criterion(y_pred, y_batch)

            # calculate acc and auc of the prediction
            acc = binary_acc(y_pred, y_batch)

            # calculate the auc, while taking into consideration, that
            # a batch might not contain positive samples
            if len(np.unique(y_batch)) == 1:
                auc = "None"
            else:
                auc = area_under_the_curve(y_pred.detach().numpy(),
                                           y_batch.detach().numpy())

        # add the average loss, acc and auc to lists
        val_losses_epochs.append(np.mean(loss.item()))
        val_acc_epochs.append(acc)
        val_auc_epochs.append(auc)

        # for printing
        indicator_string = ""

        # update the best model, if the current epoch produced lowest auc/loss
        # use auc, if we never had a case that a batch contained only 0s
        try_acc = False
        if not ("None" in val_auc_epochs) and (np.max(val_auc_epochs)
                                               == val_auc_epochs[-1]):
            best_epoch = epoch
            best_model = model.state_dict()
            indicator_string += "!"
        elif "None" in val_auc_epochs:
            try_acc = True
        # otherwise use loss
        if try_acc and np.min(val_losses_epochs) == val_losses_epochs[-1]:
            best_epoch = epoch
            best_model = model.state_dict()
            indicator_string += "!"

        if verbose:
            if isinstance(auc, str):
                print(
                    f'Epoch {epoch + 0:03}: | Validation Loss: {val_losses_epochs[-1]:.3f}  | ACC: {val_acc_epochs[-1]:.3f} | AUC: {val_auc_epochs[-1]} {indicator_string}'
                )
            else:
                print(
                    f'Epoch {epoch + 0:03}: | Validation Loss: {val_losses_epochs[-1]:.3f}  | ACC: {val_acc_epochs[-1]:.3f} | AUC: {val_auc_epochs[-1]:.3f} {indicator_string}'
                )

        # convergence criterion: at least found a new best model in the last 5 epochs
        if (epoch - best_epoch) >= 5:
            break

    # load the best model from memory
    model.load_state_dict(best_model)

    return model
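A minimal call sketch for the function above, with hypothetical arrays shaped as the docstring describes (the data and group labels here are random placeholders, and the helpers it relies on, such as get_data_loader and BinaryClassification, are assumed to come from the same module):

import numpy as np

X = np.random.randn(500, 20).astype(np.float32)                # 500 samples, 20 features
y = np.random.randint(0, 2, size=(500, 1)).astype(np.float32)  # binary labels
groups = np.repeat(np.arange(50), 10)                           # 50 subjects, 10 samples each

trained = train_model(X, y, groups,
                      model="binary",
                      criterion="BCE",
                      optimizer="Adam",
                      epochs=50,
                      verbose=False)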
Example No. 18
def train_superres(load_trained):
    # logdir
    logdir = os.path.join(Hyper.logdir, "superres")
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    if not os.path.exists(os.path.join(logdir, "pkg")):
        os.mkdir(os.path.join(logdir, "pkg"))
    # device
    device = Hyper.device_superres
    # graph
    graph = SuperRes().to(device)
    graph.train()
    # load data
    names, lengths, texts = load_data()
    batch_maker = BatchMaker(Hyper.batch_size, names, lengths, texts)
    # loss
    criterion_mags = nn.L1Loss().to(device)
    criterion_bd2 = nn.BCEWithLogitsLoss().to(device)
    lossplot_mags = LogHelper("mag_l1", logdir)
    lossplot_bd2 = LogHelper("mag_BCE", logdir)
    # optim
    optimizer = torch.optim.Adam(graph.parameters(),
                                 lr=Hyper.adam_alpha,
                                 betas=Hyper.adam_betas,
                                 eps=Hyper.adam_eps)
    # load
    global_step = 0
    if load_trained > 0:
        print("load model trained for {}k batches".format(load_trained))
        global_step = load(
            os.path.join(logdir, "pkg/save_{}k.pkg".format(load_trained)),
            graph, {
                "mags": criterion_mags,
                "bd2": criterion_bd2
            }, optimizer)

    for loop_cnt in range(
            int(Hyper.num_batches / batch_maker.num_batches() + 0.5)):
        print("loop", loop_cnt)
        bar = PrettyBar(batch_maker.num_batches())
        bar.set_description("training...")
        loss_str0 = MovingAverage()
        loss_str1 = MovingAverage()

        for bi in bar:
            batch = batch_maker.next_batch()
            # low res
            mels = torch.FloatTensor(batch["mels"]).to(device)
            # high res
            mags = torch.FloatTensor(batch["mags"]).to(device)

            # forward
            mag_logits, mag_pred = graph(mels)

            # loss
            loss_mags = criterion_mags(mag_pred, mags)
            loss_bd2 = criterion_bd2(mag_logits, mags)
            loss = loss_mags + loss_bd2

            # backward
            graph.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            # clip grad
            nn.utils.clip_grad_value_(graph.parameters(), 1)
            optimizer.step()

            # log
            loss_str0.add(loss_mags.cpu().data.mean())
            loss_str1.add(loss_bd2.cpu().data.mean())
            lossplot_mags.add(loss_str0(), global_step)
            lossplot_bd2.add(loss_str1(), global_step)
            bar.set_description("gs: {}, mags: {}, bd2: {}".format(
                global_step, loss_str0(), loss_str1()))

            # plot
            if global_step % 100 == 0:
                gs = 0
                plot_spectrum(mag_pred[0].cpu().data, "pred", gs, dir=logdir)
                plot_spectrum(mags[0].cpu().data, "true", gs, dir=logdir)
                plot_spectrum(mels[0].cpu().data, "input", gs, dir=logdir)
                lossplot_mags.plot()
                lossplot_bd2.plot()

                if global_step % 10000 == 0:
                    save(
                        os.path.join(logdir, "pkg/save_{}k.pkg").format(
                            global_step // 1000), graph, {
                                "mags": criterion_mags,
                                "bd2": criterion_bd2
                            }, optimizer, global_step, True)

            global_step += 1
Example No. 19
                        nn.Linear(256, 256), nn.LeakyReLU(0.2),
                        nn.Linear(256, 1))
    return net


# Generator network
def generator(noise_dim=NOISE_DIM):
    net = nn.Sequential(nn.Linear(noise_dim, 1024), nn.ReLU(True),
                         nn.Linear(1024, 1024), nn.ReLU(True),
                         nn.Linear(1024, 784), nn.Tanh())
    return net


# The discriminator's loss pushes the scores of real data toward 1 and of fake data toward 0, while the generator's loss pushes the scores of fake data toward 1

bce_loss = nn.BCEWithLogitsLoss()  # binary cross-entropy loss on logits


def discriminator_loss(logits_real, logits_fake):  # discriminator loss
    size = logits_real.shape[0]
    true_labels = Variable(torch.ones(size, 1)).float()
    false_labels = Variable(torch.zeros(size, 1)).float()
    loss = bce_loss(logits_real, true_labels) + bce_loss(
        logits_fake, false_labels)
    return loss


def generator_loss(logits_fake):  # generator loss
    size = logits_fake.shape[0]
    true_labels = Variable(torch.ones(size, 1)).float()
    loss = bce_loss(logits_fake, true_labels)
    return loss
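A minimal training-step sketch using the two losses above, assuming the truncated definition further up is a discriminator() factory and that images are flattened to 784 values (all names below the loss definitions are hypothetical):

import torch

D, G = discriminator(), generator()
d_optim = torch.optim.Adam(D.parameters(), lr=2e-4)
g_optim = torch.optim.Adam(G.parameters(), lr=2e-4)

real_data = torch.randn(64, 784)        # stand-in for a batch of flattened real images
noise = torch.randn(64, NOISE_DIM)

# discriminator step: real -> 1, fake -> 0
d_loss = discriminator_loss(D(real_data), D(G(noise).detach()))
d_optim.zero_grad()
d_loss.backward()
d_optim.step()

# generator step: fake -> 1
g_loss = generator_loss(D(G(noise)))
g_optim.zero_grad()
g_loss.backward()
g_optim.step()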
Example No. 20
def train_text2mel(load_trained):
    # create log dir
    logdir = os.path.join(Hyper.logdir, "text2mel")
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    if not os.path.exists(os.path.join(logdir, "pkg")):
        os.mkdir(os.path.join(logdir, "pkg"))

    # device ##cuda:0
    device = Hyper.device_text2mel

    graph = Text2Mel().to(device)
    # set the training flag
    graph.train()
    # load data and get batch maker
    names, lengths, texts = load_data()
    batch_maker = BatchMaker(Hyper.batch_size, names, lengths, texts)

    criterion_mels = nn.L1Loss().to(device)
    criterion_bd1 = nn.BCEWithLogitsLoss().to(device)
    criterion_atten = nn.L1Loss().to(device)
    optimizer = torch.optim.Adam(graph.parameters(),
                                 lr=Hyper.adam_alpha,
                                 betas=Hyper.adam_betas,
                                 eps=Hyper.adam_eps)

    lossplot_mels = LogHelper("mel_l1", logdir)
    lossplot_bd1 = LogHelper("mel_BCE", logdir)
    lossplot_atten = LogHelper("atten", logdir)

    dynamic_guide = float(Hyper.guide_weight)
    global_step = 0

    # check if load
    if load_trained > 0:
        print("load model trained for {}k batches".format(load_trained))
        global_step = load(
            os.path.join(logdir, "pkg/save_{}k.pkg".format(load_trained)),
            graph, {
                "mels": criterion_mels,
                "bd1": criterion_bd1,
                "atten": criterion_atten
            }, optimizer)
        dynamic_guide *= Hyper.guide_decay**(load_trained * 1000)

    for loop_cnt in range(
            int(Hyper.num_batches / batch_maker.num_batches() + 0.5)):
        print("loop", loop_cnt)
        bar = PrettyBar(batch_maker.num_batches())
        bar.set_description("training...")
        loss_str0 = MovingAverage()
        loss_str1 = MovingAverage()
        loss_str2 = MovingAverage()
        for bi in bar:
            batch = batch_maker.next_batch()
            # make batch
            texts = torch.LongTensor(batch["texts"]).to(device)
            # shift mel
            shift_mels = torch.FloatTensor(
                np.concatenate((np.zeros(
                    (batch["mels"].shape[0], batch["mels"].shape[1], 1)),
                                batch["mels"][:, :, :-1]),
                               axis=2)).to(device)
            # ground truth
            mels = torch.FloatTensor(batch["mels"]).to(device)

            # forward
            pred_logits, pred_mels = graph(texts, shift_mels)
            # loss
            if False:  # per-sample, length-masked loss (disabled)
                loss_mels = sum(
                    criterion_mels(
                        torch.narrow(pred_mels[i], -1, 0, batch["mel_lengths"]
                                     [i]),
                        torch.narrow(mels[i], -1, 0, batch["mel_lengths"][i]))
                    for i in range(batch_maker.batch_size())) / float(
                        batch_maker.batch_size())
                loss_bd1 = sum(
                    criterion_bd1(
                        torch.narrow(pred_logits[i], -1, 0,
                                     batch["mel_lengths"][i]),
                        torch.narrow(mels[i], -1, 0, batch["mel_lengths"][i]))
                    for i in range(batch_maker.batch_size())) / float(
                        batch_maker.batch_size())
            else:
                loss_mels = criterion_mels(pred_mels, mels)
                loss_bd1 = criterion_bd1(pred_logits, mels)
            # guide attention
            atten_guide = torch.FloatTensor(batch["atten_guides"]).to(device)
            atten_mask = torch.FloatTensor(batch["atten_masks"]).to(device)
            atten_mask = torch.ones_like(graph.attention)  # overrides the loaded mask with all ones
            loss_atten = criterion_atten(
                atten_guide * graph.attention * atten_mask,
                torch.zeros_like(graph.attention)) * dynamic_guide
            loss = loss_mels + loss_bd1 + loss_atten

            # backward
            graph.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            # clip grad
            nn.utils.clip_grad_value_(graph.parameters(), 1)
            optimizer.step()
            # log
            loss_str0.add(loss_mels.cpu().data.mean())
            loss_str1.add(loss_bd1.cpu().data.mean())
            loss_str2.add(loss_atten.cpu().data.mean())
            lossplot_mels.add(loss_str0(), global_step)
            lossplot_bd1.add(loss_str1(), global_step)
            lossplot_atten.add(loss_str2(), global_step)

            # adjust dynamic_guide
            # dynamic_guide = float((loss_mels + loss_bd1).cpu().data.mean() / loss_atten.cpu().data.mean())
            dynamic_guide *= Hyper.guide_decay
            if dynamic_guide < Hyper.guide_lowbound:
                dynamic_guide = Hyper.guide_lowbound
            bar.set_description(
                "gs: {}, mels: {}, bd1: {}, atten: {}, scale: {}".format(
                    global_step, loss_str0(), loss_str1(), loss_str2(),
                    "%4f" % dynamic_guide))

            # plot
            if global_step % 100 == 0:
                gs = 0
                plot_spectrum(mels[0].cpu().data, "mel_true", gs, dir=logdir)
                plot_spectrum(shift_mels[0].cpu().data,
                              "mel_input",
                              gs,
                              dir=logdir)
                plot_spectrum(pred_mels[0].cpu().data,
                              "mel_pred",
                              gs,
                              dir=logdir)
                plot_spectrum(graph.query[0].cpu().data,
                              "query",
                              gs,
                              dir=logdir)
                plot_attention(graph.attention[0].cpu().data,
                               "atten",
                               gs,
                               True,
                               dir=logdir)
                plot_attention((atten_guide)[0].cpu().data,
                               "atten_guide",
                               gs,
                               True,
                               dir=logdir)
                if global_step % 500 == 0:
                    lossplot_mels.plot()
                    lossplot_bd1.plot()
                    lossplot_atten.plot()

                if global_step % 10000 == 0:
                    save(
                        os.path.join(logdir, "pkg/save_{}k.pkg").format(
                            global_step // 1000), graph, {
                                "mels": criterion_mels,
                                "bd1": criterion_bd1,
                                "atten": criterion_atten
                            }, optimizer, global_step, True)

            # increase global step
            global_step += 1
Ejemplo n.º 21
0
def main(args):
    # -----------------------------------------------------------------------------
    # Create model
    # -----------------------------------------------------------------------------
    if args.model == 'dicenet':
        from model.classification import dicenet as net
        model = net.CNNModel(args)
    elif args.model == 'espnetv2':
        from model.classification import espnetv2 as net
        model = net.EESPNet(args)
    elif args.model == 'shufflenetv2':
        from model.classification import shufflenetv2 as net
        model = net.CNNModel(args)
    else:
        print_error_message('Model {} not yet implemented'.format(args.model))
        exit()

    if args.finetune:
        # load the weights for finetuning
        if os.path.isfile(args.weights_ft):
            pretrained_dict = torch.load(args.weights_ft,
                                         map_location=torch.device('cpu'))
            print_info_message('Loading pretrained basenet model weights')
            model_dict = model.state_dict()

            overlap_dict = {
                k: v
                for k, v in model_dict.items() if k in pretrained_dict
            }

            total_size_overlap = 0
            for k in overlap_dict:
                total_size_overlap += torch.numel(overlap_dict[k])

            total_size_pretrain = 0
            for k in pretrained_dict:
                total_size_pretrain += torch.numel(pretrained_dict[k])

            if len(overlap_dict) == 0:
                print_error_message(
                    'No overlapping weights between model file and pretrained weight file. Please check'
                )

            print_info_message('Overlap ratio of weights: {:.2f} %'.format(
                (total_size_overlap * 100.0) / total_size_pretrain))

            model_dict.update(overlap_dict)
            model.load_state_dict(model_dict, strict=False)
            print_info_message('Pretrained basenet model loaded!!')
        else:
            print_error_message('Unable to find the weights: {}'.format(
                args.weights_ft))

    # -----------------------------------------------------------------------------
    # Writer for logging
    # -----------------------------------------------------------------------------
    if not os.path.isdir(args.savedir):
        os.makedirs(args.savedir)
    writer = SummaryWriter(log_dir=args.savedir,
                           comment='Training and Validation logs')
    writer.add_graph(model,
                     input_to_model=torch.randn(1, 3, args.inpSize,
                                                args.inpSize))

    # network properties
    num_params = model_parameters(model)
    flops = compute_flops(model)
    print_info_message('FLOPs: {:.2f} million'.format(flops))
    print_info_message('Network Parameters: {:.2f} million'.format(num_params))

    # -----------------------------------------------------------------------------
    # Optimizer
    # -----------------------------------------------------------------------------

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    best_acc = 0.0
    num_gpus = torch.cuda.device_count()
    device = 'cuda' if num_gpus >= 1 else 'cpu'
    if args.resume:
        if os.path.isfile(args.resume):
            print_info_message("=> loading checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device(device))
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_info_message("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print_warning_message("=> no checkpoint found at '{}'".format(
                args.resume))

    # -----------------------------------------------------------------------------
    # Loss Fn
    # -----------------------------------------------------------------------------
    if args.dataset == 'imagenet':
        criterion = nn.CrossEntropyLoss()
        acc_metric = 'Top-1'
    elif args.dataset == 'coco':
        criterion = nn.BCEWithLogitsLoss()
        acc_metric = 'F1'
    else:
        print_error_message('{} dataset not yet supported'.format(
            args.dataset))

    if num_gpus >= 1:
        model = torch.nn.DataParallel(model)
        model = model.cuda()
        criterion = criterion.cuda()
        if torch.backends.cudnn.is_available():
            import torch.backends.cudnn as cudnn
            cudnn.benchmark = True
            cudnn.deterministic = True

    # -----------------------------------------------------------------------------
    # Data Loaders
    # -----------------------------------------------------------------------------
    # Data loading code
    if args.dataset == 'imagenet':
        train_loader, val_loader = img_loader.data_loaders(args)
        # import the loaders too
        from utilities.train_eval_classification import train, validate
    elif args.dataset == 'coco':
        from data_loader.classification.coco import COCOClassification
        train_dataset = COCOClassification(root=args.data,
                                           split='train',
                                           year='2017',
                                           inp_size=args.inpSize,
                                           scale=args.scale,
                                           is_training=True)
        val_dataset = COCOClassification(root=args.data,
                                         split='val',
                                         year='2017',
                                         inp_size=args.inpSize,
                                         is_training=False)

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   pin_memory=True,
                                                   num_workers=args.workers)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 num_workers=args.workers)

        # import the loaders too
        from utilities.train_eval_classification import train_multi as train
        from utilities.train_eval_classification import validate_multi as validate
    else:
        print_error_message('{} dataset not yet supported'.format(
            args.dataset))

    # -----------------------------------------------------------------------------
    # LR schedulers
    # -----------------------------------------------------------------------------
    if args.scheduler == 'fixed':
        step_sizes = args.steps
        from utilities.lr_scheduler import FixedMultiStepLR
        lr_scheduler = FixedMultiStepLR(base_lr=args.lr,
                                        steps=step_sizes,
                                        gamma=args.lr_decay)
    elif args.scheduler == 'clr':
        from utilities.lr_scheduler import CyclicLR
        step_sizes = args.steps
        lr_scheduler = CyclicLR(min_lr=args.lr,
                                cycle_len=5,
                                steps=step_sizes,
                                gamma=args.lr_decay)
    elif args.scheduler == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr, max_epochs=args.epochs)
    elif args.scheduler == 'linear':
        from utilities.lr_scheduler import LinearLR
        lr_scheduler = LinearLR(base_lr=args.lr, max_epochs=args.epochs)
    elif args.scheduler == 'hybrid':
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr,
                                max_epochs=args.epochs,
                                clr_max=args.clr_max)
    else:
        print_error_message('Scheduler ({}) not yet implemented'.format(
            args.scheduler))
        exit()

    print_info_message(lr_scheduler)

    # set up the epoch variable in case resuming training
    if args.start_epoch != 0:
        for epoch in range(args.start_epoch):
            lr_scheduler.step(epoch)

    with open(args.savedir + os.sep + 'arguments.json', 'w') as outfile:
        import json
        arg_dict = vars(args)
        arg_dict['model_params'] = '{} '.format(num_params)
        arg_dict['flops'] = '{} '.format(flops)
        json.dump(arg_dict, outfile)

    # -----------------------------------------------------------------------------
    # Training and Val Loop
    # -----------------------------------------------------------------------------

    extra_info_ckpt = args.model + '_' + str(args.s)
    for epoch in range(args.start_epoch, args.epochs):
        lr_log = lr_scheduler.step(epoch)
        # set the optimizer with the learning rate
        # This can be done inside the MyLRScheduler
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_log
        print_info_message("LR for epoch {} = {:.5f}".format(epoch, lr_log))
        train_acc, train_loss = train(data_loader=train_loader,
                                      model=model,
                                      criteria=criterion,
                                      optimizer=optimizer,
                                      epoch=epoch,
                                      device=device)
        # evaluate on validation set
        val_acc, val_loss = validate(data_loader=val_loader,
                                     model=model,
                                     criteria=criterion,
                                     device=device)

        # remember best prec@1 and save checkpoint
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)

        weights_dict = model.module.state_dict(
        ) if device == 'cuda' else model.state_dict()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': weights_dict,
                'best_prec1': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.savedir, extra_info_ckpt)

        writer.add_scalar('Classification/LR/learning_rate', lr_log, epoch)
        writer.add_scalar('Classification/Loss/Train', train_loss, epoch)
        writer.add_scalar('Classification/Loss/Val', val_loss, epoch)
        writer.add_scalar('Classification/{}/Train'.format(acc_metric),
                          train_acc, epoch)
        writer.add_scalar('Classification/{}/Val'.format(acc_metric), val_acc,
                          epoch)
        writer.add_scalar('Classification/Complexity/Top1_vs_flops', best_acc,
                          round(flops, 2))
        writer.add_scalar('Classification/Complexity/Top1_vs_params', best_acc,
                          round(num_params, 2))

    writer.close()
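
# Note on the criterion choice above: CrossEntropyLoss expects one class index per
# sample (single-label ImageNet), while BCEWithLogitsLoss expects a multi-hot float
# vector per sample (multi-label COCO). A minimal standalone illustration,
# independent of the training script above:
import torch
import torch.nn as nn

logits = torch.randn(4, 80)                   # batch of 4 samples, 80 classes

ce = nn.CrossEntropyLoss()                    # single-label: one class index per sample
class_indices = torch.randint(0, 80, (4,))
print(ce(logits, class_indices))

bce = nn.BCEWithLogitsLoss()                  # multi-label: multi-hot float targets
multi_hot = (torch.rand(4, 80) > 0.9).float()
print(bce(logits, multi_hot))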
Ejemplo n.º 22
0
def compute_loss(p, targets, model):  # predictions, targets, model
    ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
    lcls, lbox, lobj = ft([0]), ft([0]), ft([0])
    tcls, tbox, indices, anchor_vec = build_targets(model, targets)
    h = model.hyp  # hyperparameters
    red = 'mean'  # Loss reduction (sum or mean)

    # Define criteria
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([h['cls_pw']]), reduction=red)
    BCEobj = nn.BCEWithLogitsLoss(pos_weight=ft([h['obj_pw']]), reduction=red)

    # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    cp, cn = smooth_BCE(eps=0.0)

    # focal loss
    g = h['fl_gamma']  # focal loss gamma
    if g > 0:
        BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

    # Compute losses
    np, ng = 0, 0  # number grid points, targets
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(pi[..., 0])  # target obj
        np += tobj.numel()

        # Compute losses
        nb = len(b)
        if nb:  # number of targets
            ng += nb
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
            # ps[:, 2:4] = torch.sigmoid(ps[:, 2:4])  # wh power loss (uncomment)

            # GIoU
            pxy = torch.sigmoid(ps[:, 0:2])  # pxy = pxy * s - (s - 1) / 2,  s = 1.5  (scale_xy)
            pwh = torch.exp(ps[:, 2:4]).clamp(max=1E3) * anchor_vec[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True)  # giou computation
            lbox += (1.0 - giou).sum() if red == 'sum' else (1.0 - giou).mean()  # giou loss
            tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype)  # giou ratio

            if model.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(ps[:, 5:], cn)  # targets
                t[range(nb), tcls[i]] = cp
                lcls += BCEcls(ps[:, 5:], t)  # BCE
                # lcls += CE(ps[:, 5:], tcls[i])  # CE

            # Append targets to text file
            # with open('targets.txt', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

        lobj += BCEobj(pi[..., 4], tobj)  # obj loss

    lbox *= h['giou']
    lobj *= h['obj']
    lcls *= h['cls']
    if red == 'sum':
        bs = tobj.shape[0]  # batch size
        lobj *= 3 / (6300 * bs) * 2  # 3 / np * 2
        if ng:
            lcls *= 3 / ng / model.nc
            lbox *= 3 / ng

    loss = lbox + lobj + lcls
    return loss, torch.cat((lbox, lobj, lcls, loss)).detach()
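
# The smooth_BCE helper used above is not shown in this example. A minimal sketch
# consistent with how the positive/negative targets cp and cn are consumed (and with
# the label-smoothing formulation referenced above) could be:
def smooth_BCE(eps=0.1):
    # Return smoothed BCE targets for positive and negative labels;
    # eps=0.0 reduces to the usual hard targets (1.0, 0.0), as used above.
    return 1.0 - 0.5 * eps, 0.5 * eps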
Ejemplo n.º 23
0
    def __init__(self, alpha=0.05):
        super(BCEBlurWithLogitsLoss, self).__init__()
        self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # must be nn.BCEWithLogitsLoss()
        self.alpha = alpha
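
    # Only the constructor appears in this example. A forward pass consistent with
    # the "blur" idea -- down-weighting confident positive predictions that have no
    # matching label -- might look like this sketch (an assumption, not necessarily
    # the original implementation):
    def forward(self, pred, true):
        loss = self.loss_fcn(pred, true)        # element-wise BCE (reduction='none')
        pred_prob = torch.sigmoid(pred)         # logits -> probabilities
        dx = pred_prob - true                   # > 0 when predicting 1 for a 0-label
        alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
        loss *= alpha_factor                    # reduce the penalty for likely missing labels
        return loss.mean()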
Ejemplo n.º 24
0
def main(args):
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(1)
    device = torch.device('cuda' if use_cuda else 'cpu')

    train_dataset = WakeWordData(data_json=args.train_data_json,
                                 sample_rate=args.sample_rate,
                                 valid=False)
    test_dataset = WakeWordData(data_json=args.test_data_json,
                                sample_rate=args.sample_rate,
                                valid=True)

    kwargs = {
        'num_workers': args.num_workers,
        'pin_memory': True
    } if use_cuda else {}
    train_loader = data.DataLoader(dataset=train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   collate_fn=collate_fn,
                                   **kwargs)
    test_loader = data.DataLoader(dataset=test_dataset,
                                  batch_size=args.eval_batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  **kwargs)

    model_params = {
        "num_classes": 1,
        "feature_size": 40,
        "hidden_size": args.hidden_size,
        "num_layers": 1,
        "dropout": 0.1,
        "bidirectional": False
    }
    model = LSTMWakeWord(**model_params, device=device)
    model = model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    loss_fn = nn.BCEWithLogitsLoss()

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='max',
                                                     factor=0.5,
                                                     patience=2)

    best_train_acc, best_train_report = 0, None
    best_test_acc, best_test_report = 0, None
    best_epoch = 0
    for epoch in range(args.epochs):
        print("\nstarting training with learning rate",
              optimizer.param_groups[0]['lr'])
        train_acc, train_report = train(train_loader, model, optimizer,
                                        loss_fn, device, epoch)
        test_acc, test_report = test(test_loader, model, device, epoch)

        # record best train and test
        if train_acc > best_train_acc:
            best_train_acc = train_acc
        if test_acc > best_test_acc:
            best_test_acc = test_acc

        # saves checkpoint if metrics are better than last
        if args.save_checkpoint_path and test_acc >= best_test_acc:
            checkpoint_path = os.path.join(args.save_checkpoint_path,
                                           args.model_name + ".pt")
            print("found best checkpoint. saving model as", checkpoint_path)
            save_checkpoint(
                checkpoint_path,
                model,
                optimizer,
                scheduler,
                model_params,
                notes="train_acc: {}, test_acc: {}, epoch: {}".format(
                    best_train_acc, best_test_acc, epoch),
            )
            best_train_report = train_report
            best_test_report = test_report
            best_epoch = epoch

        table = [["Train ACC", train_acc], ["Test ACC", test_acc],
                 ["Best Train ACC", best_train_acc],
                 ["Best Test ACC", best_test_acc], ["Best Epoch", best_epoch]]
        # print("\ntrain acc:", train_acc, "test acc:", test_acc, "\n",
        #     "best train acc", best_train_acc, "best test acc", best_test_acc)
        print(tabulate(table))

        scheduler.step(train_acc)

    print("Done Training...")
    print("Best Model Saved to", checkpoint_path)
    print("Best Epoch", best_epoch)
    print("\nTrain Report \n")
    print(best_train_report)
    print("\nTest Report\n")
    print(best_test_report)
Ejemplo n.º 25
0
    def __init__(self):
        super(BCELoss, self).__init__()

        self.criterion = nn.BCEWithLogitsLoss()
Ejemplo n.º 26
0
def main(params: dict):
    import mlflow
    print("start params={}".format(params))
    logger = get_logger()
    df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")
    # df = pd.read_pickle("../input/riiid-test-answer-prediction/split10/train_0.pickle").sort_values(["user_id", "timestamp"]).reset_index(drop=True)
    if is_debug:
        df = df.head(30000)
    column_config = {
        ("content_id", "content_type_id"): {"type": "category"},
         "user_answer": {"type": "category"},
         "part": {"type": "category"},
         "prior_question_elapsed_time_bin300": {"type": "category"},
         "duration_previous_content_bin300": {"type": "category"}
    }

    if not load_pickle or is_debug:
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"]["DurationPreviousContent"] = DurationPreviousContent()
        feature_factory_dict["user_id"]["ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_manager = FeatureFactoryManager(feature_factory_dict=feature_factory_dict,
                                                        logger=logger,
                                                        split_num=1,
                                                        model_id="all",
                                                        load_feature=not is_debug,
                                                        save_feature=not is_debug)

        print("all_predict")
        df = feature_factory_manager.all_predict(df)
        df = df[["user_id", "content_id", "content_type_id", "part", "user_answer", "answered_correctly", "prior_question_elapsed_time_bin300", "duration_previous_content_bin300"]]
        print(df.head(10))

        print("data preprocess")

        train_idx = []
        val_idx = []
        np.random.seed(0)
        for _, w_df in df[df["content_type_id"] == 0].groupby("user_id"):
            if np.random.random() < 0.01:
                # all val
                val_idx.extend(w_df.index.tolist())
            else:
                train_num = int(len(w_df) * 0.95)
                train_idx.extend(w_df[:train_num].index.tolist())
                val_idx.extend(w_df[train_num:].index.tolist())
    ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                      dict_path="../feature_engineering/",
                                                      sequence_length=params["max_seq"],
                                                      logger=logger)
    ff_for_transformer.make_dict(df=pd.DataFrame())
    n_skill = len(ff_for_transformer.embbed_dict[("content_id", "content_type_id")])
    if not load_pickle or is_debug:
        df["is_val"] = 0
        df["is_val"].loc[val_idx] = 1
        w_df = df[df["is_val"] == 0]
        w_df["group"] = (w_df.groupby("user_id")["user_id"].transform("count") - w_df.groupby("user_id").cumcount()) // params["max_seq"]
        w_df["user_id"] = w_df["user_id"].astype(str) + "_" + w_df["group"].astype(str)

        group = ff_for_transformer.all_predict(w_df)

        dataset_train = SAKTDataset(group,
                                    n_skill=n_skill,
                                    max_seq=params["max_seq"])

        del w_df
        gc.collect()

    ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                      dict_path="../feature_engineering/",
                                                      sequence_length=params["max_seq"],
                                                      logger=logger)
    if not load_pickle or is_debug:
        group = ff_for_transformer.all_predict(df[df["content_type_id"] == 0])
        dataset_val = SAKTDataset(group,
                                  is_test=True,
                                  n_skill=n_skill,
                                  max_seq=params["max_seq"])

    os.makedirs("../input/feature_engineering/model051", exist_ok=True)
    if not is_debug and not load_pickle:
        with open(f"../input/feature_engineering/model051/train.pickle", "wb") as f:
            pickle.dump(dataset_train, f)
        with open(f"../input/feature_engineering/model051/val.pickle", "wb") as f:
            pickle.dump(dataset_val, f)

    if not is_debug and load_pickle:
        with open(f"../input/feature_engineering/model051/train.pickle", "rb") as f:
            dataset_train = pickle.load(f)
        with open(f"../input/feature_engineering/model051/val.pickle", "rb") as f:
            dataset_val = pickle.load(f)
        print("loaded!")
    dataloader_train = DataLoader(dataset_train, batch_size=params["batch_size"], shuffle=True, num_workers=1)
    dataloader_val = DataLoader(dataset_val, batch_size=params["batch_size"], shuffle=False, num_workers=1)

    model = SAKTModel(n_skill, embed_dim=params["embed_dim"], max_seq=params["max_seq"], dropout=dropout)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=params["lr"],
                      weight_decay=0.01,
                      )
    num_train_optimization_steps = int(len(dataloader_train) * epochs)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=params["num_warmup_steps"],
                                                num_training_steps=num_train_optimization_steps)
    criterion = nn.BCEWithLogitsLoss()

    model.to(device)
    criterion.to(device)

    for epoch in range(epochs):
        loss, acc, auc, auc_val = train_epoch(model, dataloader_train, dataloader_val, optimizer, criterion, scheduler, device)
        print("epoch - {} train_loss - {:.3f} auc - {:.4f} auc-val: {:.4f}".format(epoch, loss, auc, auc_val))

    preds = []
    labels = []
    for item in tqdm(dataloader_val):
        x = item["x"].to(device).long()
        target_id = item["target_id"].to(device).long()
        part = item["part"].to(device).long()
        label = item["label"].to(device).float()
        elapsed_time = item["elapsed_time"].to(device).long()
        duration_previous_content = item["duration_previous_content"].to(device).long()

        output = model(x, target_id, part, elapsed_time, duration_previous_content)

        preds.extend(torch.nn.Sigmoid()(output[:, -1]).view(-1).data.cpu().numpy().tolist())
        labels.extend(label[:, -1].view(-1).data.cpu().numpy().tolist())

    auc_transformer = roc_auc_score(labels, preds)
    print("single transformer: {:.4f}".format(auc_transformer))
    df_oof = pd.DataFrame()
    # df_oof["row_id"] = df.loc[val_idx].index
    df_oof["predict"] = preds
    df_oof["target"] = df.loc[val_idx]["answered_correctly"].values

    df_oof.to_csv(f"{output_dir}/transformers1.csv", index=False)
    """
    df_oof2 = pd.read_csv("../output/ex_237/20201213110353/oof_train_0_lgbm.csv")
    df_oof2.columns = ["row_id", "predict_lgbm", "target"]
    df_oof2 = pd.merge(df_oof, df_oof2, how="inner")

    auc_lgbm = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values)
    print("lgbm: {:.4f}".format(auc_lgbm))

    print("ensemble")
    max_auc = 0
    max_nn_ratio = 0
    for r in np.arange(0, 1.05, 0.05):
        auc = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values*(1-r) + df_oof2["predict"].values*r)
        print("[nn_ratio: {:.2f}] AUC: {:.4f}".format(r, auc))

        if max_auc < auc:
            max_auc = auc
            max_nn_ratio = r
    print(len(df_oof2))
    """
    if not is_debug:
        mlflow.start_run(experiment_id=10,
                         run_name=os.path.basename(__file__))

        for key, value in params.items():
            mlflow.log_param(key, value)
        mlflow.log_metric("auc_val", auc_transformer)
        mlflow.end_run()
    torch.save(model.state_dict(), f"{output_dir}/transformers.pth")
    del model
    with open(f"{output_dir}/transformer_param.json", "w") as f:
        json.dump(params, f)
    if is_make_feature_factory:
        # feature factory
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"]["DurationPreviousContent"] = DurationPreviousContent(is_partial_fit=True)
        feature_factory_dict["user_id"]["ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_manager = FeatureFactoryManager(feature_factory_dict=feature_factory_dict,
                                                        logger=logger,
                                                        split_num=1,
                                                        model_id="all",
                                                        load_feature=not is_debug,
                                                        save_feature=not is_debug)

        ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                          dict_path="../feature_engineering/",
                                                          sequence_length=params["max_seq"],
                                                          logger=logger)
        df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")
        if is_debug:
            df = df.head(10000)
        df = df.sort_values(["user_id", "timestamp"]).reset_index(drop=True)
        feature_factory_manager.fit(df)
        df = feature_factory_manager.all_predict(df)
        for dicts in feature_factory_manager.feature_factory_dict.values():
            for factory in dicts.values():
                factory.logger = None
        feature_factory_manager.logger = None
        with open(f"{output_dir}/feature_factory_manager.pickle", "wb") as f:
            pickle.dump(feature_factory_manager, f)

        ff_for_transformer.fit(df)
        ff_for_transformer.logger = None
        with open(f"{output_dir}/feature_factory_manager_for_transformer.pickle", "wb") as f:
            pickle.dump(ff_for_transformer, f)
Ejemplo n.º 27
0
def train(net, dataloader, device, config):
    in_nchannel = len(dataloader.dataset)

    optimizer = optim.SGD(net.parameters(),
                          lr=config.lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, 0.95)

    crit = nn.BCEWithLogitsLoss()

    net.train()
    train_iter = iter(dataloader)
    # val_iter = iter(val_dataloader)
    logging.info(f'LR: {scheduler.get_lr()}')
    for i in range(config.max_iter):

        s = time()
        data_dict = next(train_iter)
        d = time() - s

        optimizer.zero_grad()
        init_coords = torch.zeros((config.batch_size, 4), dtype=torch.int)
        init_coords[:, 0] = torch.arange(config.batch_size)

        in_feat = torch.zeros((config.batch_size, in_nchannel))
        in_feat[torch.arange(config.batch_size), data_dict['labels']] = 1

        sin = ME.SparseTensor(
            feats=in_feat,
            coords=init_coords,
            allow_duplicate_coords=True,  # for classification, it doesn't matter
            tensor_stride=config.resolution,
        ).to(device)

        # Generate target sparse tensor
        cm = sin.coords_man
        target_key = cm.create_coords_key(ME.utils.batched_coordinates(
            data_dict['xyzs']),
                                          force_creation=True,
                                          allow_duplicate_coords=True)

        # Generate from a dense tensor
        out_cls, targets, sout = net(sin, target_key)
        num_layers, loss = len(out_cls), 0
        losses = []
        for out_cl, target in zip(out_cls, targets):
            curr_loss = crit(out_cl.F.squeeze(),
                             target.type(out_cl.F.dtype).to(device))
            losses.append(curr_loss.item())
            loss += curr_loss / num_layers

        loss.backward()
        optimizer.step()
        t = time() - s

        if i % config.stat_freq == 0:
            logging.info(
                f'Iter: {i}, Loss: {loss.item():.3e}, Depths: {len(out_cls)} Data Loading Time: {d:.3e}, Tot Time: {t:.3e}'
            )

        if i % config.val_freq == 0 and i > 0:
            torch.save(
                {
                    'state_dict': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'curr_iter': i,
                }, config.weights)

            scheduler.step()
            logging.info(f'LR: {scheduler.get_lr()}')

            net.train()
Ejemplo n.º 28
0
    def __init__(self, args):
        self.reconstruction_path = args.reconstruction_path
        if not os.path.exists(self.reconstruction_path):
            os.makedirs(self.reconstruction_path)

        self.beta = args.beta
        self.train_batch_size = args.train_batch_size
        self.test_batch_size = args.test_batch_size
        self.epochs = args.epochs
        self.early_stop = args.early_stop
        self.early_stop_observation_period = args.early_stop_observation_period
        self.use_scheduler = False
        self.print_training = args.print_training

        self.z_dim = args.z_dim
        # self.disc_input_dim = int(self.z_dim / 2)
        self.disc_input_dim = 22
        self.class_idx = range(0, 22)
        self.membership_idx = range(22, 44)
        self.style_idx = range(44, 64)
        # self.class_idx = range(0, self.disc_input_dim)
        # self.membership_idx = range(self.disc_input_dim, self.z_dim)

        self.nets = dict()

        if args.dataset in ['MNIST', 'Fashion-MNIST', 'CIFAR-10', 'SVHN']:
            if args.dataset in ['MNIST', 'Fashion-MNIST']:
                self.num_channels = 1
            elif args.dataset in ['CIFAR-10', 'SVHN']:
                self.num_channels = 3

            self.nets['encoder'] = module.VAEConvEncoder(self.z_dim, self.num_channels)
            self.nets['decoder'] = module.VAEConvDecoder(self.z_dim, self.num_channels)

        elif args.dataset in ['adult', 'location']:
            self.nets['encoder'] = module.VAEFCEncoder(args.encoder_input_dim, self.z_dim)
            self.nets['decoder'] = module.FCDecoder(args.encoder_input_dim, self.z_dim)

        self.discs = {
            'class_fz': module.ClassDiscriminator(self.z_dim, args.class_num),
            'class_cz': module.ClassDiscriminator(self.disc_input_dim, args.class_num),
            'class_mz': module.ClassDiscriminator(self.disc_input_dim, args.class_num),

            'membership_fz': module.MembershipDiscriminator(self.z_dim, 1),
            'membership_cz': module.MembershipDiscriminator(self.disc_input_dim, 1),
            'membership_mz': module.MembershipDiscriminator(self.disc_input_dim, 1),
        }

        self.recon_loss = self.get_loss_function()
        self.class_loss = nn.CrossEntropyLoss(reduction='sum')
        self.membership_loss = nn.BCEWithLogitsLoss(reduction='sum')

        # optimizer
        self.optimizer = dict()
        for net_type in self.nets:
            self.optimizer[net_type] = optim.Adam(self.nets[net_type].parameters(), lr=args.recon_lr,
                                                  betas=(0.5, 0.999))
        self.discriminator_lr = args.disc_lr
        for disc_type in self.discs:
            self.optimizer[disc_type] = optim.Adam(self.discs[disc_type].parameters(), lr=self.discriminator_lr,
                                                   betas=(0.5, 0.999))

        self.weights = {
            'recon': args.recon_weight,
            'class_cz': args.class_cz_weight,
            'class_mz': args.class_mz_weight,
            'membership_cz': args.membership_cz_weight,
            'membership_mz': args.membership_mz_weight,
        }

        self.scheduler_enc = StepLR(self.optimizer['encoder'], step_size=50, gamma=0.1)
        self.scheduler_dec = StepLR(self.optimizer['decoder'], step_size=50, gamma=0.1)

        # to device
        self.device = torch.device("cuda:{}".format(args.gpu_id))
        for net_type in self.nets:
            self.nets[net_type] = self.nets[net_type].to(self.device)
        for disc_type in self.discs:
            self.discs[disc_type] = self.discs[disc_type].to(self.device)

        self.disentangle = (self.weights['class_cz'] + self.weights['class_mz']
                            + self.weights['membership_cz'] + self.weights['membership_mz'] > 0)

        self.start_epoch = 0
        self.best_valid_loss = float("inf")
        # self.train_loss = 0
        self.early_stop_count = 0

        self.acc_dict = {
            'class_fz': 0, 'class_cz': 0, 'class_mz': 0,
            'membership_fz': 0, 'membership_cz': 0, 'membership_mz': 0,
        }
        self.best_acc_dict = {}

        if 'cuda' in str(self.device):
            cudnn.benchmark = True

        if args.resume:
            print('==> Resuming from checkpoint..')
            try:
                self.load()
            except FileNotFoundError:
                print('There is no pre-trained model; Train model from scratch')
Ejemplo n.º 29
0
try:
    model_ft.load_state_dict(torch.load('models/{}.pt'.format(filename))) #load weights if already completed
except:
    print('Starting from scratch..')

model_ft = model_ft.to(device)

df = dframe['train'].iloc[:,5:].copy()
df = df.replace(-1,0)
pos_weight = torch.Tensor([df[cl].sum()/df.shape[0] for cl in class_names])
if u_approach == 'ignore': #Use masked binary cross-entropy for first run
    criterion = nn.BCEWithLogitsLoss(reduction='none',pos_weight=pos_weight).to(device)
else:
    criterion = nn.BCEWithLogitsLoss(reduction='sum',pos_weight=pos_weight).to(device)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# # Pick AdamW optimizer - https://github.com/mpyrozhok/adamwr
# optimizer = adamw.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# epoch_size = np.round(num_samples / batch_size) # number of training examples/batch size
# #Cosine annealing: adjusting on batch update rather than epoch - https://github.com/mpyrozhok/adamwr
# scheduler = cosine_scheduler.CosineLRWithRestarts(optimizer, batch_size, epoch_size, restart_period=5, t_mult=1.2)
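
# Note on pos_weight above: it scales only the positive term of the per-class binary
# cross-entropy, which is how BCEWithLogitsLoss compensates for class imbalance.
# A small standalone demonstration, independent of the training code above:
import torch
import torch.nn as nn

logits = torch.zeros(1, 3)                  # sigmoid(0) = 0.5 for every class
targets = torch.ones(1, 3)                  # all-positive targets

plain = nn.BCEWithLogitsLoss(reduction='none')(logits, targets)
weighted = nn.BCEWithLogitsLoss(
    reduction='none', pos_weight=torch.tensor([1.0, 2.0, 4.0]))(logits, targets)

print(plain)     # ~0.6931 for each class
print(weighted)  # positive term scaled per class: ~0.6931, ~1.3863, ~2.7726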
Ejemplo n.º 30
0
    def train_model(self,
                    generator,
                    discriminator,
                    dataloader,
                    num_epochs=300):
        # Check whether a GPU is available
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print("Device:", device)

        # Set up the optimizers
        g_lr, d_lr = 0.0001, 0.0004
        beta1, beta2 = 0.0, 0.9
        g_optimizer = torch.optim.Adam(generator.parameters(), g_lr,
                                       [beta1, beta2])
        d_optimizer = torch.optim.Adam(discriminator.parameters(), d_lr,
                                       [beta1, beta2])

        # Define the loss function
        criterion = nn.BCEWithLogitsLoss(reduction='mean')

        # Hard-coded parameters
        mini_batch_size = 64

        # Move the networks to the GPU
        generator.to(device)
        discriminator.to(device)

        generator.train()  # set the model to training mode
        discriminator.train()  # set the model to training mode

        # Speed things up when the network architecture is mostly fixed
        torch.backends.cudnn.benchmark = True

        # Number of training images
        num_train_imgs = len(dataloader.dataset)
        batch_size = dataloader.batch_size

        # Initialize the iteration counter
        iteration = 1
        logs = []

        # Epoch loop
        for epoch in range(num_epochs):

            # Record the epoch start time
            t_epoch_start = time.time()
            epoch_g_loss = 0.0  # running generator loss for the epoch
            epoch_d_loss = 0.0  # running discriminator loss for the epoch

            print('-------------')
            print('Epoch {}/{}'.format(epoch, num_epochs))
            print('-------------')
            print('(train)')

            # Loop over minibatches from the dataloader
            for imges, y in dataloader:

                # --------------------
                # 1. Train the discriminator
                # --------------------
                # A minibatch of size 1 breaks batch normalization, so skip it
                if imges.size()[0] == 1:
                    continue

                # Send the data to the GPU if available
                imges = imges.to(device)

                # Create labels for real and fake samples
                # The last iteration of an epoch may have a smaller minibatch
                mini_batch_size = imges.size()[0]
                label_real = torch.full((mini_batch_size, ), 1.0).to(device)
                label_fake = torch.full((mini_batch_size, ), 0.0).to(device)

                # Score the real images
                d_out_real = discriminator(imges)

                # Generate fake images and score them
                input_z = torch.randn(mini_batch_size,
                                      self.latent_dim).to(device)
                input_z = input_z.view(input_z.size(0), input_z.size(1), 1, 1)
                fake_images = generator(input_z)
                d_out_fake = discriminator(fake_images)

                # Compute the discriminator loss
                d_loss_real = criterion(d_out_real.view(-1), label_real)
                d_loss_fake = criterion(d_out_fake.view(-1), label_fake)
                d_loss = d_loss_real + d_loss_fake

                # Backpropagation
                g_optimizer.zero_grad()
                d_optimizer.zero_grad()

                d_loss.backward()
                d_optimizer.step()

                # --------------------
                # 2. Train the generator
                # --------------------
                # Generate fake images and score them
                input_z = torch.randn(mini_batch_size, self.latent_dim).to(device)
                input_z = input_z.view(input_z.size(0), input_z.size(1), 1, 1)
                fake_images = generator(input_z)
                d_out_fake = discriminator(fake_images)

                # Compute the generator loss
                g_loss = criterion(d_out_fake.view(-1), label_real)

                # Backpropagation
                g_optimizer.zero_grad()
                d_optimizer.zero_grad()
                g_loss.backward()
                g_optimizer.step()

                # --------------------
                # 3. Logging
                # --------------------
                epoch_d_loss += d_loss.item()
                epoch_g_loss += g_loss.item()
                iteration += 1

            # Loss summary for the epoch
            t_epoch_finish = time.time()
            print('-------------')
            print(
                'epoch {} || Epoch_D_Loss:{:.4f} ||Epoch_G_Loss:{:.4f}'.format(
                    epoch, epoch_d_loss / batch_size,
                    epoch_g_loss / batch_size))
            print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))
            t_epoch_start = time.time()

        return generator, discriminator