Example 1
        epoch_loss = 0
        model.train()
        if dataset_loader_crf:
            # first pass: batches from dataset_loader_crf are trained with onlycrf=True
            for f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v, SCRF_labels, mask_SCRF_labels, cnn_features in tqdm(
                itertools.chain.from_iterable(dataset_loader_crf), mininterval=2,
                desc=' - Tot it %d (epoch %d)' % (tot_length, args.start_epoch), leave=False, file=sys.stderr):

                f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, SCRF_labels, mask_SCRF_labels, cnn_features = packer.repack(f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v, SCRF_labels, mask_SCRF_labels, cnn_features, test=False)

                optimizer.zero_grad()

                loss = model(f_f, f_p, b_f, b_p, w_f, cnn_features, tg_v, mask_v,
                      mask_v.long().sum(0), SCRF_labels, mask_SCRF_labels, onlycrf=True)

                epoch_loss += utils.to_scalar(loss)

                loss.backward()
                nn.utils.clip_grad_norm(model.parameters(), args.clip_grad)
                optimizer.step()

        # second pass over dataset_loader with onlycrf=False (full training objective)
        for f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v, SCRF_labels, mask_SCRF_labels, cnn_features in tqdm(
                itertools.chain.from_iterable(dataset_loader), mininterval=2,
                desc=' - Tot it %d (epoch %d)' % (tot_length, args.start_epoch), leave=False, file=sys.stderr):

            f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, SCRF_labels, mask_SCRF_labels, cnn_features = packer.repack(f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v, SCRF_labels, mask_SCRF_labels, cnn_features, test=False)
            optimizer.zero_grad()

            loss = model(f_f, f_p, b_f, b_p, w_f, cnn_features, tg_v, mask_v,
                         mask_v.long().sum(0), SCRF_labels, mask_SCRF_labels, onlycrf=False)
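Both loops in Example 1 pass `mask_v.long().sum(0)` to the model as the batch of sequence lengths: summing a 0/1 padding mask over the time dimension recovers each sequence's true length. A tiny sketch of that idiom (the (seq_len, batch) layout is an assumption inferred from the `sum(0)` call, not shown in the snippet):

import torch

# assumed layout: mask_v is a (seq_len, batch) 0/1 padding mask
mask_v = torch.tensor([[1, 1], [1, 1], [1, 0]])
lengths = mask_v.long().sum(0)  # per-sequence lengths -> tensor([3, 2])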
Example 2
        epoch_loss = 0
        ner_model.train()

        for feature, tg, mask in tqdm(
                itertools.chain.from_iterable(dataset_loader), mininterval=2,
                desc=' - Tot it %d (epoch %d)' % (tot_length, args.start_epoch), leave=False, file=sys.stdout):

            fea_v, tg_v, mask_v = packer.repack_vb(feature, tg, mask)
            ner_model.zero_grad()
            scores, hidden = ner_model.forward(fea_v)
            loss = crit.forward(scores, tg_v, mask_v)
            loss.backward()
            nn.utils.clip_grad_norm(ner_model.parameters(), args.clip_grad)
            optimizer.step()
            epoch_loss += utils.to_scalar(loss)

        # update lr
        utils.adjust_learning_rate(optimizer, args.lr / (1 + (args.start_epoch + 1) * args.lr_decay))

        # average
        epoch_loss /= tot_length

        # eval & save check_point

        if 'f' in args.eva_matrix:
            dev_f1, dev_pre, dev_rec, dev_acc = evaluator.calc_score(ner_model, dev_dataset_loader)

            if dev_f1 > best_f1:
                patience_count = 0
                best_f1 = dev_f1
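Examples 1 and 2 (and the remaining snippets below) follow the same PyTorch epoch pattern: zero the gradients, run the forward pass, accumulate the scalar loss, back-propagate, clip the gradient norm, and step the optimizer. The sketch below distills that pattern with placeholder model, loader, and criterion names; note that `nn.utils.clip_grad_norm_` is the current spelling of the `clip_grad_norm` call used in the snippets (the underscore-free form has been deprecated since PyTorch 0.4).

import torch.nn as nn

def train_one_epoch(model, loader, criterion, optimizer, clip_grad=5.0):
    # generic epoch loop distilled from the examples above (placeholder names)
    model.train()
    epoch_loss = 0.0
    for features, targets in loader:
        optimizer.zero_grad()              # clear accumulated gradients
        scores = model(features)           # forward pass
        loss = criterion(scores, targets)  # scalar training loss
        loss.backward()                    # back-propagate
        nn.utils.clip_grad_norm_(model.parameters(), clip_grad)  # gradient clipping
        optimizer.step()                   # parameter update
        epoch_loss += loss.item()
    return epoch_loss / max(len(loader), 1)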
Example 3
    def train_epoch(self, cur_dataset, crf_no, crit_ner, optimizer, args):
        #cur_dataset = crf2train_dataloader[crf_no]

        self.ner_model.train()
        epoch_loss = 0

        num_sample = sum(map(lambda t: len(t), cur_dataset))

        train_corpus = [
            args.train_file[i].split("/")[-2] for i in self.crf2corpus[crf_no]
        ]
        print("Epoch: [{:d}/{:d}]".format(args.start_epoch, args.epoch - 1))
        print("Train corpus: ", train_corpus)

        if args.idea[:2] not in ['P2', 'P3']:
            data_iter = itertools.chain.from_iterable(cur_dataset)
        else:
            data_iter = iter(cur_dataset)

        for f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v, corpus_mask_v, reorder in tqdm(
                data_iter,
                mininterval=2,
                desc=' - Total it %d' % (num_sample),
                leave=False,
                file=sys.stdout):

            if args.idea[:2] not in ['P2', 'P3']:
                f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, corpus_mask_v = self.packer.repack_vb(
                    f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v,
                    corpus_mask_v)
            else:
                if args.idea in ['P23', 'P33']:
                    proba_dist, tg_v = tg_v
                f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v, corpus_mask_v, reorder = (
                    f_f.cuda(), f_p.cuda(), b_f.cuda(), b_p.cuda(), w_f.cuda(),
                    tg_v.cuda(), mask_v.cuda(), len_v.cuda(), corpus_mask_v.cuda(),
                    reorder.cuda())

            self.ner_model.zero_grad()
            scores = self.ner_model(f_f, f_p, b_f, b_p, w_f, crf_no,
                                    corpus_mask_v)

            if args.idea in ['P23', 'P33']:
                loss = crit_ner(scores, [proba_dist, tg_v],
                                mask_v,
                                corpus_mask_v,
                                idea=args.idea,
                                sigmoid=args.sigmoid,
                                mask_value=args.mask_value)
            else:
                loss = crit_ner(scores,
                                tg_v,
                                mask_v,
                                corpus_mask_v,
                                idea=args.idea,
                                sigmoid=args.sigmoid,
                                mask_value=args.mask_value)

            epoch_loss += utils.to_scalar(loss)
            if args.co_train:
                cf_p = f_p[0:-1, :].contiguous()
                cb_p = b_p[1:, :].contiguous()
                cf_y = w_f[1:, :].contiguous()
                cb_y = w_f[0:-1, :].contiguous()
                cfs, _ = self.ner_model.word_pre_train_forward(f_f, cf_p)
                loss = loss + args.lambda0 * self.crit_lm(cfs, cf_y.view(-1))
                cbs, _ = self.ner_model.word_pre_train_backward(b_f, cb_p)
                loss = loss + args.lambda0 * self.crit_lm(cbs, cb_y.view(-1))
            loss.backward()
            nn.utils.clip_grad_norm(self.ner_model.parameters(),
                                    args.clip_grad)
            optimizer.step()

        epoch_loss = epoch_loss / num_sample
        self.sample_cnter[crf_no] += 1

        print("training loss: {:.4f}".format(epoch_loss))
        return epoch_loss
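The `args.co_train` branch that closes Example 3 (and reappears in Examples 5 and 6) is the language-model co-training objective used by LM-LSTM-CRF-style taggers: the forward pass is asked to predict the next word, the backward pass the previous word, and both auxiliary losses are scaled by `args.lambda0` before being added to the tagging loss. A brief annotated restatement, assuming `w_f` is a (seq_len, batch) tensor of word indices:

# assumed layout: w_f holds word indices with shape (seq_len, batch)
cf_y = w_f[1:, :].contiguous()    # forward-LM target: the word at the next step
cb_y = w_f[0:-1, :].contiguous()  # backward-LM target: the word at the previous step
# each auxiliary loss joins the tagging loss with weight lambda0
loss = loss + args.lambda0 * self.crit_lm(cfs, cf_y.view(-1))
loss = loss + args.lambda0 * self.crit_lm(cbs, cb_y.view(-1))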
Example 4
def train_a_epoch(name, data, tag_idx, is_oov, model, optimizer, seq_criterion,
                  lm_f_criterion, lm_b_criterion, att_loss, gamma):
    evaluator = Evaluator(name, [0, 1],
                          main_label_name=cfg.POSITIVE_LABEL,
                          label2id=tag_idx,
                          conll_eval=True)
    t = tqdm(data, total=len(data))

    if is_oov[0] == 1:
        print("Yes, UNKNOWN token is out of vocab")
    else:
        print("No, UNKNOWN token is not out of vocab")

    for SENT, X, C, POS, Y, P in t:
        batch_size = len(SENT)
        # zero the parameter gradients
        optimizer.zero_grad()
        model.zero_grad()
        model.init_state(len(X))

        x_var, c_var, pos_var, y_var, lm_X = to_variables(X=X,
                                                          C=C,
                                                          POS=POS,
                                                          Y=Y)

        # note: newer NumPy rejects np.nan as a threshold; use sys.maxsize to print full arrays
        np.set_printoptions(threshold=np.nan)

        if cfg.CHAR_LEVEL == "Attention":
            lm_f_out, lm_b_out, seq_out, seq_lengths, emb, char_emb = model(
                x_var, c_var)
            unrolled_x_var = list(chain.from_iterable(x_var))

            not_oov_seq = [-1 if is_oov[idx] else 1 for idx in unrolled_x_var]
            char_att_loss = att_loss(
                emb.detach(), char_emb,
                Variable(torch.cuda.LongTensor(not_oov_seq))) / batch_size

        else:
            lm_f_out, lm_b_out, seq_out, seq_lengths = model(x_var, c_var)

        logger.debug("lm_f_out : {0}".format(lm_f_out))
        logger.debug("lm_b_out : {0}".format(lm_b_out))
        logger.debug("seq_out : {0}".format(seq_out))

        logger.debug("tensor X variable: {0}".format(x_var))

        # remove start and stop tags
        pred = argmax(seq_out)

        logger.debug("Predicted output {0}".format(pred))
        seq_loss = seq_criterion(
            seq_out,
            Variable(torch.LongTensor(y_var)).cuda()) / batch_size

        # to limit the vocab size of the sample sentence ( trick used to improve lm model)
        # TODO make sure that start and end symbol of sentence gets through this filtering.
        logger.debug("Sample input {0}".format(lm_X))
        if gamma != 0:
            lm_X_f = [x1d[1:] for x1d in lm_X]
            lm_X_b = [x1d[:-1] for x1d in lm_X]
            lm_X_f = list(chain.from_iterable(lm_X_f))
            lm_X_b = list(chain.from_iterable(lm_X_b))
            lm_f_loss = lm_f_criterion(
                lm_f_out.squeeze(),
                Variable(cuda.LongTensor(lm_X_f)).squeeze()) / batch_size
            lm_b_loss = lm_b_criterion(
                lm_b_out.squeeze(),
                Variable(cuda.LongTensor(lm_X_b)).squeeze()) / batch_size

            if cfg.CHAR_LEVEL == "Attention":
                total_loss = seq_loss + Variable(cuda.FloatTensor(
                    [gamma])) * (lm_f_loss + lm_b_loss) + char_att_loss
            else:
                total_loss = seq_loss + Variable(cuda.FloatTensor(
                    [gamma])) * (lm_f_loss + lm_b_loss)

        else:
            if cfg.CHAR_LEVEL == "Attention":
                total_loss = seq_loss + char_att_loss
            else:
                total_loss = seq_loss

        desc = "total_loss: {0:.4f} = seq_loss: {1:.4f}".format(
            to_scalar(total_loss), to_scalar(seq_loss))
        if gamma != 0:
            desc += " + gamma: {0} * (lm_f_loss: {1:.4f} + lm_b_loss: {2:.4f})".format(
                gamma, to_scalar(lm_f_loss), to_scalar(lm_b_loss))

        if cfg.CHAR_LEVEL == "Attention":
            desc += " + char_att_loss: {0:.4f}".format(
                to_scalar(char_att_loss))

        t.set_description(desc)

        preds = roll(pred, seq_lengths)
        for pred, x, y in zip(preds, X, Y):
            evaluator.append_data(to_scalar(total_loss), pred, x, y)

        total_loss.backward()
        if cfg.CLIP is not None:
            clip_grad_norm(model.parameters(), cfg.CLIP)

        optimizer.step()

    evaluator.classification_report()

    return evaluator, model
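In Example 4 the auxiliary-loss weight `gamma` is wrapped in `Variable(cuda.FloatTensor([gamma]))` before multiplying the language-model losses. A plain Python float scales a loss tensor just as well (and `Variable` is no longer needed on PyTorch >= 0.4), so the combination can be written more simply; a sketch using the same names as the snippet:

# gamma is an ordinary float; multiplying a tensor by it broadcasts automatically
total_loss = seq_loss + gamma * (lm_f_loss + lm_b_loss)
if cfg.CHAR_LEVEL == "Attention":
    total_loss = total_loss + char_att_loss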
Example 5
        
            if args.co_train:
                # shifted positions and word targets for the forward/backward language models
                cf_p = f_p[0:-1, :].contiguous()
                cb_p = b_p[1:, :].contiguous()
                cf_y = w_f[1:, :].contiguous()
                cb_y = w_f[0:-1, :].contiguous()

                cfs, _ = ner_model.word_pre_train_forward(f_f, cf_p)
                cbs, _ = ner_model.word_pre_train_backward(b_f, cb_p)

                cfs_loss = args.lambda0 * crit_lm(cfs, cf_y.view(-1))
                cbs_loss = args.lambda0 * crit_lm(cbs, cb_y.view(-1))
                lm_loss += utils.to_scalar(cfs_loss) + utils.to_scalar(cbs_loss)
                if i == 'train':
                    # training phase: add the LM losses on top of the CRF loss
                    crf_loss += utils.to_scalar(loss)
                    loss = loss + cfs_loss
                else:
                    # otherwise optimize only the language-model losses
                    loss = cfs_loss
                loss = loss + cbs_loss

            epoch_loss += utils.to_scalar(loss)
            loss.backward()
            nn.utils.clip_grad_norm_(ner_model.parameters(), args.clip_grad)
            optimizer.step()
Example 6
    def train(self, data, *args, **kwargs):
        tot_length = sum(map(lambda t: len(t), self.dataset_loader))
        loss_list = []
        acc_list = []
        best_f1 = []
        for i in range(self.file_num):
            best_f1.append(float('-inf'))

        best_pre = []
        for i in range(self.file_num):
            best_pre.append(float('-inf'))

        best_rec = []
        for i in range(self.file_num):
            best_rec.append(float('-inf'))

        start_time = time.time()
        epoch_list = range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epoch)
        patience_count = 0
        # note: the loop rebinds self.args.start_epoch to the current epoch on each iteration
        for epoch_idx, self.args.start_epoch in enumerate(epoch_list):

            sample_num = 1

            epoch_loss = 0
            self.ner_model.train()

            for sample_id in tqdm(range(sample_num),
                                  mininterval=2,
                                  desc=' - Tot it %d (epoch %d)' %
                                  (tot_length, self.args.start_epoch),
                                  leave=False,
                                  file=sys.stdout):

                self.file_no = random.randint(0, self.file_num - 1)
                cur_dataset = self.dataset_loader[self.file_no]

                for f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v in itertools.chain.from_iterable(
                        cur_dataset):

                    f_f, f_p, b_f, b_p, w_f, tg_v, mask_v = self.packer.repack_vb(
                        f_f, f_p, b_f, b_p, w_f, tg_v, mask_v, len_v)

                    self.ner_model.zero_grad()
                    scores = self.ner_model(f_f, f_p, b_f, b_p, w_f,
                                            self.file_no)
                    loss = self.crit_ner(scores, tg_v, mask_v)

                    epoch_loss += utils.to_scalar(loss)
                    if self.args.co_train:
                        cf_p = f_p[0:-1, :].contiguous()
                        cb_p = b_p[1:, :].contiguous()
                        cf_y = w_f[1:, :].contiguous()
                        cb_y = w_f[0:-1, :].contiguous()
                        cfs, _ = self.ner_model.word_pre_train_forward(
                            f_f, cf_p)
                        loss = loss + self.args.lambda0 * self.crit_lm(
                            cfs, cf_y.view(-1))
                        cbs, _ = self.ner_model.word_pre_train_backward(
                            b_f, cb_p)
                        loss = loss + self.args.lambda0 * self.crit_lm(
                            cbs, cb_y.view(-1))
                    loss.backward()
                    nn.utils.clip_grad_norm(self.ner_model.parameters(),
                                            self.args.clip_grad)
                    self.optimizer.step()

            epoch_loss /= tot_length

            # update lr
            utils.adjust_learning_rate(
                self.optimizer, self.args.lr /
                (1 + (self.args.start_epoch + 1) * self.args.lr_decay))

            # eval & save check_point
            if 'f' in self.args.eva_matrix:
                dev_f1, dev_pre, dev_rec, dev_acc = self.evaluate(
                    None, None, self.dev_dataset_loader[self.file_no],
                    self.file_no)
                loss_list.append(epoch_loss)
                acc_list.append(dev_acc)
                if dev_f1 > best_f1[self.file_no]:
                    patience_count = 0
                    best_f1[self.file_no] = dev_f1
                    best_pre[self.file_no] = dev_pre
                    best_rec[self.file_no] = dev_rec
                    self.track_list.append({
                        'loss': epoch_loss,
                        'dev_f1': dev_f1,
                        'dev_acc': dev_acc
                    })
                    print(
                        '(loss: %.4f, epoch: %d, dataset: %d, dev F1 = %.4f, dev pre = %.4f, dev rec = %.4f)'
                        % (epoch_loss, self.args.start_epoch, self.file_no,
                           dev_f1, dev_pre, dev_rec))
                    try:
                        self.save_model(None)
                    except Exception as inst:
                        print(inst)

                else:
                    patience_count += 1
                    print(
                        '(loss: %.4f, epoch: %d, dataset: %d, dev F1 = %.4f, dev pre = %.4f, dev rec = %.4f)'
                        % (epoch_loss, self.args.start_epoch, self.file_no,
                           dev_f1, dev_pre, dev_rec))
                    self.track_list.append({
                        'loss': epoch_loss,
                        'dev_f1': dev_f1,
                        'dev_acc': dev_acc
                    })

            print('epoch: ' + str(self.args.start_epoch) + '\t in ' +
                  str(self.args.epoch) + ' take: ' +
                  str(time.time() - start_time) + ' s')

            if patience_count >= self.args.patience and self.args.start_epoch >= self.args.least_iters:
                break
        return loss_list, acc_list
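Examples 2 and 6 both rescale the learning rate after every epoch with an inverse-time schedule, lr_t = lr / (1 + (epoch + 1) * lr_decay). The helper `utils.adjust_learning_rate` is not shown in the snippets; a minimal sketch of what such a helper typically does (an assumption, not the projects' actual implementation):

def adjust_learning_rate(optimizer, lr):
    # set the same learning rate on every parameter group of the optimizer
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# usage matching the call sites in Examples 2 and 6:
# adjust_learning_rate(optimizer, args.lr / (1 + (epoch + 1) * args.lr_decay))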