Example #1
    def train_eval(self):
        params = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = Optimizer(params, args)
        task_dev_acc_dict = dict()
        task_test_err_dict = dict()
        for ep in range(1, self.args.epoch + 1):
            for task_id, train_data in enumerate(self.train_set):
                task_name = get_task(task_id)
                print(f'training {task_name} task ...')
                task_train_loss, task_train_acc = self.train_iter(
                    ep, task_id, train_data, optimizer)

                task_dev_acc = self.eval(task_id, self.dev_set[task_id])
                if task_id not in task_dev_acc_dict or task_dev_acc_dict[
                        task_id] < task_dev_acc:
                    task_dev_acc_dict[task_id] = task_dev_acc

                    task_test_acc = self.eval(task_id, self.test_set[task_id])
                    task_test_err_dict[task_id] = 1 - task_test_acc

                logger.info(
                    '[Epoch %d][Task %s] train loss: %.4f, lr: %f, Train ACC: %.4f, Dev ACC: %.4f, Best Dev ACC: %.4f, Best Test ERR: %.4f'
                    %
                    (ep, task_name, task_train_loss, optimizer.get_lr(),
                     task_train_acc, task_dev_acc, task_dev_acc_dict[task_id],
                     task_test_err_dict[task_id]))

            for tid, test_err in task_test_err_dict.items():
                logger.info('[Epoch %d][Task %s] Test Err: %.4f' %
                            (ep, get_task(tid), test_err))

        all_task_err = list(task_test_err_dict.values())
        logger.info('Avg Test Err: %.4f' % np.mean(all_task_err))
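The loop above does per-task model selection: a task's test error is recorded only at the point where its dev accuracy reaches a new best, and the final number is the mean of those recorded errors. A minimal sketch of just that bookkeeping, with hypothetical names:

def update_best(task_id, dev_acc, test_err, best_dev_acc, best_test_err):
    # Record the test error for a task only when its dev accuracy improves.
    if task_id not in best_dev_acc or best_dev_acc[task_id] < dev_acc:
        best_dev_acc[task_id] = dev_acc
        best_test_err[task_id] = test_err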
Example #2
    def train(self):
        params = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = Optimizer(params, args)
        patient = 0
        best_dev_acc, best_test_acc = 0, 0
        for ep in range(1, self.args.epoch + 1):
            train_loss, train_acc = self.train_iter(ep, self.train_set,
                                                    optimizer)

            dev_acc = self.eval(self.val_set)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                test_acc = self.eval(self.test_set)
                if test_acc > best_test_acc:
                    best_test_acc = test_acc
                patient = 0
            else:
                patient += 1

            logger.info(
                '[Epoch %d] train loss: %.4f, lr: %f, Train ACC: %.4f, Dev ACC: %.4f, Best Dev ACC: %.4f, Best Test ACC: %.4f, patient: %d'
                % (ep, train_loss, optimizer.get_lr(), train_acc, dev_acc,
                   best_dev_acc, best_test_acc, patient))

            if patient >= args.patient:
                break

        logger.info('Final Test ACC: %.4f' % best_test_acc)
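Example #2 is the single-task variant of the same idea: dev accuracy drives both model selection and early stopping through a patience counter. A stripped-down sketch of that pattern, where train_one_epoch and evaluate are hypothetical callables:

def fit(train_one_epoch, evaluate, max_epoch, patience):
    best_dev, waited = 0.0, 0
    for ep in range(1, max_epoch + 1):
        train_one_epoch(ep)
        dev_acc = evaluate()
        if dev_acc > best_dev:   # dev improved: remember it and reset patience
            best_dev, waited = dev_acc, 0
        else:                    # no improvement: consume one unit of patience
            waited += 1
        if waited >= patience:   # stop early once patience is exhausted
            break
    return best_dev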
Example #3
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 optimizer_type='Adagrad',
                 lr=.01,
                 weight_decay=0,
                 momentum=0,
                 eps=1e-6,
                 loss_type='TOP1',
                 clip_grad=-1,
                 dropout_input=.0,
                 dropout_hidden=.5,
                 batch_size=50,
                 use_cuda=True,
                 time_sort=False,
                 pretrained=None):
        """ The GRU4REC model

        Args:
            input_size (int): dimension of the gru input variables
            hidden_size (int): dimension of the gru hidden units
            output_size (int): dimension of the gru output variables
            num_layers (int): the number of layers in the GRU
            optimizer_type (str): optimizer type for GRU weights
            lr (float): learning rate for the optimizer
            weight_decay (float): weight decay for the optimizer
            momentum (float): momentum for the optimizer
            eps (float): eps for the optimizer
            loss_type (str): type of the loss function to use
            clip_grad (float): clip the gradient norm at clip_grad. No clipping if clip_grad = -1
            dropout_input (float): dropout probability for the input layer
            dropout_hidden (float): dropout probability for the hidden layer
            batch_size (int): mini-batch size
            use_cuda (bool): whether you want to use cuda or not
            time_sort (bool): whether to ensure the order of sessions is chronological (default: False)
            pretrained (modules.layer.GRU): pretrained GRU layer, if it exists (default: None)
        """

        # Initialize the GRU Layer
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size
        self.use_cuda = use_cuda
        if pretrained is None:
            self.gru = GRU(input_size,
                           hidden_size,
                           output_size,
                           num_layers,
                           dropout_input=dropout_input,
                           dropout_hidden=dropout_hidden,
                           use_cuda=use_cuda,
                           batch_size=batch_size)
        else:
            self.gru = pretrained

        # Initialize the optimizer
        self.optimizer_type = optimizer_type
        self.weight_decay = weight_decay
        self.momentum = momentum
        self.lr = lr
        self.eps = eps
        self.optimizer = Optimizer(self.gru.parameters(),
                                   optimizer_type=optimizer_type,
                                   lr=lr,
                                   weight_decay=weight_decay,
                                   momentum=momentum,
                                   eps=eps)

        # Initialize the loss function
        self.loss_type = loss_type
        self.loss_fn = LossFunction(loss_type, use_cuda)

        # gradient clipping (optional)
        self.clip_grad = clip_grad

        # etc
        self.time_sort = time_sort
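Assuming the surrounding class is named GRU4REC (as its docstring suggests), instantiation could look like the sketch below; n_items is a hypothetical item-vocabulary size, and the remaining values simply echo the defaults.

n_items = 40000  # hypothetical number of items in the catalogue
model = GRU4REC(input_size=n_items,
                hidden_size=100,
                output_size=n_items,
                loss_type='TOP1',
                batch_size=50,
                use_cuda=False)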
Example #4
    def train_eval(self):
        train_loader = DataLoader(
            self.train_set, batch_size=self.args.batch_size, shuffle=True)
        self.args.max_step = self.args.epoch * \
            (len(train_loader) // self.args.update_step)
        print('max step:', self.args.max_step)
        optimizer = Optimizer(
            filter(lambda p: p.requires_grad, self.model.parameters()), args)
        best_dev_metric, best_test_metric = dict(), dict()
        patient = 0
        for ep in range(1, 1 + self.args.epoch):
            train_loss = 0.
            self.model.train()
            t1 = time.time()
            train_right, train_pred, train_gold = 0, 0, 0
            for i, batcher in enumerate(train_loader):
                batch = batch_variable(batcher, self.vocabs)
                batch.to_device(self.args.device)

                pred_score = self.model(
                    batch.wd_ids, batch.ch_ids, batch.tag_ids, batch.bert_inps)
                loss = self.calc_loss(pred_score, batch.ner_ids)
                loss_val = loss.item()
                train_loss += loss_val

                sent_lens = batch.wd_ids.gt(0).sum(dim=1)
                gold_res = self.ner_gold(
                    batch.ner_ids, sent_lens, self.vocabs['ner'])
                pred_res = self.ner_pred(
                    pred_score, sent_lens, self.vocabs['ner'])
                nb_right, nb_pred, nb_gold = self.calc_acc(
                    pred_res, gold_res, return_prf=False)
                train_right += nb_right
                train_pred += nb_pred
                train_gold += nb_gold
                train_p, train_r, train_f = self.calc_prf(
                    train_right, train_pred, train_gold)

                if self.args.update_step > 1:
                    loss = loss / self.args.update_step

                loss.backward()

                if (i + 1) % self.args.update_step == 0 or (i == self.args.max_step - 1):
                    nn_utils.clip_grad_norm_(filter(lambda p: p.requires_grad, self.model.parameters()),
                                             max_norm=self.args.grad_clip)
                    optimizer.step()
                    self.model.zero_grad()

                logger.info('[Epoch %d] Iter%d time cost: %.2fs, lr: %.6f, train loss: %.3f, P: %.3f, R: %.3f, F: %.3f' % (
                    ep, i + 1, (time.time() - t1), optimizer.get_lr(), loss_val, train_p, train_r, train_f))

            dev_metric = self.evaluate('dev')
            if dev_metric['f'] > best_dev_metric.get('f', 0):
                best_dev_metric = dev_metric
                test_metric = self.evaluate('test')
                if test_metric['f'] > best_test_metric.get('f', 0):
                    # check_point = {'model': self.model.state_dict(), 'settings': args}
                    # torch.save(check_point, self.args.model_chkp)
                    best_test_metric = test_metric
                patient = 0
            else:
                patient += 1

            logger.info('[Epoch %d] train loss: %.4f, lr: %f, patient: %d, dev_metric: %s, test_metric: %s' % (
                ep, train_loss, optimizer.get_lr(), patient, best_dev_metric, best_test_metric))

            # if patient >= (self.args.patient // 2 + 1):  # decay lr after several epochs without dev improvement
            #     optimizer.lr_decay(0.95)

            if patient >= self.args.patient:  # early stopping
                break

        logger.info('Final Metric: %s' % best_test_metric)
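The update_step logic above implements gradient accumulation: each loss is divided by the number of accumulation steps, gradients are summed over several mini-batches, and the optimizer steps once per group after clipping. The same pattern in isolation, as a sketch with hypothetical model, optimizer, loader, and loss-function arguments:

import torch.nn.utils as nn_utils

def train_epoch_accumulated(model, optimizer, train_loader, compute_loss,
                            accum_steps=2, grad_clip=5.0):
    model.train()
    for i, batch in enumerate(train_loader):
        # Scale the loss so the accumulated gradients average over the group.
        loss = compute_loss(model, batch) / accum_steps
        loss.backward()
        if (i + 1) % accum_steps == 0:
            nn_utils.clip_grad_norm_(model.parameters(), max_norm=grad_clip)
            optimizer.step()
            model.zero_grad()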
Example #5
    def train_eval(self):
        train_loader = DataLoader(self.train_set,
                                  batch_size=self.args.batch_size,
                                  shuffle=True)
        self.args.max_step = self.args.epoch * (len(train_loader) //
                                                self.args.update_step)
        print('max step:', self.args.max_step)
        optimizer = Optimizer(
            filter(lambda p: p.requires_grad, self.model.parameters()), args)
        best_dev_metric, best_test_metric = dict(), dict()
        patient = 0
        for ep in range(1, 1 + self.args.epoch):
            train_loss = 0.
            self.model.train()
            t1 = time.time()
            train_head_acc, train_rel_acc, train_total_head = 0, 0, 0
            for i, batcher in enumerate(train_loader):
                batch = batch_variable(batcher, self.vocabs)
                batch.to_device(self.args.device)

                head_score, rel_score = self.model(batch.wd_ids, batch.ch_ids,
                                                   batch.tag_ids)
                loss = self.calc_loss(head_score, rel_score, batch.head_ids,
                                      batch.rel_ids, batch.wd_ids.gt(0))
                loss_val = loss.item()
                train_loss += loss_val

                head_acc, rel_acc, total_head = self.calc_acc(
                    head_score, rel_score, batch.head_ids, batch.rel_ids)
                train_head_acc += head_acc
                train_rel_acc += rel_acc
                train_total_head += total_head

                if self.args.update_step > 1:
                    loss = loss / self.args.update_step

                loss.backward()

                if (i + 1) % self.args.update_step == 0 or (
                        i == self.args.max_step - 1):
                    nn_utils.clip_grad_norm_(filter(lambda p: p.requires_grad,
                                                    self.model.parameters()),
                                             max_norm=self.args.grad_clip)
                    optimizer.step()
                    self.model.zero_grad()

                logger.info(
                    '[Epoch %d] Iter%d time cost: %.2fs, lr: %.6f, train loss: %.3f, head acc: %.3f, rel acc: %.3f'
                    % (ep, i + 1, (time.time() - t1), optimizer.get_lr(),
                       loss_val, train_head_acc / train_total_head,
                       train_rel_acc / train_total_head))

            dev_metric = self.evaluate('dev')
            if dev_metric['uf'] > best_dev_metric.get('uf', 0):
                best_dev_metric = dev_metric
                test_metric = self.evaluate('test')
                if test_metric['uf'] > best_test_metric.get('uf', 0):
                    # check_point = {'model': self.model.state_dict(), 'settings': args}
                    # torch.save(check_point, self.args.model_chkp)
                    best_test_metric = test_metric
                patient = 0
            else:
                patient += 1

            logger.info(
                '[Epoch %d] train loss: %.4f, lr: %f, patient: %d, dev_metric: %s, test_metric: %s'
                % (ep, train_loss, optimizer.get_lr(), patient,
                   best_dev_metric, best_test_metric))

            # if patient == (self.args.patient // 2 + 1):  # decay lr after several epochs without dev improvement
            #     optimizer.lr_decay(0.95)

            if patient >= self.args.patient:  # early stopping
                break

        logger.info('Final Metric: %s' % best_test_metric)
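calc_acc is not shown here; under the usual graph-based parser conventions (an assumption, not the original helper), head accuracy can be counted by taking the arg-max over candidate heads and masking out padding:

import torch

def arc_accuracy(head_score, head_ids, mask):
    # head_score: (batch, seq_len, seq_len) arc scores
    # head_ids:   (batch, seq_len) gold head indices
    # mask:       (batch, seq_len) boolean mask of real (non-padding) tokens
    pred_heads = head_score.argmax(dim=-1)
    n_right = ((pred_heads == head_ids) & mask).sum().item()
    n_total = mask.sum().item()
    return n_right, n_total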
Example #6
def train(model, train_data, dev_data, test_data, args, word_vocab,
          extwd_vocab, lbl_vocab):
    args.max_step = args.epoch * ((len(train_data) + args.batch_size - 1) //
                                  (args.batch_size * args.update_steps))
    optimizer = Optimizer(
        filter(lambda p: p.requires_grad, model.parameters()), args)
    best_dev_acc, best_test_acc = 0, 0
    patient = 0
    for ep in range(1, 1 + args.epoch):
        model.train()
        train_loss = 0.
        start_time = time.time()
        for i, batch_data in enumerate(
                batch_iter(train_data, args.batch_size, True)):
            batcher = batch_variable(batch_data, word_vocab, extwd_vocab,
                                     lbl_vocab)
            batcher = (x.to(args.device) for x in batcher)
            sent1, sent2, extsent1, extsent2, gold_lbl = batcher
            pred = model((sent1, sent2), (extsent1, extsent2))
            loss = criterion(pred, gold_lbl)
            if args.update_steps > 1:
                loss = loss / args.update_steps

            loss_val = loss.item()
            train_loss += loss_val

            loss.backward()

            if (i + 1) % args.update_steps == 0 or (i == args.max_step - 1):
                nn.utils.clip_grad_norm_(filter(lambda p: p.requires_grad,
                                                model.parameters()),
                                         max_norm=args.grad_clip)
                optimizer.step()
                model.zero_grad()

            train_acc = calc_acc(pred, gold_lbl) / len(batch_data)
            logger.info(
                'Iter%d time cost: %.2fs, lr: %.8f, train loss: %.3f, train acc: %.3f'
                % (i + 1, (time.time() - start_time), optimizer.get_lr(),
                   loss_val, train_acc))

        train_loss /= len(train_data)
        dev_acc = eval(model, dev_data, args, word_vocab, extwd_vocab,
                       lbl_vocab)
        logger.info('[Epoch %d] train loss: %.3f, lr: %f, DEV ACC: %.3f' %
                    (ep, train_loss, optimizer.get_lr(), dev_acc))

        if dev_acc > best_dev_acc:
            patient = 0
            best_dev_acc = dev_acc
            test_acc = eval(model, test_data, args, word_vocab, extwd_vocab,
                            lbl_vocab)
            logger.info('Test ACC: %.3f' % test_acc)
            if test_acc > best_test_acc:
                best_test_acc = test_acc
        else:
            patient += 1

        if patient > args.patient:
            break

    logger.info('Final Test ACC: %.3f' % best_test_acc)
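batch_iter is external to this snippet; a plausible minimal version that shuffles the data and yields fixed-size slices (an assumption about its behavior, not the original helper) would be:

import random

def batch_iter(data, batch_size, shuffle=False):
    # Yield successive mini-batches, optionally shuffling the data first.
    data = list(data)
    if shuffle:
        random.shuffle(data)
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]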
Example #7
    def __init__(self,
                 input_size,
                 if_embedding,
                 embedding_size,
                 hidden_size,
                 output_size,
                 num_layers=1,
                 optimizer_type='Adagrad',
                 lr=.01,
                 weight_decay=0,
                 momentum=0,
                 eps=1e-6,
                 loss_type='TOP1',
                 clip_grad=-1,
                 dropout_input=.0,
                 dropout_hidden=.5,
                 batch_size=50,
                 use_cuda=True,
                 cuda_id=1,
                 compress=False,
                 time_sort=False,
                 pretrained=None):
        """ The GRU4REC model

        Args:
            input_size (int): dimension of the gru input variables
            if_embedding (bool): whether to pass the input through an embedding layer
            embedding_size (int): dimension of the embedding layer (used when if_embedding is True)
            hidden_size (int): dimension of the gru hidden units
            output_size (int): dimension of the gru output variables
            num_layers (int): the number of layers in the GRU
            optimizer_type (str): optimizer type for GRU weights
            lr (float): learning rate for the optimizer
            weight_decay (float): weight decay for the optimizer
            momentum (float): momentum for the optimizer
            eps (float): eps for the optimizer
            loss_type (str): type of the loss function to use
            clip_grad (float): clip the gradient norm at clip_grad. No clipping if clip_grad = -1
            dropout_input (float): dropout probability for the input layer
            dropout_hidden (float): dropout probability for the hidden layer
            batch_size (int): mini-batch size
            use_cuda (bool): whether you want to use cuda or not
            cuda_id (int): index of the cuda device to use when use_cuda is True
            compress: path to a YAML compression schedule for distiller, or a falsy value to disable compression
            time_sort (bool): whether to ensure the order of sessions is chronological (default: False)
            pretrained (modules.layer.GRU): pretrained GRU layer, if it exists (default: None)
        """

        # Initialize the GRU Layer
        self.input_size = input_size
        self.if_embedding = if_embedding
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size
        self.use_cuda = use_cuda
        self.cuda_id = cuda_id
        self.device = torch.device(
            'cuda:%d' % cuda_id if use_cuda else 'cpu'
        )  # must specify cuda_id or it will be torch.cuda.current_device()
        print(self.device)
        if pretrained is None:
            self.gru = GRU(input_size,
                           if_embedding,
                           embedding_size,
                           hidden_size,
                           output_size,
                           num_layers,
                           dropout_input=dropout_input,
                           dropout_hidden=dropout_hidden,
                           batch_size=batch_size,
                           use_cuda=use_cuda,
                           cuda_id=cuda_id)
        else:
            self.gru = pretrained

        # Initialize the optimizer
        self.optimizer_type = optimizer_type
        self.weight_decay = weight_decay
        self.momentum = momentum
        self.lr = lr
        self.eps = eps

        self.compress = compress
        self.compression_scheduler = None
        if self.compress:
            # Create a CompressionScheduler and configure it from a YAML schedule file
            source = self.compress
            self.compression_scheduler = distiller.config.file_config(
                self.gru, None, self.compress)

        self.optimizer = Optimizer(self.gru.parameters(),
                                   optimizer_type=optimizer_type,
                                   lr=lr,
                                   weight_decay=weight_decay,
                                   momentum=momentum,
                                   eps=eps)

        # Initialize the loss function
        self.loss_type = loss_type
        self.loss_fn = LossFunction(loss_type, use_cuda, cuda_id)

        # gradient clipping (optional)
        self.clip_grad = clip_grad

        # etc
        self.time_sort = time_sort
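clip_grad is only stored here; it is presumably applied during the backward pass elsewhere in the class. A hedged sketch of how such a flag is typically honored, following the docstring's convention that -1 disables clipping:

import torch.nn as nn

def apply_grad_clip(parameters, clip_grad):
    # Clip the gradient norm only when clip_grad is positive; -1 means no clipping.
    if clip_grad > 0:
        nn.utils.clip_grad_norm_(parameters, max_norm=clip_grad)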