Code Example #1
class Transformer(nn.Module):
    def __init__(self,
                 vocab,
                 decoder_number,
                 model_file_path=None,
                 load_optim=False):
        """
        vocab: a Lang object, as defined in data_reader.py
        decoder_number: the number of classes
        """
        super(Transformer, self).__init__()
        self.iter = 0
        self.current_loss = 1000
        self.vocab = vocab
        self.vocab_size = vocab.n_words

        self.embedding = share_embedding(self.vocab, config.pretrain_emb)
        self.encoder = Encoder(config.emb_dim,
                               config.hidden_dim,
                               num_layers=config.hop,
                               num_heads=config.heads,
                               total_key_depth=config.depth,
                               total_value_depth=config.depth,
                               filter_size=config.filter,
                               universal=config.universal)

        ## decoders
        self.decoder = Decoder(config.emb_dim,
                               hidden_size=config.hidden_dim,
                               num_layers=config.hop,
                               num_heads=config.heads,
                               total_key_depth=config.depth,
                               total_value_depth=config.depth,
                               filter_size=config.filter,
                               max_length=config.max_length)

        self.decoder_key = nn.Linear(config.hidden_dim,
                                     decoder_number,
                                     bias=False)
        self.generator = Generator(config.hidden_dim, self.vocab_size)

        if config.weight_sharing:
            # Share the weight matrix between target word embedding & the final logit dense layer
            self.generator.proj.weight = self.embedding.lut.weight

        self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
        if config.label_smoothing:
            self.criterion = LabelSmoothing(size=self.vocab_size,
                                            padding_idx=config.PAD_idx,
                                            smoothing=0.1)
            self.criterion_ppl = nn.NLLLoss(ignore_index=config.PAD_idx)

        if (config.noam):
            optimizer = torch.optim.Adam(self.parameters(),
                                         lr=0,
                                         weight_decay=config.weight_decay,
                                         betas=(0.9, 0.98),
                                         eps=1e-9)
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer,
                milestones=[config.schedule * i for i in range(4)],
                gamma=0.1)
            self.scheduler = NoamOpt(config.hidden_dim, 1, 8000, optimizer,
                                     scheduler)
        else:
            self.optimizer = torch.optim.Adam(self.parameters(),
                                              lr=config.lr,
                                              weight_decay=config.weight_decay)
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
                self.optimizer,
                milestones=[config.schedule * i for i in range(4)],
                gamma=0.1)

        if model_file_path is not None:
            print("loading weights")
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            self.iter = state['iter']
            self.current_loss = state['current_loss']
            self.encoder.load_state_dict(state['encoder_state_dict'])
            self.decoder.load_state_dict(state['decoder_state_dict'])
            self.generator.load_state_dict(state['generator_dict'])
            self.embedding.load_state_dict(state['embedding_dict'])
            self.decoder_key.load_state_dict(state['decoder_key_state_dict'])
            if (load_optim):
                self.scheduler.load_state_dict(state['optimizer'])
            self.eval()

        self.model_dir = config.save_path
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_path = ""

    def save_model(self, running_avg_ppl, iter):
        self.iter = iter
        state = {
            'iter': iter,
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            'generator_dict': self.generator.state_dict(),
            'decoder_key_state_dict': self.decoder_key.state_dict(),
            'embedding_dict': self.embedding.state_dict(),
            'optimizer': self.scheduler.state_dict(),
            'current_loss': running_avg_ppl
        }
        model_save_path = os.path.join(
            self.model_dir, 'model_{}_{:.4f}'.format(iter, running_avg_ppl))
        self.best_path = model_save_path
        torch.save(state, model_save_path)

    def train_one_batch(self, batch, train=True):
        enc_batch, cause_batch = get_input_from_batch(batch)
        dec_batch = get_output_from_batch(batch)

        if (config.noam):
            self.scheduler.optimizer.zero_grad()
        else:
            self.optimizer.zero_grad()

        ## Encode
        mask_src = enc_batch.data.eq(config.PAD_idx).unsqueeze(1)

        emb_mask = self.embedding(batch["mask_input"])
        encoder_outputs = self.encoder(
            self.embedding(enc_batch) + emb_mask,
            mask_src)  # (batch_size, seq_len, hidden_size)

        # Decode
        sos_token = torch.LongTensor([config.SOS_idx] *
                                     enc_batch.size(0)).unsqueeze(1).to(
                                         config.device)
        dec_batch_shift = torch.cat(
            (sos_token, dec_batch[:, :-1]),
            1)  # make the first token of sentence be SOS

        mask_trg = dec_batch_shift.data.eq(config.PAD_idx).unsqueeze(1)
        pre_logit, attn_dist = self.decoder(self.embedding(dec_batch_shift),
                                            encoder_outputs,
                                            (mask_src, mask_trg))
        # shape: pre_logit --> (batch_size, seq_len, hidden_size)
        ## compute output dist
        logit = self.generator(pre_logit)

        loss = self.criterion(logit.contiguous().view(-1, logit.size(-1)),
                              dec_batch.contiguous().view(-1))

        loss_bce_program, program_acc = 0, 0
        # multi-task
        if config.emo_multitask:
            # add the loss function of label prediction
            # hidden state of the first (CLS-like) token, shape: (batch_size, hidden_size)
            q_h = encoder_outputs[:, 0]
            logit_prob = self.decoder_key(q_h).to('cuda')  # (batch_size, decoder_number)
            loss += nn.CrossEntropyLoss()(logit_prob,
                                          torch.LongTensor(
                                              batch['program_label']).cuda())
            loss_bce_program = nn.CrossEntropyLoss()(
                logit_prob,
                torch.LongTensor(batch['program_label']).cuda()).item()
            pred_program = np.argmax(logit_prob.detach().cpu().numpy(), axis=1)
            program_acc = accuracy_score(batch["program_label"], pred_program)

        if (config.label_smoothing):
            loss_ppl = self.criterion_ppl(
                logit.contiguous().view(-1, logit.size(-1)),
                dec_batch.contiguous().view(-1)).item()

        if (train):
            loss.backward()
            if config.noam:
                self.scheduler.step()  # NoamOpt sets the lr and steps the wrapped optimizer
            else:
                self.optimizer.step()
                self.scheduler.step()

        if (config.label_smoothing):
            return loss_ppl, math.exp(min(loss_ppl,
                                          100)), loss_bce_program, program_acc
        else:
            return loss.item(), math.exp(min(
                loss.item(), 100)), loss_bce_program, program_acc

    def compute_act_loss(self, module):
        R_t = module.remainders
        N_t = module.n_updates
        p_t = R_t + N_t
        avg_p_t = torch.sum(torch.sum(p_t, dim=1) / p_t.size(1)) / p_t.size(0)
        loss = config.act_loss_weight * avg_p_t.item()
        return loss

    def decoder_greedy(self, batch, max_dec_step=30):
        enc_batch, cause_batch = get_input_from_batch(batch)

        mask_src = enc_batch.data.eq(config.PAD_idx).unsqueeze(1)
        emb_mask = self.embedding(batch["mask_input"])
        encoder_outputs = self.encoder(
            self.embedding(enc_batch) + emb_mask, mask_src)

        ys = torch.ones(1, 1).fill_(config.SOS_idx).long().to(config.device)
        mask_trg = ys.data.eq(config.PAD_idx).unsqueeze(1)
        decoded_words = []
        for i in range(max_dec_step + 1):

            out, attn_dist = self.decoder(self.embedding(ys), encoder_outputs,
                                          (mask_src, mask_trg))

            prob = self.generator(out)
            _, next_word = torch.max(prob[:, -1], dim=1)
            decoded_words.append([
                '<EOS>' if ni.item() == config.EOS_idx else
                self.vocab.index2word[ni.item()] for ni in next_word.view(-1)
            ])
            next_word = next_word.data[0]

            ys = torch.cat([
                ys,
                torch.ones(1, 1).long().fill_(next_word).to(config.device)
            ],
                           dim=1).to(config.device)
            mask_trg = ys.data.eq(config.PAD_idx).unsqueeze(1)

        sent = []
        for _, row in enumerate(np.transpose(decoded_words)):
            st = ''
            for e in row:
                if e == '<EOS>':
                    break
                else:
                    st += e + ' '
            sent.append(st)
        return sent

    def beam_search(self, batch, max_dec_step=30):
        enc_batch, cause_batch = get_input_from_batch(batch)
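When config.label_smoothing is set, the Transformer above swaps its NLLLoss criterion for a LabelSmoothing module and keeps a plain NLLLoss (criterion_ppl) only for reporting perplexity. The repository's LabelSmoothing class is not shown in these snippets; the following is a minimal sketch of the usual KL-divergence formulation, with illustrative names that may not match the imported class.

import torch
import torch.nn as nn

class LabelSmoothingSketch(nn.Module):
    """Minimal label-smoothing loss over log-probabilities (sketch)."""

    def __init__(self, size, padding_idx, smoothing=0.1):
        super().__init__()
        self.criterion = nn.KLDivLoss(reduction='sum')
        self.size = size                    # vocabulary size
        self.padding_idx = padding_idx
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        # x: (N, vocab_size) log-probabilities, target: (N,) gold token ids
        true_dist = x.new_full(x.size(), self.smoothing / (self.size - 2))
        true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        true_dist[:, self.padding_idx] = 0
        true_dist[target.eq(self.padding_idx)] = 0  # zero out padding positions
        return self.criterion(x, true_dist)

# Hypothetical usage, mirroring train_one_batch above:
#   criterion = LabelSmoothingSketch(size=vocab_size, padding_idx=PAD_idx, smoothing=0.1)
#   loss = criterion(logit.view(-1, vocab_size), dec_batch.view(-1))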
Code Example #2
def main():

    args = constant.arg
    if not os.path.exists(constant.save_path):
        os.makedirs(constant.save_path)
    #device = torch.device("cuda", 3)
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    f1_avg = []
    for seed in range(10):
        train, val, val_nolab, emoji_tokens, emoji_vectors = get_data_for_bert(
            seed=seed, emoji_dim=args.emoji_dim)
        train_emojis, val_emojis, test_emojis = emoji_tokens
        train_examples = read_examples(train)
        val_examples = read_examples(val)
        if args.hier:
            max_seq_length = 40
        else:
            max_seq_length = 100
        train_features = convert_examples_to_features(
            examples=train_examples,
            seq_length=max_seq_length,
            tokenizer=tokenizer,
            hier=args.hier)

        val_features = convert_examples_to_features(examples=val_examples,
                                                    seq_length=max_seq_length,
                                                    tokenizer=tokenizer,
                                                    hier=args.hier)
        if args.hier:
            model = HierBertModel(
                context_encoder=args.context_encoder,
                dropout=args.dropout,
                double_supervision=args.double_supervision,
                emoji_vectors=emoji_vectors if args.emoji_emb else None)
        else:
            model = FlatBertModel()
        criterion = nn.CrossEntropyLoss()
        model.cuda()

        # Prepare optimizer
        if args.use_bertadam:
            param_optimizer = list(model.named_parameters())
            no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [
                {
                    'params': [p for n, p in param_optimizer
                               if not any(nd in n for nd in no_decay)],
                    'weight_decay': 0.01,
                },
                {
                    'params': [p for n, p in param_optimizer
                               if any(nd in n for nd in no_decay)],
                    'weight_decay': 0.0,
                },
            ]
            optimizer = BertAdam(
                optimizer_grouped_parameters,
                lr=5e-5,
                warmup=0.02,
                t_total=int(len(train_examples) / args.batch_size / 1 * 15))

        elif args.noam:
            optimizer = NoamOpt(
                constant.emb_dim,
                1,
                4000,
                torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=0,
                                 betas=(0.9, 0.98),
                                 eps=1e-9),
            )
        else:
            optimizer = Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=1e-3)

        #training
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.batch_size)
        #=====================training dataloader========================
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor(
            [f.input_type_ids for f in train_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features],
                                     dtype=torch.long)
        all_emoji_tokens = torch.tensor([emojis for emojis in train_emojis],
                                        dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids,
                                   all_emoji_tokens)

        train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(train_data,
                                      sampler=train_sampler,
                                      batch_size=args.batch_size)

        #=====================val dataloader========================
        all_input_ids = torch.tensor([f.input_ids for f in val_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in val_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor(
            [f.input_type_ids for f in val_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in val_features],
                                     dtype=torch.long)
        all_emoji_tokens = torch.tensor([emojis for emojis in val_emojis],
                                        dtype=torch.long)
        val_data = TensorDataset(all_input_ids, all_input_mask,
                                 all_segment_ids, all_label_ids,
                                 all_emoji_tokens)

        val_sampler = SequentialSampler(val_data)
        val_dataloader = DataLoader(val_data,
                                    sampler=val_sampler,
                                    batch_size=args.batch_size)

        best_f1 = 0
        early_stop = 0
        for _ in trange(100, desc="Epoch"):
            model.train()
            tr_loss = 0
            nb_tr_steps = 0

            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.cuda() for t in batch)
                input_ids, input_mask, segment_ids, label_ids, emoji_tokens = batch
                #print(input_ids.size())
                logits = model(
                    input_ids,
                    segment_ids,
                    input_mask,
                    args.sum_tensor,
                    train=True,
                    emoji_tokens=emoji_tokens if args.emoji_emb else None,
                    last_hidden=args.last_hidden)
                #print(logits.size(), label_ids.size())
                if isinstance(logits, (tuple, list)) and len(logits) == 2:
                    loss = (1 - args.super_ratio) * criterion(
                        logits[0], label_ids) + args.super_ratio * criterion(
                            logits[1], label_ids)
                else:
                    loss = criterion(logits, label_ids)

                loss.backward()
                tr_loss += loss.item()
                nb_tr_steps += 1
                optimizer.step()
                model.zero_grad()
            logger.info("***** Running evaluation *****")
            logger.info("  Num examples = %d", len(val_examples))
            logger.info("  Batch size = %d", args.batch_size)
            model.eval()
            all_logits = []
            all_labels = []
            for step, batch in enumerate(tqdm(val_dataloader,
                                              desc="Iteration")):
                batch = tuple(t.cuda() for t in batch)
                input_ids, input_mask, segment_ids, label_ids, emoji_tokens = batch
                logits = model(
                    input_ids,
                    segment_ids,
                    input_mask,
                    args.sum_tensor,
                    emoji_tokens=emoji_tokens if args.emoji_emb else None,
                    last_hidden=args.last_hidden)
                logits = logits.detach().cpu().numpy()
                label_ids = label_ids.to('cpu').numpy()
                all_logits.append(logits)
                all_labels.append(label_ids)
            accuracy, microPrecision, microRecall, microF1 = getMetrics(
                np.concatenate(all_logits),
                np.concatenate(all_labels),
                verbose=True)
            if best_f1 < microF1:
                best_f1 = microF1
                save_model(model, seed)
            else:
                early_stop += 1
                if early_stop > 5:
                    break
        print('EXPERIMENT:{}, best_f1:{}'.format(seed, best_f1))
        f1_avg.append(best_f1)

    file_summary = constant.save_path + "summary.txt"
    with open(file_summary, 'w') as the_file:
        header = "\t".join(
            ["SPLIT_{}".format(i) for i, _ in enumerate(f1_avg)])
        the_file.write(header + "\tAVG\n")
        ris = "\t".join(["{:.4f}".format(e) for i, e in enumerate(f1_avg)])
        the_file.write(ris + "\t{:.4f}\n".format(np.mean(f1_avg)))
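The BertAdam setup above keeps biases and LayerNorm parameters out of weight decay by splitting model.named_parameters() into two groups. BertAdam comes from the older pytorch-pretrained-bert package; a rough sketch of the same grouping with torch.optim.AdamW (the helper name and defaults here are illustrative, not part of the script) looks like this:

from torch.optim import AdamW

def build_adamw(model, lr=5e-5, weight_decay=0.01):
    # Exclude biases and LayerNorm parameters from weight decay, as above.
    no_decay = ('bias', 'LayerNorm.bias', 'LayerNorm.weight')
    decay_params, no_decay_params = [], []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if any(nd in name for nd in no_decay):
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    groups = [
        {'params': decay_params, 'weight_decay': weight_decay},
        {'params': no_decay_params, 'weight_decay': 0.0},
    ]
    return AdamW(groups, lr=lr)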
Code Example #3
File: detector.py  Project: wangjs9/strategy-based
class Emotion(nn.Module):
    def __init__(self, vocab, model_file_path=None, load_optim=False):
        super(Emotion, self).__init__()
        self.device = config.device
        self.vocab = vocab
        self.vocab_size = vocab.n_words
        self.embedding = share_embedding(self.vocab, True)
        self.encoder = Encoder(config.emb_dim,
                               config.hidden_dim,
                               num_layers=config.hop,
                               num_heads=config.heads,
                               total_key_depth=config.depth,
                               total_value_depth=config.depth,
                               filter_size=config.filter,
                               universal=True)

        self.context_encoder = Encoder(config.emb_dim,
                                       config.hidden_dim,
                                       num_layers=config.hop,
                                       num_heads=config.heads,
                                       total_key_depth=config.depth,
                                       total_value_depth=config.depth,
                                       filter_size=config.filter,
                                       universal=True)

        self.linear = nn.Linear(config.emb_dim, 2)
        # self.linear = nn.Linear(config.emb_dim, len(config.emo_map))

        optimizer = torch.optim.Adam(self.parameters(),
                                     lr=0,
                                     weight_decay=config.weight_decay,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=[config.schedule * i for i in range(4)],
            gamma=0.1)
        self.scheduler = NoamOpt(config.hidden_dim, 1, 8000, optimizer,
                                 scheduler)

        if model_file_path is not None:
            print("loading weights")
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            self.iter = state['iter']
            self.current_acc = state['current_acc']
            self.embedding.load_state_dict(state['embedding_dict'])
            self.encoder.load_state_dict(state['encoder_state_dict'])
            self.context_encoder.load_state_dict(
                state['context_encoder_state_dict'])
            if load_optim:
                try:
                    self.scheduler.load_state_dict(state['optimizer'])
                except AttributeError:
                    pass
        self.model_dir = config.save_path
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        self.best_path = ""

    def save_model(self, iter, acc, loss):
        self.iter = iter
        state = {
            'iter': self.iter,
            'embedding_dict': self.embedding.state_dict(),
            'encoder_state_dict': self.encoder.state_dict(),
            'context_encoder_state_dict': self.context_encoder.state_dict(),
            'optimizer': self.scheduler.state_dict(),
            'current_acc': acc
        }
        model_save_path = os.path.join(self.model_dir,
                                       'model_{}_{:.4f}'.format(iter, acc))
        self.best_path = model_save_path
        torch.save(state, model_save_path)

    def forward(self, batch, predict=False):
        context, target, emotion = batch
        emotion = (emotion > 0).long()  # binarize the emotion label
        self.scheduler.optimizer.zero_grad()
        context = self.context_encoder(context)
        context = torch.sum(context, dim=-2, keepdim=True)
        target = torch.cat((context, self.encoder(target)), dim=-2)
        target = torch.sum(target, dim=-2)
        pre_logit = torch.sigmoid(self.linear(target))
        logit = torch.softmax(pre_logit, dim=-1)

        predic = torch.max(logit.data, 1)[1]
        loss = -1
        train_acc = 0
        if not predict:
            loss = F.cross_entropy(logit, emotion)
            loss.backward()
            self.scheduler.step()
            train_acc = metrics.accuracy_score(emotion.cpu(), predic.cpu())

        return loss, train_acc, predic
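Both classes above hand an Adam optimizer created with lr=0 to NoamOpt, which sets the learning rate itself on every step (the other training scripts in this collection build it the same way, with a 4000-step warmup). A minimal sketch of the underlying schedule from "Attention Is All You Need" follows; the repository's NoamOpt is assumed to behave roughly like this, apart from also chaining the MultiStepLR scheduler passed to it here.

class NoamOptSketch:
    """lr = factor * model_size**-0.5 * min(step**-0.5, step * warmup**-1.5)"""

    def __init__(self, model_size, factor, warmup, optimizer):
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self.optimizer = optimizer  # exposed so callers can do opt.optimizer.zero_grad()
        self._step = 0

    def rate(self, step):
        return self.factor * (self.model_size ** -0.5 *
                              min(step ** -0.5, step * self.warmup ** -1.5))

    def step(self):
        self._step += 1
        lr = self.rate(self._step)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()

    def state_dict(self):
        return {'step': self._step, 'optimizer': self.optimizer.state_dict()}

    def load_state_dict(self, state):
        self._step = state['step']
        self.optimizer.load_state_dict(state['optimizer'])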
Code Example #4
def train(
    model,
    data_loader_train,
    data_loader_val,
    data_loader_test,
    vocab,
    patient=10,
    split=0,
    verbose=True,
):
    """ 
    Training loop
    Inputs:
        model: the model to be trained
        data_loader_train: training data loader
        data_loader_val: validation data loader
        vocab: vocabulary list
    Output:
        avg_best: best f1 score on validation data
    """
    if constant.USE_CUDA:
        device = torch.device("cuda:{}".format(constant.device))
        model.to(device)
    criterion = nn.CrossEntropyLoss()
    if constant.noam:
        opt = NoamOpt(
            constant.emb_dim,
            1,
            4000,
            torch.optim.Adam(model.parameters(),
                             lr=0,
                             betas=(0.9, 0.98),
                             eps=1e-9),
        )
    else:
        opt = torch.optim.Adam(model.parameters(), lr=constant.lr)

    ## TRAINING LOOP
    avg_best = 0
    cnt = 0
    for e in range(constant.max_epochs):
        model.train()
        loss_log = []
        f1_log = []

        pbar = tqdm(enumerate(data_loader_train), total=len(data_loader_train))
        for i, (X_1, X_2, X_3, x1_len, x2_len, x3_len, y, ind, X_text) in pbar:
            if constant.noam:
                opt.optimizer.zero_grad()
            else:
                opt.zero_grad()
            if x1_len is None:
                pred_prob = model(X_1, X_2, X_3)
            else:
                pred_prob = model(X_1, X_2, X_3, x1_len, x2_len, x3_len)

            if constant.double_supervision:
                loss = (1 - constant.super_ratio) * criterion(
                    pred_prob[0], y) + constant.super_ratio * criterion(
                        pred_prob[2], y)
            else:
                loss = criterion(pred_prob[0], y)

            if constant.act:
                R_t = pred_prob[2][0]
                N_t = pred_prob[2][1]
                p_t = R_t + N_t
                avg_p_t = torch.sum(
                    torch.sum(p_t, dim=1) / p_t.size(1)) / p_t.size(0)
                loss += constant.act_loss_weight * avg_p_t.item()
            loss.backward()
            opt.step()
            ## logging
            loss_log.append(loss.item())
            accuracy, microPrecision, microRecall, microF1 = getMetrics(
                pred_prob[0].detach().cpu().numpy(),
                y.cpu().numpy())
            f1_log.append(microF1)
            pbar.set_description(
                "(Epoch {}) TRAIN MICRO:{:.4f} TRAIN LOSS:{:.4f}".format(
                    (e + 1), np.mean(f1_log), np.mean(loss_log)))

        ## LOG
        if e % 1 == 0:
            microF1 = evaluate(model, criterion, data_loader_val, verbose)
            if microF1 > avg_best:
                avg_best = microF1
                save_model(model, split)
                predict(
                    model, criterion, data_loader_test,
                    split)  ## print the prediction with the highest Micro-F1
                cnt = 0
            else:
                cnt += 1
            if cnt == patient:
                break
            if avg_best == 1.0:
                break

            correct = 0
            loss_nb = 0

    return avg_best
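The constant.act branch above adds an Adaptive Computation Time (ACT) ponder cost built from the remainders R_t and update counts N_t returned by the model, the same quantity compute_act_loss derives from module.remainders and module.n_updates earlier. A standalone helper, assuming both tensors are shaped (batch, seq_len):

import torch

def act_ponder_cost(remainders, n_updates, act_loss_weight):
    # remainders, n_updates: (batch, seq_len) tensors from an ACT module
    p_t = remainders + n_updates
    # average over positions, then over the batch
    avg_p_t = torch.sum(torch.sum(p_t, dim=1) / p_t.size(1)) / p_t.size(0)
    return act_loss_weight * avg_p_t.item()

Because .item() converts the cost to a plain Python float, adding it to the loss (as these snippets do) shifts the reported value but contributes no gradient; dropping .item() would make the ponder term differentiable.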
Code Example #5
def train(model,
          data_loader_train,
          data_loader_val,
          data_loader_test,
          vocab,
          patient=10,
          split=0):
    """ 
    Training loop
    Inputs:
        model: the model to be trained
        data_loader_train: training data loader
        data_loader_val: validation data loader
        vocab: vocabulary list
    Output:
        avg_best: best f1 score on validation data
    """
    if (constant.USE_CUDA): model.cuda()
    criterion = nn.CrossEntropyLoss()
    if (constant.noam):
        opt = NoamOpt(
            constant.emb_dim, 1, 4000,
            torch.optim.Adam(model.parameters(),
                             lr=0,
                             betas=(0.9, 0.98),
                             eps=1e-9))
    else:
        opt = torch.optim.Adam(model.parameters(), lr=constant.lr)

    avg_best = 0
    cnt = 0
    for e in range(constant.max_epochs):
        model.train()
        loss_log = []
        f1_log = 0

        pbar = tqdm(enumerate(data_loader_train), total=len(data_loader_train))
        for i, (X, x_len, y, ind, X_text) in pbar:
            if constant.noam:
                opt.optimizer.zero_grad()
            else:
                opt.zero_grad()
            if x_len is None: pred_prob = model(X)
            else: pred_prob = model(X, x_len)

            loss = criterion(pred_prob[0], y)

            loss.backward()
            opt.step()

            ## logging
            loss_log.append(loss.item())
            accuracy, microPrecision, microRecall, microF1 = getMetrics(
                pred_prob[0].detach().cpu().numpy(),
                y.cpu().numpy())
            f1_log += microF1
            pbar.set_description(
                "(Epoch {}) TRAIN MICRO:{:.4f} TRAIN LOSS:{:.4f}".format(
                    (e + 1), f1_log / float(i + 1), np.mean(loss_log)))

        ## LOG
        if (e % 1 == 0):
            microF1 = evaluate(model, criterion, data_loader_val)
            if (microF1 > avg_best):
                avg_best = microF1
                save_model(model, split)
                predict(model, criterion, data_loader_test, "", split=split
                        )  ## print the prediction with the highest Micro-F1
                cnt = 0
            else:
                cnt += 1
            if (cnt == patient): break
            if (avg_best == 1.0): break

            correct = 0
            loss_nb = 0

    return avg_best
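getMetrics itself is not included in these snippets; judging from the call sites, it takes stacked prediction scores and gold labels and returns (accuracy, microPrecision, microRecall, microF1). A hypothetical stand-in using scikit-learn is sketched below; the real function may differ, for example by excluding an 'others' class from the micro average.

import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def get_metrics_sketch(pred_scores, gold_labels):
    # pred_scores: (n_samples, n_classes) logits or probabilities
    # gold_labels: (n_samples,) integer class ids
    preds = np.argmax(pred_scores, axis=1)
    acc = accuracy_score(gold_labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        gold_labels, preds, average='micro', zero_division=0)
    return acc, precision, recall, f1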
Code Example #6
def main(config):
    vocab_len = get_babi_vocab(config.task)
    train_iter, val_iter, test_iter = datasets.BABI20.iters(
        batch_size=config.batch_size,
        root='.data',
        memory_size=70,
        task=config.task,
        joint=False,
        tenK=False,
        only_supporting=False,
        sort=False,
        shuffle=True)
    model = BabiUTransformer(num_vocab=vocab_len,
                             embedding_size=config.emb,
                             hidden_size=config.emb,
                             num_layers=config.max_hops,
                             num_heads=config.heads,
                             total_key_depth=config.depth,
                             total_value_depth=config.depth,
                             filter_size=config.filter,
                             act=config.act)
    if (config.verbose):
        print(model)
        print("ACT", config.act)
    if (config.cuda): model.cuda()

    criterion = nn.CrossEntropyLoss()
    if (config.noam):
        opt = NoamOpt(
            config.emb, 1, 4000,
            torch.optim.Adam(model.parameters(),
                             lr=0,
                             betas=(0.9, 0.98),
                             eps=1e-9))
    else:
        opt = torch.optim.Adam(model.parameters(), lr=config.lr)

    if (config.verbose):
        acc_val, loss_val = evaluate(model, criterion, val_iter)
        print("RAND_VAL ACC:{:.4f}\t RAND_VAL LOSS:{:.4f}".format(
            acc_val, loss_val))
    correct = []
    loss_nb = []
    cnt_batch = 0
    avg_best = 0
    cnt = 0
    model.train()
    for b in train_iter:
        story, query, answer = b.story, b.query, b.answer.squeeze()
        if (config.cuda):
            story, query, answer = story.cuda(), query.cuda(), answer.cuda()
        if (config.noam):
            opt.optimizer.zero_grad()
        else:
            opt.zero_grad()
        pred_prob = model(story, query)
        loss = criterion(pred_prob[0], answer)
        if (config.act):
            R_t = pred_prob[2][0]
            N_t = pred_prob[2][1]
            p_t = R_t + N_t
            avg_p_t = torch.sum(
                torch.sum(p_t, dim=1) / p_t.size(1)) / p_t.size(0)
            loss += config.act_loss_weight * avg_p_t.item()

        loss.backward()
        opt.step()

        ## LOG
        loss_nb.append(loss.item())
        pred = pred_prob[1].data.max(1)[1]  # max along dim 1 returns (values, indices)
        correct.append(np.mean(pred.eq(answer.data).cpu().numpy()))
        cnt_batch += 1
        if (cnt_batch % 10 == 0):
            acc = np.mean(correct)
            loss_nb = np.mean(loss_nb)
            if (config.verbose):
                print("TRN ACC:{:.4f}\tTRN LOSS:{:.4f}".format(acc, loss_nb))

            acc_val, loss_val = evaluate(model, criterion, val_iter)
            if (config.verbose):
                print("VAL ACC:{:.4f}\tVAL LOSS:{:.4f}".format(
                    acc_val, loss_val))

            if (acc_val > avg_best):
                avg_best = acc_val
                weights_best = deepcopy(model.state_dict())
                cnt = 0
            else:
                cnt += 1
            if (cnt == 45): break
            if (avg_best == 1.0): break

            correct = []
            loss_nb = []
            cnt_batch = 0

    model.load_state_dict({name: weights_best[name] for name in weights_best})
    acc_test, loss_test = evaluate(model, criterion, test_iter)
    if (config.verbose):
        print("TST ACC:{:.4f}\tTST LOSS:{:.4f}".format(acc_test, loss_test))
    return acc_test
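evaluate(model, criterion, iterator) is likewise assumed rather than shown; from its uses it returns (accuracy, loss) over a torchtext bAbI iterator. A minimal sketch under that assumption, mirroring how the training loop above reads pred_prob:

import numpy as np
import torch

def evaluate_sketch(model, criterion, data_iter, use_cuda=False):
    model.eval()
    accs, losses = [], []
    with torch.no_grad():
        for b in data_iter:
            story, query, answer = b.story, b.query, b.answer.squeeze()
            if use_cuda:
                story, query, answer = story.cuda(), query.cuda(), answer.cuda()
            pred_prob = model(story, query)
            losses.append(criterion(pred_prob[0], answer).item())
            pred = pred_prob[1].data.max(1)[1]
            accs.append(np.mean(pred.eq(answer.data).cpu().numpy()))
    model.train()
    return np.mean(accs), np.mean(losses)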