Example #1
    def __init__(self, config, n_gpu, vocab, train_loader=None, val_loader=None):
        self.config = config
        self.vocab = vocab
        self.n_gpu = n_gpu
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Build model
        vocab_size = self.vocab.vocab_size()

        self.model = CNN_Text(self.config, vocab_size, self.config.n_label)
        self.model.to(device)

        if self.n_gpu > 1:
            self.model = nn.DataParallel(self.model)

        # Build optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr, weight_decay=0.0005)

        # Build criterion
        self.criterion = nn.CrossEntropyLoss()
Example #2
def main_train():
    def clean_str(string):
        string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
        string = re.sub(r"\'s", " \'s", string)
        string = re.sub(r"\'ve", " \'ve", string)
        string = re.sub(r"n\'t", " n\'t", string)
        string = re.sub(r"\'re", " \'re", string)
        string = re.sub(r"\'d", " \'d", string)
        string = re.sub(r"\'ll", " \'ll", string)
        string = re.sub(r",", " , ", string)
        string = re.sub(r"!", " ! ", string)
        string = re.sub(r"\(", " \( ", string)
        string = re.sub(r"\)", " \) ", string)
        string = re.sub(r"\?", " \? ", string)
        string = re.sub(r"\s{2,}", " ", string)
        return string

    TEXT = data.Field(sequential=True, lower=True, batch_first=True)
    TEXT.preprocessing = data.Pipeline(clean_str)
    LABEL = data.Field(sequential=False, use_vocab=False, batch_first=True)

    trainset, valset = MR.splits(data_path, fields=[("text", TEXT), ("label", LABEL)])
    TEXT.build_vocab(trainset)

    with open("text.field", 'wb') as f:
        dill.dump(TEXT, f)

    trainiter = data.BucketIterator(trainset, batch_size=batch_size, sort_key=lambda x: len(x.text),
                                    shuffle=True, device=device)

    valiter = data.BucketIterator(valset, batch_size=batch_size, sort_key=lambda x: len(x.text),
                                  shuffle=True, device=device)

    model = CNN_Text(channel_dim, len(TEXT.vocab), embed_dim, output_dim, kernel_sizes, is_static=False,
                     dropout_rate=dropout_rate)
    model = model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr, weight_decay=weight_decay)
    train_model(epochs, model, trainiter, valiter, optimizer, criterion)
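
Every example on this page calls a CNN_Text class that is defined elsewhere in its repository. As a rough reference, the following is a minimal sketch of a Kim-style text CNN matching the positional constructor used in this example; the signature, layer sizes, and pooling choices are assumptions, not the actual class from any of these projects.

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN_Text(nn.Module):
    def __init__(self, channel_dim, vocab_size, embed_dim, output_dim,
                 kernel_sizes, is_static=False, dropout_rate=0.5):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        if is_static:
            self.embed.weight.requires_grad = False  # keep pretrained vectors frozen
        # one convolution per kernel width, sliding over the token dimension
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, channel_dim, (k, embed_dim)) for k in kernel_sizes])
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(channel_dim * len(kernel_sizes), output_dim)

    def forward(self, x):                      # x: (batch, seq_len) token ids
        x = self.embed(x).unsqueeze(1)         # (batch, 1, seq_len, embed_dim)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        x = [F.max_pool1d(h, h.size(2)).squeeze(2) for h in x]  # max over time
        x = self.dropout(torch.cat(x, dim=1))
        return self.fc(x)                      # (batch, output_dim) logits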
Example #3
def main(args):
    # # Device configuration
    device = torch.device(
        'cuda:{}'.format(args.gpu) if torch.cuda.is_available() else 'cpu')
    num_epochs = 80
    num_classes = 8
    learning_rate = 0.08
    num_views = 3
    num_layers = 4
    data_path = args.dir
    file_list = [
        './data/train_web_content.npy', './data/train_web_links.npy',
        './data/train_web_title.npy', './data/test_web_content.npy',
        './data/test_web_links.npy', './data/test_web_title.npy',
        './data/train_label.npy', './data/test_label.npy'
    ]
    if not all(map(os.path.exists, file_list)):
        print(
            'Raw data has not been pre-processed! Start pre-processing the raw data.'
        )
        data_loader.preprocess(data_path)
    else:
        print('Loading the existing data set...')
    train_dataset = data_loader.Load_datasets('train', num_classes)
    train_loader = DataLoader(train_dataset,
                              batch_size=32,
                              shuffle=True,
                              num_workers=4)
    input_dims = np.array(train_dataset.data[0]).shape
    model = CNN_Text(input_dims, [64, 32, 32, 32], [1, 2, 3, 4], num_classes,
                     0.5, num_layers, num_views).to(device)
    model = model.double()
    model.device = device
    model.learning_rate = learning_rate
    model.epoch = 0
    if args.model is not None:
        model.load_state_dict(torch.load(args.model))
        print('Successfully loaded pre-trained model!')
    # train the model until the model is fully trained
    train_model(model, train_loader, num_epochs)
    print('Finished training!')
    evaluation(model)
Example #4
def build_model(args):
    if args.clf_model.lower() == "cnn":
        # use the DistilBERT tokenizer just for text tokenization
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        model = CNN_Text(args)

    elif args.clf_model.lower() == "robert":
        print("name is {}".format(args.model_name_or_path))
        tokenizer = RobertaTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)

        config = RobertaConfig.from_pretrained(args.model_name_or_path,
                                               num_labels=args.num_labels,
                                               finetuning_task=args.task_name)

        model = RobertaForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)
        # freeze the transformer weights
        if args.freeze:
            for n, p in model.named_parameters():
                if "bert" in n:
                    p.requires_grad = False
    elif args.clf_model.lower() == "bert":
        tokenizer = BertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)

        config = BertConfig.from_pretrained(args.model_name_or_path,
                                            num_labels=args.num_labels,
                                            finetuning_task=args.task_name)

        model = BertForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)
        # freeze the weight for transformers
        # if args.freeze:
        #     for n, p in model.named_parameters():
        #         if "bert" in n:
        #             p.requires_grad = False

    else:
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        config = DistilBertConfig.from_pretrained(
            args.model_name_or_path,
            num_labels=args.num_labels,
            finetuning_task=args.task_name)
        model = DistilBertForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)

    model.expand_class_head(args.multi_head)
    model = model.to(args.device)
    return tokenizer, model
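
For context, here is a hedged sketch of how the argparse namespace consumed by build_model() might be assembled; only the flag names referenced in the function are taken from the example, and every default value is illustrative.

import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument("--clf_model", default="distilbert")   # "cnn", "robert", "bert", ...
parser.add_argument("--model_name_or_path", default="distilbert-base-uncased")
parser.add_argument("--do_lower_case", action="store_true")
parser.add_argument("--num_labels", type=int, default=2)
parser.add_argument("--task_name", default="sst-2")
parser.add_argument("--multi_head", type=int, default=1)
parser.add_argument("--freeze", action="store_true")
args = parser.parse_args()
args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer, model = build_model(args)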
Example #5
    tokens.append('oov')
    tokens.append('bos')
    word2id = {word: idx for idx, word in enumerate(tokens)}
    args.embed_num = len(tokens)
    args.class_num = 2
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]

    #print("\nParameters:")
    #for attr, value in sorted(args.__dict__.items()):
    #	print("\t{}={}".format(attr.upper(), value))

    model = CNN_Text(args)

    if torch.cuda.is_available():
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    report_interval = 5000
    for epoch in range(1, args.epochs + 1):
        train_batch_i = 0
        batch_counter = 0
        accumulated_loss = 0
        train_sents_scaned = 0
        train_num_correct = 0
        model.train()
        print('--' * 20)
        start_time = time.time()
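
For illustration only, a small sketch of how the word2id map with its 'oov' fallback could turn a tokenized sentence into the LongTensor input a CNN_Text model expects; the helper name and the batch dimension are assumptions.

import torch

def sentence_to_ids(tokens, word2id, oov_token='oov'):
    # fall back to the 'oov' id for words that were not in the vocabulary
    oov_id = word2id[oov_token]
    ids = [word2id.get(tok, oov_id) for tok in tokens]
    return torch.tensor([ids], dtype=torch.long)  # shape: (1, seq_len)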
Example #6
text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)
train_data, dev_data = MR.splits(text_field, label_field)
text_field.build_vocab(train_data, dev_data)
label_field.build_vocab(train_data, dev_data)

args = Args()
args.dropout = 0.5
args.max_norm = 3.0

args.embed_dim = 128
args.kernel_num = 100
args.kernel_sizes = '3,4,5'
args.static = False
args.snapshot = 'snapshot/best.pt'
args.embed_num = len(text_field.vocab)
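# torchtext's label vocab includes an extra <unk> entry, hence the -1 below (assumption about this setup)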
args.class_num = len(label_field.vocab) - 1
args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]

model = CNN_Text(args)
model.load_state_dict(torch.load(args.snapshot, map_location='cpu'))
model = model.to(device)


@app.route('/cls/<text>')
def classify_text(text):
    app.logger.warning(text)
    result, conf = predict(text, model, text_field, label_field, device)
    app.logger.warning(conf)
    return result
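
The route above depends on a predict() helper that is not shown. Below is a minimal sketch of what such a helper could look like with legacy torchtext fields; the tokenization, the '<pad>' padding for short inputs, and the +1 label-index offset are assumptions about this setup.

import torch
import torch.nn.functional as F

def predict(text, model, text_field, label_field, device, min_len=5):
    model.eval()
    tokens = text_field.preprocess(text)          # lower-cases and tokenizes
    if len(tokens) < min_len:                     # pad so the widest conv kernel fits
        tokens += ['<pad>'] * (min_len - len(tokens))
    x = torch.tensor([[text_field.vocab.stoi[t] for t in tokens]],
                     dtype=torch.long, device=device)
    with torch.no_grad():
        probs = F.softmax(model(x), dim=1)
    conf, pred = probs.max(dim=1)
    # the label vocab reserves index 0 for <unk>, hence the +1 offset (assumption)
    label = label_field.vocab.itos[pred.item() + 1]
    return label, conf.item()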
Example #7
    #use CUDA to speed up
    use_cuda = torch.cuda.is_available()

    #get data
    train_loader = Data.DataLoader(dataset=CustomDataset(path="train.json",
                                                         balance=False),
                                   batch_size=BATCH_SIZE,
                                   shuffle=True)
    test_loader = Data.DataLoader(dataset=CustomDataset(path="test.json",
                                                        balance=False),
                                  batch_size=BATCH_SIZE,
                                  shuffle=True)

    #initialize model
    cnn = CNN_Text()
    if use_cuda:
        cnn = cnn.cuda()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR, weight_decay=0.0005)

    #train
    for epoch in range(EPOCH):
        print("epoch :")
        if epoch % 5 == 0:
            test(cnn, test_loader, use_cuda)
        for step, data in enumerate(train_loader):
            vec, lens, label = data
            #print(vec.shape)
            if use_cuda:
                vec = vec.cuda()
                label = label.cuda()
Example #8
    def train(self, m_2, m_3, m_4):
        word_dict, label_dict = self.divide_two_dict(m_2)
        if self.hyperparameter_1.word_embedding:
            path = "word2vec/glove.6B.100d.txt"
            print("loading word2vec ")
            word_vecs = self.load_my_vector(path, word_dict.m_list)
            print("new words already in word2vec:" + str(len(word_vecs)))
            print("loading unknow word2vec and convert to list... ")
            word_vecs = self.add_unknow_words_by_average(
                word_vecs, word_dict.m_list, k=self.hyperparameter_1.embed_dim)
            print("unknown word2vec load ! and converted to list...")
            # if self.hyperparameter_1.word_embedding:
            self.hyperparameter_1.pretrained_weight = word_vecs
            # pretrained_weight = np.array(self.hyperparameter_1.pretrained_weight)
            # self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))

        # self.nn = network(2, 2, 2, hidden_layer_weights=None, hidden_layer_bias=None, output_layer_weights=None, output_layer_bias=None)
        train_example = self.out_example_index(m_2, m_2)
        dev_example = self.out_example_index(m_2, m_3)
        test_example = self.out_example_index(m_2, m_4)

        random.shuffle(train_example)
        random.shuffle(dev_example)
        random.shuffle(test_example)

        self.model = CNN_Text(self.hyperparameter_1)
        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=self.hyperparameter_1.lr)
        train_example_idx = self.set_index(train_example)
        random.shuffle(train_example_idx)
        steps = 0
        self.model.train()
        for epoch in range(1, self.hyperparameter_1.epochs + 1):
            batchBlock = self.set_batchBlock(train_example)
            for every_batchBlock in range(batchBlock):
                exams = []
                start_pos = every_batchBlock * self.hyperparameter_1.batch_size
                end_pos = (every_batchBlock +
                           1) * self.hyperparameter_1.batch_size
                if end_pos > len(train_example):
                    end_pos = len(train_example)
                for idx in range(start_pos, end_pos):
                    exams.append(train_example[train_example_idx[idx]])
                max_len = self.get_max_sentence_len(exams)
                optimizer.zero_grad()
                feat, label = self.batch(exams,
                                         self.hyperparameter_1.batch_size,
                                         max_len)
                label = label.view(len(exams))
                logit = self.model.forward(feat)
                loss = F.cross_entropy(logit, label)
                loss.backward()
                optimizer.step()
                steps += 1
                if steps % self.hyperparameter_1.log_interval == 0:
                    train_size = len(train_example)
                    corrects = (torch.max(logit, 1)[1].view(
                        label.size()).data == label.data).sum().item()
                    accuracy = corrects / self.hyperparameter_1.batch_size * 100.0
                    sys.stdout.write(
                        '\rBatch[{}/{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.
                        format(steps, train_size, loss.item(), accuracy,
                               corrects, self.hyperparameter_1.batch_size))
                if steps % self.hyperparameter_1.test_interval == 0:
                    self.eval(dev_example, self.model)
                if steps % self.hyperparameter_1.save_interval == 0:
                    if not os.path.isdir(self.hyperparameter_1.save_dir):
                        os.makedirs(self.hyperparameter_1.save_dir)
                    save_prefix = os.path.join(self.hyperparameter_1.save_dir,
                                               'snapshot')
                    save_path = '{}_steps{}.pt'.format(save_prefix, steps)
                    torch.save(self.model, save_path)
Example #9
class Classifier:
    def __init__(self):
        self.hyperparameter_1 = Hyperparameter()
        self.inst = inst()
        self.Read_inst = Read_inst()
        self.aphabet = alphabet()
        self.example = example()

    def divide_two_dict(self, m_2):
        all_w = []
        all_l = []
        for inst in m_2:
            for w in inst.m_word:
                all_w.append(w)
        all_w.append(self.hyperparameter_1.unknow)
        all_w.append(self.hyperparameter_1.padding)
        word_alphabet = self.aphabet.add_dict(all_w)
        for inst in m_2:
            for w in inst.m_label:
                all_l.append(w)
        label_alphabet = self.aphabet.add_dict(all_l)
        return word_alphabet, label_alphabet

    def load_my_vector(self, path, vocab):
        word_vecs = {}
        with open(path, encoding='UTF-8') as f:
            lines = f.readlines()[1:]
            for line in lines:
                values = line.split(' ')
                word = values[0]
                if word in vocab:
                    vector = []
                    for count, val in enumerate(values):
                        if count == 0:
                            continue
                        vector.append(float(val))
                    word_vecs[word] = vector
        return word_vecs

    def add_unknow_words_by_uniform(self, word_vecs, vocab, k=100):
        list_word2vec = []
        oov = 0
        iov = 0
        for word in vocab:
            if word not in word_vecs:
                oov += 1
                word_vecs[word] = np.random.uniform(-0.25, 0.25,
                                                    k).round(6).tolist()
                list_word2vec.append(word_vecs[word])
            else:
                iov += 1
                list_word2vec.append(word_vecs[word])
        return list_word2vec

    def add_unknow_words_by_average(self, word_vecs, vocab, k=100):
        word_vecs_numpy = []
        for word in vocab:
            if word in word_vecs:
                word_vecs_numpy.append(word_vecs[word])
        col = []
        for i in range(k):
            total = 0.0
            for j in range(len(word_vecs_numpy)):
                total += word_vecs_numpy[j][i]
                total = round(total, 6)
            col.append(total)
        zero = []
        for m in range(k):
            avg = col[m] / (len(word_vecs_numpy))
            avg = round(avg, 6)
            zero.append(float(avg))
        list_word2vec = []
        oov = 0
        iov = 0
        for word in vocab:
            if word not in word_vecs:
                oov += 1
                word_vecs[word] = zero
                list_word2vec.append(word_vecs[word])
            else:
                iov += 1
                list_word2vec.append(word_vecs[word])
        return list_word2vec

    def get_max_sentence_len(self, all_example):
        max_sentence_len = 0
        for exam in all_example:
            if max_sentence_len < len(exam.m_word_index):
                max_sentence_len = len(exam.m_word_index)
        return max_sentence_len

    def batch(self, examples, batch_size, max_len):
        for exam in examples:
            if len(exam.m_word_index) == max_len:
                continue
            for i in range(max_len - len(exam.m_word_index)):
                exam.m_word_index.append(self.hyperparameter_1.padding_id)
        minibatch_word = []
        minibatch_label = []
        for exam in examples:
            minibatch_word.append(exam.m_word_index)
            minibatch_label.append(exam.m_label_index)

            if len(minibatch_word) % batch_size == 0:
                minibatch_word = Variable(torch.LongTensor(minibatch_word))
                minibatch_label = Variable(torch.LongTensor(minibatch_label))
                return minibatch_word, minibatch_label
        if minibatch_word or minibatch_label:
            minibatch_word = Variable(torch.LongTensor(minibatch_word))
            minibatch_label = Variable(torch.LongTensor(minibatch_label))
            return minibatch_word, minibatch_label

    def set_batchBlock(self, examples):
        if len(examples) % self.hyperparameter_1.batch_size == 0:
            batchBlock = len(examples) // self.hyperparameter_1.batch_size
        else:
            batchBlock = len(examples) // self.hyperparameter_1.batch_size + 1
        return batchBlock

    def set_index(self, examples):
        index = []
        for i in range(len(examples)):
            index.append(i)
        return index

    def out_example_index(self, m_2, m_3):
        word_dict, label_dict = self.divide_two_dict(m_2)
        all_example = []
        for i in m_3:
            b = example()
            b.m_label_index.append(label_dict.dict[i.m_label])
            for j in i.m_word:
                if j not in word_dict.dict:
                    b.m_word_index.append(
                        word_dict.dict[self.hyperparameter_1.unknow])
                else:
                    b.m_word_index.append(word_dict.dict[j])
            all_example.append(b)
        self.hyperparameter_1.unknow_id = word_dict.dict[
            self.hyperparameter_1.unknow]
        self.hyperparameter_1.padding_id = word_dict.dict[
            self.hyperparameter_1.padding]
        self.hyperparameter_1.vocab_num = len(word_dict.m_list)
        return all_example

    def train(self, m_2, m_3, m_4):
        word_dict, label_dict = self.divide_two_dict(m_2)
        if self.hyperparameter_1.word_embedding:
            path = "word2vec/glove.6B.100d.txt"
            print("loading word2vec ")
            word_vecs = self.load_my_vector(path, word_dict.m_list)
            print("new words already in word2vec:" + str(len(word_vecs)))
            print("loading unknow word2vec and convert to list... ")
            word_vecs = self.add_unknow_words_by_average(
                word_vecs, word_dict.m_list, k=self.hyperparameter_1.embed_dim)
            print("unknown word2vec load ! and converted to list...")
            # if self.hyperparameter_1.word_embedding:
            self.hyperparameter_1.pretrained_weight = word_vecs
            # pretrained_weight = np.array(self.hyperparameter_1.pretrained_weight)
            # self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))

        # self.nn = network(2, 2, 2, hidden_layer_weights=None, hidden_layer_bias=None, output_layer_weights=None, output_layer_bias=None)
        train_example = self.out_example_index(m_2, m_2)
        dev_example = self.out_example_index(m_2, m_3)
        test_example = self.out_example_index(m_2, m_4)

        random.shuffle(train_example)
        random.shuffle(dev_example)
        random.shuffle(test_example)

        self.model = CNN_Text(self.hyperparameter_1)
        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=self.hyperparameter_1.lr)
        train_example_idx = self.set_index(train_example)
        random.shuffle(train_example_idx)
        steps = 0
        self.model.train()
        for epoch in range(1, self.hyperparameter_1.epochs + 1):
            batchBlock = self.set_batchBlock(train_example)
            for every_batchBlock in range(batchBlock):
                exams = []
                start_pos = every_batchBlock * self.hyperparameter_1.batch_size
                end_pos = (every_batchBlock +
                           1) * self.hyperparameter_1.batch_size
                if end_pos > len(train_example):
                    end_pos = len(train_example)
                for idx in range(start_pos, end_pos):
                    exams.append(train_example[train_example_idx[idx]])
                max_len = self.get_max_sentence_len(exams)
                optimizer.zero_grad()
                feat, label = self.batch(exams,
                                         self.hyperparameter_1.batch_size,
                                         max_len)
                label = label.view(len(exams))
                logit = self.model.forward(feat)
                loss = F.cross_entropy(logit, label)
                loss.backward()
                optimizer.step()
                steps += 1
                if steps % self.hyperparameter_1.log_interval == 0:
                    train_size = len(train_example)
                    corrects = (torch.max(logit, 1)[1].view(
                        label.size()).data == label.data).sum().item()
                    accuracy = corrects / self.hyperparameter_1.batch_size * 100.0
                    sys.stdout.write(
                        '\rBatch[{}/{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.
                        format(steps, train_size, loss.item(), accuracy,
                               corrects, self.hyperparameter_1.batch_size))
                if steps % self.hyperparameter_1.test_interval == 0:
                    self.eval(dev_example, self.model)
                if steps % self.hyperparameter_1.save_interval == 0:
                    if not os.path.isdir(self.hyperparameter_1.save_dir):
                        os.makedirs(self.hyperparameter_1.save_dir)
                    save_prefix = os.path.join(self.hyperparameter_1.save_dir,
                                               'snapshot')
                    save_path = '{}_steps{}.pt'.format(save_prefix, steps)
                    torch.save(self.model, save_path)

    def eval(self, data_example, model):
        self.model.eval()
        corrects, avg_loss = 0, 0
        data_example_idx = self.set_index(data_example)
        batchBlock = self.set_batchBlock(data_example)
        for every_batchBlock in range(batchBlock):
            exams = []
            start_pos = every_batchBlock * self.hyperparameter_1.batch_size
            end_pos = (every_batchBlock + 1) * self.hyperparameter_1.batch_size
            if end_pos > len(data_example):
                end_pos = len(data_example)
            for idx in range(start_pos, end_pos):
                exams.append(data_example[data_example_idx[idx]])
            max_len = self.get_max_sentence_len(exams)
            feat, label = self.batch(exams, self.hyperparameter_1.batch_size,
                                     max_len)
            label = label.view(len(exams))
            logit = self.model.forward(feat)
            loss = F.cross_entropy(logit, label, reduction='sum')
            avg_loss += loss.item()
            corrects += (torch.max(logit, 1)[1].view(
                label.size()).data == label.data).sum().item()

        size = len(data_example)
        avg_loss = avg_loss / size
        accuracy = 100.0 * corrects / size
        self.model.train()
        print('\nEvaluation - loss: {:.6f}  acc: {:.4f}%({}/{}) \n'.format(
            avg_loss, accuracy, corrects, size))

    def variable(self, example):
        x = Variable(torch.LongTensor(1, len(example.m_word_index)))
        y = Variable(torch.LongTensor(1))
        for i in range(len(example.m_word_index)):
            x.data[0][i] = example.m_word_index[i]
        y.data[0] = example.m_label_index[0]
        return x, y
Example #10
class Trainer:
    def __init__(self, config, n_gpu, vocab, train_loader=None, val_loader=None):
        self.config = config
        self.vocab = vocab
        self.n_gpu = n_gpu
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Build model
        vocab_size = self.vocab.vocab_size()

        self.model = CNN_Text(self.config, vocab_size, self.config.n_label)
        self.model.to(device)

        if self.n_gpu > 1:
            self.model = nn.DataParallel(self.model)

        # Build optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr, weight_decay=0.0005)

        # Build criterion
        self.criterion = nn.CrossEntropyLoss()

    def train(self):
        best_f1 = 0.0
        best_acc = 0.0
        global_step = 0
        batch_f1 = []
        batch_acc = []


        for epoch in range(self.config.num_epoch):
            batch_loss = []

            for step, batch in enumerate(self.train_loader):
                self.model.train()
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids)
                
                loss = self.criterion(outputs['logits'].view(-1, self.config.n_label), labels.view(-1))

                f1, acc = ic_metric(labels.cpu(), outputs['predicted_intents'].cpu())

                if self.n_gpu > 1:
                    loss = loss.mean()

                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

                global_step += 1
                batch_loss.append(loss.float().item())
                batch_f1.append(f1)
                batch_acc.append(acc)

                if (global_step == 1) or (global_step % self.config.log_interval == 0):
                    mean_loss = np.mean(batch_loss)
                    mean_f1 = np.mean(batch_f1)
                    mean_acc = np.mean(batch_acc)
                    batch_loss = []
                    nsml.report(summary=True, scope=locals(), epoch=epoch, train_loss=mean_loss, step=global_step)

                if (global_step > 0) and (global_step % self.config.val_interval == 0):
                    val_loss, val_f1, val_acc = self.evaluation()
                    nsml.report(summary=True, scope=locals(), epoch=epoch, val_loss=val_loss,
                                val_f1=val_f1, val_acc=val_acc, step=global_step)

                    if val_f1 > best_f1:
                        best_f1 = val_f1
                        best_acc = val_acc
                        nsml.save(global_step)


    def evaluation(self):
        self.model.eval()
        total_loss = []
        preds = []
        targets = []
        with torch.no_grad():
            for step, batch in enumerate(self.val_loader):
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids)

                loss = self.criterion(outputs['logits'].view(-1, self.config.n_label), labels.view(-1))

                pred = outputs['predicted_intents'].squeeze(-1).cpu().numpy().tolist()
                target = labels.cpu().numpy().tolist()

                preds.extend(pred)
                targets.extend(target)
                total_loss.append(loss.float().item())

        mean_loss = np.mean(total_loss)
        mean_f1, mean_acc = ic_metric(targets, preds)
        return mean_loss, mean_f1, mean_acc
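
The Trainer above assumes a module-level device and a sort_batch() helper that is not shown. Below is a minimal sketch of the usual sort-by-length behavior such a helper provides; the actual implementation in the original code may differ.

import torch

def sort_batch(batch):
    # reorder (input_ids, input_lengths, labels) by descending sequence length,
    # the usual precondition for packing padded sequences
    input_ids, input_lengths, labels = batch
    input_lengths, sort_idx = input_lengths.sort(dim=0, descending=True)
    return input_ids[sort_idx], input_lengths, labels[sort_idx]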