import os
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

from pytorch_pretrained_bert.tokenization import BertTokenizer
from preprocessing.data_processor import MyPro, convert_examples_to_features
import config.args as args
from util.Logginger import init_logger

logger = init_logger(f"{args.task_name}", logging_path=args.log_path)


def init_parameters():
    """Build the BERT tokenizer and the task data processor.

    Loads the WordPiece vocabulary from ``args.VOCAB_FILE``, writes a copy
    of the vocab file into ``args.output_dir`` (so the trained model ships
    with the vocabulary it was built against), and instantiates ``MyPro``.

    Returns:
        tuple: ``(tokenizer, processor)``.
    """
    bert_tokenizer = BertTokenizer(vocab_file=args.VOCAB_FILE)
    # Persist the vocabulary alongside the model outputs.
    bert_tokenizer.save_vocabulary(args.output_dir)
    return bert_tokenizer, MyPro()


def create_batch_iter(mode, path):
    """构造迭代器"""
    logger.info(f'{mode} path is {path}')
    tokenizer, processor = init_parameters()
    if mode == "train":
        examples = processor.get_train_examples(path)
        num_train_steps = int(
            len(examples) / args.train_batch_size /
            args.gradient_accumulation_steps * args.num_train_epochs)
        batch_size = args.train_batch_size
        logger.info("  Num train steps = %d", num_train_steps)
    elif mode == "dev":
# ---- Esempio n. 2 (scraper boundary marker: a new, unrelated file fragment begins below) ----
import os
import time
import inspect
import torch
import torch.nn as nn
from torch.autograd import Variable

import config.config as config
from util.gpu_mem_track import MemTracker
from util.plot_util import loss_acc_plot
from util.lr_util import lr_update
from util.Logginger import init_logger

# Module-level logger; writes under the path configured in config.LOG_PATH.
logger = init_logger("torch", logging_path=config.LOG_PATH)

# Fix the RNG seeds (CPU, current GPU, and all GPUs) for reproducible runs.
torch.manual_seed(2018)
torch.cuda.manual_seed(2018)
torch.cuda.manual_seed_all(2018)


import warnings

# Silence all Python warnings globally for this process.
warnings.filterwarnings('ignore')

# Restrict CUDA to the single device index chosen in config.
# NOTE(review): must run before the first CUDA context is created —
# the torch.cuda.manual_seed calls above may already have created one;
# confirm the intended device is actually used.
os.environ["CUDA_VISIBLE_DEVICES"] = "%d"%config.device


# GPU memory tracker bound to this frame, for debugging memory usage.
frame = inspect.currentframe()
gpu_tracker = MemTracker(frame)
# Honor the config flag only when a CUDA device is actually available.
use_cuda = config.use_cuda if torch.cuda.is_available() else False
import time
import torch
from pytorch_pretrained_bert.optimization import BertAdam

import config.args as args
from util.plot_util import loss_acc_plot
from util.Logginger import init_logger
from evaluate.loss import loss_fn
from evaluate.acc_f1 import qa_evaluate
from util.model_util import save_model, load_model

# Module-level logger; writes under the path configured in args.log_path.
logger = init_logger("torch", logging_path=args.log_path)

# Fix the RNG seeds (CPU, current GPU, and all GPUs) for reproducible runs.
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
import warnings

# Silence all Python warnings globally for this process.
warnings.filterwarnings('ignore')


def warmup_linear(x, warmup=0.002):
    """Linear warmup followed by linear decay of the LR multiplier.

    Args:
        x: Fraction of total training steps completed (expected 0.0-1.0).
        warmup: Fraction of steps spent ramping up from 0 to 1.

    Returns:
        Multiplicative learning-rate factor.

    NOTE(review): for x > 1.0 the factor goes negative — same behavior as
    the original pytorch-pretrained-bert helper; confirm training never
    runs past the scheduled number of steps.
    """
    if x >= warmup:
        # Decay phase: fall linearly from ~1.0 down to 0.0 at x == 1.0.
        return 1.0 - x
    # Warmup phase: climb linearly from 0.0 to 1.0 over [0, warmup).
    return x / warmup


def fit(model,
        training_iter,
        eval_iter,
        num_epoch,
# ---- Esempio n. 4 (scraper boundary marker: a new, unrelated file fragment begins below) ----
import os
import json
import random
import collections
from tqdm import tqdm
import config.args as args
from util.Logginger import init_logger
from pytorch_pretrained_bert.tokenization import BertTokenizer

logger = init_logger("QA", logging_path=args.log_path)


class InputExample(object):
    """A single QA example: one question paired with its context document.

    Attributes mirror the constructor arguments one-to-one.
    """

    def __init__(
        self,
        qas_id,  # unique question id
        question_text,  # question text
        doc_tokens,  # context tokens
        orig_answer_text=None,  # answer text
        start_position=None,  # For Yes, No & no-answer, start_position = 0
        end_position=None,  # For Yes, No & no-answer, end_position = 0 (comment fixed: previously said start_position)
        answer_type=None  # We denote answer type as Yes: 0 No: 1 no-answer: 2 long-answer: 3
    ):
        self.qas_id = qas_id
        self.question_text = question_text
        self.doc_tokens = doc_tokens
        self.orig_answer_text = orig_answer_text
        self.start_position = start_position
        self.end_position = end_position
        # Bug fix: answer_type was accepted by the constructor but never
        # stored, so downstream code could not read it back.
        self.answer_type = answer_type
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

from pytorch_pretrained_bert.tokenization import BertTokenizer
from preprocessing.data_processor import MyPro, convert_examples_to_features
import config.args as args
from util.Logginger import init_logger

logger = init_logger("bert_ner", logging_path=args.log_path)


def init_params():
    """Resolve the configured task's data processor and build the tokenizer.

    Looks up ``args.task_name`` (case-insensitively) in the registry of
    known processors and instantiates it together with a ``BertTokenizer``
    loaded from ``args.VOCAB_FILE``.

    Returns:
        tuple: ``(processor, tokenizer)``.

    Raises:
        ValueError: if ``args.task_name`` is not a known task.
    """
    registry = {"bert_ner": MyPro}
    task = args.task_name.lower()
    try:
        processor_cls = registry[task]
    except KeyError:
        raise ValueError("Task not found: %s" % (task))
    return processor_cls(), BertTokenizer(vocab_file=args.VOCAB_FILE)


def create_batch_iter(mode):
    """构造迭代器"""
    processor, tokenizer = init_params()
    if mode == "train":
        examples = processor.get_train_examples(args.data_dir)

        num_train_steps = int(
            len(examples) / args.train_batch_size /
            args.gradient_accumulation_steps * args.num_train_epochs)
import torch
import config.args as args
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler
from pytorch_pretrained_bert.tokenization import BertTokenizer
from preprocessing.data_processor import read_qa_examples, convert_examples_to_features
from util.Logginger import init_logger

logger = init_logger("bert_class", logging_path=args.log_path)


def init_params():
    """Create and return a BertTokenizer loaded from ``args.VOCAB_FILE``."""
    return BertTokenizer(vocab_file=args.VOCAB_FILE)


def create_batch_iter(mode):
    """构造迭代器"""
    tokenizer = init_params()
    if mode == "train":
        examples = read_qa_examples(args.data_dir, "train")
        batch_size = args.train_batch_size
    elif mode == "dev":
        examples = read_qa_examples(args.data_dir, "dev")
        batch_size = args.eval_batch_size
    else:
        raise ValueError("Invalid mode %s" % mode)

    # 特征
    features = convert_examples_to_features(examples,
                                            tokenizer,
import json
from util.Logginger import init_logger
import config.args as args

logger = init_logger("model_net", logging_path=args.log_path)


class InputExample(object):
    """A single input example for classification / sentence-pair tasks.

    Args:
        guid: Unique id for the example.
        text_a: Raw text of the first sentence; for plain text
            classification only ``text_a`` is needed.
        text_b: Raw text of the second sentence; present only in
            sentence-pair tasks, ``None`` for single-sentence tasks.
        label: Gold label; should be non-None for train/dev examples
            and ``None`` for test examples.
    """

    def __init__(self, guid, text_a, text_b=None, label=None):
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.label = label


class InputFeature(object):
    """Plain container for the per-example tensors fed to the model."""

    def __init__(self, input_ids, input_mask, segment_ids, label_id,
                 output_mask):
        # Stash every field as a same-named instance attribute.
        fields = dict(input_ids=input_ids, input_mask=input_mask,
                      segment_ids=segment_ids, label_id=label_id,
                      output_mask=output_mask)
        for attr, value in fields.items():
            setattr(self, attr, value)