コード例 #1
0
def training(opt):
    """Build train (and optionally dev) examples and pass them to ``train_base``.

    Raw examples are read from ``train.json`` under ``opt.raw_data_dir`` and
    converted with a ``NERProcessor``. When ``opt.eval_model`` is truthy,
    ``dev.json`` from the same directory is loaded as the dev set; otherwise
    ``train_base`` receives ``None`` for the dev examples.

    Args:
        opt: Options object. This function reads ``task_type``,
            ``max_seq_len``, ``raw_data_dir`` and ``eval_model`` from it and
            forwards the object unchanged to ``train_base``.
    """
    # Take the task type from the options that were passed in (not from a
    # module-level ``args`` global) so the function also works when it is
    # imported and called from another module.
    if opt.task_type == "mrc":
        # 62 is set aside for the mrc query
        processor = NERProcessor(opt.max_seq_len - 62)
    else:
        processor = NERProcessor(opt.max_seq_len)

    # TODO: the original author left open whether this variant should train
    # on "stack.json" and/or append "pseudo.json"; for now only "train.json"
    # is used.
    train_raw_examples = processor.read_json(
        os.path.join(opt.raw_data_dir, "train.json"))
    train_examples = processor.get_examples(train_raw_examples, "train")

    dev_examples = None
    if opt.eval_model:
        dev_raw_examples = processor.read_json(
            os.path.join(opt.raw_data_dir, "dev.json"))
        dev_examples = processor.get_examples(dev_raw_examples, "dev")

    train_base(opt, train_examples, dev_examples)
コード例 #2
0
def stacking(opt):
    """Run 5-fold training over ``stack.json``, one ``train_base`` call per fold.

    For every fold, the training split is extended with all examples from
    ``pseudo.json`` and the held-out split is used as dev data. While a fold
    is training, ``opt.output_dir`` points at ``<output_dir>/v<i>`` where
    ``i`` is the zero-based fold index.

    Args:
        opt: Options object. This function reads ``task_type``,
            ``max_seq_len``, ``raw_data_dir`` and ``output_dir`` from it.
            ``output_dir`` is overwritten per fold and restored to its
            original value before the function returns or raises.
    """
    logger.info('Start to KFold stack attribution model')

    # Take the task type from the options that were passed in (not from a
    # module-level ``args`` global) so the function also works when it is
    # imported and called from another module.
    if opt.task_type == 'mrc':
        # 62 is set aside for the mrc query
        processor = NERProcessor(opt.max_seq_len - 62)
    else:
        processor = NERProcessor(opt.max_seq_len)

    # Shuffled splits with a fixed seed, so the folds are identical across runs.
    kf = KFold(5, shuffle=True, random_state=42)

    stack_raw_examples = processor.read_json(os.path.join(opt.raw_data_dir, 'stack.json'))

    pseudo_raw_examples = processor.read_json(os.path.join(opt.raw_data_dir, 'pseudo.json'))

    base_output_dir = opt.output_dir

    try:
        for i, (train_ids, dev_ids) in enumerate(kf.split(stack_raw_examples)):
            logger.info(f'Start to train the {i} fold')

            # add pseudo data to train data (never to the held-out split)
            train_raw_examples = [stack_raw_examples[_idx] for _idx in train_ids]
            train_raw_examples = train_raw_examples + pseudo_raw_examples
            train_examples = processor.get_examples(train_raw_examples, 'train')

            dev_raw_examples = [stack_raw_examples[_idx] for _idx in dev_ids]
            dev_examples = processor.get_examples(dev_raw_examples, 'dev')

            # Each fold gets its own output sub-directory.
            opt.output_dir = os.path.join(base_output_dir, f'v{i}')

            train_base(opt, train_examples, dev_examples)
    finally:
        # Do not leave the caller's options pointing at the last fold's
        # directory, even if a fold fails part-way through.
        opt.output_dir = base_output_dir
コード例 #3
0
def training(opt):
    """Build train (and optionally dev) examples and pass them to ``train_base``.

    The training set is the concatenation of ``train.json`` and
    ``pseudo.json`` from ``opt.raw_data_dir``, converted with a
    ``NERProcessor``. When ``opt.eval_model`` is truthy, ``dev.json`` from the
    same directory is loaded as the dev set; otherwise ``train_base`` receives
    ``None`` for the dev examples.

    Args:
        opt: Options object. This function reads ``task_type``,
            ``max_seq_len``, ``raw_data_dir`` and ``eval_model`` from it and
            forwards the object unchanged to ``train_base``.
    """
    # Take the task type from the options that were passed in (not from a
    # module-level ``args`` global) so the function also works when it is
    # imported and called from another module.
    if opt.task_type == 'mrc':
        # 62 is set aside for the mrc query
        processor = NERProcessor(opt.max_seq_len - 62)
    else:
        processor = NERProcessor(opt.max_seq_len)

    train_raw_examples = processor.read_json(os.path.join(opt.raw_data_dir, 'train.json'))

    # add pseudo data to train data
    pseudo_raw_examples = processor.read_json(os.path.join(opt.raw_data_dir, 'pseudo.json'))
    train_raw_examples = train_raw_examples + pseudo_raw_examples

    train_examples = processor.get_examples(train_raw_examples, 'train')

    dev_examples = None
    if opt.eval_model:
        dev_raw_examples = processor.read_json(os.path.join(opt.raw_data_dir, 'dev.json'))
        dev_examples = processor.get_examples(dev_raw_examples, 'dev')

    train_base(opt, train_examples, dev_examples)