def load_train_val_examples(args):
    """Load labeled NER examples and split them into train/val sets.

    Args:
        args: Namespace with at least ``train_file``, ``eval_file``,
            ``seg_len``, ``seg_backoff``, ``num_augements``,
            ``allow_overlap``, ``train_rate`` and ``fold``.

    Returns:
        Tuple ``(train_examples, val_examples)``.
    """
    # train_data_generator yields (guid, text, _, entities); only the
    # dict form is consumed downstream, so build it directly.
    lines = [
        {'guid': guid, 'text': text, 'entities': entities}
        for guid, text, _, entities in train_data_generator(
            args.train_file, args.eval_file)
    ]

    # Augmentation produces synthetic overlapping spans, so overlap
    # must be disabled whenever augmentation is on.
    allow_overlap = args.allow_overlap
    if args.num_augements > 0:
        allow_overlap = False

    train_base_examples = load_ner_labeled_examples(
        lines,
        ner_labels,
        seg_len=args.seg_len,
        seg_backoff=args.seg_backoff,
        num_augements=args.num_augements,
        allow_overlap=allow_overlap)

    train_examples, val_examples = split_train_eval_examples(
        train_base_examples,
        train_rate=args.train_rate,
        fold=args.fold,
        shuffle=True)

    logger.info("Loaded %d train examples, %d val examples.",
                len(train_examples), len(val_examples))
    return train_examples, val_examples
def load_eval_examples(eval_text_file, eval_bio_file, args=None):
    """Load labeled NER examples for evaluation (no augmentation, no overlap).

    Args:
        eval_text_file: Path to the evaluation text file.
        eval_bio_file: Path to the evaluation BIO-annotation file.
        args: Optional namespace providing ``seg_len`` and ``seg_backoff``.
            Defaults to the module-level ``args`` for backward compatibility
            with the original implementation, which read the global directly.

    Returns:
        List of eval examples.
    """
    if args is None:
        # NOTE(review): the original body referenced a free `args` name that
        # is not a parameter — it only worked if a module-global `args`
        # existed at call time. Preserve that fallback explicitly.
        args = globals()['args']

    # Build the dict records consumed by load_ner_labeled_examples.
    lines = [
        {'guid': guid, 'text': text, 'entities': entities}
        for guid, text, _, entities in train_data_generator(
            eval_text_file, eval_bio_file)
    ]

    # BUGFIX: keyword was spelled `num_augments` here but `num_augements`
    # in the sibling train loader; unified on the spelling the callee
    # actually accepts (as used by load_train_val_examples).
    eval_examples = load_ner_labeled_examples(
        lines,
        ner_labels,
        seg_len=args.seg_len,
        seg_backoff=args.seg_backoff,
        num_augements=0,
        allow_overlap=False)

    logger.info("Loaded %d eval examples", len(eval_examples))
    return eval_examples