# -- Assumed imports --------------------------------------------------------
# Standard-library / third-party imports reconstructed from usage in this
# file. Project-local names (models, optimization, Config, log_info,
# load_ckpt, save_ckpt, MRCDataset, GraphDataset, collect_mrc,
# collect_single, collect_multigraph, infer, tm) come from the surrounding
# repo; their exact module paths are assumptions.
import os
import pickle
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
import torch as t
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter
from tqdm import tqdm


def predict(args):
    # vocabs = load_vocab(args.vocab)
    # inv_vocabs = {v: k for k, v in vocabs.items()}
    model_config, optimizer_config, _ = Config.from_json(args.config)
    model_name = model_config.name
    model_class = getattr(models, model_name)
    if model_config.init_weight_path is None:
        model_config.init_weight = None
    else:
        model_config.init_weight = t.from_numpy(
            pickle.load(open(model_config.init_weight_path, 'rb'))).float()

    # Build the test-set loader from the serialized feature file and the
    # byte offsets stored in the matching .pos file.
    phase = 'test'
    fea_filename = os.path.join(args.data, '{}.fea'.format(phase))
    pos_filename = os.path.join(args.data, '{}.pos'.format(phase))
    fea_file = open(fea_filename, 'rb')
    with open(pos_filename, 'r') as f:
        positions = [int(v.strip()) for v in f]
    dataset = MRCDataset(fea_file, positions)
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         collate_fn=collect_mrc,
                                         num_workers=1)

    model = model_class(**model_config.values)
    ckpt_file = os.path.join(args.save_dir, 'model.{}.pt.tar'.format(args.model))
    if os.path.isfile(ckpt_file):
        load_ckpt(ckpt_file, model)
    else:
        raise Exception("No such path {}".format(ckpt_file))
    if args.cuda:
        model = model.cuda()
    model.eval()

    # Collect the predicted entity strings per document id.
    curr_preds = defaultdict(set)
    pbar = tqdm(dataloader)
    for data in pbar:
        with t.no_grad():
            results = infer(data, model, args)
        for key in results:
            curr_preds[key].update(results[key])

    idxs = []
    entities = []
    for key in curr_preds:
        print(key)
        idxs.append(key)
        # discard() rather than remove(): the empty string may be absent.
        curr_preds[key].discard('')
        entities.append(';'.join([v for v in curr_preds[key] if len(v) > 1]))
    preds = pd.DataFrame({'id': idxs, 'unknownEntities': entities})
    preds.to_csv(os.path.join(args.save_dir, 'submit.csv'), index=False)
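
# `load_ckpt` / `save_ckpt` are project-local helpers not defined in this
# file. A minimal sketch consistent with the call sites here; the exact
# checkpoint layout is an assumption:
def save_ckpt(path, epoch, model_state, optimizer_state, scheduler_state):
    # Bundle everything needed to resume training into one file.
    t.save({'epoch': epoch,
            'model': model_state,
            'optimizer': optimizer_state,
            'scheduler': scheduler_state}, path)


def load_ckpt(path, model, optimizer=None, scheduler=None, cuda=False):
    # Restore model weights, and optionally optimizer/scheduler state.
    ckpt = t.load(path, map_location='cuda' if cuda else 'cpu')
    model.load_state_dict(ckpt['model'])
    if optimizer is not None:
        optimizer.load_state_dict(ckpt['optimizer'])
    if scheduler is not None:
        scheduler.load_state_dict(ckpt['scheduler'])
    return ckpt.get('epoch', 0)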

def predict(args):
    # Ensemble variant: runs several trained models over the test set and
    # merges their predicted entities per document id.
    # vocabs = load_vocab(args.vocab)
    # inv_vocabs = {v: k for k, v in vocabs.items()}
    phase = 'test'
    fea_filename = os.path.join(args.data, '{}.fea'.format(phase))
    pos_filename = os.path.join(args.data, '{}.pos'.format(phase))
    fea_file = open(fea_filename, 'rb')
    with open(pos_filename, 'r') as f:
        positions = [int(v.strip()) for v in f]
    dataset = GraphDataset(fea_file, positions)
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         collate_fn=collect_single,
                                         num_workers=1)

    # Load the best checkpoint of every named run under outputs/.
    model_list = []
    for name in args.names.split(','):
        config_path = os.path.join('outputs', name, 'model_config.json')
        model_config, optimizer_config, _ = Config.from_json(config_path)
        model_name = model_config.name
        model_class = getattr(models, model_name)
        model = model_class(**model_config.values)
        ckpt_file = os.path.join('outputs', name, 'model.best.pt.tar')
        if os.path.isfile(ckpt_file):
            load_ckpt(ckpt_file, model)
        else:
            raise Exception("No such path {}".format(ckpt_file))
        if args.cuda:
            model = model.cuda()
        model.eval()
        model_list.append(model)

    curr_preds = defaultdict(set)
    pbar = tqdm(dataloader)
    for data in pbar:
        with t.no_grad():
            results = infer(data, model_list, args)
        for key in results:
            curr_preds[key].update(results[key])

    idxs = []
    entities = []
    for key in curr_preds:
        # print(key)
        idxs.append(key)
        # discard() rather than remove(): the empty string may be absent.
        curr_preds[key].discard('')
        entities.append(';'.join([v for v in curr_preds[key] if len(v) > 1]))
    preds = pd.DataFrame({'id': idxs, 'unknownEntities': entities})
    preds.to_csv(args.save_name, index=False)
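
# Example invocation of the ensemble `predict` above, using a plain
# Namespace in place of the repo's real CLI. Attribute names mirror what
# `predict` reads; the values are placeholders, not real paths or runs:
from argparse import Namespace

example_args = Namespace(
    data='data/graph',          # dir containing test.fea / test.pos
    names='run_a,run_b,run_c',  # comma-separated run names under outputs/
    batch_size=32,
    cuda=False,
    save_name='submit.csv',
)
# predict(example_args)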

def train(args):
    # Fine-tunes an existing checkpoint on the dev split only (note that
    # `phase` is fixed to 'dev' below).
    Log = log_info(os.path.join(args.save_dir, 'process.info'))
    Log(args)
    model_config, optimizer_config, scheduler_config = Config.from_json(
        args.config)
    model_name = model_config.name
    model_class = getattr(models, model_name)
    if model_config.init_weight_path is None:
        model_config.init_weight = None
    else:
        model_config.init_weight = t.from_numpy(
            pickle.load(open(model_config.init_weight_path, 'rb'))).float()
    model = model_class(**model_config.values)

    phase = 'dev'
    dataloaders = {}
    datasets = {}
    collate_fn = collect_single
    fea_filename = os.path.join(args.data, '{}.fea'.format(phase))
    pos_filename = os.path.join(args.data, '{}.pos'.format(phase))
    fea_file = open(fea_filename, 'rb')
    with open(pos_filename, 'r') as f:
        positions = [int(v.strip()) for v in f]
    dataset = GraphDataset(fea_file, positions)
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         collate_fn=collate_fn,
                                         num_workers=1)
    dataloaders[phase] = dataloader
    datasets[phase] = dataset

    # Optionally freeze the pretrained encoder, then rescale the base LR.
    if model_config.freeze:
        for param in model.bert4pretrain.parameters():
            param.requires_grad = False
    optimizer_config.lr = optimizer_config.lr * args.lr_scale

    # Prefer torch.optim; otherwise fall back to the project-local
    # `optimization` module (transformers-style, hence t_total).
    if hasattr(optim, optimizer_config.name):
        optimizer = getattr(optim, optimizer_config.name)(model.parameters(),
                                                          **optimizer_config.values)
        scheduler = getattr(optim.lr_scheduler, scheduler_config.name)(
            optimizer, **scheduler_config.values)
    else:
        t_total = len(dataloaders['dev']) * args.epoch
        # no_decay = ['bias', 'LayerNorm.weight']
        # optimizer_grouped_parameters = [
        #     {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
        #     {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        # ]
        optimizer = getattr(optimization, optimizer_config.name)(model.parameters(),
                                                                 **optimizer_config.values)
        scheduler = getattr(optimization, scheduler_config.name)(
            optimizer, t_total=t_total, **scheduler_config.values)

    ckpt_file = os.path.join(args.load_dir, 'model.best.pt.tar')
    if os.path.isfile(ckpt_file):
        load_ckpt(ckpt_file, model, optimizer, scheduler, args.cuda)
    else:
        raise Exception("No such path {}".format(ckpt_file))

    for epoch in range(1, 1 + args.epoch):
        model.train()
        pbar = tqdm(dataloaders[phase])
        pbar.set_description("[{} Epoch {}]".format(phase, epoch))
        running_loss = 0.
        running_size = 0.
        for data in pbar:
            optimizer.zero_grad()
            size, loss = infer(data, model, args.cuda)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_size += size
            pbar.set_postfix(mean_loss=running_loss / running_size)
        save_ckpt(os.path.join(args.save_dir, 'model.best.pt.tar'),
                  epoch, model.state_dict(), optimizer.state_dict(),
                  scheduler.state_dict())
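
# `Config.from_json` is project-local; based on how its three return values
# are used in this file, a config plausibly looks like the sketch below.
# Every key and value here is an assumption for illustration, not the
# repo's actual schema:
EXAMPLE_CONFIG = {
    "model": {
        "name": "MRCModel",              # looked up via getattr(models, name)
        "init_weight_path": None,        # optional pickled numpy weights
        "freeze": False,                 # freeze bert4pretrain if true
        "values": {"hidden_size": 768},  # kwargs for the model constructor
    },
    # When the optimizer name is not found in torch.optim, the code falls
    # back to the project's `optimization` module (transformers-style,
    # hence the extra t_total argument for the scheduler).
    "optimizer": {"name": "BertAdam", "lr": 5e-5,
                  "values": {"weight_decay": 0.01}},
    "scheduler": {"name": "WarmupLinearSchedule",
                  "values": {"warmup": 0.1}},
}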

def train(args):
    Log = log_info(os.path.join(args.save_dir,
                                'process{}.info'.format(args.fold)))
    Log(args)
    model_config, optimizer_config, scheduler_config = Config.from_json(args.config)
    model_name = model_config.name
    model_class = getattr(models, model_name)
    Log(model_config.values)
    model = model_class(**model_config.values)

    # Build one loader per phase; train/dev come from the fold directory
    # when cross-validation folds are used.
    dataloaders = {}
    datasets = {}
    sampler = None
    collate_fn = collect_mrc
    phases = ['train']
    if args.do_eval:
        phases.append('dev')
    if args.do_test:
        phases.append('test')
    for phase in phases:
        if phase != 'test' and args.fold:
            fea_filename = os.path.join(args.data, 'fold{}'.format(args.fold),
                                        '{}.fea'.format(phase))
            pos_filename = os.path.join(args.data, 'fold{}'.format(args.fold),
                                        '{}.pos'.format(phase))
        else:
            fea_filename = os.path.join(args.data, '{}.fea'.format(phase))
            pos_filename = os.path.join(args.data, '{}.pos'.format(phase))
        fea_file = open(fea_filename, 'rb')
        with open(pos_filename, 'r') as f:
            positions = [int(v.strip()) for v in f]
        dataset = MRCDataset(fea_file, positions)
        if args.multi_gpu and phase == 'train':
            sampler = t.utils.data.RandomSampler(dataset)
            dataloader = t.utils.data.DataLoader(dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 collate_fn=collate_fn,
                                                 sampler=sampler,
                                                 num_workers=0)
        else:
            dataloader = t.utils.data.DataLoader(dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=(phase == 'train'),
                                                 collate_fn=collate_fn,
                                                 num_workers=0)
        dataloaders[phase] = dataloader
        datasets[phase] = dataset

    if args.multi_gpu:
        args.n_gpu = t.cuda.device_count()
        model = model.cuda()
        model = t.nn.DataParallel(model)
    elif args.cuda:
        args.n_gpu = 1
        model = model.cuda()

    # Two parameter groups: task-specific layers get a scaled-up learning
    # rate, the pretrained encoder keeps the base rate. After DataParallel
    # wrapping, the underlying module lives at model.module, so resolve it
    # first (the original accessed model.bert4pretrain directly, which
    # fails on a wrapped model).
    base_model = model.module if isinstance(model, t.nn.DataParallel) else model
    bert_parameters = list(map(id, base_model.bert4pretrain.parameters()))
    other_parameters = [p for p in base_model.parameters()
                        if id(p) not in bert_parameters]
    if hasattr(optim, optimizer_config.name):
        optimizer = getattr(optim, optimizer_config.name)([
            {'params': other_parameters, 'lr': optimizer_config.lr * args.scale_rate},
            {'params': base_model.bert4pretrain.parameters()}
        ], **optimizer_config.values)
        scheduler = getattr(optim.lr_scheduler, scheduler_config.name)(
            optimizer, **scheduler_config.values)
    else:
        t_total = len(dataloaders['train']) * args.epoch
        # no_decay = ['bias', 'LayerNorm.weight']
        # optimizer_grouped_parameters = [
        #     {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
        #     {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        # ]
        optimizer = getattr(optimization, optimizer_config.name)([
            {'params': other_parameters, 'lr': optimizer_config.lr * args.scale_rate},
            {'params': base_model.bert4pretrain.parameters()}
        ], **optimizer_config.values)
        scheduler = getattr(optimization, scheduler_config.name)(
            optimizer, t_total=t_total, **scheduler_config.values)

    if not os.path.isdir(args.save_dir):
        os.mkdir(args.save_dir)
    if args.log:
        writer = SummaryWriter(os.path.join(args.save_dir, 'logs'))
    else:
        writer = None

    pre_fn, step_fn, post_fn = tm.mrc_acc_metric_builder(
        args, scheduler_config, model, optimizer, scheduler, writer, Log)
    for epoch in range(1, 1 + args.epoch):
        for phase in phases:
            pre_fn()
            if phase == 'train':
                model.train()
            else:
                model.eval()
            pbar = tqdm(dataloaders[phase])
            pbar.set_description("[{} Epoch {}]".format(phase, epoch))
            for data in pbar:
                optimizer.zero_grad()
                with t.set_grad_enabled(phase == 'train'):
                    result, loss = infer(data, model, args.cuda,
                                         is_evaluate=phase != 'train')
                    # DataParallel returns one loss per GPU; reduce to a scalar.
                    if args.multi_gpu and args.n_gpu > 1:
                        loss = loss.mean()
                    if phase == 'train':
                        loss.backward()
                        # t.nn.utils.clip_grad_norm_(model.parameters(), 7)
                        optimizer.step()
                step_fn(result, loss, pbar, phase)
            post_fn(phase, epoch)
    if args.log:
        writer.close()
    with open(os.path.join(args.save_dir, 'invalid_entities'), 'wb') as f:
        pickle.dump(tm.Invalid_entities, f)
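
# A hypothetical launcher for the fold-aware `train` above. The flag names
# mirror the `args.*` attributes it reads; the defaults are assumptions and
# the real repo may wire these up differently.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', required=True,
                        help='dir with *.fea/*.pos (or fold{N} subdirs)')
    parser.add_argument('--config', required=True,
                        help='json consumed by Config.from_json')
    parser.add_argument('--save_dir', required=True)
    parser.add_argument('--fold', type=int, default=0,
                        help='0 disables fold subdirectories')
    parser.add_argument('--epoch', type=int, default=5)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--scale_rate', type=float, default=10.0,
                        help='LR multiplier for non-BERT parameters')
    parser.add_argument('--do_eval', action='store_true')
    parser.add_argument('--do_test', action='store_true')
    parser.add_argument('--multi_gpu', action='store_true')
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--log', action='store_true')
    train(parser.parse_args())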

def predict(args):
    model_config, *_ = Config.from_json(args.config)
    model_name = model_config.name
    model_class = getattr(models, model_name)
    if model_config.init_weight_path is None:
        model_config.init_weight = None
    else:
        model_config.init_weight = t.from_numpy(
            pickle.load(open(model_config.init_weight_path, 'rb'))).float()
    if model_config.activation is None:
        pass
    elif model_config.activation == 'identical':
        model_config.activation = lambda v: v
    elif model_config.activation == 'gelu':
        model_config.activation = models.layers.activation.gelu
    else:
        model_config.activation = (getattr(t, model_config.activation, None)
                                   or getattr(F, model_config.activation, None))
    collate_fn = lambda batch: collect_multigraph(model_config.need_norm,
                                                  model_config.concat_ab, batch)

    phase = 'test'
    fea_filename = os.path.join(args.data, '{}.fea'.format(phase))
    tgt_filename = os.path.join(args.data, '{}.tgt'.format(phase))
    pos_filename = os.path.join(args.data, '{}.pos'.format(phase))
    fea_file = open(fea_filename, 'rb')
    with open(tgt_filename, 'r') as f:
        targets = [int(v.strip()) for v in f]
    with open(pos_filename, 'r') as f:
        positions = [int(v.strip()) for v in f]
    dataset = GraphDataset(fea_file, targets, positions)
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         collate_fn=collate_fn,
                                         num_workers=1)

    epoch = args.best_epoch
    total_proba = None
    model = model_class(**model_config.values)
    ckpt_file = os.path.join(args.save_dir, 'model.epoch{}.pt.tar'.format(epoch))
    if os.path.isfile(ckpt_file):
        load_ckpt(ckpt_file, model)
    else:
        raise Exception("No such path {}".format(ckpt_file))
    if args.cuda:
        model = model.cuda()
    model.eval()

    running_loss = 0.
    running_results = Counter()
    curr_proba = []
    pbar = tqdm(dataloader)
    for data in pbar:
        with t.no_grad():
            proba = infer(data, model, model_config.seq_len, args.cuda)
        curr_proba.append(proba)
    curr_proba = np.concatenate(curr_proba, axis=0)
    if total_proba is None:
        total_proba = curr_proba
    else:
        assert total_proba.shape == curr_proba.shape
        total_proba += curr_proba

    df = pd.DataFrame(data=total_proba, columns=['proba0', 'proba1'])
    predictions = total_proba.argmax(1)
    df['predictions'] = predictions
    df['targets'] = dataset.targets
    df.to_csv(os.path.join(args.save_dir, 'result.csv'))
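
# A small assumed follow-up: scoring the result.csv written by `predict`
# above. The column names match the DataFrame it creates; the helper itself
# is illustrative, not part of the original repo.
def eval_result_csv(path):
    df = pd.read_csv(path)
    accuracy = (df['predictions'] == df['targets']).mean()
    return accuracy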