def make_data():
    base_dirs = [setting["parsed_data_path"]["test"],
                 setting["parsed_data_path"]["dev"],
                 setting["parsed_data_path"]["unlabeled"]]
    print("base_dirs are", base_dirs)
    corpus = ParsedCorpus(base_dirs)

    vocab = HeadWordVocabulary()
    if os.path.exists("./voc.txt"):
        vocab.load()
    else:
        vocab.make_vocabulary(corpus, "headWord")
        vocab.save()
    print("vocab length is", len(vocab.stoi))

    entity_vocab = HeadWordVocabulary()
    if os.path.exists("./evoc.txt"):
        entity_vocab.load("./evoc.txt")
    else:
        entity_vocab.make_vocabulary(corpus, "entityType")
        entity_vocab.save("./evoc.txt")
    print("entity label vocab length is", len(entity_vocab.stoi))

    data_iterator = DataIterator(corpus, vocab, entity_vocab)
    return data_iterator, vocab, entity_vocab
def infer(model, rank=0):
    model = model.cuda()
    model = DataParallel(model)
    model.load_state_dict(torch.load(model_state_dict))
    model.eval()

    if rank == 0:
        print('preparing dataset...')
    data_iterator = DataIterator(coco_dir, resize=resize, max_size=max_size,
                                 batch_size=batch_size, stride=stride,
                                 training=training, dist=dist)
    if rank == 0:
        print('finish loading dataset!')

    results = []
    with torch.no_grad():
        for i, (data, ids, ratios) in enumerate(data_iterator, start=1):
            scores, boxes, classes = model(data)
            results.append([scores, boxes, classes, ids, ratios])
            if rank == 0:
                size = len(data_iterator.ids)
                msg = '[{:{len}}/{}]'.format(min(i * batch_size, size), size, len=len(str(size)))
                print(msg, flush=True)

    results = [torch.cat(r, dim=0) for r in zip(*results)]
    results = [r.cpu() for r in results]
def train(args):
    # load dataset
    train_sentence_packs = json.load(open(args.prefix + args.dataset + '/train.json'))
    random.shuffle(train_sentence_packs)
    dev_sentence_packs = json.load(open(args.prefix + args.dataset + '/dev.json'))
    instances_train = load_data_instances(train_sentence_packs, args)
    instances_dev = load_data_instances(dev_sentence_packs, args)
    random.shuffle(instances_train)
    trainset = DataIterator(instances_train, args)
    devset = DataIterator(instances_dev, args)

    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)

    model = MultiInferBert(args).to(args.device)
    optimizer = torch.optim.Adam([
        {'params': model.bert.parameters(), 'lr': 5e-5},
        {'params': model.cls_linear.parameters()}
    ], lr=5e-5)

    best_joint_f1 = 0
    best_joint_epoch = 0
    for i in range(args.epochs):
        print('Epoch:{}'.format(i))
        for j in trange(trainset.batch_count):
            _, tokens, lengths, masks, _, _, aspect_tags, tags = trainset.get_batch(j)
            preds = model(tokens, masks)
            preds_flatten = preds.reshape([-1, preds.shape[3]])
            tags_flatten = tags.reshape([-1])
            loss = F.cross_entropy(preds_flatten, tags_flatten, ignore_index=-1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        joint_precision, joint_recall, joint_f1 = eval(model, devset, args)
        if joint_f1 > best_joint_f1:
            model_path = args.model_dir + 'bert' + args.task + '.pt'
            torch.save(model, model_path)
            best_joint_f1 = joint_f1
            best_joint_epoch = i

    print('best epoch: {}\tbest dev {} f1: {:.5f}\n\n'.format(best_joint_epoch, args.task, best_joint_f1))
def _predict(self):
    all_context = DataIterator(self.train_matrix_dense.tolist(), batch_size=self.batch_size)
    all_rating = []
    for users in all_context:
        r_hat = self.sess.run(self.r_hat, feed_dict={self.user_context: users})
        all_rating.extend(r_hat)
    return np.array(all_rating)
def test(args):
    print("Evaluation on testset:")
    model_path = args.model_dir + 'bert' + args.task + '.pt'
    model = torch.load(model_path).to(args.device)
    model.eval()

    sentence_packs = json.load(open(args.prefix + args.dataset + '/test.json'))
    instances = load_data_instances(sentence_packs, args)
    testset = DataIterator(instances, args)
    eval(model, testset, args)
def test(args):
    print("Evaluation on testset:")
    model_path = args.model_dir + args.model + args.task + '.pt'
    model = torch.load(model_path).to(args.device)
    model.eval()

    word2index = json.load(open(args.prefix + 'doubleembedding/word_idx.json'))
    sentence_packs = json.load(open(args.prefix + args.dataset + '/test.json'))
    instances = load_data_instances(sentence_packs, word2index, args)
    testset = DataIterator(instances, args)
    eval(model, testset, args)
def get_train_data(self):
    users_list = []
    items_list = []
    for user, items in self.user_pos_train.items():
        users_list.extend([user] * len(items))
        items_list.extend(items)
    dataloader = DataIterator(users_list, items_list, batch_size=self.batch_size, shuffle=True)
    return dataloader
def get_train_data(self):
    users_list, pos_items, neg_items = [], [], []
    train_users = list(self.user_pos_train.keys())
    with ThreadPoolExecutor() as executor:
        data = executor.map(self.get_train_data_one_user, train_users)
    data = list(data)
    for users, pos, neg in data:
        users_list.extend(users)
        pos_items.extend(pos)
        neg_items.extend(neg)
    dataloader = DataIterator(users_list, pos_items, neg_items,
                              batch_size=self.batch_size, shuffle=True)
    return dataloader
def get_train_data(self):
    users_list, items_list, labels_list = [], [], []
    train_users = list(self.user_pos_train.keys())
    with ThreadPoolExecutor() as executor:
        data = executor.map(self.get_train_data_one_user, train_users)
    data = list(data)
    for users, items, labels in data:
        users_list.extend(users)
        items_list.extend(items)
        labels_list.extend(labels)
    dataloader = DataIterator(users_list, items_list, labels_list,
                              batch_size=self.batch_size, shuffle=True)
    return dataloader
def get_training_data(self):
    users = []
    pos_items = []
    neg_items = []
    for u, pos in self.user_pos_train.items():
        pos_len = len(pos)
        neg = random_choice(self.all_items, size=pos_len, exclusion=pos)
        users.extend([u] * pos_len)
        pos_items.extend(pos.tolist())
        neg_items.extend(neg.tolist())
    return DataIterator(users, pos_items, neg_items, batch_size=self.batch_size, shuffle=True)
def get_training_data(self):
    users_list = []
    pos_items_list = []
    neg_items_list = []
    users = self.user_pos_train.keys()
    with ThreadPoolExecutor() as executor:
        batch_result = executor.map(self._get_neg_items, users)
    for user, pos, neg in batch_result:
        users_list.extend(user)
        pos_items_list.extend(pos)
        neg_items_list.extend(neg)
    return DataIterator(users_list, pos_items_list, neg_items_list,
                        batch_size=self.batch_size, shuffle=True)
def get_train_data(self):
    self._mask = np.zeros([self.users_num, self.items_num])
    self._N_zr = np.zeros([self.users_num, self.items_num])
    for user, pos_items in self.user_pos_train.items():
        self._mask[user][pos_items] = 1
        neg = random_choice(self.all_items, size=int(self.s_pm * self.items_num),
                            replace=False, exclusion=pos_items)
        self._mask[user][neg] = 1
        neg = random_choice(self.all_items, size=int(self.s_zr * self.items_num),
                            replace=False, exclusion=pos_items)
        self._N_zr[user][neg] = 1
    return DataIterator(self.train_matrix_dense.tolist(), self._mask, self._N_zr,
                        batch_size=self.batch_size, shuffle=True)
from config import ConfigBinaryClassification
from config import ConfigTripleClassification

if __name__ == "__main__":
    args = get_args()
    print_args(args)
    if args.class_num == 2:
        cfg = ConfigBinaryClassification()
    elif args.class_num == 3:
        cfg = ConfigTripleClassification()
    else:
        raise ValueError("wrong class num")
    device = torch.device("cuda:%d" % args.cuda)

    Data = DataIterator(config=cfg, train_batchsize=args.batch_size)
    model = CNN(vocab_size=len(Data.vocab), embedding_dim=100, n_filters=100,
                filter_sizes=range(2, 5), output_dim=args.class_num,
                dropout=0.5, pad_idx=1).to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)
    criterion = FocalLoss(classes=args.class_num, device=device).to(device)

    for epoch in range(args.epoch_num):
        print(epoch)
        for sample in Data.train_iter:
            model.train()
def train(train_dir, batch_size=64, image_height=60, image_width=180, image_channel=1,
          checkpoint_dir="../checkpoint/", num_epochs=100):
    # Load data
    train_data = DataIterator(data_dir=train_dir, batch_size=batch_size, begin=0, end=800)
    valid_data = DataIterator(data_dir=train_dir, batch_size=batch_size, begin=800, end=1000)
    print('train data batch number: {}'.format(train_data.number_batch))
    print('valid data batch number: {}'.format(valid_data.number_batch))

    # Model
    model = cnn_lstm_otc_ocr.LSTMOCR(NumClasses, batch_size, image_height=image_height,
                                     image_width=image_width, image_channel=image_channel,
                                     is_train=True)
    model.build_graph()

    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True), allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        # Initialize variables
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        train_writer = tf.summary.FileWriter(checkpoint_dir + 'train', sess.graph)

        # Restore from the latest checkpoint if one exists
        ckpt = tf.train.latest_checkpoint(checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from checkpoint{0}'.format(ckpt))
        else:
            print('no checkpoint to restore')

        print('=======begin training=======')
        for cur_epoch in range(num_epochs):
            start_time = time.time()
            batch_time = time.time()

            # Train
            train_cost = 0
            for cur_batch in range(train_data.number_batch):
                if cur_batch % 100 == 0:
                    print('batch {}/{} time: {}'.format(cur_batch, train_data.number_batch,
                                                        time.time() - batch_time))
                batch_time = time.time()
                batch_inputs, _, sparse_labels = train_data.next_train_batch()
                summary, cost, step, _ = sess.run(
                    [model.merged_summay, model.cost, model.global_step, model.train_op],
                    {model.inputs: batch_inputs, model.labels: sparse_labels})
                train_cost += cost
                train_writer.add_summary(summary, step)
            print("loss is {}".format(train_cost / train_data.number_batch))

            # Save model
            if cur_epoch % 1 == 0:
                if not os.path.isdir(checkpoint_dir):
                    os.mkdir(checkpoint_dir)
                saver.save(sess, os.path.join(checkpoint_dir, 'ocr-model'), global_step=cur_epoch)

            # Validate
            if cur_epoch % 1 == 0:
                lr = 0
                acc_batch_total = 0
                for j in range(valid_data.number_batch):
                    val_inputs, _, sparse_labels, ori_labels = valid_data.next_test_batch(j)
                    dense_decoded, lr = sess.run([model.dense_decoded, model.lrn_rate],
                                                 {model.inputs: val_inputs, model.labels: sparse_labels})
                    acc_batch_total += accuracy_calculation(ori_labels, dense_decoded, -1)
                accuracy = acc_batch_total / valid_data.number_batch
                now = datetime.datetime.now()
                log = "{}/{} {}:{}:{} Epoch {}/{}, accuracy = {:.3f}, time = {:.3f}, lr = {:.8f}"
                print(log.format(now.month, now.day, now.hour, now.minute, now.second,
                                 cur_epoch + 1, num_epochs, accuracy,
                                 time.time() - start_time, lr))
def infer(args, model, val_iterator=None):
    training = val_iterator is not None
    if not training:
        model = model.cuda()
        val_dataset = BitcoinDataset(args.price_path, args.tweet_path,
                                     date_from=args.start_date, date_to=args.end_date)
        val_iterator = DataIterator(val_dataset, args.batch_size, val=True)
    criterion = L1Loss(reduction='none')
    count = len(val_iterator)
    print('Running inference on {} datapoints...'.format(count))
    profiler = Profiler(['infer', 'fw'])
    results, losses, losses_daily = [], [], []
    model.eval()

    for i, (price, tweet, trgt) in enumerate(val_iterator):
        profiler.start('fw')
        with torch.no_grad():
            out = model(price, tweet)
            loss = criterion(out, trgt)
        results.append([out, loss])
        loss_daily = loss.mean(axis=0).view(7, 24).mean(axis=1)
        loss = loss.mean()
        losses_daily.append(loss_daily)
        losses.append(loss)
        profiler.stop('fw')
        profiler.bump('infer')
        if not training and (profiler.totals['infer'] > 60 or i == count // args.batch_size):
            avg_loss = torch.stack(losses).mean().item()
            avg_loss_daily = torch.stack(losses_daily).mean(axis=0).tolist()
            print(' | '.join([
                f'[{min((i+1) * args.batch_size, count):{len(str(count))}}/{count}] loss: {avg_loss:.4f}',
                ('loss-daily: [' + ', '.join(['{:.4f}'] * 7) + ']').format(*avg_loss_daily),
                f'{profiler.means["infer"]:.3f}s/{args.batch_size}-batch' +
                f'(fw: {profiler.means["fw"]:.3f}s, bw: {profiler.means["bw"]:.3f}s)',
            ]), flush=True)
            profiler.reset()

    results = [torch.cat(r, dim=0).cpu() for r in zip(*results)]

    if not training:
        take = 2
        mean, std = 7.9078, 1.5308
        out = (results[0].numpy() * std) + mean
        out = diags(out.T[:take], offsets=np.arange(take), shape=(out.shape[0], out.shape[0] + take))
        out = np.asarray(out.sum(axis=0)).T[take - 1:-take - 1] / take
        trgt = (val_dataset.price_trgt.numpy()[take // 2:-take // 2, 0] * std) + mean
        date_from = pd.Timestamp('2020-09-01') + pd.Timedelta(days=30, hours=take // 2)
        date_to = pd.Timestamp('2021-02-01') - pd.Timedelta(days=7, hours=take // 2 + 1)
        date_range = pd.date_range(date_from, date_to, freq='H')
        pd.DataFrame(data=np.concatenate([trgt, out], axis=-1),
                     index=date_range,
                     columns=['actual', 'forecast']).to_csv(args.output, index_label='timestamp')

    loss = results[1].mean().item()
    loss_daily = results[1].mean(axis=0).view(7, 24).mean(axis=1).tolist()
    print(' | '.join([
        f'[Inference] loss: {loss:.4f}',
        ('loss-daily: [' + ', '.join(['{:.4f}'] * 7) + ']').format(*loss_daily)
    ]), flush=True)
    return loss, loss_daily
def main():
    # training_args = GlueTraingArgs(do_train=True)
    data_args_task0 = GlueDataArgs(task_name=task0)
    data_args_task1 = GlueDataArgs(task_name=task1)
    if use_gpu:
        print("Training on GPU.")

    # logging
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    logger.info("Tasks:" + task0 + "," + task1)

    config_task0 = BertConfig.from_pretrained(
        bert_path,
        num_labels=glue_tasks_num_labels[data_args_task0.task_name],
        finetuning_task=data_args_task0.task_name,
        cache_dir=cache_dir)
    config_task1 = BertConfig.from_pretrained(
        bert_path,
        num_labels=glue_tasks_num_labels[data_args_task1.task_name],
        finetuning_task=data_args_task1.task_name,
        cache_dir=cache_dir)

    # Model preparation: the BERT backbone loads pretrained weights,
    # while the downstream task heads are initialized randomly.
    # TODO: add a seed for the random init; refer to Trainer.train()
    if use_gpu:
        model_Bert = BertModel.from_pretrained(bert_path, return_dict=True).cuda()
        model_task0 = SequenceClassification(config_task0).cuda()
        model_task1 = SequenceClassification(config_task1).cuda()
    else:
        model_Bert = BertModel.from_pretrained(bert_path, return_dict=True)
        model_task0 = SequenceClassification(config_task0)
        model_task1 = SequenceClassification(config_task1)

    # Data preparation
    tokenizer = BertTokenizer.from_pretrained(bert_path, cache_dir=cache_dir)
    data_iterator_train_task0 = DataIterator(data_args_task0, tokenizer=tokenizer, mode="train",
                                             cache_dir=cache_dir, batch_size=batch_size)
    data_iterator_train_task1 = DataIterator(data_args_task1, tokenizer=tokenizer, mode="train",
                                             cache_dir=cache_dir, batch_size=batch_size)
    data_iterator_eval_task0 = DataIterator(data_args_task0, tokenizer=tokenizer, mode="dev",
                                            cache_dir=cache_dir, batch_size=batch_size)
    data_iterator_eval_task1 = DataIterator(data_args_task1, tokenizer=tokenizer, mode="dev",
                                            cache_dir=cache_dir, batch_size=batch_size)
    logger.info("*** DataSet Ready ***")

    # Optimizer and lr_scheduler
    opt_bert = torch.optim.AdamW(model_Bert.parameters(), lr=learning_rate)
    opt_task0 = torch.optim.AdamW(model_task0.parameters(), lr=learning_rate)
    opt_task1 = torch.optim.AdamW(model_task1.parameters(), lr=learning_rate)
    metrics_task0 = ComputeMetrics(data_args_task0)
    metrics_task1 = ComputeMetrics(data_args_task1)

    iterations = (epochs * len(data_iterator_train_task1) // batch_size) + 1
    print(iterations)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        opt_bert, lambda step: (1.0 - step / iterations))

    all_iters = 0
    for i in range(1, iterations + 1):
        all_iters += 1
        model_Bert.train()
        model_task0.train()
        model_task1.train()
        data0 = data_iterator_train_task0.next()
        data1 = data_iterator_train_task1.next()
        if use_gpu:
            input_ids0 = data0['input_ids'].cuda()
            attention_mask0 = data0['attention_mask'].cuda()
            token_type_ids0 = data0['token_type_ids'].cuda()
            label0 = data0['labels'].cuda()
            input_ids1 = data1['input_ids'].cuda()
            attention_mask1 = data1['attention_mask'].cuda()
            token_type_ids1 = data1['token_type_ids'].cuda()
            label1 = data1['labels'].cuda()
        else:
            input_ids0 = data0['input_ids']
            attention_mask0 = data0['attention_mask']
            token_type_ids0 = data0['token_type_ids']
            label0 = data0['labels']
            input_ids1 = data1['input_ids']
            attention_mask1 = data1['attention_mask']
            token_type_ids1 = data1['token_type_ids']
            label1 = data1['labels']

        output_inter0 = model_Bert(input_ids=input_ids0, attention_mask=attention_mask0,
                                   token_type_ids=token_type_ids0, return_dict=True)
        output_inter1 = model_Bert(input_ids=input_ids1, attention_mask=attention_mask1,
                                   token_type_ids=token_type_ids1, return_dict=True)
        loss0 = model_task0(input=output_inter0, labels=label0)[0]
        loss1 = model_task1(input=output_inter1, labels=label1)[0]
        loss = loss0 + loss1

        # balance the losses of the sub-tasks
        ratio = loss0 / loss1
        weight0 = (2 * ratio) / (1 + ratio)
        weight1 = 2 - weight0
        loss = loss0 * weight0 + loss1 * weight1

        printInfo = 'TOTAL/Train {}/{} - lr:{}, sl={:.6f}, l0/w0-{:.6f}/{:.6f}, l1/w1-{:.6f}/{:.6f}'.format(
            all_iters, iterations, scheduler.get_lr(), loss, loss0, weight0, loss1, weight1)
        logging.info(printInfo)

        opt_bert.zero_grad()
        opt_task0.zero_grad()
        opt_task1.zero_grad()
        loss.backward()
        opt_bert.step()
        opt_task0.step()
        opt_task1.step()
        scheduler.step()

        if i % eval_interval == 0:
            evaluate(model_Bert, model_task0, data_iterator_eval_task0, metrics_task0)
            evaluate(model_Bert, model_task1, data_iterator_eval_task1, metrics_task1)

    # Final evaluation
    evaluate(model_Bert, model_task0, data_iterator_eval_task0, metrics_task0)
    evaluate(model_Bert, model_task1, data_iterator_eval_task1, metrics_task1)

    # Saving models
    model_Bert.save_pretrained(model_save_dir + "main")
    model_task0.save_pretrained(model_save_dir + "task0")
    model_task1.save_pretrained(model_save_dir + "task1")
def main():
    ntasks = len(tasks)
    data_args = list()
    configuration = list()
    sub_models = list()
    train_iter = list()
    dev_iter = list()
    test_iter = list()
    sub_optimizer = list()
    metrics = list()

    tokenizer = DistilBertTokenizer.from_pretrained(bert_path, cache_dir=cache_dir)
    for i in range(ntasks):
        logger.info("Tasks:" + tasks[i])
        data_args.append(GlueDataArgs(task_name=tasks[i]))
        configuration.append(DistilBertConfig.from_pretrained(
            bert_path,
            num_labels=glue_tasks_num_labels[data_args[i].task_name],
            finetuning_task=data_args[i].task_name,
            cache_dir=cache_dir))
        if use_gpu:
            sub_models.append(SequenceClassification(configuration[i]).cuda())
        else:
            sub_models.append(SequenceClassification(configuration[i]))
        train_iter.append(DataIterator(data_args[i], tokenizer=tokenizer, mode="train",
                                       cache_dir=cache_dir, batch_size=batch_size[i]))
        dev_iter.append(DataIterator(data_args[i], tokenizer=tokenizer, mode="dev",
                                     cache_dir=cache_dir, batch_size=batch_size_val[i]))
        sub_optimizer.append(torch.optim.AdamW(sub_models[i].parameters(), lr=learning_rate_0))
        metrics.append(ComputeMetrics(data_args[i]))
    logger.info("*** DataSet Ready ***")

    if use_gpu:
        Bert_model = DistilBertModel.from_pretrained(bert_path, return_dict=True).cuda()
    else:
        Bert_model = DistilBertModel.from_pretrained(bert_path, return_dict=True)
    bert_optimizer = torch.optim.AdamW(Bert_model.parameters(), lr=learning_rate_0)

    # balanced dataset
    train_num = list()
    for i in range(ntasks):
        train_num.append(len(train_iter[i]))
    iterations = (epochs * max(train_num) // bs) + 1

    sub_scheduler = list()
    for i in range(ntasks):
        sub_scheduler.append(torch.optim.lr_scheduler.LambdaLR(
            sub_optimizer[i],
            lambda step: (1.0 - step / iterations) if step <= frozen else learning_rate_1))
    Bert_scheduler = torch.optim.lr_scheduler.LambdaLR(
        bert_optimizer,
        lambda step: (1.0 - step / iterations) if step <= frozen else learning_rate_1)

    for i in range(1, iterations + 1):
        if i > frozen:
            for p in Bert_model.parameters():
                p.requires_grad = True
            Bert_model.train()
        elif i == frozen:
            for p in Bert_model.parameters():
                p.requires_grad = True
            Bert_model.train()
            logging.info("#####################################")
            logging.info("Release the training of the main model.")
            logging.info("#####################################")
        else:
            for p in Bert_model.parameters():
                p.requires_grad = False
            Bert_model.eval()

        losses = list()
        loss_rates = list()
        for j in range(ntasks):
            sub_models[j].train()
            data = train_iter[j].next()
            if use_gpu:
                input_ids = data['input_ids'].cuda()
                attention_mask = data['attention_mask'].cuda()
                # token_type_ids = data['token_type_ids'].cuda()
                label = data['labels'].cuda()
            else:
                input_ids = data['input_ids']
                attention_mask = data['attention_mask']
                # token_type_ids = data['token_type_ids']
                label = data['labels']
            output_inter = Bert_model(input_ids=input_ids, attention_mask=attention_mask,
                                      return_dict=True)  # token_type_ids=token_type_ids,
            losses.append(sub_models[j](input=output_inter, labels=label)[0])

        losssum = sum(losses).item()
        for j in range(ntasks):
            loss_rates.append(losses[j].item() / losssum)

        loss = 0
        printInfo = 'TOTAL/Train {}/{}, lr:{}'.format(i, iterations, Bert_scheduler.get_lr())
        for j in range(ntasks):
            loss += losses[j] * batch_size[j] * loss_rates[j]
            printInfo += ', loss{}-{:.6f}'.format(j, losses[j])
            sub_optimizer[j].zero_grad()
        logging.info(printInfo)

        if i > frozen:
            bert_optimizer.zero_grad()
        loss.backward()
        if i > frozen:
            bert_optimizer.step()
        for j in range(ntasks):
            sub_optimizer[j].step()
            sub_scheduler[j].step()
        Bert_scheduler.step()

        if i % eval_interval == 0:
            for j in range(ntasks):
                evaluate(Bert_model, sub_models[j], dev_iter[j], batch_size_val[j], metrics[j])
                sub_models[j].save_pretrained(os.path.join(
                    model_save_dir, "{}-checkpoint-{:06}.pth.tar".format(tasks[j], i)))
            Bert_model.save_pretrained(os.path.join(
                model_save_dir, "{}-checkpoint-{:06}.pth.tar".format("main", i)))

    # Final evaluation and checkpointing
    for i in range(ntasks):
        evaluate(Bert_model, sub_models[i], dev_iter[i], batch_size_val[i], metrics[i])
        sub_models[i].save_pretrained(os.path.join(
            model_save_dir, "{}-checkpoint-{:06}.pth.tar".format(tasks[i], iterations)))
    Bert_model.save_pretrained(os.path.join(
        model_save_dir, "{}-checkpoint-{:06}.pth.tar".format("main", iterations)))
def train(model: BaseModel, config, train_dataset, val_dataset, step=0):
    train_iterator = DataIterator(train_dataset, batch_size=config.batch_size,
                                  num_workers=config.data.num_workers,
                                  sampler=InfiniteRandomSampler(train_dataset))

    # Prepare for summary
    writer = SummaryWriter(config.log_dir)
    config_str = yaml.dump(namedtuple_to_dict(config))
    writer.add_text('config', config_str)
    train_sampler = SubsetSequentialSampler(train_dataset, config.summary.train_samples)
    val_sampler = SubsetSequentialSampler(val_dataset, config.summary.val_samples)
    train_sample_iterator = DataIterator(train_dataset.for_summary(), sampler=train_sampler, num_workers=2)
    val_sample_iterator = DataIterator(val_dataset.for_summary(), sampler=val_sampler, num_workers=2)

    # Training loop
    start_time = time.time()
    start_step = step
    while True:
        step += 1
        save_summary = step % config.summary_step == 0
        d_summary, g_summary, p_summary = None, None, None

        if config.mode == MODE_PRED:
            if model.lr_sched_p is not None:
                model.lr_sched_p.step()
            x, y = next(train_iterator)
            p_summary = model.optimize_p(x, y, step=step, summarize=save_summary)
        else:
            if model.lr_sched_d is not None:
                model.lr_sched_d.step()
            x, y = next(train_iterator)
            summarize_d = save_summary and config.d_updates_per_step == 1
            d_summary = model.optimize_d(x, y, step=step, summarize=summarize_d)
            for i in range(config.d_updates_per_step - 1):
                x, y = next(train_iterator)
                summarize_d = save_summary and (i == config.d_updates_per_step - 2)
                d_summary = model.optimize_d(x, y, step=step, summarize=summarize_d)

            if model.lr_sched_g is not None:
                model.lr_sched_g.step()
            summarize_g = save_summary and config.g_updates_per_step == 1
            g_summary = model.optimize_g(x, y, step=step, summarize=summarize_g)
            for i in range(config.g_updates_per_step - 1):
                x, y = next(train_iterator)
                summarize_g = save_summary and (i == config.g_updates_per_step - 2)
                g_summary = model.optimize_g(x, y, step=step, summarize=summarize_g)

        # Print status
        elapsed_time = time.time() - start_time
        elapsed_step = step - start_step
        print('\r[Step %d] %s' % (step, time.strftime('%H:%M:%S', time.gmtime(elapsed_time))), end='')
        if elapsed_time > elapsed_step:
            print(' | %.2f s/it' % (elapsed_time / elapsed_step), end='')
        else:
            print(' | %.2f it/s' % (elapsed_step / elapsed_time), end='')

        if step % config.ckpt_step == 0:
            model.save(step)

        if save_summary:
            # Save summaries from optimization process
            for summary in [p_summary, d_summary, g_summary]:
                if summary is None:
                    continue
                model.write_summary(writer, summary, step)

            # Summarize learning rates and gradients
            for component, optimizer in [
                ('d', model.optim_d),
                ('g', model.optim_g),
                ('p', model.optim_p),
            ]:
                if optimizer is None:
                    continue
                for i, group in enumerate(optimizer.param_groups):
                    writer.add_scalar('lr/%s/%d' % (component, i), group['lr'], step)
                    grads = []
                    for param in group['params']:
                        if param.grad is not None:
                            grads.append(param.grad.data.view([-1]))
                    if grads:
                        grads = torch.cat(grads, 0)
                        writer.add_histogram('grad/%s/%d' % (component, i), grads, step)

            # Custom summaries
            model.summarize(writer, step, train_sample_iterator, val_sample_iterator)
def train(model, rank=0):
    model.cuda()
    optimizer = SGD(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=momentem)
    model, optimizer = amp.initialize(model, optimizer, opt_level='O0', loss_scale=loss_scale)
    model = DistributedDataParallel(model)
    model.train()

    if rank == 0:
        print('preparing dataset...')
    data_iterator = DataIterator(path=coco_dir, batch_size=batch_size, stride=stride,
                                 shuffle=shuffle, resize=resize, dist=dist, world_size=world_size)
    if rank == 0:
        print('finish loading dataset!')

    def schedule_warmup(i):
        return warmup_ratio if i < warmup else 1

    def schedule(epoch):
        return gamma ** len([m for m in milestores if m <= epoch])

    scheduler_warmup = LambdaLR(optimizer, schedule_warmup)
    scheduler = LambdaLR(optimizer, schedule)

    if rank == 0:
        print('starting training...')
    for epoch in range(1, epochs + 1):
        cls_losses, box_losses, centerness_losses = [], [], []
        if epoch != 1:
            scheduler.step(epoch)
        for i, (data, target) in enumerate(data_iterator, start=1):
            optimizer.zero_grad()
            cls_loss, box_loss, centerness_loss = model([data, target])
            with amp.scale_loss(cls_loss + box_loss + centerness_loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            # torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_norm)
            optimizer.step()
            if epoch == 1 and i <= warmup:
                scheduler_warmup.step(i)

            cls_loss, box_loss, centerness_loss = (cls_loss.mean().clone(),
                                                   box_loss.mean().clone(),
                                                   centerness_loss.mean().clone())
            torch.distributed.all_reduce(cls_loss)
            torch.distributed.all_reduce(box_loss)
            torch.distributed.all_reduce(centerness_loss)
            cls_loss /= world_size
            box_loss /= world_size
            centerness_loss /= world_size
            if rank == 0:
                cls_losses.append(cls_loss)
                box_losses.append(box_loss)
                centerness_losses.append(centerness_loss)
            if rank == 0 and not isfinite(cls_loss + box_loss + centerness_loss):
                raise RuntimeError('Loss is diverging!')
            del cls_loss, box_loss, centerness_loss, target, data

            if rank == 0 and i % 10 == 0:
                focal_loss = torch.FloatTensor(cls_losses).mean().item()
                box_loss = torch.FloatTensor(box_losses).mean().item()
                centerness_loss = torch.FloatTensor(centerness_losses).mean().item()
                learning_rate = optimizer.param_groups[0]['lr']
                msg = '[{:{len}}/{}]'.format(epoch, epochs, len=len(str(epochs)))
                msg += '[{:{len}}/{}]'.format(i, len(data_iterator), len=len(str(len(data_iterator))))
                msg += ' focal loss: {:.3f}'.format(focal_loss)
                msg += ', box loss: {:.3f}'.format(box_loss)
                msg += ', centerness loss: {:.3f}'.format(centerness_loss)
                msg += ', lr: {:.2g}'.format(learning_rate)
                msg += ', cuda_memory: {:.3g} GB'.format(torch.cuda.memory_cached() / mb_to_gb_factor)
                print(msg, flush=True)
                del cls_losses[:], box_losses[:], centerness_losses[:], focal_loss, box_loss, centerness_loss

        if rank == 0:
            print('saving model for epoch {}'.format(epoch))
            torch.save(model.state_dict(), './checkpoints/epoch-{}.pth'.format(epoch))

    if rank == 0:
        print('finish training, saving the final model...')
        torch.save(model.state_dict(), './checkpoints/final.pth')
        print('-' * 10 + 'completed!' + '-' * 10)
def train(data_path, model, optimizer, criterion, device, logger, args):
    data_loader = DataLoader(data_path, args.verbose)
    X, y, seq = data_loader.run_pipeline(args.split_rate)
    train_iter = DataIterator(X[0], y[0], seq[0], batch_size=args.batch_size)
    test_iter = DataIterator(X[1], y[1], seq[1], batch_size=args.batch_size)

    train_err, test_err = [], []
    train_acc, test_acc = [], []
    logger.info(model)

    for epoch in range(args.epoch):
        logger.info("Epoch: {} / {}".format(epoch + 1, args.epoch))

        ### TRAIN LOOP ###
        err = []
        acc = []
        model.train()
        for proteins, sequence_lengths, targets in (tqdm(
                train_iter, ascii=False, desc="Training",
                total=int(len(X[0]) / args.batch_size),
                unit="batch") if args.verbose else train_iter):
            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)
            predictions = model(inputs, seq_lens)
            mask = build_mask(sequence_lengths).to(device)

            optimizer.zero_grad()
            batch_loss = criterion(predictions, targets, mask)
            batch_loss.backward()
            optimizer.step()

            cos_sim = cosine_similarity(predictions, targets, mask)
            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_training_error = sum(err) / len(err)
        epoch_training_accuracy = sum(acc) / len(acc)
        train_err.append(epoch_training_error)
        train_acc.append(epoch_training_accuracy)

        ### TEST LOOP ###
        err = []
        acc = []
        model.eval()
        for proteins, sequence_lengths, targets in (tqdm(
                test_iter, ascii=False, desc="Testing",
                total=int(len(X[1]) / args.batch_size),
                unit="batch") if args.verbose else test_iter):
            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)
            predictions = model(inputs, seq_lens)
            mask = build_mask(sequence_lengths).to(device)

            batch_loss = criterion(predictions, targets, mask)
            cos_sim = cosine_similarity(predictions, targets, mask)
            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_test_error = sum(err) / len(err)
        epoch_test_accuracy = sum(acc) / len(acc)
        test_err.append(epoch_test_error)
        test_acc.append(epoch_test_accuracy)

        logger.info(
            "Training error: {0:.4f},\tTest error: {1:.4f}\t\tTraining accuracy: {2:.4f}\tTest accuracy: {3:.4f}"
            .format(epoch_training_error, epoch_test_error,
                    epoch_training_accuracy, epoch_test_accuracy))

    return (train_err, test_err), (train_acc, test_acc)
threshold = 0.5
corpus = ParsedCorpus(base_dirs)
vocab = HeadWordVocabulary()
vocab.load()
entity_vocab = HeadWordVocabulary()
entity_vocab.load("./evoc.txt")

net_arch = args
net_arch.num_input = len(vocab)
model = Extractor(net_arch)
model.load_cpu_model(args.model_path, None)
model.cuda()
model.eval()

iterator = DataIterator(corpus, vocab, entity_vocab)
iterator.reset()

slot_word_dist = F.log_softmax(torch.FloatTensor(model.get_unnormalized_phi()), dim=-1)  # tensor [K, V]
assert torch.isnan(slot_word_dist).sum().item() == 0
slot_mean_dist = torch.FloatTensor(model.get_beta_mean())  # tensor [K, D + 1]
slot_stdvar_dist = torch.FloatTensor(model.get_beta_logvar()).exp().sqrt()  # tensor [K, D + 1]
if not args.nogpu:
    slot_word_dist = slot_word_dist.cuda()
    slot_mean_dist = slot_mean_dist.cuda()
    slot_stdvar_dist = slot_stdvar_dist.cuda()
dists = [
    MultivariateNormal(loc=slot_mean_dist[k],
                       covariance_matrix=torch.diag_embed(slot_stdvar_dist[k]))
def infer(model, args):
    rank = args.local_rank
    epoch_name = args.epoch
    model_state_dict_dir = ('checkpoints/final.pth' if epoch_name == 'final'
                            else 'checkpoints/epoch-{}.pth'.format(epoch_name))
    load = torch.load(model_state_dict_dir, map_location='cpu')
    load = {k.replace('module.', ''): v for k, v in load.items()}
    model_state_dict = load
    model.load_state_dict(model_state_dict)
    model = model.cuda()
    model = amp.initialize(model, opt_level='O2', keep_batchnorm_fp32=True, verbosity=0)
    # model = DistributedDataParallel(model)
    model.eval()

    if rank == 0:
        print('preparing dataset...')
    data_iterator = DataIterator(coco_dir, resize=resize, max_size=max_size,
                                 batch_size=batch_size, stride=stride,
                                 training=training, dist=dist, world_size=world_size)
    if rank == 0:
        print('finish loading dataset!')

    results = []
    with torch.no_grad():
        for i, (data, ids, ratios) in enumerate(data_iterator, start=1):
            scores, boxes, classes = model(data)
            results.append([scores, boxes, classes, ids, ratios])
            if rank == 0:
                size = len(data_iterator.ids)
                msg = '[{:{len}}/{}]'.format(min(i * batch_size, size), size, len=len(str(size)))
                print(msg, flush=True)

    if rank == 0:
        print('gathering results...')
    results = [torch.cat(r, dim=0) for r in zip(*results)]
    for r, result in enumerate(results):
        all_result = [torch.ones_like(result, device=result.device) for _ in range(world_size)]
        torch.distributed.all_gather(list(all_result), result)
        results[r] = torch.cat(all_result, dim=0)

    if rank == 0:
        results = [r.cpu() for r in results]
        detections = []
        processed_ids = set()
        for scores, boxes, classes, image_id, ratios in zip(*results):
            image_id = image_id.item()
            if image_id in processed_ids:
                continue
            processed_ids.add(image_id)

            keep = (scores > 0).nonzero()
            scores = scores[keep].view(-1)
            boxes = boxes[keep, :].view(-1, 4) / ratios
            classes = classes[keep].view(-1).int()
            for score, box, cat in zip(scores, boxes, classes):
                x1, y1, x2, y2 = box.data.tolist()
                cat = cat.item()
                cat = data_iterator.coco.getCatIds()[cat]
                detections.append({
                    'image_id': image_id,
                    'score': score.item(),
                    'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
                    'category_id': cat
                })

        if detections:
            print('writing {}...'.format(detection_file))
            detections = {'annotations': detections}
            detections['images'] = data_iterator.coco.dataset['images']
            detections['categories'] = [data_iterator.coco.dataset['categories']]
            json.dump(detections, open(detection_file, 'w'), indent=4)

            print('evaluating model...')
            coco_pred = data_iterator.coco.loadRes(detections['annotations'])
            coco_eval = COCOeval(data_iterator.coco, coco_pred, 'bbox')
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
        else:
            print('no detections!')
        p5 = self.smooth5(p5)
        return p3, p4, p5, p6, p7


def ResNet50FPN(
        state_dict_path='/Users/nick/.cache/torch/checkpoints/resnet50-19c8e357.pth',
        stride=128):
    return FPN(ResNet(layers=[3, 4, 6, 3], outputs=[3, 4, 5],
                      state_dict_path=state_dict_path), stride=stride)


if __name__ == '__main__':
    net = ResNet50FPN()
    net.initialize()

    from data import DataIterator
    dataiter = DataIterator()
    net.initialize()

    i = 0
    for data, target in dataiter:
        i += 1
        if i == 5:
            break
        y = net(data)
        for item in y:
            print(item.shape, end=' ')
        print()
def train_running_save(data_path, model, optimizer, criterion, device, logger, args, step=10):
    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    data_loader = DataLoader(data_path, args.verbose)
    X, y, seq = data_loader.run_pipeline(args.split_rate)
    train_iter = DataIterator(X[0], y[0], seq[0], batch_size=args.batch_size)
    test_iter = DataIterator(X[1], y[1], seq[1], batch_size=args.batch_size)

    train_err, test_err = [], []
    train_acc, test_acc = [], []
    logger.info(model)

    for epoch in range(args.epoch):
        logger.info("Epoch: {} / {}".format(epoch + 1, args.epoch))

        ### TRAIN LOOP ###
        err = []
        acc = []
        model.train()
        for proteins, sequence_lengths, targets in (tqdm(
                train_iter, ascii=False, desc="Training",
                total=int(len(X[0]) / args.batch_size),
                unit="batch") if args.verbose else train_iter):
            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)
            predictions = model(inputs, seq_lens)
            mask = build_mask(sequence_lengths).to(device)

            optimizer.zero_grad()
            batch_loss = criterion(predictions, targets, mask)
            batch_loss.backward()
            optimizer.step()

            cos_sim = cosine_similarity(predictions, targets, mask)
            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_training_error = sum(err) / len(err)
        epoch_training_accuracy = sum(acc) / len(acc)
        train_err.append(epoch_training_error)
        train_acc.append(epoch_training_accuracy)

        ### TEST LOOP ###
        err = []
        acc = []
        model.eval()
        for proteins, sequence_lengths, targets in (tqdm(
                test_iter, ascii=False, desc="Testing",
                total=int(len(X[1]) / args.batch_size),
                unit="batch") if args.verbose else test_iter):
            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)
            predictions = model(inputs, seq_lens)
            mask = build_mask(sequence_lengths).to(device)

            batch_loss = criterion(predictions, targets, mask)
            cos_sim = cosine_similarity(predictions, targets, mask)
            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_test_error = sum(err) / len(err)
        epoch_test_accuracy = sum(acc) / len(acc)
        test_err.append(epoch_test_error)
        test_acc.append(epoch_test_accuracy)

        logger.info(
            "Training error: {0:.4f},\tTest error: {1:.4f}\t\tTraining accuracy: {2:.4f}\tTest accuracy: {3:.4f}"
            .format(epoch_training_error, epoch_test_error,
                    epoch_training_accuracy, epoch_test_accuracy))

        if epoch % step == 0:
            logger.info("Saving checkpoint")
            performance_path = os.path.join(
                "results", "{}-epoch{}.pk".format(args.results_name.split(".")[0], epoch))  # temporary name
            checkpoint_name = "{}-epoch{}.pt".format(
                args.checkpoint_name.split(".")[0], epoch)  # temporary name
            results = (train_err, test_err), (train_acc, test_acc)
            with open(performance_path, "wb") as file:
                pickle.dump(results, file)
            torch.save(
                {
                    "epoch": args.epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict()
                }, os.path.join(args.checkpoint_dir, checkpoint_name))

    return (train_err, test_err), (train_acc, test_acc)
def train(args):
    # load double embedding
    word2index = json.load(open(args.prefix + 'doubleembedding/word_idx.json'))
    general_embedding = numpy.load(args.prefix + 'doubleembedding/gen.vec.npy')
    general_embedding = torch.from_numpy(general_embedding)
    domain_embedding = numpy.load(args.prefix + 'doubleembedding/' + args.dataset + '_emb.vec.npy')
    domain_embedding = torch.from_numpy(domain_embedding)

    # load dataset
    train_sentence_packs = json.load(open(args.prefix + args.dataset + '/train.json'))
    random.shuffle(train_sentence_packs)
    dev_sentence_packs = json.load(open(args.prefix + args.dataset + '/dev.json'))
    instances_train = load_data_instances(train_sentence_packs, word2index, args)
    instances_dev = load_data_instances(dev_sentence_packs, word2index, args)
    random.shuffle(instances_train)
    trainset = DataIterator(instances_train, args)
    devset = DataIterator(instances_dev, args)

    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)

    # build model
    if args.model == 'bilstm':
        model = MultiInferRNNModel(general_embedding, domain_embedding, args).to(args.device)
    elif args.model == 'cnn':
        model = MultiInferCNNModel(general_embedding, domain_embedding, args).to(args.device)

    parameters = list(model.parameters())
    parameters = filter(lambda x: x.requires_grad, parameters)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)

    # training
    best_joint_f1 = 0
    best_joint_epoch = 0
    for i in range(args.epochs):
        print('Epoch:{}'.format(i))
        for j in trange(trainset.batch_count):
            _, sentence_tokens, lengths, masks, aspect_tags, _, tags = trainset.get_batch(j)
            predictions = model(sentence_tokens, lengths, masks)

            loss = 0.
            tags_flatten = tags[:, :lengths[0], :lengths[0]].reshape([-1])
            for k in range(len(predictions)):
                prediction_flatten = predictions[k].reshape([-1, predictions[k].shape[3]])
                loss = loss + F.cross_entropy(prediction_flatten, tags_flatten, ignore_index=-1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        joint_precision, joint_recall, joint_f1 = eval(model, devset, args)
        if joint_f1 > best_joint_f1:
            model_path = args.model_dir + args.model + args.task + '.pt'
            torch.save(model, model_path)
            best_joint_f1 = joint_f1
            best_joint_epoch = i

    print('best epoch: {}\tbest dev {} f1: {:.5f}\n\n'.format(best_joint_epoch, args.task, best_joint_f1))
# negative = random.sample(negative, len(positive))
# train_dataset = positive + negative
train_dataset = positive
random.shuffle(train_dataset)

positive = [data for data in test_dataset if np.max(data.other_need) > 0]
negative = [data for data in test_dataset if np.max(data.other_need) == 0]
print("[player Ting Pai] positive : negative = %d : %d" % (len(positive), len(negative)))
# negative = random.sample(negative, len(positive))
# test_dataset = positive + negative
test_dataset = positive
random.shuffle(test_dataset)

augmentator = DataAugmentator()
train_iter = DataIterator(train_dataset, x_names=cfg.x_names, y_names=cfg.y_names,
                          batch_size=128, augmentator=augmentator)
test_iter = DataIterator(test_dataset, x_names=cfg.x_names, y_names=cfg.y_names, batch_size=128)

model = BuildModel(cfg.x_names, cfg.y_names)
model.summary()
model.fit_generator(train_iter,
                    steps_per_epoch=train_iter.steps_per_epoch,
                    epochs=200,
                    validation_data=test_iter,
                    validation_steps=test_iter.steps_per_epoch)
        text = sample.text.permute(1, 0).to(device)
        output = model(text)
        p = output.argmax(1).cpu().tolist()
        l = sample.label.tolist()
        preds += p
        labels += l

    report = classification_report(preds, labels)
    print(report)


if __name__ == "__main__":
    device = torch.device("cuda:0")
    save_dir = "./checkpoints"
    config = "CNN-debias-"
    cfg = ConfigBinaryClassification()
    Data = DataIterator(config=cfg)

    print("loading model")
    model = torch.load("checkpoints/CNN-distill-26").to(device)
    print("loading tokenizer")
    tokenizer = Data.tokenizer
    PAD_IND = tokenizer.vocab.stoi['<pad>']
    seq_length = 256
    token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)
    lig = LayerIntegratedGradients(model, model.embedding)
    reference_tokens = token_reference.generate_reference(seq_length, device=device).unsqueeze(0).to(device)

    # black_list = {0: ["克罗恩病"], 1: ["肠结核"]}
    black_list = {0: ["克罗恩病", "循腔", "进镜",