class Instructor(object): """ 特点: 使用flyai字典的get all data | flyai提供的next batch """ def __init__(self, args): self.args = args self.dataset = Dataset(epochs=self.args.EPOCHS, batch=self.args.BATCH, val_batch=self.args.BATCH) def run(self): best_err1 = 100. best_epoch = 0 logger.info('==> creating model "{}"'.format(args.model_name)) model = Util.getModel(**vars(args)) model = model.to(DEVICE) # 大部分情况下,设置这个flag可以让内置的cuDNN的auto - tuner自动寻找最适合当前配置的高效算法,来达到优化运行效率的问题。 cudnn.benchmark = True # define loss function (criterion) and pptimizer # criterion = nn.CrossEntropyLoss().to(DEVICE) # 标签平滑 criterion = LabelSmoothingLoss(classes=self.args.num_classes, smoothing=0.1) # Focal Loss # criterion = FocalLoss(class_num=self.args.num_classes) # define optimizer optimizer = Util.getOptimizer(model=model, args=self.args) trainer = Trainer_1(dataset=self.dataset, criterion=criterion, optimizer=optimizer, args=self.args, logger=logger) logger.info('train: {} test: {}'.format( self.dataset.get_train_length(), self.dataset.get_validation_length())) for epoch in range(0, self.args.EPOCHS): # train for one epoch model = trainer.train(model=model, epoch=epoch) # evaluate on validation set model, val_err1 = trainer.test(model=model, epoch=epoch) # remember best err@1 and save checkpoint is_best = val_err1 < best_err1 if is_best: best_err1 = val_err1 best_epoch = epoch logger.info('Best var_err1 {}'.format(best_err1)) Util.save_checkpoint(model.state_dict(), is_best, args.output_models_dir) if not is_best and epoch - best_epoch >= args.patience > 0: break logger.info('Best val_err1: {:.4f} at epoch {}'.format( best_err1, best_epoch))
'''
Project hyperparameters
'''
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--EPOCHS", default=1, type=int, help="train epochs")
parser.add_argument("-b", "--BATCH", default=1, type=int, help="batch size")
args = parser.parse_args()

'''
Data-handling helper provided by the flyai library.
Takes the number of epochs over the whole dataset and the batch size.
'''
print('batch_size: %d, epoch_size: %d' % (args.BATCH, args.EPOCHS))
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH, val_batch=32)
model = Model(dataset)
print("number of train examples: %d" % dataset.get_train_length())
print("number of validation examples: %d" % dataset.get_validation_length())

# region hyperparameters
n_classes = 45
fc1_dim = 512
# endregion

# region input variables
x_inputs = tf.placeholder(shape=(None, 224, 224, 3), dtype=tf.float32, name='x_inputs')
y_inputs = tf.placeholder(shape=(None, n_classes), dtype=tf.float32, name='y_inputs')
# lr = tf.placeholder(dtype=tf.float32, name='lr')
inputs = preprocess_input(x_inputs)
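# `fc1_dim` above implies a single fully connected head on top of a backbone.
# A sketch of how it might be attached; `backbone_output` is an assumed name
# and the layer choices are illustrative, not the original network.
features = tf.layers.flatten(backbone_output)
fc1 = tf.layers.dense(features, fc1_dim, activation=tf.nn.relu)
logits = tf.layers.dense(fc1, n_classes)
probs = tf.nn.softmax(logits, name='probs')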
'''
Project hyperparameters
'''
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--EPOCHS", default=200, type=int, help="train epochs")
parser.add_argument("-b", "--BATCH", default=64, type=int, help="batch size")
args = parser.parse_args()

'''
Data-handling helper provided by the flyai library.
Takes the number of epochs over the whole dataset and the batch size.
'''
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
model = flyai_model(dataset)
print('dataset.get_train_length()', dataset.get_train_length())
print('dataset.get_validation_length()', dataset.get_validation_length())

dataset_slice = wangyi.getDatasetListByClassfy(classify_count=3)
x_train_slice, y_train_slice, x_val_slice, y_val_slice = [], [], [], []
for epoch in range(3):
    x_1, y_1, x_2, y_2 = dataset_slice[epoch].get_all_processor_data()
    x_train_slice.append(x_1)
    y_train_slice.append(y_1)
    x_val_slice.append(x_2)
    y_val_slice.append(y_2)

# hyperparameters
vocab_size = 20655   # total vocabulary size
embedding_dim = 64   # embedding layer size
hidden_dim = 1024    # Dense layer size
max_seq_len = 34     # maximum sentence length
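# A minimal text classifier consistent with the hyperparameters above. This is
# a sketch only: the layer choices are assumptions, and `num_classes` is
# inferred from classify_count=3 above, not stated in the original.
from keras.models import Sequential
from keras.layers import Embedding, GlobalAveragePooling1D, Dense

num_classes = 3  # assumption, matching classify_count=3
net = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_seq_len),
    GlobalAveragePooling1D(),
    Dense(hidden_dim, activation='relu'),
    Dense(num_classes, activation='softmax'),
])
net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])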
# train and evaluate the model
data = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
model = Model(data)

x, y, x_test, y_test = data.get_all_processor_data()
# validateNum = 30
# x_train = x[0:x.shape[0] - validateNum, :]
# y_train = y[0:y.shape[0] - validateNum]
# x_test = x[-validateNum:, :]
# y_test = y[-validateNum:]
x_train = x
y_train = y

print("the length of train data: %d" % data.get_train_length())
print("the length of x_train: %d" % x_train.shape[0])
print("the length of x_test: %d" % x_test.shape[0])
# the length of train data: 162
# the length of x_train: 162
# the length of x_test: 54
# the length of test data: 54

# x_train, y_train = data.get_all_validation_data()
# print(args.BATCH)
# print(args.EPOCHS)

# read in data
dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test, label=y_test)
best_accuracy = 0
# specify parameters via map
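# A plausible continuation of "specify parameters via map". The parameter
# values and the number of classes below are illustrative assumptions, not the
# project's actual settings.
param = {'max_depth': 6, 'eta': 0.1, 'objective': 'multi:softmax', 'num_class': 3}
num_round = 100
bst = xgb.train(param, dtrain, num_round, evals=[(dtest, 'eval')])
preds = bst.predict(dtest)                       # class indices for multi:softmax
best_accuracy = float((preds == y_test).mean())  # simple test-set accuracy
print('test accuracy: %.4f' % best_accuracy)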
capped_gvs = [(tf.clip_by_value(grad, -2., 2.), var)
              for grad, var in gvs if grad is not None]
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.apply_gradients(capped_gvs)

with tf.name_scope("summary"):
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("acc", accuracy)
merged_summary = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph)

    print('the total length of train dataset', dataset.get_train_length())
    print('the total length of validation dataset', dataset.get_validation_length())
    print('dataset.get_step:', dataset.get_step())

    all_train_steps = int(dataset.get_train_length() / args.BATCH) * args.EPOCHS
    current_step = 0
    acc_flag = 0
    last_improvement = 0
    # early stopping patience (in steps)
    early_stop = 100
    # for step in range(args.EPOCHS):
    #     for batch_train in data_augment.get_batch_dataset(all_train_x, all_train_y, args.BATCH, current_step):
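    # The counters above suggest an early-stopping loop of roughly this shape.
    # This is a sketch, not the original loop: `x_inputs`/`y_inputs`, `saver`
    # and MODEL_PATH are assumed names that are not defined in this snippet.
    saver = tf.train.Saver()
    for step in range(all_train_steps):
        x_batch, y_batch = dataset.next_train_batch()
        sess.run(train_op, feed_dict={x_inputs: x_batch, y_inputs: y_batch})
        if step % 100 == 0:
            x_val, y_val = dataset.next_validation_batch()
            val_acc = sess.run(accuracy, feed_dict={x_inputs: x_val, y_inputs: y_val})
            if val_acc > acc_flag:
                acc_flag = val_acc            # new best accuracy
                last_improvement = step       # reset the patience counter
                saver.save(sess, MODEL_PATH)
            elif step - last_improvement > early_stop:
                print('no improvement for %d steps, stopping early' % early_stop)
                break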
KERAS_MODEL_NAME = "model.h5"

# hyperparameters
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--EPOCHS", default=10, type=int, help="train epochs")
parser.add_argument("-b", "--BATCH", default=32, type=int, help="batch size")
args = parser.parse_args()

# data-access helper
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
# model helper
modelpp = Model(dataset)

train_size = dataset.get_train_length()
val_size = dataset.get_validation_length()
print("train size: " + str(train_size))
print("test size: " + str(val_size))
steps_per_epoch = int((train_size - 1) / args.BATCH) + 1
print("steps_per_epoch:", steps_per_epoch)


def get_train_generator():
    while 1:
        yield dataset.next_train_batch()


train_generator = get_train_generator()
val_data = dataset.get_all_validation_data()
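# Typical wiring of the generator and validation data above into Keras
# training. This is a sketch: `keras_model` stands in for the compiled Keras
# model, which is not shown in this snippet.
history = keras_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=args.EPOCHS,
    validation_data=val_data,
    verbose=1)
keras_model.save(KERAS_MODEL_NAME)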
'''
Project hyperparameters
'''
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--EPOCHS", default=1, type=int, help="train epochs")
parser.add_argument("-b", "--BATCH", default=64, type=int, help="batch size")
args = parser.parse_args()

'''
Data-handling helper provided by the flyai library.
Takes the number of epochs over the whole dataset and the batch size.
'''
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
vocab_size = Processor().getWordsCount()

# region prepare data
allDataLength = dataset.get_train_length()
print('length of all dev data: %d' % allDataLength)
x, y, x_, y_ = dataset.get_all_processor_data()
# trainLen = int(95 * allDataLength / 100)
# x_train = x[0:trainLen]
# y_train = y[0:trainLen]
# x_val = x[trainLen:]
# y_val = y[trainLen:]
x_train = x
y_train = y
x_val = x_
y_val = y_
# endregion
# per-class batch sizes for the training set
train_batch_List = [16] * num_classes
myhistory = wangyi.historyByWangyi()

'''
Data-handling helper provided by the flyai library.
Takes the number of epochs over the whole dataset and the batch size.
'''
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
# dataset = wangyi.DatasetExtendToSize(False, train_size=1773, val_size=572, classify_count=num_classes)
# dataset = wangyi.DatasetExtendToSize(True, train_size=40, val_size=40, classify_count=num_classes)
model = Model(dataset)
dataset_wangyi = wangyi.DatasetByWangyi(num_classes)
dataset_wangyi.set_Batch_Size(train_batch_List, val_batch_size)
'''
dataset.get_train_length() : 5866
dataset.get_all_validation_data(): 1956
predict data : 1956
y_train.sum(): [1773. 729. 891. 618. 399. 568. 394. 241. 204. 49.]
y_val.sum():   [572. 247. 334. 219. 144. 185. 129. 56. 49. 21.]
'''

'''
Build the network structure
'''
time_0 = clock()
# build the final model
Inp = Input((224, 224, 3))
# base_model = ResNet50(weights=None, input_shape=(224, 224, 3), include_top=False)
base_model = DenseNet121(weights=weights_path,
class Instructor(object):
    """
    Characteristics: uses FlyAI's get_all_data dict | the provided
    next_train_batch | next_validation_batch
    """

    def __init__(self, exec_type="train"):
        parser = argparse.ArgumentParser()
        parser.add_argument("-e", "--EPOCHS", default=10, type=int, help="train epochs")
        parser.add_argument("-b", "--BATCH", default=24, type=int, help="batch size")
        args = parser.parse_args()

        self.batch_size = args.BATCH
        self.epochs = args.EPOCHS
        self.learning_rate = arguments.learning_rate
        self.embedding_size = arguments.embedding_size
        self.hidden_size = arguments.hidden_size
        self.tags = arguments.tags
        self.dropout = arguments.dropout
        self.tag_map = {label: i for i, label in enumerate(arguments.labels)}

        if exec_type == "train":
            self.model = Net(
                tag_map=self.tag_map,
                batch_size=self.batch_size,
                dropout=self.dropout,
                embedding_dim=self.embedding_size,
                hidden_dim=self.hidden_size,
            )
        else:
            self.model = None

        self.dataset = Dataset(epochs=self.epochs, batch=self.batch_size)

    def train(self):
        self.model.to(DEVICE)
        # weight decay is the coefficient on the regularization term; that term
        # generally measures model complexity, so weight decay controls how much
        # model complexity contributes to the loss. With a large weight decay,
        # a complex model incurs a large loss.
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate,
                               weight_decay=0.0005)
        # schedule = ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.1,
        #                              patience=100, eps=1e-4, verbose=True)
        total_size = math.ceil(self.dataset.get_train_length() / self.batch_size)
        for epoch in range(self.epochs):
            for step in range(self.dataset.get_step() // self.epochs):
                self.model.train()
                # same effect as optimizer.zero_grad()
                self.model.zero_grad()

                x_train, y_train = self.dataset.next_train_batch()
                x_val, y_val = self.dataset.next_validation_batch()
                batch = tuple(
                    t.to(DEVICE) for t in create_batch_iter(
                        mode='train', X=x_train, y=y_train).dataset.tensors)
                b_input_ids, b_input_mask, b_labels, b_out_masks = batch
                bert_encode = self.model(b_input_ids, b_input_mask)
                loss = self.model.loss_fn(bert_encode=bert_encode,
                                          tags=b_labels,
                                          output_mask=b_out_masks)
                loss.backward()
                # gradient clipping
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
                optimizer.step()
                # schedule.step(loss)

                if step % 50 == 0:
                    self.model.eval()
                    eval_loss, eval_acc, eval_f1 = 0, 0, 0
                    with torch.no_grad():
                        batch = tuple(
                            t.to(DEVICE) for t in create_batch_iter(
                                mode='dev', X=x_val, y=y_val).dataset.tensors)
                        input_ids, input_mask, label_ids, output_mask = batch
                        bert_encode = self.model(input_ids, input_mask)
                        eval_los = self.model.loss_fn(bert_encode=bert_encode,
                                                      tags=label_ids,
                                                      output_mask=output_mask)
                        eval_loss = eval_los + eval_loss
                        predicts = self.model.predict(bert_encode, output_mask)
                        label_ids = label_ids.view(1, -1)
                        label_ids = label_ids[label_ids != -1]
                        self.model.acc_f1(predicts, label_ids)
                        self.model.class_report(predicts, label_ids)
                        print('eval_loss: ', eval_loss)
                        print("-" * 50)

                progress = ("█" * int(step * 25 / total_size)).ljust(25)
                print("step {}".format(step))
                print("epoch [{}] |{}| {}/{}\n\tloss {:.2f}".format(
                    epoch, progress, step, total_size, loss.item()))

        save_model(self.model, arguments.output_dir)
def main():
    """
    Project hyperparameters
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--EPOCHS", default=50, type=int, help="train epochs")
    parser.add_argument("-b", "--BATCH", default=8, type=int, help="batch size")
    args = parser.parse_args()

    # ------------------ choose CUDA or CPU ----------------------
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    device = torch.device(device)

    # ------------------ preprocess data ----------------------
    dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
    network = Net.from_pretrained(arguments.bert_model,
                                  num_tag=len(arguments.labels)).to(device)
    logger.info('\npreprocessing finished!\n')

    # --------------------- optimizer -------------------------
    param_optimizer = list(network.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}]
    t_total = int(dataset.get_train_length() / arguments.gradient_accumulation_steps
                  / args.BATCH * args.EPOCHS)

    # --------------------- GPU half precision (fp16) -----------------------------
    if arguments.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex "
                "to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=arguments.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if arguments.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=arguments.loss_scale)
    # ------------------------ GPU single precision (fp32) ---------------------------
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=arguments.learning_rate,
                             warmup=arguments.warmup_proportion,
                             t_total=t_total)

    # --------------------- model initialization ----------------------
    if arguments.fp16:
        network.half()

    train_losses = []
    eval_losses = []
    train_accuracy = []
    eval_accuracy = []

    best_f1 = 0
    start = time.time()
    global_step = 0
    for e in range(args.EPOCHS):
        network.train()
        for step in range(dataset.get_step() // args.EPOCHS):
            x_train, y_train = dataset.next_train_batch()
            batch = create_batch_iter(mode='train', X=x_train, y=y_train).dataset.tensors
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, output_mask = batch
            bert_encode = network(input_ids, segment_ids, input_mask)
            train_loss = network.loss_fn(bert_encode=bert_encode,
                                         tags=label_ids,
                                         output_mask=output_mask)

            if arguments.gradient_accumulation_steps > 1:
                train_loss = train_loss / arguments.gradient_accumulation_steps

            if arguments.fp16:
                optimizer.backward(train_loss)
            else:
                train_loss.backward()

            if (step + 1) % arguments.gradient_accumulation_steps == 0:
                def warmup_linear(x, warmup=0.002):
                    if x < warmup:
                        return x / warmup
                    return 1.0 - x

                # modify learning rate with special warm up BERT uses
                # (see the worked schedule example after this snippet)
                lr_this_step = arguments.learning_rate * warmup_linear(
                    global_step / t_total, arguments.warmup_proportion)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1

            predicts = network.predict(bert_encode, output_mask)
            label_ids = label_ids.view(1, -1)
            label_ids = label_ids[label_ids != -1]
            label_ids = label_ids.cpu()
            train_acc, f1 = network.acc_f1(predicts, label_ids)
            logger.info("\n train_acc: %f - train_loss: %f - f1: %f - using time: %f - step: %d \n"
                        % (train_acc, train_loss.item(), f1, (time.time() - start), step))

        # ----------------------- validation ----------------------------
        network.eval()
        count = 0
        y_predicts, y_labels = [], []
        eval_loss, eval_acc, eval_f1 = 0, 0, 0
        with torch.no_grad():
            for step in range(dataset.get_step() // args.EPOCHS):
                x_val, y_val = dataset.next_validation_batch()
                batch = create_batch_iter(mode='dev', X=x_val, y=y_val).dataset.tensors
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids, output_mask = batch
                bert_encode = network(input_ids, segment_ids, input_mask).cpu()
                eval_los = network.loss_fn(bert_encode=bert_encode,
                                           tags=label_ids,
                                           output_mask=output_mask)
                eval_loss = eval_los + eval_loss
                count += 1
                predicts = network.predict(bert_encode, output_mask)
                y_predicts.append(predicts)
                label_ids = label_ids.view(1, -1)
                label_ids = label_ids[label_ids != -1]
                y_labels.append(label_ids)

            eval_predicted = torch.cat(y_predicts, dim=0).cpu()
            eval_labeled = torch.cat(y_labels, dim=0).cpu()
            print('eval:')
            print(eval_predicted.numpy().tolist())
            print(eval_labeled.numpy().tolist())
            eval_acc, eval_f1 = network.acc_f1(eval_predicted, eval_labeled)
            network.class_report(eval_predicted, eval_labeled)

            logger.info('\n\nEpoch %d - train_loss: %4f - eval_loss: %4f - train_acc:%4f - eval_acc:%4f - eval_f1:%4f\n'
                        % (e + 1, train_loss.item(), eval_loss.item() / count,
                           train_acc, eval_acc, eval_f1))

            # save the best model
            if eval_f1 > best_f1:
                best_f1 = eval_f1
                save_model(network, arguments.output_dir)

            if e % 1 == 0:
                train_losses.append(train_loss.item())
                train_accuracy.append(train_acc)
                eval_losses.append(eval_loss.item() / count)
                eval_accuracy.append(eval_acc)
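# Worked example of the inline warmup_linear schedule above: the multiplier
# ramps linearly toward 1.0 over the warm-up fraction of training, then decays
# linearly to 0. The warmup value 0.1 here is illustrative, not the project's
# actual warmup_proportion.
def warmup_linear(x, warmup=0.002):
    if x < warmup:
        return x / warmup
    return 1.0 - x


for x in (0.05, 0.10, 0.50, 0.95):
    print(x, warmup_linear(x, warmup=0.1))
# 0.05 -> 0.5 (warming up), 0.10 -> 0.9, 0.50 -> 0.5, 0.95 -> 0.05 (decaying)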