# Add a training op to tune the parameters. self.loss = tf.reduce_mean(-self.log_likelihood) self.train_op = tf.train.AdamOptimizer(LEARN_RATE).minimize(self.loss) tf.summary.scalar('loss', self.loss) #训练神经网络 embedding = load_word2vec_embedding(config.vocab_size) net = NER_net(embedding) with tf.Session() as sess: merged = tf.summary.merge_all() # 将图形、训练过程等数据合并在一起 writer = tf.summary.FileWriter(LOG_PATH, sess.graph) # 将训练日志写入到logs文件夹下 sess.run(tf.global_variables_initializer()) print(dataset.get_step()) for i in range(dataset.get_step()): x_train, y_train, x_test, y_test = dataset.next_batch(args.BATCH) max_sentenc_length = max(map(len, x_train)) sequence_len = np.asarray([len(x) for x in x_train]) # padding x_train = np.asarray([list(x[:]) + (max_sentenc_length - len(x)) * [config.src_padding] for x in x_train]) y_train = np.asarray([list(y[:]) + (max_sentenc_length - len(y)) * [TAGS_NUM - 1] for y in y_train]) res,loss_,_= sess.run([merged, net.loss, net.train_op], feed_dict={net.input: x_train, net.label: y_train, net.seq_length: sequence_len}) print('steps:{}loss:{}'.format(i, loss_)) writer.add_summary(res, i) # write log into file if i % 50 == 0: modelpp.save_model(sess, MODEL_PATH, overwrite=True)
help="train epochs") parser.add_argument("-b", "--BATCH", default=32, type=int, help="batch size") args = parser.parse_args() ''' flyai库中的提供的数据处理方法 传入整个数据训练多少轮,每批次批大小 ''' dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH) model = Model(dataset) ''' 实现自己的网络机构 ''' x = tf.placeholder(tf.float32, shape=[None], name='input_x') y_ = tf.placeholder(tf.float32, shape=[None], name='input_y') keep_prob = tf.placeholder(tf.float32, name='keep_prob') ''' dataset.get_step() 获取数据的总迭代次数 ''' best_score = 0 with tf.Session() as sess: sess.run(tf.global_variables_initializer()) for step in range(dataset.get_step()): x_train, y_train = dataset.next_train_batch() x_val, y_val = dataset.next_validation_batch() ''' 实现自己的保存模型逻辑 ''' model.save_model(sess, MODEL_PATH, overwrite=True) print(str(step + 1) + "/" + str(dataset.get_step()))
masks) optimizer = tf.train.AdamOptimizer(lr) # 对var_list中的变量计算loss的梯度 该函数为函数minimize()的第一部分,返回一个以元组(gradient, variable)组成的列表 gradients = optimizer.compute_gradients(cost) capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None] # 将计算出的梯度应用到变量上,是函数minimize()的第二部分,返回一个应用指定的梯度的操作Operation,对global_step做自增操作 train_op = optimizer.apply_gradients(capped_gradients) summary_op = tf.summary.merge([tf.summary.scalar("loss", cost)]) with tf.Session(graph=train_graph) as sess: sess.run(tf.global_variables_initializer()) train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph) for step in range(dataset.get_step()): que_train, ans_train = dataset.next_train_batch() que_val, ans_val = dataset.next_validation_batch() que_x, que_length = que_train ans_x, ans_lenth = ans_train ans_x = process_ans_batch( ans_x, ans_dict, int(sorted(list(ans_lenth), reverse=True)[0])) feed_dict = { input_data: que_x, targets: ans_x, lr: learning_rate, target_sequence_length: ans_lenth, source_sequence_length: que_length }
# region 打印模型信息 # mymodel.summary() # plot_model 需要安装pydot and graphviz # plot_model(mymodel, to_file='mymodel.png') # endregion print('load pretrain model...') densenet201.load_weights(path) print('load done !!!') max_val_acc = 0 min_loss = float('inf') iCount = 0 RATIO = 10 for i in range(dataset.get_step() // RATIO): ''' 获取 args.BATCH 数据量,准备设置为 2560,实际最大只能256, 加循环迭代10次,保证用于训练的验证集中数据和训练集中 数据训练次数大体一致 ''' for i_in in range(RATIO): x_train_big, y_train_big = dataset.next_train_batch() # 数据增强器 imageGen = ImageDataGenerator( horizontal_flip=True, zoom_range=[0.7, 1.3], rotation_range=45, ) small_step = 0
device = torch.device(device) #squeezenet1_1,squeezenet1_0,shufflenetv2_x1.0,shufflenetv2_x0.5,inception_v3_google, # resnet18,densenet121,densenet161,densenet169,densenet201 model_list = ['resnet18', 'inception_v3_google', 'densenet121'] # model_list=['squeezenet1_0','inception_v3_google','resnet18','densenet121'] # model_list=['densenet121'] net = Net(model_list, num_classes=num_classes).to(device) criterions = [nn.CrossEntropyLoss().to(device)] * len(model_list) lrs = [1e-4, 1e-4, 1e-4] optimizers = [ Adam(params=net.model_list[ii].parameters(), lr=lrs[ii], weight_decay=1e-5) for ii in range(len(model_list)) ] ''' dataset.get_step() 获取数据的总迭代次数 ''' chang_lr = [14, 18, 19] val_iter = 50 show_iter = 50 best_score = 0 steps = dataset.get_step() chang_lr_ = [math.ceil(float(s) / args.EPOCHS * steps) for s in chang_lr] chang_lr = chang_lr_ print(steps) print(chang_lr)
parser.add_argument("-b", "--BATCH", default=32, type=int, help="batch size")
args = parser.parse_args()
'''
flyai库中的提供的数据处理方法
传入整个数据训练多少轮,每批次批大小
'''
# flyai data wrapper: epochs = number of passes over the data,
# batch = samples per training batch.
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
model = Model(dataset)
'''
实现自己的网络机构
'''
# Graph inputs: 28x28x1 images, 10-class one-hot labels and the
# dropout keep-probability.
x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='input_x')
y = tf.placeholder(tf.float32, shape=[None, 10], name='input_y')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
learning_rate = 0.001
'''
dataset.get_step() 获取数据的总迭代次数
'''
# Parameter summaries for TensorBoard.
def variable_summaries(var):
    """Attach mean/stddev/max/min scalar summaries of ``var`` for TensorBoard."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
class Instructor(object):
    """Training driver for a stance/ABSA classifier.

    Uses flyai's built-in next-batch API (``next_train_batch`` /
    ``next_validation_batch``).  (Original note: 使用flyai自带的get next batch方法.)
    """

    def __init__(self, arguments):
        # Project hyper-parameters from the command line.
        parser = argparse.ArgumentParser()
        parser.add_argument("-e",
                            "--EPOCHS",
                            default=5,
                            type=int,
                            help="train epochs")
        parser.add_argument("-b",
                            "--BATCH",
                            default=2,
                            type=int,
                            help="batch size")
        self.args = parser.parse_args()
        self.arguments = arguments
        self.dataset = Dataset(epochs=self.args.EPOCHS,
                               batch=self.args.BATCH,
                               val_batch=self.args.BATCH)
        # BERT-based model names get a BERT tokenizer plus a pretrained
        # encoder; every other model gets a plain tokenizer plus a
        # precomputed embedding matrix.
        if 'bert' in self.arguments.model_name:
            self.tokenizer = Tokenizer4Bert(
                max_seq_len=self.arguments.max_seq_len,
                pretrained_bert_name=os.path.join(
                    os.getcwd(), self.arguments.pretrained_bert_name))
            bert = BertModel.from_pretrained(pretrained_model_name_or_path=self
                                             .arguments.pretrained_bert_name)
            self.model = self.arguments.model_class(bert, self.arguments).to(
                self.arguments.device)
        else:
            self.tokenizer = Util.bulid_tokenizer(
                fnames=[
                    self.arguments.dataset_file['train'],
                    self.arguments.dataset_file['test']
                ],
                max_seq_len=self.arguments.max_seq_len,
                dat_fname='{0}_tokenizer.dat'.format(self.arguments.dataset))
            embedding_matrix = Util.build_embedding_matrix(
                word2idx=self.tokenizer.word2idx,
                embed_dim=self.arguments.embed_dim,
                dat_fname='{0}_{1}_embedding_matrix.dat'.format(
                    str(self.arguments.embed_dim), self.arguments.dataset))
            self.model = self.arguments.model_class(
                embedding_matrix, self.arguments).to(self.arguments.device)
        if self.arguments.device.type == 'cuda':
            logger.info('cuda memory allocated: {}'.format(
                torch.cuda.memory_allocated(
                    device=self.arguments.device.index)))
        Util.print_args(model=self.model, logger=logger, args=self.arguments)

    def run(self):
        """Train for ``args.EPOCHS`` epochs, checkpoint the best model by
        validation accuracy, then fine-tune that checkpoint on one more
        validation batch and save it again."""
        # Loss and optimizer over trainable parameters only.
        criterion = nn.CrossEntropyLoss()
        _params = filter(lambda x: x.requires_grad, self.model.parameters())
        optimizer = self.arguments.optimizer(_params,
                                             lr=self.arguments.learning_rate,
                                             weight_decay=self.arguments.l2reg)
        Util.reset_params(model=self.model, args=self.arguments)
        # Training bookkeeping.
        max_val_acc = 0
        max_val_f1 = 0
        global_step = 0
        best_model_path = None
        target_set = set()  # every distinct target seen; logged at the end
        for epoch in range(self.args.EPOCHS):
            logger.info('>' * 100)
            logger.info('epoch: {}'.format(epoch))
            n_correct, n_total, loss_total = 0, 0, 0
            self.model.train()
            # Each epoch consumes an equal share of the flyai step budget.
            for step in range(self.dataset.get_step() // self.args.EPOCHS):
                (target_train,
                 text_train), stance_train = self.dataset.next_train_batch()
                for target in target_train:
                    target_set.add(target)
                text_train = PreProcessing(text_train).get_file_text()
                trainset = ABSADataset(data_type=None,
                                       fname=(target_train, text_train,
                                              stance_train),
                                       tokenizer=self.tokenizer)
                # random_split with a zero-length tail holds nothing out; it
                # only wraps the dataset in a Subset.
                trainset, _ = random_split(trainset, (len(trainset), 0))
                trainset_loader = DataLoader(dataset=trainset,
                                             batch_size=self.args.BATCH,
                                             shuffle=True)
                for i_batch, sample_batched in enumerate(trainset_loader):
                    global_step += 1
                    optimizer.zero_grad()
                    inputs = [
                        sample_batched[col].to(self.arguments.device)
                        for col in self.arguments.inputs_cols
                    ]
                    outputs = self.model(inputs)
                    targets = torch.tensor(sample_batched['polarity']).to(
                        self.arguments.device)
                    loss = criterion(outputs, targets)
                    loss.backward()
                    optimizer.step()
                    # Running training accuracy/loss, logged every log_step.
                    n_correct += (torch.argmax(outputs,
                                               -1) == targets).sum().item()
                    n_total += len(outputs)
                    loss_total += loss.item() * len(outputs)
                    if global_step % self.arguments.log_step == 0:
                        train_acc = n_correct / n_total
                        train_loss = loss_total / n_total
                        logger.info('loss: {:.4f}, acc: {:.4f}'.format(
                            train_loss, train_acc))
            # Per-epoch validation on one flyai validation batch.
            # NOTE(review): indentation reconstructed from a collapsed
            # source — confirm validation is intended once per epoch.
            (target_val,
             text_val), stance_train = self.dataset.next_validation_batch()
            for target in target_val:
                target_set.add(target)
            text_val = PreProcessing(text_val).get_file_text()
            valset = ABSADataset(data_type=None,
                                 fname=(target_val, text_val, stance_train),
                                 tokenizer=self.tokenizer)
            valset, _ = random_split(valset, (len(valset), 0))
            valset_loader = DataLoader(dataset=valset,
                                       batch_size=self.args.BATCH,
                                       shuffle=True)
            val_acc, val_f1 = Util.evaluate_acc_f1(model=self.model,
                                                   args=self.arguments,
                                                   data_loader=valset_loader)
            logger.info('> val_acc: {:.4f}, val_f1: {:.4f}'.format(
                val_acc, val_f1))
            # Checkpoint whenever validation accuracy improves.
            if val_acc > max_val_acc:
                max_val_acc = val_acc
                best_model_path = os.path.join(os.getcwd(),
                                               self.arguments.best_model_path)
                Util.save_model(model=self.model, output_dir=best_model_path)
                logger.info('>> saved: {}'.format(best_model_path))
            if val_f1 > max_val_f1:
                max_val_f1 = val_f1
        logger.info('>>> target: {}'.format(target_set))
        # Reload the best checkpoint and fine-tune it on one further
        # validation batch before the final save.
        self.model = Util.load_model(model=self.model,
                                     output_dir=best_model_path)
        self.model.train()
        (target_val,
         text_val), stance_train = self.dataset.next_validation_batch()
        valset = ABSADataset(data_type=None,
                             fname=(target_val, text_val, stance_train),
                             tokenizer=self.tokenizer)
        valset, _ = random_split(valset, (len(valset), 0))
        valset_loader = DataLoader(dataset=valset,
                                   batch_size=self.args.BATCH,
                                   shuffle=True)
        for i_batch, sample_batched in enumerate(valset_loader):
            global_step += 1
            optimizer.zero_grad()
            inputs = [
                sample_batched[col].to(self.arguments.device)
                for col in self.arguments.inputs_cols
            ]
            outputs = self.model(inputs)
            targets = torch.tensor(sample_batched['polarity']).to(
                self.arguments.device)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        Util.save_model(model=self.model, output_dir=best_model_path)
        logger.info('> max_val_acc: {0} max_val_f1: {1}'.format(
            max_val_acc, max_val_f1))
        logger.info('> train save model path: {}'.format(best_model_path))
total_loss += _loss * batch_len total_acc += _acc * batch_len return total_loss / data_len, total_acc / data_len # save_per_batch = 10 best_acc_val = 0 last_improved_step = 0 print_per_batch = 10 improvement_step = print_per_batch * 5 learning_rate_num = 0 flag = True with tf.Session() as sess: sess.run(tf.global_variables_initializer()) # train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph) print('dataset.get_step:', dataset.get_step()) for step in range(dataset.get_step()): x_train, y_train = dataset.next_train_batch() x_input_ids = x_train[0] x_input_mask = x_train[1] x_segment_ids = x_train[2] feed_dict = { model.input_ids: x_input_ids, model.input_mask: x_input_mask, model.segment_ids: x_segment_ids, model.labels: y_train, model.keep_prob: 0.5, model.learning_rate: learning_rate } if step % print_per_batch == 0: fetches = [model.loss, model.accuracy]
device = torch.device(device) #net = Net().to(device) #net = densenet201(pretrained=True)#加载已经训练好的模型 #net = EfficientNet.from_pretrained('efficientnet-b0') # net = resnet152(pretrained=True) #net = torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x16d_wsl') # print(net) # raise RuntimeError # num_ftrs = net.classifier.in_features # net.classifier = nn.Linear(num_ftrs, 4) # num_ftrs = net.fc.in_features # net.fc = nn.Linear(num_ftrs, 4) # # net = net.to(device) total_step = dataset.get_step() # optimizer = optim.Adam(net.parameters(), lr=params['lr'], weight_decay=params['weight_decay']) #optimizer = RAdam(net.parameters(), lr=params['lr'], weight_decay=params['weight_decay']) # schedule = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.333, patience=0.1*total_step, verbose=True) # criterion = nn.CrossEntropyLoss() ''' dataset.get_step() 获取数据的总迭代次数 ''' best_score = 0 min_loss = 1000 print(total_step) print('------------------start training------------------------') # train_losses = AverageMeter() # valid_losses = AverageMeter() # train_accs = AverageMeter()
Recall()]) # region 打印模型信息 # mymodel.summary() # plot_model 需要安装pydot and graphviz # plot_model(mymodel, to_file='mymodel.png') # endregion print('load pretrain model...') densenet201.load_weights(path) print('load done !!!') max_val_acc = 0 globals_f1 = 0 for i in range(dataset.get_step()): x_train, y_train = dataset.next_train_batch() x_train = preprocess_input(x_train, ) mymodel.train_on_batch(x_train, y_train) if i % 100 == 0 or i == dataset.get_step() - 1: x_val, y_val = dataset.next_validation_batch() x_val = preprocess_input(x_val, ) train_batch = x_train.shape[0] val_batch = x_val.shape[0] train_loss_and_metrics = mymodel.evaluate(x_train, y_train, batch_size=train_batch) val_loss_and_metrics = mymodel.evaluate(x_val, y_val, batch_size=val_batch)
# 输出层 sqeue.add(Dense(6, activation='softmax')) # 输出模型的整体信息 sqeue.summary() sqeue.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) ''' flyai库中的提供的数据处理方法 传入整个数据训练多少轮,每批次批大小 ''' dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH) model = Model(dataset) ''' dataset.get_step() 获取数据的总迭代次数 ''' best_score = -1 for step in range(dataset.get_step()): x_train, y_train = dataset.next_train_batch() x_val, y_val = dataset.next_validation_batch() history = sqeue.fit(x_train, y_train, batch_size=args.BATCH, verbose=1) score = sqeue.evaluate(x_val, y_val, verbose=0) if score[1] > best_score: best_score = score[1] # 保存模型 model.save_model(sqeue, MODEL_PATH, overwrite=True) print("step %d, all step %d, best accuracy %g" %
# 读取验证集进行验证,保证验证集一样 lr_val, hr_val = dataset.next_validation_batch() # 参数初始化 model = Model(dataset) if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' device = torch.device(device) net = Net().to(device) criterion = nn.L1Loss() optimize = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), args.LR) # ======================开始训练=========================== net.train() iteration = dataset.get_step() print("Batch_size=", args.BATCH_SIZE, "iteration=", iteration, "lr=", args.LR) for iter in range(iteration): # 获取 LR/HR 的图片路径 x_train, y_train = dataset.next_train_batch() # 对数据进行处理,这里为了加快速度,每次只读一张图片,然后对一张图片进行batch_size次随机裁剪,得到一个batch x_train, y_train = utility.load_image(x_train[0], y_train[0], batch_size = args.BATCH_SIZE, is_train=True) # print("---------------------------------") # print(x_train.shape, y_train.shape) # 数据导入GPU x_train = x_train.to(device) y_train = y_train.to(device)
传入整个数据训练多少轮,每批次批大小
'''
# flyai data wrapper: epochs = number of passes over the data,
# batch = samples per training batch.
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
model = Model(dataset)
'''
实现自己的网络结构
'''
# Check whether a GPU is available.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device = torch.device(device)
net = Net().to(device)
# NOTE(review): this unconditional .cuda() raises on CPU-only machines and
# is redundant after .to(device) when CUDA is available — confirm intent.
net = net.cuda()
'''
dataset.get_step() 获取数据的总批次
'''
def optim_policy(model):
    # Return the parameters of the first feature layer and of the
    # classifier — the only parts of the network selected for optimization.
    # (Original comment: 返回第一层和全连阶层的权重.)
    needed_optim = []
    for param in model.features[0].parameters():
        needed_optim.append(param)
    for param in model.classifier.parameters():
        needed_optim.append(param)
    return needed_optim
class Instructor(object):
    """Training driver for a BERT sequence-labelling (NER) model.

    Uses flyai's get-all-data dictionary together with the provided
    ``next_train_batch`` / ``next_validation_batch`` helpers.
    """

    def __init__(self, exec_type="train"):
        parser = argparse.ArgumentParser()
        parser.add_argument("-e",
                            "--EPOCHS",
                            default=10,
                            type=int,
                            help="train epochs")
        parser.add_argument("-b",
                            "--BATCH",
                            default=24,
                            type=int,
                            help="batch size")
        args = parser.parse_args()
        self.batch_size = args.BATCH
        self.epochs = args.EPOCHS
        # Model hyper-parameters come from the module-level `arguments`.
        self.learning_rate = arguments.learning_rate
        self.embedding_size = arguments.embedding_size
        self.hidden_size = arguments.hidden_size
        self.tags = arguments.tags
        self.dropout = arguments.dropout
        # Map each label string to an integer tag id.
        self.tag_map = {label: i for i, label in enumerate(arguments.labels)}
        if exec_type == "train":
            self.model = Net(
                tag_map=self.tag_map,
                batch_size=self.batch_size,
                dropout=self.dropout,
                embedding_dim=self.embedding_size,
                hidden_dim=self.hidden_size,
            )
        else:
            # Non-training modes build no model here.
            self.model = None
        self.dataset = Dataset(epochs=self.epochs, batch=self.batch_size)

    def train(self):
        """Run the training loop; evaluate every 50 steps and save the
        model at the end."""
        self.model.to(DEVICE)
        # weight decay is the coefficient on the regularization term, which
        # reflects model complexity; a large weight decay makes complex
        # models contribute a larger loss.
        optimizer = optim.Adam(self.model.parameters(),
                               lr=self.learning_rate,
                               weight_decay=0.0005)
        # schedule = ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.1, patience=100, eps=1e-4, verbose=True)
        total_size = math.ceil(self.dataset.get_train_length() /
                               self.batch_size)
        for epoch in range(self.epochs):
            # Each epoch consumes an equal share of the flyai step budget.
            for step in range(self.dataset.get_step() // self.epochs):
                self.model.train()
                # Same effect as optimizer.zero_grad().
                self.model.zero_grad()
                x_train, y_train = self.dataset.next_train_batch()
                x_val, y_val = self.dataset.next_validation_batch()
                batch = tuple(
                    t.to(DEVICE) for t in create_batch_iter(
                        mode='train', X=x_train, y=y_train).dataset.tensors)
                b_input_ids, b_input_mask, b_labels, b_out_masks = batch
                bert_encode = self.model(b_input_ids, b_input_mask)
                loss = self.model.loss_fn(bert_encode=bert_encode,
                                          tags=b_labels,
                                          output_mask=b_out_masks)
                loss.backward()
                # Gradient clipping (disabled):
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
                optimizer.step()
                # schedule.step(loss)
                if step % 50 == 0:
                    # Periodic evaluation on the current validation batch.
                    self.model.eval()
                    eval_loss, eval_acc, eval_f1 = 0, 0, 0
                    with torch.no_grad():
                        batch = tuple(
                            t.to(DEVICE) for t in create_batch_iter(
                                mode='dev', X=x_val, y=y_val).dataset.tensors)
                        # NOTE(review): this second .to(DEVICE) pass is
                        # redundant — the tensors were moved just above.
                        batch = tuple(t.to(DEVICE) for t in batch)
                        input_ids, input_mask, label_ids, output_mask = batch
                        bert_encode = self.model(input_ids, input_mask)
                        eval_los = self.model.loss_fn(bert_encode=bert_encode,
                                                      tags=label_ids,
                                                      output_mask=output_mask)
                        eval_loss = eval_los + eval_loss
                        predicts = self.model.predict(bert_encode, output_mask)
                        # Flatten the labels and drop the -1 padding slots.
                        label_ids = label_ids.view(1, -1)
                        label_ids = label_ids[label_ids != -1]
                        self.model.acc_f1(predicts, label_ids)
                        self.model.class_report(predicts, label_ids)
                        print('eval_loss: ', eval_loss)
                        print("-" * 50)
                # Text progress bar for the current epoch.
                progress = ("█" * int(step * 25 / total_size)).ljust(25)
                print("step {}".format(step))
                print("epoch [{}] |{}| {}/{}\n\tloss {:.2f}".format(
                    epoch, progress, step, total_size, loss.item()))
        save_model(self.model, arguments.output_dir)
# endregion # saver = tf.train.Saver(var_list = tf.global_variables()) max_val_acc = 0 globals_f1 = 0 with tf.keras.backend.get_session() as sess: sess.run(tf.global_variables_initializer()) print('load pretrain model...') densenet201.load_weights(path) print('load done !!!') # 利用tensorboard查看网络结构 # writer = tf.summary.FileWriter(LOG_PATH, sess.graph) for i in range(dataset.get_step()): x_train, y_train = dataset.next_train_batch() fetches = [optimize, loss, pred_y, accuracy] _, train_loss, train_pred, train_acc = sess.run( fetches, feed_dict={ x_inputs: x_train, y_inputs: y_train, K.learning_phase(): 1 }, ) temp_train_f1 = f1_score(np.argmax(y_train, axis=-1), train_pred, average='macro')
# Select GPU when available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device = torch.device(device)
'''
实现自己的网络结构
'''
# FCN-16s with a single output channel (binary segmentation mask).
cnn = FCN16s(1).to(device)
optimizer = SGD(cnn.parameters(),
                lr=0.0005,
                momentum=0.9,
                weight_decay=0.0005)
criterion = nn.BCELoss()  # loss function; expects probabilities in [0, 1]
'''
dataset.get_step() 获取数据的总迭代次数
'''
lowest_loss = 1e5
for i in range(data.get_step()):
    print('----------------' + str(i) + "/" + str(data.get_step()) +
          '-------------------')
    cnn.train()
    x_train, y_train = data.next_train_batch()
    # numpy arrays -> float tensors on the training device.
    x_train = torch.from_numpy(x_train)
    y_train = torch.from_numpy(y_train)
    x_train = x_train.float().to(device)
    y_train = y_train.float().to(device)
    # Add a channel dimension so the target matches the model output shape.
    y_train = y_train.unsqueeze(1)
    optimizer.zero_grad()
    outputs = cnn(x_train)
    # Sigmoid converts logits to probabilities before BCELoss.
    pred = torch.sigmoid(outputs)
    loss = criterion(pred, y_train)
    loss.backward()
    optimizer.step()
num_warmup_steps = 1000 max_grad = 1.0 ''' 实现自己的网络机构 ''' # 判断gpu是否可用 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') net = Net().to(device) model = Model(dataset, net) # print(net) # optimizer = torch.optim.Adam(net.parameters(), lr=5e-6) optimizer = AdamW(net.parameters(), lr=lr, correct_bias=False) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=dataset.get_step()) criterion = nn.CrossEntropyLoss() ''' dataset.get_step() 获取数据的总迭代次数 ''' total_loss, train_acc, best_score = 0., 0., 0. train_bar = tqdm(range(dataset.get_step()), desc='Iteration') for step in train_bar: net.train() x_train, y_train = dataset.next_train_batch() x_val, y_val = dataset.next_validation_batch() x_train = [torch.from_numpy(data).to(device) for data in x_train[:-1]] y_train = torch.from_numpy(y_train).to(device) ''' 实现自己的模型保存逻辑
with tf.control_dependencies(update_ops): train_op = optimizer.apply_gradients(capped_gvs) with tf.name_scope("summary"): tf.summary.scalar("loss", loss) tf.summary.scalar("acc", accuracy) merged_summary = tf.summary.merge_all() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph) print('the total length of train dataset', dataset.get_train_length()) print('the total length of validation dataset', dataset.get_validation_length()) print('dataset.get_step:', dataset.get_step()) all_train_steps = int( dataset.get_train_length() / args.BATCH) * args.EPOCHS current_step = 0 acc_flag = 0 last_provement = 0 # 早停步骤 eraly_stop = 100 # for step in range(args.EPOCHS): # for batch_train in data_augment.get_batch_dataset(all_train_x,all_train_y,args.BATCH,current_step): for step in range(1, dataset.get_step()): x_train, y_train, x_val, y_val = dataset.next_batch( args.BATCH, dataset.get_validation_length())
# Fraction of correct predictions in the batch.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='acc')
with tf.name_scope("summary"):
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("accuracy", accuracy)
merged_summary = tf.summary.merge_all()
best_score = 0
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph)
    # NOTE(review): this writer is created and immediately discarded —
    # looks like leftover debugging code.
    tf.summary.FileWriter('.')
    train_writer.add_graph(tf.get_default_graph())
    # dataset.get_step() is the total number of training iterations.
    for step in range(dataset.get_step()):
        x_train, y_train = dataset.next_train_batch()
        x_val, y_val = dataset.next_validation_batch()
        fetches = [loss, accuracy, train_op]
        # keep_prob 0.9 during training (dropout active).
        feed_dict = {input_x: x_train, input_y: y_train, keep_prob: 0.9}
        loss_, accuracy_, _ = sess.run(fetches, feed_dict=feed_dict)
        # Validation pass with dropout disabled (keep_prob 1.0).
        valid_acc = sess.run(accuracy,
                             feed_dict={
                                 input_x: x_val,
                                 input_y: y_val,
                                 keep_prob: 1.0
                             })
        summary = sess.run(merged_summary, feed_dict=feed_dict)
        # train_writer.add_summary(summary, step)
def main(): """ 项目的超参 """ parser = argparse.ArgumentParser() parser.add_argument("-e", "--EPOCHS", default=50, type=int, help="train epochs") parser.add_argument("-b", "--BATCH", default=8, type=int, help="batch size") args = parser.parse_args() # ------------------判断CUDA模式---------------------- if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' device = torch.device(device) # ------------------预处理数据---------------------- dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH) network = Net.from_pretrained(arguments.bert_model, num_tag=len(arguments.labels)).to(device) logger.info('\n预处理结束!!!\n') # ---------------------优化器------------------------- param_optimizer = list(network.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01}, {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}] t_total = int(dataset.get_train_length() / arguments.gradient_accumulation_steps / args.BATCH * args.EPOCHS) # ---------------------GPU半精度fp16----------------------------- if arguments.fp16: try: from apex.optimizers import FP16_Optimizer from apex.optimizers import FusedAdam except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.") optimizer = FusedAdam(optimizer_grouped_parameters, lr=arguments.learning_rate, bias_correction=False, max_grad_norm=1.0) if arguments.loss_scale == 0: optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=arguments.loss_scale) # ------------------------GPU单精度fp32--------------------------- else: optimizer = BertAdam(optimizer_grouped_parameters, lr=arguments.learning_rate, warmup=arguments.warmup_proportion, t_total=t_total ) # ---------------------模型初始化---------------------- if 
arguments.fp16: network.half() train_losses = [] eval_losses = [] train_accuracy = [] eval_accuracy = [] best_f1 = 0 start = time.time() global_step = 0 for e in range(args.EPOCHS): network.train() for step in range(dataset.get_step() // args.EPOCHS): x_train, y_train = dataset.next_train_batch() batch = create_batch_iter(mode='train', X=x_train, y=y_train).dataset.tensors batch = tuple(t.to(device) for t in batch) input_ids, input_mask, segment_ids, label_ids, output_mask = batch bert_encode = network(input_ids, segment_ids, input_mask) train_loss = network.loss_fn(bert_encode=bert_encode, tags=label_ids, output_mask=output_mask) if arguments.gradient_accumulation_steps > 1: train_loss = train_loss / arguments.gradient_accumulation_steps if arguments.fp16: optimizer.backward(train_loss) else: train_loss.backward() if (step + 1) % arguments.gradient_accumulation_steps == 0: def warmup_linear(x, warmup=0.002): if x < warmup: return x / warmup return 1.0 - x # modify learning rate with special warm up BERT uses lr_this_step = arguments.learning_rate * warmup_linear(global_step / t_total, arguments.warmup_proportion) for param_group in optimizer.param_groups: param_group['lr'] = lr_this_step optimizer.step() optimizer.zero_grad() global_step += 1 predicts = network.predict(bert_encode, output_mask) label_ids = label_ids.view(1, -1) label_ids = label_ids[label_ids != -1] label_ids = label_ids.cpu() train_acc, f1 = network.acc_f1(predicts, label_ids) logger.info("\n train_acc: %f - train_loss: %f - f1: %f - using time: %f - step: %d \n" % (train_acc, train_loss.item(), f1, ( time.time() - start), step)) # -----------------------验证---------------------------- network.eval() count = 0 y_predicts, y_labels = [], [] eval_loss, eval_acc, eval_f1 = 0, 0, 0 with torch.no_grad(): for step in range(dataset.get_step() // args.EPOCHS): x_val, y_val = dataset.next_validation_batch() batch = create_batch_iter(mode='dev', X=x_val, y=y_val).dataset.tensors batch = tuple(t.to(device) 
for t in batch) input_ids, input_mask, segment_ids, label_ids, output_mask = batch bert_encode = network(input_ids, segment_ids, input_mask).cpu() eval_los = network.loss_fn(bert_encode=bert_encode, tags=label_ids, output_mask=output_mask) eval_loss = eval_los + eval_loss count += 1 predicts = network.predict(bert_encode, output_mask) y_predicts.append(predicts) label_ids = label_ids.view(1, -1) label_ids = label_ids[label_ids != -1] y_labels.append(label_ids) eval_predicted = torch.cat(y_predicts, dim=0).cpu() eval_labeled = torch.cat(y_labels, dim=0).cpu() print('eval:') print(eval_predicted.numpy().tolist()) print(eval_labeled.numpy().tolist()) eval_acc, eval_f1 = network.acc_f1(eval_predicted, eval_labeled) network.class_report(eval_predicted, eval_labeled) logger.info( '\n\nEpoch %d - train_loss: %4f - eval_loss: %4f - train_acc:%4f - eval_acc:%4f - eval_f1:%4f\n' % (e + 1, train_loss.item(), eval_loss.item() / count, train_acc, eval_acc, eval_f1)) # 保存最好的模型 if eval_f1 > best_f1: best_f1 = eval_f1 save_model(network, arguments.output_dir) if e % 1 == 0: train_losses.append(train_loss.item()) train_accuracy.append(train_acc) eval_losses.append(eval_loss.item() / count) eval_accuracy.append(eval_acc)