Example #1
class Instructor(object):
    """
    Features: uses flyai Dataset's get all data | flyai's provided next batch
    """
    def __init__(self, args):
        self.args = args

        self.dataset = Dataset(epochs=self.args.EPOCHS,
                               batch=self.args.BATCH,
                               val_batch=self.args.BATCH)

    def run(self):
        best_err1 = 100.
        best_epoch = 0

        logger.info('==> creating model "{}"'.format(self.args.model_name))
        model = Util.getModel(**vars(self.args))

        model = model.to(DEVICE)
        # In most cases, setting this flag lets cuDNN's built-in auto-tuner search for the most efficient
        # algorithms for the current configuration, which improves runtime efficiency.
        cudnn.benchmark = True
        # define loss function (criterion) and optimizer
        # criterion = nn.CrossEntropyLoss().to(DEVICE)
        # Label smoothing
        criterion = LabelSmoothingLoss(classes=self.args.num_classes,
                                       smoothing=0.1)
        # Focal Loss
        # criterion = FocalLoss(class_num=self.args.num_classes)

        # define optimizer
        optimizer = Util.getOptimizer(model=model, args=self.args)

        trainer = Trainer_1(dataset=self.dataset,
                            criterion=criterion,
                            optimizer=optimizer,
                            args=self.args,
                            logger=logger)
        logger.info('train: {} test: {}'.format(
            self.dataset.get_train_length(),
            self.dataset.get_validation_length()))
        for epoch in range(0, self.args.EPOCHS):
            # train for one epoch
            model = trainer.train(model=model, epoch=epoch)

            # evaluate on validation set
            model, val_err1 = trainer.test(model=model, epoch=epoch)

            # remember best err@1 and save checkpoint
            is_best = val_err1 < best_err1
            if is_best:
                best_err1 = val_err1
                best_epoch = epoch
                logger.info('Best val_err1 {}'.format(best_err1))
            Util.save_checkpoint(model.state_dict(), is_best,
                                 self.args.output_models_dir)
            if not is_best and epoch - best_epoch >= self.args.patience > 0:
                break

        logger.info('Best val_err1: {:.4f} at epoch {}'.format(
            best_err1, best_epoch))
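# A minimal sketch of what the LabelSmoothingLoss used above could look like
# (an assumed standard implementation; the project's actual class is defined elsewhere).
import torch
import torch.nn as nn


class LabelSmoothingLossSketch(nn.Module):
    """Cross-entropy against a smoothed one-hot target distribution."""

    def __init__(self, classes, smoothing=0.1, dim=-1):
        super(LabelSmoothingLossSketch, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.classes = classes
        self.dim = dim

    def forward(self, pred, target):
        # pred: (batch, classes) raw logits; target: (batch,) class indices
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            true_dist = torch.full_like(pred, self.smoothing / (self.classes - 1))
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))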
'''
Project hyperparameters
'''
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--EPOCHS", default=1, type=int, help="train epochs")
parser.add_argument("-b", "--BATCH", default=1, type=int, help="batch size")
args = parser.parse_args()
'''
Data-handling helpers provided by the flyai library.
Pass in the number of epochs to train over the whole dataset and the batch size.
'''
print('batch_size: %d, epoch_size: %d' % (args.BATCH, args.EPOCHS))
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH, val_batch=32)
model = Model(dataset)

print("number of train examples:%d" % dataset.get_train_length())
print("number of validation examples:%d" % dataset.get_validation_length())
# region hyperparameters
n_classes = 45
fc1_dim = 512
# endregion

# region define input placeholders
x_inputs = tf.placeholder(shape=(None, 224, 224, 3),
                          dtype=tf.float32,
                          name='x_inputs')
y_inputs = tf.placeholder(shape=(None, n_classes),
                          dtype=tf.float32,
                          name='y_inputs')
# lr          = tf.placeholder(dtype=tf.float32, name='lr')
inputs = preprocess_input(x_inputs)
Example #3
'''
Project hyperparameters
'''
parser = argparse.ArgumentParser()
parser.add_argument("-e",
                    "--EPOCHS",
                    default=200,
                    type=int,
                    help="train epochs")
parser.add_argument("-b", "--BATCH", default=64, type=int, help="batch size")
args = parser.parse_args()
'''
Data-handling helpers provided by the flyai library.
Pass in the number of epochs to train over the whole dataset and the batch size.
'''
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
model = flyai_model(dataset)
print('dataset.get_train_length()', dataset.get_train_length())
print('dataset.get_validation_length()', dataset.get_validation_length())
dataset_slice = wangyi.getDatasetListByClassfy(classify_count=3)
x_train_slice, y_train_slice, x_val_slice, y_val_slice = [], [], [], []
for epoch in range(3):
    x_1, y_1, x_2, y_2 = dataset_slice[epoch].get_all_processor_data()
    x_train_slice.append(x_1)
    y_train_slice.append(y_1)
    x_val_slice.append(x_2)
    y_val_slice.append(y_2)

# Hyperparameters
vocab_size = 20655  # total vocabulary size
embedding_dim = 64  # embedding layer size
hidden_dim = 1024  # Dense layer size
max_seq_len = 34  # maximum sentence length
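# A minimal sketch (an assumption, not part of the original snippet) of how these
# hyperparameters could be wired into a simple Keras text classifier.
from tensorflow import keras


def build_text_model(num_classes):
    model = keras.Sequential([
        keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_seq_len),
        keras.layers.GlobalAveragePooling1D(),
        keras.layers.Dense(hidden_dim, activation='relu'),
        keras.layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model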
Example #4
# Train and evaluate the model
data = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
model = Model(data)

x, y, x_test, y_test = data.get_all_processor_data()
#
# validateNum = 30
# x_train = x[0:x.shape[0]-validateNum,:]
# y_train = y[0:y.shape[0]-validateNum]
# x_test = x[-validateNum:,:]
# y_test = y[-validateNum:]

x_train = x
y_train = y

print("the length of train data: %d" % data.get_train_length())
print("the length of x_train: %d" % x_train.shape[0])
print("the length of x_test: %d" % x_test.shape[0])
# the length of train data: 162
# the length of x_train: 162
# the length of x_test: 54
# the length of test datas: 54
# x_train, y_train = data.get_all_validation_data()
# print(args.BATCH)
# print(args.EPOCHS)
# read in data
dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test, label=y_test)

best_accuracy = 0
# specify parameters via map
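    # Note: the fragment below is truncated; `gvs` presumably comes from something
    # like `gvs = optimizer.compute_gradients(loss)` (an assumption) before the
    # gradient-clipping step.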
    capped_gvs = [(tf.clip_by_value(grad, -2., 2.), var) for grad, var in gvs
                  if grad is not None]
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(capped_gvs)

with tf.name_scope("summary"):
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("acc", accuracy)
    merged_summary = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph)

    print('the total length of train dataset', dataset.get_train_length())
    print('the total length of validation dataset',
          dataset.get_validation_length())
    print('dataset.get_step:', dataset.get_step())

    all_train_steps = int(
        dataset.get_train_length() / args.BATCH) * args.EPOCHS

    current_step = 0
    acc_flag = 0
    last_improvement = 0
    #    early-stopping patience (in steps)
    early_stop = 100

    #    for step in range(args.EPOCHS):
    #        for batch_train in data_augment.get_batch_dataset(all_train_x,all_train_y,args.BATCH,current_step):
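    # A rough sketch (an assumption, not from the original snippet) of how the
    # early-stopping counters above could be used inside such a loop:
    #    for step in range(all_train_steps):
    #        # ...run one training step and compute validation accuracy `val_acc`...
    #        if val_acc > acc_flag:
    #            acc_flag = val_acc
    #            last_improvement = step
    #        elif step - last_improvement > early_stop:
    #            print('no improvement for %d steps, stopping early' % early_stop)
    #            break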
Example #6
KERAS_MODEL_NAME = "model.h5"
# Hyperparameters
parser = argparse.ArgumentParser()
parser.add_argument("-e",
                    "--EPOCHS",
                    default=10,
                    type=int,
                    help="train epochs")
parser.add_argument("-b", "--BATCH", default=32, type=int, help="batch size")
args = parser.parse_args()
# Data access helper class
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
# Model helper class
modelpp = Model(dataset)

train_size = dataset.get_train_length()
val_size = dataset.get_validation_length()
print("train size:" + str(train_size))
print("test size:" + str(val_size))
steps_per_epoch = int((train_size - 1) / args.BATCH) + 1
print("steps_per_epoch:", steps_per_epoch)


def get_train_generator():
    while 1:
        yield dataset.next_train_batch()


train_generator = get_train_generator()
val_data = dataset.get_all_validation_data()
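# Hypothetical usage (an assumption; `keras_model` is not defined in this snippet):
# x_val, y_val = val_data
# keras_model.fit_generator(train_generator,
#                           steps_per_epoch=steps_per_epoch,
#                           epochs=args.EPOCHS,
#                           validation_data=(x_val, y_val))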
Example #7
'''
Project hyperparameters
'''
parser = argparse.ArgumentParser()
parser.add_argument("-e", "--EPOCHS", default=1, type=int, help="train epochs")
parser.add_argument("-b", "--BATCH", default=64, type=int, help="batch size")
args = parser.parse_args()
'''
Data-handling helpers provided by the flyai library.
Pass in the number of epochs to train over the whole dataset and the batch size.
'''
dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)

vocab_size = Processor().getWordsCount()

# region prepare data
allDataLength = dataset.get_train_length()
print('length of all training data: %d' % allDataLength)
x, y, x_, y_ = dataset.get_all_processor_data()

# trainLen = (int)(95*allDataLength/100)
# x_train = x[0:trainLen]
# y_train = y[0:trainLen]
# x_val = x[trainLen:]
# y_val = y[trainLen:]

x_train = x
y_train = y
x_val = x_
y_val = y_
# endregion
# List of per-class batch sizes for the training set
train_batch_List = [16] * num_classes

myhistory = wangyi.historyByWangyi()
'''
Data-handling helpers provided by the flyai library.
Pass in the number of epochs to train over the whole dataset and the batch size.
'''

dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)
# dataset = wangyi.DatasetExtendToSize(False ,train_size=1773,val_size= 572,classify_count=num_classes)
# dataset = wangyi.DatasetExtendToSize(True ,train_size=40,val_size= 40,classify_count=num_classes)
model = Model(dataset)
dataset_wangyi = wangyi.DatasetByWangyi(num_classes)
dataset_wangyi.set_Batch_Size(train_batch_List, val_batch_size)
'''
dataset.get_train_length() : 5866
dataset.get_all_validation_data(): 1956
predict datas :  1956
y_train.sum(): [1773.  729.  891.  618.  399.  568.  394.  241.  204.   49.]
y_val.sum(): [572. 247. 334. 219. 144. 185. 129.  56.  49.  21.]
'''
'''
Implement your own network structure
'''
time_0 = clock()
# Build the final model
Inp = Input((224, 224, 3))

# base_model = ResNet50(weights=None, input_shape=(224, 224, 3), include_top=False)
base_model = DenseNet121(weights=weights_path,
class Instructor(object):
    """
    Features: uses flyai Dataset's get all data | the provided next_train_batch | next_validation_batch
    """
    def __init__(self, exec_type="train"):
        parser = argparse.ArgumentParser()
        parser.add_argument("-e",
                            "--EPOCHS",
                            default=10,
                            type=int,
                            help="train epochs")
        parser.add_argument("-b",
                            "--BATCH",
                            default=24,
                            type=int,
                            help="batch size")
        args = parser.parse_args()

        self.batch_size = args.BATCH
        self.epochs = args.EPOCHS

        self.learning_rate = arguments.learning_rate
        self.embedding_size = arguments.embedding_size
        self.hidden_size = arguments.hidden_size
        self.tags = arguments.tags
        self.dropout = arguments.dropout
        self.tag_map = {label: i for i, label in enumerate(arguments.labels)}

        if exec_type == "train":
            self.model = Net(
                tag_map=self.tag_map,
                batch_size=self.batch_size,
                dropout=self.dropout,
                embedding_dim=self.embedding_size,
                hidden_dim=self.hidden_size,
            )
        else:
            self.model = None

        self.dataset = Dataset(epochs=self.epochs, batch=self.batch_size)

    def train(self):
        self.model.to(DEVICE)
        # weight decay is the coefficient on the regularization term; the regularization term reflects model complexity,
        # so weight decay controls how much model complexity contributes to the loss: with a large weight decay, a complex model incurs a large loss.
        optimizer = optim.Adam(self.model.parameters(),
                               lr=self.learning_rate,
                               weight_decay=0.0005)
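        # With this L2-style weight decay, each update is roughly
        #   w <- w - lr * (grad + weight_decay * w),
        # so weight_decay=0.0005 adds a small penalty proportional to each weight.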
        # schedule = ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.1, patience=100, eps=1e-4, verbose=True)
        total_size = math.ceil(self.dataset.get_train_length() /
                               self.batch_size)
        for epoch in range(self.epochs):
            for step in range(self.dataset.get_step() // self.epochs):
                self.model.train()
                # Same effect as optimizer.zero_grad()
                self.model.zero_grad()
                x_train, y_train = self.dataset.next_train_batch()
                x_val, y_val = self.dataset.next_validation_batch()
                batch = tuple(
                    t.to(DEVICE) for t in create_batch_iter(
                        mode='train', X=x_train, y=y_train).dataset.tensors)
                b_input_ids, b_input_mask, b_labels, b_out_masks = batch
                bert_encode = self.model(b_input_ids, b_input_mask)
                loss = self.model.loss_fn(bert_encode=bert_encode,
                                          tags=b_labels,
                                          output_mask=b_out_masks)
                loss.backward()

                # Gradient clipping
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
                optimizer.step()
                # schedule.step(loss)
                if step % 50 == 0:
                    self.model.eval()
                    eval_loss, eval_acc, eval_f1 = 0, 0, 0
                    with torch.no_grad():
                        batch = tuple(
                            t.to(DEVICE) for t in create_batch_iter(
                                mode='dev', X=x_val, y=y_val).dataset.tensors)
                        batch = tuple(t.to(DEVICE) for t in batch)
                        input_ids, input_mask, label_ids, output_mask = batch
                        bert_encode = self.model(input_ids, input_mask)
                        eval_los = self.model.loss_fn(bert_encode=bert_encode,
                                                      tags=label_ids,
                                                      output_mask=output_mask)
                        eval_loss = eval_los + eval_loss
                        predicts = self.model.predict(bert_encode, output_mask)

                        label_ids = label_ids.view(1, -1)
                        label_ids = label_ids[label_ids != -1]

                        self.model.acc_f1(predicts, label_ids)
                        self.model.class_report(predicts, label_ids)
                        print('eval_loss: ', eval_loss)
                    print("-" * 50)
                    progress = ("█" * int(step * 25 / total_size)).ljust(25)
                    print("step {}".format(step))
                    print("epoch [{}] |{}| {}/{}\n\tloss {:.2f}".format(
                        epoch, progress, step, total_size, loss.item()))

        save_model(self.model, arguments.output_dir)
def main():
    """
    Project hyperparameters
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--EPOCHS", default=50, type=int, help="train epochs")
    parser.add_argument("-b", "--BATCH", default=8, type=int, help="batch size")
    args = parser.parse_args()

    # ------------------ check CUDA availability ----------------------
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    device = torch.device(device)

    # ------------------ preprocess data ----------------------
    dataset = Dataset(epochs=args.EPOCHS, batch=args.BATCH)

    network = Net.from_pretrained(arguments.bert_model, num_tag=len(arguments.labels)).to(device)
    logger.info('\nPreprocessing finished!\n')
    # --------------------- optimizer -------------------------
    param_optimizer = list(network.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}]

    t_total = int(dataset.get_train_length() / arguments.gradient_accumulation_steps / args.BATCH * args.EPOCHS)
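    # e.g. 10,000 training examples, gradient_accumulation_steps=1, BATCH=8, EPOCHS=50
    # give t_total = 62,500 optimizer steps (hypothetical numbers for illustration).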

    # --------------------- GPU half precision (fp16) -----------------------------
    if arguments.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=arguments.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if arguments.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=arguments.loss_scale)

    # ------------------------ GPU single precision (fp32) ---------------------------
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=arguments.learning_rate,
                             warmup=arguments.warmup_proportion,
                             t_total=t_total
                             )

    # --------------------- model initialization ----------------------
    if arguments.fp16:
        network.half()

    train_losses = []
    eval_losses = []
    train_accuracy = []
    eval_accuracy = []

    best_f1 = 0
    start = time.time()
    global_step = 0
    for e in range(args.EPOCHS):
        network.train()
        for step in range(dataset.get_step() // args.EPOCHS):
            x_train, y_train = dataset.next_train_batch()
            batch = create_batch_iter(mode='train', X=x_train, y=y_train).dataset.tensors
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, output_mask = batch
            bert_encode = network(input_ids, segment_ids, input_mask)
            train_loss = network.loss_fn(bert_encode=bert_encode, tags=label_ids, output_mask=output_mask)

            if arguments.gradient_accumulation_steps > 1:
                train_loss = train_loss / arguments.gradient_accumulation_steps

            if arguments.fp16:
                optimizer.backward(train_loss)
            else:
                train_loss.backward()

            if (step + 1) % arguments.gradient_accumulation_steps == 0:
                def warmup_linear(x, warmup=0.002):
                    if x < warmup:
                        return x / warmup
                    return 1.0 - x

                # modify learning rate with special warm up BERT uses
                lr_this_step = arguments.learning_rate * warmup_linear(global_step / t_total,
                                                                       arguments.warmup_proportion)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1

            predicts = network.predict(bert_encode, output_mask)
            label_ids = label_ids.view(1, -1)
            label_ids = label_ids[label_ids != -1]
            label_ids = label_ids.cpu()

            train_acc, f1 = network.acc_f1(predicts, label_ids)

        logger.info("\n train_acc: %f - train_loss: %f - f1: %f - using time: %f - step: %d \n" % (train_acc,
                                                                                                   train_loss.item(),
                                                                                                   f1,
                                                                                                   (
                                                                                                           time.time() - start),
                                                                                                   step))

        # ----------------------- validation ----------------------------
        network.eval()
        count = 0
        y_predicts, y_labels = [], []
        eval_loss, eval_acc, eval_f1 = 0, 0, 0
        with torch.no_grad():
            for step in range(dataset.get_step() // args.EPOCHS):
                x_val, y_val = dataset.next_validation_batch()
                batch = create_batch_iter(mode='dev', X=x_val, y=y_val).dataset.tensors
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids, output_mask = batch
                bert_encode = network(input_ids, segment_ids, input_mask).cpu()
                eval_los = network.loss_fn(bert_encode=bert_encode, tags=label_ids, output_mask=output_mask)
                eval_loss = eval_los + eval_loss
                count += 1
                predicts = network.predict(bert_encode, output_mask)
                y_predicts.append(predicts)

                label_ids = label_ids.view(1, -1)
                label_ids = label_ids[label_ids != -1]
                y_labels.append(label_ids)

            eval_predicted = torch.cat(y_predicts, dim=0).cpu()
            eval_labeled = torch.cat(y_labels, dim=0).cpu()
            print('eval:')
            print(eval_predicted.numpy().tolist())
            print(eval_labeled.numpy().tolist())

            eval_acc, eval_f1 = network.acc_f1(eval_predicted, eval_labeled)
            network.class_report(eval_predicted, eval_labeled)

            logger.info(
                '\n\nEpoch %d - train_loss: %4f - eval_loss: %4f - train_acc:%4f - eval_acc:%4f - eval_f1:%4f\n'
                % (e + 1, train_loss.item(), eval_loss.item() / count, train_acc, eval_acc, eval_f1))

            # Save the best model
            if eval_f1 > best_f1:
                best_f1 = eval_f1
                save_model(network, arguments.output_dir)

            if e % 1 == 0:
                train_losses.append(train_loss.item())
                train_accuracy.append(train_acc)
                eval_losses.append(eval_loss.item() / count)
                eval_accuracy.append(eval_acc)