Code example #1
def train(x_train,x_train_external,y_train):
    # model
    
    num_class=np.shape(y_train)[1]
    num_external=np.shape(x_train_external)[1]
    
    model = ECGNet(BasicBlock, [3, 4, 6, 3],num_classes= num_class,num_external=num_external)
    model = model.to(device)
    
    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion1 = nn.BCEWithLogitsLoss()
    
    lr = config.lr
    start_epoch = 1
    stage = 1
    best_auc = -1   
       
    # =========> start training <=========
    print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss,train_auc= train_epoch(model, optimizer, criterion1,x_train,x_train_external,y_train)
        print('#epoch:%02d stage:%d train_loss:%.4f train_auc:%.4f time:%s'
              % (epoch, stage, train_loss, train_auc, utils.print_time_cost(since)))
                   
        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
            

    return model
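These examples rely on two small helpers from utils that are not shown, adjust_learning_rate(optimizer, lr) and print_time_cost(since). A minimal sketch of what they plausibly do (an assumption; the original utils module may differ):

import time

def adjust_learning_rate(optimizer, lr):
    # Set every parameter group of the optimizer to the new learning rate.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def print_time_cost(since):
    # Format the elapsed wall-clock time since `since` (a time.time() value).
    return '%.1fs' % (time.time() - since)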
Code example #2
def train(x_train, x_val, x_train_external, x_val_external, y_train, y_val,
          num_class):
    # model
    model = ECGNet(BasicBlock, [3, 4, 6, 3], num_classes=num_class)
    model = model.to(device)

    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    # optimizer = optim.RMSprop(model.parameters(), lr=config.lr)

    wc = y_train.sum(axis=0)
    wc = 1. / (np.log(wc) + 1)

    # Add a label-weight penalty: the more a label resembles (and is confused with) other labels, the larger its weight score, so it should get more attention. These weights are already normalized.
    #    weight=np.array([0.9608,0.9000,0.8373,0.8373,0.8706,0.6412,0.8373,0.9118,1.0,0.9255,0.9118,
    #                      0.9892,0.9588,0.9118,0.9118,0.8137,0.9608,1.0,0.9118,0.9588,0.9588,0.9863,
    #                      0.8373,0.9892,0.9588,0.9118,0.9863])
    #   wc=weight*wc

    w = torch.tensor(wc, dtype=torch.float).to(device)
    criterion1 = utils.WeightedMultilabel(w)
    criterion2 = nn.BCEWithLogitsLoss()

    lr = config.lr
    start_epoch = 1
    stage = 1
    best_auc = -1

    # =========> start training <=========
    print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_auc = train_epoch(model, optimizer, criterion1,
                                            x_train, x_train_external, y_train,
                                            num_class)
        val_loss, val_auc = val_epoch(model, criterion2, x_val, x_val_external,
                                      y_val, num_class)
        print(
            '#epoch:%02d stage:%d train_loss:%.4f train_auc:%.4f  val_loss:%.4f val_auc:%.4f  time:%s'
            % (epoch, stage, train_loss, train_auc, val_loss, val_auc,
               utils.print_time_cost(since)))

        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
    return model
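Example #2 passes the per-class weight vector w into utils.WeightedMultilabel, whose source is not included. A minimal sketch of a per-class weighted BCE-with-logits loss that fits this call signature (an assumption, not the project's exact implementation):

import torch.nn as nn

class WeightedMultilabel(nn.Module):
    # Multi-label BCE-with-logits loss with one weight per class.
    def __init__(self, weights):
        super().__init__()
        self.base_loss = nn.BCEWithLogitsLoss(reduction='none')
        self.weights = weights  # tensor of shape (num_classes,)

    def forward(self, outputs, targets):
        # Per-element BCE, scaled by the class weight, averaged over batch and classes.
        return (self.base_loss(outputs, targets) * self.weights).mean()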
Code example #3
def train(x_train, x_val, x_train_external, x_val_external, y_train, y_val):
    # model

    num_class = np.shape(y_val)[1]
    num_external = np.shape(x_val_external)[1]

    model = ECGNet(BasicBlock, [3, 4, 6, 3],
                   num_classes=num_class,
                   num_external=num_external)
    model = model.to(device)

    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    wc = y_train.sum(axis=0)
    wc = 1. / (np.log(wc) + 1)

    w = torch.tensor(wc, dtype=torch.float).to(device)
    #   criterion1 = utils.WeightedMultilabel(w)
    criterion1 = nn.BCEWithLogitsLoss()
    criterion2 = nn.BCEWithLogitsLoss()

    lr = config.lr
    start_epoch = 1
    stage = 1
    best_auc = -1

    # =========> start training <=========
    print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_auc = train_epoch(model, optimizer, criterion1,
                                            x_train, x_train_external, y_train)
        val_loss, val_auc = val_epoch(model, criterion2, x_val, x_val_external,
                                      y_val)
        print(
            '#epoch:%02d stage:%d train_loss:%.4f train_auc:%.4f  val_loss:%.4f val_auc:%.4f  time:%s'
            % (epoch, stage, train_loss, train_auc, val_loss, val_auc,
               utils.print_time_cost(since)))

        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
    return model
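The class weights used in examples #2 and #3 come from an inverse-log transform of the per-class positive counts, so rarer labels get larger weights. A tiny standalone illustration with an invented label matrix:

import numpy as np

# Toy multi-hot label matrix: 6 samples, 3 classes (invented data for illustration).
y_train = np.array([
    [1, 0, 1],
    [1, 0, 0],
    [1, 1, 0],
    [1, 0, 0],
    [1, 0, 1],
    [1, 0, 0],
])

wc = y_train.sum(axis=0)     # per-class positive counts: [6, 1, 2]
wc = 1. / (np.log(wc) + 1)   # inverse-log weighting: rare classes weigh more
print(wc)                    # approx. [0.358, 1.0, 0.591]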
Code example #4
File: main.py Project: IanXiao2/DeepLearning
def train():
    model = getattr(models, config.model_name)()
    model = model.to(device)

    train_dataset = TextDataset(train=True)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=6)
    val_dataset = TextDataset(train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=4)
    print('train size {}, val size {}'.format(len(train_dataset),
                                              len(val_dataset)))

    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    scheduler_lr = optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=config.stage_epoch,
        gamma=config.lr_decay)
    criterion = nn.BCEWithLogitsLoss()

    model_save_dir = '%s/%s_%s' % (config.ckpt, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))
    writer = SummaryWriter(log_dir=model_save_dir, filename_suffix=".IanMac")
    start_epoch = -1
    for epoch in range(start_epoch + 1, config.max_epoch):
        since = time.time()
        train_loss, train_acc = train_epoch(model,
                                            optimizer,
                                            criterion,
                                            train_dataloader,
                                            show_interval=100)
        val_loss, val_acc = val_epoch(model, criterion, val_dataloader)
        print(
            '#epoch: %02d ---> train loss: %.3e  train f1: %.3f  val loss: %.3e val f1: %.3f time: %s\n'
            % (epoch, train_loss, train_acc, val_loss, val_acc,
               utils.print_time_cost(since)))
        writer.add_scalars("Loss", {'Train': train_loss}, epoch)
        writer.add_scalars("Loss", {'Valid': val_loss}, epoch)
        writer.add_scalars("ACC", {'Train': train_acc}, epoch)
        writer.add_scalars("ACC", {'Valid': val_acc}, epoch)

        scheduler_lr.step()
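None of the examples show train_epoch / val_epoch. The sketch below is one plausible dataloader-based implementation for a multi-label model (as in example #4); it assumes `device` is defined at module level as in the examples, that labels are multi-hot float tensors, and it reports micro-averaged F1. The real helpers may compute their metrics differently.

import torch
from sklearn.metrics import f1_score

def train_epoch(model, optimizer, criterion, dataloader, show_interval=100):
    model.train()
    total_loss, probs, targets = 0.0, [], []
    for step, (x, y) in enumerate(dataloader, 1):
        x, y = x.to(device), y.to(device)  # `device` assumed defined as in the examples above
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        probs.append(torch.sigmoid(logits).detach().cpu())
        targets.append(y.detach().cpu())
        if step % show_interval == 0:
            print('    step %d loss %.4f' % (step, loss.item()))
    preds = (torch.cat(probs) > 0.5).int().numpy()
    f1 = f1_score(torch.cat(targets).int().numpy(), preds, average='micro')
    return total_loss / len(dataloader), f1

def val_epoch(model, criterion, dataloader):
    model.eval()
    total_loss, probs, targets = 0.0, [], []
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            total_loss += criterion(logits, y).item()
            probs.append(torch.sigmoid(logits).cpu())
            targets.append(y.cpu())
    preds = (torch.cat(probs) > 0.5).int().numpy()
    f1 = f1_score(torch.cat(targets).int().numpy(), preds, average='micro')
    return total_loss / len(dataloader), f1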
Code example #5
File: main.py Project: hitachinsk/kkp
def train(args):
    # model
    if config.fuse == 'False':
        model = getattr(models, config.model_name)()
    elif config.fuse == 'True':
        model = ResMlp(ResMlpParams)
    else:
        raise ValueError(
            'Not supported type of fuse item in train initialization phase!')
    if args.ckpt and not args.resume:
        state = torch.load(args.ckpt, map_location='cpu')
        model.load_state_dict(state['state_dict'])
        print('train with pretrained weight val_f1', state['f1'])
    model = model.to(device)
    # data
    train_dataset = ECGDataset(data_path=config.train_data, train=True)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=4)
    val_dataset = ECGDataset(data_path=config.train_data, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=2)
    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    w = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
    criterion = utils.WeightedMultilabel(w)
    # directory for saving model checkpoints
    model_save_dir = '%s/%s_%s' % (args.output, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))
    utils.mkdirs(model_save_dir)
    if args.ex: model_save_dir += args.ex
    best_f1 = -1
    lr = config.lr
    start_epoch = 1
    stage = 1
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    logdir = os.path.join(args.output, 'logs',
                          current_time + '_' + config.fuse)
    writer = SummaryWriter(logdir)
    # resume training from the last checkpoint
    if args.resume:
        if os.path.exists(args.ckpt):  # args.ckpt is the directory holding the weights
            current_w = torch.load(os.path.join(args.ckpt, config.current_w))
            best_w = torch.load(os.path.join(args.ckpt, config.best_w))
            best_f1 = best_w['loss']
            start_epoch = current_w['epoch'] + 1
            lr = current_w['lr']
            stage = current_w['stage']
            model.load_state_dict(current_w['state_dict'])
            # if the interruption happened exactly at a stage-transition epoch
            if start_epoch - 1 in config.stage_epoch:
                stage += 1
                lr /= config.lr_decay
                utils.adjust_learning_rate(optimizer, lr)
                model.load_state_dict(best_w['state_dict'])  # after an interruption, restart from the best model
            print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
    # =========> start training <=========
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_f1 = train_epoch(model,
                                           optimizer,
                                           criterion,
                                           train_dataloader,
                                           show_interval=100)
        val_loss, val_f1 = val_epoch(model, criterion, val_dataloader)
        print(
            '#epoch:%02d stage:%d train_loss:%.3e train_f1:%.3f  val_loss:%0.3e val_f1:%.3f time:%s\n'
            % (epoch, stage, train_loss, train_f1, val_loss, val_f1,
               utils.print_time_cost(since)))
        writer.add_scalar('scalar/train_loss', train_loss,
                          epoch)  # coarse per-epoch view; could be refined to log every iteration
        writer.add_scalar('scalar/train_f1', train_f1, epoch)
        writer.add_scalar('scalar/val_loss', val_loss, epoch)
        writer.add_scalar('scalar/val_f1', val_f1, epoch)
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage
        }
        save_ckpt(state, best_f1 < val_f1, model_save_dir)
        best_f1 = max(best_f1, val_f1)
        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            best_w = os.path.join(
                model_save_dir,
                config.best_w)  # before entering each stage, reload the best model of the previous stage (a greedy choice, but is it really right?)
            model.load_state_dict(torch.load(best_w)['state_dict'])
            print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
    writer.close()
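save_ckpt(state, is_best, model_save_dir) in example #5 is also not shown. Under the assumption that config.current_w and config.best_w hold the file names of the latest and best checkpoints (they are read that way in the resume branch above), a minimal sketch could be:

import os
import shutil
import torch

def save_ckpt(state, is_best, model_save_dir):
    # Always write the latest checkpoint; copy it over the "best" file
    # whenever the validation metric improved.
    current_w = os.path.join(model_save_dir, config.current_w)  # `config` as imported in the examples
    best_w = os.path.join(model_save_dir, config.best_w)
    torch.save(state, current_w)
    if is_best:
        shutil.copyfile(current_w, best_w)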
Code example #6
File: main.py Project: yyyu200/ecg_pytorch
def train(args):
    # model
    model = getattr(models, config.model_name)()
    if args.ckpt and not args.resume:
        state = torch.load(args.ckpt, map_location='cpu')
        model.load_state_dict(state['state_dict'])
        print('train with pretrained weight val_f1', state['f1'])
    model = model.to(device)
    # data
    train_dataset = ECGDataset(data_path=config.train_data, train=True)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=6)
    val_dataset = ECGDataset(data_path=config.train_data, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=4)
    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    w = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
    criterion = utils.WeightedMultilabel(w)
    # directory for saving model checkpoints
    model_save_dir = '%s/%s_%s' % (config.ckpt, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))
    if args.ex: model_save_dir += args.ex
    best_f1 = -1
    lr = config.lr
    start_epoch = 1
    stage = 1
    # resume training from the last checkpoint
    if args.resume:
        if os.path.exists(args.ckpt):  # args.ckpt is the directory holding the weights
            model_save_dir = args.ckpt
            current_w = torch.load(os.path.join(args.ckpt, config.current_w))
            best_w = torch.load(os.path.join(model_save_dir, config.best_w))
            best_f1 = best_w['loss']
            start_epoch = current_w['epoch'] + 1
            lr = current_w['lr']
            stage = current_w['stage']
            model.load_state_dict(current_w['state_dict'])
            # if the interruption happened exactly at a stage-transition epoch
            if start_epoch - 1 in config.stage_epoch:
                stage += 1
                lr /= config.lr_decay
                utils.adjust_learning_rate(optimizer, lr)
                model.load_state_dict(best_w['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
    logger = Logger(logdir=model_save_dir, flush_secs=2)
    # =========> start training <=========
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_f1 = train_epoch(model,
                                           optimizer,
                                           criterion,
                                           train_dataloader,
                                           show_interval=100)
        val_loss, val_f1 = val_epoch(model, criterion, val_dataloader)
        print(
            '#epoch:%02d stage:%d train_loss:%.3e train_f1:%.3f  val_loss:%0.3e val_f1:%.3f time:%s\n'
            % (epoch, stage, train_loss, train_f1, val_loss, val_f1,
               utils.print_time_cost(since)))
        logger.log_value('train_loss', train_loss, step=epoch)
        logger.log_value('train_f1', train_f1, step=epoch)
        logger.log_value('val_loss', val_loss, step=epoch)
        logger.log_value('val_f1', val_f1, step=epoch)
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage
        }
        save_ckpt(state, best_f1 < val_f1, model_save_dir)
        best_f1 = max(best_f1, val_f1)
        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            best_w = os.path.join(model_save_dir, config.best_w)
            model.load_state_dict(torch.load(best_w)['state_dict'])
            print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
Code example #7
def train(mode='train', ckpt=None, resume=False):
    # model
    model = getattr(resnet, config.model_name)(input_dim=config.input_dim)
    if ckpt is not None and not resume:
        state = torch.load(ckpt, map_location='cpu')
        model.load_state_dict(state['state_dict'])
        print('train with pretrained weight val_f1', state['f1'])
    model = model.to(device)
    # data
    train_dataset = ECGDataset(data_path=config.train_data, mode=mode)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=6)
    val_dataset = ECGDataset(data_path=config.train_data, mode='val')
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=6)
    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    weights = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
    criterion = torch.nn.BCEWithLogitsLoss(weights)
    # criterion = torch.nn.BCEWithLogitsLoss()
    # directory for saving model checkpoints
    model_save_dir = '%s/%s_%s' % (config.ckpt, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))
    best_f1 = -1
    lr = config.lr
    start_epoch = 1
    stage = 1
    # resume training from the last checkpoint
    if resume:
        if os.path.exists(ckpt):  # ckpt is the directory holding the weights
            model_save_dir = ckpt
            current_w = torch.load(os.path.join(ckpt, config.current_w))
            best_w = torch.load(os.path.join(model_save_dir, config.best_w))
            best_f1 = best_w['loss']
            start_epoch = current_w['epoch'] + 1
            lr = current_w['lr']
            stage = current_w['stage']
            model.load_state_dict(current_w['state_dict'])
            # if the interruption happened exactly at a stage-transition epoch
            if start_epoch - 1 in config.stage_epoch:
                stage += 1
                lr /= config.lr_decay
                utils.adjust_learning_rate(optimizer, lr)
                model.load_state_dict(best_w['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
    if not os.path.exists(config.ckpt):
        os.mkdir(config.ckpt)
    if not os.path.exists(model_save_dir):  # model_save_dir already exists when resuming
        os.mkdir(model_save_dir)
    # =========> start training <=========
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_p, train_r, train_f1 = train_epoch(model,
                                                             optimizer,
                                                             criterion,
                                                             train_dataloader,
                                                             show_interval=50)
        val_loss, val_p, val_r, val_f1 = val_epoch(model, criterion,
                                                   val_dataloader)
        print('#epoch:%02d stage:%d time:%s' %
              (epoch, stage, utils.print_time_cost(since)))
        print(
            'train_loss:%.3e train_precision:%.4f train_recall:%.4f train_f1:%.4f'
            % (train_loss, train_p, train_r, train_f1))
        print(
            'val_loss:%.3e val_precision:%.4f val_recall:%.4f val_f1:%.4f \n' %
            (val_loss, val_p, val_r, val_f1))
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage
        }
        torch.save(state, os.path.join(model_save_dir, 'e%i' % (epoch)))
        best_f1 = max(best_f1, val_f1)

        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
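Example #7 reports per-epoch precision, recall and F1. A minimal way to obtain those numbers from sigmoid outputs thresholded at 0.5 (a hypothetical sketch, not the project's exact metric code):

import numpy as np

def multilabel_prf(y_true, y_prob, threshold=0.5, eps=1e-8):
    # y_true: (N, C) multi-hot array; y_prob: (N, C) sigmoid outputs; micro-averaged metrics.
    y_pred = (y_prob > threshold).astype(int)
    tp = np.logical_and(y_pred == 1, y_true == 1).sum()
    precision = tp / (y_pred.sum() + eps)
    recall = tp / (y_true.sum() + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)
    return precision, recall, f1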
Code example #8
def transfer_train(args):
    print(args.model_name)
    config.train_data = config.train_data + 'trainsfer.pth'
    config.model_name = args.model_name
    model = getattr(models, config.model_name)()
    model = model.to(device)
    import dataset2
    train_dataset = dataset2.ECGDataset(data_path=config.train_data,
                                        train=True,
                                        transfer=True,
                                        transform=True)
    train_dataloader = DataLoader(train_dataset,
                                  collate_fn=my_collate_fn,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=6)
    val_dataset = ECGDataset(data_path=config.train_data,
                             train=False,
                             transfer=True)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=6)
    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    # optimizer = optim.RMSprop(model.parameters(), lr=config.lr)
    w = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
    criterion = utils.WeightedMultilabel2(w)
    #    criterion = utils.My_loss(w)
    # directory for saving model checkpoints
    model_save_dir = '%s/%s' % (config.ckpt, config.model_name + '_transfer')
    args.ckpt = model_save_dir
    # if args.ex: model_save_dir += args.ex
    best_f1 = -1
    lr = 3e-4
    start_epoch = 1
    stage = 1
    # resume training from the last checkpoint
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    if args.resume:
        if os.path.exists(args.ckpt):  # args.ckpt is the directory holding the weights
            # model_save_dir = args.ckpt
            current_w = torch.load(os.path.join(args.ckpt, config.best_w))
            best_w = torch.load(os.path.join(model_save_dir, config.best_w))
            best_f1 = best_w['best_f']
            start_epoch = current_w['epoch'] + 1
            lr = current_w['lr']
            stage = current_w['stage']
            model.load_state_dict(current_w['state_dict'])
            # if the interruption happened exactly at a stage-transition epoch
            if start_epoch - 1 in config.stage_epoch:
                stage += 1
                lr /= config.lr_decay
                utils.adjust_learning_rate(optimizer, lr)
                model.load_state_dict(best_w['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
    # =========> start training <=========
    val_loss = 10
    val_f1 = -1
    state = {}
    for epoch in range(start_epoch, 25 + 1):
        since = time.time()
        train_loss, train_f1, best_f1 = train_epoch(
            model, optimizer, criterion, train_dataloader, epoch, lr, best_f1,
            val_dataloader, model_save_dir, state, 50)
        # if epoch % 2 == 1:
        val_loss, val_f1, _, _ = val_epoch(model, criterion, val_dataloader)
        print(
            '#epoch:%02d stage:%d train_loss:%.3e train_f1:%.3f  val_loss:%0.3e val_f1:%.3f time:%s'
            % (epoch, stage, train_loss, train_f1, val_loss, val_f1,
               utils.print_time_cost(since)))
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage,
            "best_f": val_f1
        }
        if best_f1 < val_f1:
            save_ckpt(state, best_f1 < val_f1, model_save_dir)
            print('save best')
        else:
            save_ckpt(state, False, model_save_dir)
        best_f1 = max(best_f1, val_f1)

        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
Code example #9
def train_cv(input_directory, output_directory):
    # model
    # directory for saving model checkpoints
    model_save_dir = '%s/%s_%s' % (
        config.ckpt, config.model_name + "_cv", time.strftime("%Y%m%d%H%M")
    )  #'%s/%s_%s' % (config.ckpt, args.model_name+"_cv", time.strftime("%Y%m%d%H%M"))
    for fold in range(config.kfold):
        print("***************************fold : {}***********************".
              format(fold))
        model = getattr(models, config.model_name)(fold=fold)
        # if args.ckpt and not args.resume:
        #     state = torch.load(args.ckpt, map_location='cpu')
        #     model.load_state_dict(state['state_dict'])
        #     print('train with pretrained weight val_f1', state['f1'])

        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, config.num_classes)

        #2019/11/11
        #save dense/fc weight for pretrain 55 classes
        # model = MyModel()
        # num_ftrs = model.classifier.out_features
        # model.fc = nn.Linear(55, config.num_classes)

        model = model.to(device)
        # data
        train_dataset = ECGDataset(data_path=config.train_data_cv.format(fold),
                                   data_dir=input_directory,
                                   train=True)

        train_dataloader = DataLoader(train_dataset,
                                      batch_size=config.batch_size,
                                      shuffle=True,
                                      drop_last=True,
                                      num_workers=6)

        val_dataset = ECGDataset(data_path=config.train_data_cv.format(fold),
                                 data_dir=input_directory,
                                 train=False)

        val_dataloader = DataLoader(val_dataset,
                                    batch_size=config.batch_size,
                                    drop_last=True,
                                    num_workers=4)

        print("fold_{}_train_datasize".format(fold), len(train_dataset),
              "fold_{}_val_datasize".format(fold), len(val_dataset))
        # optimizer and loss
        optimizer = radam.RAdam(
            model.parameters(),
            lr=config.lr)  #optim.Adam(model.parameters(), lr=config.lr)
        w = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
        criterion = utils.WeightedMultilabel(w)  ## utils.FocalLoss() #
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         'max',
                                                         verbose=True,
                                                         factor=0.1,
                                                         patience=5,
                                                         min_lr=1e-06,
                                                         eps=1e-08)

        # if args.ex: model_save_dir += args.ex
        # best_f1 = -1
        # lr = config.lr
        # start_epoch = 1
        # stage = 1

        best_f1 = -1
        best_cm = -1
        lr = config.lr
        start_epoch = 1
        stage = 1
        epoch_cum = 0  # epochs since the last improvement (early-stopping counter)
        # resume training from the last checkpoint
        #         if args.resume:
        #             if os.path.exists(args.ckpt):  # args.ckpt is the directory holding the weights
        #                 model_save_dir = args.ckpt
        #                 current_w = torch.load(os.path.join(args.ckpt, config.current_w))
        #                 best_w = torch.load(os.path.join(model_save_dir, config.best_w))
        #                 best_f1 = best_w['loss']
        #                 start_epoch = current_w['epoch'] + 1
        #                 lr = current_w['lr']
        #                 stage = current_w['stage']
        #                 model.load_state_dict(current_w['state_dict'])
        #                 # if the interruption happened exactly at a stage-transition epoch
        #                 if start_epoch - 1 in config.stage_epoch:
        #                     stage += 1
        #                     lr /= config.lr_decay
        #                     utils.adjust_learning_rate(optimizer, lr)
        #                     model.load_state_dict(best_w['state_dict'])
        #                 print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
        logger = Logger(logdir=model_save_dir, flush_secs=2)
        # =========> start training <=========
        for epoch in range(start_epoch, config.max_epoch + 1):
            since = time.time()
            train_loss, train_acc, train_f1, train_f2, train_g2, train_cm = train_epoch(
                model,
                optimizer,
                criterion,
                train_dataloader,
                show_interval=100)
            val_loss, val_acc, val_f1, val_f2, val_g2, val_cm = val_epoch(
                model, criterion, val_dataloader)

            # train_loss, train_f1 = train_beat_epoch(model, optimizer, criterion, train_dataloader, show_interval=100)
            # val_loss, val_f1 = val_beat_epoch(model, criterion, val_dataloader)

            print('#epoch:%02d, stage:%d, train_loss:%.3e, train_acc:%.3f, train_f1:%.3f, train_f2:%.3f, train_g2:%.3f,train_cm:%.3f,\n \
                    val_loss:%0.3e, val_acc:%.3f, val_f1:%.3f, val_f2:%.3f, val_g2:%.3f, val_cm:%.3f,time:%s\n'
                  % (epoch, stage, train_loss, train_acc,train_f1,train_f2,train_g2,train_cm, \
                    val_loss, val_acc, val_f1, val_f2, val_g2, val_cm,utils.print_time_cost(since)))

            logger.log_value('fold{}_train_loss'.format(fold),
                             train_loss,
                             step=epoch)
            logger.log_value('fold{}_train_f1'.format(fold),
                             train_f1,
                             step=epoch)
            logger.log_value('fold{}_val_loss'.format(fold),
                             val_loss,
                             step=epoch)
            logger.log_value('fold{}_val_f1'.format(fold), val_f1, step=epoch)
            state = {
                "state_dict": model.state_dict(),
                "epoch": epoch,
                "loss": val_loss,
                'f1': val_f1,
                'lr': lr,
                'stage': stage
            }

            save_ckpt_cv(state, best_cm < val_cm, model_save_dir, fold,
                         output_directory)
            best_cm = max(best_cm, val_cm)

            scheduler.step(val_cm)
            # scheduler.step()

            if val_cm < best_cm:
                epoch_cum += 1
            else:
                epoch_cum = 0

            # save_ckpt_cv(state, best_f1 < val_f1, model_save_dir,fold)
            # best_f1 = max(best_f1, val_f1)

            # if val_f1 < best_f1:
            #     epoch_cum += 1
            # else:
            #     epoch_cum = 0

            # if epoch in config.stage_epoch:
            # if epoch_cum == 5:
            #     stage += 1
            #     lr /= config.lr_decay
            #     if lr < 1e-6:
            #         lr = 1e-6
            #         print("*" * 20, "step into stage%02d lr %.3ef" % (stage, lr))
            #     best_w = os.path.join(model_save_dir, config.best_w_cv.format(fold))
            #     model.load_state_dict(torch.load(best_w)['state_dict'])
            #     print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            #     utils.adjust_learning_rate(optimizer, lr)

            # elif epoch_cum >= 12:
            #     print("*" * 20, "step into stage%02d lr %.3ef" % (stage, lr))
            #     break

            if epoch_cum >= 12:
                print("*" * 20, "step into stage%02d lr %.3ef" % (stage, lr))
                break
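Examples #9 and #10 drop the fixed stage schedule in favour of ReduceLROnPlateau on the validation challenge metric plus a hand-rolled patience counter (epoch_cum). The early-stopping part in isolation, with invented names and scores, looks like this:

# Standalone sketch of the patience logic; `scores` stands in for the
# per-epoch validation metric and is invented for illustration.
patience = 12
best_score = -1.0
epochs_without_improvement = 0

scores = [0.61, 0.63, 0.62, 0.64, 0.64, 0.63]
for epoch, score in enumerate(scores, start=1):
    if score > best_score:
        best_score = score
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
    if epochs_without_improvement >= patience:
        print('early stop at epoch %d' % epoch)
        break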
Code example #10
def train(input_directory, output_directory):
    # model
    model = getattr(models, config.model_name)()

    # if args.ckpt and not args.resume:
    #     state = torch.load(args.ckpt, map_location='cpu')
    #     model.load_state_dict(state['state_dict'])
    #     print('train with pretrained weight val_f1', state['f1'])

    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, config.num_classes)

    model = model.to(device)
    # data
    train_dataset = ECGDataset(data_path=config.train_data,
                               data_dir=input_directory,
                               train=True)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=6)
    val_dataset = ECGDataset(data_path=config.train_data,
                             data_dir=input_directory,
                             train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=4)

    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    # optimizer and loss
    #optimizer = optim.Adam(model.parameters(), lr=config.lr)
    optimizer = radam.RAdam(model.parameters(),
                            lr=config.lr,
                            weight_decay=1e-4)  #config.lr
    #optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, dampening=0, weight_decay=1e-4, nesterov=False)
    w = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
    criterion = utils.WeightedMultilabel(w)  ##   # utils.FocalLoss() #

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        'max',
        verbose=True,
        factor=0.1,
        patience=5,
        min_lr=1e-06,
        eps=1e-08)  #CosineAnnealingLR  CosineAnnealingWithRestartsLR
    #scheduler = pytorchtools.CosineAnnealingWithRestartsLR(optimizer,T_max=30, T_mult = 1.2, eta_min=1e-6)

    # optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, nesterov=True)
    # scheduler = pytorchtools.CosineAnnealingLR_with_Restart(optimizer, T_max=12, T_mult=1, model=model, out_dir='./snapshot',take_snapshot=True, eta_min=1e-9)

    # directory for saving model checkpoints
    model_save_dir = '%s/%s_%s' % (config.ckpt, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))

    # if args.ex: model_save_dir += args.ex

    best_f1 = -1
    best_cm = -1
    lr = config.lr
    start_epoch = 1
    stage = 1
    epoch_cum = 0  # epochs since the last improvement (early-stopping counter)

    # resume training from the last checkpoint
    # if args.resume:
    #     if os.path.exists(args.ckpt):  # args.ckpt is the directory holding the weights
    #         model_save_dir = args.ckpt
    #         current_w = torch.load(os.path.join(args.ckpt, config.current_w))
    #         best_w = torch.load(os.path.join(model_save_dir, config.best_w))
    #         best_f1 = best_w['loss']
    #         start_epoch = current_w['epoch'] + 1
    #         lr = current_w['lr']
    #         stage = current_w['stage']
    #         model.load_state_dict(current_w['state_dict'])
    #         # if the interruption happened exactly at a stage-transition epoch
    #         if start_epoch - 1 in config.stage_epoch:
    #             stage += 1
    #             lr /= config.lr_decay
    #             utils.adjust_learning_rate(optimizer, lr)
    #             model.load_state_dict(best_w['state_dict'])
    #         print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))

    logger = Logger(logdir=model_save_dir, flush_secs=2)
    # =========> start training <=========
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_acc, train_f1, train_f2, train_g2, train_cm = train_epoch(
            model, optimizer, criterion, train_dataloader, show_interval=100)
        val_loss, val_acc, val_f1, val_f2, val_g2, val_cm = val_epoch(
            model, criterion, val_dataloader)

        # train_loss, train_f1 = train_beat_epoch(model, optimizer, criterion, train_dataloader, show_interval=100)
        # val_loss, val_f1 = val_beat_epoch(model, criterion, val_dataloader)

        print('#epoch:%02d, stage:%d, train_loss:%.3e, train_acc:%.3f, train_f1:%.3f, train_f2:%.3f, train_g2:%.3f,train_cm:%.3f,\n \
                val_loss:%0.3e, val_acc:%.3f, val_f1:%.3f, val_f2:%.3f, val_g2:%.3f, val_cm:%.3f,time:%s\n'
              % (epoch, stage, train_loss, train_acc,train_f1,train_f2,train_g2,train_cm, \
                val_loss, val_acc, val_f1, val_f2, val_g2, val_cm,utils.print_time_cost(since)))

        logger.log_value('train_loss', train_loss, step=epoch)
        logger.log_value('train_f1', train_f1, step=epoch)
        logger.log_value('val_loss', val_loss, step=epoch)
        logger.log_value('val_f1', val_f1, step=epoch)
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage
        }

        save_ckpt(state, best_cm < val_cm, model_save_dir, output_directory)
        best_cm = max(best_cm, val_cm)

        scheduler.step(val_cm)
        # scheduler.step()

        if val_cm < best_cm:
            epoch_cum += 1
        else:
            epoch_cum = 0


#         # if epoch in config.stage_epoch:
#         if epoch_cum == 5:
#             stage += 1
#             lr /= config.lr_decay
#             if lr < 1e-6:
#                 lr = 1e-6
#                 print("*" * 20, "step into stage%02d lr %.3ef" % (stage, lr))
#             best_w = os.path.join(model_save_dir, config.best_w)
#             model.load_state_dict(torch.load(best_w)['state_dict'])
#             print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
#             utils.adjust_learning_rate(optimizer, lr)

#         elif epoch_cum >= 12:
#             print("*" * 20, "step into stage%02d lr %.3ef" % (stage, lr))
#             break

        if epoch_cum >= 12:
            print("*" * 20, "step into stage%02d lr %.3ef" % (stage, lr))
            break
Code example #11
def train(mode='train', ckpt=None, resume=False):
    # model
    model = getattr(resnet, config.model_name)(num_classes=config.num_classes,
                                               input_dim=config.input_dim)
    if ckpt is not None and not resume:
        state = torch.load(ckpt, map_location='cpu')
        model.load_state_dict(state['state_dict'])
        print('train with pretrained weight val_f1', state['f1'])
    model = model.to(device)
    # data
    train_dataset = ECGDataset(data_path=config.train_data, mode=mode)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=6)
    val_dataset = ECGDataset(data_path=config.train_data, mode='val')
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=6)
    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    groups = config.groups
    count = train_dataset.count
    criterion = utils.WeightedMultilabel(groups, count, device)
    # directory for saving model checkpoints
    model_save_dir = '%s/%s_%s' % (config.ckpt, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))
    if not os.path.exists(config.ckpt):
        os.mkdir(config.ckpt)
    os.mkdir(model_save_dir)
    best_f1 = -1
    lr = config.lr
    start_epoch = 1
    stage = 1
    # resume training from the last checkpoint
    if resume:
        if os.path.exists(ckpt):  # ckpt points at the saved weights
            current_w = torch.load(os.path.join(ckpt))
            start_epoch = current_w['epoch'] + 1
            lr = current_w['lr']
            stage = current_w['stage']
            model.load_state_dict(current_w['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
    # logger = Logger(logdir=model_save_dir, flush_secs=2)
    # =========> start training <=========
    val_loss, val_p, val_r, val_f1 = val_epoch(model, criterion,
                                               val_dataloader)
    print('start training')
    print('val_loss:%.3e val_precision:%.4f val_recall:%.4f val_f1:%.4f \n' %
          (val_loss, val_p, val_r, val_f1))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_p, train_r, train_f1 = train_epoch(
            model,
            optimizer,
            criterion,
            train_dataloader,
            show_interval=config.show_interval)
        val_loss, val_p, val_r, val_f1, pr_df = val_epoch(model,
                                                          criterion,
                                                          val_dataloader,
                                                          simple_mode=False)
        pr_df['arry'] = pr_df['arry'].map(val_dataset.idx2name)
        print('#epoch:%02d stage:%d time:%s' %
              (epoch, stage, utils.print_time_cost(since)))
        print(
            'train_loss:%.3e train_precision:%.4f train_recall:%.4f train_f1:%.4f'
            % (train_loss, train_p, train_r, train_f1))
        print(
            'val_loss:%.3e val_precision:%.4f val_recall:%.4f val_f1:%.4f \n' %
            (val_loss, val_p, val_r, val_f1))
        display.display(pr_df)
        # logger.log_value('train_loss', train_loss, step=epoch)
        # logger.log_value('train_f1', train_f1, step=epoch)
        # logger.log_value('val_loss', val_loss, step=epoch)
        # logger.log_value('val_f1', val_f1, step=epoch)
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage
        }
        torch.save(state, os.path.join(model_save_dir, 'e%i' % (epoch)))
        best_f1 = max(best_f1, val_f1)

        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
Code example #12
File: main.py Project: ycd2016/HFECG
def train(args):
    model = models.myecgnet()
    if args.ckpt and not args.resume:
        state = torch.load(args.ckpt, map_location='cpu')
        model.load_state_dict(state['state_dict'])
        print('train with pretrained weight val_f1', state['f1'])
    model = model.to(device)
    train_dataset = ECGDataset(data_path=config.train_data, train=True)
    train_dataloader = DataLoader(train_dataset,
                                  collate_fn=my_collate_fn,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=8)
    val_dataset = ECGDataset(data_path=config.train_data, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=8)
    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    optimizer = AdamW(model.parameters(), lr=config.lr)
    w = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
    criterion = utils.WeightedMultilabel(w)
    model_save_dir = '%s/%s_%s' % (config.ckpt, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))
    os.mkdir(model_save_dir)
    if args.ex: model_save_dir += args.ex
    best_f1 = -1
    lr = config.lr
    start_epoch = 1
    stage = 1
    if args.resume:
        if os.path.exists(args.ckpt):
            model_save_dir = args.ckpt
            current_w = torch.load(os.path.join(args.ckpt, config.current_w))
            best_w = torch.load(os.path.join(model_save_dir, config.best_w))
            best_f1 = best_w['loss']
            start_epoch = current_w['epoch'] + 1
            lr = current_w['lr']
            stage = current_w['stage']
            model.load_state_dict(current_w['state_dict'])
            if start_epoch - 1 in config.stage_epoch:
                stage += 1
                lr /= config.lr_decay
                utils.adjust_learning_rate(optimizer, lr)
                model.load_state_dict(best_w['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_f1 = train_epoch(model,
                                           optimizer,
                                           criterion,
                                           train_dataloader,
                                           show_interval=10)
        val_loss, val_f1 = val_epoch(model, criterion, val_dataloader)
        print(
            '#epoch:%03d\tstage:%d\ttrain_loss:%.4f\ttrain_f1:%.3f\tval_loss:%0.4f\tval_f1:%.3f\ttime:%s\n'
            % (epoch, stage, train_loss, train_f1, val_loss, val_f1,
               utils.print_time_cost(since)))
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage
        }
        save_ckpt(state, best_f1 < val_f1, model_save_dir)
        best_f1 = max(best_f1, val_f1)
        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            best_w = os.path.join(model_save_dir, config.best_w)
            model.load_state_dict(torch.load(best_w)['state_dict'])
            print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)