def train(self):
        '''
        Train the model. This method must be implemented.
        :return:
        '''
        df = pd.read_csv(os.path.join(DATA_PATH, DataID, 'train.csv'))

        # random_state only takes effect with shuffle=True; shuffle=False here raises a ValueError
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        for fold, (train_idx, val_idx) in enumerate(kf.split(df)):
            # abandon cross validation:
            # if fold > 0:
            #     break
            # re-initialize the model, optimizer and scheduler for each fold
            self.__init__(self.model_name)
            print(f'fold: {fold + 1}... '
                  f'train_size: {len(train_idx)}, val_size: {len(val_idx)}')

            # build the dataloaders
            train_data = ImageData(df, train_idx, mode='train')
            val_data = ImageData(df, val_idx, mode='valid')
            train_loader = DataLoader(
                train_data,
                batch_size=args.BATCH,
                shuffle=True,
                # drop_last=True
            )
            val_loader = DataLoader(val_data,
                                    batch_size=args.BATCH,
                                    shuffle=False,
                                    drop_last=True)

            max_correct = 0
            for epoch in range(args.EPOCHS):
                # note: passing the epoch index to scheduler.step() is deprecated in recent PyTorch
                self.scheduler.step(epoch)
                train_loss, train_acc, val_loss, val_acc = self.train_one_epoch(
                    train_loader, val_loader)
                start = time.strftime("%H:%M:%S")
                print(f'fold:{fold + 1}',
                      f"epoch:{epoch + 1}/{args.EPOCHS} | ⏰: {start}   ",
                      f"Training Loss: {train_loss:.6f}.. ",
                      f"Training Acc:  {train_acc:.6f}.. ",
                      f"validation Acc: {val_acc:.6f}.. ")

                train_log(train_loss=train_loss,
                          train_acc=train_acc,
                          val_loss=val_loss,
                          val_acc=val_acc)

                if val_acc > max_correct:
                    max_correct = val_acc
                    torch.save(
                        self.model, MODEL_PATH + '/' +
                        f"{self.model_name}_best_fold{fold+1}.pth")
                    # torch.save(self.model, MODEL_PATH + '/' + "best.pth")
                    print('found new best model')
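# A sketch of the train_one_epoch method called above, which this example does
# not show. It assumes the class holds self.model, self.optimizer and
# self.criterion, and that a DEVICE constant exists -- all assumptions, not
# part of the original code:
    def train_one_epoch(self, train_loader, val_loader):
        # one pass over the training set
        self.model.train()
        train_loss, train_correct, n_train = 0.0, 0, 0
        for img, label in train_loader:
            img, label = img.to(DEVICE), label.to(DEVICE)
            self.optimizer.zero_grad()
            output = self.model(img)
            loss = self.criterion(output, label)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item() * label.size(0)
            train_correct += (output.argmax(1) == label).sum().item()
            n_train += label.size(0)

        # one pass over the validation set, without gradients
        self.model.eval()
        val_loss, val_correct, n_val = 0.0, 0, 0
        with torch.no_grad():
            for img, label in val_loader:
                img, label = img.to(DEVICE), label.to(DEVICE)
                output = self.model(img)
                val_loss += self.criterion(output, label).item() * label.size(0)
                val_correct += (output.argmax(1) == label).sum().item()
                n_val += label.size(0)

        return (train_loss / n_train, train_correct / n_train,
                val_loss / n_val, val_correct / n_val)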
Example #2
    def train_epochs(self):
        while self.epoch < self.max_epoch:
            self.epoch += 1
            s_time = time.time()
            acc, loss = self.train()
            val_acc, val_loss = self.validate()
            if self.scheduler is not None:
                self.scheduler.step(val_acc)
            s_time = time.time() - s_time
            print("this epoch took %f seconds" % s_time)
            train_log(train_loss=loss,
                      train_acc=acc,
                      val_acc=val_acc,
                      val_loss=val_loss)
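# Stepping the scheduler with val_acc suggests a metric-driven scheduler such
# as torch.optim.lr_scheduler.ReduceLROnPlateau in 'max' mode. A minimal
# construction sketch for the class __init__, assuming self.optimizer exists
# (an assumption, it is not shown above):
from torch.optim import lr_scheduler

self.scheduler = lr_scheduler.ReduceLROnPlateau(
    self.optimizer,
    mode='max',    # the monitored metric (validation accuracy) should increase
    factor=0.1,    # cut the learning rate by 10x on a plateau
    patience=3)    # after 3 epochs without improvement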
Example #3
        Implement your own model-saving logic
        '''
        # initialise gradients
        optimizer.zero_grad()
        # generate predictions
        outputs = net(x_train)
        # calculate loss
        loss = criterion(outputs, y_train)
        # backpropagate to compute gradients
        loss.backward()
        # clip the gradient norm to stabilize training
        nn.utils.clip_grad_norm_(net.parameters(), max_grad)
        # update parameters using gradients
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()
        train_acc += (outputs.argmax(1) == y_train).sum().item()
        train_bar.set_description('{}/{} loss: {:.4f}'.format(
            step + 1, dataset.get_step(), loss.item()))
        if step % 20 == 0:
            data_num = 20 * args.BATCH
            val_loss, val_acc = model.evaluate()
            train_log(total_loss / data_num, train_acc / data_num, val_loss,
                      val_acc)
            if val_acc > best_score:
                model.save_model(net, MODEL_PATH, overwrite=True)
                best_score = val_acc
                print("Model saved!")
            total_loss, train_acc = 0., 0.
    x_test = torch.from_numpy(x_test)
    y_test = torch.from_numpy(y_test)
    x_test = x_test.float().to(device)
    y_test = y_test.long().to(device)

    outputs = cnn(x_train)
    _, prediction = torch.max(outputs.data, 1)

    optimizer.zero_grad()
    # print(x_train.shape,outputs.shape,y_train.shape)
    loss = loss_fn(outputs, y_train)
    loss.backward()
    optimizer.step()  # optimizer updates the parameters from the gradients
    scheduler.step(loss.item())  # scheduler adjusts the learning rate based on the loss
    print(loss.detach())

    # if validation accuracy exceeds the current best, save the model
    val_acc, val_loss = eval(model, x_test, y_test)
    # cap the logged validation loss
    if val_loss >= 1:
        val_loss = 0.8
    train_log(train_loss=loss.item(), val_loss=val_loss, val_acc=val_acc)
    if val_acc >= best_accuracy:
        best_accuracy = val_acc
        model.save_model(cnn, MODEL_PATH, overwrite=True)
        print("step %d, best accuracy %g" % (i, best_accuracy))

    print(str(i) + "/" + str(args.EPOCHS))

print(best_accuracy)
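# The eval(model, x_test, y_test) call above shadows the Python builtin and its
# definition is not shown. A plausible sketch returning (val_acc, val_loss),
# assuming it evaluates the network with the same loss_fn as the training step
# (both assumptions):
def eval(net, x, y):
    net.eval()
    with torch.no_grad():
        out = net(x)
        loss = loss_fn(out, y).item()
        acc = (out.argmax(1) == y).float().mean().item()
    net.train()
    return acc, loss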
Example #5
                    net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
            return net


g = tf.Graph()
prediction = tf.add(vgg_19(x_image), 0, name='y_conv')  # tf.add(..., 0) just names the output tensor 'y_conv'
loss = slim.losses.softmax_cross_entropy(prediction, y)  # cross-entropy against the labels
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
train_op = slim.learning.create_train_op(loss, optimizer)  # training and optimization op

# compute accuracy:
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

merged = tf.summary.merge_all()
saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    train_writer = tf.summary.FileWriter(LOG_PATH, sess.graph)
    x_train, y_train, x_test, y_test = dataset.next_batch(args.BATCH)
    for i in range(args.EPOCHS):
        train_dict = {x: x_train, y: y_train, keep_prob: 0.7}
        sess.run(train_op, feed_dict=train_dict)
        losses, acc_ = sess.run([loss, accuracy], feed_dict=train_dict)
        train_log(train_loss=losses, train_acc=acc_)
        y_convs = sess.run(prediction, feed_dict=train_dict)
        print("step:{}, loss:{}, acc:{}".format(i + 1, losses, acc_))
        model.save_model(sess, MODEL_PATH, overwrite=True)
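# The example uses x, x_image, y and keep_prob without defining them. A minimal
# sketch of the TF1-style placeholder setup it appears to assume (the shapes
# and num_classes are assumptions):
x = tf.placeholder(tf.float32, [None, 224 * 224 * 3], name='x')
x_image = tf.reshape(x, [-1, 224, 224, 3])          # image batch for the VGG tower
y = tf.placeholder(tf.float32, [None, num_classes], name='y')  # one-hot labels
keep_prob = tf.placeholder(tf.float32, name='keep_prob')       # dropout keep ratio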
Example #6
        _, tra_loss, logits, train_acc = sess.run(fetches, feed_dict=feed_dict)

        val_que_x, val_que_len = que_val
        val_ans_x, val_ans_len = ans_val
        val_ans_x = process_ans_batch(val_ans_x, ans_dict,
                                      int(max(val_ans_len)))
        feed_dict = {
            input_data: val_que_x,
            targets: val_ans_x,
            lr: learning_rate,
            target_sequence_length: val_ans_len,
            source_sequence_length: val_que_len
        }

        val_loss, val_acc = sess.run([cost, ans_accuracy], feed_dict=feed_dict)

        summary = sess.run(summary_op, feed_dict=feed_dict)
        train_writer.add_summary(summary, step)

        # call the platform's logging function so the live training/validation
        # loss and accuracy curves are visible online
        train_log(train_loss=tra_loss,
                  train_acc=train_acc,
                  val_loss=val_loss,
                  val_acc=val_acc)

        # implement your own model-saving logic
        if step % 200 == 0:
            model.save_model(sess, MODEL_PATH, overwrite=True)
    model.save_model(sess, MODEL_PATH, overwrite=True)
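# model.save_model(sess, MODEL_PATH, overwrite=True) is a platform helper whose
# source is not shown. A plausible sketch of such a wrapper built on
# tf.train.Saver (an assumed shape, not the platform's actual code):
import os

def save_model(sess, path, overwrite=True, name='model.ckpt'):
    if overwrite and tf.gfile.Exists(path):
        tf.gfile.DeleteRecursively(path)   # drop the previous checkpoint
    tf.gfile.MakeDirs(path)
    tf.train.Saver().save(sess, os.path.join(path, name))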
        }
        valLoss, valAcc_, y_pre, wrong_ = sess.run(
            [loss, accuracy, pred, not_correct_pred], feed_dict=feed_dict_val)
        val_f1score = metrics.f1_score(y_val[:, 0], y_pre, average='weighted')
        #if step%100 == 0:
        #print(x_text_val[wrong_],y_val[wrong_])
        # -------save and print
        summary = sess.run(merged_summary, feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        print(' ')
        # cur_step = str(step + 1) + "/" + str(all_train_steps)
        print('steps: {0}'.format(
            str(current_step) + '/' + str(all_train_steps)))
        f1_mean = (val_f1score + train_f1) / 2
        train_log(train_loss=loss_,
                  train_acc=acc_,
                  val_loss=valLoss,
                  val_acc=f1_mean)
        print("val_f1:{}".format(f1_mean))
        current_step += 1
        #if current_step % 100 == 0:
        #modelpp.save_model(sess, MODEL_PATH, overwrite=True)

        #if current_step % 10 == 0:
        # validate once every 5 steps
        if acc_flag < f1_mean:
            acc_flag = f1_mean
            modelpp.save_model(sess, MODEL_PATH, overwrite=True)
            last_provement = current_step
            print('the save model steps is : {0}'.format(
                str(current_step) + '/' + str(all_train_steps)))
            print('the model f1score is {0}'.format(f1_mean))
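# The fragment tracks a weighted F1 score via scikit-learn. A tiny
# self-contained usage example of metrics.f1_score with made-up labels:
from sklearn import metrics

y_true = [0, 1, 2, 2, 1]
y_pred = [0, 2, 2, 2, 1]
# 'weighted' averages the per-class F1 scores, weighted by class support
print(metrics.f1_score(y_true, y_pred, average='weighted'))  # ~0.787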
Example #8
cnn = FCN16s(1).to(device)
optimizer = SGD(cnn.parameters(), lr=0.0005, momentum=0.9, weight_decay=0.0005)
criterion = nn.BCELoss()  # define the loss function
'''
dataset.get_step() returns the total number of training iterations
'''
lowest_loss = 1e5
for i in range(data.get_step()):
    print('----------------' + str(i) + "/" + str(data.get_step()) +
          '-------------------')
    cnn.train()
    x_train, y_train = data.next_train_batch()
    x_train = torch.from_numpy(x_train)
    y_train = torch.from_numpy(y_train)
    x_train = x_train.float().to(device)
    y_train = y_train.float().to(device)
    y_train = y_train.unsqueeze(1)
    optimizer.zero_grad()
    outputs = cnn(x_train)
    pred = torch.sigmoid(outputs)
    loss = criterion(pred, y_train)
    loss.backward()
    optimizer.step()
    print("now loss is : %f, lowest loss %f" % (loss.data, lowest_loss))
    # 线上实时打印log
    train_log(train_loss=loss.data.cpu().numpy())
    # 若测试准确率高于当前最高准确率,则保存模型
    if loss.data < lowest_loss:
        lowest_loss = loss.data
        model.save_model(cnn, MODEL_PATH, overwrite=True)
        print("saved model!!!")
                                 overwrite=True)
                best_score_by_acc = history_train.history['val_accuracy'][0]
                best_score_by_loss = history_train.history['val_loss'][0]
                best_epoch = epoch
                print('[saved new best: same acc, lower loss]')
    # if history_train.history['val_acc'][0] > 0.80 and \
    #         round(best_score_by_loss/save_boundary, 2) >= round(history_train.history['val_loss'][0] /save_boundary, 2):

    if best_score_by_acc == 0:
        print('conditions for best_score were not met')
    else:
        print('current best: acc:%.2f, loss:%.2f, epoch:%d' %
              (best_score_by_acc, best_score_by_loss, best_epoch + 1))
    # call the platform's logging function so the live training/validation
    # loss and accuracy curves are visible online
    train_log(train_loss=history_train.history['loss'][0],
              train_acc=history_train.history['accuracy'][0],
              val_loss=history_train.history['val_loss'][0],
              val_acc=history_train.history['val_accuracy'][0])
    '''
    4/ adjust the learning rate and optimize the model
    '''
    tmp_opt = wangyiOpt.reduce_lr_by_loss_and_epoch(
        history_train.history['loss'][0], epoch)

    # apply the new learning rate
    if tmp_opt is not None:
        model_cnn.model_cnn.compile(loss='categorical_crossentropy',
                                    optimizer=tmp_opt,
                                    metrics=['accuracy'])

    # TODO: new learning-rate logic, not finished yet
    # if optimzer_custom.compareHistoryList( history_train_all['loss'] ,pationce= 5 ,min_delta=0.001) :
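# reduce_lr_by_loss_and_epoch returns either None or a new optimizer that the
# code above uses to recompile the model. A hypothetical sketch of such a
# policy, assuming a Keras SGD optimizer (the decay schedule is an assumption):
from tensorflow.keras.optimizers import SGD

def reduce_lr_by_loss_and_epoch(loss, epoch, base_lr=0.01):
    # decay the learning rate by 10x every 10 epochs; otherwise keep the
    # current optimizer by returning None
    if epoch > 0 and epoch % 10 == 0:
        return SGD(learning_rate=base_lr * (0.1 ** (epoch // 10)), momentum=0.9)
    return None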
def train_epoch(cfg,
                model,
                loader,
                optimizer,
                optimizer_center,
                center_criterion,
                loss_fun,
                epoch,
                n_epochs,
                grid,
                writer,
                logger,
                print_freq=50):
    batch_time = AverageMeter()
    losses = AverageMeter()
    losses_id = AverageMeter()
    losses_center = AverageMeter()
    error = AverageMeter()

    # Model on train mode
    model.train()
    grid.set_prob(epoch, cfg.SOLVER.MAX_EPOCHS)
    end = time.time()
    writer.add_scalar('data/lr', optimizer.param_groups[0]['lr'], epoch)
    for batch_idx, (input, target) in enumerate(loader):
        # reset gradients; move tensors to the GPU if available
        optimizer.zero_grad()
        optimizer_center.zero_grad()
        if torch.cuda.is_available():
            input = input.cuda()
            target = target.cuda()
        if cfg.INPUT.GRID_PRO > 0:
            input = grid(input)
        # compute output
        if not cfg.INPUT.MIXUP:
            output, feat = model(input, target)
            all_loss, id_loss, cen_loss = loss_fun(output, feat, target)
        else:
            input, targets_a, targets_b, lam = mixup_data(input,
                                                          target,
                                                          0.5,
                                                          use_cuda=True)
            input, targets_a, targets_b = map(Variable,
                                              (input, targets_a, targets_b))
            output, feat = model(input, target)
            all_loss, id_loss, cen_loss = mixup_criterion(
                loss_fun, output, feat, targets_a, targets_b, lam)

        # measure accuracy and record loss
        batch_size = target.size(0)
        _, pred = output.data.cpu().topk(1, dim=1)
        error.update(
            torch.ne(pred.squeeze(), target.cpu()).float().sum().item() /
            batch_size, batch_size)
        losses.update(all_loss.item(), batch_size)
        losses_id.update(id_loss.item(), batch_size)
        if isinstance(cen_loss, (int, float)):
            losses_center.update(0, batch_size)
        else:
            losses_center.update(cen_loss.item(), batch_size)
        writer.add_scalar('data/loss', losses.avg,
                          (epoch) * len(loader) + batch_idx)
        writer.add_scalar('data/loss_id', losses_id.avg,
                          (epoch) * len(loader) + batch_idx)
        writer.add_scalar('data/loss_center', losses_center.avg,
                          (epoch) * len(loader) + batch_idx)
        writer.add_scalar('data/train_error', error.avg,
                          (epoch) * len(loader) + batch_idx)
        # compute gradient and do SGD step
        all_loss.backward()
        optimizer.step()
        if 'center' in cfg.MODEL.METRIC_LOSS_TYPE:
            for param in center_criterion.parameters():
                param.grad.data *= (1. / cfg.SOLVER.CENTER_LOSS_WEIGHT)
            optimizer_center.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print stats
        if batch_idx % print_freq == 0:
            res = '\t'.join([
                'Epoch: [%d/%d]' % (epoch + 1, n_epochs),
                'Iter: [%d/%d]' % (batch_idx + 1, len(loader)),
                'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                'Error %.4f (%.4f)' % (error.val, error.avg),
            ])
            # note: error.val is an error rate, logged here in the acc slot
            train_log(train_loss=losses.val, train_acc=error.val)
            logger.info(res)
    # Return summary statistics
    return batch_time.avg, losses.avg, error.avg
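# mixup_data and mixup_criterion are referenced above but not defined here.
# A sketch of standard mixup adapted to this loss_fun signature, which returns
# (all_loss, id_loss, center_loss); the adaptation is an assumption:
import numpy as np

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    # blend random pairs of samples with a Beta(alpha, alpha) weight
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0))
    if use_cuda:
        index = index.cuda()
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam

def mixup_criterion(loss_fun, output, feat, y_a, y_b, lam):
    # blend each of the three loss terms with the same mixing weight
    la, ia, ca = loss_fun(output, feat, y_a)
    lb, ib, cb = loss_fun(output, feat, y_b)
    return (lam * la + (1 - lam) * lb,
            lam * ia + (1 - lam) * ib,
            lam * ca + (1 - lam) * cb)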
            right_num = 0
            n_smp = 0
            for val_step in val_step_iterator:
                x_val, y_val = next(val_gen)
                n_smp += y_val.size()[0]
                TI_net.eval()
                outputs = TI_net(x_val, labels=y_val)
                val_loss += outputs[0].item()
                logits = outputs[1]
                pred = logits.argmax(dim=-1)
                right_num += torch.eq(pred, y_val).sum().item()
                '''
                Implement your own model-saving logic
                '''
            val_loss = val_loss / (val_step + 1)
            val_acc = right_num / n_smp
            print("step " + str(step + 1) + "/" + str(steps_per_epoch) + ", " +
                  "epoch " + str(epoch + 1) + "/" + str(args.EPOCHS) + ", " +
                  "val loss is " + str(val_loss) + ", val acc is " +
                  str(val_acc))
            # call the platform's logging function so the live training/validation
            # loss and accuracy curves are visible online
            train_log(train_loss=train_loss,
                      train_acc=0.5,  # training accuracy is not tracked; placeholder
                      val_loss=val_loss,
                      val_acc=val_acc)
            if max_acc < val_acc:
                print("acc improved from {0} to {1}, model saved.".format(
                    max_acc, val_acc))
                max_acc = val_acc
                mymodel.save_model()
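# The hard-coded train_acc=0.5 above could be replaced by a running training
# accuracy. A minimal sketch of the accumulation pattern, with a hypothetical
# train_batches iterator (backward/optimizer steps omitted for brevity):
train_right, train_total = 0, 0              # reset at the start of each epoch
for x_batch, y_batch in train_batches:       # hypothetical batch iterator
    TI_net.train()
    outputs = TI_net(x_batch, labels=y_batch)
    logits = outputs[1]
    train_right += torch.eq(logits.argmax(dim=-1), y_batch).sum().item()
    train_total += y_batch.size(0)
train_acc = train_right / train_total        # pass this instead of 0.5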
Example #12
    def train(self):

        df = pd.read_csv(os.path.join(DATA_PATH, DataID, 'train.csv'))
        image_path_list = df['image_path'].values
        label_list = df['label'].values

        # split into training and validation sets
        all_size = len(image_path_list)
        train_size = int(all_size * 0.9)
        train_image_path_list = image_path_list[:train_size]
        train_label_list = label_list[:train_size]
        val_image_path_list = image_path_list[train_size:]
        val_label_list = label_list[train_size:]
        print(
            'train_size: %d, val_size: %d' % (len(train_image_path_list),
                                              len(val_image_path_list)))
        train_transform, val_transform = self.deal_with_data()
        train_data = ImageData(train_image_path_list, train_label_list,
                               train_transform)
        val_data = ImageData(val_image_path_list, val_label_list,
                             val_transform)
        train_loader = DataLoader(train_data, batch_size=args.BATCH,
                                  num_workers=0, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=args.BATCH,
                                num_workers=0, shuffle=False)
        model = EfficientNet.from_pretrained('efficientnet-b1')

        # EfficientNet exposes its classifier head as _fc; assigning to fc
        # would only add an unused layer
        model._fc = nn.Linear(1280, 2)
        if use_gpu:
            model.to(DEVICE)
        criterion = nn.CrossEntropyLoss()
        if use_gpu:
            criterion.to(DEVICE)
        optimizer = torch.optim.SGD(model.parameters(), lr=args.LR,
                                    momentum=0.9, weight_decay=5e-4)

        if args.SCHE == "cos":
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                             T_max=5,
                                                             eta_min=4e-08)
        elif args.SCHE == "red":
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode="min", factor=0.1,
                patience=3, verbose=False, threshold=0.0001)
        else:
            sys.exit(-1)
        max_correct = 0
        
        #scheduler_steplr = StepLR(optimizer, step_size=10, gamma=0.1)
        scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=1, total_epoch=5, after_scheduler=scheduler)
        
        for epoch in range(args.EPOCHS):
           
            #scheduler_warmup.step(epoch)
            model.train()
            correct = 0
            # Train losses
            train_losses = []
            for img, label in train_loader:
                img, label = img.to(DEVICE), label.to(DEVICE)
                optimizer.zero_grad()
                output = model(img)
                #loss = criterion(output, label)
                loss = self.label_smoothing(output, label,epsilon=0.1)
                loss.backward()
                optimizer.step()
                # Train Metric
                train_pred = output.detach().cpu().max(1, keepdim=True)[1]
                correct += train_pred.eq(label.detach().cpu().
                                         view_as(train_pred)).sum().item()
                train_losses.append(loss.item())
                del train_pred
                # print("Epoch {}, Loss {:.4f}".format(epoch, loss.item()))
            del img, label

            #  Train loss curve
            train_avg_loss = np.mean(train_losses)
            
            acc = 100 * correct / len(train_image_path_list)
            
            # step the warmup wrapper with the training loss; this matches the
            # ReduceLROnPlateau ("red") branch above
            scheduler_warmup.step_ReduceLROnPlateau(train_avg_loss)


            # validate every epoch (epoch % 1 is always 0)
            if epoch % 1 == 0 or epoch == args.EPOCHS - 1:
                correct = 0
                with torch.no_grad():
                    model.eval()
                    # Val losses
                    val_losses = []
                    for val_img, val_label in val_loader:
                        val_img = val_img.to(DEVICE)
                        val_label = val_label.to(DEVICE)
                        val_output = model(val_img)
                        loss = criterion(val_output, val_label)
                        val_pred = val_output.detach().cpu().\
                            max(1, keepdim=True)[1]
                        correct += val_pred.eq(val_label.detach().cpu().
                                               view_as(val_pred)).\
                            sum().item()
                        val_losses.append(loss.item())
                        del val_img, val_label, val_output, val_pred

                #  Val loss curve
                val_avg_loss = np.mean(val_losses)
                
                val_acc = 100 * correct / len(val_image_path_list)
                

                if correct > max_correct:
                    max_correct = correct
                    torch.save(model, MODEL_PATH + '/' + "best.pth")
                print("Epoch {},  Accuracy {:.0f}%".format(epoch, val_acc))
                
                
                # LR curve

            train_log(train_loss=train_avg_loss, train_acc=acc,
                      val_loss=val_avg_loss, val_acc=val_acc)
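# self.label_smoothing(output, label, epsilon=0.1) is called in the training
# loop above but is not shown. A standard label-smoothing cross-entropy sketch
# matching that call shape (an assumption, not the original implementation):
import torch.nn.functional as F

def label_smoothing(self, outputs, targets, epsilon=0.1):
    # soften the one-hot targets: 1 - epsilon on the true class, epsilon/K elsewhere
    n_classes = outputs.size(1)
    log_probs = F.log_softmax(outputs, dim=1)
    one_hot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
    soft_targets = (1 - epsilon) * one_hot + epsilon / n_classes
    # cross-entropy against the softened targets
    return (-soft_targets * log_probs).sum(dim=1).mean()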