Example #1
    def __init__(self, env_cls, args, manager, cfg, process_num, pre=False, pre_irl=False, infer=False, realtrain=False):
        """
        :param env_cls: env class or factory function (not an instance), since several env instances are created inside this class.
        :param args:
        :param manager:
        :param cfg:
        :param process_num: number of processes (one env instance is created per process)
        :param pre: set to pretrain mode
        :param pre_irl: set to pretrain mode for the reward estimator
        :param infer: set to test mode
        """
        self.train_direc = args.train_direc
        self.realtrain = realtrain
        self.process_num = process_num
        self.human_reward = human_reward(args, cfg)
        # initialize envs for each process
        self.env_list = []
        for _ in range(process_num):
            self.env_list.append(env_cls())

        # construct policy and value network
        self.feature_layer = nn.Sequential(
                                 nn.Linear(cfg.s_dim, cfg.h_dim),
                                 nn.ReLU()
        )
        self.policy = MultiDiscretePolicy(cfg, self.feature_layer).to(device=DEVICE)
        self.value = Value(cfg).to(device=DEVICE)
        self.multi_entropy_loss = nn.MultiLabelSoftMarginLoss()

        
        if pre:
            self.print_per_batch = args.print_per_batch
            from dbquery import DBQuery
            db = DBQuery(args.data_dir)
            self.data_train = manager.create_dataset_rl('train', args.batchsz, cfg, db)
            self.data_valid = manager.create_dataset_rl('valid', args.batchsz, cfg, db)
            self.data_test = manager.create_dataset_rl('test', args.batchsz, cfg, db)
            # self.multi_entropy_loss = nn.MultiLabelSoftMarginLoss()
        else:
            self.rewarder = RewardEstimator(args, manager, cfg, pretrain=pre_irl, inference=infer, feature_extractor=self.policy)
            # self.rewarder = DiscEstimator(args, manager, cfg, pretrain=pre_irl, inference=infer)
            self.evaluator = Evaluator(args.data_dir, cfg)

        self.save_dir = args.save_dir
        self.save_per_epoch = args.save_per_epoch
        self.optim_batchsz = args.batchsz
        self.update_round = args.update_round
        self.policy.eval()
        self.value.eval()
        
        self.gamma = args.gamma
        self.epsilon = args.epsilon
        self.tau = args.tau
        self.policy_optim = optim.RMSprop(self.policy.parameters(), lr=args.lr_rl)
        self.value_optim = optim.Adam(self.value.parameters(), lr=args.lr_rl)
        self.mt_factor = args.mt_factor
Example #2
def evaluate_single(gt, pred, num_of_class):
    evaluator = Evaluator(num_of_class)
    evaluator.reset()
    evaluator.add_batch(gt, pred)

    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()

    return format(Acc, '.4f'), format(Acc_class, '.4f'), format(mIoU, '.4f'), format(FWIoU, '.4f')
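A minimal usage sketch, assuming the confusion-matrix-based segmentation Evaluator, so that add_batch expects ground-truth and prediction arrays of the same shape holding integer class indices (the arrays below are hypothetical):

import numpy as np

# hypothetical 2-class ground truth and prediction as HxW integer label maps
gt = np.array([[0, 0, 1],
               [1, 1, 0]])
pred = np.array([[0, 1, 1],
                 [1, 1, 0]])
acc, acc_class, miou, fwiou = evaluate_single(gt, pred, num_of_class=2)
print(acc, acc_class, miou, fwiou)  # four strings formatted to 4 decimal places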
Example #3
def evaluate_batch(gt_list, pred_list, num_of_class):
    evaluator = Evaluator(num_of_class)
    evaluator.reset()
    for gt, pred in zip(gt_list, pred_list):
        evaluator.add_batch(gt, pred)
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()

    return format(Acc, '.4f'), format(Acc_class, '.4f'), format(mIoU, '.4f'), format(FWIoU, '.4f')
Example #4
    def __init__(self, args):
        self.epochs = args.epochs
        self.save_interval = args.save_interval
        self.train_data = AerialDataset(args, mode='train')
        self.train_loader = DataLoader(self.train_data,
                                       batch_size=args.train_batch_size,
                                       shuffle=True,
                                       num_workers=1)
        self.model = models.deeplabv3_resnet50(num_classes=args.num_of_class)
        #self.model = models.fcn_resnet50(num_classes=args.num_of_class)

        self.loss = args.loss
        if self.loss == 'CE':
            self.criterion = nn.CrossEntropyLoss()
        else:  #self.loss == 'LS'
            self.criterion = LovaszSoftmax()

        self.optimizer = torch.optim.AdamW(self.model.parameters())

        self.eval_interval = args.eval_interval
        self.eval_data = AerialDataset(args, mode='eval')
        self.eval_loader = DataLoader(self.eval_data,
                                      batch_size=args.eval_batch_size,
                                      shuffle=False,
                                      num_workers=1)
        self.evaluator = Evaluator(args.num_of_class)

        self.cuda = args.cuda
        if self.cuda:
            self.model = self.model.cuda()

        self.resume = args.resume
        if self.resume is not None:
            if self.cuda:
                checkpoint = torch.load(args.resume)
            else:
                checkpoint = torch.load(args.resume, map_location='cpu')
            self.model.load_state_dict(checkpoint['model'])
            self.optimizer.load_state_dict(checkpoint['opt'])
            self.start_epoch = checkpoint['epoch'] + 1
            #start from next epoch
        else:
            self.start_epoch = 1
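The resume branch above expects a checkpoint dict with 'model', 'opt', and 'epoch' keys. A minimal sketch of the matching save step inside the trainer's epoch loop (file name is illustrative, not from the original):

# save a checkpoint in the layout the resume logic above expects
checkpoint = {
    'model': self.model.state_dict(),
    'opt': self.optimizer.state_dict(),
    'epoch': epoch,  # resuming continues from epoch + 1
}
torch.save(checkpoint, 'checkpoint.pth')  # illustrative path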
Example #5
    def __init__(self,
                 anchors,
                 size: Tuple[int, int],
                 metric_names: list,
                 detect_thresh: float = 0.3,
                 nms_thresh: float = 0.3,
                 images_per_batch: int = -1):
        self.ap = 'AP'
        self.anchors = anchors
        self.size = size
        self.detect_thresh = detect_thresh
        self.nms_thresh = nms_thresh

        self.images_per_batch = images_per_batch
        self.metric_names_original = metric_names
        self.metric_names = ["{}-{}".format(self.ap, i) for i in metric_names]

        self.evaluator = Evaluator()
        self.boundingBoxes = BoundingBoxes()
Example #6
def train(args):
    save_weights_only = args["save_weights_only"]
    loader = Loader(args["label"], args["img_dir"], load_all=args["load_all"])
    net = LPRNet(loader.get_num_chars() + 1)

    if args["pretrained"]:
        net.load_weights(args["pretrained"])
        print("Pretrained model loaded")

    model = net.model
    train_dataset = tf.data.Dataset.from_generator(
        loader,
        output_types=(tf.float32,
                      tf.int32, tf.int32)).batch(args["batch_size"]).shuffle(
                          len(loader)).prefetch(tf.data.experimental.AUTOTUNE)
    print("Training data loaded")

    if args["valid_label"] and args["valid_img_dir"]:
        evaluator = Evaluator(net, args["valid_label"], args["valid_img_dir"],
                              args["valid_batch_size"])
        print("Validation data loaded")
    else:
        evaluator = None

    learning_rate = keras.optimizers.schedules.ExponentialDecay(
        args["learning_rate"],
        decay_steps=args["decay_steps"],
        decay_rate=args["decay_rate"],
        staircase=args["staircase"])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    best_val_loss = float("inf")
    for step, (imgs, labels, label_lengths) in enumerate(train_dataset):
        if step == args["num_steps"]:
            break
        with tf.GradientTape() as tape:
            logits = model(imgs, training=True)
            batch_size, times = logits.shape[:2]
            logits_lengths = tf.expand_dims(tf.tile(
                tf.constant([times], tf.int32),
                tf.constant([batch_size], tf.int32)),
                                            axis=1)
            loss_value = ctc_loss(labels, logits, logits_lengths,
                                  label_lengths)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        loss_value = float(loss_value)
        print("[batch {} - Seen: {} samples] "
              "Training loss: {}, "
              "learning_rate: {}".format(
                  step + 1, (step + 1) * args["batch_size"], loss_value,
                  optimizer._decayed_lr(tf.float32).numpy()))

        # Validate every valid_interval batches.
        if step % args["valid_interval"] == 0 and step > 0:
            if evaluator is not None:
                val_loss, _, _ = evaluator.evaluate()
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    if save_weights_only:
                        net.save_weights(
                            os.path.join(args["saved_dir"], "weights_best.pb"))
                    else:
                        net.save(
                            os.path.join(args["saved_dir"], "model_best.pb"))
                    print("save best at batch: {}, loss: {}".format(
                        step + 1, val_loss))

    if save_weights_only:
        net.save_weights(os.path.join(args["saved_dir"], "weights_last.pb"))
    else:
        net.save(os.path.join(args["saved_dir"], "model_last.pb"))
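train reads its configuration from a plain dict; a hedged example of the keys the function accesses (all values below are placeholders, not from the original):

args = {
    "label": "train_labels.txt",        # placeholder paths
    "img_dir": "train_images/",
    "load_all": False,
    "pretrained": None,                 # or a path to pretrained weights
    "batch_size": 32,
    "valid_label": "valid_labels.txt",
    "valid_img_dir": "valid_images/",
    "valid_batch_size": 32,
    "learning_rate": 1e-3,
    "decay_steps": 10000,
    "decay_rate": 0.9,
    "staircase": True,
    "num_steps": 100000,
    "valid_interval": 100,
    "save_weights_only": True,
    "saved_dir": "saved/",
}
train(args)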
Example #7
        'params': model.get_10x_lr_params(),
        'lr': args.lr * 10
    }]

    # Define Optimizer
    optimizer = optim.SGD(train_params,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay,
                          nesterov=args.nesterov)
    scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer,
                                          step_size=1,
                                          gamma=args.gamma)

    loss_func = nn.CrossEntropyLoss()

    evaluator = Evaluator(num_classes)

    if args.premodel is not None:
        model.load_state_dict(torch.load(args.premodel))
    if args.cuda:
        model = model.cuda()
    model.train()
    Max_MIOU = 0
    Max_Acc = 0
    PM = 0
    for epoch in range(args.epoch):
        train_loss = 0
        train_lossvf = 0
        train_losshf = 0

        # tbarvf = tqdm(train_loadervf)
Example #8
def main():
    # Configure the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Data paths
    train_dir = './data_road/training'
    valid_dir = './data_road/validing'
    # Hyperparameters
    num_epochs = 20
    learning_rate = 0.001
    img_size = (640, 192)
    num_class = 2
    SAVE_INTERVAL = 5
    evaluator = Evaluator(num_class=num_class)

    # Build Dataset instances
    train_data = LaneDataset(data_dir=train_dir, img_size=img_size)
    train_loader = DataLoader(train_data, batch_size=2, shuffle=True)
    valid_data = LaneDataset(data_dir=valid_dir, img_size=img_size)
    valid_loader = DataLoader(valid_data, batch_size=2, shuffle=False)
    # Build the Model instance
    # model = FCN8s(n_class=2)
    # vgg16_path = "models/vgg16_from_caffe.pth"
    # vgg16 = load_vgg16(model_file=vgg16_path)
    # model.copy_params_from_vgg16(vgg16)

    # resnet101-fcn
    # resnet101_path = './resnet101.pth'
    # resnet101 = torchvision.models.resnet101(pretrained=False)
    # state_dict = torch.load(resnet101_path)
    # resnet101.load_state_dict(state_dict)
    # model = ResnetFCN(resnet101, num_classes=2, expansion=4)

    # resnet50-fcn
    # resnet50_path = './resnet50.pth'
    # resnet50 = torchvision.models.resnet50(pretrained=False)
    # state_dict = torch.load(resnet50_path)
    # resnet50.load_state_dict(state_dict)
    # model = ResnetFCN(resnet50, num_classes=2, expansion=4)

    # resnet34-fcn
    resnet34_path = './resnet34.pth'
    resnet34 = torchvision.models.resnet34(pretrained=False)
    state_dict = torch.load(resnet34_path)
    resnet34.load_state_dict(state_dict)
    model = ResnetFCN(resnet34, num_classes=2, expansion=1)

    model = model.to(device)
    print("模型加载成功!!!")

    # Define the loss function and optimizer
    # criterion = nn.BCELoss()
    criterion = Focal_Loss()
    #criterion = nn.BCELoss(weight=torch.tensor([0.3, 0.7])).to(device)
    #criterion = nn.CrossEntropyLoss(weight=torch.tensor([0.25, 0.75])).to(device)
    #criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=0.9,
                                weight_decay=5e-4)

    train_Acc_list = []
    train_Acc_class_list = []
    train_mIoU_list = []
    train_FWIoU_list = []
    train_loss_list = []
    valid_Acc_list = []
    valid_Acc_class_list = []
    valid_mIoU_list = []
    valid_FWIoU_list = []
    valid_loss_list = []

    # Training loop
    for epoch in range(num_epochs):
        model.train()  # ensure training mode (validation below switches the model to eval mode)
        train_loss = 0.0
        evaluator.reset()
        for i, (image, label) in enumerate(train_loader):
            image = image.to(device)
            label = label.to(device)
            #print(label[:, 1, :, :])
            optimizer.zero_grad()
            # Forward pass
            output = model(image)

            loss = criterion(output, label)
            #output = torch.sigmoid(output)
            output = torch.softmax(output, dim=1)
            #loss = criterion(output.transpose(1,3), label.transpose(1,3))
            output = torch.argmax(output, dim=1).cpu().numpy()
            label = torch.argmax(label, dim=1).cpu().numpy()
            #print(output)

            # Backward pass and optimizer step
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

            evaluator.add_batch(label, output)  # accumulate labels/outputs for later evaluation

        # Compute pixel accuracy, per-class accuracy, mean IoU and frequency-weighted IoU
        Acc = evaluator.Pixel_Accuracy()
        Acc_class = evaluator.Pixel_Accuracy_Class()
        mIoU = evaluator.Mean_Intersection_over_Union()
        FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()

        train_Acc_list.append(Acc)
        train_Acc_class_list.append(Acc_class)
        train_mIoU_list.append(mIoU)
        train_FWIoU_list.append(FWIoU)
        train_loss_list.append(train_loss / len(train_loader))

        evaluator.reset()

        # Save the model
        if (epoch + 1) == num_epochs:  #epoch % SAVE_INTERVAL == 0 or
            torch.save(model.state_dict(), './models/fcn8s_BFL.pth')
        print(
            "Epoch_{}: train_loss: {:.6f} Acc: {:.4f} Acc_class: {:.4f} mIoU: {:.4f} FWIoU: {:.4f}"
            .format(epoch + 1, train_loss / len(train_loader), Acc, Acc_class,
                    mIoU, FWIoU))

        # Validation phase
        model.eval()
        valid_loss = 0.0
        with torch.no_grad():
            for i, (image, label) in enumerate(valid_loader):
                image = image.to(device)
                label = label.to(device)
                output = model(image)
                loss = criterion(output, label)
                #output = torch.sigmoid(output)
                output = torch.softmax(output, dim=1)
                # loss = criterion(output.transpose(1, 3), label.transpose(1, 3))
                output = torch.argmax(output, dim=1).cpu().numpy()
                label = torch.argmax(label, dim=1).cpu().numpy()
                #print(output)
                #print(label)

                valid_loss += loss.item()
                evaluator.add_batch(label, output)  # accumulate labels/outputs for later evaluation

        # Compute pixel accuracy, per-class accuracy, mean IoU and frequency-weighted IoU
        Acc = evaluator.Pixel_Accuracy()
        Acc_class = evaluator.Pixel_Accuracy_Class()
        mIoU = evaluator.Mean_Intersection_over_Union()
        FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()

        print(
            "Epoch_{}: valid_loss: {:.6f} Acc: {:.4f} Acc_class: {:.4f} mIoU: {:.4f} FWIoU: {:.4f}"
            .format(epoch + 1, valid_loss / len(valid_loader), Acc, Acc_class,
                    mIoU, FWIoU))

        valid_Acc_list.append(Acc)
        valid_Acc_class_list.append(Acc_class)
        valid_mIoU_list.append(mIoU)
        valid_FWIoU_list.append(FWIoU)
        valid_loss_list.append(valid_loss / len(valid_loader))

    # Plot curves
    draw_figures(train_loss_list,
                 valid_loss_list,
                 title='loss',
                 y_label='loss')
    draw_figures(train_Acc_list, valid_Acc_list, title='Acc', y_label='Acc')
    draw_figures(train_Acc_class_list,
                 valid_Acc_class_list,
                 title='Acc_class',
                 y_label='Acc_class')
    draw_figures(train_mIoU_list,
                 valid_mIoU_list,
                 title='mIoU',
                 y_label='mIoU')
    draw_figures(train_FWIoU_list,
                 valid_FWIoU_list,
                 title='FWIoU',
                 y_label='FWIoU')
    print("完成曲线绘制!!!")
Example #9
def train(data_set_type, num_classes, batch_size, epochs, use_gpu,
          learning_rate, w_decay):
    model = get_fcn_model(num_classes, use_gpu)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=model.parameters(),
                           lr=learning_rate,
                           weight_decay=w_decay)
    scheduler = lr_scheduler.StepLR(
        optimizer, step_size=step_size,
        gamma=gamma)  # decay LR by a factor of 0.5 every 5 epochs

    data_set, data_loader = get_dataset_dataloader(data_set_type, batch_size)

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    epoch_loss = np.zeros((2, epochs))
    epoch_acc = np.zeros((2, epochs))
    epoch_iou = np.zeros((2, epochs, num_classes))
    epoch_mean_iou = np.zeros((2, epochs))
    evaluator = Evaluator(num_classes)

    for epoch in range(epochs):
        logger.info('Epoch {}/{}'.format(epoch + 1, epochs))
        logger.info('-' * 28)

        for phase_ind, phase in enumerate(['train', 'val']):
            if phase == 'train':
                model.train()
                logger.info(phase)
            else:
                model.eval()
                logger.info(phase)

            evaluator.reset()
            running_loss = 0.0
            running_acc = 0.0
            num_of_batches = math.ceil(len(data_set[phase]) / batch_size)
            running_iou = np.zeros((num_of_batches, num_classes))

            for batch_ind, batch in enumerate(data_loader[phase]):
                imgs, targets = batch
                imgs = Variable(imgs).float()
                imgs = imgs.to(device)
                targets = Variable(targets).type(torch.LongTensor)
                targets = targets.to(device)

                # zero the learnable parameters gradients
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(imgs)
                    loss = criterion(outputs, targets)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # computes loss and acc for current iteration
                preds = torch.argmax(outputs, dim=1)
                ious = iou(preds, targets, num_classes)

                running_loss += loss.item() * imgs.size(0)  # .item() keeps a plain float for the numpy accumulators
                running_acc += pixelwise_acc(preds, targets) * imgs.size(0)
                running_iou[batch_ind, :] = ious
                logger.debug('Batch {} running loss: {}'.format(
                    batch_ind, running_loss))

                # test the iou and pixelwise accuracy using evaluator
                preds = preds.cpu().numpy()
                targets = targets.cpu().numpy()
                evaluator.add_batch(targets, preds)

            epoch_loss[phase_ind, epoch] = running_loss / len(data_set[phase])
            epoch_acc[phase_ind, epoch] = running_acc / len(data_set[phase])
            epoch_iou[phase_ind, epoch] = np.nanmean(running_iou, axis=0)
            epoch_mean_iou[phase_ind, epoch] = np.nanmean(epoch_iou[phase_ind,
                                                                    epoch])

            logger.info('{} loss: {:.4f}, acc: {:.4f}, mean iou: {:.6f}'.format(phase,\
                epoch_loss[phase_ind, epoch], epoch_acc[phase_ind, epoch],\
                epoch_mean_iou[phase_ind, epoch]))

            eva_pixel_acc = evaluator.Pixel_Accuracy()
            eva_pixel_acc_class = evaluator.Pixel_Accuracy_Class()
            eva_mIOU = evaluator.Mean_Intersection_over_Union()
            logger.info('{} - Evaluator - acc: {:.4f}, acc class: {:.4f}, mean iou: {:.6f}'.format(phase,\
                eva_pixel_acc, eva_pixel_acc_class, eva_mIOU))

            if phase == 'val' and epoch_acc[phase_ind, epoch] > best_acc:
                best_acc = epoch_acc[phase_ind, epoch]
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    logger.info('Training completed in {}m {}s'.format(int(time_elapsed / 60),\
        int(time_elapsed) % 60))

    # load best model weights
    model.load_state_dict(best_model_wts)

    # save numpy results
    np.save(os.path.join(score_dir, 'epoch_accuracy'), epoch_acc)
    np.save(os.path.join(score_dir, 'epoch_mean_iou'), epoch_mean_iou)
    np.save(os.path.join(score_dir, 'epoch_iou'), epoch_iou)

    return model
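pixelwise_acc and iou are used above but not shown; a minimal sketch of plausible implementations operating on torch tensors of predicted and target class indices (these are assumptions, not the original helpers):

import numpy as np
import torch

def pixelwise_acc(preds, targets):
    # fraction of pixels whose predicted class matches the target
    return (preds == targets).float().mean().item()

def iou(preds, targets, num_classes):
    # per-class intersection-over-union; NaN for classes absent from both tensors
    ious = np.full(num_classes, np.nan)
    for c in range(num_classes):
        pred_c = preds == c
        target_c = targets == c
        union = (pred_c | target_c).sum().item()
        if union > 0:
            ious[c] = (pred_c & target_c).sum().item() / union
    return ious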
Example #10
def model_fn(features, labels, mode, params, config):
    """
    This is a function for creating a computational tensorflow graph.
    The function is in format required by tf.estimator.
    """

    # choose a backbone network
    if params['backbone'] == 'resnet':
        feature_extractor = resnet
    elif params['backbone'] == 'mobilenet':
        feature_extractor = lambda x: mobilenet(x, params['depth_multiplier'])
    elif params['backbone'] == 'shufflenet':
        feature_extractor = lambda x: shufflenet(
            x, str(params['depth_multiplier']))

    # build the main graph
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detector = Detector(features['images'], feature_extractor, is_training,
                        params)

    # add NMS to the graph
    if not is_training:
        predictions = detector.get_predictions(
            score_threshold=params['score_threshold'],
            iou_threshold=params['iou_threshold'],
            max_boxes_per_class=params['max_boxes_per_class'])

    if mode == tf.estimator.ModeKeys.PREDICT:

        w, h = tf.unstack(tf.to_float(
            features['images_size']))  # original image size
        s = tf.to_float(tf.shape(features['images']))  # size after resizing
        scaler = tf.stack([h / s[1], w / s[2], h / s[1], w / s[2]])
        predictions['boxes'] = scaler * predictions['boxes']

        export_outputs = tf.estimator.export.PredictOutput({
            name: tf.identity(tensor, name)
            for name, tensor in predictions.items()
        })
        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs={'outputs': export_outputs})

    # add L2 regularization
    with tf.name_scope('weight_decay'):
        add_weight_decay(params['weight_decay'])
        regularization_loss = tf.losses.get_regularization_loss()

    # create localization and classification losses
    losses = detector.get_losses(labels, params)
    tf.losses.add_loss(params['alpha'] * losses['rpn_localization_loss'])
    tf.losses.add_loss(params['beta'] * losses['rpn_classification_loss'])
    tf.losses.add_loss(params['gamma'] * losses['roi_localization_loss'])
    tf.losses.add_loss(params['theta'] * losses['roi_classification_loss'])
    total_loss = tf.losses.get_total_loss(add_regularization_losses=True)

    tf.summary.scalar('regularization_loss', regularization_loss)
    tf.summary.scalar('rpn_localization_loss', losses['rpn_localization_loss'])
    tf.summary.scalar('rpn_classification_loss',
                      losses['rpn_classification_loss'])
    tf.summary.scalar('roi_localization_loss', losses['roi_localization_loss'])
    tf.summary.scalar('roi_classification_loss',
                      losses['roi_classification_loss'])

    if mode == tf.estimator.ModeKeys.EVAL:

        with tf.name_scope('evaluator'):
            evaluator = Evaluator(num_classes=params['num_classes'])
            eval_metric_ops = evaluator.get_metric_ops(labels, predictions)

        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=eval_metric_ops)

    assert mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('learning_rate'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.cosine_decay(params['initial_learning_rate'],
                                              global_step,
                                              decay_steps=params['num_steps'])
        tf.summary.scalar('learning_rate', learning_rate)

    with tf.variable_scope('optimizer'):
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

        if params['backbone'] == 'shufflenet':
            var_list = [
                v for v in tf.trainable_variables()
                if 'Conv1' not in v.name and 'Stage2' not in v.name
            ]
        elif params['backbone'] == 'mobilenet':
            var_list = [
                v for v in tf.trainable_variables()
                if all('Conv2d_%d_' % i not in v.name
                       for i in range(6)) and 'Conv2d_0' not in v.name
            ]
        elif params['backbone'] == 'resnet':
            var_list = [
                v for v in tf.trainable_variables()
                if 'resnet_v1_50/block1/' not in v.name
                and 'resnet_v1_50/conv1/' not in v.name
            ]

        grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
        grads_and_vars = [(3.0 * g, v) if 'thin_feature_maps' in v.name else
                          (g, v) for g, v in grads_and_vars]
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)

    for g, v in grads_and_vars:
        tf.summary.histogram(v.name[:-2] + '_hist', v)
        tf.summary.histogram(v.name[:-2] + '_grad_hist', g)

    with tf.control_dependencies([train_op]), tf.name_scope('ema'):
        ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                num_updates=global_step)
        train_op = ema.apply(tf.trainable_variables())

    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
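add_weight_decay is called above but not shown; a minimal TF1-style sketch, assuming it adds an L2 penalty on kernel/weight variables to the regularization-loss collection that tf.losses.get_regularization_loss() later reads (the variable-name filter is an assumption):

def add_weight_decay(weight_decay):
    # register an L2 penalty for every trainable kernel/weight variable
    # in the standard regularization-loss collection
    kernels = [
        v for v in tf.trainable_variables()
        if 'weights' in v.name or 'kernel' in v.name
    ]
    for k in kernels:
        penalty = tf.multiply(weight_decay, tf.nn.l2_loss(k))
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, penalty)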