criteria = CrossEntropyLoss2d(
                weight, ignore=data_config["ignore_idx"])  # weight

        if num_gpu > 0:
            weight = weight.cuda()
            criteria = criteria.cuda()

        print("init_lr: " + str(test_config["learning_rate"]) +
              "   batch_size : " + str(data_config["batch_size"]) +
              args.lrsch + " sch use weight and class " +
              str(test_config["num_classes"]))
        print("logs saved in " + logdir + "\tlr sch: " + args.lrsch +
              "\toptim method: " + args.optim + "\ttf style : " +
              str(args.wd_tfmode) + "\tbn-weight : " + str(others))

        print('Flops:  {}'.format(flops_to_string(N_flop)))
        print('Params: ' + get_model_parameters_number(model))
        print('Output shape: {}'.format(list(out.shape)))
        print(total_paramters)

        ################################ start Enc train ##########################################
        if args.visualize:
            lossVal, ElossVal, mIOU_val, save_input, save_est, save_gt = \
                val_edge(num_gpu, valLoader, model, criteria, Lovasz, args.visualize)
            if test_config["loss"] == "Lovasz":
                grid_outputs = torchvision.utils.make_grid(color_transform(
                    (save_est[0] > 0).cpu().data),
                                                           nrow=6)
            else:
                grid_outputs = torchvision.utils.make_grid(color_transform(
                    save_est[0].unsqueeze(0).cpu().max(1)[1].data),

if __name__ == '__main__':
    # Stand-alone complexity report: build each network, push one dummy
    # batch through it with the FLOPs counter attached, and print FLOPs,
    # parameter count and output shape.
    import time
    import torch
    from etc.flops_counter import add_flops_counting_methods, flops_to_string, get_model_parameters_number
    from etc.flops_compute import compute_flops

    def _report_complexity(net, sample):
        """Run ``sample`` through ``net`` with FLOPs counting and print a summary.

        Prints, in order: the FLOPs string, the parameter-count string, the
        output shape and the raw number of parameter elements.

        Args:
            net: the network to profile.
            sample: input tensor passed to the network's forward call.

        Returns:
            Tuple ``(counted_net, output, n_params)``: the object returned by
            ``add_flops_counting_methods``, the forward output, and the total
            number of elements over ``net.parameters()``.
        """
        counted_net = add_flops_counting_methods(net)
        counted_net.eval().start_flops_count()
        # Inference only: no autograd graph is needed to count FLOPs or to
        # read the output shape.
        with torch.no_grad():
            output = counted_net(sample)  # ,only_encode=True)

        print('Flops:  {}'.format(
            flops_to_string(net.compute_average_flops_cost())))
        print('Params: ' + get_model_parameters_number(net))
        print('Output shape: {}'.format(list(output.shape)))
        n_params = sum(p.numel() for p in net.parameters())
        print(n_params)
        return counted_net, output, n_params

    # model = C3_ESPNet_Encoder(classes=2, p=1, q=5)
    model = ExtremeC3Net_small(classes=1, p=1, q=5, stage2=True)
    # torch.FloatTensor(1, 3, 224, 224) would hand the network uninitialised
    # memory (possibly NaN/Inf); use a well-defined random input instead.
    batch = torch.randn(1, 3, 224, 224, dtype=torch.float32)
    model_eval, out, total_paramters = _report_complexity(model, batch)

    # flops = compute_flops(model, input=torch.Tensor(1, 3, 224, 224))
    # print('non use deconv Flops: ' + str(flops))

    # Per-block configuration forwarded to SINet.
    # NOTE(review): the meaning of each [[a, b], [c, d]] entry is defined by
    # SINet itself and is not visible here -- confirm before editing.
    config = [[[3, 1], [5, 1]], [[3, 1], [3, 1]], [[3, 1], [5, 1]],
              [[3, 1], [3, 1]], [[5, 1], [3, 2]], [[5, 2], [3, 4]],
              [[3, 1], [3, 1]], [[5, 1], [5, 1]], [[3, 2], [3, 4]],
              [[3, 1], [5, 2]]]

    model = SINet(classes=2, p=2, q=8, config=config, chnn=1)

    # Alternative input size: (1, 3, 480, 320)
    batch = torch.randn(1, 3, 224, 224, dtype=torch.float32)
    model_eval, out, total_paramters = _report_complexity(model, batch)

    use_gpu = False

    if use_gpu:
        # .half() was tried on both and seemed to be slower for some reason.
        model = model.cuda()
        batch = batch.cuda()

    time_train = []