Beispiel #1
0
def perform_val(model, HEAD1, HEAD_test1, cfg, feature_dim, pair_a, pair_b):
    """Measure pairwise link-prediction accuracy on the test set.

    The test dataset is rebuilt from ``cfg.test_data`` (after copying every
    entry of ``cfg.model['kwargs']`` onto it), the backbone is run once over
    the whole graph, and the weights of the training head ``HEAD1`` are copied
    (non-strictly) into ``HEAD_test1``, which then scores pairs of nodes.
    A pair is predicted "same label" when its score exceeds 0.5.

    Only the first ``10 * (test_inst_num // 10)`` entries of ``pair_a`` /
    ``pair_b`` are scored, in 10 equally sized patches.

    Args:
        model: backbone network; called as ``model([features, adj, labels])``.
        HEAD1: trained head whose ``state_dict`` is copied into ``HEAD_test1``.
        HEAD_test1: inference head; called as
            ``HEAD_test1(feat_a, feat_b, no_list=True)``.
        cfg: config object providing ``test_data``, ``model`` and ``cuda``.
        feature_dim: unused; kept for interface compatibility.
        pair_a: sequence of node indices (first element of each pair).
        pair_b: sequence of node indices (second element of each pair).

    Returns:
        float: fraction of scored pairs whose prediction matches the ground
        truth, or ``0.0`` when no pair was scored.
    """
    _, test_idx2lb = read_meta(cfg.test_data['label_path'])
    test_inst_num = len(test_idx2lb)

    model.eval()
    HEAD1.eval()
    HEAD_test1.eval()

    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.test_data, k, v)
    dataset = build_dataset(cfg.model['type'], cfg.test_data)

    features = torch.FloatTensor(dataset.features)
    adj = sparse_mx_to_torch_sparse_tensor(dataset.adj)
    labels = torch.LongTensor(dataset.gt_labels)

    if cfg.cuda:
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        HEAD_test1 = HEAD_test1.cuda()

    test_data = [features, adj, labels]

    # strict=False: the test head may not own every key of the training head.
    HEAD_test1.load_state_dict(HEAD1.state_dict(), False)

    patch_num = 10
    patch_size = test_inst_num // patch_num
    correct = 0
    evaluated = 0

    with torch.no_grad():
        output_feature = model(test_data)
        for i in range(patch_num):
            start, stop = i * patch_size, (i + 1) * patch_size
            idx_a = pair_a[start:stop]
            idx_b = pair_b[start:stop]
            if len(idx_a) == 0:
                # Nothing to score (fewer than patch_num instances or the
                # pair lists are shorter than the patch range).
                continue

            score = HEAD_test1(output_feature[idx_a],
                               output_feature[idx_b], no_list=True)
            pre_labels = (score > 0.5).long()
            gt_labels = (labels[idx_a] == labels[idx_b]).long()

            correct += int((pre_labels == gt_labels).long().sum())
            evaluated += len(idx_a)

    # Normalise by the number of pairs that were actually scored; dividing by
    # test_inst_num under-reports accuracy whenever it is not a multiple of
    # patch_num.
    if evaluated == 0:
        return 0.0
    return float(correct) / evaluated
Beispiel #2
0
def train(cfg):
    """Run the full training loop described by ``cfg``.

    Builds the training dataset and program, optionally resumes from a
    checkpoint or loads shape-compatible pretrained parameters, then trains
    from ``begin_epoch`` to ``cfg.SOLVER.NUM_EPOCHS`` (inclusive).  Every
    ``cfg.TRAIN.SNAPSHOT_EPOCH`` epochs trainer 0 saves a checkpoint and, if
    requested on the command line, evaluates and visualizes it.  A final
    checkpoint named ``'final'`` is saved by trainer 0 at the end.

    Relies on the module-level ``args`` namespace (``use_gpu``, ``use_mpio``,
    ``debug``, ``use_vdl``, ``vdl_log_dir``, ``log_steps``, ``do_eval``).

    Side effects: overwrites ``cfg.TRAIN_BATCH_SIZE`` with
    ``dev_count * cfg.TRAIN_BATCH_SIZE_PER_GPU``.

    Args:
        cfg: global configuration object.

    Raises:
        ValueError: if the starting epoch exceeds ``cfg.SOLVER.NUM_EPOCHS``.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True
    dataset = build_dataset(
        cfg.DATASET.DATASET_NAME,
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR,
        base_size=cfg.DATAAUG.BASE_SIZE,
        crop_size=cfg.DATAAUG.CROP_SIZE,
        rand_scale=True)

    def data_generator():
        """Yield (item[0], item[1], item[2]) samples in per-trainer groups."""
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.TRAIN_BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If the sync batch norm strategy is used, drop the trailing samples
        # that do not fill a whole group, to avoid NCCL hang issues.
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of devices
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#device count: {}".format(dev_count))
    cfg.TRAIN_BATCH_SIZE = dev_count * int(cfg.TRAIN_BATCH_SIZE_PER_GPU)
    print_info("#train_batch_size: {}".format(cfg.TRAIN_BATCH_SIZE))
    print_info("#batch_size_per_dev: {}".format(cfg.TRAIN_BATCH_SIZE_PER_GPU))

    py_reader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    py_reader.decorate_sample_generator(
        data_generator,
        batch_size=cfg.TRAIN_BATCH_SIZE_PER_GPU,
        drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    # Clear temporary variables every 100 iterations
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whether a persisted variable exists in the pretrained
            directory and its stored shape matches the current network.
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        fluid.io.load_vars(
            exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] don't exist or shape does not match current network, skip"
                " to load it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use a streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            sys.exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.TRAIN_BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.TRAIN_BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    # From here on ``avg_loss`` is the running loss accumulator; the loss
    # variable returned by build_model is only needed through fetch_list.
    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        py_reader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process matches expectations
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={}/{} step={}/{} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, cfg.SOLVER.NUM_EPOCHS, step, all_step,
                                 lr[0], avg_loss, mean_acc, mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and
                    # metric computation
                    loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        # Uses ``step``: the previously referenced
                        # ``global_step`` was never defined, so this branch
                        # raised NameError (swallowed below) on every log.
                        print((
                            "epoch={}/{} step={}/{} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, cfg.SOLVER.NUM_EPOCHS, step, all_step,
                                 lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                # Reader exhausted: end of this epoch.
                py_reader.reset()
                break
            except Exception as e:
                # Best effort: report the failure and keep training.
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          step)

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, train_prog, 'final')

    if args.use_vdl:
        log_writer.close()
def evaluate(cfg,
             ckpt_dir=None,
             use_gpu=False,
             use_mpio=False,
             multi_scales=False,
             flip=False,
             **kwargs):
    """Evaluate a segmentation model on the validation file list.

    Builds the evaluation program, loads parameters from ``ckpt_dir`` (or
    ``cfg.TEST.TEST_MODEL`` when ``ckpt_dir`` is falsy), and accumulates a
    streaming confusion matrix over every batch produced by the reader.

    When ``cfg.TEST.SLIDE_WINDOW`` is set, each image is rescaled for every
    entry of the scale list, predicted either as one padded crop or through
    overlapping sliding windows, optionally averaged with its left-right
    flipped prediction, resized back to the original resolution and summed
    over scales.  Otherwise the reader's image is fed to the network as is.

    Args:
        cfg: global configuration object.
        ckpt_dir: directory holding the ``.pdparams`` file to load.
        use_gpu: run on CUDA places when true, CPU places otherwise.
        use_mpio: use the dataset's multiprocess generator.
        multi_scales: use the multi-scale list (sliding-window mode only).
        flip: also predict on left-right flipped inputs (sliding-window
            mode only).
        **kwargs: accepted and ignored.

    Returns:
        tuple: ``(category_iou, avg_iou, category_acc, avg_acc)``.
    """
    np.set_printoptions(precision=5, suppress=True)

    num_classes = cfg.DATASET.NUM_CLASSES
    base_size = cfg.TEST.BASE_SIZE
    crop_size = cfg.TEST.CROP_SIZE
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = build_dataset(cfg.DATASET.DATASET_NAME,
                            file_list=cfg.DATASET.VAL_FILE_LIST,
                            mode=ModelPhase.EVAL,
                            data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        """Yield (b[0], b[1], b[2]) for every sample of the dataset."""
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b[0], b[1], b[2]

    py_reader, avg_loss, out, grts, masks = build_model(test_prog,
                                                        startup_prog,
                                                        phase=ModelPhase.EVAL)

    # Get device environment.  Resolved before decorating the reader so the
    # reader uses the same places as the executor instead of unconditionally
    # asking for CUDA places (which breaks use_gpu=False).
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)

    py_reader.decorate_sample_generator(data_generator,
                                        drop_last=False,
                                        batch_size=cfg.EVAL_BATCH_SIZE,
                                        places=places)

    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        filename = '{}_{}_{}_epoch_{}.pdparams'.format(
            str(cfg.MODEL.MODEL_NAME), str(cfg.MODEL.BACKBONE),
            str(cfg.DATASET.DATASET_NAME), cfg.SOLVER.NUM_EPOCHS)
        print("loading testing model file: {}/{}".format(ckpt_dir, filename))
        fluid.io.load_params(exe,
                             ckpt_dir,
                             main_program=test_prog,
                             filename=filename)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    # The scale list does not depend on the sample, so it is chosen once here.
    # This also keeps ``scales`` defined for the summary print below when
    # sliding-window mode is off or the reader yields nothing.
    if cfg.TEST.SLIDE_WINDOW and multi_scales:
        if cfg.DATASET.DATASET_NAME == 'cityscapes':
            scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25]
        else:
            scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
        # scales = [0.75, 1.0, 1.25]  # fast multi-scale testing
    else:
        scales = [1.0]

    # fetch_list: outputs fetched from the model
    fetch_list = [avg_loss.name, out.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.VAL_TOTAL_IMAGES // cfg.EVAL_BATCH_SIZE
    timer = Timer()
    timer.start()
    for data in py_reader():
        # NOTE(review): only the first place's batch (data[0]) is consumed.
        mask = np.array(data[0]['mask'])
        label = np.array(data[0]['label'])
        image_org = np.array(data[0]['image'])
        image = np.transpose(image_org, (0, 2, 3, 1))  # BCHW->BHWC
        image = np.squeeze(image)

        if cfg.TEST.SLIDE_WINDOW:
            # Window stride: one third of the crop size
            # (1/3 > 2/3 > 1/2 for input_size: 769 x 769)
            stride = int(crop_size * 1.0 / 3)
            h, w = image.shape[0:2]
            scores = np.zeros(shape=[num_classes, h, w], dtype='float32')

            for scale in scales:
                # Rescale so the longer side equals base_size * scale.
                long_size = int(math.ceil(base_size * scale))
                if h > w:
                    height = long_size
                    width = int(1.0 * w * long_size / h + 0.5)
                    short_size = width
                else:
                    width = long_size
                    height = int(1.0 * h * long_size / w + 0.5)
                    short_size = height
                cur_img = image_resize(image, height, width)

                if long_size <= crop_size:
                    # The whole image fits in one crop: pad and predict once.
                    pad_img = pad_single_image(cur_img, crop_size)
                    label_feed, mask_feed = get_feed(pad_img)
                    pad_img = mapper_image(pad_img)
                    loss, pred1 = exe.run(test_prog,
                                          feed={
                                              'image': pad_img,
                                              'label': label_feed,
                                              'mask': mask_feed
                                          },
                                          fetch_list=fetch_list,
                                          return_numpy=True)
                    pred1 = np.array(pred1)
                    outputs = pred1[:, :, :height, :width]
                    if flip:
                        pad_img_flip = flip_left_right_image(cur_img)
                        pad_img_flip = pad_single_image(
                            pad_img_flip, crop_size)
                        label_feed, mask_feed = get_feed(pad_img_flip)

                        pad_img_flip = mapper_image(pad_img_flip)
                        loss, pred1 = exe.run(test_prog,
                                              feed={
                                                  'image': pad_img_flip,
                                                  'label': label_feed,
                                                  'mask': mask_feed
                                              },
                                              fetch_list=fetch_list,
                                              return_numpy=True)
                        # Flip the prediction back before accumulating.
                        pred1 = np.flip(pred1, 3)
                        outputs += pred1[:, :, :height, :width]
                else:
                    if short_size < crop_size:
                        pad_img = pad_single_image(cur_img, crop_size)
                    else:
                        pad_img = cur_img
                    ph, pw = pad_img.shape[0:2]

                    # Sliding window over the (padded) image
                    h_grids = int(math.ceil(1.0 *
                                            (ph - crop_size) / stride)) + 1
                    w_grids = int(math.ceil(1.0 *
                                            (pw - crop_size) / stride)) + 1
                    outputs = np.zeros(shape=[1, num_classes, ph, pw],
                                       dtype='float32')
                    # Number of predictions accumulated per pixel.
                    count_norm = np.zeros(shape=[1, 1, ph, pw], dtype='int32')
                    for idh in range(h_grids):
                        for idw in range(w_grids):
                            h0 = idh * stride
                            w0 = idw * stride
                            h1 = min(h0 + crop_size, ph)
                            w1 = min(w0 + crop_size, pw)
                            crop_img = crop_image(pad_img, h0, w0, h1, w1)
                            pad_crop_img = pad_single_image(
                                crop_img, crop_size)
                            label_feed, mask_feed = get_feed(pad_crop_img)
                            pad_crop_img = mapper_image(pad_crop_img)
                            loss, pred1 = exe.run(test_prog,
                                                  feed={
                                                      'image': pad_crop_img,
                                                      'label': label_feed,
                                                      'mask': mask_feed
                                                  },
                                                  fetch_list=fetch_list,
                                                  return_numpy=True)
                            pred1 = np.array(pred1)
                            outputs[:, :, h0:h1,
                                    w0:w1] += pred1[:, :, 0:h1 - h0, 0:w1 - w0]
                            count_norm[:, :, h0:h1, w0:w1] += 1
                            if flip:
                                pad_img_flip = flip_left_right_image(crop_img)
                                pad_img_flip = pad_single_image(
                                    pad_img_flip, crop_size)
                                label_feed, mask_feed = get_feed(pad_img_flip)
                                pad_img_flip = mapper_image(pad_img_flip)
                                loss, pred1 = exe.run(test_prog,
                                                      feed={
                                                          'image':
                                                          pad_img_flip,
                                                          'label': label_feed,
                                                          'mask': mask_feed
                                                      },
                                                      fetch_list=fetch_list,
                                                      return_numpy=True)
                                pred1 = np.flip(pred1, 3)
                                outputs[:, :, h0:h1,
                                        w0:w1] += pred1[:, :, 0:h1 - h0,
                                                        0:w1 - w0]
                                count_norm[:, :, h0:h1, w0:w1] += 1

                    # Average overlapping windows, then drop the padding.
                    outputs = 1.0 * outputs / count_norm
                    outputs = outputs[:, :, :height, :width]

                # Resize this scale's result back to the original resolution
                # and add it to the per-class score map.
                with fluid.dygraph.guard():
                    outputs = fluid.dygraph.to_variable(outputs)
                    outputs = fluid.layers.resize_bilinear(outputs,
                                                           out_shape=[h, w])
                    score = outputs.numpy()[0]
                    scores += score
        else:
            # Take the original image as the model input
            loss, pred = exe.run(test_prog,
                                 feed={
                                     'image': image_org,
                                     'label': label,
                                     'mask': mask
                                 },
                                 fetch_list=fetch_list,
                                 return_numpy=True)
            scores = pred[0]

        # Compute IoU from the accumulated (all-scale) scores
        pred = np.argmax(scores, axis=0).astype('int64')
        pred = pred[np.newaxis, :, :, np.newaxis]
        step += 1
        num_images += pred.shape[0]
        conf_mat.calculate(pred, label, mask)
        _, iou = conf_mat.mean_iou()
        _, acc = conf_mat.accuracy()

        print("[EVAL] step={}/{} acc={:.4f} IoU={:.4f}".format(
            step, all_step, acc, iou))

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL] #image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL] Category IoU:", category_iou)
    print("[EVAL] Category Acc:", category_acc)
    print("[EVAL] Kappa:{:.4f}".format(conf_mat.kappa()))
    print("flip = ", flip)
    print("scales = ", scales)

    return category_iou, avg_iou, category_acc, avg_acc
Beispiel #4
0
            print("inst_num:", inst_num)

            feature_path = os.path.join(feature_path, target)

            # print(**cfg.model['kwargs'])
            model = build_model('gcn', **cfg.model['kwargs'])
            model.load_state_dict(
                torch.load(os.path.join(model_path, backbone_name)))
            HEAD_test1 = HEAD_test(nhid=512)
            HEAD_test1.load_state_dict(
                torch.load(os.path.join(model_path, HEAD_name)), False)

            with Timer('build dataset'):
                for k, v in cfg.model['kwargs'].items():
                    setattr(cfg.test_data, k, v)
                dataset = build_dataset(cfg.model['type'], cfg.test_data)

            features = torch.FloatTensor(dataset.features)
            adj = sparse_mx_to_torch_sparse_tensor(dataset.adj)
            if not dataset.ignore_label:
                labels = torch.FloatTensor(dataset.gt_labels)

            pair_a = []
            pair_b = []
            pair_a_new = []
            pair_b_new = []
            for i in range(inst_num):
                pair_a.extend([int(i)] * 80)
                pair_b.extend([int(j) for j in nbrs[i]])
            for i in range(len(pair_a)):
                if pair_a[i] != pair_b[i]:
Beispiel #5
0
def _save_gcn_checkpoint(model, head, model_root, epoch, batch):
    """Save backbone and head weights under ``model_root`` for (epoch, batch)."""
    os.makedirs(model_root, exist_ok=True)
    print('save model in epoch:{} batch:{} to {}'.format(epoch, batch, model_root))
    torch.save(model.state_dict(),
               os.path.join(model_root, "Backbone_Epoch_{}_batch_{}.pth".format(epoch + 1, batch)))
    torch.save(head.state_dict(),
               os.path.join(model_root, "Head_Epoch_{}_batch_{}.pth".format(epoch + 1, batch)))


def train_gcn(model, cfg, logger):
    """Train the GCN backbone and its pairwise head on sampled sub-graphs.

    For every epoch and every class centre, a group of ``cfg.cluster_num``
    clusters is sampled from the centre's nearest centres, 90% of each
    cluster's samples are drawn, a fresh 80-NN graph is built over them, and
    the model is optimised on 80 random patches of that graph's edges.
    Weights are saved every 100 batches and at the end of each epoch;
    validation accuracy (``perform_val``) is printed every 300 batches and at
    the end of each epoch.

    Side effects: sets ``cfg.cuda = True``, moves ``model`` to CUDA, writes
    kNN files under ``./data/rebuild_knn`` and checkpoints under
    ``./src/train_model``.

    Args:
        model: GCN backbone; called as ``model([feature, adj, labels])``.
        cfg: config providing ``model``, ``train_data``, ``total_epochs``,
            ``STAGES`` (at least three entries) and ``cluster_num``.
        logger: unused; kept for interface compatibility.
    """
    # prepare dataset
    for k, v in cfg.model['kwargs'].items():
        setattr(cfg.train_data, k, v)
    dataset = build_dataset(cfg.model['type'], cfg.train_data)
    pre_features = torch.FloatTensor(dataset.features)
    print('Have loaded the training data.')

    feature_dim = dataset.feature_dim
    lb2idxs = dataset.lb2idxs
    center_fea = dataset.center_fea.astype('float32')
    cls_num, dim = center_fea.shape

    labels = torch.LongTensor(dataset.gt_labels)
    HEAD1 = HEAD(nhid=512)
    HEAD_test1 = HEAD_test(nhid=512)

    OPTIMIZER = optim.SGD([{'params': model.parameters(), 'weight_decay': 1e-5},
                           {'params': HEAD1.parameters(), 'weight_decay': 1e-5}],
                          lr=0.01, momentum=0.9)
    print('the learning rate is 0.01')

    # NOTE: no pretrained weights are actually loaded here; the message is
    # kept unchanged for log compatibility.
    print("have load the pretrained model.")
    cfg.cuda = True
    model = model.cuda()
    HEAD1 = HEAD1.cuda()

    MODEL_ROOT = './src/train_model'
    print('the model save path is', MODEL_ROOT)

    # prepare the test data: one (i, neighbour) pair per kNN entry
    target = "part1_test"
    knn_path = "./data/knns/" + target + "/faiss_k_80.npz"
    test_knns = np.load(knn_path, allow_pickle=True)['data']
    test_inst_num = test_knns.shape[0]
    k_num = test_knns.shape[2]
    # Neighbour ids are used as tensor indices in perform_val, so force them
    # to integers (the array may store them as floats next to the distances).
    nbrs = test_knns[:, 0, :].astype(np.int64)
    pair_a = np.repeat(np.arange(test_inst_num), k_num).tolist()
    pair_b = nbrs.reshape(-1).tolist()

    patch_num = 40
    for epoch in range(cfg.total_epochs):
        # Adjust LR when entering each of the three training stages.
        for stage_idx in range(3):
            if epoch == cfg.STAGES[stage_idx]:
                schedule_lr(OPTIMIZER)

        model.train()
        HEAD1.train()

        # For every centre, find its nearest centres (200 more than needed so
        # the per-batch sampling below has some variety).
        index = faiss.IndexFlatIP(dim)
        index.add(center_fea)
        sims, cluster_id = index.search(center_fea, k=(cfg.cluster_num + 200))
        print('Have selected the cluster ids.')

        for batch in range(cls_num):
            # 0. select ids: sample clusters, then 90% of each cluster
            sample_cluster_id = random.sample(list(cluster_id[batch]), cfg.cluster_num)
            sample_id = []  # indices of the samples in this batch
            for cid in sample_cluster_id:
                members = lb2idxs[cid]
                sample_id.extend(random.sample(members, int(len(members) * 0.9)))
            sample_num = len(sample_id)
            # Skip batches that are too large or too small.
            if sample_num > 100000 or sample_num < 100:
                print('[too much samples] continue.')
                continue

            # 1. create selected labels and features
            batch_labels = labels[sample_id]
            feature = pre_features[sample_id]
            print(sample_num)

            # 2. create knn for this batch
            with Timer('build knn:'):
                knn_prefix = os.path.join("./data/rebuild_knn")
                if not os.path.exists(knn_prefix):
                    os.makedirs(knn_prefix)
                # Remove results of the previous batch before rebuilding.
                for stale_name in ('faiss_k_80.npz', 'faiss_k_80.index'):
                    stale_path = os.path.join(knn_prefix, stale_name)
                    if os.path.exists(stale_path):
                        os.remove(stale_path)

                knns = build_knns(knn_prefix,
                                  l2norm(feature.numpy()),
                                  "faiss",
                                  80,
                                  is_rebuild=True)
                batch_adj = fast_knns2spmat(knns, 80, 0, use_sim=True)
                batch_adj = build_symmetric_adj(batch_adj, self_loop=True)
                batch_adj = row_normalize(batch_adj)
                batch_adj = sparse_mx_to_torch_sparse_tensor(batch_adj, return_idx=False)

            # 3. put selected features and labels on cuda
            batch_labels = batch_labels.cuda()
            feature = feature.cuda()
            batch_adj = batch_adj.cuda()
            train_data = [feature, batch_adj, batch_labels]

            # 4. train the model on random patches of the adjacency entries;
            # two independent shuffles give every entry two visits.
            train_id_inst = batch_adj._indices().size()[1]
            rad_id = (random.sample(range(0, train_id_inst), train_id_inst)
                      + random.sample(range(0, train_id_inst), train_id_inst))
            patch_len = int(train_id_inst / patch_num)
            for i in range(patch_num * 2):
                patch_ids = rad_id[i * patch_len:(i + 1) * patch_len]
                x = model(train_data)
                loss = HEAD1(x, train_data, patch_ids)

                OPTIMIZER.zero_grad()
                loss.backward()
                OPTIMIZER.step()

                print(datetime.datetime.now())
                print('epoch:{}/{}, batch:{}/{}, batch2:{}/{},loss:{}'.format(epoch, cfg.total_epochs, batch, cls_num, i, patch_num*2, loss))

            if (batch + 1) % 100 == 0:
                _save_gcn_checkpoint(model, HEAD1, MODEL_ROOT, epoch, batch)

            if (batch + 1) % 300 == 0:
                avg_acc = perform_val(model, HEAD1, HEAD_test1, cfg, feature_dim, pair_a, pair_b)
                print('the avg testing acc in epoch:{} batch:{} is :'.format(epoch, batch), avg_acc)
                # perform_val switches to eval mode; switch back.
                model.train()
                HEAD1.train()

        # 5. test
        avg_acc = perform_val(model, HEAD1, HEAD_test1, cfg, feature_dim, pair_a, pair_b)
        print('the avg testing acc in epoch:{} batch:{} is :'.format(epoch, batch), avg_acc)

        # 6. save model
        _save_gcn_checkpoint(model, HEAD1, MODEL_ROOT, epoch, batch)