Example #1
def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    # get the image-id list of the whole COCO dataset (saved as iteration 100)
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # infer the current iter_num from the saved index files (e.g. if file 0 exists, the current iter_num is 1)
    iter_num = get_iter(project_id=project_id) - 1
    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1

    for n in range(n_batches):
        if n != iter_num:
            continue
        else:
            "" "init seg_model  """
            selected_image_files = read_img_list(project_id=project_id, iteration=iter_num)
            train_size_this_iter = len(selected_image_files)

            ins_seg_model = CoCoSegModel(
                args=args,
                project_id=project_id,
                coco_data=coco_data,
                train_size=train_size_this_iter,
                resume_or_load=resume_or_load
            )
            register_coco_instances_from_selected_image_files(
                name='coco_from_selected_image',
                json_file=coco_data[0]['json_file'],
                image_root=coco_data[0]['image_root'],
                selected_image_files=selected_image_files
            )
            data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
                'coco_from_selected_image')
            ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=iter_num)

            losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'],
                                                image_root=coco_data[0]['image_root'])
            whole_image_id_list = read_img_list(project_id=project_id, iteration=100)
            """ init sampler """

            sampler = LossSampler(sampler_name='increase_loss')
            n_sample = min(batch_size, whole_train_size - len(selected_image_files))
            start_time = int(time.time())
            new_batch = sampler.select_batch(n_sample, already_selected=selected_image_files, losses=losses,
                                             loss_decrease=False)
            end_time = int(time.time())
            print("select batch using " + str(end_time - start_time) + "s")

            selected_image_files.extend(new_batch)

            save_img_list(project_id=project_id, iteration=n + 1, img_id_list=selected_image_files)
            print("saved an id list of {} images for iter {}".format(len(selected_image_files), n + 1))
            print("in iter {}".format(n))
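The snippets above lean on three bookkeeping helpers (save_img_list, read_img_list, get_iter). A minimal sketch of what they might look like, assuming one JSON file per iteration under OUTPUT_DIR/selected_img_list/<project_id> (the JSON format and the OUTPUT_DIR value are assumptions, not the repo's actual implementation):

import json
import os

OUTPUT_DIR = './output'  # hypothetical value; the real constant comes from the project config


def _list_dir(project_id):
    return os.path.join(OUTPUT_DIR, 'selected_img_list', str(project_id))


def save_img_list(project_id, iteration, img_id_list):
    os.makedirs(_list_dir(project_id), exist_ok=True)
    with open(os.path.join(_list_dir(project_id), str(iteration)), 'w') as f:
        json.dump(img_id_list, f)


def read_img_list(project_id, iteration):
    with open(os.path.join(_list_dir(project_id), str(iteration))) as f:
        return json.load(f)


def get_iter(project_id):
    # file 100 holds the whole dataset, so only files numbered below 100 count
    # as iteration checkpoints (file 0 present -> get_iter() returns 1)
    names = os.listdir(_list_dir(project_id))
    return len([n for n in names if n.isdigit() and int(n) < 100])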
Example #2
def reset_seg_model(seg_model, coco_data):
    # rebuild a fresh model from the old model's own arguments
    args = seg_model.args
    project_id = seg_model.project_id
    resume_or_load = seg_model.resume_or_load
    # drop the local reference; the old model is only freed once the caller
    # rebinds its variable to the returned model
    del seg_model
    new_seg_model = CoCoSegModel(args, project_id, coco_data, resume_or_load)
    return new_seg_model
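A hypothetical usage of the pattern: the old model only becomes unreachable once the caller rebinds its variable to the returned one, after which cached GPU memory can be released.

import torch

seg_model = reset_seg_model(seg_model, coco_data)
torch.cuda.empty_cache()  # optional: release GPU memory cached by the old model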
Example #3
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch):
    """
    check if there is origin (100)image_id list in the OUTPUT_DIR/selected_image_list/project_id  dir
    if not save the origin (100)image_id list
    the file 100 is whole data set image id list
    the file 0 is this iter we randomly select image id list
    """
    list_dir = os.path.join(OUTPUT_DIR, 'selected_img_list', project_id)
    if not os.path.exists(list_dir):
        os.makedirs(list_dir)
    whole_list_file = os.path.join(list_dir, str(100))

    if not os.path.exists(whole_list_file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = []
        index_list = data_loader.dataset._dataset._lst
        for item in index_list:
            image_files_list.append(item['image_id'])
        save_img_list(project_id=project_id,
                      iteration=100,
                      img_id_list=image_files_list)
        print("run the function train_seed again")

    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)

        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images ".format(
            seed_batch, whole_train_size))
        save_img_list(project_id=project_id,
                      iteration=0,
                      img_id_list=selected_image_files)
        print("save the image ids randomly selected this iter 0")

        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')

        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files,
                                    iter_num=0)
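A note on the convention at the top of train_seed: seed_batch (and batch_size in the other snippets) doubles as either a fraction or a count. A small sketch of that rule with a hypothetical helper name:

def resolve_count(value, whole_train_size):
    # values below 1 are fractions of the whole training set,
    # values of 1 or more are absolute image counts
    return int(value * whole_train_size) if value < 1 else int(value)

assert resolve_count(0.2, 1000) == 200  # 20% of the dataset
assert resolve_count(150, 1000) == 150  # already an absolute count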
Example #4
        assert len(new_batch) == n_sample

        # reset model if necessary
        ins_seg_model.reset_model()


if __name__ == '__main__':
    """
        train some base model use separately 20% data 30% data ......100% data

    """
    coco_data = debug_data

    args = default_argument_parser().parse_args()
    seg_model = CoCoSegModel(args,
                             project_id='Base',
                             coco_data=coco_data,
                             resume_or_load=True)
    data_loader = seg_model.trainer.data_loader
    whole_image_id = [
        item['image_id'] for item in data_loader.dataset._dataset._lst
    ]

    generate_base_model(whole_image_id=whole_image_id,
                        coco_data=coco_data,
                        ins_seg_model=seg_model,
                        seed_batch=0.2,
                        batch_size=0.1)
    """
        load the trained base models, and use base models to fit_on_single_data get score_list
        the score_list will be saved as OUTPUT_DIR/file/score_list
    """
Example #5
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')

        # reset model
        print("--reset model")
        ins_seg_model.reset_model()


if __name__ == "__main__":

    args = default_argument_parser().parse_args()
    project_id = "random"
    seg_model = CoCoSegModel(args,
                             project_id=project_id,
                             coco_data=debug_data,
                             resume_or_load=True)
    data_loader = seg_model.trainer.data_loader
    whole_image_id = []
    index_list = data_loader.dataset._dataset._lst
    for item in index_list:
        whole_image_id.append(item['image_id'])

    randomsampler = CoCoRandomSampler("random_sampler",
                                      whole_image_id=whole_image_id)
    generate_one_curve(
        coco_data=copy.deepcopy(debug_data),
        whole_image_id=copy.deepcopy(whole_image_id),
        sampler=randomsampler,
        ins_seg_model=seg_model,
        batch_size=100,
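For reference, a minimal sampler compatible with the CoCoRandomSampler call above might look like this (the class body is an assumption; only the constructor arguments and the select_batch signature are taken from the snippets):

import random

class CoCoRandomSampler:
    def __init__(self, sampler_name, whole_image_id):
        self.sampler_name = sampler_name
        self.whole_image_id = list(whole_image_id)

    def select_batch(self, n_sample, already_selected):
        # uniformly sample from the image ids not yet selected
        already = set(already_selected)
        pool = [i for i in self.whole_image_id if i not in already]
        return random.sample(pool, n_sample)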
Example #6
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    """
    check if there is origin (100)image_id list in the OUTPUT_DIR/selected_image_list/project_id  dir
    if not save the origin (100)image_id list
    the file 100 is whole data set image id list
    the file 0 is this iter we randomly select image id list
    """
    list_dir = os.path.join(OUTPUT_DIR, 'selected_img_list', project_id)
    if not os.path.exists(list_dir):
        os.makedirs(list_dir)
    whole_list_file = os.path.join(list_dir, str(100))

    if not os.path.exists(whole_list_file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = []
        index_list = data_loader.dataset._dataset._lst
        for item in index_list:
            image_files_list.append(item['image_id'])
        save_img_list(project_id=project_id, iteration=100, img_id_list=image_files_list)
        print("run the function train_seed again")

    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)
        if batch_size < 1:
            batch_size = int(batch_size * whole_train_size)

        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images ".format(seed_batch, whole_train_size))
        save_img_list(project_id=project_id, iteration=0, img_id_list=selected_image_files)
        print("save the image ids randomly selected this iter 0")

        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files
        )
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')

        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=0)

        """ use the trained model to get losses  
        """
        losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'], image_root=coco_data[0]['image_root'])

        whole_image_id_list = read_img_list(project_id=project_id, iteration=100)
        """ init sampler """

        sampler = LossSampler(sampler_name='increase_loss')
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample, already_selected=selected_image_files,
                                         losses=losses, loss_decrease=False)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")

        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=1, img_id_list=selected_image_files)
        print("save {} images id list for iter 1".format(len(selected_image_files)))
Example #7
        assert len(new_batch) == n_sample

        # reset model if necessary
        ins_seg_model.reset_model()

    results['mious'] = mious
    results['data_sizes'] = data_sizes
    print(results)


if __name__ == "__main__":
    coco_data = debug_data

    args = default_argument_parser().parse_args()
    seg_model = CoCoSegModel(args,
                             project_id='self_paced_with_diversity',
                             coco_data=coco_data,
                             resume_or_load=True)
    data_loader = seg_model.trainer.data_loader
    whole_image_id = [
        item['image_id'] for item in data_loader.dataset._dataset._lst
    ]

    # wait until the VAE feature has been generated
    while not os.path.exists(VAE_feature_path):
        print('waiting for VAE feature')
        time.sleep(15)
    print('the VAE feature has been generated')
Example #8
        assert len(new_batch_1) == n_sample
        assert len(new_batch_2) == n_sample

        # reset model if necessary
        ins_seg_model_1.reset_model()
        ins_seg_model_2.reset_model()


if __name__ == "__main__":

    data = debug_data
    args = default_argument_parser().parse_args()
    seg_model_1 = CoCoSegModel(args,
                               project_id='co_teaching_model_1',
                               coco_data=data,
                               model_config='Mask_RCNN2',
                               resume_or_load=True)

    seg_model_2 = CoCoSegModel(args,
                               project_id='co_teaching_model_2',
                               coco_data=data,
                               model_config='Mask_RCNN',
                               resume_or_load=True)

    data_loader_1 = seg_model_1.trainer.data_loader

    data_loader_2 = seg_model_2.trainer.data_loader

    whole_image_id_1 = []
    index_list = data_loader_1.dataset._dataset._lst
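The fragment above pairs two differently configured models (Mask_RCNN and Mask_RCNN2), which matches the usual co-teaching recipe: each model picks its small-loss samples and hands them to its peer. A sketch under that assumption (the function and the image_id -> loss dicts are hypothetical):

def co_teaching_select(losses_1, losses_2, n_sample):
    # each model trains on the other's small-loss picks
    pick_for_model_2 = sorted(losses_1, key=losses_1.get)[:n_sample]
    pick_for_model_1 = sorted(losses_2, key=losses_2.get)[:n_sample]
    return pick_for_model_1, pick_for_model_2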
Example #9
def seed_torch(seed=0):  # signature reconstructed; the default seed value is an assumption
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False


if __name__ == '__main__':
    seed_torch()
    # check that every image can be read correctly
    # result: COCO_train2014_000000167126.jpg load error!
    # check_file("/home/muyun99/Downloads/Tsne/adsampler/imageid/all")

    # initialize seg_model and get the whole_image_id
    args = default_argument_parser().parse_args()
    seg_model = CoCoSegModel(args, project_id='adversely', coco_data=coco_data, resume_or_load=True)

    data_loader = seg_model.trainer.data_loader
    whole_image_id = [item['image_id'] for item in data_loader.dataset._dataset._lst]

    # ids of images that fail to load are kept in error_imgid
    error_imgid = [167126]
    trainer = Adversary_sampler_trainer(whole_image_id=whole_image_id)

    # load the pretrained VAE and discriminator weights
    trainer.load_weight(
        vae_weight=os.path.join(WEIGHT_path, "vae_model_14912_2500.pth"),
        dis_weight=os.path.join(WEIGHT_path, "dis_model_14912_2500.pth")
    )

    # build the dataloader for training
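Loading a VAE plus a discriminator is the VAAL-style adversarial setup: images whose latent codes the discriminator scores as least "labelled" are queried first. A selection sketch under that assumption (the encode() and discriminator call signatures are hypothetical, not the repo's API):

import torch

def adversarial_select(vae, discriminator, unlabeled_images, n_sample):
    with torch.no_grad():
        z = vae.encode(unlabeled_images)          # assumed: returns latent codes
        p_labeled = discriminator(z).squeeze(-1)  # assumed: P(code comes from labelled data)
    # query the images the discriminator is least sure are labelled
    idx = torch.argsort(p_labeled)[:n_sample]
    return idx.tolist()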
Example #10
def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch,
                   batch_size):
    # get the image-id list of the whole COCO dataset (saved as iteration 100)
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # infer the current iter_num from the saved index files (e.g. if file 0 exists, the current iter_num is 1)
    iter_num = get_iter(project_id=project_id)
    n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1

    for n in range(n_batches):
        if n != iter_num:
            continue
        else:
            "" "init seg_model  " ""
            selected_image_files = read_img_list(project_id=project_id,
                                                 iteration=iter_num - 1)
            train_size_this_iter = seed_batch + min(
                (whole_train_size - len(selected_image_files)), n * batch_size)
            ins_seg_model = CoCoSegModel(args=args,
                                         project_id=project_id,
                                         coco_data=coco_data,
                                         train_size=train_size_this_iter,
                                         resume_or_load=resume_or_load)
            data_loader = ins_seg_model.trainer.data_loader
            mask_feature = ins_seg_model.save_mask_features(
                json_file=coco_data[0]['json_file'],
                image_root=coco_data[0]['image_root'])
            """ init sampler"""
            # sampler = CoCoRandomSampler('random_sampler', data_loader)
            sampler = CoreSetSampler('coreset_sampler', mask_feature)

            n_sample = min(batch_size,
                           whole_train_size - len(selected_image_files))
            start_time = int(time.time())
            new_batch = sampler.select_batch(
                n_sample, already_selected=selected_image_files)
            end_time = int(time.time())
            print("select batch using " + str(end_time - start_time) + "s")
            print("selected {} new images in {} iter,{} images used to train".
                  format(n_sample, n, train_size_this_iter))

            selected_image_files.extend(new_batch)
            save_img_list(project_id=project_id,
                          iteration=n,
                          img_id_list=selected_image_files)
            print("save {} images id list ".format(len(selected_image_files)))

            register_coco_instances_from_selected_image_files(
                name='coco_from_selected_image',
                json_file=coco_data[0]['json_file'],
                image_root=coco_data[0]['image_root'],
                selected_image_files=selected_image_files)
            data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
                'coco_from_selected_image')

            assert train_size_this_iter == len(selected_image_files)
            ins_seg_model.fit_on_subset(data_loader_from_selected_image_files,
                                        iter_num=iter_num)
            print("in iter {}".format(n))
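CoreSetSampler in Example #10 selects over saved mask features, which points at the classic greedy k-center construction. A sketch under that assumption (the (N, D) feature layout and the index-based interface are assumptions):

import numpy as np

def k_center_greedy(features, already_selected_idx, n_sample):
    if not already_selected_idx:
        already_selected_idx = [0]
    # distance from every point to its nearest already-selected point
    dists = np.full(len(features), np.inf)
    for idx in already_selected_idx:
        dists = np.minimum(dists, np.linalg.norm(features - features[idx], axis=1))
    new_batch = []
    for _ in range(n_sample):
        far_idx = int(np.argmax(dists))  # farthest point joins the batch
        new_batch.append(far_idx)
        dists = np.minimum(dists, np.linalg.norm(features - features[far_idx], axis=1))
    return new_batch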