Example 1
def generate_base_model(whole_image_id, coco_data, ins_seg_model, seed_batch,
                        batch_size):
    """
    Generate base models, training separately on 20%, 30%, ..., 100% of the data.
    The data is randomly selected,
    and the eval results are saved as the baseline.
    """
    # initialize the size bookkeeping (convert fractional seed_batch / batch_size to counts)
    whole_train_size = len(whole_image_id)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # initialize random sampler
    sampler = CoCoRandomSampler(sampler_name='random',
                                whole_image_id=whole_image_id)

    # initially, select seed_batch images at random
    selected_image_id = random.sample(whole_image_id, seed_batch)
    # register data set and build data loader
    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_id)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')

    n_batches = int(np.ceil(
        ((whole_train_size - seed_batch) * 1 / batch_size))) + 1
    for n in range(n_batches):
        # check the size in this iter
        n_train_size = seed_batch + min(
            (whole_train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train_size, n))
        assert n_train_size == len(selected_image_id)

        ins_seg_model.save_selected_image_id(selected_image_id)
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files)

        n_sample = min(batch_size, whole_train_size - len(selected_image_id))
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_id)

        selected_image_id.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))

        # register dataset and build data loader
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

        # reset the model before the next iteration
        ins_seg_model.reset_model()
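The fraction-to-count conversion and the n_batches formula above fully determine the schedule of training-set sizes. A minimal stand-alone sketch of the same arithmetic, with hypothetical numbers (1000 images, a 20% seed and 10% added per round):

import numpy as np

# hypothetical sizes, only to illustrate the schedule computed in generate_base_model
whole_train_size = 1000
seed_batch, batch_size = 0.2, 0.1

# fractions below 1 become absolute counts, exactly as above
if seed_batch < 1:
    seed_batch = int(seed_batch * whole_train_size)    # 200
if batch_size < 1:
    batch_size = int(batch_size * whole_train_size)    # 100

# +1 because round n == 0 trains on the seed set before any new batch is added
n_batches = int(np.ceil((whole_train_size - seed_batch) / batch_size)) + 1     # 9

for n in range(n_batches):
    n_train_size = seed_batch + min(whole_train_size - seed_batch, n * batch_size)
    print(n, n_train_size)    # 0 -> 200, 1 -> 300, ..., 8 -> 1000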
Example 2
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch):
    """
    Check whether the original image_id list (file 100) exists in the
    OUTPUT_DIR/selected_img_list/project_id directory; if not, save it.
    The file 100 holds the image id list of the whole data set.
    The file 0 holds the image ids randomly selected in iteration 0.
    """
    dir = OUTPUT_DIR + '/' + 'selected_img_list' + '/' + project_id
    if not os.path.exists(dir):
        os.makedirs(dir)
    file = dir + '/' + str(100)

    if not os.path.exists(file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = []
        index_list = data_loader.dataset._dataset._lst
        for item in index_list:
            image_files_list.append(item['image_id'])
        save_img_list(project_id=project_id,
                      iteration=100,
                      img_id_list=image_files_list)
        print("run the function train_seed again")

    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)

        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images ".format(
            seed_batch, whole_train_size))
        save_img_list(project_id=project_id,
                      iteration=0,
                      img_id_list=selected_image_files)
        print("save the image ids randomly selected this iter 0")

        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')

        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files,
                                    iter_num=0)
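save_img_list and read_img_list are project helpers whose implementations are not shown in these excerpts. A minimal sketch of the convention the docstring describes, assuming one JSON file per iteration under OUTPUT_DIR/selected_img_list/<project_id> (the JSON format is an assumption made only for illustration):

import json
import os

def save_img_list(project_id, iteration, img_id_list, output_dir=OUTPUT_DIR):
    # one file per iteration; file "100" holds the full id list, file "0" the seed selection
    dir_path = os.path.join(output_dir, 'selected_img_list', project_id)
    os.makedirs(dir_path, exist_ok=True)
    with open(os.path.join(dir_path, str(iteration)), 'w') as f:
        json.dump(img_id_list, f)

def read_img_list(project_id, iteration, output_dir=OUTPUT_DIR):
    with open(os.path.join(output_dir, 'selected_img_list', project_id, str(iteration))) as f:
        return json.load(f)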
Example 3
def generate_one_curve(whole_image_id, coco_data, sampler, ins_seg_model,
                       seed_batch, batch_size):
    # initialize the size bookkeeping (convert fractional seed_batch / batch_size to counts)
    whole_train_size = len(whole_image_id)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # initially, select seed_batch images at random
    selected_image_id = random.sample(whole_image_id, seed_batch)
    # register data set and build data loader
    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_id)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')

    n_batches = int(np.ceil(
        ((whole_train_size - seed_batch) * 1 / batch_size))) + 1
    for n in range(n_batches):
        # check the size in this iter
        n_train_size = seed_batch + min(
            (whole_train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train_size, n))
        assert n_train_size == len(selected_image_id)

        ins_seg_model.save_selected_image_id(selected_image_id)

        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files)

        # get the losses for loss_sampler
        losses = ins_seg_model.compute_loss(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
        )

        n_sample = min(batch_size, whole_train_size - len(selected_image_id))
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_id,
                                         losses=losses,
                                         loss_decrease=False)
        selected_image_id.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))

        # register dataset and build data loader
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

        # reset the model before the next iteration
        ins_seg_model.reset_model()
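sampler.select_batch is called here with the per-image losses and loss_decrease=False, i.e. it should favour high-loss images. A hypothetical sketch of that selection rule (the real LossSampler may use a different losses format; a list of {'image_id', 'loss'} dicts is assumed here):

def select_top_loss(n_sample, already_selected, losses, loss_decrease=False):
    # hypothetical sketch of a loss-based selection rule, not the repository's LossSampler
    already = set(already_selected)
    candidates = [d for d in losses if d['image_id'] not in already]
    # loss_decrease=False -> take the images the current model handles worst
    candidates.sort(key=lambda d: d['loss'], reverse=not loss_decrease)
    return [d['image_id'] for d in candidates[:n_sample]]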
Example 4
def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    # get the whole indexes of coco
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # get the current iter_num from the saved index files (e.g. if only file 0 exists, get_iter returns 1)
    iter_num = get_iter(project_id=project_id) - 1
    n_batches = int(np.ceil(((whole_train_size - seed_batch) * 1 / batch_size))) + 1

    for n in range(n_batches):
        if n != iter_num:
            continue
        else:
            "" "init seg_model  """
            selected_image_files = read_img_list(project_id=project_id, iteration=iter_num)
            train_size_this_iter = len(selected_image_files)

            ins_seg_model = CoCoSegModel(
                args=args,
                project_id=project_id,
                coco_data=coco_data,
                train_size=train_size_this_iter,
                resume_or_load=resume_or_load
            )
            register_coco_instances_from_selected_image_files(
                name='coco_from_selected_image',
                json_file=coco_data[0]['json_file'],
                image_root=coco_data[0]['image_root'],
                selected_image_files=selected_image_files
            )
            data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
                'coco_from_selected_image')
            ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=iter_num)

            losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'],
                                                image_root=coco_data[0]['image_root'])
            whole_image_id_list = read_img_list(project_id=project_id, iteration=100)
            """ init sampler """

            sampler = LossSampler(sampler_name='increase_loss')
            n_sample = min(batch_size, whole_train_size - len(selected_image_files))
            start_time = int(time.time())
            new_batch = sampler.select_batch(n_sample, already_selected=selected_image_files, losses=losses,
                                             loss_decrease=False)
            end_time = int(time.time())
            print("select batch using " + str(end_time - start_time) + "s")

            selected_image_files.extend(new_batch)

            save_img_list(project_id=project_id, iteration=n + 1, img_id_list=selected_image_files)
            print("save {} images id list for iter {}".format(len(selected_image_files), n + 1))
            print('in {} iter'.format(n))
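get_iter is another project helper that is not shown in these excerpts. A minimal sketch consistent with the comment above (the returned number equals the count of saved per-iteration id lists, so it is 1 when only file 0 exists); the file layout is the same assumption as in the save_img_list sketch:

import os

def get_iter(project_id, output_dir=OUTPUT_DIR):
    # hypothetical: count the saved per-iteration id lists, skipping file "100"
    dir_path = os.path.join(output_dir, 'selected_img_list', project_id)
    return len([f for f in os.listdir(dir_path) if f != '100'])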
Example 5
    def fit_on_single_data(self, image_id_list):
        """
            for each image in image_id_list build a data_loader iteratively
            return a list of dict,dict {'image_id':int, 'score':float}
            use every data in image_id_list to fine tuning the base model
            and compute the promotion as the image's score
        """
        score_list = []

        base_model = copy.deepcopy(self.model)
        result = self.test()
        base_score = result['segm']['AP']

        for image_id in image_id_list:
            dic = {'image_id': image_id}
            image_id = [image_id]
            register_coco_instances_from_selected_image_files(
                name='coco_from_selected_image',
                json_file=coco_data[0]['json_file'],
                image_root=coco_data[0]['image_root'],
                selected_image_files=image_id)

            data_loader, l = self.trainer.re_build_train_loader(
                'coco_from_selected_image', images_per_batch=1)

            self.trainer.data_loader = data_loader
            self.trainer._data_loader_iter = iter(data_loader)
            self.trainer.max_iter = 20
            result = self.trainer.train()
            dic['score'] = result['segm']['AP'] - base_score
            score_list.append(dic)

            # restore the base model weights
            self.back_to_base_model(base_model=base_model)

        # save score_list
        self.save_score_list(score_list)
        return score_list
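back_to_base_model restores the snapshot taken in base_model before the per-image fine-tuning step, so every image is scored against the same starting weights. A minimal sketch of what such a method could do with a PyTorch model (an assumption; the repository's own implementation is not shown):

    def back_to_base_model(self, base_model):
        # copy the snapshot's weights back into the live model
        self.model.load_state_dict(base_model.state_dict())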
Example 6
def train_seed(args, project_id, coco_data, resume_or_load, seed_batch, batch_size):
    """
    Check whether the original image_id list (file 100) exists in the
    OUTPUT_DIR/selected_img_list/project_id directory; if not, save it.
    The file 100 holds the image id list of the whole data set.
    The file 0 holds the image ids randomly selected in iteration 0.
    """
    dir = OUTPUT_DIR + '/' + 'selected_img_list' + '/' + project_id
    if not os.path.exists(dir):
        os.makedirs(dir)
    file = dir + '/' + str(100)

    if not os.path.exists(file):
        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            resume_or_load=resume_or_load,
        )
        data_loader = ins_seg_model.trainer.data_loader
        image_files_list = []
        index_list = data_loader.dataset._dataset._lst
        for item in index_list:
            image_files_list.append(item['image_id'])
        save_img_list(project_id=project_id, iteration=100, img_id_list=image_files_list)
        print("run the function train_seed again")

    else:
        image_files_list = read_img_list(project_id=project_id, iteration=100)
        whole_train_size = len(image_files_list)
        if seed_batch < 1:
            seed_batch = int(seed_batch * whole_train_size)
        if batch_size < 1:
            batch_size = int(batch_size * whole_train_size)

        selected_image_files = random.sample(image_files_list, seed_batch)
        print("selected {} images from the {} images ".format(seed_batch, whole_train_size))
        save_img_list(project_id=project_id, iteration=0, img_id_list=selected_image_files)
        print("save the image ids randomly selected this iter 0")

        ins_seg_model = CoCoSegModel(
            args=args,
            project_id=project_id,
            coco_data=coco_data,
            train_size=len(selected_image_files),
            resume_or_load=resume_or_load,
        )
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files
        )
        data_loader_from_selected_image_files, _ = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')

        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, iter_num=0)

        """ use the trained model to get losses  
        """
        losses = ins_seg_model.compute_loss(json_file=coco_data[0]['json_file'], image_root=coco_data[0]['image_root'])

        whole_image_id_list = read_img_list(project_id=project_id, iteration=100)
        """ init sampler """

        sampler = LossSampler(sampler_name='increase_loss')
        n_sample = min(batch_size, whole_train_size - len(selected_image_files))
        start_time = int(time.time())
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_files,
                                         losses=losses,
                                         loss_decrease=False)
        end_time = int(time.time())
        print("select batch using " + str(end_time - start_time) + "s")

        selected_image_files.extend(new_batch)
        save_img_list(project_id=project_id, iteration=1, img_id_list=selected_image_files)
        print("save {} images id list for iter 1".format(len(selected_image_files)))
Example 7
def generate_one_curve(
    whole_image_id,
    coco_data,
    sampler,
    ins_seg_model,
    seed_batch,
    batch_size,
    image2class,
):
    """
    :return:
    """
    # initialize the size bookkeeping (convert fractional seed_batch / batch_size to counts)
    whole_train_size = len(whole_image_id)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # initialize the container
    results = {}
    data_sizes = []
    mious = []

    # initially, select seed_batch images at random
    selected_image_id = random.sample(whole_image_id, seed_batch)
    # register data set and build data loader
    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_id)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')

    n_batches = int(np.ceil(
        ((whole_train_size - seed_batch) * 1 / batch_size))) + 1
    for n in range(n_batches):
        # check the size in this iter
        n_train_size = seed_batch + min(
            (whole_train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train_size, n))
        assert n_train_size == len(selected_image_id)
        data_sizes.append(n_train_size)

        ins_seg_model.save_selected_image_id(selected_image_id)

        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files)
        miou = ins_seg_model.test()
        mious.append(miou)
        print('miou:{} in {} iter'.format(miou['miou'], n))
        """ get the mask feature use the trained model 
            and use the mask feature to cluster: KNN
        """

        # get the losses for loss_sampler
        losses = ins_seg_model.compute_loss(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
        )

        n_sample = min(batch_size, whole_train_size - len(selected_image_id))

        new_batch = sampler.slect_batch_from_groups(
            n_sample=n_sample,
            already_selected=selected_image_id,
            losses=losses,
            loss_decrease=False,
            image2class=image2class,
        )

        selected_image_id.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))

        # register dataset and build data loader
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_id)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

        # reset the model before the next iteration
        ins_seg_model.reset_model()

    results['mious'] = mious
    results['data_sizes'] = data_sizes
    print(results)
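sampler.slect_batch_from_groups spreads the selection budget over class groups via image2class. A hypothetical sketch of such a group-aware rule (it assumes the same losses format as before, a list of {'image_id', 'loss'} dicts, and that image2class maps every image id to a class label; the repository's sampler may differ):

from collections import defaultdict

def select_batch_from_groups(n_sample, already_selected, losses, image2class,
                             loss_decrease=False):
    # hypothetical sketch: round-robin over classes, taking the highest-loss
    # unselected image of each class in turn until the budget is spent
    already = set(already_selected)
    per_class = defaultdict(list)
    for d in losses:
        if d['image_id'] not in already:
            per_class[image2class[d['image_id']]].append(d)
    for group in per_class.values():
        group.sort(key=lambda d: d['loss'], reverse=not loss_decrease)

    selected = []
    while len(selected) < n_sample and any(per_class.values()):
        for group in per_class.values():
            if group and len(selected) < n_sample:
                selected.append(group.pop(0)['image_id'])
    return selected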
Example 8
def generate_one_curve(coco_data, data_loader, sampler, ins_seg_model,
                       seed_batch, batch_size):
    """

    :param data_loader:      the data_loader contains all training data , we use the sampler select data(image) from it.
    :param sampler:          active learning sampler
    :param ins_seg_model:    model used to score the samplers.  Expects fit and test methods to be implemented.
    :param seed_batch: float(from 0 to 1)   float indicates percentage of train data to use for initial model
    :param batch_size: float (from 0 to 1)   float indicates batch size as a percent of training data ,
    we use sampler select batch_size peaces of data (image)
    :return:
    """
    # def select_batch(sampler, n_sample, already_selected, **kwargs):
    #     """
    #     :param sampler:          active learning sampler
    #     :param n_sample:         number of images to select
    #     :param already_selected: images that have been selected before
    #     :param kwargs:
    #     :return:
    #     """
    #     kwargs['n_sample'] = n_sample
    #     kwargs['already_selected'] = already_selected
    #     batch = sampler.select_batch(**kwargs)
    #     return batch

    # get all the image files from the data_loader
    image_files_list = []
    dataset_dicts = data_loader.dataset._dataset._lst
    for item in dataset_dicts:
        image_files_list.append(item['image_id'])

    # The size of the entire training set
    train_size = len(image_files_list)
    # convert seed_batch and batch_size from fractions of the training set to absolute counts
    seed_batch = int(seed_batch * train_size)
    batch_size = int(batch_size * train_size)

    # record the model's training and testing results after each data sampling round
    results = {}
    data_sizes = []
    mious = []

    # initially, select seed_batch images at random
    selected_image_files = random.sample(image_files_list, seed_batch)

    register_coco_instances_from_selected_image_files(
        name='coco_from_selected_image',
        json_file=coco_data[0]['json_file'],
        image_root=coco_data[0]['image_root'],
        selected_image_files=selected_image_files)
    data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
        'coco_from_selected_image')
    # data_loader_iter = iter(data_loader_from_selected_image_files)
    # data = next(data_loader_iter)
    # n_batches rounds are used to sample all the data in the training set
    n_batches = int(np.ceil(((train_size - seed_batch) * 1 / batch_size))) + 1
    for n in range(n_batches):
        n_train = seed_batch + min((train_size - seed_batch), n * batch_size)
        print('{} data points for training in iter {}'.format(n_train, n))
        assert n_train == len(selected_image_files)
        data_sizes.append(n_train)
        ins_seg_model.fit_on_subset(data_loader_from_selected_image_files, n)
        miou = ins_seg_model.test()
        mious.append(miou)
        print('miou:{} in {} iter'.format(miou['miou'], n))

        # get the losses for loss_sampler
        losses = ins_seg_model.compute_loss(
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
        )

        n_sample = min(batch_size, train_size - len(selected_image_files))
        new_batch = sampler.select_batch(n_sample,
                                         already_selected=selected_image_files,
                                         losses=losses)
        selected_image_files.extend(new_batch)
        print('Requested: %d, Selected: %d' % (n_sample, len(new_batch)))
        register_coco_instances_from_selected_image_files(
            name='coco_from_selected_image',
            json_file=coco_data[0]['json_file'],
            image_root=coco_data[0]['image_root'],
            selected_image_files=selected_image_files)
        data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
            'coco_from_selected_image')
        assert len(new_batch) == n_sample

    results['mious'] = mious
    results['data_sizes'] = data_sizes
    results['sampler'] = sampler.sample_name
    print(results)
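results pairs each training-set size with the mIoU measured after training on it, which is all that is needed to draw the curve the function name refers to. A minimal plotting sketch (matplotlib; it assumes each entry of results['mious'] is the dict returned by ins_seg_model.test() with a 'miou' key, as in the prints above):

import matplotlib.pyplot as plt

def plot_curve(results, out_file='learning_curve.png'):
    # x: number of labelled training images, y: test mIoU after that round
    sizes = results['data_sizes']
    mious = [m['miou'] for m in results['mious']]
    plt.plot(sizes, mious, marker='o', label=results.get('sampler', 'sampler'))
    plt.xlabel('number of training images')
    plt.ylabel('mIoU')
    plt.legend()
    plt.savefig(out_file)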
Example 9
def train_on_batch(args, project_id, coco_data, resume_or_load, seed_batch,
                   batch_size):
    # get the whole indexes of coco
    image_files_list = read_img_list(project_id=project_id, iteration=100)
    whole_train_size = len(image_files_list)
    if seed_batch < 1:
        seed_batch = int(seed_batch * whole_train_size)
    if batch_size < 1:
        batch_size = int(batch_size * whole_train_size)

    # get the current iter_num from the saved index files (e.g. if only file 0 exists, get_iter returns 1)
    iter_num = get_iter(project_id=project_id)
    n_batches = int(np.ceil(
        ((whole_train_size - seed_batch) * 1 / batch_size))) + 1

    for n in range(n_batches):
        if n != iter_num:
            continue
        else:
            "" "init seg_model  " ""
            selected_image_files = read_img_list(project_id=project_id,
                                                 iteration=iter_num - 1)
            train_size_this_iter = seed_batch + min(
                (whole_train_size - len(selected_image_files)), n * batch_size)
            ins_seg_model = CoCoSegModel(args=args,
                                         project_id=project_id,
                                         coco_data=coco_data,
                                         train_size=train_size_this_iter,
                                         resume_or_load=resume_or_load)
            data_loader = ins_seg_model.trainer.data_loader
            mask_feature = ins_seg_model.save_mask_features(
                json_file=coco_data[0]['json_file'],
                image_root=coco_data[0]['image_root'])
            """ init sampler"""
            # sampler = CoCoRandomSampler('random_sampler', data_loader)
            sampler = CoreSetSampler('coreset_sampler', mask_feature)

            n_sample = min(batch_size,
                           whole_train_size - len(selected_image_files))
            start_time = int(time.time())
            new_batch = sampler.select_batch(
                n_sample, already_selected=selected_image_files)
            end_time = int(time.time())
            print("select batch using " + str(end_time - start_time) + "s")
            print("selected {} new images in {} iter,{} images used to train".
                  format(n_sample, n, train_size_this_iter))

            selected_image_files.extend(new_batch)
            save_img_list(project_id=project_id,
                          iteration=n,
                          img_id_list=selected_image_files)
            print("save {} images id list ".format(len(selected_image_files)))

            register_coco_instances_from_selected_image_files(
                name='coco_from_selected_image',
                json_file=coco_data[0]['json_file'],
                image_root=coco_data[0]['image_root'],
                selected_image_files=selected_image_files)
            data_loader_from_selected_image_files, l = ins_seg_model.trainer.re_build_train_loader(
                'coco_from_selected_image')

            assert train_size_this_iter == len(selected_image_files)
            ins_seg_model.fit_on_subset(data_loader_from_selected_image_files,
                                        iter_num=iter_num)
            print('in {} iter'.format(n))
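CoreSetSampler selects from the saved mask features rather than from losses. The classic coreset rule is k-center greedy: repeatedly pick the image whose feature is farthest from everything already selected. A hypothetical sketch of that rule (it assumes mask_feature maps image ids to 1-D numpy vectors and that already_selected is non-empty, which the seed batch guarantees; the repository's CoreSetSampler may differ):

import numpy as np

def k_center_greedy(n_sample, already_selected, features):
    # features: dict {image_id: 1-D np.ndarray of equal length}
    ids = list(features)
    mat = np.stack([features[i] for i in ids])                 # (N, D)
    sel = np.stack([features[i] for i in already_selected])    # (S, D)
    # distance from every image to its nearest already-selected image
    min_dist = np.linalg.norm(mat[:, None, :] - sel[None, :, :], axis=-1).min(axis=1)

    new_batch = []
    for _ in range(n_sample):
        idx = int(min_dist.argmax())                           # farthest-first pick
        new_batch.append(ids[idx])
        min_dist = np.minimum(min_dist, np.linalg.norm(mat - mat[idx], axis=1))
    return new_batch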