Example no. 1
    def open(filename):
        with configuration.ConfigurationContext("NetworkResistantImage"):
            cumulative_wait_max = configuration.get("cumulative_wait_max",
                                                    2.0 * 60.0 * 60.0)
            wait_interval_initial = configuration.get("wait_interval_initial",
                                                      0.5)

        # Set initial state
        wait_interval = wait_interval_initial
        cumulative_wait = 0.0
        last_exception = None

        while cumulative_wait <= cumulative_wait_max:
            try:
                image = Image.open(filename)
                return image

            except Exception as ex:
                print(
                    f"Cannot open {filename}: {ex}. Waiting {wait_interval} seconds."
                )
                last_exception = ex
                time.sleep(wait_interval)
                cumulative_wait += wait_interval
                wait_interval *= 2.0

        raise last_exception
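With the defaults above, the wait interval starts at 0.5 seconds and doubles after every failed attempt, so the call keeps retrying until the cumulative wait exceeds two hours, then re-raises the last exception. A minimal self-contained sketch of the same backoff pattern, with plain arguments in place of the configuration lookups (the function and parameter names are illustrative, not part of the library):

    import time

    def retry_with_backoff(fn, wait_interval_initial=0.5,
                           cumulative_wait_max=2.0 * 60.0 * 60.0):
        # Call fn() until it succeeds, doubling the wait after each failure.
        wait_interval = wait_interval_initial
        cumulative_wait = 0.0
        last_exception = None
        while cumulative_wait <= cumulative_wait_max:
            try:
                return fn()
            except Exception as ex:  # broad catch, mirroring the example above
                last_exception = ex
                time.sleep(wait_interval)
                cumulative_wait += wait_interval
                wait_interval *= 2.0
        raise last_exception

    # Usage: retry_with_backoff(lambda: Image.open(filename))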
Example no. 2
    def __init__(self, cls):
        KerasIncrementalModel.__init__(self, cls)

        with configuration.ConfigurationContext("DFNKerasIncrementalModel"):
            self.exposure_coef = configuration.get("exposure_coef", 1.0)
            self.always_rehearse = configuration.get("always_rehearse", False)
            self.lambda_ = configuration.get("lambda", 0.5)
            self.fixed_inner_steps = configuration.get("fixed_inner_steps",
                                                       None)

        self.rehearsal_pool = []
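Most constructors in these examples follow the same pattern: a ConfigurationContext names the component, and configuration.get reads a key with a default (note that self.lambda_ carries a trailing underscore because lambda is a Python keyword). A minimal stand-in illustrating how such a context might namespace keys; this is a sketch under assumptions, not the actual chia implementation:

    import contextlib

    _values = {}   # flat key/value store, e.g. {"DFNKerasIncrementalModel.lambda": 0.5}
    _scopes = []   # stack of active context names

    @contextlib.contextmanager
    def ConfigurationContext(name):
        _scopes.append(name)
        try:
            yield
        finally:
            _scopes.pop()

    def get(key, default=None, no_default=False):
        # no_default=True marks a key as required, as in the later examples.
        full_key = ".".join(_scopes + [key])
        if no_default and full_key not in _values:
            raise KeyError(f"required configuration key missing: {full_key}")
        return _values.get(full_key, default)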
Example no. 3
    def __init__(self, kb):
        EmbeddingBasedKerasHC.__init__(self, kb)

        # Configuration
        with configuration.ConfigurationContext("IDKEmbeddingBasedKerasHC"):
            self._l2_regularization_coefficient = configuration.get("l2", 5e-5)
            self._mlnp = configuration.get("mlnp", True)
            self._normalize_scores = configuration.get("normalize_scores", True)

        self.fc_layer = None
        self.uid_to_dimension = {}
        self.graph = None
        self.observed_uids = None
        self.topo_sorted_uids = None
        self.update_embedding()
Example no. 4
    def __init__(self, model, kb):
        super().__init__(model, kb)
        with configuration.ConfigurationContext(
                "SemanticOneVsTwoActiveLearningMethod"):
            self.semantic_measure_name = configuration.get(
                "semantic_measure", "Rada1989")
            self.semantic_measure = semanticmeasures.method(
                self.semantic_measure_name, kb)
Example no. 5
    def __init__(self):
        with configuration.ConfigurationContext("LNDWDataset"):
            self.base_path = configuration.get_system("LNDWDataset.base_path")
            self.side_length = configuration.get("side_length", 224)
            self._viability_threshold = configuration.get(
                "viability_threshold", 3.0)

        self.all_classes_even_unviable = []
        with open(os.path.join(self.base_path, "classes.csv")) as classes_file:
            reader = csv.reader(classes_file, delimiter=";")
            header = next(reader)
            fields = {
                "folder": "ID",
                "class_name": "Class Name",
                "individual_id": "No.",
                "grade": "Grade",
                "wordnet": "WordNet",
            }
            fields = {k: header.index(v) for k, v in fields.items()}
            for line in reader:
                self.all_classes_even_unviable += [{
                    k: line[v]
                    for k, v in fields.items()
                }]

            self.viable_classes = [
                class_ for class_ in self.all_classes_even_unviable
                if self._viable(class_)
            ]

        self.wordnet_mapping = []
        for class_ in self.viable_classes:
            self.wordnet_mapping += [(
                f"{_namespace_uid}::{class_['class_name']}" +
                f"{int(class_['individual_id']):02d}",
                f"WordNet3.0::{class_['wordnet']}",
            )]
            self.wordnet_mapping += [(
                f"{_namespace_uid}::{class_['class_name']}",
                f"WordNet3.0::{class_['wordnet']}",
            )]

        # Attributes are set in setup()
        self.individuals = None
        self.setup()
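Because the reader resolves column positions from the header row, classes.csv only has to provide the five referenced columns, in any order. A hypothetical two-line file consistent with the parsing above (all values invented for illustration):

    ID;Class Name;No.;Grade;WordNet
    01;oak leaf;1;4.0;oak.n.01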
Example no. 6
    def __init__(self, cls):
        KerasIncrementalModel.__init__(self, cls)

        with configuration.ConfigurationContext(
                "FastSingleShotKerasIncrementalModel"):
            self._inner_steps = configuration.get("inner_steps",
                                                  no_default=True)

        self._already_observed = False
Example no. 7
    def __init__(self):
        with configuration.ConfigurationContext(self.__class__.__name__):
            self.base_path = configuration.get_system(
                "iNaturalist2018Dataset.base_path")
            self.side_length = configuration.get("side_length", 224)

        self._id_to_class = {}
        with open(os.path.join(self.base_path,
                               "categories.json")) as json_file:
            json_data = json.load(json_file)
            for json_datum in json_data:
                self._id_to_class[json_datum["id"]] = json_datum["name"]
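The loop above treats categories.json as a list of objects with at least an id and a name field, for example (entries invented for illustration):

    [
        {"id": 0, "name": "Amanita muscaria"},
        {"id": 1, "name": "Parus major"}
    ]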
Example no. 8
    def __init__(self, kb):
        super().__init__(kb)
        with configuration.ConfigurationContext(self.__class__.__name__):
            self.noise_model = configuration.get("noise_model",
                                                 no_default=True)

            if self.noise_model == "Deng2014":
                self.relabel_fraction: float = configuration.get(
                    "relabel_fraction", no_default=True)
            elif self.noise_model == "Poisson":
                self.lambda_: float = configuration.get("lambda",
                                                        no_default=True)
            elif self.noise_model == "Geometric":
                self.q: float = configuration.get("q", no_default=True)
            else:
                raise ValueError(f"Unknown noise model: {self.noise_model}")

            self.filter_imprecise = configuration.get("filter_imprecise",
                                                      False)
            self.project_to_random_leaf = configuration.get(
                "project_to_random_leaf", False)

        self.last_concept_stamp = -1
        self.graph: Optional[nx.DiGraph] = None
        self.root = None
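Because of the no_default=True lookups, each noise model requires its own parameter key. A hypothetical configuration for this component, with key names taken from the lookups above (how values reach the configuration store is an assumption):

    {
        "noise_model": "Poisson",    # or "Deng2014" / "Geometric"
        "lambda": 2.0,               # required only for "Poisson"
        "filter_imprecise": False,
        "project_to_random_leaf": False,
    }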
Example no. 9
    def __init__(self, prefix=""):
        instrumentation.InstrumentationObserver.__init__(self, prefix)

        # Configuration
        with configuration.ConfigurationContext(self.__class__.__name__):
            self._mongo_observer = sacred.observers.MongoObserver.create(
                url=configuration.get(
                    "mongo_url",
                    next(
                        open(os.path.expanduser("~/work/experiments/sacred/mongourl"))
                    ),
                ),
                db_name=configuration.get("mongo_db_name", "sacred"),
            )
        self.sacred_experiment = None
        self.sacred_run = None

        self.done = None
        self.run_object_available = None
        self.sacred_thread = None
        self.stored_result = None
        self.stored_exception = None
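Note that the fallback default for mongo_url is evaluated eagerly: next(open(...)) runs before configuration.get is called, so the mongourl file must exist, and its handle is never explicitly closed, even when the key is configured. A lazier sketch, assuming configuration.get simply returns the default when the key is absent:

    def _default_mongo_url():
        # Read only the first line and close the file deterministically.
        with open(os.path.expanduser("~/work/experiments/sacred/mongourl")) as f:
            return next(f).strip()

    url = configuration.get("mongo_url", None)
    if url is None:
        url = _default_mongo_url()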
Example no. 10
    def __init__(self, kb):
        EmbeddingBasedKerasHC.__init__(self, kb)

        # Configuration
        with configuration.ConfigurationContext(self.__class__.__name__):
            self._l2_regularization_coefficient = configuration.get("l2", 5e-5)

        self.last_observed_concept_count = len(self.kb.get_observed_concepts())

        self.fc_layer = None
        self.uid_to_dimension = {}
        self.dimension_to_uid = []

        self.update_embedding()
Example no. 11
    def __init__(self):
        with configuration.ConfigurationContext(self.__class__.__name__):
            self.do_random_flip_horizontal = configuration.get(
                "do_random_flip_horizontal", True)
            self.do_random_flip_vertical = configuration.get(
                "do_random_flip_vertical", True)

            self.do_random_rotate = configuration.get("do_random_rotate", True)

            self.do_random_crop = configuration.get("do_random_crop", True)
            self.random_crop_factor = configuration.get(
                "random_crop_factor", 0.2)

            self.do_random_brightness_and_contrast = configuration.get(
                "do_random_brightness_and_contrast", True)
            self.random_brightness_factor = configuration.get(
                "random_brightness_factor", 0.05)
            self.random_contrast_factors = configuration.get(
                "random_contrast_factors", (0.7, 1.3))

            self.do_random_hue_and_saturation = configuration.get(
                "do_random_hue_and_saturation", True)
            self.random_hue_factor = configuration.get("random_hue_factor",
                                                       0.08)
            self.random_saturation_factors = configuration.get(
                "random_saturation_factors", (0.6, 1.6))

            self.do_random_scale = configuration.get("do_random_scale", True)
            self.random_scale_factors = configuration.get(
                "random_scale_factors", (0.5, 2.0))
Example no. 12
    def __init__(
            self,
            cls: keras_hierarchicalclassification.KerasHierarchicalClassifier):
        self.cls = cls

        with configuration.ConfigurationContext("KerasIncrementalModel"):
            # Preprocessing
            self.random_crop_to_size = configuration.get(
                "random_crop_to_size", None)
            _channel_mean = configuration.get("channel_mean",
                                              [127.5, 127.5, 127.5])
            self.channel_mean_normalized = np.array(_channel_mean) / 255.0
            _channel_stddev = configuration.get("channel_stddev",
                                                [127.5, 127.5, 127.5])
            self.channel_stddev_normalized = np.array(_channel_stddev) / 255.0

            # Batch size
            self.batchsize_max = configuration.get("batchsize_max", 256)
            self.batchsize_min = configuration.get("batchsize_min", 1)
            self.sequential_training_batches = configuration.get(
                "sequential_training_batches", 1)
            self.autobs_vram = configuration.get(
                "autobs_vram", configuration.get_system("gpu0_vram"))

            # Fine-tuning options
            self.do_train_feature_extractor = configuration.get(
                "train_feature_extractor", False)
            self.use_pretrained_weights = configuration.get(
                "use_pretrained_weights", "ILSVRC2012")

            # Architecture
            self.architecture = configuration.get("architecture",
                                                  "keras::ResNet50V2")

            # Optimization and regularization
            self.l2_regularization = configuration.get("l2_regularization",
                                                       5e-5)
            self.optimizer_name = configuration.get("optimizer", "adam")
            if self.optimizer_name == "sgd":
                self.sgd_momentum = configuration.get("sgd_momentum", 0.9)
            self.lr_schedule_cfg = configuration.get("lr_schedule", {
                "name": "constant",
                "config": {
                    "initial_lr": 0.003
                }
            })
            self.lr_schedule = keras_learningrateschedule.get(
                self.lr_schedule_cfg)

        if self.architecture == "keras::ResNet50V2":
            self.feature_extractor = resnet_v2.ResNet50V2(
                include_top=False,
                input_tensor=None,
                input_shape=None,
                pooling="avg",
                weights="imagenet"
                if self.use_pretrained_weights == "ILSVRC2012" else None,
            )
            self.pixels_per_gb = 1100000

            self._add_regularizers()

        elif self.architecture == "keras::InceptionResNetV2":
            self.feature_extractor = inception_resnet_v2.InceptionResNetV2(
                include_top=False,
                input_tensor=None,
                input_shape=None,
                pooling="avg",
                weights="imagenet"
                if self.use_pretrained_weights == "ILSVRC2012" else None,
            )
            self.pixels_per_gb = 700000

            self._add_regularizers()

        elif self.architecture == "keras::MobileNetV2":
            with configuration.ConfigurationContext("KerasIncrementalModel"):
                self.side_length = configuration.get("side_length",
                                                     no_default=True)
            self.feature_extractor = mobilenet_v2.MobileNetV2(
                include_top=False,
                input_tensor=None,
                input_shape=(self.side_length, self.side_length, 3),
                pooling="avg",
                weights="imagenet"
                if self.use_pretrained_weights == "ILSVRC2012" else None,
            )
            self.pixels_per_gb = 2000000

            self._add_regularizers()

        elif self.architecture == "keras::NASNetMobile":
            with configuration.ConfigurationContext("KerasIncrementalModel"):
                self.side_length = configuration.get("side_length",
                                                     no_default=True)
            self.feature_extractor = nasnet.NASNetMobile(
                include_top=False,
                input_tensor=None,
                input_shape=(self.side_length, self.side_length, 3),
                pooling="avg",
                weights="imagenet"
                if self.use_pretrained_weights == "ILSVRC2012" else None,
            )
            self.pixels_per_gb = 1350000

            self._add_regularizers()

        elif self.architecture == "keras::CIFAR-ResNet56":
            assert (self.do_train_feature_extractor
                    ), "There are no pretrained weights for this architecture!"
            assert (self.use_pretrained_weights is None
                    ), "There are no pretrained weights for this architecture!"

            from chia.methods.common import keras_cifar_resnet

            self.feature_extractor = keras_cifar_resnet.feature_extractor(
                version=2, n=6, l2_norm=self.l2_regularization)
            self.pixels_per_gb = 200000

        else:
            raise ValueError(f'Unknown architecture "{self.architecture}"')

        if self.optimizer_name == "adam":
            self.optimizer = tf.keras.optimizers.Adam(self.lr_schedule(0))
        else:
            self.optimizer = tf.keras.optimizers.SGD(
                learning_rate=self.lr_schedule(0), momentum=self.sgd_momentum)
        self.augmentation = keras_dataaugmentation.KerasDataAugmentation()

        if (self.use_pretrained_weights is not None
                and self.use_pretrained_weights != "ILSVRC2012"):
            print(
                f"Loading alternative pretrained weights {self.use_pretrained_weights}"
            )
            self.feature_extractor.load_weights(self.use_pretrained_weights)

        if not self.do_train_feature_extractor:
            for layer in self.feature_extractor.layers:
                layer.trainable = False

        self.reported_auto_bs = False

        # State here
        self.current_step = 0
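The per-architecture pixels_per_gb constants feed the automatic batch-size selection together with autobs_vram; the computation itself is not part of this excerpt. A hypothetical configuration overriding some "KerasIncrementalModel" defaults, with key names taken from the lookups above and values chosen for illustration:

    {
        "architecture": "keras::MobileNetV2",
        "side_length": 224,     # required for MobileNetV2 / NASNetMobile
        "optimizer": "sgd",
        "sgd_momentum": 0.9,
        "lr_schedule": {"name": "constant", "config": {"initial_lr": 0.003}},
        "train_feature_extractor": False,
        "batchsize_max": 256,
    }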
Example no. 13
def main():
    # Some constants
    label_gt_resource_id = "label_ground_truth"
    label_ann_resource_id = "label_annotated"
    label_pred_resource_id = "label_predicted"

    # General config
    ilm_method = configuration.get("ilm_method", no_default=True)
    cls_method = configuration.get("cls_method", no_default=True)
    interaction_method = configuration.get("interaction_method",
                                           no_default=True)
    experiment_scale = configuration.get("experiment_scale", no_default=True)
    dataset_name = configuration.get("dataset", no_default=True)
    experiment_name = configuration.get("experiment_name", no_default=True)
    report_interval = configuration.get("report_interval", no_default=True)
    report_initially = configuration.get("report_initially", no_default=True)
    validation_scale = configuration.get("validation_scale", no_default=True)
    skip_reclassification = configuration.get("skip_reclassification",
                                              no_default=True)
    evaluators = configuration.get("evaluators", no_default=True)

    # KB specific stuff
    observe_gt_concepts = configuration.get("observe_gt_concepts",
                                            no_default=True)

    # Save and restore
    restore_path = configuration.get("restore_path", no_default=True)
    save_path = configuration.get("save_path", no_default=True)
    save_path_append_run_number = configuration.get(
        "save_path_append_run_number", no_default=True)

    # Eval config
    use_sacred_observer = configuration.get("use_sacred_observer",
                                            no_default=True)

    # Dataset specific setup stuff
    if dataset_name == "CORe50":
        core50_scenario = configuration.get("core50_scenario", no_default=True)

    # Instantiate dataset
    dataset = datasets.dataset(dataset_name)

    # Get instrumentation going
    instrumentation_observers = [
        instrumentation.PrintObserver(experiment_name),
        instrumentation.JSONResultObserver(experiment_name),
    ]
    if use_sacred_observer:
        from chia.framework.instrumentation import sacred_instrumentation

        instrumentation_observers += [
            sacred_instrumentation.SacredObserver(experiment_name)
        ]

    with instrumentation.InstrumentationContext(
            "run", observers=instrumentation_observers):
        # Determine run count
        if dataset_name == "CORe50":
            run_count = dataset.get_run_count(core50_scenario)
        else:
            run_count = configuration.get("run_count", no_default=True)

        run_count = max(1, int(math.ceil(run_count * experiment_scale)))
        instrumentation.report("run_count", run_count)

        results_across_runs = []

        # Runs...
        for run_id in range(run_count):
            instrumentation.update_local_step(run_id)

            # Dataset specific run init
            if dataset_name == "CORe50":
                dataset.setup(scenario=core50_scenario, run=run_id)

            # Test datasets
            test_pools = []
            with instrumentation.InstrumentationContext("test_pools"):
                for i in range(dataset.test_pool_count()):
                    instrumentation.update_local_step(i)
                    test_pool = dataset.test_pool(i, label_gt_resource_id)
                    if validation_scale < 1.0:
                        test_pool = test_pool[:min(
                            max(1,
                                int(math.ceil(
                                    len(test_pool) * validation_scale))),
                            len(test_pool),
                        )]
                    instrumentation.report("size", len(test_pool))
                    test_pools += [test_pool]

            # Build methods
            kb = knowledge.KnowledgeBase()

            if restore_path is not None:
                kb.restore(restore_path)
            else:
                # Add hierarchy
                wna = wordnet.WordNetAccess()
                kb.add_relation(
                    "hypernymy",
                    is_symmetric=False,
                    is_transitive=True,
                    is_reflexive=False,
                    explore_left=False,
                    explore_right=True,
                    sources=[dataset.relation("hypernymy"), wna],
                )

            im = interaction.method(interaction_method, kb)
            cls = hierarchicalclassification.method(cls_method, kb)
            ilm = incrementallearning.method(ilm_method, cls)

            # Restore
            if restore_path is not None:
                ilm.restore(restore_path)

            # Evaluator
            evaluator = evaluation.method(evaluators, kb)

            train_pool_count = dataset.train_pool_count()

            # Collect training data
            train_pool = []
            for train_pool_id in range(train_pool_count):
                train_pool += pool.FixedPool(
                    dataset.train_pool(train_pool_id, label_gt_resource_id))

            train_pool_size = len(train_pool)
            instrumentation.report("train_pool_size", train_pool_size)

            if observe_gt_concepts:
                kb.observe_concepts([
                    sample.get_resource(label_gt_resource_id)
                    for sample in train_pool
                ])

            # Run "interaction"
            labeled_pool = im.query_annotations_for(train_pool,
                                                    label_gt_resource_id,
                                                    label_ann_resource_id)

            labeled_pool_size = len(labeled_pool)
            instrumentation.report("labeled_pool_size", labeled_pool_size)

            if report_initially:
                next_progress = 0.0
            else:
                next_progress = report_interval

            def evaluate(progress=None):
                nonlocal next_progress
                if progress is not None:
                    if progress < next_progress:
                        return
                    else:
                        next_progress += report_interval

                # Quick reclass accuracy
                if not skip_reclassification:
                    with instrumentation.InstrumentationContext(
                            "reclassification", take_time=True):
                        instrumentation.update_local_step(0)
                        if validation_scale < 1.0:
                            reclass_pool = labeled_pool[:min(
                                max(
                                    1,
                                    int(
                                        math.ceil(
                                            len(labeled_pool) *
                                            validation_scale)),
                                ),
                                len(labeled_pool),
                            )]
                        else:
                            reclass_pool = labeled_pool
                        evaluator.update(
                            ilm.predict(reclass_pool, label_pred_resource_id),
                            label_ann_resource_id,
                            label_pred_resource_id,
                        )
                        instrumentation.report_dict(evaluator.result())
                        evaluator.reset()

                # Validation
                with instrumentation.InstrumentationContext("validation",
                                                            take_time=True):
                    results_across_test_pools = []
                    for test_pool_id in range(len(test_pools)):
                        instrumentation.update_local_step(test_pool_id)
                        evaluator.update(
                            ilm.predict(test_pools[test_pool_id],
                                        label_pred_resource_id),
                            label_gt_resource_id,
                            label_pred_resource_id,
                        )
                        instrumentation.report_dict(evaluator.result())
                        results_across_test_pools += [evaluator.result()]
                        evaluator.reset()

                return results_across_test_pools

            with instrumentation.InstrumentationContext("training",
                                                        take_time=True):

                # Learn the thing
                if not observe_gt_concepts:
                    kb.observe_concepts([
                        sample.get_resource(label_ann_resource_id)
                        for sample in labeled_pool
                    ])
                ilm.observe(
                    labeled_pool,
                    label_ann_resource_id,
                    progress_callback=evaluate
                    if report_interval > 0 else None,
                )

            results_across_runs += [evaluate()]

            if save_path is not None and save_path_append_run_number:
                kb.save(f"{save_path}-{run_id}")
                ilm.save(f"{save_path}-{run_id}")

        instrumentation.store_result(results_across_runs)

        # Save last model
        if save_path is not None and not save_path_append_run_number:
            kb.save(save_path)
            ilm.save(save_path)
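The evaluate closure doubles as a progress callback: ilm.observe presumably invokes it with a progress fraction, and the next_progress bookkeeping throttles full evaluations to at most one per report_interval. A self-contained illustration of that throttling pattern (all names are illustrative):

    def make_throttled_callback(report_interval, report_initially, action):
        next_progress = 0.0 if report_initially else report_interval

        def callback(progress=None):
            nonlocal next_progress
            if progress is not None:
                if progress < next_progress:
                    return
                next_progress += report_interval
            action(progress)

        return callback

    # cb = make_throttled_callback(0.25, True, print)
    # for p in (0.1, 0.2, 0.3, 0.6, 0.9): cb(p)  # fires at 0.1, 0.3, 0.6, 0.9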
Example no. 14
    def __init__(self):
        with configuration.ConfigurationContext(self.__class__.__name__):
            self.base_path = configuration.get_system(
                "NABirdsDataset.base_path")
            self.side_length = configuration.get("side_length", 224)
            self.use_lazy_mode = configuration.get("use_lazy_mode", True)

        with open(os.path.join(self.base_path, "classes.txt")) as cls:
            lines = [x.strip() for x in cls]
            tuples = [x.split(sep=" ", maxsplit=1) for x in lines]
            tuples = [(int(k), str(v)) for (k, v) in tuples]

            self.uid_for_label_id = {
                k: f"{_namespace_uid}::{int(k):03d}{v}"
                for (k, v) in tuples
            }

            self.nabirds_ids = {k for (k, v) in tuples}

            if len([k for (k, v) in tuples]) != len({k for (k, v) in tuples}):
                print("Non-unique IDs found!")
                quit(-1)

        with open(os.path.join(self.base_path,
                               "image_class_labels.txt")) as lab:
            with open(os.path.join(self.base_path,
                                   "train_test_split.txt")) as tts:
                with open(os.path.join(self.base_path, "images.txt")) as iid:
                    lablines = [x.strip() for x in lab]
                    labtuples = [
                        x.split(sep=" ", maxsplit=1) for x in lablines
                    ]
                    labtuples = [(str(some_primary_key), int(label))
                                 for (some_primary_key, label) in labtuples]

                    ttslines = [x.strip() for x in tts]
                    ttstuples = [
                        x.split(sep=" ", maxsplit=1) for x in ttslines
                    ]
                    ttstuples = [(str(some_primary_key), int(is_train_or_test))
                                 for (some_primary_key,
                                      is_train_or_test) in ttstuples]

                    iidlines = [x.strip() for x in iid]
                    iidtuples = [
                        x.split(sep=" ", maxsplit=1) for x in iidlines
                    ]
                    iidtuples = [(str(some_primary_key), str(image_path))
                                 for (some_primary_key,
                                      image_path) in iidtuples]
                    self.image_location_for_image_id = {
                        k: v
                        for (k, v) in iidtuples
                    }

                    combinedtuples = [
                        a + b for (a, b) in zip(labtuples, ttstuples)
                    ]
                    mismatches = [a != c for (a, b, c, d) in combinedtuples]
                    if any(mismatches):
                        print("Mismatch between tts and label files!")
                        quit(-1)

                    combinedtuples = [(a, b, d)
                                      for (a, b, c, d) in combinedtuples]

                    self._nabirds_training_tuples = [
                        (img, id) for (img, id, tt) in combinedtuples
                        if tt == 1
                    ]
                    self._nabirds_validation_tuples = [
                        (img, id) for (img, id, tt) in combinedtuples
                        if tt == 0
                    ]

        with open(os.path.join(self.base_path, "hierarchy.txt")) as hie:
            lines = [x.strip() for x in hie]
            tuples = [x.split(sep=" ", maxsplit=1) for x in lines]
            self.tuples = [(self.uid_for_label_id[int(k)],
                            self.uid_for_label_id[int(v)])
                           for (k, v) in tuples]
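All of the metadata files are parsed the same way: one record per line, split at the first space. The line formats implied by the code above (the hierarchy direction is an assumption based on the usual NABirds layout):

    # classes.txt:             "<class_id> <class name>"
    # image_class_labels.txt:  "<image_id> <class_id>"
    # train_test_split.txt:    "<image_id> <1 = train, 0 = test>"
    # images.txt:              "<image_id> <relative image path>"
    # hierarchy.txt:           "<class_id> <parent_class_id>"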
Example no. 15
def main():
    # Some constants
    label_gt_resource_id = "label_ground_truth"
    label_ann_resource_id = "label_annotated"
    label_pred_resource_id = "label_predicted"
    al_score_resource_id = "al_score"

    # General config
    ilm_method = configuration.get("ilm_method", no_default=True)
    cls_method = configuration.get("cls_method", no_default=True)
    interaction_method = configuration.get("interaction_method", no_default=True)
    al_method = configuration.get("al_method", no_default=True)
    experiment_scale = configuration.get("experiment_scale", no_default=True)
    label_budget = configuration.get("label_budget", no_default=True)
    dataset_name = configuration.get("dataset", no_default=True)
    experiment_name = configuration.get("experiment_name", no_default=True)
    validation_scale = configuration.get("validation_scale", no_default=True)
    evaluators = configuration.get("evaluators", no_default=True)
    al_score_fraction = configuration.get("al_score_fraction", no_default=True)

    ll_cycle_mode = configuration.get("ll_cycle_mode", no_default=True)
    if ll_cycle_mode:
        ll_cycle_length = configuration.get("ll_cycle_length", no_default=True)
    else:
        ll_cycle_length = 0

    # Eval config
    use_sacred_observer = configuration.get("use_sacred_observer", no_default=True)

    # Dataset specific setup stuff
    if dataset_name == "CORe50":
        core50_scenario = configuration.get("core50_scenario", no_default=True)

    # Instantiate dataset
    dataset = datasets.dataset(dataset_name)

    # Get instrumentation going
    instrumentation_observers = [
        instrumentation.PrintObserver(experiment_name),
        instrumentation.JSONResultObserver(experiment_name),
    ]
    if use_sacred_observer:
        from chia.framework.instrumentation import sacred_instrumentation

        instrumentation_observers += [
            sacred_instrumentation.SacredObserver(experiment_name)
        ]

    with instrumentation.InstrumentationContext(
        "run", observers=instrumentation_observers
    ):
        # Determine run count
        if dataset_name == "CORe50":
            run_count = dataset.get_run_count(core50_scenario)
        else:
            run_count = configuration.get("run_count", no_default=True)

        run_count = max(1, int(math.ceil(run_count * experiment_scale)))
        instrumentation.report("run_count", run_count)

        results_across_runs = []

        # Runs...
        for run_id in range(run_count):
            instrumentation.update_local_step(run_id)
            results_across_train_pools = []

            # Dataset specific run init
            if dataset_name == "CORe50":
                dataset.setup(scenario=core50_scenario, run=run_id)

            # Test datasets
            test_pools = []
            with instrumentation.InstrumentationContext("test_pools"):
                for i in range(dataset.test_pool_count()):
                    instrumentation.update_local_step(i)
                    test_pool = dataset.test_pool(i, label_gt_resource_id)
                    if validation_scale < 1.0:
                        test_pool = test_pool[
                            : min(
                                max(
                                    1, int(math.ceil(len(test_pool) * validation_scale))
                                ),
                                len(test_pool),
                            )
                        ]
                    instrumentation.report("size", len(test_pool))
                    test_pools += [test_pool]

            # Build methods
            kb = knowledge.KnowledgeBase()

            # Add hierarchy
            wna = wordnet.WordNetAccess()
            kb.add_relation(
                "hypernymy",
                is_symmetric=False,
                is_transitive=True,
                is_reflexive=False,
                explore_left=False,
                explore_right=True,
                sources=[dataset.relation("hypernymy"), wna],
            )

            im = interaction.method(interaction_method, kb)
            cls = hierarchicalclassification.method(cls_method, kb)
            ilm = incrementallearning.method(ilm_method, cls)
            alm = activelearning.method(al_method, ilm, kb)

            # Evaluator
            evaluator = evaluation.method(evaluators, kb)

            # Go over batches...
            with instrumentation.InstrumentationContext("train_pool"):
                train_pool_count = dataset.train_pool_count()
                instrumentation.report("train_pool_count", train_pool_count)

                for train_pool_id in range(train_pool_count):
                    instrumentation.update_local_step(train_pool_id)

                    with instrumentation.InstrumentationContext(
                        "train_pool", take_time=True
                    ):
                        # Collect training data
                        train_pool = pool.FixedPool(
                            dataset.train_pool(train_pool_id, label_gt_resource_id)
                        )

                        train_pool_size = len(train_pool)
                        instrumentation.report("train_pool_size", train_pool_size)
                        train_pool_label_budget = max(
                            1, math.ceil(label_budget * train_pool_size)
                        )
                        instrumentation.report(
                            "train_pool_budget", train_pool_label_budget
                        )

                        # Start the cycle through the training data
                        with instrumentation.InstrumentationContext("llcycle"):
                            results_across_cycles = []
                            current_cycle = 0
                            while train_pool_label_budget > 0:
                                instrumentation.update_local_step(current_cycle)
                                assert train_pool_label_budget <= len(train_pool)

                                if ll_cycle_mode:
                                    current_cycle_budget = min(
                                        train_pool_label_budget, ll_cycle_length
                                    )
                                else:
                                    current_cycle_budget = train_pool_label_budget

                                instrumentation.report(
                                    "current_cycle_budget", current_cycle_budget
                                )
                                instrumentation.report(
                                    "train_pool_size", len(train_pool)
                                )
                                train_pool_label_budget -= current_cycle_budget

                                # Only do active learning if there is even a choice
                                if (len(train_pool) - current_cycle_budget) > 0:
                                    # Run active learning method
                                    if current_cycle > 0:
                                        if al_score_fraction < 1.0:
                                            # Don't look at whole training pool
                                            fraction_train_pool = np.random.choice(
                                                train_pool,
                                                min(
                                                    len(train_pool),
                                                    max(
                                                        1,
                                                        math.ceil(
                                                            len(train_pool)
                                                            * al_score_fraction
                                                        ),
                                                    ),
                                                ),
                                                replace=False,
                                            )
                                            scored_train_pool = alm.score(
                                                fraction_train_pool,
                                                al_score_resource_id,
                                            )
                                        else:
                                            scored_train_pool = alm.score(
                                                train_pool, al_score_resource_id
                                            )
                                    else:
                                        scored_train_pool = [
                                            sample.add_resource(
                                                "llcycle",
                                                al_score_resource_id,
                                                random.uniform(0.0, 1.0),
                                            )
                                            for sample in train_pool
                                        ]

                                    sorted_scored_train_pool = list(
                                        sorted(
                                            scored_train_pool,
                                            key=lambda sample: sample.get_resource(
                                                al_score_resource_id
                                            ),
                                            reverse=True,
                                        )
                                    )

                                    pool_to_be_labeled = sorted_scored_train_pool[
                                        :current_cycle_budget
                                    ]

                                    instrumentation.report(
                                        "min_al_score",
                                        sorted_scored_train_pool[-1].get_resource(
                                            al_score_resource_id
                                        ),
                                    )
                                    instrumentation.report(
                                        "cutoff_al_score",
                                        pool_to_be_labeled[-1].get_resource(
                                            al_score_resource_id
                                        ),
                                    )
                                    instrumentation.report(
                                        "max_al_score",
                                        pool_to_be_labeled[0].get_resource(
                                            al_score_resource_id
                                        ),
                                    )
                                else:
                                    pool_to_be_labeled = train_pool

                                # Run "interaction"
                                labeled_pool = im.query_annotations_for(
                                    pool_to_be_labeled,
                                    label_gt_resource_id,
                                    label_ann_resource_id,
                                )

                                # Learn the thing
                                kb.observe_concepts(
                                    [
                                        sample.get_resource(label_ann_resource_id)
                                        for sample in labeled_pool
                                    ]
                                )
                                ilm.observe(labeled_pool, label_ann_resource_id)

                                # Remove the labeled samples from the pool
                                samples_before_removal = len(train_pool)
                                train_pool = train_pool.remove_multiple(labeled_pool)
                                assert (
                                    len(train_pool)
                                    == samples_before_removal - current_cycle_budget
                                )
                                current_cycle += 1

                                # Quick reclass accuracy
                                with instrumentation.InstrumentationContext(
                                    "reclassification"
                                ):
                                    evaluator.update(
                                        ilm.predict(
                                            labeled_pool, label_pred_resource_id
                                        ),
                                        label_ann_resource_id,
                                        label_pred_resource_id,
                                    )
                                    instrumentation.report_dict(evaluator.result())
                                    evaluator.reset()

                                # Validation
                                with instrumentation.InstrumentationContext(
                                    "validation", take_time=True
                                ):
                                    results_across_test_pools = []
                                    for test_pool_id in range(len(test_pools)):
                                        instrumentation.update_local_step(test_pool_id)
                                        evaluator.update(
                                            ilm.predict(
                                                test_pools[test_pool_id],
                                                label_pred_resource_id,
                                            ),
                                            label_gt_resource_id,
                                            label_pred_resource_id,
                                        )
                                        instrumentation.report_dict(evaluator.result())
                                        results_across_test_pools += [
                                            evaluator.result()
                                        ]
                                        evaluator.reset()
                                    results_across_cycles += [results_across_test_pools]

                    results_across_train_pools += [results_across_cycles]

            results_across_runs += [results_across_train_pools]

        instrumentation.store_result(results_across_runs)
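A worked example of the budget bookkeeping above, with illustrative numbers:

    import math

    train_pool_size = 1000   # illustrative
    label_budget = 0.1       # fraction of the pool that may be labeled
    ll_cycle_length = 25     # only relevant when ll_cycle_mode is enabled

    budget = max(1, math.ceil(label_budget * train_pool_size))  # -> 100 labels
    cycles = math.ceil(budget / ll_cycle_length)                # -> 4 cycles of 25
    # With ll_cycle_mode disabled, all 100 labels are requested in one cycle.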
Example no. 16
    def __init__(self, kb):
        with configuration.ConfigurationContext(self.__class__.__name__):
            self.kmax = configuration.get("kmax", 5)

        self.kb = kb
        self.reset()