def test_agent(self):
    """Query four agent configurations with every sampling method.

    For each sampling method, each agent (with/without a query tag and
    with/without a preselected tag) is queried twice, asking for 2 and then
    6 additional samples. After every query the bookkeeping of the labeled,
    added and unlabeled sets is checked for consistency.
    """
    self.api_workflow_client.embedding_id = "embedding_id_xyz"

    agent_0 = ActiveLearningAgent(self.api_workflow_client)
    agent_1 = ActiveLearningAgent(
        self.api_workflow_client,
        query_tag_name="query_tag_name_xyz",
    )
    agent_2 = ActiveLearningAgent(
        self.api_workflow_client,
        query_tag_name="query_tag_name_xyz",
        preselected_tag_name="preselected_tag_name_xyz",
    )
    agent_3 = ActiveLearningAgent(
        self.api_workflow_client,
        preselected_tag_name="preselected_tag_name_xyz",
    )

    for method in [SamplingMethod.CORAL, SamplingMethod.CORESET, SamplingMethod.RANDOM]:
        for agent in [agent_0, agent_1, agent_2, agent_3]:
            for batch_size in [2, 6]:
                n_old_labeled = len(agent.labeled_set)
                n_old_unlabeled = len(agent.unlabeled_set)
                n_samples = len(agent.labeled_set) + batch_size

                # CORAL is replaced by CORESET while nothing is labeled yet.
                if method == SamplingMethod.CORAL and len(agent.labeled_set) == 0:
                    sampler_config = SamplerConfig(n_samples=n_samples, method=SamplingMethod.CORESET)
                else:
                    sampler_config = SamplerConfig(n_samples=n_samples, method=method)

                if sampler_config.method == SamplingMethod.CORAL:
                    # CORAL is queried together with a scorer built from one
                    # row of normalized random predictions per unlabeled sample.
                    predictions = np.random.rand(len(agent.unlabeled_set), 10).astype(np.float32)
                    predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
                    al_scorer = ScorerClassification(predictions_normalized)
                    labeled_set, added_set = agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
                else:
                    # Reuse the config built above so that the method under
                    # test (CORESET or RANDOM) is the one actually queried.
                    labeled_set, added_set = agent.query(sampler_config=sampler_config)

                self.assertEqual(n_old_labeled + len(added_set), len(labeled_set))
                self.assertTrue(set(added_set).issubset(labeled_set))
                self.assertTrue(set(agent.labeled_set).isdisjoint(agent.unlabeled_set))
                self.assertEqual(n_old_unlabeled - len(added_set), len(agent.unlabeled_set))
def test_agent_query_too_few(self):
    """Run a RANDOM query that asks for zero samples.

    The test has no assertions; it only requires that the query completes
    without raising.
    """
    client = self.api_workflow_client
    client.embedding_id = "embedding_id_xyz"
    al_agent = ActiveLearningAgent(client, preselected_tag_name="preselected_tag_name_xyz")

    # Request an empty selection.
    empty_request = SamplerConfig(n_samples=0, method=SamplingMethod.RANDOM)
    al_agent.query(empty_request)
def test_agent_only_upload_scores(self):
    """Upload scores through the agent without running a query first.

    The scorer receives one row of normalized random predictions
    (10 columns) per entry of the agent's query set.
    """
    client = self.api_workflow_client
    client.embedding_id = "embedding_id_xyz"
    al_agent = ActiveLearningAgent(client, preselected_tag_name="preselected_tag_name_xyz")

    raw_scores = np.random.rand(len(al_agent.query_set), 10).astype(np.float32)
    # Divide every row by its own sum so that each row adds up to one.
    row_totals = np.sum(raw_scores, axis=1, keepdims=True)
    scorer = ScorerClassification(raw_scores / row_totals)

    al_agent.upload_scores(scorer)
def test_agent_wrong_scores(self):
    """A CORAL query with too few predictions must raise ``ValueError``.

    The scorer is built from three fewer prediction rows than there are
    unlabeled samples.
    """
    self.api_workflow_client.embedding_id = "embedding_id_xyz"
    agent = ActiveLearningAgent(
        self.api_workflow_client,
        preselected_tag_name="preselected_tag_name_xyz",
    )
    method = SamplingMethod.CORAL
    n_samples = len(agent.labeled_set) + 2

    n_predictions = len(agent.unlabeled_set) - 3  # the -3 should cause an error
    predictions = np.random.rand(n_predictions, 10).astype(np.float32)
    predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
    al_scorer = ScorerClassification(predictions_normalized)

    sampler_config = SamplerConfig(n_samples=n_samples, method=method)
    with self.assertRaises(ValueError):
        # The result is deliberately not unpacked here: a failed tuple
        # unpacking raises ValueError as well and could hide the fact that
        # query() itself did not raise.
        agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
def test_agent_without_embedding_id(self):
    """Run a CORAL query without setting ``embedding_id`` on the client.

    Unlike the sibling tests, this one does not assign
    ``self.api_workflow_client.embedding_id`` before creating the agent.
    The test has no assertions; the query only has to complete.
    """
    al_agent = ActiveLearningAgent(
        self.api_workflow_client,
        preselected_tag_name="preselected_tag_name_xyz",
    )

    # Ask for two samples more than are currently labeled.
    target_size = len(al_agent.labeled_set) + 2

    # One row of normalized random predictions per query-set entry.
    raw_scores = np.random.rand(len(al_agent.query_set), 10).astype(np.float32)
    row_totals = np.sum(raw_scores, axis=1, keepdims=True)
    scorer = ScorerClassification(raw_scores / row_totals)

    coral_config = SamplerConfig(n_samples=target_size, method=SamplingMethod.CORAL)
    al_agent.query(sampler_config=coral_config, al_scorer=scorer)
def test_agent(self):
    """Query four agent configurations with every sampling method.

    For each sampling method, each agent (with/without a query tag and
    with/without a preselected tag) is queried twice, asking for 2 and then
    6 additional samples. The test has no assertions; every query only has
    to complete without raising.
    """
    self.api_workflow_client.embedding_id = "embedding_id_xyz"

    agent_0 = ActiveLearningAgent(self.api_workflow_client)
    agent_1 = ActiveLearningAgent(
        self.api_workflow_client,
        query_tag_name="query_tag_name_xyz",
    )
    agent_2 = ActiveLearningAgent(
        self.api_workflow_client,
        query_tag_name="query_tag_name_xyz",
        preselected_tag_name="preselected_tag_name_xyz",
    )
    agent_3 = ActiveLearningAgent(
        self.api_workflow_client,
        preselected_tag_name="preselected_tag_name_xyz",
    )

    for method in [SamplingMethod.CORAL, SamplingMethod.CORESET, SamplingMethod.RANDOM]:
        for agent in [agent_0, agent_1, agent_2, agent_3]:
            for batch_size in [2, 6]:
                n_samples = len(agent.labeled_set) + batch_size

                # CORAL is replaced by CORESET while nothing is labeled yet
                # (the condition was previously inverted, so CORAL was never
                # actually queried).
                if method == SamplingMethod.CORAL and len(agent.labeled_set) == 0:
                    sampler_config = SamplerConfig(n_samples=n_samples, method=SamplingMethod.CORESET)
                else:
                    sampler_config = SamplerConfig(n_samples=n_samples, method=method)

                if sampler_config.method == SamplingMethod.CORAL:
                    # CORAL is queried together with a scorer built from one
                    # row of normalized random predictions per unlabeled sample.
                    predictions = np.random.rand(len(agent.unlabeled_set), 10)
                    predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
                    al_scorer = ScorerClassification(predictions_normalized)
                    agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
                else:
                    # Reuse the config built above so that the method under
                    # test (CORESET or RANDOM) is the one actually queried.
                    agent.query(sampler_config=sampler_config)
def test_agent_added_set_before_query(self):
    """Reading ``added_set`` before any query must raise ``RuntimeError``.

    ``query_set``, ``labeled_set`` and ``unlabeled_set`` are read first and
    are expected not to raise.
    """
    client = self.api_workflow_client
    client.embedding_id = "embedding_id_xyz"
    al_agent = ActiveLearningAgent(client, preselected_tag_name="preselected_tag_name_xyz")

    # These attributes are read without a preceding query.
    for readable in ("query_set", "labeled_set", "unlabeled_set"):
        getattr(al_agent, readable)

    with self.assertRaises(RuntimeError):
        _ = al_agent.added_set
def test_agent_with_generator(self):
    """Feed the scorer from a generator and check that it cannot be reused.

    A CORAL query is run with a ``ScorerSemanticSegmentation`` built from a
    generator of per-sample predictions. Uploading scores with the same
    scorer afterwards must raise ``ValueError``.
    """
    client = self.api_workflow_client
    client.embedding_id = "embedding_id_xyz"

    width = height = 32
    no_classes = 13

    al_agent = ActiveLearningAgent(client, preselected_tag_name="preselected_tag_name_xyz")
    target_size = len(al_agent.labeled_set) + 2
    sample_count = len(al_agent.query_set)

    # Random per-pixel scores, normalized along axis 1 (size ``no_classes``).
    raw_scores = np.random.rand(sample_count, no_classes, width, height).astype(np.float32)
    class_totals = np.sum(raw_scores, axis=1, keepdims=True)
    normalized_scores = raw_scores / class_totals

    # A generator expression yields every sample's prediction exactly once.
    one_shot_predictions = (sample_scores for sample_scores in normalized_scores)
    scorer = ScorerSemanticSegmentation(one_shot_predictions)

    coral_config = SamplerConfig(n_samples=target_size, method=SamplingMethod.CORAL)
    al_agent.query(sampler_config=coral_config, al_scorer=scorer)

    # The generator has been consumed by now, so a second use must fail.
    with self.assertRaises(ValueError):
        al_agent.upload_scores(scorer)
def t_est_active_learning(api_workflow_client: ApiWorkflowClient,
                          method: SamplingMethod = SamplingMethod.CORAL,
                          query_tag_name: str = 'initial-tag',
                          preselected_tag_name: str = None,
                          n_samples_additional: List[int] = [2, 5]):
    """Run several active learning iterations against the given client.

    Args:
        api_workflow_client: Client used for the tag-creating samplings and
            handed to the ``ActiveLearningAgent``.
        method: Sampling method used for the queries. When it is CORAL, the
            first iteration uses CORESET instead, because no scorer exists
            yet at that point.
        query_tag_name: Name of the query tag; a RANDOM sampling of 100
            samples with this name is attempted first. Skipped if ``None``.
        preselected_tag_name: Name of the preselected tag; a RANDOM sampling
            of 10 samples with this name is attempted first. Skipped if
            ``None``.
        n_samples_additional: Number of samples to add to the labeled set in
            each iteration. The list is only read, never modified.

    Raises:
        AssertionError: If the labeled or unlabeled set does not have the
            expected size after a query.
    """
    # Create the tags with 100 respectively 10 samples if not yet existent.
    for tag_name, tag_size in ((query_tag_name, 100), (preselected_tag_name, 10)):
        if tag_name is None:
            continue
        tag_config = SamplerConfig(method=SamplingMethod.RANDOM,
                                   n_samples=tag_size,
                                   name=tag_name)
        try:
            api_workflow_client.sampling(sampler_config=tag_config)
        except RuntimeError:
            # Deliberately ignored; presumably raised when the tag already
            # exists -- TODO confirm against the client implementation.
            pass

    # Define the active learning agent.
    agent = ActiveLearningAgent(api_workflow_client,
                                query_tag_name=query_tag_name,
                                preselected_tag_name=preselected_tag_name)

    total_no_samples = len(agent.unlabeled_set) + len(agent.labeled_set)

    al_scorer = None

    # A distinct loop variable keeps the ``n_samples_additional`` parameter
    # from being overwritten while it is iterated.
    for iteration, n_additional in enumerate(n_samples_additional):
        n_samples = len(agent.labeled_set) + n_additional
        print(
            f"Beginning with iteration {iteration} to have {n_samples} labeled samples."
        )

        # Perform a sampling.
        method_here = SamplingMethod.CORESET if iteration == 0 and method == SamplingMethod.CORAL else method
        sampler_config = SamplerConfig(method=method_here, n_samples=n_samples)
        if al_scorer is None:
            agent.query(sampler_config=sampler_config)
        else:
            agent.query(sampler_config=sampler_config, al_scorer=al_scorer)

        assert len(agent.labeled_set) == n_samples
        assert len(agent.unlabeled_set) == total_no_samples - n_samples

        # Update the scorer with one row of normalized random predictions per
        # query-set entry. The normalized array is passed on; previously the
        # raw, unnormalized predictions were handed to the scorer.
        n_predictions = len(agent.query_set)
        n_classes = 10
        predictions = np.random.rand(n_predictions, n_classes)
        predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(model_output=predictions_normalized)

    print("Success!")
# %% # First we read the variables we set before as environment variables via the console token = os.getenv("LIGHTLY_TOKEN", default="YOUR_TOKEN") path_to_embeddings_csv = os.getenv("LIGHTLY_EMBEDDINGS_CSV", default="path_to_your_embeddings_csv") # We define the client to the Lightly Platform API api_workflow_client = ApiWorkflowClient(token=token) api_workflow_client.create_dataset( dataset_name="active_learning_clothing_dataset") # %% # We define the dataset, the classifier and the active learning agent dataset = CSVEmbeddingDataset(path_to_embeddings_csv=path_to_embeddings_csv) classifier = LogisticRegression(max_iter=1000) agent = ActiveLearningAgent(api_workflow_client=api_workflow_client) # %% # 1. Choose an initial subset of your dataset. # We want to start with 200 samples and use the CORESET sampler for sampling them. print("Starting the initial sampling") sampler_config = SamplerConfig(n_samples=200, method=SamplingMethod.CORESET, name='initial-selection') agent.query(sampler_config=sampler_config) print(f"There are {len(agent.labeled_set)} samples in the labeled set.") # %% # 2. Train a classifier on the labeled set. labeled_set_features = dataset.get_features(agent.labeled_set) labeled_set_labels = dataset.get_labels(agent.labeled_set)