def test_pipeline():
    """Check that a memorizing learner describes only the exact perception it observed."""

    def perception_of(*features):
        # Wrap the given features in a single-frame bag-of-features perception.
        return PerceptualRepresentation(
            [BagOfFeaturesPerceptualRepresentationFrame(features)]
        )

    red_truck_example = LearningExample(
        perception=perception_of("red", "truck"),
        linguistic_description=TokenSequenceLinguisticDescription(("red", "truck")),
    )
    curriculum = [red_truck_example]

    learner: MemorizingLanguageLearner[
        BagOfFeaturesPerceptualRepresentationFrame, TokenSequenceLinguisticDescription
    ] = MemorizingLanguageLearner()
    for example in curriculum:
        learner.observe(example)

    # shouldn't be able to describe "red" or "truck" alone
    for lone_feature in ("red", "truck"):
        assert not learner.describe(perception_of(lone_feature))

    # but should be able to describe "red truck"
    red_truck_descriptions = learner.describe(perception_of("red", "truck"))
    assert len(red_truck_descriptions) == 1
    assert only(red_truck_descriptions).as_token_sequence() == ("red", "truck")
def test_object_recognition_with_drink_perception():
    """
    Regression test for object recognition on a round-tripped "drink" perception.

    Pickling and unpickling the perception through the normal pickling interface
    produces an error. This test instead round-trips each perception with the
    AdamPickler and AdamUnpickler and confirms the learner can still observe it.

    See https://github.com/isi-vista/adam/issues/958.
    """
    language_mode = LanguageMode.ENGLISH

    drink_situations = sampled(
        drink_test_template(),
        max_to_sample=3,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        chooser=PHASE1_CHOOSER_FACTORY(),
        block_multiple_of_the_same_type=True,
    )
    curriculum = phase1_instances(
        "train",
        drink_situations,
        language_generator=phase1_language_generator(language_mode),
    )

    learner = IntegratedTemplateLearner(
        object_learner=LANGUAGE_MODE_TO_TEMPLATE_LEARNER_OBJECT_RECOGNIZER[language_mode]
    )

    for _, language, perception in curriculum.instances():
        # Observe the round-tripped perception, not the original object.
        round_tripped_perception = _pickle_and_unpickle_object(perception)
        learner.observe(LearningExample(round_tripped_perception, language))
def test_pursuit_preposition_on_learner(language_mode):
    """Train a preposition pursuit learner on ball/table "on" scenes and check its first description."""
    learner = PrepositionPursuitLearner(
        learning_factor=0.5,
        graph_match_confirmation_threshold=0.7,
        lexicon_entry_threshold=0.7,
        rng=random.Random(0),  # fixed seed keeps the learner deterministic
        smoothing_parameter=0.001,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        object_recognizer=LANGUAGE_MODE_TO_OBJECT_RECOGNIZER[language_mode],
        language_mode=language_mode,
    )  # type: ignore

    ball = standard_object("ball", BALL)
    table = standard_object("table", TABLE)
    language_generator = phase1_language_generator(language_mode)

    def on_curriculum(name, *, is_training, max_to_sample):
        # Build one "on" curriculum; training and test differ only in these arguments.
        return phase1_instances(
            name,
            situations=sampled(
                _on_template(ball, table, immutableset(), is_training=is_training),
                chooser=PHASE1_CHOOSER_FACTORY(),
                ontology=GAILA_PHASE_1_ONTOLOGY,
                max_to_sample=max_to_sample,
            ),
            language_generator=language_generator,
        )

    on_train_curriculum = on_curriculum(
        "Preposition Unit Train", is_training=True, max_to_sample=10
    )
    on_test_curriculum = on_curriculum(
        "Preposition Unit Test", is_training=False, max_to_sample=1
    )

    for _, train_language, train_perception in on_train_curriculum.instances():
        # Get the object matches first - preposition learner can't learn without
        # already recognized objects
        learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in on_test_curriculum.instances():
        descriptions = learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        # Only the first description is compared against the gold.
        assert described_token_sequences[0] == gold
def test_pursuit_preposition_has_learner(language_mode, learner):
    """Train on "person has inanimate object" scenes and test on "person has ball"."""
    person = standard_object(
        "person", PERSON, banned_properties=[IS_SPEAKER, IS_ADDRESSEE]
    )
    inanimate_object = standard_object(
        "inanimate-object", INANIMATE_OBJECT, required_properties=[PERSON_CAN_HAVE]
    )
    ball = standard_object("ball", BALL)
    language_generator = phase1_language_generator(language_mode)

    def has_curriculum(name, template, max_to_sample):
        # Sample situations from the template and wrap them as a phase 1 curriculum.
        return phase1_instances(
            name,
            situations=sampled(
                template,
                chooser=PHASE1_CHOOSER_FACTORY(),
                ontology=GAILA_PHASE_1_ONTOLOGY,
                max_to_sample=max_to_sample,
                block_multiple_of_the_same_type=True,
            ),
            language_generator=language_generator,
        )

    has_train_curriculum = has_curriculum(
        "Has Unit Train", _x_has_y_template(person, inanimate_object), 2
    )
    has_test_curriculum = has_curriculum(
        "Has Unit Test", _x_has_y_template(person, ball), 1
    )

    # The `learner` argument is a factory called with the language mode.
    processing_learner = learner(language_mode)

    for _, train_language, train_perception in has_train_curriculum.instances():
        processing_learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in has_test_curriculum.instances():
        descriptions = processing_learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def test_pursuit_preposition_over_learner(language_mode, learner):
    """Train on distal ball-over-table scenes and check the gold description is produced."""
    ball = standard_object("ball", BALL)
    table = standard_object("table", TABLE)
    language_generator = phase1_language_generator(language_mode)

    def over_curriculum(name, *, is_training, max_to_sample):
        # Training and test curricula share everything but these two settings.
        over_situations = sampled(
            _over_template(
                ball, table, immutableset(), is_training=is_training, is_distal=True
            ),
            chooser=PHASE1_CHOOSER_FACTORY(),
            ontology=GAILA_PHASE_1_ONTOLOGY,
            max_to_sample=max_to_sample,
            block_multiple_of_the_same_type=True,
        )
        return phase1_instances(
            name, situations=over_situations, language_generator=language_generator
        )

    over_train_curriculum = over_curriculum(
        "Preposition Over Unit Train", is_training=True, max_to_sample=10
    )
    over_test_curriculum = over_curriculum(
        "Preposition Over Unit Test", is_training=False, max_to_sample=1
    )

    # The `learner` argument is a factory called with the language mode.
    processing_learner = learner(language_mode)

    for _, train_language, train_perception in over_train_curriculum.instances():
        processing_learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in over_test_curriculum.instances():
        descriptions = processing_learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def test_pursuit_preposition_in_learner(language_mode):
    """Train a preposition pursuit learner on water-in-cup scenes and check its first description."""
    learner = PrepositionPursuitLearner(
        learning_factor=0.5,
        graph_match_confirmation_threshold=0.7,
        lexicon_entry_threshold=0.7,
        rng=random.Random(0),  # fixed seed keeps the learner deterministic
        smoothing_parameter=0.001,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        object_recognizer=LANGUAGE_MODE_TO_OBJECT_RECOGNIZER[language_mode],
        language_mode=language_mode,
    )  # type: ignore

    water = object_variable("water", WATER)
    cup = standard_object("cup", CUP)
    language_generator = phase1_language_generator(language_mode)

    train_situations = sampled(
        _in_template(water, cup, immutableset(), is_training=True),
        chooser=PHASE1_CHOOSER_FACTORY(),
        ontology=GAILA_PHASE_1_ONTOLOGY,
        max_to_sample=10,
    )
    in_train_curriculum = phase1_instances(
        "Preposition In Unit Train",
        situations=train_situations,
        language_generator=language_generator,
    )

    test_situations = sampled(
        _in_template(water, cup, immutableset(), is_training=False),
        chooser=PHASE1_CHOOSER_FACTORY(),
        ontology=GAILA_PHASE_1_ONTOLOGY,
        max_to_sample=1,
    )
    in_test_curriculum = phase1_instances(
        "Preposition In Unit Test",
        situations=test_situations,
        language_generator=language_generator,
    )

    for _, train_language, train_perception in in_train_curriculum.instances():
        learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in in_test_curriculum.instances():
        descriptions = learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        # Only the first description is compared against the gold.
        assert described_token_sequences[0] == gold
def test_throw_animacy(language_mode, learner):
    """Train and test the learner on situations sampled from the throw-animacy templates."""

    def sample_each_template(max_to_sample):
        # Sampling is set up for every template up front (a list, not a lazy generator).
        return [
            sampled(
                situation_template=situation_template,
                max_to_sample=max_to_sample,
                ontology=GAILA_PHASE_1_ONTOLOGY,
                chooser=PHASE1_CHOOSER_FACTORY(),
                block_multiple_of_the_same_type=True,
            )
            for situation_template in make_throw_animacy_templates(None)
        ]

    # shuffle both together for the train curriculum
    train_curriculum = phase1_instances(
        "train",
        chain(*sample_each_template(10)),
        language_generator=phase1_language_generator(language_mode),
    )

    # shuffle both together for test curriculum
    test_curriculum = phase1_instances(
        "test",
        chain(*sample_each_template(1)),
        language_generator=phase1_language_generator(language_mode),
    )

    # instantiate and test the learner
    processing_learner = learner(language_mode)

    for _, train_language, train_perception in train_curriculum.instances():
        processing_learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in test_curriculum.instances():
        descriptions = processing_learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def test_pursuit_preposition_in_learner(language_mode, learner):
    """Train on water-in-cup scenes and check the gold description is among those produced."""
    water = object_variable("water", WATER)
    cup = standard_object("cup", CUP)
    language_generator = phase1_language_generator(language_mode)

    def in_curriculum(name, *, is_training, max_to_sample):
        # Training and test curricula share everything but these two settings.
        in_situations = sampled(
            _in_template(water, cup, immutableset(), is_training=is_training),
            chooser=PHASE1_CHOOSER_FACTORY(),
            ontology=GAILA_PHASE_1_ONTOLOGY,
            max_to_sample=max_to_sample,
            block_multiple_of_the_same_type=True,
        )
        return phase1_instances(
            name, situations=in_situations, language_generator=language_generator
        )

    in_train_curriculum = in_curriculum(
        "Preposition In Unit Train", is_training=True, max_to_sample=10
    )
    in_test_curriculum = in_curriculum(
        "Preposition In Unit Test", is_training=False, max_to_sample=1
    )

    # The `learner` argument is a factory called with the language mode.
    processing_learner = learner(language_mode)

    for _, train_language, train_perception in in_train_curriculum.instances():
        processing_learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in in_test_curriculum.instances():
        descriptions = processing_learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def run_imprecise_test(learner, situation_template, language_generator):
    """
    Train *learner* on situations sampled from *situation_template*, then check
    that the gold description of a sampled test situation is among the
    descriptions the learner produces.
    """
    train_situations = sampled(
        situation_template,
        max_to_sample=10,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        chooser=PHASE1_CHOOSER_FACTORY(),
        # this is a hack since our current object recognizer will throw a runtime
        # error if there are perceived objects not in the description
        block_multiple_of_the_same_type=False,
    )
    train_curriculum = phase1_instances(
        "train", chain(train_situations), language_generator=language_generator
    )

    # The test situation is drawn with the dedicated test chooser.
    test_situations = sampled(
        situation_template,
        max_to_sample=1,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        chooser=PHASE1_TEST_CHOOSER_FACTORY(),
        block_multiple_of_the_same_type=False,
    )
    test_curriculum = phase1_instances(
        "test", chain(test_situations), language_generator=language_generator
    )

    for _, train_language, train_perception in train_curriculum.instances():
        # Get the object matches first - preposition learner can't learn without
        # already recognized objects
        learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in test_curriculum.instances():
        descriptions = learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def run_preposition_test(learner, situation_template, language_generator):
    """
    Train *learner* on situations sampled from *situation_template*, then check
    that the gold description of a sampled test situation is among the
    descriptions the learner produces.
    """

    def preposition_curriculum(name, chooser, max_to_sample):
        # Both curricula sample the same template; only chooser and size differ.
        return phase1_instances(
            name,
            situations=sampled(
                situation_template,
                chooser=chooser,
                ontology=GAILA_PHASE_1_ONTOLOGY,
                max_to_sample=max_to_sample,
                block_multiple_of_the_same_type=True,
            ),
            language_generator=language_generator,
        )

    train_curriculum = preposition_curriculum(
        "Preposition Unit Train", PHASE1_CHOOSER_FACTORY(), 2
    )
    test_curriculum = preposition_curriculum(
        "Preposition Unit Test", PHASE1_TEST_CHOOSER_FACTORY(), 1
    )

    for _, train_language, train_perception in train_curriculum.instances():
        learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in test_curriculum.instances():
        descriptions = learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def test_functional_learner(language_mode: LanguageMode):
    """Train the integrated learner on the sit-on-chair curriculum and test it on a fresh sample."""
    # TODO: currently the _make_sit_curriculum defaults to bed instead of chair so
    # chair isn't predicted in the testing
    sit_train = _make_sit_on_chair_curriculum(
        5, 0, phase2_language_generator(language_mode)
    )
    sit_test = _make_sit_on_chair_curriculum(
        1, 0, phase2_language_generator(language_mode)
    )

    learner = integrated_learner_factory(language_mode)

    for _, train_language, train_perception in sit_train.instances():
        learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in sit_test.instances():
        descriptions = learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def test_pursuit_preposition_has_learner(language_mode):
    """Train a preposition pursuit learner on "person has object" scenes and check its first description."""
    person = standard_object("person", PERSON)
    inanimate_object = standard_object(
        "inanimate-object", INANIMATE_OBJECT, required_properties=[PERSON_CAN_HAVE]
    )
    ball = standard_object("ball", BALL)
    language_generator = phase1_language_generator(language_mode)

    def has_curriculum(name, template, max_to_sample):
        # Sample situations from the template and wrap them as a phase 1 curriculum.
        return phase1_instances(
            name,
            situations=sampled(
                template,
                chooser=PHASE1_CHOOSER_FACTORY(),
                ontology=GAILA_PHASE_1_ONTOLOGY,
                max_to_sample=max_to_sample,
            ),
            language_generator=language_generator,
        )

    has_train_curriculum = has_curriculum(
        "Has Unit Train", _x_has_y_template(person, inanimate_object), 2
    )
    has_test_curriculum = has_curriculum(
        "Has Unit Test", _x_has_y_template(person, ball), 1
    )

    learner = PrepositionPursuitLearner(
        learning_factor=0.5,
        graph_match_confirmation_threshold=0.7,
        lexicon_entry_threshold=0.7,
        rng=random.Random(0),  # fixed seed keeps the learner deterministic
        smoothing_parameter=0.001,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        object_recognizer=LANGUAGE_MODE_TO_OBJECT_RECOGNIZER[language_mode],
        language_mode=language_mode,
    )  # type: ignore

    for _, train_language, train_perception in has_train_curriculum.instances():
        learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in has_test_curriculum.instances():
        descriptions = learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        # Only the first description is compared against the gold.
        assert described_token_sequences[0] == gold
def test_pursuit_color_attribute(
    color_node, object_0_node, object_1_node, language_mode, learner
):
    """
    Train on two object types sharing one color property, then check the gold
    description of the first object type is among those the learner produces.
    """
    color = property_variable(f"{color_node.handle}", color_node)
    object_0 = standard_object(
        f"{object_0_node.handle}", object_0_node, added_properties=[color]
    )
    object_1 = standard_object(
        f"{object_1_node.handle}", object_1_node, added_properties=[color]
    )

    color_object_template = _object_with_color_template(object_0, None)
    # Each template is paired with the number of situations to sample from it.
    templates_with_n_samples = [
        (color_object_template, 2),
        (_object_with_color_template(object_1, None), 4),
    ]

    language_generator = phase1_language_generator(language_mode)

    train_samples = [
        sampled(
            template,
            chooser=PHASE1_CHOOSER_FACTORY(),
            ontology=GAILA_PHASE_1_ONTOLOGY,
            max_to_sample=n_samples,
            block_multiple_of_the_same_type=True,
        )
        for template, n_samples in templates_with_n_samples
    ]
    color_train_curriculum = phase1_instances(
        f"{color.handle} Color Train",
        language_generator=language_generator,
        situations=chain(flatten(train_samples)),
    )

    # The test samples only the first object's template, with the test chooser.
    test_situations = sampled(
        color_object_template,
        chooser=PHASE1_TEST_CHOOSER_FACTORY(),
        ontology=GAILA_PHASE_1_ONTOLOGY,
        max_to_sample=1,
        block_multiple_of_the_same_type=True,
    )
    color_test_curriculum = phase1_instances(
        f"{color.handle} Color Test",
        situations=test_situations,
        language_generator=language_generator,
    )

    # The `learner` argument is a factory called with the language mode.
    processing_learner = learner(language_mode)

    for _, train_language, train_perception in color_train_curriculum.instances():
        processing_learner.observe(LearningExample(train_perception, train_language))

    for _, gold_language, test_perception in color_test_curriculum.instances():
        descriptions = processing_learner.describe(test_perception)
        gold = gold_language.as_token_sequence()
        assert descriptions
        described_token_sequences = [
            description.as_token_sequence() for description in descriptions
        ]
        assert gold in described_token_sequences
def _pickle_to_path(obj: object, path: Path) -> None:
    """Pickle *obj* to *path* with the highest protocol, closing the file afterwards."""
    with path.open("wb") as pickle_file:
        pickle.dump(obj, pickle_file, pickle.HIGHEST_PROTOCOL)


def _pickle_observers(experiment, path: Path) -> None:
    """Bundle the experiment's pre-, post- and test observers into an `ObserversHolder` and pickle it to *path*."""
    _pickle_to_path(
        ObserversHolder(
            pre_observers=experiment.pre_example_training_observers,
            post_observers=experiment.post_example_training_observers,
            test_observers=experiment.test_observers,
        ),
        path,
    )


def execute_experiment(
    experiment: Experiment[SituationT, LinguisticDescriptionT, PerceptionT],
    *,
    log_path: Optional[Path] = None,
    log_hypotheses_every_n_examples: int = 250,
    learner_logging_path: Optional[Path] = None,
    log_learner_state: bool = True,
    load_learner_state: Optional[Path] = None,
    resume_from_latest_logged_state: bool = False,
    debug_learner_pickling: bool = False,
    starting_point: int = 0,
    point_to_log: int = 0,
) -> None:
    """
    Runs an `Experiment`.

    The learner is obtained (loaded from *load_learner_state*, resumed from the
    most recent state under *learner_logging_path*, or created by the experiment's
    learner factory), trained on every training stage, warmed up on the warm-up
    instance groups and finally evaluated on the test instance groups.

    Args:
        experiment: the experiment to run.
        log_path: if given, hypotheses are logged beneath this directory every
            *log_hypotheses_every_n_examples* observations and once more as "final".
        log_hypotheses_every_n_examples: interval, in observations, between logs.
        learner_logging_path: directory under which the "learner_state" and
            "observer_state" directories are created.
        log_learner_state: whether to pickle learner and observer state. It is
            switched off if no *learner_logging_path* is given or a state
            directory cannot be created.
        load_learner_state: pickle file to load the learner from.
        resume_from_latest_logged_state: resume from the most recent loadable
            learner state found under *learner_logging_path*.
        debug_learner_pickling: round-trip the learner through pickle each time
            its state is logged.
        starting_point: number of training observations to skip; negative values
            are reset to 0 with a warning.
        point_to_log: if positive, additionally log state once that many
            instances have been given to the learner.

    Raises:
        RuntimeError: if *resume_from_latest_logged_state* is combined with
            *load_learner_state*, with a positive *starting_point*, or is
            requested without a *learner_logging_path*.
        ValueError: if pre-example observers are configured and a training
            instance lacks a situation.
    """
    # the starting point must be greater than or equal to 0
    if starting_point < 0:
        logging.warning(f"Starting point {starting_point} is invalid, setting to 0")
        starting_point = 0

    if resume_from_latest_logged_state and load_learner_state is not None:
        raise RuntimeError(
            "Cannot both resume from latest logged state and load a specified state file."
        )

    if resume_from_latest_logged_state and learner_logging_path is None:
        raise RuntimeError(
            "Need state logging path to be able to resume from latest learner state."
        )

    if resume_from_latest_logged_state and starting_point > 0:
        raise RuntimeError(
            f"Starting point should not be specified "
            f"when learner is configured to resume from latest state."
            f"(specified starting point {starting_point})."
        )

    # Without a logging path there is nowhere to write state. Disable state logging
    # here instead of failing later on an unset state directory.
    if log_learner_state and learner_logging_path is None:
        logging.info(
            "No learner logging path given; proceeding without logging learner state."
        )
        log_learner_state = False

    # make the directories in which to log the learner
    if log_learner_state and learner_logging_path is not None:
        learner_path = learner_logging_path / "learner_state"
        # if the directory in which we wish to log the learner doesn't exist, we must create it
        if not learner_path.exists():
            try:
                learner_path.mkdir()
            # if we don't have a directory where we can log our learner state, we simply don't log it
            except OSError:
                logging.warning("Cannot log learner state to %s", str(learner_path))
                log_learner_state = False
                logging.warning(
                    "Proceeding without logging learner state. The experiment results from the observers won't be valid if the experiment is resumed without a log of the learner state."
                )

        observer_path = learner_logging_path / "observer_state"
        if not observer_path.exists():
            try:
                observer_path.mkdir()
            # if we don't have a directory where we can log our observer state, we simply don't log it
            except OSError:
                logging.warning("Cannot log observer state to %s", str(observer_path))
                log_learner_state = False
                logging.warning(
                    "Proceeding without logging learner. The experiment results from the observers won't be valid if the experiment is resumed without a log of the observer state."
                )

    logging.info("Beginning experiment %s", experiment.name)

    # NOTE: unpickling can execute arbitrary code; learner state files must come
    # from a trusted source.
    # if there is an existing learner to load, try to load it
    if load_learner_state:
        if starting_point == 0:
            logging.warning("Using existing learner, expected starting point > 0")
        logging.info("Loading existing learner from %s", str(load_learner_state))
        try:
            with open(load_learner_state, "rb") as learner_file:
                learner = pickle.load(learner_file)
        # if the learner can't be loaded, just instantiate the default learner and notify the user
        except OSError:
            learner = experiment.learner_factory()
            logging.warning(
                "Unable to load learner at %s, using factory instead",
                load_learner_state,
            )
    # If we should resume from the latest logged state, try to load each numbered state file,
    # starting with the file for the learner that got the farthest in the curriculum
    elif resume_from_latest_logged_state:
        logging.info("Attempting to resume from latest learner state...")
        learner = None
        # This should already be set, but just in case it wasn't, for example if
        # log_learner_state = False, set it.
        learner_path = cast(Path, learner_logging_path) / "learner_state"
        # Note that this is safe if the learner path doesn't exist -- this loop will simply never
        # execute.
        for iteration_number, learner_state_path in _learner_states_by_most_recent(
            learner_path
        ):
            try:
                with learner_state_path.open("rb") as f:
                    learner = pickle.load(f)
                starting_point = iteration_number
            except OSError:
                logging.warning(
                    "Unable to open learner state at %s; skipping.",
                    str(learner_state_path),
                )
            except pickle.UnpicklingError:
                logging.warning(
                    "Couldn't unpickle learner state at %s; skipping.",
                    str(learner_state_path),
                )
            else:
                # Stop at the first state that loads. Continuing would overwrite
                # it with an older state.
                break

        # Fall back on default learner
        if learner is None:
            logging.warning("Could not load a saved learner; using factory instead.")
            learner = experiment.learner_factory()
        # If we did load a learner we should restore the state of our observers reports to match the learner
        else:
            # WARNING: This DOES NOT verify that the observed instance number of the observers
            # matches that of the learner we loaded.
            # This issue is tracked here: https://github.com/isi-vista/adam/issues/981
            logging.info("Restoring State of Observers Text Reports")
            for observer_iter in chain(
                experiment.pre_example_training_observers,
                experiment.post_example_training_observers,
            ):
                if isinstance(
                    observer_iter, (HTMLLoggerPreObserver, HTMLLoggerPostObserver)
                ):
                    restore_html_state(
                        Path(observer_iter.html_logger.output_file_str), starting_point
                    )
                    if (
                        observer_iter.candidate_accuracy_observer
                        and observer_iter.candidate_accuracy_observer.accuracy_to_txt
                    ):
                        restore_report_state(
                            Path(observer_iter.candidate_accuracy_observer.txt_path),
                            starting_point,
                        )
                    if (
                        observer_iter.precision_recall_observer
                        and observer_iter.precision_recall_observer.make_report
                    ):
                        restore_report_state(
                            Path(observer_iter.precision_recall_observer.txt_path),
                            starting_point,
                        )
    # if there's no existing learner, instantiate the default
    else:
        learner = experiment.learner_factory()
        logging.info("Instantiated learner %s", learner)

    num_observations = 0

    for training_stage in experiment.training_stages:
        # Announce the stage now if training starts at its first instance. Otherwise
        # it is announced at the first instance that is not skipped.
        stage_announced = num_observations >= starting_point
        if stage_announced:
            logging.info("Beginning training stage %s", training_stage.name())
        for (
            situation,
            linguistic_description,
            perceptual_representation,
        ) in training_stage.instances():
            num_observations += 1

            # don't learn from anything until we've reached the start of the learning.
            # This is <= because we actually want to restart observations from one *past*
            # the starting point, as we've already observed that instance.
            if num_observations <= starting_point:
                continue

            # log the start of the learning
            if not stage_announced:
                logging.info("Beginning training stage %s", training_stage.name())
                stage_announced = True

            # if we've reached the user-given point where we want to log the learner, log it here
            if (
                point_to_log > 0
                # we log after the nth input is given to the learner
                and num_observations - 1 == point_to_log
                and log_learner_state
            ):
                logging.info(f"Reached {point_to_log} instances, logging learner")
                _pickle_to_path(
                    learner,
                    learner_path / f"learner_state_at_{str(point_to_log)}.pkl",
                )
                _pickle_observers(
                    experiment,
                    observer_path / f"observers_state_at_{str(point_to_log)}.pkl",
                )

            # if we've reached the next num_observations where we should log hypotheses, log the hypotheses
            if log_path and num_observations % log_hypotheses_every_n_examples == 0:
                learner.log_hypotheses(log_path / str(num_observations))
                # if we are logging the learner state, we do it here
                if log_learner_state:
                    _pickle_to_path(
                        learner,
                        learner_path / f"learner_state_at_{str(num_observations)}.pkl",
                    )
                    if debug_learner_pickling:
                        logging.info("Pickling and unpickling learner...")
                        learner = pickle.loads(
                            pickle.dumps(learner, protocol=pickle.HIGHEST_PROTOCOL)
                        )
                        logging.info("Pickled and unpickled.")
                    _pickle_observers(
                        experiment,
                        observer_path
                        / f"observers_state_at_{str(num_observations)}.pkl",
                    )

            if experiment.pre_example_training_observers:
                learner_descriptions_before_seeing_example = learner.describe(
                    perceptual_representation
                )
                if situation:
                    for pre_example_observer in experiment.pre_example_training_observers:
                        pre_example_observer.observe(
                            situation,
                            linguistic_description,
                            perceptual_representation,
                            learner_descriptions_before_seeing_example,
                        )
                        pre_example_observer.report()
                else:
                    raise ValueError(
                        "Observed training instances cannot lack a situation"
                    )

            learner.observe(
                LearningExample(perceptual_representation, linguistic_description),
                offset=starting_point,
            )

            if experiment.post_example_training_observers:
                learner_descriptions_after_seeing_example = learner.describe(
                    perceptual_representation
                )
                for post_example_observer in experiment.post_example_training_observers:
                    post_example_observer.observe(
                        situation,
                        linguistic_description,
                        perceptual_representation,
                        learner_descriptions_after_seeing_example,
                    )
                    post_example_observer.report()

    logging.info("Training complete")

    for training_observer in chain(
        experiment.pre_example_training_observers,
        experiment.post_example_training_observers,
    ):
        training_observer.report()

    if log_path:
        learner.log_hypotheses(log_path / "final")

    # log the final learner if the user wishes for it to be logged
    if log_learner_state:
        _pickle_to_path(learner, learner_path / "final_learner_state.pkl")
        _pickle_observers(experiment, observer_path / "final_observers_state.pkl")

    logging.info("Warming up for tests")
    for warm_up_instance_group in experiment.warm_up_test_instance_groups:
        for (
            _,
            warm_up_test_instance_language,
            warm_up_test_instance_perception,
        ) in warm_up_instance_group.instances():
            learner.observe(
                LearningExample(
                    warm_up_test_instance_perception, warm_up_test_instance_language
                )
            )

    logging.info("Performing tests")
    num_test_observations = 0
    for test_instance_group in experiment.test_instance_groups:
        for (
            situation,
            test_instance_language,
            test_instance_perception,
        ) in test_instance_group.instances():
            logging.info(f"Test Description: {num_test_observations}")
            num_test_observations += 1
            descriptions_from_learner = learner.describe(test_instance_perception)
            for test_observer in experiment.test_observers:
                test_observer.observe(
                    situation,
                    test_instance_language,
                    test_instance_perception,
                    descriptions_from_learner,
                )
                test_observer.report()

            if (
                log_path
                and num_test_observations % log_hypotheses_every_n_examples == 0
            ):
                observation_number = num_observations + num_test_observations
                # if we are logging the learner state, we do it here
                if log_learner_state:
                    # The test instances shouldn't change the learner's internal state,
                    # but the learner is dumped anyway so that the instance numbers of
                    # learner and observer state files match for experiment restoration.
                    _pickle_to_path(
                        learner,
                        learner_path
                        / f"learner_state_at_{str(observation_number)}.pkl",
                    )
                    _pickle_observers(
                        experiment,
                        observer_path
                        / f"observers_state_at_{str(observation_number)}.pkl",
                    )

    for test_observer in experiment.test_observers:
        test_observer.report()

    logging.info("Experiment %s complete", experiment.name)
def _print_color_associations(learner, words):
    """
    Print the attribute concepts associated with each of *words*.

    For every word that has a concept node in ``learner.semantics_graph``,
    the neighbouring `AttributeConcept` nodes are listed together with the
    weight of the connecting edge, highest weight first.
    Words without a concept node are skipped.
    """
    for word in words:
        word_concept = get_concept_node_from_graph(word, learner.semantics_graph)
        if not word_concept:
            continue
        results = [
            (
                color_concept.debug_string,
                learner.semantics_graph[word_concept][color_concept]["weight"],
            )
            for color_concept in learner.semantics_graph.neighbors(word_concept)
            if isinstance(color_concept, AttributeConcept)
        ]
        results.sort(key=lambda x: x[1], reverse=True)
        print("\nObject:", word)
        print(
            "Associated Colors:", [(r[0].replace("_slot1", ""), r[1]) for r in results]
        )


def run_experiment(learner, curricula, experiment_id):
    """
    Teach *learner* the given *curricula*, then a colour-predicate curriculum,
    printing the learner's object/colour associations before and after the
    colour predicates are taught.

    Afterwards the learner's hypotheses and a rendering of its semantics graph
    are written under ``./renders/<experiment_id>``.
    """
    # Only the keys (object words) are used for the association reports.
    english_color_dictionary = {
        "watermelon": "green",
        "cookie": "light brown",
        "paper": "white",
    }

    # Teach pretraining curriculum
    for curriculum in curricula:
        print("\nTeaching", curriculum.name())
        for (
            _,
            linguistic_description,
            perceptual_representation,
        ) in curriculum.instances():
            print("Observation: ", " ".join(linguistic_description.as_token_sequence()))
            learner.observe(
                LearningExample(perceptual_representation, linguistic_description)
            )

    # Evaluate associations before generics
    print("\nColor assocations - Before Generics")
    _print_color_associations(learner, english_color_dictionary)

    # Teach generics
    # NOTE(review): `language_generator` is not bound inside this function;
    # presumably it is a module-level name - confirm it exists in this module.
    color_predicates = _make_colour_predicates_curriculum(None, None, language_generator)
    print("\nTeaching color predicates")
    for (
        _,
        linguistic_description,
        perceptual_representation,
    ) in color_predicates.instances():
        learner.observe(
            LearningExample(perceptual_representation, linguistic_description)
        )
        print("Observation:", " ".join(linguistic_description.as_token_sequence()))

    # Evaluate associations after generics
    print("\nColor assocations - After Generics")
    _print_color_associations(learner, english_color_dictionary)

    learner.log_hypotheses(Path(f"./renders/{experiment_id}"))
    learner.render_semantics_to_file(
        graph=learner.semantics_graph,
        graph_name="semantics",
        output_file=Path(f"./renders/{experiment_id}/semantics.png"),
    )
def run_experiment(learner, curricula, experiment_id):
    """
    Teach *learner* the given *curricula*, introduce three pseudowords as
    members of known kinds, and print a table of kind-membership scores.

    Afterwards the learner's hypotheses and a rendering of its semantics graph
    are written under ``./renders/<experiment_id>``.
    """
    # Teach each pretraining curriculum
    for curriculum in curricula:
        print("Teaching", curriculum.name(), "curriculum")
        for (
            _,
            linguistic_description,
            perceptual_representation,
        ) in curriculum.instances():
            learner.observe(
                LearningExample(perceptual_representation, linguistic_description))

    # Teach each kind member against a perception that contains only the ground.
    # NOTE(review): the situation uses GAILA_PHASE_2_ONTOLOGY while the ground
    # object is instantiated with GAILA_PHASE_1_ONTOLOGY - confirm this is intended.
    empty_situation = HighLevelSemanticsSituation(
        ontology=GAILA_PHASE_2_ONTOLOGY,
        salient_objects=immutableset([
            SituationObject.instantiate_ontology_node(
                ontology_node=GROUND,
                debug_handle=GROUND.handle,
                ontology=GAILA_PHASE_1_ONTOLOGY,
            )
        ]),
    )
    empty_perception = GAILA_PHASE_2_PERCEPTION_GENERATOR.generate_perception(
        empty_situation, PHASE1_CHOOSER_FACTORY())
    pseudoword_to_kind = {"wug": "animal", "vonk": "food", "snarp": "people"}

    print("Teaching new objects in known categories")
    for word, kind in pseudoword_to_kind.items():
        print("Observation: ", word, "s", "are", kind, "s")
        # The original chose between two identical token sequences depending on
        # whether the kind was "people"; both branches were the same, so a single
        # description is built here.
        learner.observe(
            LearningExample(
                empty_perception,
                TokenSequenceLinguisticDescription(
                    tokens=(word, "s", "are", kind, "s")),
            ))

    semantics_manager: SemanticsManager = SemanticsManager(
        semantics_graph=learner.semantics_graph)
    complete_results = []
    print("\nResults for ", experiment_id)
    for word in pseudoword_to_kind:
        results = [(kind, semantics_manager.evaluate_kind_membership(word, kind))
                   for kind in pseudoword_to_kind.values()]
        complete_results.append(results)

    # np.asscalar is deprecated and absent from current NumPy releases;
    # ndarray.item() is its documented replacement.
    results_df = pd.DataFrame(
        [[np.asarray(score).item() for _, score in row]
         for row in complete_results],
        columns=["Animal", "Food", "People"],
    )
    results_df.insert(0, "Words", list(pseudoword_to_kind))
    print(tabulate(results_df, headers="keys", tablefmt="psql"))

    learner.log_hypotheses(Path(f"./renders/{experiment_id}"))
    learner.render_semantics_to_file(
        graph=learner.semantics_graph,
        graph_name="semantics",
        output_file=Path(f"./renders/{experiment_id}/semantics.png"),
    )
def run_subset_learner_for_object(
        nodes: Iterable[OntologyNode], *, learner,
        language_generator: LanguageGenerator[HighLevelSemanticsSituation,
                                              LinearizedDependencyTree]):
    """
    Train *learner* on every possible single-object situation for each of
    *nodes*, then assert that it can describe one sampled situation built from
    the first node's template.
    """
    # One object variable (with a colour property) per ontology node ...
    object_variables = []
    for node in nodes:
        object_variables.append(
            object_variable("obj-with-color", node,
                            added_properties=[color_variable("color")]))

    # ... and one situation template per object variable.
    obj_templates = []
    for variable in object_variables:
        obj_templates.append(
            Phase1SituationTemplate(
                "colored-obj-object",
                salient_object_variables=[variable],
                syntax_hints=[IGNORE_COLORS],
            ))

    # Enumerate all training situations eagerly, template by template.
    situations_per_template = []
    for template in obj_templates:
        situations_per_template.append(
            all_possible(
                template,
                chooser=PHASE1_CHOOSER_FACTORY(),
                ontology=GAILA_PHASE_1_ONTOLOGY,
            ))
    obj_curriculum = phase1_instances(
        "all obj situations",
        flatten(situations_per_template),
        language_generator=language_generator,
    )

    test_situations = sampled(
        obj_templates[0],
        chooser=PHASE1_TEST_CHOOSER_FACTORY(),
        ontology=GAILA_PHASE_1_ONTOLOGY,
        max_to_sample=1,
    )
    test_obj_curriculum = phase1_instances(
        "obj test",
        situations=test_situations,
        language_generator=language_generator,
    )

    # Training pass.
    for _, description, perception in obj_curriculum.instances():
        learner.observe(LearningExample(perception, description))

    # Evaluation pass: the gold token sequence must be among the learner's outputs.
    for _, gold_description, test_perception in test_obj_curriculum.instances():
        descriptions_from_learner = learner.describe(test_perception)
        gold = gold_description.as_token_sequence()
        produced = [
            candidate.as_token_sequence()
            for candidate in descriptions_from_learner
        ]
        assert gold in produced
def test_pursuit_object_learner_with_gaze(language_mode):
    """
    Train a pursuit object learner (with gaze ranked higher) on a simple
    pursuit curriculum and check it can describe every target object.
    """
    target_objects = [
        BALL,
        # PERSON,
        # CHAIR,
        # TABLE,
        DOG,
        # BIRD,
        BOX,
    ]

    language_generator = phase1_language_generator(language_mode)

    target_test_templates = []
    for obj in target_objects:
        # Create train and test templates for the target objects
        test_obj_object = object_variable("obj-with-color", obj)
        test_template = Phase1SituationTemplate(
            "colored-obj-object",
            salient_object_variables=[test_obj_object],
            syntax_hints=[IGNORE_COLORS],
            gazed_objects=[test_obj_object],
        )
        target_test_templates.extend(
            all_possible(
                test_template,
                chooser=PHASE1_CHOOSER_FACTORY(),
                ontology=GAILA_PHASE_1_ONTOLOGY,
            ))

    # We can use this to generate the actual pursuit curriculum
    train_curriculum = make_simple_pursuit_curriculum(
        target_objects=target_objects,
        num_instances=30,
        num_objects_in_instance=3,
        num_noise_instances=0,
        language_generator=language_generator,
        add_gaze=True,
    )

    test_obj_curriculum = phase1_instances(
        "obj test",
        situations=target_test_templates,
        language_generator=language_generator,
    )

    # All parameters should be in the range 0-1.
    # Learning factor works better when kept < 0.5
    # Graph matching threshold doesn't seem to matter that much, as often seems to be either a
    # complete or a very small match.
    # The lexicon threshold works better between 0.07-0.3, but we need to play around with it
    # because we end up not lexicalizing items sufficiently, due to the diminishing lexicon
    # probability through training.
    # A single seeded RNG is created here; an earlier, identical RNG that was
    # built before the curriculum and never used has been removed.
    rng = random.Random()
    rng.seed(0)
    learner = IntegratedTemplateLearner(object_learner=PursuitObjectLearnerNew(
        learning_factor=0.05,
        graph_match_confirmation_threshold=0.7,
        lexicon_entry_threshold=0.7,
        rng=rng,
        smoothing_parameter=0.002,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        rank_gaze_higher=True,
    ))

    for training_stage in [train_curriculum]:
        for (
                _,
                linguistic_description,
                perceptual_representation,
        ) in training_stage.instances():
            learner.observe(
                LearningExample(perceptual_representation,
                                linguistic_description))

    for test_instance_group in [test_obj_curriculum]:
        for (
                _,
                test_instance_language,
                test_instance_perception,
        ) in test_instance_group.instances():
            logging.info("lang: %s", test_instance_language)
            descriptions_from_learner = learner.describe(
                test_instance_perception)
            gold = test_instance_language.as_token_sequence()
            assert gold in [
                desc.as_token_sequence() for desc in descriptions_from_learner
            ]
def test_subset_preposition_has(language_mode, learner):
    """
    Train on "person has cup" and "person has book" (one sampled situation
    each) and check the learner describes a sampled "person has ball".
    """
    person = standard_object("person", PERSON)
    cup = standard_object("cup", CUP)
    book = standard_object("book", BOOK)
    ball = standard_object("ball", BALL)

    language_generator = phase1_language_generator(language_mode)

    # One sampled training situation per possessed object, in this order.
    has_train_curriculum = []
    for possession in (cup, book):
        training_instances = phase1_instances(
            "Has Unit Train",
            language_generator=language_generator,
            situations=sampled(
                _x_has_y_template(person, possession),
                chooser=PHASE1_CHOOSER_FACTORY(),
                ontology=GAILA_PHASE_1_ONTOLOGY,
                max_to_sample=1,
                block_multiple_of_the_same_type=True,
            ),
        )
        has_train_curriculum.extend(training_instances.instances())

    has_test_curriculum = phase1_instances(
        "Has Unit Test",
        situations=sampled(
            _x_has_y_template(person, ball),
            chooser=PHASE1_CHOOSER_FACTORY(),
            ontology=GAILA_PHASE_1_ONTOLOGY,
            max_to_sample=1,
            block_multiple_of_the_same_type=True,
        ),
        language_generator=language_generator,
    )

    process_learner = learner(language_mode)

    for _, description, perception in has_train_curriculum:
        process_learner.observe(LearningExample(perception, description))

    for _, gold_description, test_perception in has_test_curriculum.instances():
        descriptions_from_learner = process_learner.describe(test_perception)
        gold = gold_description.as_token_sequence()
        # The learner must produce something, and the gold sequence must be in it.
        assert descriptions_from_learner
        produced = [
            candidate.as_token_sequence()
            for candidate in descriptions_from_learner
        ]
        assert gold in produced
def run_generics_test(learner, language_mode):
    """
    Feed *learner* a fixed sequence of curricula (eating actions, plurals,
    coloured objects, colour and kind predicates, generic statements)
    generated for *language_mode*.
    """

    def build_object_multiples_situations(
            ontology: Ontology,
            *,
            samples_per_object: int = 3,
            chooser: RandomChooser) -> Iterable[HighLevelSemanticsSituation]:
        """
        Yield situations containing two or three instances of the same object type.
        """
        # Exclude slow objects for now
        slow_handles = ("bird", "dog", "truck")
        for object_type in PHASE_1_CURRICULUM_OBJECTS:
            if object_type.handle in slow_handles:
                continue
            is_liquid = ontology.has_all_properties(object_type, [LIQUID])
            # don't want multiples of named people (or of liquids)
            if is_recognized_particular(ontology, object_type) or is_liquid:
                continue
            for _ in range(samples_per_object):
                num_objects = chooser.choice(range(2, 4))
                copies = []
                for idx in range(num_objects):
                    copies.append(
                        SituationObject.instantiate_ontology_node(
                            ontology_node=object_type,
                            debug_handle=object_type.handle + f"_{idx}",
                            ontology=GAILA_PHASE_1_ONTOLOGY,
                        ))
                yield HighLevelSemanticsSituation(
                    ontology=GAILA_PHASE_1_ONTOLOGY,
                    salient_objects=copies,
                    axis_info=AxesInfo(),
                )

    language_generator = phase2_language_generator(language_mode)

    # Teach plurals
    plural_situations = build_object_multiples_situations(
        ontology=GAILA_PHASE_1_ONTOLOGY, chooser=PHASE1_CHOOSER_FACTORY())
    plurals = phase1_instances(
        "plurals pretraining",
        plural_situations,
        language_generator=language_generator,
    )

    curricula = [
        # Actions - verbs in generics
        _make_eat_curriculum(10, 0, language_generator),
        # Plurals
        plurals,
        # Color attributes
        _make_objects_with_colors_curriculum(None, None, language_generator),
        # Predicates
        _make_colour_predicates_curriculum(None, None, language_generator),
        _make_kind_predicates_curriculum(None, None, language_generator),
        # Generics
        _make_generic_statements_curriculum(
            num_samples=3,
            noise_objects=0,
            language_generator=language_generator),
    ]

    # Present every instance of every curriculum to the learner, in order.
    for stage in curricula:
        for _, description, perception in stage.instances():
            learner.observe(LearningExample(perception, description))
def _dump_pickle(obj, path) -> None:
    """
    Pickle *obj* to *path* using the highest protocol, closing the file afterwards.
    """
    with open(path, "wb") as out_file:
        pickle.dump(obj, out_file, pickle.HIGHEST_PROTOCOL)


def execute_experiment(
    experiment: Experiment[SituationT, LinguisticDescriptionT, PerceptionT],
    *,
    log_path: Optional[Path] = None,
    log_hypotheses_every_n_examples: int = 250,
    learner_logging_path: Optional[Path] = None,
    log_learner_state: bool = True,
    load_learner_state: Optional[Path] = None,
    starting_point: int = 0,
    point_to_log: int = 0,
) -> None:
    """
    Runs an `Experiment`.

    The learner is trained on every training stage, warmed up on the warm-up
    test instances, and then asked to describe each test instance; the
    experiment's observers are notified along the way.

    *log_path*: if given, the learner's hypotheses are logged to
    ``log_path / <n>`` every *log_hypotheses_every_n_examples* training
    observations and to ``log_path / "final"`` after training.

    *learner_logging_path*: directory under which a ``learner_state``
    sub-directory receives the pickled learner states.

    *log_learner_state*: whether to pickle the learner at all. It is switched
    off (with a warning) when *learner_logging_path* is not given or the
    ``learner_state`` directory cannot be created.

    *load_learner_state*: path to a pickled learner to resume from. If the file
    cannot be opened, the experiment's learner factory is used instead.

    *starting_point*: training instances numbered below this value are counted
    but not shown to the learner.

    *point_to_log*: if positive, the learner is additionally pickled right
    before the instance numbered ``point_to_log + 1`` is observed.

    Raises `ValueError` if pre-example observers are present and a training
    instance lacks a situation.
    """

    # After this section `learner_path` is None exactly when learner state
    # logging is disabled, so later dumps can never hit an unbound path.
    learner_path: Optional[Path] = None
    if log_learner_state:
        if learner_logging_path:
            learner_path = learner_logging_path / "learner_state"
            # if the directory in which we wish to log the learner doesn't exist, we must create it
            if not os.path.exists(learner_path):
                try:
                    os.mkdir(learner_path)
                # if we don't have a directory where we can log our learner state,
                # we simply don't log it
                except OSError:
                    logging.warning("Cannot log learner state to %s",
                                    str(learner_path))
                    logging.warning("Proceeding without logging learner state")
                    log_learner_state = False
                    learner_path = None
        else:
            # Previously this case left the path unbound and crashed at the first dump.
            logging.warning(
                "No learner logging path given, proceeding without logging learner state"
            )
            log_learner_state = False

    logging.info("Beginning experiment %s", experiment.name)

    # if there is an existing learner to load, try to load it
    if load_learner_state:
        if starting_point == 0:
            logging.warning(
                "Using existing learner, expected starting point > 0")
        logging.info("Loading existing learner from %s",
                     str(load_learner_state))
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        try:
            with open(load_learner_state, "rb") as learner_file:
                learner = pickle.load(learner_file)
        # if the learner can't be loaded, just instantiate the default learner and notify the user
        except OSError:
            learner = experiment.learner_factory()
            logging.warning(
                "Unable to load learner at %s, using factory instead",
                load_learner_state)
    # if there's no existing learner, instantiate the default
    else:
        learner = experiment.learner_factory()
    logging.info("Instantiated learner %s", learner)

    num_observations = 0

    for training_stage in experiment.training_stages:
        if num_observations > starting_point:
            logging.info("Beginning training stage %s", training_stage.name())
        for (
                situation,
                linguistic_description,
                perceptual_representation,
        ) in training_stage.instances():
            num_observations += 1

            # don't learn from anything until we've reached the start of the learning
            if num_observations < starting_point:
                continue

            # log the start of the learning
            if num_observations == starting_point:
                logging.info("Beginning training stage %s",
                             training_stage.name())

            # if we've reached the user-given point where we want to log the learner, log it here
            # (we log after the nth input is given to the learner)
            if (point_to_log > 0 and num_observations - 1 == point_to_log
                    and learner_path is not None):
                logging.info("Reached %s instances, logging learner",
                             point_to_log)
                _dump_pickle(
                    learner,
                    learner_path / f"learner_state_at_{point_to_log}.pkl")

            # if we've reached the next num_observations where we should log hypotheses,
            # log the hypotheses
            if log_path and num_observations % log_hypotheses_every_n_examples == 0:
                learner.log_hypotheses(log_path / str(num_observations))
                # if we are logging the learner state, we do it here
                if learner_path is not None:
                    _dump_pickle(
                        learner,
                        learner_path /
                        f"learner_state_at_{num_observations}.pkl",
                    )

            if experiment.pre_example_training_observers:
                learner_descriptions_before_seeing_example = learner.describe(
                    perceptual_representation)
                if situation:
                    for pre_example_observer in experiment.pre_example_training_observers:
                        pre_example_observer.observe(
                            situation,
                            linguistic_description,
                            perceptual_representation,
                            learner_descriptions_before_seeing_example,
                        )
                        pre_example_observer.report()
                else:
                    raise ValueError(
                        "Observed training instances cannot lack a situation")

            learner.observe(
                LearningExample(perceptual_representation,
                                linguistic_description),
                observation_num=num_observations,
            )

            if experiment.post_example_training_observers:
                learner_descriptions_after_seeing_example = learner.describe(
                    perceptual_representation)
                for post_example_observer in experiment.post_example_training_observers:
                    post_example_observer.observe(
                        situation,
                        linguistic_description,
                        perceptual_representation,
                        learner_descriptions_after_seeing_example,
                        offset=starting_point,
                    )

    logging.info("Training complete")

    for training_observer in chain(
            experiment.pre_example_training_observers,
            experiment.post_example_training_observers,
    ):
        training_observer.report()

    if log_path:
        learner.log_hypotheses(log_path / "final")

    # log the final learner if the user wishes for it to be logged
    if learner_path is not None:
        _dump_pickle(learner, learner_path / "final_learner_state.pkl")

    logging.info("Warming up for tests")
    for warm_up_instance_group in experiment.warm_up_test_instance_groups:
        for (
                situation,
                warm_up_test_instance_language,
                warm_up_test_instance_perception,
        ) in warm_up_instance_group.instances():
            learner.observe(
                LearningExample(warm_up_test_instance_perception,
                                warm_up_test_instance_language))

    logging.info("Performing tests")
    num_test_observations = 0
    for test_instance_group in experiment.test_instance_groups:
        for (
                situation,
                test_instance_language,
                test_instance_perception,
        ) in test_instance_group.instances():
            logging.info("Test Description: %s", num_test_observations)
            num_test_observations += 1
            descriptions_from_learner = learner.describe(
                test_instance_perception)
            for test_observer in experiment.test_observers:
                test_observer.observe(
                    situation,
                    test_instance_language,
                    test_instance_perception,
                    descriptions_from_learner,
                )

    for test_observer in experiment.test_observers:
        test_observer.report()

    logging.info("Experiment %s complete", experiment.name)
def test_your_attribute_learner(language_mode, learner):
    """
    Train on five sampled "addressee has object" situations (speaker in the
    background) and check the learner describes one sampled test situation
    built from the same template.
    """
    person_0 = standard_object(
        "speaker",
        PERSON,
        banned_properties=[IS_SPEAKER, IS_ADDRESSEE],
        added_properties=[IS_SPEAKER],
    )
    person_1 = standard_object(
        "addressee",
        PERSON,
        banned_properties=[IS_SPEAKER, IS_ADDRESSEE],
        added_properties=[IS_ADDRESSEE],
    )
    inanimate_object = standard_object(
        "object", INANIMATE_OBJECT, required_properties=[PERSON_CAN_HAVE]
    )

    language_generator = phase1_language_generator(language_mode)

    def _your_curriculum(name, chooser_factory, sample_count):
        # Train and test curricula differ only in name, chooser and sample size.
        template = _x_has_y_template(
            person_1,
            inanimate_object,
            background=[person_0],
            syntax_hints=[IGNORE_HAS_AS_VERB],
        )
        situations = sampled(
            template,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            chooser=chooser_factory(),
            max_to_sample=sample_count,
        )
        return phase1_instances(
            name, situations=situations, language_generator=language_generator
        )

    your_train_curriculum = _your_curriculum("your-train", PHASE1_CHOOSER_FACTORY, 5)
    your_test_curriculum = _your_curriculum(
        "your-test", PHASE1_TEST_CHOOSER_FACTORY, 1
    )

    process_learner = learner(language_mode)

    for _, description, perception in your_train_curriculum.instances():
        process_learner.observe(LearningExample(perception, description))

    for _, gold_description, test_perception in your_test_curriculum.instances():
        descriptions_from_learner = process_learner.describe(test_perception)
        gold = gold_description.as_token_sequence()
        # The learner must produce something, and the gold sequence must be in it.
        assert descriptions_from_learner
        produced = [
            candidate.as_token_sequence() for candidate in descriptions_from_learner
        ]
        assert gold in produced