Esempio n. 1
0
def _explicit_split(source: KeyValueSource[str, bytes], params: Parameters):
    """Copy explicitly listed keys from *source* into one output sink per split.

    Every sub-namespace of the ``_EXPLICIT_SPLIT_PARAM`` namespace of *params*
    describes one split: its ``keys_file`` lists the keys to copy and its
    ``output_file`` is the location of the zip bytes sink the values are
    written to.

    If the boolean parameter ``must_be_exhaustive`` is true (the default),
    every key of *source* must have been copied to at least one split.

    Raises:
        RuntimeError: if a requested key has no value in *source*, or if the
            split is required to be exhaustive and some keys were left out.
    """
    explicit_split_namespace = params.namespace(_EXPLICIT_SPLIT_PARAM)

    # We track these so we can ensure the split is a complete partition of the
    # input, if the user so desires.  Only membership matters, so use a set.
    keys_copied = set()

    for split_namespace in explicit_split_namespace.sub_namespaces():
        keys_for_split = file_lines_to_set(
            split_namespace.existing_file("keys_file"))
        with KeyValueSink.zip_bytes_sink(
                split_namespace.creatable_file("output_file")) as split_sink:
            for key in keys_for_split:
                source_value = source.get(key)
                if source_value is None:
                    error_message = (
                        f"For split specified in {split_namespace.namespace_prefix}, "
                        f"requested key value {key} not found in {source}.")
                    # keys() may be None, in which case we cannot offer a hint.
                    available_keys = source.keys()
                    if available_keys is not None:
                        # Note the trailing space: the two pieces are joined
                        # by implicit string concatenation.
                        error_message = (
                            f"{error_message} Here are a few "
                            f"available keys: {str_list_limited(available_keys, 10)}"
                        )
                    raise RuntimeError(error_message)
                split_sink.put(key, source_value)
                keys_copied.add(key)

    if params.boolean("must_be_exhaustive", default=True):
        keys_not_copied = immutableset(source.keys()) - keys_copied
        if keys_not_copied:
            raise RuntimeError(
                f"Expected the split to be a partition, but "
                f"{len(keys_not_copied)} were not included in any output split, "
                f"including {str_list_limited(keys_not_copied, 10)}.  "
                f"If you did not intend the split to be exhaustive, "
                f"please set parameter must_be_exhaustive to False")
Esempio n. 2
0
def gaze_ablation_runner_entry_point(params: Parameters) -> None:
    """Schedule one experiment job per gaze ablation parameter combination in the given ranges."""
    initialize_vista_pegasus_wrapper(params)

    # Settings shared by every gaze ablation experiment, for example:
    #
    #     include_image_links: true
    #     sort_learner_descriptions_by_length: True
    #     num_pretty_descriptions: 5
    baseline_parameters = params.namespace("gaze_ablation")

    # inclusive bounds on the number of objects in a scene
    fewest_objects = params.integer("min_num_objects", default=1)
    most_objects = params.integer("max_num_objects", default=7)
    object_counts = range(fewest_objects, most_objects + 1)

    # how many evenly spaced accuracies in [0, 1] to try; the default steps by 0.1
    accuracy_steps = params.integer("num_increments", default=11)
    accuracies = np.linspace(0, 1, accuracy_steps)

    # inclusive bounds on the number of noise instances
    fewest_noise = params.integer("min_num_noise", default=0)
    most_noise = params.integer("max_num_noise", default=0)
    noise_counts = range(fewest_noise, most_noise + 1)

    # inclusive bounds on the number of instances in the entire curriculum
    fewest_instances = params.integer("min_instances", default=10)
    most_instances = params.integer("max_instances", default=20)
    instance_counts = range(fewest_instances, most_instances + 1)

    for noise_count in noise_counts:
        for instance_count in instance_counts:
            for object_count in object_counts:
                for prob_given in accuracies:
                    for prob_not_given in accuracies:
                        # run both with and without gaze
                        for add_gaze in (True, False):
                            # The name doubles as the job name and as the key for the
                            # directory where the experiment results are stored.
                            name_string = EXPERIMENT_NAME_FORMAT.format(
                                num_instances=instance_count,
                                num_noise_instances=noise_count,
                                num_objects_in_instance=object_count,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                                add_gaze=add_gaze,
                            )
                            locator = Locator(name_string.split("-"))

                            # Start from the root params, then layer the
                            # per-experiment settings on top.
                            shared_params = baseline_parameters.unify(
                                FIXED_PARAMETERS)
                            overrides = {
                                "experiment": name_string,
                                "experiment_group_dir": directory_for(locator),
                                "hypothesis_log_dir":
                                directory_for(locator) / "hypotheses",
                                "learner_logging_path": directory_for(locator),
                                "log_learner_state": True,
                                "resume_from_latest_logged_state": True,
                                "pursuit-curriculum-params": {
                                    "num_instances": instance_count,
                                    "num_noise_instances": noise_count,
                                    "num_objects_in_instance": object_count,
                                    "add_gaze": add_gaze,
                                    "prob_given": float(prob_given),
                                    "prob_not_given": float(prob_not_given),
                                },
                            }
                            experiment_params = shared_params.unify(overrides)

                            run_python_on_parameters(
                                locator,
                                log_experiment_script,
                                experiment_params,
                                depends_on=[],
                            )

    write_workflow_description()
def sample_main(params: Parameters):
    """Assert that *params* holds the expected top-level and nested string values."""
    expected_top_level = (
        ("only_original", "foo"),
        ("only_cli", "bar"),
        ("overridden", "hello"),
    )
    for param_name, expected_value in expected_top_level:
        assert params.string(param_name) == expected_value

    nested_value = params.namespace("nested").string("overridden")
    assert nested_value == "I've been overridden"
Esempio n. 4
0
def curriculum_from_params(params: Parameters,
                           language_mode: LanguageMode = LanguageMode.ENGLISH):
    """Build the ``(training, test)`` instance groups named by the ``curriculum`` parameter.

    The test half is an empty list for curricula that have no test builder.
    """
    # curriculum name -> (training builder, optional test builder)
    builders_by_name: Mapping[str, Tuple[
        CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]] = {
            "m6-deniz": (make_m6_curriculum, None),
            "each-object-by-itself": (
                build_each_object_by_itself_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "pursuit": (
                build_pursuit_curriculum,
                build_each_object_by_itself_curriculum_test,
            ),
            "m6-preposition": (build_m6_prepositions_curriculum, None),
            "m9-objects": (build_gaila_phase1_object_curriculum, None),
            "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
            "chinese-classifiers": (build_classifier_curriculum, None),
            "m9-relations": (build_gaila_phase1_relation_curriculum, None),
            "m9-events": (build_gaila_phase1_verb_curriculum, None),
            "m9-debug": (
                build_debug_curriculum_train,
                build_debug_curriculum_test,
            ),
            "m9-complete": (build_gaila_phase_1_curriculum, None),
            "m13-imprecise-size": (make_imprecise_size_curriculum, None),
            "m13-imprecise-temporal": (
                make_imprecise_temporal_descriptions,
                None,
            ),
            "m13-subtle-verb-distinction": (
                make_subtle_verb_distinctions_curriculum,
                None,
            ),
            "m13-object-restrictions": (
                build_functionally_defined_objects_curriculum,
                None,
            ),
            "m13-functionally-defined-objects": (
                build_functionally_defined_objects_train_curriculum,
                build_functionally_defined_objects_curriculum,
            ),
            "m13-generics": (build_generics_curriculum, None),
            "m13-complete": (build_gaila_m13_curriculum, None),
            "m13-verbs-with-dynamic-prepositions": (
                make_verb_with_dynamic_prepositions_curriculum,
                None,
            ),
            "m13-shuffled": (
                build_m13_shuffled_curriculum,
                build_gaila_m13_curriculum,
            ),
            "m13-relations": (make_prepositions_curriculum, None),
            "actions-and-generics-curriculum": (
                build_actions_and_generics_curriculum,
                None,
            ),
            "m15-object-noise-experiments": (
                build_object_learner_experiment_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "m18-integrated-learners-experiment": (
                integrated_pursuit_learner_experiment_curriculum,
                integrated_pursuit_learner_experiment_test,
            ),
        }

    curriculum_name = params.string("curriculum", builders_by_name.keys())

    # The integrated learners experiment has its own language generator.
    if curriculum_name == "m18-integrated-learners-experiment":
        language_generator = integrated_experiment_language_generator(
            language_mode)
    else:
        language_generator = phase2_language_generator(language_mode)

    pursuit_curriculum_params = (
        params.namespace("pursuit-curriculum-params")
        if params.has_namespace("pursuit-curriculum-params") else
        Parameters.empty())
    use_path_instead_of_goal = params.boolean("use-path-instead-of-goal",
                                              default=False)

    build_train, build_test = builders_by_name[curriculum_name]

    num_samples = params.optional_positive_integer("num_samples")
    # We need to be able to accept 0 as the number of noise objects but optional_integer doesn't currently
    # support specifying a range of acceptable values: https://github.com/isi-vista/vistautils/issues/142
    num_noise_objects = params.optional_integer("num_noise_objects")

    def default_test_groups():
        """Call the test builder with the common arguments, or give [] if there is none."""
        if build_test:
            return build_test(num_samples, num_noise_objects,
                              language_generator)
        return []

    if curriculum_name == "pursuit":
        train_groups = build_train(
            num_samples,
            num_noise_objects,
            language_generator,
            pursuit_curriculum_params=pursuit_curriculum_params,
        )
        return (train_groups, default_test_groups())

    # optional argument to use path instead of goal
    accepts_path_flag = curriculum_name in (
        "m13-complete",
        "m13-shuffled",
        "m13-verbs-with-dynamic-prepositions",
    )
    if use_path_instead_of_goal and accepts_path_flag:
        train_groups = build_train(
            num_samples,
            num_noise_objects,
            language_generator,
            use_path_instead_of_goal,
        )
        return (train_groups, default_test_groups())

    if curriculum_name in (
            "m15-object-noise-experiments",
            "m18-integrated-learners-experiment",
    ):
        train_groups = build_train(
            num_samples,
            num_noise_objects,
            language_generator,
            params=params.namespace_or_empty("train_curriculum"),
        )
        if build_test:
            test_groups = build_test(
                5,
                0,
                language_generator,
                params=params.namespace_or_empty("test_curriculum"),
            )
        else:
            test_groups = []
        return (train_groups, test_groups)

    train_groups = build_train(num_samples, num_noise_objects,
                               language_generator)
    return (train_groups, default_test_groups())
Esempio n. 5
0
def learner_factory_from_params(
    params: Parameters,
    graph_logger: Optional[HypothesisLogger],
    language_mode: LanguageMode = LanguageMode.ENGLISH,
) -> Callable[[], TopLevelLanguageLearner]:  # type: ignore
    """Return a zero-argument factory for the learner named by the ``learner`` parameter.

    Args:
        params: must contain ``learner``; ``beam_size`` (default 10) is optional,
            and some learner types read further parameters or namespaces.
        graph_logger: passed to the ``pursuit`` learner only.
        language_mode: language mode handed to the learners that accept one.

    Returns:
        A callable that builds the configured learner each time it is called.

    Raises:
        RuntimeError: if ``object_learner_type`` is unrecognized or no branch
            handles the selected learner type.
    """
    learner_type = params.string(
        "learner",
        [
            "pursuit",
            "object-subset",
            "preposition-subset",
            "attribute-subset",
            "verb-subset",
            "integrated-learner",
            "integrated-learner-recognizer-without-generics",
            "integrated-learner-recognizer",
            "pursuit-gaze",
            "integrated-object-only",
            "integrated-learner-params",
            "integrated-pursuit-attribute-only",
        ],
    )

    beam_size = params.positive_integer("beam_size", default=10)
    # Fixed seed; this one generator is shared by every learner that takes an rng.
    rng = random.Random()
    rng.seed(0)
    perception_generator = GAILA_PHASE_1_PERCEPTION_GENERATOR

    objects = [YOU_HACK, ME_HACK]
    objects.extend(PHASE_1_CURRICULUM_OBJECTS)

    # Eval hack! This is specific to the Phase 1 ontology
    object_recognizer = ObjectRecognizer.for_ontology_types(
        objects,
        determiners=ENGLISH_DETERMINERS,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        perception_generator=perception_generator,
    )

    # The integrated learners below share these sub-learner configurations.
    # Each helper builds a fresh instance, so call them inside the returned
    # lambdas rather than once up front.
    def subset_attribute_learner() -> SubsetAttributeLearnerNew:
        return SubsetAttributeLearnerNew(
            ontology=GAILA_PHASE_2_ONTOLOGY,
            beam_size=beam_size,
            language_mode=language_mode,
        )

    def subset_relation_learner() -> SubsetRelationLearnerNew:
        return SubsetRelationLearnerNew(
            ontology=GAILA_PHASE_2_ONTOLOGY,
            beam_size=beam_size,
            language_mode=language_mode,
        )

    def subset_action_learner() -> SubsetVerbLearnerNew:
        return SubsetVerbLearnerNew(
            ontology=GAILA_PHASE_2_ONTOLOGY,
            beam_size=beam_size,
            language_mode=language_mode,
        )

    def recognizer_object_learner() -> ObjectRecognizerAsTemplateLearner:
        return ObjectRecognizerAsTemplateLearner(
            object_recognizer=object_recognizer,
            language_mode=language_mode)

    if learner_type == "pursuit":
        return lambda: ObjectPursuitLearner.from_parameters(
            params.namespace("pursuit"), graph_logger=graph_logger)
    elif learner_type == "pursuit-gaze":
        return lambda: IntegratedTemplateLearner(
            object_learner=PursuitObjectLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                ontology=GAILA_PHASE_2_ONTOLOGY,
                language_mode=language_mode,
                rank_gaze_higher=True,
            ),
            attribute_learner=subset_attribute_learner(),
            relation_learner=subset_relation_learner(),
            action_learner=subset_action_learner(),
        )
    elif learner_type == "object-subset":
        return lambda: SubsetObjectLearner(ontology=GAILA_PHASE_1_ONTOLOGY,
                                           language_mode=LanguageMode.ENGLISH)
    elif learner_type == "attribute-subset":
        return lambda: SubsetAttributeLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "preposition-subset":
        return lambda: SubsetPrepositionLearner(
            # graph_logger=graph_logger,
            object_recognizer=object_recognizer,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "verb-subset":
        return lambda: SubsetVerbLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "integrated-learner":
        return lambda: IntegratedTemplateLearner(
            object_learner=SubsetObjectLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            attribute_learner=subset_attribute_learner(),
            relation_learner=subset_relation_learner(),
            action_learner=subset_action_learner(),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-learner-recognizer":
        return lambda: IntegratedTemplateLearner(
            object_learner=recognizer_object_learner(),
            attribute_learner=subset_attribute_learner(),
            relation_learner=subset_relation_learner(),
            action_learner=subset_action_learner(),
            functional_learner=FunctionalLearner(language_mode=language_mode),
            generics_learner=SimpleGenericsLearner(),
        )
    elif learner_type == "integrated-learner-recognizer-without-generics":
        # Same as "integrated-learner-recognizer" but with no generics learner.
        # This branch used to test for "ic", which is not an accepted value of
        # the "learner" parameter, so the branch was unreachable and this
        # accepted learner type fell through to "can't happen".
        return lambda: IntegratedTemplateLearner(
            object_learner=recognizer_object_learner(),
            attribute_learner=subset_attribute_learner(),
            relation_learner=subset_relation_learner(),
            action_learner=subset_action_learner(),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-object-only":
        object_learner_type = params.string(
            "object_learner_type",
            valid_options=["subset", "pbv", "pursuit"],
            default="subset",
        )

        if params.has_namespace("learner_params"):
            learner_params = params.namespace("learner_params")
        else:
            learner_params = params.empty(namespace_prefix="learner_params")

        object_learner_factory: Callable[[], TemplateLearner]
        if object_learner_type == "subset":

            def subset_factory() -> SubsetObjectLearnerNew:
                return SubsetObjectLearnerNew(  # type: ignore
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    beam_size=beam_size,
                    language_mode=language_mode,
                )

            object_learner_factory = subset_factory

        elif object_learner_type == "pbv":

            def pbv_factory() -> ProposeButVerifyObjectLearner:
                return ProposeButVerifyObjectLearner.from_params(  # type: ignore
                    learner_params)

            object_learner_factory = pbv_factory
        elif object_learner_type == "pursuit":

            def pursuit_factory() -> PursuitObjectLearnerNew:
                return PursuitObjectLearnerNew(  # type: ignore
                    learning_factor=learner_params.floating_point(
                        "learning_factor"),
                    graph_match_confirmation_threshold=learner_params.
                    floating_point("graph_match_confirmation_threshold"),
                    lexicon_entry_threshold=learner_params.floating_point(
                        "lexicon_entry_threshold"),
                    rng=rng,
                    smoothing_parameter=learner_params.floating_point(
                        "smoothing_parameter"),
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    language_mode=language_mode,
                )

            object_learner_factory = pursuit_factory
        else:
            # Report the value that was actually invalid, not the outer learner type.
            raise RuntimeError(
                f"Invalid Object Learner Type Selected: {object_learner_type}")
        return lambda: IntegratedTemplateLearner(object_learner=
                                                 object_learner_factory())
    elif learner_type == "integrated-learner-params":
        # NOTE(review): these are built once here and the same objects are
        # handed to every learner the returned factory creates.
        object_learner = build_object_learner_factory(  # type:ignore
            params.namespace_or_empty("object_learner"), beam_size,
            language_mode)
        attribute_learner = build_attribute_learner_factory(  # type:ignore
            params.namespace_or_empty("attribute_learner"), beam_size,
            language_mode)
        relation_learner = build_relation_learner_factory(  # type:ignore
            params.namespace_or_empty("relation_learner"), beam_size,
            language_mode)
        action_learner = build_action_learner_factory(  # type:ignore
            params.namespace_or_empty("action_learner"), beam_size,
            language_mode)
        plural_learner = build_plural_learner_factory(  # type:ignore
            params.namespace_or_empty("plural_learner"), beam_size,
            language_mode)
        return lambda: IntegratedTemplateLearner(
            object_learner=object_learner,
            attribute_learner=attribute_learner,
            relation_learner=relation_learner,
            action_learner=action_learner,
            functional_learner=FunctionalLearner(language_mode=language_mode)
            if params.boolean("include_functional_learner", default=True) else
            None,
            generics_learner=SimpleGenericsLearner() if params.boolean(
                "include_generics_learner", default=True) else None,
            plural_learner=plural_learner,
            suppress_error=params.boolean("suppress_error", default=True),
        )
    elif learner_type == "integrated-pursuit-attribute-only":
        return lambda: IntegratedTemplateLearner(
            object_learner=recognizer_object_learner(),
            attribute_learner=PursuitAttributeLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                rank_gaze_higher=False,
                ontology=GAILA_PHASE_1_ONTOLOGY,
                language_mode=language_mode,
            ),
        )
    else:
        raise RuntimeError("can't happen")
def integrated_experiment_entry_point(params: Parameters) -> None:
    """Schedule curriculum-generation jobs and one learner experiment per curriculum.

    For each of the four ``(add_noise, shuffle)`` combinations a job running
    ``generate_curriculum_script`` is scheduled, followed by a job running
    ``log_experiment_script`` that depends on it.  The workflow description is
    written once all jobs have been scheduled.
    """
    initialize_vista_pegasus_wrapper(params)

    baseline_parameters = params.namespace("integrated_learners_experiment")
    pursuit_resource_request_params = params.namespace(
        "pursuit_resource_request")

    # This code is commented out but may be used in the near future to add language ablation
    # Capabilities to this curriculum.

    # get the minimum and maximum accuracy of the language with the situation
    # min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    # max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    # num_language_accuracy_increment = params.integer(
    #    "num_language_accuracy_increment", default=5
    # )
    # values_for_accuracy = np.linspace(
    #    min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    # )

    # Get if attributes or relations should be included
    include_attributes = params.boolean("include_attributes", default=True)
    include_relations = params.boolean("include_relations", default=True)

    # The limit has to be registered under the same category name the pursuit
    # jobs are submitted with below ("pursuit"); it used to be registered as
    # "pursuit_job_limit", a category no job here belongs to.
    limit_jobs_for_category(
        "pursuit", params.integer("num_pursuit_learners_active", default=8))

    curriculum_repository_path = params.creatable_directory(
        "curriculum_repository_path")

    # The learner types are the same for every curriculum, so read them once.
    object_learner_type = params.string(
        "object_learner.learner_type",
        valid_options=["pursuit", "subset", "pbv"],
        default="pursuit",
    )
    # This key used to be spelled "attribute_learner.learner__type" (doubled
    # underscore), unlike the object and relation learner keys.
    attribute_learner_type = params.string(
        "attribute_learner.learner_type",
        valid_options=["none", "pursuit", "subset"],
        default="pursuit",
    )
    relation_learner_type = params.string(
        "relation_learner.learner_type",
        valid_options=["none", "pursuit", "subset"],
        default="pursuit",
    )
    uses_pursuit = "pursuit" in (
        object_learner_type,
        attribute_learner_type,
        relation_learner_type,
    )

    # Job to build desired curriculum(s) which our learners use
    curriculum_jobs = []
    for add_noise in (True, False):
        for shuffle in (True, False):
            curriculum_name = CURRICULUM_NAME_FORMAT.format(
                noise=add_noise,
                shuffled=shuffle,
                relations=include_relations,
                attributes=include_attributes,
            )
            curriculum_params = Parameters.from_mapping(
                CURRICULUM_PARAMS).unify({
                    "add_noise": add_noise,
                    "shuffled": shuffle,
                    "include_attributes": include_attributes,
                    "include_relations": include_relations,
                })
            curriculum_job = run_python_on_parameters(
                Locator(curriculum_name.split("-")),
                generate_curriculum_script,
                baseline_parameters.unify({
                    "train_curriculum": curriculum_params.as_mapping()
                }).unify(FIXED_PARAMETERS).unify(
                    {"curriculum_repository_path": curriculum_repository_path}),
                depends_on=[],
            )
            curriculum_jobs.append(
                (curriculum_name, curriculum_job, curriculum_params))

    # jobs to build experiment
    for (curriculum_str, curriculum_dep, curr_params) in curriculum_jobs:
        experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
            # "-" separates locator components below, so keep it out of the
            # curriculum part of the name.
            curriculum_name=curriculum_str.replace("-", "+"),
            object_learner=object_learner_type,
            attribute_learner=attribute_learner_type,
            relation_learner=relation_learner_type,
        )
        experiment_name = Locator(experiment_name_string.split("-"))
        experiment_dir = directory_for(experiment_name)

        # Note that the input parameters should include the root params and
        # anything else we want.
        experiment_params = baseline_parameters.unify(FIXED_PARAMETERS).unify({
            "experiment": experiment_name_string,
            "experiment_group_dir": experiment_dir,
            "hypothesis_log_dir": experiment_dir / "hypotheses",
            "learner_logging_path": experiment_dir,
            "log_learner_state": True,
            "resume_from_latest_logged_state": True,
            "load_from_curriculum_repository": curriculum_repository_path,
            "train_curriculum": curr_params,
        })

        run_python_on_parameters(
            experiment_name,
            log_experiment_script,
            experiment_params,
            depends_on=[curriculum_dep],
            # Only experiments with a pursuit learner get the dedicated
            # resource request and fall under the limited "pursuit" category.
            resource_request=SlurmResourceRequest.from_parameters(
                pursuit_resource_request_params) if uses_pursuit else None,
            category="pursuit" if uses_pursuit else "subset",
            use_pypy=True,
        )

    write_workflow_description()
def object_language_ablation_runner_entry_point(params: Parameters) -> None:
    """Schedule one experiment job per point of the object-language ablation grid.

    The grid is the cross product of:

    * the number of objects in a scene, from ``min_num_objects`` (default 1)
      to ``max_num_objects`` (default 7), inclusive;
    * ``num_language_accuracy_increment`` (default 5) evenly spaced language
      accuracy values between ``min_language_accuracy`` (default 0.1) and
      ``max_language_accuracy`` (default 0.5);
    * every learner type in ``LEARNER_VALUES_TO_PARAMS`` together with each of
      the ``(label, parameters)`` pairs listed for it.

    Each job's parameters are the ``object_language_ablation`` namespace unified
    with ``FIXED_PARAMETERS`` and the per-experiment overrides built below.
    The workflow description is written once all jobs have been registered.
    """
    initialize_vista_pegasus_wrapper(params)

    base_params = params.namespace("object_language_ablation")
    pursuit_slurm_params = params.namespace("pursuit_resource_request")

    # Inclusive bounds on the number of objects in a scene.
    fewest_objects = params.integer("min_num_objects", default=1)
    most_objects = params.integer("max_num_objects", default=7)

    # Range of how accurately the language describes the situation.
    lowest_accuracy = params.floating_point("min_language_accuracy",
                                            default=0.1)
    highest_accuracy = params.floating_point("max_language_accuracy",
                                             default=0.5)
    accuracy_steps = params.integer("num_language_accuracy_increment",
                                    default=5)
    accuracy_values = np.linspace(lowest_accuracy, highest_accuracy,
                                  accuracy_steps)

    active_pursuit_jobs = params.integer("num_pursuit_learners_active",
                                         default=8)
    limit_jobs_for_category("pursuit", active_pursuit_jobs)

    for object_count in range(fewest_objects, most_objects + 1):
        for accuracy in accuracy_values:
            for learner_type in LEARNER_VALUES_TO_PARAMS:
                learner_configs = LEARNER_VALUES_TO_PARAMS[learner_type]
                for config_label, learner_config in learner_configs:
                    name_string = EXPERIMENT_NAME_FORMAT.format(
                        num_objects=object_count,
                        language_accuracy=accuracy,
                        learner_type=learner_type,
                        learner_params=config_label,
                    )
                    # The dash-separated name doubles as the job's locator path.
                    locator = Locator(name_string.split("-"))

                    # The job parameters must carry the root parameters plus
                    # everything specific to this experiment.
                    with_fixed = base_params.unify(FIXED_PARAMETERS)
                    overrides = {
                        "experiment": name_string,
                        "experiment_group_dir": directory_for(locator),
                        "hypothesis_log_dir":
                        directory_for(locator) / "hypotheses",
                        "learner_logging_path": directory_for(locator),
                        "log_learner_state": True,
                        "resume_from_latest_logged_state": True,
                        "train_curriculum": {
                            "accurate_language_percentage": float(accuracy)
                        },
                        "object_learner_type": learner_type,
                        "object_learner": learner_config,
                        # The target object is always present, so only the
                        # remaining objects count as noise.
                        "num_noise_objects": object_count - 1,
                    }
                    job_params = with_fixed.unify(overrides)

                    # Only pursuit jobs get an explicit resource request.
                    if learner_type == "pursuit":
                        resource_request = SlurmResourceRequest.from_parameters(
                            pursuit_slurm_params)
                    else:
                        resource_request = None

                    run_python_on_parameters(
                        locator,
                        log_experiment_script,
                        job_params,
                        depends_on=[],
                        resource_request=resource_request,
                        category=learner_type,
                    )

    write_workflow_description()
Esempio n. 8
0
def curriculum_from_params(params: Parameters,
                           language_mode: LanguageMode = LanguageMode.ENGLISH):
    """Build the training and test curricula named by the ``curriculum`` parameter.

    The optional ``num_samples`` and ``num_noise_objects`` parameters and a
    language generator for *language_mode* are forwarded to the selected
    builders. The ``pursuit`` training builder additionally receives the
    ``pursuit-curriculum-params`` namespace (empty parameters if absent).

    Returns a ``(training, test)`` pair; the test element is an empty list
    when the chosen curriculum has no test builder.
    """
    # Curriculum name -> (training builder, optional test builder).
    builders_by_name: Mapping[str, Tuple[
        CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]] = {
            "m6-deniz": (make_m6_curriculum, None),
            "each-object-by-itself": (
                build_each_object_by_itself_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "pursuit": (
                build_pursuit_curriculum,
                build_each_object_by_itself_curriculum_test,
            ),
            "m6-preposition": (build_m6_prepositions_curriculum, None),
            "m9-objects": (build_gaila_phase1_object_curriculum, None),
            "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
            "m9-relations": (build_gaila_phase1_relation_curriculum, None),
            "m9-events": (build_gaila_phase1_verb_curriculum, None),
            "m9-debug": (
                build_debug_curriculum_train,
                build_debug_curriculum_test,
            ),
            "m9-complete": (build_gaila_phase_1_curriculum, None),
            "m13-imprecise-size": (make_imprecise_size_curriculum, None),
            "m13-imprecise-temporal": (
                make_imprecise_temporal_descriptions,
                None,
            ),
            "m13-subtle-verb-distinction": (
                make_subtle_verb_distinctions_curriculum,
                None,
            ),
            "m13-object-restrictions": (
                build_functionally_defined_objects_curriculum,
                None,
            ),
            "m13-functionally-defined-objects": (
                build_functionally_defined_objects_train_curriculum,
                build_functionally_defined_objects_curriculum,
            ),
            "m13-generics": (build_generics_curriculum, None),
            "m13-complete": (build_gaila_m13_curriculum, None),
            "m13-verbs-with-dynamic-prepositions": (
                make_verb_with_dynamic_prepositions_curriculum,
                None,
            ),
            "m13-shuffled": (
                build_m13_shuffled_curriculum,
                build_gaila_m13_curriculum,
            ),
            "m13-relations": (make_prepositions_curriculum, None),
        }

    curriculum_name = params.string("curriculum", builders_by_name.keys())
    language_generator = phase2_language_generator(language_mode)

    pursuit_extras = (
        params.namespace("pursuit-curriculum-params")
        if params.has_namespace("pursuit-curriculum-params")
        else Parameters.empty()
    )

    build_training, build_test = builders_by_name[curriculum_name]

    num_samples = params.optional_positive_integer("num_samples")
    num_noise_objects = params.optional_positive_integer("num_noise_objects")

    # Only the pursuit builder takes the extra pursuit-specific parameters.
    if curriculum_name == "pursuit":
        training_curriculum = build_training(
            num_samples,
            num_noise_objects,
            language_generator,
            pursuit_curriculum_params=pursuit_extras,
        )
    else:
        training_curriculum = build_training(num_samples, num_noise_objects,
                                             language_generator)

    if build_test:
        test_curriculum = build_test(num_samples, num_noise_objects,
                                     language_generator)
    else:
        test_curriculum = []

    return (training_curriculum, test_curriculum)
Esempio n. 9
0
def learner_factory_from_params(
    params: Parameters,
    graph_logger: Optional[HypothesisLogger],
    language_mode: LanguageMode = LanguageMode.ENGLISH,
) -> Callable[[], TopLevelLanguageLearner]:  # type: ignore
    """Return a zero-argument factory for the learner named by the ``learner`` parameter.

    Nothing is instantiated for the chosen learner until the returned callable
    is invoked; each invocation constructs a new learner. The random number
    generator (seeded with 0) and the object recognizer are created once here
    and shared by every learner the factory produces.

    ``beam_size`` (default 10) is read from *params* and passed to the
    subset learners built over the Phase 2 ontology.

    Raises:
        RuntimeError: if *language_mode* is Chinese and the learner is not one
            of the integrated learners.
    """
    learner_type = params.string(
        "learner",
        [
            "pursuit",
            "object-subset",
            "preposition-subset",
            "attribute-subset",
            "verb-subset",
            "integrated-learner",
            "integrated-learner-recognizer",
            "pursuit-gaze",
        ],
    )

    beam_size = params.positive_integer("beam_size", default=10)

    handles_chinese = learner_type in (
        "integrated-learner",
        "integrated-learner-recognizer",
    )
    if language_mode == LanguageMode.CHINESE and not handles_chinese:
        raise RuntimeError(
            "Only able to test Chinese with integrated learner.")

    rng = random.Random(0)

    # Eval hack! This is specific to the Phase 1 ontology
    recognizable_objects = [YOU_HACK, ME_HACK, *PHASE_1_CURRICULUM_OBJECTS]
    object_recognizer = ObjectRecognizer.for_ontology_types(
        recognizable_objects,
        determiners=ENGLISH_DETERMINERS,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        perception_generator=GAILA_PHASE_1_PERCEPTION_GENERATOR,
    )

    def phase2_subset_learners():
        # Fresh attribute, relation and action learners over the Phase 2
        # ontology, keyed by the IntegratedTemplateLearner argument they fill.
        common = {
            "ontology": GAILA_PHASE_2_ONTOLOGY,
            "beam_size": beam_size,
            "language_mode": language_mode,
        }
        return {
            "attribute_learner": SubsetAttributeLearnerNew(**common),
            "relation_learner": SubsetRelationLearnerNew(**common),
            "action_learner": SubsetVerbLearnerNew(**common),
        }

    def build_pursuit():
        return ObjectPursuitLearner.from_parameters(
            params.namespace("pursuit"), graph_logger=graph_logger)

    def build_pursuit_gaze():
        # Unlike the other integrated learners, no functional learner is given.
        return IntegratedTemplateLearner(
            object_learner=PursuitObjectLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                ontology=GAILA_PHASE_2_ONTOLOGY,
                language_mode=language_mode,
                rank_gaze_higher=True,
            ),
            **phase2_subset_learners(),
        )

    def build_object_subset():
        return SubsetObjectLearner(ontology=GAILA_PHASE_1_ONTOLOGY,
                                   language_mode=LanguageMode.ENGLISH)

    def build_attribute_subset():
        return SubsetAttributeLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )

    def build_preposition_subset():
        # graph_logger is not forwarded to this learner.
        return SubsetPrepositionLearner(
            object_recognizer=object_recognizer,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH,
        )

    def build_verb_subset():
        return SubsetVerbLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )

    def build_integrated():
        return IntegratedTemplateLearner(
            object_learner=SubsetObjectLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            **phase2_subset_learners(),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )

    def build_integrated_with_recognizer():
        return IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            **phase2_subset_learners(),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )

    factories = {
        "pursuit": build_pursuit,
        "pursuit-gaze": build_pursuit_gaze,
        "object-subset": build_object_subset,
        "attribute-subset": build_attribute_subset,
        "preposition-subset": build_preposition_subset,
        "verb-subset": build_verb_subset,
        "integrated-learner": build_integrated,
        "integrated-learner-recognizer": build_integrated_with_recognizer,
    }

    # params.string already restricts the value, so this is purely defensive.
    if learner_type not in factories:
        raise RuntimeError("can't happen")
    return factories[learner_type]