Example #1
0
def get_dataset_reader_from_argument(argument: str) -> DatasetReader:
    if argument in Registrable._registry[DatasetReader]:
        type_, _ = Registrable._registry[DatasetReader][argument]
        return type_()
    else:
        params = Params(json.loads(argument))
        return DatasetReader.from_params(params)
Example #2
0
def create_kwargs(constructor: Callable[..., T], cls: Type[T], params: Params,
                  **extras) -> Dict[str, Any]:
    """
    Given some class, a `Params` object, and potentially other keyword arguments,
    create a dict of keyword args suitable for passing to the class's constructor.
    The function does this by finding the class's constructor, matching the constructor
    arguments to entries in the `params` object, and instantiating values for the parameters
    using the type annotation and possibly a from_params method.
    Any values that are provided in the `extras` will just be used as is.
    For instance, you might provide an existing `Vocabulary` this way.
    """
    # Get the signature of the constructor.

    kwargs: Dict[str, Any] = {}

    parameters = infer_params(cls, constructor)

    # Iterate over all the constructor parameters and their annotations.
    for param_name, param in parameters.items():
        # Skip "self". You're not *required* to call the first parameter "self",
        # so in theory this logic is fragile, but if you don't call the self parameter
        # "self" you kind of deserve what happens.
        if param_name == "self":
            continue
        # Also skip **kwargs parameters; we handled them above.
        if param.kind == param.VAR_KEYWORD:
            continue

        # If the annotation is a compound type like typing.Dict[str, int],
        # it will have an __origin__ field indicating `typing.Dict`
        # and an __args__ field indicating `(str, int)`. We capture both.
        annotation = remove_optional(param.annotation)

        constructed_arg = pop_and_construct_arg(cls.__name__, param_name,
                                                annotation, param.default,
                                                params, **extras)

        # If we just ended up constructing the default value for the parameter, we can just omit it.
        # Leaving it in can cause issues with **kwargs in some corner cases, where you might end up
        # with multiple values for a single parameter (e.g., the default value gives you lazy=False
        # for a dataset reader inside **kwargs, but a particular dataset reader actually hard-codes
        # lazy=True - the superclass sees both lazy=True and lazy=False in its constructor).
        if constructed_arg is not param.default:
            kwargs[param_name] = constructed_arg

    params.assert_empty(cls.__name__)
    return kwargs
Example #3
0
    def test_from_params(self):
        params = Params({'type': 'subclass1', 'a': 4, 'b': 'abc'})
        obj = _Base1.from_params(params)
        target = _Subclass1(4, 'abc')

        assert isinstance(obj, _Subclass1)
        assert obj.a == target.a
        assert obj.b == target.b
Example #4
0
def pop_and_construct_arg(class_name: str, argument_name: str,
                          annotation: Type, default: Any, params: Params,
                          **extras) -> Any:
    """
    Does the work of actually constructing an individual argument for
    [`create_kwargs`](./#create_kwargs).
    Here we're in the inner loop of iterating over the parameters to a particular constructor,
    trying to construct just one of them.  The information we get for that parameter is its name,
    its type annotation, and its default value; we also get the full set of `Params` for
    constructing the object (which we may mutate), and any `extras` that the constructor might
    need.
    We take the type annotation and default value here separately, instead of using an
    `inspect.Parameter` object directly, so that we can handle `Union` types using recursion on
    this method, trying the different annotation types in the union in turn.
    """
    # We used `argument_name` as the method argument to avoid conflicts with 'name' being a key in
    # `extras`, which isn't _that_ unlikely.  Now that we are inside the method, we can switch back
    # to using `name`.
    name = argument_name

    # Some constructors expect extra non-parameter items, e.g. vocab: Vocabulary.
    # We check the provided `extras` for these and just use them if they exist.
    if name in extras:
        if name not in params:
            return extras[name]
        else:
            logger.warning(
                f"Parameter {name} for class {class_name} was found in both "
                "**extras and in params. Using the specification found in params, "
                "but you probably put a key in a config file that you didn't need, "
                "and if it is different from what we get from **extras, you might "
                "get unexpected behavior.")

    popped_params = params.pop(
        name, default) if default != _NO_DEFAULT else params.pop(name)
    if popped_params is None:
        return None

    return construct_arg(class_name, name, popped_params, annotation, default,
                         **extras)
Example #5
0
    def run(self, args):
        params = Params.from_file(args.config, args.overrides)
        dataset_reader = DatasetReader.from_params(
            params.pop('dataset_reader'))
        metrics = _load_metrics(params)

        instances = dataset_reader.read()
        metrics_dicts = score_instances(instances, metrics)

        # Save the results to the output file
        with JsonlWriter(args.output_jsonl) as out:
            for instance_id in sorted(metrics_dicts.keys()):
                for summarizer_id in sorted(metrics_dicts[instance_id].keys()):
                    out.write(metrics_dicts[instance_id][summarizer_id])
Example #6
0
    def test_from_params_complex_types(self):
        params = Params({
            'type':
            'subclass2',
            'first': [
                {
                    'type': 'subclass1',
                    'a': 4,
                    'b': 'abc'
                },
                {
                    'type': 'subclass1',
                    'a': 8,
                    'b': 'def'
                },
            ],
            'second': {
                'A': {
                    'type': 'subclass1',
                    'a': 7,
                    'b': 'ghi'
                },
                'B': {
                    'type': 'subclass1',
                    'a': 3,
                    'b': 'jkl'
                },
            }
        })
        obj = _Base1.from_params(params)
        target = _Subclass2(
            [_Subclass1(4, 'abc'), _Subclass1(8, 'def')], {
                'A': _Subclass1(7, 'ghi'),
                'B': _Subclass1(3, 'jkl')
            })

        assert isinstance(obj, _Subclass2)
        assert len(obj.first) == 2
        assert obj.first[0].a == 4
        assert obj.first[0].b == 'abc'
        assert obj.first[1].a == 8
        assert obj.first[1].b == 'def'

        assert len(obj.second) == 2
        assert obj.second['A'].a == 7
        assert obj.second['A'].b == 'ghi'
        assert obj.second['B'].a == 3
        assert obj.second['B'].b == 'jkl'
    def run(self, args):
        params = Params.from_file(args.config, args.overrides)
        dataset_reader = DatasetReader.from_params(
            params.pop('dataset_reader'))
        metrics = load_metrics(params)

        instances = dataset_reader.read()
        summaries = [instance.summary for instance in instances]

        macro = MetricsDict()
        micro_list = get_initial_micro_list(instances)

        for metric in metrics:
            # Prepare the extra input arguments
            eval_args = []
            for field in metric.required_fields:
                eval_args.append(
                    [instance.fields[field] for instance in instances])

            # Score all the summaries
            this_macro, this_micro_list = metric.evaluate(
                summaries, *eval_args)

            # Update the global metrics dictionaries
            macro.update(this_macro)
            for micro, this_micro in zip(micro_list, this_micro_list):
                micro.metrics.update(this_micro)

        dirname = os.path.dirname(args.macro_output_json)
        if dirname:
            os.makedirs(dirname, exist_ok=True)

        serialized_macro = jsons.dumps({'metrics': macro},
                                       jdkwargs={'indent': 2})
        with open(args.macro_output_json, 'w') as out:
            out.write(serialized_macro)
        if not args.silent:
            print(serialized_macro)

        with JsonlWriter(args.micro_output_jsonl) as out:
            for metrics_dict in micro_list:
                out.write(metrics_dict)
Example #8
0
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        import_module_and_submodules('sacrerouge')
        include_packages = args.include_packages or []
        for package in include_packages:
            import_module_and_submodules(package)

        params = Params.from_file(args.config, args.overrides)
        dataset_reader = DatasetReader.from_params(params.pop('dataset_reader'))
        metrics = _load_metrics(params)

        input_files = params.pop('input_files')
        if isinstance(input_files, str):
            input_files = [input_files]

        instances = dataset_reader.read(*input_files)
        metrics_dicts = score_instances(instances, metrics, args.disable_peer_jackknifing)

        save_score_results(metrics_dicts, args.output_jsonl, args.silent)
Example #9
0
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        import_module_and_submodules('sacrerouge')
        include_packages = args.include_packages or []
        for package in include_packages:
            import_module_and_submodules(package)

        params = Params.from_file(args.config, args.overrides)
        dataset_reader = DatasetReader.from_params(
            params.pop('dataset_reader'))
        metrics = load_metrics(params)

        input_files = params.pop('input_files')
        if isinstance(input_files, str):
            input_files = [input_files]

        instances = dataset_reader.read(*input_files)
        macro, micro_list = evaluate_instances(instances, metrics)

        save_evaluation_results(macro, micro_list, args.macro_output_json,
                                args.micro_output_jsonl, args.silent)
Example #10
0
def _load_metrics(params: Params) -> List[Metric]:
    metrics = []
    for metric_params in params.pop('metrics'):
        metric = Metric.from_params(metric_params)
        metrics.append(metric)
    return metrics
Example #11
0
    def from_params(
        cls: Type[T],
        params: Params,
        constructor_to_call: Callable[..., T] = None,
        constructor_to_inspect: Callable[..., T] = None,
        **extras,
    ) -> T:
        """
        This is the automatic implementation of `from_params`. Any class that subclasses
        `FromParams` (or `Registrable`, which itself subclasses `FromParams`) gets this
        implementation for free.  If you want your class to be instantiated from params in the
        "obvious" way -- pop off parameters and hand them to your constructor with the same names --
        this provides that functionality.
        If you need more complex logic in your from `from_params` method, you'll have to implement
        your own method that overrides this one.
        The `constructor_to_call` and `constructor_to_inspect` arguments deal with a bit of
        redirection that we do.  We allow you to register particular `@classmethods` on a class as
        the constructor to use for a registered name.  This lets you, e.g., have a single
        `Vocabulary` class that can be constructed in two different ways, with different names
        registered to each constructor.  In order to handle this, we need to know not just the class
        we're trying to construct (`cls`), but also what method we should inspect to find its
        arguments (`constructor_to_inspect`), and what method to call when we're done constructing
        arguments (`constructor_to_call`).  These two methods are the same when you've used a
        `@classmethod` as your constructor, but they are `different` when you use the default
        constructor (because you inspect `__init__`, but call `cls()`).
        """

        from sacrerouge.common.registrable import Registrable  # import here to avoid circular imports

        logger.debug(
            f"instantiating class {cls} from params {getattr(params, 'params', params)} "
            f"and extras {set(extras.keys())}")

        if params is None:
            return None

        if isinstance(params, str):
            params = Params({"type": params})

        if not isinstance(params, Params):
            raise ConfigurationError(
                "from_params was passed a `params` object that was not a `Params`. This probably "
                "indicates malformed parameters in a configuration file, where something that "
                "should have been a dictionary was actually a list, or something else. "
                f"This happened when constructing an object of type {cls}.")

        registered_subclasses = Registrable._registry.get(cls)

        if is_base_registrable(cls) and registered_subclasses is None:
            # NOTE(mattg): There are some potential corner cases in this logic if you have nested
            # Registrable types.  We don't currently have any of those, but if we ever get them,
            # adding some logic to check `constructor_to_call` should solve the issue.  Not
            # bothering to add that unnecessary complexity for now.
            raise ConfigurationError(
                "Tried to construct an abstract Registrable base class that has no registered "
                "concrete types. This might mean that you need to use --include-package to get "
                "your concrete classes actually registered.")

        if registered_subclasses is not None and not constructor_to_call:
            # We know `cls` inherits from Registrable, so we'll use a cast to make mypy happy.

            as_registrable = cast(Type[Registrable], cls)
            default_to_first_choice = as_registrable.default_implementation is not None
            choice = params.pop_choice(
                "type",
                choices=as_registrable.list_available(),
                default_to_first_choice=default_to_first_choice,
            )
            subclass, constructor_name = as_registrable.resolve_class_name(
                choice)
            # See the docstring for an explanation of what's going on here.
            if not constructor_name:
                constructor_to_inspect = subclass.__init__
                constructor_to_call = subclass  # type: ignore
            else:
                constructor_to_inspect = getattr(subclass, constructor_name)
                constructor_to_call = constructor_to_inspect

            if hasattr(subclass, "from_params"):
                # We want to call subclass.from_params.
                extras = create_extras(subclass, extras)
                # mypy can't follow the typing redirection that we do, so we explicitly cast here.
                retyped_subclass = cast(Type[T], subclass)
                return retyped_subclass.from_params(
                    params=params,
                    constructor_to_call=constructor_to_call,
                    constructor_to_inspect=constructor_to_inspect,
                    **extras,
                )
            else:
                # In some rare cases, we get a registered subclass that does _not_ have a
                # from_params method (this happens with Activations, for instance, where we
                # register pytorch modules directly).  This is a bit of a hack to make those work,
                # instead of adding a `from_params` method for them somehow.  We just trust that
                # you've done the right thing in passing your parameters, and nothing else needs to
                # be recursively constructed.
                extras = create_extras(subclass, extras)
                constructor_args = {**params, **extras}
                return subclass(**constructor_args)  # type: ignore
        else:
            # This is not a base class, so convert our params and extras into a dict of kwargs.

            # See the docstring for an explanation of what's going on here.
            if not constructor_to_inspect:
                constructor_to_inspect = cls.__init__
            if not constructor_to_call:
                constructor_to_call = cls

            if constructor_to_inspect == object.__init__:
                # This class does not have an explicit constructor, so don't give it any kwargs.
                # Without this logic, create_kwargs will look at object.__init__ and see that
                # it takes *args and **kwargs and look for those.
                kwargs: Dict[str, Any] = {}
                params.assert_empty(cls.__name__)
            else:
                # This class has a constructor, so create kwargs for it.
                kwargs = create_kwargs(constructor_to_inspect, cls, params,
                                       **extras)

            return constructor_to_call(**kwargs)  # type: ignore
Example #12
0
def construct_arg(
    class_name: str,
    argument_name: str,
    popped_params: Params,
    annotation: Type,
    default: Any,
    **extras,
) -> Any:
    """
    The first two parameters here are only used for logging if we encounter an error.
    """
    origin = getattr(annotation, "__origin__", None)
    args = getattr(annotation, "__args__", [])

    # The parameter is optional if its default value is not the "no default" sentinel.
    optional = default != _NO_DEFAULT

    if hasattr(annotation, "from_params"):
        if popped_params is default:
            return default
        elif popped_params is not None:
            # Our params have an entry for this, so we use that.

            subextras = create_extras(annotation, extras)

            # In some cases we allow a string instead of a param dict, so
            # we need to handle that case separately.
            if isinstance(popped_params, str):
                popped_params = Params({"type": popped_params})
            elif isinstance(popped_params, dict):
                popped_params = Params(popped_params)
            return annotation.from_params(params=popped_params, **subextras)
        elif not optional:
            # Not optional and not supplied, that's an error!
            raise ConfigurationError(
                f"expected key {argument_name} for {class_name}")
        else:
            return default

    # If the parameter type is a Python primitive, just pop it off
    # using the correct casting pop_xyz operation.
    elif annotation in {int, bool}:
        if type(popped_params) in {int, bool}:
            return annotation(popped_params)
        else:
            raise TypeError(
                f"Expected {argument_name} to be a {annotation.__name__}.")
    elif annotation == str:
        # Strings are special because we allow casting from Path to str.
        if type(popped_params) == str or isinstance(popped_params, Path):
            return str(popped_params)  # type: ignore
        else:
            raise TypeError(f"Expected {argument_name} to be a string.")
    elif annotation == float:
        # Floats are special because in Python, you can put an int wherever you can put a float.
        # https://mypy.readthedocs.io/en/stable/duck_type_compatibility.html
        if type(popped_params) in {int, float}:
            return popped_params
        else:
            raise TypeError(f"Expected {argument_name} to be numeric.")

    # This is special logic for handling types like Dict[str, TokenIndexer],
    # List[TokenIndexer], Tuple[TokenIndexer, Tokenizer], and Set[TokenIndexer],
    # which it creates by instantiating each value from_params and returning the resulting structure.
    elif (origin in {collections.abc.Mapping, Mapping, Dict, dict}
          and len(args) == 2 and can_construct_from_params(args[-1])):
        value_cls = annotation.__args__[-1]

        value_dict = {}

        for key, value_params in popped_params.items():
            value_dict[key] = construct_arg(
                str(value_cls),
                argument_name + "." + key,
                value_params,
                value_cls,
                _NO_DEFAULT,
                **extras,
            )

        return value_dict

    elif origin in (Tuple, tuple) and all(
            can_construct_from_params(arg) for arg in args):
        value_list = []

        for i, (value_cls, value_params) in enumerate(
                zip(annotation.__args__, popped_params)):
            value = construct_arg(
                str(value_cls),
                argument_name + f".{i}",
                value_params,
                value_cls,
                _NO_DEFAULT,
                **extras,
            )
            value_list.append(value)

        return tuple(value_list)

    elif origin in (Set, set) and len(args) == 1 and can_construct_from_params(
            args[0]):
        value_cls = annotation.__args__[0]

        value_set = set()

        for i, value_params in enumerate(popped_params):
            value = construct_arg(
                str(value_cls),
                argument_name + f".{i}",
                value_params,
                value_cls,
                _NO_DEFAULT,
                **extras,
            )
            value_set.add(value)

        return value_set

    elif origin == Union:
        # Storing this so we can recover it later if we need to.
        backup_params = deepcopy(popped_params)

        # We'll try each of the given types in the union sequentially, returning the first one that
        # succeeds.
        for arg_annotation in args:
            try:
                return construct_arg(
                    str(arg_annotation),
                    argument_name,
                    popped_params,
                    arg_annotation,
                    default,
                    **extras,
                )
            except (ValueError, TypeError, ConfigurationError, AttributeError):
                # Our attempt to construct the argument may have modified popped_params, so we
                # restore it here.
                popped_params = deepcopy(backup_params)

        # If none of them succeeded, we crash.
        raise ConfigurationError(
            f"Failed to construct argument {argument_name} with type {annotation}"
        )

    # For any other kind of iterable, we will just assume that a list is good enough, and treat
    # it the same as List. This condition needs to be at the end, so we don't catch other kinds
    # of Iterables with this branch.
    elif (origin in {collections.abc.Iterable, Iterable, List, list}
          and len(args) == 1 and can_construct_from_params(args[0])):
        value_cls = annotation.__args__[0]

        value_list = []

        for i, value_params in enumerate(popped_params):
            value = construct_arg(
                str(value_cls),
                argument_name + f".{i}",
                value_params,
                value_cls,
                _NO_DEFAULT,
                **extras,
            )
            value_list.append(value)

        return value_list

    else:
        # Pass it on as is and hope for the best.   ¯\_(ツ)_/¯
        if isinstance(popped_params, Params):
            return popped_params.as_dict(quiet=True)
        return popped_params
Example #13
0
 def from_params(cls: Type, params: Params) -> 'FromParams':
     type_ = params.pop('type')
     cls_ = cls._registry[type_]
     return cls_(**params)