def make_vocab_from_params(params: Params, serialization_dir: str):
    """Build a ``Vocabulary`` from the datasets described in ``params`` and
    write it to ``<serialization_dir>/vocabulary``.

    Parameters
    ----------
    params : ``Params``
        The experiment configuration; ``vocabulary`` and
        ``datasets_for_vocab_creation`` entries are consumed here.
    serialization_dir : str
        Directory under which the ``vocabulary`` subdirectory is created.

    Raises
    ------
    ConfigurationError
        If the target vocabulary directory already contains files, or if
        ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    # BUG FIX: ``os.listdir`` returns a list and never None, so the previous
    # ``os.listdir(vocab_dir) is not None`` check was True for *every*
    # existing directory, even an empty one. Use truthiness so we only
    # refuse to proceed when the directory actually contains entries.
    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    # Only instances from the requested datasets contribute to the vocabulary.
    instances = [instance for key, dataset in all_datasets.items()
                 for instance in dataset
                 if key in datasets_for_vocab_creation]

    vocab = Vocabulary.from_params(vocab_params, instances)

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)
    logger.info("done creating vocab")
def from_params(cls, params: Params) -> 'B':
    """Construct a ``B`` from ``params``, registering its file for archival."""
    # Register the file *before* popping it, so the key is still present.
    params.add_file_to_archive("filename")
    name = params.pop("filename")
    nested_c = C.from_params(params.pop("c"))
    return cls(name, nested_c)
def dry_run_from_params(params: Params, serialization_dir: str) -> None:
    """Run all the pre-training setup steps — vocabulary creation, instance
    indexing, model construction, and parameter freezing — and log statistics,
    without actually training a model.

    Parameters
    ----------
    params : ``Params``
        The full experiment configuration; ``vocabulary``, ``model`` and
        ``trainer`` entries (among others) are consumed here.
    serialization_dir : str
        Directory under which the ``vocabulary`` subdirectory is created.

    Raises
    ------
    ConfigurationError
        If the target vocabulary directory already contains files, or if
        ``datasets_for_vocab_creation`` names a dataset that was not loaded.
    """
    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    # BUG FIX: ``os.listdir`` returns a list and never None, so the previous
    # ``is not None`` check rejected every existing directory, empty or not.
    # Only refuse when the directory actually contains entries.
    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    # Only instances from the requested datasets contribute to the vocabulary.
    instances = [instance for key, dataset in all_datasets.items()
                 for instance in dataset
                 if key in datasets_for_vocab_creation]

    vocab = Vocabulary.from_params(vocab_params, instances)

    # Index everything once so we can report dataset/vocabulary statistics.
    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Freeze any parameters whose names match the trainer's "no_grad" regexes,
    # then report which parameters ended up frozen vs. tunable.
    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
def test_from_params(self):
    """``RegularizerApplicator.from_params`` should build the configured
    regularizer type (with its arguments) for each regex entry."""
    params = Params({
        "regularizers": [
            ("conv", "l1"),
            ("linear", {"type": "l2", "alpha": 10}),
        ]
    })
    applicator = RegularizerApplicator.from_params(params.pop("regularizers"))
    regularizers = applicator._regularizers  # pylint: disable=protected-access

    found = {}
    for regex, regularizer in regularizers:
        found[regex] = regularizer

    assert isinstance(found.get("conv"), L1Regularizer)
    assert isinstance(found.get("linear"), L2Regularizer)
    assert found["linear"].alpha == 10
def from_params(cls, params: Params) -> 'A':
    """Construct an ``A`` by delegating the nested "b" sub-params to ``B``."""
    return cls(B.from_params(params.pop("b")))
def from_params(cls, params: Params) -> 'C':
    """Construct a ``C``, registering its file for archival."""
    # Register before pop() so the "c_file" key is still in the params.
    params.add_file_to_archive("c_file")
    return cls(params.pop("c_file"))
def test_add_file_to_archive(self):
    """``add_file_to_archive`` calls made during nested ``from_params``
    construction should record fully-qualified keys for every file."""
    # add_file_to_archive raises if the file does not exist, so create
    # real (empty) files on disk first.
    tempdir = tempfile.mkdtemp()
    my_file = os.path.join(tempdir, "my_file.txt")
    my_other_file = os.path.join(tempdir, "my_other_file.txt")
    for path in (my_file, my_other_file):
        with open(path, 'w'):
            pass

    # Minimal nested classes exercising the ``from_params`` /
    # ``add_file_to_archive`` interaction at two levels of nesting.
    class C:
        def __init__(self, c_file: str) -> None:
            self.c_file = c_file

        @classmethod
        def from_params(cls, params: Params) -> 'C':
            params.add_file_to_archive("c_file")
            return cls(params.pop("c_file"))

    class B:
        def __init__(self, filename: str, c) -> None:
            self.filename = filename
            self.c_dict = {"here": c}

        @classmethod
        def from_params(cls, params: Params) -> 'B':
            params.add_file_to_archive("filename")
            filename = params.pop("filename")
            return cls(filename, C.from_params(params.pop("c")))

    class A:
        def __init__(self, b) -> None:
            self.b = b

        @classmethod
        def from_params(cls, params: Params) -> 'A':
            return cls(B.from_params(params.pop("b")))

    params = Params({
        "a": {
            "b": {
                "filename": my_file,
                "c": {"c_file": my_other_file},
            }
        }
    })

    # Construct an ``A`` purely for its side effects on ``params``.
    A.from_params(params.pop("a"))

    assert params.files_to_archive == {
        "a.b.filename": my_file,
        "a.b.c.c_file": my_other_file,
    }
def create_kwargs(cls: Type[T], params: Params, **extras) -> Dict[str, Any]:
    """
    Given some class, a `Params` object, and potentially other keyword arguments,
    create a dict of keyword args suitable for passing to the class's constructor.

    The function does this by finding the class's constructor, matching the constructor
    arguments to entries in the `params` object, and instantiating values for the parameters
    using the type annotation and possibly a from_params method.

    Any values that are provided in the `extras` will just be used as is.
    For instance, you might provide an existing `Vocabulary` this way.
    """
    # Get the signature of the constructor.
    signature = inspect.signature(cls.__init__)
    kwargs: Dict[str, Any] = {}

    # Iterate over all the constructor parameters and their annotations.
    for name, param in signature.parameters.items():
        # Skip "self". You're not *required* to call the first parameter "self",
        # so in theory this logic is fragile, but if you don't call the self parameter
        # "self" you kind of deserve what happens.
        if name == "self":
            continue

        # If the annotation is a compound type like typing.Dict[str, int],
        # it will have an __origin__ field indicating `typing.Dict`
        # and an __args__ field indicating `(str, int)`. We capture both.
        annotation = remove_optional(param.annotation)
        origin = getattr(annotation, '__origin__', None)
        args = getattr(annotation, '__args__', [])

        # The parameter is optional if its default value is not the "no default" sentinel.
        default = param.default
        # BUG FIX: compare against the sentinel by identity, not equality.
        # `default != _NO_DEFAULT` invokes the default value's __eq__/__ne__,
        # which can misbehave (or raise) for defaults such as numpy arrays.
        optional = default is not _NO_DEFAULT

        # Some constructors expect extra non-parameter items, e.g. vocab: Vocabulary.
        # We check the provided `extras` for these and just use them if they exist.
        if name in extras:
            kwargs[name] = extras[name]

        # The next case is when the parameter type is itself constructible from_params.
        elif hasattr(annotation, 'from_params'):
            if name in params:
                # Our params have an entry for this, so we use that.
                subparams = params.pop(name)

                if takes_arg(annotation.from_params, 'extras'):
                    # If annotation.params accepts **extras, we need to pass them all along.
                    # For example, `BasicTextFieldEmbedder.from_params` requires a Vocabulary
                    # object, but `TextFieldEmbedder.from_params` does not.
                    subextras = extras
                else:
                    # Otherwise, only supply the ones that are actual args; any additional ones
                    # will cause a TypeError.
                    subextras = {k: v for k, v in extras.items()
                                 if takes_arg(annotation.from_params, k)}

                # In some cases we allow a string instead of a param dict, so
                # we need to handle that case separately.
                if isinstance(subparams, str):
                    kwargs[name] = annotation.by_name(subparams)()
                else:
                    kwargs[name] = annotation.from_params(params=subparams, **subextras)
            elif not optional:
                # Not optional and not supplied, that's an error!
                raise ConfigurationError(f"expected key {name} for {cls.__name__}")
            else:
                kwargs[name] = default

        # If the parameter type is a Python primitive, just pop it off
        # using the correct casting pop_xyz operation.
        elif annotation == str:
            kwargs[name] = (params.pop(name, default)
                            if optional
                            else params.pop(name))
        elif annotation == int:
            kwargs[name] = (params.pop_int(name, default)
                            if optional
                            else params.pop_int(name))
        elif annotation == bool:
            kwargs[name] = (params.pop_bool(name, default)
                            if optional
                            else params.pop_bool(name))
        elif annotation == float:
            kwargs[name] = (params.pop_float(name, default)
                            if optional
                            else params.pop_float(name))

        # This is special logic for handling types like Dict[str, TokenIndexer], which it creates by
        # instantiating each value from_params and returning the resulting dict.
        elif origin in (Dict, dict) and len(args) == 2 and hasattr(args[-1], 'from_params'):
            value_cls = annotation.__args__[-1]

            value_dict = {}
            for key, value_params in params.pop(name, Params({})).items():
                value_dict[key] = value_cls.from_params(params=value_params, **extras)

            kwargs[name] = value_dict

        else:
            # Pass it on as is and hope for the best.   ¯\_(ツ)_/¯
            if optional:
                kwargs[name] = params.pop(name, default)
            else:
                kwargs[name] = params.pop(name)

    params.assert_empty(cls.__name__)
    return kwargs