def _bless_graph_executors(self) -> None:
    """Pre-compute the tensors referenced by the graph executors.

    The computational graph components are lazy: nothing is added to the
    graph until a tensor is "blessed", i.e. referenced and therefore
    executed. Here the blessing is done by formatting the ``fetches`` of
    every executor into a debug message, which causes the whole graph
    needed for those tensors to be built.

    The ``fetches`` of all runners of the model are always blessed. When
    the model has a ``trainer`` attribute (a single trainer or a list of
    trainers), the trainers' ``fetches`` are blessed first.
    """
    log("Building TF Graph")

    if hasattr(self.model, "trainer"):
        # Normalize a single trainer into a one-element list.
        trainers = (self.model.trainer
                    if isinstance(self.model.trainer, List)
                    else [self.model.trainer])

        for executor in trainers:
            message = "Trainer fetches: {}".format(executor.fetches)
            debug(message, "bless")

    for executor in self.model.runners:
        message = "Runner fetches: {}".format(executor.fetches)
        debug(message, "bless")

    log("TF Graph built")
def check_dataset_and_coders(dataset, runners):
    """Check that the dataset contains every series the coders require.

    The required series are collected from the ``data_id`` or ``data_ids``
    attribute of each coder in ``runner.all_coders``. Coders with neither
    attribute are only reported via ``log``.

    Arguments:
        dataset: Dataset to check. Its ``has_series`` method and ``name``
            attribute are used.
        runners: Iterable of runners whose ``all_coders`` are inspected.

    Raises:
        CheckingException: If the dataset lacks at least one required
            series. The message lists each missing series together with
            the coder that needs it.
    """
    # pylint: disable=protected-access
    data_list = []
    for runner in runners:
        for c in runner.all_coders:
            if hasattr(c, "data_id"):
                data_list.append((c.data_id, c))
            elif hasattr(c, "data_ids"):
                data_list.extend([(d, c) for d in c.data_ids])
            else:
                # The trailing space keeps the two literals from fusing
                # into "havea".
                log(("Warning: Coder: {} does not have "
                     "a data attribute").format(c))

    debug("Found series: {}".format(str(data_list)), "checking")

    missing = []
    for (serie, coder) in data_list:
        if not dataset.has_series(serie):
            log("dataset {} does not have serie {}".format(
                dataset.name, serie))
            missing.append((coder, serie))

    if missing:
        # `missing` holds (coder, serie) pairs, so unpack in that order.
        # Fall back to str(coder) so that a coder without a `name` cannot
        # hide the real error behind an AttributeError.
        formated = [
            "{} ({}, {}.{})".format(serie,
                                    getattr(cod, "name", str(cod)),
                                    cod.__class__.__module__,
                                    cod.__class__.__name__)
            for cod, serie in missing
        ]
        raise CheckingException("Dataset '{}' is mising series {}:".format(
            dataset.name, ", ".join(formated)))
def print_tensor(x: np.ndarray) -> tf.Tensor:
    """Write the shape and content of ``x`` to the debug log.

    ``message`` and ``debug_label`` are not defined in this function; they
    must be provided by the surrounding scope. When ``message`` is not
    ``None`` it is prepended to the logged text.

    Arguments:
        x: The value to print; its ``shape`` attribute is read.

    Returns:
        ``x`` itself, unchanged.
    """
    if message is None:
        text = "Shape: {}\n{}".format(x.shape, x)
    else:
        text = "{}, shape: {}:\n{}".format(message, x.shape, x)

    debug(text, debug_label)
    return x
def main() -> None:
    """Run ``_main`` and turn a keyboard interrupt into exit status 1.

    On ``KeyboardInterrupt`` a message is logged, the traceback is sent to
    the debug log and the process exits with status 1.

    Raises:
        SystemExit: With code 1 when the run is interrupted by the user.
    """
    # Local import: `sys.exit` is used instead of the `exit` builtin, which
    # is injected by the `site` module and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen applications).
    import sys

    try:
        _main()
    except KeyboardInterrupt:
        log("Training interrupted by user.")
        debug(traceback.format_exc())
        sys.exit(1)
def dataset_from_files(**kwargs):
    """Create a dataset from the provided keyword arguments.

    Arguments:
        kwargs: Dataset specification treated as a dictionary. The keys
            read directly here are ``random_seed`` (default ``None``),
            ``preprocessor`` (default: identity function) and ``name``.
            Paths of the data series are extracted from the arguments by
            ``_get_series_paths``; keys matching ``SERIES_OUTPUT`` are
            collected as series outputs, keyed by the first regex group.

    Returns:
        The constructed ``Dataset``. When no series paths are found, it is
        built with ``None`` in place of the series and with the name
        ``"dataset"`` unless ``name`` was given.
    """
    random_seed = kwargs.get("random_seed", None)
    preprocess = kwargs.get("preprocessor", lambda x: x)
    name = kwargs.get("name", "dataset")
    series = None

    series_paths = _get_series_paths(kwargs)
    debug("Series paths: {}".format(series_paths), "datasetBuild")

    if len(series_paths) > 0:
        log("Initializing dataset with: {}".format(", ".join(series_paths)))

        series = {}
        for series_id in series_paths:
            series[series_id] = Dataset.create_series(
                series_paths[series_id], preprocess)

        # Without an explicit name, derive one from the paths.
        name = kwargs.get('name', _get_name_from_paths(series_paths))

    series_outputs = {}
    for key, value in kwargs.items():
        matched = SERIES_OUTPUT.match(key)
        if matched:
            series_outputs[matched.group(1)] = value

    dataset = Dataset(name, series, series_outputs, random_seed)
    log("Dataset length: {}".format(len(dataset)))
    return dataset
def build_object(value: str,
                 all_dicts: Dict[str, Any],
                 existing_objects: Dict[str, Any],
                 depth: int) -> Any:
    """Build an object from config dictionary of its arguments.

    Works recursively: tuples are rebuilt as tuples and any other
    non-string iterable as a list, with every member resolved one level
    deeper. A string starting with ``"object:"`` is a reference to a
    config section; the referenced object is instantiated once and cached
    in ``existing_objects`` under the full reference string.

    Arguments:
        value: Value that should be resolved (either a literal value or a
            config section name)
        all_dicts: Configuration dictionaries used to find configuration
            of unconstructed objects.
        existing_objects: Dictionary of already constructed objects.
        depth: The current depth of recursion. Used to prevent an infinite
            recursion.

    Returns:
        The resolved value.

    Raises:
        AssertionError: If ``depth`` exceeds 20.
    """
    # Local import: the `collections.Iterable` alias was removed in
    # Python 3.10; the ABC lives in `collections.abc`.
    from collections.abc import Iterable as AbcIterable

    # TODO detect infinite recursion by other means than depth argument
    # TODO as soon as config is run from an entrypoint, remove the
    # ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    if isinstance(value, tuple):
        return tuple(
            build_object(val, all_dicts, existing_objects, depth + 1)
            for val in value)
    elif isinstance(value, AbcIterable) and not isinstance(value, str):
        return [
            build_object(val, all_dicts, existing_objects, depth + 1)
            for val in value
        ]

    if value in existing_objects:
        debug("Skipping already initialized value: {}".format(value),
              "configBuild")
        return existing_objects[value]

    if isinstance(value, str):
        # either a string or a reference to an object
        if not value.startswith("object:"):
            return value

        # Strip the "object:" prefix to get the config section name.
        obj = instantiate_class(value[7:], all_dicts, existing_objects, depth)
        existing_objects[value] = obj
        return obj

    if isinstance(value, ClassSymbol):
        return value.create()

    return value
def get_initializer(self,
                    var_name: str,
                    default: Callable = None) -> Optional[Callable]:
    """Return the initializer associated with the given variable name.

    Calling the method marks the given initializer as used by recording
    ``var_name`` in ``self._initialized_variables``.

    Arguments:
        var_name: Name of the variable to look up in
            ``self._initializers``.
        default: Value returned when no initializer is registered for
            ``var_name``.

    Returns:
        The registered initializer, or ``default``.
    """
    found = self._initializers.get(var_name, default)

    # Only report initializers that differ from the fallback.
    if found is not default:
        message = "Using {} for variable {}".format(found, var_name)
        debug(message)

    self._initialized_variables.add(var_name)
    return found
def build_object(value: str,
                 all_dicts: Dict[str, Any],
                 existing_objects: Dict[str, Any],
                 depth: int) -> Any:
    """Build an object from config dictionary of its arguments.

    Works recursively: tuples are rebuilt as tuples and any other
    non-string iterable as a list, with every member resolved one level
    deeper. An ``ObjectRef`` is resolved to the object of the referenced
    config section, which is instantiated on first use and cached in
    ``existing_objects`` under the reference name.

    Arguments:
        value: Value that should be resolved (either a literal value or a
            config section name)
        all_dicts: Configuration dictionaries used to find configuration
            of unconstructed objects.
        existing_objects: Dictionary of already constructed objects.
        depth: The current depth of recursion. Used to prevent an infinite
            recursion.

    Returns:
        The resolved value.

    Raises:
        AssertionError: If ``depth`` exceeds 20.
    """
    # Local import: the `collections.Iterable` alias was removed in
    # Python 3.10; the ABC lives in `collections.abc`.
    from collections.abc import Iterable as AbcIterable

    # TODO detect infinite recursion by other means than depth argument
    # TODO as soon as config is run from an entrypoint, remove the
    # ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    if isinstance(value, tuple):
        return tuple(build_object(val, all_dicts, existing_objects, depth + 1)
                     for val in value)

    if isinstance(value, AbcIterable) and not isinstance(value, str):
        return [build_object(val, all_dicts, existing_objects, depth + 1)
                for val in value]

    if isinstance(value, ObjectRef):
        if value.name in existing_objects:
            debug("Skipping already initialized object: {}".format(value.name),
                  "configBuild")
        else:
            existing_objects[value.name] = instantiate_class(
                value.name, all_dicts, existing_objects, depth)

        # Bind the reference in both cases so that `target` is available.
        value.bind(existing_objects[value.name])
        return value.target

    if isinstance(value, ClassSymbol):
        return value.create()

    return value
def _get_series_paths_and_readers(
        series_config: SeriesConfig) -> Dict[str, Tuple[List[str], Reader]]:
    """Get paths to files that contain data from the dataset kwargs.

    Only the keys of ``series_config`` that match ``SERIES_SOURCE`` are
    used; the serie name is extracted from the key with
    ``get_first_match``. The value is either a path pattern, a list of
    path patterns, or a tuple of (pattern or list of patterns, reader).
    Without an explicit reader, ``UtfPlainTextReader`` is used. Each
    pattern is expanded with :py:func:`glob.glob` and its matches are
    sorted.

    Arguments:
        series_config: A dictionary containing the dataset keyword
            argument specs.

    Returns:
        A dictionary which maps serie names to the paths of their input
        files and readers.

    Raises:
        FileNotFoundError: If a pattern does not match any file.
    """
    source_keys = [key for key in series_config.keys()
                   if SERIES_SOURCE.match(key)]
    serie_names = [get_first_match(SERIES_SOURCE, key) for key in source_keys]

    series_sources = {}
    for name, key in zip(serie_names, source_keys):
        spec = cast(ReaderDef, series_config[key])

        if isinstance(spec, tuple):
            patterns, reader = spec  # type: ignore
        else:
            patterns, reader = spec, UtfPlainTextReader

        # A single pattern is handled as a one-element list.
        if isinstance(patterns, str):
            patterns = [patterns]

        paths = []
        for pattern in patterns:
            matched_files = sorted(glob.glob(pattern))
            if len(matched_files) == 0:
                raise FileNotFoundError(
                    "Pattern did not match any files. Series: {}, Pattern: {}"
                    .format(name, pattern))
            paths += matched_files

        debug("Series '{}' has the following files: {}".format(name, paths))
        series_sources[name] = (paths, reader)

    return series_sources
def build_object(value, all_dicts, existing_objects, depth):
    """Build an object from config dictionary of its arguments.

    It works recursively: every non-string iterable is rebuilt as a list
    with each member resolved one level deeper. A string starting with
    ``"object:"`` is a reference to a config section; the referenced
    object is instantiated once and cached in ``existing_objects`` under
    the full reference string.

    Arguments:
        value: Value that should be resolved (either a literal value or a
            config section name)
        all_dicts: Configuration dictionaries used to find configuration
            of unconstructed objects.
        existing_objects: Dictionary of already constructed objects.
        depth: The current depth of recursion. Used to prevent an infinite
            recursion.

    Returns:
        The resolved value.

    Raises:
        AssertionError: If ``depth`` exceeds 20.
    """
    # Local import: the `collections.Iterable` alias was removed in
    # Python 3.10; the ABC lives in `collections.abc`.
    from collections.abc import Iterable as AbcIterable

    # TODO detect infinite recursion by other means than depth argument
    # TODO as soon as config is run from an entrypoint, remove the
    # ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    if isinstance(value, AbcIterable) and not isinstance(value, str):
        return [build_object(val, all_dicts, existing_objects, depth + 1)
                for val in value]

    if value in existing_objects:
        debug("Skipping already initialized value: {}".format(value),
              "configBuild")
        return existing_objects[value]

    if isinstance(value, str):
        # either a string or a reference to an object
        if not value.startswith("object:"):
            return value

        # Strip the "object:" prefix to get the config section name.
        obj = instantiate_class(value[7:], all_dicts, existing_objects, depth)
        existing_objects[value] = obj
        return obj

    return value
def initial_loop_state(self) -> AttentionLoopState:
    """Create the empty attention loop state.

    The state length is the sum of the second dimension of every tensor in
    ``self._encoders_tensors``, plus one when ``self._use_sentinels`` is
    set.
    """
    # As with the feed-forward attention, the encoder projections and
    # masks have to be built before the while loop is entered, so that
    # they are not created as a part of the loop.

    # pylint: disable=not-an-iterable
    for projection in self.encoder_projections_for_logits:
        debug(projection, "bless")
    debug(self.masks_concat, "bless")

    encoder_lengths = [tf.shape(states)[1]
                       for states in self._encoders_tensors]
    # pylint: enable=not-an-iterable
    length = sum(encoder_lengths)

    if self._use_sentinels:
        length = length + 1

    return empty_attention_loop_state(
        self.batch_size, length, self.context_vector_size)
def check_dataset_and_coders(dataset: Dataset,
                             runners: Iterable[BaseRunner]) -> None:
    """Check that the dataset contains every series the feedables need.

    For each object in ``runner.feedables`` the required series are read
    from its ``data_id`` or ``data_ids`` attribute. If it has neither but
    has an ``input_sequence``, the same two attributes are looked up on
    the input sequence. Objects providing no series are only logged.

    Arguments:
        dataset: Dataset to check. Its ``has_series`` method and ``name``
            attribute are used.
        runners: Runners whose ``feedables`` are inspected.

    Raises:
        CheckingException: If the dataset lacks at least one required
            series.
    """
    # pylint: disable=protected-access
    required = []
    for runner in runners:
        for feedable in runner.feedables:
            if hasattr(feedable, "data_id"):
                required.append((getattr(feedable, "data_id"), feedable))
                continue

            if hasattr(feedable, "data_ids"):
                required.extend(
                    [(d, feedable) for d in getattr(feedable, "data_ids")])
                continue

            if not hasattr(feedable, "input_sequence"):
                log(("Coder: {} has neither an input sequence attribute nor a "
                     "a data attribute.").format(feedable))
                continue

            # Fall back to the series declared by the input sequence.
            inpseq = getattr(feedable, "input_sequence")
            if hasattr(inpseq, "data_id"):
                required.append((getattr(inpseq, "data_id"), feedable))
            elif hasattr(inpseq, "data_ids"):
                required.extend(
                    [(d, feedable) for d in getattr(inpseq, "data_ids")])
            else:
                log("Input sequence: {} does not have a data attribute".
                    format(str(inpseq)))

    debug("Found series: {}".format(str(required)), "checking")

    missing = []
    for serie, owner in required:
        if dataset.has_series(serie):
            continue
        log("dataset {} does not have serie {}".format(dataset.name, serie))
        missing.append((owner, serie))

    if not missing:
        return

    formated = []
    for cod, serie in missing:
        cod_class = cod.__class__
        formated.append("{} ({}, {}.{})".format(
            serie, str(cod), cod_class.__module__, cod_class.__name__))

    raise CheckingException("Dataset '{}' is mising series {}:".format(
        dataset.name, ", ".join(formated)))
def _get_series_paths_and_readers(
        series_config: SeriesConfig) -> Dict[str, Tuple[List[str], Reader]]:
    """Get paths to files that contain data from the dataset kwargs.

    Only the keys of ``series_config`` that match ``SERIES_SOURCE`` are
    used; the serie name is extracted from the key with
    ``get_first_match``. The value is either a path pattern, a list of
    path patterns, or a tuple of (pattern or list of patterns, reader).
    Without an explicit reader, ``UtfPlainTextReader`` is used. The
    patterns are turned into file paths by ``_expand_patterns_flat``.

    Arguments:
        series_config: A dictionary containing the dataset keyword
            argument specs.

    Returns:
        A dictionary which maps serie names to the paths of their input
        files and readers.
    """
    source_keys = [key for key in series_config.keys()
                   if SERIES_SOURCE.match(key)]
    serie_names = [get_first_match(SERIES_SOURCE, key) for key in source_keys]

    series_sources = {}
    for name, key in zip(serie_names, source_keys):
        spec = cast(ReaderDef, series_config[key])

        if isinstance(spec, tuple):
            patterns, reader = spec  # type: ignore
        else:
            patterns, reader = spec, UtfPlainTextReader

        # A single pattern is handled as a one-element list.
        if isinstance(patterns, str):
            patterns = [patterns]

        paths = _expand_patterns_flat(patterns)
        debug("Series '{}' has the following files: {}".format(name, paths))

        series_sources[name] = (paths, reader)

    return series_sources
def instantiate_class(name: str,
                      all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any],
                      depth: int) -> Any:
    """Instantiate a class from the configuration.

    The config section ``name`` must contain a ``class`` entry whose
    ``create()`` result is a class or a function. All other entries are
    resolved with ``build_object`` and passed to it as keyword arguments.

    Arguments: see help(build_object)

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            ``class`` entry, or the entry is not a class or function.
        ConfigBuildException: If the arguments do not fit the signature.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    section = all_dicts[name]

    if "class" not in section:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = section["class"].create()

    if not (isclass(clazz) or isfunction(clazz)):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # resolve every entry except "class" into a constructor argument
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in section.items()
        if key != "class"
    }

    # check the arguments against the signature before the actual call
    construct_sig = signature(clazz)
    try:
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        raise ConfigBuildException(clazz, exc)

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")
    return obj
def instantiate_class(name, all_dicts, existing_objects, depth):
    """Instantiate a class from the configuration.

    The config section ``name`` must contain a ``class`` entry that is a
    class or a function. All other entries are resolved with
    ``build_object`` and passed to it as keyword arguments.

    Arguments: see help(build_object)

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            ``class`` entry, or the entry is not a class or function.
        ConfigBuildException: If the arguments do not fit the signature.
    """
    if name not in all_dicts:
        debug(all_dicts, "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    section = all_dicts[name]

    if "class" not in section:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = section["class"]

    if not (isclass(clazz) or isfunction(clazz)):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # resolve every entry except "class" into a constructor argument
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in section.items()
        if key != "class"
    }

    # check the arguments against the signature before the actual call
    construct_sig = signature(clazz)
    try:
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        raise ConfigBuildException(clazz, exc)

    debug("Instatiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")
    return obj
def initial_loop_state(self) -> AttentionLoopState:
    """Create the empty attention loop state.

    The state length is the second dimension of ``self.attention_states``.
    """
    # The hidden_features and attention_mask must be pre-computed here.
    # When this attention is combined with a decoder that has both train
    # and runtime while loops, these tensors need to be created outside of
    # any of those loops in order to be available to both.
    # Lazy loading is not broken by this, because this method is called
    # from a lazy tensor.
    debug("Pre-computing attention tensors", "bless")

    features_msg = "Hidden features: {}".format(self.hidden_features)
    debug(features_msg, "bless")

    mask_msg = "Hidden mask: {}".format(self.attention_mask)
    debug(mask_msg, "bless")

    batch_size = self.batch_size
    length = tf.shape(self.attention_states)[1]
    context_size = self.context_vector_size

    return empty_attention_loop_state(batch_size, length, context_size)
def instantiate_class(name: str,
                      all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any],
                      depth: int) -> Any:
    """Instantiate a class from the configuration.

    The config section ``name`` must contain a ``class`` entry whose
    ``create()`` result is a class or a function. All other entries are
    resolved with ``build_object`` and passed to it as keyword arguments.
    If the callable has a ``name`` parameter annotated as ``str`` and the
    section does not set it, the section name is passed as ``name``.

    Arguments: see help(build_object)

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            ``class`` entry, or the entry is not a class or function.
        ConfigBuildException: If the arguments do not fit the signature.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    this_dict = all_dicts[name]

    if "class" not in this_dict:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = this_dict["class"].create()

    if not isclass(clazz) and not isfunction(clazz):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # prepare the arguments for the constructor
    arguments = dict()

    for key, value in this_dict.items():
        if key == "class":
            continue

        arguments[key] = build_object(value, all_dicts, existing_objects,
                                      depth + 1)

    # get a signature of the constructing function
    construct_sig = signature(clazz)

    # if a signature contains a "name" attribute which is not in arguments,
    # replace it with the name of the section
    if "name" in construct_sig.parameters and "name" not in arguments:
        annotation = construct_sig.parameters["name"].annotation

        if annotation == Parameter.empty:
            debug(
                "No type annotation for the 'name' parameter in "
                "class/function {}. Default value will not be used.".format(
                    this_dict["class"].clazz), "configBuild")
        elif annotation != str:
            # The message is kept as adjacent literals on separate lines;
            # a single literal must not span a physical line break.
            debug(
                "Type annotation for the 'name' parameter in class/function "
                "{} is not 'str'. "
                "Default value will not be used.".format(
                    this_dict["class"].clazz), "configBuild")
            debug("Annotation is {}".format(str(annotation)))
        else:
            debug(
                "Using default 'name' for object {}".format(
                    this_dict["class"].clazz), "configBuild")
            arguments["name"] = name

    try:
        # try to bound the arguments to the signature
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        raise ConfigBuildException(clazz, exc)

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # call the function with the arguments
    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")

    return obj
def instantiate_class(name: str,
                      all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any],
                      depth: int) -> Any:
    """Instantiate a class from the configuration.

    The config section ``name`` must contain a ``class`` entry whose
    ``create()`` result is a class or a function. All other entries are
    resolved with ``build_object`` and passed to it as keyword arguments.
    If the callable has a ``name`` parameter annotated as ``str`` and the
    section does not set it, the section name is passed as ``name``.

    Arguments: see help(build_object)

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            ``class`` entry, or the entry is not a class or function.
        ConfigBuildException: If the arguments do not fit the signature.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    section = all_dicts[name]

    if "class" not in section:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = section["class"].create()

    if not (isclass(clazz) or isfunction(clazz)):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # resolve every entry except "class" into a constructor argument
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in section.items()
        if key != "class"
    }

    construct_sig = signature(clazz)

    # when the callable takes a "name" that the section does not provide,
    # the section name may be used instead
    needs_name = ("name" in construct_sig.parameters
                  and "name" not in arguments)
    if needs_name:
        annotation = construct_sig.parameters["name"].annotation

        if annotation == Parameter.empty:
            message = ("No type annotation for the 'name' parameter in "
                       "class/function {}. Default value will not be used."
                       .format(section["class"].clazz))
            debug(message, "configBuild")
        elif annotation != str:
            message = ("Type annotation for the 'name' parameter in "
                       "class/function {} is not 'str'. Default value will "
                       "not be used.".format(section["class"].clazz))
            debug(message, "configBuild")
            debug("Annotation is {}".format(str(annotation)))
        else:
            message = "Using default 'name' for object {}".format(
                section["class"].clazz)
            debug(message, "configBuild")
            arguments["name"] = name

    # check the arguments against the signature before the actual call
    try:
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        raise ConfigBuildException(clazz, exc)

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")
    return obj