Exemple #1
    def _bless_graph_executors(self) -> None:
        """Pre-compute the tensors referenced by the graph executors.

        Due to the lazy nature of the computational graph related components,
        nothing is actually added to the graph until it is "blessed"
        (referenced, and therefore, executed).

        "Blessing" is usually implemented in the form of a log or a debug call
        with the blessed tensor as parameter. Referencing a `Tensor` causes the
        whole computational graph that is needed to evaluate the tensor to be
        built and added to the graph.

        This function "blesses" all tensors that could be potentially used
        using the `fetches` property of the provided runner objects.

        If the model has a `trainer` attribute, this function also blesses
        the tensors fetched by the trainer(s).
        """
        log("Building TF Graph")
        if hasattr(self.model, "trainer"):
            # `trainer` may be either a single trainer or a list of them;
            # normalize to a list so that one loop handles both cases.
            if isinstance(self.model.trainer, List):
                trainers = self.model.trainer
            else:
                trainers = [self.model.trainer]

            for trainer in trainers:
                # Accessing `fetches` is what triggers the graph construction.
                debug("Trainer fetches: {}".format(trainer.fetches), "bless")

        for runner in self.model.runners:
            debug("Runner fetches: {}".format(runner.fetches), "bless")
        log("TF Graph built")
Exemple #2
def check_dataset_and_coders(dataset, runners):
    """Check that the dataset contains every series the coders refer to.

    Args:
        dataset: Object providing ``has_series(name)`` and a ``name``
            attribute.
        runners: Iterable of runners; the ``all_coders`` attribute of each
            runner is inspected for ``data_id`` / ``data_ids`` attributes.

    Raises:
        CheckingException: If at least one referenced series is not present
            in the dataset.
    """
    # pylint: disable=protected-access

    # Pairs of (series name, coder that requires it).
    data_list = []

    for runner in runners:
        for c in runner.all_coders:
            if hasattr(c, "data_id"):
                data_list.append((c.data_id, c))
            elif hasattr(c, "data_ids"):
                data_list.extend([(d, c) for d in c.data_ids])
            else:
                log(("Warning: Coder: {} does not have "
                     "a data attribute").format(c))

    debug("Found series: {}".format(str(data_list)), "checking")

    # Pairs of (coder, series name) for series absent from the dataset.
    missing = []

    for (serie, coder) in data_list:
        if not dataset.has_series(serie):
            log("dataset {} does not have serie {}".format(
                dataset.name, serie))
            missing.append((coder, serie))

    if missing:
        # `missing` holds (coder, serie) pairs, so unpack in that order.
        formated = [
            "{} ({}, {}.{})".format(serie, cod.name, cod.__class__.__module__,
                                    cod.__class__.__name__)
            for cod, serie in missing]

        raise CheckingException("Dataset '{}' is mising series {}:".format(
            dataset.name, ", ".join(formated)))
Exemple #3
 def print_tensor(x: np.ndarray) -> tf.Tensor:
     """Log the shape and content of `x` and return it unchanged.

     `message` and `debug_label` are taken from the enclosing scope. When
     `message` is not None it is prepended to the logged text.
     """
     if message is not None:
         debug(
             "{}, shape: {}:\n{}".format(message, x.shape, x), debug_label)
     else:
         debug("Shape: {}\n{}".format(x.shape, x), debug_label)
     return x
Exemple #4
Exemple #5
def main() -> None:
    # NOTE(review): the `try:` statement and its body that this handler
    # belongs to are missing from this excerpt; only the interrupt handling
    # is visible here.
    except KeyboardInterrupt:
        # A Ctrl-C is reported through the log instead of propagating.
        log("Training interrupted by user.")
Exemple #6
Exemple #7
Exemple #8
def dataset_from_files(**kwargs):
    """Create a dataset from the provided keyword arguments.

    Paths to the data are provided in a form of dictionary.

    Args:
        kwargs: Arguments are treated as a dictionary. The following keys
            are read directly:

            * ``random_seed``: passed to the ``Dataset`` constructor
              (defaults to None).
            * ``preprocessor``: function applied when creating every series
              (defaults to the identity function).
            * ``name``: name of the dataset. When missing, the name is
              derived from the series paths, or ``"dataset"`` is used when
              there are no series paths.

            The paths to the data series are extracted from the arguments by
            ``_get_series_paths``; keys matching ``SERIES_OUTPUT`` define the
            series outputs.

    Returns:
        The newly constructed ``Dataset``.
    """
    # NOTE(review): an earlier version of this docstring also mentioned a
    # per-series ``<identifier>_preprocess`` argument; only the global
    # ``preprocessor`` is handled by the code below.
    random_seed = kwargs.get("random_seed", None)
    preprocess = kwargs.get("preprocessor", lambda x: x)
    name = kwargs.get("name", "dataset")
    series = None
    series_paths = _get_series_paths(kwargs)

    debug("Series paths: {}".format(series_paths), "datasetBuild")

    if series_paths:
        log("Initializing dataset with: {}".format(", ".join(series_paths)))
        series = {s: Dataset.create_series(series_paths[s], preprocess)
                  for s in series_paths}
        name = kwargs.get('name', _get_name_from_paths(series_paths))

    # Match every key only once and reuse the match object.
    series_outputs = {}
    for key, value in kwargs.items():
        output_match = SERIES_OUTPUT.match(key)
        if output_match:
            series_outputs[output_match.group(1)] = value

    dataset = Dataset(name, series, series_outputs, random_seed)
    log("Dataset length: {}".format(len(dataset)))
    return dataset
Exemple #9
def build_object(value: str, all_dicts: Dict[str, Any],
                 existing_objects: Dict[str, Any], depth: int) -> Any:
    """Build an object from config dictionary of its arguments.

    Works recursively.

    Args:
        value: Value that should be resolved (either a literal value or
               a config section name)
        all_dicts: Configuration dictionaries used to find configuration
                   of unconstructed objects.
        existing_objects: Dictionary of already constructed objects. Newly
                          built objects are stored into it.
        depth: The current depth of recursion. Used to prevent an infinite
               recursion.

    Returns:
        A tuple for tuple input, a list for any other non-string iterable,
        the constructed (or cached) object for an ``object:`` reference,
        and the value itself otherwise.

    Raises:
        AssertionError: If the recursion gets deeper than 20 levels.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally to keep the block self-contained.
    from collections.abc import Iterable as AbcIterable

    # TODO detect infinite recursion by other means than depth argument
    # TODO as soon as config is run from an entrypoint, remove the
    # ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    # if isinstance(value, str) and value in ignore_names:
    # TODO writing into the argument
    #   existing_objects[value] = None

    # Containers are resolved element-wise; tuples stay tuples, every other
    # non-string iterable becomes a list.
    if isinstance(value, tuple):
        return tuple(
            build_object(val, all_dicts, existing_objects, depth + 1)
            for val in value)
    elif (isinstance(value, AbcIterable)
          and not isinstance(value, str)):
        return [
            build_object(val, all_dicts, existing_objects, depth + 1)
            for val in value]

    if value in existing_objects:
        debug("Skipping already initialized value: {}".format(value),
              "configBuild")

        return existing_objects[value]

    if isinstance(value, str):
        # either a string or a reference to an object
        if not value.startswith("object:"):
            return value

        # value[7:] strips the "object:" prefix, leaving the section name.
        obj = instantiate_class(value[7:], all_dicts, existing_objects, depth)
        existing_objects[value] = obj
        return obj

    if isinstance(value, ClassSymbol):
        return value.create()

    return value
Exemple #11
    def get_initializer(self, var_name: str,
                        default: Optional[Callable] = None
                        ) -> Optional[Callable]:
        """Return the initializer associated with the given variable name.

        Args:
            var_name: Key looked up in `self._initializers`.
            default: Value returned when no initializer is registered under
                `var_name`.

        Returns:
            The registered initializer, or `default` if there is none.
        """
        # NOTE(review): the previous docstring stated that calling this
        # method marks the initializer as used; no such bookkeeping is
        # visible in this body -- confirm whether it happens elsewhere.
        initializer = self._initializers.get(var_name, default)
        if initializer is not default:
            # Only report when a registered initializer is actually used.
            debug("Using {} for variable {}".format(initializer, var_name))
        return initializer
Exemple #12
Exemple #13
def _get_series_paths_and_readers(
        series_config: SeriesConfig) -> Dict[str, Tuple[List[str], Reader]]:
    """Get paths to files that contain data from the dataset kwargs.

    Input file for a serie named 'xxx' is specified by parameter 's_xxx'. The
    dataset series is defined by a string with a path / list of strings with
    paths, or a tuple whose first member is a path or a list of paths and the
    second member is a reader function.

    The paths can contain wildcards, which will be expanded using
    :py:func:`glob.glob` in sorted order.

    Arguments:
        series_config: A dictionary containing the dataset keyword argument

    Returns:
        A dictionary which maps serie names to the paths of their input files
        and readers.

    Raises:
        FileNotFoundError: If any of the patterns does not match a file.
    """
    series_sources = {}
    for key in series_config:
        # Only the keys that describe a series source are of interest.
        if not SERIES_SOURCE.match(key):
            continue
        name = get_first_match(SERIES_SOURCE, key)
        value = cast(ReaderDef, series_config[key])

        if isinstance(value, tuple):
            patterns, reader = value  # type: ignore
        else:
            # No explicit reader was given; fall back to plain text.
            patterns = value
            reader = UtfPlainTextReader

        if isinstance(patterns, str):
            patterns = [patterns]

        paths = []
        for pattern in patterns:
            matched_files = sorted(glob.glob(pattern))
            if not matched_files:
                raise FileNotFoundError(
                    "Pattern did not match any files. Series: {}, Pattern: {}"
                    .format(name, pattern))
            # Collect the expanded files; without this `paths` stays empty.
            paths.extend(matched_files)

        debug("Series '{}' has the following files: {}".format(name, paths))

        series_sources[name] = (paths, reader)

    return series_sources
def build_object(value, all_dicts, existing_objects, depth):
    """Build an object from config dictionary of its arguments.

    It works recursively.

    Args:
        value: Value that should be resolved (either a literal value or
               a config section name)
        all_dicts: Configuration dictionaries used to find configuration
                   of unconstructed objects.
        existing_objects: Dictionary of already constructed objects. Newly
                          built objects are stored into it.
        depth: The current depth of recursion. Used to prevent an infinite
               recursion.

    Returns:
        A list for any non-string iterable, the constructed (or cached)
        object for an ``object:`` reference, and the value itself otherwise.

    Raises:
        AssertionError: If the recursion gets deeper than 20 levels.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally to keep the block self-contained.
    from collections.abc import Iterable as AbcIterable

    # TODO detect infinite recursion by other means than depth argument
    # TODO as soon as config is run from an entrypoint, remove the
    #      ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    # if isinstance(value, str) and value in ignore_names:
    #     TODO writing into the argument
    #     existing_objects[value] = None

    # Every non-string iterable is resolved element-wise into a list.
    if isinstance(value, AbcIterable) and not isinstance(value, str):
        return [build_object(val, all_dicts, existing_objects, depth + 1)
                for val in value]

    if value in existing_objects:
        debug("Skipping already initialized value: {}".format(value),
              "configBuild")

        return existing_objects[value]

    if isinstance(value, str):
        # either a string or a reference to an object
        if not value.startswith("object:"):
            return value

        # value[7:] strips the "object:" prefix, leaving the section name.
        obj = instantiate_class(value[7:], all_dicts, existing_objects, depth)
        existing_objects[value] = obj
        return obj

    return value
Exemple #15
def check_dataset_and_coders(dataset: Dataset,
                             runners: Iterable[BaseRunner]) -> None:
    """Check that the dataset contains every series the feedables refer to.

    A feedable declares its series through a ``data_id`` or ``data_ids``
    attribute, either directly or on its ``input_sequence`` attribute.

    Args:
        dataset: The dataset whose series are checked.
        runners: Runners whose ``feedables`` are inspected.

    Raises:
        CheckingException: If at least one referenced series is not present
            in the dataset.
    """
    # pylint: disable=protected-access

    # Pairs of (series name, feedable that requires it).
    data_list = []
    for runner in runners:
        for c in runner.feedables:
            if hasattr(c, "data_id"):
                data_list.append((getattr(c, "data_id"), c))
            elif hasattr(c, "data_ids"):
                data_list.extend([(d, c) for d in getattr(c, "data_ids")])
            elif hasattr(c, "input_sequence"):
                inpseq = getattr(c, "input_sequence")
                if hasattr(inpseq, "data_id"):
                    data_list.append((getattr(inpseq, "data_id"), c))
                elif hasattr(inpseq, "data_ids"):
                    data_list.extend([(d, c)
                                      for d in getattr(inpseq, "data_ids")])
                else:
                    log("Input sequence: {} does not have a data attribute"
                        .format(inpseq))
            else:
                log(("Coder: {} has neither an input sequence attribute nor "
                     "a data attribute.").format(c))

    debug("Found series: {}".format(str(data_list)), "checking")

    # Pairs of (feedable, series name) for series absent from the dataset.
    missing = []

    for (serie, coder) in data_list:
        if not dataset.has_series(serie):
            log("dataset {} does not have serie {}".format(
                dataset.name, serie))
            missing.append((coder, serie))

    if missing:
        formated = [
            "{} ({}, {}.{})".format(serie, str(cod), cod.__class__.__module__,
                                    cod.__class__.__name__)
            for cod, serie in missing]

        raise CheckingException("Dataset '{}' is mising series {}:".format(
            dataset.name, ", ".join(formated)))
Exemple #17
Exemple #18
Exemple #19
def instantiate_class(name: str,
                      all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any],
                      depth: int) -> Any:
    """Instantiate a class from the configuration.

    Arguments: see help(build_object)

    Returns:
        The object created by calling the class (or function) stored under
        the ``class`` key of the section `name`.

    Raises:
        ConfigInvalidValueException: If the section `name` is not defined,
            has no ``class`` entry, or the entry is neither a class nor a
            function.
        ConfigBuildException: If the section arguments cannot be bound to
            the signature of the class or function.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    this_dict = all_dicts[name]

    if "class" not in this_dict:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = this_dict["class"].create()

    if not isclass(clazz) and not isfunction(clazz):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # prepare the arguments for the constructor; the "class" entry only
    # selects what to construct and is not passed on
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in this_dict.items()
        if key != "class"
    }

    # get a signature of the constructing function
    construct_sig = signature(clazz)

    try:
        # try to bound the arguments to the signature
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        raise ConfigBuildException(clazz, exc) from exc

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # call the function with the arguments
    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")

    return obj
Exemple #21
    def initial_loop_state(self) -> AttentionLoopState:
        """Return the loop state built by `empty_attention_loop_state`.

        Before building the state, `hidden_features` and `attention_mask`
        are referenced in debug calls so that they get pre-computed.
        """

        # Here we need to make sure that the hidden_features and attention_mask
        # are pre-computed. If this is used in combination with a decoder which
        # has train and runtime while loops, these tensors need to be created
        # outside of any of those loops in order to be available to both.

        # Note that we are not breaking lazy loading here because this method
        # is called from a lazy tensor.

        debug("Pre-computing attention tensors", "bless")
        debug("Hidden features: {}".format(self.hidden_features), "bless")
        debug("Hidden mask: {}".format(self.attention_mask), "bless")

        # NOTE(review): the argument list of this call is cut off in this
        # excerpt; restore it from the original source before use.
        return empty_attention_loop_state(
Exemple #22
Exemple #23
def instantiate_class(name: str,
                      all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any],
                      depth: int) -> Any:
    """Instantiate a class from the configuration.

    If the constructing class or function has a ``name`` parameter annotated
    as ``str`` and the section does not set it, the section name is used.

    Arguments: see help(build_object)

    Returns:
        The object created by calling the class (or function) stored under
        the ``class`` key of the section `name`.

    Raises:
        ConfigInvalidValueException: If the section `name` is not defined,
            has no ``class`` entry, or the entry is neither a class nor a
            function.
        ConfigBuildException: If the section arguments cannot be bound to
            the signature of the class or function.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    this_dict = all_dicts[name]

    if "class" not in this_dict:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = this_dict["class"].create()

    if not isclass(clazz) and not isfunction(clazz):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # prepare the arguments for the constructor; the "class" entry only
    # selects what to construct and is not passed on
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in this_dict.items()
        if key != "class"
    }

    # get a signature of the constructing function
    construct_sig = signature(clazz)

    # if a signature contains a "name" attribute which is not in arguments,
    # replace it with the name of the section
    if "name" in construct_sig.parameters and "name" not in arguments:
        annotation = construct_sig.parameters["name"].annotation

        if annotation == Parameter.empty:
            debug("No type annotation for the 'name' parameter in "
                  "class/function {}. Default value will not be used."
                  .format(this_dict["class"].clazz), "configBuild")
        elif annotation != str:
            debug("Type annotation for the 'name' parameter in class/function "
                  "{} is not 'str'. Default value will not be used."
                  .format(this_dict["class"].clazz), "configBuild")
            debug("Annotation is {}".format(str(annotation)))
        else:
            # The section name is only used when 'name' is declared as str.
            debug("Using default 'name' for object {}"
                  .format(this_dict["class"].clazz), "configBuild")
            arguments["name"] = name

    try:
        # try to bound the arguments to the signature
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        raise ConfigBuildException(clazz, exc) from exc

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # call the function with the arguments
    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")

    return obj