Example #1
0
    def _bless_graph_executors(self) -> None:
        """Force construction of the graph parts used by the executors.

        The computational-graph components are lazy: a tensor, together with
        everything needed to evaluate it, is only added to the graph once
        something references it. Referencing a tensor on purpose is called
        "blessing" and is done here by formatting the tensor into a debug
        message.

        The `fetches` of every runner of the model are blessed. When the
        model has a `trainer` attribute (a single trainer or a list of
        trainers), the `fetches` of each trainer are blessed first.
        """
        log("Building TF Graph")

        if hasattr(self.model, "trainer"):
            # A single trainer is wrapped so both cases share one loop.
            trainers = (self.model.trainer
                        if isinstance(self.model.trainer, List)
                        else [self.model.trainer])

            for blessed_trainer in trainers:
                trainer_msg = "Trainer fetches: {}".format(
                    blessed_trainer.fetches)
                debug(trainer_msg, "bless")

        for blessed_runner in self.model.runners:
            runner_msg = "Runner fetches: {}".format(blessed_runner.fetches)
            debug(runner_msg, "bless")

        log("TF Graph built")
Example #2
0
def check_dataset_and_coders(dataset, runners):
    """Check that the dataset provides every series the coders read.

    Every coder of every runner is inspected for a `data_id` attribute or,
    failing that, a `data_ids` attribute. A coder that has neither is
    reported through `log` and otherwise ignored.

    Arguments:
        dataset: Object exposing `has_series(serie)` and a `name` attribute.
        runners: Iterable of objects exposing an `all_coders` iterable.

    Raises:
        CheckingException: If at least one collected series is missing from
            the dataset. The message lists every missing series together
            with the name and class of the coder that requires it.
    """
    #pylint: disable=protected-access

    data_list = []

    for runner in runners:
        for c in runner.all_coders:
            if hasattr(c, "data_id"):
                data_list.append((c.data_id, c))
            elif hasattr(c, "data_ids"):
                data_list.extend([(d, c) for d in c.data_ids])
            else:
                # The trailing space keeps the two literals from fusing
                # into "havea".
                log(("Warning: Coder: {} does not have "
                     "a data attribute").format(c))

    debug("Found series: {}".format(str(data_list)), "checking")
    missing = []

    for (serie, coder) in data_list:
        if not dataset.has_series(serie):
            log("dataset {} does not have serie {}".format(
                dataset.name, serie))
            missing.append((coder, serie))

    if missing:
        # `missing` holds (coder, serie) pairs; unpack in that order so the
        # coder attributes are read from the coder, not from the series id.
        formated = [
            "{} ({}, {}.{})".format(serie, cod.name, cod.__class__.__module__,
                                    cod.__class__.__name__)
            for cod, serie in missing
        ]

        raise CheckingException("Dataset '{}' is mising series {}:".format(
            dataset.name, ", ".join(formated)))
Example #3
0
 def print_tensor(x: np.ndarray) -> tf.Tensor:
     """Send the shape and value of `x` to `debug` and return `x` unchanged.

     The dump is prefixed with the free variable `message` unless it is
     None, and is emitted under the free variable `debug_label`.
     """
     if message is None:
         dump = "Shape: {}\n{}".format(x.shape, x)
     else:
         dump = "{}, shape: {}:\n{}".format(message, x.shape, x)
     debug(dump, debug_label)
     return x
Example #4
0
    def _bless_graph_executors(self) -> None:
        """Force construction of the graph parts used by the executors.

        The computational-graph components are lazy: a tensor, together with
        everything needed to evaluate it, is only added to the graph once
        something references it. Referencing a tensor on purpose is called
        "blessing" and is done here by formatting the tensor into a debug
        message.

        The `fetches` of every runner of the model are blessed. When the
        model has a `trainer` attribute (a single trainer or a list of
        trainers), the `fetches` of each trainer are blessed first.
        """
        log("Building TF Graph")

        if hasattr(self.model, "trainer"):
            # A single trainer is wrapped so both cases share one loop.
            trainers = (self.model.trainer
                        if isinstance(self.model.trainer, List)
                        else [self.model.trainer])

            for blessed_trainer in trainers:
                trainer_msg = "Trainer fetches: {}".format(
                    blessed_trainer.fetches)
                debug(trainer_msg, "bless")

        for blessed_runner in self.model.runners:
            runner_msg = "Runner fetches: {}".format(blessed_runner.fetches)
            debug(runner_msg, "bless")

        log("TF Graph built")
Example #5
0
def main() -> None:
    """Run `_main` and terminate with status 1 on a keyboard interrupt.

    When the user interrupts the run, a short notice is logged, the
    traceback is sent to the debug output and the process exits with exit
    code 1. Any other exception propagates unchanged.

    Raises:
        SystemExit: With code 1 after a `KeyboardInterrupt`.
    """
    # Imported locally so the block does not depend on the file's import
    # section. `sys.exit` is used instead of the builtin `exit`, which is
    # only installed by the `site` module for interactive use.
    import sys

    try:
        _main()
    except KeyboardInterrupt:
        log("Training interrupted by user.")
        debug(traceback.format_exc())
        sys.exit(1)
Example #6
0
def main() -> None:
    """Run `_main` and terminate with status 1 on a keyboard interrupt.

    When the user interrupts the run, a short notice is logged, the
    traceback is sent to the debug output and the process exits with exit
    code 1. Any other exception propagates unchanged.

    Raises:
        SystemExit: With code 1 after a `KeyboardInterrupt`.
    """
    # Imported locally so the block does not depend on the file's import
    # section. `sys.exit` is used instead of the builtin `exit`, which is
    # only installed by the `site` module for interactive use.
    import sys

    try:
        _main()
    except KeyboardInterrupt:
        log("Training interrupted by user.")
        debug(traceback.format_exc())
        sys.exit(1)
Example #7
0
 def print_tensor(x: np.ndarray) -> tf.Tensor:
     """Send the shape and value of `x` to `debug` and return `x` unchanged.

     The dump is prefixed with the free variable `message` unless it is
     None, and is emitted under the free variable `debug_label`.
     """
     if message is None:
         dump = "Shape: {}\n{}".format(x.shape, x)
     else:
         dump = "{}, shape: {}:\n{}".format(message, x.shape, x)
     debug(dump, debug_label)
     return x
Example #8
0
def dataset_from_files(**kwargs):
    """Build a `Dataset` from keyword arguments.

    Args:
        kwargs: Treated as a plain dictionary. The keys read directly are:

            * ``random_seed`` -- handed to `Dataset` (``None`` if absent),
            * ``preprocessor`` -- callable passed to `Dataset.create_series`
              for every series (identity function if absent),
            * ``name`` -- name of the dataset. When absent, the name is
              ``"dataset"`` if no series paths were found, otherwise it is
              derived from the paths by `_get_name_from_paths`.

            Paths of the data series are extracted from the dictionary by
            `_get_series_paths`. Every key matching `SERIES_OUTPUT` is
            collected, under the first group of the match, into the series
            outputs handed to `Dataset`.

    Returns:
        The constructed `Dataset`; its length is logged before returning.
    """
    seed = kwargs.get("random_seed", None)
    preprocessor = kwargs.get("preprocessor", lambda x: x)
    dataset_name = kwargs.get("name", "dataset")
    paths = _get_series_paths(kwargs)

    debug("Series paths: {}".format(paths), "datasetBuild")

    # Stays None when there is nothing to load.
    loaded_series = None
    if len(paths) > 0:
        log("Initializing dataset with: {}".format(", ".join(paths)))
        loaded_series = {}
        for series_id in paths:
            loaded_series[series_id] = Dataset.create_series(
                paths[series_id], preprocessor)
        dataset_name = kwargs.get('name', _get_name_from_paths(paths))

    outputs = {}
    for key, value in kwargs.items():
        found = SERIES_OUTPUT.match(key)
        if found:
            outputs[found.group(1)] = value

    result = Dataset(dataset_name, loaded_series, outputs, seed)
    log("Dataset length: {}".format(len(result)))
    return result
Example #9
0
def build_object(value: str, all_dicts: Dict[str, Any],
                 existing_objects: Dict[str, Any], depth: int) -> Any:
    """Build an object from config dictionary of its arguments.

    Works recursively: tuples are rebuilt as tuples and any other non-string
    iterable as a list, with every member resolved one level deeper.

    Arguments:
        value: Value that should be resolved (either a literal value or
               a reference of the form ``object:<section name>``)
        all_dicts: Configuration dictionaries used to find configuration
                   of unconstructed objects.
        existing_objects: Dictionary of already constructed objects, keyed
                          by the full reference string. Newly built objects
                          are stored here.
        depth: The current depth of recursion. Used to prevent an infinite
               recursion.

    Returns:
        The resolved value: a container of resolved members, a cached or
        newly instantiated object, the result of `ClassSymbol.create()`,
        or `value` itself when it is a plain literal.

    Raises:
        AssertionError: If `depth` exceeds 20.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block does not depend on
    # the file's import section.
    from collections.abc import Iterable as AbcIterable

    # TODO detect infinite recursion by other means than depth argument
    # TODO as soon as config is run from an entrypoint, remove the
    # ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    # if isinstance(value, str) and value in ignore_names:
    # TODO writing into the argument
    #   existing_objects[value] = None

    if isinstance(value, tuple):
        return tuple(
            build_object(val, all_dicts, existing_objects, depth + 1)
            for val in value)
    elif isinstance(value, AbcIterable) and not isinstance(value, str):
        return [
            build_object(val, all_dicts, existing_objects, depth + 1)
            for val in value
        ]

    if value in existing_objects:
        debug("Skipping already initialized value: {}".format(value),
              "configBuild")

        return existing_objects[value]

    if isinstance(value, str):
        # either a string or a reference to an object
        if not value.startswith("object:"):
            return value

        # Strip the "object:" prefix to get the section name.
        obj = instantiate_class(value[7:], all_dicts, existing_objects, depth)
        existing_objects[value] = obj
        return obj

    if isinstance(value, ClassSymbol):
        return value.create()

    return value
    def get_initializer(self, var_name: str,
                        default: Callable = None) -> Optional[Callable]:
        """Look up the initializer registered for `var_name`.

        Falls back to `default` when `self._initializers` has no entry for
        the variable. As a side effect the variable name is recorded in
        `self._initialized_variables`, which marks the initializer as used.
        A debug message is emitted whenever the result is not `default`.
        """
        chosen = self._initializers.get(var_name, default)
        fell_back = chosen is default
        if not fell_back:
            debug("Using {} for variable {}".format(chosen, var_name))
        self._initialized_variables.add(var_name)
        return chosen
Example #11
0
    def get_initializer(self, var_name: str,
                        default: Callable = None) -> Optional[Callable]:
        """Look up the initializer registered for `var_name`.

        Falls back to `default` when `self._initializers` has no entry for
        the variable. As a side effect the variable name is recorded in
        `self._initialized_variables`, which marks the initializer as used.
        A debug message is emitted whenever the result is not `default`.
        """
        chosen = self._initializers.get(var_name, default)
        fell_back = chosen is default
        if not fell_back:
            debug("Using {} for variable {}".format(chosen, var_name))
        self._initialized_variables.add(var_name)
        return chosen
Example #12
0
def build_object(value: str,
                 all_dicts: Dict[str, Any],
                 existing_objects: Dict[str, Any],
                 depth: int) -> Any:
    """Build an object from config dictionary of its arguments.

    Works recursively: tuples are rebuilt as tuples and any other non-string
    iterable as a list, with every member resolved one level deeper.

    Arguments:
        value: Value that should be resolved (either a literal value or
               a reference to a config section)
        all_dicts: Configuration dictionaries used to find configuration
                   of unconstructed objects.
        existing_objects: Dictionary of already constructed objects, keyed
                          by the referenced name. Newly built objects are
                          stored here.
        depth: The current depth of recursion. Used to prevent an infinite
               recursion.

    Returns:
        The resolved value: a container of resolved members, the target of
        an `ObjectRef` after binding it, the result of
        `ClassSymbol.create()`, or `value` itself when it is a plain
        literal.

    Raises:
        AssertionError: If `depth` exceeds 20.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block does not depend on
    # the file's import section.
    from collections.abc import Iterable as AbcIterable

    # TODO detect infinite recursion by other means than depth argument
    # TODO as soon as config is run from an entrypoint, remove the
    # ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    # if isinstance(value, str) and value in ignore_names:
    # TODO writing into the argument
    #   existing_objects[value] = None

    if isinstance(value, tuple):
        return tuple(build_object(val, all_dicts, existing_objects, depth + 1)
                     for val in value)
    if isinstance(value, AbcIterable) and not isinstance(value, str):
        return [build_object(val, all_dicts, existing_objects, depth + 1)
                for val in value]

    if isinstance(value, ObjectRef):
        if value.name in existing_objects:
            debug("Skipping already initialized object: {}".format(value.name),
                  "configBuild")
        else:
            existing_objects[value.name] = instantiate_class(
                value.name, all_dicts, existing_objects, depth)
        # Bind the reference to the (cached or freshly built) object.
        value.bind(existing_objects[value.name])
        return value.target

    if isinstance(value, ClassSymbol):
        return value.create()

    return value
Example #13
0
def _get_series_paths_and_readers(
        series_config: SeriesConfig) -> Dict[str, Tuple[List[str], Reader]]:
    """Collect the input files and the reader of every configured series.

    A key of `series_config` describes a series source when it matches
    `SERIES_SOURCE`; the series name is extracted from the key with
    `get_first_match` (e.g. ``s_xxx`` for a series ``xxx``, assuming the
    usual form of the pattern). The value under such a key is a path
    pattern, a list of path patterns, or a tuple whose first member is a
    pattern or a list of patterns and whose second member is a reader.
    Without an explicit reader, `UtfPlainTextReader` is used.

    Every pattern is expanded with :py:func:`glob.glob`; the matches of one
    pattern are sorted and the patterns keep their given order.

    Arguments:
        series_config: A dictionary containing the dataset keyword argument
            specs.

    Returns:
        A dictionary mapping series names to pairs of the list of input
        file paths and the reader.

    Raises:
        FileNotFoundError: If a pattern does not match any file.
    """
    source_keys = [
        key for key in series_config.keys() if SERIES_SOURCE.match(key)]
    series_names = [
        get_first_match(SERIES_SOURCE, key) for key in source_keys]

    result = {}
    for name, key in zip(series_names, source_keys):
        spec = cast(ReaderDef, series_config[key])

        if isinstance(spec, tuple):
            patterns, reader = spec  # type: ignore
        else:
            patterns, reader = spec, UtfPlainTextReader

        # A lone pattern is handled as a one-element list.
        if isinstance(patterns, str):
            patterns = [patterns]

        paths = []
        for pattern in patterns:
            expanded = sorted(glob.glob(pattern))
            if not expanded:
                raise FileNotFoundError(
                    "Pattern did not match any files. Series: {}, Pattern: {}"
                    .format(name, pattern))
            paths.extend(expanded)

        debug("Series '{}' has the following files: {}".format(name, paths))

        result[name] = (paths, reader)

    return result
def build_object(value, all_dicts, existing_objects, depth):
    """Builds an object from config dictionary of its arguments.
    It works recursively: every non-string iterable is rebuilt as a list
    with its members resolved one level deeper.

    Arguments:
        value: Value that should be resolved (either a literal value or
               a reference of the form ``object:<section name>``)
        all_dicts: Configuration dictionaries used to find configuration
                   of unconstructed objects.
        existing_objects: Dictionary of already constructed objects, keyed
                          by the full reference string. Newly built objects
                          are stored here.
        depth: The current depth of recursion. Used to prevent an infinite
               recursion.

    Returns:
        The resolved value: a list of resolved members, a cached or newly
        instantiated object, or `value` itself when it is a plain literal.

    Raises:
        AssertionError: If `depth` exceeds 20.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block does not depend on
    # the file's import section.
    from collections.abc import Iterable as AbcIterable

    ### TODO detect infinite recursion by other means than depth argument
    ### TODO as soon as config is run from an entrypoint, remove the
    ###      ignore_names feature
    if depth > 20:
        raise AssertionError("Config recursion should not be deeper that 20.")

    debug("Building value on depth {}: {}".format(depth, value), "configBuild")

    #if isinstance(value, str) and value in ignore_names:
        # TODO writing into the argument
     #   existing_objects[value] = None

    if isinstance(value, AbcIterable) and not isinstance(value, str):
        return [build_object(val, all_dicts, existing_objects, depth + 1)
                for val in value]

    if value in existing_objects:
        debug("Skipping already initialized value: {}".format(value),
              "configBuild")

        return existing_objects[value]

    if isinstance(value, str):
        # either a string or a reference to an object
        if not value.startswith("object:"):
            return value

        # Strip the "object:" prefix to get the section name.
        obj = instantiate_class(value[7:], all_dicts, existing_objects, depth)
        existing_objects[value] = obj
        return obj

    return value
Example #15
0
    def initial_loop_state(self) -> AttentionLoopState:
        """Return an empty attention loop state for this attention object.

        The encoder projections and the concatenated masks are referenced
        through `debug` first, so that (as with the feed-forward attention)
        they get built before the while loop is entered rather than as
        a part of the loop.

        The length handed to `empty_attention_loop_state` is the sum of the
        second dimensions of the encoder tensors, plus one when sentinels
        are in use.
        """
        # pylint: disable=not-an-iterable
        for projection in self.encoder_projections_for_logits:
            debug(projection, "bless")
        debug(self.masks_concat, "bless")

        time_dims = (tf.shape(states)[1] for states in self._encoders_tensors)
        length = sum(time_dims)
        # pylint: enable=not-an-iterable

        if self._use_sentinels:
            length += 1

        return empty_attention_loop_state(
            self.batch_size, length, self.context_vector_size)
Example #16
0
def check_dataset_and_coders(dataset: Dataset,
                             runners: Iterable[BaseRunner]) -> None:
    """Check that the dataset provides every series the feedables read.

    For every feedable of every runner the series identifiers are taken
    from its `data_id` attribute, else from its `data_ids` attribute, else
    from the same two attributes of its `input_sequence`. A feedable (or
    input sequence) offering none of them is reported through `log` and
    otherwise ignored.

    Arguments:
        dataset: The dataset whose series are checked.
        runners: Runners whose `feedables` are inspected.

    Raises:
        CheckingException: If at least one collected series is missing from
            the dataset. The message lists every missing series together
            with the feedable that requires it.
    """
    # pylint: disable=protected-access

    data_list = []
    for runner in runners:
        for c in runner.feedables:
            if hasattr(c, "data_id"):
                data_list.append((getattr(c, "data_id"), c))
            elif hasattr(c, "data_ids"):
                data_list.extend([(d, c) for d in getattr(c, "data_ids")])
            elif hasattr(c, "input_sequence"):
                inpseq = getattr(c, "input_sequence")
                if hasattr(inpseq, "data_id"):
                    data_list.append((getattr(inpseq, "data_id"), c))
                elif hasattr(inpseq, "data_ids"):
                    data_list.extend([(d, c)
                                      for d in getattr(inpseq, "data_ids")])
                else:
                    log("Input sequence: {} does not have a data attribute".
                        format(str(inpseq)))
            else:
                # Only one of the two adjacent literals carries the article,
                # otherwise the message reads "nor a a data attribute".
                log(("Coder: {} has neither an input sequence attribute nor "
                     "a data attribute.").format(c))

    debug("Found series: {}".format(str(data_list)), "checking")
    missing = []

    for (serie, coder) in data_list:
        if not dataset.has_series(serie):
            log("dataset {} does not have serie {}".format(
                dataset.name, serie))
            missing.append((coder, serie))

    if missing:
        formated = [
            "{} ({}, {}.{})".format(serie, str(cod), cod.__class__.__module__,
                                    cod.__class__.__name__)
            for cod, serie in missing
        ]

        raise CheckingException("Dataset '{}' is mising series {}:".format(
            dataset.name, ", ".join(formated)))
Example #17
0
    def initial_loop_state(self) -> AttentionLoopState:
        """Return an empty attention loop state for this attention object.

        The encoder projections and the concatenated masks are referenced
        through `debug` first, so that (as with the feed-forward attention)
        they get built before the while loop is entered rather than as
        a part of the loop.

        The length handed to `empty_attention_loop_state` is the sum of the
        second dimensions of the encoder tensors, plus one when sentinels
        are in use.
        """
        # pylint: disable=not-an-iterable
        for projection in self.encoder_projections_for_logits:
            debug(projection, "bless")
        debug(self.masks_concat, "bless")

        time_dims = (tf.shape(states)[1] for states in self._encoders_tensors)
        length = sum(time_dims)
        # pylint: enable=not-an-iterable

        if self._use_sentinels:
            length += 1

        return empty_attention_loop_state(
            self.batch_size, length, self.context_vector_size)
Example #18
0
def _get_series_paths_and_readers(
        series_config: SeriesConfig) -> Dict[str, Tuple[List[str], Reader]]:
    """Collect the input files and the reader of every configured series.

    A key of `series_config` describes a series source when it matches
    `SERIES_SOURCE`; the series name is extracted from the key with
    `get_first_match` (e.g. ``s_xxx`` for a series ``xxx``, assuming the
    usual form of the pattern). The value under such a key is a path
    pattern, a list of path patterns, or a tuple whose first member is a
    pattern or a list of patterns and whose second member is a reader.
    Without an explicit reader, `UtfPlainTextReader` is used.

    The patterns are turned into file paths by `_expand_patterns_flat`.

    Arguments:
        series_config: A dictionary containing the dataset keyword argument
            specs.

    Returns:
        A dictionary mapping series names to pairs of the list of input
        file paths and the reader.
    """
    source_keys = [
        key for key in series_config.keys() if SERIES_SOURCE.match(key)]
    series_names = [
        get_first_match(SERIES_SOURCE, key) for key in source_keys]

    result = {}
    for name, key in zip(series_names, source_keys):
        spec = cast(ReaderDef, series_config[key])

        if isinstance(spec, tuple):
            patterns, reader = spec  # type: ignore
        else:
            patterns, reader = spec, UtfPlainTextReader

        # A lone pattern is handled as a one-element list.
        if isinstance(patterns, str):
            patterns = [patterns]

        files = _expand_patterns_flat(patterns)
        debug("Series '{}' has the following files: {}".format(name, files))

        result[name] = (files, reader)

    return result
Example #19
0
def instantiate_class(name: str,
                      all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any],
                      depth: int) -> Any:
    """Instantiate a class from the configuration.

    Arguments:
        name: Name of the configuration section (key of `all_dicts`) that
              describes the object.
        all_dicts: Configuration dictionaries; the section must contain
                   a "class" entry whose `create()` yields the class or
                   function to call.
        existing_objects: Dictionary of already constructed objects, passed
                          on to `build_object`.
        depth: The current depth of recursion; the arguments are built at
               `depth + 1`.

    Returns:
        The result of calling the class or function with the built
        arguments.

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            "class" entry, or the entry is neither a class nor a function.
        ConfigBuildException: If the arguments cannot be bound to the
            signature of the class or function.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    this_dict = all_dicts[name]

    if "class" not in this_dict:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = this_dict["class"].create()

    if not isclass(clazz) and not isfunction(clazz):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # prepare the arguments for the constructor; every entry except
    # "class" is resolved recursively
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in this_dict.items()
        if key != "class"
    }

    # get a signature of the constructing function
    construct_sig = signature(clazz)

    try:
        # try to bound the arguments to the signature
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        # Chain explicitly so the binding error is reported as the cause.
        raise ConfigBuildException(clazz, exc) from exc

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # call the function with the arguments
    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")

    return obj
def instantiate_class(name, all_dicts, existing_objects, depth):
    """ Instantiate a class from the configuration

    Arguments:
        name: Name of the configuration section (key of `all_dicts`) that
              describes the object.
        all_dicts: Configuration dictionaries; the section must contain
                   a 'class' entry holding the class or function to call.
        existing_objects: Dictionary of already constructed objects, passed
                          on to `build_object`.
        depth: The current depth of recursion; the arguments are built at
               `depth + 1`.

    Returns:
        The result of calling the class or function with the built
        arguments.

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            'class' entry, or the entry is neither a class nor a function.
        ConfigBuildException: If the arguments cannot be bound to the
            signature of the class or function.
    """
    if name not in all_dicts:
        debug(all_dicts, "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    this_dict = all_dicts[name]

    if 'class' not in this_dict:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = this_dict['class']

    if not isclass(clazz) and not isfunction(clazz):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    ## prepare the arguments for the constructor; every entry except
    ## 'class' is resolved recursively
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in this_dict.items()
        if key != 'class'
    }

    ## get a signature of the constructing function
    construct_sig = signature(clazz)

    try:
        ## try to bound the arguments to the signature
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        ## chain explicitly so the binding error is reported as the cause
        raise ConfigBuildException(clazz, exc) from exc

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    ## call the function with the arguments
    ## NOTE: any exception thrown from the body of the constructor is
    ##       not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")

    return obj
Example #21
0
    def initial_loop_state(self) -> AttentionLoopState:
        """Return an empty attention loop state for this attention object.

        The hidden features and the attention mask are referenced through
        `debug` first, so that they are pre-computed. When the attention is
        combined with a decoder that has both a train and a runtime while
        loop, these tensors have to be created outside of those loops to be
        available to both.

        Lazy loading is not broken by this, because the method itself is
        called from a lazy tensor.
        """
        debug("Pre-computing attention tensors", "bless")

        features_msg = "Hidden features: {}".format(self.hidden_features)
        debug(features_msg, "bless")

        mask_msg = "Hidden mask: {}".format(self.attention_mask)
        debug(mask_msg, "bless")

        # Evaluated in the same order as the call arguments would be.
        batch_size = self.batch_size
        max_time = tf.shape(self.attention_states)[1]
        vector_size = self.context_vector_size

        return empty_attention_loop_state(batch_size, max_time, vector_size)
Example #22
0
def instantiate_class(name: str, all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any], depth: int) -> Any:
    """Instantiate a class from the configuration.

    When the signature of the class or function has a `name` parameter
    annotated as `str` and the section does not set it, the section name is
    passed as its value.

    Arguments:
        name: Name of the configuration section (key of `all_dicts`) that
              describes the object.
        all_dicts: Configuration dictionaries; the section must contain
                   a "class" entry whose `create()` yields the class or
                   function to call.
        existing_objects: Dictionary of already constructed objects, passed
                          on to `build_object`.
        depth: The current depth of recursion; the arguments are built at
               `depth + 1`.

    Returns:
        The result of calling the class or function with the built
        arguments.

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            "class" entry, or the entry is neither a class nor a function.
        ConfigBuildException: If the arguments cannot be bound to the
            signature of the class or function.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    this_dict = all_dicts[name]

    if "class" not in this_dict:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = this_dict["class"].create()

    if not isclass(clazz) and not isfunction(clazz):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # prepare the arguments for the constructor; every entry except
    # "class" is resolved recursively
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in this_dict.items()
        if key != "class"
    }

    # get a signature of the constructing function
    construct_sig = signature(clazz)

    # if a signature contains a "name" attribute which is not in arguments,
    # replace it with the name of the section
    if "name" in construct_sig.parameters and "name" not in arguments:
        annotation = construct_sig.parameters["name"].annotation

        if annotation == Parameter.empty:
            debug(
                "No type annotation for the 'name' parameter in "
                "class/function {}. Default value will not be used.".format(
                    this_dict["class"].clazz), "configBuild")
        elif annotation != str:
            debug(
                "Type annotation for the 'name' parameter in class/function "
                "{} is not 'str'. Default value will not be used.".format(
                    this_dict["class"].clazz), "configBuild")
            # NOTE(review): unlike the sibling calls, no debug label is
            # passed here - confirm this is intended.
            debug("Annotation is {}".format(str(annotation)))
        else:
            debug(
                "Using default 'name' for object {}".format(
                    this_dict["class"].clazz), "configBuild")
            arguments["name"] = name

    try:
        # try to bound the arguments to the signature
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        # Chain explicitly so the binding error is reported as the cause.
        raise ConfigBuildException(clazz, exc) from exc

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # call the function with the arguments
    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")

    return obj
Example #23
0
def instantiate_class(name: str,
                      all_dicts: Dict[str, Any],
                      existing_objects: Dict[str, Any],
                      depth: int) -> Any:
    """Instantiate a class from the configuration.

    When the signature of the class or function has a `name` parameter
    annotated as `str` and the section does not set it, the section name is
    passed as its value.

    Arguments:
        name: Name of the configuration section (key of `all_dicts`) that
              describes the object.
        all_dicts: Configuration dictionaries; the section must contain
                   a "class" entry whose `create()` yields the class or
                   function to call.
        existing_objects: Dictionary of already constructed objects, passed
                          on to `build_object`.
        depth: The current depth of recursion; the arguments are built at
               `depth + 1`.

    Returns:
        The result of calling the class or function with the built
        arguments.

    Raises:
        ConfigInvalidValueException: If the section is undefined, has no
            "class" entry, or the entry is neither a class nor a function.
        ConfigBuildException: If the arguments cannot be bound to the
            signature of the class or function.
    """
    if name not in all_dicts:
        debug(str(all_dicts), "configBuild")
        raise ConfigInvalidValueException(name, "Undefined object")
    this_dict = all_dicts[name]

    if "class" not in this_dict:
        raise ConfigInvalidValueException(name, "Undefined object type")
    clazz = this_dict["class"].create()

    if not isclass(clazz) and not isfunction(clazz):
        raise ConfigInvalidValueException(
            name, "Cannot instantiate object with '{}'".format(clazz))

    # prepare the arguments for the constructor; every entry except
    # "class" is resolved recursively
    arguments = {
        key: build_object(value, all_dicts, existing_objects, depth + 1)
        for key, value in this_dict.items()
        if key != "class"
    }

    # get a signature of the constructing function
    construct_sig = signature(clazz)

    # if a signature contains a "name" attribute which is not in arguments,
    # replace it with the name of the section
    if "name" in construct_sig.parameters and "name" not in arguments:
        annotation = construct_sig.parameters["name"].annotation

        if annotation == Parameter.empty:
            debug("No type annotation for the 'name' parameter in "
                  "class/function {}. Default value will not be used."
                  .format(this_dict["class"].clazz), "configBuild")
        elif annotation != str:
            debug("Type annotation for the 'name' parameter in class/function "
                  "{} is not 'str'. Default value will not be used."
                  .format(this_dict["class"].clazz), "configBuild")
            # NOTE(review): unlike the sibling calls, no debug label is
            # passed here - confirm this is intended.
            debug("Annotation is {}".format(str(annotation)))
        else:
            debug("Using default 'name' for object {}"
                  .format(this_dict["class"].clazz), "configBuild")
            arguments["name"] = name

    try:
        # try to bound the arguments to the signature
        bounded_params = construct_sig.bind(**arguments)
    except TypeError as exc:
        # Chain explicitly so the binding error is reported as the cause.
        raise ConfigBuildException(clazz, exc) from exc

    debug("Instantiating class {} with arguments {}".format(clazz, arguments),
          "configBuild")

    # call the function with the arguments
    # NOTE: any exception thrown from the body of the constructor is
    # not worth catching here
    obj = clazz(*bounded_params.args, **bounded_params.kwargs)

    debug("Class {} initialized into object {}".format(clazz, obj),
          "configBuild")

    return obj