    def test_unflatten_dict_to_nested(self):
        inputs1 = {'a//b': 1, 'a//c': 2, 'e': 3}
        must1 = {'a': {'b': 1, 'c': 2}, 'e': 3}
        self.assertDictEqual(nest_utils.unflatten_dict_to_nested(inputs1),
                             must1)

        inputs2 = {'a//b': 1, 'b': 2, 'e': 3}
        must2 = {'a': {'b': 1}, 'b': 2, 'e': 3}
        self.assertDictEqual(nest_utils.unflatten_dict_to_nested(inputs2),
                             must2)

        inputs3 = {
            'a//b': 1,
            'a//c': 2,
            'a//d//d1': 2,
            'a//d//d2': 4,
            'e//0': 1,
            'e//1': 2,
            'e//2//e1': 10
        }
        must3 = {
            'a': {
                'b': 1,
                'c': 2,
                'd': {
                    'd1': 2,
                    'd2': 4
                }
            },
            'e': [1, 2, {
                'e1': 10
            }]
        }
        self.assertDictEqual(nest_utils.unflatten_dict_to_nested(inputs3),
                             must3)
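The test above pins down two behaviors of `unflatten_dict_to_nested`: keys are split on a `//` separator, and sub-dicts whose keys are consecutive integers collapse into lists. A minimal pure-Python sketch with those semantics (not the actual nest_utils implementation) could look like this:

def unflatten_sketch(flat: dict, separator: str = '//') -> dict:
    """Rebuild a nested structure from separator-joined keys (sketch only)."""
    nested = {}
    for flat_key, value in flat.items():
        *parents, last = flat_key.split(separator)
        node = nested
        for key in parents:
            node = node.setdefault(key, {})
        node[last] = value

    def _lists_from_int_keys(node):
        # turn {'0': v0, '1': v1, ...} into [v0, v1, ...], recursively
        if not isinstance(node, dict):
            return node
        node = {k: _lists_from_int_keys(v) for k, v in node.items()}
        if node and all(k.isdigit() for k in node):
            return [node[k] for k in sorted(node, key=int)]
        return node

    return _lists_from_int_keys(nested)

assert unflatten_sketch({'a//b': 1, 'a//c': 2, 'e': 3}) == \
    {'a': {'b': 1, 'c': 2}, 'e': 3}
assert unflatten_sketch({'e//0': 1, 'e//1': 2, 'e//2//e1': 10}) == \
    {'e': [1, 2, {'e1': 10}]}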
Example #2
def filter_kpi_values(kpi: dict,
                      return_flattened: bool = False) -> Tuple[dict, dict]:
    """
    Filter kpis according to their value type. If a kpi value is not of type
    str or number, it will be filtered out. If a value is a numpy array of
    size 1, its single element is extracted and kept.

    Parameters
    ----------
    kpi
        dict, possibly nested, mapping kpi names to their values
    return_flattened
        flag to return the flattened dict and not unflatten it back

    Returns
    -------
    kpi_filtered
        dict with same structure as kpi, but only with values of number and
        string type
    kpi_filtered_out
        dict with same structure as kpi with values other than number and
        string type
    """
    logger = logging.getLogger(__name__)
    kpi_flatten = nest_utils.flatten_nested_struct(kpi)

    names_filtered_out = []
    for kpi_name in kpi_flatten:
        kpi_value = kpi_flatten[kpi_name]
        if isinstance(kpi_value, np.ndarray) and np.prod(kpi_value.shape) == 1:
            kpi_value = np.reshape(kpi_value, (1, ))[0]
        # pylint: disable=no-member
        # numpy does have floating member
        if isinstance(kpi_value, np.floating):
            kpi_value = float(kpi_value)
        elif isinstance(kpi_value, np.integer):
            kpi_value = int(kpi_value)
        elif isinstance(kpi_value, np.str_):
            kpi_value = str(kpi_value)
        kpi_flatten[kpi_name] = kpi_value
        if not isinstance(kpi_value, (numbers.Number, str, list)):
            names_filtered_out.append(kpi_name)
    kpi_filtered = {
        k: v
        for k, v in kpi_flatten.items() if k not in names_filtered_out
    }
    kpi_filtered_out = {
        k: v
        for k, v in kpi_flatten.items() if k in names_filtered_out
    }
    if kpi_filtered and not return_flattened:
        kpi_filtered = nest_utils.unflatten_dict_to_nested(kpi_filtered)
    if kpi_filtered_out:
        logger.warning(
            "Following kpi keys cannot be serialized to json: "
            "%s", kpi_filtered_out.keys())
        if not return_flattened:
            kpi_filtered_out = nest_utils.unflatten_dict_to_nested(
                kpi_filtered_out)
    return kpi_filtered, kpi_filtered_out
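A quick usage sketch for the function above, assuming nest_utils flattens nested dicts with the `//` separator shown in the tests; the `debug` entry cannot be serialized and lands in the second dict:

import numpy as np

kpi = {'metrics': {'accuracy': np.array([0.93]), 'count': np.int64(5)},
       'debug': object()}
kpi_ok, kpi_bad = filter_kpi_values(kpi)
# kpi_ok  == {'metrics': {'accuracy': 0.93, 'count': 5}}
# kpi_bad == {'debug': <object>}  -- also reported via logger.warning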
Example #3
def split_batch_inputs(
    inputs: dict,
    not_batch_keys: Optional[List[str]] = None,
    ignore_none_values: bool = True,
) -> Tuple[List[dict], dict]:
    """
    Split batch inputs into per-sample inputs

    Parameters
    ----------
    inputs
        batch inputs to split
    not_batch_keys
        keys to exclude from split
    ignore_none_values
        if the keys with None values should be treated as not batch keys

    Returns
    -------
    batch_inputs_as_list
        list of split batch inputs, one dict per sample
    not_batch_inputs
        dict with not batch inputs
    """
    not_batch_keys = not_batch_keys or []
    batch_inputs = {
        each_key: each_value
        for each_key, each_value in inputs.items()
        if each_key not in not_batch_keys
    }
    not_batch_inputs = {
        each_key: each_value
        for each_key, each_value in inputs.items()
        if each_key in not_batch_keys
    }
    batch_inputs_flat = nest_utils.flatten_nested_struct(batch_inputs)
    if ignore_none_values:
        none_keys = [k for k, v in batch_inputs_flat.items() if v is None]
        batch_inputs_flat = {
            k: v
            for k, v in batch_inputs_flat.items() if k not in none_keys
        }
        not_batch_inputs.update(
            nest_utils.unflatten_dict_to_nested({k: None
                                                 for k in none_keys}))
    batch_inputs_flat_as_list = (
        nest_utils.dict_of_lists_to_list_of_dicts(batch_inputs_flat))
    batch_inputs_as_list = [
        nest_utils.unflatten_dict_to_nested(each_flat_input)
        for each_flat_input in batch_inputs_flat_as_list
    ]
    return batch_inputs_as_list, not_batch_inputs
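A usage sketch, under the assumption that `dict_of_lists_to_list_of_dicts` splits every flattened value along its first axis:

import numpy as np

batch = {'image_id': np.array(['a', 'b']),
         'labels': {'cls': np.array([0, 1])},
         'vocab': None,
         'meta': 'fixed'}
samples, not_batch = split_batch_inputs(batch, not_batch_keys=['meta'])
# samples   == [{'image_id': 'a', 'labels': {'cls': 0}},
#               {'image_id': 'b', 'labels': {'cls': 1}}]
# not_batch == {'meta': 'fixed', 'vocab': None}  -- None value moved here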
Example #4
def represent_predictor_through_nucleotides(
        predictor: Predictor,
        incoming_keys_mapping: Optional[dict] = None) -> List[Nucleotide]:
    """
    Represent predictor feed tensors structure as list of nucleotides

    When predictor has the following fetch_tensors structure, but flattened:
    `{'postprocessor_nucleotide1': {'out1': ..., 'out2': ...},
    'postprocessor_nucleotide2': {'out3': ..., 'out4': ...}}`,
    then it will return the list of nodes
    `[postprocessor_nucleotide1, postprocessor_nucleotide2]` with generated keys
    `['out1', 'out2']` and `['out3', 'out4']` respectively

    Parameters
    ----------
    predictor
        predictor with fetch_tensors property defined
    incoming_keys_mapping
        mapping of the incoming keys to the predictor

    Returns
    -------
    predictor_nucleotides
        list of nucleotides which mimic the data structure of fetched predictor
        tensors
    """
    fetch_tensors_flatten = predictor.fetch_tensors
    fetch_tensors = nest_utils.unflatten_dict_to_nested(fetch_tensors_flatten)
    feeded_tensors_without_parameters_flatten = {
        each_key: each_tensor
        for each_key, each_tensor in predictor.feed_tensors.items()
        if each_tensor.op.type != 'PlaceholderWithDefault'
    }
    feeded_tensors_without_parameters = nest_utils.unflatten_dict_to_nested(
        feeded_tensors_without_parameters_flatten)
    incoming_keys = sorted(list(feeded_tensors_without_parameters))

    predictor_nucleotides = []
    for each_nucleotide_name_key, each_nucleotide_outputs in sorted(
            fetch_tensors.items()):
        nucleotide = PredictorNucleotide(name=each_nucleotide_name_key,
                                         inbound_nodes="dataset")
        nucleotide.incoming_keys = incoming_keys
        if incoming_keys_mapping is not None:
            nucleotide.incoming_keys_mapping = {
                "dataset": incoming_keys_mapping
            }
        nucleotide.generated_keys = sorted(list(each_nucleotide_outputs))
        predictor_nucleotides.append(nucleotide)
    return predictor_nucleotides
def remap_single_input(inputs: dict, mapping: Optional[dict] = None) -> dict:
    """
    Remap single input keys according to mapping

    Parameters
    ----------
    inputs
        dict with inputs, where keys should be remapped
    mapping
        mapping of old keys to new keys; if a key is not present in the
        mapping, it will be passed as is; if the new key is "_", it will be
        ignored in the remapped result

    Returns
    -------
    remapped_inputs
        inputs with remapped keys
    """
    inputs_remapped_flat = {}
    mapping = mapping or {}
    inputs_flat = nest_utils.flatten_nested_struct(
        inputs, separator=_NESTED_KEY_SEPARATOR)
    for old_name, value in sorted(inputs_flat.items()):
        remapped_new_names = _get_new_key_for_nested_input_and_map(
            old_name, mapping)
        for each_new_name in remapped_new_names:
            if each_new_name == NucleotideKeyFields.IGNORE_KEY:
                continue
            inputs_remapped_flat[each_new_name] = value
    inputs_remapped = (nest_utils.unflatten_dict_to_nested(
        inputs_remapped_flat, separator=_NESTED_KEY_SEPARATOR))
    return inputs_remapped
def _get_nested_shapes(nested_dict):
    flatten = nest_utils.flatten_nested_struct(nested_dict)
    flatten_with_shapes = {k: v.get_shape().as_list()
                           for k, v in flatten.items()}
    nested_with_shapes = nest_utils.unflatten_dict_to_nested(
        flatten_with_shapes)
    return nested_with_shapes
Example #7
def select_inputs_by_sample_mask_np(sample_mask: np.ndarray,
                                    keys_to_exclude_from_sample_mask: Optional[
                                        List[str]] = None,
                                    **inputs) -> Dict[str, np.ndarray]:
    """
    Select inputs by masking out samples with sample_mask == 0

    Parameters
    ----------
    sample_mask
        tensor of shape [batch_size] with 1 indicating that the sample should
        be left as is and 0 that it should be removed
    keys_to_exclude_from_sample_mask
        list of keys that will not be masked using sample_mask
    **inputs
        inputs to mask

    Returns
    -------
    masked_inputs
        masked inputs sample-wise
    """
    inputs_flatten = nest_utils.flatten_nested_struct(inputs)
    inputs_masked_flatten = {}
    keys_to_exclude = keys_to_exclude_from_sample_mask or []
    sample_mask = sample_mask.astype(bool)
    for each_key, each_value in inputs_flatten.items():
        if each_key in keys_to_exclude:
            inputs_masked_flatten[each_key] = each_value
        else:
            inputs_masked_flatten[each_key] = each_value[sample_mask]
    inputs_masked = nest_utils.unflatten_dict_to_nested(inputs_masked_flatten)
    return inputs_masked
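Masking is applied along the first (batch) axis of every flattened value; note that `keys_to_exclude_from_sample_mask` must use the flattened key form. A small sketch:

import numpy as np

mask = np.array([1, 0, 1])
masked = select_inputs_by_sample_mask_np(
    mask,
    features={'pixels': np.arange(6).reshape(3, 2)},
    labels=np.array([7, 8, 9]))
# masked == {'features': {'pixels': array([[0, 1], [4, 5]])},
#            'labels': array([7, 9])}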
Example #8
def _get_data_results(data: tf.data.Dataset, session_manager,
                      max_iteration: Optional[int] = None) -> dict:
    iterator = data.make_one_shot_iterator()
    sample = iterator.get_next()
    outputs_flatten = {}
    iteration_number = 0
    with session_manager as sess:
        while True:
            try:
                sample_out = sess.run(sample)
                sample_out_flatten = nest_utils.flatten_nested_struct(
                    sample_out)
                for k, v in sample_out_flatten.items():
                    outputs_flatten.setdefault(k, [])
                    if isinstance(v, bytes):
                        v = v.decode()
                    outputs_flatten[k].append(v)
                iteration_number += 1
            except tf.errors.OutOfRangeError:
                break

            if max_iteration is not None and iteration_number >= max_iteration:
                break
    outputs = nest_utils.unflatten_dict_to_nested(outputs_flatten)
    return outputs
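A TF1-style usage sketch (`make_one_shot_iterator` and `tf.Session` are TF1 APIs); since `tf.Session` is itself a context manager, it can be passed directly as session_manager:

import tensorflow as tf  # assumes TF1.x

data = tf.data.Dataset.from_tensor_slices({'x': [1, 2, 3]})
outputs = _get_data_results(data, tf.Session(), max_iteration=2)
# outputs == {'x': [1, 2]}  -- stops after max_iteration batches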
Example #9
    def calculate_losses(
        self, *, inputs_from_dataset: _NESTED_TENSORS_DTYPE,
        predictions_raw: _NESTED_TENSORS_DTYPE
    ) -> Dict[str, Union[Dict[str, tf.Tensor], tf.Tensor]]:
        """
        Create the loss for the model using labels from the dataset inputs
        and the predictions

        Parameters
        ----------
        inputs_from_dataset
            dictionary holding the dataset inputs
        predictions_raw
            dict with prediction tensors as values

        Returns
        -------
        losses
            dict with losses as values; one of the keys should be 'total_loss'
            with the value to optimize; default regularizations on all
            training variables will be applied afterwards if specified
        """
        gene_inputs = {}
        gene_inputs.update(inputs_from_dataset)
        gene_inputs.update(predictions_raw)
        losses = self._process_gene(gene_name='losses',
                                    gene_inputs=gene_inputs)
        unflatten_losses = nest_utils.unflatten_dict_to_nested(losses)
        total_loss = 0.0
        for loss in unflatten_losses.values():
            if 'total_loss' in loss:
                total_loss += loss['total_loss']
        losses['total_loss'] = total_loss
        losses = self._add_regularization(losses)
        return losses
Example #10
    def preprocess_dataset_inputs(
            inputs: Dict[str, tf.Tensor]) -> Dict[str, Dict[str, tf.Tensor]]:
        """
        Add a preprocessing step as identity nodes on the dataset inputs
        and nest all the inputs under the 'dataset' key

        Parameters
        ----------
        inputs
            inputs from datasets

        Returns
        -------
        inputs_with_identity
            same as inputs, but with added identity ops and nested under the
            'dataset' key
        """
        inputs_flat = nest_utils.flatten_nested_struct(inputs)
        inputs_flat_identity = {
            k: tf.identity(v)
            for k, v in sorted(inputs_flat.items())
        }
        inputs_identity = nest_utils.unflatten_dict_to_nested(
            inputs_flat_identity)
        inputs_identity = {"dataset": inputs_identity}
        return inputs_identity
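A small sketch of the resulting structure, assuming the method is exposed as a staticmethod:

import tensorflow as tf

inputs = {'image': tf.constant([[0.5]]), 'label': tf.constant([1])}
wrapped = preprocess_dataset_inputs(inputs)
# wrapped == {'dataset': {'image': <identity of image>,
#                         'label': <identity of label>}}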
Example #11
    def predict(self, **inputs) -> Dict[str, tf.Tensor]:
        result_flat = {}
        default_axis = self.axis.get("default")
        inputs_flat = nest_utils.flatten_nested_struct(inputs,
                                                       flatten_lists=False)
        for each_key, each_input_list in inputs_flat.items():
            if not isinstance(each_input_list, (list, tuple)):
                msg = ("{}: all inputs to concat must be lists or tuples! "
                       "(input for key {} is of type {})").format(
                           self.name, each_key, type(each_input_list))
                raise ValueError(msg)
            axis_for_key = self.axis.get(each_key, default_axis)
            if axis_for_key is None:
                msg = ("{}: axis for key {} was not provided and default key "
                       "does not exist!").format(self.name, each_key)
                raise ValueError(msg)
            if axis_for_key >= len(each_input_list[0].shape):
                msg = ("{}: axis {} for input key {} is not valid for"
                       "inputs with shape {}").format(self.name, axis_for_key,
                                                      each_key,
                                                      each_input_list[0].shape)
                raise ValueError(msg)
            inputs_concat = tf.concat(each_input_list, axis=axis_for_key)
            result_flat[each_key] = inputs_concat

        result = nest_utils.unflatten_dict_to_nested(result_flat)
        return result
Example #12
    def predict_batch(self, inputs: Union[dict, list]) -> Tuple[dict, float]:
        """
        Make predictions given inputs

        Parameters
        ----------
        inputs
            inputs to the network

        Returns
        -------
        predictions
            predictions of the network
        predict_exec_time
            execution time of network prediction
        """
        time_start_predict = time.time()
        if not isinstance(inputs, list):
            inputs = [inputs]
        list_of_predictions = []
        for each_input in inputs:
            if self.model_incoming_keys_mapping is not None:
                each_input = nucleotide_utils.remap_and_collapse_inputs(
                    [each_input], [self.model_incoming_keys_mapping])
            each_input_flatten = nest_utils.flatten_nested_struct(each_input)
            current_prediction_flatten = predictors.predict_using_predictor(
                predictor=self._predictor,
                inputs=each_input_flatten,
                model_parameters=self.model_parameters)
            list_of_predictions.append(current_prediction_flatten)
        predictions_flatten = nucleotide_utils.collapse_inputs(
            list_of_predictions)
        predictions = nest_utils.unflatten_dict_to_nested(predictions_flatten)
        predict_exec_time = time.time() - time_start_predict
        return predictions, predict_exec_time
Example #13
    def combine_fn(*list_of_features) -> tf.data.Dataset:
        """
        Method to combine the features

        Parameters
        ----------
        list_of_features
            list of features to combine

        Returns
        -------
        data_with_combined_features
            data with combined features
        """
        features_combined_flatten = {}

        for each_features in list_of_features:
            each_features_flatten = nest_utils.flatten_nested_struct(
                each_features)
            for (each_feature_name,
                 each_feature) in each_features_flatten.items():
                if each_feature_name in features_combined_flatten:
                    _assert_tensors_have_same_shape(
                        features_combined_flatten[each_feature_name],
                        each_feature)
                    _assert_tensors_have_same_type(
                        features_combined_flatten[each_feature_name],
                        each_feature)
                else:
                    features_combined_flatten[each_feature_name] = each_feature

        features_combined = nest_utils.unflatten_dict_to_nested(
            features_combined_flatten)
        data_with_featured = tf.data.Dataset.from_tensors(features_combined)
        return data_with_featured
Example #14
def maybe_cast_dtype(
        inputs: Dict[str, tf.Tensor],
        cast_dtypes: Optional[Dict[tf.DType, tf.DType]] = None) -> Dict[str, tf.Tensor]:
    """
    Cast values from the nested inputs structure according to the cast_dtypes
    mapping. If the dtype of a value inside of inputs is not one of the
    cast_dtypes keys, it will not be cast at all.

    Parameters
    ----------
    inputs
        possibly nested dict, with values as tensors
    cast_dtypes
        dict mapping which dtype should be cast to which, e.g.
        {float32: float16} means that all float32 tensors will be cast
        to float16 before being passed to the nucleotide

    Returns
    -------
    inputs_casted : dict
        same structure as inputs, but with values cast according to
        cast_dtypes
    """
    if cast_dtypes is None:
        return inputs
    inputs_flatten = nest_utils.flatten_nested_struct(inputs)
    for k, each_input in inputs_flatten.items():
        if not isinstance(each_input, tf.Tensor):
            continue
        dtype_input = each_input.dtype
        if dtype_input in cast_dtypes:
            input_casted = tf.cast(each_input, cast_dtypes[dtype_input])
            inputs_flatten[k] = input_casted
    inputs_casted = nest_utils.unflatten_dict_to_nested(inputs_flatten)
    return inputs_casted
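A usage sketch; only tensors whose dtype appears among the cast_dtypes keys are touched:

import tensorflow as tf

inputs = {'image': tf.constant([1.0, 2.0]),       # float32
          'meta': {'count': tf.constant([3])}}    # int32
casted = maybe_cast_dtype(inputs, {tf.float32: tf.float16})
# casted['image'].dtype         == tf.float16
# casted['meta']['count'].dtype == tf.int32  -- left untouched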
Example #15
def _get_default_features(nested_features):
    features_flatten = nest_utils.flatten_nested_struct(nested_features)
    zero_values_flatten = {
        each_key: tf.zeros_like(each_value)
        for each_key, each_value in features_flatten.items()
    }
    zero_values = nest_utils.unflatten_dict_to_nested(zero_values_flatten)
    return zero_values
Example #16
    def get(self) -> dict:
        """
        Returns
        -------
        buffer_dict
            nested dict with buffer
        """
        return nest_utils.unflatten_dict_to_nested(self._buffer_flat)
Example #17
def _format_data_for_log(data: Optional[dict] = None,
                         shape_fn: Callable = np.shape) -> Optional[dict]:
    if data is None:
        return None
    data_flat = nest_utils.flatten_nested_struct(data)
    data_repr_flat = {
        k: (v if isinstance(v, tf.Tensor) else shape_fn(v))
        for k, v in data_flat.items()
    }
    data_repr = nest_utils.unflatten_dict_to_nested(data_repr_flat)
    return data_repr
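A usage sketch: non-tensor values are replaced by their shapes, which keeps log messages compact:

import numpy as np

data = {'image': np.zeros((2, 32, 32)), 'meta': {'ids': np.arange(4)}}
_format_data_for_log(data)
# -> {'image': (2, 32, 32), 'meta': {'ids': (4,)}}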
Example #18
    def get_summaries(
            self, *, inputs_from_dataset: _NESTED_TENSORS_DTYPE,
            predictions_raw: _NESTED_TENSORS_DTYPE,
            predictions: _NESTED_TENSORS_DTYPE) -> _NESTED_TENSORS_DTYPE:
        """
        Create the summaries for the model as a dict.
        To separate different types of summaries, a prefix is used:

            - `scalar_{}`
            - `image_{}`
            - `histogram_{}`
            - `text_{}`
            - `audio_{}`

        Names without these prefixes will not be stored to tensorboard.

        Parameters
        ----------
        inputs_from_dataset
            dictionary holding the dataset inputs
        predictions_raw
            dict with raw predictions
        predictions
            dict with output tensors as values

        Returns
        -------
        summaries
            combined summaries from all `ModelSummary` instances
        """
        gene_inputs = {}
        gene_inputs.update(inputs_from_dataset)
        gene_inputs.update(predictions_raw)
        gene_inputs.update(predictions)
        summaries = self._process_gene(gene_name='summaries',
                                       gene_inputs=gene_inputs)
        summaries_not_to_store = [
            n for n, s in self.summaries.items()
            if not s.store_inside_tensorboard
        ]
        if summaries_not_to_store:
            logger = logging.getLogger(__name__)
            logger.info(
                'Summaries with names %s will not be stored to '
                'tensorboard', summaries_not_to_store)
            summaries_unflatten = nest_utils.unflatten_dict_to_nested(
                summaries)
            for each_summary_name in summaries_not_to_store:
                del summaries_unflatten[each_summary_name]
            summaries = nest_utils.flatten_nested_struct(summaries_unflatten)
        return summaries
def collection2nested(collection_prefix: str,
                      separator: str = '::',
                      graph: Optional[tf.Graph] = None,
                      raise_error: bool = True) -> dict:
    """
    Construct a dict with the suffixes of the collection names as keys and
    the corresponding tensors / variables / ops as values (a single value or
    a list, depending on the found collection contents)

    Parameters
    ----------
    collection_prefix
        prefix to collection name
    separator
        separator to separate the suffix from key
    graph
        graph to read the collections from
    raise_error
        if ValueError should be raised if no collections with defined prefix
        were found

    Returns
    -------
    dict_with_values
        dict with values from collections

    """
    graph = _maybe_get_default_graph(graph)
    collection_list = get_collections_by_prefix(
        collection_prefix, separator, graph, raise_error=raise_error)

    if not collection_list:
        return {}
    if len(collection_list) == 1 and collection_list[0] == collection_prefix:
        return graph.get_collection(collection_prefix)[0]

    result = {}
    for collection_name in collection_list:
        if len(graph.get_collection(collection_name)) == 1:
            value = graph.get_collection(collection_name)[0]
        else:
            value = graph.get_collection(collection_name)
        name_splitted = collection_name.split(separator)
        key = name_splitted[1]
        result[key] = value
    if len(result) == 1 and list(result.keys())[0] == "":
        result = list(result.values())[0]

    result = nest_utils.unflatten_dict_to_nested(result)
    return result
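A TF1-style sketch of the expected collection layout, assuming `get_collections_by_prefix` matches collection names of the form `<prefix>::<suffix>`; the suffix after `::` is then unflattened with the default nested separator:

import tensorflow as tf  # assumes TF1.x

graph = tf.Graph()
with graph.as_default():
    tf.add_to_collection('hparams::optim//lr', tf.constant(0.1))
    tf.add_to_collection('hparams::optim//momentum', tf.constant(0.9))
hparams = collection2nested('hparams', graph=graph)
# hparams == {'optim': {'lr': <tf.Tensor>, 'momentum': <tf.Tensor>}}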
Example #20
    def restore(self):
        io_utils.maybe_mkdir(self.cache_target)
        cache_fname = self._get_cache_fname()
        if not os.path.exists(cache_fname):
            return None
        with open(cache_fname, "r") as file:
            restored = json.load(file)
        for each_key in restored:
            if isinstance(restored[each_key], list):
                restored[each_key] = np.array(restored[each_key])
        restored_unflatten = nest_utils.unflatten_dict_to_nested(restored)
        logger = logging.getLogger(__name__)
        logger.debug("restoring KPI values from %s", cache_fname)
        return restored_unflatten
Example #21
    def parse_tfrecord_example(self, example) -> dict:
        """Parse tfrecord example"""
        features_flat = nest_utils.flatten_nested_struct(
            self.get_tfrecords_features(), '/')
        output_types = self.get_tfrecords_output_types() or {}
        output_types_flat = nest_utils.flatten_nested_struct(
            output_types, '/')
        parsed_example = tf.parse_single_example(example, features_flat)
        data_decoded = {}
        for field_name, field_value in parsed_example.items():
            output_type = output_types_flat.get(field_name)
            data_decoded[field_name] = self.decode_field(
                field_name, field_value, output_type)
        data = nest_utils.unflatten_dict_to_nested(data_decoded, '/')
        data = self.postprocess_tfrecords(**data)
        return data
Example #22
    def combine_samples_to_batch(
            self,
            list_of_sample_results: List[Optional[dict]]) -> Optional[dict]:
        """
        Combine sample data to batch

        Parameters
        ----------
        list_of_sample_results
            list of sample results

        Returns
        -------
        batch_result
            batch result
        """
        # pylint: disable=no-self-use
        # is an interface
        list_of_sample_results_valid = [
            each_result for each_result in list_of_sample_results
            if each_result is not None
        ]
        if not list_of_sample_results_valid:
            return None

        list_of_flat_sample_results = [
            nest_utils.flatten_nested_struct(each_result)
            for each_result in list_of_sample_results_valid
        ]
        result_flat = {}
        for each_key in list_of_flat_sample_results[0]:
            result_flat[each_key] = np_utils.stack_with_pad([
                each_result[each_key]
                for each_result in list_of_flat_sample_results
            ])
        result = nest_utils.unflatten_dict_to_nested(result_flat)
        return result
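A sketch of the stacking behavior, given an instance of the class (called `combiner` here) and assuming `np_utils.stack_with_pad` zero-pads ragged samples to a common shape before stacking along a new batch axis:

import numpy as np

samples = [{'tokens': np.array([1, 2, 3])},
           {'tokens': np.array([4, 5])},
           None]  # None results are dropped
batch = combiner.combine_samples_to_batch(samples)
# batch == {'tokens': array([[1, 2, 3],
#                            [4, 5, 0]])}  -- padded to the longest sample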
Example #23
    def test_create_batch(self, mode, fix_batch_dimension):
        self.datasets_for_mix[0].fix_batch_dimension = fix_batch_dimension
        self.datasets_for_mix[1].fix_batch_dimension = fix_batch_dimension

        dataset_mix = DatasetMix(datasets=self.datasets_for_mix).build()

        dataset_mix.mode = mode
        batch_size = 10
        data_batch = dataset_mix.create_batch(batch_size=batch_size)

        batch_dim_must = batch_size if fix_batch_dimension else None

        output_shapes_must = {'input1': [batch_dim_must],
                              'input2': [batch_dim_must],
                              'input3': {'int': [batch_dim_must, 20]},
                              'input4': [batch_dim_must],
                              'sample_mask_data1': [batch_dim_must],
                              'sample_mask_data2': [batch_dim_must]}
        output_types_must = {'input1': tf.string,
                             'input2': tf.float32,
                             'input3': {'int': tf.int32},
                             'input4': tf.string,
                             'sample_mask_data1': tf.float32,
                             'sample_mask_data2': tf.float32}
        output_shapes_list_flatten = nest_utils.flatten_nested_struct(
            data_batch.output_shapes)
        output_shapes_list_flatten = {
            k: v.as_list() for k, v in output_shapes_list_flatten.items()}
        output_shapes_list = nest_utils.unflatten_dict_to_nested(
            output_shapes_list_flatten)
        self.assertDictEqual(output_shapes_must,
                             output_shapes_list)
        self.assertDictEqual(output_types_must,
                             data_batch.output_types)
        _ = _get_data_results(data_batch, self.test_session(),
                              max_iteration=100)
Example #24
    def test_create_features_for_single_sample(self, mode,
                                               sampling_weights=None):
        dataset_mix = DatasetMix(datasets=self.datasets_for_mix,
                                 sampling_weights=sampling_weights).build()
        dataset_mix.mode = mode
        data_mixed = dataset_mix.create_features_for_single_sample()

        output_shapes_must = {'input1': [],
                              'input2': [],
                              'input3': {'int': [20]},
                              'input4': [],
                              'sample_mask_data1': [],
                              'sample_mask_data2': []}
        output_types_must = {'input1': tf.string,
                             'input2': tf.float32,
                             'input3': {'int': tf.int32},
                             'input4': tf.string,
                             'sample_mask_data1': tf.float32,
                             'sample_mask_data2': tf.float32}
        output_shapes_list_flatten = nest_utils.flatten_nested_struct(
            data_mixed.output_shapes)
        output_shapes_list_flatten = {
            k: v.as_list() for k, v in output_shapes_list_flatten.items()}
        output_shapes_list = nest_utils.unflatten_dict_to_nested(
            output_shapes_list_flatten)
        self.assertDictEqual(output_shapes_must,
                             output_shapes_list)
        self.assertDictEqual(output_types_must,
                             data_mixed.output_types)

        default_values_1 = {
            'input3': {'int': np.zeros([self.number_of_samples1, 20],
                                       dtype=np.int32)},
            'input4': ["" for _ in range(self.number_of_samples1)],
            'sample_mask_data1': [1.0] * self.number_of_samples1,
            'sample_mask_data2': [0.0] * self.number_of_samples1
        }
        default_values_2 = {
            'input2': [0.0 for _ in range(self.number_of_samples2)],
            'sample_mask_data1': [0.0] * self.number_of_samples1,
            'sample_mask_data2': [1.0] * self.number_of_samples1
        }

        output1_with_defaults_must = copy.deepcopy(self.inputs1)
        output1_with_defaults_must.update(default_values_1)
        output2_with_defaults_must = copy.deepcopy(self.inputs2)
        output2_with_defaults_must.update(default_values_2)

        if mode == 'train':
            iterations_number = 500
        else:
            iterations_number = 100
        output = _get_data_results(data_mixed, self.test_session(),
                                   max_iteration=iterations_number)
        if mode == 'eval':
            outputs_must = {}
            for each_key in ['input1', 'input2', 'input4',
                             'sample_mask_data1', 'sample_mask_data2']:
                outputs_must[each_key] = _interleave(
                    output1_with_defaults_must[each_key],
                    output2_with_defaults_must[each_key],
                    iterations_number
                )
            outputs_must['input3'] = {
                'int': _interleave(
                    output1_with_defaults_must['input3']['int'],
                    output2_with_defaults_must['input3']['int'],
                    iterations_number
                )
            }
            for each_key in ['input2', 'input3', 'sample_mask_data1',
                             'sample_mask_data2']:
                self.assertAllClose(outputs_must[each_key],
                                    output[each_key])
            for each_key in ['input1', 'input4']:
                self.assertListEqual(outputs_must[each_key],
                                     output[each_key])
        else:
            number_samples_per_dataset = [0, 0]
            datasets_with_defaults = [output1_with_defaults_must,
                                      output2_with_defaults_must]
            for i_sample in range(iterations_number):
                sample = _select_sample_at_index(output, i_sample)
                dataset_ind = _get_dataset_id_from_sample(
                    sample, datasets_with_defaults)
                number_samples_per_dataset[dataset_ind] += 1
            sampling_weights_output = [v / iterations_number
                                       for v in number_samples_per_dataset]
            if sampling_weights is not None:
                sampling_weights_norm_must = [v / sum(sampling_weights)
                                              for v in sampling_weights]
            else:
                sampling_weights_norm_must = [0.5, 0.5]
            atol = 10 / iterations_number
            self.assertAllClose(sampling_weights_norm_must,
                                sampling_weights_output,
                                atol=atol)
    def test_predict_using_predictor(self, with_model_parameters):
        class PredictorMock(object):
            def __init__(self_, fetch_tensors, feed_tensors):
                self_.fetch_tensors = fetch_tensors
                self_.feed_tensors = feed_tensors

            def __call__(self_, inputs: dict):
                return {k + '_out': v for k, v in inputs.items()}

        data = {
            'node1': {
                'out1': 10,
                'out2': 20
            },
            'node2': {
                'out3': 30,
                'out4': 40
            },
            'node3': {
                'out5': 50
            }
        }
        data_for_predictor = {
            k: v
            for k, v in data.items() if k in ['node1', 'node2']
        }
        if with_model_parameters:
            model_parameters = {
                "nucleotide1": {
                    "parameter1": 10
                },
                "nucleotide3": {
                    "parameter2": 20,
                    "parameter3": [30, 40]
                }
            }
        else:
            model_parameters = None

        data_for_predictor_flatten = nest_utils.flatten_nested_struct(
            data_for_predictor, flatten_lists=False)
        feed_tensors = nest_utils.flatten_nested_struct(data_for_predictor,
                                                        flatten_lists=False)
        predictor_out_flatten_must = {
            k + '_out': v
            for k, v in data_for_predictor_flatten.items()
        }

        predictor_out_must = nest_utils.unflatten_dict_to_nested(
            predictor_out_flatten_must)
        if with_model_parameters:
            predictor_out_must.update({
                "nucleotide1": {
                    "parameter1_out": 10
                },
                "nucleotide3": {
                    "parameter2_out": 20,
                    "parameter3_out": [30, 40]
                }
            })

        predictor = PredictorMock(feed_tensors=feed_tensors,
                                  fetch_tensors=None)

        if with_model_parameters:
            result = predict_using_predictor(predictor,
                                             inputs=data,
                                             model_parameters=model_parameters)
        else:
            result = predict_using_predictor(predictor, inputs=data)
        self.assertDictEqual(predictor_out_must, result)
Example #26
def predictor_from_load_config(
        load_config: InferenceLoadConfig,
        tensorrt_config: Optional[TensorrtConfig] = None,
        session_config: Optional[tf.ConfigProto] = None,
        postprocessors_to_use: Optional[List[str]] = None,
        model_parameters: Optional[dict] = None) -> Predictor:
    """
    Create instance of :obj:`tf.contrib.predictor.Predictor` from load_config.

    If a saved_model was provided and tensorrt_config.use_tensorrt is set, it
    will try to create the Predictor using tensorrt. If the tensorrt
    constructor fails, the saved model predictor will be used.

    Parameters
    ----------
    load_config
        load config
    tensorrt_config
        tensorrt config
    session_config
        session configuration
    postprocessors_to_use
        which postprocessors to use
    model_parameters
        model parameters to feed, in nested view; only the names will be used
        to get the placeholders and add them to the predictor feed_tensors

    Returns
    -------
    predictor
        predictor
    """
    logger = logging.getLogger(__name__)
    if load_config.saved_model:
        saved_model_path = load_config.saved_model
        if tensorrt_config and tensorrt_config.use_tensorrt:
            _predictor = _TensorrtPredictor(saved_model_dir=saved_model_path,
                                            tensorrt_config=tensorrt_config,
                                            config=session_config)
            logger.info("Using tensorrt predictor")
        else:
            _predictor = tf.contrib.predictor.from_saved_model(
                saved_model_path, config=session_config)
    else:
        if tensorrt_config and tensorrt_config.use_tensorrt:
            logger.warning(
                "currently tensorrt can be used only with saved_model")
        # create predictor from meta graph
        meta_graph_path = load_config.meta_graph
        checkpoint_path = load_config.checkpoint
        _predictor = _GraphAndCheckpointPredictor(meta_graph_path,
                                                  checkpoint_path,
                                                  config=session_config)
    if postprocessors_to_use:
        # pylint: disable=protected-access
        # there is no property setter and so only way to assign it
        fetch_tensors_unflatten = nest_utils.unflatten_dict_to_nested(
            _predictor._fetch_tensors)
        fetch_tensors_unflatten_filtered = {
            k: v
            for k, v in fetch_tensors_unflatten.items()
            if k in postprocessors_to_use
        }
        fetch_tensors_flatten_filtered = nest_utils.flatten_nested_struct(
            fetch_tensors_unflatten_filtered)
        _predictor._fetch_tensors = fetch_tensors_flatten_filtered

    if model_parameters:
        _predictor = _add_model_parameters_to_predictor(
            _predictor, model_parameters)
    return _predictor
Example #27
    def __getitem__(self, item: Union[int, slice]) -> dict:
        buffer_slice_flat = {k: v[item] for k, v in self._buffer_flat.items()}
        return nest_utils.unflatten_dict_to_nested(buffer_slice_flat)
Example #28
def _postprocess_tfrecords(**data):
    data_flat = nest_utils.flatten_nested_struct(data)
    return nest_utils.unflatten_dict_to_nested(
        {k: v + "_pp"
         for k, v in data_flat.items()})
Example #29
    def test_parse_tfrecord_example(self, tf_decode_raw,
                                    tf_parse_single_example):
        def _get_tfrecords_features():
            return {
                "data1": "feature_1",
                "data2": "feature_data2",
                "data3": ["feature_data3_0", "feature_data3_1"],
                "data4": {
                    "sub1": "feature_data4_sub1",
                    "sub2": "feature_data4_sub2"
                }
            }

        def _get_tfrecords_output_types():
            return {"data1": "string_value", "data4/sub1": "float_value"}

        def _parse_single_example(example, features):
            example_flat = nest_utils.flatten_nested_struct(example, "/")
            result = {
                k: "-".join([str(example_flat[k]), features[k]])
                for k in example_flat
            }
            return result

        def _postprocess_tfrecords(**data):
            data_flat = nest_utils.flatten_nested_struct(data)
            return nest_utils.unflatten_dict_to_nested(
                {k: v + "_pp"
                 for k, v in data_flat.items()})

        tf_decode_raw.side_effect = lambda x, y: x + "_raw"
        tf_parse_single_example.side_effect = _parse_single_example

        mixin = tf_data_utils.TfRecordsMixin()
        mixin.get_tfrecords_features = MagicMock(wraps=_get_tfrecords_features)
        mixin.get_tfrecords_output_types = MagicMock(
            wraps=_get_tfrecords_output_types)
        mixin.postprocess_tfrecords = MagicMock(wraps=_postprocess_tfrecords)
        mixin.decode_field = MagicMock(wraps=mixin.decode_field)
        result = mixin.parse_tfrecord_example(self.data)

        features = _get_tfrecords_features()
        features_flat = nest_utils.flatten_nested_struct(features, "/")
        data_flat = nest_utils.flatten_nested_struct(self.data, "/")
        output_types_flat = nest_utils.flatten_nested_struct(
            _get_tfrecords_output_types(), "/")
        result_must = nest_utils.unflatten_dict_to_nested(
            {
                k: "-".join([str(data_flat[k]), features_flat[k]]) +
                ("_raw_pp" if k in output_types_flat else "_pp")
                for k in features_flat
            }, "/")
        self.assertAllEqual(result_must, result)

        mixin.get_tfrecords_features.assert_called_once_with()
        mixin.get_tfrecords_output_types.assert_called_once_with()

        combine_fn_before_decode = lambda x: "-".join([str(x[0]), x[1]])
        decode_values = nest_utils.flatten_nested_struct(
            nest_utils.combine_nested([self.data, features],
                                      combine_fun=combine_fn_before_decode),
            "/")
        decode_field_calls = [
            mock_call(each_key, decode_values[each_key],
                      output_types_flat.get(each_key))
            for each_key in decode_values
        ]
        mixin.decode_field.assert_has_calls(decode_field_calls, any_order=True)

        data_to_postprocess_must = nest_utils.unflatten_dict_to_nested(
            {
                k: "-".join([str(data_flat[k]), features_flat[k]]) +
                ("_raw" if k in output_types_flat else "")
                for k in features_flat
            }, "/")
        mixin.postprocess_tfrecords.assert_called_once_with(
            **data_to_postprocess_must)