def test_combine_nested_deep(self):
    """
    Check combine_nested on deeply nested inputs with a summing function

    Three dicts holding a scalar, a list and a nested dict (which itself
    contains a list, a dict and a scalar) are combined with np.sum and the
    result is compared against literal expected values.
    """
    np.random.seed(57454)

    def _make_sample(offset):
        # only 'a' and 'b' depend on the sample index; 'c' is constant
        return {
            'a': offset + 1,
            'b': [offset + j for j in range(5)],
            'c': {
                'c1': [j + 10 for j in range(5)],
                'c2': {
                    'c21': 10,
                    'c22': 20
                },
                'c3': 1
            },
        }

    def _sum_all(values):
        return np.sum(values)

    list_of_nested = [_make_sample(i) for i in range(3)]
    # combine on a deep copy so the originals stay untouched
    nested_inputs = copy.deepcopy(list_of_nested)
    res = nest_utils.combine_nested(nested_inputs, _sum_all)

    must = {
        'a': 6,
        'b': [3, 6, 9, 12, 15],
        'c': {
            'c1': [30, 33, 36, 39, 42],
            'c2': {
                'c21': 30,
                'c22': 60
            },
            'c3': 3
        }
    }
    self.assertDictEqual(res, must)
def combine_summary_from_devices(
        summary_devices: List[Dict[str, tf.Tensor]]) -> Dict[str, tf.Tensor]:
    """
    Combine (average or select first element) the summaries from multiple
    devices

    Parameters
    ----------
    summary_devices
        list of dicts with same structure from multiple devices

    Returns
    -------
    dict
        empty dict if every element of summary_devices is None; the result
        of `_dict_identity` on the single element if there is only one
        device; otherwise the result of `nest_utils.combine_nested` where
        values under 'scalar' and 'histogram' are averaged over the device
        axis (axis 0) and values under 'image', 'text', 'audio' and
        'default' are taken from the first device
    """
    if all(each_summary is None for each_summary in summary_devices):
        return {}
    if len(summary_devices) == 1:
        return _dict_identity(summary_devices[0])

    def _mean_over_devices(values):
        return tf.reduce_mean(values, axis=0)

    def _first_device(values):
        return tf.identity(values[0])

    combine_method = {
        'scalar': _mean_over_devices,
        'histogram': _mean_over_devices,
        'image': _first_device,
        'text': _first_device,
        'audio': _first_device,
        'default': _first_device,
    }
    # keys missing from the mapping resolve to the first-device selector
    combine_method_summary = defaultdict(lambda: _first_device,
                                         combine_method)
    return nest_utils.combine_nested(summary_devices,
                                     combine_fun=combine_method_summary)
def combine_predictions_from_devices(
        predictions_devices: List[Dict[str, tf.Tensor]],
        predictions_have_variable_shape: bool = False
) -> Dict[str, tf.Tensor]:
    """
    Combine the predictions from multiple devices along the first axis

    Parameters
    ----------
    predictions_devices
        list of dicts with same structure from multiple devices
    predictions_have_variable_shape
        if predictions from different devices may have different shapes;
        if so, `tf_ops.concat_padded` is used to combine them, otherwise
        `tf_ops.concat_or_stack`

    Returns
    -------
    dict
        the result of `_dict_identity` on the single element if there is
        only one device; otherwise the result of
        `nest_utils.combine_nested` where the selected op is applied with
        axis=0 to the values collected from all devices
    """
    if len(predictions_devices) == 1:
        return _dict_identity(predictions_devices[0])

    concat_op = (tf_ops.concat_padded if predictions_have_variable_shape
                 else tf_ops.concat_or_stack)

    def _concat_first_axis(values):
        return concat_op(values, axis=0)

    with tf.variable_scope('combine_predictions'):
        return nest_utils.combine_nested(predictions_devices,
                                         combine_fun=_concat_first_axis)
def test_combine_nested(self):
    """
    Check combine_nested with a single callable and with a per-key mapping

    Ten flat dicts of random arrays are combined with each candidate
    combine function; the expected value is computed key by key with the
    function that applies to that key (the 'default' entry is the fallback
    when the combine function is a dict).
    """
    np.random.seed(57454)

    def _random_sample():
        # draw order a, b, c, e matters for reproducibility with the seed
        return {
            'a': np.random.randn(10, 5, 2),
            'b': np.random.randn(100, 2),
            'c': np.random.randn(10),
            'e': np.random.randn(5, 6, 4),
        }

    list_of_nested = [_random_sample() for _ in range(10)]

    combine_funs = [
        lambda x: np.concatenate(x, 0),
        lambda x: x[0],
        lambda x: np.mean(x, 0),
        {
            'a': lambda x: np.diff(x, 0),
            'b': lambda x: np.concatenate(np.argmax(x, 0), 0),
            'default': lambda x: x[0]
        },
    ]

    for combine_fun in combine_funs:
        list_of_nested_ = copy.deepcopy(list_of_nested)
        res = nest_utils.combine_nested(list_of_nested_, combine_fun)

        def _fun_for_key(key):
            # per-key function for dict-style combine_fun, else the callable
            if isinstance(combine_fun, dict):
                return combine_fun.get(key, combine_fun['default'])
            return combine_fun

        must = {
            key: _fun_for_key(key)([each[key] for each in list_of_nested_])
            for key in list_of_nested_[0]
        }

        self.assertSetEqual(set(res.keys()),
                            set(list_of_nested_[0].keys()))
        self.assertAllClose(must, res)
def test_kpi_call(self, is_last_iteration):
    """
    Run a KPIEvaluator with one plugin and two chained accumulators on a
    batch and check its last_kpi

    Parameters
    ----------
    is_last_iteration
        value assigned to kpi_evaluator.is_last_iteration; when truthy,
        the last entries of self.kpis1_must and self.kpis2_must are
        expected, otherwise only self.kpis1_must[3] under "kpi_acc1"
    """
    temp_dir = self.get_temp_dir()
    save_dir = os.path.join(temp_dir, "save")
    cache_dir = os.path.join(temp_dir, "cache")
    os.mkdir(save_dir)
    os.mkdir(cache_dir)

    plugin_cacher = KPIMD5Cacher().build()
    kpi_plugin = DummyTpFpTnFnKPIPlugin(
        name="kpi_plugin1",
        inbound_nodes=["dataset"],
        cachers=[plugin_cacher]).build()

    f1_saver = KPIJsonSaver().build()
    f1_cacher = KPIMD5Cacher().build()
    kpi_accumulator1 = DummyF1KPIAccumulator(
        name="kpi_acc1",
        inbound_nodes=["kpi_plugin1", "dataset"],
        cachers=[f1_cacher],
        savers=[f1_saver]).build()

    mean_saver = KPIJsonSaver().build()
    mean_cacher = KPIMD5Cacher().build()
    kpi_accumulator2 = _MeanKPIAccumulator(
        name="kpi_acc2",
        inbound_nodes=["kpi_acc1", "dataset"],
        incoming_keys_mapping={"dataset": {"evaluate": "_"}},
        cachers=[mean_cacher],
        savers=[mean_saver]).build()

    kpi_evaluator = KPIEvaluator(
        plugins=kpi_plugin,
        accumulators=[kpi_accumulator1, kpi_accumulator2]).build()
    kpi_evaluator.save_target = save_dir
    kpi_evaluator.cache_target = cache_dir

    dataset_nucleotide = Nucleotide(name="dataset").build()
    dataset_nucleotide.generated_keys = [
        "labels", "predictions", "evaluate", "prefix"
    ]
    kpi_evaluator.build_dna({'dataset': dataset_nucleotide})

    # turn the list of per-sample dicts into one dict of arrays
    data_batch = nest_utils.combine_nested(self.data, np.array)
    kpi_evaluator.is_last_iteration = is_last_iteration
    kpi_evaluator(dataset=data_batch)

    if is_last_iteration:
        last_kpi_must = {
            "kpi_acc1": self.kpis1_must[-1],
            "kpi_acc2": self.kpis2_must[-1],
        }
    else:
        last_kpi_must = {"kpi_acc1": self.kpis1_must[3]}
    self.assertAllClose(last_kpi_must, kpi_evaluator.last_kpi)
def evaluate_on_batch(self, method_to_evaluate: Callable, sample_mask=None,
                      **inputs):
    """
    Call the evaluation method sample by sample on a batch of data

    Parameters
    ----------
    method_to_evaluate
        method to call on each sample; it receives the sample inputs
        together with the non-batch inputs as keyword arguments
    sample_mask
        optional batch indicator which samples should be evaluated; if
        None, all samples are evaluated
    inputs
        batch inputs; they are split with `utils.split_batch_inputs`
        using self.not_batch_keys

    Returns
    -------
    kpi
        truthy per-sample results combined with
        `nest_utils.combine_nested` and np.array, or None if no sample
        produced a truthy result
    """
    batch_inputs_as_list, not_batch_inputs = utils.split_batch_inputs(
        inputs, not_batch_keys=self.not_batch_keys)
    is_last_flags = utils.get_is_last_sample_batchwise(
        len(batch_inputs_as_list), self.is_last_iteration, sample_mask)

    collected_kpis = []
    for index, sample_inputs in enumerate(batch_inputs_as_list):
        if sample_mask is not None and not sample_mask[index]:
            # masked out samples are skipped entirely
            continue
        # set before the call so it is in place while the sample is
        # evaluated
        self.is_last_sample = is_last_flags[index]
        kpi_sample = method_to_evaluate(**sample_inputs,
                                        **not_batch_inputs)
        if kpi_sample:
            collected_kpis.append(kpi_sample)

    if not collected_kpis:
        return None
    return nest_utils.combine_nested(collected_kpis, combine_fun=np.array)
def combine_losses_from_devices(
        losses_devices: List[Dict[str, tf.Tensor]]) -> Dict[str, tf.Tensor]:
    """
    Combine (average) the losses from multiple devices

    Parameters
    ----------
    losses_devices
        list of dicts with same structure from multiple devices

    Returns
    -------
    dict
        the result of `_dict_identity` on the single element if there is
        only one device; otherwise the result of
        `nest_utils.combine_nested` where values collected from all
        devices are averaged over axis 0
    """
    if len(losses_devices) == 1:
        return _dict_identity(losses_devices[0])

    def _mean_over_devices(values):
        return tf.reduce_mean(values, axis=0)

    with tf.variable_scope('combine_losses'):
        return nest_utils.combine_nested(losses_devices,
                                         combine_fun=_mean_over_devices)
def test_parse_tfrecord_example(self, tf_decode_raw,
                                tf_parse_single_example):
    """
    Check TfRecordsMixin.parse_tfrecord_example with string stand-ins

    The two mock arguments get side effects that mark their work in the
    value itself: parsing joins the example value and the feature with
    "-", raw decoding appends "_raw" and the wrapped postprocessing
    appends "_pp". The expected values are then rebuilt from self.data
    using the same markers.

    Parameters
    ----------
    tf_decode_raw
        mock object; its side effect is set here
    tf_parse_single_example
        mock object; its side effect is set here
    """

    def _get_tfrecords_features():
        return {
            "data1": "feature_1",
            "data2": "feature_data2",
            "data3": ["feature_data3_0", "feature_data3_1"],
            "data4": {
                "sub1": "feature_data4_sub1",
                "sub2": "feature_data4_sub2"
            }
        }

    def _get_tfrecords_output_types():
        return {"data1": "string_value", "data4/sub1": "float_value"}

    def _parse_single_example(example, features):
        flat_example = nest_utils.flatten_nested_struct(example, "/")
        return {
            key: "-".join([str(flat_example[key]), features[key]])
            for key in flat_example
        }

    def _postprocess_tfrecords(**data):
        flat = nest_utils.flatten_nested_struct(data)
        marked = {key: value + "_pp" for key, value in flat.items()}
        return nest_utils.unflatten_dict_to_nested(marked)

    tf_decode_raw.side_effect = lambda x, y: x + "_raw"
    tf_parse_single_example.side_effect = _parse_single_example

    mixin = tf_data_utils.TfRecordsMixin()
    mixin.get_tfrecords_features = MagicMock(
        wraps=_get_tfrecords_features)
    mixin.get_tfrecords_output_types = MagicMock(
        wraps=_get_tfrecords_output_types)
    mixin.postprocess_tfrecords = MagicMock(wraps=_postprocess_tfrecords)
    mixin.decode_field = MagicMock(wraps=mixin.decode_field)

    result = mixin.parse_tfrecord_example(self.data)

    features = _get_tfrecords_features()
    features_flat = nest_utils.flatten_nested_struct(features, "/")
    data_flat = nest_utils.flatten_nested_struct(self.data, "/")
    output_types_flat = nest_utils.flatten_nested_struct(
        _get_tfrecords_output_types(), "/")

    def _expected_nested(typed_suffix, untyped_suffix):
        # keys that have an output type get typed_suffix, the rest get
        # untyped_suffix
        flat = {
            key: ("-".join([str(data_flat[key]), features_flat[key]])
                  + (typed_suffix if key in output_types_flat
                     else untyped_suffix))
            for key in features_flat
        }
        return nest_utils.unflatten_dict_to_nested(flat, "/")

    result_must = _expected_nested("_raw_pp", "_pp")
    self.assertAllEqual(result_must, result)
    mixin.get_tfrecords_features.assert_called_once_with()
    mixin.get_tfrecords_output_types.assert_called_once_with()

    def _join_data_with_feature(pair):
        return "-".join([str(pair[0]), pair[1]])

    decode_values = nest_utils.flatten_nested_struct(
        nest_utils.combine_nested([self.data, features],
                                  combine_fun=_join_data_with_feature),
        "/")
    decode_field_calls = [
        mock_call(key, decode_values[key], output_types_flat.get(key))
        for key in decode_values
    ]
    mixin.decode_field.assert_has_calls(decode_field_calls,
                                        any_order=True)

    data_to_postprocess_must = _expected_nested("_raw", "")
    mixin.postprocess_tfrecords.assert_called_once_with(
        **data_to_postprocess_must)
def test_call(self, plugin_is_last_sample, sample_mask, is_last_iteration,
              is_last_sample_must):
    """
    Call the KPI plugin on a stacked batch and check the per-sample calls
    and the returned KPI

    Parameters
    ----------
    plugin_is_last_sample
        mock object; its side effect is set here to return its argument
    sample_mask
        optional per-sample mask passed to the plugin; samples with a
        falsy mask value are excluded from the expected KPI and from the
        expected evaluate_on_sample calls
    is_last_iteration
        value assigned to kpi_plugin.is_last_iteration
    is_last_sample_must
        values expected to be passed, in order, to plugin_is_last_sample
    """
    temp_dir = self.get_temp_dir()
    save_dir = os.path.join(temp_dir, "save")
    cache_dir = os.path.join(temp_dir, "cache")
    os.mkdir(save_dir)
    os.mkdir(cache_dir)

    plugin_is_last_sample.side_effect = lambda x: x

    saver = KPIJsonSaver().build()
    cacher = KPIMD5Cacher().build()
    kpi_plugin = DummyTpFpTnFnKPIPlugin(cachers=[cacher],
                                        savers=[saver]).build()
    kpi_plugin.save_target = save_dir
    kpi_plugin.cache_target = cache_dir
    # wrap so the real method still runs while calls are recorded
    kpi_plugin.evaluate_on_sample = MagicMock(
        wraps=kpi_plugin.evaluate_on_sample)

    data_batch = nest_utils.combine_nested(self.data, combine_fun=np.stack)
    kpi_plugin.is_last_iteration = is_last_iteration

    def _is_selected(index):
        return sample_mask is None or sample_mask[index]

    kpi_must_list = [
        each_kpi_must
        for i_sample, each_kpi_must in enumerate(self.kpi_must)
        if _is_selected(i_sample)
    ]
    kpi_must = (
        nest_utils.combine_nested(kpi_must_list, combine_fun=np.array)
        if kpi_must_list else None)

    is_last_sample_calls_must = [
        mock_call(each_flag) for each_flag in is_last_sample_must
    ]
    evaluate_on_sample_args_must = [
        mock_call(**each_sample_data)
        for i_sample, each_sample_data in enumerate(self.data)
        if _is_selected(i_sample)
    ]

    kpi = kpi_plugin(sample_mask=sample_mask, **data_batch)

    plugin_is_last_sample.assert_has_calls(is_last_sample_calls_must)
    kpi_plugin.evaluate_on_sample.assert_has_calls(
        evaluate_on_sample_args_must)

    if kpi_must is None:
        self.assertIsNone(kpi)
        return
    if sample_mask is None:
        self.assertAllClose(kpi_must, kpi)
        return

    for i in range(sum(sample_mask)):
        sample_kpi_must = {k: v[i] for k, v in kpi_must.items()}
        sample_kpi = {k: v[i] for k, v in kpi.items()}
        # NOTE(review): i runs over the selected (filtered) results while
        # sample_mask[i] indexes the unfiltered mask - confirm intended
        compare = (self.assertAllClose if sample_mask[i]
                   else self.assertAllEqual)
        compare(sample_kpi_must, sample_kpi)