def from_train_op(cls, train_op, loss, *, inputs=None, labels=None, metrics=None, updates=None,
                  sess=None, dataset=None, tensor_with_value=None, session_config=None,
                  model_dir=None):
    """
    Create a TFOptimizer from an existing TensorFlow train_op.

    Gradients and variables are extracted from `train_op`. When `dataset` is not given it
    is looked up from `loss`. When `inputs` / `labels` are not given they default to the
    dataset's original tensors: a 2-tuple is treated as (inputs, labels), anything else
    is treated as inputs only with no labels.

    :param train_op: the train op the gradients and variables are taken from
    :param loss: the loss tensor
    :param inputs: input tensors (possibly nested); defaults to the dataset tensors
    :param labels: label tensors (possibly nested); defaults to the dataset tensors
    :param metrics: forwarded to TFOptimizer._from_grads
    :param updates: forwarded to TFOptimizer._from_grads
    :param sess: an existing session, or None to get or create one
    :param dataset: the dataset to train on, or None to look it up from `loss`
    :param tensor_with_value: forwarded to TFOptimizer._from_grads
    :param session_config: forwarded to TFOptimizer._from_grads
    :param model_dir: forwarded to TFOptimizer._from_grads
    :return: the result of TFOptimizer._from_grads
    """
    sess = TFOptimizer._get_or_create_session(sess)
    grads, variables = TFOptimizer._get_vars_grads_from_train_op(train_op)
    if dataset is None:
        dataset = TFOptimizer._get_dataset_from_loss(loss)
    # Reading `tensors` triggers tensor creation if they are not available yet.
    _ = dataset.tensors

    original_tensors = dataset._original_tensors
    if isinstance(original_tensors, tuple) and len(original_tensors) == 2:
        fallback_inputs, fallback_labels = original_tensors[0], original_tensors[1]
    else:
        fallback_inputs, fallback_labels = original_tensors, []

    if inputs is None:
        inputs = fallback_inputs
    if labels is None:
        labels = fallback_labels

    flat_inputs = nest.flatten(inputs)
    flat_labels = nest.flatten(labels)

    from bigdl.orca.tfpark.zoo_optimizer import FakeOptimMethod
    return TFOptimizer._from_grads(
        loss=loss,
        sess=sess,
        inputs=flat_inputs,
        labels=flat_labels,
        grads=grads,
        variables=variables,
        dataset=dataset,
        metrics=metrics,
        tensor_with_value=tensor_with_value,
        optim_method=FakeOptimMethod(),
        session_config=session_config,
        updates=updates,
        model_dir=model_dir,
        train_op=train_op,
    )
def to_dataset(iter):
    """
    Turn the elements of one partition into a single batched dataset graph.

    Every element is converted with `create_dataset_fn`, the resulting datasets are
    concatenated in order and batched with `batch_per_shard` / `drop_remainder`.

    :param iter: iterator over the elements of the partition
    :return: an empty list when the partition is empty, otherwise a one-element list
             holding (serialized GraphDef as bytearray, meta info dict)
    """
    elements = list(iter)

    import tensorflow as tf
    if not elements:
        return []

    datasets = [create_dataset_fn(element) for element in elements]
    merged = datasets[0]
    for extra in datasets[1:]:
        merged = merged.concatenate(extra)
    merged = merged.batch(batch_per_shard, drop_remainder)

    iterator = merged.make_initializable_iterator()
    next_ops = nest.flatten(iterator.get_next())

    flat_types = list(nest.flatten(merged.output_types))
    type_enums = [dtype.as_datatype_enum for dtype in flat_types]

    init_op_name = iterator.initializer.name
    table_init_op = tf.tables_initializer().name
    output_names = [op.name for op in next_ops]

    graph = next_ops[0].graph

    # The dataset is batched, so the first dimension of every shape is dropped here.
    record_shapes = [shape[1:] for shape in nest.flatten(merged.output_shapes)]

    flat_metas = []
    for position, record_shape in enumerate(record_shapes):
        flat_metas.append(TensorMeta(dtype=flat_types[position],
                                     shape=list(record_shape),
                                     name="zoo_input_{}".format(position)))

    structure = merged.output_types
    if isinstance(structure, tf.DType):
        # A bare dtype is wrapped so that it can be packed like any other structure.
        structure = (structure, )
    tensor_structure = nest.pack_sequence_as(structure, flat_metas)

    meta_info = {
        "init_op_name": init_op_name,
        "table_init_op": table_init_op,
        "output_names": output_names,
        "output_types": type_enums,
        "tensor_structure": tensor_structure
    }

    serialized_graph = bytearray(graph.as_graph_def().SerializeToString())
    return [(serialized_graph, meta_info)]
def partition(data, num_shards=None):
    """
    Partition local in memory data and form a SparkXShards

    :param data: np.ndarray, a tuple, list, dict of np.ndarray, or a nested structure
           made of tuple, list, dict with ndarray as the leaf value
    :param num_shards: the number of shards that the data will be partitioned into;
           when None, node number * core number is used
    :return: a SparkXShards
    """
    sc = init_nncontext()
    node_num, core_num = get_node_and_core_number()
    if num_shards is None:
        shard_num = node_num * core_num
    else:
        shard_num = num_shards
    import numpy as np
    type_err_msg = """
The types supported in bigdl.orca.data.XShards.partition are
1. np.ndarray
2. a tuple, list, dict of np.ndarray
3. nested structure made of tuple, list, dict with ndarray as the leaf value

But got data of type {}
        """.format(type(data))
    supported_types = {list, tuple, dict}

    if isinstance(data, np.ndarray):
        if data.shape[0] < shard_num:
            raise ValueError(
                "The length of data {} is smaller than the total number "
                "of shards {}. Please adjust the num_shards option to be "
                "at most {}.".format(data.shape[0], shard_num, data.shape[0]))
        rdd = sc.parallelize(np.array_split(data, shard_num))
        return SparkXShards(rdd)

    assert type(data) in supported_types, type_err_msg
    leaves = nest.flatten(data)
    data_length = len(leaves[0])
    if data_length < shard_num:
        raise ValueError(
            "The length of data {} is smaller than the total number "
            "of shards {}. Please adjust the num_shards option to be "
            "at most {}.".format(data_length, shard_num, data_length))

    # Split every leaf into shard_num pieces, then regroup the pieces shard by shard.
    pieces_per_leaf = []
    for leaf in leaves:
        assert len(leaf) == data_length, \
            "the ndarrays in data must all have the same size in first dimension, " \
            "got first ndarray of size {} and another {}".format(data_length, len(leaf))
        pieces_per_leaf.append(np.array_split(leaf, shard_num))

    shards = [
        nest.pack_sequence_as(data, [pieces[shard_idx] for pieces in pieces_per_leaf])
        for shard_idx in range(shard_num)
    ]
    rdd = sc.parallelize(shards)
    return SparkXShards(rdd)
def _expand_inputs(inputs, tensors_with_value, loss):
    """
    Flatten `inputs` and collect the extra tensors/values from `tensors_with_value`.

    :param inputs: (possibly nested) input tensors
    :param tensors_with_value: optional dict mapping a tensor to its value
    :param loss: not used by this function
    :return: (flattened inputs, list of additional tensors, list of their values)
    :raises ValueError: if a tensor in `tensors_with_value` has the same name as an input
    """
    inputs = nest.flatten(inputs)
    input_names = {tensor.name for tensor in inputs}

    extra_tensors = []
    extra_values = []
    for tensor, value in (tensors_with_value or {}).items():
        if tensor.name in input_names:
            raise ValueError(
                f"tensor {tensor} already in inputs, cannot put it in tensor_with_value")
        extra_tensors.append(tensor)
        extra_values.append(value)

    return inputs, extra_tensors, extra_values
def evaluate(
    self,
    data,
    batch_size=32,
    feature_cols=None,
    label_cols=None,
    auto_shard_files=False,
):
    """
    Evaluate model.

    :param data: evaluation data. It can be XShards, Spark DataFrame, tf.data.Dataset.
           If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
           {'x': feature, 'y': label}, where feature(label) is a numpy array or a tuple of
           numpy arrays.
           If data is tf.data.Dataset, each element is a tuple of input tensors.
    :param batch_size: batch size per thread.
    :param feature_cols: feature column names if the data is a Spark DataFrame or an
           XShards of Pandas DataFrame.
    :param label_cols: label column names if the data is a Spark DataFrame or an
           XShards of Pandas DataFrame.
    :param auto_shard_files: whether to automatically detect if the dataset is file-based
           and apply sharding on files, otherwise sharding on records. Default is False.
    :return: evaluation result as a dictionary of {'metric name': metric value}
    """
    assert self.metrics is not None, \
        "metrics is None, it should not be None in evaluate"

    is_pandas_xshards = (isinstance(data, SparkXShards)
                         and data._get_class_name() == 'pandas.core.frame.DataFrame')

    # Column names are mandatory for both Spark DataFrames and XShards of pandas DataFrames.
    if isinstance(data, DataFrame) or is_pandas_xshards:
        assert feature_cols is not None, \
            "feature columns is None; it should not be None in evaluation"
        assert label_cols is not None, \
            "label columns is None; it should not be None in evaluation"

    if is_pandas_xshards:
        data = process_xshards_of_pandas_dataframe(data, feature_cols, label_cols)

    dataset = to_dataset(
        data,
        batch_size=-1,
        batch_per_thread=batch_size,
        validation_data=None,
        feature_cols=feature_cols,
        label_cols=label_cols,
        hard_code_batch_size=False,
        sequential_order=True,
        shuffle=False,
        auto_shard_files=auto_shard_files,
    )

    model_tensors = nest.flatten(self.inputs) + nest.flatten(self.labels)
    return evaluate_metrics(model_tensors,
                            sess=self.sess,
                            dataset=dataset,
                            metrics=self.metrics)
def predict(
    self,
    data,
    batch_size=4,
    feature_cols=None,
    auto_shard_files=False,
):
    """
    Predict input data

    :param data: data to be predicted. It can be XShards, Spark DataFrame.
           If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
           {'x': feature}, where feature is a numpy array or a tuple of numpy arrays.
           tf.data.Dataset is rejected by this method.
    :param batch_size: batch size per thread
    :param feature_cols: list of feature column names if input data is Spark DataFrame
           or XShards of Pandas DataFrame.
    :param auto_shard_files: whether to automatically detect if the dataset is file-based
           and apply sharding on files, otherwise sharding on records. Default is False.
    :return: predicted result.
             If input data is XShards, the predict result is a XShards, each partition
             of the XShards is a dictionary of {'prediction': result}, where the result is
             a numpy array or a list of numpy arrays.
             If input data is Spark DataFrame, the predict result is a DataFrame which
             includes original columns plus 'prediction' column. The 'prediction' column
             can be FloatType, VectorUDT or Array of VectorUDT depending on model outputs
             shape.
             For any other input the raw prediction RDD is returned.
    """
    assert self.outputs is not None, \
        "output is None, it should not be None in prediction"
    if isinstance(data, DataFrame):
        assert feature_cols is not None, \
            "feature columns is None; it should not be None in prediction"
    if isinstance(data, SparkXShards):
        if data._get_class_name() == 'pandas.core.frame.DataFrame':
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in prediction"
            data = process_xshards_of_pandas_dataframe(data, feature_cols)

    # The two literals are concatenated implicitly; the trailing space keeps the
    # words "for" and "estimator" apart in the final message.
    assert not is_tf_data_dataset(data), "tf.data.Dataset currently cannot be used for " \
                                         "estimator prediction"

    dataset = to_dataset(
        data,
        batch_size=-1,
        batch_per_thread=batch_size,
        validation_data=None,
        feature_cols=feature_cols,
        label_cols=None,
        hard_code_batch_size=False,
        sequential_order=True,
        shuffle=False,
        auto_shard_files=auto_shard_files,
    )

    flat_inputs = nest.flatten(self.inputs)
    flat_outputs = nest.flatten(self.outputs)
    tfnet = TFNet.from_session(sess=self.sess, inputs=flat_inputs, outputs=flat_outputs)
    predicted_rdd = tfnet.predict(dataset)
    if isinstance(data, DataFrame):
        return convert_predict_rdd_to_dataframe(data, predicted_rdd)
    elif isinstance(data, SparkXShards):
        return convert_predict_rdd_to_xshard(data, predicted_rdd)
    else:
        return predicted_rdd
def predict(self, data, feature_cols=None, batch_size=4):
    """
    Predict input data

    :param batch_size: Int. Set batch Size, default is 4.
    :param data: data to be predicted. XShards, Spark DataFrame, numpy array and list of
           numpy arrays are supported. If data is XShards, each partition is a dictionary
           of {'x': feature}, where feature(label) is a numpy array or a list of numpy
           arrays.
    :param feature_cols: Feature column name(s) of data. Only used when data is a Spark
           DataFrame. Default: None.
    :return: predicted result.
             If the input data is XShards, the predict result is a XShards, each partition
             of the XShards is a dictionary of {'prediction': result}, where the result is
             a numpy array or a list of numpy arrays.
             If the input data is numpy arrays or list of numpy arrays, the predict result
             is a numpy array or a list of numpy arrays.
    """
    sc = init_nncontext()
    model_bytes_broadcast = sc.broadcast(self.model_bytes)
    weight_bytes_broadcast = sc.broadcast(self.weight_bytes)

    def partition_inference(partition):
        # Applied to each RDD partition through mapPartitions: the network is loaded
        # once per partition, with one inference request per element of the partition.
        model_bytes = model_bytes_broadcast.value
        weight_bytes = weight_bytes_broadcast.value
        batches = list(partition)
        ie = IECore()
        ie.set_config({'CPU_THREADS_NUM': str(self.core_num)}, 'CPU')
        net = ie.read_network(model=model_bytes,
                              weights=weight_bytes,
                              init_from_buffer=True)
        net.batch_size = batch_size
        local_model = ie.load_network(network=net,
                                      device_name="CPU",
                                      num_requests=len(batches))
        first_request = local_model.requests[0]
        input_names = list(first_request.input_blobs)
        output_names = list(first_request.output_blobs)
        assert len(output_names) != 0, "The number of model outputs should not be 0."

        def add_elem(d):
            # Pad a short batch to batch_size by repeating its last element, and
            # return the real element count so the padding can be cut off afterwards.
            count = len(d)
            if count >= batch_size:
                return d, count
            repeats = [1] * (count - 1)
            repeats.append(batch_size - count + 1)
            return np.repeat(d, repeats, axis=0), count

        results = []
        for idx, batch_data in enumerate(batches):
            infer_request = local_model.requests[idx]
            feed = {}
            elem_num = 0
            if isinstance(batch_data, list):
                for i, blob_name in enumerate(input_names):
                    padded, elem_num = add_elem(batch_data[i])
                    feed[blob_name] = padded
            else:
                padded, elem_num = add_elem(batch_data)
                feed[input_names[0]] = padded
            infer_request.infer(feed)
            if len(output_names) == 1:
                results.append(
                    infer_request.output_blobs[output_names[0]].buffer[:elem_num])
            else:
                results.append([
                    infer_request.output_blobs[name].buffer[:elem_num]
                    for name in output_names
                ])

        return results

    def predict_transform(dict_data, batch_size):
        # Validate one shard and return its feature data.
        assert isinstance(dict_data, dict), "each shard should be an dict"
        assert "x" in dict_data, "key x should in each shard"
        feature_data = dict_data["x"]
        if isinstance(feature_data, np.ndarray):
            assert feature_data.shape[0] <= batch_size, \
                "The batch size of input data (the second dim) should be less than " \
                "the model batch size, otherwise some inputs will be ignored."
        elif isinstance(feature_data, list):
            for elem in feature_data:
                assert isinstance(elem, np.ndarray), \
                    "Each element in the x list should be a ndarray, but get " + \
                    elem.__class__.__name__
                assert elem.shape[0] <= batch_size, \
                    "The batch size of each input data (the second dim) should be " \
                    "less than the model batch size, otherwise some inputs will be " \
                    "ignored."
        else:
            raise ValueError(
                "x in each shard should be a ndarray or a list of ndarray.")
        return feature_data

    if isinstance(data, DataFrame):
        from bigdl.orca.learn.utils import dataframe_to_xshards, \
            convert_predict_rdd_to_dataframe
        xshards, _ = dataframe_to_xshards(data,
                                          validation_data=None,
                                          feature_cols=feature_cols,
                                          label_cols=None,
                                          mode="predict")
        transformed_data = xshards.transform_shard(predict_transform, batch_size)
        result_rdd = transformed_data.rdd.mapPartitions(partition_inference)
        return convert_predict_rdd_to_dataframe(
            data, result_rdd.flatMap(lambda batch_results: batch_results))

    if isinstance(data, SparkXShards):
        transformed_data = data.transform_shard(predict_transform, batch_size)
        result_rdd = transformed_data.rdd.mapPartitions(partition_inference)

        def update_result_shard(pair):
            # Attach the prediction to the shard it was computed from.
            shard, prediction = pair
            shard["prediction"] = prediction
            return shard

        return SparkXShards(data.rdd.zip(result_rdd).map(update_result_shard))

    if not isinstance(data, (np.ndarray, list)):
        raise ValueError(
            "Only XShards, Spark DataFrame, a numpy array and a list of numpy arrays "
            "are supported as input data, but get " + data.__class__.__name__)

    if isinstance(data, np.ndarray):
        split_num = math.ceil(len(data) / batch_size)
        arrays = np.array_split(data, split_num)
        num_slices = min(split_num, self.node_num)
        data_rdd = sc.parallelize(arrays, numSlices=num_slices)
    else:
        flattened = nest.flatten(data)
        data_length = len(flattened[0])
        split_num = math.ceil(flattened[0].shape[0] / batch_size)
        num_slices = min(split_num, self.node_num)
        # Split every array into split_num pieces, then regroup the pieces per batch.
        pieces_per_array = []
        for arr in flattened:
            assert isinstance(arr, np.ndarray), \
                "the data in the data list should be ndarrays, but get " + \
                arr.__class__.__name__
            assert len(arr) == data_length, \
                "the ndarrays in data must all have the same size in first dimension" \
                ", got first ndarray of size {} and another {}".format(data_length,
                                                                       len(arr))
            pieces_per_array.append(np.array_split(arr, split_num))
        batches = [
            nest.pack_sequence_as(data, [pieces[k] for pieces in pieces_per_array])
            for k in range(split_num)
        ]
        data_rdd = sc.parallelize(batches, numSlices=num_slices)

    print("Partition number: ", data_rdd.getNumPartitions())
    collected = data_rdd.mapPartitions(partition_inference).collect()

    first_result = collected[0]
    if isinstance(first_result, list):
        # Multi-output model: concatenate the batches of every output separately.
        return [
            np.concatenate([batch_result[i] for batch_result in collected], axis=0)
            for i in range(len(first_result))
        ]
    if isinstance(first_result, np.ndarray):
        return np.concatenate(collected, axis=0)
    return None
def from_keras(cls, keras_model, dataset, session_config=None, model_dir=None,
               metrics=None, optimizer=None):
    """
    Create a TFOptimizer from a tensorflow.keras model. The model must be compiled.

    :param keras_model: the tensorflow.keras model, which must be compiled.
    :param dataset: a TFDataset
    :param session_config: forwarded to the created model/optimizer
    :param model_dir: forwarded to the created optimizer
    :param metrics: optional dict of extra metrics; when validation metrics are derived
           from the keras model they are added to this dict in place
    :param optimizer: when given, it is used together with a TFModel built from the keras
           graph, and clipnorm/clipvalue are read from the keras optimizer; when None, the
           keras optimizer's own apply_gradients op is used through from_train_op
    :return: a TFOptimizer
    :raises ValueError: if an input shape of the model does not match the dataset, or if
            the metrics configuration of the keras model is not supported
    """
    import tensorflow.keras.backend as K

    model_inputs = keras_model.inputs

    if hasattr(keras_model, "targets"):
        model_targets = keras_model.targets
    else:
        model_targets = keras_model._targets

    # target can be None if loss is None
    model_targets = [target for target in model_targets if target is not None]

    check_data_compatible(dataset, keras_model, mode="train")
    # standardize feature, labels to support keras model
    if isinstance(dataset, TFNdarrayDataset):
        dataset = _standarize_feature_label_dataset(dataset, keras_model)

    flatten_inputs = nest.flatten(dataset.feature_tensors)
    assert len(model_inputs) == len(flatten_inputs), \
        ("the keras model and TFDataset should have the same number of tensors" +
         " keras model has {} inputs " +
         "while TFDataset has {} inputs").format(len(model_inputs),
                                                 len(flatten_inputs))
    for i, dataset_input in enumerate(flatten_inputs):
        if not TFOptimizer._shape_match(model_inputs[i].shape, dataset_input.shape):
            raise ValueError(("The {}th input in keras model {}"
                              " does not match the TFDataset "
                              "input {}").format(i, model_inputs[i], dataset_input))

    flatten_targets = nest.flatten(dataset.label_tensors)
    # Report the number of dataset labels (not inputs) in the mismatch message.
    assert len(model_targets) == len(flatten_targets), \
        ("the keras model and TFDataset should have the same number of tensors" +
         " keras model has {} targets " +
         "while TFDataset has {} labels").format(len(model_targets),
                                                 len(flatten_targets))
    # todo check targets shape, currently checking target shape will
    # cause too much false alarm.

    loss = keras_model.total_loss
    variables = keras_model._collected_trainable_weights
    variables.sort(key=lambda variable: variable.name)
    keras_optimizer = keras_model.optimizer

    from bigdl.orca.tfpark.zoo_optimizer import get_gradients_for_keras
    grads = get_gradients_for_keras(keras_optimizer, loss, variables)
    grads_and_vars = list(zip(grads, variables))
    import tensorflow.python.keras.optimizers as koptimizers
    if isinstance(keras_optimizer, koptimizers.TFOptimizer):
        # work around keras TFOptimizer bug
        train_op = keras_optimizer.optimizer.apply_gradients(grads_and_vars)
    else:
        train_op = keras_optimizer.apply_gradients(grads_and_vars)

    sess = K.get_session()

    if keras_model.metrics and (dataset.get_validation_data() is not None):
        if isinstance(keras_model.metrics, dict):
            raise ValueError(
                "different metrics for different outputs are not supported right now")

        if len(keras_model.outputs) > 1:
            if not all([name.endswith("loss") for name in keras_model.metrics_names]):
                raise ValueError(
                    "metrics (except loss) for multi-head model is not supported")
            else:
                bigdl_val_methods = [Loss()]
                val_outputs = keras_model.outputs
                val_labels = model_targets
        else:
            bigdl_val_methods = \
                [to_bigdl_metric(m, keras_model.loss) for m in keras_model.metrics_names]
            val_outputs = keras_model.outputs
            val_labels = model_targets
    else:
        val_outputs = None
        val_labels = None
        bigdl_val_methods = None

    # learning phase is fed with True while training and False while validating
    tensor_with_value = {K.learning_phase(): [True, False]}

    updates = []

    updates += keras_model.get_updates_for(None)
    # Conditional updates relevant to this model
    updates += keras_model.get_updates_for(keras_model.inputs)

    if bigdl_val_methods is not None:
        val_methods = to_list(bigdl_val_methods)
        bigdl_metrics = {}
        for i, method in enumerate(val_methods):
            bigdl_metrics['bigdl_metric_' + str(i)] = BigDLMetric(method,
                                                                  val_outputs,
                                                                  val_labels)
        if metrics is None:
            metrics = bigdl_metrics
        else:
            metrics.update(bigdl_metrics)

    if optimizer is not None:
        clip_norm = None
        clip_value = None
        if hasattr(keras_optimizer, 'clipnorm'):
            clip_norm = keras_optimizer.clipnorm
        if hasattr(keras_optimizer, 'clipvalue'):
            clip_value = (-keras_optimizer.clipvalue, keras_optimizer.clipvalue)
        tf_model = TFModel.create(loss, sess, model_inputs, model_targets,
                                  keras_model.outputs, grads, variables, loss.graph,
                                  tensor_with_value, session_config, metrics,
                                  updates, model_dir=None)

        return cls(tf_model, optimizer, sess=sess, dataset=dataset,
                   clip_norm=clip_norm, clip_value=clip_value, model_dir=model_dir)

    return cls.from_train_op(train_op, loss, inputs=model_inputs, labels=model_targets,
                             metrics=metrics, updates=updates, sess=sess, dataset=dataset,
                             tensor_with_value=tensor_with_value,
                             session_config=session_config,
                             model_dir=model_dir)
def from_loss(cls, loss, optim_method, session=None, inputs=None, dataset=None,
              val_outputs=None, val_labels=None, val_method=None,
              clip_norm=None, clip_value=None, metrics=None,
              tensor_with_value=None, session_config=None, model_dir=None, updates=None):
    """
    Create a TFOptimizer from a TensorFlow loss tensor.
    The loss tensor must come from a TensorFlow graph that only takes TFDataset.tensors and
    the tensors in `tensor_with_value` as inputs.

    :param loss: The loss tensor of the TensorFlow model, should be a scalar
    :param optim_method: the optimization method to be used, such as
           bigdl.dllib.optim.optimizer.Adam
    :param session: the current TensorFlow Session, if you want to used a pre-trained
           model, you should use the Session to load the pre-trained variables and pass it
           to TFOptimizer.
    :param inputs: the input tensors. A tuple of two elements is treated as
           (inputs, labels). When both `inputs` and `dataset` are None they are looked up
           from the loss.
    :param dataset: the TFDataset to train on; required whenever `inputs` is given.
    :param val_outputs: the validation output TensorFlow tensor to be used by val_methods
    :param val_labels: the validation label TensorFlow tensor to be used by val_methods
    :param val_method: the BigDL val_method(s) to be used.
    :param clip_norm: float >= 0. Gradients will be clipped when their L2 norm exceeds
           this value.
    :param clip_value: a positive float/int which clips gradients to
           (-clip_value, clip_value), or a tuple which clips to (min_value, max_value).
    :param metrics: a dictionary. The key should be a string representing the metric's name
           and the value should be the corresponding TensorFlow tensor, which should be a
           scalar.
    :param tensor_with_value: a dictionary. The key is TensorFlow tensor, usually a
           placeholder, the value of the dictionary is a tuple of two elements. The first
           one of the tuple is the value to feed to the tensor in training phase and the
           second one is the value to feed to the tensor in validation phase.
    :param session_config: forwarded to TFOptimizer._from_grads
    :param model_dir: forwarded to TFOptimizer._from_grads
    :param updates: forwarded to TFOptimizer._from_grads
    :return: a TFOptimizer
    :raises ValueError: if `inputs` or `dataset` is missing while the other is given, or
            if `clip_value` is neither a positive number nor a tuple
    """
    sess = TFOptimizer._get_or_create_session(session)
    grads, variables = TFOptimizer._get_vars_grads(loss)

    if dataset is None and inputs is None:
        dataset = TFOptimizer._get_dataset_from_loss(loss)
        inputs = dataset._original_tensors
    else:
        if inputs is None:
            raise ValueError("please specify inputs")
        if dataset is None:
            # Fail with a clear message instead of an AttributeError on None below.
            raise ValueError("please specify dataset")
        _ = dataset.tensors  # trigger creating placeholders

    if isinstance(inputs, tuple) and len(inputs) == 2:
        inputs, labels = inputs
    else:
        labels = []

    inputs = nest.flatten(inputs)
    labels = nest.flatten(labels)

    if clip_value is not None:
        if isinstance(clip_value, (float, int)):
            if clip_value <= 0:
                # The exception must actually be raised; a non-positive value would
                # otherwise silently produce an inverted or empty clipping range.
                raise ValueError(
                    "The clip_value argument should be positive number")
            clip_value = (-float(clip_value), float(clip_value))

        if not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be" +
                " a positive float/int which clips to" +
                " (-clip_value, clip_value); " +
                "or a tuple which clips to (min_value, max_value)")

    if val_method is not None:
        val_methods = to_list(val_method)
        if metrics is None:
            metrics = {}

        for i, method in enumerate(val_methods):
            metrics['bigdl_metric_' + str(i)] = BigDLMetric(method, val_outputs, val_labels)

    return TFOptimizer._from_grads(loss, sess, inputs, labels, grads, variables, dataset,
                                   optim_method, clip_norm, clip_value,
                                   metrics, tensor_with_value, session_config,
                                   model_dir, updates)
def train(self, input_fn, end_trigger):
    """
    Train the generator and the discriminator alternately.

    A global step counter decides the phase: within every period of
    `discriminator_steps + generator_steps` steps, the first `discriminator_steps` steps
    update the discriminator and the remaining ones update the generator. The latest
    checkpoint in `self.model_dir` (if any) is restored before training and a new
    checkpoint is saved to `self.checkpoint_path` afterwards.

    :param input_fn: a callable returning the dataset; `dataset.tensors[0]` is used as
           the generator inputs and `dataset.tensors[1]` as the real data
    :param end_trigger: passed to the optimizer's `optimize` call to decide when to stop
    """
    with tf.Graph().as_default():
        dataset = input_fn()

        generator_inputs = dataset.tensors[0]
        real_data = dataset.tensors[1]

        counter = tf.train.get_or_create_global_step()

        period = self._discriminator_steps + self._generator_steps

        is_discriminator_phase = tf.less(tf.mod(counter, period), self._discriminator_steps)

        with tf.variable_scope("Generator"):
            gen_data = self._call_fn_maybe_with_counter(self._generator_fn, counter,
                                                        generator_inputs)

        with tf.variable_scope("Discriminator"):
            fake_d_outputs = self._call_fn_maybe_with_counter(self._discriminator_fn,
                                                              counter,
                                                              gen_data, generator_inputs)

        # Same discriminator variables, this time applied to the real data.
        with tf.variable_scope("Discriminator", reuse=True):
            real_d_outputs = self._call_fn_maybe_with_counter(self._discriminator_fn,
                                                              counter,
                                                              real_data, generator_inputs)

        with tf.name_scope("Generator_loss"):
            generator_loss = self._call_fn_maybe_with_counter(self._generator_loss_fn,
                                                              counter,
                                                              fake_d_outputs)
            gen_reg_loss = tf.losses.get_regularization_loss("Generator")
            generator_loss = generator_loss + gen_reg_loss

        with tf.name_scope("Discriminator_loss"):
            discriminator_loss = self._call_fn_maybe_with_counter(self._discriminator_loss_fn,
                                                                  counter,
                                                                  real_d_outputs,
                                                                  fake_d_outputs)
            dis_reg_loss = tf.losses.get_regularization_loss("Discriminator")
            discriminator_loss = discriminator_loss + dis_reg_loss

        generator_variables = tf.trainable_variables("Generator")
        discriminator_variables = tf.trainable_variables("Discriminator")

        # Both branches of the tf.cond below return gradients for ALL variables
        # (generator first, then discriminator); the inactive part is zero-filled.
        def run_gen_compute():
            gen_grads_vars = self._gen_opt.compute_gradients(generator_loss,
                                                             var_list=generator_variables)
            gen_grads = [grad for grad, var in gen_grads_vars]
            dis_grads = [tf.zeros_like(var) for var in discriminator_variables]

            return gen_grads + dis_grads

        def run_dis_compute():
            # Use the discriminator optimizer here, matching the optimizer that
            # applies these gradients below.
            dis_grads_vars = self._dis_opt.compute_gradients(discriminator_loss,
                                                             var_list=discriminator_variables)
            dis_grads = [grad for grad, var in dis_grads_vars]
            gen_grads = [tf.zeros_like(var) for var in generator_variables]
            return gen_grads + dis_grads

        grads = tf.cond(is_discriminator_phase, run_dis_compute, run_gen_compute)

        grads_vars = list(zip(grads, generator_variables + discriminator_variables))

        gen_grads_vars = grads_vars[:len(generator_variables)]
        dis_grads_vars = grads_vars[len(generator_variables):]

        grads = [grad for grad, var in grads_vars]

        _train_op = tf.cond(is_discriminator_phase,
                            lambda: self._dis_opt.apply_gradients(dis_grads_vars),
                            lambda: self._gen_opt.apply_gradients(gen_grads_vars))

        variables = generator_variables + discriminator_variables

        loss = tf.cond(is_discriminator_phase,
                       lambda: discriminator_loss,
                       lambda: generator_loss)

        # The counter is only increased after the variables have been updated.
        with tf.control_dependencies([_train_op]):
            increase_counter = tf.assign_add(counter, 1)

        with tf.control_dependencies([increase_counter]):
            train_op = tf.no_op()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            kpt = tf.train.latest_checkpoint(self.model_dir)

            if kpt is not None:
                saver.restore(sess, kpt)
            opt = TFOptimizer._from_grads(loss, sess,
                                          inputs=nest.flatten(dataset._original_tensors),
                                          labels=[],
                                          grads=grads, variables=variables, dataset=dataset,
                                          optim_method=FakeOptimMethod(),
                                          session_config=self._session_config,
                                          model_dir=os.path.join(self.model_dir, "tmp"),
                                          train_op=train_op)
            opt.optimize(end_trigger)
            saver = tf.train.Saver()
            saver.save(sess, self.checkpoint_path, global_step=counter)
def predict(self, input_fn, predict_keys=None, checkpoint_path=None):
    """Outputs predictions for given features.

    :param input_fn: A function that constructs the features.
          * A `TFDataset` object, each elements of which is a tuple `(features, None)`.
          * A `tf.data.Dataset` object: Outputs of `Dataset` object must have
            same constraints as below.
          * features: A `tf.Tensor` or a dictionary of string feature name to
            `Tensor`. features are consumed by `model_fn`. They should satisfy
            the expectation of `model_fn` from inputs.
          * A tuple, in which case the first item is extracted as features.
    :param predict_keys: optional list of keys. When the model's predictions are a dict
        and `input_fn` yields a `TFDataset`, only these keys are computed and returned;
        otherwise all keys are returned in sorted order.
    :param checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
        latest checkpoint in `model_dir` is used.  If there are no checkpoints
        in `model_dir`, prediction is run with newly initialized `Variables`
        instead of ones restored from checkpoint.

    Return:
      Evaluated values of `predictions` tensors.

    """
    import tensorflow as tf

    with tf.Graph().as_default():
        result = self.estimator._call_input_fn(input_fn, tf.estimator.ModeKeys.PREDICT)
        if isinstance(result, TFDataset):
            spec = self._call_model_fn(result.feature_tensors,
                                       None,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)
            # Only fall back to the latest checkpoint when the caller did not ask
            # for a specific one.
            if checkpoint_path is None:
                checkpoint_path = self.estimator.latest_checkpoint()

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())
                inputs = nest.flatten(result._original_tensors[0])
                if isinstance(spec.predictions, dict) and predict_keys is not None:
                    outputs = [spec.predictions[key] for key in predict_keys]
                else:
                    outputs = nest.flatten(spec.predictions)
                tfnet = TFNet.from_session(sess, inputs=inputs, outputs=outputs)
                predictions = tfnet.predict(result.get_prediction_data(), mini_batch=True)

                # If predictions is a dict, add back the keys and results is a dict as well.
                if isinstance(spec.predictions, dict):
                    # Given a list of outputs; return a dict of outputs.
                    def zip_key(outs, keys):
                        if isinstance(outs, list):
                            error_msg = "output length is " \
                                + "{} but keys length is {}".format(len(outs), len(keys))
                            assert len(outs) == len(keys), error_msg
                        else:
                            outs = [outs]
                        return dict(zip(keys, outs))

                    pred_keys = sorted(spec.predictions.keys()) if not predict_keys \
                        else predict_keys
                    predictions = predictions.map(lambda res: zip_key(res, pred_keys))
                return predictions

    # NOTE(review): predict_keys is not forwarded on this fallback path - confirm
    # whether that is intended.
    return list(self.estimator.predict(input_fn, checkpoint_path=checkpoint_path))
def evaluate(self, input_fn, eval_methods, steps=None, checkpoint_path=None):
    """Evaluates the model given evaluation data `input_fn`.

    :param input_fn: A function that constructs the input data for evaluation. The
        function should construct and return one of the following:
        * A `TFDataset` object, each elements of which is a tuple `(features, labels)`.
        * A `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple
        `(features, labels)` with same constraints as below.
        * A tuple `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary
        of string feature name to `Tensor` and `labels` is a `Tensor` or a
        dictionary of string label name to `Tensor`. Both `features` and
        `labels` are consumed by `model_fn`. They should satisfy the expectation
        of `model_fn` from inputs.
    :param eval_methods: a list of strings to specify the evaluation metrics to
        be used in this model
    :param steps: Number of steps for which to evaluate model. Only used when `input_fn`
        does not yield a `TFDataset`.
    :param checkpoint_path: Path of a specific checkpoint to evaluate. If `None`, the
        latest checkpoint in `model_dir` is used.  If there are no checkpoints
        in `model_dir`, evaluation is run with newly initialized `Variables`
        instead of ones restored from checkpoint.

    Returns:
      A dict containing the evaluation metrics specified in `model_fn` keyed by
      name.

    Raises:
      ValueError: if `eval_methods` contains a non-string entry.
    """
    if not all(isinstance(metric, str) for metric in eval_methods):
        raise ValueError("All metrics should be string types")
    from tensorflow_estimator.python.estimator.canned import prediction_keys
    import tensorflow as tf
    with tf.Graph().as_default():
        result = self.estimator._call_input_fn(input_fn, tf.estimator.ModeKeys.EVAL)
        if isinstance(result, TFDataset):
            spec = self._call_model_fn(result.feature_tensors,
                                       result.label_tensors,
                                       tf.estimator.ModeKeys.EVAL,
                                       self.config)
            # Only fall back to the latest checkpoint when the caller did not ask
            # for a specific one.
            if checkpoint_path is None:
                checkpoint_path = self.estimator.latest_checkpoint()

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())
                if isinstance(spec.predictions, dict):
                    # "mae" is evaluated on the PREDICTIONS entry, every other metric
                    # on the LOGITS entry.
                    if "mae" in eval_methods:
                        key = prediction_keys.PredictionKeys.PREDICTIONS
                        msg = "{} is required for evaluating mae,".format(key) + \
                              " please add it in your model_fn predictions"
                        assert key in spec.predictions, msg
                        outputs = [
                            spec.predictions[prediction_keys.PredictionKeys.PREDICTIONS]]
                    else:
                        key = prediction_keys.PredictionKeys.LOGITS
                        msg = "{} is required in for evaluating,".format(key) + \
                              " please add it in your model_fn predictions"
                        assert key in spec.predictions, msg
                        outputs = [
                            spec.predictions[prediction_keys.PredictionKeys.LOGITS]]
                else:
                    outputs = nest.flatten(spec.predictions)
                    if len(outputs) > 1:
                        raise Exception("Evaluate on more than one output is not " +
                                        "supported now")

                all_inputs = result._original_tensors
                if isinstance(all_inputs, tuple) and len(all_inputs) == 2:
                    targets = nest.flatten(all_inputs[1])
                else:
                    targets = None
                return evaluate_string_metrics(sess=sess,
                                               string_metrics=eval_methods,
                                               dataset=result,
                                               inputs=nest.flatten(all_inputs),
                                               targets=targets,
                                               outputs=outputs,
                                               loss=spec.loss)

    return self.estimator.evaluate(input_fn, steps, checkpoint_path=checkpoint_path)