def create_global_step(session: tf.Session) -> tf.Variable:
    """
    Builds and initialises the Tensorflow 'global_step' variable
    (see `MonitorContext.global_step_tensor`).

    :param session: Tensorflow session the optimiser is running in; it is used
        to run the initializer of the new variable.
    :return: The newly created, non-trainable variable tensor (initial value 0).
    """
    step_variable = tf.Variable(0, trainable=False, name="global_step")
    init_op = step_variable.initializer
    session.run(init_op)
    return step_variable
def _metric_step(self, stage, initial_ops, sess: tf.Session, epoch: int,
                 step=None, repeats=1, summary_every=1):
    """Run the metric ops of all active trainers and hand each its results.

    The fetch list is ``initial_ops`` followed by the metric ops of every
    trainer returned by ``self.active()``. Each trainer receives only the
    slice of results that corresponds to its own ops.

    :param stage: Stage identifier forwarded to ``metric_ops`` and
        ``process_metrics``.
    :param initial_ops: Ops fetched ahead of the trainers' metric ops. The
        sequence is copied, so the caller's object is never modified.
    :param sess: Session used to run the ops.
    :param epoch: Epoch number forwarded to ``process_metrics``.
    :param step: Step used for summaries; when ``None`` the first fetched
        result is used as the step.
    :param repeats: When greater than 1 the ops are run that many times and
        the results are averaged element-wise.
    :param summary_every: Summaries are written only when ``step`` is a
        multiple of this value (and ``step > 200``).
    :return: The fetched (or averaged) results for all ops.
    """
    # Work on a copy: extending the caller's list in place would make it grow
    # on every call.
    ops = list(initial_ops)
    offsets, lengths = [], []
    trainers = self.active()
    for trainer in trainers:
        offsets.append(len(ops))
        metric_ops = trainer.metric_ops(stage)
        lengths.append(len(metric_ops))
        ops.extend(metric_ops)

    if repeats > 1:
        all_results = np.stack(
            [np.array(sess.run(ops)) for _ in range(repeats)])
        results = np.mean(all_results, axis=0)
    else:
        results = sess.run(ops)

    if step is None:
        step = results[0]

    for trainer, offset, length in zip(trainers, offsets, lengths):
        chunk = results[offset:offset + length]
        summaries = trainer.process_metrics(stage, chunk, epoch, step)
        # NOTE(review): the hard-coded 200 looks like a warm-up period during
        # which summaries are suppressed -- confirm the intent.
        if (trainer.summary_writer and step > 200
                and (step % summary_every == 0)):
            summary = tf.Summary(value=summaries)
            trainer.summary_writer.add_summary(summary, global_step=step)
    return results
def run_batches(sess: tf.Session,
                tensors_dict_ops: Mapping[str, tf.Tensor],
                max_nbatches: int = -1) -> Dict[str, np.ndarray]:
    '''Runs the ops in tensors_dict_ops for a fixed number of batches or until
    reaching a tf.errors.OutOfRangeError, concatenating the runs.

    Note: assumes that the dataset iterator doesn't need initialization, or is
        already initialized.

    Args
    - sess: tf.Session
    - tensors_dict_ops: dict, str => tf.Tensor, shape [batch_size] or
        [batch_size, D]
    - max_nbatches: int, maximum number of batches to run the ops for; any
        non-positive value (default -1) runs until reaching a
        tf.errors.OutOfRangeError

    Returns
    - all_tensors: dict, str => np.array, shape [N] or [N, D]
    '''
    all_tensors = defaultdict(list)  # type: DefaultDict[str, Any]
    limited = max_nbatches > 0
    curr_batch = 0
    progbar = tqdm(total=max_nbatches if limited else None)
    try:
        while True:
            tensors_dict = sess.run(tensors_dict_ops)
            for name, arr in tensors_dict.items():
                all_tensors[name].append(arr)
            curr_batch += 1
            progbar.update(1)
            # Only enforce the limit when one was requested; comparing against
            # -1 would stop after the very first batch.
            if limited and curr_batch >= max_nbatches:
                break
    except tf.errors.OutOfRangeError:
        # Dataset exhausted: the normal way out when no limit is set.
        pass
    finally:
        # Close the bar even if sess.run fails with an unexpected error.
        progbar.close()

    for name in all_tensors:
        all_tensors[name] = np.concatenate(all_tensors[name])
    return all_tensors
def iterate_minibatches(
        sess: tf.Session,
        inputs: np.ndarray,
        targets: np.ndarray,
        batch_size: int,
        shuffle: bool = False,
        augment: bool = False,
) -> Generator[Tuple[np.ndarray, np.ndarray], Any, Any]:
    """Yields minibatches of the provided data, one pass over the dataset.

    The batches come from a tf.data pipeline (built by
    `iterate_minibatches_dataset`) so code can be shared with the TPU pipeline.

    Args:
      sess: A tf.Session used to evaluate each batch.
      inputs: Array of input examples, with examples indexed by the first
        dimension.
      targets: Array of target examples, with examples indexed by the first
        dimension.
      batch_size: The size of the sampled minibatches.
      shuffle: Whether to shuffle the data.
      augment: Whether to augment the data.

    Yields:
      (input, target) minibatches; exactly `inputs.shape[0] // batch_size`
      of them.
    """
    batched = iterate_minibatches_dataset(inputs=inputs,
                                          targets=targets,
                                          batch_size=batch_size,
                                          shuffle=shuffle,
                                          augment=augment)
    prefetched = batched.prefetch(_PREFETCH_NUM_BATCHES)
    next_element = prefetched.make_one_shot_iterator().get_next()
    input_op, target_op = next_element

    # One pass through the dataset, counted in whole batches.
    num_batches = inputs.shape[0] // batch_size
    for _ in range(num_batches):
        batch_inputs, batch_targets = sess.run([input_op, target_op])
        yield batch_inputs, batch_targets
def final_validation(sess: tf.Session, test_model: BindedModel,
                     min_cnt=100) -> pd.DataFrame:
    """Evaluate the test model until more than ``min_cnt`` rows are collected.

    Each session run yields one row per example with the unaggregated CTC
    loss, the identity and one column per alignment statistic (named via
    ``dataset_pb2.Cigar.Name``). While evaluating, the ``mincall.train.ops``
    logger is raised to WARNING; its previous level is restored on every exit
    path, including when ``sess.run`` raises.

    :param sess: Session used to run the test model.
    :param test_model: Model exposing ``ctc_loss_unaggregated``,
        ``alignment_stats``, ``identity`` and ``learning_phase``.
    :param min_cnt: Evaluation stops once the number of collected rows
        exceeds this value.
    :return: DataFrame with all collected rows and a fresh integer index.
    """
    logger = logging.getLogger("mincall.train.ops")
    # Remember the logger's own level (not the effective one) so an inherited
    # NOTSET level is restored as NOTSET.
    previous_level = logger.level
    logger.setLevel(logging.WARNING)

    frames = []
    total_rows = 0
    try:
        while True:
            ctc_loss, *alignment_stats, identity = sess.run(
                [
                    test_model.ctc_loss_unaggregated,
                    *test_model.alignment_stats,
                    test_model.identity,
                ],
                feed_dict={
                    test_model.learning_phase: 0,
                },
                options=tf.RunOptions(
                    timeout_in_ms=200 * 1000,  # Single op should complete in 200s
                ),
            )
            batch_frame = pd.DataFrame({
                "ctc_loss": ctc_loss,
                "identity": identity,
                **{
                    dataset_pb2.Cigar.Name(op): stat
                    for op, stat in zip(ops.aligment_stats_ordering,
                                        alignment_stats)
                },
            })
            frames.append(batch_frame)
            total_rows += len(batch_frame)
            if total_rows > min_cnt:
                # Concatenate once at the end: DataFrame.append no longer
                # exists in pandas 2.x and copied all rows on every call.
                return pd.concat(frames, ignore_index=True)
    finally:
        logger.setLevel(previous_level)
def relation_annotation(self, sess: tf.Session, data_iter):
    """Predict the best-fitting relation for every argument pair in ``data_iter``.

    :param sess: Session used to evaluate the ``ext_kb/best_fit_r:0`` tensor.
    :param data_iter: Iterable of batches; its ``batch_size`` attribute is
        overwritten with the evaluation batch size. Each batch is split in two
        halves along the last axis (arg1, arg2).
    :return: The predictions of all batches concatenated along the first axis.
    :raises ValueError: from ``np.concatenate`` if ``data_iter`` yields no
        batches.
    """
    g = self.graph
    with tf.variable_scope("ext_kb", reuse=True):
        best_fit_r = g.get_tensor_by_name("ext_kb/best_fit_r:0")

    # Evaluates to 1900. NOTE(review): presumably derived from a ~7 GiB memory
    # budget and 122051 candidates of 32 bytes each -- confirm before changing.
    eval_batch_size = int(7 * 2**30 / (122051 * 32) // 100) * 100
    data_iter.batch_size = eval_batch_size

    rel_id_batches = []
    for data_batch in data_iter:
        # TODO: here should feed a batch of all relations
        arg1, arg2 = np.split(data_batch, 2, -1)
        feed_dict = {
            self.args_input[0][0]: arg1,
            self.args_input[0][1]: arg2,
        }
        rel_id = sess.run(best_fit_r, feed_dict=feed_dict)
        # atleast_1d keeps a single-example batch concatenable after squeeze.
        rel_id_batches.append(np.atleast_1d(np.squeeze(rel_id)))

    # Concatenate every batch, not just the result of the last one.
    rel_predictions = np.concatenate(rel_id_batches)
    return rel_predictions
def save_checkpoint_numpy(self, session: tf.Session, global_step: int):
    """
    ## Save model as a set of numpy arrays

    Writes one ``.npy`` file per variable into
    ``<checkpoint_path>/<global_step>/`` together with an ``info.json`` that
    maps variable names to file names. Every other entry in the checkpoints
    directory is removed afterwards.

    :param session: Session used to fetch the current variable values.
    :param global_step: Step number; used as the checkpoint directory name.
        A checkpoint for this step must not exist yet.
    """
    checkpoints_path = pathlib.Path(self.info.checkpoint_path)
    # parents/exist_ok: tolerate nested paths and concurrent creation.
    checkpoints_path.mkdir(parents=True, exist_ok=True)

    checkpoint_path = checkpoints_path / str(global_step)
    assert not checkpoint_path.exists()
    checkpoint_path.mkdir()

    values = session.run(self.__variables)

    # Save each variable
    files = {}
    for variable, value in zip(self.__variables, values):
        file_name = tf_util.variable_name_to_file_name(
            tf_util.strip_variable_name(variable.name))
        file_name = f"{file_name}.npy"
        files[variable.name] = file_name
        np.save(str(checkpoint_path / file_name), value)

    # Save header
    with open(str(checkpoint_path / "info.json"), "w") as f:
        json.dump(files, f)

    # Delete old checkpoints. Materialise the listing first so entries are not
    # removed from a directory that is still being iterated.
    for c in list(checkpoints_path.iterdir()):
        if c.name != checkpoint_path.name:
            util.rm_tree(c)
def compute_style_cost(model, STYLE_LAYERS):
    """
    Accumulates the weighted style cost over the chosen layers.

    Arguments:
    model -- our tensorflow model (indexable by layer name)
    STYLE_LAYERS -- A python list of (layer name, coefficient) pairs:
                        - the layers we would like to extract style from
                        - the weight applied to each layer's cost

    Returns:
    J_style -- tensor representing a scalar value: the sum over layers of
               coefficient * layer style cost
    """
    total_style_cost = 0

    for name, weight in STYLE_LAYERS:
        # Output tensor of the layer under consideration.
        layer_output = model[name]

        # Style activation: the layer output evaluated now, through the
        # module-level session `sess`.
        style_activation = sess.run(layer_output)

        # Generated-image activation: the same tensor, left unevaluated so it
        # is computed later, when the session is run.
        generated_activation = layer_output

        layer_cost = compute_layer_style_cost(style_activation,
                                              generated_activation)
        total_style_cost = total_style_cost + weight * layer_cost

    return total_style_cost
def check_tf_model(sess: tf.Session,
                   onnx_model: onnx.ModelProto,
                   torch_to_onnx: Mapping[str, str],
                   transform: Mapping[str, Callable[[np.ndarray], np.ndarray]],
                   margin: float = 1.5e-8,
                   relative: bool = False):
    """Compare the TF trainable variables against the ONNX initializers.

    Variables are visited in the order returned by `tf.trainable_variables()`;
    only those whose name is a key of `torch_to_onnx` are checked.

    Args:
      sess: Session used to read the variable values.
      onnx_model: ONNX model whose graph initializers hold the reference
        weights.
      torch_to_onnx: Maps a TF variable name to the ONNX initializer name.
      transform: Optional per-variable function applied to the ONNX weight
        before comparing; without one the weight is reshaped to the TF shape.
      margin: Tolerance handed to the comparison helper.
      relative: Use `check_tensor_relative` instead of `check_tensor`.

    Raises:
      TestFailureError: re-raised from the comparison helper after printing
        the name of the offending weight.
    """
    onnx_weights = {
        initializer.name: onnx_to_numpy(initializer)
        for initializer in onnx_model.graph.initializer
    }
    trainable = list(tf.trainable_variables())

    if len(onnx_weights) == 0:
        return

    for variable in trainable:
        name = variable.name
        if name not in torch_to_onnx:
            continue

        onnx_name = torch_to_onnx[name]
        print(
            f"{name} ==> {onnx_name}\n{variable.shape} ==> {onnx_weights[onnx_name].shape}"
        )
        if name in transform:
            onnx_w = transform[name](onnx_weights[onnx_name])
        else:
            onnx_w = onnx_weights[onnx_name].reshape(variable.shape)
        tf_w = sess.run(variable)

        compare = check_tensor_relative if relative else check_tensor
        try:
            compare(onnx_w, tf_w, margin)
        except TestFailureError as err:
            print("For weight: ", name)
            raise err
def evaluation(session: tf.Session,
               input_x: np.ndarray,
               input_y: np.ndarray,
               iter_idx: int,
               input_batch_size: int = 32):
    """Evaluate loss and accuracy over `input_x`/`input_y` in batches.

    Relies on module-level graph handles (`loss`, `accuracy`, `x`, `y`,
    `training_boolean`) and on `log_rate` / `eprint` for reporting. Results
    are only printed, and only when `iter_idx` is a multiple of `log_rate`.

    Args:
      session: Session used to run the evaluation ops.
      input_x: Evaluation inputs, indexed by the first dimension.
      input_y: Evaluation targets, aligned with `input_x`.
      iter_idx: Current iteration index (used for the log cadence and label).
      input_batch_size: Number of examples fed per session run.
    """
    total_loss = 0.0
    total_accuracy = 0.0
    started_at = time.time()
    sample_count = len(input_x)

    for begin in range(0, sample_count, input_batch_size):
        end = begin + input_batch_size
        batch_loss, batch_accuracy = session.run(
            [loss, accuracy],
            feed_dict={
                x: input_x[begin:end],
                y: input_y[begin:end],
                training_boolean: False
            })
        # The loss is weighted by the actual size of the (possibly short)
        # batch.
        total_loss += batch_loss * min(input_batch_size, sample_count - begin)
        # NOTE(review): accuracy is summed unweighted but later divided by the
        # sample count; that is only right if `accuracy` is a per-batch count
        # of correct predictions rather than a mean -- confirm.
        total_accuracy += batch_accuracy

    if iter_idx % log_rate == 0:
        eprint('eval: %03d' % iter_idx,
               'time: %.5f' % (time.time() - started_at),
               'accuracy: %.5f' % (total_accuracy / len(input_x)),
               'loss: %.5f' % (total_loss / len(input_x)))
def run_optimization_epoch(
        self,
        train_ops: NamedTuple,
        session: tf.Session,
        hparams: tf.contrib.training.HParams,
        epoch_number: int,
):
    """Executes one training epoch by running `train_ops` in `session`.

    For each of `hparams.num_batches_per_epoch` batches, `mc_step` is run
    `num_monte_carlo_sweeps * num_sites` times and then `apply_gradients`
    once. `epoch_increment` is run once after all batches.

    Args:
      train_ops: Training operations returned by `build_opt_ops` method.
      session: Active session where to run a training epoch.
      hparams: Hyperparameters of the optimization procedure.
      epoch_number: Number of epoch.
    """
    del epoch_number  # not used by SupervisedWavefunctionOptimizer.

    for _batch in range(hparams.num_batches_per_epoch):
        mc_steps = hparams.num_monte_carlo_sweeps * hparams.num_sites
        for _mc in range(mc_steps):
            session.run(train_ops.mc_step)
        session.run(train_ops.apply_gradients)

    session.run(train_ops.epoch_increment)
def _train_adjustable_model(
        session: tf.Session,
        train_input: tf.Tensor,
        train_operations: TrainOperations,
        batch_stream: BaseBatchStream,
        tensorboard_log_dir: Path,
        number_of_epochs: int,
) -> Path:
    """Train the trainable variables and return the best-loss checkpoint path.

    Each step runs ``train_operations.gradients`` and ``loss`` on one batch,
    logs loss and learning rate to TensorBoard and recomputes the learning
    rate (exponential decay times ``|cos|``, plus a minimal rate). If any
    trainable variable contains NaN after a step, an earlier regular
    checkpoint is restored, the optimizer variables are re-initialised and the
    step is not counted. After training the best-loss checkpoint is restored
    into ``session``.

    :param session: Session holding the graph to train.
    :param train_input: Placeholder fed with each batch's data.
    :param train_operations: Provides ``gradients``, ``loss`` and the feedable
        ``learning_rate``.
    :param batch_stream: Iterable of ``(batch_data, _)`` pairs; iterated once
        per epoch.
    :param tensorboard_log_dir: Directory for event files; checkpoints go to
        its ``ckpt`` sub-directory.
    :param number_of_epochs: Number of passes over ``batch_stream``.
    :return: Path of the checkpoint with the lowest observed loss.

    NOTE(review): if no step ever improves on the initial infinite loss (empty
    stream or only NaN steps), no best checkpoint exists and the final restore
    fails.
    """
    learning_strategy = _DEFAULT_LEARNING_STRATEGY

    thresholds_vars: List[tf.Variable] = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES)
    thresholds_vars_names = [th_var.name for th_var in thresholds_vars]
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    # Everything that is global but not trainable is treated as optimizer
    # state.
    optimizer_vars = list(set(global_vars) - set(thresholds_vars))

    _LOGGER.info(
        f'Total number of adjustable parameters {len(thresholds_vars)}')

    regular_checkpoints_saver = tf.train.Saver(
        var_list=thresholds_vars,
        max_to_keep=_NUMBER_OF_REGULAR_CHECKPOINTS,
    )
    best_loss_checkpoints_saver = tf.train.Saver(
        var_list=thresholds_vars,
        max_to_keep=_NUMBER_OF_BEST_LOSS_CHECKPOINTS,
    )

    summary_writer = tf.summary.FileWriter(str(tensorboard_log_dir),
                                           session.graph)
    loss_summary = tf.summary.scalar('train_loss', train_operations.loss)
    learning_rate_summary = tf.summary.scalar('train_learning_rate',
                                              train_operations.learning_rate)

    initial_learning_rate = session.run(train_operations.learning_rate)
    next_learning_rate = initial_learning_rate

    train_step = 0
    decay_step = 0
    nans_step_counter = 0
    best_loss = float('inf')
    best_loss_step = 0

    # Build the re-initialisation op once. Creating it inside
    # reset_optimizer() would add new ops to the graph on every reset.
    init_optimizer_vars = tf.variables_initializer(optimizer_vars)

    def reset_optimizer():
        session.run(init_optimizer_vars)

    ckpt_dir = tensorboard_log_dir / 'ckpt'
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    def get_regular_checkpoint_path(step):
        checkpoint_path = ckpt_dir / f'regular_ckpt_{step}'
        return str(checkpoint_path)

    def get_best_checkpoint_path(step):
        checkpoint_path = ckpt_dir / f'best_ckpt_{step}'
        return str(checkpoint_path)

    try:
        for epoch in range(number_of_epochs):
            _LOGGER.info(f'Epoch: {epoch + 1}')
            for batch_data, _ in batch_stream:
                feed_dict = {
                    train_input: batch_data,
                    train_operations.learning_rate: next_learning_rate,
                }
                fetches = {
                    'gradients': train_operations.gradients,
                    'loss': train_operations.loss,
                    'learning_rate_summary': learning_rate_summary,
                    'loss_summary': loss_summary,
                }
                result = session.run(fetches=fetches, feed_dict=feed_dict)

                # Learning rate for the next step: exponential decay over all
                # steps, modulated by |cos| over the current decay cycle.
                next_learning_rate = initial_learning_rate.copy()
                next_learning_rate *= np.exp(
                    -train_step * learning_strategy.lr_decay)
                next_learning_rate *= np.abs(
                    np.cos(learning_strategy.lr_cos_phase * decay_step /
                           learning_strategy.lr_cos_steps))
                next_learning_rate += _MINIMAL_LEARNING_RATE

                summary_writer.add_summary(result['learning_rate_summary'],
                                           train_step)
                summary_writer.add_summary(result['loss_summary'], train_step)

                thresholds_vars_values = session.run(thresholds_vars)
                thresholds_with_nan = [
                    var_name for var_name, var_value in zip(
                        thresholds_vars_names, thresholds_vars_values)
                    if np.isnan(var_value).any()
                ]
                if thresholds_with_nan:
                    _LOGGER.warning(
                        f'Some thresholds are NaN, restore previous trainable values\n'
                        f'{thresholds_with_nan}')
                    # Each consecutive NaN step goes one regular checkpoint
                    # further back (never below step 0).
                    nans_step_counter += 1
                    checkpoint_step = train_step // _REGULAR_CHECKPOINTS_DELTA
                    checkpoint_step -= nans_step_counter
                    checkpoint_step = max(checkpoint_step, 0)
                    checkpoint_step *= _REGULAR_CHECKPOINTS_DELTA
                    regular_checkpoints_saver.restore(
                        session, get_regular_checkpoint_path(checkpoint_step))
                    reset_optimizer()
                    continue

                if best_loss > result['loss']:
                    best_loss_checkpoints_saver.save(
                        session, get_best_checkpoint_path(train_step))
                    best_loss, best_loss_step = result['loss'], train_step

                if train_step % _REGULAR_CHECKPOINTS_DELTA == 0:
                    nans_step_counter = 0
                    regular_checkpoints_saver.save(
                        session, get_regular_checkpoint_path(train_step))

                train_step += 1
                decay_step += 1

                if train_step % learning_strategy.lr_cos_steps == 0:
                    _LOGGER.info('Reinitialize optimizer')
                    reset_optimizer()
                    decay_step = 0
    finally:
        # Flush pending events and release the event file.
        summary_writer.close()

    _LOGGER.info(f'minimal loss value = {best_loss}')
    best_checkpoint_path = get_best_checkpoint_path(best_loss_step)
    best_loss_checkpoints_saver.restore(session, best_checkpoint_path)
    return Path(best_checkpoint_path)
def infer(
        self,
        sess: tf.Session,
        xs: np.ndarray,
        x_lengths: np.ndarray,
        minibatch_size: int,
) -> List[List[InferredSeqOneBeam]]:
    """
    Decode a minibatch of signals with beam search.

    Returns one list per minibatch element, holding one `InferredSeqOneBeam`
    per beam. Alignment matrices are filled only when
    `decoder_rnn.keep_full_alignment` is set, attention sums only when
    `decoder_rnn.ignore_alignment_history` is not set; otherwise they are
    empty lists.
    """
    feed = {
        self._ph_signals: xs,
        self._ph_signals_length: x_lengths,
        self._ph_seq_max_lengths: list(x_lengths),
        self._ph_batch_size: minibatch_size,
        self._ph_start_tokens: [self.START_TOKEN_ID] * minibatch_size,
    }
    fetches = [
        self.decoder_outputs,
        self.final_context_state.cell_state.alignment_history,
    ]
    final_decoder_output, alignment = sess.run(fetches, feed_dict=feed)

    predicted_ids = final_decoder_output.predicted_ids
    scores = final_decoder_output.beam_search_decoder_output.scores
    logger.debug('scores.shape: {}'.format(scores.shape))

    if not self.decoder_rnn.ignore_alignment_history:
        logger.debug('alignment.shape: {}'.format(alignment.shape))
        # Swap the first two axes so rows can be addressed by
        # minibatch_index * beam_width + beam_index.
        alignment_transposed = alignment.transpose([1, 0, 2])

    nuc_arr = np.array(Nuc.NUC_ARR)
    beam_width = predicted_ids.shape[2]

    res = []
    for minibatch_index in range(len(predicted_ids)):
        beams = []  # type: List[InferredSeqOneBeam]
        for beam_index in range(beam_width):
            row = minibatch_index * beam_width + beam_index
            beam_ids = predicted_ids[minibatch_index, :, beam_index]
            beam_scores = scores[minibatch_index, :, beam_index]

            alignment_matrix = []
            if self.decoder_rnn.keep_full_alignment:
                alignment_matrix = alignment_transposed[row, :, :].tolist()

            attention_sums = []
            if not self.decoder_rnn.ignore_alignment_history:
                attention_sums = np.sum(alignment_transposed[row, :, :],
                                        axis=1).tolist()

            beams.append(
                InferredSeqOneBeam(
                    seq=''.join(nuc_arr[beam_ids]),
                    scores=beam_scores.tolist(),
                    alignment_matrix=alignment_matrix,
                    attention_sums=attention_sums,
                ))
        res.append(beams)
    return res
def _do_evaluation(self,
                   session: tf.Session,
                   val_generator,
                   batch_size,
                   validation_step,
                   callbacks,
                   step,
                   progbar=None):
    """Evaluate the model on the validation generator and notify callbacks.

    Per batch, the loss, all metrics, the predicted classes and (if present)
    the summary are fetched in a single session run. After all batches a
    class-balanced accuracy is computed over the whole validation set.

    :param session: Session used to run the evaluation.
    :param val_generator: Object whose ``get_batch(batch_size)`` yields
        ``(data, labels)``; labels are reduced with ``argmax`` over axis 1.
    :param batch_size: Batch size forwarded to the generator.
    :param validation_step: Forwarded to ``validation_end`` callbacks.
    :param callbacks: Objects with ``validation_start`` / ``validation_end``.
    :param step: Current training step; the progress bar is updated with
        ``step + 1``.
    :param progbar: Optional progress bar; skipped when ``None``.
    """
    for callback in callbacks:
        callback.validation_start({})

    metrics = {}
    label_batches = []
    pred_batches = []

    # test model on validation data
    for data, labels in val_generator.get_batch(batch_size):
        metric_tensors = self._get_metrics()
        summary_op = self._get_summary()

        # Fetch layout: [loss, *metrics, predicted_class, (summary)].
        targets = [self._get_loss()]
        targets.extend(metric_tensors.values())
        preds_index = len(targets)
        targets.append(self._predicted_class)
        if summary_op is not None:
            targets.append(summary_op)

        # One run per batch: predictions come from the same forward pass as
        # the loss and metrics.
        _results = session.run(targets,
                               feed_dict=self._build_feed_dict(data, labels))

        label_batches.append(np.argmax(labels, axis=1))
        pred_batches.append(_results[preds_index])

        if summary_op is not None:
            self.val_writer.add_summary(_results[-1], self._global_step)

        metric_values = {'loss': _results[0]}
        for i, metric_name in enumerate(metric_tensors.keys()):
            metric_values[metric_name] = _results[i + 1]
        for key, value in metric_values.items():
            metrics.setdefault(key, []).append(value)

    if progbar is not None:
        progbar.update(step + 1, [('val_' + key, np.mean(value))
                                  for key, value in metrics.items()])

    weighted_acc = None
    if label_batches:
        t_y = np.hstack(label_batches)
        t_preds = np.hstack(pred_batches)

        classes = np.unique(t_y)
        class_weight = compute_class_weight('balanced',
                                            classes=classes,
                                            y=t_y)
        sample_weights = np.zeros(t_y.shape[0])
        # Pair every weight with its class label; indexing by position is
        # wrong whenever a class is absent from the validation labels.
        for class_label, weight in zip(classes, class_weight):
            sample_weights[t_y == class_label] = weight

        try:
            weighted_acc = accuracy_score(t_y,
                                          t_preds,
                                          normalize=True,
                                          sample_weight=sample_weights)
        except Exception:
            # Best effort: the weighted accuracy is optional and must not
            # abort validation.
            weighted_acc = None

    metrics['weighted_acc'] = weighted_acc

    for callback in callbacks:
        callback.validation_end(logs={
            'metrics': metrics,
            'validation_step': validation_step
        })
def _do_epoch(self, sess: tf.Session, epoch: int, train_generator,
              val_generator, validation_step: int, batch_size: int,
              callbacks):
    """Train for one epoch, validating every `validation_step` steps.

    Callbacks are notified at epoch start/end and around every batch. A
    progress bar shows the training metrics and, as stateful entries, the
    validation loss and metrics. Validation also runs on the first step
    (step 0).
    """
    for cb in callbacks:
        cb.epoch_start({'epoch': epoch})

    step = 0

    # Validation metrics are shown as-is (not averaged) by the progress bar.
    stateful_metrics = ['val_loss']
    stateful_metrics.extend(
        ['val_' + key for key in self._get_metrics().keys()])
    progbar = Progbar(train_generator.steps(batch_size),
                      stateful_metrics=stateful_metrics)

    for data, labels in train_generator.get_batch(batch_size):
        for cb in callbacks:
            cb.batch_start({})

        # Fetch layout: [train_step, loss, *metrics, (summary)].
        targets = [self._get_train_step(), self._get_loss()]
        targets.extend(self._get_metrics().values())
        if self._get_summary() is not None:
            targets.append(self._get_summary())

        # The feed dict is model specific.
        feed = self._build_feed_dict(data, labels)
        _results = sess.run(targets, feed_dict=feed)

        if self._get_summary() is not None:
            self.train_writer.add_summary(_results[-1], self._global_step)

        # Index 0 is the train op's result; loss and metrics follow.
        metric_values = {'loss': _results[1]}
        for position, metric_name in enumerate(self._get_metrics().keys(),
                                               start=2):
            metric_values[metric_name] = _results[position]

        progbar.update(step + 1, list(metric_values.items()))

        if step % validation_step == 0:
            self._do_evaluation(sess, val_generator, batch_size,
                                validation_step, callbacks, step, progbar)

        step += 1
        self._global_step += 1

        for cb in callbacks:
            cb.batch_end(metric_values)

    for cb in callbacks:
        cb.epoch_end({'epoch': epoch})
def _get_best_action(self, sess: tf.Session, state: np.array) -> int:
    """Return the index of the highest prediction-network output per row.

    The state is fed to `self.prediction_inputs`; the arg-max is taken over
    axis 1 of the network output, so the result holds one index per row.
    """
    feed = {self.prediction_inputs: state}
    network_output = sess.run(self.prediction_network.outputs, feed_dict=feed)
    return np.argmax(network_output, axis=1)
def get_histograms(self, sess: tf.Session):
    """Evaluate `self.histograms` in `sess` and return the result."""
    histogram_values = sess.run(self.histograms)
    return histogram_values
def _create_dummy_variable(session: tf.Session):
    """Create the int32 variable 'dummy_var' (value 0), initialise and return it."""
    variable = tf.Variable(0, name='dummy_var', dtype=tf.int32)
    init_op = tf.variables_initializer([variable])
    session.run(init_op)
    return variable
def detect(self, session: tf.Session):
    """Run the detector and reduce its output to a 7-element feature vector.

    The vector is [block_x, block_y, spike_x, spike_y] followed by a one-hot
    lane encoding of the ship. Detections of class 4 and detections scoring
    0.7 or less are ignored. When nothing passes the filter, the previous
    feature vector (`self.last_box_center`) is returned unchanged.

    Reads the module-level `image_np` (detector input) and, when
    `SHOW_DISPLAY` is True, draws on the module-level `screen`.
    """
    fetches = (self.box_tensor, self.score_tensor, self.class_tensor)
    boxes, scores, classes = session.run(
        fetches, feed_dict={self.image_tensor: image_np})

    # filter by score threshold and mask out the lane detections (class 4)
    flat_classes = np.squeeze(classes)
    flat_scores = np.squeeze(scores)
    keep = np.logical_and(flat_classes != 4, flat_scores > .7)
    boxes = np.squeeze(boxes)[keep]
    classes = flat_classes.astype(np.int32)[keep]
    scores = flat_scores[keep]

    def lowest_box(class_id):
        # Box of the given class with the largest y_min, i.e. the one that
        # appears lowest on screen; zeros when the class was not detected.
        selector = classes == class_id
        if not selector.any():
            return np.zeros(4)
        candidates = boxes[selector]
        return candidates[candidates[:, 0].argmax()]

    def snapped_center(box):
        # (y_min, x_min, y_max, x_max) -> [x_center, y_center], floored to a
        # granularity of 0.05.
        return [
            (np.divide(box[3] + box[1], 2) * 20 // 1 / 20) + 0,
            (np.divide(box[2] + box[0], 2) * 20 // 1 / 20) + 0
        ]

    blocks = lowest_box(1)
    ship = lowest_box(2)
    spikes = lowest_box(3)

    if len(boxes) > 0:
        block_vector = snapped_center(blocks)
        spike_vector = snapped_center(spikes)

        # convert ship into a one-hot based on its lane
        if np.any(ship != np.zeros(4)):
            ship_center = np.divide(ship[3] + ship[1], 2)
            if ship_center < 0.375:
                ship_vector = [1, 0, 0]
            elif ship_center < 0.625:
                ship_vector = [0, 1, 0]
            else:
                ship_vector = [0, 0, 1]
        else:
            # No ship seen: keep the lane from the previous feature vector.
            ship_vector = self.last_box_center[-3:]

        # combine features into a single vector and remember it
        feature_vector = block_vector + spike_vector + ship_vector
        self.last_box_center = feature_vector
    else:
        feature_vector = self.last_box_center

    # Optional on-screen rendering of the detections. NOTE, this inflates
    # iteration time.
    if SHOW_DISPLAY is True:
        vis_util.visualize_boxes_and_labels_on_image_array(
            screen,
            boxes,
            classes,
            scores,
            self.category_index,
            use_normalized_coordinates=True,
            line_thickness=8)
        cv2.namedWindow("detection", cv2.WINDOW_NORMAL)
        cv2.imshow("detection", screen)

    return feature_vector
def run_ae(data: DataSet,
           mb_size: int,
           train: Operation,
           loss: Tensor,
           X: Tensor,
           G_X: Tensor,
           sess: Session,
           experiment_id: str,
           feature_eval: Optional[Callable[[Session], Tuple[Number, Number]]],
           interpolation: Optional[Callable[
               [Callable[[int], DataSet], Session, str, str], None]],
           view_dist: Optional[Callable[
               [Callable[[int], DataSet], Session, str, str], None]],
           view_disentangle: Optional[Callable[
               [Callable[[int], DataSet], Session, str, str], None]],
           data_feeder: Callable[[DataSet, Tensor, int],
                                 Dict[Tensor, np.ndarray]] = default_feeder,
           max_iter: int = 100000) -> None:
    """Train an auto-encoder for `max_iter` iterations with periodic reports.

    Every iteration feeds one minibatch to `train`. Reconstruction samples,
    feature evaluation, interpolation, disentanglement and distribution views
    as well as loss printing are each gated by their own condition created
    with `get_timer_cond`. The optional hooks are skipped when `None`.
    """
    # One gate per periodic activity.
    sample_cond = get_timer_cond(50)
    feature_eval_cond = get_timer_cond(100)
    interpolation_cond = get_timer_cond(90)
    print_cond = get_timer_cond(30)
    view_z_cond = get_timer_cond(150)
    view_disentangle_cond = get_timer_cond(100)

    def iteration_label(_i):
        # Zero-pad to the number of digits of max_iter.
        width = int(np.log10(max_iter) + 1)
        return str(_i).zfill(width)

    # Hooks sharing the (sampler, session, experiment, label) signature, in
    # the order in which they are triggered.
    visual_hooks = (
        (interpolation_cond, interpolation),
        (view_disentangle_cond, view_disentangle),
        (view_z_cond, view_dist),
    )

    for it in range(max_iter):
        if sample_cond(it):
            plot_rec_sample(data.sample, data.dim_X, X, G_X, sess,
                            experiment_id, iteration_label(it))

        if feature_eval_cond(it) and feature_eval is not None:
            fe_loss, fe_acc = feature_eval(sess)
            print(f"Feature Evaluation at {iteration_label(it)}: loss -> {fe_loss}, Accuracy -> {fe_acc}")

        for is_due, hook in visual_hooks:
            if is_due(it) and hook is not None:
                hook(data.sample, sess, experiment_id, iteration_label(it))

        train_feed = data_feeder(data, X, mb_size)
        sess.run(train, feed_dict=train_feed)

        if print_cond(it):
            # The reported loss is measured on a freshly drawn minibatch.
            report_feed = data_feeder(data, X, mb_size)
            loss_val = sess.run(loss, feed_dict=report_feed)
            print('Iter: {}'.format(it))
            print('Loss: {0:.4f}'.format(loss_val))
            print()
def predict_policy_value(self, sess: tf.Session, states):
    """Evaluate policy predictions and value for `states`.

    Returns the session result for `[policy_predictions, value]`, in that
    order.
    """
    fetches = [self.policy_predictions, self.value]
    feed = {self.policy_states: states}
    return sess.run(fetches, feed_dict=feed)
def step(self, sess: tf.Session, feed_dict: dict):
    """Runs one step of training. sess must be an active TensorFlow Session in
    order for this to work.

    The ops in the UPDATE_OPS collection are run together with
    `self.training_op`, with `self.is_training` fed as True.

    Args:
      sess: Active session.
      feed_dict: Feed values for the step. It is copied, so the caller's
        dictionary is left untouched.

    Returns:
      The session results for the update ops followed by the training op.
    """
    # Copy before adding the training flag so it does not leak into a
    # dictionary the caller may reuse (e.g. for evaluation).
    run_feed = dict(feed_dict)
    run_feed[self.is_training] = True
    ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + [self.training_op]
    return sess.run(ops, run_feed)
def getWeightsByName(graph: tf.Graph, tensor_name: str,
                     sess: tf.Session) -> tf.Tensor:
    """Look up `tensor_name` in `graph` via `getTensor` and return its
    evaluated value (the result of `sess.run`)."""
    tensor = getTensor(graph, tensor_name)
    return sess.run(tensor)
def predict_win(self, sess: tf.Session, inputs):
    """Evaluate `predicted_side` for `inputs`.

    The fetch is a one-element list, so the session result is a one-element
    list as well.
    """
    feed = {self.picks_inputs: inputs}
    fetches = [self.predicted_side]
    return sess.run(fetches, feed_dict=feed)
def train(self,
          sess: tf.Session,
          states: np.array,
          actions: np.array,
          advantages: np.array,
          nMinibatch: int,
          nEpochs: int,
          nBatches: int = 0,
          stateOffset=0,
          stateScale=1,
          verbose=True):
    """Update the policy from one iteration's states, actions and advantages.

    The data are appended to ``self.history`` (oldest entry dropped once more
    than ``self.nHistory`` are held) and the policy is then optimised on
    randomly drawn minibatches:

    * ``self.separateVarAdapt`` set: first ``nVarAdaptBatches`` batches run
      ``self.optimizePolicySigma`` on samples drawn from the whole history,
      then ``nBatches`` batches run ``self.optimizePolicyMean`` on the current
      data only.
    * otherwise: ``nBatches`` batches run ``self.optimizePolicy`` on the
      current data.

    Args:
      sess: Session used for all graph evaluations.
      states: Observed states, one row per sample; must be finite.
      actions: Actions taken, one row per sample; must be finite.
      advantages: 1-D array of advantage estimates; must be finite.
      nMinibatch: Minibatch size; clipped to the number of samples.
      nEpochs: Used to derive the batch count when ``nBatches`` is 0.
      nBatches: Number of batches; 0 means
        ``max(1, int(nData * nEpochs / nMinibatch))``.
      stateOffset: Subtracted from states before scaling.
      stateScale: Multiplied onto the offset-corrected states.
      verbose: Print progress every 100 batches.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost -- verify against the original file.
    """
    assert (np.all(np.isfinite(states)))
    assert (np.all(np.isfinite(actions)))
    assert (np.all(np.isfinite(advantages)))
    assert (self.initialized)
    nData = actions.shape[0]

    #reset bookkeeping for next iter
    self.usedSigmaSum = 0
    self.usedSigmaSumCounter = 0

    #manage history: store unscaled copies, drop the oldest entry when full
    self.history.append([states.copy(), actions.copy(), advantages.copy()])
    if len(self.history) > self.nHistory:
        self.history.popleft()

    #safety-check that the observed state distribution is at least roughly zero-mean unit sd
    if self.stateDim > 0:
        scaledStates = (states - stateOffset) * stateScale
        stateAbsMax = np.max(np.absolute(scaledStates))
        if stateAbsMax > 10:
            print("Warning: states deviate up to {} sd:s from expected!".
                  format(stateAbsMax))
    else:
        scaledStates = states

    #train
    assert (len(advantages.shape) == 1
            )  #to prevent nasty silent broadcasting bugs
    nMinibatch = min([nData, nMinibatch])
    if nBatches == 0:
        nBatches = max([1, int(nData * nEpochs / nMinibatch)])
    #nBatches=1000
    # The variance-adaptation phase uses as many batches as the main phase.
    nVarAdaptBatches = nBatches

    # Minibatch buffers, refilled for every batch.
    mbStates = np.zeros([nMinibatch, self.stateDim])
    mbActions = np.zeros([nMinibatch, self.actionDim])
    mbOldMean = np.zeros([nMinibatch, self.actionDim])
    mbAdvantages = np.zeros([nMinibatch])
    # Stays all ones unless the PPO loss is used (see below).
    logPiOld = np.ones([nData])
    mbLogPiOld = np.ones([nMinibatch])

    if self.usePPOLoss:
        # Per-sample log-probability terms of the taken actions under the
        # policy as it is before this update, summed over action dimensions.
        policyMean, policyVar, policyLogVar = sess.run(
            [self.policyMean, self.policyVar, self.policyLogVar],
            feed_dict={self.stateIn: scaledStates})
        #for i in range(nData):
        #    logPiOld[i]=np.sum(-0.5*np.square(actions[i,:]-policyMean[i,:])/policyVar[i,:]-0.5*policyLogVar[i,:])
        logPiOld = np.sum(
            -0.5 * np.square(actions - policyMean) / policyVar -
            0.5 * policyLogVar,
            axis=1)

    if self.separateVarAdapt:
        assert (self.usePPOLoss == False)
        #if negativeAdvantageAvoidanceSigma>0:
        # Policy means before any update of this call, fed as oldPolicyMean.
        oldMeans = sess.run(self.policyMean, {self.stateIn: scaledStates})
        for batchIdx in range(
                nBatches +
                nVarAdaptBatches if self.separateVarAdapt else nBatches):
            if batchIdx < nVarAdaptBatches:
                # Phase 1: adapt the policy variance on samples drawn
                # uniformly over history entries, then over their rows.
                historyLen = len(self.history)
                for i in range(nMinibatch):
                    histIdx = np.random.randint(0, historyLen)
                    h = self.history[histIdx]
                    # nData temporarily holds the size of the sampled entry;
                    # it is reset at the start of phase 2.
                    nData = h[1].shape[0]
                    dataIdx = np.random.randint(0, nData)
                    mbStates[i, :] = h[0][dataIdx, :]
                    mbActions[i, :] = h[1][dataIdx, :]
                    mbAdvantages[i] = h[2][dataIdx]
                advantageMean = np.mean(mbAdvantages)
                mbStates = (
                    mbStates - stateOffset
                ) * stateScale  #here, we must scale per batch because using the history
                temp, currLoss = sess.run(
                    [self.optimizePolicySigma, self.policyLoss],
                    feed_dict={
                        self.stateIn: mbStates,
                        self.actionIn: mbActions,
                        self.advantagesIn: mbAdvantages
                    })
                if verbose and (batchIdx % 100 == 0):
                    print(
                        "Adapting policy variance, batch {}/{}, mean advantage {:.2f}, loss {}"
                        .format(batchIdx, nVarAdaptBatches, advantageMean,
                                currLoss))
                #temp,currLoss=sess.run([self.optimizePolicyMean,self.policyLoss],feed_dict={self.stateIn:mbStates,self.actionIn:mbActions,self.advantagesIn:mbAdvantages})
            else:
                # Phase 2: adapt the policy mean on the current (already
                # scaled) data only.
                nData = actions.shape[0]
                for i in range(nMinibatch):
                    dataIdx = np.random.randint(0, nData)
                    mbStates[i, :] = scaledStates[dataIdx, :]
                    mbActions[i, :] = actions[dataIdx, :]
                    if self.stateDim > 0:
                        mbOldMean[i, :] = oldMeans[dataIdx, :]
                    mbAdvantages[i] = advantages[dataIdx]
                advantageMean = np.mean(mbAdvantages)
                # mbLogPiOld is never refilled in this branch, so it is fed
                # as all ones.
                temp, currLoss = sess.run(
                    [self.optimizePolicyMean, self.policyLoss],
                    feed_dict={
                        self.stateIn: mbStates,
                        self.actionIn: mbActions,
                        self.advantagesIn: mbAdvantages,
                        self.logPiOldIn: mbLogPiOld,
                        self.oldPolicyMean: mbOldMean
                    })
                if verbose and (batchIdx % 100 == 0):
                    print(
                        "Adapting policy mean, batch {}/{}, mean advantage {:.2f}, loss {}"
                        .format(batchIdx - nVarAdaptBatches, nBatches,
                                advantageMean, currLoss))
    else:
        # Joint update of the whole policy on the current data.
        # self.separateVarAdapt is falsy here, so the range is nBatches.
        for batchIdx in range(
                nBatches +
                nVarAdaptBatches if self.separateVarAdapt else nBatches):
            for i in range(nMinibatch):
                dataIdx = np.random.randint(0, nData)
                if self.stateDim != 0:
                    mbStates[i, :] = scaledStates[dataIdx, :]
                mbActions[i, :] = actions[dataIdx, :]
                mbAdvantages[i] = advantages[dataIdx]
                mbLogPiOld[i] = logPiOld[dataIdx]
            advantageMean = np.mean(mbAdvantages)
            temp, currLoss = sess.run(
                [self.optimizePolicy, self.policyLoss],
                feed_dict={
                    self.stateIn: mbStates,
                    self.actionIn: mbActions,
                    self.advantagesIn: mbAdvantages,
                    self.logPiOldIn: mbLogPiOld
                })
            if verbose and (batchIdx % 100 == 0):
                print(
                    "Training policy, batch {}/{}, mean advantage {:.2f}, loss {}"
                    .format(batchIdx, nBatches, advantageMean, currLoss))
def metrics(self, sess: tf.Session):
    """Evaluate all metric value tensors and return them keyed by metric name.

    `self.metrics_array` maps a name to a pair; only the first element of
    each pair is evaluated.
    """
    names = list(self.metrics_array.keys())
    value_tensors = [
        value_tensor for value_tensor, _ in self.metrics_array.values()
    ]
    fetched = sess.run(value_tensors)
    return dict(zip(names, fetched))
def getExpectation(self, sess: tf.Session, observations: np.array):
    """Evaluate `self.policyMean` with `observations` fed to `self.stateIn`."""
    feed = {self.stateIn: observations}
    return sess.run(self.policyMean, feed_dict=feed)
def getWeightsByTensor(sess: tf.Session, tensor: tf.Tensor) -> tf.Tensor:
    """Evaluate `tensor` in `sess` and return the result of the run."""
    return sess.run(tensor)
def getSd(self, sess: tf.Session, observations: np.array):
    """Evaluate `self.policySigma` with `observations` fed to `self.stateIn`."""
    feed = {self.stateIn: observations}
    return sess.run(self.policySigma, feed_dict=feed)
def _init_session(self, session: tf.Session):
    """Run the global variables initializer in `session`."""
    init_op = tf.global_variables_initializer()
    session.run(init_op)
def initialize_variables(sess: tf.Session):
    """Run the global and the local variables initializers in one session call."""
    init_ops = [
        tf.global_variables_initializer(),
        tf.local_variables_initializer(),
    ]
    sess.run(init_ops)
def initialize_test(self, tf_session: tf.Session, label2index):
    """Initialise the test iterator and store its string handle.

    The iterator's initializer is run with the arguments built by
    `_get_init_args` from the test file ids and `label2index`; the evaluated
    string handle is then kept in `self.test_spec`.
    """
    test_file_ids = self.image_dataset.file_ids_test_iter()
    init_args = self._get_init_args(test_file_ids, label2index)
    tf_session.run(self._test_iter.initializer, init_args)

    handle_op = self._test_iter.string_handle()
    self.test_spec = tf_session.run(handle_op)
def __call__(self, sess: tf.Session, observ):
    """Evaluate `self.processed_observ` with `observ` fed to `self.observ_`."""
    feed = {self.observ_: observ}
    return sess.run(self.processed_observ, feed_dict=feed)