def get_variables(scope):
    """Return the global variables whose names match `scope`."""
    collection_key = tf.GraphKeys.GLOBAL_VARIABLES
    return tf.get_collection(collection_key, scope)
def init_learner_state(self):
    """Initialize the learner's variables, then assign its initial state.

    The state assignment op is gated (via control dependencies) on the
    graph's local-init ops and on the learner variable initializer, so the
    returned op only runs after initialization has completed.
    """
    var_init = tf.initialize_variables(
        self.learner.learner.get_variables(tf.GraphKeys.GLOBAL_VARIABLES))
    pending_inits = tf.get_collection(tf.GraphKeys.LOCAL_INIT_OP) + [var_init]
    with tf.control_dependencies(pending_inits):
        fresh_state = self.learner.initial_state()
        return self.learner.assign_state(fresh_state)
def get_ckpt_var_map(ckpt_path, ckpt_scope, var_scope, skip_mismatch=None):
  """Get a var map for restoring from pretrained checkpoints.

  Args:
    ckpt_path: string. A pretrained checkpoint path.
    ckpt_scope: string. Scope name for checkpoint variables. Must end with '/'.
    var_scope: string. Scope name for model variables. Must end with '/'.
    skip_mismatch: if true-ish, variables that are missing from the checkpoint
      or whose shapes differ are skipped (logged) instead of raising.

  Returns:
    var_map: a dictionary from checkpoint name to model variable.

  Raises:
    ValueError: if a scope name does not end with '/', or (when
      skip_mismatch is falsy) a variable is missing or shape-mismatched.
  """
  logging.info('Init model from checkpoint {}'.format(ckpt_path))
  if not ckpt_scope.endswith('/') or not var_scope.endswith('/'):
    raise ValueError('Please specific scope name ending with /')
  if ckpt_scope.startswith('/'):
    ckpt_scope = ckpt_scope[1:]
  if var_scope.startswith('/'):
    var_scope = var_scope[1:]

  var_map = {}
  # Get the list of vars to restore.
  model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope=var_scope)
  reader = tf.train.load_checkpoint(ckpt_path)
  ckpt_var_name_to_shape = reader.get_variable_to_shape_map()
  # Reuse the shape map instead of reading the checkpoint a second time.
  ckpt_var_names = set(ckpt_var_name_to_shape.keys())

  for i, v in enumerate(model_vars):
    if not v.op.name.startswith(var_scope):
      logging.info('skip {} -- does not match scope {}'.format(
          v.op.name, var_scope))
      # BUG FIX: previously fell through after logging "skip", so a
      # non-matching variable was mapped via a mis-sliced name. Skip it.
      continue
    # Map model var name to checkpoint name: swap var_scope for ckpt_scope.
    ckpt_var = ckpt_scope + v.op.name[len(var_scope):]
    if (ckpt_var not in ckpt_var_names
        and v.op.name.endswith('/ExponentialMovingAverage')):
      # Fall back to the raw (non-EMA) checkpoint variable.
      # BUG FIX: strip var_scope before prepending ckpt_scope, consistent
      # with the non-EMA mapping above (previously the scope was doubled).
      ckpt_var = ckpt_scope + v.op.name[len(var_scope):-len(
          '/ExponentialMovingAverage')]
    if ckpt_var not in ckpt_var_names:
      if 'Momentum' in ckpt_var or 'RMSProp' in ckpt_var:
        # Skip optimizer variables.
        continue
      if skip_mismatch:
        logging.info('skip {} ({}) -- not in ckpt'.format(v.op.name, ckpt_var))
        continue
      raise ValueError('{} is not in ckpt {}'.format(v.op, ckpt_path))
    if v.shape != ckpt_var_name_to_shape[ckpt_var]:
      if skip_mismatch:
        logging.info('skip {} ({} vs {}) -- shape mismatch'.format(
            v.op.name, v.shape, ckpt_var_name_to_shape[ckpt_var]))
        continue
      raise ValueError('shape mismatch {} ({} vs {})'.format(
          v.op.name, v.shape, ckpt_var_name_to_shape[ckpt_var]))
    if i < 5:
      # Log the first few elements for sanity check.
      logging.info('Init {} from ckpt var {}'.format(v.op.name, ckpt_var))
    var_map[ckpt_var] = v
  return var_map
def gradients(ys, xs, grad_ys=None, checkpoints="collection", **kwargs):
    """Recompute gradients. Authors: Tim Salimans & Yaroslav Bulatov.

    Modified by: Nikolay Zakirov

    Memory-efficient gradient implementation inspired by "Training Deep Nets
    with Sublinear Memory Cost" by Chen et al. 2016
    (https://arxiv.org/abs/1604.06174).

    ys, xs, grad_ys, kwargs are the arguments to standard tensorflow
    tf.gradients.

    'checkpoints' can either be:
      - a list of tensors from the forward pass to re-use when computing
        gradients in the backward pass; all other tensors are re-computed;
      - a string selecting how that list is determined:
        - 'speed': checkpoint all outputs of convolutions and matmuls
          (usually the most expensive ops);
        - 'memory': try to minimize memory usage by checkpointing a set of
          bottleneck tensors found in the graph;
        - 'collection': use the tensorflow collection named 'checkpoints';
      - a list of strings to be substring-matched against tensor names.
    """
    # print("Calling memsaving gradients with", checkpoints)
    if not isinstance(ys, list):
        ys = [ys]
    if not isinstance(xs, list):
        xs = [xs]

    # Ops reachable backwards from the outputs ys.
    bwd_ops = ge.get_backward_walk_ops([y.op for y in ys], inclusive=True)
    logging.debug("bwd_ops: %s", len(bwd_ops))

    # forward ops are all ops that are candidates for recomputation
    fwd_ops = ge.get_forward_walk_ops([x.op for x in xs],
                                      inclusive=True,
                                      within_ops=bwd_ops)
    logging.debug("fwd_ops: %s", len(fwd_ops))

    # exclude ops with no inputs
    fwd_ops = [op for op in fwd_ops if op.inputs]
    logging.debug("fwd_ops: %s", len(fwd_ops))

    # don't recompute xs, remove variables (assign/read ops belong to
    # variables, not to the recomputable forward computation)
    xs_ops = _to_ops(xs)
    fwd_ops = [op for op in fwd_ops if op not in xs_ops]
    fwd_ops = [op for op in fwd_ops if "/assign" not in op.name]
    fwd_ops = [op for op in fwd_ops if "/Assign" not in op.name]
    fwd_ops = [op for op in fwd_ops if "/read" not in op.name]
    logging.debug("fwd_ops: %s", len(fwd_ops))
    ts_all = ge.filter_ts(fwd_ops, True)  # get the tensors
    logging.debug("ts_all: %s", len(ts_all))
    ts_all = [t for t in ts_all if "/read" not in t.name]
    ts_all = set(ts_all) - set(xs) - set(ys)
    logging.debug("ts_all: %s", len(ts_all))

    # construct list of tensors to checkpoint during forward pass, if not
    # given as input
    if not isinstance(checkpoints, list):
        if checkpoints == "collection":
            checkpoints = tf.get_collection("checkpoints")
        elif checkpoints == "speed":
            # checkpoint all expensive ops to maximize running speed
            checkpoints = ge.filter_ts_from_regex(fwd_ops,
                                                  "conv2d|Conv|MatMul")
        elif checkpoints == "memory":

            # remove very small tensors and some weird ops
            def fixdims(
                t
            ):  # tf.Dimension values are not compatible with int, convert manually
                try:
                    # Unknown dims are assumed to be 64 for sizing purposes.
                    return [
                        int(e if e is not None else 64) for e in t.as_list()
                    ]
                except AttributeError as e:
                    logging.exception("%s", e)
                    logging.exception("unknown shape %s", t)
                    return [0]  # unknown shape

            ts_all = [
                t for t in ts_all
                if np.prod(fixdims(t.shape)) > MIN_CHECKPOINT_NODE_SIZE
                # if (tf.size(t) > MIN_CHECKPOINT_NODE_SIZE)
            ]
            logging.debug("ts_all: %s", len(ts_all))
            ts_all = [t for t in ts_all if "L2Loss" not in t.name]
            ts_all = [t for t in ts_all if "entropy" not in t.name]
            ts_all = [t for t in ts_all if "FusedBatchNorm" not in t.name]
            ts_all = [t for t in ts_all if "Switch" not in t.name]
            ts_all = [t for t in ts_all if "dropout" not in t.name]
            # DV: FP16_FIX - need to add 'Cast' layer here to make it work for FP16
            ts_all = [t for t in ts_all if "Cast" not in t.name]
            logging.debug("ts_all: %s", len(ts_all))

            # filter out all tensors that are inputs of the backward graph
            # NOTE: this `bwd_ops` deliberately shadows the earlier one; from
            # here on only the captured backward ops are needed.
            with util.capture_ops() as bwd_ops:
                tf_gradients(ys, xs, grad_ys, **kwargs)

            bwd_inputs = [t for op in bwd_ops for t in op.inputs]
            # list of tensors in forward graph that is in input to bwd graph
            ts_filtered = list(set(bwd_inputs).intersection(ts_all))
            debug_print("Using tensors %s", ts_filtered)

            # try two slightly different ways of getting bottlenecks tensors
            # to checkpoint
            logging.debug("len(ts_filtered): %s", len(ts_filtered))
            logging.debug("len(ts_all) %s", len(ts_all))
            for ts in [ts_filtered, ts_all]:

                # get all bottlenecks in the graph
                bottleneck_ts = []
                for t in ts:
                    b = set(
                        ge.get_backward_walk_ops(t.op,
                                                 inclusive=True,
                                                 within_ops=fwd_ops))
                    f = set(
                        ge.get_forward_walk_ops(t.op,
                                                inclusive=False,
                                                within_ops=fwd_ops))
                    # check that there are no shortcuts: t is a bottleneck iff
                    # the tensors feeding its backward and forward cones are
                    # disjoint and together cover (at least) all of ts_all.
                    b_inp = {inp for op in b
                             for inp in op.inputs}.intersection(ts_all)
                    f_inp = {inp for op in f
                             for inp in op.inputs}.intersection(ts_all)
                    if not set(b_inp).intersection(
                            f_inp) and len(b_inp) + len(f_inp) >= len(ts_all):
                        bottleneck_ts.append(t)  # we have a bottleneck!
                    else:
                        logging.debug(
                            "Rejected bottleneck candidate and ops %s %d",
                            [t],
                            len(b_inp) + len(f_inp) - len(ts_all))

                # success? or try again without filtering?
                if len(bottleneck_ts) >= np.sqrt(
                        len(ts_filtered)):  # yes, enough bottlenecks found!
                    break

            # bottleneck_ts = [t for t in ts_all if 'Add' in t.name]
            # logging.debug("Add only ts_all: %s", len(bottleneck_ts))
            if not bottleneck_ts:
                raise Exception(
                    "unable to find bottleneck tensors! please provide checkpoint "
                    'nodes manually, or use checkpoints="speed" or a list of strings.'
                )
            logging.debug("len(bottleneck_ts): %s", len(bottleneck_ts))

            # sort the bottlenecks topologically
            bottlenecks_sorted_lists = tf_toposort(bottleneck_ts,
                                                   within_ops=fwd_ops)
            sorted_bottlenecks = [
                t for ts in bottlenecks_sorted_lists for t in ts
            ]

            # save an approximately optimal number ~ sqrt(N)
            n_filtered = len(ts_filtered)
            if len(bottleneck_ts) <= np.ceil(np.sqrt(n_filtered)):
                checkpoints = sorted_bottlenecks
            else:
                # keep every step-th bottleneck
                step = int(np.ceil(len(bottleneck_ts) / np.sqrt(n_filtered)))
                checkpoints = sorted_bottlenecks[step::step]
        else:
            raise Exception('%s is unsupported input for "checkpoints"' %
                            (checkpoints, ))
    else:
        # exclude some layers as was done in the original bottleneck searching
        # algorithm
        for excl_layer in [
                "L2Loss", "entropy", "FusedBatchNorm", "Switch", "dropout",
                "Cast"
        ]:
            ts_all = [t for t in ts_all if excl_layer not in t.name]
            logging.info("Excluding %s from ts_all: %d", excl_layer,
                         len(ts_all))

        # leave only layers that match strings in checkpoints list
        ts_all = [
            t for t in ts_all
            if any(partial_match in t.name for partial_match in checkpoints)
        ]
        logging.info("Leaving only %s in ts_all: %d", checkpoints,
                     len(ts_all))
        checkpoints = ts_all.copy()

    checkpoints = list(set(checkpoints).intersection(ts_all))

    # at this point selection happened and checkpoints is list of nodes
    # assert isinstance(checkpoints, list)
    # TODO(nikzak): implement multithreading in graph recomputation
    logging.info("Checkpoint nodes used: %s", len(checkpoints))
    # better error handling of special cases
    # xs are already handled as checkpoint nodes, so no need to include them
    xs_intersect_checkpoints = set(xs).intersection(set(checkpoints))
    if xs_intersect_checkpoints:
        debug_print("Warning, some input nodes are also checkpoint nodes: %s",
                    xs_intersect_checkpoints)
    ys_intersect_checkpoints = set(ys).intersection(set(checkpoints))
    debug_print("ys: %s, checkpoints: %s, intersect: %s", ys, checkpoints,
                ys_intersect_checkpoints)
    # saving an output node (ys) gives no benefit in memory while creating
    # new edge cases, exclude them
    if ys_intersect_checkpoints:
        debug_print(
            "Warning, some output nodes are also checkpoints nodes: %s",
            format_ops(ys_intersect_checkpoints))

    # remove initial and terminal nodes from checkpoints list if present
    checkpoints = list(set(checkpoints) - set(ys) - set(xs))
    logging.info("Pruned initial and terminal nodes. Leaving %d",
                 len(checkpoints))

    # check that we have some nodes to checkpoint
    if not checkpoints:
        raise Exception("no checkpoints nodes found or given as input! ")

    # disconnect dependencies between checkpointed tensors: each checkpoint
    # is replaced downstream by a stop_gradient copy pinned to the same device
    checkpoints_disconnected = {}
    for x in checkpoints:
        if x.op and x.op.name is not None:
            grad_node = tf.stop_gradient(x, name=x.op.name + "_sg")
        else:
            grad_node = tf.stop_gradient(x)
        grad_node.op._set_device(x.op.node_def.device)
        checkpoints_disconnected[x] = grad_node

    # partial derivatives to the checkpointed tensors and xs
    ops_to_copy = fast_backward_ops(seed_ops=[y.op for y in ys],
                                    stop_at_ts=checkpoints,
                                    within_ops=fwd_ops)
    debug_print("Found %s ops to copy within fwd_ops %s, seed %s, stop_at %s",
                len(ops_to_copy), fwd_ops, [r.op for r in ys], checkpoints)
    debug_print("ops_to_copy = %s", ops_to_copy)
    debug_print("Processing list %s", ys)
    # Copy the sub-graph between checkpoints and ys (preserving devices),
    # then rewire the copies to consume the stop_gradient nodes.
    _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {})
    for origin_op, op in info._transformed_ops.items():
        op._set_device(origin_op.node_def.device)
    copied_ops = info._transformed_ops.values()
    debug_print("Copied %s to %s", ops_to_copy, copied_ops)
    ge.reroute_ts(checkpoints_disconnected.values(),
                  checkpoints_disconnected.keys(),
                  can_modify=copied_ops)
    debug_print("Rewired %s in place of %s restricted to %s",
                checkpoints_disconnected.values(),
                checkpoints_disconnected.keys(), copied_ops)

    # get gradients with respect to current boundary + original x's
    copied_ys = [info._transformed_ops[y.op]._outputs[0] for y in ys]
    boundary = list(checkpoints_disconnected.values())
    dv = tf_gradients(ys=copied_ys,
                      xs=boundary + xs,
                      grad_ys=grad_ys,
                      **kwargs)
    debug_print("Got gradients %s", dv)
    debug_print("for %s", copied_ys)
    debug_print("with respect to %s", boundary + xs)

    inputs_to_do_before = [y.op for y in ys]
    if grad_ys is not None:
        inputs_to_do_before += grad_ys
    wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None]
    my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

    # partial derivatives to the checkpointed nodes
    # dictionary of "node: backprop" for nodes in the boundary
    d_checkpoints = dict(
        zip(checkpoints_disconnected.keys(),
            dv[:len(checkpoints_disconnected)]))
    # partial derivatives to xs (usually the params of the neural net)
    d_xs = dv[len(checkpoints_disconnected):]

    # incorporate derivatives flowing through the checkpointed nodes,
    # walking the checkpoints in reverse topological order
    logging.info("Sorting nodes topologically")
    checkpoints_sorted_lists = tf_toposort(checkpoints, within_ops=fwd_ops)
    logging.info("Rebuilding graph with %d checkpoints",
                 len(checkpoints_sorted_lists))
    for index, ts in enumerate(checkpoints_sorted_lists[::-1]):
        if index % 50 == 0:
            logging.info("Processed %d nodes", index)
        debug_print("Processing list %s", ts)
        checkpoints_other = [r for r in checkpoints if r not in ts]
        checkpoints_disconnected_other = [
            checkpoints_disconnected[r] for r in checkpoints_other
        ]

        # copy part of the graph below current checkpoint node, stopping at
        # other checkpoints nodes
        ops_to_copy = fast_backward_ops(within_ops=fwd_ops,
                                        seed_ops=[r.op for r in ts],
                                        stop_at_ts=checkpoints_other)
        debug_print("Found %s ops to copy within %s, seed %s, stop_at %s",
                    len(ops_to_copy), fwd_ops, [r.op for r in ts],
                    checkpoints_other)
        debug_print("ops_to_copy = %s", ops_to_copy)
        if not ops_to_copy:  # we're done!
            break
        _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {})
        for origin_op, op in info._transformed_ops.items():
            op._set_device(origin_op.node_def.device)
        copied_ops = info._transformed_ops.values()
        debug_print("Copied %s to %s", ops_to_copy, copied_ops)
        ge.reroute_ts(checkpoints_disconnected_other,
                      checkpoints_other,
                      can_modify=copied_ops)
        debug_print("Rewired %s in place of %s restricted to %s",
                    checkpoints_disconnected_other, checkpoints_other,
                    copied_ops)

        # gradient flowing through the checkpointed node
        boundary = [info._transformed_ops[r.op]._outputs[0] for r in ts]
        substitute_backprops = [d_checkpoints[r] for r in ts]
        dv = tf_gradients(boundary,
                          checkpoints_disconnected_other + xs,
                          grad_ys=substitute_backprops,
                          **kwargs)
        debug_print("Got gradients %s", dv)
        debug_print("for %s", boundary)
        debug_print("with respect to %s", checkpoints_disconnected_other + xs)
        debug_print("with boundary backprop substitutions %s",
                    substitute_backprops)

        inputs_to_do_before = [d_checkpoints[r].op for r in ts]
        wait_to_do_ops = list(copied_ops) + [
            g.op for g in dv if g is not None
        ]
        my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

        # partial derivatives to the checkpointed nodes
        for r, dr in zip(checkpoints_other, dv[:len(checkpoints_other)]):
            if dr is not None:
                if d_checkpoints[r] is None:
                    d_checkpoints[r] = dr
                else:
                    d_checkpoints[r] += dr

        def _unsparsify(x):
            # Convert IndexedSlices to a dense tensor so += accumulation works.
            if not isinstance(x, tf.IndexedSlices):
                return x
            if x.dense_shape is None:
                raise ValueError(
                    "memory_saving_gradients has sparse gradients of unknown shape."
                )

            indices = x.indices
            while indices.shape.ndims < x.values.shape.ndims:
                indices = tf.expand_dims(indices, -1)
            return tf.scatter_nd(indices, x.values, x.dense_shape)

        # partial derivatives to xs (usually the params of the neural net)
        d_xs_new = dv[len(checkpoints_other):]
        for j in range(len(xs)):
            if d_xs_new[j] is not None:
                if d_xs[j] is None:
                    d_xs[j] = _unsparsify(d_xs_new[j])
                else:
                    d_xs[j] += _unsparsify(d_xs_new[j])

    return d_xs
def vars(self):
    """Return all global variables created under this object's name scope."""
    scope = self.name_scope
    return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
def train(self, input_fn, checkpoint_path=None, save_checkpoint_steps=None):
    """Run the training loop, stepping the peer bridge once per iteration.

    Args:
        input_fn: input_fn producing (features, labels) for TRAIN mode.
        checkpoint_path: optional checkpoint dir for MonitoredTrainingSession.
        save_checkpoint_steps: optional checkpoint save interval (steps).

    Returns:
        self, for chaining.
    """
    if self._cluster_spec is not None:
        # Distributed case: place variables via replica_device_setter and
        # run against a single-task in-process server for this worker.
        device_fn = tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % self._worker_rank,
            merge_devices=True,
            cluster=self._cluster_spec)
        cluster_def = self._cluster_spec.as_cluster_def()
        local_address = self._cluster_spec.job_tasks('worker')[
            self._worker_rank]
        # NOTE(review): `server` is never referenced again; presumably kept
        # alive for the lifetime of this frame so the grpc target works.
        server = tf.train.Server(tf.train.ClusterSpec(
            {'local': {
                0: local_address
            }}),
                                 job_name='local',
                                 task_index=0)
        target = 'grpc://' + local_address
    else:
        device_fn = None
        cluster_def = None
        target = None

    config = tf.ConfigProto(cluster_def=cluster_def)
    config.inter_op_parallelism_threads = 4
    config.intra_op_parallelism_threads = 4
    config.experimental.share_session_state_in_clusterspec_propagation \
        = True
    tf.config.set_soft_device_placement(False)

    with tf.Graph().as_default() as g:
        with tf.device(device_fn):
            features, labels = self._get_features_and_labels_from_input_fn(
                input_fn, ModeKeys.TRAIN)
            spec, _ = self._get_model_spec(features, labels, ModeKeys.TRAIN)

        # Explicitly add a Saver
        if not tf.get_collection(tf.GraphKeys.SAVERS):
            saver = tf.train.Saver(
                sharded=True,
                defer_build=True,
                save_relative_paths=True)  # Must set for portability
            tf.add_to_collection(tf.GraphKeys.SAVERS, saver)

        self._bridge.connect()
        with tf.train.MonitoredTrainingSession(
                master=target,
                config=config,
                is_chief=(self._worker_rank == 0),
                checkpoint_dir=checkpoint_path,
                save_checkpoint_steps=save_checkpoint_steps,
                hooks=spec.training_hooks) as sess:
            iter_id = 0
            while not sess.should_stop():
                # Bridge protocol: start(iter) -> run train_op -> commit.
                self._bridge.start(iter_id)
                logging.debug('after bridge start.')
                sess.run(spec.train_op, feed_dict={})
                logging.debug('after session run.')
                self._bridge.commit()
                logging.debug('after bridge commit.')
                iter_id += 1

        if self._cluster_spec is not None:
            # NOTE(review): method name is spelled this way at its
            # definition site ("cheif barriar"); do not "fix" only here.
            self._cheif_barriar(is_chief=(self._worker_rank == 0))
        self._bridge.terminate()
    return self
def train():
    # Build the point-cloud classification graph, then run train/eval epochs,
    # checkpointing every 10 epochs. Python 2 code (uses a print statement).
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter
            # for you every time it trains.
            batch = tf.get_variable('batch', [],
                                    initializer=tf.constant_initializer(0),
                                    trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl,
                                               is_training_pl,
                                               bn_decay=bn_decay)
            MODEL.get_loss(pred, labels_pl, end_points)
            losses = tf.get_collection('losses')
            total_loss = tf.add_n(losses, name='total_loss')
            tf.summary.scalar('total_loss', total_loss)
            for l in losses + [total_loss]:
                tf.summary.scalar(l.op.name, l)

            # Per-batch accuracy, for monitoring only.
            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct,
                                             tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            print "--- Get training operator"
            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            # NOTE(review): if OPTIMIZER is neither 'momentum' nor 'adam',
            # `optimizer` is undefined and the next line raises NameError.
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            train_op = optimizer.minimize(total_loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'),
                                            sess.graph)

        # Init variables
        init = tf.global_variables_initializer()
        sess.run(init)

        # Handles passed to train_one_epoch / eval_one_epoch.
        ops = {
            'pointclouds_pl': pointclouds_pl,
            'labels_pl': labels_pl,
            'is_training_pl': is_training_pl,
            'pred': pred,
            'loss': total_loss,
            'train_op': train_op,
            'merged': merged,
            'step': batch,
            'end_points': end_points
        }

        # NOTE(review): best_acc is never updated or read below.
        best_acc = -1
        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess,
                                       os.path.join(LOG_DIR, "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)
def get_weights():
    """Return the tensors registered in the weight collection."""
    collection_name = _WEIGHT_COLLECTION
    return tf.get_collection(collection_name)
def get_scalar_summaries():
    """Return the list of (name, Tensor) summaries recorded by scalar()."""
    collection_name = 'edsummaries'
    return tf.get_collection(collection_name)
def get_masks():
    """Return the tensors registered in the mask collection."""
    collection_name = _MASK_COLLECTION
    return tf.get_collection(collection_name)
def get_thresholds():
    """Return the tensors registered in the threshold collection."""
    collection_name = _THRESHOLD_COLLECTION
    return tf.get_collection(collection_name)
def get_masked_weights():
    """Return the tensors registered in the masked-weight collection."""
    collection_name = _MASKED_WEIGHT_COLLECTION
    return tf.get_collection(collection_name)
def __init__(self, args):
    """Build a GRU-based sequence VAE graph (encoder, sampler, decoder, losses).

    Args:
        args: config object; fields read here: batch_size, map_size,
            x_latent_size, rnn_size, neg_size, learning_rate, eval.
    """
    # inputs/mask.shape=(128, None) 'None' in shape means any number
    # seq_length.shape=(128,)
    inputs = tf.placeholder(shape=(args.batch_size, None),
                            dtype=tf.int32,
                            name='inputs')
    mask = tf.placeholder(shape=(args.batch_size, None),
                          dtype=tf.float32,
                          name='inputs_mask')
    # NOTE(review): seq_length is declared float32 although it is used as a
    # sequence-length argument to dynamic_rnn — confirm this is intended.
    seq_length = tf.placeholder(shape=args.batch_size,
                                dtype=tf.float32,
                                name='seq_length')
    self.input_form = [inputs, mask, seq_length]

    # all shape=(128, None); decoder input is the sequence shifted right by a
    # leading zero token, the target is the sequence followed by a zero token.
    encoder_inputs = inputs
    decoder_inputs = tf.concat(
        [tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32), inputs],
        axis=1)
    decoder_targets = tf.concat(
        [inputs, tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32)],
        axis=1)
    decoder_mask = tf.concat(
        [mask, tf.zeros(shape=(args.batch_size, 1), dtype=tf.float32)],
        axis=1)

    # map size — vocabulary equals the number of grid cells.
    x_size = out_size = args.map_size[0] * args.map_size[1]
    # embeddings.shape=(16900, 32) tf.random_uniform(shape, minval=0, maxval=None, ...)
    # x_latent_size is the input embedding size = 32
    embeddings = tf.Variable(tf.random_uniform(
        [x_size, args.x_latent_size], -1.0, 1.0),
                             dtype=tf.float32)
    # tf.nn.embedding_lookup(params, ids, ...) Looks up ids in a list of
    # embedding tensors. shape=(128, None, 32)
    encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     encoder_inputs)
    decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings,
                                                     decoder_inputs)

    with tf.variable_scope("encoder"):
        # create a GRUCell output_size = state_size = 256
        encoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
        # Only the final state is kept: it summarizes the whole sequence.
        # 'state' is a tensor of shape [batch_size, cell_state_size] = (128, 256)
        _, encoder_final_state = tf.nn.dynamic_rnn(
            encoder_cell,
            encoder_inputs_embedded,
            sequence_length=seq_length,
            dtype=tf.float32,
        )

    # Linear projections of the encoder state to the posterior mean and
    # log-variance of the latent code.
    mu_w = tf.get_variable("mu_w", [args.rnn_size, args.rnn_size],
                           tf.float32,
                           tf.random_normal_initializer(stddev=0.02))
    mu_b = tf.get_variable("mu_b", [args.rnn_size], tf.float32,
                           tf.constant_initializer(0.0))
    sigma_w = tf.get_variable("sigma_w", [args.rnn_size, args.rnn_size],
                              tf.float32,
                              tf.random_normal_initializer(stddev=0.02))
    sigma_b = tf.get_variable("sigma_b", [args.rnn_size], tf.float32,
                              tf.constant_initializer(0.0))

    # all shape=(128, 256)
    mu = tf.matmul(encoder_final_state, mu_w) + mu_b
    log_sigma_sq = tf.matmul(encoder_final_state, sigma_w) + sigma_b
    eps = tf.random_normal(shape=tf.shape(log_sigma_sq),
                           mean=0,
                           stddev=1,
                           dtype=tf.float32)
    if args.eval:
        # Deterministic zero latent in eval mode.
        z = tf.zeros(shape=(args.batch_size, args.rnn_size),
                     dtype=tf.float32)
    else:
        # Re-parameterization trick
        z = mu + tf.sqrt(tf.exp(log_sigma_sq)) * eps
    self.batch_post_embedded = z

    with tf.variable_scope("decoder"):
        decoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
        # The latent sample seeds the decoder state.
        decoder_init_state = z
        decoder_outputs, _ = tf.nn.dynamic_rnn(
            decoder_cell,
            decoder_inputs_embedded,
            initial_state=decoder_init_state,
            sequence_length=seq_length,
            dtype=tf.float32,
        )

    # out_size = 16900; output projection used by sampled softmax below.
    out_w = tf.get_variable("out_w", [out_size, args.rnn_size], tf.float32,
                            tf.random_normal_initializer(stddev=0.02))
    out_b = tf.get_variable("out_b", [out_size], tf.float32,
                            tf.constant_initializer(0.0))

    # tf.nn.sampled_softmax_loss() A fast way to train softmax classifier,
    # usually an underestimate (for training only).
    batch_rec_loss = tf.reduce_mean(
        decoder_mask * tf.reshape(
            tf.nn.sampled_softmax_loss(
                weights=out_w,
                biases=out_b,
                labels=tf.reshape(decoder_targets, [-1, 1]),
                inputs=tf.reshape(decoder_outputs, [-1, args.rnn_size]),
                num_sampled=args.neg_size,
                num_classes=out_size), [args.batch_size, -1]),
        axis=-1  # reduce to mean along the last dimension
    )
    # KL divergence of the diagonal-Gaussian posterior from N(0, I).
    batch_latent_loss = -0.5 * tf.reduce_sum(
        1 + log_sigma_sq - tf.square(mu) - tf.exp(log_sigma_sq), axis=1)
    self.rec_loss = rec_loss = tf.reduce_mean(batch_rec_loss)
    self.latent_loss = latent_loss = tf.reduce_mean(batch_latent_loss)
    # NOTE(review): this is the *mean* of the two loss terms (each weighted
    # 0.5), not their sum — confirm this weighting is intended.
    self.loss = loss = tf.reduce_mean([rec_loss, latent_loss])
    self.train_op = tf.train.AdamOptimizer(
        args.learning_rate).minimize(loss)

    # Per-sequence log-sigmoid likelihood under the full output projection
    # (exact, unlike the sampled training loss above).
    target_out_w = tf.nn.embedding_lookup(out_w, decoder_targets)
    target_out_b = tf.nn.embedding_lookup(out_b, decoder_targets)
    self.batch_likelihood = tf.reduce_mean(decoder_mask * tf.log_sigmoid(
        tf.reduce_sum(decoder_outputs * target_out_w, -1) + target_out_b),
                                           axis=-1,
                                           name="batch_likelihood")

    # save/restore variables to/from checkpoints, max_to_keep = max #recent
    # checkpoint files to keep.
    saver = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES),
                           max_to_keep=10)
    self.save, self.restore = saver.save, saver.restore
def get_trainable_variables(scope):
    """Return the trainable variables whose names match `scope`."""
    collection_key = tf.GraphKeys.TRAINABLE_VARIABLES
    return tf.get_collection(collection_key, scope)
def predict_and_report(self,
                       sequences,
                       labels,
                       W_embed,
                       report=True,
                       file=False):
    """
    PURPOSE: Prediction using best model on provided examples and generating
             report if indicated and labels are provided.

    ARGS:
        sequences       (list(list)) order of product numbers
        labels          (list) order class labels
        W_embed         (list(list)) trained word embedding Matrix
        report          (bool) indicator for whether a report is generated
        file            (bool) if True, dump a JSON summary to
                        self.summary_file and self.most_recent_summary_file
                        (note: this parameter shadows the `file` builtin and
                        is itself shadowed by the `with open(...) as file`
                        handles below)
    """
    from sklearn.metrics import confusion_matrix, classification_report
    import json
    with tf.Session(graph=self.graph) as sess:
        # Restore the best checkpoint; the saver is the second element of
        # the 'Init_Save_ops' collection stored at graph-build time.
        _, saver_ = tf.get_collection('Init_Save_ops')
        saver_.restore(sess, self.final_ckpt)
        logits_ = self.graph.get_tensor_by_name(
            'OutputLyr/Logits_lyr/BiasAdd:0')
        sequences_, W_embed_, Y_, training_ = tf.get_collection("Input_var")
        self.logits_prediction = logits_.eval(feed_dict={
            W_embed_: W_embed,
            sequences_: sequences,
            training_: False
        })
        self.class_prediction = np.argmax(self.logits_prediction, axis=1)
        confusion_mat = confusion_matrix(labels, self.class_prediction)
        # NOTE(review): with sklearn's confusion matrix C[i, j] = count of
        # true class i predicted as class j, these are column-normalized
        # ratios; the names true_neg/false_neg presume class 0 == "negative"
        # — confirm against the label encoding.
        true_neg = confusion_mat[0, 0] / (confusion_mat[0, 0] +
                                          confusion_mat[1, 0])
        false_neg = confusion_mat[0, 1] / (confusion_mat[0, 1] +
                                           confusion_mat[1, 1])
        ratio = true_neg / false_neg
        if report:
            print('-----------{}-----------'.format('Confusion Matrix'))
            print(confusion_mat, '\n')
            print(
                '-----------{}-----------'.format('Classification Report'))
            print(classification_report(labels, self.class_prediction))
            print('True Negative:', true_neg)
            print('False Negative:', false_neg)
            print('Upper Constraint:', ratio)
        if file:
            # Serialize instance state plus metrics; drop members that are
            # not JSON-serializable before dumping.
            summary_dict = self.__dict__.copy()
            class_report_dict = classification_report(
                labels, self.class_prediction, output_dict=True)
            summary_dict.update(class_report_dict)
            summary_dict.update({
                'true_negative': true_neg,
                'false_negative': false_neg,
                'upper_constraint': ratio
            })
            summary_dict.pop('graph', None)
            summary_dict.pop('logits_prediction', None)
            summary_dict.pop('class_prediction', None)
            with open(self.summary_file, 'w') as file:
                json.dump(summary_dict, file, indent=2)
            with open(self.most_recent_summary_file, 'w') as file:
                json.dump(summary_dict, file, indent=2)
def inception_model_fn(features, labels, mode, params):
    """Inception v3 model using Estimator API.

    Args:
        features: input images (or a dict with key 'feature').
        labels: integer class labels.
        mode: a tf.estimator.ModeKeys value.
        params: dict; 'input_perm' is used to transpose the input layout.

    Returns:
        A TPUEstimatorSpec (or EstimatorSpec for PREDICT mode).
    """
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)

    if isinstance(features, dict):
        features = features['feature']

    features = tensor_transform_fn(features, params['input_perm'])

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        # NOTE(review): if FLAGS.precision is neither 'bfloat16' nor
        # 'float32', `logits`/`end_points` are unbound here.
        if FLAGS.precision == 'bfloat16':
            with contrib_tpu.bfloat16_scope():
                logits, end_points = inception.inception_v3(
                    features, num_classes, is_training=is_training)
            logits = tf.cast(logits, tf.float32)
        elif FLAGS.precision == 'float32':
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)
        return logits, end_points

    if FLAGS.clear_update_collections:
        # updates_collections must be set to None in order to use fused
        # batchnorm
        with arg_scope(
                inception.inception_v3_arg_scope(
                    weight_decay=0.0,
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = build_network()
    else:
        with arg_scope(
                inception.inception_v3_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = build_network()

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # Optionally print predictions/labels when evaluating off-TPU.
    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    # Auxiliary-head loss (weight 0.4), registered into tf.losses.
    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=tf.cast(
                                            end_points['AuxLogits'],
                                            tf.float32),
                                        weights=0.4,
                                        label_smoothing=0.1,
                                        scope='aux_loss')

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)

    losses = tf.add_n(tf.losses.get_losses())
    # Manual L2 regularization over conv/fc weights, excluding batch norm.
    l2_loss = []
    for v in tf.trainable_variables():
        if 'BatchNorm' not in v.name and 'weights' in v.name:
            l2_loss.append(tf.nn.l2_loss(v))
    loss = losses + WEIGHT_DECAY * tf.add_n(l2_loss)

    # Linear-scaling rule: scale the base LR by batch_size/256.
    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    if FLAGS.use_learning_rate_warmup:
        # Adjust initial learning rate to match final warmup rate
        warmup_decay = FLAGS.learning_rate_decay**(
            (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
            FLAGS.learning_rate_decay_epochs)
        adj_initial_learning_rate = initial_learning_rate * warmup_decay

    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=int(FLAGS.learning_rate_decay_epochs *
                            batches_per_epoch),
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        if FLAGS.use_learning_rate_warmup:
            # Piecewise schedule: constant wlr during cold epochs, linear
            # increase during warmup, then the exponential decay above.
            wlr = 0.1 * adj_initial_learning_rate
            wlr_height = tf.cast(
                0.9 * adj_initial_learning_rate /
                (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
                tf.float32)
            epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
            exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                               FLAGS.learning_rate_decay_epochs)
            lin_inc_lr = tf.add(
                wlr,
                tf.multiply(
                    tf.cast(tf.subtract(current_epoch, epoch_offset),
                            tf.float32), wlr_height))
            learning_rate = tf.where(
                tf.greater_equal(current_epoch, FLAGS.cold_epochs),
                (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                          learning_rate, lin_inc_lr)), wlr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            tf.logging.fatal('Unknown optimizer:', FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

        # Run batch-norm moving-average updates alongside the train step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)

        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(
                decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly concatenated to
        # [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly
                reference any Tensors in the rest of the `model_fn`. To pass
                Tensors from the model to the `metric_fn`, provide them as
                part of the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor with shape `[batch]` for the global_step
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                with summary.create_file_writer(
                        FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                        summary.scalar('learning_rate',
                                       tf.reduce_mean(lr),
                                       step=gs)
                        summary.scalar('current_epoch',
                                       tf.reduce_mean(ce),
                                       step=gs)
                        return summary.all_summary_ops()

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly
            reference any Tensors in the rest of the `model_fn`. To pass
            Tensors from the model to the `metric_fn`, provide as part of the
            `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the
            second element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch, ]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics)
def apply_customized_matrix_compression(matrix_compression_obj,  # pylint:disable=invalid-name
                                        weight_params_fn,
                                        weight_init_obj,
                                        layer_obj,
                                        weight_name,
                                        weight_shape,
                                        weight_dtype,
                                        scope_name='pruning_interface',
                                        spec=None):
  """Apply pruning or compression to a lingvo layer.

  This provides a unified interface to perform pruning or compression for a
  lingvo layer.

  Args:
    matrix_compression_obj: A Pruning or
      compression_lib.lingvo_compression_op.ApplyCompression object;
    weight_params_fn: functional handle to create model parameters;
    weight_init_obj: a weight initialization object;
    layer_obj: a layer object in the lingvo package, weight matrix of this
      layer is pruned or compressed;
    weight_name: name of the tensor that is compressed, str;
    weight_shape: shape of the weight matrix;
    weight_dtype: data type of the weight matrix;
    scope_name: TensorFlow scope for creating relavant variables.
    spec: spec to use for the compression op.

  Returns:
    None.
  """
  if isinstance(matrix_compression_obj, pruning.Pruning):
    prune_option = matrix_compression_obj.matrix_compression_spec.prune_option

    with tf.variable_scope(scope_name):
      # Create mask and threshold variable and add them to pruning collection.
      # The mask starts at all-ones (nothing pruned) and the threshold at 0.
      mask_pc = weight_params_fn(weight_shape, weight_init_obj.Constant(1.0),
                                 weight_dtype)
      threshold_pc = weight_params_fn([], weight_init_obj.Constant(0.0),
                                      tf.float32)
      layer_obj.CreateVariable('mask', mask_pc, trainable=False)
      layer_obj.CreateVariable('threshold', threshold_pc, trainable=False)
      # Guard against registering the same layer twice in the pruning
      # collections (CreateVariable may be re-entered for shared layers).
      if layer_obj.vars.mask not in tf.get_collection(pruning.MASK_COLLECTION):
        tf.add_to_collection(pruning.WEIGHT_COLLECTION,
                             getattr(layer_obj.vars, weight_name))
        tf.add_to_collection(pruning.MASK_COLLECTION, layer_obj.vars.mask)
        tf.add_to_collection(pruning.THRESHOLD_COLLECTION,
                             layer_obj.vars.threshold)
      # Gradient-based pruning additionally tracks a running gradient and two
      # weight snapshots per layer.
      if prune_option in ['first_order_gradient', 'second_order_gradient']:
        grad_pc = weight_params_fn(weight_shape,
                                   weight_init_obj.Constant(0.0), weight_dtype)
        layer_obj.CreateVariable('gradient', grad_pc, trainable=False)
        layer_obj.CreateVariable('old_weight', grad_pc, trainable=False)
        layer_obj.CreateVariable('old_old_weight', grad_pc, trainable=False)
        tf.add_to_collection(pruning.WEIGHT_GRADIENT_COLLECTION,
                             layer_obj.vars.gradient)
        tf.add_to_collection(pruning.OLD_WEIGHT_COLLECTION,
                             layer_obj.vars.old_weight)
        tf.add_to_collection(pruning.OLD_OLD_WEIGHT_COLLECTION,
                             layer_obj.vars.old_old_weight)
  else:
    # Compression path: delegate variable creation to the compression object;
    # the return value is intentionally discarded (side effects only).
    _ = matrix_compression_obj.customized_apply_compression(
        getattr(layer_obj.vars, weight_name), layer_obj, weight_params_fn,
        weight_init_obj, scope=scope_name, spec=spec)
    hparams = matrix_compression_obj.get_spec()
    if hparams.use_collection:
      tf.add_to_collection(UPDATE_OP_COLLECTION,
                           matrix_compression_obj.all_update_op())
def resnet_model_fn(features, labels, mode, params):
  """The model_fn for ResNet to be used with TPUEstimator.

  Builds a ResNet that regresses a "summary statistic" vector and, depending
  on `params['training_loss']`, trains it with MAE/MSE or with a conditional
  masked-autoregressive-flow density model (VMIM).

  Args:
    features: `Tensor` of batched images. If transpose_input is enabled, it
      is transposed to device layout and reshaped to 1D tensor.
    labels: `Tensor` of labels for the data samples
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
      `params['batch_size']` is always provided and should be used as the
      effective batch size.

  Returns:
    A `TPUEstimatorSpec` for the model
  """
  if isinstance(features, dict):
    features = features['feature']

  # In most cases, the default data format NCHW instead of NHWC should be
  # used for a significant performance boost on GPU/TPU. NHWC should be used
  # only if the network needs to be run on CPU since the pooling operations
  # are only supported on NHWC.
  if params['data_format'] == 'channels_first':
    assert not params['transpose_input']  # channels_first only for GPU
    features = tf.transpose(features, [0, 3, 1, 2])

  if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
    image_size = params['image_size']
    features = tf.reshape(features, [image_size, image_size, 1, -1])
    features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

  # DropBlock keep_prob for the 4 block groups of ResNet architecture.
  # None means applying no DropBlock at the corresponding block group.
  dropblock_keep_probs = [None] * 4
  if params['dropblock_groups']:
    # Scheduled keep_prob for DropBlock: linearly anneals from 1.0 down to
    # params['dropblock_keep_prob'] over the course of training.
    train_steps = tf.cast(params['train_steps'], tf.float32)
    current_step = tf.cast(tf.train.get_global_step(), tf.float32)
    current_ratio = current_step / train_steps
    dropblock_keep_prob = (1 - current_ratio * (
        1 - params['dropblock_keep_prob']))

    # Computes DropBlock keep_prob for different block groups of ResNet.
    dropblock_groups = [int(x) for x in params['dropblock_groups'].split(',')]
    for block_group in dropblock_groups:
      if block_group < 1 or block_group > 4:
        raise ValueError(
            'dropblock_groups should be a comma separated list of integers '
            'between 1 and 4 (dropblcok_groups: {}).'
            .format(params['dropblock_groups']))
      # Later block groups get a keep_prob closer to the scheduled value;
      # earlier groups are attenuated by powers of 4.
      dropblock_keep_probs[block_group - 1] = 1 - (
          (1 - dropblock_keep_prob) / 4.0**(4 - block_group))

  # This nested function allows us to avoid duplicating the logic which
  # builds the network, for different values of --precision.
  def build_network():
    network = resnet_model.resnet_v1(
        resnet_depth=params['resnet_depth'],
        num_classes=params['num_label_classes'],
        dropblock_size=params['dropblock_size'],
        dropblock_keep_probs=dropblock_keep_probs,
        data_format=params['data_format'])
    return network(
        inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))

  # Compute the summary statistic (network output), optionally under a
  # bfloat16 scope for TPU speed; the result is always cast back to float32.
  if params['precision'] == 'bfloat16':
    with tf.tpu.bfloat16_scope():
      sum_stat = build_network()
    sum_stat = tf.cast(sum_stat, tf.float32)
  elif params['precision'] == 'float32':
    sum_stat = build_network()

  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'summary': sum_stat,
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'inference': tf.estimator.export.PredictOutput(predictions)
        })

  n = params['num_label_classes']
  # If necessary, in the model_fn, use params['batch_size'] instead the batch
  # size flags (--train_batch_size or --eval_batch_size).
  batch_size = params['batch_size']  # pylint: disable=unused-variable

  # Add a little bit of scatter to the labels to smooth out the distribution.
  # NOTE(review): this is Gaussian label jitter, not classic label smoothing —
  # params['label_smoothing'] is used as a noise scale here.
  if (params['label_smoothing'] > 0.) and (mode == tf.estimator.ModeKeys.TRAIN):
    labels += params['label_smoothing']*tf.random_normal(shape=[batch_size, n])

  # Now build a conditional density estimator from this density
  if params['training_loss'] == 'VMIM':
    net = sum_stat
    # Below is the chain for a MAF (masked autoregressive flow), conditioned
    # on the network output `net`; the first two MAF layers learn shift and
    # scale, the last two shift only, with a reversal permutation between
    # each pair so all dimensions interact.
    chain = [
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128,128], conditional_tensor=net,
                shift_only=False)),
        tfb.Permute(np.arange(n)[::-1]),
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128,128], conditional_tensor=net,
                shift_only=False)),
        tfb.Permute(np.arange(n)[::-1]),
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128,128], conditional_tensor=net,
                shift_only=True)),
        tfb.Permute(np.arange(n)[::-1]),
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128,128], conditional_tensor=net,
                shift_only=True)),
    ]
    bij = tfb.Chain(chain)
    prior = tfd.MultivariateNormalDiag(loc=tf.zeros(n),
                                       scale_identity_multiplier=1.0)
    distribution = tfd.TransformedDistribution(prior, bijector=bij)

    # Negative log-likelihood of the labels under the conditional flow.
    loss = - tf.reduce_mean(distribution.log_prob(labels),axis=0)
  elif params['training_loss'] == 'MAE':
    loss = tf.reduce_mean(tf.keras.losses.mae(labels, sum_stat),axis=0)
  elif params['training_loss'] == 'MSE':
    loss = tf.reduce_mean(tf.keras.losses.mse(labels, sum_stat),axis=0)
  else:
    raise NotImplementedError

  # Add weight decay to the loss for non-batch-normalization variables.
  # (LARS applies its own weight decay, so skip the explicit L2 term there.)
  if params['enable_lars']:
    loss = loss
  else:
    loss = loss + params['weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

  host_call = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    # Compute the current epoch and associated learning rate from global_step.
    global_step = tf.train.get_global_step()
    steps_per_epoch = params['num_train_images'] / params['train_batch_size']
    current_epoch = (tf.cast(global_step, tf.float32) / steps_per_epoch)
    # LARS is a large batch optimizer. LARS enables higher accuracy at batch
    # 16K and larger batch sizes.
    if params['enable_lars']:
      learning_rate = 0.0
      optimizer = lars_util.init_lars_optimizer(current_epoch, params)
    else:
      learning_rate = learning_rate_schedule(params, current_epoch)
      optimizer = tf.train.MomentumOptimizer(
          learning_rate=learning_rate,
          momentum=params['momentum'],
          use_nesterov=True)
    if params['use_tpu']:
      # When using TPU, wrap the optimizer with CrossShardOptimizer which
      # handles synchronization details between different TPU cores. To the
      # user, this should look like regular synchronous training.
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch normalization requires UPDATE_OPS to be added as a dependency to
    # the train operation.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step)

    if not params['skip_host_call']:
      def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the
        second element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor with shape `[batch]` for the global_step
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        # Host call fns are executed params['iterations_per_loop'] times after
        # one TPU loop is finished, setting max_queue value to the same as
        # number of iterations will make the summary writer only flush the
        # data to storage once per loop.
        with tf2.summary.create_file_writer(
            FLAGS.model_dir,
            max_queue=params['iterations_per_loop']).as_default():
          with tf2.summary.record_if(True):
            tf2.summary.scalar('loss', loss[0], step=gs)
            tf2.summary.scalar('learning_rate', lr[0], step=gs)
            tf2.summary.scalar('current_epoch', ce[0], step=gs)
          return tf.summary.all_v2_summary_ops()

      # To log the loss, current learning rate, and epoch for Tensorboard, the
      # summary op needs to be run on the host CPU via host_call. host_call
      # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
      # dimension. These Tensors are implicitly concatenated to
      # [params['batch_size']].
      gs_t = tf.reshape(global_step, [1])
      loss_t = tf.reshape(loss, [1])
      lr_t = tf.reshape(learning_rate, [1])
      ce_t = tf.reshape(current_epoch, [1])

      host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])
  else:
    train_op = None

  eval_metrics = None
  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics)
def __init__(self):
    """Build the full YOLOv3 training graph, session, and summary writers.

    Constructs placeholders, the YOLOV3 model and its losses, a warmup+cosine
    learning-rate schedule, two train ops (frozen-backbone first stage and
    all-variables second stage), EMA weight averaging, and loader/saver ops.
    All hyperparameters come from the global `cfg` object.
    """
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)
    self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
    self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
    # NOTE: "FISRT" is a typo in the config attribute name itself; keep as-is.
    self.first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
    self.initial_weight = cfg.TRAIN.INITIAL_WEIGHT
    self.time = time.strftime('%Y-%m-%d-%H-%M-%S',
                              time.localtime(time.time()))
    self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY
    self.max_bbox_per_scale = 150
    self.train_logdir = "./data/log/train"
    self.trainset = Dataset('train')
    self.testset = Dataset('test')
    self.steps_per_period = len(self.trainset)
    self.sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True))

    with tf.name_scope('define_input'):
        # Shapes are left unspecified; the Dataset feeds batches at runtime.
        self.input_data = tf.placeholder(dtype=tf.float32, name='input_data')
        self.label_sbbox = tf.placeholder(dtype=tf.float32, name='label_sbbox')
        self.label_mbbox = tf.placeholder(dtype=tf.float32, name='label_mbbox')
        self.label_lbbox = tf.placeholder(dtype=tf.float32, name='label_lbbox')
        self.true_sbboxes = tf.placeholder(dtype=tf.float32, name='sbboxes')
        self.true_mbboxes = tf.placeholder(dtype=tf.float32, name='mbboxes')
        self.true_lbboxes = tf.placeholder(dtype=tf.float32, name='lbboxes')
        self.trainable = tf.placeholder(dtype=tf.bool, name='training')

    with tf.name_scope("define_loss"):
        self.model = YOLOV3(self.input_data, self.trainable)
        self.net_var = tf.global_variables()
        self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
            self.label_sbbox, self.label_mbbox, self.label_lbbox,
            self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
        self.loss = self.giou_loss + self.conf_loss + self.prob_loss

    with tf.name_scope('learn_rate'):
        # Custom float global step (incremented manually below), warmup ramp
        # followed by cosine decay from learn_rate_init to learn_rate_end.
        self.global_step = tf.Variable(1.0, dtype=tf.float64,
                                       trainable=False, name='global_step')
        warmup_steps = tf.constant(self.warmup_periods * self.steps_per_period,
                                   dtype=tf.float64, name='warmup_steps')
        train_steps = tf.constant(
            (self.first_stage_epochs + self.second_stage_epochs) *
            self.steps_per_period, dtype=tf.float64, name='train_steps')
        self.learn_rate = tf.cond(
            pred=self.global_step < warmup_steps,
            true_fn=lambda: self.global_step / warmup_steps * self.
            learn_rate_init,
            false_fn=lambda: self.learn_rate_end + 0.5 *
            (self.learn_rate_init - self.learn_rate_end) * (1 + tf.cos(
                (self.global_step - warmup_steps) /
                (train_steps - warmup_steps) * np.pi)))
        global_step_update = tf.assign_add(self.global_step, 1.0)

    with tf.name_scope("define_weight_decay"):
        # EMA of all trainable variables, applied as a train-op dependency.
        moving_ave = tf.train.ExponentialMovingAverage(
            self.moving_ave_decay).apply(tf.trainable_variables())

    with tf.name_scope("define_first_stage_train"):
        # Stage 1: train only the three detection heads (backbone frozen).
        self.first_stage_trainable_var_list = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            var_name_mess = str(var_name).split('/')
            if var_name_mess[0] in [
                    'conv_sbbox', 'conv_mbbox', 'conv_lbbox'
            ]:
                self.first_stage_trainable_var_list.append(var)
        first_stage_optimizer = tf.train.AdamOptimizer(
            self.learn_rate).minimize(
                self.loss, var_list=self.first_stage_trainable_var_list)
        # Chain: BN update ops -> optimizer + step increment -> EMA update.
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies(
                    [first_stage_optimizer, global_step_update]):
                with tf.control_dependencies([moving_ave]):
                    self.train_op_with_frozen_variables = tf.no_op()

    with tf.name_scope("define_second_stage_train"):
        # Stage 2: fine-tune every trainable variable.
        second_stage_trainable_var_list = tf.trainable_variables()
        second_stage_optimizer = tf.train.AdamOptimizer(
            self.learn_rate).minimize(
                self.loss, var_list=second_stage_trainable_var_list)
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies(
                    [second_stage_optimizer, global_step_update]):
                with tf.control_dependencies([moving_ave]):
                    self.train_op_with_all_variables = tf.no_op()

    with tf.name_scope('loader_and_saver'):
        # Loader restores only pre-model variables; saver checkpoints all.
        self.loader = tf.train.Saver(self.net_var)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    with tf.name_scope('summary'):
        tf.summary.scalar("learn_rate", self.learn_rate)
        tf.summary.scalar("giou_loss", self.giou_loss)
        tf.summary.scalar("conf_loss", self.conf_loss)
        tf.summary.scalar("prob_loss", self.prob_loss)
        tf.summary.scalar("total_loss", self.loss)
        # Wipe any previous run's logs before creating the writer.
        logdir = "./data/log/"
        if os.path.exists(logdir): shutil.rmtree(logdir)
        os.mkdir(logdir)
        self.write_op = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(logdir,
                                                    graph=self.sess.graph)
def model_fn(features, labels, mode, params=None):  #//@follow-up Estmator Evaluation (7)
  """Build model and optimizer.

  SimCLR-style estimator model_fn: pretrain mode optimizes a contrastive
  loss on a projection head; finetune mode optimizes a supervised head;
  a custom 'predict' train_mode short-circuits to an EstimatorSpec with
  top-1/top-5 predictions.
  """
  is_training = mode == tf.estimator.ModeKeys.TRAIN

  # Check training mode.
  if FLAGS.train_mode == 'pretrain':
    num_transforms = 2
    if FLAGS.fine_tune_after_block > -1:
      raise ValueError('Does not support layer freezing during pretraining,'
                       'should set fine_tune_after_block<=-1 for safety.')
  elif FLAGS.train_mode == 'finetune':  #//@follow-up Estmator Evaluation (8)
    num_transforms = 1
  #boostx add predict
  elif FLAGS.train_mode == 'predict':  #//@audit predict
    # NOTE(review): ModeKeys.TRAIN is passed as the second argument here,
    # which looks like it forces training-mode behavior (e.g. BN) during
    # prediction — confirm this is intentional.
    predictions,endpoints = model(features["image"],
                                  tf.estimator.ModeKeys.TRAIN)
    _,top_5 = tf.nn.top_k(predictions,k=5)
    predictions = {
        'top_1': tf.argmax(predictions, -1),
        'top_5': top_5,
        'probabilities': tf.nn.softmax(predictions),
        'logits': predictions,
    }
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)
  #boostx:end
  else:
    raise ValueError('Unknown train_mode {}'.format(FLAGS.train_mode))

  # Split channels, and optionally apply extra batched augmentation.
  #//@follow-up Estmator Evaluation (9)
  features_list = tf.split(
      features, num_or_size_splits=num_transforms, axis=-1)
  if FLAGS.use_blur and is_training and FLAGS.train_mode == 'pretrain':
    features_list = data_util.batch_random_blur(
        features_list, FLAGS.image_size, FLAGS.image_size)
  features = tf.concat(features_list, 0)  # (num_transforms * bsz, h, w, c)

  # Base network forward pass.
  with tf.variable_scope('base_model'):
    if FLAGS.train_mode == 'finetune' and FLAGS.fine_tune_after_block >= 4:
      # Finetune just supervised (linear) head will not update BN stats.
      model_train_mode = False
    else:
      # Pretrain or finetuen anything else will update BN stats.
      model_train_mode = is_training
    hiddens = model(features, is_training=model_train_mode)

  # Add head and loss.
  if FLAGS.train_mode == 'pretrain':
    tpu_context = params['context'] if 'context' in params else None
    hiddens_proj = model_util.projection_head(hiddens, is_training)
    contrast_loss, logits_con, labels_con = obj_lib.add_contrastive_loss(
        hiddens_proj,
        hidden_norm=FLAGS.hidden_norm,
        temperature=FLAGS.temperature,
        tpu_context=tpu_context if is_training else None)
    # Dummy supervised logits so downstream metric plumbing has a tensor.
    logits_sup = tf.zeros([params['batch_size'], num_classes])
  else:
    # Dummy contrastive tensors for the finetune path (same reason).
    contrast_loss = tf.zeros([])
    logits_con = tf.zeros([params['batch_size'], 10])
    labels_con = tf.zeros([params['batch_size'], 10])
    logits_sup = model_util.supervised_head(
        hiddens, num_classes, is_training)
    obj_lib.add_supervised_loss(
        labels=labels['labels'],
        logits=logits_sup,
        weights=labels['mask'])

  # Add weight decay to loss, for non-LARS optimizers.
  model_util.add_weight_decay(adjust_per_optimizer=True)
  loss = tf.losses.get_total_loss()

  if FLAGS.train_mode == 'pretrain':
    variables_to_train = tf.trainable_variables()
  else:
    # Finetune: only train variables registered for blocks after the
    # freeze point.
    collection_prefix = 'trainable_variables_inblock_'
    variables_to_train = []
    for j in range(FLAGS.fine_tune_after_block + 1, 6):
      variables_to_train += tf.get_collection(collection_prefix + str(j))
    assert variables_to_train, 'variables_to_train shouldn\'t be empty!'

  tf.logging.info('===============Variables to train (begin)===============')
  tf.logging.info(variables_to_train)
  tf.logging.info('================Variables to train (end)================')

  learning_rate = model_util.learning_rate_schedule(
      FLAGS.learning_rate, num_train_examples)

  if is_training:
    if FLAGS.train_summary_steps > 0:
      # Compute stats for the summary.
      prob_con = tf.nn.softmax(logits_con)
      entropy_con = - tf.reduce_mean(
          tf.reduce_sum(prob_con * tf.math.log(prob_con + 1e-8), -1))

      summary_writer = tf2.summary.create_file_writer(FLAGS.model_dir)
      # TODO(iamtingchen): remove this control_dependencies in the future.
      with tf.control_dependencies([summary_writer.init()]):
        with summary_writer.as_default():
          # Only record summaries every train_summary_steps steps.
          should_record = tf.math.equal(
              tf.math.floormod(tf.train.get_global_step(),
                               FLAGS.train_summary_steps), 0)
          with tf2.summary.record_if(should_record):
            contrast_acc = tf.equal(
                tf.argmax(labels_con, 1), tf.argmax(logits_con, axis=1))
            contrast_acc = tf.reduce_mean(tf.cast(contrast_acc, tf.float32))
            label_acc = tf.equal(
                tf.argmax(labels['labels'], 1), tf.argmax(logits_sup, axis=1))
            label_acc = tf.reduce_mean(tf.cast(label_acc, tf.float32))
            tf2.summary.scalar(
                'train_contrast_loss',
                contrast_loss,
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'train_contrast_acc',
                contrast_acc,
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'train_label_accuracy',
                label_acc,
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'contrast_entropy',
                entropy_con,
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'learning_rate', learning_rate,
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'input_mean',
                tf.reduce_mean(features),
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'input_max',
                tf.reduce_max(features),
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'input_min',
                tf.reduce_min(features),
                step=tf.train.get_global_step())
            tf2.summary.scalar(
                'num_labels',
                tf.reduce_mean(tf.reduce_sum(labels['labels'], -1)),
                step=tf.train.get_global_step())

    if FLAGS.optimizer == 'momentum':
      optimizer = tf.train.MomentumOptimizer(
          learning_rate, FLAGS.momentum, use_nesterov=True)
    elif FLAGS.optimizer == 'adam':
      optimizer = tf.train.AdamOptimizer(
          learning_rate)
    elif FLAGS.optimizer == 'lars':
      optimizer = LARSOptimizer(
          learning_rate,
          momentum=FLAGS.momentum,
          weight_decay=FLAGS.weight_decay,
          exclude_from_weight_decay=['batch_normalization', 'bias'])
    else:
      raise ValueError('Unknown optimizer {}'.format(FLAGS.optimizer))

    if FLAGS.use_tpu:
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # BN update ops (and summary ops, if enabled) must run with the train op.
    control_deps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if FLAGS.train_summary_steps > 0:
      control_deps.extend(tf.summary.all_v2_summary_ops())
    with tf.control_dependencies(control_deps):
      train_op = optimizer.minimize(
          loss,
          global_step=tf.train.get_or_create_global_step(),
          var_list=variables_to_train)

    if FLAGS.checkpoint:
      def scaffold_fn():
        """Scaffold function to restore non-logits vars from checkpoint."""
        tf.train.init_from_checkpoint(
            FLAGS.checkpoint,
            {v.op.name: v.op.name
             for v in tf.global_variables(FLAGS.variable_schema)})

        if FLAGS.zero_init_logits_layer:
          # Init op that initializes output layer parameters to zeros.
          output_layer_parameters = [
              var for var in tf.trainable_variables()
              if var.name.startswith('head_supervised')]
          tf.logging.info('Initializing output layer parameters %s to zero',
                          [x.op.name for x in output_layer_parameters])
          with tf.control_dependencies([tf.global_variables_initializer()]):
            init_op = tf.group([
                tf.assign(x, tf.zeros_like(x))
                for x in output_layer_parameters])
          return tf.train.Scaffold(init_op=init_op)
        else:
          return tf.train.Scaffold()
    else:
      scaffold_fn = None

    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode, train_op=train_op, loss=loss, scaffold_fn=scaffold_fn)
  else:
    def metric_fn(logits_sup, labels_sup, logits_con,
                  labels_con, mask, **kws):  #//@follow-up metric_fn (0)
      """Inner metric function."""
      # Any extra tensors passed via `metrics` become masked means.
      metrics = {k: tf.metrics.mean(v, weights=mask)
                 for k, v in kws.items()}
      metrics['label_top_1_accuracy'] = tf.metrics.accuracy(
          tf.argmax(labels_sup, 1), tf.argmax(logits_sup, axis=1),
          weights=mask)
      metrics['label_top_5_accuracy'] = tf.metrics.recall_at_k(
          tf.argmax(labels_sup, 1), logits_sup, k=5, weights=mask)
      metrics['contrastive_top_1_accuracy'] = tf.metrics.accuracy(
          tf.argmax(labels_con, 1), tf.argmax(logits_con, axis=1),
          weights=mask)
      metrics['contrastive_top_5_accuracy'] = tf.metrics.recall_at_k(
          tf.argmax(labels_con, 1), logits_con, k=5, weights=mask)
      #//@audit save the predicted (label, logit) to logfile
      metrics['boostx_recall'] = tf.metrics.recall(
          tf.argmax(labels_sup, 1), tf.argmax(logits_sup, axis=1),
          weights=mask)
      return metrics

    metrics = {
        'logits_sup': logits_sup,
        'labels_sup': labels['labels'],
        'logits_con': logits_con,
        'labels_con': labels_con,
        'mask': labels['mask'],
        'contrast_loss': tf.fill((params['batch_size'],), contrast_loss),
        'regularization_loss': tf.fill((params['batch_size'],),
                                       tf.losses.get_regularization_loss()),
    }

    return tf.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        eval_metrics=(metric_fn, metrics),  #//@follow-up metric_fn (-1)
        scaffold_fn=None)
def get_customized_apply_compression_op(self,
                                        a_matrix_tfvar,
                                        matrix_compressor,
                                        layer_obj,
                                        weight_params_fn,
                                        weight_init_obj,
                                        scope='default_scope'):
  """Returns pruning + kmeans compressed operator for a customized layer.

  Args:
    a_matrix_tfvar: TF variable representing a tensor variable in a model.
    matrix_compressor: MatrixCompressorInferface object to specify the
      compression algorithm. Must return two matrices b_matrix,c_matrix in
      its compression.
    layer_obj: a customeried layer object that handles variable creation.
    weight_params_fn: functional handle to create model parameters.
    weight_init_obj: a weight initialization object.
    scope: TF scope used for creating new TF variables.

  Returns:
    A TF node that has the compressed version of a_matrix_tfvar.
  """
  self.matrix_compressor = matrix_compressor
  # Run the static compressor on a zero matrix purely to discover the shapes
  # of the b/c factor matrices and the (un)compressed sizes.
  a_matrix = np.zeros(shape=a_matrix_tfvar.shape)
  if getattr(self._spec, 'do_transpose', False):
    a_matrix = np.transpose(a_matrix)
  [b_matrix, c_matrix] = matrix_compressor.static_matrix_compressor(a_matrix)
  self.uncompressed_size = matrix_compressor.uncompressed_size
  self.compressed_size = matrix_compressor.compressed_size

  p = layer_obj.params
  with tf.variable_scope(scope) as scope:
    # Create pruning relevant variables.
    mask_pc = weight_params_fn(a_matrix.shape, weight_init_obj.Constant(1.0),
                               p.dtype)
    threshold_pc = weight_params_fn([], weight_init_obj.Constant(0.0),
                                    tf.float32)
    self._create_layer_variable(layer_obj, 'mask', mask_pc, None, False)
    self._create_layer_variable(layer_obj, 'threshold', threshold_pc, None,
                                False)
    # Only register this layer in the pruning collections once.
    if layer_obj.vars.mask not in tf.get_collection(pruning.MASK_COLLECTION):
      tf.add_to_collection(pruning.WEIGHT_COLLECTION, layer_obj.vars.wm)
      tf.add_to_collection(pruning.MASK_COLLECTION, layer_obj.vars.mask)
      tf.add_to_collection(pruning.THRESHOLD_COLLECTION,
                           layer_obj.vars.threshold)
    # Gradient-based prune options need extra gradient/snapshot variables.
    if self.pruning_obj.get_spec().prune_option in [
        'first_order_gradient', 'second_order_gradient'
    ]:
      grad_pc = weight_params_fn(a_matrix.shape,
                                 weight_init_obj.Constant(0.0), p.dtype)
      self._create_layer_variable(layer_obj, 'gradient', grad_pc, None, False)
      self._create_layer_variable(layer_obj, 'old_weight', grad_pc, None,
                                  False)
      self._create_layer_variable(layer_obj, 'old_old_weight', grad_pc, None,
                                  False)
      tf.add_to_collection(pruning.WEIGHT_GRADIENT_COLLECTION,
                           layer_obj.vars.gradient)
      tf.add_to_collection(pruning.OLD_WEIGHT_COLLECTION,
                           layer_obj.vars.old_weight)
      tf.add_to_collection(pruning.OLD_OLD_WEIGHT_COLLECTION,
                           layer_obj.vars.old_old_weight)

    # Compression factor variables: b is a (possibly trainable) codebook,
    # c holds int32 codes, alpha blends pruned vs. compressed weights.
    b_matrix_pc = weight_params_fn(b_matrix.shape,
                                   weight_init_obj.Constant(1.0), p.dtype)
    c_matrix_pc = weight_params_fn(c_matrix.shape,
                                   weight_init_obj.Constant(1), tf.int32)
    alpha_pc = weight_params_fn([], weight_init_obj.Constant(1.0), tf.float32)

    self._create_layer_variable(layer_obj, 'alpha', alpha_pc, None, False)
    self._create_layer_variable(
        layer_obj,
        'b_matrix_tfvar',
        b_matrix_pc,
        None,
        trainable=self.matrix_compressor.get_spec().is_b_matrix_trainable)
    self._create_layer_variable(
        layer_obj,
        'c_matrix_tfvar',
        c_matrix_pc,
        None,
        trainable=self.matrix_compressor.get_spec().is_c_matrix_trainable)

    self.b_matrix_tfvar = layer_obj.vars.b_matrix_tfvar
    self.c_matrix_tfvar = layer_obj.vars.c_matrix_tfvar
    self.alpha = layer_obj.vars.alpha
    self.a_matrix_tfvar = a_matrix_tfvar
    self.mask = layer_obj.vars.mask
    self.threshold = layer_obj.vars.threshold

    # Pruned weight: element-wise weight * mask.
    self.pruned_a_matrix_tfvar = tf.multiply(layer_obj.vars.wm,
                                             layer_obj.vars.mask,
                                             'masked_weight')

    def maybe_apply_compression():
      """Decide whether global step is within compression range.

      Returns:
        is_step_within_compression_range: bool.
      """
      with tf.compat.v1.name_scope(self._spec.name):
        # Compress if current step is more than begin_compression_step and
        # less than end_compression_step (unless it's negative)
        global_step = tf.train.get_global_step()
        def real_global_step_fn():
          return tf.cast(tf.train.get_global_step(), tf.int32)
        def mock_global_step_fn():
          return self._spec.begin_compression_step
        def is_global_step_none(global_step):
          # `global_step is None` is a Python-time check wrapped as a tensor
          # so both tf.cond branches are well-defined.
          return tf.constant(global_step is None, dtype=tf.bool)
        global_step = tf.cond(is_global_step_none(global_step),
                              mock_global_step_fn, real_global_step_fn)
        is_step_within_compression_range = tf.logical_and(
            tf.greater_equal(
                tf.cast(global_step, tf.int32),
                self._spec.begin_compression_step),
            tf.logical_or(
                tf.less_equal(
                    tf.cast(global_step, tf.int32),
                    self._spec.end_compression_step),
                tf.less(self._spec.end_compression_step, 0)))
        return is_step_within_compression_range

    # Blended op: alpha * pruned weight + (1-alpha) * (codebook lookup,
    # reshaped back to the weight's shape, re-masked).
    if getattr(self._spec, 'do_transpose', False):
      self.pruning_and_compression_op = (
          self.alpha * self.pruned_a_matrix_tfvar +
          (1 - self.alpha) * tf.math.multiply(
              tf.transpose(
                  tf.reshape(
                      tf.nn.embedding_lookup(self.b_matrix_tfvar,
                                             self.c_matrix_tfvar),
                      tf.transpose(a_matrix_tfvar).shape)),
              self.mask,
              name='pruned_compressed_weight'))
    else:
      self.pruning_and_compression_op = (
          self.alpha * self.pruned_a_matrix_tfvar +
          (1 - self.alpha) * tf.math.multiply(
              tf.reshape(
                  tf.nn.embedding_lookup(self.b_matrix_tfvar,
                                         self.c_matrix_tfvar),
                  a_matrix_tfvar.shape),
              self.mask,
              name='pruned_compressed_weight'))

    def pruned_a_matrix_fn():
      return self.pruned_a_matrix_tfvar

    def quantized_pruned_a_matrix_fn():
      return self.pruning_and_compression_op

    # Use only the pruned weight until the step enters the compression range.
    self.final_op = tf.cond(maybe_apply_compression(),
                            quantized_pruned_a_matrix_fn, pruned_a_matrix_fn)

    self.add_compression_summaries()
    self.pruning_obj.add_pruning_summaries()
    self.update_op = tf.no_op()
    return [self.final_op, self.update_op]
def get_layer_variables_by_scope(scope_name):
  """Return all MODEL_VARIABLES whose name contains `scope_name` + '/'."""
  prefix = scope_name + '/'
  return [
      var for var in tf.get_collection(tf.GraphKeys.MODEL_VARIABLES)
      if prefix in var.name
  ]
def __init__(self, sess, env, handle, name, update_every=5, use_mf=False,
             learning_rate=1e-4, tau=0.005, gamma=0.95):
    """Build a Q-value network with eval/target nets and a soft-update op.

    Args:
        sess: a tf.Session used to run the graph.
        env: environment exposing get_view_space/get_feature_space/
            get_action_space for a given handle.
        handle: environment handle identifying the agent group.
        name: variable-scope name for this network.
        update_every: target-network update interval (stored; used elsewhere).
        use_mf: whether to enable mean-field action-probability inputs.
        learning_rate: Adam learning rate.
        tau: soft-update mixing coefficient for the target network.
        gamma: discount factor (stored; used elsewhere).
    """
    # assert isinstance(env, GridWorld)
    self.env = env
    self.name = name
    self._saver = None
    self.sess = sess

    self.handle = handle
    self.view_space = env.get_view_space(handle)
    assert len(self.view_space) == 3  # expects a 3-D (spatial) observation
    self.feature_space = env.get_feature_space(handle)
    self.num_actions = env.get_action_space(handle)[0]

    self.update_every = update_every
    self.use_mf = use_mf  # trigger of using mean field
    self.temperature = 0.1

    self.lr = learning_rate
    self.tau = tau
    self.gamma = gamma

    with tf.variable_scope(name or "ValueNet"):
        self.name_scope = tf.get_variable_scope().name
        self.obs_input = tf.placeholder(tf.float32, (None, ) +
                                        self.view_space, name="Obs-Input")
        self.feat_input = tf.placeholder(tf.float32, (None, ) +
                                         self.feature_space,
                                         name="Feat-Input")
        self.mask = tf.placeholder(tf.float32, shape=(None, ),
                                   name='Terminate-Mask')

        if self.use_mf:
            # Mean-field mode: four neighbour action-probability inputs.
            self.act_prob_input0 = tf.placeholder(tf.float32,
                                                  (None, self.num_actions),
                                                  name="Act-Prob-Input0")
            self.act_prob_input1 = tf.placeholder(tf.float32,
                                                  (None, self.num_actions),
                                                  name="Act-Prob-Input1")
            self.act_prob_input2 = tf.placeholder(tf.float32,
                                                  (None, self.num_actions),
                                                  name="Act-Prob-Input2")
            self.act_prob_input3 = tf.placeholder(tf.float32,
                                                  (None, self.num_actions),
                                                  name="Act-Prob-Input3")

        self.act_input = tf.placeholder(tf.int32, (None, ), name="Act")
        self.act_one_hot = tf.one_hot(self.act_input,
                                      depth=self.num_actions,
                                      on_value=1.0, off_value=0.0)

        with tf.variable_scope("Eval-Net"):
            self.eval_name = tf.get_variable_scope().name
            self.e_q = self._construct_net(active_func=tf.nn.relu)
            # Boltzmann policy over eval-net Q-values.
            self.predict = tf.nn.softmax(self.e_q / self.temperature)
            self.e_variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope=self.eval_name)

        with tf.variable_scope("Target-Net"):
            self.target_name = tf.get_variable_scope().name
            self.t_q = self._construct_net(active_func=tf.nn.relu)
            self.t_variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope=self.target_name)

        with tf.variable_scope("Update"):
            # Polyak soft update: target <- tau * eval + (1 - tau) * target.
            self.update_op = [
                tf.assign(
                    self.t_variables[i],
                    self.tau * self.e_variables[i] +
                    (1. - self.tau) * self.t_variables[i])
                for i in range(len(self.t_variables))
            ]

        with tf.variable_scope("Optimization"):
            self.target_q_input = tf.placeholder(tf.float32, (None, ),
                                                 name="Q-Input")
            # Q-value of the action actually taken.
            self.e_q_max = tf.reduce_sum(tf.multiply(
                self.act_one_hot, self.e_q), axis=1)
            # Masked mean squared TD error (mask zeroes out terminal steps).
            self.loss = tf.reduce_sum(
                tf.square(self.target_q_input - self.e_q_max) *
                self.mask) / tf.reduce_sum(self.mask)
            self.train_op = tf.train.AdamOptimizer(self.lr).minimize(
                self.loss)
def main(unused_argv):
    """Restore a trained baseline model and dump its latent codes `z` to disk.

    Resolves a checkpoint (explicit path or latest in FLAGS.expdir), rebuilds
    the model graph with is_training=False, then runs the input pipeline to
    exhaustion, collecting the `z` tensor from every batch and saving the
    accumulated arrays to FLAGS.savedir.
    """
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.checkpoint_path:
        checkpoint_path = FLAGS.checkpoint_path
    else:
        expdir = FLAGS.expdir
        tf.logging.info("Will load latest checkpoint from %s.", expdir)
        # Fixed: this was a `while` loop whose body unconditionally exits —
        # a plain `if` states the intent (fatal error when the dir is missing).
        if not tf.gfile.Exists(expdir):
            tf.logging.fatal("\tExperiment save dir '%s' does not exist!", expdir)
            sys.exit(1)
        try:
            checkpoint_path = tf.train.latest_checkpoint(expdir)
        except tf.errors.NotFoundError:
            tf.logging.fatal(
                "There was a problem determining the latest checkpoint.")
            sys.exit(1)

    if not tf.train.checkpoint_exists(checkpoint_path):
        tf.logging.fatal("Invalid checkpoint path: %s", checkpoint_path)
        sys.exit(1)

    savedir = FLAGS.savedir
    if not tf.gfile.Exists(savedir):
        tf.gfile.MakeDirs(savedir)

    # Make the graph
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            model = utils.get_module("baseline.models.%s" % FLAGS.model)
            hparams = model.get_hparams(FLAGS.config)

            # Load the trained model with is_training=False
            with tf.name_scope("Reader"):
                batch = reader.NSynthDataset(
                    FLAGS.tfrecord_path,
                    is_training=False).get_baseline_batch(hparams)

            # train_op is built only for its graph side effects; the `z`
            # tensor it registers in the "z" collection is what we fetch.
            _ = model.train_op(batch, hparams, FLAGS.config)
            z = tf.get_collection("z")[0]

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            sess.run(init_op)

            # Restore variables from disk.
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)
            tf.logging.info("Model restored.")

            # Start up some threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            i = 0
            z_val = []
            try:
                while True:
                    if coord.should_stop():
                        break
                    res_val = sess.run([z])
                    z_val.append(res_val[0])
                    tf.logging.info("Iter: %d" % i)
                    tf.logging.info("Z:{}".format(res_val[0].shape))
                    i += 1
                    # BUG FIX: `i + 1 % 1 == 0` parsed as `i + (1 % 1) == 0`,
                    # i.e. `i == 0`, so the intermediate save only ever fired
                    # once. With the intended parenthesization and a period of
                    # 1, results are checkpointed after every batch.
                    if (i + 1) % 1 == 0:
                        save_arrays(savedir, hparams, z_val)
            # Report all exceptions to the coordinator, pylint: disable=broad-except
            except Exception as e:
                coord.request_stop(e)
            # pylint: enable=broad-except
            finally:
                # Always flush whatever was collected, even on error/EOF.
                save_arrays(savedir, hparams, z_val)
                # Terminate as usual. It is innocuous to request stop twice.
                coord.request_stop()
                coord.join(threads)
def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps,
                     use_tpu, exclude_bert):
    """Creates an optimizer training op, optionally excluding BERT vars.

    Builds the standard BERT fine-tuning schedule: linear warmup from 0 to
    `init_lr` over `num_warmup_steps`, then linear (polynomial power=1) decay
    to 0 over `num_train_steps`, driving an AdamWeightDecay optimizer with
    global-norm gradient clipping.

    Args:
        loss: scalar loss tensor to minimize.
        init_lr: peak learning rate reached at the end of warmup.
        num_train_steps: total steps for the polynomial decay horizon.
        num_warmup_steps: warmup steps; falsy/0 disables warmup entirely.
        use_tpu: wrap the optimizer in CrossShardOptimizer for TPU training.
        exclude_bert: if True, freeze variables collected under the "bert"
            scope and train only the remaining trainable variables.

    Returns:
        A train op that applies clipped gradients and increments global_step.
    """
    global_step = tf.train.get_or_create_global_step()

    learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

    # Implements linear decay of the learning rate.
    learning_rate = tf.train.polynomial_decay(
        learning_rate,
        global_step,
        num_train_steps,
        end_learning_rate=0.0,
        power=1.0,
        cycle=False)

    # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
    # learning rate will be `global_step/num_warmup_steps * init_lr`.
    if num_warmup_steps:
        global_steps_int = tf.cast(global_step, tf.int32)
        warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)

        global_steps_float = tf.cast(global_steps_int, tf.float32)
        warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

        warmup_percent_done = global_steps_float / warmup_steps_float
        warmup_learning_rate = init_lr * warmup_percent_done

        # Select warmup vs. decayed LR with an arithmetic blend (is_warmup
        # is a 0/1 float), avoiding a graph-level conditional.
        is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
        learning_rate = ((1.0 - is_warmup) * learning_rate +
                         is_warmup * warmup_learning_rate)

    # It is recommended that you use this optimizer for fine tuning, since this
    # is how the model was trained (note that the Adam m/v variables are NOT
    # loaded from init_checkpoint.)
    optimizer = optimization.AdamWeightDecayOptimizer(
        learning_rate=learning_rate,
        weight_decay_rate=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

    if use_tpu:
        optimizer = tf_estimator.tpu.CrossShardOptimizer(optimizer)

    tvars = tf.trainable_variables()
    if exclude_bert:
        # Freeze everything under the "bert" scope; only head vars train.
        bert_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "bert")
        tvars = [vv for vv in tvars if vv not in bert_vars]

    tf.logging.info("Training the following variables:")
    for vv in tvars:
        tf.logging.info(vv.name)

    grads = tf.gradients(loss, tvars, colocate_gradients_with_ops=True)

    # This is how the model was pre-trained.
    (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

    train_op = optimizer.apply_gradients(
        zip(grads, tvars), global_step=global_step)

    # AdamWeightDecayOptimizer does not bump global_step itself, so it is
    # incremented manually and grouped with the apply-gradients op.
    new_global_step = global_step + 1
    train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
def find_trainable_variables(key):
    """Return all trainable variables whose scope/name contains `key`.

    `tf.get_collection` treats the second argument as a regex that must match
    a prefix of the item name, so the pattern is padded with `.*` on both
    sides to get substring semantics.
    """
    pattern = ".*{}.*".format(key)
    collection = tf.GraphKeys.TRAINABLE_VARIABLES
    return tf.get_collection(collection, pattern)
def main():
    """Entry point for MAML training/evaluation.

    Reads FLAGS to pick the dataset, build the data pipeline and MAML model,
    restore any existing checkpoint, and dispatch to `train(...)` or
    `test(...)`.
    """
    # Number of inner-loop gradient updates used at meta-test time.
    if FLAGS.datasource == 'sinusoid':
        if FLAGS.train:
            test_num_updates = 1
        else:
            test_num_updates = 10
    else:
        if FLAGS.datasource == 'miniimagenet':
            if FLAGS.train:
                test_num_updates = 1  # eval on at least one update during training
            else:
                test_num_updates = 10
        else:
            test_num_updates = 10

    if not FLAGS.train:
        orig_meta_batch_size = FLAGS.meta_batch_size
        # always use meta batch size of 1 when testing.
        FLAGS.meta_batch_size = 1

    # Build the data generator; the first argument is examples per task.
    if FLAGS.datasource == 'sinusoid':
        data_generator = DataGenerator(FLAGS.update_batch_size * 2,
                                       FLAGS.meta_batch_size)
    else:
        if FLAGS.metatrain_iterations == 0 and FLAGS.datasource == 'miniimagenet':
            assert FLAGS.meta_batch_size == 1
            assert FLAGS.update_batch_size == 1
            data_generator = DataGenerator(
                1, FLAGS.meta_batch_size)  # only use one datapoint,
        else:
            if FLAGS.datasource == 'miniimagenet':
                # TODO - use 15 val examples for imagenet?
                if FLAGS.train:
                    data_generator = DataGenerator(
                        FLAGS.update_batch_size + 15, FLAGS.meta_batch_size
                    )  # only use one datapoint for testing to save memory
                else:
                    data_generator = DataGenerator(
                        FLAGS.update_batch_size * 2, FLAGS.meta_batch_size
                    )  # only use one datapoint for testing to save memory
            else:
                data_generator = DataGenerator(
                    FLAGS.update_batch_size * 2, FLAGS.meta_batch_size
                )  # only use one datapoint for testing to save memory

    dim_output = data_generator.dim_output
    if FLAGS.baseline == 'oracle':
        # Oracle baseline feeds task parameters as extra inputs (sinusoid only)
        # and converts all meta-training iterations into plain pretraining.
        assert FLAGS.datasource == 'sinusoid'
        dim_input = 3
        FLAGS.pretrain_iterations += FLAGS.metatrain_iterations
        FLAGS.metatrain_iterations = 0
    else:
        dim_input = data_generator.dim_input

    if FLAGS.datasource == 'miniimagenet' or FLAGS.datasource == 'omniglot':
        # Image datasets use a tf queue pipeline; tensors are split into the
        # inner-update half (a) and the meta-objective half (b).
        tf_data_load = True
        num_classes = data_generator.num_classes

        if FLAGS.train:  # only construct training model if needed
            random.seed(5)  # fixed seed so the train episode sampling is reproducible
            image_tensor, label_tensor = data_generator.make_data_tensor()
            inputa = tf.slice(image_tensor, [0, 0, 0],
                              [-1, num_classes * FLAGS.update_batch_size, -1])
            inputb = tf.slice(image_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])
            labela = tf.slice(label_tensor, [0, 0, 0],
                              [-1, num_classes * FLAGS.update_batch_size, -1])
            labelb = tf.slice(label_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])
            input_tensors = {
                'inputa': inputa,
                'inputb': inputb,
                'labela': labela,
                'labelb': labelb
            }
            print("inputa shape", inputa.shape)

        # Meta-validation pipeline (separate seed from training).
        random.seed(6)
        image_tensor, label_tensor = data_generator.make_data_tensor(
            train=False)
        inputa = tf.slice(image_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        inputb = tf.slice(image_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        labela = tf.slice(label_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        labelb = tf.slice(label_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        metaval_input_tensors = {
            'inputa': inputa,
            'inputb': inputb,
            'labela': labela,
            'labelb': labelb
        }
    else:
        tf_data_load = False
        input_tensors = None

    model = MAML(dim_input, dim_output, test_num_updates=test_num_updates)
    if FLAGS.train or not tf_data_load:
        model.construct_model(input_tensors=input_tensors, prefix='metatrain_')
    if tf_data_load:
        model.construct_model(input_tensors=metaval_input_tensors,
                              prefix='metaval_')
    model.summ_op = tf.summary.merge_all()

    saver = loader = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES),
                                    max_to_keep=10)

    sess = tf.InteractiveSession()

    if not FLAGS.train:
        # change to original meta batch size when loading model.
        FLAGS.meta_batch_size = orig_meta_batch_size

    if FLAGS.train_update_batch_size == -1:
        FLAGS.train_update_batch_size = FLAGS.update_batch_size
    if FLAGS.train_update_lr == -1:
        FLAGS.train_update_lr = FLAGS.update_lr

    # Experiment-name string used for the checkpoint/log directory; encodes
    # the hyperparameter configuration.
    exp_string = 'cls_' + str(FLAGS.num_classes) + '.mbs_' + str(
        FLAGS.meta_batch_size) + '.ubs_' + str(
            FLAGS.train_update_batch_size) + '.numstep' + str(
                FLAGS.num_updates) + '.updatelr' + str(FLAGS.train_update_lr)

    if FLAGS.num_filters != 64:
        exp_string += 'hidden' + str(FLAGS.num_filters)
    if FLAGS.max_pool:
        exp_string += 'maxpool'
    if FLAGS.stop_grad:
        exp_string += 'stopgrad'
    if FLAGS.baseline:
        exp_string += FLAGS.baseline
    if FLAGS.norm == 'batch_norm':
        exp_string += 'batchnorm'
    elif FLAGS.norm == 'layer_norm':
        exp_string += 'layernorm'
    elif FLAGS.norm == 'None':
        exp_string += 'nonorm'
    else:
        print('Norm setting not recognized.')

    resume_itr = 0
    model_file = None

    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    if not FLAGS.rand_init:
        if FLAGS.resume or not FLAGS.train:
            model_file = tf.train.latest_checkpoint(FLAGS.logdir + '/' +
                                                    exp_string)
            if FLAGS.test_iter > 0:
                # Swap the latest checkpoint suffix for the requested iteration.
                model_file = model_file[:model_file.index('model'
                                                          )] + 'model' + str(
                                                              FLAGS.test_iter)
            if model_file:
                # Resume iteration is parsed from the checkpoint filename
                # ("...model<iter>").
                ind1 = model_file.index('model')
                resume_itr = int(model_file[ind1 + 5:])
                print("Restoring model weights from " + model_file)
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(model, saver, sess, exp_string, data_generator, resume_itr)
    else:
        test(model, saver, sess, exp_string, data_generator,
             test_num_updates)
def train_graph(self, train_dict):
    """ PURPOSE: Train a deep neural net classifier for baskets of products

    Runs mini-batch training with periodic validation-accuracy summaries and
    early stopping: the best-so-far model is checkpointed to
    ``self.temp_ckpt`` and restored once validation accuracy fails to improve
    for ``n_stop`` consecutive checks; the final model is saved to
    ``self.final_ckpt``.

    ARGS:
        train_dict      (dict) dictionary with ALL the following key values
            embeddings      (list(list)) trained product embedding layers
            sequences_train (list(list)) training order product numbers
            labels_train    (list) training order class labels
            sequences_valid (list(list)) test order product numbers
            labels_valid    (list) test order class
            batch_size      (int) number of training example per mini batch
            n_stop          (int) early stopping criteria
    """
    embeddings = train_dict.get('embeddings', None)
    sequences_train = train_dict.get('sequences_train', None)
    labels_train = train_dict.get('labels_train', None)
    sequences_valid = train_dict.get('sequences_valid', None)
    labels_valid = train_dict.get('labels_valid', None)
    batch_size = train_dict.get('batch_size', 100)
    n_stop = train_dict.get('n_stop', 5)

    n_train_ex = len(sequences_train)
    n_batches = n_train_ex // batch_size
    # done: loop-exit flag; acc_reg: [best validation accuracy,
    # consecutive checks without improvement] for early stopping.
    done, epoch, acc_reg = 0, 0, [0, 1]

    with self.graph.as_default():
        # Ops were registered in these graph collections at build time.
        correct_, accuracy_ = tf.get_collection('Eval_ops')
        acc_summary = tf.summary.scalar('Accuracy', accuracy_)
        file_writer = tf.summary.FileWriter(self.log_dir, self.graph)

    with tf.Session(graph=self.graph) as sess:
        init_, saver_ = tf.get_collection('Init_Save_ops')
        correct_, accuracy_ = tf.get_collection('Eval_ops')
        optimizer_, training_op_ = tf.get_collection("Optimizer_ops")
        sequences_, W_embed_, Y_, training_ = tf.get_collection(
            "Input_var")
        sess.run(init_)
        while done != 1:
            epoch += 1
            # Random partition of example indices into n_batches mini-batches.
            batches = self._partition_(list(range(n_train_ex)), n_batches)
            #Mini-Batch Training step
            for iteration in ProgressBar(
                    range(n_batches), 'Epoch {} Iterations'.format(epoch)):
                sequences_batch = [
                    sequences_train[indx] for indx in batches[iteration]
                ]
                labels_batch = [
                    labels_train[indx] for indx in batches[iteration]
                ]
                sess.run(
                    [training_op_],
                    feed_dict={
                        training_: True,
                        W_embed_: embeddings,
                        sequences_: sequences_batch,
                        Y_: labels_batch
                    })
                #Intermediate Summary Writing (every 10 iterations, on the
                # validation set)
                if iteration % 10 == 0:
                    summary_str = acc_summary.eval(
                        feed_dict={
                            training_: False,
                            W_embed_: embeddings,
                            sequences_: sequences_valid,
                            Y_: labels_valid
                        })
                    step = epoch * n_batches + iteration
                    file_writer.add_summary(summary_str, step)
            #Early Stopping Regularization (checked every epoch)
            if epoch % 1 == 0:
                # Evaluating the Accuracy of Current Model
                acc_ckpt = accuracy_.eval(
                    feed_dict={
                        training_: False,
                        W_embed_: embeddings,
                        sequences_: sequences_valid,
                        Y_: labels_valid
                    })
                if acc_ckpt > acc_reg[0]:
                    # Saving the new "best" model
                    save_path = saver_.save(sess, self.temp_ckpt)
                    acc_reg = [acc_ckpt, 1]
                elif acc_ckpt <= acc_reg[0] and acc_reg[1] < n_stop:
                    # No improvement yet; count toward the patience limit.
                    acc_reg[1] += 1
                elif acc_ckpt <= acc_reg[0] and acc_reg[1] >= n_stop:
                    #Restoring previous "best" model
                    saver_.restore(sess, self.temp_ckpt)
                    done = 1
            #Calculating Accuracy for Output
            acc_train = accuracy_.eval(
                feed_dict={
                    training_: False,
                    W_embed_: embeddings,
                    sequences_: sequences_train,
                    Y_: labels_train
                })
            acc_test = accuracy_.eval(
                feed_dict={
                    training_: False,
                    W_embed_: embeddings,
                    sequences_: sequences_valid,
                    Y_: labels_valid
                })
            print(
                'Register:{} Epoch:{:2d} Train Accuracy:{:6.4f} Validation Accuracy: {:6.4f}'
                .format(acc_reg, epoch, acc_train, acc_test))
        #Final Model Save
        save_path = saver_.save(sess, self.final_ckpt)
def model_fn(features, labels, mode, params):
    """Estimator model_fn for fine-tuning a classifier head.

    Builds logits via `model.conv_model` plus a dense target head, computes
    softmax cross-entropy with L2 weight decay (kernels only, batch-norm
    excluded), and wires an Adam train op in TRAIN mode / metrics in EVAL.

    Args:
        features: dict with key 'feature' holding the input batch.
        labels: dict with key 'label' holding the integer labels.
        mode: tf.estimator.ModeKeys value.
        params: params dict forwarded to `model_utils.get_label`.

    Returns:
        A tf.estimator.EstimatorSpec for the given mode.
    """
    feature = features['feature']
    labels = labels['label']
    one_hot_labels = model_utils.get_label(
        labels,
        params,
        FLAGS.src_num_classes,
        batch_size=FLAGS.train_batch_size)

    def get_logits():
        """Return the logits from the conv backbone + target dense head."""
        avg_pool = model.conv_model(feature, mode)
        name = 'final_dense_dst'
        with tf.variable_scope('target_CLS'):
            logits = tf.layers.dense(
                inputs=avg_pool, units=FLAGS.src_num_classes, name=name)
        return logits

    logits = get_logits()
    logits = tf.cast(logits, tf.float32)

    dst_loss = tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=one_hot_labels,
    )
    # L2 regularization over kernels only; skip batch-norm params and biases.
    dst_l2_loss = FLAGS.weight_decay * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name and 'kernel' in v.name
    ])
    loss = dst_loss + dst_l2_loss

    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        cur_finetune_step = tf.train.get_global_step()
        # Run UPDATE_OPS (e.g. batch-norm moving averages) before the step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            finetune_learning_rate = lr_schedule()
            optimizer = tf.train.AdamOptimizer(finetune_learning_rate)
            # BUG FIX: the original also built
            # `tf.contrib.slim.learning.create_train_op(loss, optimizer)`
            # and immediately overwrote the result with `minimize`, leaving a
            # dead duplicate gradient subgraph; only the minimize op is kept.
            with tf.variable_scope('finetune'):
                train_op = optimizer.minimize(loss, cur_finetune_step)
        with tf.control_dependencies([train_op]):
            tf.summary.scalar('classifier/finetune_lr',
                              finetune_learning_rate)

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = model_utils.metric_fn(labels, logits)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics,
    )
# Use function binding to create all the builder functions that are neeeded: bound_train_model = partial(model, lr_placeholder, outfeed_train_queue, True) bound_train_loop = partial(loop_builder, batches_per_step, bound_train_model, infeed_train_queue) bound_test_model = partial(model, lr_placeholder, outfeed_test_queue, False) bound_test_loop = partial(loop_builder, test_batches, bound_test_model, infeed_test_queue) # Use the bound builder functions to place the model on the IPU: with scopes.ipu_scope("/device:IPU:0"): train_loop = ipu_compiler.compile(bound_train_loop, inputs=[]) test_loop = ipu_compiler.compile(bound_test_loop, inputs=[]) # Initialisers should go on the CPU: with tf.device("cpu"): metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="metrics") metrics_initializer = tf.variables_initializer(var_list=metrics_vars) saver = tf.train.Saver() # Setup and acquire an IPU device: config = utils.create_ipu_config() config = utils.auto_select_ipus(config, 1) utils.configure_ipu_system(config) # These allow us to retrieve the results of IPU feeds: dequeue_train_outfeed = outfeed_train_queue.dequeue() dequeue_test_outfeed = outfeed_test_queue.dequeue() # Create a benchmark program for the infeed to determine maximum achievable throughput: infeed_perf = dataset_benchmark.infeed_benchmark(infeed_train_queue, epochs, num_train, True)