def _get_seed_ops(self):
    """Collect the operations from which LMS starts traversing the graph.

    Operations selected by the user take precedence: everything matching
    the starting scope (pattern anchored at the start of the op name) plus
    everything matching one of the starting op names (pattern anchored at
    both ends).  Only when neither selects anything are seed ops searched
    for automatically.

    Returns:
      A list of seed operations.  The list is empty when the automatic
      search finds no suitable operation.

    Raises:
      ValueError: if the starting scope, or any starting op name, matches
        no operation in the graph.
    """
    all_ops = ge.make_list_of_op(self._graph)
    chosen = set()

    # Ops selected through the user-supplied starting scope.
    if self._starting_scope:
        scope_pattern = "^{}".format(self._starting_scope)
        in_scope = set(ge.filter_ops_from_regex(all_ops, scope_pattern))
        if not in_scope:
            raise ValueError('No operations were found in starting '
                             'scope {}.'.format(self._starting_scope))
        chosen.update(in_scope)

    # Ops selected through the user-supplied op names; every name must
    # match at least one operation.
    if self._starting_op_names:
        for name in self._starting_op_names:
            matched = set(
                ge.filter_ops_from_regex(all_ops, "^{}$".format(name)))
            if not matched:
                raise ValueError('No starting operation was found with '
                                 'name {}.'.format(name))
            chosen.update(matched)

    if chosen:
        return list(chosen)

    # Nothing was selected explicitly: search automatically.  Candidates
    # are the non-gradient ops with at least one output tensor consumed by
    # a gradient op.
    forward_ops = [
        op for op in self._graph.get_operations()
        if op not in self._grad_ops
    ]
    boundary_ops = set()
    for op in forward_ops:
        for tensor in op.outputs:
            if set(util.get_consuming_ops(tensor)) & self._grad_ops:
                boundary_ops.add(op)
                break

    # Score each candidate by how many other candidates are reachable from
    # it when walking forward through the non-gradient ops; candidates
    # that reach none are dropped.
    coverage = {}
    for op in boundary_ops:
        reachable = ge.get_forward_walk_ops(
            op, within_ops=forward_ops, inclusive=False)
        score = len(set(reachable) & boundary_ops)
        if score > 0:
            coverage[op] = score

    # Keep the candidates with the highest score: they cover most of the
    # forward ops.
    best_score = max(coverage.values()) if coverage else -1
    return [op for op, score in coverage.items() if score == best_score]
def gradient_memory_mbs():
    """Build the training graph, run one training step, report peak memory.

    Prints the graph construction time, the loss, the time taken to
    evaluate the loss, and the memory figure.  Asserts that the whole
    function took less than 100 seconds.

    Returns:
      The '/gpu:0' entry of `mem_util.peak_memory(run_metadata)` divided
      by 1e6 (printed as MB).
    """
    start_time0 = time.perf_counter()

    tf.reset_default_graph()
    tf.set_random_seed(1)
    train_op, loss = create_train_op_and_loss()
    print("Graph construction: %.2f ms" %
          (1000 * (time.perf_counter() - start_time0)))

    # Register the first output of every op whose name matches
    # "block_layer" in the "checkpoints" collection.
    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("checkpoints", op.outputs[0])

    # NOTE(review): `sess` is a local name here (no `global sess`); if
    # `sessrun` relies on a module-level `sess`, it will not see this
    # session -- confirm against `sessrun`.
    sess = create_session()
    sessrun(tf.global_variables_initializer())

    # This training step is not timed: the timer is started afterwards, so
    # "Compute time" below covers only the loss evaluation.
    sessrun(train_op)

    start_time = time.perf_counter()
    print("loss %f" % (sess.run(loss),))
    print("Compute time: %.2f ms" %
          (1000 * (time.perf_counter() - start_time)))

    # `run_metadata` is not defined in this function; presumably it is a
    # module-level value filled in by `sessrun` -- confirm.
    mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
    print("Memory used: %.2f MB " % (mem_use))

    total_time = time.perf_counter() - start_time0
    assert total_time < 100
    return mem_use
def gradient_memory_mbs():
    """Build the training graph, run one training step, report peak memory.

    Prints the graph construction time, the loss, the time taken to
    evaluate the loss, and the memory figure.  Asserts that the whole
    function took less than 100 seconds.

    Returns:
      The '/gpu:0' entry of `mem_util.peak_memory(run_metadata)` divided
      by 1e6 (printed as MB).
    """
    start_time0 = time.perf_counter()

    tf.reset_default_graph()
    tf.set_random_seed(1)
    train_op, loss = create_train_op_and_loss()
    print("Graph construction: %.2f ms" %
          (1000 * (time.perf_counter() - start_time0)))

    # Register the first output of every op whose name matches
    # "block_layer" in the "checkpoints" collection.
    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("checkpoints", op.outputs[0])

    # NOTE(review): `sess` is a local name here (no `global sess`); if
    # `sessrun` relies on a module-level `sess`, it will not see this
    # session -- confirm against `sessrun`.
    sess = create_session()
    sessrun(tf.global_variables_initializer())

    # This training step is not timed: the timer is started afterwards, so
    # "Compute time" below covers only the loss evaluation.
    sessrun(train_op)

    start_time = time.perf_counter()
    print("loss %f" % (sess.run(loss),))
    print("Compute time: %.2f ms" %
          (1000 * (time.perf_counter() - start_time)))

    # `run_metadata` is not defined in this function; presumably it is a
    # module-level value filled in by `sessrun` -- confirm.
    mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
    print("Memory used: %.2f MB " % (mem_use))

    total_time = time.perf_counter() - start_time0
    assert total_time < 100
    return mem_use
def check_op_name(regex='Block./layer.*/concat.*'):
    """Report default-graph ops that have outputs and do not match `regex`.

    For each such op, the op itself is printed and its first output is
    written to the log at INFO level.

    Args:
      regex: pattern handed to `ge.filter_ops_from_regex` to select the
        ops that are excluded from the report.
    """
    ops = tf.get_default_graph().get_operations()
    # A set gives constant-time membership tests; checking every op
    # against a list made the loop below quadratic in the number of ops.
    matching_ops = set(ge.filter_ops_from_regex(ops, regex))
    for op in ops:
        # NOTE(review): the ops reported are those that do NOT match the
        # pattern -- confirm that this is the intended selection.
        if op not in matching_ops and op.outputs:
            print(op)
            logging.info(' out: {0}'.format(op.outputs[0]))
def gradient_memory_measure_mb():
    """Evaluate the gradients of the loss and report the memory figure.

    Builds the loss, optionally dumps the graph definition to
    'graphdef.txt', registers checkpoint tensors, creates the module-level
    session, then evaluates the gradients twice.  Asserts that the whole
    function took less than 100 seconds.

    Returns:
      The '/gpu:0' entry of `mem_util.peak_memory(run_metadata)` divided
      by 1e6 (printed as MB).
    """
    global sess

    start_time0 = time.perf_counter()
    loss = create_loss()

    if DUMP_GRAPHDEF:
        # The context manager closes (and therefore flushes) the dump file
        # deterministically instead of leaving it to garbage collection.
        with open('graphdef.txt', 'w') as graphdef_file:
            graphdef_file.write(str(tf.get_default_graph().as_graph_def()))

    # use block_layer1, block_layer2, block_layer3 as checkpoint nodes
    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("checkpoints", op.outputs[0])

    grads = tf.gradients(loss, tf.trainable_variables())

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    # The gradients are evaluated twice before memory is read.
    sessrun(grads)
    sessrun(grads)

    # `run_metadata` is not defined in this function; presumably it is a
    # module-level value filled in by `sessrun` -- confirm.
    mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
    print("Memory used: %.2f MB " % (mem_use))

    total_time = time.perf_counter() - start_time0
    print("Total time: %.2f sec" % (total_time))
    assert total_time < 100
    return mem_use
def gradient_memory_measure_mb():
    """Evaluate the gradients of the loss and report the memory figure.

    Builds the loss, optionally dumps the graph definition to
    'graphdef.txt', registers checkpoint tensors, creates the module-level
    session, then evaluates the gradients twice.  Asserts that the whole
    function took less than 100 seconds.

    Returns:
      The '/gpu:0' entry of `mem_util.peak_memory(run_metadata)` divided
      by 1e6 (printed as MB).
    """
    global sess

    start_time0 = time.perf_counter()
    loss = create_loss()

    if DUMP_GRAPHDEF:
        # The context manager closes (and therefore flushes) the dump file
        # deterministically instead of leaving it to garbage collection.
        with open('graphdef.txt', 'w') as graphdef_file:
            graphdef_file.write(str(tf.get_default_graph().as_graph_def()))

    # use block_layer1, block_layer2, block_layer3 as checkpoint nodes
    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("checkpoints", op.outputs[0])

    grads = tf.gradients(loss, tf.trainable_variables())

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    # The gradients are evaluated twice before memory is read.
    sessrun(grads)
    sessrun(grads)

    # `run_metadata` is not defined in this function; presumably it is a
    # module-level value filled in by `sessrun` -- confirm.
    mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
    print("Memory used: %.2f MB " % (mem_use))

    total_time = time.perf_counter() - start_time0
    print("Total time: %.2f sec" % (total_time))
    assert total_time < 100
    return mem_use
def _build_gradient_ops(self):
    """Collect the backward-phase operations into `self._grad_ops`.

    An operation is treated as part of the backward phase when its name
    matches one of the optimizer scopes (pattern anchored at the start of
    the name).  Nothing is returned; `self._grad_ops` is updated in place.

    Raises:
      ValueError: if an optimizer scope matches no operation in the graph.
    """
    # The list of graph ops does not depend on the scope, so it is built
    # once rather than once per scope.
    graph_ops = ge.make_list_of_op(self._graph)
    for scope in self._optimizer_scopes:
        ops_for_scope = set(
            ge.filter_ops_from_regex(graph_ops, "^{}".format(scope)))
        if not ops_for_scope:
            raise ValueError('No operations were found with optimizer '
                             'scope {}.'.format(scope))
        self._grad_ops.update(ops_for_scope)
def run_grads():
    """Build a fresh graph, initialize variables, evaluate the gradients.

    The default graph is reset and the graph-level random seed is set to
    1.  The first output of every op whose name matches "block_layer" is
    added to the "remember" collection before the session is created.

    Returns:
      The first element of the evaluated `grads` (not the loss).
    """
    tf.reset_default_graph()
    tf.set_random_seed(1)
    grads, _ = grads_and_loss()  # the loss tensor is not used here

    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("remember", op.outputs[0])

    # Assumes `create_session` returns a `tf.Session`-like object exposing
    # `close()`; closing releases the session's resources once the result
    # has been fetched, even if evaluation raises.
    sess = create_session()
    try:
        sess.run(tf.global_variables_initializer())
        return sess.run(grads)[0]
    finally:
        sess.close()
def run_grads():
    """Build a fresh graph, initialize variables, evaluate the gradients.

    The default graph is reset and the graph-level random seed is set to
    1.  The first output of every op whose name matches "block_layer" is
    added to the "remember" collection before the session is created.

    Returns:
      The first element of the evaluated `grads` (not the loss).
    """
    tf.reset_default_graph()
    tf.set_random_seed(1)
    grads, _ = grads_and_loss()  # the loss tensor is not used here

    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("remember", op.outputs[0])

    # Assumes `create_session` returns a `tf.Session`-like object exposing
    # `close()`; closing releases the session's resources once the result
    # has been fetched, even if evaluation raises.
    sess = create_session()
    try:
        sess.run(tf.global_variables_initializer())
        return sess.run(grads)[0]
    finally:
        sess.close()
def test_get_filter(self):
    """Check the op and tensor filtering helpers against `self.graph`."""
    # TODO(fkp): parameterise
    graph = self.graph

    # Filtering ops: everything, by node type, then by name pattern.
    every_op = ge.filter_ops(graph, True)
    self.assertEqual(len(every_op), 8)

    const_ops = ge.filter_ops(graph, lambda op: op.node_def.op == "Const")
    self.assertEqual(len(const_ops), 3)

    add_ops = ge.filter_ops(graph, lambda op: op.node_def.op == "Add")
    self.assertEqual(len(add_ops), 5)

    abc_ops = ge.filter_ops_from_regex(graph, r"^.*\b[abc]$")
    self.assertEqual(len(abc_ops), 3)

    # Filtering tensors: everything, then by name pattern.
    every_tensor = ge.filter_ts(graph, True)
    self.assertEqual(len(every_tensor), 8)

    fgh_tensors = ge.filter_ts_from_regex(graph, r"^.*/[fgh]:\d$")
    self.assertEqual(len(fgh_tensors), 3)

    # Selecting ops by name scope.
    foo_ops = ge.get_name_scope_ops(graph, "foo/")
    self.assertEqual(len(foo_ops), 7)

    foo_bar_ops = ge.get_name_scope_ops(graph, "foo/bar")
    self.assertEqual(len(foo_bar_ops), 4)
def test_get_filter(self):
    """Check the op and tensor filtering helpers against `self.graph`."""
    # TODO(fkp): parameterise (id:836)
    # https://github.com/imdone/tensorflow/issues/837
    graph = self.graph

    # Filtering ops: everything, by node type, then by name pattern.
    every_op = ge.filter_ops(graph, True)
    self.assertEqual(len(every_op), 8)

    const_ops = ge.filter_ops(graph, lambda op: op.node_def.op == "Const")
    self.assertEqual(len(const_ops), 3)

    add_ops = ge.filter_ops(graph, lambda op: op.node_def.op == "Add")
    self.assertEqual(len(add_ops), 5)

    abc_ops = ge.filter_ops_from_regex(graph, r"^.*\b[abc]$")
    self.assertEqual(len(abc_ops), 3)

    # Filtering tensors: everything, then by name pattern.
    every_tensor = ge.filter_ts(graph, True)
    self.assertEqual(len(every_tensor), 8)

    fgh_tensors = ge.filter_ts_from_regex(graph, r"^.*/[fgh]:\d$")
    self.assertEqual(len(fgh_tensors), 3)

    # Selecting ops by name scope.
    foo_ops = ge.get_name_scope_ops(graph, "foo/")
    self.assertEqual(len(foo_ops), 7)

    foo_bar_ops = ge.get_name_scope_ops(graph, "foo/bar")
    self.assertEqual(len(foo_bar_ops), 4)
def gradient_memory_measure_mb():
    """Train for ten gradient-descent steps and report the memory figure.

    Requires a GPU (asserted).  Resets the default graph and the TF and
    NumPy seeds, builds the loss, optionally dumps the graph definition to
    'graphdef.txt', registers checkpoint tensors, builds a gradient
    descent training op, and creates the module-level session.  The
    gradients are evaluated twice, then ten training steps are run with
    the loss and memory figure printed after each.  Asserts that the
    initial loss is above 35, the final loss is below 22, and the whole
    function took less than 100 seconds.

    Returns:
      The '/gpu:0' entry of `mem_util.peak_memory(run_metadata)` divided
      by 1e6 (printed as MB).
    """
    global sess

    assert tf.test.is_gpu_available()
    tf.reset_default_graph()
    tf.set_random_seed(1)
    np.random.seed(1)

    start_time0 = time.perf_counter()
    loss = create_loss()

    if DUMP_GRAPHDEF:
        # The context manager closes (and therefore flushes) the dump file
        # deterministically instead of leaving it to garbage collection.
        with open('graphdef.txt', 'w') as graphdef_file:
            graphdef_file.write(str(tf.get_default_graph().as_graph_def()))

    # use block_layer1, block_layer2, block_layer3 as checkpoint nodes
    # this is only active when checkpoint strategy=collection is used
    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("checkpoints", op.outputs[0])

    # NOTE(review): the result of this first `tf.gradients` call is
    # overwritten two lines below, so the call looks redundant.  It is
    # kept because it adds ops to the graph and removing it would change
    # the graph being measured -- confirm whether it can be dropped.
    grads = tf.gradients(loss, tf.trainable_variables())
    variables = tf.trainable_variables()
    grads = tf.gradients(loss, variables)
    grads_and_vars = zip(grads, variables)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
    train_op = optimizer.apply_gradients(grads_and_vars)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    # The gradients are evaluated twice before training starts.
    sessrun(grads)
    sessrun(grads)

    # without checkpoints we expect following sequence of losses
    # Loss 35.49785, memory 626.32 MB
    # Loss 32.18098, memory 626.31 MB
    # Loss 29.42088, memory 628.37 MB
    # Loss 28.29715, memory 628.37 MB
    # Loss 26.50492, memory 628.37 MB
    # Loss 25.59675, memory 628.37 MB
    # Loss 24.45332, memory 628.37 MB
    # Loss 23.91770, memory 628.37 MB
    # Loss 22.29025, memory 626.31 MB
    # Loss 22.42356, memory 626.31 MB
    # NOTE(review): the last listed value (22.42356) would not satisfy the
    # `loss0 < 22` assertion below -- confirm the listing is current.
    loss0 = sess.run(loss)
    assert loss0 > 35
    for _ in range(10):
        sessrun(train_op)
        loss0 = sess.run(loss)
        # `run_metadata` is not defined in this function; presumably it is
        # a module-level value filled in by `sessrun` -- confirm.
        mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
        print("Loss %.5f, memory %.2f MB" % (loss0, mem_use))
    assert loss0 < 22

    mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
    print("Memory used: %.2f MB " % (mem_use))

    total_time = time.perf_counter() - start_time0
    print("Total time: %.2f sec" % (total_time))
    assert total_time < 100
    return mem_use
def gradient_memory_measure_mb():
    """Train for ten gradient-descent steps and report the memory figure.

    Requires a GPU (asserted).  Resets the default graph and the TF and
    NumPy seeds, builds the loss, optionally dumps the graph definition to
    'graphdef.txt', registers checkpoint tensors, builds a gradient
    descent training op, and creates the module-level session.  The
    gradients are evaluated twice, then ten training steps are run with
    the loss and memory figure printed after each.  Asserts that the
    initial loss is above 35, the final loss is below 22, and the whole
    function took less than 100 seconds.

    Returns:
      The '/gpu:0' entry of `mem_util.peak_memory(run_metadata)` divided
      by 1e6 (printed as MB).
    """
    global sess

    assert tf.test.is_gpu_available()
    tf.reset_default_graph()
    tf.set_random_seed(1)
    np.random.seed(1)

    start_time0 = time.perf_counter()
    loss = create_loss()

    if DUMP_GRAPHDEF:
        # The context manager closes (and therefore flushes) the dump file
        # deterministically instead of leaving it to garbage collection.
        with open('graphdef.txt', 'w') as graphdef_file:
            graphdef_file.write(str(tf.get_default_graph().as_graph_def()))

    # use block_layer1, block_layer2, block_layer3 as checkpoint nodes
    # this is only active when checkpoint strategy=collection is used
    ops = tf.get_default_graph().get_operations()
    for op in ge.filter_ops_from_regex(ops, "block_layer"):
        tf.add_to_collection("checkpoints", op.outputs[0])

    # NOTE(review): the result of this first `tf.gradients` call is
    # overwritten two lines below, so the call looks redundant.  It is
    # kept because it adds ops to the graph and removing it would change
    # the graph being measured -- confirm whether it can be dropped.
    grads = tf.gradients(loss, tf.trainable_variables())
    variables = tf.trainable_variables()
    grads = tf.gradients(loss, variables)
    grads_and_vars = zip(grads, variables)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
    train_op = optimizer.apply_gradients(grads_and_vars)

    sess = create_session()
    sessrun(tf.global_variables_initializer())
    # The gradients are evaluated twice before training starts.
    sessrun(grads)
    sessrun(grads)

    # without checkpoints we expect following sequence of losses
    # Loss 35.49785, memory 626.32 MB
    # Loss 32.18098, memory 626.31 MB
    # Loss 29.42088, memory 628.37 MB
    # Loss 28.29715, memory 628.37 MB
    # Loss 26.50492, memory 628.37 MB
    # Loss 25.59675, memory 628.37 MB
    # Loss 24.45332, memory 628.37 MB
    # Loss 23.91770, memory 628.37 MB
    # Loss 22.29025, memory 626.31 MB
    # Loss 22.42356, memory 626.31 MB
    # NOTE(review): the last listed value (22.42356) would not satisfy the
    # `loss0 < 22` assertion below -- confirm the listing is current.
    loss0 = sess.run(loss)
    assert loss0 > 35
    for _ in range(10):
        sessrun(train_op)
        loss0 = sess.run(loss)
        # `run_metadata` is not defined in this function; presumably it is
        # a module-level value filled in by `sessrun` -- confirm.
        mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
        print("Loss %.5f, memory %.2f MB" % (loss0, mem_use))
    assert loss0 < 22

    mem_use = mem_util.peak_memory(run_metadata)['/gpu:0'] / 1e6
    print("Memory used: %.2f MB " % (mem_use))

    total_time = time.perf_counter() - start_time0
    print("Total time: %.2f sec" % (total_time))
    assert total_time < 100
    return mem_use