  def testMisc(self):
    ns = NS()
    ns.w = 0
    ns["x"] = 3
    ns.x = 1
    ns.y = NS(z=2)
    self.assertEqual(list(ns.Keys()), [("w",), ("x",), ("y", "z")])
    self.assertEqual(list(ns.Values()), [0, 1, 2])
    self.assertEqual(list(ns.Items()),
                     [(("w",), 0), (("x",), 1), (("y", "z"), 2)])
    self.assertEqual(ns.AsDict(),
                     OrderedDict([("w", 0), ("x", 1), ("y", NS(z=2))]))
    ns.Update(ns.y)
    self.assertEqual(list(ns), [("w",), ("x",), ("y", "z"), ("z",)])
    self.assertEqual(list(ns.Keys()), [("w",), ("x",), ("y", "z"), ("z",)])
    self.assertEqual(list(ns.Values()), [0, 1, 2, 2])
    self.assertEqual(list(ns.Items()),
                     [(("w",), 0), (("x",), 1), (("y", "z"), 2), (("z",), 2)])
    self.assertEqual(
        ns.AsDict(),
        OrderedDict([("w", 0), ("x", 1), ("y", NS(z=2)), ("z", 2)]))
    ns = NS(v=2, w=NS(x=1, y=[3, NS(z=0)]))
    self.assertItemsEqual(
        [("v",), ("w", "x"), ("w", "y", 0), ("w", "y", 1, "z")],
        list(ns.Keys()))
  def _make(self, hp, global_step=None):
    ts = NS()
    ts.global_step = global_step
    ts.x = tf.placeholder(dtype=tf.int32, name="x")
    ts.seq = self.model.make_training_graph(x=ts.x, length=self.segment_length)
    ts.final_state = ts.seq.final_state
    ts.loss = ts.seq.loss
    ts.error = ts.seq.error
    ts.learning_rate = tf.Variable(hp.initial_learning_rate, dtype=tf.float32,
                                   trainable=False, name="learning_rate")
    ts.decay_op = tf.assign(ts.learning_rate,
                            ts.learning_rate * hp.decay_rate)
    ts.optimizer = tf.train.AdamOptimizer(ts.learning_rate)
    ts.params = tf.trainable_variables()
    print([param.name for param in ts.params])
    ts.gradients = tf.gradients(
        ts.loss, ts.params,
        # secret memory-conserving sauce
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    loose_params = [param for param, gradient
                    in util.equizip(ts.params, ts.gradients)
                    if gradient is None]
    if loose_params:
      raise ValueError("loose parameters: %s" %
                       " ".join(param.name for param in loose_params))

    # tensorflow fails miserably to compute gradient for these
    for reg_var in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
      ts.gradients[ts.params.index(reg_var)] += (
          hp.weight_decay *
          tf.gradients(tf.sqrt(tf.reduce_sum(reg_var**2)), [reg_var])[0])

    ts.clipped_gradients, _ = tf.clip_by_global_norm(
        ts.gradients, hp.clip_norm)
    ts.training_op = ts.optimizer.apply_gradients(
        util.equizip(ts.clipped_gradients, ts.params),
        global_step=ts.global_step)

    ts.summaries = [
        tf.summary.scalar("loss_train", ts.loss),
        tf.summary.scalar("error_train", ts.error),
        tf.summary.scalar("learning_rate", ts.learning_rate)]
    for parameter, gradient in util.equizip(ts.params, ts.gradients):
      ts.summaries.append(
          tf.summary.scalar("meanlogabs_%s" % parameter.name,
                            tfutil.meanlogabs(parameter)))
      ts.summaries.append(
          tf.summary.scalar("meanlogabsgrad_%s" % parameter.name,
                            tfutil.meanlogabs(gradient)))

    return ts
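  # A minimal usage sketch (assumption, not part of the original code): one way
  # to drive the training tensors returned above from a TF1 session.  `ts` is
  # the NS built by _make; `batches` stands for a hypothetical iterable of
  # integer arrays shaped to match ts.x.
  #
  #   with tf.Session() as sess:
  #     sess.run(tf.global_variables_initializer())
  #     for x in batches:
  #       # one optimization step; fetch the loss for monitoring
  #       _, loss = sess.run([ts.training_op, ts.loss], feed_dict={ts.x: x})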
  def _make(self, unused_hp):
    ts = NS()
    ts.x = tf.placeholder(dtype=tf.int32, name="x")
    ts.seq = self.model.make_evaluation_graph(x=ts.x)
    ts.final_state = ts.seq.final_state
    ts.loss = ts.seq.loss
    ts.error = ts.seq.error
    return ts
  def _make(self, hp):
    ts = NS()
    ts.x = tf.placeholder(dtype=tf.int32, name="x")
    # conditioning graph
    ts.cond = self.model.make_evaluation_graph(x=ts.x)
    # generation graph
    tf.get_variable_scope().reuse_variables()
    ts.initial_xelt = tf.placeholder(dtype=tf.int32, name="initial_xelt",
                                     shape=[None])
    ts.length = tf.placeholder(dtype=tf.int32, name="length", shape=[])
    ts.temperature = tf.placeholder(dtype=tf.float32, name="temperature",
                                    shape=[])
    ts.sample = self.model.make_sampling_graph(
        initial_xelt=ts.initial_xelt, length=ts.length,
        temperature=ts.temperature)
    return ts
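  # A minimal usage sketch (assumption, not part of the original code):
  # sampling from the generation graph built above.  `ts` is the NS returned
  # by _make; `seed_elts` is a hypothetical 1-D integer array of initial
  # elements, and `checkpoint_path` a hypothetical path to trained weights.
  #
  #   with tf.Session() as sess:
  #     tf.train.Saver().restore(sess, checkpoint_path)
  #     xhat = sess.run(ts.sample, feed_dict={ts.initial_xelt: seed_elts,
  #                                           ts.length: 100,
  #                                           ts.temperature: 1.0})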