Example #1
    def _make(self, hp, global_step=None):
        ts = NS()
        ts.global_step = global_step
        ts.x = tf.placeholder(dtype=tf.int32, name="x")
        ts.seq = self.model.make_training_graph(x=ts.x,
                                                length=self.segment_length)
        ts.final_state = ts.seq.final_state
        ts.loss = ts.seq.loss
        ts.error = ts.seq.error

        ts.learning_rate = tf.Variable(hp.initial_learning_rate,
                                       dtype=tf.float32,
                                       trainable=False,
                                       name="learning_rate")
        ts.decay_op = tf.assign(ts.learning_rate,
                                ts.learning_rate * hp.decay_rate)
        ts.optimizer = tf.train.AdamOptimizer(ts.learning_rate)
        ts.params = tf.trainable_variables()
        print([param.name for param in ts.params])

        ts.gradients = tf.gradients(
            ts.loss,
            ts.params,
            # secret memory-conserving sauce: tree aggregation sums
            # gradients pairwise rather than all at once
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)

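        # a None gradient means the parameter is not connected to the loss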
        loose_params = [
            param for param, gradient in util.equizip(ts.params, ts.gradients)
            if gradient is None
        ]
        if loose_params:
            raise ValueError("loose parameters: %s" %
                             " ".join(param.name for param in loose_params))

        # tensorflow fails miserably to compute the gradient for these, so
        # add the weight-decay term (the gradient of each regularized
        # variable's L2 norm) by hand
        for reg_var in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
            ts.gradients[ts.params.index(reg_var)] += (
                hp.weight_decay *
                tf.gradients(tf.sqrt(tf.reduce_sum(reg_var**2)), [reg_var])[0])

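        # rescale all gradients together so that their global norm is at
        # most hp.clip_norm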
        ts.clipped_gradients, _ = tf.clip_by_global_norm(
            ts.gradients, hp.clip_norm)
        ts.training_op = ts.optimizer.apply_gradients(
            util.equizip(ts.clipped_gradients, ts.params),
            global_step=ts.global_step)

        ts.summaries = [
            tf.summary.scalar("loss_train", ts.loss),
            tf.summary.scalar("error_train", ts.error),
            tf.summary.scalar("learning_rate", ts.learning_rate)
        ]
        for parameter, gradient in util.equizip(ts.params, ts.gradients):
            ts.summaries.append(
                tf.summary.scalar("meanlogabs_%s" % parameter.name,
                                  tfutil.meanlogabs(parameter)))
            ts.summaries.append(
                tf.summary.scalar("meanlogabsgrad_%s" % parameter.name,
                                  tfutil.meanlogabs(gradient)))

        return ts
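
The helpers NS, util, and tfutil above are project-specific. Below is a
minimal, self-contained sketch of the same gradient pipeline (compute
gradients, reject parameters with no gradient, clip by global norm, apply
with Adam), assuming TensorFlow 1.x; the model, learning rate, and clip
norm are illustrative placeholders, not values from the original project.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4], name="x")
w = tf.get_variable("w", shape=[4, 1])
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))

params = tf.trainable_variables()
# tree aggregation keeps peak memory down when summing many gradients
gradients = tf.gradients(
    loss, params,
    aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
if any(gradient is None for gradient in gradients):
    raise ValueError("some parameters are disconnected from the loss")

# rescale all gradients together so their global norm is at most 1.0
clipped, _ = tf.clip_by_global_norm(gradients, clip_norm=1.0)
train_op = tf.train.AdamOptimizer(1e-3).apply_gradients(
    list(zip(clipped, params)))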
Example #2
    def run(self,
            session,
            examples,
            max_step_count=None,
            hp=None,
            aggregates=None):
        aggregates = NS(aggregates or {})
        for key in "loss error".split():
            if key not in aggregates:
                aggregates[key] = util.MeanAggregate()

        tensors = self.tensors.Extract(*aggregates.Keys())
        tensors.Update(self.tensors.Extract("final_state.model"))

        state = NS(step=0, model=self.model.initial_state(hp.batch_size))

        try:
            for batch in util.batches(examples, hp.batch_size):
                for segment in util.segments(batch,
                                             hp.segment_length,
                                             overlap=hp.chunk_size):
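                    # StopIteration breaks out of both loops once the step
                    # budget is exhausted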
                    if max_step_count is not None and state.step >= max_step_count:
                        raise StopIteration()

                    x, = util.examples_as_arrays(segment)
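                    # feed the segment (transposed, presumably to time-major
                    # layout) along with the carried-over recurrent state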
                    feed_dict = {self.tensors.x: x.T}
                    feed_dict.update(self.model.feed_dict(state.model))
                    values = NS.FlatCall(
                        ft.partial(session.run, feed_dict=feed_dict), tensors)

                    for key in aggregates.Keys():
                        aggregates[key].add(values[key])

                    sys.stderr.write(".")
                    state.model = values.final_state.model
                    state.step += 1
        except StopIteration:
            pass

        sys.stderr.write("\n")

        values = NS(
            (key, aggregate.value) for key, aggregate in aggregates.Items())

        values.summaries = [
            tf.Summary.Value(tag="%s_valid" % key, simple_value=values[key])
            for key in "loss error".split()
        ]
        print "### evaluation loss %6.5f error %6.5f" % (values.loss,
                                                         values.error)

        if np.isnan(values.loss):
            raise ValueError("loss has become NaN")

        return values
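
util.MeanAggregate is not shown in these snippets. Judging from how run uses
it (one add() call per segment, a value attribute read at the end), a minimal
hypothetical stand-in might look like this:

class MeanAggregate(object):
    """Running mean over the values passed to add() (hypothetical sketch)."""

    def __init__(self):
        self.total = 0.0
        self.count = 0

    def add(self, value):
        # accumulate one per-segment measurement
        self.total += float(value)
        self.count += 1

    @property
    def value(self):
        # mean of everything added so far (0.0 if nothing was added)
        return self.total / self.count if self.count else 0.0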