Example #1
def get_exec_time_loss(loss_fn, logits_shape, num_runs=1):
    run_opts = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)

    times = []

    @tf.function
    def run_loss(logits, labels):
        return loss_fn(logits, labels)

    conc = run_loss.get_concrete_function(tf.TensorSpec(logits_shape),
                                          tf.TensorSpec(logits_shape))

    for run in range(num_runs + 1):
        with tf1.Session() as sess:
            run_meta = tf1.RunMetadata()
            sess.run(tf1.global_variables_initializer())
            logits = tf.random.normal(logits_shape)
            labels = tf.random.normal(logits_shape)
            out = conc(logits, labels)
            sess.run(out, options=run_opts, run_metadata=run_meta)
            t1 = timeline.Timeline(run_meta.step_stats)
            lctf = t1.generate_chrome_trace_format()

            del logits
            del labels

        time = convert_string_to_time(lctf)
        times.append(time)
    if np.std(times) <= np.std(times[1:]):
        return np.average(times), np.std(times)
    # Filter first run
    return np.average(times[1:]), np.std(times[1:])
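The helper convert_string_to_time used above is not shown in this example. A minimal sketch, assuming it parses the Chrome trace JSON emitted by generate_chrome_trace_format and sums the durations of all complete ('X') events, in microseconds:

import json

def convert_string_to_time(chrome_trace):
    # Complete events ('ph' == 'X') carry a 'dur' field in microseconds.
    events = json.loads(chrome_trace)['traceEvents']
    return sum(ev.get('dur', 0) for ev in events if ev.get('ph') == 'X')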
Example #2
def main():
    tf.disable_eager_execution()

    with tf.device('/gpu:0'):
        t1 = tf.random.uniform(shape=[32, 56, 56, 64], dtype=tf.half)
        t2 = tf.random.uniform(shape=[3, 3, 64, 64], dtype=tf.half)
        t = tf.nn.conv2d(input=t1,
                         filters=t2,
                         strides=[2, 2],
                         padding='SAME',
                         data_format='NHWC',
                         name='Conv2D')

    run_options = tf.RunOptions()
    run_options.trace_level = run_options.FULL_TRACE
    run_metadata = tf.RunMetadata()

    options = tf.GraphOptions(build_cost_model=1)
    cfg = tf.ConfigProto(graph_options=options)
    with tf.Session(config=cfg) as sess:
        sess.run(tf.global_variables_initializer())
        _ = sess.run([t], options=run_options, run_metadata=run_metadata)

    # compute_cost is an estimated execution time in microseconds.
    for node in run_metadata.cost_graph.node:
        if node.name == 'Conv2D':
            print(node.name, ':', node.compute_cost * 1000, 'ns.')
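As a follow-up sketch under the same setup, the cost graph can also be ranked to surface the most expensive ops instead of matching a single node by name:

# Rank all cost-graph nodes by estimated compute cost (microseconds).
nodes = sorted(run_metadata.cost_graph.node,
               key=lambda n: n.compute_cost, reverse=True)
for node in nodes[:10]:
    print(node.name, ':', node.compute_cost, 'us')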
Example #3
    def _step(self,
              handles,
              merged=None,
              writer=None,
              summary=False,
              log_trace=False):  # Optimization step
        feed_dict = {
            self.model.is_train: True,
            self.model.monte_carlo: self.monte_carlo,
            self.model.augmentation: self.augment_train,
            self.model.total_steps: self.total_steps,
            self.learning_rate_multiplier: self.curr_multiplier
        }
        for h_t, h in zip(self.model.handles, handles):
            feed_dict.update({h_t: h})
        feed_dict.update(self.model.custom_feed_dict)

        run_options = tf.RunOptions(
            trace_level=tf.RunOptions.FULL_TRACE) if log_trace else None
        run_metadata = tf.RunMetadata() if log_trace else None

        if summary:  # Write summaries on TensorBoard
            assert merged is not None, 'No merged summary exists.'
            assert writer is not None, 'No summary writer exists.'

            _, loss, Y_true, Y_pred, summaries = self.model.session.run(
                [
                    self.optimization_operation, self.model.loss,
                    self.model.Y_all, self.model.pred, merged
                ],
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata)
            writer.add_summary(summaries, self.curr_step + 1)
            writer.flush()
        else:
            _, loss, Y_true, Y_pred = self.model.session.run(
                [
                    self.optimization_operation, self.model.loss,
                    self.model.Y_all, self.model.pred
                ],
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata)

        if log_trace:
            assert writer is not None, 'TensorFlow FileWriter must be provided for logging.'
            tracing_dir = os.path.join(writer.get_logdir(), 'tracing')
            if not os.path.exists(tracing_dir):
                os.makedirs(tracing_dir)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format(
                show_memory=False)
            with open(
                    os.path.join(tracing_dir,
                                 'step_{}.json'.format(self.curr_step + 1)),
                    'w') as f:
                f.write(chrome_trace)

        return loss, Y_true, Y_pred
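The step_*.json files written here are standard Chrome trace files; they can be inspected by loading them into chrome://tracing (or https://ui.perfetto.dev) in a browser.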
Example #4
    def benchmark(self, image_arrays, trace_filename=None):
        if not self.sess:
            self.build()

        # init session
        self.sess.run(
            self.signitures['prediction'],
            feed_dict={self.signitures['image_arrays']: image_arrays})

        start = perf_counter()
        for i in range(10):
            self.sess.run(
                self.signitures['prediction'],
                feed_dict={self.signitures['image_arrays']: image_arrays})
        end = perf_counter()
        inference_time = (end - start) / 10

        print('Inference time: ', inference_time)
        print('FPS: ', 1 / inference_time)
        if trace_filename:
            run_options = tf.RunOptions()
            run_options.trace_level = tf.RunOptions.FULL_TRACE
            run_metadata = tf.RunMetadata()
            self.sess.run(
                self.signitures['prediction'],
                feed_dict={self.signitures['image_arrays']: image_arrays},
                options=run_options,
                run_metadata=run_metadata)
            with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=True))
Example #5
def main():
    t = trainer.Trainer()
    args = t.args
    params = Params(args.batch_size, args.seq_len, args.model_size)

    # Initialize dataset
    dataset = TextDataLoader(args.batch_size, args.src_vocab, args.tgt_vocab,
                             args.src_text, args.tgt_text, params.max_seq_len,
                             args.src_vocab_size, args.tgt_vocab_size,
                             args.sentences_size)
    enc_inputs, dec_inputs, _, _ = dataset.next_batch()

    # Model
    graph, mesh_to_impl, mtf_loss = Transformer(enc_inputs, dec_inputs, params,
                                                dataset.src_vocab_size,
                                                dataset.tgt_vocab_size,
                                                args.strategy, t.num_nodes,
                                                t.num_gpus)

    # Train
    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
    config = tf.ConfigProto(allow_soft_placement=False)
    t.train_model(graph,
                  mesh_to_impl,
                  mtf_loss,
                  dataset,
                  config=config,
                  run_options=run_options)
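Note that report_tensor_allocations_upon_oom does not trace execution; it only annotates an out-of-memory error, if one occurs, with the tensor allocations live at the time, which helps attribute the memory to specific ops.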
Example #6
def tf1_benchmark():
    """Run TF1 inference and benchmark."""
    # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
    from tensorflow.python.client import timeline
    with tf1.Session() as sess:
        model = effnetv2_model.EffNetV2Model(FLAGS.model_name,
                                             FLAGS.hparam_str)
        batch_size = FLAGS.batch_size
        run_options = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
        run_metadata = tf1.RunMetadata()
        isize = FLAGS.image_size or model.cfg.eval.isize
        data_dtype = tf.float16 if FLAGS.mixed_precision else tf.float32
        inputs = tf.ones((batch_size, isize, isize, 3), data_dtype)
        output = model(inputs, training=False)
        sess.run(tf1.global_variables_initializer())

        print('starting warmup.')
        for _ in range(5):
            sess.run(output)

        print('starting benchmark.')
        start = time.perf_counter()
        for _ in range(10):
            sess.run(output)
        end = time.perf_counter()
        inference_time = (end - start) / 10

        print('Per batch inference time: ', inference_time)
        print('FPS: ', batch_size / inference_time)

        if FLAGS.trace_file:
            sess.run(output, options=run_options, run_metadata=run_metadata)
            with tf.io.gfile.GFile(FLAGS.trace_file, 'w') as f:
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                f.write(trace.generate_chrome_trace_format(show_memory=True))
Example #7
def get_exec_time_profile(lyr, batch_size, get_grads=False):

    print(lyr.__class__.__name__)

    run_opts = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
    if not isinstance(lyr.input, list):
        input_shapes = [(batch_size,) + tuple(lyr.input.shape[1:])]
    else:
        input_shapes = [(batch_size,) + tuple(inp.shape[1:]) for inp in lyr.input]
    inputs = [tf.random.normal(shp) for shp in input_shapes]
    func = tf.function(lyr)
    conc = func.get_concrete_function(*[tf.TensorSpec(shape=shp, dtype=tf.float32)
                                        for shp in input_shapes])
    run_meta = tf1.RunMetadata()

    with tf1.Session() as sess:
        sess.run(tf1.global_variables_initializer())
        out = conc(*inputs)
        sess.run(out, options=run_opts, run_metadata=run_meta)
        profile = tf1.profiler.Profiler(sess.graph)
        profile.add_step(0, run_meta)
        profiler_options = (tf1.profiler.ProfileOptionBuilder(
            tf1.profiler.ProfileOptionBuilder.time_and_memory(
                min_cpu_micros=int(0)
            )).with_step(0).with_empty_output().build())
        prof = profile.profile_graph(options=profiler_options)
        micro_s = prof.total_exec_micros
        if get_grads:
            # Build gradient ops against a random target; note they are
            # constructed here but not profiled by this function.
            out_target = tf.random.normal(tf.shape(out))
            loss = tf.losses.mean_squared_error(out, out_target)
            grads = tf.gradients(loss, inputs)
    return micro_s, prof
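profile_graph returns a tfprof profile proto for the step added via add_step; total_exec_micros is its aggregated execution time, in microseconds, over the profiled step.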
Example #8
def get_exec_time_timeline(mod, batch_size, get_grads=False):
    run_opts = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
    if not isinstance(mod.input, list):
        input_shapes = [(batch_size,) + tuple(mod.input.shape[1:])]
        output_shapes = [(batch_size,) + tuple(mod.output.shape[1:])]
    else:
        input_shapes = [(batch_size,) + tuple(inp.shape[1:]) for inp in mod.input]
        output_shapes = [(batch_size,) + tuple(out.shape[1:]) for out in mod.output]
    inputs = [tf.random.normal(shp) for shp in input_shapes]
    outputs = [tf.random.normal(shp) for shp in output_shapes]
    func = tf.function(mod)
    if len(inputs) == 1:
        conc = func.get_concrete_function(tf.TensorSpec(shape=input_shapes[0],
                                                        dtype=tf.float32))
    else:
        conc = func.get_concrete_function([tf.TensorSpec(shape=shp, dtype=tf.float32)
                                           for shp in input_shapes])

    with tf1.Session() as sess:
        run_meta = tf1.RunMetadata()
        sess.run(tf1.global_variables_initializer())
        out = conc(*inputs)
        if not get_grads:
            sess.run(out, options=run_opts, run_metadata=run_meta)
            t1 = timeline.Timeline(run_meta.step_stats)
            ctf = t1.generate_chrome_trace_format()
        else:
            grads = tf.gradients(out, inputs, grad_ys=outputs)
            run_meta = tf1.RunMetadata()
            sess.run(grads, options=run_opts, run_metadata=run_meta)
            t1 = timeline.Timeline(run_meta.step_stats)
            ctf = t1.generate_chrome_trace_format()

    return convert_string_to_time(ctf)
Example #9
def evaluate_full_batch(sess, model, minibatch_iter, many_runs_timeline, mode):
    """
    Full-batch evaluation.
    NOTE: here the GCN runs through the full graph; however, the F1 score is
        calculated for validation / test nodes only.
    """
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    t1 = time.time()
    num_cls = minibatch_iter.class_arr.shape[-1]
    feed_dict, labels = minibatch_iter.feed_dict(mode)
    if args_global.timeline:
        preds, loss = sess.run([model.preds, model.loss],
                               feed_dict=feed_dict,
                               options=options,
                               run_metadata=run_metadata)
        fetched_timeline = timeline.Timeline(run_metadata.step_stats)
        chrome_trace = fetched_timeline.generate_chrome_trace_format()
        many_runs_timeline.append(chrome_trace)
    else:
        preds, loss = sess.run([model.preds, model.loss], feed_dict=feed_dict)
    node_val_test = minibatch_iter.node_val if mode == 'val' else minibatch_iter.node_test
    t2 = time.time()
    f1_scores = calc_f1(labels[node_val_test], preds[node_val_test],
                        model.sigmoid_loss)
    return loss, f1_scores[0], f1_scores[1], (t2 - t1)
Example #10
    def train(self, num_inputs, writer=None, step_offset=0):
        """ Train the network on the data provided by the input tensor.
        :param num_inputs: The total number of inputs in the data-set. Used to determine batches per epoch
        :param writer: The summary writer to add summaries to. This is created by the caller so when we stack layers
                        we don't end up with duplicate outputs. If `None` then no summaries will be written.
        :param step_offset: The offset for the global step variable so I don't accidentally overwrite my summaries
        """
        # Divide by num_gpus to avoid accidentally training on the same data a bunch of times
        if self._gpus > 0:
            batches_per_epoch = num_inputs // self._batch_size // self._gpus
        else:
            batches_per_epoch = num_inputs // self._batch_size
        total_batches = batches_per_epoch * self._max_epochs
        # Get how many batches constitute roughly 10 percent of the total for recording summaries
        summary_mod = int(0.1 * total_batches)
        global_step = step_offset

        logging.info("Training self-organizing Map")
        for epoch in range(self._max_epochs):
            logging.info("Epoch: {}/{}".format(epoch, self._max_epochs))
            for batch in range(batches_per_epoch):
                current_batch = batch + (batches_per_epoch * epoch)
                global_step = current_batch + step_offset
                percent_complete = current_batch / total_batches
                logging.debug("\tBatch {}/{} - {:.2%} complete".format(
                    batch, batches_per_epoch, percent_complete))
                # Only do summaries when a SummaryWriter has been provided
                if writer:
                    if current_batch > 0 and current_batch % summary_mod == 0:
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        summary, _, _ = self._sess.run(
                            [
                                self._merged, self._training_op,
                                self._activity_op
                            ],
                            feed_dict={self._epoch: epoch},
                            options=run_options,
                            run_metadata=run_metadata)
                        writer.add_run_metadata(run_metadata,
                                                "step_{}".format(global_step))
                        writer.add_summary(summary, global_step)
                        self._save_checkpoint(global_step)
                    else:
                        summary, _ = self._sess.run(
                            [self._merged, self._training_op],
                            feed_dict={self._epoch: epoch})
                        writer.add_summary(summary, global_step)
                else:
                    self._sess.run(self._training_op,
                                   feed_dict={self._epoch: epoch})

        self._trained = True
        return global_step
Example #11
def get_exec_time_timeline(model,
                           batch_size,
                           get_grads=False,
                           num_runs=1,
                           return_timeline=False):
    print("get_exec_time_timeline", model.__class__.__name__)
    run_opts = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
    input_shapes, output_shapes = get_shapes(model, batch_size)
    concrete_function = get_concrete_function(model, input_shapes)

    times = []

    for run in range(num_runs + 1):
        with tf1.Session() as sess:
            run_meta = tf1.RunMetadata()
            sess.run(tf1.global_variables_initializer())
            inputs = [tf.random.normal(shp) for shp in input_shapes]
            outputs = [tf.random.normal(shp) for shp in output_shapes]
            out = concrete_function(*inputs)
            if not get_grads:
                sess.run(out, options=run_opts, run_metadata=run_meta)
                t1 = timeline.Timeline(run_meta.step_stats)
                ctf = t1.generate_chrome_trace_format()
            else:
                grads = tf.gradients(out, inputs, grad_ys=outputs)
                run_meta = tf1.RunMetadata()
                sess.run(grads, options=run_opts, run_metadata=run_meta)
                t1 = timeline.Timeline(run_meta.step_stats)
                ctf = t1.generate_chrome_trace_format()
            if return_timeline:
                return ctf

        time = convert_string_to_time(ctf)
        times.append(time)

    if np.std(times) <= np.std(times[1:]):
        return np.average(times), np.std(times)
    # Filter first run
    return np.average(times[1:]), np.std(times[1:])
Example #12
File: lm.py Project: yyht/lamb
 def fit(self, feed, session=None):
     """Training step for observed source language example."""
     if session is None:
         session = tf.get_default_session()
     run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
     _, cost, summary, last_state = session.run([
         self.training_update, self.unregularized_loss,
         self.training_summary, self.last_state
     ],
                                                feed_dict=feed,
                                                options=run_options)
     return cost, summary, last_state
Example #13
def get_report(loop_op: tf.Operation,
               infeed_queue_initializer: tf.Operation,
               outfeed_op: tf.Operation,
               report_dest: str,
               available_memory_proportion: Optional[float] = 0.6) -> None:
    """Generate report from running model on IPU and save to disk.

    Args:
        loop_op: Inference op to generate report on.
        infeed_queue_initializer: Initializer for the infeed queue
        outfeed_op: Outfeed operator.
        report_dest: Location to store report.
        available_memory_proportion: Proportion of tile memory available as temporary memory
        for matmul and convolution execution

    """
    # Set compile and device options.
    # NOTE: report_mode is assumed to be defined at module level ('text' or binary).
    use_poplar_text_report = report_mode == 'text'
    opts = ipu_utils.create_ipu_config(
        profiling=True,
        use_poplar_text_report=use_poplar_text_report,
        profile_execution=True)
    opts = ipu_utils.set_matmul_options(opts,
                                        matmul_options={
                                            "availableMemoryProportion":
                                            str(available_memory_proportion)
                                        })
    opts = ipu_utils.set_convolution_options(
        opts,
        convolution_options={
            "availableMemoryProportion": str(available_memory_proportion)
        })
    ipu_utils.auto_select_ipus(opts, [1])
    ipu_utils.configure_ipu_system(opts)

    with tf.device('cpu'):
        report = gen_ipu_ops.ipu_event_trace()

    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
    session = tf.Session()
    session.run(infeed_queue_initializer)
    session.run(loop_op, options=run_options)
    session.run(outfeed_op, options=run_options)
    out = session.run(report)
    if report_mode == 'text':
        # extract the report
        rep = ipu_utils.extract_all_strings_from_event_trace(out)
        logging.info("Writing profiling report to %s" % report_dest)
        with open(report_dest, "w") as f:
            f.write(rep)
    else:
        save_tf_report(out)
Example #14
def main():
    t = trainer.Trainer()
    args = t.args
    lr = 0.01

    # Initialize dataset
    dataset = TextDataLoader(args.batch_size, args.src_vocab, None,
                             args.src_text, None, args.seq_len,
                             args.src_vocab_size, args.tgt_vocab_size,
                             args.sentences_size)
    inputs, labels, _, _ = dataset.next_batch()

    # Convert inputs and labels to int32 to work around a bug in mtf.one_hot
    # that raises a TypeError on mismatched types.
    inputs = tf.cast(inputs, tf.int32)
    labels = tf.cast(labels, tf.int32)

    vocab_size = utils.RoundUp(dataset.src_vocab_size, t.num_gpus)
    print("Vocab size: %d" % vocab_size)
    params = Params(args.batch_size, vocab_size, args.seq_len, t.num_nodes,
                    t.num_gpus)

    # Model
    if args.strategy == 0:
        import rnnlm_data as rnn
    elif args.strategy == 1:
        import rnnlm_opt as rnn
    elif args.strategy == 2:
        import rnnlm_gnmt as rnn
    elif args.strategy == 3:
        import rnnlm_flexflow as rnn
    else:
        assert False
    graph, mesh_to_impl, mtf_loss = rnn.model(params, inputs, labels)

    soft_placement = True

    # Train
    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
    config = tf.ConfigProto(allow_soft_placement=soft_placement,
                            log_device_placement=True)
    t.train_model(graph,
                  mesh_to_impl,
                  mtf_loss,
                  dataset,
                  config=config,
                  run_options=run_options)
Example #15
def run_model(train_op, init, warmup_runs=10, profile_runs=10):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    run_metadata = tf.RunMetadata()
    with tf.Session(config=config) as sess:
        sess.run(init)
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        for i in range(warmup_runs):
            sess.run(train_op)

        for i in range(profile_runs):
            sess.run(train_op,
                     options=options,
                     run_metadata=run_metadata)

    return sess.graph_def, run_metadata
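The returned metadata can be turned into a Chrome trace in the usual way. A sketch, assuming train_op and init come from the surrounding graph-building code; note that run_metadata is overwritten on each profiled run, so the trace reflects the last one:

from tensorflow.python.client import timeline

graph_def, run_metadata = run_model(train_op, init)
trace = timeline.Timeline(step_stats=run_metadata.step_stats)
with open('trace.json', 'w') as f:
    f.write(trace.generate_chrome_trace_format(show_memory=True))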
Example #16
 def __init__(self,
              timing_topn=20,
              timing_min_ms=100,
              memory_topn=20,
              memory_min_bytes=1024 * 1024,
              every_secs=None,
              every_steps=None,
              stats_client=None):
     self._timing_topn = timing_topn
     self._timing_min_ms = timing_min_ms
     self._memory_topn = memory_topn
     self._memory_min_bytes = memory_min_bytes
     self._stats_client = stats_client or _gctx.stats_client
     self._timer = tf.train.SecondOrStepTimer(every_secs=every_secs,
                                              every_steps=every_steps)
      self._run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
Example #17
    def trace_solver_solution(save_path: PathLike, train_ds, solver):
        import tensorflow.compat.v1 as tf1
        from tensorflow.python.client import timeline

        data_iter = train_ds.__iter__()
        data_list = [x.numpy() for x in data_iter.next()]
        with tf1.Session() as sess:
            # dataset and model_name are assumed to come from the enclosing scope.
            sqrtn_fn, *_ = _build_model_via_solver(dataset, model_name, train_ds.element_spec, solver)
            out = sqrtn_fn(*[tf1.convert_to_tensor(x) for x in data_list])

            run_meta = tf1.RunMetadata()
            sess.run(tf1.global_variables_initializer())
            sess.run(out, options=tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE), run_metadata=run_meta)
            t1 = timeline.Timeline(run_meta.step_stats)
            lctf = t1.generate_chrome_trace_format()

        with Path(save_path).open("w") as f:
            f.write(lctf)
Example #18
    def testMultipleInt32ValuesOverMultipleRunsAreRecorded(self):
        with tf.Session() as sess:
            x_init_val = np.array([10], dtype=np.int32)
            x_init = tf.constant(x_init_val, shape=[1], name="x_init")
            x = tf.Variable(x_init, name="x")

            x_inc_val = np.array([2], dtype=np.int32)
            x_inc = tf.constant(x_inc_val, name="x_inc")
            inc_x = tf.assign_add(x, x_inc, name="inc_x")

            sess.run(x.initializer)

            run_options = tf.RunOptions(output_partition_graphs=True)
            tf_debug.watch_graph(
                run_options,
                sess.graph,
                debug_ops=["DebugNumericSummary"],
                debug_urls=[self._debug_url],
            )

            # Increase three times.
            for _ in range(3):
                sess.run(inc_x, options=run_options)

        # Debugger data is stored within a special directory within logdir.
        event_files = glob.glob(
            os.path.join(
                self._logdir,
                constants.DEBUGGER_DATA_DIRECTORY_NAME,
                "events.debugger*",
            ))
        self.assertEqual(1, len(event_files))

        self._check_health_pills_in_events_file(
            event_files[0],
            {
                "x_inc:0:DebugNumericSummary": [x_inc_val] * 3,
                "x:0:DebugNumericSummary": [
                    x_init_val,
                    x_init_val + x_inc_val,
                    x_init_val + 2 * x_inc_val,
                ],
            },
        )
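tf_debug.watch_graph mutates run_options in place: it attaches the requested debug_ops (here DebugNumericSummary, which produces the health pills asserted above) to the graph's tensors and routes their output to debug_urls.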
Example #19
    def compile(self, optimizer, clipnorm, loss='mse'):
        # TODO(KGF): check the following import taken from runner.py
        # Was not in this file, originally.
        from tensorflow.keras.optimizers import (SGD, Adam, RMSprop, Nadam)
        if optimizer == 'sgd':
            optimizer_class = SGD(lr=self.DUMMY_LR, clipnorm=clipnorm)
        elif optimizer == 'momentum_sgd':
            optimizer_class = SGD(lr=self.DUMMY_LR,
                                  clipnorm=clipnorm,
                                  decay=1e-6,
                                  momentum=0.9)
        elif optimizer == 'tf_momentum_sgd':
            # TODO(KGF): removed TFOptimizer wrapper from here and below
            # may not work anymore? See
            # https://github.com/tensorflow/tensorflow/issues/22780
            optimizer_class = tf.train.MomentumOptimizer(
                learning_rate=self.DUMMY_LR, momentum=0.9)
        elif optimizer == 'adam':
            optimizer_class = Adam(lr=self.DUMMY_LR, clipnorm=clipnorm)
        elif optimizer == 'tf_adam':
            optimizer_class = tf.train.AdamOptimizer(
                learning_rate=self.DUMMY_LR)
        elif optimizer == 'rmsprop':
            optimizer_class = RMSprop(lr=self.DUMMY_LR, clipnorm=clipnorm)
        elif optimizer == 'nadam':
            optimizer_class = Nadam(lr=self.DUMMY_LR, clipnorm=clipnorm)
        else:
            print("Optimizer not implemented yet")
            exit(1)

        # Timeline profiler
        if self.conf is not None and self.conf['training']['timeline_prof']:
            self.run_options = tf.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            self.run_metadata = tf.RunMetadata()
            self.model.compile(optimizer=optimizer_class,
                               loss=loss,
                               options=self.run_options,
                               run_metadata=self.run_metadata)
        else:
            self.model.compile(optimizer=optimizer_class, loss=loss)

        self.ensure_equal_weights()
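Passing options and run_metadata through Model.compile relies on the TF1 graph-mode Keras backend forwarding them to its internal session.run calls; eager-mode TF2 Keras does not support these arguments.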
Example #20
    def _test_drive(self, save_dir):
        self.train_set.initialize(
            self.model.session)  # Initialize training iterator
        handles = self.train_set.get_string_handles(
            self.model.session)  # Get a string handle from training iterator
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        feed_dict = {
            self.model.is_train: True,
            self.model.monte_carlo: False,
            self.model.augmentation: True,
            self.learning_rate_multiplier: 0.0
        }
        for h_t, h in zip(self.model.handles, handles):
            feed_dict.update({h_t: h})

        print('Running test epoch...')
        start_time = time.time()
        i = 0
        while True:
            try:
                self.model.session.run([
                    self.optimization_operation, self.model.loss,
                    self.model.Y_all, self.model.pred
                ],
                                       feed_dict=feed_dict,
                                       options=options,
                                       run_metadata=run_metadata)
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format(
                    show_memory=False)
                with open(
                        os.path.join(save_dir, 'logs',
                                     'timeline_{:03}.json'.format(i)),
                        'w') as f:
                    f.write(chrome_trace)
                i += 1
            except tf.errors.OutOfRangeError:
                break

        print('Test epoch: {:.2f} sec'.format(time.time() - start_time))
Example #21
    def benchmark(self, image_arrays, trace_filename=None):
        """Benchmark inference latency/throughput.

        Args:
          image_arrays: a list of images in numpy array format.
          trace_filename: if not None, the filename to which the trace is saved.
        """
        if not self.sess:
            self.build()

        # init session
        self.sess.run(
            self.signitures["prediction"],
            feed_dict={self.signitures["image_arrays"]: image_arrays},
        )

        start = time.perf_counter()
        for _ in range(10):
            self.sess.run(
                self.signitures["prediction"],
                feed_dict={self.signitures["image_arrays"]: image_arrays},
            )
        end = time.perf_counter()
        inference_time = (end - start) / 10

        print("Per batch inference time: ", inference_time)
        print("FPS: ", self.batch_size / inference_time)

        if trace_filename:
            run_options = tf.RunOptions()
            run_options.trace_level = tf.RunOptions.FULL_TRACE
            run_metadata = tf.RunMetadata()
            self.sess.run(
                self.signitures["prediction"],
                feed_dict={self.signitures["image_arrays"]: image_arrays},
                options=run_options,
                run_metadata=run_metadata,
            )
            with tf.io.gfile.GFile(trace_filename, "w") as trace_file:
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=True))
Example #22
def get_exec_time_loss(loss_fn, logits_shape):
    run_opts = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
    logits = tf.random.normal(logits_shape)
    labels = tf.random.normal(logits_shape)

    @tf.function
    def run_loss():
        return loss_fn(logits, labels)

    conc = run_loss.get_concrete_function()

    with tf1.Session() as sess:
        run_meta = tf1.RunMetadata()
        sess.run(tf1.global_variables_initializer())
        out = conc()
        sess.run(out, options=run_opts, run_metadata=run_meta)
        t1 = timeline.Timeline(run_meta.step_stats)
        lctf = t1.generate_chrome_trace_format()

    return convert_string_to_time(lctf)
Example #23
    def _poll_server_till_success(self, max_tries, poll_interval_seconds):
        for _ in range(max_tries):
            try:
                with tf.Session() as sess:
                    a_init_val = np.array([42.0])
                    a_init = tf.constant(a_init_val, shape=[1], name="a_init")
                    a = tf.Variable(a_init, name="a")

                    run_options = tf.RunOptions(output_partition_graphs=True)
                    tf_debug.watch_graph(run_options,
                                         sess.graph,
                                         debug_ops=["DebugNumericSummary"],
                                         debug_urls=[self._debug_url])

                    sess.run(a.initializer, options=run_options)
                    return True
            except tf.errors.FailedPreconditionError:
                time.sleep(poll_interval_seconds)

        return False
Example #24
    def log_model_analysis(self):
        run_metadata = tf.RunMetadata()
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

        _, loss = self.sess.run(
            [self.optimizer, self.loss],
            feed_dict={
                self.x: self.batch_input,
                self.x2: self.batch_input_bicubic,
                self.y: self.batch_true,
                self.lr_input: self.lr,
                self.dropout: self.dropout_rate
            },
            options=run_options,
            run_metadata=run_metadata)

        # tf.contrib.tfprof.model_analyzer.print_model_analysis(
        #   tf.get_default_graph(),
        #   run_meta=run_metadata,
        #   tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)
        self.first_training = False
Example #25
    def benchmark(self, image_arrays, trace_filename=None):
        """Benchmark inference latency/throughput.

    Args:
      image_arrays: a list of images in numpy array format.
      trace_filename: If None, specify the filename for saving trace.
    """
        if not self.sess:
            self.build()

        # init session
        self.sess.run(
            self.signitures['prediction'],
            feed_dict={self.signitures['image_arrays']: image_arrays})

        start = time.perf_counter()
        for _ in range(10):
            self.sess.run(
                self.signitures['prediction'],
                feed_dict={self.signitures['image_arrays']: image_arrays})
        end = time.perf_counter()
        inference_time = (end - start) / 10

        print('Per batch inference time: ', inference_time)
        print('FPS: ', self.batch_size / inference_time)

        if trace_filename:
            run_options = tf.RunOptions()
            run_options.trace_level = tf.RunOptions.FULL_TRACE
            run_metadata = tf.RunMetadata()
            self.sess.run(
                self.signitures['prediction'],
                feed_dict={self.signitures['image_arrays']: image_arrays},
                options=run_options,
                run_metadata=run_metadata)
            with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=True))
Example #26
def run_model(args):
    image_dim = args.image_size

    if args.channels_last:
        K.set_image_data_format('channels_last')
        input_shape = (image_dim, image_dim, 3)
    else:
        K.set_image_data_format('channels_first')
        input_shape = (3, image_dim, image_dim)

    num_classes = 15
    batch_size = args.batch_size
    model_class = model_choices.get(args.model)
    model = model_class(weights=None,
                        include_top=True,
                        input_shape=input_shape,
                        classes=num_classes)

    if args.tensors_on_oom:
        run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
        run_metadata = tf.RunMetadata()
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      options=run_options,
                      run_metadata=run_metadata)
    else:
        model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

    random_generator = random_image_generator(batch_size, num_classes,
                                              input_shape)
    steps_per_epoch = args.steps
    if dist_mod:
        steps_per_epoch = steps_per_epoch // dist_mod.size()

    verbose = 0 if dist_mod and dist_mod.rank() != 0 else 1
    model.fit_generator(random_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=args.epochs,
                        callbacks=get_callbacks(args),
                        verbose=verbose)
Example #27
  def run(self,
          pianorolls,
          masks=None,
          sample_steps=0,
          current_step=0,
          total_gibbs_steps=0,
          temperature=0.99,
          timeout_ms=0):
    """Given input pianorolls, runs Gibbs sampling to fill in the rest.

    When total_gibbs_steps is 0, total_gibbs_steps is set to
    time * instruments.  If faster sampling is desired at the expense of sample
    quality, total_gibbs_steps can be explicitly set to a lower number,
    possibly to the value of sample_steps if you do not plan on stopping
    sampling early to obtain intermediate results.

    This function can be used to return intermediate results by setting the
    sample_steps to when results should be returned and leaving
    total_gibbs_steps to be 0.

    To continue sampling from intermediate results, set current_step to the
    number of steps taken, and feed in the intermediate pianorolls.  Again
    leaving total_gibbs_steps as 0.

    Builds the graph and restores checkpoint if necessary.

    Args:
      pianorolls: a 4D numpy array of shape (batch, time, pitch, instrument)
      masks: a 4D numpy array of the same shape as pianorolls, with 1s
          indicating positions to mask out.  If None, the masks will have 1s
          where there are no notes, indicating to the model that those
          positions should be filled in.
      sample_steps: an integer indicating the number of steps to sample in this
          call.  If set to 0, then it defaults to total_gibbs_steps.
      current_step: an integer indicating how many steps have already been
          sampled before.
      total_gibbs_steps: an integer indicating the total number of steps that
          a complete sampling procedure would take.
      temperature: a float indicating the temperature for sampling from softmax.
      timeout_ms: Timeout for session.Run. Set to zero for no timeout.

    Returns:
      A dictionary, consisting of "pianorolls" which is a 4D numpy array of
      the sampled results and "time_taken" which is the time taken in sampling.
    """
    if self.sess is None:
      # Build graph and restore checkpoint.
      self.instantiate_sess_and_restore_checkpoint()

    if masks is None:
      masks = np.zeros_like(pianorolls)

    start_time = time.time()
    run_options = None
    if timeout_ms:
      run_options = tf.RunOptions(timeout_in_ms=timeout_ms)
    new_piece = self.sess.run(
        self.samples,
        feed_dict={
            self.placeholders["pianorolls"]: pianorolls,
            self.placeholders["outer_masks"]: masks,
            self.placeholders["sample_steps"]: sample_steps,
            self.placeholders["total_gibbs_steps"]: total_gibbs_steps,
            self.placeholders["current_step"]: current_step,
            self.placeholders["temperature"]: temperature
        }, options=run_options)

    label = "independent blocked gibbs"
    time_taken = (time.time() - start_time) / 60.0
    tf.logging.info("exit  %s (%.3fmin)" % (label, time_taken))
    return dict(pianorolls=new_piece, time_taken=time_taken)
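A hypothetical call, assuming sampler is an instance of the surrounding class and the pianoroll dimensions match the trained model (the shape below is illustrative only):

import numpy as np

pianorolls = np.zeros((1, 32, 46, 4), dtype=np.float32)  # an empty piece to fill in
result = sampler.run(pianorolls, sample_steps=0, temperature=0.99,
                     timeout_ms=60000)
print(result["pianorolls"].shape, result["time_taken"], "min")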
Example #28
    def benchmark_model(self,
                        warmup_runs,
                        bm_runs,
                        num_threads,
                        trace_filename=None):
        """Benchmark model."""
        if self.tensorrt:
            print('Using tensorrt ', self.tensorrt)
            self.build_and_save_model()
            graphdef = self.freeze_model()

        if num_threads > 0:
            print('num_threads for benchmarking: {}'.format(num_threads))
            sess_config = tf.ConfigProto(
                intra_op_parallelism_threads=num_threads,
                inter_op_parallelism_threads=1)
        else:
            sess_config = tf.ConfigProto()

        # 2 == rewriter_config_pb2.RewriterConfig.OFF: turn off dependency optimization.
        sess_config.graph_options.rewrite_options.dependency_optimization = 2
        if self.use_xla:
            sess_config.graph_options.optimizer_options.global_jit_level = (
                tf.OptimizerOptions.ON_2)

        with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
            inputs = tf.placeholder(tf.float32,
                                    name='input',
                                    shape=self.inputs_shape)
            output = self.build_model(inputs, is_training=False)

            img = np.random.uniform(size=self.inputs_shape)

            sess.run(tf.global_variables_initializer())
            if self.tensorrt:
                fetches = [inputs.name] + [i.name for i in output]
                goutput = self.convert_tr(graphdef, fetches)
                inputs, output = goutput[0], goutput[1:]

            if not self.use_xla:
                # Don't use tf.group because XLA removes the whole graph for tf.group.
                output = tf.group(*output)
            for i in range(warmup_runs):
                start_time = time.time()
                sess.run(output, feed_dict={inputs: img})
                print('Warm up: {} {:.4f}s'.format(i,
                                                   time.time() - start_time))
            print('Start benchmark runs total={}'.format(bm_runs))
            timev = []
            for i in range(bm_runs):
                if trace_filename and i == (bm_runs // 2):
                    run_options = tf.RunOptions()
                    run_options.trace_level = tf.RunOptions.FULL_TRACE
                    run_metadata = tf.RunMetadata()
                    sess.run(output,
                             feed_dict={inputs: img},
                             options=run_options,
                             run_metadata=run_metadata)
                    logging.info('Dumping trace to %s', trace_filename)
                    trace_dir = os.path.dirname(trace_filename)
                    if not tf.io.gfile.exists(trace_dir):
                        tf.io.gfile.makedirs(trace_dir)
                    with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                        from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
                        trace = timeline.Timeline(
                            step_stats=run_metadata.step_stats)
                        trace_file.write(
                            trace.generate_chrome_trace_format(
                                show_memory=True))

                start_time = time.time()
                sess.run(output, feed_dict={inputs: img})
                timev.append(time.time() - start_time)

            timev.sort()
            timev = timev[2:bm_runs - 2]
            print(
                '{} {}runs {}threads: mean {:.4f} std {:.4f} min {:.4f} max {:.4f}'
                .format(self.model_name, len(timev), num_threads,
                        np.mean(timev), np.std(timev), np.min(timev),
                        np.max(timev)))
Example #29
    def train(self,
              log_dir=None,
              max_epoch=10000,
              learning_rate=0.001,
              batch_size=None,
              interval_sec=300,
              restore_step=None,
              run_metadata=False):
        """Train model.

        Args:
            log_dir (str): Log directory where log and model is saved.
            max_epoch (int): Size of epoch
            learning_rate (float): Learning rate
            batch_size (int): Batch size when using mini-batch descent method.
                If specifying a size larger then learning data or `None`,
                using batch descent.
            interfal_sec (float): Specify logging time interval in seconds.
                Default by 300.
            restore_step (int): When you specify this argument, this mixin
                resotres model for specified step.
            run_metadata (bool): If true, run metadata and write logs.
        """
        if log_dir is None:
            log_dir = os.path.join(os.path.dirname(__file__), 'tf_logs',
                                   datetime.utcnow().strftime('%Y%m%d%H%M%S'))
        if batch_size is None:
            batch_size = 1
        n_batches = len(self.corpus) // (batch_size * self.time_size)
        jump = (len(self.corpus) - 1) // batch_size
        if run_metadata:
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            metadata = tf.RunMetadata()
        else:
            options = None
            metadata = None
        with self.open_writer(log_dir) as writer:
            with self.open_session(interval_sec=interval_sec,
                                   per_step=n_batches,
                                   restore_step=restore_step) as sess:
                incomes = np.empty([batch_size, self.time_size], dtype=int)
                labels = np.empty([batch_size, self.time_size], dtype=int)
                for b in range(batch_size):
                    incomes[b, ] = self.corpus[b * jump:b * jump +
                                               self.time_size]
                    labels[b, ] = self.corpus[b * jump + 1:b * jump +
                                              self.time_size + 1]
                step = restore_step or 0
                next_hs = np.zeros([batch_size, self.hidden_size])
                if restore_step is None:
                    for summary in sess.run(
                            self.los_summaries,
                            feed_dict={
                                self.incomes: incomes[:batch_size],
                                self.labels: labels[:batch_size],
                                self.prev_hs: next_hs
                            },
                    ):
                        writer.add_summary(summary, step)
                for epoch_i in range(step // self.data_size, max_epoch):
                    for batch_i in range(n_batches):
                        inc, lab = self.fetch_batch(epoch_i, batch_i,
                                                    batch_size, jump, incomes,
                                                    labels)
                        fd = {
                            self.incomes: inc,
                            self.labels: lab,
                            self.prev_hs: next_hs,
                            self.learning_rate: learning_rate,
                        }
                        _, next_hs = sess.run([self.training_op, self.next_hs],
                                              feed_dict=fd,
                                              options=options,
                                              run_metadata=metadata)
                        step += 1
                        if run_metadata:
                            writer.add_run_metadata(metadata, f'step: {step}')
                        self.record(sess, writer, step, feed_dict=fd)
                self.record(sess, writer, step, feed_dict=fd, force_write=True)
Example #30
def run_sobel(logdir, verbose=False):
    """Run a Sobel edge detection demonstration.

    See the summary description for more details.

    Arguments:
      logdir: Directory into which to write event logs.
      verbose: Boolean; whether to log any output.
    """
    if verbose:
        logger.info("--- Starting run: sobel")

    tf.reset_default_graph()
    tf.set_random_seed(0)

    image = get_image(verbose=verbose)
    kernel_radius = tf.placeholder(shape=(), dtype=tf.int32)

    with tf.name_scope("horizontal_kernel"):
        kernel_side_length = kernel_radius * 2 + 1
        # Drop off influence for pixels further away from the center.
        weighting_kernel = 1.0 - tf.abs(
            tf.linspace(-1.0, 1.0, num=kernel_side_length))
        differentiation_kernel = tf.linspace(-1.0, 1.0, num=kernel_side_length)
        horizontal_kernel = tf.matmul(
            tf.expand_dims(weighting_kernel, 1),
            tf.expand_dims(differentiation_kernel, 0),
        )

    with tf.name_scope("vertical_kernel"):
        vertical_kernel = tf.transpose(a=horizontal_kernel)

    float_image = tf.cast(image, tf.float32)
    dx = convolve(float_image, horizontal_kernel, name="convolve_dx")
    dy = convolve(float_image, vertical_kernel, name="convolve_dy")
    gradient_magnitude = tf.norm(tensor=[dx, dy],
                                 axis=0,
                                 name="gradient_magnitude")
    with tf.name_scope("normalized_gradient"):
        normalized_gradient = gradient_magnitude / tf.reduce_max(
            input_tensor=gradient_magnitude)
    with tf.name_scope("output_image"):
        output_image = tf.cast(255 * normalized_gradient, tf.uint8)

    summ = image_summary.op(
        "sobel",
        tf.stack([output_image]),
        display_name="Sobel edge detection",
        description=(
            "Demonstration of [Sobel edge detection]. The step "
            "parameter adjusts the radius of the kernel. "
            "The kernel can be of arbitrary size, and considers "
            "nearby pixels with \u2113\u2082-linear falloff.\n\n"
            # (that says ``$\ell_2$-linear falloff'')
            "Edge detection is done on a per-channel basis, so "
            "you can observe which edges are &ldquo;mostly red "
            "edges,&rdquo; for instance.\n\n"
            "For practical edge detection, a small kernel "
            "(usually not more than more than *r*=2) is best.\n\n"
            "[Sobel edge detection]: %s\n\n"
            "%s" %
            ("https://en.wikipedia.org/wiki/Sobel_operator", IMAGE_CREDIT)),
    )

    with tf.Session() as sess:
        sess.run(image.initializer)
        writer = tf.summary.FileWriter(os.path.join(logdir, "sobel"))
        writer.add_graph(sess.graph)
        for step in xrange(8):
            if verbose:
                logger.info("--- sobel: step: %s" % step)
            feed_dict = {kernel_radius: step}
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            s = sess.run(
                summ,
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata,
            )
            writer.add_summary(s, global_step=step)
            writer.add_run_metadata(run_metadata, "step_%04d" % step)
        writer.close()
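Because each step's run metadata is attached with writer.add_run_metadata under a distinct step_%04d tag, TensorBoard's Graphs dashboard can color the graph by compute time or memory for any of the recorded steps.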