Python ProfileOptionBuilderの例

プログラミング言語: Python

名前空間/パッケージ名: tensorflow.python.profiler.option_builder

メソッド/関数: ProfileOptionBuilder

hotexamples.comのコード掲載数: 30

Python ProfileOptionBuilder - 30件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのtensorflow.python.profiler.option_builder.ProfileOptionBuilderの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: classification.py プロジェクト: giamic/music-analysis

def profiled_run(sess, writer=None, log_step=False, mode='raw_data'):
    """Execute one fully traced training step and write a tfprof report.

    The step is run with ``FULL_TRACE`` so that the collected run metadata
    can be handed to the module-level ``profiler``. This function relies on
    names defined outside of it (``n``, ``steps``, ``global_step``,
    ``merged``, ``train_step``, ``handle``, ``trn_handle``, ``profiler``
    and ``model_folder``).

    :param sess: session object whose ``run`` method executes the step
    :param writer: optional summary writer; when given, the step summary is
        added to it under ``global_step``
    :param log_step: when true, print a progress line before running
    :param mode: either 'timeline' (graph profile with timeline output to
        ``profile_graph.txt``) or 'raw_data' (operation profile written to
        ``profile_time.txt``); both files live under ``model_folder``
    :return: None
    """
    assert mode in ('timeline', 'raw_data')

    if log_step:
        progress = "step {} of {}, global_step set to {}".format(n, steps - 1, global_step)
        print(progress)

    run_meta = tf.RunMetadata()
    trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    fetched = sess.run([merged, train_step],
                       feed_dict={handle: trn_handle},
                       options=trace_options,
                       run_metadata=run_meta)
    summary = fetched[0]
    if writer is not None:
        writer.add_summary(summary, global_step=global_step)
    profiler.add_step(n, run_meta)

    if mode == 'raw_data':
        # Per-operation time and memory, dumped as plain text.
        report_path = os.path.join(model_folder, 'profile_time.txt')
        builder = option_builder.ProfileOptionBuilder(
            option_builder.ProfileOptionBuilder.time_and_memory())
        opts = builder.with_step(-1).with_file_output(report_path).build()
        profiler.profile_operations(options=opts)
    elif mode == 'timeline':
        # Graph view with timeline output.
        timeline_path = os.path.join(model_folder, 'profile_graph.txt')
        builder = option_builder.ProfileOptionBuilder(
            option_builder.ProfileOptionBuilder.time_and_memory())
        opts = builder.with_step(-1).with_timeline_output(timeline_path).build()
        profiler.profile_graph(options=opts)

コード例 #2

ファイルを表示

 def detect_from_cvmat(self, img):
     """Run the network on one image and record a tfprof timeline for the run.

     The image is resized to 448x448, converted from BGR to RGB and rescaled
     with ``x / 255 * 2 - 1`` before being fed to ``self.fc_19``. The run
     metadata is written to ``self.writer`` and to ``self.profiler``, a
     timeline is emitted to ``./tmp_tf/Yolo_profiler.json``, and the decoded
     result is stored in ``self.result`` and passed to ``self.show_results``.

     Args:
         img: image array with a three-element ``shape`` (height, width,
             channels) -- presumably an 8-bit BGR OpenCV image; confirm
             against callers.
     """
     started_at = time.time()
     profile_step = 123  # fixed step id under which this run is registered
     self.h_img, self.w_img, _ = img.shape

     resized = cv2.resize(img, (448, 448))
     rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
     rgb_array = np.asarray(rgb)
     batch = np.zeros((1, 448, 448, 3), dtype='float32')
     batch[0] = (rgb_array / 255.0) * 2.0 - 1.0
     feed = {self.x: batch}
     print(
         "detect frome cvmat------======================================================================="
     )

     net_output = self.sess.run(self.fc_19, feed_dict=feed,
                                options=self.run_options,
                                run_metadata=self.run_metadata)

     # Hand the collected metadata to both the summary writer and tfprof.
     self.writer.add_run_metadata(self.run_metadata, "12345")
     self.profiler.add_step(step=profile_step, run_meta=self.run_metadata)
     timeline_builder = option_builder.ProfileOptionBuilder(
         option_builder.ProfileOptionBuilder.time_and_memory())
     timeline_builder.with_timeline_output(
         timeline_file='./tmp_tf/Yolo_profiler.json')
     timeline_opts = timeline_builder.build()
     self.profiler.profile_graph(timeline_opts)
     self.writer.close()

     self.result = self.interpret_output(net_output[0])
     self.show_results(img, self.result)
     strtime = str(time.time() - started_at)
     if self.disp_console:
         print('Elapsed time : ' + strtime + ' secs' + '\n')

コード例 #3

ファイルを表示

 def __exit__(self, exception_type, exception_value, traceback):
     """Print the operation-level profile when the ``with`` block ends.

     When ``self.profile`` is truthy, ``self.profiler`` is asked for an
     operation view showing 'micros' and 'occurrence', ordered by 'micros',
     limited to a depth of 5.

     Args:
         exception_type: type of the exception raised in the ``with`` body,
             or None.
         exception_value: the exception instance, or None.
         traceback: the associated traceback, or None.

     Returns:
         False, so that an exception raised inside the ``with`` body keeps
         propagating to the caller.
     """
     if self.profile:
         # Generate profiling result
         profile_op_builder = option_builder.ProfileOptionBuilder().select(
             ['micros', 'occurrence']).order_by('micros').with_max_depth(5)
         self.profiler.profile_operations(profile_op_builder.build())
     # The previous ``return self`` was truthy, which tells Python to swallow
     # whatever exception was raised in the ``with`` body. Never suppress.
     return False

コード例 #4

ファイルを表示

 def __init__(self, graph, save_steps=None, save_secs=None,
              output_dir="", suffix=""):
   """Set up the profiler, timer and output locations used by this hook.

   Args:
     graph: graph handed to `model_analyzer.Profiler`.
     save_steps: forwarded to the timer as `every_steps`.
     save_secs: forwarded to the timer as `every_secs`.
     output_dir: `string`, directory used for the summary writer and as the
         parent of the `profile-{}-{}.txt` output file template.
     suffix: stored on the instance as `_suffix`; not otherwise used here.
   """
   self._output_file = os.path.join(output_dir, "profile-{}-{}.txt")
   self._suffix = suffix
   self._file_writer = SummaryWriterCache.get(output_dir)
   self._timer = tf.train.SecondOrStepTimer(
       every_steps=save_steps, every_secs=save_secs)
   self._profiler = model_analyzer.Profiler(graph=graph)

   # Report memory usage: show the 'bytes' column and sort by it. (Selecting
   # ['micros', 'occurrence'] and ordering by 'micros' would sort by time
   # taken instead.)
   builder = option_builder.ProfileOptionBuilder()
   builder.select(['bytes'])
   builder.order_by('bytes')
   builder.with_max_depth(10)  # can be any large number
   self._profile_op_builder = builder

コード例 #5

ファイルを表示

 def add_profile(self, epoch, logs, run_metadata: tf.RunMetadata,
                 profiler: tf.profiler.Profiler,
                 profile_writer: tf.summary.FileWriter, save_path: str):
     """Register one step with tfprof and export its timeline and metadata.

     Args:
         epoch: step id under which ``run_metadata`` is registered; also used
             in the ``step{epoch}`` tag written to ``profile_writer``.
         logs: unused in this method.
         run_metadata: metadata collected from a traced run.
         profiler: profiler that receives the step and produces the timeline.
         profile_writer: writer that receives the run metadata.
         save_path: directory under which ``timeline/`` is created (including
             missing parents) and timeline files prefixed ``step`` are written.
     """
     timeline_root = f"{save_path}/timeline"
     pathlib.Path(timeline_root).mkdir(parents=True, exist_ok=True)

     profiler.add_step(epoch, run_meta=run_metadata)

     builder = option_builder.ProfileOptionBuilder(
         option_builder.ProfileOptionBuilder.time_and_memory())
     builder = builder.with_step(epoch)
     builder = builder.with_timeline_output(f"{timeline_root}/step")
     profiler.profile_graph(options=builder.build())

     profile_writer.add_run_metadata(run_metadata, f"step{epoch}")

コード例 #6

ファイルを表示

 def __exit__(self, exception_type, exception_value, traceback):
     """Stop profiling and print the collected statistics.

     * ``self.profiler == "pyprof"``: disables ``self.profiler_handle`` and
       prints its pstats report sorted by cumulative time.
     * any other value except ``"none"``: asks ``self.profiler_handle`` for
       an operation view showing 'micros' and 'occurrence', ordered by
       'micros', limited to a depth of 5.

     Args:
         exception_type: type of the exception raised in the ``with`` body,
             or None.
         exception_value: the exception instance, or None.
         traceback: the associated traceback, or None.

     Returns:
         False, so that an exception raised inside the ``with`` body keeps
         propagating to the caller.
     """
     if self.profiler == "pyprof":
         import pstats
         try:
             from StringIO import StringIO  # Python 2
         except ImportError:
             # The StringIO module does not exist on Python 3.
             from io import StringIO
         self.profiler_handle.disable()
         stream = StringIO()
         stats = pstats.Stats(self.profiler_handle,
                              stream=stream).sort_stats("cumulative")
         stats.print_stats()
         print(stream.getvalue())
     elif self.profiler != "none":
         # Generate profiling result
         profile_op_builder = option_builder.ProfileOptionBuilder().select(
             ['micros', 'occurrence']).order_by('micros').with_max_depth(5)
         self.profiler_handle.profile_operations(profile_op_builder.build())
     # The previous ``return self`` was truthy, which tells Python to swallow
     # whatever exception was raised in the ``with`` body. Never suppress.
     return False

コード例 #7

ファイルを表示

ファイル: profiler_timeline.py プロジェクト: Pekary/tensorflow_tools

def test():
    """Profile a tiny two-variable graph with tfprof and dump its timeline.

    Builds two randomly initialised [2, 3] variables, runs an element-wise
    product and a matmul (second operand transposed) under ``FULL_TRACE``,
    then:

    * prints the trainable-parameter view by name scope,
    * prints the time-and-memory view by operation,
    * writes a tfprof timeline for step 0 to ``test.out``,
    * prints both results, and
    * writes the Chrome trace of the same run to ``timeline.json``.
    """
    shape = [2, 3]
    a, b = [
        tf.get_variable(name=var_name,
                        shape=shape,
                        initializer=tf.random_normal_initializer(stddev=0.5))
        for var_name in ("a", "b")
    ]

    c = tf.multiply(a, b, name='c')
    d = tf.matmul(a, b, transpose_b=True, name='d')

    with tf.Session() as sess:
        profiler = Profiler(sess.graph)
        run_meta = tf.RunMetadata()
        full_trace = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

        sess.run(tf.global_variables_initializer())
        sess.run([c, d], options=full_trace, run_metadata=run_meta)
        profiler.add_step(0, run_meta)

        # Parameter counts grouped by name scope.
        param_opts = option_builder.ProfileOptionBuilder.trainable_variables_parameter()
        profiler.profile_name_scope(options=param_opts)

        # Time and memory per operation type.
        time_mem_opts = option_builder.ProfileOptionBuilder.time_and_memory()
        profiler.profile_operations(options=time_mem_opts)

        # Graph view of step 0, exported as a timeline.
        timeline_builder = option_builder.ProfileOptionBuilder(
            option_builder.ProfileOptionBuilder.time_and_memory())
        timeline_opts = (timeline_builder
                         .with_step(0)
                         .with_timeline_output("test.out")
                         .build())
        profiler.profile_graph(options=timeline_opts)

        print("c: ", c.eval())
        print("d: ", d.eval())

        # timeline.json = test.out
        chrome_trace = timeline.Timeline(
            run_meta.step_stats).generate_chrome_trace_format()
        with open('timeline.json', 'w') as trace_file:
            trace_file.write(chrome_trace)

コード例 #8

ファイルを表示

ファイル: profile_example.py プロジェクト: LeoZhao-Habana/test-tools

                     options=options,
                     run_metadata=run_metadata)

            #first way to save trace with timeline
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            many_runs_timeline.update_timeline(chrome_trace)

            #second way by using profiler
            profiler.add_step(step=i, run_meta=run_metadata)
        many_runs_timeline.save('timeline_03_merged_%d_runs.json' % runs)

        #Second way to show trace & profile
        #graph view
        #统计内容为每个graph node的运行时间和占用内存
        profile_graph_opts_builder = option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.time_and_memory())
        #输出方式为timeline, 输出文件夹必须存在
        profile_graph_opts_builder.with_timeline_output(timeline_file='mnist_profiler.json')
        #定义显示sess.Run() 第1步的统计数据
        profile_graph_opts_builder.with_step(1)
        #显示视图为graph view
        profiler.profile_graph(profile_graph_opts_builder.build())

        #scope view
        #统计内容为所有trainable Variable Op
        profile_scope_opt_builder = option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.trainable_variables_parameter())
        #显示的嵌套深度为4
        profile_scope_opt_builder.with_max_depth(4)
        #显示字段是params，即参数
        profile_scope_opt_builder.select(['params'])

コード例 #9

ファイルを表示

def train(log_dir, args, hparams):
    """Train the Extractron model with optional tfprof / timeline profiling.

    Creates the output directories under ``log_dir``, builds the data feeder
    and the train/eval models, optionally restores the latest checkpoint and
    then loops until the global step reaches ``args.extractron_train_steps``.
    Inside the loop it periodically writes training summaries, triggers
    garbage collection, runs an evaluation pass (losses, three wav files and
    three spectrogram images) and saves checkpoints through two savers.

    Args:
        log_dir: root directory for checkpoints, plots, wavs, eval output and
            summary events.
        args: run configuration. Attributes read here: ``model``, ``dataset``,
            ``eval_dataset``, ``restore``, ``extractron_train_steps``,
            ``checkpoint_interval``, ``super_checkpoint_interval``,
            ``summary_interval``, ``gc_interval``, ``eval_interval`` and
            ``test_steps``.
        hparams: hyper-parameters. Attributes read here:
            ``extractron_random_seed``, ``tfprof``, ``timeline`` and
            ``sample_rate``; the object is also passed on to the audio helper,
            the feeder and the model builders.

    Returns:
        The checkpoint directory (``<log_dir>/extract_pretrained``) once the
        training loop finishes. If an exception escapes the session body it
        is logged, its traceback is printed and ``None`` is returned.
    """
    voicefilter_audio = Audio(hparams)

    save_dir = os.path.join(log_dir, 'extract_pretrained')
    plot_dir = os.path.join(log_dir, 'plots')
    wav_dir = os.path.join(log_dir, 'wavs')
    spec_dir = os.path.join(log_dir, 'spec-spectrograms')
    eval_dir = os.path.join(log_dir, 'eval-dir')
    eval_wav_dir = os.path.join(eval_dir, 'wavs')
    tensorboard_dir = os.path.join(log_dir, 'extractron_events')
    meta_folder = os.path.join(log_dir, 'metas')

    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(plot_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)
    os.makedirs(spec_dir, exist_ok=True)
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(eval_wav_dir, exist_ok=True)
    os.makedirs(tensorboard_dir, exist_ok=True)
    os.makedirs(meta_folder, exist_ok=True)

    checkpoint_path = os.path.join(save_dir, 'extractron_model.ckpt')
    checkpoint_path2 = os.path.join(save_dir, 'super_extractron_model.ckpt')

    log('Checkpoint path: {}'.format(checkpoint_path))
    log('Using model: {}'.format(args.model))
    log(hparams_debug_string())

    # Start by setting a seed for repeatability
    tf.set_random_seed(hparams.extractron_random_seed)

    # Set up data feeder
    with tf.variable_scope('datafeeder'):
        feeder = Feeder(hparams)
        feeder.setup_dataset(args.dataset, args.eval_dataset)

        class DotDict(dict):
            """
            a dictionary that supports dot notation
            as well as dictionary access notation
            usage: d = DotDict() or d = DotDict({'val1':'first'})
            set attributes: d.val2 = 'second' or d['val2'] = 'second'
            get attributes: d.val2 or d['val2']
            """
            __getattr__ = dict.__getitem__
            __setattr__ = dict.__setitem__
            __delattr__ = dict.__delitem__

            def __init__(self, dct):
                for key, value in dct.items():
                    if hasattr(value, 'keys'):
                        value = DotDict(value)
                    self[key] = value

        dictkeys = [
            'target_linear', 'mixed_linear', 'target_mel', 'mixed_mel',
            'spkid_embeddings'
        ]
        eval_dictkeys = [
            'eval_target_linear', 'eval_mixed_linear', 'eval_target_phase',
            'eval_mixed_phase', 'eval_target_mel', 'eval_mixed_mel',
            'eval_spkid_embeddings'
        ]
        # Expose the feeder outputs by name for the model builders.
        feeder_dict = DotDict(dict(zip(dictkeys, feeder.next)))
        feeder_dict.update(DotDict(dict(zip(eval_dictkeys, feeder.eval_next))))

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    model, _ = model_train_mode(args, feeder_dict, hparams, global_step)
    eval_model = model_test_mode(args, feeder_dict, hparams, global_step)

    # Book keeping
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5)
    saver2 = tf.train.Saver(max_to_keep=15)

    log('Extractron training set to a maximum of {} steps'.format(
        args.extractron_train_steps))

    # Memory allocation on the GPU as needed
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    # Train
    with tf.Session(config=config) as sess:
        try:
            xsummary_writer = SummaryWriter(tensorboard_dir)

            sess.run(tf.global_variables_initializer())

            # saved model restoring
            if args.restore:
                # Restore saved model if the user requested it, default = True
                try:
                    checkpoint_state = tf.train.get_checkpoint_state(save_dir)

                    if (checkpoint_state
                            and checkpoint_state.model_checkpoint_path):
                        log('Loading checkpoint {}'.format(
                            checkpoint_state.model_checkpoint_path),
                            slack=True)
                        saver.restore(sess,
                                      checkpoint_state.model_checkpoint_path)

                    else:
                        log('No model to load at {}'.format(save_dir),
                            slack=True)
                        saver.save(sess,
                                   checkpoint_path,
                                   global_step=global_step)

                except tf.errors.OutOfRangeError as e:
                    log('Cannot restore checkpoint: {}'.format(e), slack=True)
            else:
                log('Starting new training!', slack=True)
                saver.save(sess, checkpoint_path, global_step=global_step)

            profiling = hparams.tfprof or hparams.timeline
            if profiling:
                # Full tracing is required for both the Chrome timeline and
                # the tfprof profiler.
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                if hparams.timeline:
                    from tensorflow.python.client import timeline
                if hparams.tfprof:
                    from tensorflow.python.profiler import model_analyzer, option_builder
                    my_profiler = model_analyzer.Profiler(graph=sess.graph)
                    # Sort by time taken; the report goes to 'profile.log'.
                    profile_op_builder = option_builder.ProfileOptionBuilder()
                    profile_op_builder.select(['micros', 'occurrence'])
                    profile_op_builder.order_by('micros')
                    profile_op_builder.with_max_depth(
                        20)  # can be any large number
                    profile_op_builder.with_file_output('profile.log')
                    profile_op = profile_op_builder.build()

            # Training loop
            while step < args.extractron_train_steps:
                start_time = time.time()
                if profiling:
                    step, loss, _ = sess.run(
                        [global_step, model.loss, model.optimize],
                        options=run_options,
                        run_metadata=run_metadata)
                    if hparams.timeline:
                        # Overwritten on every step: only the latest trace is kept.
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format(
                            show_dataflow=True, show_memory=True)
                        with open('timeline_01.json', 'w') as f:
                            f.write(chrome_trace)
                    if hparams.tfprof:
                        my_profiler.add_step(step=int(step),
                                             run_meta=run_metadata)
                        my_profiler.profile_name_scope(profile_op)
                else:
                    step, loss, _ = sess.run(
                        [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = (
                    'Step {:7d} [{:.3f} sec/step, {:.3f} sec/step, loss={:.5f}, avg_loss={:.5f}]'
                    .format(step, time.time() - start_time,
                            time_window.average, loss, loss_window.average))

                log(message,
                    end='\r',
                    slack=(step % args.checkpoint_interval == 0))

                # A loss above 100 (or NaN) is treated as an exploded loss.
                # NOTE(review): an earlier comment said this threshold had been
                # raised to 1000 for waveglow settings, but the check uses 100
                # -- confirm which value is intended.
                if loss > 100 or np.isnan(loss):
                    log('Loss exploded to {:.5f} at step {}'.format(
                        loss, step))
                    raise Exception('Loss exploded')

                if step % args.summary_interval == 0:
                    log('\nWriting summary at step {}'.format(step))
                    add_train_summary(xsummary_writer, step, loss)

                if step % args.gc_interval == 0:
                    log('\nGarbage collect: {}\n'.format(gc.collect()))

                if step % args.eval_interval == 0:
                    # Run eval and save eval stats
                    log('\nRunning evaluation at step {}'.format(step))

                    #1. avg loss, before, after, predicted mag, mixed phase, mixed_mag, target phase, target_mag
                    #2. 3 wavs
                    #3. 3 mag specs
                    #4. sdr

                    eval_losses = []
                    before_losses = []
                    after_losses = []
                    linear_losses = []

                    for _ in tqdm(range(args.test_steps)):
                        try:
                            eloss, before_loss, after_loss, linear_loss, \
                            mixed_phase, mixed_mel, mixed_linear, \
                            target_phase, target_mel, target_linear, \
                            predicted_linear = sess.run([
                                eval_model.tower_loss[0], eval_model.tower_before_loss[0], eval_model.tower_after_loss[0], eval_model.tower_linear_loss[0],
                                eval_model.tower_mixed_phase[0][0], eval_model.tower_mixed_mel[0][0],
                                eval_model.tower_mixed_linear[0][0],
                                eval_model.tower_target_phase[0][0], eval_model.tower_target_mel[0][0],
                                eval_model.tower_target_linear[0][0],
                                eval_model.tower_linear_outputs[0][0]
                            ])
                            eval_losses.append(eloss)
                            before_losses.append(before_loss)
                            after_losses.append(after_loss)
                            linear_losses.append(linear_loss)
                        except tf.errors.OutOfRangeError:
                            log('\n test dataset out of range')

                    if not eval_losses:
                        # Every eval batch failed: there is nothing to average
                        # and no spectrogram to render, so skip the summary
                        # instead of dividing by zero.
                        log('\nNo evaluation batch succeeded at step {}; '
                            'skipping eval summary'.format(step))
                    else:
                        eval_loss = sum(eval_losses) / len(eval_losses)
                        before_loss = sum(before_losses) / len(before_losses)
                        after_loss = sum(after_losses) / len(after_losses)
                        linear_loss = sum(linear_losses) / len(linear_losses)

                        # The audio and images below come from the last
                        # successful eval batch; the predicted wav reuses the
                        # mixed phase.
                        mixed_wav = voicefilter_audio.spec2wav(
                            mixed_linear, mixed_phase)
                        target_wav = voicefilter_audio.spec2wav(
                            target_linear, target_phase)
                        predicted_wav = voicefilter_audio.spec2wav(
                            predicted_linear, mixed_phase)
                        librosa.output.write_wav(
                            os.path.join(eval_wav_dir,
                                         'step-{}-eval-mixed.wav'.format(step)),
                            mixed_wav, hparams.sample_rate)
                        librosa.output.write_wav(
                            os.path.join(eval_wav_dir,
                                         'step-{}-eval-target.wav'.format(step)),
                            target_wav, hparams.sample_rate)
                        librosa.output.write_wav(
                            os.path.join(
                                eval_wav_dir,
                                'step-{}-eval-predicted.wav'.format(step)),
                            predicted_wav, hparams.sample_rate)

                        mixed_linear_img = plot_spectrogram_to_numpy(
                            mixed_linear.T)
                        target_linear_img = plot_spectrogram_to_numpy(
                            target_linear.T)
                        predicted_linear_img = plot_spectrogram_to_numpy(
                            predicted_linear.T)

                        log('Eval loss for global step {}: {:.3f}'.format(
                            step, eval_loss))
                        log('Writing eval summary!')

                        add_eval_summary(xsummary_writer, step, before_loss,
                                         after_loss, linear_loss, eval_loss,
                                         hparams.sample_rate, mixed_wav,
                                         target_wav, predicted_wav,
                                         mixed_linear_img, target_linear_img,
                                         predicted_linear_img)

                if step % args.super_checkpoint_interval == 0 or step == args.extractron_train_steps:
                    # Save model and current global step
                    saver2.save(sess,
                                checkpoint_path2,
                                global_step=global_step)

                if step % args.checkpoint_interval == 0 or step == args.extractron_train_steps:
                    # Save model and current global step
                    saver.save(sess, checkpoint_path, global_step=global_step)

            log('Extractron training complete after {} global steps!'.format(
                args.extractron_train_steps),
                slack=True)
            return save_dir

        except Exception as e:
            # Best effort: report the failure and fall through (returns None).
            log('Exiting due to exception: {}'.format(e), slack=True)
            traceback.print_exc()

コード例 #10

ファイルを表示

    def profile(self):
        """Profile 20 fully traced iterations of the model and print statistics.

        Resets the default graph, initialises the model for profiling, and
        runs the compute graphs 20 times on reader-provided inputs with
        ``FULL_TRACE`` enabled, using one intra-op and one inter-op thread.
        After every iteration the operation view is printed and a timeline is
        written with the prefix ``<self.profile_out_path>/profile``. Finally
        the per-iteration execution times, their mean and standard deviation
        (first iteration excluded) and the peak bytes of the last iteration
        are printed.

        Reads the module-level global ``bb`` (printed as the batch size and
        passed to the reader).
        """
        global bb
        tf.reset_default_graph()
        self.initialize_profile()
        print("Beginning to profile network with parameters",
              get_num_parameters(self.model.get_variable_scope()))
        placeholders = self.model.get_placeholders()

        # Pass an argv list instead of interpolating the path into a shell
        # command: paths containing spaces or shell metacharacters are then
        # handled correctly and cannot inject commands.
        subprocess.call(["mkdir", "-p", self.profile_out_path])

        graph_output = self.model.get_compute_graphs()

        inputs_feed = self.reader_factory.get_class(
            self.reader_type)(self.training_files, self.num_max_entries,
                              self.num_data_dims, bb).get_feeds()

        init = [
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ]

        session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                      inter_op_parallelism_threads=1)

        inference_time_values = []
        with tf.Session(config=session_conf) as sess:
            sess.run(init)
            profiler = Profiler(sess.graph)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            iteration_number = 0

            print("Starting iterations")
            while iteration_number < 20:

                inputs_train = sess.run(list(inputs_feed))
                features = inputs_train[0]

                # Feeds shared by both model variants.
                inputs_train_dict = {
                    placeholders[0]:
                    features[:, :, self.spatial_features_indices],
                    placeholders[1]:
                    features[:, :, self.spatial_features_local_indices],
                    placeholders[2]:
                    features[:, :, self.other_features_indices],
                    placeholders[3]:
                    features[:, :, self.target_indices],
                    placeholders[4]:
                    inputs_train[1],
                    self.model.is_train:
                    True,
                    self.model.learning_rate:
                    1,
                }
                # Models that do not expose exactly five placeholders take a
                # sixth one, fed from the third reader output.
                if len(placeholders) != 5:
                    inputs_train_dict[placeholders[5]] = inputs_train[2]

                run_meta = tf.RunMetadata()
                start_time = time.time()
                sess.run(
                    graph_output,
                    feed_dict=inputs_train_dict,
                    options=tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE),
                    run_metadata=run_meta)
                print("XC Time: --- %s seconds --- Iteration %d" %
                      (time.time() - start_time, iteration_number))
                profiler.add_step(iteration_number, run_meta)

                # Or profile the timing of your model operations.
                opts = option_builder.ProfileOptionBuilder.time_and_memory()
                profiler.profile_operations(options=opts)

                # Or you can generate a timeline:
                opts = (option_builder.ProfileOptionBuilder(
                    option_builder.ProfileOptionBuilder.time_and_memory()
                ).with_step(iteration_number).with_timeline_output(
                    os.path.join(self.profile_out_path, 'profile')).build())
                x = profiler.profile_graph(options=opts)

                inference_time_values.append(x.total_exec_micros)
                peak_bytes = x.total_peak_bytes

                iteration_number += 1

            print(self.config_name, "Batch size: ", bb)
            print(repr(np.array(inference_time_values)))
            # The first iteration is left out of the statistics (presumably
            # because it includes one-off warm-up cost).
            timings = np.array(inference_time_values, dtype=np.float32)[1:]
            print("Mean", np.mean(timings))
            # NOTE: the label says "Variance" but the value is the standard
            # deviation; the label is kept so existing output stays unchanged.
            print("Variance", np.std(timings))
            print("Peak bytes", peak_bytes)

            # Stop the threads
            coord.request_stop()

            # Wait for threads to stop
            coord.join(threads)

コード例 #11

ファイルを表示

def main(argv=None):  # pylint: disable=unused-argument
    """Evaluate a detection/segmentation network, optionally profiling it.

    Configuration is read from the module-level ``args`` and ``net_config``
    objects rather than from ``argv`` (which is unused).

    Steps:
      1. Build the network selected by ``args.trunk``.
      2. Build the data loader selected by ``args.dataset``.
      3. Evaluate either a single checkpoint (``args.ckpt``) or, when
         ``args.batch_eval`` is set, a subsampled series of the checkpoints
         found under ``CKPT_ROOT + args.run_name``; batch results are appended
         to the evaluation log file.
      4. When ``args.use_profile`` is set, print a name-scope memory profile
         through the detector's profiler.

    Raises:
        ValueError: if ``args.trunk`` or ``args.dataset`` is not one of the
            supported values (previously this surfaced as an unbound-local
            error further down).
    """
    assert args.ckpt > 0 or args.batch_eval
    assert args.detect or args.segment, "Either detect or segment should be True"

    # Resolve the backbone class and its depth from the trunk name.
    if args.trunk == 'resnet50':
        net_cls, depth = ResNet, 50
    elif args.trunk == 'resnet101':
        net_cls, depth = ResNet, 101
    elif args.trunk == 'vgg16':
        net_cls, depth = VGG, 16
    else:
        raise ValueError("Unsupported trunk: %r" % (args.trunk,))

    net = net_cls(config=net_config, depth=depth, training=False)

    # Resolve the dataset loader.
    if args.dataset in ('voc07', 'voc07+12'):
        loader = VOCLoader('07', 'test')
    elif args.dataset == 'voc12':
        loader = VOCLoader('12', 'val', segmentation=args.segment)
    elif args.dataset == 'coco':
        loader = COCOLoader(args.split)
    else:
        raise ValueError("Unsupported dataset: %r" % (args.dataset,))

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True,
                                  per_process_gpu_memory_fraction=0.2))

    with tf.Session(config=session_config) as sess:
        # Only pass a profiler to the detector when profiling was requested,
        # so the detector's own default is used otherwise.
        detector_kwargs = {'no_gt': args.no_seg_gt}
        if args.use_profile:
            detector_kwargs['profiler'] = model_analyzer.Profiler(
                graph=sess.graph)
        detector = Detector(sess, net, loader, net_config, **detector_kwargs)

        if args.dataset == 'coco':
            tester = COCOEval(detector, loader)
        else:
            tester = Evaluation(detector, loader, iou_thresh=args.voc_iou_thresh)

        if not args.batch_eval:
            detector.restore_from_ckpt(args.ckpt)
            tester.evaluate_network(args.ckpt)
        else:
            log.info('Evaluating %s' % args.run_name)
            ckpts_folder = CKPT_ROOT + args.run_name + '/'
            out_file = ckpts_folder + evaluation_logfile

            max_checked = get_last_eval(out_file)
            log.debug("Maximum checked ckpt is %i" % max_checked)
            with open(out_file, 'a') as f:
                # Consider only checkpoints that were not evaluated yet and
                # that are not below the requested minimum.
                start = max(args.min_ckpt, max_checked+1)
                ckpt_files = glob(ckpts_folder + '*.data*')
                folder_has_nums = np.array(list(map(filename2num, ckpt_files)),
                                           dtype='int')
                nums_available = sorted(folder_has_nums[folder_has_nums >= start])

                nums_to_eval = []
                if nums_available:
                    # Walk from the newest checkpoint backwards, keeping one
                    # checkpoint every time the gap reaches args.step.
                    nums_to_eval.append(nums_available[-1])
                    for n in reversed(nums_available):
                        if nums_to_eval[-1] - n >= args.step:
                            nums_to_eval.append(n)
                    nums_to_eval.reverse()
                else:
                    # Nothing new to evaluate: avoid indexing an empty list.
                    log.info('No checkpoints numbered >= %i found in %s'
                             % (start, ckpts_folder))

                for ckpt in nums_to_eval:
                    log.info("Evaluation of ckpt %i" % ckpt)
                    tester.reset()
                    detector.restore_from_ckpt(ckpt)
                    res = tester.evaluate_network(ckpt)
                    f.write(res)
                    # Flush after every checkpoint so partial results survive
                    # an interrupted run.
                    f.flush()

        if args.use_profile:
            profile_scope_builder = option_builder.ProfileOptionBuilder(
                # option_builder.ProfileOptionBuilder.trainable_variables_parameter()
            )
            profile_scope_builder.with_max_depth(4)
            profile_scope_builder.with_min_memory(int(2e6))
            profile_scope_builder.with_step(2)
            profile_scope_builder.select(['bytes'])
            # Optional refinements, left disabled:
            # profile_scope_builder.with_node_names(show_name_regexes=['.*resnet.*', '.*ssd.*'])
            # profile_scope_builder.with_node_names(hide_name_regexes=['.*resnet.*', '.*ssd.*'])
            # profile_scope_builder.order_by('output_bytes')
            detector.profiler.profile_name_scope(profile_scope_builder.build())

コード例 #12

ファイルを表示

ファイル: ba2.py プロジェクト: sfu-gruvi-3dv/BA-NET

    def trackPY(self, image1, image2, intrinsics, points, depths, initR,
                initT):
        """Run one fully-traced session step and profile it.

        Each argument is fed to the placeholder of the matching name
        (``self.placeholder_image1`` ... ``self.placeholder_initT``).

        Side effects:
          * prints the wall-clock duration of the session run;
          * registers the run with a ``tf.profiler.Profiler`` under the
            current value of the module-level counter ``step`` and prints
            name-scope and operation profiles;
          * writes a timeline with output path ``./time/time_<step>.json``;
          * calls ``tf.profiler.advise`` when ``step`` equals 1;
          * increments the module-level ``step``.

        Returns:
            A 3-tuple ``(rotations, translations, keep_ratio)`` taken from
            the session results.
        """
        # `step` is a module-level counter. The declaration has to precede
        # the first use inside the function: declaring it afterwards is a
        # SyntaxError on Python 3 and a SyntaxWarning on Python 2.
        global step

        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        profiler = tf.profiler.Profiler(self.session.graph)

        fetches = {
            'rotations': self.predict_rotations,
            'translations': self.predict_translations,
            'keep_ratio': self.ratio
        }

        feed_dict = {
            self.placeholder_image1: image1,
            self.placeholder_image2: image2,
            self.placeholder_intrin: intrinsics,
            self.placeholder_points: points,
            self.placeholder_depths: depths,
            self.placeholder_initR: initR,
            self.placeholder_initT: initT
        }

        start_time = time.time()
        run_meta = tf.RunMetadata()
        results = self.session.run(fetches,
                                   feed_dict=feed_dict,
                                   options=options,
                                   run_metadata=run_meta)
        duration = time.time() - start_time
        # Single pre-formatted argument: prints identically whether `print`
        # is the Python 2 statement or the Python 3 function.
        print("duration: %s" % duration)

        profiler.add_step(step, run_meta)
        profiler.profile_name_scope(
            options=(option_builder.ProfileOptionBuilder.
                     trainable_variables_parameter()))
        opts = option_builder.ProfileOptionBuilder.time_and_memory()
        profiler.profile_operations(options=opts)
        opts = (option_builder.ProfileOptionBuilder(
            option_builder.ProfileOptionBuilder.time_and_memory()).with_step(
                step).with_timeline_output("./time/time_%d.json" %
                                           step).build())
        profiler.profile_graph(options=opts)

        if step == 1:
            tf.profiler.advise(self.session.graph, run_meta=run_meta)
        step += 1

        return results['rotations'], results['translations'], results[
            'keep_ratio']

コード例 #13

ファイルを表示

def main(_):
    """Build the MNIST network, run two traced training steps and profile them.

    The graph is assembled from two placeholders, the ``deepnn`` helper, a
    sparse softmax cross-entropy loss, an Adam optimizer step and an accuracy
    node. Each training step is executed with ``FULL_TRACE`` run options; its
    ``RunMetadata`` is added to a ``model_analyzer.Profiler``, after which an
    operation profile is requested and a timeline is requested with the
    output path ``./timeline_output/step``.

    The single positional argument is ignored. Several alternative
    profiling snippets are kept below as inert string literals/comments.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_,
                                                               logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    # Dump the graph definition to a fresh temporary directory.
    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    # Profiler modules are imported locally, right before their only use.
    from tensorflow.python.profiler import model_analyzer
    from tensorflow.python.profiler import option_builder
    with tf.Session(config=get_sess_config()) as sess:
        sess.run(tf.global_variables_initializer())
        profiler = model_analyzer.Profiler(sess.graph)
        # Only two steps are run here; the original 20000-step loop is
        # left commented out.
        #for i in range(20000):
        for i in range(2):
            batch = mnist.train.next_batch(21000)
            # Disabled periodic accuracy report (inert string literal).
            '''if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print('step %d, training accuracy %g' % (i, train_accuracy))
      '''
            #train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
            # A fresh RunMetadata per step receives that step's trace.
            run_metadata = tf.RunMetadata()
            sess.run(
                train_step,
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 0.5
                },
                options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                run_metadata=run_metadata)
            profiler.add_step(i, run_metadata)

            # Profile the model operations: show micros, bytes and
            # occurrence, ordered by micros.
            opts = (tf.profiler.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder.time_and_memory()).select(
                    ['micros', 'bytes',
                     'occurrence']).order_by('micros').build())
            profiler.profile_operations(options=opts)
            # Disabled per-step Python-code profile (inert string literal).
            '''
      opts = (option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.time_and_memory())
        .with_step(i)
        .with_timeline_output("./timeline_output/code_step").build())
      profiler.profile_python(options=opts)
      '''

            # Generate a timeline for this step.
            opts = (option_builder.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder.time_and_memory()).
                    with_step(i).with_timeline_output(
                        "./timeline_output/step").build())
            profiler.profile_graph(options=opts)

        #print('test accuracy %g' % accuracy.eval(feed_dict={
        #    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
    # Print to stdout an analysis of the memory usage and the timing information
    # broken down by python codes.
    # ProfileOptionBuilder = tf.profiler.ProfileOptionBuilder
    # opts = ProfileOptionBuilder(ProfileOptionBuilder.time_and_memory()
    #    ).with_node_names(show_name_regexes=['*']).build()
    #).with_node_names(show_name_regexes=['.*my_code.py.*']).build()
    # Disabled one-shot tf.profiler.profile call (inert string literal).
    '''tf.profiler.profile(
      tf.get_default_graph(),
      run_meta=run_metadata,
      cmd='code',
     options=opts)
  '''
    '''

コード例 #14

ファイルを表示

    profiler = Profiler(sess.graph)

    try:
        logger.info("Trying to find a previous model checkpoint.")
        saver.restore(sess, os.path.join(model_folder, "model.ckpt"))
        logger.info("Previous model restored")
    except tf.errors.NotFoundError:
        logger.warning("No model checkpoint found. Initializing a new model.")

    trn_handle = sess.run(trn_itr.string_handle())
    tst_handle = sess.run(tst_itr.string_handle())
    # test = sess.run([x, song_id, time, y_], feed_dict={handle: trn_handle})
    # print(test)

    opts = (option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.trainable_variables_parameter()).
            with_file_output(os.path.join(model_folder,
                                          'profile_model.txt')).build())
    profiler.profile_name_scope(options=opts)
    targets = [embeddings, y_, distance_matrix, song_id, time]

    steps = params['steps']
    for n in range(steps):
        global_step = sess.run(tf.train.get_global_step())

        # if n == 0:  # log the results on the test set and reconstruct the tree
        if (n > 0 and n % params['test_step'] == 0) or n == steps - 1:
            print("step {} of {}, global_step set to {}. Test time!".format(
                n, steps - 1, global_step))
            # output_folder = test_run(sess, targets, merged, handle, tst_handle, global_step, model_folder, annotations_path, test_writer, saver, clustering=False, tree=True)
            output_folder = profiled_test_run(sess,
                                              targets,

コード例 #15

ファイルを表示

ファイル: hpc_csi_model.py プロジェクト: bmw2142/HPC

## Metric: compare arg-max class indices of the labels and of the outputs.
## Index [1] selects the second element returned by tf.metrics.accuracy.
accuracy = tf.metrics.accuracy(
    labels=tf.argmax(tf_y, axis=1),
    predictions=tf.argmax(Li, axis=1),
)[1]

## Session setup: initialise global and local variables in one grouped op.
sess = tf.Session()
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
sess.run(init_op)

## Profiler setup: full tracing, with one RunMetadata shared by traced runs.
profiler = model_analyzer.Profiler(graph=sess.graph)
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()

## Iterations: 200 training steps; every 50th step also runs a traced
## accuracy evaluation and registers it with the profiler.
for step in range(200):
    _, loss_ = sess.run(
        [optimzor, loss_function],
        {tf_xt: xt, tf_yi: yi, tf_y: y_train, tf_is_training: True})
    if step % 50 != 0:
        continue
    accuracy_ = sess.run(
        accuracy,
        {tf_xt: xt, tf_yi: yi, tf_y: y_train, tf_is_training: True},
        options=run_options,
        run_metadata=run_metadata)
    profiler.add_step(step=step, run_meta=run_metadata)
    print('train loss: %.4f' % loss_, '| test accuracy: %.2f' % accuracy_)

## Profiling execution: Python-code view restricted to nodes whose name
## matches 'mnist.py.*', showing and ordering by execution micros.
profile_code_opt_builder = (
    option_builder.ProfileOptionBuilder()
    .with_max_depth(1000)
    .with_node_names(show_name_regexes=['mnist.py.*'])
    .with_min_execution_time(min_micros=10)
    .select(['micros'])
    .order_by('micros'))
profiler.profile_python(profile_code_opt_builder.build())

コード例 #16

ファイルを表示

ファイル: profile_tensor_ops.py プロジェクト: mverzett/DeepHGCal

with tf.Session() as sess:
    sess.run(init)
    profiler = Profiler(sess.graph)
    # Short alias for the option builder class used throughout the loop.
    builder_cls = option_builder.ProfileOptionBuilder

    for iteration_number in range(num_iterations):
        print("Iteration ", iteration_number)

        # Run the graph once with full tracing and hand the collected
        # metadata to the profiler under this iteration's step number.
        run_meta = tf.RunMetadata()
        full_trace = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        sess.run(graph_dense, options=full_trace, run_metadata=run_meta)
        profiler.add_step(iteration_number, run_meta)

        # Name-scope view using the trainable-variables-parameter options.
        profiler.profile_name_scope(
            options=builder_cls.trainable_variables_parameter())

        # Operation view using the time-and-memory options.
        opts = builder_cls.time_and_memory()
        profiler.profile_operations(options=opts)

        # Graph view for this step, with timeline output directed to
        # profiler_output_file.
        timeline_builder = builder_cls(builder_cls.time_and_memory())
        timeline_builder.with_step(iteration_number)
        timeline_builder.with_timeline_output(profiler_output_file)
        opts = timeline_builder.build()
        profiler.profile_graph(options=opts)

コード例 #17

ファイルを表示

def main(argv):
    argparser = argparse.ArgumentParser(
        'NTP 2.0', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # data
    # WARNING: for countries, it's not necessary to enter the dev/test set as the evaluation does so
    # TODO: fix this behavior - all datasets should have the same behavior
    argparser.add_argument('--train', action='store', type=str)
    argparser.add_argument('--dev', action='store', type=str, default=None)
    argparser.add_argument('--test', action='store', type=str, default=None)

    argparser.add_argument('--clauses',
                           '-c',
                           action='store',
                           type=str,
                           default=None)
    argparser.add_argument('--mentions',
                           action='store',
                           type=str,
                           default=None)
    argparser.add_argument('--mentions-min',
                           action='store',
                           type=int,
                           default=1)

    # model params
    argparser.add_argument('--embedding-size',
                           '-k',
                           action='store',
                           type=int,
                           default=100)
    argparser.add_argument('--batch-size',
                           '-b',
                           action='store',
                           type=int,
                           default=10)
    # k-max for the new variable
    argparser.add_argument('--k-max',
                           '-m',
                           action='store',
                           type=int,
                           default=None)
    argparser.add_argument('--max-depth',
                           '-M',
                           action='store',
                           type=int,
                           default=1)

    # training params
    argparser.add_argument('--epochs',
                           '-e',
                           action='store',
                           type=int,
                           default=100)
    argparser.add_argument('--learning-rate',
                           '-l',
                           action='store',
                           type=float,
                           default=0.001)
    argparser.add_argument('--clip', action='store', type=float, default=1.0)
    argparser.add_argument('--l2', action='store', type=float, default=0.01)

    argparser.add_argument('--kernel',
                           action='store',
                           type=str,
                           default='rbf',
                           choices=['linear', 'rbf'])

    argparser.add_argument('--auxiliary-loss-weight',
                           '--auxiliary-weight',
                           '--aux-weight',
                           action='store',
                           type=float,
                           default=None)
    argparser.add_argument('--auxiliary-loss-model',
                           '--auxiliary-model',
                           '--aux-model',
                           action='store',
                           type=str,
                           default='complex')
    argparser.add_argument('--auxiliary-epochs',
                           '--aux-epochs',
                           action='store',
                           type=int,
                           default=0)

    argparser.add_argument('--corrupted-pairs',
                           '--corruptions',
                           '-C',
                           action='store',
                           type=int,
                           default=1)
    argparser.add_argument('--all', '-a', action='store_true')

    argparser.add_argument('--retrieve-k-facts',
                           '-F',
                           action='store',
                           type=int,
                           default=None)
    argparser.add_argument('--retrieve-k-rules',
                           '-R',
                           action='store',
                           type=int,
                           default=None)

    argparser.add_argument(
        '--index-type',
        '-i',
        action='store',
        type=str,
        default='nmslib',
        choices=['nmslib', 'faiss', 'faiss-cpu', 'random', 'exact'])

    argparser.add_argument('--index-refresh-rate',
                           '-I',
                           action='store',
                           type=int,
                           default=100)

    argparser.add_argument('--nms-m', action='store', type=int, default=15)
    argparser.add_argument('--nms-efc', action='store', type=int, default=100)
    argparser.add_argument('--nms-efs', action='store', type=int, default=100)

    argparser.add_argument('--evaluation-mode',
                           '-E',
                           action='store',
                           type=str,
                           default='ranking',
                           choices=['ranking', 'countries', 'ntn', 'none'])
    argparser.add_argument('--exact-knn-evaluation',
                           action='store',
                           type=str,
                           default=None,
                           choices=[None, 'faiss', 'exact'])

    argparser.add_argument('--loss-aggregator',
                           action='store',
                           type=str,
                           default='sum',
                           choices=['sum', 'mean'])

    argparser.add_argument('--decode', '-D', action='store_true')
    argparser.add_argument('--seed', action='store', type=int, default=0)

    argparser.add_argument('--keep-prob',
                           action='store',
                           type=float,
                           default=1.0)
    argparser.add_argument('--initializer',
                           action='store',
                           type=str,
                           default='uniform',
                           choices=['uniform', 'xavier'])

    argparser.add_argument('--mixed-losses', action='store_true')
    argparser.add_argument('--mixed-losses-aggregator',
                           action='store',
                           type=str,
                           default='mean',
                           choices=['mean', 'sum'])

    argparser.add_argument(
        '--rule-embeddings-type',
        '--rule-type',
        '-X',
        action='store',
        type=str,
        default='standard',
        choices=['standard', 'attention', 'sparse-attention'])

    argparser.add_argument('--unification-type',
                           '-U',
                           action='store',
                           type=str,
                           default='classic',
                           choices=['classic', 'joint'])

    argparser.add_argument('--unification-aggregation-type',
                           action='store',
                           type=str,
                           default='min',
                           choices=['min', 'mul', 'minmul'])

    argparser.add_argument('--epoch-based-batches', action='store_true')

    argparser.add_argument('--evaluate-per-epoch', action='store_true')

    argparser.add_argument('--no-ntp0', action='store_true')

    # checkpointing and regular model saving / loading - if checkpoint-path is not None - do checkpointing
    argparser.add_argument('--dump-path', type=str, default=None)
    argparser.add_argument('--checkpoint', action='store_true')
    argparser.add_argument('--checkpoint-frequency', type=int, default=1000)
    argparser.add_argument('--save', action='store_true')
    argparser.add_argument('--load', action='store_true')

    argparser.add_argument('--explanation',
                           '--explain',
                           action='store',
                           type=str,
                           default=None,
                           choices=['train', 'dev', 'test'])

    argparser.add_argument('--profile', action='store_true')
    argparser.add_argument('--tf-profiler', action='store_true')
    argparser.add_argument('--tensorboard', action='store_true')
    argparser.add_argument('--multimax', action='store_true')

    argparser.add_argument('--dev-only', action='store_true')

    argparser.add_argument('--only-rules-epochs',
                           action='store',
                           type=int,
                           default=0)
    argparser.add_argument('--test-batch-size',
                           action='store',
                           type=int,
                           default=None)

    argparser.add_argument('--input-type',
                           action='store',
                           type=str,
                           default='standard',
                           choices=['standard', 'reciprocal'])

    argparser.add_argument('--use-concrete', action='store_true')

    args = argparser.parse_args(argv)

    checkpoint = args.checkpoint
    dump_path = args.dump_path
    save = args.save
    load = args.load

    is_explanation = args.explanation

    nb_epochs = args.epochs
    nb_aux_epochs = args.auxiliary_epochs

    arguments_filename = None
    checkpoint_path = None
    if load:
        logger.info("Loading arguments from the loaded model...")
        arguments_filename = os.path.join(dump_path, 'arguments.json')
        checkpoint_path = os.path.join(dump_path, 'final_model/')
        # load a model, if there's one to load
    elif checkpoint and not check_checkpoint_finished(
            os.path.join(dump_path, 'checkpoints/')):
        checkpoint_path = os.path.join(dump_path, 'checkpoints/')
        logger.info("Loading arguments from an unfinished checkpoint...")
        arguments_filename = os.path.join(dump_path, 'arguments.json')

    loading_type = None

    if arguments_filename is not None and os.path.exists(arguments_filename):
        with open(arguments_filename, 'r') as f:
            json_arguments = json.load(f)
        args = argparse.Namespace(**json_arguments)
        if load:
            loading_type = 'model'
        elif checkpoint and not check_checkpoint_finished(
                os.path.join(dump_path, 'checkpoints/')):
            loading_type = 'checkpoint'

        # Load arguments from json

        # args = argparse.Namespace(**json_arguments)

        # args = vars(args)
        # for k, v in json_arguments.items():
        #     if k in args and args[k] != v:
        #         logger.info("\t{}={} (overriding loaded model's value of {})".format(k, args[k], v))
        #     if k not in args:
        #         args[k] = v
        #         logger.info("\t{}={} (overriding loaded model's value of {})".format(k, args[k], v))

    import pprint
    pprint.pprint(vars(args))

    train_path = args.train
    dev_path = args.dev
    test_path = args.test

    clauses_path = args.clauses
    mentions_path = args.mentions
    mentions_min = args.mentions_min

    input_type = args.input_type

    entity_embedding_size = predicate_embedding_size = args.embedding_size
    symbol_embedding_size = args.embedding_size

    batch_size = args.batch_size
    seed = args.seed

    learning_rate = args.learning_rate
    clip_value = args.clip
    l2_weight = args.l2
    kernel_name = args.kernel

    aux_loss_weight = 1.0
    if 'auxiliary_loss_weight' in args:
        aux_loss_weight = args.auxiliary_loss_weight

    aux_loss_model = args.auxiliary_loss_model

    nb_corrupted_pairs = args.corrupted_pairs
    is_all = args.all

    index_type = args.index_type
    index_refresh_rate = args.index_refresh_rate

    retrieve_k_facts = args.retrieve_k_facts
    retrieve_k_rules = args.retrieve_k_rules

    nms_m = args.nms_m
    nms_efc = args.nms_efc
    nms_efs = args.nms_efs

    k_max = args.k_max
    max_depth = args.max_depth

    evaluation_mode = args.evaluation_mode
    exact_knn_evaluation = args.exact_knn_evaluation

    loss_aggregator = args.loss_aggregator

    has_decode = args.decode

    keep_prob = 1.0
    if 'keep_prob' in args:
        keep_prob = args.keep_prob
    initializer_name = args.initializer

    mixed_losses = args.mixed_losses
    mixed_losses_aggregator_type = args.mixed_losses_aggregator

    rule_embeddings_type = args.rule_embeddings_type

    unification_type = args.unification_type
    unification_aggregation_type = args.unification_aggregation_type

    is_no_ntp0 = args.no_ntp0
    checkpoint_frequency = args.checkpoint_frequency

    profile = args.profile
    tf_profiler = args.tf_profiler
    tensorboard = args.tensorboard

    multimax = args.multimax
    dev_only = args.dev_only

    n_only_rules_epochs = args.only_rules_epochs

    test_batch_size = args.test_batch_size

    if test_batch_size is None:
        test_batch_size = batch_size * (1 + nb_corrupted_pairs * 2 *
                                        (2 if is_all else 1))

    # fire up eager
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    tf.enable_eager_execution(config=config)

    # set the seeds
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random_state = np.random.RandomState(seed)

    epoch_based_batches = args.epoch_based_batches
    evaluate_per_epoch = args.evaluate_per_epoch
    use_concrete = args.use_concrete

    import multiprocessing

    nms_index_params = {
        'method': 'hnsw',
        'space': 'l2',
        'num_threads': multiprocessing.cpu_count(),
        'm': nms_m,
        'efc': nms_efc,
        'efs': nms_efs
    }

    faiss_index_params = {}
    faiss_index_params_cpu = {}
    try:
        import faiss
        faiss_index_params = {
            'resource':
            faiss.StandardGpuResources() if index_type in {'faiss'} else None
        }
        if faiss_index_params['resource'] is not None:
            faiss_index_params['resource'].noTempMemory()
        faiss_index_params_cpu = {'cpu': True}
    except ImportError:
        pass

    random_index_params = {
        'random_state': random_state,
    }

    index_type_to_params = {
        'nmslib': nms_index_params,
        'faiss-cpu': faiss_index_params_cpu,
        'faiss': faiss_index_params,
        'random': random_index_params,
        'exact': {},
    }

    kernel = gntp.kernels.get_kernel_by_name(kernel_name)

    clauses = []
    if clauses_path:
        with open(clauses_path, 'r') as f:
            clauses += [
                gntp.parse_clause(line.strip()) for line in f.readlines()
            ]

    mention_counts = gntp.read_mentions(mentions_path) if mentions_path else []
    mentions = [(s, pattern, o) for s, pattern, o, c in mention_counts
                if c >= mentions_min]

    data = Data(train_path=train_path,
                dev_path=dev_path,
                test_path=test_path,
                clauses=clauses,
                evaluation_mode=evaluation_mode,
                mentions=mentions,
                input_type=input_type)

    index_store = gntp.lookup.LookupIndexStore(
        index_type=index_type, index_params=index_type_to_params[index_type])

    aux_model = gntp.models.get_model_by_name(aux_loss_model)

    model = gntp.models.NTP(kernel=kernel,
                            max_depth=max_depth,
                            k_max=k_max,
                            retrieve_k_facts=retrieve_k_facts,
                            retrieve_k_rules=retrieve_k_rules,
                            index_refresh_rate=index_refresh_rate,
                            index_store=index_store,
                            unification_type=unification_type)

    neural_kb = NeuralKB(data=data,
                         entity_embedding_size=entity_embedding_size,
                         predicate_embedding_size=predicate_embedding_size,
                         symbol_embedding_size=symbol_embedding_size,
                         model_type='ntp',
                         initializer_name=initializer_name,
                         rule_embeddings_type=rule_embeddings_type,
                         use_concrete=use_concrete)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

    if loading_type == 'checkpoint':
        logger.info(
            "********** Resuming from an unfinished checkpoint **********")
        # dirty hack, but this initializes optimizer's slots, so the loader can populate them
        optimizer._create_slots(neural_kb.variables)
        checkpoint_load(checkpoint_path, neural_kb, optimizer)

    elif loading_type == 'model':
        load_path = os.path.join(dump_path, 'final_model/')
        checkpoint_load(load_path, neural_kb, optimizer)

    # bather will always be ran with the starting random_state...
    batcher = Batcher(data,
                      batch_size,
                      nb_epochs,
                      random_state,
                      nb_corrupted_pairs,
                      is_all,
                      nb_aux_epochs,
                      epoch_based_batches=epoch_based_batches)

    batches_per_epoch = batcher.nb_batches / nb_epochs if nb_epochs > 0 else 0

    # ...and after that, if there's a random state to load, load it :)
    if loading_type is not None:
        checkpoint_rs = load_random_state(checkpoint_path)
        random_state.set_state(checkpoint_rs.get_state())

    batch_times = []
    logger.info('Starting training (for {} batches)..'.format(
        len(batcher.batches)))

    if tf.train.get_or_create_global_step().numpy() > 0:
        logger.info(
            '...checkpoint restoration - resuming from batch no {}'.format(
                tf.train.get_or_create_global_step().numpy() + 1))

    if tensorboard:
        # TODO add changeable params too
        if not os.path.exists(dump_path):
            os.makedirs(dump_path)
        else:
            # this should never happen
            pass

        writer = tf.contrib.summary.create_file_writer(dump_path)
        writer.set_as_default()

    per_epoch_losses = []

    if tf_profiler:
        profiler = model_analyzer.Profiler()

    start_training_time = time.time()

    n_epochs_finished = 0

    if profile:
        manager = multiprocessing.Manager()
        gpu_memory_profiler_return = manager.list()

        def gpu_memory_profiler():
            import subprocess
            import os
            env = os.environ.copy()
            which_gpu = -1
            if 'CUDA_VISIBLE_DEVICES' in env:
                try:
                    which_gpu = int(env['CUDA_VISIBLE_DEVICES'])
                except:
                    pass
            del env['LD_LIBRARY_PATH']
            while True:
                time.sleep(0.1)
                cmd = ["nvidia-smi", "--query-gpu=memory.used", "--format=csv"]
                output = subprocess.check_output(cmd, env=env)
                output = output.decode('utf-8')
                output = output.split('\n')
                if len(output) == 3:  # there's only one gpu
                    which_gpu = 0
                output = output[1:-1]
                if which_gpu > -1:
                    gpu_memory_profiler_return.append(
                        int(output[which_gpu].split()[0]))
                else:
                    gpu_memory_profiler_return.append(output)
            return

        gpu_memory_job = multiprocessing.Process(target=gpu_memory_profiler)
        gpu_memory_job.start()

    is_epoch_end = False
    with context.eager_mode():

        for batch_no, (batch_start, batch_end) in enumerate(batcher.batches):

            if tf_profiler:
                opts = (option_builder.ProfileOptionBuilder(
                    option_builder.ProfileOptionBuilder.
                    trainable_variables_parameter()).with_max_depth(
                        100000).with_step(batch_no).with_timeline_output(
                            'eager_profile').with_accounted_types(['.*'
                                                                   ]).build())

                context.enable_run_metadata()

            # print(sum(random_state.get_state()[1]))

            # TODO fix this - this was here due to checkpointing but causes the first batch to be skipped
            # and will likely cause the test to fail?
            # if tf.train.get_or_create_global_step().numpy() + 1 > batch_no:
            #     continue
            if is_explanation is not None:  # or load_model:
                logger.info("EXPLANATION MODE ON - turning training off!")
                break

            start_time = time.time()

            is_epoch_start = is_epoch_end
            is_epoch_end = (batch_no + 1) - int(
                (batch_no + 1) / batches_per_epoch) * batches_per_epoch < 1

            Xi_batch, Xp_batch, Xs_batch, Xo_batch, target_inputs = batcher.get_batch(
                batch_no, batch_start, batch_end)

            Xi_batch = tf.convert_to_tensor(Xi_batch, dtype=tf.int32)

            # goals should be [GE, GE, GE]
            with tf.GradientTape() as tape:

                if n_only_rules_epochs > n_epochs_finished:
                    is_rules_only = True
                else:
                    is_rules_only = False

                neural_kb.create_neural_kb(is_epoch_start, training=True)

                p_emb = tf.nn.embedding_lookup(neural_kb.relation_embeddings,
                                               Xp_batch)
                s_emb = tf.nn.embedding_lookup(neural_kb.entity_embeddings,
                                               Xs_batch)
                o_emb = tf.nn.embedding_lookup(neural_kb.entity_embeddings,
                                               Xo_batch)

                if keep_prob != 1.0:
                    p_emb = tf.nn.dropout(p_emb, keep_prob)
                    s_emb = tf.nn.dropout(s_emb, keep_prob)
                    o_emb = tf.nn.dropout(o_emb, keep_prob)

                if batcher.is_pretraining:
                    # PRE-TRAINING
                    aux_scores = aux_model.predict(p_emb, s_emb, o_emb)
                    loss = aux_model.loss(target_inputs,
                                          aux_scores,
                                          aggregator=loss_aggregator)
                else:

                    goal_scores, other = model.predict(
                        p_emb,
                        s_emb,
                        o_emb,
                        neural_facts_kb=neural_kb.neural_facts_kb,
                        neural_rules_kb=neural_kb.neural_rules_kb,
                        mask_indices=Xi_batch,
                        is_training=True,
                        target_inputs=target_inputs,
                        mixed_losses=mixed_losses,
                        aggregator_type=mixed_losses_aggregator_type,
                        no_ntp0=is_no_ntp0,
                        support_explanations=is_explanation is not None,
                        unification_score_aggregation=
                        unification_aggregation_type,
                        multimax=multimax,
                        tensorboard=tensorboard)

                    proof_states, new_target_inputs = other

                    if multimax:
                        target_inputs = new_target_inputs

                    model_loss = model.loss(target_inputs,
                                            goal_scores,
                                            aggregator=loss_aggregator)
                    loss = model_loss

                    if aux_loss_weight is not None and aux_loss_weight > 0.0:
                        aux_scores = aux_model.predict(p_emb, s_emb, o_emb)
                        loss_aux = aux_loss_weight * aux_model.loss(
                            target_inputs,
                            aux_scores,
                            aggregator=loss_aggregator)
                        loss += loss_aux

                if l2_weight:
                    loss_l2_weight = l2_weight * tf.add_n(
                        [tf.nn.l2_loss(var) for var in neural_kb.variables])
                    if loss_aggregator == 'mean':
                        num_of_vars = tf.reduce_sum([
                            tf.reduce_prod(var.shape)
                            for var in neural_kb.variables
                        ])
                        loss_l2_weight /= tf.cast(num_of_vars, tf.float32)
                    loss += loss_l2_weight

            # if not is_epoch_end:
            per_epoch_losses.append(loss.numpy())

            logger.info('Loss @ batch {} on {}: {}'.format(
                batch_no, batcher.nb_batches, loss))

            model_variables = neural_kb.get_trainable_variables(
                is_rules_only=is_rules_only)
            gradients = tape.gradient(loss, model_variables)
            grads_and_vars = [(tf.clip_by_value(grad, -clip_value,
                                                clip_value), var)
                              for grad, var in zip(gradients, model_variables)]

            optimizer.apply_gradients(
                grads_and_vars=grads_and_vars,
                global_step=tf.train.get_or_create_global_step())

            if tensorboard:
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('loss_total', loss)
                    tf.contrib.summary.scalar('loss_ntp_model', model_loss)
                    if aux_loss_weight is not None and aux_loss_weight > 0.0:
                        tf.contrib.summary.scalar('loss_aux_model', loss_aux)
                    if l2_weight != 0.0:
                        tf.contrib.summary.scalar('loss_l2_weight',
                                                  loss_l2_weight)
                    tf.contrib.summary.histogram('embeddings_relation',
                                                 neural_kb.relation_embeddings)
                    tf.contrib.summary.histogram('embeddings_entity',
                                                 neural_kb.entity_embeddings)

                with tf.contrib.summary.always_record_summaries():
                    for grad, var in grads_and_vars:
                        tf.contrib.summary.scalar(
                            'gradient_sparsity_{}'.format(
                                var.name.replace(':', '__')),
                            tf.nn.zero_fraction(grad))
                        # if batch_end % data.nb_examples == 0 or batch_end % data.nb_examples == 1:
                        #     pdb.set_trace()
                        gradient_norm = tf.sqrt(tf.reduce_sum(tf.pow(grad, 2)))
                        tf.contrib.summary.scalar(
                            'gradient_norm_{}'.format(
                                var.name.replace(':', '__')), gradient_norm)
                        tf.contrib.summary.histogram(
                            'gradient_{}'.format(var.name.replace(':', '__')),
                            grad)
                        tf.contrib.summary.histogram(
                            'variable_{}'.format(var.name.replace(':', '__')),
                            var)
                        # gradient_values = tf.reduce_sum(tf.abs(grad))
                        # tf.contrib.summary.scalar('gradient_values/{}'.format(var.name.replace(':', '__')),
                        #                           gradient_values)

                    # grads = [g for g, _ in grads_and_vars]
                    # flattened_grads = tf.concat([tf.reshape(t, [-1]) for t in grads], axis=0)
                    # flattened_vars = tf.concat([tf.reshape(t, [-1]) for t in neural_kb.variables], axis=0)
                    # tf.contrib.summary.histogram('values_grad', flattened_grads)
                    # tf.contrib.summary.histogram('values_var', flattened_vars)
            if tensorboard:
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar('time_per_batch',
                                              time.time() - start_time)
            if tensorboard and is_epoch_end:
                with tf.contrib.summary.always_record_summaries():
                    tb_pel = sum(per_epoch_losses)
                    if loss_aggregator == 'mean':
                        tb_pel /= len(per_epoch_losses)
                    tf.contrib.summary.scalar('per_epoch_loss', tb_pel)

            if is_epoch_end:
                n_epochs_finished += 1
                per_epoch_losses = []

            # post-epoch whatever...
            if evaluate_per_epoch and is_epoch_end:
                index_type = 'faiss' if exact_knn_evaluation is None else exact_knn_evaluation
                tmp_exact_knn_eval = exact_knn_evaluation
                if exact_knn_evaluation is None and index_type == 'faiss':
                    tmp_exact_knn_eval = 'faiss'
                do_eval(evaluation_mode,
                        model,
                        neural_kb,
                        data,
                        batcher,
                        batch_size,
                        index_type_to_params,
                        is_no_ntp0,
                        is_explanation,
                        dev_only=True,
                        tensorboard=tensorboard,
                        verbose=True,
                        exact_knn_evaluation=tmp_exact_knn_eval,
                        test_batch_size=test_batch_size)

            # # checkpoint saving
            if checkpoint_path is not None and (batch_no +
                                                1) % checkpoint_frequency == 0:
                checkpoint_store(checkpoint_path, neural_kb, optimizer,
                                 random_state, args)

            if profile:
                if batch_no != 0:  # skip the first one as it's significantly longer (warmup?)
                    batch_times.append(time.time() - start_time)
                if batch_no == 10:
                    break

            if tf_profiler:
                profiler.add_step(batch_no, context.export_run_metadata())
                context.disable_run_metadata()
                # profiler.profile_operations(opts)
                profiler.profile_graph(options=opts)

    end_time = time.time()

    if tf_profiler:
        profiler.advise(options=model_analyzer.ALL_ADVICE)

    if profile:
        gpu_memory_job.terminate()
        if len(gpu_memory_profiler_return) == 0:
            gpu_memory_profiler_return = [0]
        nb_negatives = nb_corrupted_pairs * 2 * (2 if is_all else 1)
        nb_triple_variants = 1 + nb_negatives
        examples_per_batch = nb_triple_variants * batch_size
        print('Examples per batch: {}'.format(examples_per_batch))
        print('Batch times: {}'.format(batch_times))
        time_per_batch = np.average(batch_times)
        print('Average time per batch: {}'.format(time_per_batch))
        print('examples per second: {}'.format(examples_per_batch /
                                               time_per_batch))
    else:
        if is_explanation is None:
            logger.info('Training took {} seconds'.format(end_time -
                                                          start_training_time))

    # last checkpoint save
    if checkpoint_path is not None:
        checkpoint_store(checkpoint_path, neural_kb, optimizer, random_state,
                         args)

    # and save the model, if you want to save it (it's better practice to have
    # the checkpoint_path different to save_path, as one can save checkpoints on scratch, and models permanently
    if save:
        save_path = os.path.join(dump_path, 'final_model/')
        checkpoint_store(save_path, neural_kb, optimizer, random_state, args)

    # TODO prettify profiling
    if profile:
        return max(gpu_memory_profiler_return)

    logger.info('Starting evaluation ..')

    neural_kb.create_neural_kb()

    idx_to_relation = {
        idx: relation
        for relation, idx in data.relation_to_idx.items()
    }

    if has_decode:
        for neural_rule in neural_kb.neural_rules_kb:
            gntp.decode(neural_rule,
                        neural_kb.relation_embeddings,
                        idx_to_relation,
                        kernel=kernel)

    # explanations for the train set, just temporarily
    if is_explanation is not None:

        from gntp.util import make_batches

        which_triples = []
        if is_explanation == 'train':
            which_triples = data.train_triples
        elif is_explanation == 'dev':
            which_triples = data.dev_triples
        elif is_explanation == 'test':
            which_triples = data.test_triples

        _triples = [(data.entity_to_idx[s], data.predicate_to_idx[p],
                     data.entity_to_idx[o]) for s, p, o in which_triples]

        batches = make_batches(len(_triples), batch_size)

        explanations_filename = 'explanations-{}-{}.txt'.format(
            checkpoint_path.replace('/', '_'), is_explanation)
        with open(explanations_filename, 'w') as fw:
            for neural_rule in neural_kb.neural_rules_kb:
                decoded_rules = gntp.decode(neural_rule,
                                            neural_kb.relation_embeddings,
                                            idx_to_relation,
                                            kernel=kernel)

                for decoded_rule in decoded_rules:
                    fw.write(decoded_rule + '\n')

            fw.write('--' * 50 + '\n')

            for start, end in batches:
                batch = np.array(_triples[start:end])
                Xs_batch, Xp_batch, Xo_batch = batch[:, 0], batch[:,
                                                                  1], batch[:,
                                                                            2]

                _p_emb = tf.nn.embedding_lookup(neural_kb.relation_embeddings,
                                                Xp_batch)
                _s_emb = tf.nn.embedding_lookup(neural_kb.entity_embeddings,
                                                Xs_batch)
                _o_emb = tf.nn.embedding_lookup(neural_kb.entity_embeddings,
                                                Xo_batch)

                _res, (proof_states, _) = model.predict(
                    _p_emb,
                    _s_emb,
                    _o_emb,
                    neural_facts_kb=neural_kb.neural_facts_kb,
                    neural_rules_kb=neural_kb.neural_rules_kb,
                    is_training=False,
                    no_ntp0=is_no_ntp0,
                    support_explanations=is_explanation is not None)

                # path_indices = decode_per_path_type_proof_states_indices(proof_states)
                path_indices = decode_proof_states_indices(proof_states,
                                                           top_k=3)
                decoded_paths = decode_paths(path_indices, neural_kb)

                _ps, _ss, _os = Xp_batch.tolist(), Xs_batch.tolist(
                ), Xo_batch.tolist()
                __triples = [(data.idx_to_entity[s], data.idx_to_predicate[p],
                              data.idx_to_entity[o])
                             for s, p, o in zip(_ss, _ps, _os)]

                _scores = _res.numpy().tolist()

                for i, (_triple, _score, decoded_path) in enumerate(
                        zip(__triples, _scores, decoded_paths)):
                    _s, _p, _o = _triple
                    _triple_str = '{}({}, {})'.format(_p, _s, _o)

                    # print(_triple_str, _score, decoded_path)
                    fw.write("{}\t{}\t{}\n".format(_triple_str, _score,
                                                   decoded_path))
        logging.info('DONE with explanation...quitting.')
        sys.exit(0)

    eval_start = time.time()
    do_eval(evaluation_mode,
            model,
            neural_kb,
            data,
            batcher,
            batch_size,
            index_type_to_params,
            is_no_ntp0,
            is_explanation,
            dev_only=dev_only,
            exact_knn_evaluation=exact_knn_evaluation,
            test_batch_size=test_batch_size)
    logging.info('Evaluation took {} seconds'.format(time.time() - eval_start))

コード例 #18

ファイルを表示

def _write_inference_profiles(profiler):
    """Write the tfprof op-view and code-view reports, then print advice.

    :param profiler: a ``model_analyzer.Profiler`` to which the profiled run
        has already been added via ``add_step``
    :return: None
    """
    # Op view: execution time and occurrence count per op, ordered by time,
    # limited to a display depth of 5, written to ./op_profiler.txt.
    op_options = (option_builder.ProfileOptionBuilder()
                  .select(['micros', 'occurrence'])
                  .order_by('micros')
                  .with_max_depth(5)
                  .with_file_output(outfile="./op_profiler.txt")
                  .build())
    # profiler.profile_graph(op_options) is the graph-view alternative.
    profiler.profile_operations(op_options)

    # Code view: only nodes whose name matches cnn_basenet.py and that ran
    # for at least 10 microseconds, written to ./code_profiler.txt.
    # 'micros' is the tfprof sort key for execution time; the previous
    # 'min_micros' is the name of the filter argument, not a sort key.
    code_options = (option_builder.ProfileOptionBuilder()
                    .with_max_depth(1000)
                    .with_node_names(show_name_regexes=['cnn_basenet.py.*'])
                    .with_min_execution_time(min_micros=10)
                    .select(['micros'])
                    .order_by('micros')
                    .with_file_output(outfile="./code_profiler.txt")
                    .build())
    profiler.profile_python(code_options)

    profiler.advise(options=model_analyzer.ALL_ADVICE)


def test_lanenet_for_eval(image_path, weights_path):
    """Run LaneNet on one image, profile the inference and show/save results.

    The image is resized and rescaled, fed through the restored model twice
    (the second run is traced for tfprof), post-processed, displayed with
    matplotlib and written to PNG files in the working directory.

    :param image_path: path of the test image
    :param weights_path: path of the trained model weights to restore
    :return: None
    """
    assert ops.exists(image_path), '{:s} not exist'.format(image_path)

    log.info('Start reading image and preprocessing')
    t_start = time.time()
    # IMREAD_COLOR loads a color image; any transparency is neglected.
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # Keep a reference to the un-resized image for post-processing/display.
    image_vis = image

    # Bilinear interpolation to the size expected by the input placeholder.
    image = cv2.resize(image, (512, 256), interpolation=cv2.INTER_LINEAR)

    # Rescale pixel values only; the array shape is unchanged.
    image = image / 127.5 - 1.0
    log.info('Image load complete, cost time: {:.5f}s'.format(time.time() - t_start))

    # The placeholder only reserves the graph input; the actual image is
    # supplied through feed_dict when the session runs.
    # Shape is NHWC: [batch, height, width, channels].
    input_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 256, 512, 3], name='input_tensor')

    net = lanenet.LaneNet(phase='test', net_flag='vgg')

    binary_seg_ret, instance_seg_ret = net.inference(input_tensor=input_tensor, name='lanenet_model')

    postprocessor = lanenet_postprocess.LaneNetPostProcessor()

    # Used below to load the pre-trained model parameters.
    saver = tf.train.Saver()

    # Set session configuration
    sess_config = tf.ConfigProto()
    # Cap the fraction of GPU memory this process may use.
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION
    # Allocate GPU memory on demand.
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    # Memory allocator type: BFC = best fit with coalescing.
    sess_config.gpu_options.allocator_type = 'BFC'
    sess = tf.Session(config=sess_config)

    # try/finally so the session is released even if inference, profiling or
    # plotting raises.
    try:
        with sess.as_default():
            saver.restore(sess=sess, save_path=weights_path)

            # Untraced first run; its outputs are discarded.
            # NOTE(review): presumably a warm-up so the timed run below
            # excludes one-off initialization cost - confirm.
            sess.run(
                [binary_seg_ret, instance_seg_ret],
                feed_dict={input_tensor: [image]}
            )

            profiler = model_analyzer.Profiler(graph=sess.graph)
            run_metadata = tf.RunMetadata()

            t_start = time.time()

            binary_seg_image, instance_seg_image = sess.run(
                [binary_seg_ret, instance_seg_ret],
                feed_dict={input_tensor: [image]},
                options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                run_metadata=run_metadata
            )

            t_cost = time.time() - t_start
            log.info('Single imgae inference cost time: {:.5f}s'.format(t_cost))

            profiler.add_step(step=1, run_meta=run_metadata)
            _write_inference_profiles(profiler)

            # postprocessor.postprocess_for_test(...) takes the same three
            # keyword arguments and was the alternative tried here.
            postprocess_result = postprocessor.postprocess(
                binary_seg_result=binary_seg_image[0],
                instance_seg_result=instance_seg_image[0],
                source_image=image_vis
            )

            mask_image = postprocess_result['mask_image']

            # Rescale every embedding channel in place before the uint8 cast.
            # Per the original author's note, minmax_scale is equivalent to
            # cv2.normalize(..., 0, 255, norm_type=cv2.NORM_MINMAX) and
            # CFG.TRAIN.EMBEDDING_FEATS_DIMS is 4 - verify against config.
            for i in range(CFG.TRAIN.EMBEDDING_FEATS_DIMS):
                instance_seg_image[0][:, :, i] = minmax_scale(instance_seg_image[0][:, :, i])
            embedding_image = np.array(instance_seg_image[0], np.uint8)

            plt.figure('mask_image')
            plt.imshow(mask_image)
            plt.figure('src_image')
            # (2, 1, 0) reverses the channel order for matplotlib display.
            plt.imshow(image_vis[:, :, (2, 1, 0)])
            plt.figure('instance_image')
            plt.imshow(embedding_image[:, :, (2, 1, 0)])
            plt.figure('binary_image')
            plt.imshow(binary_seg_image[0] * 255, cmap='gray')
            # postprocess_result['source_image'] could additionally be shown
            # in a "result" figure here.
            plt.show()

            cv2.imwrite('instance_mask_image.png', mask_image)
            cv2.imwrite('source_image.png', postprocess_result['source_image'])
            cv2.imwrite('binary_mask_image.png', binary_seg_image[0] * 255)
    finally:
        sess.close()

コード例 #19

ファイルを表示

ファイル: train.py プロジェクト: youngch12/Cluster_KGCN

def train(args, data, show_loss, show_topk):
    """Train a KGCN model under tfprof tracing and print profiling reports.

    Every mini-batch is run with full tracing and added to the profiler under
    the current epoch index. After each epoch the CTR metrics on the train,
    eval and test splits are printed. After the last epoch a parameter-count
    report (name-scope view) and an op-time report (op view) are printed.

    :param args: options object; ``dataset``, ``n_epochs`` and ``batch_size``
        are read here and the whole object is passed on to ``KGCN``
    :param data: indexable holding, in order: n_user, n_item, n_entity,
        n_relation, train_data, eval_data, test_data, adj_entity, adj_relation
    :param show_loss: not used by this function
    :param show_topk: forwarded to ``topk_settings``
    :return: None
    """
    n_user = data[0]
    n_item = data[1]
    n_entity = data[2]
    n_relation = data[3]
    train_data = data[4]
    eval_data = data[5]
    test_data = data[6]
    adj_entity = data[7]
    adj_relation = data[8]

    model = KGCN(args, n_user, n_entity, n_relation, adj_entity, adj_relation)

    # top-K evaluation settings (the returned values are not used below)
    user_list, train_record, test_record, item_set, k_list = topk_settings(
        show_topk, train_data, test_data, n_item)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # tfprof profiler plus the run options/metadata that enable tracing
        profiler = model_analyzer.Profiler(graph=sess.graph)
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        # TensorBoard writer
        log_dir = '../data/' + args.dataset + '/logs'
        writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

        for step in range(args.n_epochs):
            t = time.time()
            np.random.shuffle(train_data)

            batch_begin = 0
            metadata_logged = False
            # The trailing mini-batch is skipped when it is smaller than
            # batch_size.
            while batch_begin + args.batch_size <= train_data.shape[0]:
                batch_end = batch_begin + args.batch_size
                feed = get_feed_dict(model, train_data, batch_begin, batch_end)
                _, loss = model.train(sess, feed, run_options, run_metadata)

                # Every batch of this epoch is grouped under the same step id.
                profiler.add_step(step=step, run_meta=run_metadata)
                # Only the first batch of the epoch goes to TensorBoard.
                if not metadata_logged:
                    writer.add_run_metadata(run_metadata, 'step %d' % step)
                    metadata_logged = True

                batch_begin = batch_end

            # CTR evaluation on all three splits
            train_auc, train_f1 = ctr_eval(sess, model, train_data,
                                           args.batch_size)
            eval_auc, eval_f1 = ctr_eval(sess, model, eval_data,
                                         args.batch_size)
            test_auc, test_f1 = ctr_eval(sess, model, test_data,
                                         args.batch_size)

            # Taken after the evaluations, so it covers them as well.
            train_time = time.time() - t

            epoch_report = (step, train_time, train_auc, train_f1, eval_auc,
                            eval_f1, test_auc, test_f1)
            print(
                'epoch %d   training time: %.5f    train auc: %.4f  f1: %.4f    eval auc: %.4f  f1: %.4f    test auc: %.4f  f1: %.4f'
                % epoch_report)

        # Name-scope view: show and order by the number of parameters,
        # starting from the trainable-variables preset.
        scope_options = (
            option_builder.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder
                .trainable_variables_parameter())
            .select(['params'])
            .order_by('params')
            .build())
        profiler.profile_name_scope(scope_options)

        # Op view: show execution time and occurrence count, ordered by
        # execution time, limited to a display depth of 6.
        op_options = (
            option_builder.ProfileOptionBuilder()
            .select(['micros', 'occurrence'])
            .order_by('micros')
            .with_max_depth(6)
            .build())
        profiler.profile_operations(op_options)

        writer.close()

コード例 #20

ファイルを表示

ファイル: main.py プロジェクト: FreeGrid/resnet

def train():
    """Train on CIFAR-10 with SGD while tracing every step for tfprof.

    Builds the input pipeline, loss and train op, starts a TF server and
    sessions, runs ``MAX_STEP`` traced training steps (logging, summaries and
    checkpoints at fixed intervals), and finally writes a timeline profile of
    step 70 to ``/tmp/mnist_profiler.json``.

    Queue-runner threads, the summary writer and both sessions are released
    even when a training step raises.

    :return: None
    """
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # CIFAR-10 data directory
    data_dir = '../cifar-10-batches-bin/'
    # Directory for training logs and checkpoints; per the original author
    # it has to be created beforehand if it does not exist.
    train_dir = './logs/'
    # Load images and labels
    images, labels = inputs(data_dir, BATCH_SIZE)

    loss = losses(inference(images), labels)
    # Plain SGD with a constant learning rate
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    train_op = optimizer.minimize(loss, global_step=global_step)
    # tf.global_variables / tf.global_variables_initializer replace the
    # deprecated tf.all_variables / tf.initialize_all_variables.
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    os.environ['CUDA_VISIBLE_DEVICES'] = str(0)

    # Other ConfigProto switches the original author pointed out:
    # allow_soft_placement=True picks the device automatically and
    # log_device_placement=True prints the device of every op/tensor.
    config = tf.ConfigProto()
    # Allocate GPU memory on demand
    config.gpu_options.allow_growth = True
    # A fixed share could be set instead with
    # config.gpu_options.per_process_gpu_memory_fraction = 0.2

    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    cluster = tf.train.ClusterSpec({
        'node1': [
            '192.168.136.101:2222'
        ],
        'node2': [
            '192.168.136.102:2222'
        ]
    })
    server = tf.train.Server(cluster, job_name='node1', task_index=0)
    # NOTE(review): this remote session is never used for any run() call;
    # it is kept because its purpose cannot be confirmed from here, but it
    # is now closed on exit instead of being leaked.
    session = tf.Session(target='grpc://192.168.136.102:2222', config=config)
    sess = tf.InteractiveSession(config=config)
    try:
        sess.run(init)
        profiler = model_analyzer.Profiler(graph=sess.graph)

        # Thread coordinator and queue runners for the input pipeline
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Summaries go to train_dir; nothing is written until add_summary.
        summary_writer = tf.summary.FileWriter(train_dir, sess.graph)

        try:
            for step in range(MAX_STEP):
                if coord.should_stop():
                    break
                start_time = time.time()
                # One traced training step
                _, loss_value = sess.run([train_op, loss], options=options,
                                         run_metadata=run_metadata)
                profiler.add_step(step=step, run_meta=run_metadata)
                duration = time.time() - start_time
                # Abort as soon as the loss diverges
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                if step % 30 == 0:
                    # Progress line with throughput figures
                    num_examples_per_step = BATCH_SIZE
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print (format_str % (datetime.datetime.now(), step, loss_value,
                                         examples_per_sec, sec_per_batch))

                if step % 100 == 0:
                    # Evaluate the merged summaries and record them
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                if step % 1000 == 0 or (step + 1) == MAX_STEP:
                    # Save model and weights to train_dir, tagged with the
                    # current iteration number
                    checkpoint_path = os.path.join(train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        finally:
            # Stop the input threads and flush pending summaries whether or
            # not the loop finished normally.
            coord.request_stop()
            coord.join(threads)
            summary_writer.close()

        # Graph view with run time and memory of every graph node, emitted
        # as a timeline; the output directory must already exist.
        # Only step 70 is reported, so that step must have been recorded
        # above (i.e. the loop has to reach step 70).
        profile_graph_opts = (
            option_builder.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder.time_and_memory())
            .with_timeline_output(timeline_file='/tmp/mnist_profiler.json')
            .with_step(70)
            .build())
        profiler.profile_graph(profile_graph_opts)
    finally:
        sess.close()
        session.close()

コード例 #21

ファイルを表示

ファイル: train.py プロジェクト: sneaxiy/benchmark

    def train(sess):
        """Train for ``max_epoch`` epochs, validating after each, then test.

        With ``args.profile`` set, every batch is run with full tracing and
        added to a tfprof profiler, each epoch stops after batch 10, and an
        op-time report is printed once testing is done.

        :param sess: TensorFlow session used for all ``run`` calls
        :return: None
        """
        sess.run(init)

        if args.profile:
            profiler_step = 0
            profiler = model_analyzer.Profiler(graph=sess.graph)
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        for epoch_id in xrange(max_epoch):
            batch_times = []
            epoch_start_time = time.time()
            train_data_iter = reader.get_data_iter(train_data, batch_size, num_steps)

            # assign lr, update the learning rate
            new_lr_1 = base_learning_rate * (lr_decay ** max(epoch_id + 1 - epoch_start_decay, 0.0))
            sess.run(lr_update, {new_lr: new_lr_1})

            total_loss = 0.0
            iters = 0
            batch_len = len(train_data) // batch_size
            epoch_size = (batch_len - 1) // num_steps

            # Log every batch while profiling, otherwise about ten times per
            # epoch. Clamped to 1 because epoch_size // 10 is 0 for epochs
            # shorter than ten batches, which made the modulo below raise
            # ZeroDivisionError.
            log_fre = 1 if args.profile else max(1, epoch_size // 10)

            # Initial state for the first batch; afterwards the final state
            # of one batch is fed into the next one.
            init_h = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')
            init_c = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')

            for batch_id, batch in enumerate(train_data_iter):
                x, y = batch
                feed_dict = {
                    feeding_list[0]: x,
                    feeding_list[1]: y,
                    feeding_list[2]: init_h,
                    feeding_list[3]: init_c,
                }

                batch_start_time = time.time()
                if args.profile:
                    output = sess.run([cost, final_h, final_c, train_op], feed_dict,
                                      options=run_options, run_metadata=run_metadata)
                    profiler.add_step(step=profiler_step, run_meta=run_metadata)
                    profiler_step = profiler_step + 1
                    # Batch 10 is still run and profiled, but it is neither
                    # timed nor counted in the loss.
                    if batch_id >= 10:
                        break
                else:
                    output = sess.run([cost, final_h, final_c, train_op], feed_dict)
                batch_time = time.time() - batch_start_time
                batch_times.append(batch_time)

                train_cost = output[0]
                init_h = output[1]
                init_c = output[2]

                total_loss += train_cost
                iters += num_steps
                if batch_id > 0 and batch_id % log_fre == 0:
                    ppl = np.exp(total_loss / iters)
                    print("-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f" % (epoch_id, batch_id, batch_time, ppl, new_lr_1))

            ppl = np.exp(total_loss / iters)
            epoch_time = time.time() - epoch_start_time

            # Throughput is based on the batches that were actually timed;
            # batch_id + 1 over-counted by one in profile mode, where the
            # loop breaks before the last batch is timed.
            print("\nTrain epoch:[%d]; epoch Time: %.5f s; ppl: %.5f; avg_time: %.5f steps/s\n"
                  % (epoch_id, epoch_time, ppl, len(batch_times) / sum(batch_times)))

            valid_ppl, _ = eval(sess, valid_data)
            print("Valid ppl: %.5f" % valid_ppl)

        test_ppl, test_time = eval(sess, test_data)
        print("Test Time (total): %.5f, ppl: %.5f" % (test_time, test_ppl))

        if args.profile:
            # Op view: execution time and occurrence count, ordered by time,
            # limited to a display depth of 50.
            profile_op_opts = (option_builder.ProfileOptionBuilder()
                               .select(['micros', 'occurrence'])
                               .order_by('micros')
                               .with_max_depth(50)
                               .build())
            profiler.profile_operations(profile_op_opts)

コード例 #22

ファイルを表示

def train():
    """Train CIFAR-10 for a number of steps and profile the run with tfprof.

    Every ``FLAGS.log_frequency``-th loop iteration is executed with full
    tracing and registered with a ``model_analyzer.Profiler``. Once the
    monitored session asks to stop, the collected traces are reported through
    the graph, name-scope, operation and Python-code views, and finally the
    profiler's advice is requested.
    """
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
        # GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Loop-iteration counter used as the profiler step id, plus the ids
        # of the iterations that were actually traced.
        i = 0
        profiled_steps = []

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                save_checkpoint_secs=10,  # Save checkpoint by interval
                save_summaries_steps=10,  # Save summary by interval
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement,
                    allow_soft_placement=True
                )) as mon_sess:

            # Create the tfprof Profiler instance for this session's graph.
            cifar_profiler = model_analyzer.Profiler(graph=mon_sess.graph)
            # Full tracing is only requested for the iterations we profile.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

            while not mon_sess.should_stop():
                if i % FLAGS.log_frequency == 0:
                    mon_sess.run(train_op,
                                 options=run_options,
                                 run_metadata=run_metadata)
                    cifar_profiler.add_step(step=i, run_meta=run_metadata)
                    profiled_steps.append(i)
                else:
                    mon_sess.run(train_op)
                i += 1

            # Profiler section:
            #   1. Profile each graph node's execution time and consumed memory
            #   2. Profile each layer's parameters, model size and parameter
            #      distribution
            #   3. Profile the most time-consuming operations
            #   4. Profile the most memory-consuming operations
            #   5. Profile Python code performance line by line
            #   6. Give optimization advice

            # 1. Profile each graph node's execution time and consumed memory.
            # Only traced iterations are known to the profiler, so pick the
            # traced step closest to the middle of training instead of a
            # step id that may never have been recorded.
            if profiled_steps:
                wanted_step = (FLAGS.max_steps - 1) // 2
                timeline_step = min(profiled_steps,
                                    key=lambda s: abs(s - wanted_step))
                timeline_file = os.path.join(
                    os.path.split(os.path.split(os.path.abspath(__file__))[0])
                    [0], 'logs/cifar10_profiler/cifar10_profiler.json')
                # Make sure the output directory exists before tfprof writes.
                timeline_dir = os.path.dirname(timeline_file)
                if not os.path.isdir(timeline_dir):
                    os.makedirs(timeline_dir)

                profile_graph_opts_builder = option_builder.ProfileOptionBuilder(
                    option_builder.ProfileOptionBuilder.time_and_memory())
                profile_graph_opts_builder.with_timeline_output(
                    timeline_file=timeline_file)
                profile_graph_opts_builder.with_step(timeline_step)
                cifar_profiler.profile_graph(
                    profile_graph_opts_builder.build())  # Show graph view result

            # 2. Profile each layer's parameters, model size and parameter
            # distribution.
            profile_scope_opt_builder = option_builder.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder.
                trainable_variables_parameter())
            profile_scope_opt_builder.with_max_depth(
                4)  # Maximum level of nested depth
            profile_scope_opt_builder.select(['params'])  # Show params
            profile_scope_opt_builder.order_by('params')  # Sort by params
            cifar_profiler.profile_name_scope(
                profile_scope_opt_builder.build())

            # 3. Profile the most time-consuming operations.
            profile_op_opt_builder = option_builder.ProfileOptionBuilder()
            profile_op_opt_builder.select(
                ['micros',
                 'occurrence'])  # Show Op execution time, node's number
            profile_op_opt_builder.order_by('micros')  # Sort by micros
            profile_op_opt_builder.with_max_depth(4)  # Limit the listing
            cifar_profiler.profile_operations(profile_op_opt_builder.build())

            # 4. Profile the most memory-consuming operations.
            profile_op_opt_builder = option_builder.ProfileOptionBuilder()
            profile_op_opt_builder.select(
                ['bytes',
                 'occurrence'])  # Show Op consumed memory, node's number
            profile_op_opt_builder.order_by('bytes')  # Sort by bytes
            profile_op_opt_builder.with_max_depth(4)  # Limit the listing
            cifar_profiler.profile_operations(profile_op_opt_builder.build())

            # 5. Profile Python code performance line by line.
            profile_code_opt_builder = option_builder.ProfileOptionBuilder()
            profile_code_opt_builder.with_max_depth(1000)
            profile_code_opt_builder.with_node_names(
                show_name_regexes=[r'cifar10[\s\S]*'])
            # Hide entries that ran for less than 10 microseconds.
            profile_code_opt_builder.with_min_execution_time(min_micros=10)
            profile_code_opt_builder.select(['micros'])
            profile_code_opt_builder.order_by('micros')
            cifar_profiler.profile_python(profile_code_opt_builder.build())

            # 6. Give optimization advice.
            cifar_profiler.advise(options=model_analyzer.ALL_ADVICE)

コード例 #23

ファイルを表示

    def train(self,
              logfile,
              num_epochs=400,
              try_restore_latest_checkpoint=False):
        """Run the training loop, logging per-epoch loss and saving checkpoints.

        Args:
            logfile: Path of the text log that is appended to. Its last path
                component also names the checkpoint sub-directory under
                ``checkpoints/``.
            num_epochs: Upper bound of ``range(1, num_epochs)``, i.e. the loop
                runs ``num_epochs - 1`` epochs.
            try_restore_latest_checkpoint: When ``True``, try to restore the
                latest checkpoint and continue epoch numbering from it; any
                failure falls back to training from scratch.
        """
        from tensorflow.python.profiler import option_builder

        checkpoint_dir = path.join('checkpoints', path.split(logfile)[-1])
        checkpoint_path = path.join(checkpoint_dir, 'checkpoint.ckp')
        # Create the checkpoint directory itself (not only its parent) so the
        # saver has somewhere to write.
        makedirs(checkpoint_dir, exist_ok=True)
        # A bare file name has an empty dirname, which makedirs rejects.
        log_dir = path.dirname(logfile)
        if log_dir:
            makedirs(log_dir, exist_ok=True)

        last_epoch = 0
        if try_restore_latest_checkpoint is True:
            try:
                latest_ckp = tf.train.latest_checkpoint(checkpoint_dir)
                restored_epoch = int(latest_ckp.split('-')[-1])
                self._train_model.model.saver.restore(
                    sess=self._train_session,
                    save_path=latest_ckp,
                )
            except Exception:
                # Deliberate best effort: any restore problem means a fresh
                # start, with epoch numbering left at zero.
                print(
                    'Could not restore from checkpoint, training from scratch!\n'
                )
            else:
                # Only adopt the epoch offset once the restore has succeeded.
                last_epoch = restored_epoch
                print(
                    'Restoring checkpoint from epoch {}\n'.format(last_epoch))

        # The context manager closes the log even if training raises.
        with open(logfile, 'a') as f:
            for current_epoch in range(1, num_epochs):
                epoch = last_epoch + current_epoch

                self._train_session.run([
                    stream.iterator_initializer
                    for stream in self._train_model.data if stream is not None
                ])
                sum_loss = 0
                batches = 0

                start = time.time()

                try:
                    # Runs until the input iterators signal exhaustion.
                    while True:
                        out = self._train_session.run([
                            self._train_model.model.train_op,
                            self._train_model.model.batch_loss,
                        ], **self.sess_opts)

                        if self._hparams.profiling is True:
                            self.profiler.add_step(batches, self.run_meta)

                            self.profiler.profile_name_scope(
                                options=(option_builder.ProfileOptionBuilder.
                                         trainable_variables_parameter()))

                            opts = option_builder.ProfileOptionBuilder.time_and_memory(
                            )
                            self.profiler.profile_operations(options=opts)

                            opts = (
                                option_builder.ProfileOptionBuilder(
                                    option_builder.ProfileOptionBuilder.
                                    time_and_memory()).with_step(batches).
                                with_timeline_output('/tmp/timelines/').build())

                            self.profiler.profile_graph(options=opts)

                        sum_loss += out[1]
                        print('batch: {}'.format(batches))
                        batches += 1

                except tf.errors.OutOfRangeError:
                    pass

                print('epoch time: {}'.format(time.time() - start))
                # Avoid a ZeroDivisionError when an epoch produced no batches.
                average_loss = sum_loss / batches if batches else float('nan')
                f.write('Average batch_loss as epoch {} is {}\n'.format(
                    epoch, average_loss))
                f.flush()

                if epoch % 5 == 0:
                    save_path = self._train_model.model.saver.save(
                        sess=self._train_session,
                        save_path=checkpoint_path,
                        global_step=epoch,
                    )

                    error_rate = self.evaluate(save_path, epoch)
                    for (k, v) in error_rate.items():
                        f.write(k + ': {:.4f}% '.format(v * 100))
                    f.write('\n')
                    f.flush()

コード例 #24

ファイルを表示

    def run(self,
            use_gpu,
            feed=None,
            repeat=1,
            log_level=0,
            check_output=False,
            profile=False):
        """Execute ``self.fetch_list`` ``repeat`` times and report the timings.

        Args:
            use_gpu: Passed to ``self._init_session``; also selects the
                "GPU"/"CPU" device label in the printed statistics.
            feed: Feed dict for ``sess.run``. When ``None`` it is obtained
                from ``self._feed_random_data()``.
            repeat: Number of timed ``sess.run`` calls.
            log_level: Forwarded to ``utils.print_stat``.
            check_output: Accepted for interface compatibility; the visible
                code never reads the outputs it used to collect for it.
            profile: When true, run with full tracing, write a Chrome trace
                to ``<self.name>_tf.timeline`` after every run, and print
                the operations view once all runs are done.

        Returns:
            The outputs of the last ``sess.run`` call, or ``None`` when
            ``repeat`` is 0.
        """
        sess = self._init_session(use_gpu)

        if profile:
            profiler = model_analyzer.Profiler(graph=sess.graph)
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
        else:
            profiler = None
            run_options = None
            run_metadata = None
        self.timeline_dict = None

        if feed is None:
            feed = self._feed_random_data()

        runtimes = []
        outputs = None
        for i in range(repeat):
            begin = time.time()
            outputs = sess.run(fetches=self.fetch_list,
                               feed_dict=feed,
                               options=run_options,
                               run_metadata=run_metadata)
            end = time.time()
            runtimes.append(end - begin)

            if profile:
                # Update profiler
                profiler.add_step(step=i, run_meta=run_metadata)
                # Write the timeline of this run; the file is rewritten on
                # every iteration, and the context manager closes the handle
                # instead of leaking one per run.
                tl = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = tl.generate_chrome_trace_format()
                with open(self.name + '_tf.timeline', 'w') as trace_file:
                    trace_file.write(chrome_trace)

        if profile:
            # Generate profiling result
            profile_op_builder = option_builder.ProfileOptionBuilder()
            profile_op_builder.select(['micros', 'occurrence'])
            profile_op_builder.order_by('micros')
            profile_op_builder.with_max_depth(10)
            profiler.profile_operations(profile_op_builder.build())

        stats = {
            "framework": "tensorflow",
            "version": tf.__version__,
            "name": self.name,
            "total": runtimes
        }
        stats["device"] = "GPU" if use_gpu else "CPU"
        utils.print_stat(stats, log_level=log_level)
        return outputs

コード例 #25

ファイルを表示

ファイル: inference.py プロジェクト: Traeyee/DickLearning

    # Evaluation fragment: initialise the eval iterator, compute the
    # hypotheses (optionally under the tfprof profiler) and prepare the
    # output path for the translations.
    logging.info("# test evaluation")
    sess.run(eval_init_op)
    # Runs op y_hat over num_eval_batches batches and keeps only the first
    # num_eval_samples results.
    logging.info("# get hypotheses")
    if hp.use_profile:
        logging.info("# init profile")
        run_metadata = tf.RunMetadata()
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        mnist_profiler = model_analyzer.Profiler(graph=sess.graph)
        ts = time.time()
        hypotheses = get_hypotheses(num_eval_batches, num_eval_samples, sess, y_hat, m.idx2token,
                                    use_profile=True,
                                    options=run_options, run_metadata=run_metadata, profiler=mnist_profiler)
        logging.info("eval: takes %s" % (time.time() - ts))
        # Collect the execution time and memory usage of every graph node.
        profile_graph_opts_builder = option_builder.ProfileOptionBuilder(
            option_builder.ProfileOptionBuilder.time_and_memory())

        # Emit the result as a timeline.
        profile_graph_opts_builder.with_timeline_output(timeline_file='/tmp/mnist_profiler.json')
        # Select which sess.run() step's statistics to display (the original
        # note said step 0).
        # NOTE(review): with_step is called twice; presumably the second call
        # overrides the first, so only step 1 would be shown - confirm.
        profile_graph_opts_builder.with_step(0)
        profile_graph_opts_builder.with_step(1)
        # Display the result using the graph view.
        mnist_profiler.profile_graph(profile_graph_opts_builder.build())
    else:
        ts = time.time()
        hypotheses = get_hypotheses(num_eval_batches, num_eval_samples, sess, y_hat, m.idx2token)
        logging.info("eval: takes %s" % (time.time() - ts))
    # Make sure the evaluation directory exists before writing into it.
    if not os.path.exists(hp.evaldir):
        os.makedirs(hp.evaldir)
    translation = os.path.join(hp.evaldir, "inference.out")

コード例 #26

ファイルを表示

ファイル: simple_demo.py プロジェクト: hanzhn/CLS.Tensorflow

def main(_):
    """Run the model on every ``.jpg`` in ``./demo`` and profile the runs.

    Each image is fed through the preprocessing and model graph with full
    tracing enabled; the merged summary is written to ``./demo/test_out/``
    and every run is registered with a tfprof ``Profiler``. Afterwards one
    recorded step is exported as a timeline to ``/tmp/profiler.json``.

    Args:
        _: Unused positional argument.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2,))

        features = cls_preprocessing.preprocess_for_eval(image_input, out_shape, data_format=FLAGS.data_format, output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        with tf.variable_scope(FLAGS.model_scope, default_name=None, values=[features], reuse=tf.AUTO_REUSE):
            model = cls_reg_net.CLS_REG_Model(FLAGS.resnet_size, FLAGS.resnet_version,
                                    FLAGS.attention_block, FLAGS.location_feature_stage,
                                    FLAGS.data_format)

            results = model(features, training=False)
            # The localisation outputs only exist when the location feature
            # stage is enabled, so only fetch them in that case; fetching
            # them unconditionally raised NameError otherwise.
            if FLAGS.location_feature_stage:
                logits, loc, location = results
                fetches = [logits, loc, location]
            else:
                logits = results
                fetches = [logits]
        tf.summary.image('origin_pic',tf.transpose(features, [0, 2, 3, 1]))
        merged = tf.summary.merge_all()
        # The merged summary is always the last fetch.
        fetches.append(merged)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Create the profiler object.
            my_profiler = model_analyzer.Profiler(graph=sess.graph)
            # Create the run metadata / options used for full tracing.
            run_metadata = tf.RunMetadata()
            run_options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            # init summary writer
            writer = tf.summary.FileWriter("./demo/test_out/" ,sess.graph)
            i = 0
            for picname in os.listdir('./demo'):
                if picname.split('.')[-1] != 'jpg':
                    # Report and skip anything that is not a .jpg file.
                    print(picname)
                    continue
                np_image = imread(os.path.join('./demo',picname))

                print(type(np_image), np_image.shape)
                summary = sess.run(fetches,
                                   feed_dict = {image_input : np_image, shape_input : np_image.shape[:-1]},
                                   options=run_options, run_metadata=run_metadata)[-1]
                my_profiler.add_step(step=i, run_meta=run_metadata)

                writer.add_summary(summary,i)
                i+=1
            # Release the event-file handle once all summaries are written.
            writer.close()

            # Nothing was recorded when no image was processed.
            if i > 0:
                # Report the execution time and memory usage of each graph node.
                profile_graph_opts_builder = option_builder.ProfileOptionBuilder(
                    option_builder.ProfileOptionBuilder.time_and_memory())

                # Emit the result as a timeline.
                profile_graph_opts_builder.with_timeline_output(timeline_file='/tmp/profiler.json')
                # Show the statistics of step 3, or of the last recorded
                # step when fewer than four images were processed.
                profile_graph_opts_builder.with_step(min(3, i - 1))

                # Display the result using the graph view.
                my_profiler.profile_graph(profile_graph_opts_builder.build())

コード例 #27

ファイルを表示

                                                    is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(
        checkpoints, slim.get_model_variables('InceptionV3'))

    results = {}
    with tf.Session() as sess:
        init_fn(sess)

        #profiler
        inception_profiler = model_analyzer.Profiler(graph=sess.graph)
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        profile_scope_opt_builder = option_builder.ProfileOptionBuilder(
            option_builder.ProfileOptionBuilder.float_operation())
        inception_profiler.profile_name_scope(
            profile_scope_opt_builder.build())

        #https://upload.wikimedia.org/wikipedia/commons/d/d9/First_Student_IC_school_bus_202076.jpg
        for f in (glob.glob("First_Student_IC_school_bus_202076.jpg")):
            img = image2placeholder(f, image_size)
            probabilities = sess.run(
                probabilities,
                feed_dict={x: img},
                options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                run_metadata=run_metadata)
            probabilities = probabilities[0, 0:]
            sorted_inds = [
                i[0]
                for i in sorted(enumerate(-probabilities), key=lambda x: x[1])

コード例 #28

ファイルを表示

    def profile(self):
        """Run 1000 training iterations and trace every 100th with tfprof.

        Traced iterations are added to a ``Profiler`` and immediately
        reported through the name-scope, operations and graph (timeline)
        views; the timeline path comes from
        ``self.config['profiler_output_file_name']``.
        """
        self.initialize()
        print("Beginning to profile network with parameters",
              get_num_parameters(self.model.get_variable_scope()))

        # Query the model for its graph pieces (same call order as before;
        # some results are not used further in this method).
        feed_slots = self.model.get_placeholders()
        loss_op = self.model.get_losses()
        optimiser_op = self.model.get_optimizer()
        summary_op = self.model.get_summary()
        _validation_summary_op = self.model.get_summary_validation()
        accuracy_op = self.model.get_accuracy()
        _logits_op, prediction_op = self.model.get_compute_graphs()
        temp_op = self.model.get_temp()

        train_feed_ops = self._get_input_feeds(self.training_files)
        _validation_feed_ops = self._get_input_feeds(self.validation_files)

        initialisers = [
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ]
        with tf.Session() as sess:
            sess.run(initialisers)
            profiler = Profiler(sess.graph)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            _summary_writer = tf.summary.FileWriter(self.summary_path,
                                                    sess.graph)

            print("Starting iterations")
            step_fetches = [
                temp_op, loss_op, optimiser_op, summary_op,
                accuracy_op, prediction_op
            ]
            for step in range(1000):
                batch = sess.run(list(train_feed_ops))

                # Bind the first five fetched inputs to the first five
                # placeholders.
                feed = {feed_slots[k]: batch[k] for k in range(5)}

                if step % 100 != 0:
                    # Ordinary, untraced iteration.
                    sess.run(step_fetches, feed_dict=feed)
                else:
                    run_meta = tf.RunMetadata()
                    trace_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    sess.run(step_fetches,
                             feed_dict=feed,
                             options=trace_options,
                             run_metadata=run_meta)

                    profiler.add_step(step, run_meta)

                    # Parameter view of the model.
                    param_opts = (option_builder.ProfileOptionBuilder.
                                  trainable_variables_parameter())
                    profiler.profile_name_scope(options=param_opts)

                    # Timing view of the model operations.
                    timing_opts = (
                        option_builder.ProfileOptionBuilder.time_and_memory())
                    profiler.profile_operations(options=timing_opts)

                    # Timeline for this very step.
                    builder = option_builder.ProfileOptionBuilder(
                        option_builder.ProfileOptionBuilder.time_and_memory())
                    builder = builder.with_step(step)
                    builder = builder.with_timeline_output(
                        self.config['profiler_output_file_name'])
                    profiler.profile_graph(options=builder.build())

                print("Profiling - Iteration %4d" % step)

            # Ask the queue-runner threads to stop, then wait for them.
            coord.request_stop()
            coord.join(threads)

コード例 #29

ファイルを表示

ファイル: runs.py プロジェクト: giamic/music-analysis

def profiled_test_run(sess,
                      targets,
                      merged_summary,
                      handle,
                      h,
                      global_step,
                      model_folder,
                      annotations,
                      profiler,
                      n,
                      writer=None,
                      saver=None,
                      clustering=True,
                      tree=True,
                      mode='raw_data'):
    """
    Run the model on the test database, then write the summaries to disk and save the model.
    One can cluster the embeddings and use the distance matrix to reconstruct a tree.
    The run is fully traced, added to the profiler as step ``n`` and reported.

    :param sess: session used for the run
    :param targets: list of fetches appended to ``merged_summary``; the run
        result is unpacked into five values (y, labels, dm, ids, times)
    :param merged_summary: summary op fetched first
    :param handle: placeholder that is fed with ``h``
    :param h: value fed to ``handle``
    :param global_step: step under which the summary is written
    :param model_folder: folder receiving the checkpoint, the profiler output
        and the ``test/<timestamp>`` analysis folder
    :param annotations: forwarded to ``tree_analysis``
    :param profiler: profiler to which the traced run is added
    :param n: step id under which the run is added to the profiler
    :param writer: if provided, the summary is written through it
    :param saver: if a tf.train.Saver() is provided, save the model
    :param clustering: if True, run ``clustering_classification``
    :param tree: if True, run ``tree_analysis``
    :param mode: either 'timeline' or 'raw_data'
    :return: the analysis output folder when ``clustering`` or ``tree`` is
        set, otherwise None
    """
    assert mode in ('timeline', 'raw_data')

    run_meta = tf.RunMetadata()
    summary, y, labels, dm, ids, times = sess.run(
        [merged_summary] + targets,
        feed_dict={handle: h},
        options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
        run_metadata=run_meta)
    if writer is not None:
        writer.add_summary(summary, global_step=global_step)
    if saver is not None:
        saver.save(sess, os.path.join(model_folder, "model.ckpt"))

    profiler.add_step(n, run_meta)

    # Both modes share the same base options; only the output sink and the
    # profiler view differ.
    builder = option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.time_and_memory()).with_step(
            -1)  # average of all steps
    if mode == 'raw_data':
        opts = builder.with_file_output(
            os.path.join(model_folder, 'profile_time.txt')).build()
        profiler.profile_operations(options=opts)
    else:
        opts = builder.with_timeline_output(
            os.path.join(model_folder, 'profile_graph.txt')).build()
        profiler.profile_graph(options=opts)

    if clustering or tree:
        output_folder = os.path.join(
            model_folder, 'test',
            datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        # makedirs also creates the intermediate 'test' folder, which a
        # plain mkdir would fail on during the first run.
        os.makedirs(output_folder)
        if clustering:
            clustering_classification(labels, y, output_folder)
        if tree:
            tree_analysis(dm, ids, times, annotations, output_folder)
        return output_folder

    return None

コード例 #30

ファイルを表示

ファイル: testing_grappler_mem_optimizer.py プロジェクト: simpeng/learning

def main(_):
    """Train the MNIST network with ``_swap_to_host`` attributes set on
    selected gradient ops, tracing and profiling every iteration.

    Each step's Chrome trace is merged into one timeline, every step is added
    to a tfprof ``Profiler`` and exported as a timeline, and finally the
    graph definition and the merged trace are written to disk.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir)

    # Small side graph; its ops are created before the main model.
    toy_var = bias_variable([3, 3])
    toy_offset = tf.constant(0.2, shape=[3, 3])
    toy_scale = tf.constant(10.0, shape=[3, 3])
    toy_sum = toy_var + toy_offset
    toy_product = tf.multiply(toy_sum, toy_scale)
    relu1 = tf.nn.relu(toy_product, name='relu1')
    _train_relu1 = tf.train.AdamOptimizer(1e-4).minimize(relu1)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.int64, [None])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_,
                                                               logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), y_)
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    from tensorflow.python.profiler import model_analyzer
    from tensorflow.python.profiler import option_builder
    with tf.Session(config=get_sess_config()) as sess:

        many_runs_timeline = TimeLiner()

        # (operation name, '_swap_to_host' value) pairs, applied in order.
        swap_to_host_attrs = [
            ('adam_optimizer/gradients/pool1/MaxPool_grad/MaxPoolGrad',
             attr_value_pb2.AttrValue(
                 list=attr_value_pb2.AttrValue.ListValue(i=[0, 1]))),
            ('adam_optimizer/gradients/conv1/Relu_grad/ReluGrad',
             attr_value_pb2.AttrValue(i=1)),
            ('adam_optimizer/gradients/pool2/MaxPool_grad/MaxPoolGrad',
             attr_value_pb2.AttrValue(
                 list=attr_value_pb2.AttrValue.ListValue(i=[0, 1]))),
            ('adam_optimizer/gradients/conv2/Relu_grad/ReluGrad',
             attr_value_pb2.AttrValue(i=1)),
            ('adam_optimizer/gradients/conv2/Conv2D_grad/Conv2DBackpropInput',
             attr_value_pb2.AttrValue(i=2)),
        ]
        for op_name, attr_value in swap_to_host_attrs:
            operation = sess.graph.get_operation_by_name(op_name)
            operation._set_attr('_swap_to_host', attr_value)

        sess.run(tf.global_variables_initializer())
        profiler = model_analyzer.Profiler(sess.graph)
        for i in range(FLAGS.iteration_count):
            batch = mnist.train.next_batch(FLAGS.batch_size)
            run_metadata = tf.RunMetadata()
            trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            step_feed = {
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5
            }
            sess.run(
                train_step,
                feed_dict=step_feed,
                options=trace_options,
                run_metadata=run_metadata)

            # Fold this step's Chrome trace into the combined timeline.
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            chrome_trace = trace.generate_chrome_trace_format(
                show_dataflow=True, show_memory=True)
            many_runs_timeline.update_timeline(chrome_trace)

            profiler.add_step(i, run_metadata)

            # Export a tfprof timeline for the step that was just added.
            builder = option_builder.ProfileOptionBuilder(
                option_builder.ProfileOptionBuilder.time_and_memory())
            builder = builder.with_step(i)
            builder = builder.with_timeline_output(
                "./timeline_output/step_" + FLAGS.mem_opt +
                str(FLAGS.batch_size) + str(FLAGS.iteration_count))
            opts = builder.build()
            profiler.profile_graph(options=opts)

    # Output names are derived from the batch size and the memory-optimizer
    # flag.
    run_tag = str(FLAGS.batch_size) + str(FLAGS.mem_opt)
    chrome_trace_filename = run_tag + "new"
    graph_location = run_tag + "_swap_test.pbtxt"
    print('Saving graph to: %s' % graph_location)
    tf.train.write_graph(sess.graph_def, '.', graph_location, as_text=True)
    many_runs_timeline.save(chrome_trace_filename + '.ctf.json')