예제 #1
0
    def testBasics(self):
        """Check that an auto-profiled "op" dump matches one rebuilt from file.

        Registers auto profiling of the "op" view for steps 15, 50 and 100,
        runs the model in a loop and asserts that the dump file exists after
        the loop iterations where one is expected. The dump read at loop
        index 49 is then compared with the output written by a profiler that
        is loaded from the "profile_50" file.
        """
        ops.reset_default_graph()
        dump_path = os.path.join(test.get_temp_dir(), "dump")
        time_and_memory = builder.time_and_memory()
        profile_opts = builder(time_and_memory).with_file_output(
            dump_path).build()

        model = lib.BuildFullModel()

        expected_dump = None
        step50_profile = os.path.join(test.get_temp_dir(), "profile_50")
        with profile_context.ProfileContext(test.get_temp_dir()) as ctx:
            ctx.add_auto_profiling(
                "op", options=profile_opts, profile_steps=[15, 50, 100])
            with session.Session() as sess:
                sess.run(variables.global_variables_initializer())
                # Only 50 iterations are run when no GPU is available, so
                # the index-99 check below is reached on GPU machines only.
                if test.is_gpu_available():
                    num_iterations = 101
                else:
                    num_iterations = 50
                for step_index in range(num_iterations):
                    sess.run(model)
                    if step_index in (14, 99):
                        # A dump must exist here; remove it so the next
                        # existence check cannot see a stale file.
                        self.assertTrue(gfile.Exists(dump_path))
                        gfile.Remove(dump_path)
                    elif step_index == 49:
                        self.assertTrue(gfile.Exists(step50_profile))
                        # Keep this dump as the reference for the final
                        # comparison.
                        with gfile.Open(dump_path, "r") as dump_file:
                            expected_dump = dump_file.read()
                        gfile.Remove(dump_path)

        restored_path = os.path.join(test.get_temp_dir(), "profile_50")
        with lib.ProfilerFromFile(restored_path) as restored:
            restored.profile_operations(options=profile_opts)
            with gfile.Open(dump_path, "r") as dump_file:
                self.assertEqual(expected_dump, dump_file.read())
예제 #2
0
    def testBasics(self):
        """Check that the step-100 "op" dump matches one rebuilt from file.

        Registers auto profiling of the "op" view for steps 15, 50 and 100,
        runs the model for 101 loop iterations and asserts that the dump file
        exists after the iterations where one is expected. It also asserts
        that ``pctx.get_profiles("op")`` is keyed by exactly those three
        steps. The dump read at loop index 99 is finally compared with the
        output written by a profiler loaded from the "profile_100" file.
        """
        ops.reset_default_graph()
        dump_path = os.path.join(test.get_temp_dir(), "dump")
        time_and_memory = builder.time_and_memory()
        profile_opts = builder(time_and_memory).with_file_output(
            dump_path).build()

        model = lib.BuildFullModel()

        expected_dump = None
        step100_profile = os.path.join(test.get_temp_dir(), "profile_100")
        with profile_context.ProfileContext(test.get_temp_dir()) as pctx:
            pctx.add_auto_profiling(
                "op", options=profile_opts, profile_steps=[15, 50, 100])
            with session.Session() as sess:
                sess.run(variables.global_variables_initializer())
                num_iterations = 101
                for step_index in range(num_iterations):
                    sess.run(model)
                    if step_index in (14, 49):
                        # A dump must exist here; remove it so the next
                        # existence check cannot see a stale file.
                        self.assertTrue(gfile.Exists(dump_path))
                        gfile.Remove(dump_path)
                    elif step_index == 99:
                        self.assertTrue(gfile.Exists(step100_profile))
                        # Keep this dump as the reference for the final
                        # comparison.
                        with gfile.Open(dump_path, "r") as dump_file:
                            expected_dump = dump_file.read()
                        gfile.Remove(dump_path)

            profiled_steps = set(pctx.get_profiles("op").keys())
            self.assertEqual({15, 50, 100}, profiled_steps)

        restored_path = os.path.join(test.get_temp_dir(), "profile_100")
        with lib.ProfilerFromFile(restored_path) as restored:
            restored.profile_operations(options=profile_opts)
            with gfile.Open(dump_path, "r") as dump_file:
                if test.is_built_with_rocm():
                    # In ROCm mode the profiler output includes an extra
                    # warning about the lack of stream tracing. Drop the
                    # first 7 lines of the reference dump so that warning
                    # is skipped when doing the diff.
                    reference_lines = expected_dump.split("\n")
                    expected_dump = "\n".join(reference_lines[7:])

                self.assertEqual(expected_dump, dump_file.read())
예제 #3
0
    def testSelectEverythingDetail(self):
        """Profile a small model by name scope with every column selected.

        Builds ``lib.BuildSmallModel()`` on the GPU when one is available
        (CPU otherwise), requests tracing and dumping of the next step, runs
        it once, and writes the name-scope view to a file. The header row and
        the ``Conv2D`` and ``DW`` rows of that file are then checked. Finally
        the test verifies that a profiler restored from the "profile_1" file
        produces the same output.
        """
        ops.reset_default_graph()
        # Query device availability once; the answer drives both the device
        # placement and the per-row expectations below.
        gpu_available = test.is_gpu_available()
        dev = '/device:GPU:0' if gpu_available else '/device:CPU:0'
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        opts = (builder(
            builder.trainable_variables_parameter()).with_file_output(
                outfile).with_accounted_types(['.*']).select([
                    'micros', 'bytes', 'params', 'float_ops', 'occurrence',
                    'device', 'op_types', 'input_shapes'
                ]).build())

        def split_metrics(row):
            # The metric fields are the comma-separated text between the
            # first "(" and the first ")" of a row.
            return row[row.find('(') + 1:row.find(')')].split(',')

        with profile_context.ProfileContext(test.get_temp_dir(),
                                            trace_steps=[],
                                            dump_steps=[]) as pctx:
            with session.Session() as sess, ops.device(dev):
                x = lib.BuildSmallModel()

                sess.run(variables.global_variables_initializer())
                pctx.trace_next_step()
                pctx.dump_next_step()
                _ = sess.run(x)

                pctx.profiler.profile_name_scope(options=opts)

                with gfile.Open(outfile, 'r') as f:
                    dump_str = lib.CheckAndRemoveDoc(f.read())
                outputs = dump_str.split('\n')

                # pylint: disable=line-too-long
                self.assertEqual(
                    outputs[0],
                    'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
                )
                # pylint: enable=line-too-long
                for row in outputs[1:]:
                    # Use membership tests: ``str.find(...) > 0`` would
                    # silently miss a match at index 0.
                    if 'Conv2D ' in row:
                        metrics = split_metrics(row)
                        # Make sure time is profiled. Without a GPU, column
                        # 4 is skipped and only columns 3 and 5 are checked.
                        gap = 1 if gpu_available else 2
                        for col in range(3, 6, gap):
                            mat = re.search(r'(.*)[um]s/(.*)[um]s',
                                            metrics[col])
                            # Fail with a readable message rather than an
                            # AttributeError on a missing match.
                            self.assertIsNotNone(
                                mat, 'no timing found in %r' % metrics[col])
                            self.assertGreater(float(mat.group(1)), 0.0)
                            self.assertGreater(float(mat.group(2)), 0.0)
                        # Make sure device is profiled.
                        if gpu_available:
                            self.assertIn('gpu', metrics[6])
                            self.assertNotIn('cpu', metrics[6])
                        else:
                            self.assertNotIn('gpu', metrics[6])
                            self.assertIn('cpu', metrics[6])
                        # Make sure float_ops is profiled.
                        flops_field = metrics[1].strip()
                        mat = re.search(r'(.*)k/(.*)k flops', flops_field)
                        self.assertIsNotNone(
                            mat, 'no float_ops found in %r' % flops_field)
                        self.assertGreater(float(mat.group(1)), 0.0)
                        self.assertGreater(float(mat.group(2)), 0.0)
                        # Make sure op_count is profiled.
                        self.assertEqual(metrics[8].strip(), '1/1|1/1')
                        # Make sure input_shapes is profiled.
                        self.assertEqual(metrics[9].strip(),
                                         '0:2x6x6x3|1:3x3x3x6')

                    if 'DW (3x3x3x6' in row:
                        metrics = split_metrics(row)
                        params_field = metrics[1].strip()
                        mat = re.search(r'(.*)/(.*) params', params_field)
                        self.assertIsNotNone(
                            mat, 'no params found in %r' % params_field)
                        self.assertGreater(float(mat.group(1)), 0.0)
                        self.assertGreater(float(mat.group(2)), 0.0)

        # Test that profiler restored from profile file gives the same result.
        gfile.Remove(outfile)
        profile_file = os.path.join(test.get_temp_dir(), 'profile_1')
        with lib.ProfilerFromFile(profile_file) as profiler:
            profiler.profile_name_scope(options=opts)
            with gfile.Open(outfile, 'r') as f:
                self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))