    def test_auto_init(self):
        epoch_counter = [0]

        seq_flow = DataFlow.seq(0, 10, batch_size=2)
        map_flow = seq_flow.map(
            lambda x: (x + epoch_counter[0] * 10,))

        def make_iterator():
            epoch_counter[0] += 1
            return map_flow

        it_flow = DataFlow.iterator_factory(make_iterator)
        flow = it_flow.threaded(3)

        batches = [b[0] for b in flow]
        np.testing.assert_array_equal(
            [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], batches)

        batches = [b[0] for b in flow]
        np.testing.assert_array_equal(
            [[20, 21], [22, 23], [24, 25], [26, 27], [28, 29]], batches)

        flow.close()
        batches = [b[0] for b in flow]
        np.testing.assert_array_equal(
            [[40, 41], [42, 43], [44, 45], [46, 47], [48, 49]], batches)

        flow.close()
Example #2
 def test_flow(self):
     x_flow = DataFlow.arrays([np.arange(10)], batch_size=4)
     y_flow = DataFlow.arrays([np.arange(10, 17)], batch_size=4)
     flow = DataFlow.gather([x_flow, y_flow])
     self.assertIsInstance(flow, GatherFlow)
     self.assertEqual((x_flow, y_flow), flow.flows)
     batches = list(flow)
     self.assertEqual(2, len(batches))
     np.testing.assert_equal(np.arange(4), batches[0][0])
     np.testing.assert_equal(np.arange(10, 14), batches[0][1])
     np.testing.assert_equal(np.arange(4, 8), batches[1][0])
     np.testing.assert_equal(np.arange(14, 17), batches[1][1])
Example #3
def _create_sampled_dataflow(arrays, sampler, sample_now, **kwargs):
    if sample_now:
        arrays = sampler(*arrays)
    df = DataFlow.arrays(arrays, **kwargs)
    if not sample_now:
        df = df.map(sampler)
    return df
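The helper above either applies `sampler` eagerly (once, before the flow is built) or lazily (per batch, via `map`). A minimal usage sketch, assuming a hypothetical `bernoulli_sampler` and input array, neither of which is part of the original code:

import numpy as np

def bernoulli_sampler(x):
    # binarize `x` by sampling from Bernoulli(x); a tuple is returned because
    # DataFlow mappers must return a tuple or list of arrays
    return ((np.random.uniform(size=x.shape) < x).astype(np.int32),)

x = np.random.uniform(size=[100, 8])
df_eager = _create_sampled_dataflow([x], bernoulli_sampler, True, batch_size=16)
df_lazy = _create_sampled_dataflow([x], bernoulli_sampler, False, batch_size=16)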
Example #4
    def test_map_array_indices(self):
        source = DataFlow.arrays([
            np.arange(5),
            np.arange(5, 10),
            np.arange(10, 15),
            np.arange(15, 20)
        ],
                                 batch_size=4)

        # map a single array by index
        df = source.map(lambda x: [x + 1], array_indices=0)
        self.assertEqual(df.array_indices, (0, ))
        b = list(df)
        np.testing.assert_array_equal(b[0], [
            [1, 2, 3, 4],
            [5, 6, 7, 8],
            [10, 11, 12, 13],
            [15, 16, 17, 18],
        ])
        np.testing.assert_array_equal(b[1], [[5], [9], [14], [19]])

        # map multiple arrays by indices
        df = source.map(lambda x, y: [2 * x, 3 * y], array_indices=[1, 3])
        self.assertEqual(df.array_indices, (1, 3))
        b = list(df)
        np.testing.assert_array_equal(b[0], [
            [0, 1, 2, 3],
            [10, 12, 14, 16],
            [10, 11, 12, 13],
            [45, 48, 51, 54],
        ])
        np.testing.assert_array_equal(b[1], [[4], [18], [14], [57]])
Example #5
 def test_select(self):
     x = np.arange(5)
     y = np.arange(5, 10)
     z = np.arange(10, 15)
     flow = DataFlow.arrays([x, y, z], batch_size=5).select([0, 2, 0])
     self.assertEqual(1, len(list(flow)))
     for b in flow:
         np.testing.assert_equal([x, z, x], b)
Example #6
    def test_run(self):
        with self.test_session() as session:
            df = DataFlow.arrays([np.arange(6, dtype=np.float32)], batch_size=4)

            def log_message(m):
                logged_messages.append(m)
            logged_messages = []

            # test default loss weight and merged feed dict
            with TrainLoop([], max_epoch=2) as loop:
                t = BaseTrainer(loop)
                t._run_step = Mock(return_value=None)
                t._iter_steps = Mock(wraps=lambda: loop.iter_steps(df))
                t.before_epochs.add_hook(
                    functools.partial(log_message, 'before_epoch'))
                t.before_steps.add_hook(
                    functools.partial(log_message, 'before_step'))
                t.after_steps.add_hook(
                    functools.partial(log_message, 'after_step'))
                t.after_epochs.add_hook(
                    functools.partial(log_message, 'after_epoch'))

                t.run()
                self.assertEqual(4, len(t._run_step.call_args_list))
                for i, call_args in enumerate(t._run_step.call_args_list[:-2]):
                    call_session, call_payload = call_args[0]
                    self.assertIs(session, call_session)
                    self.assertEqual(i + 1, call_payload[0])
                    self.assertIsInstance(call_payload[1], tuple)
                    self.assertEqual(1, len(call_payload[1]))
                    np.testing.assert_equal(
                        np.arange(6, dtype=np.float32)[i * 4: (i + 1) * 4],
                        call_payload[1][0]
                    )

                self.assertEqual(
                    ['before_epoch', 'before_step', 'after_step',
                     'before_step', 'after_step', 'after_epoch'] * 2,
                    logged_messages
                )

            # test re-entrant error
            with TrainLoop([], max_epoch=1) as loop:
                t = BaseTrainer(loop)
                t._run_step = Mock(return_value=None)
                t._iter_steps = Mock(wraps=lambda: loop.iter_steps(df))

                def reentrant_error():
                    with pytest.raises(
                            RuntimeError, match=r'`run\(\)` is not re-entrant'):
                        t.run()
                reentrant_error = Mock(wraps=reentrant_error)
                t.after_steps.add_hook(reentrant_error)
                t.run()
                self.assertTrue(reentrant_error.called)
Example #7
    def test_run(self):
        with self.test_session() as session:
            df = DataFlow.arrays([np.arange(6, dtype=np.float32)],
                                 batch_size=4)
            ph = tf.placeholder(tf.float32, shape=[None])
            ph2 = tf.placeholder(tf.float32, shape=[])
            ph3 = tf.placeholder(tf.float32, shape=[])

            # test default loss weight and merged feed dict
            with TrainLoop([], max_epoch=1) as loop:
                v = Evaluator(loop,
                              tf.reduce_mean(ph), [ph],
                              df,
                              feed_dict={ph2: 34})
                v._run_batch = Mock(wraps=v._run_batch)

                for epoch in loop.iter_epochs():
                    v.run({ph3: 56})
                    np.testing.assert_almost_equal(
                        2.5, loop._epoch_metrics._metrics['valid_loss'].mean)
                    np.testing.assert_almost_equal(
                        2.5, v.last_metrics_dict['valid_loss'])
                    self.assertIn('eval_time', loop._epoch_metrics._metrics)

                self.assertEqual(2, len(v._run_batch.call_args_list))
                for i, call_args in enumerate(v._run_batch.call_args_list):
                    call_session, call_feed_dict = call_args[0]
                    self.assertIs(session, call_session)
                    np.testing.assert_equal(
                        np.arange(6, dtype=np.float32)[i * 4:(i + 1) * 4],
                        call_feed_dict[ph])
                    self.assertEqual(34, call_feed_dict[ph2])
                    self.assertEqual(56, call_feed_dict[ph3])

            # test None loss weight and None time metric and override feed dict
            with TrainLoop([], max_epoch=1) as loop:
                v = Evaluator(loop, {'valid_loss_x': tf.reduce_mean(ph)}, [ph],
                              df,
                              feed_dict={ph2: 34},
                              batch_weight_func=None,
                              time_metric_name=None)
                v._run_batch = Mock(wraps=v._run_batch)

                for epoch in loop.iter_epochs():
                    v.run({ph2: 56})
                    np.testing.assert_almost_equal(
                        3.0, loop._epoch_metrics._metrics['valid_loss_x'].mean)
                    np.testing.assert_almost_equal(
                        3.0, v.last_metrics_dict['valid_loss_x'])
                    self.assertNotIn('eval_time', loop._epoch_metrics._metrics)

                for i, call_args in enumerate(v._run_batch.call_args_list):
                    call_session, call_feed_dict = call_args[0]
                    self.assertEqual(56, call_feed_dict[ph2])
                    self.assertNotIn(ph3, call_feed_dict)
Example #8
    def test_errors(self):
        # test type error
        source = DataFlow.arrays([np.arange(5), np.arange(5, 10)],
                                 batch_size=4)
        df = source.map(lambda x, y: x + y)
        with pytest.raises(TypeError,
                           match='The output of the mapper is expected to '
                           'be a tuple or a list, but got a'):
            _ = list(df)

        # test len(outputs) != len(inputs)
        source = DataFlow.arrays([np.arange(5), np.arange(5, 10)],
                                 batch_size=4)
        df = source.map(lambda x, y: [x + y], [0, 1])
        with pytest.raises(ValueError,
                           match='The number of output arrays of the mapper '
                           'is required to match the inputs, since '
                           '`array_indices` is specified: outputs 1 != '
                           'inputs 2'):
            _ = list(df)
Example #9
 def test_arrays(self):
     arrays = [np.arange(5), np.arange(10).reshape([5, 2])]
     df = DataFlow.arrays(arrays, 4, shuffle=False, skip_incomplete=False)
     self.assertIsInstance(df, ArrayFlow)
     for i, arr in enumerate(arrays):
         self.assertIs(arr, df.the_arrays[i])
     self.assertEqual(2, df.array_count)
     self.assertEqual(5, df.data_length)
     self.assertEqual(((), (2, )), df.data_shapes)
     self.assertFalse(df.is_shuffled)
     self.assertFalse(df.skip_incomplete)
Example #10
    def test_map_to_tuple(self):
        source = DataFlow.arrays([np.arange(5), np.arange(5, 10)],
                                 batch_size=4)
        df = source.map(lambda x, y: (x + y, ))
        self.assertIs(df.source, source)

        b = list(df)
        self.assertEqual(2, len(b))
        self.assertEqual(1, len(b[0]))
        np.testing.assert_array_equal([5, 7, 9, 11], b[0][0])
        np.testing.assert_array_equal([13], b[1][0])
Example #11
    def test_get_arrays(self):
        with pytest.raises(ValueError,
                           match='empty, cannot convert to arrays'):
            _ = DataFlow.arrays([np.arange(0)], batch_size=5).get_arrays()

        # test one batch
        df = DataFlow.arrays([np.arange(5), np.arange(5, 10)], batch_size=6)
        arrays = df.get_arrays()
        np.testing.assert_equal(np.arange(5), arrays[0])
        np.testing.assert_equal(np.arange(5, 10), arrays[1])

        # test two batches
        df = DataFlow.arrays([np.arange(10), np.arange(10, 20)], batch_size=6)
        arrays = df.get_arrays()
        np.testing.assert_equal(np.arange(10), arrays[0])
        np.testing.assert_equal(np.arange(10, 20), arrays[1])

        # test to_arrays_flow
        df2 = df.to_arrays_flow(batch_size=6)
        self.assertIsInstance(df2, ArrayFlow)
Example #12
    def test_implicit_iterator(self):
        df = DataFlow.arrays([np.arange(3)], batch_size=2)
        self.assertIsNone(df.current_batch)

        np.testing.assert_equal([[0, 1]], df.next_batch())
        np.testing.assert_equal([[0, 1]], df.current_batch)
        np.testing.assert_equal([[2]], df.next_batch())
        np.testing.assert_equal([[2]], df.current_batch)
        with pytest.raises(StopIteration):
            _ = df.next_batch()
        self.assertIsNone(df.current_batch)

        np.testing.assert_equal([[0, 1]], df.next_batch())
        np.testing.assert_equal([[0, 1]], df.current_batch)
Example #13
    def test_run(self):
        ph = tf.placeholder(tf.int32, [5])
        var = tf.get_variable('var', shape=[5], dtype=tf.int32,
                              initializer=tf.zeros_initializer())
        train_op = tf.assign(var, ph)
        df = DataFlow.arrays([np.arange(10, 15, dtype=np.int32)], batch_size=5)
        with self.test_session() as session, \
                TrainLoop([var], max_epoch=1, early_stopping=False) as loop:
            loop.collect_metrics = Mock(wraps=loop.collect_metrics)
            t = LossTrainer(loop, tf.reduce_sum(ph), train_op, [ph], df,
                            metric_name='loss_x')
            ensure_variables_initialized()
            t.run()

            self.assertEqual(
                {'loss_x': 60}, loop.collect_metrics.call_args_list[0][0][0])
            np.testing.assert_equal([10, 11, 12, 13, 14], session.run(var))
Example #14
 def test_seq(self):
     df = DataFlow.seq(1,
                       9,
                       2,
                       batch_size=3,
                       shuffle=False,
                       skip_incomplete=False,
                       dtype=np.int64)
     self.assertIsInstance(df, SeqFlow)
     self.assertEqual(1, df.array_count)
     self.assertEqual(4, df.data_length)
     self.assertEqual(((), ), df.data_shapes)
     self.assertEqual(3, df.batch_size)
     self.assertFalse(df.is_shuffled)
     self.assertFalse(df.skip_incomplete)
     self.assertEqual(1, df.start)
     self.assertEqual(9, df.stop)
     self.assertEqual(2, df.step)
Example #15
    def test_run(self):
        ph = tf.placeholder(tf.int32, [5])
        var = tf.get_variable('var', shape=[5], dtype=tf.int32,
                              initializer=tf.zeros_initializer())
        summary = tf.summary.histogram(var.name, var)
        train_op = tf.assign(var, ph)
        df = DataFlow.arrays([np.arange(10, 15, dtype=np.int32)], batch_size=5)

        with TemporaryDirectory() as tmpdir:
            with self.test_session() as session, \
                    TrainLoop([var], max_epoch=1, early_stopping=False,
                              summary_dir=tmpdir) as loop:
                loop.collect_metrics = Mock(wraps=loop.collect_metrics)
                loop.add_summary = Mock(wraps=loop.add_summary)
                t = Trainer(loop, train_op, [ph], df,
                            metrics={'loss_x': tf.reduce_sum(ph)},
                            summaries=[summary])
                ensure_variables_initialized()
                t.run()

                self.assertEqual(
                    {'loss_x': 60},
                    loop.collect_metrics.call_args_list[0][0][0]
                )
                self.assertTrue(loop.add_summary.called)
                np.testing.assert_equal([10, 11, 12, 13, 14], session.run(var))

        # test specifying summaries but without loop.summary_dir, so no
        # summary should be written
        with self.test_session() as session, \
                TrainLoop([var], max_epoch=1, early_stopping=False) as loop:
            loop.collect_metrics = Mock(wraps=loop.collect_metrics)
            loop.add_summary = Mock(wraps=loop.add_summary)
            t = Trainer(loop, train_op, [ph], df,
                        metrics={'loss_x': tf.reduce_sum(ph)},
                        summaries=[summary])
            ensure_variables_initialized()
            t.run()

            self.assertEqual(
                {'loss_x': 60},
                loop.collect_metrics.call_args_list[0][0][0]
            )
            self.assertFalse(loop.add_summary.called)
Example #16
 def test_errors(self):
     with pytest.raises(
             ValueError, match='`prefetch_num` must be at least 1'):
         _ = ThreadingFlow(DataFlow.arrays([np.arange(10)], batch_size=2),
                           prefetch=0)
Example #17
    def iter_steps(self, data_generator=None):
        """
        Iterate through the steps.

        This method can only be called when no other step loop is being
        iterated and an epoch loop is active.

        Args:
            data_generator: Optional iterable data to be yielded at every step.
                This is required if `max_step` is not configured, so as to
                prevent an infinite step loop.

        Yields:
            int or (int, any): The global step counter (starting from 1), or
                the tuple of ``(step counter, batch data)`` if `data_generator`
                is specified.
        """
        def loop_condition():
            return self._max_step is None or self.step < self._max_step

        self._require_entered()
        if not self._within_epoch:
            raise RuntimeError('Step loop must be opened within active epoch '
                               'loop')
        if self._within_step:
            raise RuntimeError('Another step loop has been opened')
        if self._max_step is None and data_generator is None:
            raise RuntimeError('`data_generator` is required when `max_step` '
                               'is not configured, so as to prevent an '
                               'unstoppable step loop')

        try:
            if data_generator is not None:
                if isinstance(data_generator, DataFlow):
                    data_flow = data_generator
                else:

                    def iter_factory():
                        if data_gen[0] is not None:
                            for batch in data_gen[0]:
                                yield batch
                        data_gen[0] = None  # ensure data_generator is consumed only once

                    data_gen = [data_generator]
                    data_flow = DataFlow.iterator_factory(iter_factory)
                self._data_flow = data_flow

            while loop_condition():
                # prepare for the step data
                if self._data_flow is None:
                    yield_obj = self.step + 1
                    step_data = None
                else:
                    try:
                        step_data = self._data_flow.next_batch()
                    except StopIteration:
                        break
                    yield_obj = self.step + 1, step_data

                # yield this step
                self._states.step += 1
                self._within_step = True
                self._step_data = step_data
                self._step_start_time = time.time()

                self.events.fire(EventKeys.BEFORE_STEP, self)
                try:
                    yield yield_obj
                except StopIteration:  # pragma: no cover
                    # might be caused by call to ``data_flow.next_batch()``
                    break
                self.events.reverse_fire(EventKeys.AFTER_STEP, self)

                self._commit_step_stop_time()
        finally:
            self._within_step = False
            self._step_start_time = None
            self._data_flow = None
            self._step_data = None
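A minimal usage sketch of `iter_steps` based on the docstring above; the loop construction and the metric name follow the other examples in this listing and are illustrative only:

df = DataFlow.arrays([np.arange(8, dtype=np.float32)], batch_size=4)

# with a data flow: each iteration yields (global step counter, batch data)
with TrainLoop([], max_epoch=2) as loop:
    for epoch in loop.iter_epochs():
        for step, [x] in loop.iter_steps(df):
            loop.collect_metrics({'loss': float(np.mean(x))})

# without data: only the step counter is yielded, and `max_step` bounds the loop
with TrainLoop([], max_step=10) as loop:
    for epoch in loop.iter_epochs():
        for step in loop.iter_steps():
            pass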
Example #18
    def test_run(self):
        with self.test_session() as session:
            df = DataFlow.arrays([np.arange(6, dtype=np.float32)],
                                 batch_size=4)

            def log_event(m, trainer):
                logged_events.append((m, trainer))

            logged_events = []

            # test default loss weight and merged feed dict
            with TrainLoop([], max_epoch=2) as loop:
                t = BaseTrainer(loop)
                t._run_step = Mock(return_value=None)
                t._iter_steps = Mock(wraps=lambda: loop.iter_steps(df))
                for key in [
                        EventKeys.BEFORE_EPOCH, EventKeys.BEFORE_STEP,
                        EventKeys.STEP_ANNEALING, EventKeys.STEP_EVALUATION,
                        EventKeys.STEP_LOGGING, EventKeys.AFTER_STEP,
                        EventKeys.EPOCH_ANNEALING, EventKeys.EPOCH_EVALUATION,
                        EventKeys.EPOCH_LOGGING, EventKeys.AFTER_EPOCH
                ]:
                    t.events.on(key, functools.partial(log_event, key))

                t.run()
                self.assertEqual(4, len(t._run_step.call_args_list))
                for i, call_args in enumerate(t._run_step.call_args_list[:-2]):
                    call_session, call_payload = call_args[0]
                    self.assertIs(session, call_session)
                    self.assertEqual(i + 1, call_payload[0])
                    self.assertIsInstance(call_payload[1], tuple)
                    self.assertEqual(1, len(call_payload[1]))
                    np.testing.assert_equal(
                        np.arange(6, dtype=np.float32)[i * 4:(i + 1) * 4],
                        call_payload[1][0])

                expected_logged_events = sum([[
                    (EventKeys.BEFORE_EPOCH, t),
                ] + sum([[
                    (EventKeys.BEFORE_STEP, t),
                    (EventKeys.STEP_EVALUATION, t),
                    (EventKeys.STEP_ANNEALING, t),
                    (EventKeys.STEP_LOGGING, t),
                    (EventKeys.AFTER_STEP, t),
                ] for step in [0, 1]], []) + [(EventKeys.EPOCH_EVALUATION, t),
                                              (EventKeys.EPOCH_ANNEALING, t),
                                              (EventKeys.EPOCH_LOGGING, t),
                                              (EventKeys.AFTER_EPOCH, t)]
                                              for epoch in [0, 1]], [])
                self.assertListEqual(logged_events, expected_logged_events)

            # test re-entrant error
            with TrainLoop([], max_epoch=1) as loop:
                t = BaseTrainer(loop)
                t._run_step = Mock(return_value=None)
                t._iter_steps = Mock(wraps=lambda: loop.iter_steps(df))

                def reentrant_error(trainer):
                    self.assertIs(trainer, t)
                    with pytest.raises(RuntimeError,
                                       match=r'`run\(\)` is not re-entrant'):
                        t.run()

                reentrant_error = Mock(wraps=reentrant_error)
                t.events.on(EventKeys.AFTER_STEP, reentrant_error)
                t.run()
                self.assertTrue(reentrant_error.called)
Example #19
    def test_iterator(self):
        epoch_counter = [0]
        external_counter = [1]

        seq_flow = DataFlow.seq(0, 10, batch_size=2)
        map_flow = seq_flow.map(
            lambda x: (x + epoch_counter[0] * 10 + external_counter[0] * 100,))

        def make_iterator():
            epoch_counter[0] += 1
            return map_flow

        it_flow = DataFlow.iterator_factory(make_iterator)
        with it_flow.threaded(prefetch=2) as flow:
            # the first epoch: base values 0 .. 9, offset by
            # epoch_counter = 1 and external_counter = 1
            np.testing.assert_array_equal(
                [[110, 111], [112, 113], [114, 115], [116, 117], [118, 119]],
                [a[0] for a in flow]
            )
            time.sleep(.1)
            external_counter[0] += 1

            # in the second epoch, the incremented epoch counter affects all
            # batches, while the incremented external counter only affects
            # the batches produced after the prefetched ones
            np.testing.assert_array_equal(
                # having `prefetch = 2` should affect 3 items, because
                # while the queue size is 2, there is 1 additional prefetched
                # item waiting to be enqueued
                [[120, 121], [122, 123], [124, 125], [226, 227], [228, 229]],
                [a[0] for a in flow]
            )
            time.sleep(.1)
            external_counter[0] += 1

            # the third epoch: leave it incomplete by breaking early
            for a in flow:
                np.testing.assert_array_equal([230, 231], a[0])
                break
            time.sleep(.1)
            external_counter[0] += 1

            # verify that the epoch counter increases after break
            for i, (a,) in enumerate(flow):
                # because the interruption point is not predictable in a
                # multi-threaded context, we use a weaker verification than
                # the one above
                self.assertTrue((340 + i * 2 == a[0]) or (440 + i * 2 == a[0]))
                self.assertTrue((341 + i * 2 == a[1]) or (441 + i * 2 == a[1]))
            time.sleep(.1)
            external_counter[0] += 1

            # the fourth epoch is also left incomplete, this time by an error
            try:
                for a in flow:
                    np.testing.assert_array_equal([450, 451], a[0])
                    raise _MyError()
            except _MyError:
                pass
            time.sleep(.1)
            external_counter[0] += 1

            # verify that the epoch counter increases after error
            for i, (a,) in enumerate(flow):
                self.assertTrue((560 + i * 2 == a[0]) or (660 + i * 2 == a[0]))
                self.assertTrue((561 + i * 2 == a[1]) or (661 + i * 2 == a[1]))
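A small sketch of the staleness arithmetic assumed by the comments in the test above; the split into queue capacity plus one in-flight batch is an assumption about the threaded producer, not something read from the ThreadingFlow source:

prefetch = 2               # capacity of the prefetch queue
in_flight = 1              # one batch already produced, waiting to be enqueued
stale_batches = prefetch + in_flight
assert stale_batches == 3  # the 3 batches still built from the old counters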
Example #20
 def test_errors(self):
     with pytest.raises(ValueError,
                        match='At least one flow must be specified'):
         _ = DataFlow.gather([])
     with pytest.raises(TypeError, match='Not a DataFlow'):
         _ = DataFlow.gather([1])
Example #21
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare the result directories
    model_file = config.result_dir + "/" + os.path.basename(__file__).split(".py")[0] + "_" + \
                 str(config.noExp) + ".model"
    dirName = os.path.basename(__file__).split(".py")[0] + "_" + str(
        config.noExp)
    results = MLResults(os.path.join(config.result_dir, dirName))
    results.save_config(config)  # save experiment settings
    results.make_dirs('train_summary', exist_ok=True)
    results.make_dirs('result_summary', exist_ok=True)
    results.make_dirs('mid_summary', exist_ok=True)

    # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate',
                                          config.initial_lr,
                                          config.lr_anneal_factor,
                                          min_value=1e-6)
    multi_gpu = MultiGPU(disable_prebuild=True)
    # multi_gpu = MultiGPU()

    # derive the training operation
    gradses = []
    grad_vars = []
    train_losses = []
    BATCH_SIZE = get_batch_size(input_x)

    for dev, pre_build, [dev_input_x] in \
            multi_gpu.data_parallel(BATCH_SIZE, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            # derive the loss for initializing
            with tf.name_scope('initialization'), \
                    arg_scope([p_net, q_net], is_initializing=True), \
                    spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
                init_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                init_chain = init_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                init_loss = tf.reduce_mean(init_chain.vi.training.vimco())

            # derive the loss and lower-bound for training
            with tf.name_scope('training'), \
                    arg_scope([p_net, q_net], is_training=True):
                train_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                train_chain = train_q_net.chain(p_net,
                                                latent_axis=0,
                                                observed={'x': dev_input_x})
                train_loss = (tf.reduce_mean(train_chain.vi.training.vimco()) +
                              tf.losses.get_regularization_loss())
                train_losses.append(train_loss)

            # derive the logits output for testing
            with tf.name_scope('testing'):
                test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                test_chain = test_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                # log_prob of X and each univariate time series of X
                log_prob = tf.reduce_mean(
                    test_chain.model['x'].distribution.log_prob(dev_input_x),
                    0)
                log_prob_per_element = tf.reduce_sum(log_prob)
                log_prob_per_element_univariate_TS = tf.reduce_sum(
                    log_prob, [0, 1, 3])
                log_prob_per_element_univariate_TS_All = tf.reduce_sum(
                    log_prob, [1, 3])

            # derive the optimizer
            with tf.name_scope('optimizing'):
                params = tf.trainable_variables()
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads = optimizer.compute_gradients(train_loss, params)
                for grad, var in grads:
                    if grad is not None and var is not None:
                        if config.grad_clip_norm:
                            grad = tf.clip_by_norm(grad, config.grad_clip_norm)
                        if config.check_numerics:
                            grad = tf.check_numerics(
                                grad,
                                'gradient for {} has numeric issue'.format(
                                    var.name))
                        grad_vars.append((grad, var))
                gradses.append(grad_vars)

    # merge multi-gpu outputs and operations
    [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(gradses),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # sort the contribution of each univariate_TS of input
    SORT_UNIVARIATE_TS_INPUT = tf.placeholder(dtype=tf.float32,
                                              shape=(None, None),
                                              name='SORT_UNIVARIATE_TS_INPUT')
    SORT_UNIVARIATE_TS = tf.nn.top_k(SORT_UNIVARIATE_TS_INPUT,
                                     k=config.metricNumber).indices + 1

    # load the training and testing data
    print("=" * 10 + "Shape of Input data" + "=" * 10)
    x, time_indexs, x_test, time_indexs2 = load_matrix_allData(
        config.dataReadformat, config.datapathForTrain, config.datapathForTest,
        config.timeLength, config.metricNumber, "TrainFileNameList.txt",
        "TestFileNameList.txt", results, config.norm)

    x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1])
    print("Test:", x_test.shape)
    if config.batchTest:
        test_flow = DataFlow.arrays(
            [x_test], config.test_batch_size)  # DataFlow is an iterator
        del x_test
    x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION)
    x_train = x_train.reshape([-1, config.timeLength, config.metricNumber, 1])
    x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1])
    train_flow = DataFlow.arrays([x_train],
                                 config.batch_size,
                                 shuffle=False,
                                 skip_incomplete=True)
    val_flow = DataFlow.arrays([x_val], config.test_batch_size)
    print("Note:", config.x_dim,
          ", x_dim = size of datapoint = timeLength * metricNumber")
    print("Input data shape:", x.shape, "Train data shape:", x_train.shape,
          "Validation data shape:", x_val.shape)
    del x_train, x_val, x

    # training part
    with spt.utils.create_session().as_default() as session:
        spt.utils.ensure_variables_initialized()
        saver = CheckpointSaver(tf.trainable_variables(), model_file)
        if os.path.exists(model_file):
            # load the parameters of trained model
            saver.restore_latest()
        else:
            # initialize the network
            while True:
                breakFlag = 0
                for [x] in train_flow:
                    INITLOSS = session.run(init_loss, feed_dict={input_x: x})
                    print('Network initialized, first-batch loss is {:.6g}.'.
                          format(INITLOSS))
                    if np.isnan(INITLOSS) or np.isinf(
                            INITLOSS) or INITLOSS > 10**5:
                        pass
                    else:
                        breakFlag = 1
                        break
                if breakFlag:
                    break

            # train the network
            with train_flow.threaded(10) as train_flow:
                with spt.TrainLoop(
                        params,
                        var_groups=['q_net', 'p_net'],
                        max_epoch=config.max_epoch,
                        max_step=config.max_step,
                        summary_dir=(results.system_path('train_summary')
                                     if config.write_summary else None),
                        summary_graph=tf.get_default_graph(),
                        early_stopping=True) as loop:
                    trainer = spt.Trainer(loop,
                                          train_op, [input_x],
                                          train_flow,
                                          metrics={'loss': train_loss},
                                          summaries=tf.summary.merge_all(
                                              spt.GraphKeys.AUTO_HISTOGRAM))
                    # anneal the learning rate
                    trainer.anneal_after(learning_rate,
                                         epochs=config.lr_anneal_epoch_freq,
                                         steps=config.lr_anneal_step_freq)
                    validator = spt.Validator(
                        loop,
                        train_loss,
                        [input_x],
                        val_flow,
                    )
                    trainer.evaluate_after_epochs(validator, freq=10)
                    trainer.log_after_epochs(freq=1)
                    trainer.run()
                saver.save()

            # save the training information
            firWrite = True
            num = 0
            time0 = time.time()
            for [x_train] in train_flow:
                if config.savetrainDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item_Train = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_train}))
                    log_prob_per_element_univariate_TS_list_Train = log_prob_per_element_univariate_TS_list_item_Train
                    log_prob_per_element_list_Train = np.sum(np.array(
                        log_prob_per_element_univariate_TS_list_item_Train),
                                                             axis=1).tolist()
                    if firWrite:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train)
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train)
                    else:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train,
                            "\n", "a")
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train, "\n", "a")

                firWrite = False
                num += 1
                if num % 1000 == 0:
                    print(
                        "-----Train %s >>>>>:Sum time of batch instances:%s" %
                        (num, float(time.time() - time0) / float(num)))
            del train_flow, val_flow

        # online test
        time2 = time.time()
        log_prob_per_element_list, log_prob_per_element_univariate_TS_list = [], []
        if config.batchTest:
            num = 0
            for [x_test] in test_flow:
                if config.savetestDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_test}))
                    log_prob_per_element_univariate_TS_list += \
                        log_prob_per_element_univariate_TS_list_item.tolist()
                    log_prob_per_element_list += np.sum(
                        np.array(log_prob_per_element_univariate_TS_list_item),
                        axis=1).tolist()

                num += 1
                if num % 200 == 0:
                    print("-----Test %s >>>>>:Sum time of batch instances:%s" %
                          (num, float(time.time() - time2) / float(num)))
        else:
            num = 1
            for batch_x in x_test:
                if config.savetestTS:
                    log_prob_per_element_list_item = (session.run(
                        log_prob_per_element, feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_list.append(
                        log_prob_per_element_list_item)

                if config.savetestDS:
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS,
                                    feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_univariate_TS_list.append(
                        log_prob_per_element_univariate_TS_list_item)
                    log_prob_per_element_list.append(
                        sum(log_prob_per_element_univariate_TS_list_item))

                if num % 200 == 0:
                    print(
                        "-----Test>>>>>:%d, average time of each instance:%s" %
                        (num, float(time.time() - time2) / float(num)))
                num += 1

        # get the label file names and their line counts
        allLabelFileNameLineCntList = get_machineID(results, config.labelpath)

        print("No of OutlierScores for all dataPoint:(%s):" %
              len(log_prob_per_element_list))
        if config.savetestDS:
            save_file(
                results.system_path("result_summary"),
                "OutlierScores_metric.txt",
                cat_List(allLabelFileNameLineCntList,
                         log_prob_per_element_univariate_TS_list))
        save_file(
            results.system_path("result_summary"), "OutlierScores.txt",
            cat_List(allLabelFileNameLineCntList, log_prob_per_element_list))

        if config.evaluation:
            # Preparation for the historical two-metric results
            twoMetricScore = read_file(results.system_path("train_summary"),
                                       "OutlierScores_metric.txt")
            ave_twoMetricScore = np.mean(np.array(twoMetricScore),
                                         axis=0).tolist()
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ["Average score of each univariate time series", "\n"],
                      ",")
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ave_twoMetricScore + ["\n"], ",", "a")
            save_file(results.system_path("result_summary"), "PRF.txt", [
                "Threshold", "F", "Precision", "Recall", "TP", "FP", "FN", "\n"
            ], ",", "a")

            # sort the univariate metrics of each data point by change score
            twoMetricScoreList = cal_scoreChanges(
                log_prob_per_element_list, ave_twoMetricScore,
                log_prob_per_element_univariate_TS_list)
            MetricResult = session.run(
                SORT_UNIVARIATE_TS,
                feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList})
            save_file(results.system_path("result_summary"),
                      "MetricResult.txt",
                      cat_List(allLabelFileNameLineCntList, MetricResult))

            # POT evaluation
            POT_TH = pot_eval(
                read_file(results.system_path("train_summary"),
                          "OutlierScores.txt", "float"), config.q,
                config.level)
            resultArray, outlierLabelfileNameLineCntList = cal_binaryResult(
                log_prob_per_element_list, POT_TH, time_indexs2,
                config.saveMetricInfo, allLabelFileNameLineCntList)
            evaluate(results, config.labelpath, resultArray, time_indexs2,
                     POT_TH)

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()

    interpretation_hit_ratio(truth_filepath=config.interpret_filepath,
                             prediction_filepath=os.path.join(
                                 config.result_dir, dirName, "result_summary",
                                 "MetricResult.txt"))
Example #22
    def test_counters(self):
        # test loop with configured `max_epoch`
        with TrainLoop([], max_epoch=2) as loop:
            epoch_counter = 0
            step_counter = 0
            for epoch in loop.iter_epochs():
                epoch_counter += 1
                self.assertEqual(epoch, epoch_counter)
                x_ans = 0
                for step, [x] in \
                        loop.iter_steps(DataFlow.arrays([np.arange(4)], 1)):
                    self.assertEqual(step, loop.step)
                    self.assertEqual(epoch, loop.epoch)
                    self.assertEqual(x, x_ans)
                    x_ans += 1
                    step_counter += 1
                    self.assertEqual(step, step_counter)
                self.assertEqual(step_counter, loop.step)
                self.assertEqual(epoch, loop.epoch)
            self.assertEqual(epoch_counter, 2)
            self.assertEqual(step_counter, 8)

        # test loop with configured `max_step`
        with TrainLoop([], max_step=10) as loop:
            epoch_counter = 0
            step_counter = 0
            for epoch in loop.iter_epochs():
                epoch_counter += 1
                self.assertEqual(epoch, epoch_counter)
                for step in loop.iter_steps():
                    step_counter += 1
                    self.assertEqual(step, step_counter)
            self.assertEqual(epoch_counter, 1)
            self.assertEqual(step_counter, 10)

        # test loop with configured `max_step` with payload
        with TrainLoop([], max_step=10) as loop:
            epoch_counter = 0
            step_counter = 0
            for epoch in loop.iter_epochs():
                epoch_counter += 1
                self.assertEqual(epoch, epoch_counter)
                x_ans = 0
                for step, x in loop.iter_steps(np.arange(4)):
                    self.assertEqual(x, x_ans)
                    x_ans += 1
                    step_counter += 1
                    self.assertEqual(step, step_counter)
            self.assertEqual(epoch_counter, 3)
            self.assertEqual(step_counter, 10)

        # test loop with configured `max_step` and `max_epoch`,
        # while `max_epoch` finishes first
        with TrainLoop([], max_step=10, max_epoch=2) as loop:
            epoch_counter = 0
            step_counter = 0
            for epoch in loop.iter_epochs():
                epoch_counter += 1
                self.assertEqual(epoch, epoch_counter)
                for step, _ in loop.iter_steps(np.arange(4)):
                    step_counter += 1
                    self.assertEqual(step, step_counter)
            self.assertEqual(epoch_counter, 2)
            self.assertEqual(step_counter, 8)

        # test loop with configured `max_step` and `max_epoch`,
        # while `max_step` finishes first
        with TrainLoop([], max_step=10, max_epoch=3) as loop:
            epoch_counter = 0
            step_counter = 0
            for epoch in loop.iter_epochs():
                epoch_counter += 1
                self.assertEqual(epoch, epoch_counter)
                for step, _ in loop.iter_steps(np.arange(4)):
                    step_counter += 1
                    self.assertEqual(step, step_counter)
            self.assertEqual(epoch_counter, 3)
            self.assertEqual(step_counter, 10)
Example #23
 def test_threaded(self):
     flow = DataFlow.arrays([np.arange(10)], batch_size=2). \
         threaded(prefetch=3)
     self.assertIsInstance(flow, ThreadingFlow)
     self.assertEqual(3, flow.prefetch_num)