Example #1
def close_queue(step, *queues):
    close_net = core.Net("close_queue_net")
    for queue in queues:
        close_net.CloseBlobsQueue([queue], 0)
    close_step = core.execution_step("%s_step" % str(close_net), close_net)
    return core.execution_step("%s_wraper_step" % str(close_net),
                               [step, close_step])
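
A minimal usage sketch, assuming the caffe2.python core and workspace modules
and the close_queue helper above (queue, net, and step names are illustrative):

from caffe2.python import core, workspace

init_net = core.Net("init")
# A queue holding single-blob records, with room for 8 of them.
queue = init_net.CreateBlobsQueue([], 1, capacity=8, num_blobs=1)
workspace.RunNetOnce(init_net)

worker_net = core.Net("worker")
worker_step = core.execution_step("worker_step", worker_net)
# Wrap the worker step so the queue is closed once the step finishes.
wrapped_step = close_queue(worker_step, queue)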
Example #2
def _static_threads_task(name, group, final_outputs, reader, num_threads,
                         output, capacity):
    node_name = str(Node.current())
    profiler_name = "{0}/{1}/{2}/{3}/{4}".format(
        node_name, "pipe", name,
        processor_name(input) if input else "NoInput",
        processor_name(output) if output else "NoOutput")
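    # NOTE: 'input' above is not a parameter of this function; unless it is
    # shadowed at module level, it resolves to the Python builtin, so the
    # "NoInput" branch is never taken.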

    with Task(name=name, group=group, outputs=final_outputs) as task:
        global_exit_net = core.Net('exit')
        global_init_net = core.Net('init')
        reader.setup_ex(global_init_net, global_exit_net)

        out_queue = None
        writer = None

        steps = []
        for thread_id in range(num_threads):
            with NetBuilder(name='t:%d' % thread_id) as nb:
                init_net = core.Net('init')
                exit_net = core.Net('exit')
                read_nets, status, rec = reader.read_record_ex(
                    init_net, exit_net)
                init_net.ConstantFill([], [status],
                                      shape=[],
                                      value=False,
                                      dtype=core.DataType.BOOL)

                if rec is not None:
                    if writer is None:
                        # hack so that the out queue gets the right name prefix
                        # (otherwise they would be prefixed with the thread id)
                        with NetBuilder(_fullname=task.name):
                            out_queue, writer = _init_output(
                                output, capacity, global_init_net,
                                global_exit_net)
                    write_nets, _ = writer.write_record_ex(
                        rec, init_net, exit_net, status)
                else:
                    write_nets = []

                timer_start_net = core.Net('timer_start')
                timer = timer_start_net.TimerBegin([],
                                                   counter_name=profiler_name)
                timer_end_net = core.Net('timer_end')
                timer_end_net.TimerEnd(timer, [])

                ops.net(init_net)
                ops.net(
                    core.execution_step('body',
                                        [timer_start_net] + list(read_nets) +
                                        list(write_nets) + [timer_end_net],
                                        should_stop_blob=status))
                ops.net(timer_end_net)
                ops.net(exit_net)
            steps.append(core.to_execution_step(nb))
        ops.net(global_init_net)
        ops.net(core.execution_step('body', steps, concurrent_substeps=True))
        ops.net(global_exit_net)
    return out_queue, task
Example #3
def For(net_or_step, iter_num):
    """
    Execute net_or_step iter_num times.

    Args:
    net_or_step: an instance of an ExecutionStep or a Net.
    iter_num:    the number of times to execute net_or_step.

    Returns:
    An ExecutionStep instance.
    """
    init_net = core.Net('init-net')
    iter_cnt = init_net.CreateCounter([], init_count=iter_num)
    iter_net = core.Net('For-iter')
    iter_done = iter_net.CountDown([iter_cnt])

    if isinstance(net_or_step, core.Net):
        for_step = core.execution_step('For', [iter_net, net_or_step],
                                       should_stop_blob=iter_done)
    elif isinstance(net_or_step, core.ExecutionStep):
        for_step = core.execution_step('For', [Do(iter_net), net_or_step],
                                       should_stop_blob=iter_done)
    else:
        raise ValueError('net_or_step must be a net or a step.')

    return Do(Do(init_net), for_step)
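
A minimal usage sketch for For, assuming the caffe2.python core and workspace
modules plus the Do helper from the same control module (names are
illustrative):

from caffe2.python import core, workspace

body_net = core.Net('body')
one = body_net.ConstantFill([], shape=[], value=1.0)
body_net.Print([one], [])

plan = core.Plan('loop_plan')
plan.AddStep(For(body_net, 10))  # runs body_net 10 times
workspace.RunPlan(plan)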
Example #4
 def test_atomic_ops(self):
     """
     Test that both countdown and checksum are updated atomically by having
     countdown count from 20k to 0 in the parallel workers while updating
     the checksum with each fetched value. If the operations are truly atomic,
     each value from 1 to 20k should be fetched exactly once from the
     countdown, and fed exactly once into the checksum, such that at the end
     the checksum must contain the exact value of sum[i=1..20000](i).
     """
     init_net = core.Net("init")
     mutex_countdown = init_net.CreateMutex([])
     mutex_checksum = init_net.CreateMutex([])
     countdown = init_net.ConstantFill([], shape=[], value=20000, dtype=core.DataType.INT32)
     checksum = init_net.ConstantFill([], shape=[], value=0, dtype=core.DataType.INT32)
     minus_one = init_net.ConstantFill([], shape=[], value=-1, dtype=core.DataType.INT32)
     steps = []
     for i in range(0, 100):
         net = core.Net("net:%d" % i)
         _, fetched_count = net.AtomicFetchAdd(
             [mutex_countdown, countdown, minus_one], [countdown, "fetched_count:%d" % i]
         )
         net.AtomicFetchAdd([mutex_checksum, checksum, fetched_count], [checksum, "not_used"])
         steps.append(core.execution_step("worker:%d" % i, net, num_iter=200))
     super_step = core.execution_step("parent", steps, concurrent_substeps=True)
     plan = core.Plan("plan")
     plan.AddStep(core.execution_step("init", init_net))
     plan.AddStep(super_step)
     workspace.RunPlan(plan)
     # checksum = sum[i=1..20000](i) = 20000 * 20001 / 2 = 200010000
     self.assertEqual(workspace.FetchBlob(checksum), 200010000)
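
A quick plain-Python sanity check of the expected checksum (100 workers x 200
iterations = 20000 atomic decrements, so each value 1..20000 is fetched once):

assert sum(range(1, 20001)) == 20000 * 20001 // 2 == 200010000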
Example #5
 def test_atomic_ops(self):
     """
     Test that both countdown and checksum are updated atomically by having
     countdown count from 20k to 0 in the parallel workers while updating
     the checksum with each fetched value. If the operations are truly atomic,
     each value from 1 to 20k should be fetched exactly once from the
     countdown, and fed exactly once into the checksum, such that at the end
     the checksum must contain the exact value of sum[i=1..20000](i).
     """
     init_net = core.Net('init')
     mutex_countdown = init_net.CreateMutex([])
     mutex_checksum = init_net.CreateMutex([])
     countdown = init_net.ConstantIntFill([], shape=[], value=20000)
     checksum = init_net.ConstantIntFill([], shape=[], value=0)
     minus_one = init_net.ConstantIntFill([], shape=[], value=-1)
     steps = []
     for i in range(0, 100):
         net = core.Net('net:%d' % i)
         _, fetched_count = net.AtomicFetchAdd(
             [mutex_countdown, countdown, minus_one],
             [countdown, 'fetched_count:%d' % i])
         net.AtomicFetchAdd([mutex_checksum, checksum, fetched_count],
                            [checksum, 'not_used'])
         steps.append(
             core.execution_step('worker:%d' % i, net, num_iter=200))
     super_step = core.execution_step('parent',
                                      steps,
                                      concurrent_substeps=True)
     plan = core.Plan('plan')
     plan.AddStep(core.execution_step('init', init_net))
     plan.AddStep(super_step)
     workspace.RunPlan(plan)
     # checksum = sum[i=1..20000](i) = 20000 * 20001 / 2 = 200010000
     self.assertEqual(workspace.FetchBlob(checksum), 200010000)
Example #6
    def build_cache_step(self, overwrite=False):
        """Build a step for generating cache DB file.

            If self.db_path exists and overwrite is False, build an empty step.
            Otherwise, build a step as follows.
            Pipe original reader to the _DatasetWriter,
            so that dataset field blobs are populated.
            Then save these blobs into a file.

            Args:
                overwrite: bool. If true, ignore the existing file
                    and build a new one, overwriting the existing one.

            Returns:
                build_cache_step: ExecutionStep.
                    The step to be run for building a cache DB file.
        """
        if os.path.exists(self.db_path) and not overwrite:
            # cache already exists, no need to rebuild it
            return core.execution_step('build_step', [])

        init_net = core.Net('init')
        self._init_field_blobs_as_empty(init_net)
        with Cluster(), core.NameScope(self.name), TaskGroup() as copy_tg:
            pipe(self.original_reader, self.ds.writer(), num_threads=16)
            copy_step = copy_tg.to_task().get_step()
        save_net = core.Net('save')
        self._save_field_blobs_to_db_file(save_net)

        return core.execution_step('build_cache',
                                   [init_net, copy_step, save_net])
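
A minimal usage sketch, assuming a dataset-cache object exposing the method
above (the cacher variable is hypothetical):

plan = core.Plan('cache_plan')
plan.AddStep(cacher.build_cache_step(overwrite=False))
workspace.RunPlan(plan)  # a no-op step if the cache DB already exists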
Example #7
 def get_step(self):
     if self._step is not None and self._step_with_setup is None:
         report_net = self._step.get_all_attributes(Task.REPORT_NET)
         assert len(report_net) <= 1, (
             'Currently only one report net supported per task.')
         if report_net:
             report_net = report_net[0]
             if not hasattr(report_net, '_report_net_used'):
                 self._step.SetReportNet(report_net, 1)
                 report_net._report_net_used = True
         init_nets, exit_nets = get_setup_nets(
             Task.TASK_SETUP, [self._step], self)
         if len(self._outputs) == 0:
             output_net = core.Net('%s:output' % self.name)
             self.add_output(output_net.ConstantFill(
                 [], 1, dtype=core.DataType.INT32, value=0))
             exit_nets.append(output_net)
         self._step_with_setup = core.execution_step(
             self.name,
             [
                 core.execution_step('%s:init' % self.name, init_nets),
                 self._step,
                 core.execution_step('%s:exit' % self.name, exit_nets),
             ]
         )
     elif self._step_with_setup is None:
         self._step_with_setup = core.execution_step(self.name, [])
     return self._step_with_setup
Example #8
    def get_step(self):
        if self._step is not None and self._step_with_setup is None:
            report_steps = list(filter(
                lambda s: not hasattr(s, '_report_step_used'),
                self._step.get_all_attributes(Task.REPORT_STEP)))
            for step in report_steps:
                step._report_step_used = True
                if not step.Proto().run_every_ms:
                    step.RunEveryMillis(1000)
            init_nets, exit_nets = get_setup_nets(Task.TASK_SETUP,
                                                  [self._step] + report_steps,
                                                  self)
            if len(self._outputs) == 0:
                output_net = core.Net('%s:output' % self.name)
                self.add_output(
                    output_net.ConstantFill([],
                                            1,
                                            dtype=core.DataType.INT32,
                                            value=0))
                exit_nets.append(output_net)

            body = self._step if not report_steps else core.execution_step(
                '%s:body' % self.name, report_steps + [self._step])
            self._step_with_setup = core.execution_step(
                self.name, [
                    core.execution_step('%s:init' % self.name, init_nets),
                    body,
                    core.execution_step('%s:exit' % self.name, exit_nets),
                ])
        elif self._step_with_setup is None:
            self._step_with_setup = core.execution_step(self.name, [])
        return self._step_with_setup
Example #9
    def test_collect_tensor_ops(self):
        init_net = core.Net('init_net')
        blobs = ['blob_1', 'blob_2', 'blob_3']
        bvec_map = {}
        ONE = init_net.ConstantFill([], 'ONE', shape=[1, 2], value=1)
        for b in blobs:
            init_net.ConstantFill([], [b], shape=[1, 2], value=0)
            bvec_map[b] = b + '_vec'
            init_net.CreateTensorVector([], [bvec_map[b]])

        reader_net = core.Net('reader_net')
        for b in blobs:
            reader_net.Add([b, ONE], [b])

        collect_net = core.Net('collect_net')
        num_to_collect = 1000
        max_example_to_cover = 100000
        for i, b in enumerate(blobs):
            if i == 0:
                bvec_map[b], position = collect_net.CollectTensor(
                    [bvec_map[b], b], [bvec_map[b], 'position'],
                    num_to_collect=num_to_collect)
            else:
                # sample in the same way as the first blob
                bvec_map[b], position = collect_net.CollectTensor(
                    [bvec_map[b], b, position], [bvec_map[b], position],
                    num_to_collect=num_to_collect)

        print('Collect Net Proto: {}'.format(collect_net.Proto()))

        plan = core.Plan('collect_data')
        plan.AddStep(core.execution_step('collect_init', init_net))
        plan.AddStep(
            core.execution_step('collect_data', [reader_net, collect_net],
                                num_iter=max_example_to_cover))
        workspace.RunPlan(plan)

        # concat the collected tensors
        concat_net = core.Net('concat_net')
        bconcated_map = {}
        for b in blobs:
            bconcated_map[b] = b + '_concated'
            concat_net.ConcatTensorVector([bvec_map[b]], [bconcated_map[b]])

        workspace.RunNetOnce(concat_net)

        # check data
        reference_result = workspace.FetchBlob(bconcated_map[blobs[0]])
        self.assertEqual(reference_result.shape,
                         (min(num_to_collect, max_example_to_cover), 2))

        hist, _ = np.histogram(reference_result[:, 0],
                               bins=10,
                               range=(1, max_example_to_cover))
        print('Sample histogram: {}'.format(hist))

        self.assertTrue(all(hist > 0.7 * (num_to_collect / 10)))
        for i in range(1, len(blobs)):
            result = workspace.FetchBlob(bconcated_map[blobs[i]])
            self.assertEqual(reference_result.tolist(), result.tolist())
Example #10
    def build_cache_step(self, overwrite=False):
        """Build a step for generating cache DB file.

            If self.db_path exists and overwrite is False, build an empty step.
            Otherwise, build a step as follows.
            Pipe original reader to the _DatasetWriter,
            so that dataset field blobs are populated.
            Then save these blobs into a file.

            Args:
                overwrite: bool. If true, ignore the existing file
                    and build a new one, overwriting the existing one.

            Returns:
                build_cache_step: ExecutionStep.
                    The step to be run for building a cache DB file.
        """
        if os.path.exists(self.db_path) and not overwrite:
            # cache already exists, no need to rebuild it
            return core.execution_step('build_step', [])

        init_net = core.Net('init')
        self._init_field_blobs_as_empty(init_net)
        with Cluster(), core.NameScope(self.name), TaskGroup() as copy_tg:
            pipe(self.original_reader, self.ds.writer(), num_threads=16)
            copy_step = copy_tg.to_task().get_step()
        save_net = core.Net('save')
        self._save_field_blobs_to_db_file(save_net)

        return core.execution_step('build_cache', [init_net, copy_step, save_net])
Example #11
    def test_last_n_window_ops(self):
        collect_net = core.Net("collect_net")
        collect_net.GivenTensorFill(
            [],
            "input",
            shape=[3, 2],
            values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        )
        input_array = np.array(list(range(1, 7)), dtype=np.float32).reshape(3, 2)

        workspace.CreateBlob("output")
        workspace.FeedBlob("next", np.array(0, dtype=np.int32))
        collect_net.LastNWindowCollector(
            ["output", "next", "input"],
            ["output", "next"],
            num_to_collect=7,
        )
        plan = core.Plan("collect_data")
        plan.AddStep(core.execution_step("collect_data", [collect_net], num_iter=1))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob("output")
        npt.assert_array_equal(input_array, reference_result)

        plan = core.Plan("collect_data")
        plan.AddStep(core.execution_step("collect_data", [collect_net], num_iter=2))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob("output")
        npt.assert_array_equal(input_array[[1, 2, 2, 0, 1, 2, 0]], reference_result)

        plan = core.Plan("collect_data")
        plan.AddStep(core.execution_step("collect_data", [collect_net], num_iter=3))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob("output")
        npt.assert_array_equal(input_array[[2, 0, 1, 2, 2, 0, 1]], reference_result)
Example #12
    def tasks_by_node(self, node_remap=None):
        # tasks_by_node can't be called twice because the setup won't
        # work properly a second time.
        node_map = {}
        for task in self.tasks():
            node_map[task.node] =\
                node_remap(task.node) if node_remap else task.node
        if self._tasks_by_node is not None:
            tasks_by_node, prev_node_map = self._tasks_by_node
            assert prev_node_map == node_map, (
                'Cannot call tasks_by_node multiple times.')
            return tasks_by_node

        tasks_by_node = defaultdict(list)
        for task in self.tasks():
            tasks_by_node[node_map[task.node]].append(task)
        grouped_by_node = TaskGroup()
        for node, tasks in tasks_by_node.items():
            node_inits, node_exits = get_setup_nets(
                TaskGroup.LOCAL_SETUP, [t.get_step() for t in tasks], self)
            # shortcut for single task with no queue
            steps = []
            outputs = []
            workspace_type = tasks[0].workspace_type()
            for task in tasks:
                step = task.get_step()
                if step is not None:
                    steps.append(step)
                outputs += task.outputs()
                assert workspace_type == task.workspace_type(), (
                    'All tasks for a given node need the same workspace type.')
            if len(steps) == 0:
                steps.append(core.execution_step('empty', []))
            if len(steps) == 1:
                step = steps[0]
            else:
                step = core.execution_step('%s:body' % node,
                                           steps,
                                           concurrent_substeps=True)
            if node in self._report_nets:
                net, interval = self._report_nets[node]
                step.SetReportNet(net, interval)
            if len(node_inits) > 0 or len(node_exits) > 0:
                steps = []
                if len(node_inits) > 0:
                    steps.append(
                        core.execution_step('%s:init' % node, node_inits))
                steps.append(step)
                if len(node_exits) > 0:
                    steps.append(
                        core.execution_step('%s:exit' % node, node_exits))
                step = core.execution_step(node, steps)
            Task(node=node,
                 step=step,
                 outputs=outputs,
                 group=grouped_by_node,
                 workspace_type=workspace_type)
        self._tasks_by_node = (grouped_by_node, node_map)
        return grouped_by_node
Example #13
    def test_collect_tensor_ops(self):
        init_net = core.Net('init_net')
        blobs = ['blob_1', 'blob_2', 'blob_3']
        bvec_map = {}
        ONE = init_net.ConstantFill([], 'ONE', shape=[1, 2], value=1)
        for b in blobs:
            init_net.ConstantFill([], [b], shape=[1, 2], value=0)
            bvec_map[b] = b + '_vec'
            init_net.CreateTensorVector([], [bvec_map[b]])

        reader_net = core.Net('reader_net')
        for b in blobs:
            reader_net.Add([b, ONE], [b])

        collect_net = core.Net('collect_net')
        num_to_collect = 1000
        max_example_to_cover = 100000
        for i, b in enumerate(blobs):
            if i == 0:
                bvec_map[b], position = collect_net.CollectTensor(
                    [bvec_map[b], b], [bvec_map[b], 'position'],
                    num_to_collect=num_to_collect)
            else:
                # sample in the same way as the first blob
                bvec_map[b], position = collect_net.CollectTensor(
                    [bvec_map[b], b, position], [bvec_map[b], position],
                    num_to_collect=num_to_collect)

        print('Collect Net Proto: {}'.format(collect_net.Proto()))

        plan = core.Plan('collect_data')
        plan.AddStep(core.execution_step('collect_init', init_net))
        plan.AddStep(core.execution_step('collect_data',
                                         [reader_net, collect_net],
                                         num_iter=max_example_to_cover))
        workspace.RunPlan(plan)

        # concat the collected tensors
        concat_net = core.Net('concat_net')
        bconcated_map = {}
        for b in blobs:
            bconcated_map[b] = b + '_concated'
            concat_net.ConcatTensorVector([bvec_map[b]], [bconcated_map[b]])

        workspace.RunNetOnce(concat_net)

        # check data
        reference_result = workspace.FetchBlob(bconcated_map[blobs[0]])
        self.assertEqual(reference_result.shape,
                         (min(num_to_collect, max_example_to_cover), 2))

        hist, _ = np.histogram(reference_result[:, 0], bins=10,
                               range=(1, max_example_to_cover))
        print('Sample histogram: {}'.format(hist))

        self.assertTrue(all(hist > 0.7 * (num_to_collect / 10)))
        for i in range(1, len(blobs)):
            result = workspace.FetchBlob(bconcated_map[blobs[i]])
            self.assertEqual(reference_result.tolist(), result.tolist())
Example #14
def close_queue(step, *queues):
    close_net = core.Net("close_queue_net")
    for queue in queues:
        close_net.CloseBlobsQueue([queue], 0)
    close_step = core.execution_step("%s_step" % str(close_net), close_net)
    return core.execution_step(
        "%s_wraper_step" % str(close_net),
        [step, close_step])
Example #15
    def tasks_by_node(self, node_remap=None):
        # tasks_by_node can't be called twice because the setup won't
        # work properly a second time.
        node_map = {}
        for task in self.tasks():
            node_map[task.node] =\
                node_remap(task.node) if node_remap else task.node
        if self._tasks_by_node is not None:
            tasks_by_node, prev_node_map = self._tasks_by_node
            assert prev_node_map == node_map, (
                'Cannot call tasks_by_node multiple times.')
            return tasks_by_node

        tasks_by_node = defaultdict(list)
        for task in self.tasks():
            tasks_by_node[node_map[task.node]].append(task)
        grouped_by_node = TaskGroup()
        for node, tasks in tasks_by_node.items():
            node_inits, node_exits = get_setup_nets(
                TaskGroup.LOCAL_SETUP, [t.get_step() for t in tasks], self)
            # shortcut for single task with no queue
            steps = []
            outputs = []
            workspace_type = tasks[0].workspace_type()
            for task in tasks:
                step = task.get_step()
                if step is not None:
                    steps.append(step)
                outputs += task.outputs()
                assert workspace_type == task.workspace_type(), (
                    'All tasks for a given node need the same workspace type.')
            if len(steps) == 0:
                steps.append(core.execution_step('empty', []))
            if len(steps) == 1:
                step = steps[0]
            else:
                step = core.execution_step(
                    '%s:body' % node, steps, concurrent_substeps=True)
            if node in self._report_nets:
                net, interval = self._report_nets[node]
                step.SetReportNet(net, interval)
            if len(node_inits) > 0 or len(node_exits) > 0:
                steps = []
                if len(node_inits) > 0:
                    steps.append(
                        core.execution_step('%s:init' % node, node_inits))
                steps.append(step)
                if len(node_exits) > 0:
                    steps.append(
                        core.execution_step('%s:exit' % node, node_exits))
                step = core.execution_step(node, steps)
            Task(
                node=node, step=step, outputs=outputs,
                group=grouped_by_node, workspace_type=workspace_type)
        self._tasks_by_node = (grouped_by_node, node_map)
        return grouped_by_node
Example #16
def add_setup_steps(step, init_nets, exit_nets, name):
    if not init_nets and not exit_nets:
        return step
    steps = []
    if init_nets:
        steps.append(core.execution_step('%s:init' % name, init_nets))
    steps.append(step)
    if len(exit_nets) > 0:
        steps.append(core.execution_step('%s:exit' % name, exit_nets))
    return core.execution_step(name, steps)
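
A minimal usage sketch (net and step names are illustrative):

work_net = core.Net('work')
work_step = core.execution_step('work_step', work_net)
init_nets = [core.Net('setup')]
exit_nets = [core.Net('teardown')]
# Produces a wrapper step that runs work:init, then the step, then work:exit.
wrapped = add_setup_steps(work_step, init_nets, exit_nets, 'work')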
Example #17
 def get_step(self):
     """
     Create and return a Caffe2 execution step that will run all the tasks
     of this pipeline in parallel.
     """
     return core.execution_step('worker_step', [
         core.execution_step('worker_init', self.init_net),
         core.execution_step(
             'tasks_step', self.tasks, concurrent_substeps=True)
     ])
Example #18
def add_setup_steps(step, init_nets, exit_nets, name):
    if not init_nets and not exit_nets:
        return step
    steps = []
    if init_nets:
        steps.append(core.execution_step('%s:init' % name, init_nets))
    steps.append(step)
    if len(exit_nets) > 0:
        steps.append(core.execution_step('%s:exit' % name, exit_nets))
    return core.execution_step(name, steps)
Example #19
    def get_step(self):
        if self._step_with_setup is not None:
            return self._step_with_setup

        if self._step is None:
            self._step_with_setup = core.execution_step(self.name, [])
            return self._step_with_setup

        report_steps = [
            s for s in self._step.get_all_attributes(Task.REPORT_STEP)
            if not hasattr(s, '_report_step_used')
        ]
        for step in report_steps:
            step._report_step_used = True
            if not step.Proto().run_every_ms:
                step.RunEveryMillis(1000)
        task_init_nets, task_exit_nets = get_setup_nets(
            Task.TASK_SETUP, [self._step] + report_steps, self)
        instance_init_nets, instance_exit_nets = get_setup_nets(
            Task.TASK_INSTANCE_SETUP, [self._step] + report_steps, self)
        if len(self._outputs) == 0:
            output_net = core.Net('%s:output' % self.name)
            self.add_output(
                output_net.ConstantFill([],
                                        1,
                                        dtype=core.DataType.INT32,
                                        value=0))
            task_exit_nets.append(output_net)

        # Add instance-level report steps
        body = self._step if not report_steps else core.execution_step(
            '%s:body' % self.name, report_steps + [self._step])
        # Enclose with instance-level (thread-local) setup nets
        step_with_instance_setup = add_setup_steps(body, instance_init_nets,
                                                   instance_exit_nets,
                                                   self.name + ':instance')
        # Set up runtime concurrent instances
        if self._num_instances and self._num_instances > 1:
            step_with_instance_setup.SetCreateWorkspace(True)
            step_with_instance_setup = core.execution_step(
                '%s:parallel' % self.name, [step_with_instance_setup],
                num_concurrent_instances=self._num_instances)
        # Enclose with task-level setup nets
        self._step_with_setup = add_setup_steps(step_with_instance_setup,
                                                task_init_nets, task_exit_nets,
                                                self.name)

        return self._step_with_setup
Example #20
    def __init__(self,
                 fields,
                 name=None,
                 capacity=1,
                 enforce_unique_name=False,
                 num_threads=1):
        assert isinstance(fields, list) or isinstance(fields, Struct), (
            'fields must be either a Struct or a list of raw field names.')
        if isinstance(fields, list):
            fields = from_column_list(fields)
        self.schema = fields
        self.name = name or 'queue'
        self.num_threads = num_threads
        num_blobs = len(self.schema.field_names())
        init_net = core.Net(self.name + '/init_net')
        self.blobs_queue = init_net.CreateBlobsQueue(
            [],
            1,
            capacity=capacity,
            num_blobs=num_blobs,
            enforce_unique_name=enforce_unique_name)
        core.workspace.RunNetOnce(init_net)

        self.writer = _QueueWriter(self.blobs_queue, self.schema)
        reader_name = self.name + '_reader'
        self.reader = _QueueReader(self.blobs_queue, self.schema, reader_name)

        exit_net = core.Net(self.name + '/exit_net')
        exit_net.CloseBlobsQueue(self.blobs_queue, 0)
        self.exit_step = core.execution_step(
            '{}_close_step'.format(str(exit_net)), exit_net)
Example #21
    def test_pair_wise_loss_predictions(self, X, label, gc, dc):
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('label', label)
        new_label = np.array([label[1], label[0]])
        new_x = np.array([X[1], X[0]])
        workspace.FeedBlob('new_x', new_x)
        workspace.FeedBlob('new_label', new_label)
        net = core.Net('net')
        net.PairWiseLoss(['X', 'label'], ['output'])
        net.PairWiseLoss(['new_x', 'new_label'], ['new_output'])
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data',
                                         [net], num_iter=1))
        workspace.RunPlan(plan)
        output = workspace.FetchBlob('output')
        new_output = workspace.FetchBlob('new_output')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            self.assertEqual(np.asscalar(output), 0)
            return

        self.assertAlmostEqual(
            np.asscalar(output),
            np.asscalar(np.log(1 + np.exp(sign * (X[1] - X[0])))),
            delta=1e-4
        )
        # check swapping row order doesn't alter overall loss
        self.assertAlmostEqual(output, new_output)
Example #22
    def compile(cls, runnable):
        if isinstance(runnable, CompiledRunnable):
            assert cls == runnable.session_class, (
                'Runnable was compiled for different session type. ' +
                'Need: %s, got: %s' % (
                    cls.__name__, runnable.session_class.__name__))
            return runnable

        if runnable in cls._compiled_cache:
            return cls._compiled_cache[runnable]

        if isinstance(runnable, TaskGroup):
            tg = runnable
        else:
            tg = TaskGroup(workspace_type=WorkspaceType.GLOBAL)
            if isinstance(runnable, Task):
                tg.add(runnable)
            elif isinstance(runnable, core.ExecutionStep):
                tg.add(Task(step=runnable))
            else:
                step = core.execution_step('runnable', runnable)
                tg.add(Task(step=step))
        compiled = CompiledRunnable(
            cls._compile_task_group(tg), session_class=cls)
        cls._compiled_cache[runnable] = compiled
        return compiled
Example #23
    def build_cache(self, cache_path, overwrite=False):
        if not self.has_cache() or overwrite:
            self.cache_path = cache_path
        if self.has_cache() and not overwrite:
            # cache already exists, no need to rebuild it
            return core.execution_step('build_step', [])

        init_net = core.Net('init')
        self._init_dataset(init_net)
        with Cluster(), core.NameScope(self.name), TaskGroup() as copy_tg:
            pipe(self.original_reader, self.ds.writer(), num_threads=16)
            copy_step = copy_tg.to_task().get_step()
        save_net = core.Net('save')
        self._save_to_file(save_net)

        return core.execution_step('build_cache', [init_net, copy_step, save_net])
Example #24
    def __init__(self, fields, name=None, capacity=1,
                 enforce_unique_name=False, num_threads=1):
        assert isinstance(fields, list) or isinstance(fields, Struct), (
            'fields must be either a Struct or a list of raw field names.')
        if isinstance(fields, list):
            fields = from_column_list(fields)
        self.schema = fields
        self.name = name or 'queue'
        self.num_threads = num_threads
        num_blobs = len(self.schema.field_names())
        init_net = core.Net(self.name + '/init_net')
        self.blobs_queue = init_net.CreateBlobsQueue(
            [], 1,
            capacity=capacity,
            num_blobs=num_blobs,
            enforce_unique_name=enforce_unique_name)
        core.workspace.RunNetOnce(init_net)

        self.writer = _QueueWriter(self.blobs_queue, self.schema)
        reader_name = self.name + '_reader'
        self.reader = _QueueReader(self.blobs_queue, self.schema, reader_name)

        exit_net = core.Net(self.name + '/exit_net')
        exit_net.CloseBlobsQueue(self.blobs_queue, 0)
        self.exit_step = core.execution_step(
            '{}_close_step'.format(str(exit_net)),
            exit_net)
Example #25
    def compile(cls, runnable):
        if isinstance(runnable, CompiledRunnable):
            assert cls == runnable.session_class, (
                'Runnable was compiled for different session type. ' +
                'Need: %s, got: %s' %
                (cls.__name__, runnable.session_class.__name__))
            return runnable

        if runnable in cls._compiled_cache:
            return cls._compiled_cache[runnable]

        if isinstance(runnable, TaskGroup):
            tg = runnable
        else:
            tg = TaskGroup(workspace_type=WorkspaceType.GLOBAL)
            if isinstance(runnable, Task):
                tg.add(runnable)
            elif isinstance(runnable, core.ExecutionStep):
                tg.add(Task(step=runnable))
            else:
                step = core.execution_step('runnable', runnable)
                tg.add(Task(step=step))
        compiled = CompiledRunnable(cls._compile_task_group(tg),
                                    session_class=cls)
        cls._compiled_cache[runnable] = compiled
        return compiled
Example #26
    def test_pair_wise_loss_predictions(self, X, label, gc, dc):
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('label', label)
        new_label = np.array([label[1], label[0]])
        new_x = np.array([X[1], X[0]])
        workspace.FeedBlob('new_x', new_x)
        workspace.FeedBlob('new_label', new_label)
        net = core.Net('net')
        net.PairWiseLoss(['X', 'label'], ['output'])
        net.PairWiseLoss(['new_x', 'new_label'], ['new_output'])
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        output = workspace.FetchBlob('output')
        new_output = workspace.FetchBlob('new_output')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            self.assertEqual(np.asscalar(output), 0)
            return

        self.assertAlmostEqual(np.asscalar(output),
                               np.asscalar(
                                   np.log(1 + np.exp(sign * (X[1] - X[0])))),
                               delta=1e-4)
        # check swapping row order doesn't alter overall loss
        self.assertAlmostEqual(output, new_output)
Example #27
def Switch(*conditions):
    """
    Execute the steps for which the condition is true.
    Each condition is a tuple (condition_blob_or_net, step).
    Note:
      1. Multiple steps can be executed if their conditions are true.
      2. The condition_blob_or_net (if it is a Net) of every step will be
         executed once.

    Examples:
    - Switch((cond_1, net_1), (cond_2, net_2), ..., (cond_n, net_n))
    - Switch([(cond_1, net1), (cond_2, net_2), ..., (cond_n, net_n)])
    - Switch((cond_1, net_1))
    """
    if len(conditions) == 0:
        raise ValueError('conditions cannot be empty.')
    elif len(conditions) == 1:
        conditions = conditions[0]
        if not isinstance(conditions, list):
            conditions = [conditions]
    else:
        conditions = list(conditions)

    return core.execution_step(
        'Switch', [_RunOnceIf(cond, step) for cond, step in conditions])
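
A minimal usage sketch, assuming that when a condition is a Net its condition
blob is derived via GetConditionBlobFromNet as in the other helpers of this
module (net and blob names are illustrative):

cond_net = core.Net('cond')
cond_net.ConstantFill([], ['is_ready'], shape=[], value=True,
                      dtype=core.DataType.BOOL)
branch_net = core.Net('branch')
switch_step = Switch((cond_net, branch_net))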
Example #28
    def test_last_n_window_ops(self):
        collect_net = core.Net('collect_net')
        collect_net.GivenTensorFill(
            [],
            'input',
            shape=[3, 2],
            values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        )
        input_array =\
            np.array(list(range(1, 7)), dtype=np.float32).reshape(3, 2)

        workspace.CreateBlob('output')
        workspace.FeedBlob('next', np.array(0, dtype=np.int32))
        collect_net.LastNWindowCollector(
            ['output', 'next', 'input'],
            ['output', 'next'],
            num_to_collect=7,
        )
        plan = core.Plan('collect_data')
        plan.AddStep(
            core.execution_step('collect_data', [collect_net],
                                num_iter=1)
        )
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        npt.assert_array_equal(input_array, reference_result)

        plan = core.Plan('collect_data')
        plan.AddStep(
            core.execution_step('collect_data', [collect_net],
                                num_iter=2)
        )
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        npt.assert_array_equal(input_array[[1, 2, 2, 0, 1, 2, 0]],
                               reference_result)

        plan = core.Plan('collect_data')
        plan.AddStep(
            core.execution_step('collect_data', [collect_net],
                                num_iter=3)
        )
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        npt.assert_array_equal(input_array[[2, 0, 1, 2, 2, 0, 1]],
                               reference_result)
Example #29
    def get_step(self):
        if self._step_with_setup is not None:
            return self._step_with_setup

        if self._step is None:
            self._step_with_setup = core.execution_step(self.name, [])
            return self._step_with_setup

        report_steps = [
            s
            for s in self._step.get_all_attributes(Task.REPORT_STEP)
            if not hasattr(s, '_report_step_used')
        ]
        for step in report_steps:
            step._report_step_used = True
            if not step.Proto().run_every_ms:
                step.RunEveryMillis(1000)
        task_init_nets, task_exit_nets = get_setup_nets(
            Task.TASK_SETUP, [self._step] + report_steps, self)
        instance_init_nets, instance_exit_nets = get_setup_nets(
            Task.TASK_INSTANCE_SETUP, [self._step] + report_steps, self)
        if len(self._outputs) == 0:
            output_net = core.Net('%s:output' % self.name)
            self.add_output(output_net.ConstantFill(
                [], 1, dtype=core.DataType.INT32, value=0))
            task_exit_nets.append(output_net)

        # Add instance-level report steps
        body = self._step if not report_steps else core.execution_step(
            '%s:body' % self.name, report_steps + [self._step])
        # Enclose with instance-level (thread-local) setup nets
        step_with_instance_setup = add_setup_steps(
            body, instance_init_nets, instance_exit_nets,
            self.name + ':instance')
        # Set up runtime concurrent instances
        if self._num_instances and self._num_instances > 1:
            step_with_instance_setup.SetCreateWorkspace(True)
            step_with_instance_setup = core.execution_step(
                '%s:parallel' % self.name,
                [step_with_instance_setup],
                num_concurrent_instances=self._num_instances)
        # Enclose with task-level setup nets
        self._step_with_setup = add_setup_steps(
            step_with_instance_setup, task_init_nets, task_exit_nets, self.name)

        return self._step_with_setup
Example #30
def _runtime_threads_task(name, group, final_outputs, reader, num_threads,
                          output, capacity):
    node_name = str(Node.current())
    profiler_name = "{0}/{1}/{2}/{3}/{4}".format(
        node_name,
        "pipe",
        name,
        processor_name(input) if input else "NoInput",
        processor_name(output) if output else "NoOutput")
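    # NOTE: 'input' above is not a parameter of this function; unless it is
    # shadowed at module level, it resolves to the Python builtin, so the
    # "NoInput" branch is never taken.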

    with Task(name=name, group=group, outputs=final_outputs,
              num_instances=num_threads) as task:
        global_exit_net = core.Net('pipe:exit')
        global_init_net = core.Net('pipe:init')
        reader.setup_ex(global_init_net, global_exit_net)

        init_net = core.Net('pipe:instance:init')
        exit_net = core.Net('pipe:instance:exit')
        read_nets, status, rec = reader.read_record_ex(init_net, exit_net)
        init_net.ConstantFill(
            [], [status],
            shape=[],
            value=False,
            dtype=core.DataType.BOOL
        )

        if rec is not None:
            out_queue, writer = _init_output(
                output, capacity, global_init_net, global_exit_net)
            write_nets, _ = writer.write_record_ex(
                rec, init_net, exit_net, status)
        else:
            out_queue = None
            write_nets = []

        with ops.task_init():
            ops.net(global_init_net)
        with ops.task_instance_init():
            ops.net(init_net)

        timer_start_net = core.Net('timer_start')
        timer = timer_start_net.TimerBegin([], counter_name=profiler_name)
        timer_end_net = core.Net('timer_end')
        timer_end_net.TimerEnd(timer, [])

        ops.net(core.execution_step(
            'body',
            [timer_start_net] + list(read_nets) + list(write_nets) +
            [timer_end_net],
            should_stop_blob=status))
        ops.net(timer_end_net)

        with ops.task_instance_exit():
            ops.net(exit_net)
        with ops.task_exit():
            ops.net(global_exit_net)

    return out_queue, task
Example #31
def execution_step_with_progress(name, init_net, substeps, rows_read):
    # progress reporter
    report_net = core.Net('report_net')
    report_net.Print([rows_read], [])
    return core.execution_step(name,
                               substeps,
                               report_net=report_net,
                               concurrent_substeps=True,
                               report_interval=5)
Example #32
    def test_pair_wise_loss_gradient(self, X, label, dY, gc, dc):
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('dY', dY)
        workspace.FeedBlob('label', label)
        net = core.Net('net')
        net.PairWiseLossGradient(
            ['X', 'label', 'dY'],
            ['dX'],
        )
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        dx = workspace.FetchBlob('dX')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            self.assertEqual(np.asscalar(dx[0]), 0)
            return
        self.assertAlmostEqual(np.asscalar(dx[0]),
                               np.asscalar(-dY[0] * sign /
                                           (1 + np.exp(sign * (X[0] - X[1])))),
                               delta=1e-2 * abs(np.asscalar(dx[0])))

        self.assertEqual(np.asscalar(dx[0]), np.asscalar(-dx[1]))
        delta = 1e-3
        up_x = np.array([[X[0] + delta], [X[1]]], dtype=np.float32)
        down_x = np.array([[X[0] - delta], [X[1]]], dtype=np.float32)
        workspace.FeedBlob('up_x', up_x)
        workspace.FeedBlob('down_x', down_x)
        new_net = core.Net('new_net')
        new_net.PairWiseLoss(['up_x', 'label'], ['up_output'])
        new_net.PairWiseLoss(['down_x', 'label'], ['down_output'])

        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [new_net],
                                         num_iter=1))
        workspace.RunPlan(plan)
        down_output_pred = workspace.FetchBlob('down_output')
        up_output_pred = workspace.FetchBlob('up_output')
        np.testing.assert_allclose(
            np.asscalar(dx[0]),
            np.asscalar(0.5 * dY[0] *
                        (up_output_pred[0] - down_output_pred[0]) / delta),
            rtol=1e-2,
            atol=1e-2)
Example #33
    def test_pair_wise_loss_gradient(self, X, label, dY, gc, dc):
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('dY', dY)
        workspace.FeedBlob('label', label)
        net = core.Net('net')
        net.PairWiseLossGradient(
            ['X', 'label', 'dY'],
            ['dX'],
        )
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data',
                                         [net], num_iter=1))
        workspace.RunPlan(plan)
        dx = workspace.FetchBlob('dX')
        sign = 1 if label[0] > label[1] else -1
        if label[0] == label[1]:
            self.assertEqual(np.asscalar(dx[0]), 0)
            return
        self.assertAlmostEqual(
            np.asscalar(dx[0]),
            np.asscalar(-dY[0] * sign / (1 + np.exp(sign * (X[0] - X[1])))),
            delta=1e-2 * abs(np.asscalar(dx[0])))

        self.assertEqual(np.asscalar(dx[0]), np.asscalar(-dx[1]))
        delta = 1e-3
        up_x = np.array([[X[0] + delta], [X[1]]], dtype=np.float32)
        down_x = np.array([[X[0] - delta], [X[1]]], dtype=np.float32)
        workspace.FeedBlob('up_x', up_x)
        workspace.FeedBlob('down_x', down_x)
        new_net = core.Net('new_net')
        new_net.PairWiseLoss(['up_x', 'label'], ['up_output'])
        new_net.PairWiseLoss(['down_x', 'label'], ['down_output'])

        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [new_net], num_iter=1))
        workspace.RunPlan(plan)
        down_output_pred = workspace.FetchBlob('down_output')
        up_output_pred = workspace.FetchBlob('up_output')
        np.testing.assert_allclose(
            np.asscalar(dx[0]),
            np.asscalar(
                0.5 * dY[0] *
                (up_output_pred[0] - down_output_pred[0]) / delta),
            rtol=1e-2, atol=1e-2)
Example #34
def execution_step_with_progress(name, init_net, substeps, rows_read):
    # progress reporter
    report_net = core.Net('report_net')
    report_net.Print([rows_read], [])
    return core.execution_step(
        name,
        substeps,
        report_net=report_net,
        concurrent_substeps=True,
        report_interval=5)
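
A minimal usage sketch (note that the helper above never uses its init_net
argument; 'rows_read' is assumed to be a blob updated by the substeps):

work_step = core.execution_step('work', core.Net('work_net'))
progress_step = execution_step_with_progress(
    'work_with_progress', None, [work_step], 'rows_read')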
Example #35
 def get_step(self):
     if self._step is not None and self._step_with_setup is None:
         init_nets, exit_nets = get_setup_nets(
             Task.TASK_SETUP, [self._step], self)
         if len(self._outputs) == 0:
             output_net = core.Net("output_net")
             self.add_output(output_net.ConstantFill(
                 [], 1, dtype=core.DataType.INT32, value=0))
             exit_nets.append(output_net)
         self._step_with_setup = core.execution_step(
             'task',
             [
                 core.execution_step('task_init', init_nets),
                 self._step,
                 core.execution_step('task_exit', exit_nets),
             ]
         )
     elif self._step_with_setup is None:
         self._step_with_setup = core.execution_step('task', [])
     return self._step_with_setup
Example #36
 def get_step(self):
     if self._step is not None and self._step_with_setup is None:
         init_nets, exit_nets = get_setup_nets(
             Task.TASK_SETUP, [self._step], self)
         if len(self._outputs) == 0:
             output_net = core.Net("output_net")
             self.add_output(output_net.ConstantFill(
                 [], 1, dtype=core.DataType.INT32, value=0))
             exit_nets.append(output_net)
         self._step_with_setup = core.execution_step(
             'task',
             [
                 core.execution_step('task_init', init_nets),
                 self._step,
                 core.execution_step('task_exit', exit_nets),
             ]
         )
     elif self._step_with_setup is None:
         self._step_with_setup = core.execution_step('task', [])
     return self._step_with_setup
Example #37
    def build(self, reader, process=None):
        """
        Build the producer_step to feed data from reader into the queue, and
        return the reader interface.
        Inputs:
            reader:           reads data which will be stored in the queue.
            process:          preprocesses data before it is enqueued.
        Outputs:
            reader:           reader to fetch the data from the queue.
            producer_step:    the step that inserts the data into the queue.
                              Should be run together with the consume step.
            exit_step:        the step to close the queue.
            schema:           the schema for the reader.
        """
        producer_steps = []
        for i in range(self.num_threads):
            name = 'reader_' + str(i)
            net_reader = core.Net(name)
            should_stop, fields = reader.read_record(net_reader)
            step_read = core.execution_step(name, net_reader)

            name = 'queue_writer' + str(i)
            net_prod = core.Net(name)
            field_blobs = fields.field_blobs()
            if process:
                field_blobs = process(net_prod, fields).field_blobs()

            self.writer.write(net_prod, field_blobs)
            step_prod = core.execution_step(name, net_prod)
            step = core.execution_step(
                'producer_' + str(i),
                [step_read, step_prod],
                should_stop_blob=should_stop)
            producer_steps.append(step)
        producer_step = core.execution_step(
            'producers',
            producer_steps,
            concurrent_substeps=True)
        return self.reader, producer_step, self.exit_step, self.schema
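
A minimal usage sketch of wiring the returned pieces into a plan (the queue
object and source_reader are assumed to exist):

reader, producer_step, exit_step, schema = queue.build(source_reader)
plan = core.Plan('produce')
plan.AddStep(producer_step)  # fill the queue from the source reader
plan.AddStep(exit_step)      # close the queue so consumers can drain and stop
workspace.RunPlan(plan)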
Example #38
    def build(self, reader, process=None):
        """
        Build the producer_step to feed data from reader into the queue, and
        return the reader interface.
        Inputs:
            reader:           reads data which will be stored in the queue.
            process:          preprocesses data before it is enqueued.
        Outputs:
            reader:           reader to fetch the data from the queue.
            producer_step:    the step that inserts the data into the queue.
                              Should be run together with the consume step.
            exit_step:        the step to close the queue.
            schema:           the schema for the reader.
        """
        producer_steps = []
        for i in range(self.num_threads):
            name = 'reader_' + str(i)
            net_reader = core.Net(name)
            should_stop, fields = reader.read_record(net_reader)
            step_read = core.execution_step(name, net_reader)

            name = 'queue_writer' + str(i)
            net_prod = core.Net(name)
            field_blobs = fields.field_blobs()
            if process:
                field_blobs = process(net_prod, fields).field_blobs()

            self.writer.write(net_prod, field_blobs)
            step_prod = core.execution_step(name, net_prod)
            step = core.execution_step(
                'producer_' + str(i),
                [step_read, step_prod],
                should_stop_blob=should_stop)
            producer_steps.append(step)
        producer_step = core.execution_step(
            'producers',
            producer_steps,
            concurrent_substeps=True)
        return self.reader, producer_step, self.exit_step, self.schema
Example #39
    def test_last_n_window_ops(self):
        collect_net = core.Net('collect_net')
        collect_net.GivenTensorFill(
            [],
            'input',
            shape=[3, 2],
            values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        )
        collect_net.LastNWindowCollector(
            ['input'],
            ['output'],
            num_to_collect=7,
        )
        plan = core.Plan('collect_data')
        plan.AddStep(core.execution_step('collect_data',
                                         [collect_net], num_iter=1))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        self.assertSequenceEqual(
            [item for sublist in reference_result for item in sublist],
            [1, 2, 3, 4, 5, 6])

        plan = core.Plan('collect_data')
        plan.AddStep(core.execution_step('collect_data',
                                         [collect_net], num_iter=2))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        self.assertSequenceEqual(
            [item for sublist in reference_result for item in sublist],
            [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6])

        plan = core.Plan('collect_data')
        plan.AddStep(core.execution_step('collect_data',
                                         [collect_net], num_iter=3))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        self.assertSequenceEqual(
            [item for sublist in reference_result for item in sublist],
            [3, 4, 5, 6, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2])
Example #40
    def test_last_n_window_ops(self):
        collect_net = core.Net('collect_net')
        collect_net.GivenTensorFill(
            [],
            'input',
            shape=[3, 2],
            values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        )
        collect_net.LastNWindowCollector(
            ['input'],
            ['output'],
            num_to_collect=7,
        )
        plan = core.Plan('collect_data')
        plan.AddStep(
            core.execution_step('collect_data', [collect_net], num_iter=1))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        self.assertSequenceEqual(
            [item for sublist in reference_result for item in sublist],
            [1, 2, 3, 4, 5, 6])

        plan = core.Plan('collect_data')
        plan.AddStep(
            core.execution_step('collect_data', [collect_net], num_iter=2))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        self.assertSequenceEqual(
            [item for sublist in reference_result for item in sublist],
            [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6])

        plan = core.Plan('collect_data')
        plan.AddStep(
            core.execution_step('collect_data', [collect_net], num_iter=3))
        workspace.RunPlan(plan)
        reference_result = workspace.FetchBlob('output')
        self.assertSequenceEqual(
            [item for sublist in reference_result for item in sublist],
            [3, 4, 5, 6, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2])
Example #41
def DoUntil(condition_blob_or_net, net_or_step):
    """
    Similar to DoWhile() but executes net_or_step as long as
    condition_blob_or_net returns false.
    """
    steps = [_ToExecutionStep(net_or_step)]

    if isinstance(condition_blob_or_net, core.Net):
        steps.append(Do(condition_blob_or_net))
        stop_blob = GetConditionBlobFromNet(condition_blob_or_net)
    else:
        stop_blob = condition_blob_or_net

    stop_blob = core.BlobReference(str(stop_blob))
    return core.execution_step('DoUntil', steps, should_stop_blob=stop_blob)
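
A minimal usage sketch, assuming the control helpers above (the condition
net's condition blob is derived via GetConditionBlobFromNet):

body_net = core.Net('body')
cond_net = core.Net('cond')
cond_net.ConstantFill([], ['done'], shape=[], value=True,
                      dtype=core.DataType.BOOL)
# body_net runs at least once; the loop stops as soon as 'done' is true
# (immediately here, since the fill is constant).
loop_step = DoUntil(cond_net, body_net)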
Example #42
    def compile(cls, runnable, workspace_type=None, setup_net_list=None):
        if isinstance(runnable, CompiledRunnable):
            assert cls == runnable.session_class, (
                'Runnable was compiled for different session type. ' +
                'Need: %s, got: %s' %
                (cls.__name__, runnable.session_class.__name__))
            return runnable

        if runnable in cls._compiled_cache:
            return cls._compiled_cache[runnable]

        if isinstance(runnable, TaskGroup):
            if workspace_type:
                if runnable.workspace_type():
                    assert runnable.workspace_type() == workspace_type, \
                        "Require {} but already have {}".format(
                            workspace_type, runnable.workspace_type())
                else:
                    runnable._workspace_type = workspace_type
            tg = runnable
        else:
            if workspace_type is None:
                workspace_type = WorkspaceType.GLOBAL
            tg = TaskGroup(workspace_type=workspace_type)
            if isinstance(runnable, Task):
                tg.add(runnable)
            elif isinstance(runnable, core.ExecutionStep):
                tg.add(Task(step=runnable))
            elif isinstance(runnable, core.Plan):
                # ExecutionSteps in a Plan() object are supposed to run
                # sequentially, while tasks in a TaskGroup run in parallel.
                # So if we have multiple ExecutionSteps in the Plan() object,
                # we choose to have a root ExecutionStep to wrap them all.
                assert len(runnable.Steps()) > 0
                if len(runnable.Steps()) == 1:
                    tg.add(Task(step=runnable.Steps()[0]))
                else:
                    # Task takes a list of ExecutionSteps and automatically
                    # wraps them in a root ExecutionStep
                    tg.add(Task(step=runnable.Steps()))
            else:
                step = core.execution_step('runnable', runnable)
                tg.add(Task(step=step))
        compiled = CompiledRunnable(cls._compile_task_group(
            tg, setup_net_list),
                                    session_class=cls)
        cls._compiled_cache[runnable] = compiled
        return compiled
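
For orientation, a sketch of the wrapping branch above in isolation, assuming
caffe2's task module; the session-specific _compile_task_group call is left
out since it depends on the concrete Session subclass.

from caffe2.python import core
from caffe2.python.task import Task, TaskGroup, WorkspaceType

net = core.Net('work')
net.ConstantFill([], ['x'], shape=[1], value=1.0)
step = core.execution_step('work_step', net)

# A bare ExecutionStep is neither a TaskGroup nor a Task, so compile()
# wraps it in a Task inside a fresh GLOBAL-workspace TaskGroup:
tg = TaskGroup(workspace_type=WorkspaceType.GLOBAL)
tg.add(Task(step=step))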
Example #43
    def run(self, runnable):
        assert self.is_open(), 'Session is closed.'
        if runnable not in self._runnable_cache:
            if isinstance(runnable, TaskGroup):
                tg = runnable
            else:
                tg = TaskGroup(workspace_type=WorkspaceType.GLOBAL)
                if isinstance(runnable, Task):
                    tg.add(runnable)
                elif isinstance(runnable, core.ExecutionStep):
                    tg.add(Task(step=runnable))
                else:
                    step = core.execution_step('runnable', runnable)
                    tg.add(Task(step=step))
            self._runnable_cache[runnable] = tg
        self._run_task_group(self._runnable_cache[runnable])
Example #44
def Until(condition_blob_or_net, net_or_step):
    """
    Similar to While(), but executes net_or_step while
    condition_blob_or_net evaluates to false.
    """
    if isinstance(condition_blob_or_net, core.Net):
        stop_blob = GetConditionBlobFromNet(condition_blob_or_net)
        condition_step = Do(condition_blob_or_net)
    else:
        copy_net, stop_blob = _CopyConditionBlobNet(condition_blob_or_net)
        condition_step = Do(copy_net)

    return core.execution_step(
        'Until',
        [condition_step, _ToExecutionStep(net_or_step)],
        should_stop_blob=stop_blob)
Example #46
def Do(*nets_or_steps):
    """
    Execute the sequence of nets or steps once.

    Examples:
    - Do(net1, net2, ..., net_n)
    - Do(list_of_nets)
    - Do(step1, step2, ..., step_n)
    - Do(list_of_steps)
    """
    if len(nets_or_steps) == 0:
        raise ValueError('nets_or_steps cannot be empty.')
    elif len(nets_or_steps) == 1:
        nets_or_steps = nets_or_steps[0]
    else:
        nets_or_steps = list(nets_or_steps)

    return core.execution_step('Do', nets_or_steps)
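
A short usage sketch, assuming caffe2 is installed and Do is defined as above:

from caffe2.python import core, workspace

net_a = core.Net('a')
net_a.ConstantFill([], ['x'], shape=[1], value=1.0)
net_b = core.Net('b')
net_b.ConstantFill([], ['y'], shape=[1], value=2.0)

plan = core.Plan('do_demo')
plan.AddStep(Do(net_a, net_b))  # equivalent to Do([net_a, net_b])
workspace.RunPlan(plan)
print(workspace.FetchBlob('x'), workspace.FetchBlob('y'))  # [1.] [2.]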
Example #47
def _RunOnceIfNot(condition_blob_or_net, net_or_step):
    """
    Similar to _RunOnceIf(), but executes net_or_step once if
    condition_blob_or_net evaluates to false.
    """
    if isinstance(condition_blob_or_net, core.Net):
        condition_blob = GetConditionBlobFromNet(condition_blob_or_net)
        return Do(Do(condition_blob_or_net),
                  _RunOnceIfNot(condition_blob, net_or_step))

    stop_if_net, stop_blob = _CopyConditionBlobNet(condition_blob_or_net)
    stop_net = _StopNet(stop_blob)

    return core.execution_step(
        '_RunOnceIfNot',
        [Do(stop_if_net),
         _ToExecutionStep(net_or_step),
         Do(stop_net)],
        should_stop_blob=stop_blob)
Example #48
    def execution_step(self, reader_net_name=None, external_should_stop=None):
        """Create an execution step with a net containing read operators.

        The execution step will contain a `stop_blob` that knows how to stop
        the execution loop when the end of data is reached.

        E.g.:

            read_step, fields = reader.execution_step()
            consume_net = core.Net('consume')
            consume_net.Print(fields[0], [])
            p = core.Plan('reader')
            p.AddStep(read_step.AddNet(consume_net))
            workspace.RunPlan(p)

        Args:

            reader_net_name: (optional) the name of the reader_net to be
                             created. The execution step will
                             be named accordingly.
            external_should_stop: (optional) a boolean blob; when it becomes
                             true, the read loop stops as well.

        Returns:
            A tuple (read_step, fields), with:

                read_step: A newly created execution step containing a net with
                           read operations. The step will have `stop_blob` set,
                           in order to stop the loop on end of data.
                fields: A tuple of BlobReference containing the latest batch
                        of data that was read.
        """
        reader_net_name = reader_net_name or 'reader'
        reader_net = core.Net(reader_net_name)
        should_stop, fields = self.read_record(reader_net)
        if external_should_stop is not None:
            should_stop = reader_net.Or([external_should_stop, should_stop])
        read_step = core.execution_step(
            '{}_step'.format(reader_net_name),
            reader_net,
            should_stop_blob=should_stop)
        return (read_step, fields)
Example #49
def _pipe_step(
        input, output=None, num_threads=1, processor=None, name=None,
        capacity=None, group=None, final_outputs=None):
    """
    """
    group = TaskGroup.current(group)
    if name is None:
        name = 'processor:%d' % group.num_registered_tasks()

    if isinstance(input, Reader):
        reader = input
    elif hasattr(input, 'reader'):
        reader = input.reader()
    else:
        raise ValueError('input must be a reader, queue or stream.')

    if processor is not None:
        reader = ProcessingReader(reader, processor)

    if num_threads == 0:
        assert output is None
        return reader, None

    global_exit_net = core.Net(name + '_producer_global_exit')
    global_init_net = core.Net(name + '_producer_global_init')
    out_queue = None
    writer = None

    reader.setup_ex(global_init_net, global_exit_net)

    steps = []
    for thread_id in range(num_threads):
        init_net = core.Net(name + "_init_net_%d" % thread_id)
        exit_net = core.Net(name + "_exit_net_%d" % thread_id)

        read_nets, status, rec = reader.read_record_ex(init_net, exit_net)

        if rec is not None:
            if writer is None:
                out_queue, writer = _init_output(
                    output, capacity, global_init_net, global_exit_net)
            write_nets, _ = writer.write_record_ex(
                rec, init_net, exit_net, status)
        else:
            write_nets = []

        step = core.execution_step(
            name + "_thread_%d" % thread_id, [
                core.execution_step(name + "_init_step", init_net),
                core.execution_step(
                    name + "_worker_step",
                    list(read_nets) + list(write_nets),
                    should_stop_blob=status
                ), core.execution_step(name + "_exit_step", exit_net)
            ]
        )
        steps.append(step)
    step = core.execution_step(
        "sender_step", [
            core.execution_step('init_step', global_init_net),
            core.execution_step(
                "sender_steps", steps, concurrent_substeps=True),
            core.execution_step('finish_step', global_exit_net),
        ])
    return out_queue, step
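
The per-thread steps above are parallelized by concurrent_substeps=True; a
stripped-down sketch of just that mechanism, assuming caffe2 is installed:

from caffe2.python import core, workspace

steps = []
for i in range(2):
    net = core.Net('worker_%d' % i)
    net.ConstantFill([], ['out_%d' % i], shape=[1], value=float(i))
    steps.append(core.execution_step('step_%d' % i, net))

# Substeps of this step run on separate threads, like "sender_steps" above.
parallel = core.execution_step('workers', steps, concurrent_substeps=True)
plan = core.Plan('concurrent_demo')
plan.AddStep(parallel)
workspace.RunPlan(plan)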
Example #51
    def test_record_queue(self):
        num_prod = 8
        num_consume = 3
        schema = Struct(
            ('floats', Map(
                Scalar(np.int32),
                Scalar(np.float32))),
        )
        contents_raw = [
            [1, 2, 3],  # len
            [11, 21, 22, 31, 32, 33],  # key
            [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
        ]
        contents = from_blob_list(schema, contents_raw)
        ds = Dataset(schema)
        net = core.Net('init')
        ds.init_empty(net)

        content_blobs = NewRecord(net, contents)
        FeedRecord(content_blobs, contents)
        writer = ds.writer(init_net=net)
        writer.write_record(net, content_blobs)
        reader = ds.reader(init_net=net)

        # prepare receiving dataset
        rec_dataset = Dataset(contents, name='rec')
        rec_dataset.init_empty(init_net=net)
        rec_dataset_writer = rec_dataset.writer(init_net=net)

        workspace.RunNetOnce(net)

        queue = RecordQueue(contents, num_threads=num_prod)

        def process(net, fields):
            new_fields = []
            for f in fields.field_blobs():
                new_f = net.Copy(f)
                new_fields.append(new_f)
            new_fields = from_blob_list(fields, new_fields)
            return new_fields

        q_reader, q_step, q_exit, fields = queue.build(reader, process)
        producer_step = core.execution_step('producer', [q_step, q_exit])

        consumer_steps = []
        for i in range(num_consume):
            name = 'queue_reader_' + str(i)
            net_consume = core.Net(name)
            should_stop, fields = q_reader.read_record(net_consume)
            step_consume = core.execution_step(name, net_consume)

            name = 'dataset_writer_' + str(i)
            net_dataset = core.Net(name)
            rec_dataset_writer.write(net_dataset, fields.field_blobs())
            step_dataset = core.execution_step(name, net_dataset)

            step = core.execution_step(
                'consumer_' + str(i),
                [step_consume, step_dataset],
                should_stop_blob=should_stop)
            consumer_steps.append(step)
        consumer_step = core.execution_step(
            'consumers', consumer_steps, concurrent_substeps=True)

        work_steps = core.execution_step(
            'work', [producer_step, consumer_step], concurrent_substeps=True)

        plan = core.Plan('test')
        plan.AddStep(work_steps)
        workspace.RunPlan(plan)
        data = workspace.FetchBlobs(rec_dataset.get_blobs())
        self.assertEqual(6, sum(data[0]))
        self.assertEqual(150, sum(data[1]))
        self.assertAlmostEqual(15, sum(data[2]), places=5)
Example #52
    def test_rebatching_parallel_producer_consumer(
        self, num_producers, num_consumers, producer_input_size,
        producer_num_iterations, capacity
    ):
        ### Init ###
        total_inputs = producer_num_iterations * producer_input_size * num_producers
        inputs = []
        init_net = core.Net('init_net')
        queue = init_net.CreateRebatchingQueue(
            [], 1, capacity=capacity, num_blobs=1
        )

        ### Producers ###
        producer_steps = []
        for i in range(num_producers):
            name = 'producer_%d' % i
            net = core.Net(name)
            values = [
                producer_input_size * i + x for x in range(producer_input_size)
            ]
            for _ in range(producer_num_iterations):
                inputs.extend(values)
            tensors = net.GivenTensorIntFill(
                [], 1, shape=[producer_input_size], values=values
            )

            net.EnqueueRebatchingQueue([queue, tensors], [], enqueue_batch=True)

            step = core.execution_step(
                name, net, num_iter=producer_num_iterations
            )
            producer_steps.append(step)

        producer_step = core.execution_step(
            'producer', [
                core.execution_step(
                    'producers', producer_steps, concurrent_substeps=True
                )
            ]
        )

        ### Consumers ###
        outputs = []

        def append(ins, outs):
            # Extend is atomic
            outputs.extend(ins[0].data.tolist())

        consumer_steps = []
        for i in range(num_consumers):
            # This is just one way of deterministically reading all the
            # elements. We make `num_consumers` almost equal splits
            # (the remainder goes to the last consumer).
            num_elements_to_read = total_inputs // num_consumers
            if i == num_consumers - 1:
                num_elements_to_read = num_elements_to_read \
                    + total_inputs % num_consumers

            # If we have nothing to read, this consumer will be idle
            if num_elements_to_read == 0:
                continue

            # Now we have to split the work between the number of iterations
            # and the read size of each iteration. This is again just one of
            # many deterministic ways of doing it. We factorize the total
            # number of elements we have to read and assign half of the
            # factors to the iterations and half to the read size.
            factors = list(primefac(num_elements_to_read))

            num_elements_per_iteration = functools.reduce(
                lambda x, y: x * y, factors[len(factors) // 2:], 1
            )

            num_iterations = functools.reduce(
                lambda x, y: x * y, factors[:len(factors) // 2], 1
            )

            name = 'consumer_%d' % i
            net = core.Net(name)
            blobs = net.DequeueRebatchingQueue(
                [queue], 1, num_elements=num_elements_per_iteration
            )
            net.Python(append)([blobs], 0)
            consumer_steps.append(
                core.execution_step(name, net, num_iter=num_iterations)
            )

        consumer_step = core.execution_step(
            'consumer', consumer_steps, concurrent_substeps=True
        )

        init_step = core.execution_step('init', init_net)
        worker_step = core.execution_step(
            'worker', [consumer_step, producer_step], concurrent_substeps=True
        )

        ### Execute Plan ###
        plan = core.Plan('test')
        plan.AddStep(init_step)
        plan.AddStep(worker_step)

        self.ws.run(plan)

        ### Check Results ###
        # We check that the outputs are a permutation of inputs
        inputs.sort()
        outputs.sort()
        self.assertEqual(inputs, outputs)
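
The iteration/read-size split used in the consumer loop can be checked in
isolation. A self-contained sketch, with a hypothetical prime_factors helper
standing in for the primefac dependency:

import functools

def prime_factors(n):
    # trial division; adequate for test-sized inputs
    factors, d = [], 2
    while d * d <= n:
        while n % d == 0:
            factors.append(d)
            n //= d
        d += 1
    if n > 1:
        factors.append(n)
    return factors

def split_reads(num_elements):
    factors = prime_factors(num_elements)
    per_iteration = functools.reduce(
        lambda x, y: x * y, factors[len(factors) // 2:], 1)
    iterations = functools.reduce(
        lambda x, y: x * y, factors[:len(factors) // 2], 1)
    assert iterations * per_iteration == num_elements
    return iterations, per_iteration

print(split_reads(12))  # (2, 6): two iterations of six elements each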
Example #53
    def test_dataset_ops(self):
        """
        1. Defining the schema of our dataset.

        This example schema could represent, for example, a search query log.
        """
        schema = Struct(
            # fixed size vector, which will be stored as a matrix when batched
            ('dense', Scalar((np.float32, 3))),
            # could represent a feature map from feature ID to float value
            ('floats', Map(
                Scalar(np.int32), Scalar(np.float32)
            )),
            # could represent a multi-valued categorical feature map
            ('int_lists', Map(
                Scalar(np.int32),
                List(Scalar(np.int64)),
            )),
            # could represent a multi-valued, weighted categorical feature map
            (
                'id_score_pairs', Map(
                    Scalar(np.int32),
                    Map(
                        Scalar(np.int64),
                        Scalar(np.float32),
                        keys_name='ids',
                        values_name='scores'
                    ),
                )
            ),
            # additional scalar information
            (
                'metadata', Struct(
                    ('user_id', Scalar(np.int64)),
                    ('user_embed', Scalar((np.float32, 2))),
                    ('query', Scalar(str)),
                )
            ),
        )
        """
        This is what the flattened fields for this schema look like, along
        with their types. Each one of these fields will be stored, read and
        written as a tensor.
        """
        expected_fields = [
            ('dense', (np.float32, 3)),
            ('floats:lengths', np.int32),
            ('floats:values:keys', np.int32),
            ('floats:values:values', np.float32),
            ('int_lists:lengths', np.int32),
            ('int_lists:values:keys', np.int32),
            ('int_lists:values:values:lengths', np.int32),
            ('int_lists:values:values:values', np.int64),
            ('id_score_pairs:lengths', np.int32),
            ('id_score_pairs:values:keys', np.int32),
            ('id_score_pairs:values:values:lengths', np.int32),
            ('id_score_pairs:values:values:values:ids', np.int64),
            ('id_score_pairs:values:values:values:scores', np.float32),
            ('metadata:user_id', np.int64),
            ('metadata:user_embed', (np.float32, 2)),
            ('metadata:query', str),
        ]
        zipped = zip(
            expected_fields, schema.field_names(), schema.field_types()
        )
        for (ref_name, ref_type), name, dtype in zipped:
            self.assertEqual(ref_name, name)
            self.assertEqual(np.dtype(ref_type), dtype)
        """
        2. The contents of our dataset.

        Contents as defined below could represent, for example, a log of
        search queries along with dense, sparse features and metadata.
        The dataset below has 3 top-level entries.
        """
        contents_raw = [
            # dense
            [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3], [3.1, 3.2, 3.3]],
            # floats
            [1, 2, 3],  # len
            [11, 21, 22, 31, 32, 33],  # key
            [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
            # int lists
            [2, 0, 1],  # len
            [11, 12, 31],  # key
            [2, 4, 3],  # value:len
            [111, 112, 121, 122, 123, 124, 311, 312, 313],  # value:value
            # id score pairs
            [1, 2, 2],  # len
            [11, 21, 22, 31, 32],  # key
            [1, 1, 2, 2, 3],  # value:len
            [111, 211, 221, 222, 311, 312, 321, 322, 323],  # value:ids
            [11.1, 21.1, 22.1, 22.2, 31.1, 31.2, 32.1, 32.2, 32.3],  # val:score
            # metadata
            [123, 234, 456],  # user_id
            [[0.2, 0.8], [0.5, 0.5], [0.7, 0.3]],  # user_embed
            ['dog posts', 'friends who like to', 'posts about ca'],  # query
        ]
        # convert the above content to ndarrays, checking against the schema
        contents = from_blob_list(schema, contents_raw)
        """
        3. Creating and appending to the dataset.
        We first create an empty dataset with the given schema.
        Then, a Writer is used to append these entries to the dataset.
        """
        ds = dataset.Dataset(schema)
        net = core.Net('init')
        with core.NameScope('init'):
            ds.init_empty(net)

            content_blobs = NewRecord(net, contents)
            FeedRecord(content_blobs, contents)
            writer = ds.writer(init_net=net)
            writer.write_record(net, content_blobs)
        workspace.RunNetOnce(net)
        """
        4. Iterating through the dataset contents.

        If we were to iterate through the top level entries of our dataset,
        this is what we should expect to see:
        """
        entries_raw = [
            (
                [[1.1, 1.2, 1.3]],  # dense
                [1],
                [11],
                [1.1],  # floats
                [2],
                [11, 12],
                [2, 4],
                [111, 112, 121, 122, 123, 124],  # intlst
                [1],
                [11],
                [1],
                [111],
                [11.1],  # id score pairs
                [123],
                [[0.2, 0.8]],
                ['dog posts'],  # metadata
            ),
            (
                [[2.1, 2.2, 2.3]],  # dense
                [2],
                [21, 22],
                [2.1, 2.2],  # floats
                [0],
                [],
                [],
                [],  # int list
                [2],
                [21, 22],
                [1, 2],
                [211, 221, 222],
                [21.1, 22.1, 22.2],
                [234],
                [[0.5, 0.5]],
                ['friends who like to'],  # metadata
            ),
            (
                [[3.1, 3.2, 3.3]],  # dense
                [3],
                [31, 32, 33],
                [3.1, 3.2, 3.3],  # floats
                [1],
                [31],
                [3],
                [311, 312, 313],  # int lst
                [2],
                [31, 32],
                [2, 3],
                [311, 312, 321, 322, 323],
                [31.1, 31.2, 32.1, 32.2, 32.3],  # id score list
                [456],
                [[0.7, 0.3]],
                ['posts about ca'],  # metadata
            ),
            # after the end of the dataset, we will keep getting empty vectors
            ([], ) * 16,
            ([], ) * 16,
        ]
        entries = [from_blob_list(schema, e) for e in entries_raw]
        """
        Let's go ahead and create the reading nets.
        We will run the read net multiple times and assert that we are
        reading the entries the way we stated above.
        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')
        reader = ds.reader(read_init_net)
        should_continue, batch = reader.read_record(read_next_net)

        workspace.RunNetOnce(read_init_net)
        workspace.CreateNet(read_next_net, True)

        for entry in entries:
            workspace.RunNet(str(read_next_net))
            actual = FetchRecord(batch)
            _assert_records_equal(actual, entry)
        """
        5. Reading/writing in a single plan

        If all operations on the data are expressible as Caffe2 operators,
        we don't need to load the data into Python; we can iterate through
        the dataset in a single Plan.

        Here we will process the dataset a little and store it in a second
        dataset. We can reuse the same Reader since it supports reset.
        """
        reset_net = core.Net('reset_net')
        reader.reset(reset_net)
        read_step, batch = reader.execution_step()
        """ We will add the line number * 1000 to the feature ids. """
        process_net = core.Net('process')
        line_no = Const(process_net, 0, dtype=np.int32)
        const_1000 = Const(process_net, 1000, dtype=np.int32)
        process_net.Add([line_no, const_1000], [line_no])
        field = batch.floats.keys.get()
        process_net.Print(field, [])
        process_net.Add([field, line_no], field, broadcast=1, axis=0)
        """ Lets create a second dataset and append to it. """
        ds2 = dataset.Dataset(schema, name='dataset2')
        ds2.init_empty(reset_net)
        writer = ds2.writer(reset_net)
        writer.write_record(process_net, batch)
        # commit is not necessary for DatasetWriter, but we add it for
        # generality of the example
        commit_net = core.Net('commit')
        writer.commit(commit_net)
        """ Time to create and run a plan which will do the processing """
        plan = core.Plan('process')
        plan.AddStep(core.execution_step('reset', reset_net))
        plan.AddStep(read_step.AddNet(process_net))
        plan.AddStep(core.execution_step('commit', commit_net))
        workspace.RunPlan(plan)
        """
        Now we should have dataset2 populated.
        """
        ds2_data = FetchRecord(ds2.content())
        field = ds2_data.floats.keys
        field.set(blob=field.get() - [1000, 2000, 2000, 3000, 3000, 3000])
        _assert_records_equal(contents, ds2_data)
        """
        6. Slicing a dataset

        You can create a new schema from pieces of another schema and reuse
        the same data.
        """
        subschema = Struct(('top_level', schema.int_lists.values))
        int_list_contents = contents.int_lists.values.field_names()
        self.assertEqual(len(subschema.field_names()), len(int_list_contents))
        """
        7. Random Access a dataset

        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')

        idx = np.array([2, 1, 0])
        indices_blob = Const(read_init_net, idx, name='indices')
        reader = ds.random_reader(read_init_net, indices_blob)
        reader.computeoffset(read_init_net)

        should_stop, batch = reader.read_record(read_next_net)

        workspace.CreateNet(read_init_net, True)
        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net, True)

        for i in range(len(entries)):
            k = idx[i] if i < len(idx) else i
            entry = entries[k]
            workspace.RunNet(str(read_next_net))
            actual = FetchRecord(batch)
            _assert_records_equal(actual, entry)
        workspace.RunNet(str(read_next_net))
        self.assertEqual(True, workspace.FetchBlob(should_stop))
        """
        8. Random Access a dataset with loop_over = true

        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')

        idx = np.array([2, 1, 0])
        indices_blob = Const(read_init_net, idx, name='indices')
        reader = ds.random_reader(read_init_net, indices_blob, loop_over=True)
        reader.computeoffset(read_init_net)

        should_stop, batch = reader.read_record(read_next_net)

        workspace.CreateNet(read_init_net, True)
        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net, True)

        for _ in range(len(entries) * 3):
            workspace.RunNet(str(read_next_net))
            self.assertEqual(False, workspace.FetchBlob(should_stop))
        """
        9. Sort and shuffle a dataset

        This sorts the dataset using the values of a certain column,
        and then shuffles within each chunk of size batch_size * shuffle_size
        before shuffling the chunks.

        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')

        reader = ds.random_reader(read_init_net)
        reader.sort_and_shuffle(read_init_net, 'int_lists:lengths', 1, 2)
        reader.computeoffset(read_init_net)

        should_continue, batch = reader.read_record(read_next_net)

        workspace.CreateNet(read_init_net, True)
        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net, True)

        expected_idx = np.array([2, 1, 0])
        for i in range(len(entries)):
            k = expected_idx[i] if i < len(expected_idx) else i
            entry = entries[k]
            workspace.RunNet(str(read_next_net))
            actual = FetchRecord(batch)
            _assert_records_equal(actual, entry)

        """
        Trim a dataset
        """
        trim_net = core.Net('trim_ds')
        ds.trim(trim_net, multiple_of=2)
        workspace.RunNetOnce(trim_net)
        trimmed = FetchRecord(ds.content())
        EXPECTED_SIZES = [2, 2, 3, 3, 2, 2, 2, 6, 2, 3, 3, 4, 4, 2, 2, 2]
        actual_sizes = [d.shape[0] for d in trimmed.field_blobs()]
        self.assertEqual(EXPECTED_SIZES, actual_sizes)
Example #54
    def tasks_by_node(self, node_remap=None):
        # tasks_by_node can't be called twice because the setup won't
        # work properly a second time.
        node_map = {}
        for task in self.tasks():
            node_map[task.node] = (
                node_remap(task.node) if node_remap else task.node)
        if self._tasks_by_node is not None:
            tasks_by_node, prev_node_map = self._tasks_by_node
            assert prev_node_map == node_map, (
                'Cannot call tasks_by_node multiple times.')
            return tasks_by_node

        # now we have report_steps. report_net is deprecated
        for node, (net, interval) in viewitems(self._report_nets):
            self.report_step(net, node=node, interval_ms=interval * 1000)
        self._report_nets = {}

        tasks_by_node = defaultdict(list)
        for task in self.tasks():
            mapped_node = node_map[task.node]
            tasks_by_node[mapped_node].append(task)

        report_steps_by_node = defaultdict(list)
        for original_node, step in self._report_steps:
            report_steps_by_node[node_map[original_node]].append(step)

        grouped_by_node = TaskGroup()
        for node, tasks in viewitems(tasks_by_node):
            report_steps = report_steps_by_node[node]
            node_inits, node_exits = get_setup_nets(
                TaskGroup.LOCAL_SETUP,
                [t.get_step() for t in tasks] + report_steps,
                self)
            # shortcut for single task with no queue
            steps = report_steps
            outputs = []
            grouped_workspace_type = WorkspaceType.PRIVATE
            for task in tasks:
                step = task.get_step()
                if step is not None:
                    step.SetCreateWorkspace(
                        task.workspace_type() == WorkspaceType.PRIVATE)
                    steps.append(step)
                outputs += task.outputs()
                # If any of the tasks in the node uses the global workspace,
                # then set the grouped task to use the global workspace as well
                if task.workspace_type() == WorkspaceType.GLOBAL:
                    grouped_workspace_type = WorkspaceType.GLOBAL
            if len(steps) == 0:
                steps.append(core.execution_step('empty', []))
            if len(steps) == 1:
                step = steps[0]
            else:
                step = core.execution_step(
                    '%s:body' % node, steps, concurrent_substeps=True)
            if len(node_inits) > 0 or len(node_exits) > 0:
                steps = []
                if len(node_inits) > 0:
                    steps.append(
                        core.execution_step('%s:init' % node, node_inits))
                steps.append(step)
                if len(node_exits) > 0:
                    steps.append(
                        core.execution_step('%s:exit' % node, node_exits))
                step = core.execution_step(node, steps)
            Task(
                node=node, step=step, outputs=outputs,
                name='grouped_by_node',
                group=grouped_by_node, workspace_type=grouped_workspace_type)
        self._tasks_by_node = (grouped_by_node, node_map)
        return grouped_by_node