Example #1
0
    def start_define_computation(self, computation_decl):
        self.exop_codegen.append("class {}(HetrLocals, ConvLocals):",
                                 computation_decl.computation_op.name)
        with indenting(self.exop_codegen):
            self.exop_codegen.append("def __init__(self, **kwargs):")
            with indenting(self.exop_codegen):
                if is_tracing_enabled():
                    self.exop_codegen.append("""
self.__profiler_start__ = list()
self.__profiler_stop__  = list()
""")
                self.exop_codegen.append('super({}, self).__init__(**kwargs)',
                                         computation_decl.computation_op.name)
                for exop in computation_decl.exop_block:
                    output_decl = exop.output_decls[0] if len(
                        exop.output_decls) > 0 else None
                    # TODO better way to deal with multiple values
                    self.exop_codegen.exop = exop
                    self.exop_codegen.allocate_op(exop.op, output_decl,
                                                  *exop.input_decls)

            self.exop_codegen.endl()

        self.exop_codegen.indent(1)
        self.exop_codegen.append("def __call__(self):")
        self.exop_codegen.indent(1)
        self.codegen_define_length = self.exop_codegen.code_length
Example #2
0
    def transform_ordered_ops(self, computation, ordered_ops, name):
        """Generate a computation class that executes *ordered_ops*.

        Emits ``class <name>(HetrLocals, ConvLocals)`` with an ``__init__``
        allocating the ops' storage and a ``__call__`` running them in order.
        Returns the (possibly auto-generated) class name.
        """
        self.current_computation = computation
        if name is None:
            name = "C_" + str(self.n_computations)
        self.n_computations += 1

        def resolve(value):
            # Map a TensorDescription to its tensor view; other values pass
            # through unchanged.
            if isinstance(value, TensorDescription):
                return self.get_tensor_description_tensor_view(value)
            return value

        self.compute_code.append("class {}(HetrLocals, ConvLocals):", name)
        with indenting(self.compute_code):
            self.compute_code.append("def __init__(self, **kwargs):")
            with indenting(self.compute_code):
                self.compute_code.append('super({}, self).__init__(**kwargs)',
                                         name)
                self.transform_allocate_ops(ordered_ops)

            self.compute_code.endl()

            self.compute_code.append("def __call__(self):")
            length_before = self.compute_code.code_length

            with indenting(self.compute_code):
                for op in ordered_ops:
                    out = resolve(op.forwarded.tensor_description())
                    args = (resolve(info) for info in op.call_info())
                    self.compute_code.generate_op(op, out, *args)
                # If no op emitted any code, keep the method body valid.
                if length_before == self.compute_code.code_length:
                    self.compute_code.append("pass")
            self.compute_code.endl()
        self.name = name
        return name
Example #3
0
    def finish_transform(self):
        """Assemble all generated code sections into a Model class, compile
        it, and bind each computation's generated method as its executor.

        Idempotent: returns immediately once ``self.model`` has been built.
        """
        if self.model is not None:
            return

        # NOTE(review): the leading space before "class" appears intentional
        # for the code buffer's indentation scheme -- confirm against the
        # code generator before changing.
        self.code.append(" class Model(object):")
        with indenting(self.code):
            # Empty generated sections must still yield a syntactically
            # valid body, hence the explicit "pass".
            if len(self.device_buffers) == 0:
                self.init_code.append("pass")
            self.code.append(self.init_code.code)
            self.code.endl()

            self.code.append(NumPyConvEngine.all_conv_code())
            self.code.endl()

            self.code.append(self.allocate_storage_code.code)
            self.code.endl()
            if len(self.device_buffers) == 0:
                self.allocate_code.append("pass")
            self.code.append(self.allocate_code.code)
            self.code.endl(2)
            self.code.append(self.compute_code.code)

            # print(self.code.code)
            # print(self.code.filename)

        # Compile the accumulated source and instantiate the generated class.
        r = self.code.compile("op", globals())
        self.model = r['Model']()
        # Expose conv/pool bookkeeping gathered during codegen on the model.
        self.model.conv_params = self.compute_code.conv_params
        self.model.pool_params = self.compute_code.pool_params
        self.model.conv_slices = self.compute_code.conv_slices
        self.model.pool_slices = self.compute_code.pool_slices

        # Wire each computation to its generated method on the model.
        for computation in self.computations:
            executor = getattr(self.model, computation.name)
            computation.executor = executor
Example #4
0
    def transform_allocate(self):
        self.transformer.init_code.append("{} = None", self.ref_str)
        self.transformer.allocate_storage_code.append("def {}():",
                                                      self.alloc_name)
        with indenting(self.transformer.allocate_storage_code):
            elts = self.bytes // self.dtype.itemsize
            if self.dtype.name == 'float32':
                c_type_name = 'c_float'
            elif self.dtype.name == 'float64':
                c_type_name = 'c_double'
            else:
                c_type_name = None

            if c_type_name is not None and self.transformer.use_mlsl:
                self.transformer.allocate_storage_code.append(
                    """try:
    type_size = ctypes.sizeof(ctypes.{3}(1))
    mlsl_buf_{0} = mlsl_obj.alloc({1} * type_size, 64)
    array_{0} = ctypes.cast(mlsl_buf_{0}, ctypes.POINTER(ctypes.{3} * {1}))
    np_array_{0} = np.frombuffer(array_{0}.contents, dtype=np.dtype('{2}'))
    {0}(np_array_{0})
except NameError as error:
    print str(error)
    {0}(np.empty({1}, dtype=np.dtype('{2}')))""", self.update_name, elts,
                    self.dtype.name, c_type_name)
            else:
                self.transformer.allocate_storage_code.append(
                    "{}(np.empty({}, dtype=np.dtype('{}')))", self.update_name,
                    elts, self.dtype.name)

            self.transformer.allocate_storage_code.endl()

        self.transformer.allocate_storage_code.append("def {}(buffer):",
                                                      self.update_name)
        with indenting(self.transformer.allocate_storage_code):
            self.transformer.allocate_storage_code.append(
                "global {}", self.ref_str)
            self.transformer.allocate_storage_code.append(
                "{} = buffer", self.ref_str)
            self.transform_allocate_views()
        self.transformer.allocate_storage_code.endl()

        self.transformer.allocate_code.append("{}()", self.alloc_name)
Example #5
0
    def transform_allocate(self):
        """Emit allocator and updater definitions for this device buffer.

        Produces ``<alloc_name>()`` (allocates a numpy array and hands it to
        the updater) and ``<update_name>(buffer)`` (rebinds the global buffer
        reference and refreshes the tensor views), then schedules a call to
        the allocator in the allocate section.
        """
        storage_code = self.transformer.allocate_storage_code

        self.transformer.init_code.append("{} = None", self.ref_str)
        storage_code.append("def {}():", self.alloc_name)
        with indenting(storage_code):
            num_elements = self.bytes // self.dtype.itemsize
            storage_code.append(
                "{}(np.empty({}, dtype=np.dtype('{}')))",
                self.update_name, num_elements, self.dtype.name)
            storage_code.endl()

        storage_code.append("def {}(buffer):", self.update_name)
        with indenting(storage_code):
            storage_code.append("global {}", self.ref_str)
            storage_code.append("{} = buffer", self.ref_str)
            self.transform_allocate_views()
        storage_code.endl()

        self.transformer.allocate_code.append("{}()", self.alloc_name)
Example #6
0
    def transform_ordered_ops(self, ordered_ops, name):
        """Generate a callable class that runs *ordered_ops* in sequence.

        Returns the class name, auto-generated as ``C_<n>`` when *name* is
        None.
        """
        if name is None:
            name = "C_" + str(self.n_computations)
        self.n_computations += 1

        def unwrap(item):
            # TensorDescriptions are replaced by their concrete values.
            return item.value if isinstance(item, TensorDescription) else item

        self.compute_code.append("class {}(HetrLocals, ConvLocals):", name)
        with indenting(self.compute_code):
            self.compute_code.append("def __call__(self):")
            length_before = self.compute_code.code_length

            with indenting(self.compute_code):
                for op in ordered_ops:
                    out = unwrap(op.tensor_description())
                    args = (unwrap(ci) for ci in op.call_info())
                    self.compute_code.generate_op(op, out, *args)
                # No op produced code: keep the method body valid.
                if length_before == self.compute_code.code_length:
                    self.compute_code.append("pass")
            self.compute_code.endl()
        self.name = name
        return name
Example #7
0
    def transform_ordered_ops(self, ordered_ops, name):
        """Emit a method running *ordered_ops*; return its (possibly
        auto-generated) name.
        """
        if name is None:
            name = "c_" + str(self.n_computations)
        self.n_computations += 1
        self.compute_code.append("def {}(self):", name)
        # Snapshot the code object so an empty body can be detected below
        # via identity comparison.
        snapshot = self.compute_code.code

        def unwrap(item):
            # TensorDescriptions are replaced by their concrete values.
            return item.value if isinstance(item, TensorDescription) else item

        with indenting(self.compute_code):
            for op in ordered_ops:
                out = unwrap(op.tensor_description())
                args = (unwrap(ci) for ci in op.call_info())
                self.compute_code.generate_op(op, out, *args)
            if snapshot is self.compute_code.code:
                # Nothing was emitted; keep the generated def valid.
                self.compute_code.append("pass")
        self.compute_code.endl()
        return name
Example #8
0
    def finish_transform(self):
        """Assemble generated code into a Model class, attach queue-based
        communication helpers (send/recv, gather, scatter), compile, and bind
        each computation's generated method as its executor.

        Idempotent: returns immediately once ``self.model`` has been built.
        """
        if self.model is not None:
            return

        # NOTE(review): the leading space before "class" appears intentional
        # for the code buffer's indentation scheme -- confirm against the
        # code generator before changing.
        self.code.append(" class Model(object):")
        with indenting(self.code):
            # Empty generated sections must still yield a syntactically
            # valid body, hence the explicit "pass".
            if len(self.device_buffers) == 0:
                self.init_code.append("pass")
            self.code.append(self.init_code.code)
            self.code.endl()

            self.code.append(NumPyConvEngine.all_conv_code())
            self.code.append(NumPyCodeEngine.lut_code())
            self.code.endl()

            self.code.append(self.allocate_storage_code.code)
            self.code.endl()
            if len(self.device_buffers) == 0:
                self.allocate_code.append("pass")
            self.code.append(self.allocate_code.code)
            self.code.endl(2)
            self.code.append(self.compute_code.code)

            # with open("code_{}.py".format(self.name), "w") as f:
            #     f.write(self.code.code)
            # print(self.code.filename)

        # Compile the accumulated source; keep the class (not an instance)
        # so the communication helpers below become methods when assigned.
        r = self.code.compile("op", globals())
        self.model = r['Model']

        def send(self, send_id):
            """Push the send op's tensor value onto its shared queue."""
            send_op = self.send_nodes[send_id]
            q = send_op.shared_q

            # TODO
            # below converts DeviceTensor to numpy array
            # should we instead serialize DeviceTensor?
            x_devicetensor = send_op.args[0].value
            x_nparr = x_devicetensor.get(None)
            q.put(x_nparr)

        def recv(self, recv_id):
            """Block until a value arrives on the recv op's shared queue."""
            recv_op = self.recv_nodes[recv_id]
            q = recv_op.shared_q
            x = q.get()
            return x

        def gather_send(self, gather_send_id):
            """Push this worker's tensor onto the gather op's shared queue."""
            gather_send_op = self.gather_send_nodes[gather_send_id]
            q = gather_send_op.shared_queue
            # TODO
            # below converts DeviceTensor to numpy array
            # should we instead serialize DeviceTensor?
            x_devicetensor = gather_send_op.args[0].value
            x_nparr = x_devicetensor.get(None)
            q.put(x_nparr)

        def gather_recv(self, gather_recv_id):
            """Collect one slice per sender into the op's tensor and return
            the assembled array."""
            gather_recv_op = self.gather_recv_nodes[gather_recv_id]
            x_devicetensor = gather_recv_op.value
            x_nparr = x_devicetensor.get(None)
            for i in range(len(gather_recv_op.from_id)):
                q = gather_recv_op.shared_queue_list[i]
                x = q.get()
                x_nparr[gather_recv_op.slices[i]] = x
            return x_nparr

        def scatter_send(self, scatter_send_id):
            """Slice the tensor and push one piece onto each receiver's
            queue."""
            scatter_send_op = self.scatter_send_nodes[scatter_send_id]

            # TODO
            # below converts DeviceTensor to numpy array
            # should we instead serialize DeviceTensor?
            x_devicetensor = scatter_send_op.args[0].value
            x_nparr = x_devicetensor.get(None)
            for i in range(len(scatter_send_op.to_id)):
                q = scatter_send_op.shared_queue_list[i]
                q.put(x_nparr[scatter_send_op.slices[i]])

        def scatter_recv(self, scatter_recv_id):
            """Block until this worker's scatter slice arrives."""
            scatter_recv_op = self.scatter_recv_nodes[scatter_recv_id]
            q = scatter_recv_op.shared_queue
            x = q.get()
            return x

        # Attach the helpers to the class so they bind as methods...
        self.model.recv_from_send = recv
        self.model.send = send

        self.model.gather_recv_from_gather_send = gather_recv
        self.model.gather_send = gather_send

        self.model.scatter_recv_from_scatter_send = scatter_recv
        self.model.scatter_send = scatter_send

        # ...then instantiate.
        self.model = self.model()

        # Hand the node tables collected during codegen to the instance.
        self.model.send_nodes = self.compute_code.send_nodes
        self.model.recv_nodes = self.compute_code.recv_nodes

        self.model.gather_send_nodes = self.compute_code.gather_send_nodes
        self.model.gather_recv_nodes = self.compute_code.gather_recv_nodes

        self.model.scatter_send_nodes = self.compute_code.scatter_send_nodes
        self.model.scatter_recv_nodes = self.compute_code.scatter_recv_nodes

        # Expose conv/pool bookkeeping gathered during codegen on the model.
        self.model.conv_params = self.compute_code.conv_params
        self.model.pool_params = self.compute_code.pool_params
        self.model.conv_slices = self.compute_code.conv_slices
        self.model.pool_slices = self.compute_code.pool_slices

        # Wire each computation to its generated method on the model.
        for computation in self.computations:
            executor = getattr(self.model, computation.name)
            computation.executor = executor