예제 #1
0
파일: opencl.py 프로젝트: spillai/loopy
    def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var,
                           lhs_expr, rhs_expr, lhs_dtype, rhs_type_context):
        """Generate code that atomically updates *lhs_expr* with *rhs_expr*.

        The update is emitted as a compare-and-swap retry loop::

            do {
                old = <lhs>;
                new = <rhs with every occurrence of lhs replaced by old>;
            } while (cmpxchg(&<lhs>, old, new) != old);

        For floating-point types the compare-exchange is carried out on an
        integer of the same size: the old/new values are reinterpreted
        through a pointer cast and the target address is cast to a pointer
        to that integer type in the variable's address space.

        :arg codegen_state: supplies the variable name generator and the
            expression-to-code mapper used to render both sides.
        :arg lhs_atomicity: not used by this implementation.
        :arg lhs_var: the variable being written; only inspected for
            floating-point updates, to pick ``__global`` or ``__local``.
        :arg lhs_expr: the expression being assigned to.
        :arg rhs_expr: the expression computing the new value.
        :arg lhs_dtype: type of the left-hand side; must be a
            :class:`NumpyType` wrapping a 32- or 64-bit int or float.
        :arg rhs_type_context: type context passed on when rendering
            *rhs_expr*.
        :returns: a :class:`cgen.Block` declaring the two helper variables
            followed by the retry loop.
        :raises NotImplementedError: if *lhs_dtype* is not supported.
        :raises LoopyError: if the item size is neither 4 nor 8 bytes, or if
            a floating-point *lhs_var* is neither a global argument nor a
            local/global temporary.
        """
        from pymbolic.mapper.stringifier import PREC_NONE

        # FIXME: Could detect operations, generate atomic_{add,...} when
        # appropriate.

        if not (isinstance(lhs_dtype, NumpyType)
                and lhs_dtype.numpy_dtype in [
                    np.int32, np.int64, np.float32, np.float64]):
            raise NotImplementedError("atomic update for '%s'" % lhs_dtype)

        from cgen import Block, DoWhile, Assign
        from loopy.target.c import POD
        old_val_var = codegen_state.var_name_generator("loopy_old_val")
        new_val_var = codegen_state.var_name_generator("loopy_new_val")

        from loopy.kernel.data import (
            GlobalArg, TemporaryVariable, temp_var_scope)

        # Make the two helper variables known to the expression mapper so
        # that references to them in the substituted RHS can be rendered.
        ecm = codegen_state.expression_to_code_mapper.with_assignments({
            old_val_var: TemporaryVariable(old_val_var, lhs_dtype),
            new_val_var: TemporaryVariable(new_val_var, lhs_dtype),
        })

        lhs_expr_code = ecm(lhs_expr, prec=PREC_NONE, type_context=None)

        from pymbolic.mapper.substitutor import make_subst_func
        from pymbolic import var
        from loopy.symbolic import SubstitutionMapper

        # The new value must be computed from the snapshot in old_val_var,
        # not from a second read of the (possibly concurrently modified) LHS.
        subst = SubstitutionMapper(
            make_subst_func({lhs_expr: var(old_val_var)}))
        rhs_expr_code = ecm(subst(rhs_expr),
                            prec=PREC_NONE,
                            type_context=rhs_type_context,
                            needed_dtype=lhs_dtype)

        itemsize = lhs_dtype.numpy_dtype.itemsize
        if itemsize == 4:
            func_name = "atomic_cmpxchg"
        elif itemsize == 8:
            # The 8-byte compare-exchange goes by the "atom_"-prefixed name.
            func_name = "atom_cmpxchg"
        else:
            raise LoopyError("unexpected atomic size")

        cast_str = ""
        old_val = old_val_var
        new_val = new_val_var

        if lhs_dtype.numpy_dtype.kind == "f":
            # Pick the integer type with the same width as the float type.
            if lhs_dtype.numpy_dtype == np.float32:
                ctype = "int"
            elif lhs_dtype.numpy_dtype == np.float64:
                ctype = "long"
            else:
                # Excluded by the dtype check above. Raised explicitly
                # (rather than via ``assert``) so that ``ctype`` can never be
                # left unbound when assertions are disabled.
                raise AssertionError(
                    "unexpected floating-point dtype in atomic update")

            if isinstance(lhs_var, GlobalArg):
                var_kind = "__global"
            elif (isinstance(lhs_var, TemporaryVariable)
                  and lhs_var.scope == temp_var_scope.LOCAL):
                var_kind = "__local"
            elif (isinstance(lhs_var, TemporaryVariable)
                  and lhs_var.scope == temp_var_scope.GLOBAL):
                var_kind = "__global"
            else:
                # Two placeholders for the two values supplied; with only
                # one, the formatting itself fails with a TypeError before
                # the LoopyError can be raised.
                raise LoopyError("unexpected kind of variable '%s' in "
                                 "atomic operation: '%s'" %
                                 (lhs_var.name, type(lhs_var).__name__))

            # Reinterpret the float helpers' bits as integers and view the
            # target address as a pointer to that integer type.
            old_val = "*(%s *) &" % ctype + old_val
            new_val = "*(%s *) &" % ctype + new_val
            cast_str = "(%s %s *) " % (var_kind, ctype)

        # Retry for as long as the value found in memory differs from the
        # snapshot the new value was computed from.
        loop_condition = (
            "%(func_name)s("
            "%(cast_str)s&(%(lhs_expr)s), "
            "%(old_val)s, "
            "%(new_val)s"
            ") != %(old_val)s" % {
                "func_name": func_name,
                "cast_str": cast_str,
                "lhs_expr": lhs_expr_code,
                "old_val": old_val,
                "new_val": new_val,
            })

        return Block([
            POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
                old_val_var),
            POD(self, NumpyType(lhs_dtype.dtype, target=self.target),
                new_val_var),
            DoWhile(
                loop_condition,
                Block([
                    Assign(old_val_var, lhs_expr_code),
                    Assign(new_val_var, rhs_expr_code),
                ]))
        ])
예제 #2
0
def _hackily_ensure_multi_assignment_return_values_are_scoped_private(kernel):
    """
    Route the extra return values of multi-assignment calls through private
    temporaries.

    A multi-assignment function call such as::

       a, b = segmented_sum(x, y, z, w)

    is currently lowered into OpenCL as::

       a = segmented_sum_mangled(x, y, z, w, &b).

    In OpenCL the scope of "b" matters, and preamble generation currently
    assumes that it is always private. To make that assumption hold, every
    assignee after the first that is not already a private temporary is
    replaced by a freshly created private temporary, and a follow-up
    assignment copies that temporary into the original assignee.

    Returns a copy of *kernel* with the added temporaries and the new and
    updated instructions.
    """

    make_insn_id = kernel.get_instruction_id_generator()
    make_var_name = kernel.get_var_name_generator()

    # id -> instruction, for everything this pass creates or modifies
    changed_insns = {}
    # name -> TemporaryVariable, for every temporary this pass introduces
    added_temporaries = {}

    forward_deps = {insn.id: insn.depends_on for insn in kernel.instructions}

    # id -> ids of the instructions that list it in their depends_on
    dependents_of = {insn.id: set() for insn in kernel.instructions}

    import six
    for dependent_id, dep_ids in six.iteritems(forward_deps):
        for dep_id in dep_ids:
            dependents_of[dep_id].add(dependent_id)

    del forward_deps

    # {{{ utils

    def _latest(insn_id):
        # Prefer the version already rewritten by this pass, if there is one.
        return changed_insns.get(insn_id, kernel.id_to_insn.get(insn_id))

    def _add_to_no_sync_with(insn_id, new_no_sync_with_params):
        current = _latest(insn_id)
        merged = current.no_sync_with | frozenset(new_no_sync_with_params)
        changed_insns[insn_id] = current.copy(no_sync_with=merged)

    def _add_to_depends_on(insn_id, new_depends_on_params):
        current = _latest(insn_id)
        merged = current.depends_on | frozenset(new_depends_on_params)
        changed_insns[insn_id] = current.copy(depends_on=merged)

    # }}}

    from loopy.kernel.instruction import CallInstruction
    for insn in kernel.instructions:
        # Only calls with more than one assignee are affected.
        if not isinstance(insn, CallInstruction) or len(insn.assignees) <= 1:
            continue

        all_assignees = insn.assignees
        all_assignee_names = insn.assignee_var_names()

        # The first assignee is kept unchanged.
        rewritten_assignees = [all_assignees[0]]

        # ids of the copy-back assignments created for this call; an empty
        # set means that the call does not need to be touched
        copy_back_ids = set()

        # Each copy-back assignment depends on the one created before it,
        # starting from the call itself.
        chain_tail_id = insn.id

        from loopy.kernel.data import temp_var_scope, TemporaryVariable

        FIRST_POINTER_ASSIGNEE_IDX = 1  # noqa

        pointer_assignees = enumerate(
            zip(all_assignee_names[FIRST_POINTER_ASSIGNEE_IDX:],
                all_assignees[FIRST_POINTER_ASSIGNEE_IDX:]),
            FIRST_POINTER_ASSIGNEE_IDX)

        for position, (target_name, target) in pointer_assignees:
            already_private = (
                target_name in kernel.temporary_variables
                and (kernel.temporary_variables[target_name].scope
                     == temp_var_scope.PRIVATE))

            if already_private:
                rewritten_assignees.append(target)
                continue

            # {{{ generate a new assignment instruction

            retval_name = make_var_name(
                "{insn_id}_retval_{assignee_nr}".format(
                    insn_id=insn.id, assignee_nr=position))

            copy_back_id = make_insn_id(
                "{insn_id}_assign_retval_{assignee_nr}".format(
                    insn_id=insn.id, assignee_nr=position))

            copy_back_ids.add(copy_back_id)

            import loopy as lp
            added_temporaries[retval_name] = TemporaryVariable(
                name=retval_name,
                dtype=lp.auto,
                scope=temp_var_scope.PRIVATE)

            from pymbolic import var
            retval = var(retval_name)
            rewritten_assignees.append(retval)

            # Copy the private temporary into the original assignee.
            changed_insns[copy_back_id] = make_assignment(
                assignees=(target, ),
                expression=retval,
                id=copy_back_id,
                depends_on=frozenset([chain_tail_id]),
                depends_on_is_final=True,
                no_sync_with=(insn.no_sync_with
                              | frozenset([(insn.id, "any")])),
                predicates=insn.predicates,
                within_inames=insn.within_inames)

            chain_tail_id = copy_back_id

            # }}}

        if not copy_back_ids:
            continue

        # {{{ update originating instruction

        call_insn = changed_insns.get(insn.id, insn)
        changed_insns[insn.id] = call_insn.copy(
            assignees=tuple(rewritten_assignees))

        _add_to_no_sync_with(
            insn.id,
            [(other_id, "any") for other_id in copy_back_ids])

        # }}}

        # {{{ squash spurious memory dependencies amongst new assignments

        for this_id in copy_back_ids:
            siblings = [(other_id, "any")
                        for other_id in copy_back_ids
                        if other_id != this_id]
            _add_to_no_sync_with(this_id, siblings)

        # }}}

        # {{{ update instructions that depend on the originating instruction

        for dependent_id in dependents_of[insn.id]:
            _add_to_depends_on(dependent_id, copy_back_ids)

            # Whatever sync exemption the dependent has towards the call is
            # extended to the copy-back assignments, with the same scope.
            for synced_id, sync_scope in (
                    changed_insns[dependent_id].no_sync_with):
                if synced_id == insn.id:
                    _add_to_no_sync_with(
                        dependent_id,
                        [(other_id, sync_scope)
                         for other_id in copy_back_ids])

        # }}}

    merged_temporaries = kernel.temporary_variables.copy()
    merged_temporaries.update(added_temporaries)

    # New and updated instructions come first, followed by the untouched ones.
    result_insns = list(changed_insns.values())
    result_insns = result_insns + [
        insn for insn in kernel.instructions
        if insn.id not in changed_insns]

    return kernel.copy(temporary_variables=merged_temporaries,
                       instructions=result_insns)
예제 #3
0
 def as_variable(self):
     """Build the global-scope temporary variable described by this object.

     The result takes its name and shape from ``self.name`` and
     ``self.new_shape`` and its dtype from ``self.orig_temporary``.
     """
     return TemporaryVariable(
         name=self.name,
         dtype=self.orig_temporary.dtype,
         scope=temp_var_scope.GLOBAL,
         shape=self.new_shape,
     )