コード例 #1
0
ファイル: genpup.py プロジェクト: raffenet/yaksa
def resized(suffix, dtp, b, last):
    global need_extent
    if (need_extent == True):
        yutils.display(
            OUTFILE, "uintptr_t extent%d = %s->extent / sizeof(%s);\n" %
            (suffix, dtp, b))
    need_extent = False
コード例 #2
0
ファイル: genpup.py プロジェクト: sagarth/yaksa
def contig(suffix, b, blklen, last):
    global num_paren_open
    num_paren_open += 1
    yutils.display(
        OUTFILE, "for (int j%d = 0; j%d < count%d; j%d++) {\n" %
        (suffix, suffix, suffix, suffix))
    global s
    s += " + j%d * stride%d" % (suffix, suffix)
コード例 #3
0
def hindexed(suffix, b, blklen, last):
    global num_paren_open
    num_paren_open += 2
    yutils.display(OUTFILE, "for (int j%d = 0; j%d < count%d; j%d++) {\n" % (suffix, suffix, suffix, suffix))
    yutils.display(OUTFILE, "for (int k%d = 0; k%d < array_of_blocklengths%d[j%d]; k%d++) {\n" % \
            (suffix, suffix, suffix, suffix, suffix))
    global s
    if (last != 1):
        s += " + array_of_displs%d[j%d] / sizeof(%s) + k%d * extent%d" % \
             (suffix, suffix, b, suffix, suffix + 1)
    else:
        s += " + array_of_displs%d[j%d] / sizeof(%s) + k%d" % (suffix, suffix, b, suffix)
コード例 #4
0
ファイル: genpup.py プロジェクト: yqin/yaksa
def contig(suffix, dtp, b, last):
    global s
    global idx
    global need_extent
    yutils.display(
        OUTFILE,
        "intptr_t stride%d = %s->u.contig.child->extent;\n" % (suffix, dtp))
    if (need_extent == True):
        yutils.display(OUTFILE,
                       "uintptr_t extent%d = %s->extent;\n" % (suffix, dtp))
    need_extent = False
    s += " + x%d * stride%d" % (idx, suffix)
    idx = idx + 1
コード例 #5
0
ファイル: genpup.py プロジェクト: yqin/yaksa
def hvector(suffix, dtp, b, last):
    global s
    global idx
    global need_extent
    yutils.display(
        OUTFILE, "intptr_t stride%d = %s->u.hvector.stride;\n" % (suffix, dtp))
    if (need_extent == True):
        yutils.display(OUTFILE,
                       "uintptr_t extent%d = %s->extent;\n" % (suffix, dtp))
    if (last != 1):
        s += " + x%d * stride%d + x%d * extent%d" % (idx, suffix, idx + 1,
                                                     suffix + 1)
        need_extent = True
    else:
        s += " + x%d * stride%d + x%d * sizeof(%s)" % (idx, suffix, idx + 1, b)
        need_extent = False
    idx = idx + 2
コード例 #6
0
def switcher_builtin(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr):
    yutils.display(OUTFILE, "switch (%s->id) {\n" % child_type_str(typelist))

    for b in builtin_types:
        switcher_builtin_element(backend, OUTFILE, blklens, typelist, pupstr, "YAKSA_TYPE__%s" % b.replace(" ", "_"), b.replace(" ", "_"))
    for key in builtin_maps:
        switcher_builtin_element(backend, OUTFILE, blklens, typelist, pupstr, key, builtin_maps[key])

    yutils.display(OUTFILE, "default:\n")
    yutils.display(OUTFILE, "    break;\n")
    yutils.display(OUTFILE, "}\n")
コード例 #7
0
def hvector_decl(nesting, dtp, b):
    yutils.display(
        OUTFILE, "intptr_t count%d = %s->u.hvector.count;\n" % (nesting, dtp))
    yutils.display(
        OUTFILE,
        "intptr_t blocklength%d ATTRIBUTE((unused)) = %s->u.hvector.blocklength;\n"
        % (nesting, dtp))
    yutils.display(
        OUTFILE,
        "intptr_t stride%d = %s->u.hvector.stride;\n" % (nesting, dtp))
    yutils.display(
        OUTFILE, "uintptr_t extent%d ATTRIBUTE((unused)) = %s->extent;\n" %
        (nesting, dtp))
コード例 #8
0
ファイル: genpup.py プロジェクト: yqin/yaksa
def hindexed(suffix, dtp, b, last):
    global s
    global idx
    global need_extent
    yutils.display(
        OUTFILE,
        "intptr_t *array_of_displs%d = %s->u.hindexed.array_of_displs;\n" %
        (suffix, dtp))
    if (need_extent == True):
        yutils.display(OUTFILE,
                       "uintptr_t extent%d = %s->extent;\n" % (suffix, dtp))
    if (last != 1):
        s += " + array_of_displs%d[x%d] + x%d * extent%d" % \
             (suffix, idx, idx + 1, suffix + 1)
        need_extent = True
    else:
        s += " + array_of_displs%d[x%d] + x%d * sizeof(%s)" % (suffix, idx,
                                                               idx + 1, b)
        need_extent = False
    idx = idx + 2
コード例 #9
0
def hindexed_decl(nesting, dtp, b):
    yutils.display(
        OUTFILE, "intptr_t count%d = %s->u.hindexed.count;\n" % (nesting, dtp))
    yutils.display(
        OUTFILE,
        "intptr_t *restrict array_of_blocklengths%d = %s->u.hindexed.array_of_blocklengths;\n"
        % (nesting, dtp))
    yutils.display(
        OUTFILE,
        "intptr_t *restrict array_of_displs%d = %s->u.hindexed.array_of_displs;\n"
        % (nesting, dtp))
    yutils.display(
        OUTFILE, "uintptr_t extent%d ATTRIBUTE((unused)) = %s->extent;\n" %
        (nesting, dtp))
コード例 #10
0
def blkhindx_decl(nesting, dtp, b):
    yutils.display(
        OUTFILE, "intptr_t count%d = %s->u.blkhindx.count;\n" % (nesting, dtp))
    yutils.display(
        OUTFILE,
        "intptr_t blocklength%d ATTRIBUTE((unused)) = %s->u.blkhindx.blocklength;\n"
        % (nesting, dtp))
    yutils.display(
        OUTFILE,
        "intptr_t *restrict array_of_displs%d = %s->u.blkhindx.array_of_displs;\n"
        % (nesting, dtp))
    yutils.display(
        OUTFILE, "uintptr_t extent%d ATTRIBUTE((unused)) = %s->extent;\n" %
        (nesting, dtp))
コード例 #11
0
def contig_decl(nesting, dtp, b):
    yutils.display(OUTFILE,
                   "intptr_t count%d = %s->u.contig.count;\n" % (nesting, dtp))
    yutils.display(
        OUTFILE,
        "intptr_t stride%d = %s->u.contig.child->extent;\n" % (nesting, dtp))
    yutils.display(
        OUTFILE, "uintptr_t extent%d ATTRIBUTE((unused)) = %s->extent;\n" %
        (nesting, dtp))
コード例 #12
0
def hvector(suffix, b, blklen, last):
    global num_paren_open
    num_paren_open += 2
    yutils.display(OUTFILE, "for (int j%d = 0; j%d < count%d; j%d++) {\n" % (suffix, suffix, suffix, suffix))
    if (blklen == "generic"):
        yutils.display(OUTFILE, "for (int k%d = 0; k%d < blocklength%d; k%d++) {\n" % (suffix, suffix, suffix, suffix))
    else:
        yutils.display(OUTFILE, "for (int k%d = 0; k%d < %s; k%d++) {\n" % (suffix, suffix, blklen, suffix))
    global s
    if (last != 1):
        s += " + j%d * stride%d + k%d * extent%d" % (suffix, suffix, suffix, suffix + 1)
    else:
        s += " + j%d * stride%d + k%d" % (suffix, suffix, suffix)
コード例 #13
0
def blkhindx(suffix, b, blklen, last):
    global num_paren_open
    num_paren_open += 2
    yutils.display(OUTFILE, "for (int j%d = 0; j%d < count%d; j%d++) {\n" % (suffix, suffix, suffix, suffix))
    if (blklen == "generic"):
        yutils.display(OUTFILE, "for (int k%d = 0; k%d < blocklength%d; k%d++) {\n" % (suffix, suffix, suffix, suffix))
    else:
        yutils.display(OUTFILE, "for (int k%d = 0; k%d < %s; k%d++) {\n" % (suffix, suffix, blklen, suffix))
    global s
    if (last != 1):
        s += " + array_of_displs%d[j%d] / sizeof(%s) + k%d * extent%d" % \
             (suffix, suffix, b, suffix, suffix + 1)
    else:
        s += " + array_of_displs%d[j%d] / sizeof(%s) + k%d" % (suffix, suffix, b, suffix)
コード例 #14
0
ファイル: genpup.py プロジェクト: yqin/yaksa
def write_headers():
    yutils.display(OUTFILE, "typedef signed char int8_t;\n")
    yutils.display(OUTFILE, "typedef signed short int int16_t;\n")
    yutils.display(OUTFILE, "typedef signed int int32_t;\n")
    yutils.display(OUTFILE, "typedef signed long int64_t;\n")
    yutils.display(OUTFILE, "typedef unsigned char uint8_t;\n")
    yutils.display(OUTFILE, "typedef unsigned short int uint16_t;\n")
    yutils.display(OUTFILE, "typedef unsigned int uint32_t;\n")
    yutils.display(OUTFILE, "typedef unsigned long uint64_t;\n")
    yutils.display(OUTFILE, "#include \"yaksuri_zei_md.h\"\n")
    yutils.display(OUTFILE, "\n")
コード例 #15
0
ファイル: genpup.py プロジェクト: yqin/yaksa
def generate_kernels(b, darray, op):
    global need_extent
    global s
    global idx

    # we need pup kernels for reduction of basic types
    funclist = []
    funclist.append("pack_%s" % op)
    funclist.append("unpack_%s" % op)

    for func in funclist:
        ##### figure out the function name to use
        funcprefix = "%s_" % func
        for d in darray:
            funcprefix = funcprefix + "%s_" % d
        funcprefix = funcprefix + b.replace(" ", "_")

        ##### generate the ZE kernel
        yutils.display(
            OUTFILE,
            "__kernel void yaksuri_zei_kernel_%s(__global const void *inbuf, __global void *outbuf, unsigned long count, __global const yaksuri_zei_md_s *__restrict__ md)\n"
            % funcprefix)
        yutils.display(OUTFILE, "{\n")
        yutils.display(
            OUTFILE,
            "__global const char *__restrict__ sbuf = (__global char *) inbuf;\n"
        )
        yutils.display(
            OUTFILE,
            "__global char *__restrict__ dbuf = (__global char *) outbuf;\n")
        if ("unpack" in func and len(darray) != 0):
            yutils.display(OUTFILE, "dbuf = dbuf - md->true_lb;\n")
        elif (len(darray) != 0):
            yutils.display(
                OUTFILE,
                "sbuf = (__global const char *) ((__global char *)sbuf - md->true_lb);\n"
            )
        yutils.display(OUTFILE, "uintptr_t extent = md->extent;\n")
        yutils.display(OUTFILE, "uintptr_t idx = get_global_id(0);\n")
        yutils.display(OUTFILE, "uintptr_t res = idx;\n")
        yutils.display(OUTFILE,
                       "uintptr_t inner_elements = md->num_elements;\n")
        yutils.display(OUTFILE, "\n")

        yutils.display(OUTFILE, "if (idx >= (count * inner_elements))\n")
        yutils.display(OUTFILE, "    return;\n")
        yutils.display(OUTFILE, "\n")

        # copy loop
        idx = 0
        md = "md"
        for d in darray:
            if (d == "hvector" or d == "blkhindx" or d == "hindexed" or \
                d == "contig"):
                yutils.display(OUTFILE,
                               "uintptr_t x%d = res / inner_elements;\n" % idx)
                idx = idx + 1
                yutils.display(OUTFILE, "res %= inner_elements;\n")
                yutils.display(OUTFILE,
                               "inner_elements /= %s->u.%s.count;\n" % (md, d))
                yutils.display(OUTFILE, "\n")

            if (d == "hvector" or d == "blkhindx"):
                yutils.display(OUTFILE,
                               "uintptr_t x%d = res / inner_elements;\n" % idx)
                idx = idx + 1
                yutils.display(OUTFILE, "res %= inner_elements;\n")
                yutils.display(
                    OUTFILE,
                    "inner_elements /= %s->u.%s.blocklength;\n" % (md, d))
            elif (d == "hindexed"):
                yutils.display(OUTFILE, "uintptr_t x%d;\n" % idx)
                yutils.display(
                    OUTFILE,
                    "for (intptr_t i = 0; i < %s->u.%s.count; i++) {\n" %
                    (md, d))
                yutils.display(
                    OUTFILE,
                    "    uintptr_t in_elems = %s->u.%s.array_of_blocklengths[i] *\n"
                    % (md, d))
                yutils.display(
                    OUTFILE,
                    "                         %s->u.%s.child->num_elements;\n"
                    % (md, d))
                yutils.display(OUTFILE, "    if (res < in_elems) {\n")
                yutils.display(OUTFILE, "        x%d = i;\n" % idx)
                yutils.display(OUTFILE, "        res %= in_elems;\n")
                yutils.display(
                    OUTFILE,
                    "        inner_elements = %s->u.%s.child->num_elements;\n"
                    % (md, d))
                yutils.display(OUTFILE, "        break;\n")
                yutils.display(OUTFILE, "    } else {\n")
                yutils.display(OUTFILE, "        res -= in_elems;\n")
                yutils.display(OUTFILE, "    }\n")
                yutils.display(OUTFILE, "}\n")
                idx = idx + 1
                yutils.display(OUTFILE, "\n")

            md = "%s->u.%s.child" % (md, d)

        yutils.display(OUTFILE, "uintptr_t x%d = res;\n" % idx)
        yutils.display(OUTFILE, "\n")

        dtp = "md"
        s = "x0 * extent"
        idx = 1
        x = 1
        need_extent = False
        for d in darray:
            if (x == len(darray)):
                last = 1
            else:
                last = 0
            getattr(sys.modules[__name__], d)(x, dtp, b, last)
            x = x + 1
            dtp = dtp + "->u.%s.child" % d

        if (func == "pack_REPLACE"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = *((const %s *) (const void *) (sbuf + %s));\n"
                % (b, b, b, s))
        elif (func == "pack_SUM"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) += *((const %s *) (const void *) (sbuf + %s));\n"
                % (b, b, b, s))
        elif (func == "pack_PROD"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) *= *((const %s *) (const void *) (sbuf + %s));\n"
                % (b, b, b, s))
        elif (func == "pack_BOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) |= *((const %s *) (const void *) (sbuf + %s));\n"
                % (b, b, b, s))
        elif (func == "pack_BAND"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) &= *((const %s *) (const void *) (sbuf + %s));\n"
                % (b, b, b, s))
        elif (func == "pack_BXOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) ^= *((const %s *) (const void *) (sbuf + %s));\n"
                % (b, b, b, s))
        elif (func == "pack_LOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = (*((%s *) (void *) (dbuf + idx * sizeof(%s)))) || (*((const %s *) (const void *) (sbuf + %s)));\n"
                % (b, b, b, b, b, s))
        elif (func == "pack_LAND"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = (*((%s *) (void *) (dbuf + idx * sizeof(%s)))) && (*((const %s *) (const void *) (sbuf + %s)));\n"
                % (b, b, b, b, b, s))
        elif (func == "pack_LXOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = !(*((%s *) (void *) (dbuf + idx * sizeof(%s)))) != !(*((const %s *) (const void *) (sbuf + %s)));\n"
                % (b, b, b, b, b, s))
        elif (func == "pack_MAX" and (b == "float" or b == "double")):
            yutils.display(
                OUTFILE,
                "    %s x_[2] = {*((const %s *) (const void *) (sbuf + %s)), *((%s *) (void *) (dbuf + idx * sizeof(%s)))};\n"
                % (b, b, s, b, b))
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = x_[*((const %s *) (const void *) (sbuf + %s)) < *((%s *) (void *) (dbuf + idx * sizeof(%s)))];\n"
                % (b, b, b, s, b, b))
        elif (func == "pack_MAX"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = *((const %s *) (const void *) (sbuf + %s)) ^ ((*((const %s *) (const void *) (sbuf + %s)) ^ *((%s *) (void *) (dbuf + idx * sizeof(%s)))) & -( *((const %s *) (const void *) (sbuf + %s)) < *((%s *) (void *) (dbuf + idx * sizeof(%s)))));\n"
                % (b, b, b, s, b, s, b, b, b, s, b, b))
        elif (func == "pack_MIN" and (b == "float" or b == "double")):
            yutils.display(
                OUTFILE,
                "    %s x_[2] = {*((const %s *) (const void *) (sbuf + %s)), *((%s *) (void *) (dbuf + idx * sizeof(%s)))};\n"
                % (b, b, s, b, b))
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = x_[*((const %s *) (const void *) (sbuf + %s)) > *((%s *) (void *) (dbuf + idx * sizeof(%s)))];\n"
                % (b, b, b, s, b, b))
        elif (func == "pack_MIN"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + idx * sizeof(%s))) = *((%s *) (void *) (dbuf + idx * sizeof(%s))) ^ ((*((const %s *) (const void *) (sbuf + %s)) ^ *((%s *) (void *) (dbuf + idx * sizeof(%s)))) & -( *((const %s *) (const void *) (sbuf + %s)) < *((%s *) (void *) (dbuf + idx * sizeof(%s)))));\n"
                % (b, b, b, b, b, s, b, b, b, s, b, b))
        elif (func == "unpack_REPLACE"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = *((const %s *) (const void *) (sbuf + idx * sizeof(%s)));\n"
                % (b, s, b, b))
        elif (func == "unpack_SUM"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) += *((const %s *) (const void *) (sbuf + idx * sizeof(%s)));\n"
                % (b, s, b, b))
        elif (func == "unpack_PROD"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) *= *((const %s *) (const void *) (sbuf + idx * sizeof(%s)));\n"
                % (b, s, b, b))
        elif (func == "unpack_BOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) |= *((const %s *) (const void *) (sbuf + idx * sizeof(%s)));\n"
                % (b, s, b, b))
        elif (func == "unpack_BAND"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) &= *((const %s *) (const void *) (sbuf + idx * sizeof(%s)));\n"
                % (b, s, b, b))
        elif (func == "unpack_BXOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) ^= *((const %s *) (const void *) (sbuf + idx * sizeof(%s)));\n"
                % (b, s, b, b))
        elif (func == "unpack_LOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = (*((%s *) (void *) (dbuf + %s))) || (*((const %s *) (const void *) (sbuf + idx * sizeof(%s))));\n"
                % (b, s, b, s, b, b))
        elif (func == "unpack_LAND"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = (*((%s *) (void *) (dbuf + %s))) && (*((const %s *) (const void *) (sbuf + idx * sizeof(%s))));\n"
                % (b, s, b, s, b, b))
        elif (func == "unpack_LXOR"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = !(*((%s *) (void *) (dbuf + %s))) != !(*((const %s *) (const void *) (sbuf + idx * sizeof(%s))));\n"
                % (b, s, b, s, b, b))
        elif (func == "unpack_MAX" and (b == "float" or b == "double")):
            yutils.display(
                OUTFILE,
                "    %s x_[2] = {*((const %s *) (const void *) (sbuf + idx * sizeof(%s))), *((%s *) (void *) (dbuf + %s))};\n"
                % (b, b, b, b, s))
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = x_[*((const %s *) (const void *) (sbuf + idx * sizeof(%s))) < *((%s *) (void *) (dbuf + %s))];\n"
                % (b, s, b, b, b, s))
        elif (func == "unpack_MAX"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = *((const %s *) (const void *) (sbuf + idx * sizeof(%s))) ^ ((*((const %s *) (const void *) (sbuf + idx * sizeof(%s))) ^ *((%s *) (void *) (dbuf + %s))) & -( *((const %s *) (const void *) (sbuf + idx * sizeof(%s))) < *((%s *) (void *) (dbuf + %s))));\n"
                % (b, s, b, b, b, b, b, s, b, b, b, s))
        elif (func == "unpack_MIN" and (b == "float" or b == "double")):
            yutils.display(
                OUTFILE,
                "    %s x_[2] = {*((const %s *) (const void *) (sbuf + idx * sizeof(%s))), *((%s *) (void *) (dbuf + %s))};\n"
                % (b, b, b, b, s))
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = x_[*((const %s *) (const void *) (sbuf + idx * sizeof(%s))) > *((%s *) (void *) (dbuf + %s))];\n"
                % (b, s, b, b, b, s))
        elif (func == "unpack_MIN"):
            yutils.display(
                OUTFILE,
                "*((%s *) (void *) (dbuf + %s)) = *((%s *) (void *) (dbuf + %s)) ^ ((*((const %s *) (const void *) (sbuf + idx * sizeof(%s))) ^ *((%s *) (void *) (dbuf + %s))) & -( *((const %s *) (const void *) (sbuf + idx * sizeof(%s))) < *((%s *) (void *) (dbuf + %s))));\n"
                % (b, s, b, s, b, b, b, s, b, b, b, s))

        yutils.display(OUTFILE, "}\n\n")
コード例 #16
0
def switcher_builtin_element(backend, OUTFILE, blklens, typelist, pupstr, key, val):
    yutils.display(OUTFILE, "case %s:\n" % key.upper())

    if (len(typelist) == 0):
        t = ""
    else:
        t = typelist.pop()

    if (t == ""):
        nesting_level = 0
    else:
        nesting_level = len(typelist) + 1

    if ((t == "hvector" or t == "blkhindx") and (len(blklens) > 1)):
        yutils.display(OUTFILE, "switch (%s->u.%s.blocklength) {\n" % (child_type_str(typelist), t))
        for blklen in blklens:
            if (blklen != "generic"):
                yutils.display(OUTFILE, "case %s:\n" % blklen)
            else:
                yutils.display(OUTFILE, "default:\n")
            yutils.display(OUTFILE, "if (max_nesting_level >= %d) {\n" % nesting_level)
            yutils.display(OUTFILE, "%s->pack = yaksuri_%si_%s_blklen_%s_%s;\n" % (backend, backend, pupstr, blklen, val))
            yutils.display(OUTFILE, "%s->unpack = yaksuri_%si_un%s_blklen_%s_%s;\n" % (backend, backend, pupstr, blklen, val))
            yutils.display(OUTFILE, "}\n")
            yutils.display(OUTFILE, "break;\n")
        yutils.display(OUTFILE, "}\n")
    else:
        yutils.display(OUTFILE, "if (max_nesting_level >= %d) {\n" % nesting_level)
        yutils.display(OUTFILE, "%s->pack = yaksuri_%si_%s_%s;\n" % (backend, backend, pupstr, val))
        yutils.display(OUTFILE, "%s->unpack = yaksuri_%si_un%s_%s;\n" % (backend, backend, pupstr, val))
        yutils.display(OUTFILE, "}\n")

    if (t != ""):
        typelist.append(t)
    yutils.display(OUTFILE, "break;\n")
コード例 #17
0
ファイル: genpup.py プロジェクト: yqin/yaksa
                for darray in darraylist:
                    darray.append(d1)
                    darray.append(d2)
                    for op in gencomm.type_ops[b]:
                        generate_kernels(b, darray, op)
                    darray.pop()
                    darray.pop()

                OUTFILE.close()

    ##### generate code to load modules/kernels used by init_hook
    filename = "src/backend/ze/hooks/yaksuri_zei_init_kernels.c"
    yutils.copyright_c(filename)
    OUTFILE = open(filename, "a")
    yutils.display(OUTFILE, "#include \"stdlib.h\"\n")
    yutils.display(OUTFILE, "#include \"yaksi.h\"\n")
    yutils.display(OUTFILE, "#include \"yaksuri_zei.h\"\n")
    yutils.display(OUTFILE, "#include \"level_zero/ze_api.h\"\n\n")

    num_modules = 0
    for b in builtin_types:
        OUTFILE.write(
            "extern const unsigned char yaksuri_zei_pup_%s_str[];\n" %
            b.replace(" ", "_"))
        OUTFILE.write("extern const size_t yaksuri_zei_pup_%s_size;\n" %
                      b.replace(" ", "_"))
        num_modules += 1
        for d1 in gencomm.derived_types:
            ##### generate the core pack/unpack kernels (single level)
            OUTFILE.write(
コード例 #18
0
ファイル: genpup.py プロジェクト: yqin/yaksa
def generate_host_function(b, darray):
    for func in "pack", "unpack":
        funcprefix = "%s_" % func
        for d in darray:
            funcprefix = funcprefix + "%s_" % d
        funcprefix = funcprefix + b.replace(" ", "_")

        yutils.display(
            OUTFILE,
            "void yaksuri_cudai_%s(const void *inbuf, void *outbuf, uintptr_t count, yaksa_op_t op, yaksuri_cudai_md_s *md, int n_threads, int n_blocks_x, int n_blocks_y, int n_blocks_z, int device)\n"
            % funcprefix)
        yutils.display(OUTFILE, "{\n")
        yutils.display(OUTFILE,
                       "void *args[] = { &inbuf, &outbuf, &count, &md };\n")

        yutils.display(OUTFILE, "cudaError_t cerr;\n")
        yutils.display(OUTFILE, "switch (op) {\n")
        for op in gencomm.type_ops[b]:
            funcprefix = "%s_%s_" % (func, op)
            for d in darray:
                funcprefix = funcprefix + "%s_" % d
            funcprefix = funcprefix + b.replace(" ", "_")
            yutils.display(OUTFILE, "case YAKSA_OP__%s:\n" % op)
            yutils.display(
                OUTFILE,
                "cerr = cudaLaunchKernel((const void *) yaksuri_cudai_kernel_%s,\n"
                % funcprefix)
            yutils.display(
                OUTFILE,
                "    dim3(n_blocks_x, n_blocks_y, n_blocks_z), dim3(n_threads), args, 0, yaksuri_cudai_global.stream[device]);\n"
            )
            yutils.display(OUTFILE, "YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);\n")
            yutils.display(OUTFILE, "break;\n\n")
        yutils.display(OUTFILE, "}\n")
        yutils.display(OUTFILE, "}\n\n")
コード例 #19
0
ファイル: genpup.py プロジェクト: yqin/yaksa
def generate_kernels(b, darray, op):
    global need_extent
    global s
    global idx

    for func in "pack", "unpack":
        ##### figure out the function name to use
        funcprefix = "%s_%s_" % (func, op)
        for d in darray:
            funcprefix = funcprefix + "%s_" % d
        funcprefix = funcprefix + b.replace(" ", "_")

        ##### generate the CUDA kernel
        yutils.display(
            OUTFILE,
            "__global__ void yaksuri_cudai_kernel_%s(const void *inbuf, void *outbuf, uintptr_t count, const yaksuri_cudai_md_s *__restrict__ md)\n"
            % funcprefix)
        yutils.display(OUTFILE, "{\n")
        yutils.display(
            OUTFILE, "const char *__restrict__ sbuf = (const char *) inbuf;\n")
        yutils.display(OUTFILE, "char *__restrict__ dbuf = (char *) outbuf;\n")
        yutils.display(OUTFILE, "uintptr_t extent = md->extent;\n")
        yutils.display(
            OUTFILE,
            "uintptr_t idx = blockIdx.x * blockDim.x + threadIdx.x;\n")
        yutils.display(OUTFILE, "uintptr_t res = idx;\n")
        yutils.display(OUTFILE,
                       "uintptr_t inner_elements = md->num_elements;\n")
        yutils.display(OUTFILE, "\n")

        yutils.display(OUTFILE, "if (idx >= (count * inner_elements))\n")
        yutils.display(OUTFILE, "    return;\n")
        yutils.display(OUTFILE, "\n")

        # copy loop
        idx = 0
        md = "md"
        for d in darray:
            if (d == "hvector" or d == "blkhindx" or d == "hindexed" or \
                d == "contig"):
                yutils.display(OUTFILE,
                               "uintptr_t x%d = res / inner_elements;\n" % idx)
                idx = idx + 1
                yutils.display(OUTFILE, "res %= inner_elements;\n")
                yutils.display(OUTFILE,
                               "inner_elements /= %s->u.%s.count;\n" % (md, d))
                yutils.display(OUTFILE, "\n")

            if (d == "hvector" or d == "blkhindx"):
                yutils.display(OUTFILE,
                               "uintptr_t x%d = res / inner_elements;\n" % idx)
                idx = idx + 1
                yutils.display(OUTFILE, "res %= inner_elements;\n")
                yutils.display(
                    OUTFILE,
                    "inner_elements /= %s->u.%s.blocklength;\n" % (md, d))
            elif (d == "hindexed"):
                yutils.display(OUTFILE, "uintptr_t x%d;\n" % idx)
                yutils.display(
                    OUTFILE,
                    "for (intptr_t i = 0; i < %s->u.%s.count; i++) {\n" %
                    (md, d))
                yutils.display(
                    OUTFILE,
                    "    uintptr_t in_elems = %s->u.%s.array_of_blocklengths[i] *\n"
                    % (md, d))
                yutils.display(
                    OUTFILE,
                    "                         %s->u.%s.child->num_elements;\n"
                    % (md, d))
                yutils.display(OUTFILE, "    if (res < in_elems) {\n")
                yutils.display(OUTFILE, "        x%d = i;\n" % idx)
                yutils.display(OUTFILE, "        res %= in_elems;\n")
                yutils.display(
                    OUTFILE,
                    "        inner_elements = %s->u.%s.child->num_elements;\n"
                    % (md, d))
                yutils.display(OUTFILE, "        break;\n")
                yutils.display(OUTFILE, "    } else {\n")
                yutils.display(OUTFILE, "        res -= in_elems;\n")
                yutils.display(OUTFILE, "    }\n")
                yutils.display(OUTFILE, "}\n")
                idx = idx + 1
                yutils.display(OUTFILE, "\n")

            md = "%s->u.%s.child" % (md, d)

        yutils.display(OUTFILE, "uintptr_t x%d = res;\n" % idx)
        yutils.display(OUTFILE, "\n")

        dtp = "md"
        s = "x0 * extent"
        idx = 1
        x = 1
        need_extent = False
        for d in darray:
            if (x == len(darray)):
                last = 1
            else:
                last = 0
            getattr(sys.modules[__name__], d)(x, dtp, b, last)
            x = x + 1
            dtp = dtp + "->u.%s.child" % d

        if (func == "pack"):
            if ((b == "float" or b == "double")
                    and (op == "MAX" or op == "MIN")):
                yutils.display(
                    OUTFILE,
                    "YAKSURI_CUDAI_OP_%s_FLOAT(%s, *((const %s *) (const void *) (sbuf + %s)), *((%s *) (void *) (dbuf + idx * sizeof(%s))));\n"
                    % (op, b, b, s, b, b))
            else:
                yutils.display(
                    OUTFILE,
                    "YAKSURI_CUDAI_OP_%s(*((const %s *) (const void *) (sbuf + %s)), *((%s *) (void *) (dbuf + idx * sizeof(%s))));\n"
                    % (op, b, s, b, b))
        else:
            if ((b == "float" or b == "double")
                    and (op == "MAX" or op == "MIN")):
                yutils.display(
                    OUTFILE,
                    "YAKSURI_CUDAI_OP_%s_FLOAT(%s, *((const %s *) (const void *) (sbuf + idx * sizeof(%s))), *((%s *) (void *) (dbuf + %s)));\n"
                    % (op, b, b, b, b, s))
            else:
                yutils.display(
                    OUTFILE,
                    "YAKSURI_CUDAI_OP_%s(*((const %s *) (const void *) (sbuf + idx * sizeof(%s))), *((%s *) (void *) (dbuf + %s)));\n"
                    % (op, b, b, b, s))

        yutils.display(OUTFILE, "}\n\n")
コード例 #20
0
def resized_decl(nesting, dtp, b):
    yutils.display(OUTFILE, "uintptr_t extent%d ATTRIBUTE((unused)) = %s->extent / sizeof(%s);\n" % (nesting, dtp, b))
コード例 #21
0
ファイル: genpup.py プロジェクト: raffenet/yaksa
def generate_kernels(b, darray):
    global need_extent
    global s
    global idx

    # we don't need pup kernels for basic types
    if (len(darray) == 0):
        return

    for func in "pack", "unpack":
        ##### figure out the function name to use
        funcprefix = "%s_" % func
        for d in darray:
            funcprefix = funcprefix + "%s_" % d
        funcprefix = funcprefix + b.replace(" ", "_")

        ##### generate the CUDA kernel
        yutils.display(
            OUTFILE,
            "__global__ void yaksuri_cudai_kernel_%s(const void *inbuf, void *outbuf, uintptr_t count, const yaksuri_cudai_md_s *__restrict__ md)\n"
            % funcprefix)
        yutils.display(OUTFILE, "{\n")
        yutils.display(
            OUTFILE,
            "const %s *__restrict__ sbuf = (const %s *) inbuf;\n" % (b, b))
        yutils.display(OUTFILE,
                       "%s *__restrict__ dbuf = (%s *) outbuf;\n" % (b, b))
        yutils.display(OUTFILE,
                       "uintptr_t extent = md->extent / sizeof(%s);\n" % b)
        yutils.display(
            OUTFILE,
            "uintptr_t idx = blockIdx.x * blockDim.x + threadIdx.x;\n")
        yutils.display(OUTFILE, "uintptr_t res = idx;\n")
        yutils.display(OUTFILE,
                       "uintptr_t inner_elements = md->num_elements;\n")
        yutils.display(OUTFILE, "\n")

        yutils.display(OUTFILE, "if (idx >= (count * inner_elements))\n")
        yutils.display(OUTFILE, "    return;\n")
        yutils.display(OUTFILE, "\n")

        # copy loop
        idx = 0
        md = "md"
        for d in darray:
            if (d == "hvector" or d == "blkhindx" or d == "hindexed" or \
                d == "contig"):
                yutils.display(OUTFILE,
                               "uintptr_t x%d = res / inner_elements;\n" % idx)
                idx = idx + 1
                yutils.display(OUTFILE, "res %= inner_elements;\n")
                yutils.display(OUTFILE,
                               "inner_elements /= %s->u.%s.count;\n" % (md, d))
                yutils.display(OUTFILE, "\n")

            if (d == "hvector" or d == "blkhindx"):
                yutils.display(OUTFILE,
                               "uintptr_t x%d = res / inner_elements;\n" % idx)
                idx = idx + 1
                yutils.display(OUTFILE, "res %= inner_elements;\n")
                yutils.display(
                    OUTFILE,
                    "inner_elements /= %s->u.%s.blocklength;\n" % (md, d))
            elif (d == "hindexed"):
                yutils.display(OUTFILE, "uintptr_t x%d;\n" % idx)
                yutils.display(
                    OUTFILE,
                    "for (int i = 0; i < %s->u.%s.count; i++) {\n" % (md, d))
                yutils.display(
                    OUTFILE,
                    "    uintptr_t in_elems = %s->u.%s.array_of_blocklengths[i] *\n"
                    % (md, d))
                yutils.display(
                    OUTFILE,
                    "                         %s->u.%s.child->num_elements;\n"
                    % (md, d))
                yutils.display(OUTFILE, "    if (res < in_elems) {\n")
                yutils.display(OUTFILE, "        x%d = i;\n" % idx)
                yutils.display(OUTFILE, "        res %= in_elems;\n")
                yutils.display(
                    OUTFILE,
                    "        inner_elements = %s->u.%s.child->num_elements;\n"
                    % (md, d))
                yutils.display(OUTFILE, "        break;\n")
                yutils.display(OUTFILE, "    } else {\n")
                yutils.display(OUTFILE, "        res -= in_elems;\n")
                yutils.display(OUTFILE, "    }\n")
                yutils.display(OUTFILE, "}\n")
                idx = idx + 1
                yutils.display(OUTFILE, "\n")

            md = "%s->u.%s.child" % (md, d)

        yutils.display(OUTFILE, "uintptr_t x%d = res;\n" % idx)
        yutils.display(OUTFILE, "\n")

        dtp = "md"
        s = "x0 * extent"
        idx = 1
        x = 1
        need_extent = False
        for d in darray:
            if (x == len(darray)):
                last = 1
            else:
                last = 0
            getattr(sys.modules[__name__], d)(x, dtp, b, last)
            x = x + 1
            dtp = dtp + "->u.%s.child" % d

        if (func == "pack"):
            yutils.display(OUTFILE, "dbuf[idx] = sbuf[%s];\n" % s)
        else:
            yutils.display(OUTFILE, "dbuf[%s] = sbuf[idx];\n" % s)

        yutils.display(OUTFILE, "}\n\n")

        # generate the host function
        yutils.display(
            OUTFILE,
            "void yaksuri_cudai_%s(const void *inbuf, void *outbuf, uintptr_t count, yaksuri_cudai_md_s *md, int n_threads, int n_blocks_x, int n_blocks_y, int n_blocks_z, int device)\n"
            % funcprefix)
        yutils.display(OUTFILE, "{\n")
        yutils.display(OUTFILE,
                       "void *args[] = { &inbuf, &outbuf, &count, &md };\n")
        yutils.display(
            OUTFILE,
            "cudaError_t cerr = cudaLaunchKernel((const void *) yaksuri_cudai_kernel_%s,\n"
            % funcprefix)
        yutils.display(
            OUTFILE,
            "    dim3(n_blocks_x, n_blocks_y, n_blocks_z), dim3(n_threads), args, 0, yaksuri_cudai_global.stream[device]);\n"
        )
        yutils.display(OUTFILE, "YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr);\n")
        yutils.display(OUTFILE, "}\n\n")
コード例 #22
0
def switcher(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr, nests):
    yutils.display(OUTFILE, "switch (%s->kind) {\n" % child_type_str(typelist))

    for x in range(len(derived_types)):
        d = derived_types[x]
        if (nests > 1):
            yutils.display(OUTFILE, "case YAKSI_TYPE_KIND__%s:\n" % d.upper())
            typelist.append(d)
            switcher(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr + "_%s" % d, nests - 1)
            typelist.pop()
            yutils.display(OUTFILE, "break;\n")

    if (len(typelist)):
        yutils.display(OUTFILE, "case YAKSI_TYPE_KIND__BUILTIN:\n")
        switcher_builtin(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr)
        yutils.display(OUTFILE, "break;\n")

    yutils.display(OUTFILE, "default:\n")
    yutils.display(OUTFILE, "    break;\n")
    yutils.display(OUTFILE, "}\n")
コード例 #23
0
ファイル: genpup.py プロジェクト: raffenet/yaksa
                        help='maximum nesting levels to generate')
    args = parser.parse_args()
    if (args.pup_max_nesting < 0):
        parser.print_help()
        print
        print("===> ERROR: pup-max-nesting must be positive")
        sys.exit(1)

    ##### generate the core pack/unpack kernels (single level)
    for b in builtin_types:
        for d in gencomm.derived_types:
            filename = "src/backend/cuda/pup/yaksuri_cudai_pup_%s_%s.cu" % (
                d, b.replace(" ", "_"))
            yutils.copyright_c(filename)
            OUTFILE = open(filename, "a")
            yutils.display(OUTFILE, "#include <string.h>\n")
            yutils.display(OUTFILE, "#include <stdint.h>\n")
            yutils.display(OUTFILE, "#include <wchar.h>\n")
            yutils.display(OUTFILE, "#include <assert.h>\n")
            yutils.display(OUTFILE, "#include <cuda.h>\n")
            yutils.display(OUTFILE, "#include <cuda_runtime.h>\n")
            yutils.display(OUTFILE, "#include \"yaksuri_cudai.h\"\n")
            yutils.display(OUTFILE, "#include \"yaksuri_cudai_pup.h\"\n")
            yutils.display(OUTFILE, "\n")

            emptylist = []
            emptylist.append(d)
            generate_kernels(b, emptylist)
            emptylist.pop()

            OUTFILE.close()
コード例 #24
0
def populate_pupfns(pup_max_nesting, backend, blklens, builtin_types, builtin_maps):
    ##### generate the switching logic to select pup functions
    filename = "src/backend/%s/pup/yaksuri_%si_populate_pupfns.c" % (backend, backend)
    yutils.copyright_c(filename)
    OUTFILE = open(filename, "a")
    yutils.display(OUTFILE, "#include <stdio.h>\n")
    yutils.display(OUTFILE, "#include \"yaksi.h\"\n")
    yutils.display(OUTFILE, "#include \"yaksu.h\"\n")
    yutils.display(OUTFILE, "#include \"yaksuri_%si.h\"\n" % backend)
    yutils.display(OUTFILE, "#include \"yaksuri_%si_populate_pupfns.h\"\n" % backend)
    yutils.display(OUTFILE, "\n")
    yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns(yaksi_type_s * type)\n" % backend)
    yutils.display(OUTFILE, "{\n")
    yutils.display(OUTFILE, "int rc = YAKSA_SUCCESS;\n")
    yutils.display(OUTFILE, "yaksuri_%si_type_s *%s = (yaksuri_%si_type_s *) type->backend.%s.priv;\n" \
                   % (backend, backend, backend, backend))
    yutils.display(OUTFILE, "\n")
    yutils.display(OUTFILE, "%s->pack = NULL;\n" % backend)
    yutils.display(OUTFILE, "%s->unpack = NULL;\n" % backend)
    yutils.display(OUTFILE, "\n")
    yutils.display(OUTFILE, "switch (type->kind) {\n")
    for dtype1 in derived_types:
        yutils.display(OUTFILE, "case YAKSI_TYPE_KIND__%s:\n" % dtype1.upper())
        yutils.display(OUTFILE, "switch (type->u.%s.child->kind) {\n" % dtype1)
        for dtype2 in derived_types:
            yutils.display(OUTFILE, "case YAKSI_TYPE_KIND__%s:\n" % dtype2.upper())
            yutils.display(OUTFILE, "rc = yaksuri_%si_populate_pupfns_%s_%s(type);\n" % (backend, dtype1, dtype2))
            yutils.display(OUTFILE, "break;\n")
            yutils.display(OUTFILE, "\n")
        yutils.display(OUTFILE, "case YAKSI_TYPE_KIND__BUILTIN:\n")
        yutils.display(OUTFILE, "rc = yaksuri_%si_populate_pupfns_%s_builtin(type);\n" % (backend, dtype1))
        yutils.display(OUTFILE, "break;\n")
        yutils.display(OUTFILE, "\n")
        yutils.display(OUTFILE, "default:\n")
        yutils.display(OUTFILE, "    break;\n")
        yutils.display(OUTFILE, "}\n")
        yutils.display(OUTFILE, "break;\n")
        yutils.display(OUTFILE, "\n")
    yutils.display(OUTFILE, "default:\n")
    yutils.display(OUTFILE, "    break;\n")
    yutils.display(OUTFILE, "}\n")
    yutils.display(OUTFILE, "\n")
    yutils.display(OUTFILE, "    return rc;\n")
    yutils.display(OUTFILE, "}\n");
    OUTFILE.close()

    for dtype1 in derived_types:
        for dtype2 in derived_types:
            filename = "src/backend/%s/pup/yaksuri_%si_populate_pupfns_%s_%s.c" % (backend, backend, dtype1, dtype2)
            yutils.copyright_c(filename)
            OUTFILE = open(filename, "a")
            yutils.display(OUTFILE, "#include <stdio.h>\n")
            yutils.display(OUTFILE, "#include <stdlib.h>\n")
            yutils.display(OUTFILE, "#include <wchar.h>\n")
            yutils.display(OUTFILE, "#include \"yaksi.h\"\n")
            yutils.display(OUTFILE, "#include \"yaksu.h\"\n")
            yutils.display(OUTFILE, "#include \"yaksuri_%si.h\"\n" % backend)
            yutils.display(OUTFILE, "#include \"yaksuri_%si_populate_pupfns.h\"\n" % backend)
            yutils.display(OUTFILE, "#include \"yaksuri_%si_pup.h\"\n" % backend)
            yutils.display(OUTFILE, "\n")
            yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_%s_%s(yaksi_type_s * type)\n" % (backend, dtype1, dtype2))
            yutils.display(OUTFILE, "{\n")
            yutils.display(OUTFILE, "int rc = YAKSA_SUCCESS;\n")
            yutils.display(OUTFILE, "yaksuri_%si_type_s *%s = (yaksuri_%si_type_s *) type->backend.%s.priv;\n" \
                           % (backend, backend, backend, backend))
            yutils.display(OUTFILE, "\n")
            yutils.display(OUTFILE, "char *str = getenv(\"YAKSA_ENV_MAX_NESTING_LEVEL\");\n")
            yutils.display(OUTFILE, "int max_nesting_level;\n")
            yutils.display(OUTFILE, "if (str) {\n")
            yutils.display(OUTFILE, "max_nesting_level = atoi(str);\n")
            yutils.display(OUTFILE, "} else {\n")
            yutils.display(OUTFILE, "max_nesting_level = YAKSI_ENV_DEFAULT_NESTING_LEVEL;\n")
            yutils.display(OUTFILE, "}\n")
            yutils.display(OUTFILE, "\n")

            pupstr = "pack_%s_%s" % (dtype1, dtype2)
            typelist = [ dtype1, dtype2 ]
            switcher(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr, pup_max_nesting - 1)
            yutils.display(OUTFILE, "\n")
            yutils.display(OUTFILE, "return rc;\n")
            yutils.display(OUTFILE, "}\n")
            OUTFILE.close()

        filename = "src/backend/%s/pup/yaksuri_%si_populate_pupfns_%s_builtin.c" % (backend, backend, dtype1)
        yutils.copyright_c(filename)
        OUTFILE = open(filename, "a")
        yutils.display(OUTFILE, "#include <stdio.h>\n")
        yutils.display(OUTFILE, "#include <stdlib.h>\n")
        yutils.display(OUTFILE, "#include <wchar.h>\n")
        yutils.display(OUTFILE, "#include \"yaksi.h\"\n")
        yutils.display(OUTFILE, "#include \"yaksu.h\"\n")
        yutils.display(OUTFILE, "#include \"yaksuri_%si.h\"\n" % backend)
        yutils.display(OUTFILE, "#include \"yaksuri_%si_populate_pupfns.h\"\n" % backend)
        yutils.display(OUTFILE, "#include \"yaksuri_%si_pup.h\"\n" % backend)
        yutils.display(OUTFILE, "\n")
        yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_%s_builtin(yaksi_type_s * type)\n" % (backend, dtype1))
        yutils.display(OUTFILE, "{\n")
        yutils.display(OUTFILE, "int rc = YAKSA_SUCCESS;\n")
        yutils.display(OUTFILE, "yaksuri_%si_type_s *%s = (yaksuri_%si_type_s *) type->backend.%s.priv;\n" \
                       % (backend, backend, backend, backend))
        yutils.display(OUTFILE, "\n")
        yutils.display(OUTFILE, "char *str = getenv(\"YAKSA_ENV_MAX_NESTING_LEVEL\");\n")
        yutils.display(OUTFILE, "int max_nesting_level;\n")
        yutils.display(OUTFILE, "if (str) {\n")
        yutils.display(OUTFILE, "max_nesting_level = atoi(str);\n")
        yutils.display(OUTFILE, "} else {\n")
        yutils.display(OUTFILE, "max_nesting_level = YAKSI_ENV_DEFAULT_NESTING_LEVEL;\n")
        yutils.display(OUTFILE, "}\n")
        yutils.display(OUTFILE, "\n")

        pupstr = "pack_%s" % dtype1
        typelist = [ dtype1 ]
        switcher_builtin(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr)
        yutils.display(OUTFILE, "\n")
        yutils.display(OUTFILE, "return rc;\n")
        yutils.display(OUTFILE, "}\n")
        OUTFILE.close()

    ##### generate the Makefile for the pup function selection functions
    filename = "src/backend/%s/pup/Makefile.pup.mk" % backend
    yutils.copyright_makefile(filename)
    OUTFILE = open(filename, "a")
    yutils.display(OUTFILE, "libyaksa_la_SOURCES += \\\n")
    for dtype1 in derived_types:
        for dtype2 in derived_types:
            yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns_%s_%s.c \\\n" % (backend, backend, dtype1, dtype2))
        yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns_%s_builtin.c \\\n" % (backend, backend, dtype1))
    yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns.c\n" % (backend, backend))
    yutils.display(OUTFILE, "\n")
    yutils.display(OUTFILE, "noinst_HEADERS += \\\n")
    yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns.h \\\n" % (backend, backend))
    yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_pup.h\n" % (backend, backend))
    OUTFILE.close()

    ##### generate the header file for the pup function selection functions
    filename = "src/backend/%s/pup/yaksuri_%si_populate_pupfns.h" % (backend, backend)
    yutils.copyright_c(filename)
    OUTFILE = open(filename, "a")
    yutils.display(OUTFILE, "#ifndef YAKSURI_%sI_POPULATE_PUPFNS_H_INCLUDED\n" % backend.upper())
    yutils.display(OUTFILE, "#define YAKSURI_%sI_POPULATE_PUPFNS_H_INCLUDED\n" % backend.upper())
    yutils.display(OUTFILE, "\n")
    for dtype1 in derived_types:
        for dtype2 in derived_types:
            yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_%s_%s(yaksi_type_s * type);\n" % (backend, dtype1, dtype2))
        yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_%s_builtin(yaksi_type_s * type);\n" % (backend, dtype1))
    yutils.display(OUTFILE, "\n")
    yutils.display(OUTFILE, "#endif  /* YAKSURI_%sI_POPULATE_PUPFNS_H_INCLUDED */\n" % backend.upper())
    OUTFILE.close()
コード例 #25
0
ファイル: genpup.py プロジェクト: sagarth/yaksa
        print
        print("===> ERROR: pup-max-nesting must be positive")
        sys.exit(1)

    ##### generate the list of derived datatype arrays
    darraylist = []
    yutils.generate_darrays(gencomm.derived_types, darraylist,
                            args.pup_max_nesting)

    ##### generate the core pack/unpack kernels
    for b in builtin_types:
        filename = "src/backend/seq/pup/yaksuri_seqi_pup_%s.c" % b.replace(
            " ", "_")
        yutils.copyright_c(filename)
        OUTFILE = open(filename, "a")
        yutils.display(OUTFILE, "#include <string.h>\n")
        yutils.display(OUTFILE, "#include <stdint.h>\n")
        yutils.display(OUTFILE, "#include <wchar.h>\n")
        yutils.display(OUTFILE, "#include \"yaksuri_seqi_pup.h\"\n")
        yutils.display(OUTFILE, "\n")

        for darray in darraylist:
            for blklen in blklens:
                generate_kernels(b, darray, blklen)

        OUTFILE.close()

    ##### generate the core pack/unpack kernel declarations
    filename = "src/backend/seq/pup/yaksuri_seqi_pup.h"
    yutils.copyright_c(filename)
    OUTFILE = open(filename, "a")
コード例 #26
0
def generate_kernels(b, darray, blklen):
    global num_paren_open
    global s

    # we don't need pup kernels for basic types
    if (len(darray) == 0):
        return

    # individual blocklength optimization is only for
    # hvector and blkhindx
    if (darray[-1] != "hvector" and darray[-1] != "blkhindx" and blklen != "generic"):
        return

    for func in "pack","unpack":
        ##### figure out the function name to use
        s = "int yaksuri_seqi_%s_" % func
        for d in darray:
            s = s + "%s_" % d
        # hvector and hindexed get blklen-specific function names
        if (darray[-1] != "hvector" and darray[-1] != "blkhindx"):
            s = s + b.replace(" ", "_")
        else:
            s = s + "blklen_%s_" % blklen + b.replace(" ", "_")
        yutils.display(OUTFILE, "%s(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type)\n" % s),
        yutils.display(OUTFILE, "{\n")


        ##### variable declarations
        # generic variables
        yutils.display(OUTFILE, "int rc = YAKSA_SUCCESS;\n");
        yutils.display(OUTFILE, "const %s *restrict sbuf = (const %s *) inbuf;\n" % (b, b));
        yutils.display(OUTFILE, "%s *restrict dbuf = (%s *) outbuf;\n" % (b, b));
        yutils.display(OUTFILE, "uintptr_t extent ATTRIBUTE((unused)) = type->extent / sizeof(%s);\n" % b)
        yutils.display(OUTFILE, "\n");

        # variables specific to each nesting level
        s = "type"
        for x in range(len(darray)):
            getattr(sys.modules[__name__], "%s_decl" % darray[x])(x + 1, s, b)
            yutils.display(OUTFILE, "\n")
            s = s + "->u.%s.child" % darray[x]


        ##### non-hvector and non-blkhindx
        yutils.display(OUTFILE, "uintptr_t idx = 0;\n")
        yutils.display(OUTFILE, "for (int i = 0; i < count; i++) {\n")
        num_paren_open += 1
        s = "i * extent"
        for x in range(len(darray)):
            if (x != len(darray) - 1):
                getattr(sys.modules[__name__], darray[x])(x + 1, b, "generic", 0)
            else:
                getattr(sys.modules[__name__], darray[x])(x + 1, b, blklen, 1)

        if (func == "pack"):
            yutils.display(OUTFILE, "dbuf[idx++] = sbuf[%s];\n" % s)
        else:
            yutils.display(OUTFILE, "dbuf[%s] = sbuf[idx++];\n" % s)
        for x in range(num_paren_open):
            yutils.display(OUTFILE, "}\n")
        num_paren_open = 0
        yutils.display(OUTFILE, "\n");
        yutils.display(OUTFILE, "return rc;\n")
        yutils.display(OUTFILE, "}\n\n")
コード例 #27
0
def generate_kernels(b, darray, blklen):
    global num_paren_open
    global s

    # individual blocklength optimization is only for
    # hvector and blkhindx
    if (len(darray) and darray[-1] != "hvector" and darray[-1] != "blkhindx"
            and blklen != "generic"):
        return

    for func in "pack", "unpack":
        ##### figure out the function name to use
        s = "int yaksuri_seqi_%s_" % func
        for d in darray:
            s = s + "%s_" % d
        # hvector and hindexed get blklen-specific function names
        if (len(darray)
                and (darray[-1] == "hvector" or darray[-1] == "blkhindx")):
            s = s + "blklen_%s_" % blklen + b.replace(" ", "_")
        else:
            s = s + b.replace(" ", "_")
        yutils.display(
            OUTFILE,
            "%s(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type, yaksa_op_t op)\n"
            % s),
        yutils.display(OUTFILE, "{\n")

        ##### variable declarations
        # generic variables
        yutils.display(OUTFILE, "int rc = YAKSA_SUCCESS;\n")
        yutils.display(OUTFILE,
                       "const char *restrict sbuf = (const char *) inbuf;\n")
        yutils.display(OUTFILE, "char *restrict dbuf = (char *) outbuf;\n")
        yutils.display(
            OUTFILE, "uintptr_t extent ATTRIBUTE((unused)) = type->extent;\n")
        yutils.display(OUTFILE, "\n")

        # variables specific to each nesting level
        s = "type"
        for x in range(len(darray)):
            getattr(sys.modules[__name__], "%s_decl" % darray[x])(x + 1, s, b)
            yutils.display(OUTFILE, "\n")
            s = s + "->u.%s.child" % darray[x]

        yutils.display(OUTFILE, "uintptr_t idx = 0;\n")

        ##### non-hvector and non-blkhindx
        yutils.display(OUTFILE, "switch (op) {\n")
        for op in gencomm.type_ops[b]:
            yutils.display(OUTFILE, "case YAKSA_OP__%s:\n" % op)
            yutils.display(OUTFILE, "{\n")

            yutils.display(OUTFILE, "for (intptr_t i = 0; i < count; i++) {\n")
            num_paren_open += 1
            s = "i * extent"
            for x in range(len(darray)):
                if (x != len(darray) - 1):
                    getattr(sys.modules[__name__], darray[x])(x + 1, b,
                                                              "generic", 0)
                else:
                    getattr(sys.modules[__name__], darray[x])(x + 1, b, blklen,
                                                              1)

            if (func == "pack"):
                if ((b == "float" or b == "double" or b == "long double")
                        and (op == "MAX" or op == "MIN")):
                    yutils.display(
                        OUTFILE,
                        "YAKSURI_SEQI_OP_%s_FLOAT(%s, *((const %s *) (const void *) (sbuf + %s)), *((%s *) (void *) (dbuf + idx)));\n"
                        % (op, b, b, s, b))
                else:
                    yutils.display(
                        OUTFILE,
                        "YAKSURI_SEQI_OP_%s(*((const %s *) (const void *) (sbuf + %s)), *((%s *) (void *) (dbuf + idx)));\n"
                        % (op, b, s, b))
            else:
                if ((b == "float" or b == "double" or b == "long double")
                        and (op == "MAX" or op == "MIN")):
                    yutils.display(
                        OUTFILE,
                        "YAKSURI_SEQI_OP_%s_FLOAT(%s, *((const %s *) (const void *) (sbuf + idx)), *((%s *) (void *) (dbuf + %s)));\n"
                        % (op, b, b, b, s))
                else:
                    yutils.display(
                        OUTFILE,
                        "YAKSURI_SEQI_OP_%s(*((const %s *) (const void *) (sbuf + idx)), *((%s *) (void *) (dbuf + %s)));\n"
                        % (op, b, b, s))

            yutils.display(OUTFILE, "idx += sizeof(%s);\n" % b)
            for x in range(num_paren_open):
                yutils.display(OUTFILE, "}\n")
            num_paren_open = 0

            yutils.display(OUTFILE, "break;\n")
            yutils.display(OUTFILE, "}\n")
            yutils.display(OUTFILE, "\n")

        yutils.display(OUTFILE, "default:\n")
        yutils.display(OUTFILE, "    break;\n")
        yutils.display(OUTFILE, "}\n")

        yutils.display(OUTFILE, "\n")
        yutils.display(OUTFILE, "return rc;\n")
        yutils.display(OUTFILE, "}\n\n")
コード例 #28
0
ファイル: genpup.py プロジェクト: yqin/yaksa
                        default=3,
                        help='maximum nesting levels to generate')
    args = parser.parse_args()
    if (args.pup_max_nesting < 0):
        parser.print_help()
        print
        print("===> ERROR: pup-max-nesting must be positive")
        sys.exit(1)

    ##### generate the core pack/unpack kernels (zero levels)
    for b in builtin_types:
        filename = "src/backend/cuda/pup/yaksuri_cudai_pup_%s.cu" % b.replace(
            " ", "_")
        yutils.copyright_c(filename)
        OUTFILE = open(filename, "a")
        yutils.display(OUTFILE, "#include <string.h>\n")
        yutils.display(OUTFILE, "#include <stdint.h>\n")
        yutils.display(OUTFILE, "#include <wchar.h>\n")
        yutils.display(OUTFILE, "#include <assert.h>\n")
        yutils.display(OUTFILE, "#include <cuda.h>\n")
        yutils.display(OUTFILE, "#include <cuda_runtime.h>\n")
        yutils.display(OUTFILE, "#include \"yaksuri_cudai_base.h\"\n")
        yutils.display(OUTFILE, "#include \"yaksuri_cudai_pup.h\"\n")
        yutils.display(OUTFILE, "\n")

        emptylist = []
        for op in gencomm.type_ops[b]:
            generate_kernels(b, emptylist, op)
        generate_host_function(b, emptylist)

        OUTFILE.close()
コード例 #29
0
                        default=3,
                        help='maximum nesting levels to generate')
    args = parser.parse_args()
    if (args.pup_max_nesting < 0):
        parser.print_help()
        print
        print("===> ERROR: pup-max-nesting must be positive")
        sys.exit(1)

    ##### generate the reduction kernels for contiguous types
    for b in builtin_types:
        filename = "src/backend/seq/pup/yaksuri_seqi_pup_%s.c" % b.replace(
            " ", "_")
        yutils.copyright_c(filename)
        OUTFILE = open(filename, "a")
        yutils.display(OUTFILE, "#include <string.h>\n")
        yutils.display(OUTFILE, "#include <stdint.h>\n")
        yutils.display(OUTFILE, "#include <wchar.h>\n")
        yutils.display(OUTFILE, "#include \"yaksuri_seqi_pup.h\"\n")
        yutils.display(OUTFILE, "\n")

        emptylist = []
        generate_kernels(b, emptylist, 0)

        OUTFILE.close()

    ##### generate the core pack/unpack kernels (single level)
    for b in builtin_types:
        for d in gencomm.derived_types:
            filename = "src/backend/seq/pup/yaksuri_seqi_pup_%s_%s.c" % (
                d, b.replace(" ", "_"))
コード例 #30
0
    parser = argparse.ArgumentParser()
    parser.add_argument('--pup-max-nesting', type=int, default=3, help='maximum nesting levels to generate')
    args = parser.parse_args()
    if (args.pup_max_nesting < 0):
        parser.print_help()
        print
        print("===> ERROR: pup-max-nesting must be positive")
        sys.exit(1)

    ##### generate the core pack/unpack kernels (single level)
    for b in builtin_types:
        for d in gencomm.derived_types:
            filename = "src/backend/seq/pup/yaksuri_seqi_pup_%s_%s.c" % (d, b.replace(" ","_"))
            yutils.copyright_c(filename)
            OUTFILE = open(filename, "a")
            yutils.display(OUTFILE, "#include <string.h>\n")
            yutils.display(OUTFILE, "#include <stdint.h>\n")
            yutils.display(OUTFILE, "#include <wchar.h>\n")
            yutils.display(OUTFILE, "#include \"yaksuri_seqi_pup.h\"\n")
            yutils.display(OUTFILE, "\n")

            emptylist = [ ]
            emptylist.append(d)
            for blklen in blklens:
                generate_kernels(b, emptylist, blklen)
            emptylist.pop()

            OUTFILE.close()

    ##### generate the core pack/unpack kernels (more than one level)
    darraylist = [ ]