Exemple #1
0
    def generate_compounded_merger(self, f, width, ascending, inline):
        """Write the source text of a ``sort_<width>v_merge_<direction>`` routine to ``f``.

        The emitted routine first pairs each register above the split point
        with the register ``w1`` positions below it, writing a min/max
        exchange for every pair, and then delegates to the two smaller merge
        routines covering the lower ``w1`` and upper ``w2`` registers.

        Args:
            f: Writable text stream; every chunk is emitted via ``print(..., file=f)``.
            width: Number of vector registers handled by the generated routine.
                Must be an integer (it is formatted with ``:02d``).
            ascending: Truthy selects the ``ascending`` name suffix, falsy
                selects ``descending``.
            inline: Truthy emits the ``INLINE`` qualifier, falsy ``NOINLINE``.
        """
        elem_type = self.type

        # Split point: half of next_power_of_2(width); the rest goes to the
        # second recursive merge call.
        w1 = int(next_power_of_2(width) / 2)
        w2 = int(width - w1)

        suffix = "ascending" if ascending else "descending"
        linkage = "INLINE" if inline else "NOINLINE"

        params = self.generate_param_def_list(width)
        vec_type = self.vector_type()

        # The 64-bit integer element types declare an additional `cmp`
        # temporary next to `tmp`.
        cmp_decl = ", cmp" if elem_type in ("int64_t", "uint64_t") else ""
        # uint64_t additionally declares a `topBit` constant (1 << 63).
        if elem_type == "uint64_t":
            top_bit_decl = f"\n        {vec_type} topBit = _mm256_set1_epi64x(1LLU << 63);"
        else:
            top_bit_decl = ""

        header = f"""    static {linkage} void sort_{width:02d}v_merge_{suffix}({params}) {{
        {vec_type}  tmp{cmp_decl};{top_bit_decl}"""
        print(header, file=f)

        for r in range(w1 + 1, width + 1):
            x = r - w1
            hi = f"d{r:02d}"
            lo = f"d{x:02d}"
            # `tmp` keeps the original low register so the max can still be
            # computed after the low register has been overwritten by the min.
            # NOTE(review): crappity_crap_crap presumably emits the compare
            # setup that the 64-bit min/max need -- confirm against its definition.
            step = f"""
        tmp = {lo};
        {self.crappity_crap_crap(hi, lo)}
        {lo} = {self.generate_min(hi, lo)};
        {self.crappity_crap_crap(hi, "tmp")}
        {hi} = {self.generate_max(hi, "tmp")};"""
            print(step, file=f)

        tail = f"""
        sort_{w1:02d}v_merge_{suffix}({self.generate_param_list(1, w1)});
        sort_{w2:02d}v_merge_{suffix}({self.generate_param_list(w1 + 1, w2)});"""
        print(tail, file=f)
        print("    }", file=f)
Exemple #2
0
    def generate_compounded_merger(self, f, width, ascending, inline):
        """Write the source text of a ``sort_<width>v_merge_<direction>`` routine to ``f``.

        The emitted routine first pairs each register above the split point
        with the register ``w1`` positions below it, writing a min/max
        exchange for every pair, and then delegates to the two smaller merge
        routines covering the lower ``w1`` and upper ``w2`` registers.

        Args:
            f: Writable text stream; every chunk is emitted via ``print(..., file=f)``.
            width: Number of vector registers handled by the generated routine.
                Must be an integer (it is formatted with ``:02d``).
            ascending: Truthy selects the ``ascending`` name suffix, falsy
                selects ``descending``.
            inline: Truthy emits the ``INLINE`` qualifier, falsy ``NOINLINE``.
        """
        # Split point: half of next_power_of_2(width); the rest goes to the
        # second recursive merge call.
        w1 = int(next_power_of_2(width) / 2)
        w2 = int(width - w1)

        suffix = "ascending" if ascending else "descending"
        linkage = "INLINE" if inline else "NOINLINE"

        header = f"""    static {linkage} void sort_{width:02d}v_merge_{suffix}({self.generate_param_def_list(width)}) {{
        {self.vector_type()}  tmp;"""
        print(header, file=f)

        for r in range(w1 + 1, width + 1):
            x = r - w1
            hi = f"d{r:02d}"
            lo = f"d{x:02d}"
            # `tmp` keeps the original low register so the max can still be
            # computed after the low register has been overwritten by the min.
            step = f"""
        tmp = {lo};
        {lo} = {self.generate_min(hi, lo)};
        {hi} = {self.generate_max(hi, "tmp")};"""
            print(step, file=f)

        tail = f"""
        sort_{w1:02d}v_merge_{suffix}({self.generate_param_list(1, w1)});
        sort_{w2:02d}v_merge_{suffix}({self.generate_param_list(w1 + 1, w2)});"""
        print(tail, file=f)
        print("    }", file=f)
 def largest_merge_variant_needed(self):
     """Return half of ``next_power_of_2(self.max_bitonic_sort_vectors())``.

     The halving uses true division (``/``), so under Python 3 an integer
     power of two yields a float result.
     """
     vector_count = self.max_bitonic_sort_vectors()
     rounded_up = next_power_of_2(vector_count)
     return rounded_up / 2