Esempio n. 1
0
    def test_simple_tiling(self, ker_init, ker_reduce_ind_read, ker_write,
                           ker_write2d, iterset, indset, iterset2indset,
                           ix2, x, y, z, skip_greedy, nu, ts):
        """Verify the output of a tiled chain of four parallel loops.

        The chain performs three direct writes (to ``y``, ``z`` and ``ix2``)
        followed by a direct increment of ``y`` that reads ``ix2`` through
        ``iterset2indset`` and ``z`` directly. The chain is replayed inside a
        ``loop_chain`` in ``'tile'`` mode using the tile size (``ts``) and
        unroll factor (``nu``) provided by the caller, and ``sum(y.data)`` is
        compared against ``nelems * 3`` after each batch of replays."""

        chain_name = "simple_nu%d" % nu

        def run_loops_once():
            init_kernel = op2.Kernel(ker_init, "ker_init")
            op2.par_loop(init_kernel, iterset, y(op2.WRITE))

            write_kernel = op2.Kernel(ker_write, "ker_write")
            op2.par_loop(write_kernel, iterset, z(op2.WRITE))

            write2d_kernel = op2.Kernel(ker_write2d, "ker_write2d")
            op2.par_loop(write2d_kernel, indset, ix2(op2.WRITE))

            reduce_kernel = op2.Kernel(ker_reduce_ind_read, "ker_reduce_ind_read")
            op2.par_loop(reduce_kernel, iterset,
                         y(op2.INC), ix2(op2.READ, iterset2indset), z(op2.READ))

        # Tiling is skipped until the same sequence is seen three times; the
        # first check comes after two executions, the second after four more.
        for repeats in (2, 4):
            for _ in range(repeats):
                with loop_chain(chain_name, mode='tile', tile_size=ts, num_unroll=nu):
                    run_loops_once()
            assert sum(y.data) == nelems * 3
Esempio n. 2
0
    def test_acyclic_raw_dependency(self, ker_ind_inc, ker_write, iterset,
                                    bigiterset, indset, iterset2indset, indset2iterset,
                                    bigiterset2iterset, x, y, bigx, ix, sl, skip_greedy):
        """Verify tiling on a chain of loops linked by read-after-write
        dependencies.

        Each iteration first (re)writes ``x``, ``y``, ``bigx`` and ``ix``
        outside of the loop chain, then runs three indirect increments inside
        a loop chain created with ``ignore_war=True``: every loop reads the
        dat incremented by the previous one. The sums of ``x``, ``ix`` and
        ``y`` are checked at the end of every iteration."""

        chain_options = dict(mode='tile', tile_size=nelems//10, num_unroll=1,
                             seed_loop=sl, ignore_war=True)
        # (iteration set, dat) pairs initialised before each chain execution
        initial_writes = ((iterset, x), (iterset, y), (bigiterset, bigx), (indset, ix))

        # Tiling is skipped until the same sequence is seen three times
        for _ in range(3):
            for target_set, dat in initial_writes:
                op2.par_loop(op2.Kernel(ker_write, "ker_write"), target_set,
                             dat(op2.WRITE))

            with loop_chain("tiling_acyclic_raw", **chain_options):
                # bigx -> x
                inc_kernel = op2.Kernel(ker_ind_inc, 'ker_ind_inc')
                op2.par_loop(inc_kernel, bigiterset,
                             x(op2.INC, bigiterset2iterset), bigx(op2.READ))
                # x -> ix
                inc_kernel = op2.Kernel(ker_ind_inc, 'ker_ind_inc')
                op2.par_loop(inc_kernel, iterset,
                             ix(op2.INC, iterset2indset), x(op2.READ))
                # ix -> y
                inc_kernel = op2.Kernel(ker_ind_inc, 'ker_ind_inc')
                op2.par_loop(inc_kernel, indset,
                             y(op2.INC, indset2iterset), ix(op2.READ))

            assert sum(x.data) == nelems * 3
            assert sum(ix.data) == nelems * 4
            assert sum(y.data) == nelems * 5
Esempio n. 3
0
    def test_advanced_tiling(self, ker_init, ker_reduce_ind_read, ker_ind_reduce,
                             ker_write, ker_write2d, ker_inc, iterset, indset,
                             iterset2indset, indset2iterset, ix2, y, z, skip_greedy,
                             nu, ts, fs, sl):
        """Verify the output of a tiled chain of six parallel loops.

        The chain starts with three direct writes (``y``, ``z``, ``ix2``) and
        continues with three direct increments (``y``, ``ix2``, ``z``), each
        of which reads another dat through a map; both RAW and WAR
        dependencies therefore occur between the loops. The tile size
        (``ts``), unroll factor (``nu``), explicit fusion mode (``fs``) and
        seed loop (``sl``) are forwarded to ``loop_chain`` as supplied by the
        caller. The sums of ``z``, ``y`` and ``ix2`` are checked after every
        execution of the chain."""

        chain_name = "advanced_nu%d" % nu
        chain_options = dict(mode='tile', tile_size=ts, num_unroll=nu,
                             explicit_mode=fs, seed_loop=sl)

        # Tiling is skipped until the same sequence is seen three times
        for _ in range(4):
            with loop_chain(chain_name, **chain_options):
                # Direct writes
                kernel = op2.Kernel(ker_init, "ker_init")
                op2.par_loop(kernel, iterset, y(op2.WRITE))
                kernel = op2.Kernel(ker_write, "ker_write")
                op2.par_loop(kernel, iterset, z(op2.WRITE))
                kernel = op2.Kernel(ker_write2d, "ker_write2d")
                op2.par_loop(kernel, indset, ix2(op2.WRITE))

                # Increments reading other dats through maps
                kernel = op2.Kernel(ker_reduce_ind_read, "ker_reduce_ind_read")
                op2.par_loop(kernel, iterset,
                             y(op2.INC), ix2(op2.READ, iterset2indset), z(op2.READ))
                kernel = op2.Kernel(ker_ind_reduce, "ker_ind_reduce")
                op2.par_loop(kernel, indset,
                             ix2(op2.INC), y(op2.READ, indset2iterset))
                kernel = op2.Kernel(ker_reduce_ind_read, "ker_reduce_ind_read")
                op2.par_loop(kernel, iterset,
                             z(op2.INC), ix2(op2.READ, iterset2indset), y(op2.READ))

            assert sum(z.data) == nelems * 27 + nelems
            assert sum(y.data) == nelems * 3
            assert sum(sum(ix2.data)) == nelems * 9
Esempio n. 4
0
    def test_war_dependency(self, ker_ind_reduce, ker_reduce_ind_read,
                            ker_write, ker_write2d, iterset, indset, sl,
                            iterset2indset, indset2iterset, x, y, ix2,
                            skip_greedy):
        """Verify tiling when the chained loops carry a write-after-read
        dependency.

        ``y`` is written once up front. Each iteration then rewrites ``x``
        and ``ix2`` and runs a two-loop chain: the first loop increments
        ``ix2`` while reading ``x`` through ``indset2iterset``; the second
        increments ``x`` while reading ``ix2`` through ``iterset2indset`` and
        ``y`` directly."""

        write_kernel = op2.Kernel(ker_write, "ker_write")
        op2.par_loop(write_kernel, iterset, y(op2.WRITE))

        # Tiling is skipped until the same sequence is seen three times
        for _ in range(3):
            write_kernel = op2.Kernel(ker_write, "ker_write")
            op2.par_loop(write_kernel, iterset, x(op2.WRITE))
            write2d_kernel = op2.Kernel(ker_write2d, "ker_write2d")
            op2.par_loop(write2d_kernel, indset, ix2(op2.WRITE))

            with loop_chain("tiling_war", mode='tile', tile_size=nelems // 10,
                            num_unroll=1, seed_loop=sl):
                # Reads x ...
                ind_reduce = op2.Kernel(ker_ind_reduce, "ker_ind_reduce")
                op2.par_loop(ind_reduce, indset,
                             ix2(op2.INC), x(op2.READ, indset2iterset))
                # ... which is then incremented here (the WAR dependency)
                reduce_ind_read = op2.Kernel(ker_reduce_ind_read, "ker_reduce_ind_read")
                op2.par_loop(reduce_ind_read, iterset,
                             x(op2.INC), ix2(op2.READ, iterset2indset), y(op2.READ))

            assert sum(sum(ix2.data)) == nelems * (1 + 2) + nelems * 2
            assert sum(x.data) == sum(sum(ix2.data)) + nelems
Esempio n. 5
0
    def test_war_dependency(self, ker_ind_reduce, ker_reduce_ind_read, ker_write,
                            ker_write2d, iterset, indset, sl, iterset2indset,
                            indset2iterset, x, y, ix2, skip_greedy):
        """Exercise tiling on a two-loop chain with a write-after-read
        dependency.

        After a one-off write to ``y``, every iteration reinitialises ``x``
        and ``ix2`` and then executes, inside a tiled loop chain, a loop that
        reads ``x`` (incrementing ``ix2``) followed by a loop that increments
        ``x`` (reading ``ix2`` and ``y``). The sums of ``ix2`` and ``x`` are
        checked once per iteration."""

        chain_options = dict(mode='tile', tile_size=nelems//10, num_unroll=1,
                             seed_loop=sl)
        # (kernel code, kernel name, iteration set, dat) for the per-iteration
        # reinitialisation performed outside of the loop chain
        resets = ((ker_write, "ker_write", iterset, x),
                  (ker_write2d, "ker_write2d", indset, ix2))

        op2.par_loop(op2.Kernel(ker_write, "ker_write"), iterset, y(op2.WRITE))

        # Tiling is skipped until the same sequence is seen three times
        for _ in range(3):
            for code, name, target_set, dat in resets:
                op2.par_loop(op2.Kernel(code, name), target_set, dat(op2.WRITE))

            with loop_chain("tiling_war", **chain_options):
                op2.par_loop(
                    op2.Kernel(ker_ind_reduce, "ker_ind_reduce"), indset,
                    ix2(op2.INC), x(op2.READ, indset2iterset))
                op2.par_loop(
                    op2.Kernel(ker_reduce_ind_read, "ker_reduce_ind_read"), iterset,
                    x(op2.INC), ix2(op2.READ, iterset2indset), y(op2.READ))

            assert sum(sum(ix2.data)) == nelems * (1 + 2) + nelems * 2
            assert sum(x.data) == sum(sum(ix2.data)) + nelems
Esempio n. 6
0
    def run(self, T, TS=0):
        """ Run the elastic wave simulation until t = T or ntimesteps = TS.

        :param float T: The finish time of the simulation.
        :param int TS: The maximum number of timesteps performed; ignored if = 0.
        :returns: A tuple ``(start, end, timestep, u1, s1)``: the ``time()``
                  readings taken right before the timestepping loop and right
                  after the final write, the number of timesteps performed,
                  and the final velocity (``self.u1``) and stress
                  (``self.s1``) fields.
        """

        # Write out the initial condition.
        self.write(self.u1, self.s1, self.tofile)

        info("Generating inverse mass matrix")
        # Pre-assemble the inverse mass matrices, which should stay
        # constant throughout the simulation (assuming no mesh adaptivity).
        start = time()
        self.assemble_inverse_mass()
        end = time()
        info("DONE! (Elapsed: %f s)" % round(end - start, 3))
        op2.MPI.COMM_WORLD.barrier()
        info("Copying inverse mass matrix into a dat...")
        start = time()
        self.copy_massmatrix_into_dat()
        end = time()
        info("DONE! (Elapsed: %f s)" % round(end - start, 3))
        op2.MPI.COMM_WORLD.barrier()

        # From here on, 'start' marks the beginning of the timestepping loop;
        # it is part of the returned tuple.
        start = time()
        t = self.dt
        timestep = 0
        # TS == 0 means "no limit on the number of timesteps". Testing for it
        # explicitly avoids the sys.maxint sentinel, which does not exist in
        # Python 3.
        unlimited = TS == 0

        # The 1e-12 slack tolerates the rounding error accumulated in 't'.
        while t <= T + 1e-12 and (unlimited or timestep < TS):
            if op2.MPI.COMM_WORLD.rank == 0 and timestep % self.output == 0:
                info("t = %f, (timestep = %d)" % (t, timestep))
            with loop_chain("main1",
                            tile_size=self.tiling_size,
                            num_unroll=self.tiling_uf,
                            mode=self.tiling_mode,
                            extra_halo=self.tiling_halo,
                            explicit=self.tiling_explicit,
                            use_glb_maps=self.tiling_glb_maps,
                            use_prefetch=self.tiling_prefetch,
                            coloring=self.tiling_coloring,
                            ignore_war=True,
                            log=self.tiling_log):
                # In case the source is time-dependent, update the time 't' here.
                if self.source:
                    with timed_region('source term update'):
                        self.source_expression.t = t
                        self.source = self.source_expression

                # Solve for the velocity vector field.
                self.solve(self.rhs_uh1, self.velocity_mass_asdat, self.uh1)
                self.solve(self.rhs_stemp, self.stress_mass_asdat, self.stemp)
                self.solve(self.rhs_uh2, self.velocity_mass_asdat, self.uh2)
                self.solve(self.rhs_u1, self.velocity_mass_asdat, self.u1)

                # Solve for the stress tensor field.
                self.solve(self.rhs_sh1, self.stress_mass_asdat, self.sh1)
                self.solve(self.rhs_utemp, self.velocity_mass_asdat,
                           self.utemp)
                self.solve(self.rhs_sh2, self.stress_mass_asdat, self.sh2)
                self.solve(self.rhs_s1, self.stress_mass_asdat, self.s1)

            # The fields just computed become the input of the next timestep.
            self.u0.assign(self.u1)
            self.s0.assign(self.s1)

            # Write out the new fields
            self.write(self.u1, self.s1, self.tofile
                       and timestep % self.output == 0)

            # Move onto next timestep
            t += self.dt
            timestep += 1

        # Write out the final state of the fields
        self.write(self.u1, self.s1, self.tofile)

        end = time()

        return start, end, timestep, self.u1, self.s1