Esempio n. 1
0
def TestRelativeAddressing():
  import corepy.arch.cal.platform as env
  import corepy.arch.cal.isa as cal
  
  proc = env.Processor(0)
  
  input_mem = proc.alloc_remote('I', 4, 16, 1)
  output_mem = proc.alloc_remote('I', 4, 1, 1)
  
  for i in range(16*1*4):
    for j in range(4):
      input_mem[i*4 + j] = i

  prgm = env.Program()  
  code = prgm.get_stream()
  cal.set_active_code(code)
    
  cal.dcl_output(o0, USAGE=cal.usage.generic)
  cal.dcl_literal(l0, 1, 1, 1, 1)
  cal.dcl_literal(l1, 16, 16, 16, 16)
  cal.mov(r0, r0('0000'))
  cal.mov(r1, r1('0000'))
  

  cal.whileloop()
  cal.iadd(r1, r1, g[r0.x])
  cal.iadd(r0, r0, l0)
  cal.breakc(cal.relop.ge, r0, l1)
  cal.endloop()

  cal.mov(o0, r1)
  
  prgm.set_binding('g[]', input_mem)
  prgm.set_binding('o0', output_mem)

  prgm.add(code)
  domain = (0, 0, 128, 128)

  prgm.print_code()
  proc.execute(prgm, domain)
  
  # code.cache_code()
  # print code.render_string
 
  if output_mem[0] == 120:
    print "Passed relative addressing test"
  else:
    print "Failed relative addressing test"

  proc.free(input_mem)
  proc.free(output_mem)
Esempio n. 2
0
def TestRelativeAddressing():
    import corepy.arch.cal.platform as env
    import corepy.arch.cal.isa as cal

    proc = env.Processor(0)

    input_mem = proc.alloc_remote('I', 4, 16, 1)
    output_mem = proc.alloc_remote('I', 4, 1, 1)

    for i in range(16 * 1 * 4):
        for j in range(4):
            input_mem[i * 4 + j] = i

    prgm = env.Program()
    code = prgm.get_stream()
    cal.set_active_code(code)

    cal.dcl_output(o0, USAGE=cal.usage.generic)
    cal.dcl_literal(l0, 1, 1, 1, 1)
    cal.dcl_literal(l1, 16, 16, 16, 16)
    cal.mov(r0, r0('0000'))
    cal.mov(r1, r1('0000'))

    cal.whileloop()
    cal.iadd(r1, r1, g[r0.x])
    cal.iadd(r0, r0, l0)
    cal.breakc(cal.relop.ge, r0, l1)
    cal.endloop()

    cal.mov(o0, r1)

    prgm.set_binding('g[]', input_mem)
    prgm.set_binding('o0', output_mem)

    prgm.add(code)
    domain = (0, 0, 128, 128)

    prgm.print_code()
    proc.execute(prgm, domain)

    # code.cache_code()
    # print code.render_string

    if output_mem[0] == 120:
        print "Passed relative addressing test"
    else:
        print "Failed relative addressing test"

    proc.free(input_mem)
    proc.free(output_mem)
Esempio n. 3
0
def generate(prgm):
    code = prgm.get_stream()
    cal.set_active_code(code)

    r_count = prgm.acquire_register()
    #r_cx = prgm.acquire_register()
    #r_cy = prgm.acquire_register()
    r_sum = prgm.acquire_register()
    r_limit = prgm.acquire_register((64.0, ) * 4)
    r_cmp = prgm.acquire_register()

    cal.dcl_output(reg.o0, USAGE=cal.usage.generic)

    cal.mov(r_count, r_count('0000'))
    #cal.mov(r_cx, r_cx('0000'))
    #cal.mov(r_cy, r_cy('0000'))
    cal.mov(r_sum, r_sum('0000'))

    cal.whileloop()
    #cal.ge(r_cmp, r_count.x, r_limit)
    #cal.break_logicalnz(r_cmp)
    cal.breakc(cal.relop.ge, r_count.x, r_limit)

    cal.mov(r_count, r_count('x0zw'))

    cal.whileloop()
    #cal.ge(r_cmp, r_count.y, r_limit)
    #cal.break_logicalnz(r_cmp)
    cal.breakc(cal.relop.ge, r_count.y, r_limit)

    cal.add(r_sum, r_sum, r_sum('1111'))

    cal.add(r_count, r_count, r_count('0100'))
    cal.endloop()

    cal.add(r_count, r_count, r_count('1000'))
    cal.endloop()

    cal.mov(reg.o0, r_sum)

    return code
Esempio n. 4
0
def generate(prgm):
  code = prgm.get_stream()
  cal.set_active_code(code)

  r_count = prgm.acquire_register()
  #r_cx = prgm.acquire_register()
  #r_cy = prgm.acquire_register()
  r_sum = prgm.acquire_register()
  r_limit = prgm.acquire_register((64.0,) * 4)
  r_cmp = prgm.acquire_register()

  cal.dcl_output(reg.o0, USAGE=cal.usage.generic)

  cal.mov(r_count, r_count('0000'))
  #cal.mov(r_cx, r_cx('0000'))
  #cal.mov(r_cy, r_cy('0000'))
  cal.mov(r_sum, r_sum('0000'))

  cal.whileloop()
  #cal.ge(r_cmp, r_count.x, r_limit)
  #cal.break_logicalnz(r_cmp)
  cal.breakc(cal.relop.ge, r_count.x, r_limit)

  cal.mov(r_count, r_count('x0zw'))

  cal.whileloop()
  #cal.ge(r_cmp, r_count.y, r_limit)
  #cal.break_logicalnz(r_cmp)
  cal.breakc(cal.relop.ge, r_count.y, r_limit)

  cal.add(r_sum, r_sum, r_sum('1111'))

  cal.add(r_count, r_count, r_count('0100'))
  cal.endloop()

  cal.add(r_count, r_count, r_count('1000'))
  cal.endloop()
  
  cal.mov(reg.o0, r_sum)

  return code
Esempio n. 5
0
  def start(self, align = True, branch = True):
    """Do pre-loop iteration initialization"""
    count_vec = [0, 0, 0, 0]

    if self.r_count is None:
      self.r_count = self.code.prgm.acquire_register()
      
    if self.mode == DEC:
      if not self._external_start:
        count_vec[0] = self.n_start
      if not self._external_stop:
        count_vec[1] = self.n_stop
      if not self._external_step:
        count_vec[2] = self.step

    elif self.mode == INC:
      if not self._external_start:
        if self.n_start != 0:
          count_vec[0] = self.n_start
      if not self._external_stop:
        count_vec[1] = self.n
      if not self._external_step:
        count_vec[2] = self.step

    # /end mode if
    
    count_vec[0] = float(count_vec[0])
    count_vec[1] = float(count_vec[1])
    count_vec[2] = float(count_vec[2])
    count_vec[3] = float(count_vec[3])
    if not (self._external_stop and self._external_start and self._external_step):
      self.r_loop_consts = self.code.prgm.acquire_register(count_vec)

    if self._external_start == False:
      self.code.add(cal.mov(self.r_count, self.r_loop_consts.x))
    else:
      self.code.add(cal.mov(self.r_count, self.r_start))

    if self._external_stop == False:
      self.r_stop = self.code.prgm.acquire_register()
      self.code.add(cal.mov(self.r_stop, self.r_loop_consts.y))

    if self._external_step == False:
      self.r_step = self.code.prgm.acquire_register()
      self.code.add(cal.mov(self.r_step, self.r_loop_consts.z))

    if self.r_count is not None:
      self.current_count = self.r_count

    self.code.add(cal.whileloop())

    return
Esempio n. 6
0
def cal_nb_generate(n_bodies, dt):
  code = env.InstructionStream()
  cal.set_active_code(code)
  fn_bodies = float(n_bodies)

  r_count = code.acquire_register()
  r_lpos = code.acquire_register()
  r_rpos = code.acquire_register()
  r_force = code.acquire_register()
  r_diff = code.acquire_register()
  r_dist_vec = code.acquire_register()
  r_dist = code.acquire_register()
  r_force_tmp = code.acquire_register()
  r_force_vec = code.acquire_register()
  r_vel = code.acquire_register()

  #code.add("dcl_input_position_interp(linear_noperspective) v0.x___")
  cal.dcl_input(reg.v0.x___, USAGE=cal.usage.pos, INTERP=cal.interp.linear_noperspective)
  r_bodies = code.acquire_register((fn_bodies,) * 4)
  r_G = code.acquire_register((G,) * 4)
  r_dt = code.acquire_register((dt,) * 4)
  cal.dcl_output(reg.o0, USAGE=cal.usage.generic)
  cal.dcl_output(reg.o1, USAGE=cal.usage.generic)
  cal.dcl_resource(0, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # positions
  cal.dcl_resource(1, cal.pixtex_type.oned, cal.fmt.float, UNNORM=True) # velocities

  # Loop over all other points to calculate the force
  cal.mov(r_count, r_count('0000'))                  # loop counter
  cal.sample(0, 0, r_lpos, reg.v0.x)        # Local position
  cal.mov(r_force, r_force('0000'))                  # total force


  # Compute force using input from every other point
  cal.whileloop()
  # Break if end of points reached
  cal.breakc(cal.relop.ge, r_count, r_bodies)

  cal.sample(0, 0, r_rpos, r_count.x)       # Remote position

  # d_xyz
  cal.sub(r_diff, r_lpos.xyz0, r_rpos.xyz0)   # local pos - remote pos

  # dist_tmp
  cal.mul(r_dist_vec, r_diff.xxxx, r_diff.xxxx)
  cal.mad(r_dist_vec, r_diff.yyyy, r_diff.yyyy, r_dist_vec)
  cal.mad(r_dist_vec, r_diff.zzzz, r_diff.zzzz, r_dist_vec)
  
  # distance
  # TODO - skip rest of force computation if distance is 0
  cal.sqrt_vec(r_dist, r_dist_vec)

  # force G * ((m[i] * m[j]) / dist_tmp)
  cal.mul(r_force_tmp, r_lpos.wwww, r_rpos.wwww)
  cal.div(cal.zeroop.zero, r_force_tmp, r_force_tmp, r_dist_vec)
  cal.mul(r_force_tmp, r_force_tmp, r_G)

  # f_xyz
  cal.div(cal.zeroop.zero, r_force_vec, r_diff.xyz0, r_dist.xyz1)
  cal.mul(r_force_vec, r_force_vec.xyz0, r_force_tmp.xyz0)

  cal.sub(r_force, r_force.xyz0, r_force_vec.xyz0)


  # Increment loop counter, end loop
  cal.add(r_count, r_count, r_count('1111'))
  cal.endloop()

  # Acceleration
  cal.div(cal.zeroop.zero, r_force, r_force.xyz0, r_lpos.wwww)

  # Velocity
  cal.sample(1, 1, r_vel, reg.v0.x)    # Load velocity
  cal.mad(r_vel, r_force, r_dt, r_vel)
  cal.mov(reg.o1, r_vel)

  # Position
  cal.mad(reg.o0, r_vel.xyz0, r_dt.xyz0, r_lpos.xyzw)

  return code
Esempio n. 7
0
def cal_nb_generate_local(n_bodies, dt, steps):
  code = env.InstructionStream()
  cal.set_active_code(code)
  fn_bodies = float(n_bodies)
  steps = float(steps)

  r_count = code.acquire_register()
  r_step = code.acquire_register()
  r_lpos = code.acquire_register()
  r_rpos = code.acquire_register()
  r_force = code.acquire_register()
  r_diff = code.acquire_register()
  r_dist_vec = code.acquire_register()
  r_dist = code.acquire_register()
  r_force_tmp = code.acquire_register()
  r_force_vec = code.acquire_register()
  r_vel = code.acquire_register()

  print "fn_bodies", fn_bodies

  code.add("dcl_input_position_interp(linear_noperspective) v0.xy__")
  #cal.dcl_input(reg.v0.x___, USAGE=cal.usage.pos, INTERP=cal.interp.linear_noperspective)
  r_numsteps = code.acquire_register((steps,) * 4)
  r_bodies = code.acquire_register((fn_bodies,) * 4)
  #r_bodiesquare = code.acquire_register((float(fn_bodies**2),) * 4)
  r_G = code.acquire_register((G,) * 4)
  r_dt = code.acquire_register((dt,) * 4)
  cal.dcl_output(reg.o0, USAGE=cal.usage.generic)
  cal.dcl_output(reg.o1, USAGE=cal.usage.generic)
  cal.dcl_output(reg.o2, USAGE=cal.usage.generic)
  cal.dcl_resource(0, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # positions
  cal.dcl_resource(1, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # velocities

  r_foo = code.acquire_register()
  cal.mov(r_foo, r_foo('0000'))

  r_gpos = code.acquire_register()
  cal.mad(r_gpos, reg.v0.y, r_bodies.x, reg.v0.x)

  r_gvel = code.acquire_register()
  cal.mad(r_gvel, r_bodies.x, r_bodies.x, r_gpos)

  cal.ftoi(r_gpos, r_gpos)
  cal.ftoi(r_gvel, r_gvel)

  cal.sample(0, 0, r_lpos, reg.v0.xy)                # Local position
  cal.sample(1, 1, r_vel, reg.v0.xy)    # Load velocity

  cal.mov(reg.g[r_gpos.x], r_lpos)
  cal.mov(reg.g[r_gvel.x], r_vel)

  cal.mov(r_step, r_step('0000'))

  cal.whileloop()
  cal.breakc(cal.relop.ge, r_step.x, r_numsteps)

  cal.mov(r_count, r_count('0000'))                  # loop counter

  cal.whileloop()
  cal.breakc(cal.relop.ge, r_count.x, r_bodies)

  cal.add(r_foo, r_foo, r_foo('1111'))

  # calculate force
  r_tmp = code.acquire_register()
  cal.ftoi(r_tmp, r_count)

  cal.mov(r_rpos, reg.g[r_tmp.x])

  # d_xyz
  cal.sub(r_diff, r_lpos.xyz0, r_rpos.xyz0)   # local pos - remote pos

  # dist_tmp
  cal.mul(r_dist_vec, r_diff.xxxx, r_diff.xxxx)
  cal.mad(r_dist_vec, r_diff.yyyy, r_diff.yyyy, r_dist_vec)
  cal.mad(r_dist_vec, r_diff.zzzz, r_diff.zzzz, r_dist_vec)
  
  # distance
  # TODO - skip rest of force computation if distance is 0
  cal.sqrt_vec(r_dist, r_dist_vec)

  # force G * ((m[i] * m[j]) / dist_tmp)
  cal.mul(r_force_tmp, r_lpos.wwww, r_rpos.wwww)
  cal.div(r_force_tmp, r_force_tmp, r_dist_vec, ZEROOP = cal.zeroop.zero)
  cal.mul(r_force_tmp, r_force_tmp, r_G)

  # f_xyz
  # TODO - whats going on, is this right?
  cal.div(r_force_vec, r_diff.xyz0, r_dist.xyz1, ZEROOP = cal.zeroop.zero)
  cal.mul(r_force_vec, r_force_vec.xyz0, r_force_tmp.xyz0)

  cal.sub(r_force, r_force.xyz0, r_force_vec.xyz0)

  cal.add(r_count, r_count, r_count('1111'))
  cal.endloop()

  # Acceleration
  cal.div(r_force, r_force.xyz0, r_lpos.wwww, ZEROOP = cal.zeroop.zero)

  # Velocity
  cal.mad(r_vel, r_force, r_dt, r_vel)

  # Position
  cal.mad(reg.o0, r_vel.xyz0, r_dt.xyz0, r_lpos.xyzw)

  # store updated pos and vel
  cal.mov(reg.g[r_gpos.x], r_lpos)
  cal.mov(reg.g[r_gvel.x], r_vel)

  cal.add(r_step, r_step, r_step('1111'))
  cal.endloop()

  cal.mov(reg.o0, r_lpos)
  cal.mov(reg.o1, r_vel)
  cal.mov(reg.o2, r_foo)
  return code
Esempio n. 8
0
def cal_nb_generate_2d(prgm, n_bodies, dt):
  code = prgm.get_stream()
  cal.set_active_code(code)
  fn_bodies = float(n_bodies)

  #r_cx = prgm.acquire_register()
  #r_cy = prgm.acquire_register()
  r_count = prgm.acquire_register()
  r_lpos = prgm.acquire_register()
  r_rpos = prgm.acquire_register()
  r_force = prgm.acquire_register()
  r_diff = prgm.acquire_register()
  r_dist_vec = prgm.acquire_register()
  r_dist = prgm.acquire_register()
  r_force_tmp = prgm.acquire_register()
  r_force_vec = prgm.acquire_register()
  r_vel = prgm.acquire_register()

  #code.add("dcl_input_position_interp(linear_noperspective) v0.xy__")
  cal.dcl_input(reg.v0.xy__, USAGE=cal.usage.pos, INTERP=cal.interp.linear_noperspective)
  r_bodies = prgm.acquire_register((fn_bodies,) * 4)
  r_G = prgm.acquire_register((G,) * 4)
  r_dt = prgm.acquire_register((dt,) * 4)
  cal.dcl_output(reg.o0, USAGE=cal.usage.generic)
  cal.dcl_output(reg.o1, USAGE=cal.usage.generic)
  cal.dcl_resource(0, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # positions
  cal.dcl_resource(1, cal.pixtex_type.twod, cal.fmt.float, UNNORM=True) # velocities

  # Loop over all other points to calculate the force
  cal.mov(r_count, r_count('0000'))                  # loop counter
  #cal.mov(r_cx, r_cx('0000'))                  # loop counter
  #cal.mov(r_cy, r_cy('0000'))                  # loop counter
  cal.sample(0, 0, r_lpos, reg.v0.xy)                # Local position
  cal.mov(r_force, r_force('0000'))                  # total force


  # Compute force using input from every other point
  cal.whileloop()
  cal.breakc(cal.relop.ge, r_count.x, r_bodies)

  cal.mov(r_count, r_count.x0zw)

  cal.whileloop()
  cal.breakc(cal.relop.ge, r_count.y, r_bodies)

  #for i in xrange(0, 4):
  #cal.add(r_count, r_cx('x000'), r_cy('0x00'))
  cal.sample(0, 0, r_rpos, r_count.xy)       # Remote position

  # d_xyz
  cal.sub(r_diff, r_lpos.xyz0, r_rpos.xyz0)   # local pos - remote pos

  # dist_tmp
  #cal.mul(r_dist_vec, r_diff.xxxx, r_diff.xxxx)
  #cal.mad(r_dist_vec, r_diff.yyyy, r_diff.yyyy, r_dist_vec)
  #cal.mad(r_dist_vec, r_diff.zzzz, r_diff.zzzz, r_dist_vec)
  cal.dp3(r_dist_vec, r_diff, r_diff, IEEE = False)
  
  # distance
  # TODO - skip rest of force computation if distance is 0
  cal.sqrt_vec(r_dist, r_dist_vec)

  # force G * ((m[i] * m[j]) / dist_tmp)
  cal.mul(r_force_tmp, r_lpos.wwww, r_rpos.wwww, IEEE = False)
  cal.div(r_force_tmp, r_force_tmp, r_dist_vec, ZEROOP = cal.zeroop.zero)
  cal.mul(r_force_tmp, r_force_tmp, r_G, IEEE = False)

  # f_xyz
  # TODO - whats going on, is this right?
  cal.div(r_force_vec, r_diff.xyz0, r_dist.xyz1, ZEROOP = cal.zeroop.zero)
  cal.mul(r_force_vec, r_force_vec.xyz0, r_force_tmp.xyz0, IEEE = False)

  cal.sub(r_force, r_force.xyz0, r_force_vec.xyz0)

  #cal.add(r_cy, r_cy, r_count('1111'))

  #cal.add(r_count, r_count, r_count('0100'))
  #cal.ifc(cal.relop.ge, r_count.y, r_bodies.y)
  ## TODO - can I merge these two?
  #cal.mov(r_count('_y__'), r_count('x0zw'))
  #cal.add(r_count, r_count, r_count('1000'))
  #cal.endif()

  # Increment loop counter, end loop
  cal.add(r_count, r_count, r_count('0100'))
  cal.endloop()

  cal.add(r_count, r_count, r_count('1000'))
  #cal.add(r_cx, r_cx, r_cx('1111'))
  cal.endloop()

  # Acceleration
  cal.div(r_force, r_force.xyz0, r_lpos.wwww, ZEROOP = cal.zeroop.zero)

  # Velocity
  cal.sample(1, 1, r_vel, reg.v0.xy)    # Load velocity
  cal.mad(r_vel, r_force, r_dt, r_vel, IEEE = False)
  cal.mov(reg.o1, r_vel)

  # Position
  cal.mad(reg.o0, r_vel.xyz0, r_dt.xyz0, r_lpos.xyzw, IEEE = False)

  #cal.mov(reg.g[0], r_vel)

  return code