Ejemplo n.º 1
0
Archivo: prof.py Proyecto: jpaasen/cos
def getInstructions(app_name='testEfficiency_default',
                    functions=['gauss_solve','buildR_kernel','amplitude_capon'],
                    M=16,L=8):
    """Count the instructions issued by each kernel, as reported by nvprof.

    Runs ``./<app_name> testEfficiency testMVDRKernelPerformance M L`` under
    nvprof with the four ``inst_issued*`` events enabled and scans the
    profiler's stdout.  Every line that contains one of the names in
    `functions` (with at least one character on either side) marks the start
    of that kernel's section; the lines that follow are read in the order of
    the requested events, one line per event, and the second
    whitespace-separated field in front of the event name is taken as the
    count.

    The four counts are combined as
    inst_issued1_0 + 2*inst_issued2_0 + inst_issued1_1 + 2*inst_issued2_1
    (formula attributed to the CUPTI Users manual by the original author).

    Returns a 1-D array with one total per entry of `functions`, or a zero
    vector of length 3 when the module-level ONLY_TIME flag is set.

    Raises Exception carrying the profiler's stderr text whenever stderr is
    not empty; the captured stdout is printed first.
    """
    if ONLY_TIME:
        # NOTE(review): the length 3 is hard-coded; it equals len(functions)
        # only for the default argument.
        return np.zeros((3,))

    events = "inst_issued1_0,inst_issued2_0,inst_issued1_1,inst_issued2_1"
    events_list = re.split(',', events)

    command = ("nvprof -u ns --devices 0 --events %s ./%s testEfficiency testMVDRKernelPerformance %d %d"
               % (events, app_name, M, L))
    profiler = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
    profiler_output, error = profiler.communicate()
    if error != '':
        print(profiler_output)
        raise Exception(error)

    lines = re.split("\n+", profiler_output)
    counts = np.zeros((len(functions), len(events_list)))
    for start, line in enumerate(lines):
        for row, function in enumerate(functions):
            if not re.search(".+%s.+" % function, line):
                continue
            # NOTE(review): the look-ahead below is not bounds-checked; a
            # match close to the end of the output raises IndexError.
            for col, event in enumerate(events_list):
                fields = re.sub(r"^\s*", '', lines[start + col + 1])
                fields = re.sub(r"\s*%s.*" % event, '', fields)
                counts[row, col] = float(re.split(r"\s+", fields)[1])

    # Dual-issue events are weighted by two.
    return counts[:, 0] + 2*counts[:, 1] + counts[:, 2] + 2*counts[:, 3]
Ejemplo n.º 2
0
def getMemoryOps(app_name='testEfficiency_default',
                 functions=['gauss_solve', 'buildR_kernel', 'amplitude_capon'],
                 M=16,
                 L=4):
    """Collect per-kernel memory-operation event counts from nvprof.

    Runs ``./<app_name> testEfficiency testMVDRKernelPerformance M L`` under
    nvprof with the events gld_request, gst_request, shared_load and
    shared_store enabled and scans the profiler's stdout.  Every line that
    contains one of the names in `functions` (with at least one character on
    either side) marks the start of that kernel's section; the lines that
    follow are read in the order of the requested events, one line per
    event, and the second whitespace-separated field in front of the event
    name is taken as the count.

    Returns an array of shape (len(functions), 4) with one column per event,
    or a zero vector of length 3 when the module-level ONLY_TIME flag is set.

    Raises Exception carrying the profiler's stderr text whenever stderr is
    not empty; the captured stdout is printed first.
    """
    if ONLY_TIME:
        # NOTE(review): this placeholder is 1-D with a hard-coded length of
        # 3, unlike the 2-D result of the profiling path.
        return np.zeros((3, ))

    events = "gld_request,gst_request,shared_load,shared_store"
    events_list = re.split(',', events)

    command = ("nvprof -u ns --devices 0 --events %s ./%s testEfficiency testMVDRKernelPerformance %d %d"
               % (events, app_name, M, L))
    profiler = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
    profiler_output, error = profiler.communicate()
    if error != '':
        print(profiler_output)
        raise Exception(error)

    lines = re.split("\n+", profiler_output)
    memops = np.zeros((len(functions), len(events_list)))
    for start, line in enumerate(lines):
        for row, function in enumerate(functions):
            if not re.search(".+%s.+" % function, line):
                continue
            # NOTE(review): the look-ahead below is not bounds-checked; a
            # match close to the end of the output raises IndexError.
            for col, event in enumerate(events_list):
                fields = re.sub(r"^\s*", '', lines[start + col + 1])
                fields = re.sub(r"\s*%s.*" % event, '', fields)
                memops[row, col] = float(re.split(r"\s+", fields)[1])

    return memops
Ejemplo n.º 3
0
 def setUp(self):
    """Create a random complex 5x5 matrix R and a random length-5 vector a.

    Real and imaginary parts are drawn independently from a normal
    distribution with mean 2 and standard deviation 3.
    """
    mean, spread = 2, 3

    def draw():
       # The real part is drawn before the imaginary part.
       return complex(np.random.normal(mean, spread),
                      np.random.normal(mean, spread))

    self.n = 5
    self.R = np.zeros((self.n, self.n), dtype=complex)
    self.a = np.zeros((self.n,), dtype=complex)

    n_rows, n_cols = self.R.shape
    for col in range(n_cols):
       # One vector entry, then the matching column of R.
       self.a[col] = draw()
       for row in range(n_rows):
          self.R[row, col] = draw()
Ejemplo n.º 4
0
 def setUp(self):
     """Create a random complex 5x5 matrix R and a random length-5 vector a.

     Real and imaginary parts are drawn independently from a normal
     distribution with mean 2 and standard deviation 3.
     """
     mean, spread = 2, 3

     def draw():
         # The real part is drawn before the imaginary part.
         return complex(np.random.normal(mean, spread),
                        np.random.normal(mean, spread))

     self.n = 5
     self.R = np.zeros((self.n, self.n), dtype=complex)
     self.a = np.zeros((self.n, ), dtype=complex)

     n_rows, n_cols = self.R.shape
     for col in range(n_cols):
         # One vector entry, then the matching column of R.
         self.a[col] = draw()
         for row in range(n_rows):
             self.R[row, col] = draw()
Ejemplo n.º 5
0
Archivo: prof.py Proyecto: jpaasen/cos
def getTimings(app_name='testEfficiency_default',
               functions=['gauss_solve','buildR_kernel','amplitude_capon'],
               M=8,L=4):
    """Extract per-kernel timings from nvprof's summary output.

    Runs ``./<app_name> testEfficiency testMVDRKernelPerformance M L`` under
    ``nvprof -u ns`` and scans stdout line by line.  A line that contains one
    of the names in `functions` (with at least one character on either side)
    is stripped of its leading whitespace and of everything from the kernel
    name onwards; the remaining whitespace-separated fields are converted to
    floats and stored as that kernel's row.

    Returns field number 3 (zero-based) of every row divided by 1e3, one
    value per entry of `functions`.
    NOTE(review): presumably this is nvprof's "Avg" column converted from ns
    to us -- confirm against the profiler version in use.

    When the module-level USE_CODE_COUNTERS flag is set the application is
    run without the profiler and a zero vector of length 3 is returned.

    Raises Exception carrying the profiler's stderr text whenever stderr is
    not empty; the captured stdout is printed first.
    """
    if USE_CODE_COUNTERS:
        # The application is still executed, but nothing it prints is used.
        plain_run = Popen("./%s testEfficiency testMVDRKernelPerformance %d %d" % (app_name, M, L),
                          shell=True, stdout=PIPE, stderr=PIPE)
        plain_run.communicate()
        return np.zeros((3,))

    events = ''  # no extra profiler options for the timing run
    command = ("nvprof -u ns %s ./%s testEfficiency testMVDRKernelPerformance %d %d"
               % (events, app_name, M, L))
    profiler = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
    profiler_output, error = profiler.communicate()
    if error != '':
        print(profiler_output)
        raise Exception(error)

    # 0-d placeholder; replaced by the real table at the first kernel line.
    # NOTE(review): if no line matches, the final indexing raises IndexError.
    runtimes = np.zeros([])
    for line in re.split("\n+", profiler_output):
        for row, function in enumerate(functions):
            if not re.search(".+%s.+" % function, line):
                continue
            fields = re.sub(r"^\s*", '', line)
            fields = re.sub(r"[\sa-zA-Z]*%s.+" % function, '', fields)
            columns = re.split(r"\s+", fields)
            if runtimes.ndim == 0:
                runtimes = np.zeros((len(functions), len(columns)))
            runtimes[row] = [float(column) for column in columns]
            break  # a line is attributed to the first kernel it names

    return runtimes[:, 3]/1e3
Ejemplo n.º 6
0
Archivo: prof.py Proyecto: jpaasen/cos
def getMemoryOps(app_name='testEfficiency_default',
                 functions=['gauss_solve','buildR_kernel','amplitude_capon'],
                 M=16,L=4):
    """Collect per-kernel memory-operation event counts from nvprof.

    Runs ``./<app_name> testEfficiency testMVDRKernelPerformance M L`` under
    nvprof with the events gld_request, gst_request, shared_load and
    shared_store enabled and scans the profiler's stdout.  Every line that
    contains one of the names in `functions` (with at least one character on
    either side) marks the start of that kernel's section; the lines that
    follow are read in the order of the requested events, one line per
    event, and the second whitespace-separated field in front of the event
    name is taken as the count.

    Returns an array of shape (len(functions), 4) with one column per event,
    or a zero vector of length 3 when the module-level ONLY_TIME flag is set.

    Raises Exception carrying the profiler's stderr text whenever stderr is
    not empty; the captured stdout is printed first.
    """
    if ONLY_TIME:
        # NOTE(review): this placeholder is 1-D with a hard-coded length of
        # 3, unlike the 2-D result of the profiling path.
        return np.zeros((3,))

    events = "gld_request,gst_request,shared_load,shared_store"
    events_list = re.split(',', events)

    command = ("nvprof -u ns --devices 0 --events %s ./%s testEfficiency testMVDRKernelPerformance %d %d"
               % (events, app_name, M, L))
    profiler = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
    profiler_output, error = profiler.communicate()
    if error != '':
        print(profiler_output)
        raise Exception(error)

    lines = re.split("\n+", profiler_output)
    memops = np.zeros((len(functions), len(events_list)))
    for start, line in enumerate(lines):
        for row, function in enumerate(functions):
            if not re.search(".+%s.+" % function, line):
                continue
            # NOTE(review): the look-ahead below is not bounds-checked; a
            # match close to the end of the output raises IndexError.
            for col, event in enumerate(events_list):
                fields = re.sub(r"^\s*", '', lines[start + col + 1])
                fields = re.sub(r"\s*%s.*" % event, '', fields)
                memops[row, col] = float(re.split(r"\s+", fields)[1])

    return memops
Ejemplo n.º 7
0
def uhdu(A, n):
    """Factor the Hermitian matrix A as U^H * D * U.

    U is unit upper triangular and D holds the diagonal of the middle
    factor (^H denotes the conjugate transpose).  Keeping the diagonal
    separate avoids the complex square root a Cholesky factorization needs.

    A -- matrix indexed as A[i, j]; only entries with j >= i are read
    n -- order of the leading block of A to factor

    Returns the list [U, D]; U is n x n and D has n entries, both created
    with A's dtype.
    """
    U = eye(n, dtype=A.dtype)
    D = zeros(n, dtype=A.dtype)

    for row in range(n):
        above = range(row)  # rows that are already factored

        # Diagonal entry: remove what the rows above contribute.
        diag_sum = 0
        for k in above:
            diag_sum += U[k, row] * U[k, row].conjugate() * D[k]
        D[row] = A[row, row] - diag_sum

        # Off-diagonal entries of this row of U.
        for col in range(row + 1, n):
            cross_sum = 0
            for k in above:
                cross_sum += U[k, row].conjugate() * U[k, col] * D[k]
            U[row, col] = (A[row, col] - cross_sum) / D[row]

    return [U, D]
Ejemplo n.º 8
0
def uhdu(A, n):
    """Factor the Hermitian matrix A as U^H * D * U.

    U is unit upper triangular and D holds the diagonal of the middle
    factor (^H denotes the conjugate transpose).  Keeping the diagonal
    separate avoids the complex square root a Cholesky factorization needs.

    A -- matrix indexed as A[i, j]; only entries with j >= i are read
    n -- order of the leading block of A to factor

    Returns the list [U, D]; U is n x n and D has n entries, both created
    with A's dtype.
    """
    U = eye(n, dtype=A.dtype)
    D = zeros(n, dtype=A.dtype)

    for row in range(n):
        above = range(row)  # rows that are already factored

        # Diagonal entry: remove what the rows above contribute.
        diag_sum = 0
        for k in above:
            diag_sum += U[k, row] * U[k, row].conjugate() * D[k]
        D[row] = A[row, row] - diag_sum

        # Off-diagonal entries of this row of U.
        for col in range(row + 1, n):
            cross_sum = 0
            for k in above:
                cross_sum += U[k, row].conjugate() * U[k, col] * D[k]
            U[row, col] = (A[row, col] - cross_sum) / D[row]

    return [U, D]
Ejemplo n.º 9
0
def diagonalSolve(A, b, n):
    """Solve the diagonal system A x = b for its first n unknowns.

    A -- the diagonal of the system matrix, stored as a vector
    b -- right-hand side
    n -- number of equations to solve

    Returns a length-n array with A's dtype.
    """
    solution = np.zeros(n, A.dtype)

    # Each unknown depends on its own equation only.
    for row in range(n):
        solution[row] = b[row] / A[row]

    return solution
Ejemplo n.º 10
0
def diagonalSolve(A, b, n):
    """Solve the diagonal system A x = b for its first n unknowns.

    A -- the diagonal of the system matrix, stored as a vector
    b -- right-hand side
    n -- number of equations to solve

    Returns a length-n array with A's dtype.
    """
    solution = np.zeros(n, A.dtype)

    # Each unknown depends on its own equation only.
    for row in range(n):
        solution[row] = b[row] / A[row]

    return solution
Ejemplo n.º 11
0
 def testSolveBiCG(self):
    """Check ls.solveBiCG on the fixture systems.

    The first two systems are compared with the stored reference solutions,
    the random system with numpy.linalg.solve.
    NOTE(review): the last argument of assertMatrixAlmosteEqual is
    presumably a number of matching decimals, and the meaning of the two
    trailing zeros passed to solveBiCG is not visible here -- confirm.
    """
    # System A x = b1, started from the zero vector.
    real_solution = ls.solveBiCG(self.A, self.b1, self.x0_zero, 0, 0)
    self.assertMatrixAlmosteEqual(self.x1, real_solution, 14)

    # Complex system, started from the same zero vector.
    complex_solution = ls.solveBiCG(self.complexA, self.complexb, self.x0_zero, 0, 0)
    self.assertMatrixAlmosteEqual(self.complexx, complex_solution, 13)

    # Random system: numpy's direct solver provides the reference.
    expected = np.linalg.solve(self.randA, self.randb)
    start = np.zeros(self.L, dtype=complex)
    actual = ls.solveBiCG(self.randA, self.randb, start, 0, 0)
    self.assertMatrixAlmosteEqual(expected, actual, 15)
Ejemplo n.º 12
0
def uhduGPUProto(A, n):
    """CPU prototype of the U^H * D * U factorization as planned for the GPU.

    Factors the Hermitian matrix A so that U is unit upper triangular, D is
    diagonal and U^H * D * U = A (^H denotes the conjugate transpose).  The
    separate diagonal avoids the complex square root of a Cholesky
    factorization.

    A -- matrix indexed as A[i, j]
    n -- order of the matrix

    Returns the list [U, D]; D is stored as an n x 1 column.

    NOTE(review): the two "shared" buffers only sketch the intended GPU
    memory layout: the column-sum buffer is replaced by a scalar right away
    and the row buffer is filled but never read.
    """
    U = eye([n, n], A.dtype)
    D = zeros([n, 1], A.dtype)

    column_sum = zeros([n, 1], A.dtype)  # stands in for a shared column-sum buffer
    row_buffer = zeros([n, 1], A.dtype)  # stands in for a shared row of A

    for row in range(n):

        # Stage the current row of A in the "shared" buffer.
        for k in range(n):
            row_buffer[k, 0] = A[row, k]

        above = range(row)  # rows that are already factored

        # Diagonal entry: remove what the rows above contribute.
        column_sum = 0
        for k in above:
            column_sum += U[k, row] * U[k, row].conjugate() * D[k, 0]
        D[row, 0] = A[row, row] - column_sum

        # Off-diagonal entries of this row of U.
        for col in range(row + 1, n):
            column_sum = 0
            for k in above:
                column_sum += U[k, row].conjugate() * U[k, col] * D[k, 0]
            U[row, col] = (A[row, col] - column_sum) / D[row, 0]

    return [U, D]
Ejemplo n.º 13
0
def uhduGPUProto(A, n):
    """CPU prototype of the U^H * D * U factorization as planned for the GPU.

    Factors the Hermitian matrix A so that U is unit upper triangular, D is
    diagonal and U^H * D * U = A (^H denotes the conjugate transpose).  The
    separate diagonal avoids the complex square root of a Cholesky
    factorization.

    A -- matrix indexed as A[i, j]
    n -- order of the matrix

    Returns the list [U, D]; D is stored as an n x 1 column.

    NOTE(review): the two "shared" buffers only sketch the intended GPU
    memory layout: the column-sum buffer is replaced by a scalar right away
    and the row buffer is filled but never read.
    """
    U = eye([n, n], A.dtype)
    D = zeros([n, 1], A.dtype)

    column_sum = zeros([n, 1], A.dtype)  # stands in for a shared column-sum buffer
    row_buffer = zeros([n, 1], A.dtype)  # stands in for a shared row of A

    for row in range(n):

        # Stage the current row of A in the "shared" buffer.
        for k in range(n):
            row_buffer[k, 0] = A[row, k]

        above = range(row)  # rows that are already factored

        # Diagonal entry: remove what the rows above contribute.
        column_sum = 0
        for k in above:
            column_sum += U[k, row] * U[k, row].conjugate() * D[k, 0]
        D[row, 0] = A[row, row] - column_sum

        # Off-diagonal entries of this row of U.
        for col in range(row + 1, n):
            column_sum = 0
            for k in above:
                column_sum += U[k, row].conjugate() * U[k, col] * D[k, 0]
            U[row, col] = (A[row, col] - column_sum) / D[row, 0]

    return [U, D]
Ejemplo n.º 14
0
 def testSolveBiCG(self):
    """Check ls.solveBiCG against the stored reference solutions.

    NOTE(review): the same start vector object, sized for b1, is handed to
    both solves; if solveBiCG updates it in place the second solve does not
    start from zero -- confirm.  The last argument of
    assertMatrixAlmosteEqual is presumably a number of matching decimals.
    """
    size = len(self.b1)
    start = mynp.zeros(size, dtype=complex)

    # System A x = b1.
    real_solution = ls.solveBiCG(self.A, self.b1, start, 0, size)
    self.assertMatrixAlmosteEqual(self.x1, real_solution, 14)

    # Complex system; its own length is passed as the last argument.
    size = len(self.complexb)
    complex_solution = ls.solveBiCG(self.complexA, self.complexb, start, 0, size)
    self.assertMatrixAlmosteEqual(self.complexx, complex_solution, 12)
Ejemplo n.º 15
0
def forwardSolve(A, b, n):
    """Solve the lower triangular system A x = b by forward substitution.

    A -- matrix indexed as A[i, k]; only entries with k <= i are read
    b -- right-hand side
    n -- number of equations

    Returns a length-n array with A's dtype.
    """
    solution = np.zeros(n, A.dtype)

    for row in range(n):
        # Start from the right-hand side, remove the unknowns that are
        # already solved, then divide by the pivot.
        solution[row] = b[row]
        for known in range(row):
            solution[row] -= A[row, known] * solution[known]
        solution[row] /= A[row, row]

    return solution
Ejemplo n.º 16
0
def backtrackSolve(A, b, n):
    """Solve the upper triangular system A x = b by back substitution.

    A -- matrix indexed as A[i, k]; only entries with k >= i are read
    b -- right-hand side
    n -- number of equations

    Returns a length-n array with A's dtype.
    """
    solution = np.zeros(n, A.dtype)

    # Work from the last equation up to the first.
    for row in range(n - 1, -1, -1):
        solution[row] = b[row]
        # Remove the unknowns that are already solved, last one first.
        for known in range(n - 1, row, -1):
            solution[row] -= A[row, known] * solution[known]
        solution[row] /= A[row, row]

    return solution
Ejemplo n.º 17
0
def backtrackSolve(A, b, n):
    """Solve the upper triangular system A x = b by back substitution.

    A -- matrix indexed as A[i, k]; only entries with k >= i are read
    b -- right-hand side
    n -- number of equations

    Returns a length-n array with A's dtype.
    """
    solution = np.zeros(n, A.dtype)

    # Work from the last equation up to the first.
    for row in range(n - 1, -1, -1):
        solution[row] = b[row]
        # Remove the unknowns that are already solved, last one first.
        for known in range(n - 1, row, -1):
            solution[row] -= A[row, known] * solution[known]
        solution[row] /= A[row, row]

    return solution
Ejemplo n.º 18
0
def forwardSolve(A, b, n):
    """Solve the lower triangular system A x = b by forward substitution.

    A -- matrix indexed as A[i, k]; only entries with k <= i are read
    b -- right-hand side
    n -- number of equations

    Returns a length-n array with A's dtype.
    """
    solution = np.zeros(n, A.dtype)

    for row in range(n):
        # Start from the right-hand side, remove the unknowns that are
        # already solved, then divide by the pivot.
        solution[row] = b[row]
        for known in range(row):
            solution[row] -= A[row, known] * solution[known]
        solution[row] /= A[row, row]

    return solution
Ejemplo n.º 19
0
def butlerMatrix(m, n, ix = 0):
    """Return the m x n Butler matrix used for beamspace processing.

    Row i holds row i of the normalized n-point DFT matrix, that is
    exp(-2j*pi*i*j/n) / sqrt(n) for column j.

    m  -- number of rows of the result
    n  -- number of columns (DFT length)
    ix -- optional list of beam numbers to fill instead of the first m

    NOTE(review): a beam number is also used as the row index of the
    result, so every entry of ix has to be smaller than m and rows not
    listed in ix stay zero -- confirm that this is intended.
    """
    B = mynp.zeros([m, n], dtype=complex)

    beams = range(m)
    if ix != 0:
        beams = ix

    for beam in beams:
        for col in range(n):
            phase = -1j*2*cm.pi*beam*col/n
            B[beam, col] = 1/ma.sqrt(n) * cm.exp(phase)

    return B
Ejemplo n.º 20
0
def butlerMatrix(m, n, ix=0):
    """Return the m x n Butler matrix used for beamspace processing.

    Row i holds row i of the normalized n-point DFT matrix, that is
    exp(-2j*pi*i*j/n) / sqrt(n) for column j.

    m  -- number of rows of the result
    n  -- number of columns (DFT length)
    ix -- optional list of beam numbers to fill instead of the first m

    NOTE(review): a beam number is also used as the row index of the
    result, so every entry of ix has to be smaller than m and rows not
    listed in ix stay zero -- confirm that this is intended.
    """
    B = mynp.zeros([m, n], dtype=complex)

    beams = range(m)
    if ix != 0:
        beams = ix

    for beam in beams:
        for col in range(n):
            phase = -1j * 2 * cm.pi * beam * col / n
            B[beam, col] = 1 / ma.sqrt(n) * cm.exp(phase)

    return B
Ejemplo n.º 21
0
def upinterpolate(img):
    """Upsample a 2-D image along its second axis by linear interpolation.

    img -- array of shape (Ny, Nx)

    Every pair of horizontally adjacent pixels contributes Kx = 2 samples,
    taken 1/4 and 3/4 of the way from the left pixel to the right one.

    Returns an array of shape (Ny, 2*Nx - 2) with img's dtype.
    """
    Ny, Nx = img.shape

    Kx = 2  # upsampling factor; the original author requires it to be even

    left = img[:, 0:-1]  # left neighbour of every adjacent pixel pair
    new_img = np.zeros((Ny, Kx*Nx - Kx), dtype=img.dtype)

    # Sample number `phase` of every pair lands on columns phase, phase+Kx, ...
    for phase in range(Kx):
        new_img[:, phase::Kx] = left + (phase + 0.5)*(img[:, 1:] - left)/Kx

    return new_img
   
   
Ejemplo n.º 22
0
def upinterpolate(img):
    """Upsample a 2-D image along its second axis by linear interpolation.

    img -- array of shape (Ny, Nx)

    Every pair of horizontally adjacent pixels contributes Kx = 2 samples,
    taken 1/4 and 3/4 of the way from the left pixel to the right one.

    Returns an array of shape (Ny, 2*Nx - 2) with img's dtype.
    """
    Ny, Nx = img.shape

    Kx = 2  # upsampling factor; the original author requires it to be even

    left = img[:, 0:-1]  # left neighbour of every adjacent pixel pair
    new_img = np.zeros((Ny, Kx * Nx - Kx), dtype=img.dtype)

    # Sample number `phase` of every pair lands on columns phase, phase+Kx, ...
    for phase in range(Kx):
        new_img[:, phase::Kx] = left + (phase + 0.5) * (img[:, 1:] - left) / Kx

    return new_img
Ejemplo n.º 23
0
def cholesky(A, n):
    """Return the upper triangular Cholesky factor U of A, with U^H * U = A.

    A -- Hermitian matrix indexed as A[i, j]; only entries with j >= i are read
    n -- order of the leading block of A to factor

    The result is an n x n array with A's dtype (^H denotes the conjugate
    transpose).
    """
    U = zeros([n, n], A.dtype)

    for row in range(n):
        above = range(row)  # rows that are already factored

        # Diagonal entry: square root of what the rows above leave over.
        squares = 0
        for k in above:
            squares += U[k, row] * (U[k, row]).conjugate()
        U[row, row] = (A[row, row] - squares)**0.5

        # Off-diagonal entries of this row.
        for col in range(row + 1, n):
            products = 0
            for k in above:
                products += U[k, row].conjugate() * U[k, col]
            U[row, col] = (A[row, col] - products) / U[row, row]

    return U
Ejemplo n.º 24
0
def cholesky(A, n):
    """Return the upper triangular Cholesky factor U of A, with U^H * U = A.

    A -- Hermitian matrix indexed as A[i, j]; only entries with j >= i are read
    n -- order of the leading block of A to factor

    The result is an n x n array with A's dtype (^H denotes the conjugate
    transpose).
    """
    U = zeros([n, n], A.dtype)

    for row in range(n):
        above = range(row)  # rows that are already factored

        # Diagonal entry: square root of what the rows above leave over.
        squares = 0
        for k in above:
            squares += U[k, row] * (U[k, row]).conjugate()
        U[row, row] = (A[row, row] - squares) ** 0.5

        # Off-diagonal entries of this row.
        for col in range(row + 1, n):
            products = 0
            for k in above:
                products += U[k, row].conjugate() * U[k, col]
            U[row, col] = (A[row, col] - products) / U[row, row]

    return U
Ejemplo n.º 25
0
    ##
    ##app_name = "testEfficiency_mathcheck"
    ##math_run = run()
    ##events   = "--events gld_request,gst_request,shared_load,shared_store"
    ##math_event_run = run()
    #
    #
    #return info
    ##export PYTHONPATH="/home/me/Work/UiO/Phd/Code/Profile"
    ##python testEfficiency.py testMVDRKernelPerformance
    #
    ##nvprof ./testEfficiency testEfficiency testMVDRKernelPerformance

# Parameter sweep: for a fixed M, call collectResults(M, L) for every
# L = 2 .. M and store each result as one row of a 6-column table, which is
# then written to 'time-M<M>.txt'.
# NOTE(review): the module-level name `time` would shadow the standard
# `time` module if that is imported in this file -- confirm.
M = 8
L_list = np.arange(M - 1) + 2  # L = 2, 3, ..., M
time = np.zeros((L_list.shape[0], 6))  # one row per L
for l, L in enumerate(L_list):
    time[l] = collectResults(M, L)
np.savetxt('time-M%d.txt' % M, time)

# Same sweep for M = 16.
M = 16
L_list = np.arange(M - 1) + 2  # L = 2, 3, ..., M
time = np.zeros((L_list.shape[0], 6))  # one row per L
for l, L in enumerate(L_list):
    time[l] = collectResults(M, L)
np.savetxt('time-M%d.txt' % M, time)

M = 32
L_list = np.arange(M - 1) + 2
time = np.zeros((L_list.shape[0], 6))
for l, L in enumerate(L_list):
Ejemplo n.º 26
0
   def setUp(self):
      """Build the reference data used by the test methods.

      Creates small hand-written real and complex systems together with
      stored reference results, reference Butler-type matrices, a 4x4
      matrix with rounded U and D factors, sonar data loaded from the
      ./data directory, and a random Hermitian system of order 24.
      """
      
      self.n = 3
      
      # Real 3x3 matrices stored as complex arrays.  A2 equals 2*A and the
      # upper triangular R satisfies R^T R = A up to the printed precision.
      self.A = np.array([[2.0, -1.0, 0.0], [-1.0, 2.0, -1.0], [0.0, -1.0, 2.0]],dtype=complex)
      self.A2 = np.array([[4.0, -2.0, 0.0], [-2.0, 4.0, -2.0], [0.0, -2.0, 4.0]],dtype=complex)
      self.R = np.array([[1.414213562373095, -0.707106781186547, 0.0],
              [0.0, 1.224744871391589, -0.816496580927726],
              [0.0, 0.0, 1.154700538379252]],dtype=complex)
      
      # AA equals A*A; BT is the transpose of B.
      self.AA = np.array([[5, -4, 1], [-4, 6, -4], [1, -4, 5]],dtype=complex)
      self.B = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]],dtype=complex)
      self.BT = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]],dtype=complex)
      
      # Right-hand sides and the matching solutions: A x1 = b1, A x2 = b2.
      self.b1 = np.array([1.0, 1.0, 1.0],dtype=complex)
      self.x1 = np.array([3.0/2, 2.0, 3.0/2],dtype=complex)

      self.b2 = np.array([1, 2, 3],dtype=complex)
      self.x2 = np.array([5.0/2, 4, 7.0/2],dtype=complex)
      
      # Upper triangular system: C x1c = b1c and C^T x1cT = b1c.
      self.C = np.array([[1, 2, 3], [0, 1, 1], [0, 0, 1]],dtype=complex)
      self.b1c = np.array([1, 1, 1],dtype=complex)
      self.x1c = np.array([-2, 0, 1],dtype=complex)
      self.x1cT = np.array([1, -1, -1],dtype=complex)
      
      # Zero start vector of length 3.
      self.x0_zero = np.zeros((3,), dtype=complex)
      
      # Matrix-vector products: A b2 and A b1.
      self.Ab2 = np.array([0, 0, 4],dtype=complex)
      self.Ab1 = np.array([1, 0, 1],dtype=complex)
      
      # Reference matrices for update and inverse tests.
      # NOTE(review): Ab1b1T equals A + b2*b2^T (not b1) -- confirm the name.
      self.Ab1b1T = np.array([[3, 1, 3], [1, 6, 5], [3, 5, 11]],dtype=complex)
      self.invA = np.array([[0.750, 0.50, 0.250], [0.50, 1.0, 0.50], [0.250, 0.50, 0.750]],dtype=complex)
      self.invAb1b1T = np.array([[0.465909090909091, 0.045454545454545, -0.147727272727273],
                        [0.045454545454545, 0.272727272727273, -0.136363636363636],
                        [-0.147727272727273, -0.136363636363636, 0.193181818181818]],dtype=complex)
      
      # Complex Hermitian 3x3 system with reference vectors.
      # NOTE(review): complexR, complexy and complexx are presumably the
      # triangular factor, the intermediate and the final solution of
      # complexA x = complexb -- confirm against the tests that use them.
      self.complexA = np.array([[2.0, 3.0 + 1.0j, 2.0 - 2.0j], 
                   [3.0 - 1.0j, 9.0, -2.0j], 
                   [2.0 + 2.0j, 2.0j, 14.0]], dtype=complex)
      self.complexR = np.array([[1.414213562373095, 2.121320343559642 + 0.707106781186547j, 1.414213562373095 - 1.414213562373095j], 
                   [0.0, 2.0, -1.0 + 1.0j], 
                   [0.0, 0.0, 2.828427124746190]], dtype=complex)
      self.complexb = np.array([1.0, 1.0 + 1.0j, 1.0 - 2.0j], dtype=complex)
      self.complexy = np.array([0.707106781186547 - 0.0j,
                   -0.250 + 0.750j,
                   -0.353553390593273 - 0.883883476483184j], dtype=complex)
      self.complexx = np.array([1.593749999999999 - 0.06250j,
                   -0.343750 + 0.281250j,
                   -0.1250 - 0.31250j], dtype=complex)
      self.complexAb = np.array([2.0 - 2.0j, 8.0 + 6.0j, 14.0 - 24.0j], dtype=complex)
      
      # 4x4 and 3x3 reference matrices, presumably the expected Butler
      # matrices, and a length-2 beamspace vector -- TODO confirm.
      self.but4 = np.array([[0.5]*4, [0.5, -0.5j, -0.5, 0.5j], [0.5, -0.5]*2, [0.5, 0.5j, -0.5, -0.5j]], dtype=complex)
      self.but3 = np.array([[0.577350269189626, 0.577350269189626, 0.577350269189626],                    
                   [0.577350269189626, -0.288675134594813 - 0.50j, -0.288675134594813 + 0.50j],               
                   [0.577350269189626, -0.288675134594813 + 0.50j, -0.288675134594813 - 0.50j]], dtype=complex)
      self.bsComplexb = np.array([1.732050807568878 - 0.577350269189626j, 1.50 + 0.288675134594813j], dtype=complex)
      
      # Scalar and length-4 complex vector used by other tests.
      self.diag = 0.2
      self.x = np.array([1.0, 1.0 + 1.0j, 1.0 - 2.0j, 2.0 + 1.0j], dtype=complex)
      
      # Reference matrices for the complex system (update and inverses).
      self.complexAbbH = np.array([[10.0, 11.0 + 1.0j, 10.0 - 2.0j],
                          [11.0 - 1.0j, 17.0, 8.0 - 2.0j],
                          [10.0 + 2.0j, 8.0 + 2.0j, 22.0]], dtype=complex)
      self.complexInvAbbH = np.array([[1.067010309278351, -0.407216494845361 - 0.015463917525773j, -0.190721649484536 + 0.020618556701031j],
                             [-0.407216494845361 + 0.015463917525773j, 0.247422680412371, 0.077319587628866 - 0.015463917525773j],
                             [-0.190721649484536 - 0.020618556701031j,  0.077319587628866 + 0.015463917525773j,  0.087628865979381]], dtype=complex)  
      self.complexInvA = np.array([[1.906250, -0.593750 - 0.156250j,  -0.250 + 0.18750j],
                          [-0.593750 + 0.156250j,  0.31250, 0.06250 - 0.06250j],                    
                          [-0.250 - 0.18750j,  0.06250 + 0.06250j, 0.1250]], dtype=complex)
      
      # Hermitian 4x4 matrix with a unit upper triangular U and a diagonal
      # D given to four decimals.
      self.complexA4x4 = np.array([[22.0, 8.0, 11.0 - 11.0j, 22.0 - 7.0j],
                          [8.0, 22.0, 17.0 - 2.0j, 11.0 - 7.0j],
                          [11.0 + 11.0j, 17.0 + 2.0j, 45.0, 23.0 - 5.0j],
                          [22.0 + 7.0j, 11.0 + 7.0j, 23.0 + 5.0j, 37.0]], dtype=complex)
      self.U4x4 = np.array([[1.0000, 0.3636, 0.50 - 0.50j, 1.0 - 0.3182j],
                   [0.0, 1.0, 0.6810 + 0.1048j, 0.1571 - 0.2333j],
                   [0.0, 0.0, 1.0, 0.2776 - 0.3670j],
                   [0.0, 0.0, 0.0, 1.0]], dtype=complex)
      self.D4x4 = np.array([22.0, 19.0909, 24.9381, 5.9806])
      
      # Sonar data read from .npy files relative to the working directory.
      self.sonardata_R = np.array(np.load('./data/data_R.npy')) # created without diagonal loading
      self.sonardata_a = np.array(np.load('./data/data_a.npy'))
      self.sonardata_Ria = np.array(np.load('./data/data_Ria.npy'))
      self.sonardata_ar = np.array(np.load('./data/data_ar.npy'))
      self.sonardata_n = 32
      
      # random data for testing
      # U is a random complex upper triangular matrix with d added to its
      # diagonal; randA = U^H U is therefore Hermitian.
      self.L = L = 24
      self.d = d = 100
      U = np.triu(np.random.randn(L,L) + np.random.randn(L,L)*1j) + np.eye(L)*d
      self.randA = np.dot(U.conjugate().T, U) 
      self.randb = np.random.randn(L) + np.random.randn(L)*1j 
Ejemplo n.º 27
0
Archivo: prof.py Proyecto: jpaasen/cos
   ##
   ##app_name = "testEfficiency_mathcheck"
   ##math_run = run()
   ##events   = "--events gld_request,gst_request,shared_load,shared_store"
   ##math_event_run = run()
   #
   #
   #return info
   ##export PYTHONPATH="/home/me/Work/UiO/Phd/Code/Profile"
   ##python testEfficiency.py testMVDRKernelPerformance
   #
   ##nvprof ./testEfficiency testEfficiency testMVDRKernelPerformance

# Parameter sweep: for a fixed M, call collectResults(M, L) for every
# L = 2 .. M and store each result as one row of a 6-column table, which is
# then written to 'time-M<M>.txt'.
# NOTE(review): the module-level name `time` would shadow the standard
# `time` module if that is imported in this file -- confirm.
M = 8
L_list = np.arange(M-1)+2  # L = 2, 3, ..., M
time = np.zeros((L_list.shape[0],6))  # one row per L
for l,L in enumerate(L_list):
   time[l] = collectResults(M,L)
np.savetxt('time-M%d.txt'%M,time)

# Same sweep for M = 16.
M = 16
L_list = np.arange(M-1)+2  # L = 2, 3, ..., M
time = np.zeros((L_list.shape[0],6))  # one row per L
for l,L in enumerate(L_list):
   time[l] = collectResults(M,L)
np.savetxt('time-M%d.txt'%M,time)

M = 32
L_list = np.arange(M-1)+2
time = np.zeros((L_list.shape[0],6))
for l,L in enumerate(L_list):
Ejemplo n.º 28
0
def getTimings(app_name='testEfficiency_default',
               functions=['gauss_solve', 'buildR_kernel', 'amplitude_capon'],
               M=8,
               L=4):
    """Extract per-kernel timings from nvprof's summary output.

    Runs ``./<app_name> testEfficiency testMVDRKernelPerformance M L`` under
    ``nvprof -u ns`` and scans stdout line by line.  A line that contains one
    of the names in `functions` (with at least one character on either side)
    is stripped of its leading whitespace and of everything from the kernel
    name onwards; the remaining whitespace-separated fields are converted to
    floats and stored as that kernel's row.

    Returns field number 3 (zero-based) of every row divided by 1e3, one
    value per entry of `functions`.
    NOTE(review): presumably this is nvprof's "Avg" column converted from ns
    to us -- confirm against the profiler version in use.

    When the module-level USE_CODE_COUNTERS flag is set the application is
    run without the profiler and a zero vector of length 3 is returned.

    Raises Exception carrying the profiler's stderr text whenever stderr is
    not empty; the captured stdout is printed first.
    """
    if USE_CODE_COUNTERS:
        # The application is still executed, but nothing it prints is used.
        plain_run = Popen("./%s testEfficiency testMVDRKernelPerformance %d %d" % (app_name, M, L),
                          shell=True, stdout=PIPE, stderr=PIPE)
        plain_run.communicate()
        return np.zeros((3, ))

    events = ''  # no extra profiler options for the timing run
    command = ("nvprof -u ns %s ./%s testEfficiency testMVDRKernelPerformance %d %d"
               % (events, app_name, M, L))
    profiler = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
    profiler_output, error = profiler.communicate()
    if error != '':
        print(profiler_output)
        raise Exception(error)

    # 0-d placeholder; replaced by the real table at the first kernel line.
    # NOTE(review): if no line matches, the final indexing raises IndexError.
    runtimes = np.zeros([])
    for line in re.split("\n+", profiler_output):
        for row, function in enumerate(functions):
            if not re.search(".+%s.+" % function, line):
                continue
            fields = re.sub(r"^\s*", '', line)
            fields = re.sub(r"[\sa-zA-Z]*%s.+" % function, '', fields)
            columns = re.split(r"\s+", fields)
            if runtimes.ndim == 0:
                runtimes = np.zeros((len(functions), len(columns)))
            runtimes[row] = [float(column) for column in columns]
            break  # a line is attributed to the first kernel it names

    return runtimes[:, 3] / 1e3
Ejemplo n.º 29
0
def getInstructions(
        app_name='testEfficiency_default',
        functions=['gauss_solve', 'buildR_kernel', 'amplitude_capon'],
        M=16,
        L=8):
    """Count the instructions issued by each kernel, as reported by nvprof.

    Runs ``./<app_name> testEfficiency testMVDRKernelPerformance M L`` under
    nvprof with the four ``inst_issued*`` events enabled and scans the
    profiler's stdout.  Every line that contains one of the names in
    `functions` (with at least one character on either side) marks the start
    of that kernel's section; the lines that follow are read in the order of
    the requested events, one line per event, and the second
    whitespace-separated field in front of the event name is taken as the
    count.

    The four counts are combined as
    inst_issued1_0 + 2*inst_issued2_0 + inst_issued1_1 + 2*inst_issued2_1
    (formula attributed to the CUPTI Users manual by the original author).

    Returns a 1-D array with one total per entry of `functions`, or a zero
    vector of length 3 when the module-level ONLY_TIME flag is set.

    Raises Exception carrying the profiler's stderr text whenever stderr is
    not empty; the captured stdout is printed first.
    """
    if ONLY_TIME:
        # NOTE(review): the length 3 is hard-coded; it equals len(functions)
        # only for the default argument.
        return np.zeros((3, ))

    events = "inst_issued1_0,inst_issued2_0,inst_issued1_1,inst_issued2_1"
    events_list = re.split(',', events)

    command = ("nvprof -u ns --devices 0 --events %s ./%s testEfficiency testMVDRKernelPerformance %d %d"
               % (events, app_name, M, L))
    profiler = Popen(command, shell=True, stdout=PIPE, stderr=PIPE)
    profiler_output, error = profiler.communicate()
    if error != '':
        print(profiler_output)
        raise Exception(error)

    lines = re.split("\n+", profiler_output)
    counts = np.zeros((len(functions), len(events_list)))
    for start, line in enumerate(lines):
        for row, function in enumerate(functions):
            if not re.search(".+%s.+" % function, line):
                continue
            # NOTE(review): the look-ahead below is not bounds-checked; a
            # match close to the end of the output raises IndexError.
            for col, event in enumerate(events_list):
                fields = re.sub(r"^\s*", '', lines[start + col + 1])
                fields = re.sub(r"\s*%s.*" % event, '', fields)
                counts[row, col] = float(re.split(r"\s+", fields)[1])

    # Dual-issue events are weighted by two.
    return counts[:, 0] + 2 * counts[:, 1] + counts[:, 2] + 2 * counts[:, 3]