Ejemplo n.º 1
0
    def getDeviceProps(self):
        '''Query the CUDA device properties and retarget the build command.

        The properties are read from the file ``enum_cuda_props.cu.o.props`` in
        the current working directory. When that file does not exist, the query
        program held in ``CUDA_DEVICE_QUERY_SKELET`` is written to
        ``enum_cuda_props.cu``, compiled with nvcc and executed, and the
        temporary source and executable are removed afterwards. This method
        never writes the props file itself; presumably the query program
        produces it -- confirm against CUDA_DEVICE_QUERY_SKELET.

        Every non-blank line of the props file is parsed with
        ``ast.literal_eval`` and must yield a sequence whose first item is the
        property name and whose second item is its value.

        Side effect: when ``self.tinfo`` is set, ``self.tinfo.build_cmd`` is
        rewritten -- a command starting with ``gcc`` is replaced by ``nvcc``,
        ``-arch=sm_<major><minor>`` is appended unless an ``-arch`` option is
        already present, and ``@CFLAGS`` is appended when the performance
        parameters contain ``CFLAGS``.

        Failures (nvcc missing, compile/run errors, unreadable or malformed
        props file, no CUDA device) are reported through ``g.err``.

        Returns:
            dict mapping each property name to its value.
        '''

        # names of the query source, its executable and the cached output
        qsrc = "enum_cuda_props.cu"
        qexec = qsrc + ".o"
        qout = qexec + ".props"
        if not os.path.exists(qout):
            # check for nvcc
            qcmd = 'which nvcc'
            status = os.system(qcmd)
            if status != 0:
                g.err("%s: could not locate nvcc with '%s'" %
                      (self.__class__, qcmd))

            # write the query code; 'with' closes the file even on failure
            try:
                with open(qsrc, 'w') as f:
                    f.write(CUDA_DEVICE_QUERY_SKELET)
            except EnvironmentError:
                g.err('%s: cannot open file for writing: %s' %
                      (self.__class__, qsrc))

            # compile the query
            cmd = 'nvcc -o %s %s' % (qexec, qsrc)
            status = os.system(cmd)
            if status:
                g.err('%s: failed to compile cuda device query code: "%s"' %
                      (self.__class__, cmd))

            # execute the query
            runcmd = './%s' % (qexec)
            status = os.system(runcmd)
            if status:
                g.err('%s: failed to execute cuda device query code: "%s"' %
                      (self.__class__, runcmd))
            os.remove(qsrc)
            os.remove(qexec)

        # read device properties
        props = {}
        try:
            with open(qout, 'r') as f:
                for line in f:
                    # tolerate blank lines instead of failing the whole read
                    if not line.strip():
                        continue
                    eline = ast.literal_eval(line)
                    props[eline[0]] = eline[1]
        except EnvironmentError:
            g.err('%s: cannot open query output file for reading: %s' %
                  (self.__class__, qout))
        except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
            # a malformed line is a different failure than an unreadable file
            g.err('%s: cannot parse query output file: %s' %
                  (self.__class__, qout))

        if props['devId'] == -2:
            g.err("%s: there is no CUDA 1.0 enabled GPU on this machine" %
                  self.__class__)

        if props['major'] < 2 and props['minor'] < 3:
            g.warn(
                "%s: running on compute capability less than 1.3 is not recommended, detected %s.%s."
                % (self.__class__, props['major'], props['minor']))

        # set the arch to the latest supported by the device; without a tinfo
        # object there is no build command to update, so skip the rewrite
        # (the previous code dereferenced self.tinfo even when it was None)
        if self.tinfo is not None:
            bcmd = self.tinfo.build_cmd
            if bcmd.startswith('gcc'):
                bcmd = 'nvcc'
            if '-arch' not in bcmd:
                bcmd += ' -arch=sm_' + str(props['major']) + str(props['minor'])
            if 'CFLAGS' in self.perf_params and '@CFLAGS' not in bcmd:
                bcmd += ' @CFLAGS'
            self.tinfo.build_cmd = bcmd

        # return queried device props
        return props
Ejemplo n.º 2
0
    def getDeviceProps(self):
      '''Get the CUDA device properties and adjust the build command for nvcc.

      Reads ``enum_cuda_props.cu.o.props`` from the current working directory.
      If the file is missing, the query source ``CUDA_DEVICE_QUERY_SKELET`` is
      written to ``enum_cuda_props.cu``, compiled with nvcc and run, then the
      source and executable are deleted. The props file is not written here;
      presumably the query program creates it -- TODO confirm.

      Each non-blank line of the props file is evaluated with
      ``ast.literal_eval``; item 0 of the result is used as the property name
      and item 1 as its value.

      When ``self.tinfo`` is not None its ``build_cmd`` is updated: a command
      starting with ``gcc`` becomes ``nvcc``, ``-arch=sm_<major><minor>`` is
      appended if no ``-arch`` option is present, and ``@CFLAGS`` is appended
      if ``self.perf_params`` has a ``CFLAGS`` entry.

      Problems are reported via ``g.err`` (and a low compute capability via
      ``g.warn``).

      Returns:
        dict of property name -> value.
      '''

      # file names used by the device query
      qsrc  = "enum_cuda_props.cu"
      qexec = qsrc + ".o"
      qout  = qexec + ".props"
      if not os.path.exists(qout):
        # check for nvcc
        qcmd = 'which nvcc'
        status = os.system(qcmd)
        if status != 0:
          g.err("%s: could not locate nvcc with '%s'" % (self.__class__, qcmd))

        # write the query code; the context manager guarantees the close
        try:
          with open(qsrc, 'w') as f:
            f.write(CUDA_DEVICE_QUERY_SKELET)
        except EnvironmentError:
          g.err('%s: cannot open file for writing: %s' % (self.__class__, qsrc))

        # compile the query
        cmd = 'nvcc -o %s %s' % (qexec, qsrc)
        status = os.system(cmd)
        if status:
          g.err('%s: failed to compile cuda device query code: "%s"' % (self.__class__, cmd))

        # execute the query
        runcmd = './%s' % (qexec)
        status = os.system(runcmd)
        if status:
          g.err('%s: failed to execute cuda device query code: "%s"' % (self.__class__, runcmd))
        os.remove(qsrc)
        os.remove(qexec)

      # read device properties
      props = {}
      try:
        with open(qout, 'r') as f:
          for line in f:
            # skip blank lines rather than treating them as a parse failure
            if not line.strip():
              continue
            eline = ast.literal_eval(line)
            props[eline[0]] = eline[1]
      except EnvironmentError:
        g.err('%s: cannot open query output file for reading: %s' % (self.__class__, qout))
      except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
        # distinguish malformed content from an unreadable file
        g.err('%s: cannot parse query output file: %s' % (self.__class__, qout))

      if props['devId'] == -2:
        g.err("%s: there is no CUDA 1.0 enabled GPU on this machine" % self.__class__)

      if props['major'] < 2 and props['minor'] < 3:
        g.warn("%s: running on compute capability less than 1.3 is not recommended, detected %s.%s." % (self.__class__, props['major'], props['minor']))

      # set the arch to the latest supported by the device; with no tinfo
      # there is nothing to store the command on, so the rewrite is skipped
      # (previously self.tinfo.build_cmd was assigned even when tinfo was None)
      if self.tinfo is not None:
        bcmd = self.tinfo.build_cmd
        if bcmd.startswith('gcc'):
          bcmd = 'nvcc'
        if '-arch' not in bcmd:
          bcmd += ' -arch=sm_' + str(props['major']) + str(props['minor'])
        if 'CFLAGS' in self.perf_params and '@CFLAGS' not in bcmd:
          bcmd += ' @CFLAGS'
        self.tinfo.build_cmd = bcmd

      # return queried device props
      return props
Ejemplo n.º 3
0
    def readTransfArgs(self, perf_params, transf_args):
        '''Process the given transformation arguments.

        Each entry of transf_args is unpacked as (name, rhs, line_no). The rhs
        expression is evaluated with eval() using the performance parameters
        as its global namespace, and the result is bound to the local variable
        that corresponds to the argument name. The CUDA-style names
        threadCount and blockCount are accepted as aliases of
        workItemsPerGroup and workGroups and trigger a warning via g.warn.

        A failing evaluation and an unrecognized argument name are both
        reported through g.err.

        NOTE(review): the portion of this method visible here ends after the
        loop -- the collected values and `errors` are not consumed and nothing
        is returned; presumably the method continues elsewhere. All locals
        are kept under their original names for that reason.
        '''

        # expected argument names
        PLATFORM = 'platform'
        DEVICE = 'device'
        WORKGROUPS = 'workGroups'
        WORKITEMS = 'workItemsPerGroup'
        CB = 'cacheBlocks'
        STREAMCOUNT = 'streamCount'
        UIF = 'unrollInner'
        CLFLAGS = 'clFlags'
        THREADCOUNT = 'threadCount'
        BLOCKCOUNT = 'blockCount'
        VECHINT = 'vecHint'
        SIZEHINT = 'sizeHint'

        # default argument values
        platform = 0
        device = 0
        workGroups = None
        workItemsPerGroup = None
        cacheBlocks = False
        streamCount = 1
        unrollInner = None
        clFlags = None
        vecHint = 0
        sizeHint = False

        # eval() inserts a '__builtins__' entry into the globals dict it is
        # given; evaluate against a copy so the caller's perf_params stays
        # untouched
        eval_env = dict(perf_params)

        # iterate over all transformation arguments
        errors = ''
        for aname, rhs, line_no in transf_args:
            # evaluate the RHS expression
            # NOTE(review): eval() runs arbitrary expressions -- the
            # transformation arguments must come from a trusted source
            try:
                rhs = eval(rhs, eval_env)
            except Exception as e:
                g.err(
                    'orio.module.loop.submodule.opencl.opencl: %s: failed to evaluate the argument expression: %s\n --> %s: %s'
                    % (line_no, rhs, e.__class__.__name__, e))

            if aname == PLATFORM:
                # TODO: validate
                platform = rhs
            elif aname == DEVICE:
                # TODO: validate
                device = rhs
            elif aname == WORKGROUPS:
                # TODO: validate
                workGroups = rhs
            elif aname == WORKITEMS:
                # TODO: validate
                workItemsPerGroup = rhs
            elif aname == CB:
                # TODO: validate
                cacheBlocks = rhs
            elif aname == STREAMCOUNT:
                # TODO: validate
                streamCount = rhs
            elif aname == UIF:
                # TODO: validate
                unrollInner = rhs
            elif aname == CLFLAGS:
                clFlags = rhs
            elif aname == THREADCOUNT:
                # CUDA spelling accepted for convenience
                g.warn(
                    "Interpreting CUDA threadCount as OpenCL workItemsPerGroup"
                )
                workItemsPerGroup = rhs
            elif aname == BLOCKCOUNT:
                # CUDA spelling accepted for convenience
                g.warn("Interpreting CUDA blockCount as OpenCL workGroups")
                workGroups = rhs
            elif aname == VECHINT:
                vecHint = rhs
            elif aname == SIZEHINT:
                sizeHint = rhs
            else:
                g.err('%s: %s: unrecognized transformation argument: "%s"' %
                      (self.__class__, line_no, aname))
Ejemplo n.º 4
0
    def readTransfArgs(self, perf_params, transf_args):
        '''Process the given transformation arguments.

        transf_args is iterated as (name, rhs, line_no) triples. Every rhs is
        evaluated with eval(), with the performance parameters serving as the
        global namespace, and the value is stored in the local that matches
        the argument name. threadCount and blockCount (CUDA names) are mapped
        onto workItemsPerGroup and workGroups after a g.warn message.

        g.err is called when an expression cannot be evaluated and when an
        argument name is not recognized.

        NOTE(review): as visible here the method stops after the loop without
        using the gathered values or `errors` and without returning anything;
        presumably more code follows. Local names are left unchanged so any
        such continuation keeps working.
        '''

        # expected argument names
        PLATFORM    = 'platform'
        DEVICE      = 'device'
        WORKGROUPS  = 'workGroups'
        WORKITEMS   = 'workItemsPerGroup'
        CB          = 'cacheBlocks'
        STREAMCOUNT = 'streamCount'
        UIF         = 'unrollInner'
        CLFLAGS     = 'clFlags'
        THREADCOUNT = 'threadCount'
        BLOCKCOUNT  = 'blockCount'
        VECHINT     = 'vecHint'
        SIZEHINT    = 'sizeHint'

        # default argument values
        platform = 0
        device = 0
        workGroups  = None
        workItemsPerGroup   = None
        cacheBlocks  = False
        streamCount  = 1
        unrollInner  = None
        clFlags      = None
        vecHint      = 0
        sizeHint     = False

        # eval() adds a '__builtins__' key to the globals dict passed to it;
        # use a private copy so perf_params is not modified behind the
        # caller's back
        eval_env = dict(perf_params)

        # iterate over all transformation arguments
        errors = ''
        for aname, rhs, line_no in transf_args:
            # evaluate the RHS expression
            # NOTE(review): eval() executes arbitrary expressions -- only
            # trusted transformation arguments may reach this point
            try:
                rhs = eval(rhs, eval_env)
            except Exception as e:
                g.err('orio.module.loop.submodule.opencl.opencl: %s: failed to evaluate the argument expression: %s\n --> %s: %s' % (line_no, rhs,e.__class__.__name__, e))

            if aname == PLATFORM:
                # TODO: validate
                platform = rhs
            elif aname == DEVICE:
                # TODO: validate
                device = rhs
            elif aname == WORKGROUPS:
                # TODO: validate
                workGroups = rhs
            elif aname == WORKITEMS:
                # TODO: validate
                workItemsPerGroup = rhs
            elif aname == CB:
                # TODO: validate
                cacheBlocks = rhs
            elif aname == STREAMCOUNT:
                # TODO: validate
                streamCount = rhs
            elif aname == UIF:
                # TODO: validate
                unrollInner = rhs
            elif aname == CLFLAGS:
                clFlags = rhs
            elif aname == THREADCOUNT:
                # CUDA name, treated as the OpenCL equivalent
                g.warn("Interpreting CUDA threadCount as OpenCL workItemsPerGroup")
                workItemsPerGroup = rhs
            elif aname == BLOCKCOUNT:
                # CUDA name, treated as the OpenCL equivalent
                g.warn("Interpreting CUDA blockCount as OpenCL workGroups")
                workGroups = rhs
            elif aname == VECHINT:
                vecHint = rhs
            elif aname == SIZEHINT:
                sizeHint = rhs
            else:
                g.err('%s: %s: unrecognized transformation argument: "%s"' % (self.__class__, line_no, aname))