コード例 #1
0
ファイル: bmt.py プロジェクト: vitduck/BMT
    def download(self):
        """Fetch every URL in ``self.src`` into ``self.builddir`` using wget.

        The local file name is the last ``/``-separated component of the URL.
        A URL whose target file already exists in ``self.builddir`` is skipped.
        """
        for source_url in self.src:
            target = os.path.join(self.builddir, source_url.split('/')[-1])

            # Already present locally: nothing to fetch.
            if os.path.exists(target):
                continue

            syscmd(f'wget --no-check-certificate {source_url} -O {target}')
コード例 #2
0
ファイル: bmt.py プロジェクト: vitduck/BMT
    def run(self, redirect=0):
        """Execute ``self.runcmd``, parse the result, then pause for 5 seconds.

        Args:
            redirect: When truthy, ``self.output`` is passed to ``syscmd`` as a
                second argument so the command output is redirected to a file;
                otherwise ``syscmd`` is called with the command alone.
        """
        output_path = os.path.join(self.outdir, self.output)
        logging.info(f'{"Output":7} : {output_path}')

        # Redirect output to file only when requested.
        syscmd_args = (self.runcmd, self.output) if redirect else (self.runcmd,)
        syscmd(*syscmd_args)

        self.parse()

        time.sleep(5)
コード例 #3
0
ファイル: bmt.py プロジェクト: vitduck/BMT
 def check_prerequisite(self, module, min_ver):
     """Abort unless ``module`` on the first node is at least ``min_ver``.

     The probe command and the version-extracting regex are looked up in
     ``prerequisite.cmd`` / ``prerequisite.regex`` under the key ``module``.
     The first capture group of the regex is taken as the installed version.

     Args:
         module: Key into ``prerequisite.cmd`` and ``prerequisite.regex``.
         min_ver: Minimum acceptable version string.

     Raises:
         SystemExit: With a non-zero status when the version cannot be
             extracted from the command output or is older than ``min_ver``.
     """
     # insert hostname after ssh
     cmd = prerequisite.cmd[module].replace('ssh', f'{ssh_cmd} {self.nodelist[0]}')
     match = re.search(prerequisite.regex[module], syscmd(cmd))

     # No match means the output did not contain a recognizable version;
     # report it instead of failing with an AttributeError on None.
     if match is None:
         logging.error(f'Unable to determine the version of {module} required by {self.name}')
         sys.exit(1)

     if packaging.version.parse(match.group(1)) < packaging.version.parse(min_ver):
         logging.error(f'{module} >= {min_ver} is required by {self.name}')
         # Exit with a failure status so callers/shells see the error.
         sys.exit(1)
コード例 #4
0
def nvidia_smi(node):
    """List the GPUs reported by ``nvidia-smi -L`` on ``node``.

    Args:
        node: Host name appended to ``ssh_cmd`` to run the query remotely.

    Returns:
        dict mapping the GPU index (as a string) to ``[name, uuid]``.
        Lines that do not look like ``GPU <n>: <name> (UUID: <uuid>)`` are
        ignored.
    """
    device = {}

    listing = syscmd(f'{ssh_cmd} {node} "nvidia-smi -L"')

    for line in listing.splitlines():
        match = re.search(r'^GPU (\d+): (.+?) \(UUID: (.+?)\)', line)

        # Skip anything that is not a GPU entry (blank lines, sub-device
        # lines, warnings) rather than crashing on a None match.
        if match is None:
            continue

        index, name, uuid = match.groups()
        device[index] = [name, uuid]

    return device
コード例 #5
0
def gpu_affinity(node):
    """Collect one affinity value per GPU row of ``nvidia-smi topo -m``.

    For every output line starting with ``GPU<n>``, the last
    whitespace-separated column is used when it is a plain integer;
    otherwise ``'0'`` is recorded for that GPU.

    Args:
        node: Host name appended to ``ssh_cmd`` to run the query remotely.

    Returns:
        list of strings, one per GPU row, in output order.
    """
    affinity = []
    topology = syscmd(f'{ssh_cmd} {node} "nvidia-smi topo -m"')

    for line in topology.splitlines():
        # Raw strings keep the regex escapes valid Python string literals.
        if not re.search(r'^GPU\d+', line):
            continue

        # NOTE(review): presumably the last column is the NUMA affinity;
        # confirm against the driver version in use.
        numa = line.split()[-1]
        affinity.append(numa if re.search(r'^\d+$', numa) else '0')

    return affinity
コード例 #6
0
def device_query(node, builddir='./'):
    """Build the CUDA ``deviceQuery`` sample and run it on ``node``.

    The three required source files are downloaded into ``builddir`` (files
    already present are not fetched again), compiled locally with ``nvcc``,
    and the resulting binary is executed on ``node`` after loading the
    modules returned by ``get_module()``.

    Args:
        node: Host name appended to ``ssh_cmd`` to run the binary remotely.
        builddir: Directory used for the downloaded sources and the binary.

    Returns:
        Tuple ``(runtime, cuda_cc)``: the last field of the
        ``/ Runtime Version`` line, and the last field of the
        ``Minor version number`` line with dots removed.

    Raises:
        RuntimeError: If either line is missing from the deviceQuery output.
    """
    # requirement to build deviceQuery
    sample_url = [
        'https://raw.githubusercontent.com/NVIDIA/cuda-samples/master/Common/helper_cuda.h',
        'https://raw.githubusercontent.com/NVIDIA/cuda-samples/master/Common/helper_string.h',
        'https://raw.githubusercontent.com/NVIDIA/cuda-samples/master/Samples/1_Utilities/deviceQuery/deviceQuery.cpp'
    ]

    # download cuda samples
    for url in sample_url:
        file_name = url.split('/')[-1]
        file_path = os.path.join(builddir, file_name)

        if not os.path.exists(file_path):
            syscmd(f'wget {url} -O {file_path}')

    # build deviceQuery on host
    syscmd(f'builtin cd {builddir}; nvcc -I. deviceQuery.cpp -o deviceQuery')

    # execute deviceQuery on remote host
    query = syscmd(
        f'{ssh_cmd} {node} '
        f'"cd {builddir}; module load {" ".join(get_module())}; ./deviceQuery"'
    )

    runtime = None
    cuda_cc = None

    for line in query.splitlines():
        if '/ Runtime Version' in line:
            runtime = line.split()[-1]

        if 'Minor version number' in line:
            cuda_cc = line.split()[-1].replace('.', '')
            # Only the first reported device is needed.
            break

    # Fail with a clear message instead of an UnboundLocalError at return.
    if runtime is None or cuda_cc is None:
        raise RuntimeError(
            f'Unable to parse CUDA runtime version / compute capability '
            f'from deviceQuery output on {node}'
        )

    # NOTE: the downloaded sources and the deviceQuery binary are left in
    # builddir; no cleanup is performed.
    return runtime, cuda_cc
コード例 #7
0
def lscpu(node):
    """Summarize the ``lscpu`` output of ``node``.

    Each ``key: value`` line is matched on its exact key after stripping
    surrounding whitespace, so both flat and indented ``lscpu`` layouts are
    handled and look-alike keys (e.g. ``BIOS Model name``) are not confused
    with the intended ones.

    Args:
        node: Host name appended to ``ssh_cmd`` to run ``lscpu`` remotely.

    Returns:
        dict with the keys that were found in the output:
            ``CPUs``    - int, value of ``CPU(s)``
            ``Model``   - str, value of ``Model name`` (whitespace-normalized)
            ``Threads`` - str, value of ``Thread(s) per core``
            ``AVXs``    - str, upper-cased ``avx*`` flags joined by ``', '``
        and always:
            ``NUMA``    - list of str, one value per ``NUMA node<n>`` line
    """
    host = {}
    numa = []
    report = syscmd(f'{ssh_cmd} {node} lscpu')

    for line in report.splitlines():
        key, sep, value = line.partition(':')
        if not sep:
            continue

        key = key.strip()
        value = value.strip()

        if key == 'CPU(s)':
            host['CPUs'] = int(value)
        elif key == 'Model name':
            host['Model'] = ' '.join(value.split())
        elif key == 'Thread(s) per core':
            host['Threads'] = value
        elif re.match(r'NUMA node\d+', key):
            numa.append(value)
        elif key == 'Flags':
            # Token-based filtering also keeps an avx flag that is the very
            # last entry on the line.
            avx = [flag for flag in value.split() if flag.startswith('avx')]
            host['AVXs'] = ', '.join(flag.upper() for flag in avx)

    # Assigned once, after the loop, so the key exists even for empty output.
    host['NUMA'] = numa

    return host
コード例 #8
0
def gpu_memory(node):
    """Return the total memory reported by ``nvidia-smi`` for GPU 0 on ``node``.

    The value is the first whitespace-separated field of the
    ``memory.total`` query output, converted to ``int``.
    NOTE(review): the unit is whatever nvidia-smi prints (presumably MiB).
    """
    query = f'{ssh_cmd} {node} "nvidia-smi -i 0 --query-gpu=memory.total --format=csv,noheader"'
    fields = syscmd(query).split()

    return int(fields[0])
コード例 #9
0
ファイル: bmt.py プロジェクト: vitduck/BMT
 def build(self):
     """Run every command in ``self.buildcmd`` through ``syscmd``, in order."""
     for step in self.buildcmd:
         syscmd(step)
コード例 #10
0
def cpu_memory(host):
    """Return the ``MemTotal`` value from ``/proc/meminfo`` on ``host``.

    Args:
        host: Host name appended to ``ssh_cmd`` to run ``grep`` remotely.

    Returns:
        int: the second whitespace-separated field of the ``MemTotal`` line
        (``/proc/meminfo`` reports this amount in kB).
    """
    meminfo = syscmd(f'{ssh_cmd} {host} grep MemTotal /proc/meminfo')

    # Line looks like "MemTotal:  <amount> kB"; field 1 is the amount.
    return int(meminfo.split()[1])