Example #1
0
def is_node_free(node_name):
    '''
    Report whether node_name currently counts as free.

    Runs get_cpu_usage.py and then get_mem_usage.py on the node through
    SSH_CMD and parses what each one writes to stdout as a float.  The CPU
    reading is divided by 100 before being compared with CPU_FREE_TOL; the
    memory reading is compared with RAM_FREE_TOL as parsed.

    Returns True only when both readings are below their tolerance.  Any
    exception raised while launching a command or parsing its output is
    printed and the node is reported as not free (False).
    '''
    print(node_name)
    script_templates = ('%s/scripts/get_cpu_usage.py',
                        '%s/scripts/get_mem_usage.py')
    readings = []
    try:
        # CPU first, memory second; a failure on the first skips the second.
        for template in script_templates:
            remote_cmd = SSH_CMD.split(' ') + [node_name, PYTHON_CMD,
                                               template % CLUSTER_DIR]
            proc = subprocess.Popen(remote_cmd, stdout=subprocess.PIPE)
            readings.append(float(proc.communicate()[0]))
    except Exception as err:
        print(err)
        return False
    cpu_usage = readings[0] / 100.0
    mem_usage = readings[1]
    # Single-argument print(...) emits the same "cpu mem" line as the
    # Python 2 print statement would.
    print('%s %s' % (cpu_usage, mem_usage))
    cpu_is_low = cpu_usage < CPU_FREE_TOL
    return cpu_is_low and mem_usage < RAM_FREE_TOL
Example #2
0
def get_free_gpus(node_name):
    '''
    Return the indices of the GPUs on node_name that count as free.

    Based off Awni's runAll.py

    Runs `nvidia-smi -q -x` on the node through SSH_CMD, parses the XML
    report and treats a GPU as free when used / total memory is below
    GPU_FREE_TOL.  The figures come from each gpu's memory_usage element,
    falling back to fb_memory_usage when the former is absent.

    Returns a list of indices (the position of each free gpu element in
    the report).  An empty list is returned, after printing a message,
    when the command produced no output, the output is not valid XML, or
    the memory figures of any GPU are missing or unreadable.
    '''
    # Local import so the handler below works regardless of how the module
    # brought xml.parsers.expat into scope.
    from xml.parsers.expat import ExpatError

    output = subprocess.Popen(SSH_CMD.split(' ') + [node_name, 'nvidia-smi', '-q', '-x'],
            stdout=subprocess.PIPE).communicate()[0]
    if not output:
        print('No output for %s' % node_name)
        return []

    # ElementTree (Python 2.7+) reports malformed input with its own
    # ParseError, a SyntaxError subclass, rather than ExpatError; catch both.
    # The getattr fallback covers parsers that do not define ParseError.
    parse_errors = (ExpatError, getattr(et, 'ParseError', ExpatError))
    try:
        tree = et.fromstring(output.strip())
    except parse_errors:
        print('%s %s' % ('Invalid XML: ', output.strip()))
        return []

    gpus = tree.findall('gpu')
    print('Detected %d gpus on %s' % (len(gpus), node_name))
    free_gpus = []
    for i, gpu in enumerate(gpus):
        mem = gpu.find('memory_usage')
        if mem is None:
            mem = gpu.find('fb_memory_usage')
        if mem is None:
            print('Couldn\'t get memory usage on %s' % node_name)
            return []
        try:
            # Text is expected to start with the number (e.g. "123 MiB");
            # findtext gives None when the child element is missing.
            tot = int(mem.findtext('total').split()[0])
            used = int(mem.findtext('used').split()[0])
        except (AttributeError, IndexError, ValueError):
            # Missing element, empty text or non-numeric text: handle it
            # like the missing memory element above instead of crashing.
            print('Couldn\'t get memory usage on %s' % node_name)
            return []
        print('%s %s %s' % (used, '/', tot))
        # A zero total would make the ratio undefined; never call it free.
        if tot > 0 and float(used) / tot < GPU_FREE_TOL:
            free_gpus.append(i)

    return free_gpus
Example #3
0
def get_free_gpus(node_name):
    '''
    List the GPUs on node_name whose memory use is below GPU_FREE_TOL.

    Based off Awni's runAll.py

    The node is queried with `nvidia-smi -q -x` over SSH_CMD.  For every
    gpu element in the XML report the used and total figures are read from
    its memory_usage child (or fb_memory_usage when that is absent) and
    the GPU is kept when used / total < GPU_FREE_TOL.

    Returns the indices of the free GPUs in report order.  Returns []
    after printing a message when there is no output, when the output
    cannot be parsed as XML, or when the memory figures of any GPU are
    missing or unreadable.
    '''
    # Imported here so the except clause does not rely on the module
    # having imported xml.parsers.expat itself.
    from xml.parsers.expat import ExpatError

    output = subprocess.Popen(SSH_CMD.split(' ') +
                              [node_name, 'nvidia-smi', '-q', '-x'],
                              stdout=subprocess.PIPE).communicate()[0]
    if not output:
        print('No output for %s' % node_name)
        return []
    try:
        tree = et.fromstring(output.strip())
    # From Python 2.7 on, ElementTree raises its own ParseError (a
    # SyntaxError subclass) for malformed XML, so ExpatError alone is not
    # enough.  getattr keeps parsers without ParseError working.
    except (ExpatError, getattr(et, 'ParseError', ExpatError)):
        print('%s %s' % ('Invalid XML: ', output.strip()))
        return []

    gpus = tree.findall('gpu')
    print('Detected %d gpus on %s' % (len(gpus), node_name))
    free_gpus = []
    for i, gpu in enumerate(gpus):
        mem_nodes = gpu.findall('memory_usage') or gpu.findall('fb_memory_usage')
        try:
            # IndexError: element missing or its text is empty.
            # AttributeError: element has no text at all.
            # ValueError: text does not start with an integer.
            mem = mem_nodes[0]
            tot = int(mem.findall('total')[0].text.split()[0])
            used = int(mem.findall('used')[0].text.split()[0])
        except (AttributeError, IndexError, ValueError):
            print('Couldn\'t get memory usage on %s' % node_name)
            return []
        print('%s %s %s' % (used, '/', tot))
        if tot <= 0:
            # The ratio is undefined without a positive total (this used to
            # raise ZeroDivisionError); such a GPU is never reported free.
            continue
        if float(used) / tot < GPU_FREE_TOL:
            free_gpus.append(i)

    return free_gpus
Example #4
0
def is_node_free(node_name):
    '''
    Tell whether node_name is idle enough to be used.

    Two scripts are run on the node through SSH_CMD, get_cpu_usage.py and
    then get_mem_usage.py, and the stdout of each is parsed as a float.
    The CPU value is scaled by 1/100 and must be below CPU_FREE_TOL; the
    memory value is used as parsed and must be below RAM_FREE_TOL.

    Returns True when both conditions hold.  If running either command or
    parsing its output raises, the exception is printed and False is
    returned.
    '''
    print(node_name)
    try:
        cpu_argv = SSH_CMD.split(' ') + [
            node_name, PYTHON_CMD,
            '%s/scripts/get_cpu_usage.py' % CLUSTER_DIR,
        ]
        cpu_stdout = subprocess.Popen(
            cpu_argv, stdout=subprocess.PIPE).communicate()[0]
        cpu_usage = float(cpu_stdout) / 100.0

        mem_argv = SSH_CMD.split(' ') + [
            node_name, PYTHON_CMD,
            '%s/scripts/get_mem_usage.py' % CLUSTER_DIR,
        ]
        mem_stdout = subprocess.Popen(
            mem_argv, stdout=subprocess.PIPE).communicate()[0]
        mem_usage = float(mem_stdout)
    except Exception as err:
        # Best effort: a node that cannot be measured is treated as busy.
        print(err)
        return False
    # Same "cpu mem" line the Python 2 print statement would produce.
    print('%s %s' % (cpu_usage, mem_usage))
    return cpu_usage < CPU_FREE_TOL and mem_usage < RAM_FREE_TOL