Example #1
def main(args):
    if args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        try:
            import py3nvml
            py3nvml.grab_gpus(1, gpu_fraction=0.95)
        except ImportError:
            print("Could not import py3nvml")

    test_agent = TestAgent(args)

    if args.test:
        # Get the config
        with open(os.path.join('models', args.name, 'config.json')) as fp:
            config_dict = json.load(fp)
        # vars() returns the namespace's own dict, so updating it overrides
        # the command-line args with the saved training config
        args_dict = vars(args)
        args_dict.update(config_dict)

        test_wrapper(test_agent, args)
    else:
        agent = TrainAgent(args)
        try:
            agent.train(args.data,
                        args.max_grad_norm,
                        args.wd,
                        test_agent,
                        args=args)
        except KeyboardInterrupt:
            test_wrapper(test_agent, args)
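A minimal sketch (not part of the original example) of what the fallback branch above relies on: as the scheduler examples further down show, grab_gpus returns the number of devices it claimed and exposes them through CUDA_VISIBLE_DEVICES, so the allocation can be checked before building the agents.

import os
import py3nvml

# Hedged sketch: try to claim one mostly-free GPU, as in the fallback above.
n_grabbed = py3nvml.grab_gpus(1, gpu_fraction=0.95)
if n_grabbed == 0:
    print('No free GPU grabbed; CUDA_VISIBLE_DEVICES left unchanged')
else:
    print('Grabbed %d GPU(s), CUDA_VISIBLE_DEVICES=%s'
          % (n_grabbed, os.environ.get('CUDA_VISIBLE_DEVICES')))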
Example #2
def test_kernel():
    # Grab no GPUs so this op test does not claim a device
    py3nvml.grab_gpus(0)
    kernel_module = tf.load_op_library(
        os.path.join(LIB_BASE, 'kernel_example.so'))
    with tf.Session() as sess:
        result = kernel_module.example([5, 4, 3, 2, 1])
        np.testing.assert_array_equal(result.eval(), np.array([10, 8, 6, 4,
                                                               2]))
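Examples #2 and #18 call grab_gpus(0) so that an op test never claims a device; a hedged standalone sketch of the same CPU-only pattern (the printout is illustrative only, not taken from the example):

import os
import py3nvml

# Ask for zero GPUs; the test process then stays off every device.
py3nvml.grab_gpus(0)
# Inspect whatever (if anything) grab_gpus exported for CUDA.
print('CUDA_VISIBLE_DEVICES =', os.environ.get('CUDA_VISIBLE_DEVICES'))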
Example #3
def setup():
    global barbara, barbara_t
    global bshape, bshape_half
    global ch
    py3nvml.grab_gpus(1, gpu_fraction=0.5)
    barbara = datasets.barbara()
    barbara = (barbara/barbara.max()).astype('float32')
    barbara = barbara.transpose([2, 0, 1])
    bshape = list(barbara.shape)
    bshape_half = bshape[:]
    bshape_half[1] //= 2
    barbara_t = torch.unsqueeze(
        torch.tensor(barbara, dtype=torch.float32, device=dev), dim=0)
    ch = barbara_t.shape[1]
Example #4
    def init_client(self, gpus):
        gpus = [int(gpu.strip()) for gpu in gpus.split(",")]
        if gpus:
            py3nvml.grab_gpus(len(gpus), gpu_fraction=0, gpu_select=gpus)

        ray.init(
            num_gpus=len(gpus),
            configure_logging=False,
            include_dashboard=False,
            namespace="ai.purplesmart.kernels",
            _redis_max_memory=250 * 1024 * 1024,
        )

        import counter

        self.counter = counter.create_counter()
        self.client = serve.start(detached=True)
Example #5
def setup():
    global barbara, barbara_t, tf
    global bshape, bshape_extracol
    global ref_rowfilter, ch
    py3nvml.grab_gpus(1, gpu_fraction=0.5)
    barbara = datasets.barbara()
    barbara = (barbara / barbara.max()).astype('float32')
    barbara = barbara.transpose([2, 0, 1])
    bshape = list(barbara.shape)
    bshape_extracol = bshape[:]
    bshape_extracol[2] += 1
    barbara_t = torch.unsqueeze(torch.tensor(barbara, dtype=torch.float32),
                                dim=0).to(dev)
    ch = barbara_t.shape[1]

    # Some useful functions
    ref_rowfilter = lambda x, h: np.stack([np_colfilter(s.T, h).T for s in x],
                                          axis=0)
Example #6
def setup():
    global barbara, barbara_t
    global bshape, bshape_half
    global ref_rowdfilt, ch
    py3nvml.grab_gpus(1, gpu_fraction=0.5, env_set_ok=True)
    barbara = datasets.barbara()
    barbara = (barbara / barbara.max()).astype('float32')
    barbara = barbara.transpose([2, 0, 1])
    bshape = list(barbara.shape)
    bshape_half = bshape[:]
    bshape_half[2] //= 2
    barbara_t = torch.unsqueeze(torch.tensor(barbara, dtype=torch.float32),
                                dim=0).to(dev)
    ch = barbara_t.shape[1]

    # Some useful functions
    ref_rowdfilt = lambda x, ha, hb: np.stack(
        [np_coldfilt(s.T, ha, hb).T for s in x], axis=0)
Example #7
    # If we don't use a scheduler, just train 1 network in a simple loop
    if args.no_scheduler:
        # Create reporting objects
        args.verbose = True
        outdir = os.path.join(os.environ['HOME'], 'gainlayer_results', args.outdir)
        tr_writer = SummaryWriter(os.path.join(outdir, 'train'))
        val_writer = SummaryWriter(os.path.join(outdir, 'val'))
        if not os.path.exists(outdir):
            os.mkdir(outdir)

        # Choose the model to run and build it
        if args.type is None:
            type_ = 'ref'
        else:
            type_ = args.type[0]
        py3nvml.grab_gpus(ceil(args.num_gpus))
        cfg = {'args': args, 'type': type_, 'num_gpus': args.num_gpus,
               'dwt': args.dwt, 'C': args.C,
               'lr': args.lr, 'lr1': args.lr1, 'mom': args.mom, 'mom1': args.mom1,
               'wd': args.wd, 'q': args.q, 'wd1': args.wd1, 'opt1': args.opt1}
        trn = TrainNET(cfg)
        trn._final_epoch = args.epochs

        # Copy this source file to the output directory for record keeping
        if args.resume:
            trn._restore(os.path.join(outdir, 'model_last.pth'))
        else:
            save_experiment_info(outdir, args.seed, args.no_comment, trn.model)

        if args.seed is not None and trn.use_cuda:
            torch.backends.cudnn.deterministic = True
Example #8
# Author: Bichen Wu ([email protected]) 08/25/2016
"""Evaluation"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import py3nvml
py3nvml.grab_gpus(num_gpus=1, gpu_fraction=0.9)

import cv2
from datetime import datetime
import os.path
import sys
import time

import numpy as np
from six.moves import xrange
import tensorflow as tf

from config import *
from dataset import pascal_voc, kitti
from utils.util import bbox_transform, Timer
from nets import *

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'dataset', 'KITTI', """Currently support PASCAL_VOC or KITTI dataset.""")
tf.app.flags.DEFINE_string('data_path', '', """Root directory of data""")
tf.app.flags.DEFINE_string(
    'image_set', 'test', """Only used for VOC data.""")
Example #9

def test_dtcwt2(size, J, no_grad=False, dev='cuda'):
    x = torch.randn(*size, requires_grad=(not no_grad)).to(dev)
    h0a, h0b, _, _, h1a, h1b, _, _ = level1('farras')
    cols, rows = lowlevel2.prep_filt_quad_afb2d(h0a, h1a, h0b, h1b, device=dev)
    yh = []
    for j in range(J):
        x, y = lowlevel2.quad_afb2d(x, cols, rows, mode='zero')
        yh.append(y)
    return x, yh


if __name__ == "__main__":
    args = parser.parse_args()
    py3nvml.grab_gpus(1)
    if args.size > 0:
        size = (args.batch, 5, args.size, args.size)
    else:
        size = (args.batch, 5, 128, 128)

    if args.ref:
        print('Running dtcwt with FFTs')
        reference_fftconv(size, args.j, args.no_grad, args.device)
    elif args.convolution:
        print('Running 11x11 convolution')
        reference_conv(size, args.no_grad, args.device)
    elif args.dwt:
        print('Running separable dwt')
        separable_dwt(size, args.j, args.no_grad, args.device)
    elif args.fb:
Example #10
# # Implementing a deep neural network using tensorflow
# In this notebook I will implement a 3-hidden-layer neural network and feed in all the necessary data. We will focus on S1_A1_E1.mat, which means we are only concerned with one subject.

# ## 1. Import all the necessary packages
import math
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
import scipy
import scipy.io as sio
import time
import py3nvml
py3nvml.grab_gpus(num_gpus=1, gpu_select=[1])

# ## 2. Load the data from the .mat file and split it into training (70%) and test (30%) sets.
def load_dataset(path, NoZero):
    X_path = path + 'emgShuffled.mat'
    #Load the emg data
    XData = sio.loadmat(X_path)
    X_orig = (np.array((XData['emgShuffled']))).T
    X_train_orig = X_orig[:,0:int(0.7*X_orig.shape[1])]
    X_test_orig = X_orig[:,int(0.7*X_orig.shape[1])+1::]
    #Scale the EMG data by its maximum
    X_train_orig = X_train_orig/np.amax(X_train_orig)
    X_test_orig = X_test_orig/np.amax(X_test_orig)
    #Load the labels
    Y_path = path + 'yShuffled.mat'
    YData = sio.loadmat(Y_path)
    Y_orig = (np.array((YData['yShuffled']))).T
    if NoZero == True:
Example #11
    def run(self):
        # def run(args, subprocess_func, subprocess_verbose=False):

        list_of_configs = sorted(
            glob.glob(self.path_to_configs + '/*%s' % self.config_ext))
        list_of_gpus = self.available_gpus
        max_utilization = self.max_gpu_utilization
        max_jobs_per_gpu = self.max_jobs_per_gpu

        queued = copy.deepcopy(list_of_configs)
        if len(queued) == 0:
            raise AssertionError('No tasks(configs) given!')
        pools = {}
        running = OrderedDict()
        done = OrderedDict()
        failed = OrderedDict()
        curr_user = getpass.getuser()
        last_task_time = -float('inf')
        last_log_time = -float('inf')
        alpha = np.exp(-3 / self.time_between_tasks)
        total_gpu_utilization_filt = [0.0] * len(list_of_gpus)
        user_gpu_utilization_filt = [0.0] * len(list_of_gpus)
        while len(queued) + len(running) > 0:
            time.sleep(0.01)

            # allocate GPU (every log_refresh_rate seconds)
            cand_gpu = []
            cand_gpu_util = []
            cand_gpumem_util = []
            if time.time() - last_log_time >= 1.0:
                total_compute_procs, user_compute_procs = get_num_procs(
                    allocated_gpus=list_of_gpus, username=curr_user)
                total_gpu_utilization = get_gpu_utilization(
                    allocated_gpus=list_of_gpus)
                total_gpumem_utilization = get_gpumem_utilization(
                    allocated_gpus=list_of_gpus)
                user_gpu_utilization = [
                    ceil(x / (y + 1e-12) * z)
                    for x, y, z in zip(user_compute_procs, total_compute_procs,
                                       total_gpu_utilization)
                ]
                total_gpu_utilization_filt = [
                    (1 - alpha) * x + alpha * X for x, X in zip(
                        total_gpu_utilization, total_gpu_utilization_filt)
                ]
                user_gpu_utilization_filt = [
                    (1 - alpha) * x + alpha * X for x, X in zip(
                        user_gpu_utilization, user_gpu_utilization_filt)
                ]
                for i, gpuid in enumerate(list_of_gpus):
                    tot_util_cond = total_gpu_utilization_filt[i] <= (
                        100 - self.utilization_margin)
                    tot_memutil_cond = total_gpumem_utilization[
                        i] <= 50  # (1 - gpu_fraction)*100
                    user_util_cond = user_gpu_utilization_filt[i] < floor(
                        max_utilization[i] *
                        (100 - self.utilization_margin) / 100)
                    user_numproc_cond = user_compute_procs[
                        i] < max_jobs_per_gpu[i] or max_jobs_per_gpu[i] == -1
                    if tot_util_cond and user_util_cond and user_numproc_cond and tot_memutil_cond:
                        cand_gpu.append(gpuid)
                        cand_gpu_util.append(total_gpu_utilization_filt[i])
                        cand_gpumem_util.append(total_gpumem_utilization[i])

            # run task (every time_between_tasks seconds)
            # skip if there are no queued tasks, no available GPUs, or it is
            # too soon since the last launch
            if (len(queued) == 0 or len(cand_gpu) == 0
                    or time.time() - last_task_time < self.time_between_tasks):
                pass
            else:
                min_util_idx = cand_gpu_util.index(min(cand_gpu_util))
                if py3nvml.grab_gpus(num_gpus=1,
                                     gpu_select=[cand_gpu[min_util_idx]],
                                     gpu_fraction=0.5,
                                     max_procs=-1) == 0:
                    # if for some reason cannot allocate gpu
                    # print('CUDA_VISIBLE_DEVICES = %s'%(os.environ.get('CUDA_VISIBLE_DEVICES')))
                    last_task_time = time.time()
                    continue
                signal.signal(signal.SIGINT, signal.SIG_IGN)
                if self.child_verbose:
                    p = multiprocessing.Pool(processes=1)
                else:
                    p = multiprocessing.Pool(processes=1, initializer=mute)
                pools[queued[0]] = p
                with open(queued[0], 'r') as f:
                    # running[queued[0]] = p.map_async(self.child_process, self._get_child_process_args(f))
                    running[queued[0]] = p.apply_async(
                        self.child_process, self._get_child_process_args(f))
                signal.signal(signal.SIGINT, self.default_handler)
                queued.pop(0)
                last_task_time = time.time()

            # log (every log_refresh_rate seconds)
            if time.time() - last_log_time >= 1.0:
                # update thread status
                ready = []
                for key in running:
                    if running[key].ready():  # call has been executed
                        ready.append(key)
                        if running[key].successful(
                        ):  # process terminated successfully
                            done[key] = running[key]
                        else:  # process terminated with errors
                            failed[key] = running[key]
                for key in ready:
                    running.pop(key)
                    pools[key].close()
                    pools[key].terminate()
                    pools.pop(key)

                entry_len = 150
                print(''.center(entry_len, '+'))
                print(
                    datetime.datetime.now(dateutil.tz.tzlocal()).strftime(
                        ' %Y/%m/%d_%H:%M:%S ').center(entry_len, '-'))
                print((
                    '+ USER: %s (process limit: %s, utilization limit: %s%%)' %
                    (curr_user, max_jobs_per_gpu, max_utilization)).ljust(
                        entry_len, ' '))
                for i, gpuid in enumerate(list_of_gpus):
                    tup = (gpuid, )
                    tup += (user_compute_procs[i], )
                    tup += (total_compute_procs[i], )
                    tup += (user_gpu_utilization[i], )
                    tup += (total_gpu_utilization[i], )
                    tup += (total_gpumem_utilization[i], )
                    print((
                        '+  gpu%d compute processes (%d/%d) utilization rate (%d%%/%d%%) memory usage (--%%/%d%%)'
                        % tup).ljust(entry_len, ' '))
                print((' %d QUEUED ' % (len(queued))).center(entry_len, '-'))
                if self.kwargs.get('logging'):
                    print((' %d LOGGING ' % (len(running))).center(
                        entry_len, '-'))
                else:
                    print((' %d RUNNING ' % (len(running))).center(
                        entry_len, '-'))
                for key in running:
                    name_str = os.path.basename(key)
                    try:
                        tqdm_stat = pickle.load(
                            open(os.path.join('/tmp', name_str + '.tqdm'),
                                 'rb'))
                        tqdm_str = 'gpu%s pid=%d |%d%%| %d/%d [%s<%s, %sit/s]' % tqdm_stat
                    except:
                        tqdm_str = ''
                    name_str = '+  ' + name_str
                    print(name_str + tqdm_str.rjust(entry_len - len(name_str)))
                print((' %d FAILED ' % (len(failed))).center(entry_len, '-'))
                for key in failed:
                    print(os.path.basename(key))
                print((' %d DONE ' % (len(done))).center(entry_len, '-'))
                for key in done:
                    print(os.path.basename(key))
                print(''.center(entry_len, '+'))
                print('+')
                last_log_time = time.time()

            if self.terminate:
                self.resume = prompt_yes_or_no('Resume?')
                if self.resume:
                    IPython.embed()
                    self.terminate = False
            if self.terminate:
                break

        print('summary - done: %d, failed: %d, halted: %d, queued: %d' %
              (len(done), len(failed), len(running), len(queued)))
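The candidate-GPU check above smooths the raw utilization readings with an exponential filter (alpha = exp(-3 / time_between_tasks)) before comparing them to the limits; a standalone sketch of just that step, with made-up numbers:

import numpy as np

time_between_tasks = 30.0                # illustrative value only
alpha = np.exp(-3 / time_between_tasks)  # close to 1 -> heavy smoothing

filt = [0.0, 0.0]                        # one filtered value per GPU
raw = [80.0, 10.0]                       # latest raw utilization readings (%)
filt = [(1 - alpha) * x + alpha * X for x, X in zip(raw, filt)]
print(filt)                              # new readings are blended in gradually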
Example #12
    def run(self):

        # Access shared queues
        shared_pending_job_q = self.get_pending_job_q()
        shared_worker_status_q = self.get_worker_status_q()

        # Worker state
        self.worker_resume = True
        self.worker_terminate = False
        procs = {}
        running = OrderedDict()
        done = OrderedDict()
        failed = OrderedDict()
        last_job_time = -float('inf')

        alpha = np.exp(-3 / self.limits.time_between_jobs)
        # These filters are indexed positionally below (and re-assigned as
        # lists), so initialise them as lists rather than dicts keyed by gpu id
        total_gpu_utilization_filt = [0.0] * len(self.limits.available_gpus)
        user_gpu_utilization_filt = [0.0] * len(self.limits.available_gpus)
        worker_gpu_utilization_filt = [0.0] * len(self.limits.available_gpus)
        num_pending = shared_pending_job_q.qsize()
        while num_pending + len(running):
            curr_user = getpass.getuser()
            list_of_gpus = self.limits.available_gpus
            max_utilization = self.limits.gpu_utilization_limit
            max_jobs_per_gpu = self.limits.gpu_job_limit

            # 1. update candidate GPU
            total_compute_procs, user_compute_procs, pid_compute_procs = \
                get_num_procs(allocated_gpus=list_of_gpus, username=curr_user, version='v2')
            worker_compute_procs = copy.deepcopy(user_compute_procs)
            total_gpu_utilization = get_gpu_utilization(
                allocated_gpus=list_of_gpus)
            user_gpu_utilization = [
                ceil(x / (y + 1e-12) * z)
                for x, y, z in zip(user_compute_procs, total_compute_procs,
                                   total_gpu_utilization)
            ]
            total_gpumem_utilization, user_gpumem_utilization, pid_gpumem_utilization = \
                get_gpumem_utilization(allocated_gpus=list_of_gpus, username=curr_user, version='v2')

            total_gpu_utilization_filt = [
                (1 - alpha) * x + alpha * X for x, X in zip(
                    total_gpu_utilization, total_gpu_utilization_filt)
            ]
            user_gpu_utilization_filt = [
                (1 - alpha) * x + alpha * X for x, X in zip(
                    user_gpu_utilization, user_gpu_utilization_filt)
            ]

            cand_gpu, cand_gpu_util, cand_gpumem_util = [], [], []
            for i, gpuid in enumerate(list_of_gpus):
                if gpuid < 0:  # CPU mode
                    all_pid_compute_procs = [
                        item for sublist in pid_compute_procs
                        for item in sublist
                    ]
                    worker_compute_procs[i] = sum([
                        running[key].pid not in all_pid_compute_procs
                        for key in running
                    ])
                    user_compute_procs[i] = worker_compute_procs[i]
                else:
                    worker_compute_procs[i] = sum([
                        running[key].pid in pid_compute_procs[i]
                        for key in running
                    ])

                tot_util_cond = total_gpu_utilization_filt[i] <= (
                    100 - self.limits.utilization_margin)
                tot_memutil_cond = total_gpumem_utilization[
                    i] <= self.limits.max_gpu_mem_usage  # (1 - gpu_fraction)*100
                user_util_cond = user_gpu_utilization_filt[i] < floor(
                    max_utilization[i] *
                    (100 - self.limits.utilization_margin) / 100)
                user_numproc_cond = user_compute_procs[i] < max_jobs_per_gpu[
                    i] or max_jobs_per_gpu[i] == -1
                worker_numproc_cond = worker_compute_procs[
                    i] < max_jobs_per_gpu[i] or max_jobs_per_gpu[i] == -1

                if self.limits.apply_limits == 'user':
                    is_cand = tot_util_cond and user_util_cond and user_numproc_cond and tot_memutil_cond
                elif self.limits.apply_limits == 'worker':
                    is_cand = tot_util_cond and worker_numproc_cond and tot_memutil_cond
                else:
                    is_cand = False
                    print(
                        "Invalid apply_limits. Available options are ['user', 'worker']"
                    )

                if is_cand:
                    cand_gpu.append(gpuid)
                    cand_gpu_util.append(total_gpu_utilization_filt[i])
                    cand_gpumem_util.append(total_gpumem_utilization[i])

            # 2. run job process
            # skip if there are no available GPUs or it is too soon since the
            # last job launch
            if (len(cand_gpu) == 0
                    or time.time() - last_job_time < self.limits.time_between_jobs):
                pass
            else:
                min_util_idx = cand_gpu_util.index(min(cand_gpu_util))
                min_util_cand_gpu = cand_gpu[min_util_idx]
                if min_util_cand_gpu < 0:  # CPU mode
                    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
                    grab_device_success = True
                else:
                    grab_device_success = py3nvml.grab_gpus(
                        num_gpus=1,
                        gpu_select=[cand_gpu[min_util_idx]],
                        gpu_fraction=(100 - self.limits.max_gpu_mem_usage) /
                        100,
                        max_procs=-1) > 0
                if not grab_device_success:
                    # if for some reason cannot allocate gpu
                    # print('CUDA_VISIBLE_DEVICES = %s'%(os.environ.get('CUDA_VISIBLE_DEVICES')))
                    # last_job_time = time.time()
                    continue
                try:
                    job = shared_pending_job_q.get_nowait(
                    )  # {'tag': , 'config': , 'worker_args': , 'worker_kwargs': }
                    num_pending -= 1
                    # {'tag': path, 'config': json.load(f, object_hook=lambda d : SimpleNamespace(**d)),
                    # 'worker_args': worker_args, 'worker_kwargs': worker_kwargs}

                    signal.signal(signal.SIGINT, signal.SIG_IGN)
                    job['worker_kwargs'].update({
                        'config': job['config'],
                        'config_path': job['tag']
                    })
                    p = multiprocessing.Process(target=self.worker,
                                                args=job['worker_args'],
                                                kwargs=job['worker_kwargs'])
                    procs[job['tag']] = p
                    p.start()
                    running[job['tag']] = p
                    signal.signal(signal.SIGINT, self.default_handler)

                    last_job_time = time.time()
                except queue.Empty:
                    pass
                except (EOFError, BrokenPipeError) as e:
                    print('lost connection to server')
            # update thread status
            ready = []
            for key in running:
                if not running[key].is_alive():  # call has been executed
                    ready.append(key)
                    if running[
                            key].exitcode == 0:  # process terminated successfully
                        done[key] = running[key]
                    else:  # process terminated with errors
                        failed[key] = running[key]
            for key in ready:
                running.pop(key)
                procs[key].terminate()
                # procs[key].close()
                procs.pop(key)

            # 3. display status
            entry_len = 150
            print(''.center(entry_len, '+'))
            print(
                datetime.datetime.now(dateutil.tz.tzlocal()).strftime(
                    ' %Y/%m/%d_%H:%M:%S ').center(entry_len, '-'))
            # worker status
            if self.limits.apply_limits == 'user':
                print('+ WORKER: %s (apply limits on user %s)' %
                      (self.name, curr_user))
            elif self.limits.apply_limits == 'worker':
                print('+ WORKER: %s (apply limits on current worker)' %
                      (self.name))
            else:
                print(
                    "Invalid apply_limits. Available options are ['user', 'worker']"
                )
            print(('+ (gpu_ids=%s, job_limit=%s, util_limit=%s%%)' %
                   (list_of_gpus, max_jobs_per_gpu, max_utilization)).ljust(
                       entry_len, ' '))
            for i, gpuid in enumerate(list_of_gpus):
                tup = (gpuid, )
                tup += (user_compute_procs[i], )
                tup += (worker_compute_procs[i], )
                tup += (total_compute_procs[i], )
                tup += (user_gpu_utilization[i], )
                tup += (total_gpu_utilization[i], )
                tup += (user_gpumem_utilization[i], )
                tup += (total_gpumem_utilization[i], )
                print((
                    '+  gpu%d compute processes (%d(%d)/%d) utilization rate (%d%%/%d%%) memory usage (%d%%/%d%%)'
                    % tup).ljust(entry_len, ' '))
            # job status
            print((' %d PENDING ' % (num_pending)).center(entry_len, '-'))
            # if self.kwargs.get('logging'):
            #     print((' %d LOGGING '%(len(running))).center(entry_len,'-'))
            # else:
            #     print((' %d RUNNING '%(len(running))).center(entry_len,'-'))
            print((' %d LOGGING/RUNNING ' % (len(running))).center(
                entry_len, '-'))
            tqdm_stats = []
            for key in running:
                name_str = os.path.basename(key)
                try:
                    tqdm_stat = pickle.load(
                        open(os.path.join('/tmp', name_str + '.tqdm'), 'rb'))
                    tqdm_stats.append(tqdm_stat)
                    tqdm_str = 'gpu%s pid=%d |%d%%| %d/%d [%s<%s, %sit/s]' % tqdm_stat
                except:
                    tqdm_stats.append(None)
                    tqdm_str = ''
                name_str = '+  ' + name_str
                print(name_str + tqdm_str.rjust(entry_len - len(name_str)))
            print((' %d FAILED ' % (len(failed))).center(entry_len, '-'))
            for key in failed:
                print(os.path.basename(key))
            print((' %d DONE ' % (len(done))).center(entry_len, '-'))
            for key in done:
                print(os.path.basename(key))
            print(''.center(entry_len, '+'))
            print('+')

            # 4. report status to scheduler
            try:
                shared_worker_status_q.put({
                    self.name: {
                        'limit':
                        vars(self.limits),
                        'status': {
                            'worker_compute_procs': user_compute_procs,
                            'total_compute_procs': total_compute_procs,
                            'worker_gpu_utilization': user_gpu_utilization,
                            'total_gpu_utilization': total_gpu_utilization,
                            'worker_gpumem_utilization':
                            user_gpumem_utilization,
                            'total_gpumem_utilization':
                            total_gpumem_utilization
                        },
                        'running':
                        OrderedDict(
                            ((key, tqdm_stat)
                             for key, tqdm_stat in zip(running, tqdm_stats))),
                        'done':
                        OrderedDict(((key, None) for key in done)),
                        'failed':
                        OrderedDict(((key, None) for key in failed)),
                        'last_updated':
                        time.time()
                    }
                })
            except (EOFError,
                    BrokenPipeError) as e:  # lost connection to server
                print('lost connection to server')

            # 5. SIGINT(ctrl-c) handler
            if self.worker_terminate:
                self.worker_resume = prompt_yes_or_no('Resume?')
                if self.worker_resume:
                    IPython.embed()
                    self.worker_terminate = False
            if self.worker_terminate:
                for key in running:
                    running[key].terminate()
                break

            # run while loop every second
            self.rate.sleep()
            try:
                num_pending = shared_pending_job_q.qsize()
            except (EOFError, BrokenPipeError) as e:
                print('lost connection to server')  # lost connection to server

        print('summary - done: %d, failed: %d, halted: %d, pending: %d' %
              (len(done), len(failed), len(running), num_pending))
Example #13
	time.sleep(wait_time_seconds)
	with open('/proc/stat', 'r') as fh: # see https://linux.die.net/man/5/proc how to interpret this
		lines = fh.readlines()
		cpulines = [ line.strip().split(" ") for line in lines if line.startswith('cpu') ][1:]
		idle_time_stop = [ safe_int(cpuinfo[4], idle_time_start[i]) for i, cpuinfo in enumerate(cpulines) ]
	cpu_idle_info = [ ( idle_time_stop[i]-idle_time_start[i], cpu_id) for i, cpu_id in enumerate(cpu_ids) ]
	cpu_idle_info.sort(reverse=True)
	return cpu_idle_info

if __name__=='__main__':
	if len(sys.argv)<3:
		print("Missing arguments: [cpu-count] [gpu-count]")
		sys.exit(1)
	cpu_count = int(sys.argv[1])
	gpu_count = int(sys.argv[2])
	argv_rest = sys.argv[2:]
	cpu_idle_info = find_idle_cpus(0.5)
	my_pid = os.getpid()
	cpu_list = ",".join([ cpu_idle_count[1] for cpu_idle_count in cpu_idle_info[:cpu_count] ])
	argv = []+sys.argv
	command = [ "taskset", "-a", "-c", cpu_list ] + argv[3:]
	#print(command)
	py3nvml.grab_gpus(num_gpus=gpu_count, gpu_fraction=0.95)
	environ = dict(os.environ)
	
	environ['LD_PRELOAD'] = os.path.abspath(os.path.dirname(__file__)) + "/limit_visible_cpus.so"
	environ['OMP_NUM_THREADS'] = str(cpu_count)
	# This call never returns.
	os.execvpe(command[0], command, environ)
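The top of find_idle_cpus is cut off in the example above; for context, a hedged reconstruction of that measurement as a standalone helper (the body is an assumption pieced together from the visible half, not the original code):

import time

def find_idle_cpus(wait_time_seconds):
    # Read the per-CPU idle counters (field 4 of /proc/stat) twice,
    # wait_time_seconds apart, and return (idle_delta, cpu_id) pairs with the
    # most idle CPUs first, mirroring what the truncated helper returns.
    def read_idle():
        with open('/proc/stat') as fh:
            cpulines = [line.split() for line in fh if line.startswith('cpu')][1:]
        return [(int(cpu[4]), cpu[0][len('cpu'):]) for cpu in cpulines]

    start = read_idle()
    time.sleep(wait_time_seconds)
    stop = read_idle()
    cpu_idle_info = [(b[0] - a[0], a[1]) for a, b in zip(start, stop)]
    cpu_idle_info.sort(reverse=True)
    return cpu_idle_info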
	
Example #14
def setup():
    py3nvml.grab_gpus(1, gpu_fraction=0.5)
Example #15
    h, m = divmod(m, 60)

    return h, m, s


# If seed was not provided, create one and seed numpy and pytorch
if args.seed < 0:
    args.seed = np.random.randint(1 << 16)
np.random.seed(args.seed)
random.seed(args.seed)
torch.backends.cudnn.deterministic = True
torch.manual_seed(args.seed)

# Hyperparameter settings
py3nvml.grab_gpus(args.num_gpus,
                  gpu_select=args.gpu_select,
                  gpu_fraction=0.7,
                  max_procs=0)
use_cuda = torch.cuda.is_available()
best_acc = 0
start_epoch, batch_size = 1, args.batch_size

# ##############################################################################
#  Model
print('\n[Phase 1] : Model setup')
if len(args.layers_per_scale) == 1:
    args.layers_per_scale = args.layers_per_scale[0]

if args.resume:
    # Load checkpoint
    print('| Resuming from checkpoint...')
    chkpt_dir = os.path.join(args.exp_dir, 'chkpt')
Example #16
def setup():
    global mode, o_dim, ri_dim
    mode = mode_to_int('symmetric')
    o_dim = 2
    ri_dim = -1
    py3nvml.grab_gpus(1, gpu_fraction=0.5, env_set_ok=True)
Example #17
def setup():
    py3nvml.grab_gpus(1, gpu_fraction=0.5, env_set_ok=True)
Example #18
def test_zeroout():
    py3nvml.grab_gpus(0)
    zero_out_module = tf.load_op_library(os.path.join(LIB_BASE, 'zero_out.so'))
    with tf.Session() as sess:
        result = zero_out_module.zero_out([5, 4, 3, 2, 1])
        np.testing.assert_array_equal(result.eval(), np.array([5, 0, 0, 0, 0]))