def main(args):
    if args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    else:
        try:
            import py3nvml
            py3nvml.grab_gpus(1, gpu_fraction=0.95)
        except ImportError:
            print("Could not import py3nvml")

    test_agent = TestAgent(args)
    if args.test:
        # Get the config
        with open(os.path.join('models', args.name, 'config.json')) as fp:
            config_dict = json.load(fp)
        args_dict = vars(args)
        args_dict.update(config_dict)
        test_wrapper(test_agent, args)
    else:
        agent = TrainAgent(args)
        try:
            agent.train(args.data, args.max_grad_norm, args.wd, test_agent,
                        args=args)
        except KeyboardInterrupt:
            test_wrapper(test_agent, args)
def test_kernel():
    py3nvml.grab_gpus(0)
    kernel_module = tf.load_op_library(
        os.path.join(LIB_BASE, 'kernel_example.so'))
    with tf.Session() as sess:
        result = kernel_module.example([5, 4, 3, 2, 1])
        np.testing.assert_array_equal(result.eval(),
                                      np.array([10, 8, 6, 4, 2]))
def setup():
    global barbara, barbara_t
    global bshape, bshape_half
    global ch
    py3nvml.grab_gpus(1, gpu_fraction=0.5)
    barbara = datasets.barbara()
    barbara = (barbara/barbara.max()).astype('float32')
    barbara = barbara.transpose([2, 0, 1])
    bshape = list(barbara.shape)
    bshape_half = bshape[:]
    bshape_half[1] //= 2
    barbara_t = torch.unsqueeze(
        torch.tensor(barbara, dtype=torch.float32, device=dev), dim=0)
    ch = barbara_t.shape[1]
def init_client(self, gpus):
    gpus = [int(gpu.strip()) for gpu in gpus.split(",")]
    if gpus:
        py3nvml.grab_gpus(len(gpus), gpu_fraction=0, gpu_select=gpus)
    ray.init(
        num_gpus=len(gpus),
        configure_logging=False,
        include_dashboard=False,
        namespace="ai.purplesmart.kernels",
        _redis_max_memory=250 * 1024 * 1024,
    )
    import counter
    self.counter = counter.create_counter()
    self.client = serve.start(detached=True)
def setup():
    global barbara, barbara_t, tf
    global bshape, bshape_extracol
    global ref_rowfilter, ch
    py3nvml.grab_gpus(1, gpu_fraction=0.5)
    barbara = datasets.barbara()
    barbara = (barbara / barbara.max()).astype('float32')
    barbara = barbara.transpose([2, 0, 1])
    bshape = list(barbara.shape)
    bshape_extracol = bshape[:]
    bshape_extracol[2] += 1
    barbara_t = torch.unsqueeze(torch.tensor(barbara, dtype=torch.float32),
                                dim=0).to(dev)
    ch = barbara_t.shape[1]

    # Some useful functions
    ref_rowfilter = lambda x, h: np.stack(
        [np_colfilter(s.T, h).T for s in x], axis=0)
def setup():
    global barbara, barbara_t
    global bshape, bshape_half
    global ref_rowdfilt, ch
    py3nvml.grab_gpus(1, gpu_fraction=0.5, env_set_ok=True)
    barbara = datasets.barbara()
    barbara = (barbara / barbara.max()).astype('float32')
    barbara = barbara.transpose([2, 0, 1])
    bshape = list(barbara.shape)
    bshape_half = bshape[:]
    bshape_half[2] //= 2
    barbara_t = torch.unsqueeze(torch.tensor(barbara, dtype=torch.float32),
                                dim=0).to(dev)
    ch = barbara_t.shape[1]

    # Some useful functions
    ref_rowdfilt = lambda x, ha, hb: np.stack(
        [np_coldfilt(s.T, ha, hb).T for s in x], axis=0)
# If we don't use a scheduler, just train 1 network in a simple loop
if args.no_scheduler:
    # Create reporting objects
    args.verbose = True
    outdir = os.path.join(os.environ['HOME'], 'gainlayer_results',
                          args.outdir)
    tr_writer = SummaryWriter(os.path.join(outdir, 'train'))
    val_writer = SummaryWriter(os.path.join(outdir, 'val'))
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    # Choose the model to run and build it
    if args.type is None:
        type_ = 'ref'
    else:
        type_ = args.type[0]
    py3nvml.grab_gpus(ceil(args.num_gpus))
    cfg = {'args': args, 'type': type_, 'num_gpus': args.num_gpus,
           'dwt': args.dwt, 'C': args.C, 'lr': args.lr, 'lr1': args.lr1,
           'mom': args.mom, 'mom1': args.mom1, 'wd': args.wd, 'q': args.q,
           'wd1': args.wd1, 'opt1': args.opt1}
    trn = TrainNET(cfg)
    trn._final_epoch = args.epochs

    # Copy this source file to the output directory for record keeping
    if args.resume:
        trn._restore(os.path.join(outdir, 'model_last.pth'))
    else:
        save_experiment_info(outdir, args.seed, args.no_comment, trn.model)

    if args.seed is not None and trn.use_cuda:
        torch.backends.cudnn.deterministic = True
# Author: Bichen Wu ([email protected]) 08/25/2016
"""Evaluation"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import py3nvml
py3nvml.grab_gpus(num_gpus=1, gpu_fraction=0.9)

import cv2
from datetime import datetime
import os.path
import sys
import time

import numpy as np
from six.moves import xrange
import tensorflow as tf

from config import *
from dataset import pascal_voc, kitti
from utils.util import bbox_transform, Timer
from nets import *

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'dataset', 'KITTI',
    """Currently support PASCAL_VOC or KITTI dataset.""")
tf.app.flags.DEFINE_string('data_path', '', """Root directory of data""")
tf.app.flags.DEFINE_string(
    'image_set', 'test', """Only used for VOC data."""
def test_dtcwt2(size, J, no_grad=False, dev='cuda'):
    x = torch.randn(*size, requires_grad=(not no_grad)).to(dev)
    h0a, h0b, _, _, h1a, h1b, _, _ = level1('farras')
    cols, rows = lowlevel2.prep_filt_quad_afb2d(h0a, h1a, h0b, h1b, device=dev)
    yh = []
    for j in range(3):
        x, y = lowlevel2.quad_afb2d(x, cols, rows, mode='zero')
        yh.append(y)
    return x, yh


if __name__ == "__main__":
    args = parser.parse_args()
    py3nvml.grab_gpus(1)
    if args.size > 0:
        size = (args.batch, 5, args.size, args.size)
    else:
        size = (args.batch, 5, 128, 128)

    if args.ref:
        print('Running dtcwt with FFTs')
        reference_fftconv(size, args.j, args.no_grad, args.device)
    elif args.convolution:
        print('Running 11x11 convolution')
        reference_conv(size, args.no_grad, args.device)
    elif args.dwt:
        print('Running separable dwt')
        separable_dwt(size, args.j, args.no_grad, args.device)
    elif args.fb:
# # Implementing a deep neural network using tensorflow
# In this notebook I will implement a 3-hidden-layer neural network and feed
# in all the necessary data. We will focus on S1_A1_E1.mat, which means we are
# only concerned with one subject.

# ## 1. Import all the necessary packages
import math
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
import scipy
import scipy.io as sio
import time
import py3nvml
py3nvml.grab_gpus(num_gpus=1, gpu_select=[1])


# ## 2. Load the data from the .mat file and split it into training (70%) and
# test (30%) sets.
def load_dataset(path, NoZero):
    # Load the emg data
    X_path = path + 'emgShuffled.mat'
    XData = sio.loadmat(X_path)
    X_orig = (np.array((XData['emgShuffled']))).T
    # 70/30 split (note: the "+1" skips one sample between the two splits)
    X_train_orig = X_orig[:, 0:int(0.7 * X_orig.shape[1])]
    X_test_orig = X_orig[:, int(0.7 * X_orig.shape[1]) + 1::]

    # Normalise the EMG data by its maximum
    X_train_orig = X_train_orig / np.amax(X_train_orig)
    X_test_orig = X_test_orig / np.amax(X_test_orig)

    # Load the labels
    Y_path = path + 'yShuffled.mat'
    YData = sio.loadmat(Y_path)
    Y_orig = (np.array((YData['yShuffled']))).T
    if NoZero == True:
def run(self):
    # def run(args, subprocess_func, subprocess_verbose=False):
    list_of_configs = [
        abs_path for abs_path in sorted(
            glob.glob(self.path_to_configs + '/*%s' % (self.config_ext)))
    ]
    list_of_gpus = self.available_gpus
    max_utilization = self.max_gpu_utilization
    max_jobs_per_gpu = self.max_jobs_per_gpu

    queued = copy.deepcopy(list_of_configs)
    if len(queued) == 0:
        raise AssertionError('No tasks(configs) given!')
    pools = {}
    running = OrderedDict()
    done = OrderedDict()
    failed = OrderedDict()
    curr_user = getpass.getuser()
    last_task_time = -float('inf')
    last_log_time = -float('inf')
    alpha = np.exp(-3 / self.time_between_tasks)
    total_gpu_utilization_filt = [0.0] * len(list_of_gpus)
    user_gpu_utilization_filt = [0.0] * len(list_of_gpus)
    while len(queued) + len(running) > 0:
        time.sleep(0.01)

        # allocate GPU (every log_refresh_rate seconds)
        cand_gpu = []
        cand_gpu_util = []
        cand_gpumem_util = []
        if time.time() - last_log_time >= 1.0:
            total_compute_procs, user_compute_procs = get_num_procs(
                allocated_gpus=list_of_gpus, username=curr_user)
            total_gpu_utilization = get_gpu_utilization(
                allocated_gpus=list_of_gpus)
            total_gpumem_utilization = get_gpumem_utilization(
                allocated_gpus=list_of_gpus)
            user_gpu_utilization = [
                ceil(x / (y + 1e-12) * z) for x, y, z in
                zip(user_compute_procs, total_compute_procs,
                    total_gpu_utilization)
            ]
            total_gpu_utilization_filt = [
                (1 - alpha) * x + alpha * X for x, X in zip(
                    total_gpu_utilization, total_gpu_utilization_filt)
            ]
            user_gpu_utilization_filt = [
                (1 - alpha) * x + alpha * X for x, X in zip(
                    user_gpu_utilization, user_gpu_utilization_filt)
            ]
            for i, gpuid in enumerate(list_of_gpus):
                tot_util_cond = total_gpu_utilization_filt[i] <= (
                    100 - self.utilization_margin)
                tot_memutil_cond = total_gpumem_utilization[
                    i] <= 50  # (1 - gpu_fraction)*100
                user_util_cond = user_gpu_utilization_filt[i] < floor(
                    max_utilization[i] *
                    (100 - self.utilization_margin) / 100)
                user_numproc_cond = user_compute_procs[
                    i] < max_jobs_per_gpu[i] or max_jobs_per_gpu[i] == -1
                if tot_util_cond and user_util_cond and user_numproc_cond and tot_memutil_cond:
                    cand_gpu.append(gpuid)
                    cand_gpu_util.append(total_gpu_utilization_filt[i])
                    cand_gpumem_util.append(total_gpumem_utilization[i])

        # run task (every time_between_tasks seconds)
        if len(queued) == 0 or len(cand_gpu) == 0 or time.time(
        ) - last_task_time < self.time_between_tasks:
            # no available GPUs or no queued tasks
            pass
        else:
            min_util_idx = cand_gpu_util.index(min(cand_gpu_util))
            if py3nvml.grab_gpus(num_gpus=1,
                                 gpu_select=[cand_gpu[min_util_idx]],
                                 gpu_fraction=0.5,
                                 max_procs=-1) == 0:
                # if for some reason cannot allocate gpu
                # print('CUDA_VISIBLE_DEVICES = %s'%(os.environ.get('CUDA_VISIBLE_DEVICES')))
                last_task_time = time.time()
                continue
            signal.signal(signal.SIGINT, signal.SIG_IGN)
            if self.child_verbose:
                p = multiprocessing.Pool(processes=1)
            else:
                p = multiprocessing.Pool(processes=1, initializer=mute)
            pools[queued[0]] = p
            with open(queued[0], 'r') as f:
                # running[queued[0]] = p.map_async(self.child_process, self._get_child_process_args(f))
                running[queued[0]] = p.apply_async(
                    self.child_process, self._get_child_process_args(f))
            signal.signal(signal.SIGINT, self.default_handler)
            queued.pop(0)
            last_task_time = time.time()

        # log (every log_refresh_rate seconds)
        if time.time() - last_log_time >= 1.0:
            # update thread status
            ready = []
            for key in running:
                if running[key].ready():  # call has been executed
                    ready.append(key)
                    if running[key].successful():
                        # process terminated successfully
                        done[key] = running[key]
                    else:
                        # process terminated with errors
                        failed[key] = running[key]
            for key in ready:
                running.pop(key)
                pools[key].close()
                pools[key].terminate()
                pools.pop(key)

            entry_len = 150
            print(''.center(entry_len, '+'))
            print(
                datetime.datetime.now(dateutil.tz.tzlocal()).strftime(
                    ' %Y/%m/%d_%H:%M:%S ').center(entry_len, '-'))
            print((
                '+ USER: %s (process limit: %s, utilization limit: %s%%)'
                % (curr_user, max_jobs_per_gpu, max_utilization)).ljust(
                    entry_len, ' '))
            for i, gpuid in enumerate(list_of_gpus):
                tup = (gpuid, )
                tup += (user_compute_procs[i], )
                tup += (total_compute_procs[i], )
                tup += (user_gpu_utilization[i], )
                tup += (total_gpu_utilization[i], )
                tup += (total_gpumem_utilization[i], )
                print((
                    '+ gpu%d compute processes (%d/%d) utilization rate (%d%%/%d%%) memory usage (--%%/%d%%)'
                    % tup).ljust(entry_len, ' '))
            print((' %d QUEUED ' % (len(queued))).center(entry_len, '-'))
            if self.kwargs.get('logging'):
                print((' %d LOGGING ' % (len(running))).center(
                    entry_len, '-'))
            else:
                print((' %d RUNNING ' % (len(running))).center(
                    entry_len, '-'))
            for key in running:
                name_str = os.path.basename(key)
                try:
                    tqdm_stat = pickle.load(
                        open(os.path.join('/tmp', name_str + '.tqdm'), 'rb'))
                    tqdm_str = 'gpu%s pid=%d |%d%%| %d/%d [%s<%s, %sit/s]' % tqdm_stat
                except:
                    tqdm_str = ''
                name_str = '+ ' + name_str
                print(name_str + tqdm_str.rjust(entry_len - len(name_str)))
            print((' %d FAILED ' % (len(failed))).center(entry_len, '-'))
            for key in failed:
                print(os.path.basename(key))
            print((' %d DONE ' % (len(done))).center(entry_len, '-'))
            for key in done:
                print(os.path.basename(key))
            print(''.center(entry_len, '+'))
            print('+')
            last_log_time = time.time()

        if self.terminate:
            self.resume = prompt_yes_or_no('Resume?')
            if self.resume:
                IPython.embed()
                self.terminate = False
            if self.terminate:
                break

    print('summary - done: %d, failed: %d, halted: %d, queued: %d' %
          (len(done), len(failed), len(running), len(queued)))
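# A minimal, standalone sketch (not part of the scheduler above) of the
# exponential smoothing it applies to GPU utilization readings before picking
# the least-loaded candidate GPU. The update rule mirrors the list
# comprehensions in run(); the alpha value and sample readings are
# illustrative only.
import numpy as np

def smooth(prev_filt, new_reading, alpha):
    # (1 - alpha) * new + alpha * old, elementwise over the per-GPU lists
    return [(1 - alpha) * x + alpha * X
            for x, X in zip(new_reading, prev_filt)]

alpha = np.exp(-3 / 10.0)   # e.g. time_between_tasks = 10 seconds
filt = [0.0, 0.0]           # one filtered value per GPU
for reading in ([90, 5], [10, 5], [95, 5]):
    filt = smooth(filt, reading, alpha)
    print([round(f, 1) for f in filt])
# A brief spike in a single reading moves the filtered value only part of the
# way, so placement decisions react to sustained load rather than noise.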
def run(self):
    # Access shared queues
    shared_pending_job_q = self.get_pending_job_q()
    shared_worker_status_q = self.get_worker_status_q()

    # Worker state
    self.worker_resume = True
    self.worker_terminate = False
    procs = {}
    running = OrderedDict()
    done = OrderedDict()
    failed = OrderedDict()
    last_job_time = -float('inf')
    alpha = np.exp(-3 / self.limits.time_between_jobs)
    total_gpu_utilization_filt = {
        gpu_id: 0.0 for gpu_id in self.limits.available_gpus
    }
    user_gpu_utilization_filt = {
        gpu_id: 0.0 for gpu_id in self.limits.available_gpus
    }
    worker_gpu_utilization_filt = {
        gpu_id: 0.0 for gpu_id in self.limits.available_gpus
    }

    num_pending = shared_pending_job_q.qsize()
    while num_pending + len(running):
        curr_user = getpass.getuser()
        list_of_gpus = self.limits.available_gpus
        max_utilization = self.limits.gpu_utilization_limit
        max_jobs_per_gpu = self.limits.gpu_job_limit

        # 1. update candidate GPU
        total_compute_procs, user_compute_procs, pid_compute_procs = \
            get_num_procs(allocated_gpus=list_of_gpus,
                          username=curr_user,
                          version='v2')
        worker_compute_procs = copy.deepcopy(user_compute_procs)
        total_gpu_utilization = get_gpu_utilization(
            allocated_gpus=list_of_gpus)
        user_gpu_utilization = [
            ceil(x / (y + 1e-12) * z) for x, y, z in
            zip(user_compute_procs, total_compute_procs,
                total_gpu_utilization)
        ]
        total_gpumem_utilization, user_gpumem_utilization, pid_gpumem_utilization = \
            get_gpumem_utilization(allocated_gpus=list_of_gpus,
                                   username=curr_user,
                                   version='v2')
        total_gpu_utilization_filt = [
            (1 - alpha) * x + alpha * X for x, X in zip(
                total_gpu_utilization, total_gpu_utilization_filt)
        ]
        user_gpu_utilization_filt = [
            (1 - alpha) * x + alpha * X for x, X in zip(
                user_gpu_utilization, user_gpu_utilization_filt)
        ]
        cand_gpu, cand_gpu_util, cand_gpumem_util = [], [], []
        for i, gpuid in enumerate(list_of_gpus):
            if gpuid < 0:  # CPU mode
                all_pid_compute_procs = [
                    item for sublist in pid_compute_procs
                    for item in sublist
                ]
                worker_compute_procs[i] = sum([
                    running[key].pid not in all_pid_compute_procs
                    for key in running
                ])
                user_compute_procs[i] = worker_compute_procs[i]
            else:
                worker_compute_procs[i] = sum([
                    running[key].pid in pid_compute_procs[i]
                    for key in running
                ])
            tot_util_cond = total_gpu_utilization_filt[i] <= (
                100 - self.limits.utilization_margin)
            tot_memutil_cond = total_gpumem_utilization[
                i] <= self.limits.max_gpu_mem_usage  # (1 - gpu_fraction)*100
            user_util_cond = user_gpu_utilization_filt[i] < floor(
                max_utilization[i] *
                (100 - self.limits.utilization_margin) / 100)
            user_numproc_cond = user_compute_procs[i] < max_jobs_per_gpu[
                i] or max_jobs_per_gpu[i] == -1
            worker_numproc_cond = worker_compute_procs[
                i] < max_jobs_per_gpu[i] or max_jobs_per_gpu[i] == -1
            if self.limits.apply_limits == 'user':
                is_cand = tot_util_cond and user_util_cond and user_numproc_cond and tot_memutil_cond
            elif self.limits.apply_limits == 'worker':
                is_cand = tot_util_cond and worker_numproc_cond and tot_memutil_cond
            else:
                is_cand = False
                print(
                    "Invalid apply_limits. Available options are ['user', 'worker']"
                )
            if is_cand:
                cand_gpu.append(gpuid)
                cand_gpu_util.append(total_gpu_utilization_filt[i])
                cand_gpumem_util.append(total_gpumem_utilization[i])

        # 2. run job process
        if len(cand_gpu) == 0 or time.time(
        ) - last_job_time < self.limits.time_between_jobs:
            # no available GPUs or no queued tasks
            pass
        else:
            min_util_idx = cand_gpu_util.index(min(cand_gpu_util))
            min_util_cand_gpu = cand_gpu[min_util_idx]
            if min_util_cand_gpu < 0:  # CPU mode
                os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
                grab_device_success = True
            else:
                grab_device_success = py3nvml.grab_gpus(
                    num_gpus=1,
                    gpu_select=[cand_gpu[min_util_idx]],
                    gpu_fraction=(100 - self.limits.max_gpu_mem_usage) / 100,
                    max_procs=-1) > 0
            if not grab_device_success:
                # if for some reason cannot allocate gpu
                # print('CUDA_VISIBLE_DEVICES = %s'%(os.environ.get('CUDA_VISIBLE_DEVICES')))
                # last_job_time = time.time()
                continue
            try:
                # {'tag': , 'config': , 'worker_args': , 'worker_kwargs': }
                job = shared_pending_job_q.get_nowait()
                num_pending -= 1
                # {'tag': path, 'config': json.load(f, object_hook=lambda d : SimpleNamespace(**d)),
                #  'worker_args': worker_args, 'worker_kwargs': worker_kwargs}
                signal.signal(signal.SIGINT, signal.SIG_IGN)
                job['worker_kwargs'].update({
                    'config': job['config'],
                    'config_path': job['tag']
                })
                p = multiprocessing.Process(target=self.worker,
                                            args=job['worker_args'],
                                            kwargs=job['worker_kwargs'])
                procs[job['tag']] = p
                p.start()
                running[job['tag']] = p
                signal.signal(signal.SIGINT, self.default_handler)
                last_job_time = time.time()
            except queue.Empty:
                pass
            except (EOFError, BrokenPipeError) as e:
                print('lost connection to server')

        # update thread status
        ready = []
        for key in running:
            if not running[key].is_alive():  # call has been executed
                ready.append(key)
                if running[key].exitcode == 0:
                    # process terminated successfully
                    done[key] = running[key]
                else:
                    # process terminated with errors
                    failed[key] = running[key]
        for key in ready:
            running.pop(key)
            procs[key].terminate()
            # procs[key].close()
            procs.pop(key)

        # 3. display status
        entry_len = 150
        print(''.center(entry_len, '+'))
        print(
            datetime.datetime.now(dateutil.tz.tzlocal()).strftime(
                ' %Y/%m/%d_%H:%M:%S ').center(entry_len, '-'))
        # worker status
        if self.limits.apply_limits == 'user':
            print('+ WORKER: %s (apply limits on user %s)' %
                  (self.name, curr_user))
        elif self.limits.apply_limits == 'worker':
            print('+ WORKER: %s (apply limits on current worker)' %
                  (self.name))
        else:
            print(
                "Invalid apply_limits. Available options are ['user', 'worker']"
            )
        print(('+ (gpu_ids=%s, job_limit=%s, util_limit=%s%%)' %
               (list_of_gpus, max_jobs_per_gpu, max_utilization)).ljust(
                   entry_len, ' '))
        for i, gpuid in enumerate(list_of_gpus):
            tup = (gpuid, )
            tup += (user_compute_procs[i], )
            tup += (worker_compute_procs[i], )
            tup += (total_compute_procs[i], )
            tup += (user_gpu_utilization[i], )
            tup += (total_gpu_utilization[i], )
            tup += (user_gpumem_utilization[i], )
            tup += (total_gpumem_utilization[i], )
            print((
                '+ gpu%d compute processes (%d(%d)/%d) utilization rate (%d%%/%d%%) memory usage (%d%%/%d%%)'
                % tup).ljust(entry_len, ' '))

        # job status
        print((' %d PENDING ' % (num_pending)).center(entry_len, '-'))
        # if self.kwargs.get('logging'):
        #     print((' %d LOGGING '%(len(running))).center(entry_len,'-'))
        # else:
        #     print((' %d RUNNING '%(len(running))).center(entry_len,'-'))
        print((' %d LOGGING/RUNNING ' % (len(running))).center(
            entry_len, '-'))
        tqdm_stats = []
        for key in running:
            name_str = os.path.basename(key)
            try:
                tqdm_stat = pickle.load(
                    open(os.path.join('/tmp', name_str + '.tqdm'), 'rb'))
                tqdm_stats.append(tqdm_stat)
                tqdm_str = 'gpu%s pid=%d |%d%%| %d/%d [%s<%s, %sit/s]' % tqdm_stat
            except:
                tqdm_stats.append(None)
                tqdm_str = ''
            name_str = '+ ' + name_str
            print(name_str + tqdm_str.rjust(entry_len - len(name_str)))
        print((' %d FAILED ' % (len(failed))).center(entry_len, '-'))
        for key in failed:
            print(os.path.basename(key))
        print((' %d DONE ' % (len(done))).center(entry_len, '-'))
        for key in done:
            print(os.path.basename(key))
        print(''.center(entry_len, '+'))
        print('+')

        # 4. report status to scheduler
        try:
            shared_worker_status_q.put({
                self.name: {
                    'limit': vars(self.limits),
                    'status': {
                        'worker_compute_procs': user_compute_procs,
                        'total_compute_procs': total_compute_procs,
                        'worker_gpu_utilization': user_gpu_utilization,
                        'total_gpu_utilization': total_gpu_utilization,
                        'worker_gpumem_utilization': user_gpumem_utilization,
                        'total_gpumem_utilization': total_gpumem_utilization
                    },
                    'running': OrderedDict(
                        ((key, tqdm_stat)
                         for key, tqdm_stat in zip(running, tqdm_stats))),
                    'done': OrderedDict(((key, None) for key in done)),
                    'failed': OrderedDict(((key, None) for key in failed)),
                    'last_updated': time.time()
                }
            })
        except (EOFError, BrokenPipeError) as e:
            # lost connection to server
            print('lost connection to server')

        # 5. SIGINT(ctrl-c) handler
        if self.worker_terminate:
            self.worker_resume = prompt_yes_or_no('Resume?')
            if self.worker_resume:
                IPython.embed()
                self.worker_terminate = False
            if self.worker_terminate:
                for key in running:
                    running[key].terminate()
                break

        # run while loop every second
        self.rate.sleep()
        try:
            num_pending = shared_pending_job_q.qsize()
        except (EOFError, BrokenPipeError) as e:
            # lost connection to server
            print('lost connection to server')

    print('summary - done: %d, failed: %d, halted: %d, pending: %d' %
          (len(done), len(failed), len(running), num_pending))
    time.sleep(wait_time_seconds)
    with open('/proc/stat', 'r') as fh:
        # see https://linux.die.net/man/5/proc how to interpret this
        lines = fh.readlines()
        cpulines = [
            line.strip().split(" ") for line in lines
            if line.startswith('cpu')
        ][1:]
        idle_time_stop = [
            safe_int(cpuinfo[4], idle_time_start[i])
            for i, cpuinfo in enumerate(cpulines)
        ]
    cpu_idle_info = [(idle_time_stop[i] - idle_time_start[i], cpu_id)
                     for i, cpu_id in enumerate(cpu_ids)]
    cpu_idle_info.sort(reverse=True)
    return cpu_idle_info


if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("Missing arguments: [cpu-count] [gpu-count]")
        sys.exit(1)
    cpu_count = int(sys.argv[1])
    gpu_count = int(sys.argv[2])
    argv_rest = sys.argv[2:]
    cpu_idle_info = find_idle_cpus(0.5)
    my_pid = os.getpid()
    cpu_list = ",".join([
        cpu_idle_count[1] for cpu_idle_count in cpu_idle_info[:cpu_count]
    ])
    argv = [] + sys.argv
    command = ["taskset", "-a", "-c", cpu_list] + argv[3:]
    # print(command)
    py3nvml.grab_gpus(num_gpus=gpu_count, gpu_fraction=0.95)
    environ = dict(os.environ)
    environ['LD_PRELOAD'] = os.path.abspath(
        os.path.dirname(__file__)) + "/limit_visible_cpus.so"
    environ['OMP_NUM_THREADS'] = str(cpu_count)
    # This call never returns.
    os.execvpe(command[0], command, environ)
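# A self-contained sketch (separate from the launcher above) of the /proc/stat
# read that the snippet relies on: per the proc(5) man page linked in its
# comment, the fifth whitespace-separated field of each "cpuN" line is that
# core's cumulative idle time in clock ticks. The helper name and the printout
# are illustrative only.
def read_idle_ticks():
    with open('/proc/stat', 'r') as fh:
        lines = fh.readlines()
    # drop the aggregate "cpu" line, keep the per-core "cpu0", "cpu1", ... lines
    cpulines = [line.split() for line in lines if line.startswith('cpu')][1:]
    return {cpuinfo[0]: int(cpuinfo[4]) for cpuinfo in cpulines}

# Sampling this twice and differencing, as the launcher does, gives the idle
# time accumulated over the interval for each core.
print(read_idle_ticks())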
def setup():
    py3nvml.grab_gpus(1, gpu_fraction=0.5)
    h, m = divmod(m, 60)
    return h, m, s


# If seed was not provided, create one and seed numpy and pytorch
if args.seed < 0:
    args.seed = np.random.randint(1 << 16)
np.random.seed(args.seed)
random.seed(args.seed)
torch.backends.cudnn.deterministic = True
torch.manual_seed(args.seed)

# Hyperparameter settings
py3nvml.grab_gpus(args.num_gpus, gpu_select=args.gpu_select,
                  gpu_fraction=0.7, max_procs=0)
use_cuda = torch.cuda.is_available()
best_acc = 0
start_epoch, batch_size = 1, args.batch_size

# ##############################################################################
# Model
print('\n[Phase 1] : Model setup')
if len(args.layers_per_scale) == 1:
    args.layers_per_scale = args.layers_per_scale[0]

if args.resume:
    # Load checkpoint
    print('| Resuming from checkpoint...')
    chkpt_dir = os.path.join(args.exp_dir, 'chkpt')
def setup():
    global mode, o_dim, ri_dim
    mode = mode_to_int('symmetric')
    o_dim = 2
    ri_dim = -1
    py3nvml.grab_gpus(1, gpu_fraction=0.5, env_set_ok=True)
def setup():
    py3nvml.grab_gpus(1, gpu_fraction=0.5, env_set_ok=True)
def test_zeroout():
    py3nvml.grab_gpus(0)
    zero_out_module = tf.load_op_library(os.path.join(LIB_BASE, 'zero_out.so'))
    with tf.Session() as sess:
        result = zero_out_module.zero_out([5, 4, 3, 2, 1])
        np.testing.assert_array_equal(result.eval(),
                                      np.array([5, 0, 0, 0, 0]))