def main():
    """
    main entry point
    :returns: 0 on success
    """
    subcmd_handlers = {
        'client': run_client,
        'server': run_server,
        'cluster': run_cluster,
    }
    # get the argument parser
    parser = cli.configure_parser()
    # parse arguments
    args = parser.parse_args()
    # normalize arguments
    args = cli.normalize_args(args)
    if args is None:
        return -1
    # set log level
    LOG.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    LOG.debug('logging system init')
    LOG.debug('running with args: %s', args)
    # create tmpdir and run handler
    with util.TempDir(preserve=args.keep_tmpdir) as tmpdir:
        return subcmd_handlers[args.subcmd](args, tmpdir)
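# Illustrative sketch, not part of the original module: main() returns an
# exit code, so a standard entry-point guard (assuming a module-level
# `import sys`) would typically forward that code to the shell.
if __name__ == '__main__':
    sys.exit(main())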
def __enter__(self):
    """
    Create tempdir
    :returns: tempdir path
    """
    self.tempdir = tempfile.mkdtemp(prefix=self.prefix)
    LOG.debug('Using tempdir: %s', self.tempdir)
    return self.tempdir
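# Illustrative sketch only, not part of the original module: shows the
# intended use of util.TempDir as a context manager (the preserve flag is
# taken from main() above; the file name written here is an assumption).
def _example_tempdir_usage():
    """
    write a scratch file inside a temporary directory
    :returns: path of the file written inside the tempdir
    """
    with util.TempDir(preserve=False) as tmpdir:
        scratch_path = os.path.join(tmpdir, 'scratch.txt')
        with open(scratch_path, 'w') as fp:
            fp.write('temporary data')
        return scratch_path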
def unpack(self, item_keys):
    """
    unpack program files and set up build dir structure
    :item_keys: items to unpack
    """
    LOG.debug('Extracting user program code')
    for key in item_keys:
        filename = PROGRAM_SOURCE_FILE_NAMES[key]
        code = self.data['code'][key]
        path = os.path.join(self.build_dir, filename)
        with open(path, 'w') as fp:
            fp.write(code)
def run(self):
    """
    main loop, wait for event, then process
    if shutdown scheduled continue until queue empty
    :returns: does not return, uses exception to end control
    :raises queue.Empty: when shutdown requested and queue empty
    """
    while True:
        event = SERVER_QUEUE.get(block=not self.shutdown_scheduled)
        LOG.debug('received event: %s', event)
        self.handle_event(event)
        LOG.debug('handled event: %s', event)
        SERVER_QUEUE.task_done()
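# Illustrative sketch, not part of the original module: run() ends by letting
# queue.Empty escape once shutdown is scheduled and the queue has drained.
# A hypothetical caller (the worker object and attribute access here are
# assumptions, and a module-level `import queue` is assumed) would drive the
# loop roughly like this.
def _example_drain_worker(worker):
    """ run a worker until shutdown is scheduled and its queue is empty """
    worker.shutdown_scheduled = True
    try:
        worker.run()
    except queue.Empty:
        LOG.debug('event queue drained, worker stopped')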
def get_reasonable_block_size(props, size_mult=32):
    """
    get reasonable cuda block size
    :props: gpu properties dict
    :size_mult: block size multiple
    :returns: reasonable block size
    """
    max_reasonable_size = props['max_block_size']
    min_reasonable_size = props['max_sm_threads'] / props['max_sm_blocks']
    avg_reasonable_size = (max_reasonable_size + min_reasonable_size) / 2
    reasonable_block_size = int(avg_reasonable_size / size_mult) * size_mult
    LOG.debug('Using CUDA block size: %s', reasonable_block_size)
    return reasonable_block_size
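# Worked example with made-up GPU properties (not measured values): the
# minimum reasonable size is 2048 / 16 = 128, the average of 1024 and 128 is
# 576, and 576 is already a multiple of 32, so it is returned unchanged.
def _example_block_size():
    """ illustrative sketch only: exercise get_reasonable_block_size """
    props = {
        'max_block_size': 1024,
        'max_sm_threads': 2048,
        'max_sm_blocks': 16,
    }
    return get_reasonable_block_size(props)  # -> 576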
def scan_hardware(args, tmpdir):
    """
    scan system hardware
    :args: parsed cmdline args
    :tmpdir: temporary directory
    :returns: dict with hardware info
    """
    hardware = {
        'CPU': check_cpus(),
        'GPU': check_gpus(args, tmpdir),
    }
    LOG.debug('hardware scan found: %s', hardware)
    return hardware
def copy_build_files(self, build_files):
    """
    copy build files from data/build_files into build dir
    :build_files: names of files to copy
    """
    LOG.debug('Copying additional build files')
    for build_file in build_files:
        resource_path = os.path.join(
            PROGRAM_DATA_DIRNAME, 'build_files', build_file)
        data = pkgutil.get_data('lizard', resource_path)
        path = os.path.join(self.build_dir, build_file)
        with open(path, 'wb') as fp:
            fp.write(data)
def partition_data(self, data):
    """
    load dataset and partition among clients
    :data: data
    """
    client_uuids = list(self.hardware.keys())
    client_count = len(client_uuids)
    LOG.debug("data size %i", sys.getsizeof(data))
    # FIXME this is a really rough estimate as the final calculation is done
    # after casting to double
    data_generator = self.py_mod.split_data(data)
    LOG.info(self.global_params)
    split_size = self.global_params[0] // client_count + 1
    LOG.debug("split size %i", split_size)
    post_datasets = {}
    for client_uuid in client_uuids:
        LOG.info("Splitting data")
        # FIXME use hardware scan to discover GPU mem size
        # currently rounded slightly down to avoid overflowing in loop
        # 8G gpu ram size
        # gpu_mem_remaining = 8589934592
        gpu_mem_remaining = 8500000000
        split_remaining = split_size
        data_count = 0
        LOG.info("global_params %s", self.global_params)
        dataset = []
        # subtract params size
        gpu_mem_remaining = (
            gpu_mem_remaining - sys.getsizeof(self.global_params))
        try:
            while split_remaining > 0 and gpu_mem_remaining > 0:
                next_split = next(data_generator)
                split_remaining = split_remaining - 1
                gpu_mem_remaining = (
                    gpu_mem_remaining - sys.getsizeof(next_split))
                dataset.append(next_split)
                data_count = data_count + 1
        except StopIteration:
            pass
        dataset_enc = [data_count, dataset]
        self.client_datasets[client_uuid] = dataset_enc
    self._initialize_global_state()
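# Minimal standalone sketch of the partitioning idea above (the function name
# and budget arguments are assumptions, not part of the original module): pull
# splits from a generator until either the per-client share or the remaining
# GPU memory budget is exhausted, mirroring the inner loop of partition_data.
def _example_partition(splits, split_size, gpu_mem_budget):
    """ collect at most split_size items without exceeding gpu_mem_budget """
    dataset = []
    data_generator = iter(splits)
    try:
        while split_size > 0 and gpu_mem_budget > 0:
            next_split = next(data_generator)
            split_size -= 1
            gpu_mem_budget -= sys.getsizeof(next_split)
            dataset.append(next_split)
    except StopIteration:
        pass
    # encode as [count, items], matching the client dataset encoding above
    return [len(dataset), dataset]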
def build(
        self, cuda_bin=None, include_path=None, unpack=True,
        set_compute_level=True):
    """
    set up user program resources and build shared obj
    :cuda_bin: path to cuda tools bin
    :include_path: path to cuda include dir
    :unpack: if true, unpack program json
    :set_compute_level: if true, specify appropriate compute level
    """
    if not self.build_dir or not os.path.isdir(self.build_dir):
        raise ValueError("Build dir not set up")
    if unpack:
        files = ['cuda', 'python', 'header']
        if self.use_c_extention:
            files.append('cpp')
        self.unpack(files)
    build_files = ['Makefile']
    if self.use_c_extention:
        build_files.append('setup.py')
    self.copy_build_files(build_files)
    make_cmd = ['make', '-C', self.build_dir]
    if cuda_bin is not None:
        nvcc_path = os.path.join(cuda_bin, 'nvcc')
        make_cmd.append('NVCC={}'.format(nvcc_path))
    if include_path is not None:
        make_cmd.append('CUDA_L64=-L{}'.format(include_path))
    if set_compute_level:
        flag_value = '-arch={}'.format(self.compute_level)
        make_cmd.append('COMPUTE_LEVEL_FLAG={}'.format(flag_value))
        LOG.debug('Using compute level: %s', flag_value)
    else:
        LOG.warning('Using default compute level, not optimized')
    LOG.debug('Building CUDA shared object')
    util.subp(make_cmd)
    if self.use_c_extention:
        LOG.debug('Building Python wrapper module')
        # XXX
        # FIXME create hardcoded tmp dir used by dynamic linker
        shared_dll = 'user_program_cuda.so'
        tmp_dir = '/tmp/lizard-slayer/'
        pathlib.Path(tmp_dir).mkdir(exist_ok=True)
        for the_file in os.listdir(tmp_dir):
            file_path = os.path.join(tmp_dir, the_file)
            if os.path.isfile(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        setup_cmd = ['python3', 'setup.py', 'build_ext', '-b', tmp_dir]
        util.subp(setup_cmd, cwd=self.build_dir)
        # copy over the shared library to be found by the linker
        shutil.copyfile(
            os.path.join(self.build_dir, shared_dll),
            os.path.join(tmp_dir, shared_dll))
        # FIXME remove path
        sys.path.append(tmp_dir)
        sys.path.append(self.build_dir)
        self.ready = True
    else:
        LOG.debug('No python C extension for user program')
        self.ready = True
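# Illustrative note, not part of the original module: with hypothetical
# arguments cuda_bin='/usr/local/cuda/bin',
# include_path='/usr/local/cuda/include' and a compute level such as 'sm_61',
# build() as written above assembles a make command roughly equivalent to:
#
#     make -C <build_dir> NVCC=/usr/local/cuda/bin/nvcc \
#         CUDA_L64=-L/usr/local/cuda/include COMPUTE_LEVEL_FLAG=-arch=sm_61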
def handle_event_run_program(event):
    """
    handle 'run_program' event
    :event: event to handle
    :returns: program result
    :raises: Exception: if error occurs or invalid request
    """
    runtime_id = util.hex_uuid()
    dataset_enc = event.data['dataset_enc']
    prog_checksum = event.data['checksum']
    global_params_enc = event.data['global_params_enc']
    init_path = os.path.join('/runtimes', prog_checksum, runtime_id)
    iterate_path = os.path.join(init_path, 'iterate')
    cleanup_path = os.path.join(init_path, 'cleanup')
    wakeup_ev = threading.Event()

    def multi_callback_wakeup(event_props):
        wakeup_ev.set()

    def runtime_init_callback(client, event_props):
        if event_props['status'] != events.EventStatus.SUCCESS.value:
            raise ValueError('{}: error on prog runtime init'.format(client))

    with server.state_access() as s:
        program = s.registered_progs[prog_checksum]
    if not program.ready:
        raise ValueError('cannot run program, not ready')
    runtime = program.get_new_server_runtime(runtime_id)
    runtime.prepare_datastructures(global_params_enc)
    runtime.partition_data(dataset_enc)
    runtime_init_remote_event_ids = []
    for client_uuid, dataset_enc in runtime.dataset_partitions_encoded.items():
        data = {
            'runtime_id': runtime_id,
            'checksum': prog_checksum,
            'dataset_enc': dataset_enc,
            'global_params_enc': global_params_enc,
            'send_remote_event': True,
        }
        with server.state_access() as s:
            c = s.clients[client_uuid]
            res = c.post(init_path, data, callback_func=runtime_init_callback)
        runtime_init_remote_event_ids.append(res['event_id'])
    with remote_event.remote_events_access() as r:
        r.register_multi_callback(
            runtime_init_remote_event_ids, multi_callback_wakeup)
    wakeup_ev.wait(timeout=300)
    wakeup_ev.clear()
    LOG.info('Runtime initialized for user program: %s', program)
    aggregation_lock = threading.Lock()

    def run_iteration_callback(client, event_props):
        if event_props['status'] != events.EventStatus.SUCCESS.value:
            raise ValueError(
                '{}: error running prog iteration'.format(client))
        with aggregation_lock:
            runtime.aggregate(event_props['result']['aggregation_result_enc'])

    runtime.reset_aggregation_result()
    iteration_count = 0
    while True:
        post_data = {
            'runtime_id': runtime_id,
            'checksum': prog_checksum,
            'global_state_enc': runtime.global_state_encoded,
            'send_remote_event': True,
        }
        with server.state_access() as s:
            s.post_all(
                iterate_path, post_data,
                callback_func=run_iteration_callback,
                multi_callback_func=multi_callback_wakeup)
        wakeup_ev.wait(timeout=600)
        wakeup_ev.clear()
        runtime.update_global_state()
        runtime.reset_aggregation_result()
        LOG.debug('Completed iteration for user program: %s', program)
        iteration_count = iteration_count + 1
        if runtime.done:
            break
    LOG.info('Cleaning up...')

    def runtime_cleanup_callback(client, event_props):
        if event_props['status'] != events.EventStatus.SUCCESS.value:
            raise ValueError(
                '{}: error on prog runtime clean up'.format(client))

    post_data = {
        'runtime_id': runtime_id,
        'checksum': prog_checksum,
        'send_remote_event': True,
    }
    with server.state_access() as s:
        s.post_all(
            cleanup_path, post_data,
            callback_func=runtime_cleanup_callback,
            multi_callback_func=multi_callback_wakeup)
    wakeup_ev.wait(timeout=60)
    wakeup_ev.clear()
    LOG.info('Finished running user program: %s %i', program, iteration_count)
    return {
        'end_aggregate': runtime.top_level_aggregate_encoded,
        'end_global_state': runtime.global_state_encoded,
    }