def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(self):
    """Installing on an explicit logger must never look up the root logger."""
    if not mock:
        self.skipTest('unittest.mock is not available')

    # Patch logging.getLogger so that any lookup performed while installing
    # the handler is recorded on the mock.
    with mock.patch('logging.getLogger') as patched_get_logger:
        install_mp_handler(self.logger)

        lookups = patched_get_logger.call_count
        self.assertEqual(0, lookups)
def test_when_no_logger_is_specified_then_it_uses_the_root_logger(self):
    """Without an argument, the handler is installed on logging.getLogger()."""
    if not mock:
        self.skipTest('unittest.mock is not available')

    with mock.patch('logging.getLogger') as patched_get_logger:
        # Hand out the fixture logger as the "root" logger.
        patched_get_logger.return_value = self.logger

        install_mp_handler()

        # The root logger is requested exactly once, with no arguments.
        patched_get_logger.assert_called_once_with()

    # Exactly one handler must remain, and it must wrap the fixture handler.
    installed = self.logger.handlers
    (wrapper,) = installed
    self.assertIsInstance(wrapper, MultiProcessingHandler)
    self.assertIs(wrapper.sub_handler, self.handler)
def parse_args(description, ParamsClass=SieverParams, **kwds):
    """
    Parse command line arguments.

    The command line parser accepts the standard parameters as printed by calling it with
    ``--help``.  All other parameters are used to construct params objects.  For example.

    ./foo 80 --workers 4 --trials 2 -S 1337 --a 1 2 --b 3 4

    would operate on dimension 80 with parameters (a: 1, b: 3), (a: 1, b: 4), (a: 2, b: 3), (a: 2,
    b: 4), i.e. the Cartesian product of all parameters.  It will run two trials each using four
    workers. Note that each worker may use several threads, too. The starting seed is `1337`.

    Besides parsing, this function also sets up logging: it creates the ``logs`` directory if it
    is missing, configures file logging at DEBUG level and attaches a console handler at the level
    given by ``--loglvl``.

    With ``--show-defaults`` the default parameters are printed and the process exits; with
    ``--dry-run`` the labels of all parameter combinations are printed and the process exits.

    :param description: help message
    :param ParamsClass: class used to build the params objects; instantiated as
        ``ParamsClass(**kwds)``
    :param kwds: default parameters; a double underscore ``__`` in a keyword name is replaced
        by ``/``
    :returns: a pair ``(args, all_params)`` where ``args`` is the namespace of the standard
        arguments and ``all_params`` is an ``OrderedDict`` mapping a label string to one params
        object per parameter combination
    :raises ValueError: if an extra command line token is found where an option name
        (something starting with ``-``) is expected

    """

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'lower_bound',
        type=int,
        help="lowest lattice dimension to consider (inclusive)")
    parser.add_argument(
        '-u',
        '--upper-bound',
        type=int,
        dest="upper_bound",
        default=0,
        help="upper bound on lattice dimension to consider (exclusive)")
    parser.add_argument('-s',
                        '--step-size',
                        type=int,
                        dest="step_size",
                        default=2,
                        help="increment lattice dimension in these steps")
    parser.add_argument('-t',
                        '--trials',
                        type=int,
                        dest="trials",
                        default=1,
                        help="number of experiments to run per dimension")
    parser.add_argument('-w',
                        '--workers',
                        type=int,
                        dest="workers",
                        default=1,
                        help="number of parallel experiments to run")
    parser.add_argument('-p',
                        '--pickle',
                        action='store_true',
                        dest="pickle",
                        help="pickle statistics")
    parser.add_argument('-S',
                        '--seed',
                        type=int,
                        dest="seed",
                        default=0,
                        help="randomness seed")
    parser.add_argument(
        '--dry-run',
        dest="dry_run",
        action='store_true',
        help=
        "Show parameters that would be used but don't run any actual experiments."
    )
    parser.add_argument('--show-defaults',
                        dest="show_defaults",
                        action='store_true',
                        help="Show default parameters and exit.")
    parser.add_argument('--loglvl',
                        type=str,
                        help="Logging level (one of DEBUG, WARN, INFO)",
                        default="INFO")
    parser.add_argument('--log-filename',
                        dest="log_filename",
                        type=str,
                        help="Logfile filename",
                        default=None)

    # Everything argparse does not recognise is kept in `unknown` and turned
    # into experiment parameters further down.
    args, unknown = parser.parse_known_args()

    # Keyword names cannot contain "/", so callers spell it as "__".
    kwds_ = OrderedDict()
    for k, v in six.iteritems(kwds):
        k_ = k.replace("__", "/")
        kwds_[k_] = v
    kwds = kwds_

    if args.show_defaults:
        pp = ParamsClass(**kwds)
        # Pad the keys to a common width so the values line up.
        slen = max(len(p) for p in pp) + 1
        fmt = "{key:%ds}: {value}" % slen
        for k, v in six.iteritems(pp):
            print(fmt.format(key=k, value=v))
        exit(0)

    # Start from a single parameter set labelled by the empty string; the
    # labels grow as each extra option multiplies the set below.
    all_params = OrderedDict([("", ParamsClass(**kwds))])

    unknown_args = OrderedDict()
    unknown = apply_aliases(unknown)

    # NOTE: This seems like the kind of thing the standard library can do (better)
    # Walk `unknown` as a sequence of "-key value value ..." groups.  `i`
    # always points at the token that is expected to be the next option name.
    i = 0
    while i < len(unknown):
        k = unknown[i]
        if not (k.startswith("--") or k.startswith("-")):
            raise ValueError("Failure to parse command line argument '%s'" % k)
        # Strip the leading dashes and normalise "-" to "_" in the option name.
        k = re.match("^-+(.*)", k).groups()[0]
        k = k.replace("-", "_")
        unknown_args[k] = []
        i += 1
        # Consume values until the next token starting with "-".  Note that
        # this means a negative number is read as an option name, not a value.
        for i in range(i, len(unknown)):
            v = unknown[i]
            if v.startswith("--") or v.startswith("-"):
                # Step back so that the `i += 1` after the loop lands on this
                # option name again.
                i -= 1
                break
            # NOTE(review): `eval` is run on raw command line text; only safe
            # as long as the command line is trusted.
            try:
                v = eval(v, {"BKZ": BKZ})
            except NameError:
                # Not a known name: keep the raw string.
                v = v
            except SyntaxError:
                # Not a Python expression: keep the raw string.
                v = v
            if not isinstance(v, (list, tuple)):
                v = [v]
            unknown_args[k].extend(v)
        i += 1
        # An option without any value acts as a boolean flag.
        if not unknown_args[k]:
            unknown_args[k] = [True]

    # Build the Cartesian product: every existing parameter set is copied
    # once per value of the option `k`, and its label is extended accordingly.
    for k, v in six.iteritems(unknown_args):
        all_params_ = OrderedDict()
        for p in all_params:
            for v_ in v:
                p_ = copy.copy(all_params[p])
                p_[k] = v_
                all_params_[p + "'%s': %s, " % (k, v_)] = p_
        all_params = all_params_

    log_filename = args.log_filename
    if log_filename is None:
        log_filename = log_filenamef()

    # NOTE(review): install_mp_handler() runs before basicConfig() and before
    # the console handler is added below; confirm against the
    # multiprocessing_logging documentation whether handlers added afterwards
    # are still wrapped.
    multiprocessing_logging.install_mp_handler()

    if not os.path.isdir("logs"):
        os.makedirs("logs")

    # The log file records everything (DEBUG and above) ...
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(levelname)5s:%(name)12s:%(asctime)s: %(message)s',
        datefmt='%Y/%m/%d %H:%M:%S %Z',
        filename=log_filename)

    # ... while the console only shows messages at the --loglvl level or above.
    console = logging.StreamHandler()
    console.setLevel(getattr(logging, args.loglvl.upper()))
    console.setFormatter(logging.Formatter('%(name)s: %(message)s', ))
    logging.getLogger('').addHandler(console)

    if args.dry_run:
        # Iterating the OrderedDict yields the label strings.
        for params in all_params:
            print(params)
        exit(0)

    return args, all_params
def main():
    """Shuffle documents from the input files into same-named output files.

    Reads the command line via ``parse_arguments()``, configures logging
    (including multiprocessing support), then runs two process pools that
    communicate through a bounded managed queue: one pool runs ``producer``
    over chunks of the input files, the other runs ``consumer`` over chunks
    of the output files.  The summed results of the two pools are compared
    at the end and a mismatch is logged as an error.

    Exits with status 1 if no input files are found.
    """
    args = parse_arguments()

    logging.basicConfig(
        level=getattr(logging, args.log_level.upper()),
        format='%(asctime)s - %(process)s - %(levelname)s - %(message)s')
    install_mp_handler()

    # Add 20 to the niceness of this process.
    os.nice(20)
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(args.output_dir, exist_ok=True)

    input_files = collect_inputs(args.input_dirs)
    logging.info('Scheduled {} files for shuffling.'.format(len(input_files)))
    if not input_files:
        logging.critical('No input files!')
        sys.exit(1)

    # Each output file is named after its input file.
    output_files = [
        os.path.join(args.output_dir, os.path.basename(f))
        for f in input_files
    ]

    # The first line of the first input file is handed to every consumer
    # as the header.
    with openall(input_files[0]) as inf:
        header = inf.readline().strip()

    with Pool(args.processes) as inpool, Pool(args.processes) as outpool:
        m = Manager()
        queue = m.Queue(maxsize=1000)
        # Shared counter, initialised to the number of producer processes,
        # together with the lock handed to both sides.
        num_readers = m.Value('I', args.processes)
        lock = m.Lock()

        # Each worker gets a chunk of all input / output files
        input_chunks = list(split_into(input_files, args.processes))
        output_chunks = list(split_into(output_files, args.processes))

        producer_f = partial(producer, queue=queue,
                             num_readers=num_readers, lock=lock)
        inresult = inpool.map_async(producer_f, input_chunks)
        consumer_f = partial(consumer, queue=queue, header=header,
                             documents=args.documents,
                             num_readers=num_readers, lock=lock)
        outresult = outpool.map_async(consumer_f, output_chunks)

        # .get() blocks until every producer / consumer has finished.
        docs_read = sum(inresult.get())
        docs_written = sum(outresult.get())

        logging.debug('Joining processes...')
        inpool.close()
        outpool.close()
        inpool.join()
        outpool.join()
        logging.debug('Joined processes.')

    if docs_read != docs_written:
        logging.error(f'The number of documents read ({docs_read}) and '
                      f'the number of documents written ({docs_written}) '
                      f'differs!')

    logging.info('Done.')
def test_when_a_logger_is_passed_then_it_wraps_all_handlers(self):
    """The fixture logger's handler is replaced by a wrapper around it."""
    install_mp_handler(self.logger)

    # Unpacking fails unless exactly one handler is left on the logger.
    (wrapper,) = self.logger.handlers

    self.assertIsInstance(wrapper, MultiProcessingHandler)
    wrapped = wrapper.sub_handler
    self.assertIs(wrapped, self.handler)
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path,
                            log_path=None, verbose=False, disable_logging=False,
                            num_workers=1, **ffmpeg_cfg):
    """
    Downloads Flickr dataset files

    Every non-empty line of the dataset file is treated as a URL.  URLs whose
    video and audio files both already exist under ``data_dir`` are skipped;
    all others are submitted to a pool of worker processes.

    Args:
        dataset_path:  Path to dataset file containing URLs
                       (Type: str)

        data_dir:      Output directory where video will be saved if output
                       path is not explicitly given
                       (Type: str)

        ffmpeg_path:   Path to ffmpeg executable
                       (Type: str)

        ffprobe_path:  Path to ffprobe executable
                       (Type: str)

    Keyword Args:
        log_path:         Path to log file. If None, defaults to "flickr-soundnet-dl.log"
                          (Type: str or None)

        verbose:          If True, prints detailed messages to console
                          (Type: bool)

        disable_logging:  If True, does not log to a file
                          (Type: bool)

        num_workers:      Number of multiprocessing workers used to download videos
                          (Type: int)

        **ffmpeg_cfg:     ffmpeg configurations; ``video_format`` (default
                          "mp4") and ``audio_format`` (default "flac") also
                          determine the expected output file extensions
    """
    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    audio_dir = os.path.join(data_dir, 'audio')
    video_dir = os.path.join(data_dir, 'video')
    for directory in (audio_dir, video_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    # These do not change from line to line, so compute them once.
    video_ext = '.' + ffmpeg_cfg.get('video_format', 'mp4')
    audio_ext = '.' + ffmpeg_cfg.get('audio_format', 'flac')
    download = partial(download_flickr_video, **ffmpeg_cfg)

    pool = mp.Pool(num_workers)
    try:
        with open(dataset_path, 'r') as f:
            for line in f:
                url = line.strip()
                if not url:
                    # Blank lines carry no URL to download.
                    continue

                media_filename = extract_flickr_id(url)
                video_filepath = os.path.join(video_dir,
                                              media_filename + video_ext)
                audio_filepath = os.path.join(audio_dir,
                                              media_filename + audio_ext)

                if os.path.exists(video_filepath) and os.path.exists(audio_filepath):
                    info_msg = 'Already downloaded video {}. Skipping.'
                    LOGGER.info(info_msg.format(media_filename))
                    continue

                worker_args = [url, data_dir, ffmpeg_path, ffprobe_path]
                pool.apply_async(download, worker_args)

    except KeyboardInterrupt:
        LOGGER.info("Forcing exit.")
        exit()
    finally:
        # Always wait for the submitted downloads, unless the user
        # interrupts a second time.
        try:
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            LOGGER.info("Forcing exit.")
            exit()

    LOGGER.info('Finished downloading videos!')
import multiprocessing
from pathlib import Path
from typing import List
import warnings

from astropy.io import fits
from astropy.wcs import WCS, FITSFixedWarning
import mocpy
import multiprocessing_logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)-25s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Installed after basicConfig so that the handler created there exists.
multiprocessing_logging.install_mp_handler()
logger = logging.getLogger(__name__)


def stokes_type(stokes_str: str) -> List[str]:
    """Split a string of Stokes parameters into a list of single letters.

    The input is upper-cased first, so ``"iq"`` yields ``["I", "Q"]``.

    Args:
        stokes_str: String made up of the letters I, Q, U and V (any case).

    Returns:
        The upper-cased characters of ``stokes_str``, one per list element.

    Raises:
        ValueError: If any character is not one of I, Q, U or V.
    """
    STOKES_PARAMS = ("I", "Q", "U", "V")
    stokes_str = stokes_str.upper()
    if any(char not in STOKES_PARAMS for char in stokes_str):
        raise ValueError(
            f"Stokes parameter must be one of {''.join(STOKES_PARAMS)}")
    return list(stokes_str)


def get_moc_output_dir(image_path: Path) -> Path:
    # NOTE(review): the receiver of this .replace() call is missing from the
    # text under review; `image_path.parent.name` is an assumption, and the
    # rest of this function is not visible here -- confirm both against the
    # original file.
    output_dir_name = image_path.parent.name.replace("IMAGES", "MOCS")
def test_lj_sim_manager_openmm_integration_run(
        self,
        class_tmp_path_factory,
        boundary_condition_class,
        resampler_class,
        work_mapper_class,
        platform,
        lj_params,
        lj_omm_sys,
        lj_integrator,
        lj_reporter_classes,
        lj_reporter_kwargs,
        lj_init_walkers,
        lj_openmm_runner,
        lj_unbinding_bc,
        lj_wexplore_resampler,
        lj_revo_resampler,
):
    """Run all combinations of components in the fixtures for the smallest
    amount of time, just to make sure they all work together and don't give
    errors.

    The string arguments ``boundary_condition_class``, ``resampler_class``,
    ``work_mapper_class`` and ``platform`` select which components are
    built; the ``lj_*`` arguments supply the ready-made objects to choose
    from.  The simulation is run for one cycle of two steps with two
    workers.  Nothing is asserted about the results: the test passes if no
    exception is raised.
    """

    logging.getLogger().setLevel(logging.DEBUG)
    install_mp_handler()
    logging.debug("Starting the test")

    print("starting the test")

    # the configuration class gives us a convenient way to
    # parametrize our reporters for the locale
    from wepy.orchestration.configuration import Configuration

    # the runner
    from wepy.runners.openmm import OpenMMRunner

    # mappers
    from wepy.work_mapper.mapper import Mapper
    from wepy.work_mapper.worker import WorkerMapper
    from wepy.work_mapper.task_mapper import TaskMapper

    # the worker types for the WorkerMapper
    from wepy.work_mapper.worker import Worker
    from wepy.runners.openmm import OpenMMCPUWorker, OpenMMGPUWorker

    # the walker task types for the TaskMapper
    from wepy.work_mapper.task_mapper import WalkerTaskProcess
    from wepy.runners.openmm import OpenMMCPUWalkerTaskProcess, OpenMMGPUWalkerTaskProcess

    n_cycles = 1
    n_steps = 2
    num_workers = 2

    # generate the reporters and temporary directory for this test
    # combination

    tmpdir_template = 'lj_fixture_{plat}-{wm}-{res}-{bc}'
    tmpdir_name = tmpdir_template.format(plat=platform,
                                         wm=work_mapper_class,
                                         res=resampler_class,
                                         bc=boundary_condition_class)

    # make a temporary directory for this configuration to work with
    tmpdir = str(class_tmp_path_factory.mktemp(tmpdir_name))

    # make a config so that the reporters get parametrized properly
    reporters = Configuration(
        work_dir=tmpdir,
        reporter_classes=lj_reporter_classes,
        reporter_partial_kwargs=lj_reporter_kwargs).reporters

    # one entry per cycle, each holding the number of steps for that cycle
    steps = [n_steps for _ in range(n_cycles)]

    # choose the components based on the parametrization
    boundary_condition = None
    resampler = None

    # NOTE(review): walker_fixtures and runner_fixtures are not used again
    # in this function.
    walker_fixtures = [lj_init_walkers]
    runner_fixtures = [lj_openmm_runner]
    boundary_condition_fixtures = [lj_unbinding_bc]
    resampler_fixtures = [lj_wexplore_resampler, lj_revo_resampler]

    walkers = lj_init_walkers

    # pick the fixture object whose class name matches the requested name;
    # the [0] raises IndexError when nothing matches
    boundary_condition = [
        boundary_condition
        for boundary_condition in boundary_condition_fixtures
        if type(boundary_condition).__name__ == boundary_condition_class
    ][0]
    resampler = [
        resampler for resampler in resampler_fixtures
        if type(resampler).__name__ == resampler_class
    ][0]

    assert boundary_condition is not None
    assert resampler is not None

    # generate the work mapper given the type and the platform

    # NOTE(review): work_mapper_classes is only referenced by the
    # commented-out selection code below.
    work_mapper_classes = {
        mapper_class.__name__: mapper_class
        for mapper_class in [Mapper, WorkerMapper, TaskMapper]
    }

    # # select the right one given the option
    # work_mapper_type = [mapper_type for mapper_type in work_mapper_classes
    #                     if type(mapper_type).__name__ == work_mapper_class][0]

    # decide based on the platform and the work mapper which
    # platform dependent components to build
    if work_mapper_class == 'Mapper':
        # then there is no settings
        work_mapper = Mapper()

    elif work_mapper_class == 'WorkerMapper':

        if platform == 'CUDA' or platform == 'OpenCL':
            work_mapper = WorkerMapper(num_workers=num_workers,
                                       worker_type=OpenMMGPUWorker,
                                       device_ids={
                                           '0': 0,
                                           '1': 1
                                       },
                                       proc_start_method='spawn')
        # NOTE(review): this is a plain `if`, not `elif`, so for 'OpenCL'
        # the mapper built just above is replaced by one without
        # proc_start_method.
        if platform == 'OpenCL':

            work_mapper = WorkerMapper(
                num_workers=num_workers,
                worker_type=OpenMMGPUWorker,
                device_ids={
                    '0': 0,
                    '1': 1
                },
            )

        elif platform == 'CPU':
            work_mapper = WorkerMapper(
                num_workers=num_workers,
                worker_type=OpenMMCPUWorker,
                worker_attributes={'num_threads': 1})

        elif platform == 'Reference':
            work_mapper = WorkerMapper(
                num_workers=num_workers,
                worker_type=Worker,
            )

    elif work_mapper_class == 'TaskMapper':

        if platform == 'CUDA':
            work_mapper = TaskMapper(
                num_workers=num_workers,
                walker_task_type=OpenMMGPUWalkerTaskProcess,
                device_ids={
                    '0': 0,
                    '1': 1
                },
                proc_start_method='spawn')

        elif platform == 'OpenCL':
            work_mapper = TaskMapper(
                num_workers=num_workers,
                walker_task_type=OpenMMGPUWalkerTaskProcess,
                device_ids={
                    '0': 0,
                    '1': 1
                })

        elif platform == 'CPU':
            work_mapper = TaskMapper(
                num_workers=num_workers,
                walker_task_type=OpenMMCPUWalkerTaskProcess,
                worker_attributes={'num_threads': 1})

        elif platform == 'Reference':
            # NOTE(review): the other TaskMapper calls pass
            # `walker_task_type`; this one passes `worker_type` -- confirm
            # which keyword is intended.
            work_mapper = TaskMapper(
                num_workers=num_workers,
                worker_type=WalkerTaskProcess,
            )

    # NOTE(review): the text under review has lost its indentation, so it is
    # not certain whether this `else` closes the work-mapper chain (as laid
    # out here) or the platform chain inside the TaskMapper branch -- confirm
    # against the original file.
    else:
        raise ValueError("Platform {} not recognized".format(platform))

    # initialize the runner with the platform
    runner = OpenMMRunner(lj_omm_sys.system,
                          lj_omm_sys.topology,
                          lj_integrator,
                          platform=platform)

    logging.debug("Constructing the manager")

    manager = Manager(walkers,
                      runner=runner,
                      boundary_conditions=boundary_condition,
                      resampler=resampler,
                      work_mapper=work_mapper,
                      reporters=reporters)

    # since different work mappers need different process start
    # methods for different platforms i.e. CUDA and linux fork
    # vs. spawn we choose the appropriate one for each method.

    logging.debug("Starting the simulation")

    walkers, filters = manager.run_simulation(n_cycles,
                                              steps,
                                              num_workers=num_workers)
def main(): parser = argparse.ArgumentParser(description='''Starts a BLEva Gateway service on the device.''', epilog='''Note: This requires a BLED112 dongle from Bluegiga.''') parser.add_argument('-u', '--url', help='''URL of BLEva server''', required=True) parser.add_argument('-d', '--debug', help='Debug level (0-4)', type=int, default=20, choices=[10, 20, 30, 40, 50]) args = parser.parse_args() url = args.url print url tty_paths = util.get_tty_paths() FORMAT = '%(asctime)s - %(name)s - %(processName)s - %(levelname)s - %(message)s' logging.basicConfig(format=FORMAT, filename='bled112.log') logger = logging.getLogger('BLEva') logger.setLevel(args.debug) import multiprocessing_logging multiprocessing_logging.install_mp_handler()'\n--------------------')'BLEva has started')'\n--------------------') while True:'\n--------------------')'BLEva is waiting for new benchmark') print "BLEva is waiting for new benchmarks" b = getBenchmark(url + '/benchmark') print b if b != '':'BLEva received new benchmark') print "got new benchmark" j = json.loads(b) instances = [] for dongle in j['dongles']: gap_role = dongle['gap_role'] gatt_role = dongle['gatt_role'] replicas = dongle['replicas'] print replicas logger.debug("Replicas: " + str(replicas)) if replicas > len(tty_paths): raise Exception("Too few dongles connected.") for replica in xrange(0, replicas): if gap_role in ['broadcaster', 'peripheral']: a = dongle['steps'] steps = [] for v in a: s = Step() s.time = v['time'] print "json time " + str(['time']) s.ble_operation = v['ble_operation'] # s.adv_data = map(ord, v['adv_data'][2:].decode("hex")) # s.short_name = util.pad_truncate(s.short_name, 5) # s.long_name = util.pad_truncate(s.long_name, 12) s.long_name = v['long_name'] if replica < 10: s.short_name = v['short_name'] + str(0) + str( replica) if s.long_name != "": s.long_name = v['long_name'] + str( 0) + str(replica) else: s.short_name = v['short_name'] + str(replica) if s.long_name != "": s.long_name = v['long_name'] + 
str(replica) s.short_name = util.pad_truncate(s.short_name, 7) if s.long_name != "": s.long_name = util.pad_truncate( s.long_name, 14) logger.debug("Replica Short Name: " + s.short_name) logger.debug("Replica Long Name: " + s.long_name) s.major = int( v['major'], 0) # NOTE base=0 guesses base from string s.minor = int(v['minor'], 0) s.adv_interval_min = int(v['adv_interval_min'], 0) s.adv_interval_max = int(v['adv_interval_max'], 0) s.adv_channels = int(v['adv_channels'], 0) s.gap_discoverable_mode = ble_codes.gap_discoverable_mode[ v['gap_discoverable_mode']] s.gap_connectable_mode = ble_codes.gap_connectable_mode[ v['gap_connectable_mode']] if "connection_interval_min" in v: s.connection_interval_min = v[ "connection_interval_min"] if "connection_interval_max" in v: s.connection_interval_max = v[ "connection_interval_max"] if "slave_latency" in v: s.slave_latency = v["slave_latency"] if "supervision_timeout" in v: s.supervision_timeout = v[ "supervision_timeout"] steps.append(s) peripheral = Peripheral(logger=logger, steps=steps, port_name=tty_paths[replica], gap_role=gap_role, gatt_role=gatt_role) instances.append(peripheral)'BLEva is starting benchmark now') print "BLEva is starting benchmark now" processes = [] logger.debug('Telling Phone to start') print "notifying phone" urllib2.urlopen(url + '/benchmark/sync/dongle').read() print "done notified" if not IBEACON: for i in instances: print i p = mp.Process(target=i.start_benchmark, name=i.steps[0].short_name) p.start() processes.append(p) for p in processes: p.join() else: time.sleep(40) print "finished one benchmark"'BLEva finished one benchmark' ) # FIXME fix logger to also log spawned processes if b == '': print "BLEva server not available, sleeping a while and try again." 'BLEva server not available, sleeping a while and try again.' ) # FIXME fix logger to also log spawned processes time.sleep( 10) # sleep and then try again until server is available
def main():
    """Start all worker processes and supervise them until asked to stop.

    Sets up logging, creates one collector process per configured source
    plus the handler, merger and Zabbix updater processes, starts them and
    then loops once per second until the stop event is set.  Every 60
    seconds the process status is logged and, if a health file is
    configured, the health file is written.  If any child process is found
    dead, the stop event is set.  After the loop every process is
    terminated and waited for.

    Exits with status 1 if a child process cannot be initialised.
    """
    config = get_config()
    logging.basicConfig(
        format=
        '%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s',
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=logging.DEBUG)
    multiprocessing_logging.install_mp_handler()
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    # Bind the name unconditionally: it is read in the status loop below even
    # when no health file is configured.
    health_file = None
    if config.zac.health_file is not None:
        health_file = os.path.abspath(config.zac.health_file)

    logging.info("Main start (%d) version %s", os.getpid(), __version__)

    stop_event = multiprocessing.Event()
    state_manager = multiprocessing.Manager()
    processes = []

    # One queue and one collector process per configured source.
    source_hosts_queues = []
    source_collectors = get_source_collectors(config)
    for source_collector in source_collectors:
        source_hosts_queue = multiprocessing.Queue()
        process = processing.SourceCollectorProcess(source_collector["name"],
                                                    state_manager.dict(),
                                                    source_collector["module"],
                                                    source_collector["config"],
                                                    source_hosts_queue)
        source_hosts_queues.append(source_hosts_queue)
        processes.append(process)

    try:
        process = processing.SourceHandlerProcess("source-handler",
                                                  state_manager.dict(),
                                                  config.zac.db_uri,
                                                  source_hosts_queues)
        processes.append(process)

        process = processing.SourceMergerProcess("source-merger",
                                                 state_manager.dict(),
                                                 config.zac.db_uri,
                                                 config.zac.host_modifier_dir)
        processes.append(process)

        process = processing.ZabbixHostUpdater("zabbix-host-updater",
                                               state_manager.dict(),
                                               config.zac.db_uri,
                                               config.zabbix)
        processes.append(process)

        process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater",
                                                    state_manager.dict(),
                                                    config.zac.db_uri,
                                                    config.zabbix)
        processes.append(process)

        process = processing.ZabbixTemplateUpdater("zabbix-template-updater",
                                                   state_manager.dict(),
                                                   config.zac.db_uri,
                                                   config.zabbix)
        processes.append(process)
    except exceptions.ZACException as e:
        logging.error("Failed to initialize child processes. Exiting: %s", str(e))
        sys.exit(1)

    for process in processes:
        process.start()

    with processing.SignalHandler(stop_event):
        status_interval = 60
        # Report immediately on the first pass through the loop.
        next_status = datetime.datetime.now()

        while not stop_event.is_set():
            if next_status < datetime.datetime.now():
                if health_file is not None:
                    write_health(health_file, processes, source_hosts_queues,
                                 config.zabbix.failsafe)
                log_process_status(processes)
                next_status = datetime.datetime.now() + datetime.timedelta(
                    seconds=status_interval)

            dead_process_names = [
                process.name for process in processes
                if not process.is_alive()
            ]
            if dead_process_names:
                logging.error("A child has died: %s. Exiting",
                              ', '.join(dead_process_names))
                stop_event.set()

            time.sleep(1)

        logging.debug(
            "Queues: %s",
            ", ".join([str(queue.qsize()) for queue in source_hosts_queues]))

        for process in processes:
            logging.info("Terminating: %s(%d)", process.name, process.pid)
            process.terminate()

        alive_processes = [
            process for process in processes if process.is_alive()
        ]
        while alive_processes:
            process = alive_processes[0]
            logging.info("Waiting for: %s(%d)", process.name, process.pid)
            log_process_status(processes)  # TODO: Too verbose?
            process.join(10)
            # exitcode is still None if the process did not end within the
            # ten second join above.
            if process.exitcode is None:
                logging.warning(
                    "Process hanging. Signaling new terminate: %s(%d)",
                    process.name, process.pid)
                process.terminate()
            time.sleep(1)
            alive_processes = [
                process for process in processes if process.is_alive()
            ]

    logging.info("Main exit")
import multiprocessing as mp
from collections import defaultdict
from queue import Full, Empty
import logging

import numpy as np
from multiprocessing_logging import install_mp_handler

from .base_iterator import BaseIterator
from ...routines.mp_routines import ArrayDictQueue
from ... import ROOT_LOGGER_NAME, ROOT_LOGGER_LEVEL

# Module logger named "<root logger name>.<module name>".  The
# multiprocessing handler is installed on this logger when the module is
# imported.
logger = logging.getLogger('{}.{}'.format(ROOT_LOGGER_NAME, __name__))
logger.setLevel(ROOT_LOGGER_LEVEL)
install_mp_handler(logger=logger)


class MultiProcessIterator(BaseIterator):
    """Iterates through data with base iterator interface, implementing in-batch parallelism"""

    def __init__(self,
                 num_processes,
                 max_tasks=100,
                 max_results=100,
                 use_shared=False,
                 *args,
                 **kwargs):
        """
        :param num_processes: number of processes to be used by iterator
        :param max_tasks: max number of tasks to be put in tasks queue;
            raised to the batch size if it is smaller
        :param max_results: max volume of output queue; raised to the batch
            size if it is smaller
        :param use_shared: whether to use array queue for passing big arrays without pickling them
        :param args: positional arguments passed on to ``BaseIterator``
        :param kwargs: keyword arguments passed on to ``BaseIterator``
        """
        super(MultiProcessIterator, self).__init__(*args, **kwargs)
        self._num_processes = num_processes
        # presumably self._batch_size is set by BaseIterator.__init__ -- TODO confirm
        self._max_tasks = max(max_tasks, self._batch_size)
        self._max_results = max(max_results, self._batch_size)
def __init__(self,
             producer_func,
             producer_config_args,
             pipe_funcs,
             pipe_funcs_config_args,
             pipe_n_procs,
             accumulator_object,
             accumulator_func,
             accumulator_config_args,
             worker_get_limit=5):
    """Validate the pipeline description and set up the shared state.

    Args:
        producer_func: a generator function, or a tuple of generator
            functions when several producers are used.
        producer_config_args: a tuple of arguments for the producer; with
            several producers, an iterable holding one tuple per producer.
        pipe_funcs: non-empty tuple of callables, one per pipe stage.
        pipe_funcs_config_args: tuple of tuples, one per pipe stage.
        pipe_n_procs: tuple of integers, one per pipe stage.
        accumulator_object: stored as given; not validated here.
        accumulator_func: a callable.
        accumulator_config_args: a tuple of arguments.
        worker_get_limit: an integer greater than 1.

    Raises:
        AssertionError: if any argument violates the contract above.  The
            checks are explicit ``if`` tests rather than ``assert``
            statements so that they still run under ``python -O``.
    """
    # enforce the contract.
    if not (isinstance(worker_get_limit, int) and worker_get_limit > 1):
        raise AssertionError('worker_get_limit must be an integer > 1')

    # allow multiple producers
    self._multiple_producers = isinstance(producer_func, tuple)
    if self._multiple_producers:
        producers = producer_func
    else:
        producers = (producer_func, )

    # check functions
    if not all(callable(func) for func in producers):
        raise AssertionError(
            'must provide a callable function for producer')
    if not all(isgeneratorfunction(func) for func in producers):
        raise AssertionError(
            'producer function(s) must (all) be a generator function')
    if not isinstance(pipe_funcs, tuple):
        raise AssertionError(
            'must supply a tuple of callable functions for pipe_funcs')
    if not all(callable(pf) for pf in pipe_funcs):
        raise AssertionError(
            'all elements inside of pipe_funcs must be callable functions'
        )

    # check arguments
    if self._multiple_producers:
        producer_args_ok = all(
            isinstance(args, tuple) for args in producer_config_args)
    else:
        producer_args_ok = isinstance(producer_config_args, tuple)
    if not producer_args_ok:
        raise AssertionError(
            'function arguments must be provided as a tuple')
    if not (isinstance(pipe_funcs_config_args, tuple) and all(
            isinstance(pfa, tuple) for pfa in pipe_funcs_config_args)):
        raise AssertionError(
            'pipe function arguments must be provided as a tuple of tuples'
        )

    # check procs
    if not (isinstance(pipe_n_procs, tuple)
            and all(isinstance(n, int) for n in pipe_n_procs)):
        raise AssertionError('must provide a tuple of integers')

    # check agreement between correlated inputs
    if not (len(pipe_funcs) == len(pipe_funcs_config_args)
            and len(pipe_funcs) == len(pipe_n_procs)):
        raise AssertionError(
            'must provide one tuple of arguments and a number of processes for each pipe function'
        )
    if len(pipe_funcs) == 0:
        raise AssertionError('must provide work for the pipe to do')

    # check accumulator function
    if not callable(accumulator_func):
        raise AssertionError(
            'must provide callable function for accumulator_func')

    # check accumulator args
    if not isinstance(accumulator_config_args, tuple):
        raise AssertionError(
            'must privde a tuple of arguments for accumulator_config_args')

    # contract satisfied
    self.N = len(pipe_funcs)  # used all over in here

    # setup handlers to send child process logs into main thread's logger
    install_mp_handler()

    self.producer_func = producer_func
    self.producer_config_args = producer_config_args
    self.pipe_funcs = pipe_funcs
    self.pipe_funcs_config_args = pipe_funcs_config_args
    self.pipe_n_procs = pipe_n_procs
    self.accumulator_object = accumulator_object
    self.accumulator_func = accumulator_func
    self.accumulator_config_args = accumulator_config_args
    self.worker_get_limit = worker_get_limit

    # use a manager server to make cleanup easy
    self._sync_server = Manager()
    # 1 manager for each pipe func
    self._managers = [None for _ in range(self.N)]
    self._error_flag = self._sync_server.Value('i', int(False))
    # 1 producer finished flag for each manager, 1 for the consumer
    self._flags = [
        self._sync_server.Value('i', 0) for _ in range(self.N + 1)
    ]
    # 1 out(in) queue per pipe_func, + 1 extra in(out)
    self._queues = [self._sync_server.Queue() for _ in range(self.N + 1)]
    self._total_produced = self._sync_server.Value('i', 0)
    self._total_consumed = 0
# NOTE(review): this statement is the tail of code that starts before the
# text under review; its original indentation cannot be determined here.
time.sleep(1)


def excepthook(exctype, value, traceback):
    """Terminate all child processes, then report the uncaught exception.

    Installed as ``sys.excepthook`` so that an uncaught exception in the
    main process does not leave child processes running.

    Args:
        exctype: class of the uncaught exception.
        value: the exception instance.
        traceback: traceback object of the exception.
    """
    for p in multiprocessing.active_children():
        p.terminate()
    # A bare `raise` has no exception to re-raise inside the hook, so hand
    # the exception to the interpreter's default hook for printing instead.
    sys.__excepthook__(exctype, value, traceback)


sys.excepthook = excepthook

# Configure logging from the JSON file before installing the
# multiprocessing handlers, so the configured handlers already exist.
with open('config.logging.json', 'rt') as f:
    config = json.load(f)
logging.config.dictConfig(config)

import multiprocessing_logging
multiprocessing_logging.install_mp_handler()
multiprocessing_logging.install_mp_handler(logging.getLogger('overseer.quality'))
multiprocessing_logging.install_mp_handler(logging.getLogger('protocol'))

logger = logging.getLogger('overseer')

# From here on `config` is the object returned by rc_config(), no longer
# the logging dictionary loaded above.
config = rc_config()

overseer_uuid = '%s' % uuid.uuid4()
site_uuid = config.site_uuid

logger.info('Overseer %s initializing' % (overseer_uuid))
logger.info('Site UUID: %s' % site_uuid)

demods = {}
def emmet(spec_or_dbfile, run, issue, sbatch, bb, yes, no_dupe_check, verbose):
    """Command line interface for emmet"""
    # Entry point shared by all sub-commands: sets the log level, fills the
    # click context object and installs the multiprocessing log handler.
    # With `run` set, a GitHub session and an in-memory log stream are stored
    # in the context; otherwise only a dry-run notice is printed.
    level = logging.DEBUG if verbose else logging.INFO
    logger.setLevel(level)

    ctx = click.get_current_context()
    ctx.ensure_object(dict)

    if bb and not sbatch:
        raise EmmetCliError(
            "Burst buffer only available in SBatch mode (--sbatch).")

    if spec_or_dbfile:
        client = calcdb_from_mgrant(spec_or_dbfile)
        ctx.obj["CLIENT"] = client
        # Disabled: buffered MongoDB log handler.
        # ctx.obj["MONGO_HANDLER"] = BufferedMongoHandler(
        #    host=client.host,
        #    port=client.port,
        #    database_name=client.db_name,
        #    username=client.user,
        #    password=client.password,
        #    level=logging.WARNING,
        #    authentication_db=client.db_name,
        #    collection="emmet_logs",
        #    buffer_periodical_flush_timing=False,  # flush manually
        # )
        # logger.addHandler(ctx.obj["MONGO_HANDLER"])
        # coll = ctx.obj["MONGO_HANDLER"].collection
        # ensure_indexes(SETTINGS.log_fields, [coll])

    if not run:
        click.secho("DRY RUN! Add --run flag to execute changes.", fg="green")
    else:
        if not issue:
            raise EmmetCliError("Need issue number via --issue!")

        # Collect log records in memory for the duration of the run.
        log_stream = StringIO()
        ctx.obj["LOG_STREAM"] = log_stream
        stream_handler = logging.StreamHandler(log_stream)
        stream_handler.setFormatter(
            logging.Formatter(
                "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"))
        logger.addHandler(stream_handler)

        # The GitHub token is cached in the user's home directory; ask for
        # credentials only when no cached token file exists yet.
        home = os.path.expanduser("~")
        credentials_path = os.path.join(home, ".emmet_credentials")
        if not os.path.exists(credentials_path):
            user = click.prompt("GitHub Username")
            password = click.prompt("GitHub Password", hide_input=True)
            auth = authorize(
                user,
                password,
                ["user", "repo", "gist"],
                "emmet CLI",
                two_factor_callback=opt_prompt,
            )
            with open(credentials_path, "w") as token_file:
                token_file.write(auth.token)

        with open(credentials_path, "r") as token_file:
            token = token_file.readline().strip()
        ctx.obj["GH"] = login(token=token)

    install_mp_handler(logger=logger)
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path,
                            log_path=None, verbose=False, disable_logging=False,
                            num_workers=1, **ffmpeg_cfg):
    """
    Downloads Flickr dataset files

    Starts ``num_workers`` worker processes that read URLs from a bounded
    queue.  The first two workers receive a copy of the ffmpeg
    configuration whose ``video_codec`` has "_nvenc" appended; the others
    receive the configuration unchanged.  Every non-empty line of the
    dataset file is treated as a URL; URLs whose output files already exist
    are skipped.  After the last URL, one "#END#" marker per worker is
    queued and the workers are joined.

    Args:
        dataset_path:  Path to dataset file containing URLs
                       (Type: str)

        data_dir:      Output directory where video will be saved if output
                       path is not explicitly given
                       (Type: str)

        ffmpeg_path:   Path to ffmpeg executable
                       (Type: str)

        ffprobe_path:  Path to ffprobe executable
                       (Type: str)

    Keyword Args:
        log_path:         Path to log file. If None, defaults to "flickr-soundnet-dl.log"
                          (Type: str or None)

        verbose:          If True, prints detailed messages to console
                          (Type: bool)

        disable_logging:  If True, does not log to a file
                          (Type: bool)

        num_workers:      Number of multiprocessing workers used to download videos
                          (Type: int)

        **ffmpeg_cfg:     ffmpeg configurations.  ``video_codec`` is
                          required; ``video_format`` (default "mp4"),
                          ``audio_format`` (default "flac") and
                          ``skip_audio`` (default True) are read here as well.

    Raises:
        KeyError: if ``video_codec`` is missing from ``ffmpeg_cfg``.
    """
    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    audio_dir = os.path.join(data_dir, 'audio')
    video_dir = os.path.join(data_dir, 'video')
    for directory in (audio_dir, video_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    ffmpeg_cfg_gpu = dict(ffmpeg_cfg)
    ffmpeg_cfg_gpu["video_codec"] += "_nvenc"

    # These do not change from line to line, so read them once.
    video_ext = '.' + ffmpeg_cfg.get('video_format', 'mp4')
    skip_audio = ffmpeg_cfg.get("skip_audio", True)
    audio_ext = None if skip_audio else '.' + ffmpeg_cfg.get('audio_format', 'flac')

    url_queue = mp.Queue(3 * num_workers)
    cv_main_to_worker = mp.Condition()
    cv_worker_to_main = mp.Condition()
    p_list = []

    try:
        worker_args = (url_queue, cv_main_to_worker, cv_worker_to_main,
                       data_dir, ffmpeg_path, ffprobe_path)
        for i in range(num_workers):
            # The first two workers use the "_nvenc" codec configuration.
            worker_cfg = ffmpeg_cfg_gpu if i < 2 else ffmpeg_cfg
            p = mp.Process(target=download_flickr_video,
                           args=worker_args, kwargs=worker_cfg)
            p.start()
            p_list.append(p)

        with open(dataset_path, 'r') as f:
            for line in f:
                url = line.strip()
                if not url:
                    continue

                media_filename = extract_flickr_id(url)
                already_done = os.path.exists(
                    os.path.join(video_dir, media_filename + video_ext))
                if already_done and not skip_audio:
                    # With audio enabled, both files must be present.
                    already_done = os.path.exists(
                        os.path.join(audio_dir, media_filename + audio_ext))

                if already_done:
                    info_msg = 'Already downloaded video {}. Skipping.'
                    LOGGER.info(info_msg.format(media_filename))
                    continue

                # Non-blocking put; while the queue is full, wait up to five
                # seconds on the worker-to-main condition and retry.
                while True:
                    try:
                        url_queue.put(url, False)
                        break
                    except queue.Full:
                        with cv_worker_to_main:
                            cv_worker_to_main.wait(5.0)

                with cv_main_to_worker:
                    cv_main_to_worker.notify()
                LOGGER.info('Notify a worker {}'.format(url))

        # One end marker per worker, then wake everybody up.
        with cv_main_to_worker:
            for _ in range(num_workers):
                url_queue.put("#END#")
            cv_main_to_worker.notify_all()
        LOGGER.info('End of enqueue')

    except KeyboardInterrupt:
        LOGGER.info("Received KeyboardInterrupt")
        with cv_main_to_worker:
            cv_main_to_worker.notify_all()
        for p in p_list:
            p.join()
        LOGGER.info("Forcing exit.")
        exit()
    finally:
        try:
            for p in p_list:
                p.join()
        except KeyboardInterrupt:
            LOGGER.info("Received KeyboardInterrupt")
            with cv_main_to_worker:
                cv_main_to_worker.notify_all()
            for p in p_list:
                p.join()
            LOGGER.info("Forcing exit.")
            exit()

    LOGGER.info('Finished downloading videos!')
def make_app(config_file):
    """Build the bottle application described by *config_file*.

    The file is used twice: once by ``fileConfig`` to configure logging and
    once parsed with ``konfig.Config`` for the application settings (sections
    ``henet``, ``csrf``, ``smtp`` and one section per category/page name).
    The resulting settings are attached both to ``app_stack`` and to the
    returned application object, and a SIGINT handler is installed that
    closes the workers and exits.

    :param config_file: path to the configuration file
    :return: the application wrapped in ``I18NPlugin``
    """
    fileConfig(config_file)
    multiprocessing_logging.install_mp_handler()

    # NOTE(review): `codecs.open(config_file, ...)` and `bottle.app()` below
    # were reconstructed from a garbled copy of the source -- confirm.
    with codecs.open(config_file, 'r', 'utf8') as f:
        config = konfig.Config(f)

    henet_config = config['henet']
    bottle.debug(henet_config.get('debug', DEFAULT_DEBUG))
    app = bottle.app()

    # bottle config used by bottle-utils
    csrf_config = {'csrf.%s' % key: val for key, val in config.items('csrf')}
    app.config.update(csrf_config)

    # setting up languages
    default_locale = henet_config.get('default_locale', 'fr_FR')
    langs = [[p.strip() for p in lang.split(',')]
             for lang in henet_config.get('langs', ['fr_FR'])]

    app = I18NPlugin(app, langs=langs, default_locale=default_locale,
                     locale_dir=LOCALES_PATH)

    def _load_sections(names):
        # Return [(name, options)] for each named config section; a single
        # name is accepted as well as a list. 'can_create' defaults to True.
        if not isinstance(names, list):
            names = [names]
        sections = []
        for name in names:
            values = dict(config[name].items())
            values.setdefault('can_create', True)
            sections.append((name, values))
        return sections

    cats = _load_sections(henet_config['categories'])
    pages = _load_sections(henet_config['pages'])

    use_comments = henet_config.get('comments', True)
    use_media = henet_config.get('media', True)

    app_stack.vars = app.vars = {'pages': pages,
                                 'categories': cats,
                                 'get_alerts': get_alerts,
                                 'site_url': henet_config['site_url'],
                                 'use_comments': use_comments,
                                 'use_media': use_media,
                                 'langs': langs}

    app_stack.view = partial(view, **app.vars)
    app_stack._config = app._config = config
    app_stack.workers = app.workers = MemoryWorkers()
    app_stack.use_comments = app.use_comments = use_comments
    app_stack.use_media = app.use_media = use_media
    app_stack.add_alert = app.add_alert = add_alert

    smtp_config = dict(config.items('smtp'))

    def _send_email(*args):
        # The SMTP settings are appended as the last positional argument.
        args = list(args) + [smtp_config]
        app.workers.apply_async('send-email', send_email, args)

    app_stack.send_email = app.send_email = _send_email

    # Imported here rather than at module level; presumably the views rely on
    # the app_stack attributes set above -- TODO confirm.
    from henet import views  # NOQA

    def _close_workers(*args):
        app.workers.close()
        sys.exit(0)

    subscribe(ALL_EVENTS, add_alert)
    signal.signal(signal.SIGINT, _close_workers)

    return app
def sync(config, logs):
    """
    Main Sync process

    Supervision loop: while the 'main' section of the loaded configuration
    has 'run' set, make sure a pool exists, launch workers for every enabled
    pnn section that is neither running nor already queued, poll the workers
    and reload the configuration. When 'run' is cleared or the pool size
    changes, all workers are drained and the pool is closed.

    :param config: handed to ``_load_config`` on every (re)load
    :param logs: handed to ``logging.my_logfile``
    :return: ``config``, unchanged
    """
    logging.my_logfile(logs=logs)
    logging.my_fmt(label='main_sync')

    # NOTE(review): starttime and modify are bound to the SAME dict object;
    # this looks unintended -- confirm against the upstream file.
    modify = {}
    starttime = modify

    workers = {}  # this is the array of running pnns
    pool = None

    pcli = PhEDEx()
    install_mp_handler()

    conf = _load_config(config, modify, starttime)

    pnns = []  # this is the array of pnn to be launched
    size = conf['main']['pool']

    logging.summary('Starting')

    while conf['main']['run']:

        if pool is None:
            logging.notice('Started pool of size %d', size)
            pool = multiprocessing.NDPool(size)

        # Enabled sections that are neither running nor waiting for launch.
        pending = [
            name for name, section in conf.items()
            if name != 'main' and section['run']
            and name not in workers and name not in pnns
        ]
        pnns += pending
        random.shuffle(pnns)

        if _ping():
            _launch_workers(pool, workers, pnns, pcli)
            pnns = []
        else:
            logging.warning('Cannot ping, not launching workers')

        _poll_workers(workers, pnns)

        conf = _load_config(config, modify, starttime)

        keep_going = conf['main']['run'] and conf['main']['pool'] == size
        if keep_going:
            time.sleep(conf['main']['sleep'])
            continue

        # trigger draining of all workers, close the pool and wait
        # for the task to be over
        conf = _load_config(config, {'default': {'run': False}}, starttime)
        _drain_up(workers, pnns)
        workers = {}
        pool.close()
        pool = None
        size = conf['main']['pool']

    logging.summary('Exiting.')

    return config
def download_audioset(data_dir, ffmpeg_path, ffprobe_path, eval_segments_path,
                      balanced_train_segments_path, unbalanced_train_segments_path,
                      disable_logging=False, verbose=False, num_workers=4,
                      log_path=None, **ffmpeg_cfg):
    """
    Download AudioSet files

    Sets up logging, then downloads the "evaluation" subset followed by the
    "balanced_train" subset via ``download_subset``.

    Args:
        data_dir:                       Directory where dataset files will
                                        be saved
                                        (Type: str)

        ffmpeg_path:                    Path to ffmpeg executable
                                        (Type: str)

        ffprobe_path:                   Path to ffprobe executable
                                        (Type: str)

        eval_segments_path:             Path to evaluation segments file
                                        (Type: str)

        balanced_train_segments_path:   Path to balanced train segments file
                                        (Type: str)

        unbalanced_train_segments_path: Path to unbalanced train segments file
                                        (Type: str)

    Keyword Args:
        disable_logging:                Disables logging to a file if True
                                        (Type: bool)

        verbose:                        Prints verbose information to stdout
                                        if True
                                        (Type: bool)

        num_workers:                    Number of multiprocessing workers used
                                        to download videos
                                        (Type: int)

        log_path:                       Path where log file will be saved. If
                                        None, saved to './audiosetdl.log'
                                        (Type: str or None)

        **ffmpeg_cfg:                   Configuration for audio and video
                                        downloading and decoding done by ffmpeg
                                        (Type: dict[str, *])
    """
    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    # NOTE(review): unbalanced_train_segments_path is accepted but never used
    # in this function -- confirm whether a third subset should be downloaded.
    subsets = (
        (eval_segments_path, "evaluation"),
        (balanced_train_segments_path, "balanced_train"),
    )
    for segments_path, subset_name in subsets:
        download_subset(segments_path, subset_name, data_dir,
                        ffmpeg_path, ffprobe_path, num_workers, **ffmpeg_cfg)
def main(argv=None):
    """Parse the command line, set up logging and run ``main_method``.

    :param argv: argument list without the program name; defaults to
        ``sys.argv[1:]``
    :return: whatever ``main_method(args)`` returns. With ``--debug``, an
        exception raised by ``main_method`` is printed and inspected with
        ``pdb.post_mortem``, after which None is returned.
    """
    if argv is None:
        argv = sys.argv[1:]

    class ArgumentParserWithDefaults(argparse.ArgumentParser):
        '''
        ArgumentParser that appends " (default: <value>)" to the help text
        of every argument that has both a help text and a default.
        '''

        def add_argument(self, *args, help=None, default=None, **kwargs):
            if help is not None:
                kwargs['help'] = help
            if default is not None:
                # Always forward the default (argparse passes SUPPRESS for its
                # own -h option), but only advertise real values in the help.
                kwargs['default'] = default
                if help is not None and default is not argparse.SUPPRESS:
                    kwargs['help'] += ' (default: {})'.format(default)
            # Return the created Action, as argparse's add_argument does.
            return super().add_argument(*args, **kwargs)

    parser = ArgumentParserWithDefaults(
        formatter_class=argparse.RawTextHelpFormatter)
    # The "(default: ...)" suffix is added by the parser class, so it must not
    # be repeated in the help strings below.
    parser.add_argument("-l", "--logconfig", dest="logconfig",
                        help="logging configuration",
                        default='logging.json')
    parser.add_argument("--debug", dest="debug",
                        help="Enable interactive debugger on error",
                        action='store_true')
    parser.add_argument("-c", "--chart_output", dest="chart_output",
                        help="Chart output directory", required=True)
    parser.add_argument("-o", "--output", dest="output",
                        help="Output directory", required=True)
    parser.add_argument("-s", "--sim-output", dest="sim_output",
                        help="Sim output directory", required=True)
    parser.add_argument("-w", "--window-size", dest="window_size",
                        help="Minutes over which to collect data",
                        default=3, type=int)
    parser.add_argument(
        "--first-timestamp-file", dest="first_timestamp_file",
        help="Path to file containing the log timestamp that the simulation started",
        required=True)

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)
    # Only install the multiprocessing-aware handler when multiprocessing has
    # already been imported somewhere in this process.
    if 'multiprocessing' in sys.modules:
        import multiprocessing_logging
        multiprocessing_logging.install_mp_handler()

    if not args.debug:
        return main_method(args)

    import pdb
    import traceback
    try:
        return main_method(args)
    except Exception:
        # SystemExit / KeyboardInterrupt are deliberately not caught, so a
        # normal exit or Ctrl-C does not drop into the debugger.
        _, _, tb = sys.exc_info()
        traceback.print_exc()
        pdb.post_mortem(tb)
filename = os.path.basename(__file__) logfile = os.path.splitext(filename)[0] + '.log' fh = logging.FileHandler(logfile, mode='w') fh.setLevel(logging.DEBUG) # create console handler with a higher log level ch = logging.StreamHandler() ch.setLevel(logging.INFO) # create formatter and add it to the handlers formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') ch.setFormatter(formatter) fh.setFormatter(formatter) # add the handlers to logger logger.addHandler(ch) logger.addHandler(fh) multiprocessing_logging.install_mp_handler(logger=logger) # ==================================== SALE ORDER LINE ==================================== def update_sale_order_line(pid, data_pool, product_ids, uom_ids, order_tax_code_ids): sock = xmlrpclib.ServerProxy(URL, allow_none=True) while data_pool: try: data = data_pool.pop() order_id = data.get('order_id') order_lines = sock.execute(DB, UID, PSW, 'sale.order.line', 'search_read', [('order_id', '=', order_id)], ['product_id', 'product_uom'])
def update_database(): with logging_redirect_tqdm(): install_mp_handler() latest_update = Update.get_latest_update(success=False) if latest_update.status not in [ Update.Status.ERROR, Update.Status.SUCCESS ]: print( 'Last update revision=%s is still not ended (or hang or crashed). ' 'Would you like to start new update (y) continue previous (N)?' % response = input() if response.lower() == 'y': latest_update.status = 'error' latest_update = Update.objects.create( status=Update.Status.IN_PROGRESS) else: latest_update = Update.objects.create( status=Update.Status.IN_PROGRESS) # scraping level 3 indexes first level_3_koatuu = list( set([koatuu for koatuu in _get_indexes() if koatuu.level <= 2])) # sort unique ids then (stable sort, so level is still sorted) level_3_koatuu.sort(key=attrgetter('unique_id')) # sort level from 1 to 3 keeping stable unique_id level_3_koatuu.sort(key=attrgetter('level')) if latest_update.latest_koatuu:"Searching for the latest koatuu scraped") latest_koatuu_obj = next( (koatuu for koatuu in level_3_koatuu if koatuu.unique_id == latest_update.latest_koatuu), None) if latest_koatuu_obj is None: level_3_koatuu = [] else:"Found latest koatuu scraped %s", latest_koatuu_obj) level_3_koatuu = level_3_koatuu[level_3_koatuu. 
index(latest_koatuu_obj):]'Koatuu to scrape only %s', len(level_3_koatuu)) if level_3_koatuu: _download_and_insert(latest_update, level_3_koatuu)'All insert l1 operations ended') # process level 4 indexes only for regions where parcels # number is more than 100000 annotated = Landuse.objects.all().values('koatuu').filter( total=Count('koatuu')).order_by('-total') level_4_koatuu = [] all_koatuu = list(set([koatuu for koatuu in _get_indexes()])) for result in annotated: if result['total'] < 100000: continue koatuu_obj = next(koatuu for koatuu in all_koatuu if koatuu.unique_id == str(result['koatuu'])) if koatuu_obj.level == 2: level_3_koatuus = [ *set([ koatuu for koatuu in all_koatuu if koatuu.level == 3 and str(koatuu.parent) == koatuu_obj.unique_id ]) ] level_4_koatuu.extend(level_3_koatuus) for level_3_koatuu in level_3_koatuus: level_4_koatuu.extend([ *set([ koatuu for koatuu in all_koatuu if koatuu.level == 4 and str(koatuu.parent) == level_3_koatuu.unique_id ]) ]) if koatuu_obj.level == 3: level_4_koatuu.extend([ *set([ koatuu for koatuu in all_koatuu if koatuu.level == 4 and str(koatuu.parent) == koatuu_obj.unique_id ]) ]) level_4_koatuu.sort(key=attrgetter('unique_id')) _download_and_insert(latest_update, level_4_koatuu) # detecting changes to create analysis table create_changeset( revision=Update.objects.get(, previous=Update.objects.get(id=Update.get_latest_update().id), ) # everything is ok => success status Update.objects.filter( status=Update.Status.SUCCESS)
if __name__ == "__main__":

    import os
    import shutil
    import sys
    import logging

    from multiprocessing_logging import install_mp_handler

    from wepy_tools.sim_makers.openmm.lennard_jones import LennardJonesPairOpenMMSimMaker

    OUTPUT_DIR = "_output/sim_maker_run"

    logging.getLogger().setLevel(logging.DEBUG)
    install_mp_handler()

    _USAGE = "arguments: n_cycles, n_steps, n_walkers, n_workers, platform, resampler"
    _cli_args = sys.argv[1:]

    if _cli_args and _cli_args[0] in ("-h", "--help"):
        print(_USAGE)
        sys.exit()

    # Six positional arguments are read below; show the usage text instead of
    # failing with an IndexError when some are missing.
    if len(_cli_args) < 6:
        print(_USAGE)
        sys.exit(1)

    n_cycles = int(_cli_args[0])
    n_steps = int(_cli_args[1])
    n_walkers = int(_cli_args[2])
    n_workers = int(_cli_args[3])
    platform = _cli_args[4]
    resampler = _cli_args[5]

    print("Number of steps: {}".format(n_steps))
    print("Number of cycles: {}".format(n_cycles))
def main():
    """Start all worker processes and supervise them until asked to stop.

    Reads the configuration with ``get_config``, configures logging, starts
    one ``SourceCollectorProcess`` per source collector plus the handler,
    merger and three Zabbix updater processes, then loops once per second
    until the stop event is set (by the signal handler or because a child
    process died). Afterwards every process is terminated and waited for.

    Raises:
        Exception: if the zabbix ``dryrun`` setting is neither "true" nor
            "false".
    """
    # NOTE(review): the logging.info(...) calls, datetime.datetime.now() and
    # the process.name / process.pid arguments were reconstructed from a
    # garbled copy of the source -- confirm against the upstream file.
    config = get_config()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s',
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=logging.DEBUG)
    multiprocessing_logging.install_mp_handler()
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    zabbix_config = dict(config["zabbix"])

    zabbix_config["failsafe"] = int(zabbix_config.get("failsafe", "20"))
    # The setting arrives as a string; convert it to a real boolean.
    if zabbix_config["dryrun"] == "false":
        zabbix_config["dryrun"] = False
    elif zabbix_config["dryrun"] == "true":
        zabbix_config["dryrun"] = True
    else:
        raise Exception(
            "Invalid zabbix 'dryrun' value %r: expected 'true' or 'false'"
            % (zabbix_config["dryrun"],))

    logging.info("Main start (%d) version %s", os.getpid(), __version__)

    stop_event = multiprocessing.Event()
    processes = []

    source_hosts_queues = []
    source_collectors = get_source_collectors(config)
    for source_collector in source_collectors:
        source_hosts_queue = multiprocessing.Queue()
        process = processing.SourceCollectorProcess(source_collector["name"],
                                                    source_collector["module"],
                                                    source_collector["config"],
                                                    source_hosts_queue)
        source_hosts_queues.append(source_hosts_queue)
        processes.append(process)
        process.start()

    process = processing.SourceHandlerProcess("source-handler",
                                              config["zac"]["db_uri"],
                                              source_hosts_queues)
    process.start()
    processes.append(process)

    process = processing.SourceMergerProcess(
        "source-merger", config["zac"]["db_uri"],
        config["zac"]["host_modifier_dir"])
    process.start()
    processes.append(process)

    process = processing.ZabbixHostUpdater("zabbix-host-updater",
                                           config["zac"]["db_uri"],
                                           zabbix_config)
    process.start()
    processes.append(process)

    process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater",
                                                config["zac"]["db_uri"],
                                                zabbix_config)
    process.start()
    processes.append(process)

    process = processing.ZabbixTemplateUpdater("zabbix-template-updater",
                                               config["zac"]["db_uri"],
                                               zabbix_config)
    process.start()
    processes.append(process)

    with processing.SignalHandler(stop_event):
        status_interval = 60
        next_status = datetime.datetime.now()

        while not stop_event.is_set():
            # Log the process status every `status_interval` seconds.
            if next_status < datetime.datetime.now():
                log_process_status(processes)
                next_status = datetime.datetime.now() + datetime.timedelta(
                    seconds=status_interval)

            dead_process_names = [
                process.name for process in processes
                if not process.is_alive()
            ]
            if dead_process_names:
                logging.error("A child has died: %s. Exiting",
                              ', '.join(dead_process_names))
                stop_event.set()

            time.sleep(1)

        logging.debug(
            "Queues: %s",
            ", ".join([str(queue.qsize()) for queue in source_hosts_queues]))

        for process in processes:
            logging.info("Terminating: %s(%d)", process.name, process.pid)
            process.terminate()

        alive_processes = [
            process for process in processes if process.is_alive()
        ]
        while alive_processes:
            process = alive_processes[0]
            logging.info("Waiting for: %s(%d)", process.name, process.pid)
            log_process_status(processes)  # TODO: Too verbose?
            process.join(10)
            # exitcode is still None when the join timed out.
            if process.exitcode is None:
                logging.warning(
                    "Process hanging. Signaling new terminate: %s(%d)",
                    process.name, process.pid)
                process.terminate()
            time.sleep(1)
            alive_processes = [
                process for process in processes if process.is_alive()
            ]

    logging.info("Main exit")
def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(
        self):
    """install_mp_handler(logger) must never call logging.getLogger."""
    patcher = mock.patch('logging.getLogger')
    with patcher as patched_get_logger:
        install_mp_handler(self.logger)

        calls_made = patched_get_logger.call_count
        self.assertEqual(0, calls_made)
def main(argv):
    """Command-line entry point: segment the files found in each directory.

    For every existing directory given on the command line, each ``*.eps``
    file whose ``.svg`` and ``.png`` siblings both exist becomes a target;
    the targets are passed to ``seg`` and the returned scores are written as
    JSON to the confidence file.

    :param argv: full argument vector; ``argv[0]`` (program name) is skipped
    """
    parser = argparse.ArgumentParser(
        description='Perform segmentation on .svg and .png files.')
    parser.add_argument('dirs', nargs='+',
                        help='Directories that stores .svg&.png files.')
    parser.add_argument(
        '--num-workers', default=0, type=int, dest='num_workers',
        help='Number of processes. 0 for all available cpu cores.')
    parser.add_argument('--log', default='segmentation.log', type=str,
                        dest='log_file', help='Path to log file.')
    parser.add_argument('--conf', default='seg_conf.json', type=str,
                        dest='confidence_file',
                        help='Path to segmentation confidence file.')
    parser.add_argument(
        '--no-optimize', default=True, action='store_false', dest='optimize',
        help="Dont't use svgo optimization. This will produce larger svg files but cost much less time."
    )
    parser.add_argument('--export-contour', default=False,
                        action='store_true', dest='export_contour',
                        help='Export contour segmentation results.')
    parser.add_argument('--export-mask', default=False, action='store_true',
                        dest='export_mask',
                        help='Export morphed mask for debug use.')
    args = parser.parse_args(argv[1:])

    global logger
    logger = get_logger('segmentation', args.log_file, echo=False,
                        multiprocessing=True)
    install_mp_handler(logger)

    global USE_OPTIMIZE, EXPORT_CONTOUR_RESULTS, EXPORT_MASK
    USE_OPTIMIZE = args.optimize
    EXPORT_CONTOUR_RESULTS = args.export_contour
    EXPORT_MASK = args.export_mask

    num_workers = args.num_workers
    if num_workers == 0:
        num_workers = multiprocessing.cpu_count()
    # NOTE(review): the two logger.info(...) calls in this function were
    # reconstructed from a garbled copy of the source -- confirm.
    logger.info('Using {} processes.'.format(num_workers))

    src_dirs = args.dirs
    for src_dir in src_dirs:
        if not osp.isdir(src_dir):
            continue
        logger.info('Processing {} ...'.format(src_dir))

        # Results are written next to the inputs.
        tgt_dir = src_dir
        tgts = []
        for f in glob.glob(osp.join(src_dir, '*.eps')):
            _id, _ = osp.splitext(osp.basename(f))
            svg_file = osp.join(src_dir, _id + '.svg')
            png_file = osp.join(src_dir, _id + '.png')
            if osp.exists(svg_file) and osp.exists(png_file):
                tgts.append({
                    'id': _id,
                    'svg_file': svg_file,
                    'png_file': png_file
                })

        # NOTE(review): nesting reconstructed -- as written, the confidence
        # file is rewritten for every directory, so only the last directory's
        # scores remain. Confirm against the upstream file.
        conf = seg(tgts, tgt_dir, num_workers=num_workers)
        with open(args.confidence_file, 'w') as f:
            f.write(
                json.dumps([{
                    'id': tgt['id'],
                    'score': s
                } for tgt, s in zip(tgts, conf)]))
from collections import OrderedDict import numpy as np import click from data_iterator import TextIterator from params import load_params logging.basicConfig(level=logging.WARN, format="%(asctime)s - %(levelname)s %(module)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S") import multiprocessing_logging multiprocessing_logging.install_mp_handler() def error_process(params, device, **model_options): import theano import theano.sandbox.cuda from build_model import build_model theano.sandbox.cuda.use(device) tparams = OrderedDict() for param_name, param in params.items(): tparams[param_name] = theano.shared(param, name=param_name) process_name = current_process().name
def processYears(self, years: List[int]) -> None: ''' Parameters ---------- years : List[int] list of years to (re-)process ''' # If necessary, creates indexes for the data collection. self._createDataColIndex() # Does the grid collection exists? col_grid = self._createMongoConn(cfg=self.cfg)['col_grid'] ndoc = col_grid.count_documents(filter={}) if ndoc == 0:'Creation of the grid collection...') # Does the reference netCDF file exists? fname = self.downloadDir + '' if os.path.exists(fname) is False: print('Downloading mask data...') self.getMasks() self.createGridCollection(mask=False) # Get all grid_ids self.all_ids = col_grid.distinct(key='id_grid') for year in years: self.year = int(year)' --- PROCESSING YEAR {year} ---') if is True:'Proceeding with downloads...') today = if (year == today.year): months = np.arange(1, today.month + 1).tolist() else: months = np.arange(1, 12 + 1).tolist() # Are these months present as nc files ? # List this year's nc files try: ncfiles = self.listNetCDFfiles(year) except FileNotFoundError:'No ERA5T files downloaded for {year} yet.') months_to_download = months else: fmonths_present = sorted( list( map( lambda x: int(x[x.find("-") + 1:x.find(".nc")] ), ncfiles))) fmonths_needed = months # Months needed but not present : missing_months = list( set(fmonths_needed) - (set(fmonths_present))) months_to_download = missing_months # months_to_download = list( # set(missing_months + months)) # distinct months # fmonths_present # print(ncfiles) # print(fmonths_present) # print('...') # print(missing_months) # import sys # sys.exit(0) if len(months_to_download) > 0:'Downloading files for YEAR {year}....\n' + f'Months: {months_to_download}') # Parralel download of monthly data: install_mp_handler() p = ThreadPool(processes=12) # one thread per month m: self.getFiles(year=year, month=m), months_to_download) p.close() p.join()'Downloading files for YEAR {year} Done.') else:'All files already present for year {year}.') else:'Proceeding without 
downloads') # List all the current year's nc files after download nc_local = self.listNetCDFfiles(year=year) # Open them all in one ds object # arrays will be loaded in chronological order try: ds = xr.open_mfdataset(nc_local, combine='by_coords') except Exception as e: else: self.df_missing_dates = self.findMissingDates(ds) # Create the tile (chunks) elements # This operation starts to be useful at high grid resolution # i.e., from 0.25 x 0.25. For coarser grid (i.e., 0.1 x 0.1) # this is not really essential. delta = 30 # grid chunk in degrees (should be a multiple of # both 360 and 180) # ERA's lon have range [0, 360] and not [-180, 180] ilons = np.arange(0, 360, delta) ilats = np.arange(-60, 90, delta) elements = itertools.product(*[ilons, ilats]) # Explore the grid chunks and select # those containing grid cells def worker_initializer00(): global col_grid cons = self._createMongoConn(cfg=self.cfg) col_grid = cons['col_grid'] p = ThreadPool(processes=self.nthreads, initializer=worker_initializer00) res = lambda e: self.exploreChunks(ilon_chunk=e[0], ilat_chunk=e[1], delta=delta, mask_query=None, retrn='ndocs', col_grid=col_grid), elements) p.close() p.join() df_e = pd.DataFrame(res) df_e = df_e.query('n > 0').sort_values(by='n').reset_index( drop=True) # Do the insertion N = df_e.shape[0] for i in np.arange(N):'Year {year}: processing chunk {i}/{N}') ilon = df_e.loc[i, 'ilon_chunk'] ilat = df_e.loc[i, 'ilat_chunk'] n = df_e.loc[i, 'n'] self.insertChunk(ilon, ilat, delta, ds, 'insert')'{n} documents inserted')' --- PROCESSING YEAR %s DONE !---' % year)