def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(self):
    if not mock:
        self.skipTest('unittest.mock is not available')

    with mock.patch('logging.getLogger') as getLogger:
        install_mp_handler(self.logger)

        self.assertEqual(0, getLogger.call_count)
def test_when_no_logger_is_specified_then_it_uses_the_root_logger(self):
    if not mock:
        self.skipTest('unittest.mock is not available')

    with mock.patch('logging.getLogger') as getLogger:
        getLogger.return_value = self.logger

        install_mp_handler()

        getLogger.assert_called_once_with()
        wrapper_handler, = self.logger.handlers
        self.assertIsInstance(wrapper_handler, MultiProcessingHandler)
        self.assertIs(wrapper_handler.sub_handler, self.handler)
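The behaviour these tests pin down is that install_mp_handler() wraps every handler of the target logger (the root logger when none is passed) in a MultiProcessingHandler, so records emitted in child processes are shipped back to the parent's handlers. A minimal sketch of that usage, assuming only the public multiprocessing_logging API exercised above; the worker function and log file name are illustrative, not part of the original tests:

import logging
import multiprocessing

from multiprocessing_logging import install_mp_handler


def worker(i):
    # Emitted in the child; the wrapped handler forwards it over an internal
    # queue to the parent process, which writes it out.
    logging.getLogger(__name__).info('hello from task %d', i)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, filename='workers.log')
    install_mp_handler()  # wrap the root logger's handlers before starting workers

    with multiprocessing.Pool(4) as pool:
        pool.map(worker, range(8))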
def parse_args(description, ParamsClass=SieverParams, **kwds): """ Parse command line arguments. The command line parser accepts the standard parameters as printed by calling it with ``--help``. All other parameters are used to construct params objects. For example. ./foo 80 --workers 4 --trials 2 -S 1337 --a 1 2 - b 3 4 would operate on dimension 80 with parameters (a: 1, b: 3), (a: 1, b: 4), (a: 2, b: 3), (a: 2, b: 4), i.e. the Cartesian product of all parameters. It will run two trials each using four workers. Note that each worker may use several threads, too. The starting seed is `1337`. :param description: help message :param kwds: default parameters """ parser = argparse.ArgumentParser( description=description, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument( 'lower_bound', type=int, help="lowest lattice dimension to consider (inclusive)") parser.add_argument( '-u', '--upper-bound', type=int, dest="upper_bound", default=0, help="upper bound on lattice dimension to consider (exclusive)") parser.add_argument('-s', '--step-size', type=int, dest="step_size", default=2, help="increment lattice dimension in these steps") parser.add_argument('-t', '--trials', type=int, dest="trials", default=1, help="number of experiments to run per dimension") parser.add_argument('-w', '--workers', type=int, dest="workers", default=1, help="number of parallel experiments to run") parser.add_argument('-p', '--pickle', action='store_true', dest="pickle", help="pickle statistics") parser.add_argument('-S', '--seed', type=int, dest="seed", default=0, help="randomness seed") parser.add_argument( '--dry-run', dest="dry_run", action='store_true', help= "Show parameters that would be used but don't run any actual experiments." ) parser.add_argument('--show-defaults', dest="show_defaults", action='store_true', help="Show default parameters and exit.") parser.add_argument('--loglvl', type=str, help="Logging level (one of DEBUG, WARN, INFO)", default="INFO") parser.add_argument('--log-filename', dest="log_filename", type=str, help="Logfile filename", default=None) args, unknown = parser.parse_known_args() kwds_ = OrderedDict() for k, v in six.iteritems(kwds): k_ = k.replace("__", "/") kwds_[k_] = v kwds = kwds_ if args.show_defaults: pp = ParamsClass(**kwds) slen = max(len(p) for p in pp) + 1 fmt = "{key:%ds}: {value}" % slen for k, v in six.iteritems(pp): print(fmt.format(key=k, value=v)) exit(0) all_params = OrderedDict([("", ParamsClass(**kwds))]) unknown_args = OrderedDict() unknown = apply_aliases(unknown) # NOTE: This seems like the kind of thing the standard library can do (better) i = 0 while i < len(unknown): k = unknown[i] if not (k.startswith("--") or k.startswith("-")): raise ValueError("Failure to parse command line argument '%s'" % k) k = re.match("^-+(.*)", k).groups()[0] k = k.replace("-", "_") unknown_args[k] = [] i += 1 for i in range(i, len(unknown)): v = unknown[i] if v.startswith("--") or v.startswith("-"): i -= 1 break try: v = eval(v, {"BKZ": BKZ}) except NameError: v = v except SyntaxError: v = v if not isinstance(v, (list, tuple)): v = [v] unknown_args[k].extend(v) i += 1 if not unknown_args[k]: unknown_args[k] = [True] for k, v in six.iteritems(unknown_args): all_params_ = OrderedDict() for p in all_params: for v_ in v: p_ = copy.copy(all_params[p]) p_[k] = v_ all_params_[p + "'%s': %s, " % (k, v_)] = p_ all_params = all_params_ log_filename = args.log_filename if log_filename is None: log_filename = log_filenamef() multiprocessing_logging.install_mp_handler() if 
not os.path.isdir("logs"): os.makedirs("logs") logging.basicConfig( level=logging.DEBUG, format='%(levelname)5s:%(name)12s:%(asctime)s: %(message)s', datefmt='%Y/%m/%d %H:%M:%S %Z', filename=log_filename) console = logging.StreamHandler() console.setLevel(getattr(logging, args.loglvl.upper())) console.setFormatter(logging.Formatter('%(name)s: %(message)s', )) logging.getLogger('').addHandler(console) if args.dry_run: for params in all_params: print(params) exit(0) return args, all_params
def main():
    args = parse_arguments()

    logging.basicConfig(
        level=getattr(logging, args.log_level.upper()),
        format='%(asctime)s - %(process)s - %(levelname)s - %(message)s')
    install_mp_handler()

    os.nice(20)
    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    input_files = collect_inputs(args.input_dirs)
    logging.info('Scheduled {} files for shuffling.'.format(len(input_files)))
    if not input_files:
        logging.critical('No input files!')
        sys.exit(1)

    output_files = [os.path.join(args.output_dir, os.path.basename(f))
                    for f in input_files]

    with openall(input_files[0]) as inf:
        header = inf.readline().strip()

    with Pool(args.processes) as inpool, Pool(args.processes) as outpool:
        m = Manager()
        queue = m.Queue(maxsize=1000)
        num_readers = m.Value('I', args.processes)
        lock = m.Lock()

        # Each worker gets a chunk of all input / output files
        input_chunks = list(split_into(input_files, args.processes))
        output_chunks = list(split_into(output_files, args.processes))

        producer_f = partial(producer, queue=queue,
                             num_readers=num_readers, lock=lock)
        inresult = inpool.map_async(producer_f, input_chunks)
        consumer_f = partial(consumer, queue=queue, header=header,
                             documents=args.documents,
                             num_readers=num_readers, lock=lock)
        outresult = outpool.map_async(consumer_f, output_chunks)

        docs_read, docs_written = sum(inresult.get()), sum(outresult.get())

        logging.debug('Joining processes...')
        inpool.close()
        outpool.close()
        inpool.join()
        outpool.join()
        logging.debug('Joined processes.')

    if docs_read != docs_written:
        logging.error(f'The number of documents read ({docs_read}) and '
                      f'the number of documents written ({docs_written}) '
                      f'differs!')

    logging.info('Done.')
def test_when_a_logger_is_passed_then_it_wraps_all_handlers(self):
    install_mp_handler(self.logger)

    wrapper_handler, = self.logger.handlers
    self.assertIsInstance(wrapper_handler, MultiProcessingHandler)
    self.assertIs(wrapper_handler.sub_handler, self.handler)
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path,
                            log_path=None, verbose=False, disable_logging=False,
                            num_workers=1, **ffmpeg_cfg):
    """
    Downloads Flickr dataset files

    Args:
        dataset_path:     Path to dataset file containing URLs (Type: str)
        data_dir:         Output directory where video will be saved if output
                          path is not explicitly given (Type: str)
        ffmpeg_path:      Path to ffmpeg executable (Type: str)
        ffprobe_path:     Path to ffprobe executable (Type: str)

    Keyword Args:
        log_path:         Path to log file. If None, defaults to
                          "flickr-soundnet-dl.log" (Type: str or None)
        verbose:          If True, prints detailed messages to console (Type: bool)
        disable_logging:  If True, does not log to a file (Type: bool)
        num_workers:      Number of multiprocessing workers used to download
                          videos (Type: int)
        **ffmpeg_cfg:     ffmpeg configurations
    """
    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    print(verbose)
    LOGGER.debug('Initialized logging.')

    audio_dir = os.path.join(data_dir, 'audio')
    video_dir = os.path.join(data_dir, 'video')

    if not os.path.isdir(audio_dir):
        os.makedirs(audio_dir)
    if not os.path.isdir(video_dir):
        os.makedirs(video_dir)

    pool = mp.Pool(num_workers)
    try:
        with open(dataset_path, 'r') as f:
            for line_idx, line in enumerate(f):
                url = line.strip()
                media_filename = extract_flickr_id(url)
                video_filepath = os.path.join(
                    data_dir, 'video',
                    media_filename + '.' + ffmpeg_cfg.get('video_format', 'mp4'))
                audio_filepath = os.path.join(
                    data_dir, 'audio',
                    media_filename + '.' + ffmpeg_cfg.get('audio_format', 'flac'))

                if os.path.exists(video_filepath) and os.path.exists(audio_filepath):
                    info_msg = 'Already downloaded video {}. Skipping.'
                    LOGGER.info(info_msg.format(media_filename))
                    continue

                worker_args = [url, data_dir, ffmpeg_path, ffprobe_path]
                pool.apply_async(partial(download_flickr_video, **ffmpeg_cfg),
                                 worker_args)
    except KeyboardInterrupt:
        LOGGER.info("Forcing exit.")
        exit()
    finally:
        try:
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            LOGGER.info("Forcing exit.")
            exit()

    LOGGER.info('Finished downloading videos!')
import logging
import multiprocessing
from pathlib import Path
from typing import List
import warnings

from astropy.io import fits
from astropy.wcs import WCS, FITSFixedWarning
import mocpy
import multiprocessing_logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)-25s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
multiprocessing_logging.install_mp_handler()
logger = logging.getLogger(__name__)


def stokes_type(stokes_str: str) -> List[str]:
    STOKES_PARAMS = ("I", "Q", "U", "V")
    stokes_str = stokes_str.upper()
    for char in stokes_str:
        if char not in STOKES_PARAMS:
            raise ValueError(
                f"Stokes parameter must be one of {''.join(STOKES_PARAMS)}")
    return [char for char in stokes_str]


def get_moc_output_dir(image_path: Path) -> Path:
    output_dir_name = image_path.parent.name.replace("IMAGES", "MOCS")
def test_lj_sim_manager_openmm_integration_run( self, class_tmp_path_factory, boundary_condition_class, resampler_class, work_mapper_class, platform, lj_params, lj_omm_sys, lj_integrator, lj_reporter_classes, lj_reporter_kwargs, lj_init_walkers, lj_openmm_runner, lj_unbinding_bc, lj_wexplore_resampler, lj_revo_resampler, ): """Run all combinations of components in the fixtures for the smallest amount of time, just to make sure they all work together and don't give errors.""" logging.getLogger().setLevel(logging.DEBUG) install_mp_handler() logging.debug("Starting the test") print("starting the test") # the configuration class gives us a convenient way to # parametrize our reporters for the locale from wepy.orchestration.configuration import Configuration # the runner from wepy.runners.openmm import OpenMMRunner # mappers from wepy.work_mapper.mapper import Mapper from wepy.work_mapper.worker import WorkerMapper from wepy.work_mapper.task_mapper import TaskMapper # the worker types for the WorkerMapper from wepy.work_mapper.worker import Worker from wepy.runners.openmm import OpenMMCPUWorker, OpenMMGPUWorker # the walker task types for the TaskMapper from wepy.work_mapper.task_mapper import WalkerTaskProcess from wepy.runners.openmm import OpenMMCPUWalkerTaskProcess, OpenMMGPUWalkerTaskProcess n_cycles = 1 n_steps = 2 num_workers = 2 # generate the reporters and temporary directory for this test # combination tmpdir_template = 'lj_fixture_{plat}-{wm}-{res}-{bc}' tmpdir_name = tmpdir_template.format(plat=platform, wm=work_mapper_class, res=resampler_class, bc=boundary_condition_class) # make a temporary directory for this configuration to work with tmpdir = str(class_tmp_path_factory.mktemp(tmpdir_name)) # make a config so that the reporters get parametrized properly reporters = Configuration( work_dir=tmpdir, reporter_classes=lj_reporter_classes, reporter_partial_kwargs=lj_reporter_kwargs).reporters steps = [n_steps for _ in range(n_cycles)] # choose the components based on the parametrization boundary_condition = None resampler = None walker_fixtures = [lj_init_walkers] runner_fixtures = [lj_openmm_runner] boundary_condition_fixtures = [lj_unbinding_bc] resampler_fixtures = [lj_wexplore_resampler, lj_revo_resampler] walkers = lj_init_walkers boundary_condition = [ boundary_condition for boundary_condition in boundary_condition_fixtures if type(boundary_condition).__name__ == boundary_condition_class ][0] resampler = [ resampler for resampler in resampler_fixtures if type(resampler).__name__ == resampler_class ][0] assert boundary_condition is not None assert resampler is not None # generate the work mapper given the type and the platform work_mapper_classes = { mapper_class.__name__: mapper_class for mapper_class in [Mapper, WorkerMapper, TaskMapper] } # # select the right one given the option # work_mapper_type = [mapper_type for mapper_type in work_mapper_classes # if type(mapper_type).__name__ == work_mapper_class][0] # decide based on the platform and the work mapper which # platform dependent components to build if work_mapper_class == 'Mapper': # then there is no settings work_mapper = Mapper() elif work_mapper_class == 'WorkerMapper': if platform == 'CUDA' or platform == 'OpenCL': work_mapper = WorkerMapper(num_workers=num_workers, worker_type=OpenMMGPUWorker, device_ids={ '0': 0, '1': 1 }, proc_start_method='spawn') if platform == 'OpenCL': work_mapper = WorkerMapper( num_workers=num_workers, worker_type=OpenMMGPUWorker, device_ids={ '0': 0, '1': 1 }, ) elif platform == 'CPU': 
work_mapper = WorkerMapper( num_workers=num_workers, worker_type=OpenMMCPUWorker, worker_attributes={'num_threads': 1}) elif platform == 'Reference': work_mapper = WorkerMapper( num_workers=num_workers, worker_type=Worker, ) elif work_mapper_class == 'TaskMapper': if platform == 'CUDA': work_mapper = TaskMapper( num_workers=num_workers, walker_task_type=OpenMMGPUWalkerTaskProcess, device_ids={ '0': 0, '1': 1 }, proc_start_method='spawn') elif platform == 'OpenCL': work_mapper = TaskMapper( num_workers=num_workers, walker_task_type=OpenMMGPUWalkerTaskProcess, device_ids={ '0': 0, '1': 1 }) elif platform == 'CPU': work_mapper = TaskMapper( num_workers=num_workers, walker_task_type=OpenMMCPUWalkerTaskProcess, worker_attributes={'num_threads': 1}) elif platform == 'Reference': work_mapper = TaskMapper( num_workers=num_workers, worker_type=WalkerTaskProcess, ) else: raise ValueError("Platform {} not recognized".format(platform)) # initialize the runner with the platform runner = OpenMMRunner(lj_omm_sys.system, lj_omm_sys.topology, lj_integrator, platform=platform) logging.debug("Constructing the manager") manager = Manager(walkers, runner=runner, boundary_conditions=boundary_condition, resampler=resampler, work_mapper=work_mapper, reporters=reporters) # since different work mappers need different process start # methods for different platforms i.e. CUDA and linux fork # vs. spawn we choose the appropriate one for each method. logging.debug("Starting the simulation") walkers, filters = manager.run_simulation(n_cycles, steps, num_workers=num_workers)
def main(): parser = argparse.ArgumentParser(description='''Starts a BLEva Gateway service on the device.''', epilog='''Note: This requires a BLED112 dongle from Bluegiga.''') parser.add_argument('-u', '--url', help='''URL of BLEva server''', required=True) parser.add_argument('-d', '--debug', help='Debug level (0-4)', type=int, default=20, choices=[10, 20, 30, 40, 50]) args = parser.parse_args() url = args.url print url tty_paths = util.get_tty_paths() FORMAT = '%(asctime)s - %(name)s - %(processName)s - %(levelname)s - %(message)s' logging.basicConfig(format=FORMAT, filename='bled112.log') logger = logging.getLogger('BLEva') logger.setLevel(args.debug) import multiprocessing_logging multiprocessing_logging.install_mp_handler() logger.info('\n--------------------') logger.info('BLEva has started') logger.info('\n--------------------') while True: logger.info('\n--------------------') logger.info('BLEva is waiting for new benchmark') print "BLEva is waiting for new benchmarks" b = getBenchmark(url + '/benchmark') print b if b != '': logger.info('BLEva received new benchmark') print "got new benchmark" j = json.loads(b) instances = [] for dongle in j['dongles']: gap_role = dongle['gap_role'] gatt_role = dongle['gatt_role'] replicas = dongle['replicas'] print replicas logger.debug("Replicas: " + str(replicas)) if replicas > len(tty_paths): raise Exception("Too few dongles connected.") for replica in xrange(0, replicas): if gap_role in ['broadcaster', 'peripheral']: a = dongle['steps'] steps = [] for v in a: s = Step() s.time = v['time'] print "json time " + str(['time']) s.ble_operation = v['ble_operation'] # s.adv_data = map(ord, v['adv_data'][2:].decode("hex")) # s.short_name = util.pad_truncate(s.short_name, 5) # s.long_name = util.pad_truncate(s.long_name, 12) s.long_name = v['long_name'] if replica < 10: s.short_name = v['short_name'] + str(0) + str( replica) if s.long_name != "": s.long_name = v['long_name'] + str( 0) + str(replica) else: s.short_name = v['short_name'] + str(replica) if s.long_name != "": s.long_name = v['long_name'] + str(replica) s.short_name = util.pad_truncate(s.short_name, 7) if s.long_name != "": s.long_name = util.pad_truncate( s.long_name, 14) logger.debug("Replica Short Name: " + s.short_name) logger.debug("Replica Long Name: " + s.long_name) s.major = int( v['major'], 0) # NOTE base=0 guesses base from string s.minor = int(v['minor'], 0) s.adv_interval_min = int(v['adv_interval_min'], 0) s.adv_interval_max = int(v['adv_interval_max'], 0) s.adv_channels = int(v['adv_channels'], 0) s.gap_discoverable_mode = ble_codes.gap_discoverable_mode[ v['gap_discoverable_mode']] s.gap_connectable_mode = ble_codes.gap_connectable_mode[ v['gap_connectable_mode']] if "connection_interval_min" in v: s.connection_interval_min = v[ "connection_interval_min"] if "connection_interval_max" in v: s.connection_interval_max = v[ "connection_interval_max"] if "slave_latency" in v: s.slave_latency = v["slave_latency"] if "supervision_timeout" in v: s.supervision_timeout = v[ "supervision_timeout"] steps.append(s) peripheral = Peripheral(logger=logger, steps=steps, port_name=tty_paths[replica], gap_role=gap_role, gatt_role=gatt_role) instances.append(peripheral) logger.info('BLEva is starting benchmark now') print "BLEva is starting benchmark now" processes = [] logger.debug('Telling Phone to start') print "notifying phone" urllib2.urlopen(url + '/benchmark/sync/dongle').read() print "done notified" if not IBEACON: for i in instances: print i p = mp.Process(target=i.start_benchmark, 
name=i.steps[0].short_name) p.start() processes.append(p) for p in processes: p.join() else: time.sleep(40) print "finished one benchmark" logger.info('BLEva finished one benchmark' ) # FIXME fix logger to also log spawned processes if b == '': print "BLEva server not available, sleeping a while and try again." logger.info( 'BLEva server not available, sleeping a while and try again.' ) # FIXME fix logger to also log spawned processes time.sleep( 10) # sleep and then try again until server is available
def main(): config = get_config() logging.basicConfig( format= '%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s', datefmt="%Y-%m-%dT%H:%M:%S%z", level=logging.DEBUG) multiprocessing_logging.install_mp_handler() logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR) if config.zac.health_file is not None: health_file = os.path.abspath(config.zac.health_file) logging.info("Main start (%d) version %s", os.getpid(), __version__) stop_event = multiprocessing.Event() state_manager = multiprocessing.Manager() processes = [] source_hosts_queues = [] source_collectors = get_source_collectors(config) for source_collector in source_collectors: source_hosts_queue = multiprocessing.Queue() process = processing.SourceCollectorProcess(source_collector["name"], state_manager.dict(), source_collector["module"], source_collector["config"], source_hosts_queue) source_hosts_queues.append(source_hosts_queue) processes.append(process) try: process = processing.SourceHandlerProcess("source-handler", state_manager.dict(), config.zac.db_uri, source_hosts_queues) processes.append(process) process = processing.SourceMergerProcess("source-merger", state_manager.dict(), config.zac.db_uri, config.zac.host_modifier_dir) processes.append(process) process = processing.ZabbixHostUpdater("zabbix-host-updater", state_manager.dict(), config.zac.db_uri, config.zabbix) processes.append(process) process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater", state_manager.dict(), config.zac.db_uri, config.zabbix) processes.append(process) process = processing.ZabbixTemplateUpdater("zabbix-template-updater", state_manager.dict(), config.zac.db_uri, config.zabbix) processes.append(process) except exceptions.ZACException as e: logging.error("Failed to initialize child processes. Exiting: %s", str(e)) sys.exit(1) for process in processes: process.start() with processing.SignalHandler(stop_event): status_interval = 60 next_status = datetime.datetime.now() while not stop_event.is_set(): if next_status < datetime.datetime.now(): if health_file is not None: write_health(health_file, processes, source_hosts_queues, config.zabbix.failsafe) log_process_status(processes) next_status = datetime.datetime.now() + datetime.timedelta( seconds=status_interval) dead_process_names = [ process.name for process in processes if not process.is_alive() ] if dead_process_names: logging.error("A child has died: %s. Exiting", ', '.join(dead_process_names)) stop_event.set() time.sleep(1) logging.debug( "Queues: %s", ", ".join([str(queue.qsize()) for queue in source_hosts_queues])) for process in processes: logging.info("Terminating: %s(%d)", process.name, process.pid) process.terminate() alive_processes = [ process for process in processes if process.is_alive() ] while alive_processes: process = alive_processes[0] logging.info("Waiting for: %s(%d)", process.name, process.pid) log_process_status(processes) # TODO: Too verbose? process.join(10) if process.exitcode is None: logging.warning( "Process hanging. Signaling new terminate: %s(%d)", process.name, process.pid) process.terminate() time.sleep(1) alive_processes = [ process for process in processes if process.is_alive() ] logging.info("Main exit")
import multiprocessing as mp
from collections import defaultdict
from queue import Full, Empty
import logging

import numpy as np
from multiprocessing_logging import install_mp_handler

from .base_iterator import BaseIterator
from ...routines.mp_routines import ArrayDictQueue
from ... import ROOT_LOGGER_NAME, ROOT_LOGGER_LEVEL

logger = logging.getLogger('{}.{}'.format(ROOT_LOGGER_NAME, __name__))
logger.setLevel(ROOT_LOGGER_LEVEL)
install_mp_handler(logger=logger)


class MultiProcessIterator(BaseIterator):
    """Iterates through data with base iterator interface, implementing
    in-batch parallelism"""

    def __init__(self, num_processes, max_tasks=100, max_results=100,
                 use_shared=False, *args, **kwargs):
        """
        :param num_processes: number of processes to be used by iterator
        :param max_tasks: max number of tasks to be put in tasks queue
        :param max_results: max volume of output queue
        :param use_shared: whether to use array queue for passing big arrays
            without pickling them
        """
        super(MultiProcessIterator, self).__init__(*args, **kwargs)
        self._num_processes = num_processes
        self._max_tasks = max(max_tasks, self._batch_size)
        self._max_results = max(max_results, self._batch_size)
def __init__(self, producer_func, producer_config_args, pipe_funcs, pipe_funcs_config_args, pipe_n_procs, accumulator_object, accumulator_func, accumulator_config_args, worker_get_limit=5): # enforce the contract. try: assert isinstance(worker_get_limit, int) and worker_get_limit > 1 except AssertionError: raise AssertionError('worker_get_limit must be an integer > 1') # allow multiple producers self._multiple_producers = isinstance(producer_func, tuple) # check functions try: if self._multiple_producers: for func in producer_func: assert callable(func) else: assert callable(producer_func) except AssertionError: raise AssertionError( 'must provide a callable function for producer') try: if self._multiple_producers: for func in producer_func: assert isgeneratorfunction(func) else: assert isgeneratorfunction(producer_func) except AssertionError: raise AssertionError( 'producer function(s) must (all) be a generator function') try: assert isinstance(pipe_funcs, tuple) except AssertionError: raise AssertionError( 'must supply a tuple of callable functions for pipe_funcs') for pf in pipe_funcs: try: assert callable(pf) except AssertionError: raise AssertionError( 'all elements inside of pipe_funcs must be callable functions' ) # check arguments try: if self._multiple_producers: for args in producer_config_args: assert isinstance(args, tuple) else: assert isinstance(producer_config_args, tuple) except AssertionError: raise AssertionError( 'function arguments must be provided as a tuple') try: assert isinstance(pipe_funcs_config_args, tuple) for pfa in pipe_funcs_config_args: assert isinstance(pfa, tuple) except AssertionError: raise AssertionError( 'pipe function arguments must be provided as a tuple of tuples' ) # check procs try: assert isinstance(pipe_n_procs, tuple) for n in pipe_n_procs: assert isinstance(n, int) except AssertionError: raise AssertionError('must provide a tuple of integers') # check agreement between corellated inputs try: assert len(pipe_funcs) == len(pipe_funcs_config_args) and len( pipe_funcs) == len(pipe_n_procs) except AssertionError: raise AssertionError( 'must provide one tuple of arguments and a number of processes for each pipe function' ) try: assert len(pipe_funcs) != 0 except AssertionError: raise AssertionError('must provide work for the pipe to do') # check accumulator function try: assert callable(accumulator_func) except AssertionError: raise AssertionError( 'must provide callable function for accumulator_func') # check accumulator args try: assert isinstance(accumulator_config_args, tuple) except AssertionError: raise AssertionError( 'must privde a tuple of arguments for accumulator_config_args') # contract satisfied self.N = len(pipe_funcs) # used all over in here # setup handlers to send child process logs into main thread's logger install_mp_handler() self.producer_func = producer_func self.producer_config_args = producer_config_args self.pipe_funcs = pipe_funcs self.pipe_funcs_config_args = pipe_funcs_config_args self.pipe_n_procs = pipe_n_procs self.accumulator_object = accumulator_object self.accumulator_func = accumulator_func self.accumulator_config_args = accumulator_config_args self.worker_get_limit = worker_get_limit # use a manager server to make cleanup easy self._sync_server = Manager() # 1 manager for each pipe func self._managers = [None for _ in range(self.N)] self._error_flag = self._sync_server.Value('i', int(False)) # 1 producer finished flag for each manager, 1 for the consumer self._flags = [ self._sync_server.Value('i', 0) for _ in 
range(self.N + 1) ] # 1 out(in) queue per pipe_func, + 1 extra in(out) self._queues = [self._sync_server.Queue() for _ in range(self.N + 1)] self._total_produced = self._sync_server.Value('i', 0) self._total_consumed = 0
    time.sleep(1)


def excepthook(exctype, value, traceback):
    for p in multiprocessing.active_children():
        p.terminate()
    raise


sys.excepthook = excepthook

with open('config.logging.json', 'rt') as f:
    config = json.load(f)
logging.config.dictConfig(config)

import multiprocessing_logging
multiprocessing_logging.install_mp_handler()
multiprocessing_logging.install_mp_handler(logging.getLogger('overseer.quality'))
multiprocessing_logging.install_mp_handler(logging.getLogger('protocol'))

logger = logging.getLogger('overseer')

config = rc_config()
overseer_uuid = '%s' % uuid.uuid4()
site_uuid = config.site_uuid

logger.info('Overseer %s initializing' % (overseer_uuid))
logger.info('Site UUID: %s' % site_uuid)

demods = {}
def emmet(spec_or_dbfile, run, issue, sbatch, bb, yes, no_dupe_check, verbose): """Command line interface for emmet""" logger.setLevel(logging.DEBUG if verbose else logging.INFO) ctx = click.get_current_context() ctx.ensure_object(dict) if not sbatch and bb: raise EmmetCliError( "Burst buffer only available in SBatch mode (--sbatch).") if spec_or_dbfile: client = calcdb_from_mgrant(spec_or_dbfile) ctx.obj["CLIENT"] = client # ctx.obj["MONGO_HANDLER"] = BufferedMongoHandler( # host=client.host, # port=client.port, # database_name=client.db_name, # username=client.user, # password=client.password, # level=logging.WARNING, # authentication_db=client.db_name, # collection="emmet_logs", # buffer_periodical_flush_timing=False, # flush manually # ) # logger.addHandler(ctx.obj["MONGO_HANDLER"]) # coll = ctx.obj["MONGO_HANDLER"].collection # ensure_indexes(SETTINGS.log_fields, [coll]) if run: if not issue: raise EmmetCliError(f"Need issue number via --issue!") ctx.obj["LOG_STREAM"] = StringIO() memory_handler = logging.StreamHandler(ctx.obj["LOG_STREAM"]) formatter = logging.Formatter( "%(asctime)s %(name)-12s %(levelname)-8s %(message)s") memory_handler.setFormatter(formatter) logger.addHandler(memory_handler) CREDENTIALS = os.path.join(os.path.expanduser("~"), ".emmet_credentials") if not os.path.exists(CREDENTIALS): user = click.prompt("GitHub Username") password = click.prompt("GitHub Password", hide_input=True) auth = authorize( user, password, ["user", "repo", "gist"], "emmet CLI", two_factor_callback=opt_prompt, ) with open(CREDENTIALS, "w") as fd: fd.write(auth.token) with open(CREDENTIALS, "r") as fd: token = fd.readline().strip() ctx.obj["GH"] = login(token=token) else: click.secho("DRY RUN! Add --run flag to execute changes.", fg="green") install_mp_handler(logger=logger)
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path, log_path=None, verbose=False, disable_logging=False, num_workers=1, **ffmpeg_cfg): """ Downloads Flickr dataset files Args: dataset_path: Path to dataset file containin URLs (Type: str) data_dir: Output directory where video will be saved if output path is not explicitly given (Type: str) ffmpeg_path: Path to ffmpeg executable (Type: str) ffprobe_path: Path to ffprobe executable (Type: str) Keyword Args: log_path: Path to log file. If None, defaults to "flickr-soundnet-dl.log" (Type: str or None) verbose: If True, prints detailed messages to console (Type: bool) disable_logging: If True, does not log to a file (Type: bool) num_workers: Number of multiprocessing workers used to download videos (Type: int) **ffmpeg_cfg: ffmpeg configurations """ init_console_logger(LOGGER, verbose=verbose) if not disable_logging: init_file_logger(LOGGER, log_path=log_path) multiprocessing_logging.install_mp_handler() LOGGER.debug('Initialized logging.') audio_dir = os.path.join(data_dir, 'audio') video_dir = os.path.join(data_dir, 'video') if not os.path.isdir(audio_dir): os.makedirs(audio_dir) if not os.path.isdir(video_dir): os.makedirs(video_dir) ffmpeg_cfg_gpu = dict.copy(ffmpeg_cfg) ffmpeg_cfg_gpu["video_codec"] += "_nvenc" url_queue = mp.Queue(3*num_workers) #lock = mp.Lock() cv_main_to_worker = mp.Condition() cv_worker_to_main = mp.Condition() p_list = [] try: worker_args = (url_queue, cv_main_to_worker, cv_worker_to_main, data_dir, ffmpeg_path, ffprobe_path) for i in range(num_workers): if i < 2: p = mp.Process(target=download_flickr_video, args=worker_args, kwargs=ffmpeg_cfg_gpu) else: p = mp.Process(target=download_flickr_video, args=worker_args, kwargs=ffmpeg_cfg) p.start() p_list.append(p) with open(dataset_path, 'r') as f: for line in f: url = line.strip() if url: media_filename = extract_flickr_id(url) video_filepath = os.path.join(data_dir, 'video', media_filename + '.' + ffmpeg_cfg.get('video_format', 'mp4')) skip_audio = ffmpeg_cfg.get("skip_audio", True) if not skip_audio: audio_filepath = os.path.join(data_dir, 'audio', media_filename + '.' + ffmpeg_cfg.get('audio_format', 'flac')) else: audio_filepath = None if (skip_audio and os.path.exists(video_filepath)) or (not skip_audio and os.path.exists(video_filepath) and os.path.exists(audio_filepath)): info_msg = 'Already downloaded video {}. Skipping.' LOGGER.info(info_msg.format(media_filename)) continue while True: try: url_queue.put(url, False) break except queue.Full: with cv_worker_to_main: cv_worker_to_main.wait(5.0) with cv_main_to_worker: cv_main_to_worker.notify() LOGGER.info('Notify a worker {}'.format(url)) with cv_main_to_worker: for i in range(num_workers): url_queue.put("#END#") cv_main_to_worker.notify_all() LOGGER.info('End of enqueue') except KeyboardInterrupt: LOGGER.info("Received KeyboardInterrupt") with cv_main_to_worker: cv_main_to_worker.notify_all() for p in p_list: p.join() LOGGER.info("Forcing exit.") exit() finally: try: for p in p_list: p.join() except KeyboardInterrupt: LOGGER.info("Received KeyboardInterrupt") with cv_main_to_worker: cv_main_to_worker.notify_all() for p in p_list: p.join() LOGGER.info("Forcing exit.") exit() LOGGER.info('Finished downloading videos!')
def make_app(config_file): fileConfig(config_file) multiprocessing_logging.install_mp_handler() with codecs.open(config_file, 'r', 'utf8') as f: config = konfig.Config(f) bottle.debug(config['henet'].get('debug', DEFAULT_DEBUG)) app = bottle.app() # bottle config used by bottle-utils csrf_config = dict([('csrf.%s' % key, val) for key, val in config.items('csrf')]) app.config.update(csrf_config) # setting up languages default_locale = config['henet'].get('default_locale', 'fr_FR') langs = [[p.strip() for p in lang.split(',')] for lang in config['henet'].get('langs', ['fr_FR'])] app = I18NPlugin(app, langs=langs, default_locale=default_locale, locale_dir=LOCALES_PATH) cats = [] config_cats = config['henet']['categories'] if not isinstance(config_cats, list): config_cats = [config_cats] for cat in config_cats: values = dict(config[cat].items()) # defaults if 'can_create' not in values: values['can_create'] = True cats.append((cat, values)) pages = [] config_pages = config['henet']['pages'] if not isinstance(config_pages, list): config_pages = [config_pages] for page in config_pages: values = dict(config[page].items()) # defaults if 'can_create' not in values: values['can_create'] = True pages.append((page, values)) use_comments = config['henet'].get('comments', True) use_media = config['henet'].get('media', True) app_stack.vars = app.vars = {'pages': pages, 'categories': cats, 'get_alerts': get_alerts, 'site_url': config['henet']['site_url'], 'use_comments': use_comments, 'use_media': use_media, 'langs': langs} app_stack.view = partial(view, **app.vars) app_stack._config = app._config = config app_stack.workers = app.workers = MemoryWorkers() app_stack.use_comments = app.use_comments = use_comments app_stack.use_media = app.use_media = use_media app_stack.add_alert = app.add_alert = add_alert smtp_config = dict(config.items('smtp')) def _send_email(*args): args = list(args) + [smtp_config] app.workers.apply_async('send-email', send_email, args) app_stack.send_email = app.send_email = _send_email from henet import views # NOQA def _close_workers(*args): app.workers.close() sys.exit(0) subscribe(ALL_EVENTS, add_alert) signal.signal(signal.SIGINT, _close_workers) return app
def sync(config, logs): """ Main Sync process """ logging.my_logfile(logs=logs) logging.my_fmt(label='main_sync') starttime = datetime.now() modify = {} workers = {} # this is the array of running pnns pnns = None # this is the array of pnn to be launched pool = None pcli = PhEDEx() install_mp_handler() conf = _load_config(config, modify, starttime) pnns = [] size = conf['main']['pool'] logging.summary('Starting') while conf['main']['run']: if pool is None: logging.notice('Started pool of size %d', size) pool = multiprocessing.NDPool(size) add = [ pnn for pnn, sec in conf.items() if pnn != 'main' if sec['run'] if pnn not in workers if pnn not in pnns ] pnns += add random.shuffle(pnns) if not _ping(): logging.warning('Cannot ping, not launching workers') else: _launch_workers(pool, workers, pnns, pcli) pnns = [] _poll_workers(workers, pnns) conf = _load_config(config, modify, starttime) if not conf['main']['run'] or\ conf['main']['pool'] != size: # trigger draining of all workers, close the pool and wait # for the task to be over conf = _load_config(config, {'default': {'run': False}}, starttime) _drain_up(workers, pnns) workers = {} pool.close() pool = None size = conf['main']['pool'] else: time.sleep(conf['main']['sleep']) logging.summary('Exiting.') return config
def download_audioset(data_dir, ffmpeg_path, ffprobe_path, eval_segments_path,
                      balanced_train_segments_path,
                      unbalanced_train_segments_path, disable_logging=False,
                      verbose=False, num_workers=4, log_path=None, **ffmpeg_cfg):
    """
    Download AudioSet files

    Args:
        data_dir:                        Directory where dataset files will be
                                         saved (Type: str)
        ffmpeg_path:                     Path to ffmpeg executable (Type: str)
        ffprobe_path:                    Path to ffprobe executable (Type: str)
        eval_segments_path:              Path to evaluation segments file (Type: str)
        balanced_train_segments_path:    Path to balanced train segments file (Type: str)
        unbalanced_train_segments_path:  Path to unbalanced train segments file (Type: str)

    Keyword Args:
        disable_logging:                 Disables logging to a file if True (Type: bool)
        verbose:                         Prints verbose information to stdout if True (Type: bool)
        num_workers:                     Number of multiprocessing workers used to
                                         download videos (Type: int)
        log_path:                        Path where log file will be saved. If None,
                                         saved to './audiosetdl.log' (Type: str or None)
        **ffmpeg_cfg:                    Configuration for audio and video downloading
                                         and decoding done by ffmpeg (Type: dict[str, *])
    """
    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    download_subset(eval_segments_path, "evaluation", data_dir,
                    ffmpeg_path, ffprobe_path, num_workers, **ffmpeg_cfg)

    download_subset(balanced_train_segments_path, "balanced_train", data_dir,
                    ffmpeg_path, ffprobe_path, num_workers, **ffmpeg_cfg)
def main(argv=None): if argv is None: argv = sys.argv[1:] class ArgumentParserWithDefaults(argparse.ArgumentParser): ''' From https://stackoverflow.com/questions/12151306/argparse-way-to-include-default-values-in-help ''' def add_argument(self, *args, help=None, default=None, **kwargs): if help is not None: kwargs['help'] = help if default is not None and args[0] != '-h': kwargs['default'] = default if help is not None: kwargs['help'] += ' (default: {})'.format(default) super().add_argument(*args, **kwargs) parser = ArgumentParserWithDefaults( formatter_class=argparse.RawTextHelpFormatter) parser.add_argument("-l", "--logconfig", dest="logconfig", help="logging configuration (default: logging.json)", default='logging.json') parser.add_argument("--debug", dest="debug", help="Enable interactive debugger on error", action='store_true') parser.add_argument("-c", "--chart_output", dest="chart_output", help="Chart output directory", required=True) parser.add_argument("-o", "--output", dest="output", help="Output directory", required=True) parser.add_argument("-s", "--sim-output", dest="sim_output", help="Sim output directory", required=True) parser.add_argument("-w", "--window-size", dest="window_size", help="Minutes over which to collect data", default=3, type=int) parser.add_argument( "--first-timestamp-file", dest="first_timestamp_file", help= "Path to file containing the log timestamp that the simulation started", required=True) args = parser.parse_args(argv) map_utils.setup_logging(default_path=args.logconfig) if 'multiprocessing' in sys.modules: import multiprocessing_logging multiprocessing_logging.install_mp_handler() if args.debug: import pdb, traceback try: return main_method(args) except: extype, value, tb = sys.exc_info() traceback.print_exc() pdb.post_mortem(tb) else: return main_method(args)
filename = os.path.basename(__file__)
logfile = os.path.splitext(filename)[0] + '.log'
fh = logging.FileHandler(logfile, mode='w')
fh.setLevel(logging.DEBUG)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
# add the handlers to logger
logger.addHandler(ch)
logger.addHandler(fh)

multiprocessing_logging.install_mp_handler(logger=logger)


# ==================================== SALE ORDER LINE ====================================
def update_sale_order_line(pid, data_pool, product_ids, uom_ids, order_tax_code_ids):
    sock = xmlrpclib.ServerProxy(URL, allow_none=True)
    while data_pool:
        try:
            data = data_pool.pop()
            order_id = data.get('order_id')
            order_lines = sock.execute(DB, UID, PSW, 'sale.order.line', 'search_read',
                                       [('order_id', '=', order_id)],
                                       ['product_id', 'product_uom'])
def update_database(): with logging_redirect_tqdm(): install_mp_handler() latest_update = Update.get_latest_update(success=False) if latest_update.status not in [ Update.Status.ERROR, Update.Status.SUCCESS ]: print( 'Last update revision=%s is still not ended (or hang or crashed). ' 'Would you like to start new update (y) continue previous (N)?' % latest_update.id) response = input() if response.lower() == 'y': latest_update.status = 'error' latest_update.save() latest_update = Update.objects.create( status=Update.Status.IN_PROGRESS) else: latest_update = Update.objects.create( status=Update.Status.IN_PROGRESS) # scraping level 3 indexes first level_3_koatuu = list( set([koatuu for koatuu in _get_indexes() if koatuu.level <= 2])) # sort unique ids then (stable sort, so level is still sorted) level_3_koatuu.sort(key=attrgetter('unique_id')) # sort level from 1 to 3 keeping stable unique_id level_3_koatuu.sort(key=attrgetter('level')) if latest_update.latest_koatuu: logging.info("Searching for the latest koatuu scraped") latest_koatuu_obj = next( (koatuu for koatuu in level_3_koatuu if koatuu.unique_id == latest_update.latest_koatuu), None) if latest_koatuu_obj is None: level_3_koatuu = [] else: logging.info("Found latest koatuu scraped %s", latest_koatuu_obj) level_3_koatuu = level_3_koatuu[level_3_koatuu. index(latest_koatuu_obj):] logging.info('Koatuu to scrape only %s', len(level_3_koatuu)) if level_3_koatuu: _download_and_insert(latest_update, level_3_koatuu) logging.info('All insert l1 operations ended') # process level 4 indexes only for regions where parcels # number is more than 100000 annotated = Landuse.objects.all().values('koatuu').filter( revision=latest_update.id).annotate( total=Count('koatuu')).order_by('-total') level_4_koatuu = [] all_koatuu = list(set([koatuu for koatuu in _get_indexes()])) for result in annotated: if result['total'] < 100000: continue koatuu_obj = next(koatuu for koatuu in all_koatuu if koatuu.unique_id == str(result['koatuu'])) if koatuu_obj.level == 2: level_3_koatuus = [ *set([ koatuu for koatuu in all_koatuu if koatuu.level == 3 and str(koatuu.parent) == koatuu_obj.unique_id ]) ] level_4_koatuu.extend(level_3_koatuus) for level_3_koatuu in level_3_koatuus: level_4_koatuu.extend([ *set([ koatuu for koatuu in all_koatuu if koatuu.level == 4 and str(koatuu.parent) == level_3_koatuu.unique_id ]) ]) if koatuu_obj.level == 3: level_4_koatuu.extend([ *set([ koatuu for koatuu in all_koatuu if koatuu.level == 4 and str(koatuu.parent) == koatuu_obj.unique_id ]) ]) level_4_koatuu.sort(key=attrgetter('unique_id')) _download_and_insert(latest_update, level_4_koatuu) # detecting changes to create analysis table create_changeset( revision=Update.objects.get(id=latest_update.id), previous=Update.objects.get(id=Update.get_latest_update().id), ) # everything is ok => success status Update.objects.filter(id=latest_update.id).update( status=Update.Status.SUCCESS)
if __name__ == "__main__":

    import os
    import shutil
    import sys
    import logging
    from multiprocessing_logging import install_mp_handler

    from wepy_tools.sim_makers.openmm.lennard_jones import LennardJonesPairOpenMMSimMaker

    OUTPUT_DIR = "_output/sim_maker_run"

    logging.getLogger().setLevel(logging.DEBUG)
    install_mp_handler()

    if sys.argv[1] == "-h" or sys.argv[1] == "--help":
        print("arguments: n_cycles, n_steps, n_walkers, n_workers, platform, resampler")
        exit()
    else:
        n_cycles = int(sys.argv[1])
        n_steps = int(sys.argv[2])
        n_walkers = int(sys.argv[3])
        n_workers = int(sys.argv[4])
        platform = sys.argv[5]
        resampler = sys.argv[6]

        print("Number of steps: {}".format(n_steps))
        print("Number of cycles: {}".format(n_cycles))
def main(): config = get_config() logging.basicConfig( format= '%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s', datefmt="%Y-%m-%dT%H:%M:%S%z", level=logging.DEBUG) multiprocessing_logging.install_mp_handler() logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR) zabbix_config = dict(config["zabbix"]) zabbix_config["failsafe"] = int(zabbix_config.get("failsafe", "20")) if zabbix_config["dryrun"] == "false": zabbix_config["dryrun"] = False elif zabbix_config["dryrun"] == "true": zabbix_config["dryrun"] = True else: raise Exception() logging.info("Main start (%d) version %s", os.getpid(), __version__) stop_event = multiprocessing.Event() processes = [] source_hosts_queues = [] source_collectors = get_source_collectors(config) for source_collector in source_collectors: source_hosts_queue = multiprocessing.Queue() process = processing.SourceCollectorProcess(source_collector["name"], source_collector["module"], source_collector["config"], source_hosts_queue) source_hosts_queues.append(source_hosts_queue) processes.append(process) process.start() process = processing.SourceHandlerProcess("source-handler", config["zac"]["db_uri"], source_hosts_queues) process.start() processes.append(process) process = processing.SourceMergerProcess( "source-merger", config["zac"]["db_uri"], config["zac"]["host_modifier_dir"]) process.start() processes.append(process) process = processing.ZabbixHostUpdater("zabbix-host-updater", config["zac"]["db_uri"], zabbix_config) process.start() processes.append(process) process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater", config["zac"]["db_uri"], zabbix_config) process.start() processes.append(process) process = processing.ZabbixTemplateUpdater("zabbix-template-updater", config["zac"]["db_uri"], zabbix_config) process.start() processes.append(process) with processing.SignalHandler(stop_event): status_interval = 60 next_status = datetime.datetime.now() while not stop_event.is_set(): if next_status < datetime.datetime.now(): log_process_status(processes) next_status = datetime.datetime.now() + datetime.timedelta( seconds=status_interval) dead_process_names = [ process.name for process in processes if not process.is_alive() ] if dead_process_names: logging.error("A child has died: %s. Exiting", ', '.join(dead_process_names)) stop_event.set() time.sleep(1) logging.debug( "Queues: %s", ", ".join([str(queue.qsize()) for queue in source_hosts_queues])) for process in processes: logging.info("Terminating: %s(%d)", process.name, process.pid) process.terminate() alive_processes = [ process for process in processes if process.is_alive() ] while alive_processes: process = alive_processes[0] logging.info("Waiting for: %s(%d)", process.name, process.pid) log_process_status(processes) # TODO: Too verbose? process.join(10) if process.exitcode is None: logging.warning( "Process hanging. Signaling new terminate: %s(%d)", process.name, process.pid) process.terminate() time.sleep(1) alive_processes = [ process for process in processes if process.is_alive() ] logging.info("Main exit")
def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(self):
    with mock.patch('logging.getLogger') as getLogger:
        install_mp_handler(self.logger)

        self.assertEqual(0, getLogger.call_count)
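These tests assume a fixture logger carrying exactly one plain handler for install_mp_handler to wrap. A minimal sketch of such a setUp, with hypothetical names (this is not the project's actual test harness, only one way the self.logger / self.handler attributes used above could be prepared):

import logging
import unittest

try:
    from unittest import mock
except ImportError:  # Python 2: the skip guards above handle mock being unavailable
    mock = None

from multiprocessing_logging import install_mp_handler, MultiProcessingHandler


class InstallMpHandlerTest(unittest.TestCase):
    def setUp(self):
        # A throwaway logger with a single ordinary handler that the wrapper
        # is expected to replace with a MultiProcessingHandler.
        self.handler = logging.NullHandler()
        self.logger = logging.getLogger('install-mp-handler-test')
        self.logger.handlers = [self.handler]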
def main(argv): parser = argparse.ArgumentParser( description='Perform segmentation on .svg and .png files.') parser.add_argument('dirs', nargs='+', help='Directories that stores .svg&.png files.') parser.add_argument( '--num-workers', default=0, type=int, dest='num_workers', help='Number of processes. 0 for all available cpu cores.') parser.add_argument('--log', default='segmentation.log', type=str, dest='log_file', help='Path to log file.') parser.add_argument('--conf', default='seg_conf.json', type=str, dest='confidence_file', help='Path to segmentation confidence file.') parser.add_argument( '--no-optimize', default=True, action='store_false', dest='optimize', help= "Dont't use svgo optimization. This will produce larger svg files but cost much less time." ) parser.add_argument('--export-contour', default=False, action='store_true', dest='export_contour', help='Export contour segmentation results.') parser.add_argument('--export-mask', default=False, action='store_true', dest='export_mask', help='Export morphed mask for debug use.') args = parser.parse_args(argv[1:]) global logger logger = get_logger('segmentation', args.log_file, echo=False, multiprocessing=True) install_mp_handler(logger) global USE_OPTIMIZE, EXPORT_CONTOUR_RESULTS, EXPORT_MASK USE_OPTIMIZE = args.optimize EXPORT_CONTOUR_RESULTS = args.export_contour EXPORT_MASK = args.export_mask num_workers = args.num_workers if num_workers == 0: num_workers = multiprocessing.cpu_count() logger.info('Using {} processes.'.format(num_workers)) src_dirs = args.dirs for src_dir in src_dirs: if not osp.isdir(src_dir): continue logger.info('Processing {} ...'.format(src_dir)) tgt_dir = src_dir tgts = [] for f in glob.glob(osp.join(src_dir, '*.eps')): _id, _ = osp.splitext(osp.basename(f)) svg_file, png_file = osp.join(src_dir, _id + '.svg'), osp.join( src_dir, _id + '.png') if osp.exists(svg_file) and osp.exists(png_file): tgts.append({ 'id': _id, 'svg_file': svg_file, 'png_file': png_file }) conf = seg(tgts, tgt_dir, num_workers=num_workers) with open(args.confidence_file, 'w') as f: f.write( json.dumps([{ 'id': tgt['id'], 'score': s } for tgt, s in zip(tgts, conf)]))
import logging
from collections import OrderedDict
from multiprocessing import current_process

import numpy as np
import click

from data_iterator import TextIterator
from params import load_params

logging.basicConfig(level=logging.WARN,
                    format="%(asctime)s - %(levelname)s %(module)s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")
import multiprocessing_logging
multiprocessing_logging.install_mp_handler()


def error_process(params, device, **model_options):
    import theano
    import theano.sandbox.cuda
    from build_model import build_model

    theano.sandbox.cuda.use(device)

    tparams = OrderedDict()
    for param_name, param in params.items():
        tparams[param_name] = theano.shared(param, name=param_name)

    process_name = current_process().name
def processYears(self, years: List[int]) -> None: ''' Parameters ---------- years : List[int] list of years to (re-)process ''' # If necessary, creates indexes for the data collection. self._createDataColIndex() # Does the grid collection exists? col_grid = self._createMongoConn(cfg=self.cfg)['col_grid'] ndoc = col_grid.count_documents(filter={}) if ndoc == 0: logging.info('Creation of the grid collection...') # Does the reference netCDF file exists? fname = self.downloadDir + 'era5_masks.nc' if os.path.exists(fname) is False: print('Downloading mask data...') self.getMasks() self.createGridCollection(mask=False) # Get all grid_ids self.all_ids = col_grid.distinct(key='id_grid') for year in years: self.year = int(year) logging.info(f' --- PROCESSING YEAR {year} ---') if self.download is True: logging.info('Proceeding with downloads...') today = datetime.today() if (year == today.year): months = np.arange(1, today.month + 1).tolist() else: months = np.arange(1, 12 + 1).tolist() # Are these months present as nc files ? # List this year's nc files try: ncfiles = self.listNetCDFfiles(year) except FileNotFoundError: logging.info(f'No ERA5T files downloaded for {year} yet.') months_to_download = months else: fmonths_present = sorted( list( map( lambda x: int(x[x.find("-") + 1:x.find(".nc")] ), ncfiles))) fmonths_needed = months # Months needed but not present : missing_months = list( set(fmonths_needed) - (set(fmonths_present))) months_to_download = missing_months # months_to_download = list( # set(missing_months + months)) # distinct months # fmonths_present # print(ncfiles) # print(fmonths_present) # print('...') # print(missing_months) # import sys # sys.exit(0) if len(months_to_download) > 0: logging.info(f'Downloading files for YEAR {year}....\n' + f'Months: {months_to_download}') # Parralel download of monthly data: install_mp_handler() p = ThreadPool(processes=12) # one thread per month p.map(lambda m: self.getFiles(year=year, month=m), months_to_download) p.close() p.join() logging.info(f'Downloading files for YEAR {year} Done.') else: logging.info(f'All files already present for year {year}.') else: logging.info('Proceeding without downloads') # List all the current year's nc files after download nc_local = self.listNetCDFfiles(year=year) # Open them all in one ds object # arrays will be loaded in chronological order try: ds = xr.open_mfdataset(nc_local, combine='by_coords') except Exception as e: logging.info(e) else: self.df_missing_dates = self.findMissingDates(ds) # Create the tile (chunks) elements # This operation starts to be useful at high grid resolution # i.e., from 0.25 x 0.25. For coarser grid (i.e., 0.1 x 0.1) # this is not really essential. 
delta = 30 # grid chunk in degrees (should be a multiple of # both 360 and 180) # ERA's lon have range [0, 360] and not [-180, 180] ilons = np.arange(0, 360, delta) ilats = np.arange(-60, 90, delta) elements = itertools.product(*[ilons, ilats]) # Explore the grid chunks and select # those containing grid cells def worker_initializer00(): global col_grid cons = self._createMongoConn(cfg=self.cfg) col_grid = cons['col_grid'] p = ThreadPool(processes=self.nthreads, initializer=worker_initializer00) res = p.map( lambda e: self.exploreChunks(ilon_chunk=e[0], ilat_chunk=e[1], delta=delta, mask_query=None, retrn='ndocs', col_grid=col_grid), elements) p.close() p.join() df_e = pd.DataFrame(res) df_e = df_e.query('n > 0').sort_values(by='n').reset_index( drop=True) # Do the insertion N = df_e.shape[0] for i in np.arange(N): logging.info(f'Year {year}: processing chunk {i}/{N}') ilon = df_e.loc[i, 'ilon_chunk'] ilat = df_e.loc[i, 'ilat_chunk'] n = df_e.loc[i, 'n'] self.insertChunk(ilon, ilat, delta, ds, 'insert') logging.info(f'{n} documents inserted') logging.info(' --- PROCESSING YEAR %s DONE !---' % year)