    def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(self):
        if not mock:
            self.skipTest('unittest.mock is not available')

        with mock.patch('logging.getLogger') as getLogger:
            install_mp_handler(self.logger)

            self.assertEqual(0, getLogger.call_count)

    def test_when_no_logger_is_specified_then_it_uses_the_root_logger(self):
        if not mock:
            self.skipTest('unittest.mock is not available')

        with mock.patch('logging.getLogger') as getLogger:
            getLogger.return_value = self.logger

            install_mp_handler()

            getLogger.assert_called_once_with()

        wrapper_handler, = self.logger.handlers
        self.assertIsInstance(wrapper_handler, MultiProcessingHandler)
        self.assertIs(wrapper_handler.sub_handler, self.handler)
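The tests above exercise multiprocessing_logging.install_mp_handler(), which replaces a logger's handlers with queue-backed MultiProcessingHandler wrappers so that records emitted in child processes are funneled back to the parent. A minimal usage sketch of the pattern the examples below repeat (the file name and worker function are placeholders, not taken from any example):

import logging
import multiprocessing

from multiprocessing_logging import install_mp_handler


def work(i):
    logging.info("processing item %d", i)  # emitted from a worker process


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, filename="run.log")
    install_mp_handler()  # wrap the root logger's handlers before starting workers
    with multiprocessing.Pool(4) as pool:
        pool.map(work, range(10))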
Example #3
def parse_args(description, ParamsClass=SieverParams, **kwds):
    """
    Parse command line arguments.

    The command line parser accepts the standard parameters as printed by calling it with
    ``--help``.  All other parameters are used to construct params objects.  For example:

    ./foo 80 --workers 4 --trials 2 -S 1337 --a 1 2 --b 3 4

    would operate on dimension 80 with parameters (a: 1, b: 3), (a: 1, b: 4), (a: 2, b: 3), (a: 2,
    b: 4), i.e. the Cartesian product of all parameters.  It will run two trials each using four
    workers. Note that each worker may use several threads, too. The starting seed is `1337`.

    :param description: help message
    :param ParamsClass: class used to construct the parameter objects
    :param kwds: default parameters

    """

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'lower_bound',
        type=int,
        help="lowest lattice dimension to consider (inclusive)")
    parser.add_argument(
        '-u',
        '--upper-bound',
        type=int,
        dest="upper_bound",
        default=0,
        help="upper bound on lattice dimension to consider (exclusive)")
    parser.add_argument('-s',
                        '--step-size',
                        type=int,
                        dest="step_size",
                        default=2,
                        help="increment lattice dimension in these steps")
    parser.add_argument('-t',
                        '--trials',
                        type=int,
                        dest="trials",
                        default=1,
                        help="number of experiments to run per dimension")
    parser.add_argument('-w',
                        '--workers',
                        type=int,
                        dest="workers",
                        default=1,
                        help="number of parallel experiments to run")
    parser.add_argument('-p',
                        '--pickle',
                        action='store_true',
                        dest="pickle",
                        help="pickle statistics")
    parser.add_argument('-S',
                        '--seed',
                        type=int,
                        dest="seed",
                        default=0,
                        help="randomness seed")
    parser.add_argument(
        '--dry-run',
        dest="dry_run",
        action='store_true',
        help=
        "Show parameters that would be used but don't run any actual experiments."
    )
    parser.add_argument('--show-defaults',
                        dest="show_defaults",
                        action='store_true',
                        help="Show default parameters and exit.")
    parser.add_argument('--loglvl',
                        type=str,
                        help="Logging level (one of DEBUG, WARN, INFO)",
                        default="INFO")
    parser.add_argument('--log-filename',
                        dest="log_filename",
                        type=str,
                        help="Logfile filename",
                        default=None)
    args, unknown = parser.parse_known_args()

    kwds_ = OrderedDict()
    for k, v in six.iteritems(kwds):
        k_ = k.replace("__", "/")
        kwds_[k_] = v
    kwds = kwds_

    if args.show_defaults:
        pp = ParamsClass(**kwds)
        slen = max(len(p) for p in pp) + 1
        fmt = "{key:%ds}: {value}" % slen
        for k, v in six.iteritems(pp):
            print(fmt.format(key=k, value=v))
        exit(0)

    all_params = OrderedDict([("", ParamsClass(**kwds))])

    unknown_args = OrderedDict()
    unknown = apply_aliases(unknown)

    # NOTE: This seems like the kind of thing the standard library can do (better)
    i = 0
    while i < len(unknown):
        k = unknown[i]
        if not (k.startswith("--") or k.startswith("-")):
            raise ValueError("Failure to parse command line argument '%s'" % k)
        k = re.match("^-+(.*)", k).groups()[0]
        k = k.replace("-", "_")
        unknown_args[k] = []
        i += 1
        for i in range(i, len(unknown)):
            v = unknown[i]
            if v.startswith("--") or v.startswith("-"):
                i -= 1
                break
            try:
                v = eval(v, {"BKZ": BKZ})
            except (NameError, SyntaxError):
                pass  # keep the raw string when it does not evaluate
            if not isinstance(v, (list, tuple)):
                v = [v]
            unknown_args[k].extend(v)
        i += 1
        if not unknown_args[k]:
            unknown_args[k] = [True]

    for k, v in six.iteritems(unknown_args):
        all_params_ = OrderedDict()
        for p in all_params:
            for v_ in v:
                p_ = copy.copy(all_params[p])
                p_[k] = v_
                all_params_[p + "'%s': %s, " % (k, v_)] = p_
        all_params = all_params_

    log_filename = args.log_filename
    if log_filename is None:
        log_filename = log_filenamef()

    multiprocessing_logging.install_mp_handler()

    if not os.path.isdir("logs"):
        os.makedirs("logs")
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(levelname)5s:%(name)12s:%(asctime)s: %(message)s',
        datefmt='%Y/%m/%d %H:%M:%S %Z',
        filename=log_filename)

    console = logging.StreamHandler()
    console.setLevel(getattr(logging, args.loglvl.upper()))
    console.setFormatter(logging.Formatter('%(name)s: %(message)s', ))
    logging.getLogger('').addHandler(console)

    if args.dry_run:
        for params in all_params:
            print(params)
        exit(0)

    return args, all_params
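The hand-rolled loop above (see the NOTE in the code) expands the leftover ``--key value ...`` arguments into the Cartesian product of all their values. A minimal sketch of the same expansion built on itertools.product, assuming the extra arguments have already been collected into a dict of value lists and that the params objects support copy.copy and item assignment as they do above:

import copy
import itertools
from collections import OrderedDict


def expand_params(base_params, extra_args):
    # extra_args maps a parameter name to the list of values it should take,
    # e.g. OrderedDict([("a", [1, 2]), ("b", [3, 4])]).
    keys = list(extra_args)
    expanded = OrderedDict()
    for combo in itertools.product(*(extra_args[k] for k in keys)):
        params = copy.copy(base_params)
        tag = ""
        for k, v in zip(keys, combo):
            params[k] = v
            tag += "'%s': %s, " % (k, v)
        expanded[tag] = params
    return expanded

For the docstring's example this yields four params objects, one per combination of a and b, keyed the same way as all_params above.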
Example #4
def main():
    args = parse_arguments()

    logging.basicConfig(
        level=getattr(logging, args.log_level.upper()),
        format='%(asctime)s - %(process)s - %(levelname)s - %(message)s')
    install_mp_handler()

    os.nice(20)
    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    input_files = collect_inputs(args.input_dirs)
    logging.info('Scheduled {} files for shuffling.'.format(len(input_files)))
    if not input_files:
        logging.critical('No input files!')
        sys.exit(1)

    output_files = [
        os.path.join(args.output_dir, os.path.basename(f)) for f in input_files
    ]

    with openall(input_files[0]) as inf:
        header = inf.readline().strip()

    with Pool(args.processes) as inpool, Pool(args.processes) as outpool:
        m = Manager()
        queue = m.Queue(maxsize=1000)
        num_readers = m.Value('I', args.processes)
        lock = m.Lock()

        # Each worker gets a chunk of all input / output files
        input_chunks = list(split_into(input_files, args.processes))
        output_chunks = list(split_into(output_files, args.processes))

        producer_f = partial(producer,
                             queue=queue,
                             num_readers=num_readers,
                             lock=lock)
        inresult = inpool.map_async(producer_f, input_chunks)
        consumer_f = partial(consumer,
                             queue=queue,
                             header=header,
                             documents=args.documents,
                             num_readers=num_readers,
                             lock=lock)
        outresult = outpool.map_async(consumer_f, output_chunks)

        docs_read, docs_written = sum(inresult.get()), sum(outresult.get())

        logging.debug('Joining processes...')
        inpool.close()
        outpool.close()
        inpool.join()
        outpool.join()
        logging.debug('Joined processes.')

        if docs_read != docs_written:
            logging.error(f'The number of documents read ({docs_read}) and '
                          f'the number of documents written ({docs_written}) '
                          f'differs!')

    logging.info('Done.')
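The producer and consumer functions dispatched via map_async above are not shown. A minimal sketch of the producer side under the protocol the snippet implies, where the shared num_readers counter is decremented under the lock when a reader finishes so consumers know when input is exhausted (openall comes from the snippet; treating each non-header line as one document is an assumption):

def producer(input_files, queue, num_readers, lock):
    # Sketch only: read this worker's chunk of input files and push every
    # record onto the shared queue for the consumers to shuffle and write out.
    docs_read = 0
    for input_file in input_files:
        with openall(input_file) as inf:
            next(inf)  # skip the header; consumers write their own copy
            for line in inf:
                queue.put(line)
                docs_read += 1
    with lock:
        num_readers.value -= 1  # signal that one fewer reader is active
    return docs_read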
    def test_when_a_logger_is_passed_then_it_wraps_all_handlers(self):
        install_mp_handler(self.logger)

        wrapper_handler, = self.logger.handlers
        self.assertIsInstance(wrapper_handler, MultiProcessingHandler)
        self.assertIs(wrapper_handler.sub_handler, self.handler)
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path,
                            log_path=None, verbose=False, disable_logging=False,
                            num_workers=1, **ffmpeg_cfg):
    """
    Downloads Flickr dataset files

    Args:
        dataset_path:  Path to dataset file containing URLs
                       (Type: str)

        data_dir:      Output directory where video will be saved if output
                       path is not explicitly given
                       (Type: str)

        ffmpeg_path:   Path to ffmpeg executable
                       (Type: str)

        ffprobe_path:  Path to ffprobe executable
                       (Type: str)

    Keyword Args:
        log_path:         Path to log file. If None, defaults to "flickr-soundnet-dl.log"
                          (Type: str or None)

        verbose:          If True, prints detailed messages to console
                          (Type: bool)

        disable_logging:  If True, does not log to a file
                          (Type: bool)

        num_workers:      Number of multiprocessing workers used to download videos
                          (Type: int)

        **ffmpeg_cfg:     ffmpeg configurations
    """

    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    print(verbose)
    LOGGER.debug('Initialized logging.')

    audio_dir = os.path.join(data_dir, 'audio')
    video_dir = os.path.join(data_dir, 'video')

    if not os.path.isdir(audio_dir):
        os.makedirs(audio_dir)

    if not os.path.isdir(video_dir):
        os.makedirs(video_dir)

    pool = mp.Pool(num_workers)
    try:
        with open(dataset_path, 'r') as f:
            for line_idx, line in enumerate(f):
                url = line.strip()
                media_filename = extract_flickr_id(url)
                video_filepath = os.path.join(data_dir, 'video', media_filename + '.' + ffmpeg_cfg.get('video_format', 'mp4'))
                audio_filepath = os.path.join(data_dir, 'audio', media_filename + '.' + ffmpeg_cfg.get('audio_format', 'flac'))

                if os.path.exists(video_filepath) and os.path.exists(audio_filepath):
                    info_msg = 'Already downloaded video {}. Skipping.'
                    LOGGER.info(info_msg.format(media_filename))
                    continue

                worker_args = [url, data_dir, ffmpeg_path, ffprobe_path]
                pool.apply_async(partial(download_flickr_video, **ffmpeg_cfg), worker_args)

    except KeyboardInterrupt:
        LOGGER.info("Forcing exit.")
        exit()
    finally:
        try:
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            LOGGER.info("Forcing exit.")
            exit()

    LOGGER.info('Finished downloading videos!')
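A minimal sketch of calling the function above; the dataset path, executable locations, and worker count are placeholders, though video_format and audio_format are the keys the function itself reads from **ffmpeg_cfg:

if __name__ == '__main__':
    download_flickr_dataset('flickr_urls.txt',           # assumed URL list file
                            data_dir='data',
                            ffmpeg_path='/usr/bin/ffmpeg',
                            ffprobe_path='/usr/bin/ffprobe',
                            verbose=True,
                            num_workers=4,
                            video_format='mp4',
                            audio_format='flac')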
import logging
import multiprocessing
from pathlib import Path
from typing import List
import warnings

from astropy.io import fits
from astropy.wcs import WCS, FITSFixedWarning
import mocpy
import multiprocessing_logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)-25s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
multiprocessing_logging.install_mp_handler()
logger = logging.getLogger(__name__)


def stokes_type(stokes_str: str) -> List[str]:
    STOKES_PARAMS = ("I", "Q", "U", "V")
    stokes_str = stokes_str.upper()
    for char in stokes_str:
        if char not in STOKES_PARAMS:
            raise ValueError(
                f"Stokes parameter must be one of {''.join(STOKES_PARAMS)}")
    return [char for char in stokes_str]


def get_moc_output_dir(image_path: Path) -> Path:
    output_dir_name = image_path.parent.name.replace("IMAGES", "MOCS")
Example #8
    def test_lj_sim_manager_openmm_integration_run(
        self,
        class_tmp_path_factory,
        boundary_condition_class,
        resampler_class,
        work_mapper_class,
        platform,
        lj_params,
        lj_omm_sys,
        lj_integrator,
        lj_reporter_classes,
        lj_reporter_kwargs,
        lj_init_walkers,
        lj_openmm_runner,
        lj_unbinding_bc,
        lj_wexplore_resampler,
        lj_revo_resampler,
    ):
        """Run all combinations of components in the fixtures for the smallest
        amount of time, just to make sure they all work together and don't give errors."""

        logging.getLogger().setLevel(logging.DEBUG)
        install_mp_handler()
        logging.debug("Starting the test")

        print("starting the test")

        # the configuration class gives us a convenient way to
        # parametrize our reporters for the locale
        from wepy.orchestration.configuration import Configuration

        # the runner
        from wepy.runners.openmm import OpenMMRunner

        # mappers
        from wepy.work_mapper.mapper import Mapper
        from wepy.work_mapper.worker import WorkerMapper
        from wepy.work_mapper.task_mapper import TaskMapper

        # the worker types for the WorkerMapper
        from wepy.work_mapper.worker import Worker
        from wepy.runners.openmm import OpenMMCPUWorker, OpenMMGPUWorker

        # the walker task types for the TaskMapper
        from wepy.work_mapper.task_mapper import WalkerTaskProcess
        from wepy.runners.openmm import OpenMMCPUWalkerTaskProcess, OpenMMGPUWalkerTaskProcess

        n_cycles = 1
        n_steps = 2
        num_workers = 2

        # generate the reporters and temporary directory for this test
        # combination

        tmpdir_template = 'lj_fixture_{plat}-{wm}-{res}-{bc}'
        tmpdir_name = tmpdir_template.format(plat=platform,
                                             wm=work_mapper_class,
                                             res=resampler_class,
                                             bc=boundary_condition_class)

        # make a temporary directory for this configuration to work with
        tmpdir = str(class_tmp_path_factory.mktemp(tmpdir_name))

        # make a config so that the reporters get parametrized properly
        reporters = Configuration(
            work_dir=tmpdir,
            reporter_classes=lj_reporter_classes,
            reporter_partial_kwargs=lj_reporter_kwargs).reporters

        steps = [n_steps for _ in range(n_cycles)]

        # choose the components based on the parametrization
        boundary_condition = None
        resampler = None

        walker_fixtures = [lj_init_walkers]
        runner_fixtures = [lj_openmm_runner]
        boundary_condition_fixtures = [lj_unbinding_bc]
        resampler_fixtures = [lj_wexplore_resampler, lj_revo_resampler]

        walkers = lj_init_walkers

        boundary_condition = [
            boundary_condition
            for boundary_condition in boundary_condition_fixtures
            if type(boundary_condition).__name__ == boundary_condition_class
        ][0]
        resampler = [
            resampler for resampler in resampler_fixtures
            if type(resampler).__name__ == resampler_class
        ][0]

        assert boundary_condition is not None
        assert resampler is not None

        # generate the work mapper given the type and the platform

        work_mapper_classes = {
            mapper_class.__name__: mapper_class
            for mapper_class in [Mapper, WorkerMapper, TaskMapper]
        }

        # # select the right one given the option
        # work_mapper_type = [mapper_type for mapper_type in work_mapper_classes
        #                     if type(mapper_type).__name__ == work_mapper_class][0]

        # decide based on the platform and the work mapper which
        # platform dependent components to build
        if work_mapper_class == 'Mapper':
            # then there is no settings
            work_mapper = Mapper()

        elif work_mapper_class == 'WorkerMapper':

            if platform == 'CUDA':
                work_mapper = WorkerMapper(num_workers=num_workers,
                                           worker_type=OpenMMGPUWorker,
                                           device_ids={
                                               '0': 0,
                                               '1': 1
                                           },
                                           proc_start_method='spawn')

            elif platform == 'OpenCL':
                work_mapper = WorkerMapper(
                    num_workers=num_workers,
                    worker_type=OpenMMGPUWorker,
                    device_ids={
                        '0': 0,
                        '1': 1
                    },
                )

            elif platform == 'CPU':
                work_mapper = WorkerMapper(
                    num_workers=num_workers,
                    worker_type=OpenMMCPUWorker,
                    worker_attributes={'num_threads': 1})

            elif platform == 'Reference':
                work_mapper = WorkerMapper(
                    num_workers=num_workers,
                    worker_type=Worker,
                )

        elif work_mapper_class == 'TaskMapper':

            if platform == 'CUDA':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=OpenMMGPUWalkerTaskProcess,
                    device_ids={
                        '0': 0,
                        '1': 1
                    },
                    proc_start_method='spawn')

            elif platform == 'OpenCL':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=OpenMMGPUWalkerTaskProcess,
                    device_ids={
                        '0': 0,
                        '1': 1
                    })

            elif platform == 'CPU':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=OpenMMCPUWalkerTaskProcess,
                    worker_attributes={'num_threads': 1})

            elif platform == 'Reference':
                work_mapper = TaskMapper(
                    num_workers=num_workers,
                    walker_task_type=WalkerTaskProcess,
                )

        else:
            raise ValueError("Platform {} not recognized".format(platform))

        # initialize the runner with the platform
        runner = OpenMMRunner(lj_omm_sys.system,
                              lj_omm_sys.topology,
                              lj_integrator,
                              platform=platform)

        logging.debug("Constructing the manager")

        manager = Manager(walkers,
                          runner=runner,
                          boundary_conditions=boundary_condition,
                          resampler=resampler,
                          work_mapper=work_mapper,
                          reporters=reporters)

        # since different work mappers need different process start
        # methods for different platforms i.e. CUDA and linux fork
        # vs. spawn we choose the appropriate one for each method.

        logging.debug("Starting the simulation")

        walkers, filters = manager.run_simulation(n_cycles,
                                                  steps,
                                                  num_workers=num_workers)
Example #10
def main():
    parser = argparse.ArgumentParser(description='''Starts a BLEva Gateway
            service on the device.''',
                                     epilog='''Note: This requires a BLED112
            dongle from Bluegiga.''')
    parser.add_argument('-u',
                        '--url',
                        help='''URL of BLEva server''',
                        required=True)
    parser.add_argument('-d',
                        '--debug',
                        help='Debug level (0-4)',
                        type=int,
                        default=20,
                        choices=[10, 20, 30, 40, 50])
    args = parser.parse_args()
    url = args.url
    print url
    tty_paths = util.get_tty_paths()
    FORMAT = '%(asctime)s - %(name)s - %(processName)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=FORMAT, filename='bled112.log')
    logger = logging.getLogger('BLEva')
    logger.setLevel(args.debug)
    import multiprocessing_logging
    multiprocessing_logging.install_mp_handler()
    logger.info('\n--------------------')
    logger.info('BLEva has started')
    logger.info('\n--------------------')
    while True:
        logger.info('\n--------------------')
        logger.info('BLEva is waiting for new benchmark')
        print "BLEva is waiting for new benchmarks"
        b = getBenchmark(url + '/benchmark')
        print b
        if b != '':
            logger.info('BLEva received new benchmark')
            print "got new benchmark"
            j = json.loads(b)
            instances = []
            for dongle in j['dongles']:
                gap_role = dongle['gap_role']
                gatt_role = dongle['gatt_role']
                replicas = dongle['replicas']
                print replicas
                logger.debug("Replicas: " + str(replicas))
                if replicas > len(tty_paths):
                    raise Exception("Too few dongles connected.")
                for replica in xrange(0, replicas):
                    if gap_role in ['broadcaster', 'peripheral']:
                        a = dongle['steps']
                        steps = []
                        for v in a:
                            s = Step()
                            s.time = v['time']
                            print "json time " + str(['time'])
                            s.ble_operation = v['ble_operation']
                            # s.adv_data = map(ord, v['adv_data'][2:].decode("hex"))
                            # s.short_name = util.pad_truncate(s.short_name, 5)
                            # s.long_name = util.pad_truncate(s.long_name, 12)
                            s.long_name = v['long_name']
                            if replica < 10:
                                s.short_name = v['short_name'] + str(0) + str(
                                    replica)
                                if s.long_name != "":
                                    s.long_name = v['long_name'] + str(
                                        0) + str(replica)
                            else:
                                s.short_name = v['short_name'] + str(replica)
                                if s.long_name != "":
                                    s.long_name = v['long_name'] + str(replica)
                            s.short_name = util.pad_truncate(s.short_name, 7)
                            if s.long_name != "":
                                s.long_name = util.pad_truncate(
                                    s.long_name, 14)
                            logger.debug("Replica Short Name: " + s.short_name)
                            logger.debug("Replica Long Name: " + s.long_name)
                            s.major = int(
                                v['major'],
                                0)  # NOTE base=0 guesses base from string
                            s.minor = int(v['minor'], 0)
                            s.adv_interval_min = int(v['adv_interval_min'], 0)
                            s.adv_interval_max = int(v['adv_interval_max'], 0)
                            s.adv_channels = int(v['adv_channels'], 0)
                            s.gap_discoverable_mode = ble_codes.gap_discoverable_mode[
                                v['gap_discoverable_mode']]
                            s.gap_connectable_mode = ble_codes.gap_connectable_mode[
                                v['gap_connectable_mode']]
                            if "connection_interval_min" in v:
                                s.connection_interval_min = v[
                                    "connection_interval_min"]
                            if "connection_interval_max" in v:
                                s.connection_interval_max = v[
                                    "connection_interval_max"]
                            if "slave_latency" in v:
                                s.slave_latency = v["slave_latency"]
                            if "supervision_timeout" in v:
                                s.supervision_timeout = v[
                                    "supervision_timeout"]
                            steps.append(s)
                        peripheral = Peripheral(logger=logger,
                                                steps=steps,
                                                port_name=tty_paths[replica],
                                                gap_role=gap_role,
                                                gatt_role=gatt_role)
                        instances.append(peripheral)
            logger.info('BLEva is starting benchmark now')
            print "BLEva is starting benchmark now"
            processes = []
            logger.debug('Telling Phone to start')
            print "notifying phone"
            urllib2.urlopen(url + '/benchmark/sync/dongle').read()
            print "done notified"
            if not IBEACON:
                for i in instances:
                    print i
                    p = mp.Process(target=i.start_benchmark,
                                   name=i.steps[0].short_name)
                    p.start()
                    processes.append(p)
                for p in processes:
                    p.join()
            else:
                time.sleep(40)
            print "finished one benchmark"
            logger.info('BLEva finished one benchmark'
                        )  # FIXME fix logger to also log spawned processes
        if b == '':
            print "BLEva server not available, sleeping a while and try again."
            logger.info(
                'BLEva server not available, sleeping a while and try again.'
            )  # FIXME fix logger to also log spawned processes
            time.sleep(
                10)  # sleep and then try again until server is available
Example #11
def main():
    config = get_config()

    logging.basicConfig(
        format=
        '%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s',
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=logging.DEBUG)
    multiprocessing_logging.install_mp_handler()
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    if config.zac.health_file is not None:
        health_file = os.path.abspath(config.zac.health_file)
    else:
        health_file = None

    logging.info("Main start (%d) version %s", os.getpid(), __version__)

    stop_event = multiprocessing.Event()
    state_manager = multiprocessing.Manager()
    processes = []

    source_hosts_queues = []
    source_collectors = get_source_collectors(config)
    for source_collector in source_collectors:
        source_hosts_queue = multiprocessing.Queue()
        process = processing.SourceCollectorProcess(source_collector["name"],
                                                    state_manager.dict(),
                                                    source_collector["module"],
                                                    source_collector["config"],
                                                    source_hosts_queue)
        source_hosts_queues.append(source_hosts_queue)
        processes.append(process)

    try:
        process = processing.SourceHandlerProcess("source-handler",
                                                  state_manager.dict(),
                                                  config.zac.db_uri,
                                                  source_hosts_queues)
        processes.append(process)

        process = processing.SourceMergerProcess("source-merger",
                                                 state_manager.dict(),
                                                 config.zac.db_uri,
                                                 config.zac.host_modifier_dir)
        processes.append(process)

        process = processing.ZabbixHostUpdater("zabbix-host-updater",
                                               state_manager.dict(),
                                               config.zac.db_uri,
                                               config.zabbix)
        processes.append(process)

        process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater",
                                                    state_manager.dict(),
                                                    config.zac.db_uri,
                                                    config.zabbix)
        processes.append(process)

        process = processing.ZabbixTemplateUpdater("zabbix-template-updater",
                                                   state_manager.dict(),
                                                   config.zac.db_uri,
                                                   config.zabbix)
        processes.append(process)
    except exceptions.ZACException as e:
        logging.error("Failed to initialize child processes. Exiting: %s",
                      str(e))
        sys.exit(1)

    for process in processes:
        process.start()

    with processing.SignalHandler(stop_event):
        status_interval = 60
        next_status = datetime.datetime.now()

        while not stop_event.is_set():
            if next_status < datetime.datetime.now():
                if health_file is not None:
                    write_health(health_file, processes, source_hosts_queues,
                                 config.zabbix.failsafe)
                log_process_status(processes)
                next_status = datetime.datetime.now() + datetime.timedelta(
                    seconds=status_interval)

            dead_process_names = [
                process.name for process in processes
                if not process.is_alive()
            ]
            if dead_process_names:
                logging.error("A child has died: %s. Exiting",
                              ', '.join(dead_process_names))
                stop_event.set()

            time.sleep(1)

        logging.debug(
            "Queues: %s",
            ", ".join([str(queue.qsize()) for queue in source_hosts_queues]))

        for process in processes:
            logging.info("Terminating: %s(%d)", process.name, process.pid)
            process.terminate()

        alive_processes = [
            process for process in processes if process.is_alive()
        ]
        while alive_processes:
            process = alive_processes[0]
            logging.info("Waiting for: %s(%d)", process.name, process.pid)
            log_process_status(processes)  # TODO: Too verbose?
            process.join(10)
            if process.exitcode is None:
                logging.warning(
                    "Process hanging. Signaling new terminate: %s(%d)",
                    process.name, process.pid)
                process.terminate()
            time.sleep(1)
            alive_processes = [
                process for process in processes if process.is_alive()
            ]

    logging.info("Main exit")
Example #12
import multiprocessing as mp
from collections import defaultdict
from queue import Full, Empty

import logging
import numpy as np
from multiprocessing_logging import install_mp_handler

from .base_iterator import BaseIterator
from ...routines.mp_routines import ArrayDictQueue
from ... import ROOT_LOGGER_NAME, ROOT_LOGGER_LEVEL
logger = logging.getLogger('{}.{}'.format(ROOT_LOGGER_NAME, __name__))
logger.setLevel(ROOT_LOGGER_LEVEL)
install_mp_handler(logger=logger)


class MultiProcessIterator(BaseIterator):
    """Iterates through data with base iterator interface, implementing in-batch parallelism"""
    def __init__(self, num_processes, max_tasks=100, max_results=100, use_shared=False, *args, **kwargs):
        """
        :param num_processes: number of processes to be used by the iterator
        :param max_tasks: max number of tasks to be put in tasks queue
        :param max_results: max volume of output queue
        :param use_shared: whether to use array queue for passing big arrays without pickling them
        """

        super(MultiProcessIterator, self).__init__(*args, **kwargs)
        self._num_processes = num_processes
        self._max_tasks = max(max_tasks, self._batch_size)
        self._max_results = max(max_results, self._batch_size)
Example #13
 def __init__(self,
              producer_func,
              producer_config_args,
              pipe_funcs,
              pipe_funcs_config_args,
              pipe_n_procs,
              accumulator_object,
              accumulator_func,
              accumulator_config_args,
              worker_get_limit=5):
     # enforce the contract.
     try:
         assert isinstance(worker_get_limit, int) and worker_get_limit > 1
     except AssertionError:
         raise AssertionError('worker_get_limit must be an integer > 1')
     # allow multiple producers
     self._multiple_producers = isinstance(producer_func, tuple)
     # check functions
     try:
         if self._multiple_producers:
             for func in producer_func:
                 assert callable(func)
         else:
             assert callable(producer_func)
     except AssertionError:
         raise AssertionError(
             'must provide a callable function for producer')
     try:
         if self._multiple_producers:
             for func in producer_func:
                 assert isgeneratorfunction(func)
         else:
             assert isgeneratorfunction(producer_func)
     except AssertionError:
         raise AssertionError(
             'producer function(s) must (all) be a generator function')
     try:
         assert isinstance(pipe_funcs, tuple)
     except AssertionError:
         raise AssertionError(
             'must supply a tuple of callable functions for pipe_funcs')
     for pf in pipe_funcs:
         try:
             assert callable(pf)
         except AssertionError:
             raise AssertionError(
                 'all elements inside of pipe_funcs must be callable functions'
             )
     # check arguments
     try:
         if self._multiple_producers:
             for args in producer_config_args:
                 assert isinstance(args, tuple)
         else:
             assert isinstance(producer_config_args, tuple)
     except AssertionError:
         raise AssertionError(
             'function arguments must be provided as a tuple')
     try:
         assert isinstance(pipe_funcs_config_args, tuple)
         for pfa in pipe_funcs_config_args:
             assert isinstance(pfa, tuple)
     except AssertionError:
         raise AssertionError(
             'pipe function arguments must be provided as a tuple of tuples'
         )
     # check procs
     try:
         assert isinstance(pipe_n_procs, tuple)
         for n in pipe_n_procs:
             assert isinstance(n, int)
     except AssertionError:
         raise AssertionError('must provide a tuple of integers')
      # check agreement between correlated inputs
     try:
         assert len(pipe_funcs) == len(pipe_funcs_config_args) and len(
             pipe_funcs) == len(pipe_n_procs)
     except AssertionError:
         raise AssertionError(
             'must provide one tuple of arguments and a number of processes for each pipe function'
         )
     try:
         assert len(pipe_funcs) != 0
     except AssertionError:
         raise AssertionError('must provide work for the pipe to do')
     # check accumulator function
     try:
         assert callable(accumulator_func)
     except AssertionError:
         raise AssertionError(
             'must provide callable function for accumulator_func')
     # check accumulator args
     try:
         assert isinstance(accumulator_config_args, tuple)
     except AssertionError:
         raise AssertionError(
              'must provide a tuple of arguments for accumulator_config_args')
     # contract satisfied
     self.N = len(pipe_funcs)  # used all over in here
     # setup handlers to send child process logs into main thread's logger
     install_mp_handler()
     self.producer_func = producer_func
     self.producer_config_args = producer_config_args
     self.pipe_funcs = pipe_funcs
     self.pipe_funcs_config_args = pipe_funcs_config_args
     self.pipe_n_procs = pipe_n_procs
     self.accumulator_object = accumulator_object
     self.accumulator_func = accumulator_func
     self.accumulator_config_args = accumulator_config_args
     self.worker_get_limit = worker_get_limit
     # use a manager server to make cleanup easy
     self._sync_server = Manager()
     # 1 manager for each pipe func
     self._managers = [None for _ in range(self.N)]
     self._error_flag = self._sync_server.Value('i', int(False))
     # 1 producer finished flag for each manager, 1 for the consumer
     self._flags = [
         self._sync_server.Value('i', 0) for _ in range(self.N + 1)
     ]
     # 1 out(in) queue per pipe_func, + 1 extra in(out)
     self._queues = [self._sync_server.Queue() for _ in range(self.N + 1)]
     self._total_produced = self._sync_server.Value('i', 0)
     self._total_consumed = 0
Example #14
def excepthook(exctype, value, traceback):
    for p in multiprocessing.active_children():
        p.terminate()
    raise

sys.excepthook = excepthook


with open('config.logging.json', 'rt') as f:
    config = json.load(f)

logging.config.dictConfig(config)

import multiprocessing_logging
multiprocessing_logging.install_mp_handler()
multiprocessing_logging.install_mp_handler(logging.getLogger('overseer.quality'))
multiprocessing_logging.install_mp_handler(logging.getLogger('protocol'))

logger = logging.getLogger('overseer')

config = rc_config()       
overseer_uuid = '%s' % uuid.uuid4()
site_uuid = config.site_uuid


logger.info('Overseer %s initializing' % (overseer_uuid))
logger.info('Site UUID: %s' % site_uuid)


demods = {}
Example #15
def emmet(spec_or_dbfile, run, issue, sbatch, bb, yes, no_dupe_check, verbose):
    """Command line interface for emmet"""
    logger.setLevel(logging.DEBUG if verbose else logging.INFO)
    ctx = click.get_current_context()
    ctx.ensure_object(dict)

    if not sbatch and bb:
        raise EmmetCliError(
            "Burst buffer only available in SBatch mode (--sbatch).")

    if spec_or_dbfile:
        client = calcdb_from_mgrant(spec_or_dbfile)
        ctx.obj["CLIENT"] = client
        # ctx.obj["MONGO_HANDLER"] = BufferedMongoHandler(
        #    host=client.host,
        #    port=client.port,
        #    database_name=client.db_name,
        #    username=client.user,
        #    password=client.password,
        #    level=logging.WARNING,
        #    authentication_db=client.db_name,
        #    collection="emmet_logs",
        #    buffer_periodical_flush_timing=False,  # flush manually
        # )
        # logger.addHandler(ctx.obj["MONGO_HANDLER"])
        # coll = ctx.obj["MONGO_HANDLER"].collection
        # ensure_indexes(SETTINGS.log_fields, [coll])

    if run:
        if not issue:
            raise EmmetCliError(f"Need issue number via --issue!")

        ctx.obj["LOG_STREAM"] = StringIO()
        memory_handler = logging.StreamHandler(ctx.obj["LOG_STREAM"])
        formatter = logging.Formatter(
            "%(asctime)s %(name)-12s %(levelname)-8s %(message)s")
        memory_handler.setFormatter(formatter)
        logger.addHandler(memory_handler)

        CREDENTIALS = os.path.join(os.path.expanduser("~"),
                                   ".emmet_credentials")
        if not os.path.exists(CREDENTIALS):
            user = click.prompt("GitHub Username")
            password = click.prompt("GitHub Password", hide_input=True)
            auth = authorize(
                user,
                password,
                ["user", "repo", "gist"],
                "emmet CLI",
                two_factor_callback=opt_prompt,
            )
            with open(CREDENTIALS, "w") as fd:
                fd.write(auth.token)

        with open(CREDENTIALS, "r") as fd:
            token = fd.readline().strip()
            ctx.obj["GH"] = login(token=token)
    else:
        click.secho("DRY RUN! Add --run flag to execute changes.", fg="green")

    install_mp_handler(logger=logger)
Example #16
def download_flickr_dataset(dataset_path, data_dir, ffmpeg_path, ffprobe_path,
                            log_path=None, verbose=False, disable_logging=False,
                            num_workers=1, **ffmpeg_cfg):
    """
    Downloads Flickr dataset files

    Args:
        dataset_path:  Path to dataset file containing URLs
                       (Type: str)

        data_dir:      Output directory where video will be saved if output
                       path is not explicitly given
                       (Type: str)

        ffmpeg_path:   Path to ffmpeg executable
                       (Type: str)

        ffprobe_path:  Path to ffprobe executable
                       (Type: str)

    Keyword Args:
        log_path:         Path to log file. If None, defaults to "flickr-soundnet-dl.log"
                          (Type: str or None)

        verbose:          If True, prints detailed messages to console
                          (Type: bool)

        disable_logging:  If True, does not log to a file
                          (Type: bool)

        num_workers:      Number of multiprocessing workers used to download videos
                          (Type: int)

        **ffmpeg_cfg:     ffmpeg configurations
    """

    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    audio_dir = os.path.join(data_dir, 'audio')
    video_dir = os.path.join(data_dir, 'video')

    if not os.path.isdir(audio_dir):
        os.makedirs(audio_dir)

    if not os.path.isdir(video_dir):
        os.makedirs(video_dir)
    ffmpeg_cfg_gpu = dict.copy(ffmpeg_cfg)
    ffmpeg_cfg_gpu["video_codec"] += "_nvenc"
    url_queue = mp.Queue(3*num_workers)
    #lock = mp.Lock()
    cv_main_to_worker = mp.Condition()
    cv_worker_to_main = mp.Condition()
    p_list = []
    try:
        worker_args = (url_queue, cv_main_to_worker, cv_worker_to_main, data_dir, ffmpeg_path, ffprobe_path)
        for i in range(num_workers):
            if i < 2:
                p = mp.Process(target=download_flickr_video, args=worker_args, kwargs=ffmpeg_cfg_gpu)
            else:
                p = mp.Process(target=download_flickr_video, args=worker_args, kwargs=ffmpeg_cfg)
            p.start()
            p_list.append(p)
        with open(dataset_path, 'r') as f:
            for line in f:
                url = line.strip()
                if url:
                    media_filename = extract_flickr_id(url)
                    video_filepath = os.path.join(data_dir, 'video', media_filename + '.' + ffmpeg_cfg.get('video_format', 'mp4'))
                    skip_audio = ffmpeg_cfg.get("skip_audio", True)
                    if not skip_audio:
                        audio_filepath = os.path.join(data_dir, 'audio', media_filename + '.' + ffmpeg_cfg.get('audio_format', 'flac'))
                    else:
                        audio_filepath = None

                    if (skip_audio and os.path.exists(video_filepath)) or (not skip_audio and os.path.exists(video_filepath) and os.path.exists(audio_filepath)):
                        info_msg = 'Already downloaded video {}. Skipping.'
                        LOGGER.info(info_msg.format(media_filename))
                        continue
                    
                    while True:
                        try:
                            url_queue.put(url, False)
                            break
                        except queue.Full:
                            with cv_worker_to_main:
                                cv_worker_to_main.wait(5.0)
                    with cv_main_to_worker:
                        cv_main_to_worker.notify()
                    LOGGER.info('Notify a worker {}'.format(url))
        with cv_main_to_worker:
            for i in range(num_workers):
                url_queue.put("#END#")
            cv_main_to_worker.notify_all()
        LOGGER.info('End of enqueue')

    except KeyboardInterrupt:
        LOGGER.info("Received KeyboardInterrupt")
        with cv_main_to_worker:
            cv_main_to_worker.notify_all()
        for p in p_list:
            p.join()
        LOGGER.info("Forcing exit.")
        exit()
    finally:
        try:
            for p in p_list:
                p.join()
        except KeyboardInterrupt:
            LOGGER.info("Received KeyboardInterrupt")
            with cv_main_to_worker:
                cv_main_to_worker.notify_all()
            for p in p_list:
                p.join()
            LOGGER.info("Forcing exit.")
            exit()

    LOGGER.info('Finished downloading videos!')
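In this variant download_flickr_video is used as a long-lived process target rather than a per-URL task, and its implementation is not shown. A minimal sketch of the consuming loop that the queue/condition protocol above implies (the function name and body here are assumptions; only the argument order mirrors the call site):

import queue


def url_worker(url_queue, cv_main_to_worker, cv_worker_to_main,
               data_dir, ffmpeg_path, ffprobe_path, **ffmpeg_cfg):
    # Sketch of the loop the real worker presumably runs; the ffmpeg work is elided.
    while True:
        try:
            url = url_queue.get(False)
        except queue.Empty:
            with cv_main_to_worker:
                cv_main_to_worker.wait(5.0)  # wait for main to enqueue more work
            continue
        with cv_worker_to_main:
            cv_worker_to_main.notify()       # tell main a queue slot was freed
        if url == "#END#":                   # one sentinel is enqueued per worker
            break
        # ... download url into data_dir with ffmpeg_path / ffprobe_path
        #     and the options in ffmpeg_cfg ...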
Example #17
def make_app(config_file):

    fileConfig(config_file)
    multiprocessing_logging.install_mp_handler()

    with codecs.open(config_file, 'r', 'utf8') as f:
        config = konfig.Config(f)

    bottle.debug(config['henet'].get('debug', DEFAULT_DEBUG))
    app = bottle.app()

    # bottle config used by bottle-utils
    csrf_config = dict([('csrf.%s' % key, val) for key, val in
                        config.items('csrf')])
    app.config.update(csrf_config)

    # setting up languages
    default_locale = config['henet'].get('default_locale', 'fr_FR')
    langs = [[p.strip() for p in lang.split(',')] for lang
             in config['henet'].get('langs', ['fr_FR'])]

    app = I18NPlugin(app, langs=langs, default_locale=default_locale,
                     locale_dir=LOCALES_PATH)

    cats = []

    config_cats = config['henet']['categories']
    if not isinstance(config_cats, list):
        config_cats = [config_cats]

    for cat in config_cats:
        values = dict(config[cat].items())
        # defaults
        if 'can_create' not in values:
            values['can_create'] = True
        cats.append((cat, values))

    pages = []

    config_pages = config['henet']['pages']
    if not isinstance(config_pages, list):
        config_pages = [config_pages]

    for page in config_pages:
        values = dict(config[page].items())
        # defaults
        if 'can_create' not in values:
            values['can_create'] = True
        pages.append((page, values))

    use_comments = config['henet'].get('comments', True)
    use_media = config['henet'].get('media', True)

    app_stack.vars = app.vars = {'pages': pages,
                                 'categories': cats,
                                 'get_alerts': get_alerts,
                                 'site_url': config['henet']['site_url'],
                                 'use_comments': use_comments,
                                 'use_media': use_media,
                                 'langs': langs}

    app_stack.view = partial(view, **app.vars)
    app_stack._config = app._config = config
    app_stack.workers = app.workers = MemoryWorkers()
    app_stack.use_comments = app.use_comments = use_comments
    app_stack.use_media = app.use_media = use_media
    app_stack.add_alert = app.add_alert = add_alert

    smtp_config = dict(config.items('smtp'))

    def _send_email(*args):
        args = list(args) + [smtp_config]
        app.workers.apply_async('send-email', send_email, args)

    app_stack.send_email = app.send_email = _send_email

    from henet import views  # NOQA

    def _close_workers(*args):
        app.workers.close()
        sys.exit(0)

    subscribe(ALL_EVENTS, add_alert)
    signal.signal(signal.SIGINT, _close_workers)

    return app
Example #18
def sync(config, logs):
    """
    Main Sync process
    """

    logging.my_logfile(logs=logs)
    logging.my_fmt(label='main_sync')
    starttime = datetime.now()
    modify = {}
    workers = {}  # this is the array of running pnns
    pnns = None  # this is the array of pnn to be launched
    pool = None

    pcli = PhEDEx()

    install_mp_handler()

    conf = _load_config(config, modify, starttime)

    pnns = []

    size = conf['main']['pool']

    logging.summary('Starting')

    while conf['main']['run']:

        if pool is None:
            logging.notice('Started pool of size %d', size)
            pool = multiprocessing.NDPool(size)

        add = [
            pnn for pnn, sec in conf.items() if pnn != 'main' if sec['run']
            if pnn not in workers if pnn not in pnns
        ]

        pnns += add

        random.shuffle(pnns)

        if not _ping():
            logging.warning('Cannot ping, not launching workers')
        else:
            _launch_workers(pool, workers, pnns, pcli)
            pnns = []

        _poll_workers(workers, pnns)

        conf = _load_config(config, modify, starttime)

        if not conf['main']['run'] or\
            conf['main']['pool'] != size:

            # trigger draining of all workers, close the pool and wait
            # for the task to be over
            conf = _load_config(config, {'default': {'run': False}}, starttime)
            _drain_up(workers, pnns)
            workers = {}
            pool.close()
            pool = None
            size = conf['main']['pool']

        else:
            time.sleep(conf['main']['sleep'])

    logging.summary('Exiting.')

    return config
Example #19
def download_audioset(data_dir,
                      ffmpeg_path,
                      ffprobe_path,
                      eval_segments_path,
                      balanced_train_segments_path,
                      unbalanced_train_segments_path,
                      disable_logging=False,
                      verbose=False,
                      num_workers=4,
                      log_path=None,
                      **ffmpeg_cfg):
    """
    Download AudioSet files

    Args:
        data_dir:                       Directory where dataset files will
                                        be saved
                                        (Type: str)

        ffmpeg_path:                    Path to ffmpeg executable
                                        (Type: str)

        ffprobe_path:                   Path to ffprobe executable
                                        (Type: str)

        eval_segments_path:             Path to evaluation segments file
                                        (Type: str)

        balanced_train_segments_path:   Path to balanced train segments file
                                        (Type: str)

        unbalanced_train_segments_path: Path to unbalanced train segments file
                                        (Type: str)

    Keyword Args:
        disable_logging:                Disables logging to a file if True
                                        (Type: bool)

        verbose:                        Prints verbose information to stdout
                                        if True
                                        (Type: bool)

        num_workers:                    Number of multiprocessing workers used
                                        to download videos
                                        (Type: int)

        log_path:                       Path where log file will be saved. If
                                        None, saved to './audiosetdl.log'
                                        (Type: str or None)

        **ffmpeg_cfg:                   Configuration for audio and video
                                        downloading and decoding done by ffmpeg
                                        (Type: dict[str, *])
    """
    init_console_logger(LOGGER, verbose=verbose)
    if not disable_logging:
        init_file_logger(LOGGER, log_path=log_path)
    multiprocessing_logging.install_mp_handler()
    LOGGER.debug('Initialized logging.')

    download_subset(eval_segments_path, "evaluation", data_dir, ffmpeg_path,
                    ffprobe_path, num_workers, **ffmpeg_cfg)
    download_subset(balanced_train_segments_path, "balanced_train", data_dir,
                    ffmpeg_path, ffprobe_path, num_workers, **ffmpeg_cfg)
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    class ArgumentParserWithDefaults(argparse.ArgumentParser):
        '''
        From https://stackoverflow.com/questions/12151306/argparse-way-to-include-default-values-in-help
        '''
        def add_argument(self, *args, help=None, default=None, **kwargs):
            if help is not None:
                kwargs['help'] = help
            if default is not None and args[0] != '-h':
                kwargs['default'] = default
                if help is not None:
                    kwargs['help'] += ' (default: {})'.format(default)
            super().add_argument(*args, **kwargs)

    parser = ArgumentParserWithDefaults(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-l",
                        "--logconfig",
                        dest="logconfig",
                        help="logging configuration (default: logging.json)",
                        default='logging.json')
    parser.add_argument("--debug",
                        dest="debug",
                        help="Enable interactive debugger on error",
                        action='store_true')
    parser.add_argument("-c",
                        "--chart_output",
                        dest="chart_output",
                        help="Chart output directory",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        dest="output",
                        help="Output directory",
                        required=True)
    parser.add_argument("-s",
                        "--sim-output",
                        dest="sim_output",
                        help="Sim output directory",
                        required=True)
    parser.add_argument("-w",
                        "--window-size",
                        dest="window_size",
                        help="Minutes over which to collect data",
                        default=3,
                        type=int)
    parser.add_argument(
        "--first-timestamp-file",
        dest="first_timestamp_file",
        help=
        "Path to file containing the log timestamp that the simulation started",
        required=True)

    args = parser.parse_args(argv)

    map_utils.setup_logging(default_path=args.logconfig)
    if 'multiprocessing' in sys.modules:
        import multiprocessing_logging
        multiprocessing_logging.install_mp_handler()

    if args.debug:
        import pdb, traceback
        try:
            return main_method(args)
        except:
            extype, value, tb = sys.exc_info()
            traceback.print_exc()
            pdb.post_mortem(tb)
    else:
        return main_method(args)
filename = os.path.basename(__file__)
logfile = os.path.splitext(filename)[0] + '.log'
fh = logging.FileHandler(logfile, mode='w')
fh.setLevel(logging.DEBUG)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
# add the handlers to logger
logger.addHandler(ch)
logger.addHandler(fh)
multiprocessing_logging.install_mp_handler(logger=logger)
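
For context, install_mp_handler(logger=logger) replaces each handler attached to the logger with a MultiProcessingHandler that relays records emitted in child processes back through the wrapped sub-handler. A minimal, self-contained sketch of the end-to-end pattern (the worker function and pool size are illustrative, not part of the snippet above):

import logging
import multiprocessing

from multiprocessing_logging import install_mp_handler

demo_logger = logging.getLogger('mp_demo')
demo_logger.setLevel(logging.DEBUG)
demo_logger.addHandler(logging.StreamHandler())
install_mp_handler(logger=demo_logger)  # call before worker processes are created


def work(i):
    # emitted in a child process; relayed through the wrapped handler
    demo_logger.info('processing item %d', i)
    return i * i


if __name__ == '__main__':
    with multiprocessing.Pool(processes=4) as pool:
        print(pool.map(work, range(8)))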

# ==================================== SALE ORDER LINE ====================================


def update_sale_order_line(pid, data_pool, product_ids, uom_ids,
                           order_tax_code_ids):
    sock = xmlrpclib.ServerProxy(URL, allow_none=True)
    while data_pool:
        try:
            data = data_pool.pop()
            order_id = data.get('order_id')
            order_lines = sock.execute(DB, UID, PSW, 'sale.order.line',
                                       'search_read',
                                       [('order_id', '=', order_id)],
                                       ['product_id', 'product_uom'])
Example #22
0
def update_database():
    with logging_redirect_tqdm():
        install_mp_handler()

        latest_update = Update.get_latest_update(success=False)

        if latest_update.status not in [
                Update.Status.ERROR, Update.Status.SUCCESS
        ]:
            print(
                'Last update revision=%s has not finished yet (it may have hung '
                'or crashed). Start a new update (y) or continue the previous '
                'one (N)?' % latest_update.id)
            response = input()
            if response.lower() == 'y':
                latest_update.status = Update.Status.ERROR
                latest_update.save()

                latest_update = Update.objects.create(
                    status=Update.Status.IN_PROGRESS)
        else:
            latest_update = Update.objects.create(
                status=Update.Status.IN_PROGRESS)

        # scraping level 3 indexes first
        level_3_koatuu = list(
            set([koatuu for koatuu in _get_indexes() if koatuu.level <= 2]))
        # sort by unique id first; the stable sort by level below preserves
        # this order within each level
        level_3_koatuu.sort(key=attrgetter('unique_id'))
        # then sort by level (1 to 3); unique_id order is kept within a level
        level_3_koatuu.sort(key=attrgetter('level'))

        if latest_update.latest_koatuu:
            logging.info("Searching for the latest koatuu scraped")
            latest_koatuu_obj = next(
                (koatuu for koatuu in level_3_koatuu
                 if koatuu.unique_id == latest_update.latest_koatuu), None)
            if latest_koatuu_obj is None:
                level_3_koatuu = []
            else:
                logging.info("Found latest koatuu scraped %s",
                             latest_koatuu_obj)
                level_3_koatuu = level_3_koatuu[
                    level_3_koatuu.index(latest_koatuu_obj):]
                logging.info('Only %s koatuu left to scrape',
                             len(level_3_koatuu))

        if level_3_koatuu:
            _download_and_insert(latest_update, level_3_koatuu)
        logging.info('All insert l1 operations ended')
        # process level 4 indexes only for regions where parcels
        # number is more than 100000

        annotated = Landuse.objects.all().values('koatuu').filter(
            revision=latest_update.id).annotate(
                total=Count('koatuu')).order_by('-total')

        level_4_koatuu = []
        all_koatuu = list(set([koatuu for koatuu in _get_indexes()]))
        for result in annotated:
            if result['total'] < 100000:
                continue
            koatuu_obj = next(koatuu for koatuu in all_koatuu
                              if koatuu.unique_id == str(result['koatuu']))

            if koatuu_obj.level == 2:
                level_3_koatuus = list({
                    koatuu for koatuu in all_koatuu
                    if koatuu.level == 3
                    and str(koatuu.parent) == koatuu_obj.unique_id
                })
                level_4_koatuu.extend(level_3_koatuus)
                for level_3_koatuu in level_3_koatuus:
                    level_4_koatuu.extend({
                        koatuu for koatuu in all_koatuu
                        if koatuu.level == 4
                        and str(koatuu.parent) == level_3_koatuu.unique_id
                    })

            if koatuu_obj.level == 3:
                level_4_koatuu.extend({
                    koatuu for koatuu in all_koatuu
                    if koatuu.level == 4
                    and str(koatuu.parent) == koatuu_obj.unique_id
                })

        level_4_koatuu.sort(key=attrgetter('unique_id'))
        _download_and_insert(latest_update, level_4_koatuu)

    # detecting changes to create analysis table
    create_changeset(
        revision=Update.objects.get(id=latest_update.id),
        previous=Update.objects.get(id=Update.get_latest_update().id),
    )

    # everything is ok => success status
    Update.objects.filter(id=latest_update.id).update(
        status=Update.Status.SUCCESS)
Example #23
0
if __name__ == "__main__":

    import os
    import shutil
    import sys
    import logging
    from multiprocessing_logging import install_mp_handler

    from wepy_tools.sim_makers.openmm.lennard_jones import LennardJonesPairOpenMMSimMaker

    OUTPUT_DIR = "_output/sim_maker_run"

    logging.getLogger().setLevel(logging.DEBUG)
    install_mp_handler()

    if len(sys.argv) < 7 or sys.argv[1] in ("-h", "--help"):
        print(
            "arguments: n_cycles, n_steps, n_walkers, n_workers, platform, resampler"
        )
        sys.exit()
    else:
        n_cycles = int(sys.argv[1])
        n_steps = int(sys.argv[2])
        n_walkers = int(sys.argv[3])
        n_workers = int(sys.argv[4])
        platform = sys.argv[5]
        resampler = sys.argv[6]

        print("Number of steps: {}".format(n_steps))
        print("Number of cycles: {}".format(n_cycles))
Example #24
0
def main():
    config = get_config()

    logging.basicConfig(
        format=
        '%(asctime)s %(levelname)s [%(processName)s %(process)d] [%(name)s] %(message)s',
        datefmt="%Y-%m-%dT%H:%M:%S%z",
        level=logging.DEBUG)
    multiprocessing_logging.install_mp_handler()
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    zabbix_config = dict(config["zabbix"])
    zabbix_config["failsafe"] = int(zabbix_config.get("failsafe", "20"))
    if zabbix_config["dryrun"] == "false":
        zabbix_config["dryrun"] = False
    elif zabbix_config["dryrun"] == "true":
        zabbix_config["dryrun"] = True
    else:
        raise Exception(
            'zabbix.dryrun must be "true" or "false", got {!r}'.format(
                zabbix_config["dryrun"]))

    logging.info("Main start (%d) version %s", os.getpid(), __version__)

    stop_event = multiprocessing.Event()
    processes = []

    source_hosts_queues = []
    source_collectors = get_source_collectors(config)
    for source_collector in source_collectors:
        source_hosts_queue = multiprocessing.Queue()
        process = processing.SourceCollectorProcess(source_collector["name"],
                                                    source_collector["module"],
                                                    source_collector["config"],
                                                    source_hosts_queue)
        source_hosts_queues.append(source_hosts_queue)
        processes.append(process)
        process.start()

    process = processing.SourceHandlerProcess("source-handler",
                                              config["zac"]["db_uri"],
                                              source_hosts_queues)
    process.start()
    processes.append(process)

    process = processing.SourceMergerProcess(
        "source-merger", config["zac"]["db_uri"],
        config["zac"]["host_modifier_dir"])
    process.start()
    processes.append(process)

    process = processing.ZabbixHostUpdater("zabbix-host-updater",
                                           config["zac"]["db_uri"],
                                           zabbix_config)
    process.start()
    processes.append(process)

    process = processing.ZabbixHostgroupUpdater("zabbix-hostgroup-updater",
                                                config["zac"]["db_uri"],
                                                zabbix_config)
    process.start()
    processes.append(process)

    process = processing.ZabbixTemplateUpdater("zabbix-template-updater",
                                               config["zac"]["db_uri"],
                                               zabbix_config)
    process.start()
    processes.append(process)

    with processing.SignalHandler(stop_event):
        status_interval = 60
        next_status = datetime.datetime.now()

        while not stop_event.is_set():
            if next_status < datetime.datetime.now():
                log_process_status(processes)
                next_status = datetime.datetime.now() + datetime.timedelta(
                    seconds=status_interval)

            dead_process_names = [
                process.name for process in processes
                if not process.is_alive()
            ]
            if dead_process_names:
                logging.error("A child has died: %s. Exiting",
                              ', '.join(dead_process_names))
                stop_event.set()

            time.sleep(1)

        logging.debug(
            "Queues: %s",
            ", ".join([str(queue.qsize()) for queue in source_hosts_queues]))

        for process in processes:
            logging.info("Terminating: %s(%d)", process.name, process.pid)
            process.terminate()

        alive_processes = [
            process for process in processes if process.is_alive()
        ]
        while alive_processes:
            process = alive_processes[0]
            logging.info("Waiting for: %s(%d)", process.name, process.pid)
            log_process_status(processes)  # TODO: Too verbose?
            process.join(10)
            if process.exitcode is None:
                logging.warning(
                    "Process hanging. Signaling new terminate: %s(%d)",
                    process.name, process.pid)
                process.terminate()
            time.sleep(1)
            alive_processes = [
                process for process in processes if process.is_alive()
            ]

    logging.info("Main exit")
Example #25
0
    def test_when_a_logger_is_passed_then_it_does_not_change_the_root_logger(self):
        with mock.patch('logging.getLogger') as getLogger:
            install_mp_handler(self.logger)

            self.assertEqual(0, getLogger.call_count)
Example #26
0
def main(argv):
    parser = argparse.ArgumentParser(
        description='Perform segmentation on .svg and .png files.')
    parser.add_argument('dirs',
                        nargs='+',
                        help='Directories that store .svg and .png files.')
    parser.add_argument(
        '--num-workers',
        default=0,
        type=int,
        dest='num_workers',
        help='Number of processes. 0 for all available cpu cores.')
    parser.add_argument('--log',
                        default='segmentation.log',
                        type=str,
                        dest='log_file',
                        help='Path to log file.')
    parser.add_argument('--conf',
                        default='seg_conf.json',
                        type=str,
                        dest='confidence_file',
                        help='Path to segmentation confidence file.')
    parser.add_argument(
        '--no-optimize',
        default=True,
        action='store_false',
        dest='optimize',
        help="Don't use svgo optimization. This produces larger svg files "
        "but takes much less time.")
    parser.add_argument('--export-contour',
                        default=False,
                        action='store_true',
                        dest='export_contour',
                        help='Export contour segmentation results.')
    parser.add_argument('--export-mask',
                        default=False,
                        action='store_true',
                        dest='export_mask',
                        help='Export morphed mask for debug use.')
    args = parser.parse_args(argv[1:])

    global logger
    logger = get_logger('segmentation',
                        args.log_file,
                        echo=False,
                        multiprocessing=True)
    install_mp_handler(logger)

    global USE_OPTIMIZE, EXPORT_CONTOUR_RESULTS, EXPORT_MASK
    USE_OPTIMIZE = args.optimize
    EXPORT_CONTOUR_RESULTS = args.export_contour
    EXPORT_MASK = args.export_mask

    num_workers = args.num_workers
    if num_workers == 0:
        num_workers = multiprocessing.cpu_count()
    logger.info('Using {} processes.'.format(num_workers))

    src_dirs = args.dirs
    for src_dir in src_dirs:
        if not osp.isdir(src_dir):
            continue
        logger.info('Processing {} ...'.format(src_dir))
        tgt_dir = src_dir
        tgts = []
        for f in glob.glob(osp.join(src_dir, '*.eps')):
            _id, _ = osp.splitext(osp.basename(f))
            svg_file, png_file = osp.join(src_dir, _id + '.svg'), osp.join(
                src_dir, _id + '.png')
            if osp.exists(svg_file) and osp.exists(png_file):
                tgts.append({
                    'id': _id,
                    'svg_file': svg_file,
                    'png_file': png_file
                })
        conf = seg(tgts, tgt_dir, num_workers=num_workers)
        with open(args.confidence_file, 'w') as f:
            f.write(
                json.dumps([{
                    'id': tgt['id'],
                    'score': s
                } for tgt, s in zip(tgts, conf)]))
Example #27
0
import logging
from collections import OrderedDict
from multiprocessing import current_process

import numpy as np

import click

from data_iterator import TextIterator
from params import load_params


logging.basicConfig(level=logging.WARN,
                    format="%(asctime)s - %(levelname)s %(module)s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")

import multiprocessing_logging
multiprocessing_logging.install_mp_handler()


def error_process(params, device, **model_options):

    import theano
    import theano.sandbox.cuda
    from build_model import build_model

    theano.sandbox.cuda.use(device)

    tparams = OrderedDict()
    for param_name, param in params.items():
        tparams[param_name] = theano.shared(param, name=param_name)

    process_name = current_process().name
    def processYears(self, years: List[int]) -> None:
        '''
        Parameters
        ----------
        years : List[int]
            list of years to (re-)process
        '''

        # If necessary, creates indexes for the data collection.
        self._createDataColIndex()
        # Does the grid collection exist?
        col_grid = self._createMongoConn(cfg=self.cfg)['col_grid']
        ndoc = col_grid.count_documents(filter={})
        if ndoc == 0:
            logging.info('Creating the grid collection...')
            # Does the reference netCDF file exist?
            fname = self.downloadDir + 'era5_masks.nc'
            if not os.path.exists(fname):
                print('Downloading mask data...')
                self.getMasks()
            self.createGridCollection(mask=False)
        # Get all grid_ids
        self.all_ids = col_grid.distinct(key='id_grid')

        for year in years:
            self.year = int(year)
            logging.info(f' --- PROCESSING YEAR {year} ---')

            if self.download is True:
                logging.info('Proceeding with downloads...')
                today = datetime.today()
                if year == today.year:
                    months = np.arange(1, today.month + 1).tolist()
                else:
                    months = np.arange(1, 12 + 1).tolist()

                # Which of these months are already present as nc files?
                # List this year's nc files
                try:
                    ncfiles = self.listNetCDFfiles(year)
                except FileNotFoundError:
                    logging.info(f'No ERA5T files downloaded for {year} yet.')
                    months_to_download = months
                else:
                    fmonths_present = sorted(
                        int(x[x.find("-") + 1:x.find(".nc")]) for x in ncfiles)
                    fmonths_needed = months
                    # months needed but not yet downloaded:
                    missing_months = list(
                        set(fmonths_needed) - set(fmonths_present))
                    months_to_download = missing_months

                if len(months_to_download) > 0:
                    logging.info(f'Downloading files for YEAR {year}....\n' +
                                 f'Months: {months_to_download}')
                    # Parallel download of monthly data:
                    install_mp_handler()
                    p = ThreadPool(processes=12)  # one thread per month
                    p.map(lambda m: self.getFiles(year=year, month=m),
                          months_to_download)
                    p.close()
                    p.join()
                    logging.info(f'Downloading files for YEAR {year} Done.')
                else:
                    logging.info(f'All files already present for year {year}.')
            else:
                logging.info('Proceeding without downloads')

            # List all the current year's nc files after download
            nc_local = self.listNetCDFfiles(year=year)

            # Open them all in one ds object
            # arrays will be loaded in chronological order
            try:
                ds = xr.open_mfdataset(nc_local, combine='by_coords')
            except Exception as e:
                logging.info(e)
            else:
                self.df_missing_dates = self.findMissingDates(ds)
                # Create the tile (chunks) elements
                # This operation starts to be useful at high grid resolution
                # i.e., from 0.25 x 0.25. For coarser grid (i.e., 0.1 x 0.1)
                # this is not really essential.
                delta = 30  # grid chunk size in degrees (should be a divisor
                # of both 360 and 180)
                # ERA5 longitudes range over [0, 360], not [-180, 180]
                ilons = np.arange(0, 360, delta)
                ilats = np.arange(-60, 90, delta)
                elements = itertools.product(*[ilons, ilats])

                # Explore the grid chunks and select
                # those containing grid cells
                def worker_initializer00():
                    global col_grid
                    cons = self._createMongoConn(cfg=self.cfg)
                    col_grid = cons['col_grid']

                p = ThreadPool(processes=self.nthreads,
                               initializer=worker_initializer00)
                res = p.map(
                    lambda e: self.exploreChunks(ilon_chunk=e[0],
                                                 ilat_chunk=e[1],
                                                 delta=delta,
                                                 mask_query=None,
                                                 retrn='ndocs',
                                                 col_grid=col_grid), elements)
                p.close()
                p.join()
                df_e = pd.DataFrame(res)
                df_e = df_e.query('n > 0').sort_values(by='n').reset_index(
                    drop=True)

                # Do the insertion
                N = df_e.shape[0]
                for i in np.arange(N):
                    logging.info(f'Year {year}: processing chunk {i}/{N}')
                    ilon = df_e.loc[i, 'ilon_chunk']
                    ilat = df_e.loc[i, 'ilat_chunk']
                    n = df_e.loc[i, 'n']
                    self.insertChunk(ilon, ilat, delta, ds, 'insert')
                    logging.info(f'{n} documents inserted')
                logging.info(f' --- PROCESSING YEAR {year} DONE! ---')
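
The download step above reduces to fanning a per-month download out over a small thread pool after installing the multiprocessing log handler. A stripped-down sketch of that pattern outside the class (download_month stands in for self.getFiles and is purely illustrative):

import logging
from multiprocessing.pool import ThreadPool

from multiprocessing_logging import install_mp_handler


def download_month(year, month):
    # stand-in for self.getFiles(year=year, month=month)
    logging.info('downloading %04d-%02d', year, month)


def download_missing(year, months_to_download):
    # mirrors the original; not strictly required for threads, since the
    # logging module is already thread-safe, but harmless
    install_mp_handler()
    pool = ThreadPool(processes=len(months_to_download))  # one thread per month
    pool.map(lambda m: download_month(year, m), months_to_download)
    pool.close()
    pool.join()


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    download_missing(2021, [1, 2, 3])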