def __init__(self):
    super(ConnectionLockStage, self).__init__()
    self.queue = queue.Queue()
    self.blocked = False
Example #2
  def testMemoryLimit(self):
    memory_limit = 512 * 1024  # 512K
    chunk = 200 * 1024  # 200K
    capacity = memory_limit // chunk

    with ops.Graph().as_default() as G:
      with ops.device('/cpu:0'):
        x = array_ops.placeholder(dtypes.uint8, name='x')
        pi = array_ops.placeholder(dtypes.int64, name='pi')
        gi = array_ops.placeholder(dtypes.int64, name='gi')
      with ops.device(test.gpu_device_name()):
        stager = data_flow_ops.MapStagingArea(
            [dtypes.uint8], memory_limit=memory_limit, shapes=[[]])
        stage = stager.put(pi, [x], [0])
        get = stager.get()
        size = stager.size()

    G.finalize()

    from six.moves import queue as Queue
    import threading
    import numpy as np

    queue = Queue.Queue()
    n = 8

    with self.session(graph=G) as sess:
      # Stage data in a separate thread which will block
      # when it hits the staging area's capacity and thus
      # not fill the queue with n tokens
      def thread_run():
        for i in range(n):
          data = np.full(chunk, i, dtype=np.uint8)
          sess.run(stage, feed_dict={x: data, pi: i})
          queue.put(0)

      t = threading.Thread(target=thread_run)
      t.daemon = True
      t.start()

      # Get tokens from the queue until a timeout occurs
      try:
        for i in range(n):
          queue.get(timeout=TIMEOUT)
      except Queue.Empty:
        pass

      # Should have timed out on iteration 'capacity'
      if i != capacity:
        self.fail("Expected to timeout on iteration '{}' "
                  "but instead timed out on iteration '{}' "
                  "Staging Area size is '{}' and configured "
                  "capacity is '{}'.".format(capacity, i, sess.run(size),
                                             capacity))

      # Should have capacity elements in the staging area
      self.assertEqual(sess.run(size), capacity)

      # Clear the staging area completely
      for i in range(n):
        sess.run(get)

      self.assertEqual(sess.run(size), 0)
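The test above relies on the staging area's memory_limit to make the producer thread block, and the main thread detects this by getting tokens with a timeout (`TIMEOUT` is a module-level constant defined elsewhere in the test file). A minimal sketch of the same pattern with a plain bounded queue.Queue, using illustrative constants:

import queue
import threading

CAPACITY = 4    # bounded buffer size (illustrative)
TIMEOUT = 0.5   # seconds to wait for a token before giving up (illustrative)
N_ITEMS = 8     # the producer tries to push more than the buffer can hold

buf = queue.Queue(maxsize=CAPACITY)
tokens = queue.Queue()  # one token per successful put, like the test above

def producer():
    for i in range(N_ITEMS):
        buf.put(i)       # blocks once CAPACITY items are buffered
        tokens.put(i)

t = threading.Thread(target=producer)
t.daemon = True
t.start()

# Consume tokens until the producer blocks and the timeout fires.
received = 0
try:
    for _ in range(N_ITEMS):
        tokens.get(timeout=TIMEOUT)
        received += 1
except queue.Empty:
    pass

assert received == CAPACITY  # the producer blocked after filling the buffer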
Example #3
import requests
import json

import asyncio
import aiohttp
from six.moves import queue
from lxml import etree
from bs4 import BeautifulSoup
import re
import datetime
import pymongo
from utils.cpca import *
from utils.zb_storage_setting import StorageSetting

_page_response_queue = queue.Queue()

# Limit the number of concurrent requests (a semaphore value of 1 allows one request at a time)
semaphore = asyncio.Semaphore(1)

name = 'shenyang_ccgp-shenyang_gov_cn'
coll = StorageSetting(name)
collection = coll.find_collection

headers = {
    'Origin': 'http://www.ccgp-shenyang.gov.cn',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh,zh-CN;q=0.9',
    'User-Agent':
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept': '*/*',
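A hedged sketch (not part of the scraper above) of how such a semaphore and response queue are typically combined with aiohttp: the semaphore caps how many requests are in flight, and each response body is handed to the thread-safe queue for later parsing. The fetch_page and crawl coroutines below are illustrative, not taken from this project.

async def fetch_page(session, url):
    async with semaphore:                      # at most one request in flight here
        async with session.get(url) as resp:
            text = await resp.text()
            _page_response_queue.put((url, text))  # queue.Queue.put is thread-safe

async def crawl(urls):
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(fetch_page(session, u) for u in urls))

# asyncio.run(crawl(list_of_urls))  # illustrative driver; list_of_urls is hypothetical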
Example #4
def _discover_workflow_components(workflow_class):
    """
    This function iterates over the attributes of a user-defined class in order to return a list of inputs, outputs and
    nodes.
    :param class workflow_class: User-defined class with task instances as attributes.
    :rtype: (list[flytekit.common.promise.Input], list[Output], list[flytekit.common.nodes.SdkNode])
    """

    inputs = []
    outputs = []
    nodes = []

    to_visit_objs = _queue.Queue()
    top_level_attributes = set()
    for attribute_name in dir(workflow_class):
        to_visit_objs.put((attribute_name, getattr(workflow_class, attribute_name)))
        top_level_attributes.add(attribute_name)

    # For all task instances defined within the workflow, bind them to this specific workflow and hook-up to the
    # engine (when available)
    visited_obj_ids = set()
    while not to_visit_objs.empty():
        attribute_name, current_obj = to_visit_objs.get()

        current_obj_id = id(current_obj)
        if current_obj_id in visited_obj_ids:
            continue
        visited_obj_ids.add(current_obj_id)

        if isinstance(current_obj, _nodes.SdkNode):
            # TODO: If an attribute name is of the form node_name[index], the resulting
            # node name might not be correct.
            nodes.append(current_obj.assign_id_and_return(attribute_name))
        elif isinstance(current_obj, _promise.Input):
            if attribute_name is None or attribute_name not in top_level_attributes:
                raise _user_exceptions.FlyteValueException(
                    attribute_name,
                    "Detected workflow input specified outside of top level."
                )
            inputs.append(current_obj.rename_and_return_reference(attribute_name))
        elif isinstance(current_obj, Output):
            if attribute_name is None or attribute_name not in top_level_attributes:
                raise _user_exceptions.FlyteValueException(
                    attribute_name,
                    "Detected workflow output specified outside of top level."
                )
            outputs.append(current_obj.rename_and_return_reference(attribute_name))
        elif isinstance(current_obj, (list, set, tuple)):
            for idx, value in enumerate(current_obj):
                to_visit_objs.put(
                    (_assign_indexed_attribute_name(attribute_name, idx), value))
        elif isinstance(current_obj, dict):
            # Visit dictionary keys.
            for key in current_obj.keys():
                to_visit_objs.put(
                    (_assign_indexed_attribute_name(attribute_name, key), key))
            # Visit dictionary values.
            for key, value in _six.iteritems(current_obj):
                to_visit_objs.put(
                    (_assign_indexed_attribute_name(attribute_name, key), value))
    return inputs, outputs, nodes
Example #5
    def __call__(self, *inputs):
        comm_queue = queue.Queue()
        y = None
        delegate_variable = None

        for i_comp, (f, (rank_in, rank_out)) in \
                enumerate(zip(self._children, self._rank_inouts)):
            x = None

            if rank_in is None:  # Use inputs.
                if i_comp == 0:
                    x = f(*inputs)
                else:
                    # If the graph component is not the first one,
                    # backprop to the previous graph component must be
                    # guaranteed.
                    x = chainermn.functions.pseudo_connect(
                        delegate_variable, *inputs)
                    x = f(x)

            else:  # Receive inputs from the other machines.
                # Preprocess: receiving inputs from the other machines.
                xs = []
                for _rank_in in rank_in:
                    if _rank_in == self._comm.rank:
                        # Receive inputs from itself.
                        if delegate_variable is None:
                            _x = comm_queue.get()
                        else:
                            _x = chainermn.functions.pseudo_connect(
                                delegate_variable, comm_queue.get())
                    else:
                        _x = chainermn.functions.recv(
                            self._comm,
                            rank=_rank_in,
                            delegate_variable=delegate_variable,
                            device=self._device_id)

                    xs.append(_x)

                    # Guarantee the backward path to the previous graph
                    # component to be executed in the last to avoid dead-lock.
                    delegate_variable = _x

                # Guarantee backprop on the same edge exactly once.
                delegate_variable = None

                # Actual forward.
                x = f(*tuple(xs))

            if rank_out is None:  # Return outputs.
            assert y is None, "MultiNodeChainList cannot have more than "\
                "one computational graph component whose rank_out is None"
                y = x  # model output
                delegate_variable = y

            else:  # Send outputs to the other machines.
                for i_comp, _rank_out in enumerate(rank_out):
                    if _rank_out == self._comm.rank:
                        # Send outputs to itself.
                        if delegate_variable is not None:
                            x = chainermn.functions.pseudo_connect(
                                delegate_variable, x)
                        comm_queue.put(x)
                        delegate_variable = x
                    elif i_comp == 0:
                        delegate_variable = chainermn.functions.send(
                            x, self._comm, rank=_rank_out)
                    else:
                        # If the model has multiple targets for send,
                        # we must guarantee backwards of each send to be
                        # called in the reversed order.
                        if delegate_variable is not None:
                            x = chainermn.functions.pseudo_connect(
                                delegate_variable, x)
                        delegate_variable = chainermn.functions.send(
                            x, self._comm, rank=_rank_out)

        if not comm_queue.empty():
            raise ValueError(
                'Communication queue is not empty at the end of forward. '
                'Make sure all rank_in and rank_out values correspond to each other.')

        # Return.
        if y is delegate_variable:
            # The last computational graph component returns model output.
            return y
        elif y is not None:
            # The intermediate graph component returns model output.
            return chainermn.functions.pseudo_connect(delegate_variable, y)
        else:
            # Do not have any model output.
            return delegate_variable
Example #6
 def __init__(self, pipe_c2s, pipe_s2c, gpus):
     super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
     self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
     self._gpus = gpus
Example #7
 def __init__(self, executor=None):
     self._queue = queue.Queue()
     if executor is None:
         self._executor = _make_default_thread_pool_executor()
     else:
         self._executor = executor
Example #8
 def __init__(self, acceptors, reactors, wait_on_queue=True):
     self.acceptors = acceptors or []
     self.reactors = reactors or []
     self.route_process_queue = queue.Queue()
     self.wait_on_queue = wait_on_queue
Example #9
    def __init__(self, com_port, ssp_address="0", nv11=False, debug=False):
        self.debug = debug
        self.nv11 = nv11
        self.actions = queue.Queue()
        self.actions_args = {}
        self.response_data = {}
        self.events = []
        # There can't be 9999 notes in the storage
        self.response_data['getnoteamount_response'] = 9999
        self.sspC = self.essp.ssp_init(
            com_port.encode(), ssp_address.encode(), debug)
        self.poll = SspPollData6()
        setup_req = Ssp6SetupRequestData()
        # Check if the validator is present
        if self.essp.ssp6_sync(self.sspC) != Status.SSP_RESPONSE_OK.value:
            self.print_debug("NO VALIDATOR FOUND")
            self.close()
            raise Exception("No validator found")
        else:
            self.print_debug("Validator Found !")
        # Try to setup encryption
        if self.essp.ssp6_setup_encryption(self.sspC, c_ulonglong(
                0x123456701234567)) == Status.SSP_RESPONSE_OK.value:
            self.print_debug("Encryption Setup")
        else:
            self.print_debug("Encryption Failed")

        # Checking the version, make sure we are using ssp version 6
        if self.essp.ssp6_host_protocol(
                self.sspC, 0x06) != Status.SSP_RESPONSE_OK.value:
            self.print_debug(self.essp.ssp6_host_protocol(self.sspC, 0x06))
            self.print_debug("Host Protocol Failed")
            self.close()
            raise Exception("Host Protocol Failed")

        # Get some information about the validator
        if self.essp.ssp6_setup_request(self.sspC, byref(
                setup_req)) != Status.SSP_RESPONSE_OK.value:
            self.print_debug("Setup Request Failed")
            self.close()
            raise Exception("Setup request failed")

        self.print_debug("Firmware %s " % (setup_req.FirmwareVersion.decode('utf8')))
        self.print_debug("Channels : ")
        for i, channel in enumerate(setup_req.ChannelData):
            self.print_debug("Channel %s :  %s %s" %
                  (str(i + 1), str(channel.value), channel.cc.decode()))

        # Enable the validator
        if self.essp.ssp6_enable(self.sspC) != Status.SSP_RESPONSE_OK.value:
            self.print_debug("Enable Failed")
            self.close()
            raise Exception("Enable failed")

        if setup_req.UnitType == 0x03:  # magic number
            for channel in setup_req.ChannelData:
                self.essp.ssp6_set_coinmech_inhibits(
                    self.sspC, channel.value, channel.cc, Status.ENABLED.value)
        else:
            if setup_req.UnitType in {0x06, 0x07}:
                # Enable the payout unit
                if self.essp.ssp6_enable_payout(
                        self.sspC, setup_req.UnitType) != Status.SSP_RESPONSE_OK.value:
                    self.print_debug("Payout Enable Failed")

            # Set the inhibits ( enable all note acceptance )
            if self.essp.ssp6_set_inhibits(
                    self.sspC, 0xFF, 0xFF) != Status.SSP_RESPONSE_OK.value:
                self.print_debug("Inhibits Failed")
                self.close()
                raise Exception("Inhibits failed")

        system_loop_thread = threading.Thread(target=self.system_loop)
        system_loop_thread.daemon = True
        system_loop_thread.start()
Example #10
    def __init__(self,
                 endpoint=None,
                 token=None,
                 solver=None,
                 proxy=None,
                 permissive_ssl=False,
                 **kwargs):
        """To setup the connection a pipeline of queues/workers is constructed.

        There are five interactions with the server the connection manages:
        1. Downloading solver information.
        2. Submitting problem data.
        3. Polling problem status.
        4. Downloading problem results.
        5. Canceling problems.

        Loading solver information is done synchronously. The other four tasks
        are performed by asynchronous workers. For 2, 3, and 5 the workers
        gather tasks in batches.
        """
        if not endpoint or not token:
            raise ValueError("Endpoint URL and/or token not defined")

        _LOGGER.debug("Creating a client for endpoint: %r", endpoint)

        self.endpoint = endpoint
        self.token = token
        self.default_solver = solver

        # Create a :mod:`requests` session. `requests` will manage our url parsing, https, etc.
        self.session = requests.Session()
        self.session.headers.update({
            'X-Auth-Token': self.token,
            'User-Agent': self.USER_AGENT
        })
        self.session.proxies = {'http': proxy, 'https': proxy}
        if permissive_ssl:
            self.session.verify = False

        # Build the problem submission queue, start its workers
        self._submission_queue = queue.Queue()
        self._submission_workers = []
        for _ in range(self._SUBMISSION_THREAD_COUNT):
            worker = threading.Thread(target=self._do_submit_problems)
            worker.daemon = True
            worker.start()
            self._submission_workers.append(worker)

        # Build the cancel problem queue, start its workers
        self._cancel_queue = queue.Queue()
        self._cancel_workers = []
        for _ in range(self._CANCEL_THREAD_COUNT):
            worker = threading.Thread(target=self._do_cancel_problems)
            worker.daemon = True
            worker.start()
            self._cancel_workers.append(worker)

        # Build the problem status polling queue, start its workers
        self._poll_queue = queue.PriorityQueue()
        self._poll_workers = []
        for _ in range(self._POLL_THREAD_COUNT):
            worker = threading.Thread(target=self._do_poll_problems)
            worker.daemon = True
            worker.start()
            self._poll_workers.append(worker)

        # Build the result loading queue, start its workers
        self._load_queue = queue.Queue()
        self._load_workers = []
        for _ in range(self._LOAD_THREAD_COUNT):
            worker = threading.Thread(target=self._do_load_results)
            worker.daemon = True
            worker.start()
            self._load_workers.append(worker)

        # Prepare an empty set of solvers
        self._solvers = {}
        self._solvers_lock = threading.RLock()
        self._all_solvers_ready = False

        # Set the parameters for requests; disable SSL verification if needed
        self._request_parameters = {}
        if permissive_ssl:
            self._request_parameters['verify'] = False
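The constructor above repeats one pattern four times: create a queue, then start a fixed number of daemon threads that drain it. A condensed sketch of that pattern with illustrative names (not this client's actual API):

import queue
import threading

def start_workers(handler, count):
    """Start `count` daemon threads that drain a shared queue with `handler`."""
    work_q = queue.Queue()
    workers = []
    for _ in range(count):
        def run():
            while True:
                item = work_q.get()   # blocks until a task arrives
                try:
                    handler(item)
                finally:
                    work_q.task_done()
        worker = threading.Thread(target=run)
        worker.daemon = True          # do not keep the process alive on exit
        worker.start()
        workers.append(worker)
    return work_q, workers

# submission_q, _ = start_workers(lambda problem: print("submit", problem), 2)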
Example #11
 def __init__(self, numbers):
     self.numbers = numbers
     self.queue = Queue.Queue()
     self.scheduling()
Example #12
 def __init__(self):
     super(MockMidiPort, self).__init__()
     self.message_queue = Queue.Queue()
Example #13
    def __init__(self,
                 config_path,
                 load_metrics,
                 max_launch_batch=AUTOSCALER_MAX_LAUNCH_BATCH,
                 max_concurrent_launches=AUTOSCALER_MAX_CONCURRENT_LAUNCHES,
                 max_failures=AUTOSCALER_MAX_NUM_FAILURES,
                 process_runner=subprocess,
                 update_interval_s=AUTOSCALER_UPDATE_INTERVAL_S):
        self.config_path = config_path
        # Keep this before self.reset (self.provider needs to be created
        # exactly once).
        self.provider = None
        self.reset(errors_fatal=True)
        self.load_metrics = load_metrics

        self.max_failures = max_failures
        self.max_launch_batch = max_launch_batch
        self.max_concurrent_launches = max_concurrent_launches
        self.process_runner = process_runner

        # Map from node_id to NodeUpdater processes
        self.updaters = {}
        self.num_failed_updates = defaultdict(int)
        self.num_successful_updates = defaultdict(int)
        self.num_failures = 0
        self.last_update_time = 0.0
        self.update_interval_s = update_interval_s
        self.bringup = True

        # Node launchers
        self.launch_queue = queue.Queue()
        self.pending_launches = ConcurrentCounter()
        max_batches = math.ceil(max_concurrent_launches /
                                float(max_launch_batch))
        for i in range(int(max_batches)):
            node_launcher = NodeLauncher(
                provider=self.provider,
                queue=self.launch_queue,
                index=i,
                pending=self.pending_launches,
                node_types=self.available_node_types,
            )
            node_launcher.daemon = True
            node_launcher.start()

        # Expand local file_mounts to allow ~ in the paths. This can't be done
        # earlier when the config is written since we might be on a different
        # platform and the expansion would result in a wrong path.
        self.config["file_mounts"] = {
            remote: os.path.expanduser(local)
            for remote, local in self.config["file_mounts"].items()
        }

        for local_path in self.config["file_mounts"].values():
            assert os.path.exists(local_path)

        # Aggregate resources the user is requesting of the cluster.
        self.resource_requests = defaultdict(int)
        # List of resource bundles the user is requesting of the cluster.
        self.resource_demand_vector = []

        logger.info("StandardAutoscaler: {}".format(self.config))
Example #14
    def _compose_wf_graph(cls, wf_spec):
        if not isinstance(wf_spec, cls.wf_spec_type):
            raise TypeError("Workflow spec is not typeof %s." % cls.wf_spec_type.__name__)

        q = queue.Queue()
        wf_graph = graphing.WorkflowGraph()

        for task_name, condition, task_transition_item_idx in wf_spec.tasks.get_start_tasks():
            q.put((task_name, []))

        while not q.empty():
            task_name, splits = q.get()

            wf_graph.add_task(task_name)

            if wf_spec.tasks.is_join_task(task_name):
                task_spec = wf_spec.tasks[task_name]
                barrier = "*" if task_spec.join == "all" else task_spec.join
                wf_graph.set_barrier(task_name, value=barrier)

            # Determine if the task is a split task and if it is in a cycle. If the task is a
            # split task, keep track of where the split(s) occurs.
            if wf_spec.tasks.is_split_task(task_name) and not wf_spec.tasks.in_cycle(task_name):
                splits.append(task_name)

            if splits:
                wf_graph.update_task(task_name, splits=splits)

            # Update task attributes if task spec has retry criteria.
            task_spec = wf_spec.tasks.get_task(task_name)

            if task_spec.has_retry():
                retry_spec = {
                    "when": getattr(task_spec.retry, "when", None),
                    "count": getattr(task_spec.retry, "count", None),
                    "delay": getattr(task_spec.retry, "delay", None),
                }

                wf_graph.update_task(task_name, retry=retry_spec)

            # Add task transition to the workflow graph.
            next_tasks = wf_spec.tasks.get_next_tasks(task_name)

            for next_task_name, condition, task_transition_item_idx in next_tasks:
                if next_task_name == "retry":
                    retry_spec = {"when": condition or "<% completed() %>", "count": 3}
                    wf_graph.update_task(task_name, retry=retry_spec)
                    continue

                if not wf_graph.has_task(next_task_name) or not wf_spec.tasks.in_cycle(
                    next_task_name
                ):
                    q.put((next_task_name, list(splits)))

                crta = [condition] if condition else []

                seqs = wf_graph.has_transition(
                    task_name, next_task_name, criteria=crta, ref=task_transition_item_idx
                )

                # Use existing transition if present otherwise create new transition.
                if seqs:
                    wf_graph.update_transition(
                        task_name,
                        next_task_name,
                        key=seqs[0][2],
                        criteria=crta,
                        ref=task_transition_item_idx,
                    )
                else:
                    wf_graph.add_transition(
                        task_name, next_task_name, criteria=crta, ref=task_transition_item_idx
                    )

        return wf_graph
Example #15
 def __init__(self, sites, proxies=None):
     self.sites = sites
     self.proxies = proxies
     self.queue = Queue.Queue()
     self.scheduling()
Example #16
def run(fn,
        args=(),
        kwargs={},
        num_proc=None,
        start_timeout=None,
        env=None,
        stdout=None,
        stderr=None,
        verbose=1):
    """
    Runs Horovod in Spark.  Runs `num_proc` processes executing `fn` using the same number of Spark tasks.

    Args:
        fn: Function to run.
        args: Arguments to pass to `fn`.
        kwargs: Keyword arguments to pass to `fn`.
        num_proc: Number of Horovod processes.  Defaults to `spark.default.parallelism`.
        start_timeout: Timeout for Spark tasks to spawn, register and start running the code, in seconds.
                       If not set, falls back to `HOROVOD_SPARK_START_TIMEOUT` environment variable value.
                       If it is not set as well, defaults to 600 seconds.
        env: Environment dictionary to use in Horovod run.  Defaults to `os.environ`.
        stdout: Horovod stdout is redirected to this stream. Defaults to sys.stdout.
        stderr: Horovod stderr is redirected to this stream. Defaults to sys.stderr.
        verbose: Debug output verbosity (0-2). Defaults to 1.

    Returns:
        List of results returned by running `fn` on each rank.
    """
    spark_context = pyspark.SparkContext._active_spark_context
    if spark_context is None:
        raise Exception(
            'Could not find an active SparkContext, are you running in a PySpark session?'
        )

    if num_proc is None:
        num_proc = spark_context.defaultParallelism
        if verbose >= 1:
            print(
                'Running %d processes (inferred from spark.default.parallelism)...'
                % num_proc)
    else:
        if verbose >= 1:
            print('Running %d processes...' % num_proc)

    if start_timeout is None:
        # Lookup default timeout from the environment variable.
        start_timeout = int(os.getenv('HOROVOD_SPARK_START_TIMEOUT', '600'))

    result_queue = queue.Queue(1)
    tmout = timeout.Timeout(start_timeout)
    key = secret.make_secret_key()
    spark_job_group = 'horovod.spark.run.%d' % job_id.next_job_id()
    driver = driver_service.SparkDriverService(num_proc, fn, args, kwargs, key)
    spark_thread = _make_spark_thread(spark_context, spark_job_group, num_proc,
                                      driver, tmout, key, result_queue)
    try:
        driver.wait_for_initial_registration(tmout)
        if verbose >= 2:
            print('Initial Spark task registration is complete.')
        task_clients = [
            task_service.SparkTaskClient(
                index, driver.task_addresses_for_driver(index), key)
            for index in range(num_proc)
        ]
        for task_client in task_clients:
            task_client.notify_initial_registration_complete()
        driver.wait_for_task_to_task_address_updates(tmout)
        if verbose >= 2:
            print('Spark task-to-task address registration is complete.')

        # Determine a set of common interfaces for task-to-task communication.
        common_intfs = set(driver.task_addresses_for_tasks(0).keys())
        for index in range(1, num_proc):
            common_intfs.intersection_update(
                driver.task_addresses_for_tasks(index).keys())
        if not common_intfs:
            raise Exception(
                'Unable to find a set of common task-to-task communication interfaces: %s'
                % [(index, driver.task_addresses_for_tasks(index))
                   for index in range(num_proc)])

        # Determine the index grouping based on host hashes.
        # Barrel shift until index 0 is in the first host.
        host_hashes = list(driver.task_host_hash_indices().keys())
        host_hashes.sort()
        while 0 not in driver.task_host_hash_indices()[host_hashes[0]]:
            host_hashes = host_hashes[1:] + host_hashes[:1]

        ranks_to_indices = []
        for host_hash in host_hashes:
            ranks_to_indices += driver.task_host_hash_indices()[host_hash]
        driver.set_ranks_to_indices(ranks_to_indices)

        if env is None:
            env = os.environ.copy()

        # Pass secret key through the environment variables.
        env[secret.HOROVOD_SECRET_KEY] = codec.dumps_base64(key)

        mpirun_command = (
            'mpirun --allow-run-as-root --tag-output '
            '-np {num_proc} -H {hosts} '
            '-bind-to none -map-by slot '
            '-mca pml ob1 -mca btl ^openib -mca btl_tcp_if_include {common_intfs} '
            '-x NCCL_DEBUG=INFO -x NCCL_SOCKET_IFNAME={common_intfs} '
            '{env} '  # expect a lot of environment variables
            '-mca plm_rsh_agent "{python} -m horovod.spark.driver.mpirun_rsh {encoded_driver_addresses}" '
            '{python} -m horovod.spark.task.mpirun_exec_fn {encoded_driver_addresses} '
            .format(
                num_proc=num_proc,
                hosts=','.join(
                    '%s:%d' % (host_hash,
                               len(driver.task_host_hash_indices()[host_hash]))
                    for host_hash in host_hashes),
                common_intfs=','.join(common_intfs),
                env=' '.join('-x %s' % key for key in env.keys()
                             if key not in env_constants.IGNORE_LIST),
                python=sys.executable,
                encoded_driver_addresses=codec.dumps_base64(
                    driver.addresses())))
        if verbose >= 2:
            print('+ %s' % mpirun_command)
        exit_code = safe_shell_exec.execute(mpirun_command, env, stdout,
                                            stderr)
        if exit_code != 0:
            raise Exception(
                'mpirun exited with code %d, see the error above.' % exit_code)
    except:
        # Terminate Spark job.
        spark_context.cancelJobGroup(spark_job_group)

        # Re-raise exception.
        raise
    finally:
        spark_thread.join()
        driver.shutdown()

    # Make sure Spark Job did not fail.
    driver.check_for_spark_job_failure()

    # If there's no exception, execution results are in this queue.
    results = result_queue.get_nowait()
    return [results[index] for index in ranks_to_indices]
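A hedged usage sketch for the run() function above; it assumes an active PySpark session and a Horovod installation with MPI, and the training function is only a placeholder.

def train(lr):
    import horovod.tensorflow as hvd   # each Spark task executes this under MPI
    hvd.init()
    return hvd.rank() * lr             # placeholder per-rank result

# results = run(train, args=(0.01,), num_proc=4, verbose=2)
# results[i] is the value returned by the worker with Horovod rank i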
Example #17
    model = googlenetbn.GoogLeNetBN()
else:
    raise ValueError('Invalid architecture name')

if args.gpu >= 0:
    cuda.init(args.gpu)
    model.to_gpu()

# Setup optimizer
optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(model.collect_parameters())

# ------------------------------------------------------------------------------
# This example consists of three threads: data feeder, logger and trainer.
# These communicate with each other via Queue.
data_q = queue.Queue(maxsize=1)
res_q = queue.Queue()

# Data loading routine
cropwidth = 256 - model.insize


def read_image(path, center=False, flip=False):
    image = np.asarray(Image.open(path)).transpose(2, 0, 1)
    if center:
        top = left = cropwidth // 2  # integer offsets for cropping
    else:
        top = random.randint(0, cropwidth - 1)
        left = random.randint(0, cropwidth - 1)
    bottom = model.insize + top
    right = model.insize + left
Example #18
def _parse_graph_scope(graph, keras_node_dict, topology, top_scope,
                       output_names):
    """
    Traverse the tensor graph and build the corresponding intermediate operation objects.
    :param graph: the tensorflow session graph of the Keras model.
    :param keras_node_dict: the mapping of operation node to keras layer output.
    :param topology: The whole topology of the intermediate objects.
    :param top_scope: The top varset
    :param output_names: the output names of the TF graph
    :return: The whole topology of the intermediate objects.
    """
    input_nodes = set()
    raw_model_container = topology.raw_model

    # build the node in the working scope.
    varset = topology.declare_scope('curr_', top_scope)
    for name in output_names:
        raw_model_container.add_output_name(name)

    model_outputs = []
    for name in output_names:
        var_ts = graph.get_operation_by_name(tsname_to_node(name)).outputs[0]
        _create_link_node(var_ts, top_scope, varset, adjust_batch_size=True)
        model_outputs.append(var_ts.op)

    # starting from the output node.
    q_overall = queue.Queue()
    for n_ in model_outputs:
        q_overall.put_nowait(n_)

    visited = set()  # since the output could be shared among the successor nodes.

    def advance_by_input(cur_node, layer_nodes, subgraph, inputs):
        for input_ in cur_node.inputs:
            predecessor = input_.op
            if is_placeholder_node(predecessor):
                input_nodes.add(predecessor)
            if predecessor in layer_nodes or len(layer_nodes) == 0:
                subgraph.put_nowait(predecessor)
            else:
                inputs.add(predecessor)
                q_overall.put_nowait(predecessor)

    inference_nodeset = _build_inference_nodeset(graph, model_outputs)
    keras_nodeset = _build_keras_nodeset(inference_nodeset, keras_node_dict)
    while not q_overall.empty():
        node = q_overall.get_nowait()
        if node in input_nodes or node in visited:
            continue

        layer_key_, model_ = (None, None)
        if node.name in keras_node_dict:
            layer_key_, model_ = keras_node_dict[node.name]
            if isinstance(layer_key_, keras.Model) and \
                    _check_layer_converter_availability(layer_key_):
                k2o_logger().debug("Processing a keras sub model - %s" %
                                   layer_key_.name)
                kenode = _find_kenode_by_output_tensor(
                    extract_inbound_nodes(layer_key_), node.name)
                ts_in, ts_out = _on_parsing_model_layer(
                    layer_key_, graph, kenode, varset)
                for ts_ in ts_in:
                    if is_placeholder_node(ts_.op):
                        input_nodes.add(ts_.op)
                    else:
                        q_overall.put_nowait(ts_.op)

                visited.update(ts_.op for ts_ in ts_out)
                continue

            activated_keras_nodes = _create_keras_nodelist(
                layer_key_, inference_nodeset, node)
        else:
            activated_keras_nodes = _general_nodelist_closure(
                node, inference_nodeset, keras_nodeset)
        q_subgraph = queue.Queue()
        i_subgraph = set()
        nodes = []
        for ot_ in (_get_output_nodes(activated_keras_nodes, layer_key_, node)
                    if activated_keras_nodes else [node]):
            if ot_ not in nodes:
                visited.add(ot_)
                nodes.append(ot_)
                advance_by_input(ot_, activated_keras_nodes, q_subgraph,
                                 i_subgraph)

        while not q_subgraph.empty():
            int_node = q_subgraph.get_nowait()
            if int_node in input_nodes or int_node in visited or int_node.name in keras_node_dict:
                continue

            visited.add(int_node)
            nodes.append(int_node)
            advance_by_input(int_node, activated_keras_nodes, q_subgraph,
                             i_subgraph)

        k2o_logger().debug('Processing a keras layer - (%s: %s)' %
                           ((layer_key_.name, type(layer_key_)) if layer_key_
                            else (nodes[0].name, "Custom_Layer")))
        if isinstance(layer_key_, keras.layers.TimeDistributed):
            _on_parsing_time_distributed_layer(graph, nodes, layer_key_,
                                               model_, varset)
        elif layer_key_ is None or get_converter(type(layer_key_)) is None:
            _on_parsing_tf_subgraph(nodes, varset)
        else:
            kenode = _find_kenode_by_output_tensor(
                extract_inbound_nodes(layer_key_), nodes[0].name)
            _on_parsing_keras_layer(graph, nodes, layer_key_, kenode, model_,
                                    varset)

    for nd_ in input_nodes:
        var_ts = nd_.outputs[0]  # a placeholder node has exactly one output.
        _create_link_node(var_ts, top_scope, varset, True)

    _finalize_const_graph(topology, top_scope, varset)
    _infer_graph_shape(topology, top_scope, varset)
    topology.root_names = [
        variable.onnx_name for variable in top_scope.variables.values()
    ]
    return topology
Example #19
 def __init__(self):
     super(EnsureConnectionStage, self).__init__()
     self.connected = False
     self.queue = queue.Queue()
     self.blocked = False
Example #20
 def setUp(self):
     self.mock_cg = tempfile.mkdtemp()
     self.monitor = Cgroup(self.mock_cg)
     self.queue = queue.Queue()
Example #21
def get_std_encoding(stream):
    """
    Get encoding of the stream

    Args:
        stream: stream

    Returns:
        encoding or file system encoding

    """
    return getattr(stream, "encoding", None) or sys.getfilesystemencoding()


CLEANUP_CALLS = queue.Queue()


def reg_cleanup(func, *args, **kwargs):
    """
    Register a function to be called during cleanup

    Args:
        func: function to register
        *args: optional positional arguments for the function
        **kwargs: optional keyword arguments for the function

    Returns:
        None

    """
Example #22
    def backup(self,
               backup_resource,
               hostname_backup_name,
               no_incremental,
               max_level,
               always_level,
               restart_always_level,
               queue_size=2):
        """
        Here we know the location of all interesting artifacts like metadata.
        Should return a stream for storing data.
        :return: stream
        """
        prev_backup = self.storage.previous_backup(
            engine=self,
            hostname_backup_name=hostname_backup_name,
            no_incremental=no_incremental,
            max_level=max_level,
            always_level=always_level,
            restart_always_level=restart_always_level)

        try:
            tmpdir = tempfile.mkdtemp()
        except Exception:
            LOG.error("Unable to create a tmp directory")
            raise

        try:
            engine_meta = utils.path_join(tmpdir, "engine_meta")
            freezer_meta = utils.path_join(tmpdir, "freezer_meta")
            if prev_backup:
                prev_backup.storage.get_file(prev_backup.engine_metadata_path,
                                             engine_meta)
            timestamp = utils.DateTime.now().timestamp
            level_zero_timestamp = (prev_backup.level_zero_timestamp
                                    if prev_backup else timestamp)
            backup = base.Backup(engine=self,
                                 hostname_backup_name=hostname_backup_name,
                                 level_zero_timestamp=level_zero_timestamp,
                                 timestamp=timestamp,
                                 level=(prev_backup.level +
                                        1 if prev_backup else 0))

            input_queue = streaming.RichQueue(queue_size)
            read_except_queue = queue.Queue()
            write_except_queue = queue.Queue()

            read_stream = streaming.QueuedThread(self.backup_stream,
                                                 input_queue,
                                                 read_except_queue,
                                                 kwargs={
                                                     "backup_resource":
                                                     backup_resource,
                                                     "manifest_path":
                                                     engine_meta
                                                 })

            write_stream = streaming.QueuedThread(self.storage.write_backup,
                                                  input_queue,
                                                  write_except_queue,
                                                  kwargs={"backup": backup})

            read_stream.daemon = True
            write_stream.daemon = True
            read_stream.start()
            write_stream.start()
            read_stream.join()
            write_stream.join()

            # queue handling is different from SimpleQueue handling.
            def handle_except_queue(except_queue):
                if not except_queue.empty():
                    while not except_queue.empty():
                        e = except_queue.get_nowait()
                        LOG.critical('Engine error: {0}'.format(e))
                    return True
                else:
                    return False

            got_exception = None
            got_exception = (handle_except_queue(read_except_queue)
                             or got_exception)
            got_exception = (handle_except_queue(write_except_queue)
                             or got_exception)

            if got_exception:
                raise engine_exceptions.EngineException(
                    "Engine error. Failed to backup.")

            with open(freezer_meta, mode='wb') as b_file:
                b_file.write(json.dumps(self.metadata(backup_resource)))
            self.storage.put_metadata(engine_meta, freezer_meta, backup)
        finally:
            shutil.rmtree(tmpdir)
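The backup method above joins a reader and a writer thread and then inspects one exception queue per thread to decide whether the run failed. A stripped-down, standard-library-only sketch of that error-collection pattern (streaming.RichQueue and QueuedThread are freezer-specific and not reproduced here; the tasks below are placeholders):

import queue
import threading

def worker(task, except_q):
    """Run `task`; report any exception through its own queue instead of dying silently."""
    try:
        task()
    except Exception as e:
        except_q.put(e)

read_except_q = queue.Queue()
write_except_q = queue.Queue()

t_read = threading.Thread(target=worker, args=(lambda: None, read_except_q), daemon=True)
t_write = threading.Thread(target=worker, args=(lambda: None, write_except_q), daemon=True)
for t in (t_read, t_write):
    t.start()
for t in (t_read, t_write):
    t.join()

if not read_except_q.empty() or not write_except_q.empty():
    raise RuntimeError("a worker thread reported an error")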
Example #23
 def __init__(self):
     self._request_queue = queue.Queue()
     self._stopped = threading.Event()
     self._can_consume = threading.Event()
     self._consumer_thread = None
Example #24
 def __init__(self, pipe_c2s, pipe_s2c, model):
     super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
     self.M = model
     self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
Example #25
def wandb_internal(
    settings,
    record_q,
    result_q,
):
    """Internal process function entrypoint.

    Read from record queue and dispatch work to various threads.

    Arguments:
        settings: dictionary of configuration parameters.
        record_q: records to be handled
        result_q: for sending results back

    """
    # mark this process as internal
    wandb._set_internal_process()
    started = time.time()

    # register the exit handler only when wandb_internal is called, not on import
    @atexit.register
    def handle_exit(*args):
        logger.info("Internal process exited")

    # Let's make sure we don't modify settings, so use a static object
    _settings = settings_static.SettingsStatic(settings)
    if _settings.log_internal:
        configure_logging(_settings.log_internal, _settings._log_level)

    parent_pid = os.getppid()
    pid = os.getpid()

    logger.info(
        "W&B internal server running at pid: %s, started at: %s",
        pid,
        datetime.fromtimestamp(started),
    )

    publish_interface = interface.BackendSender(record_q=record_q)

    stopped = threading.Event()
    threads = []

    send_record_q = queue.Queue()
    record_sender_thread = SenderThread(
        settings=_settings,
        record_q=send_record_q,
        result_q=result_q,
        stopped=stopped,
        interface=publish_interface,
    )
    threads.append(record_sender_thread)

    write_record_q = queue.Queue()
    record_writer_thread = WriterThread(
        settings=_settings,
        record_q=write_record_q,
        result_q=result_q,
        stopped=stopped,
        writer_q=write_record_q,
    )
    threads.append(record_writer_thread)

    record_handler_thread = HandlerThread(
        settings=_settings,
        record_q=record_q,
        result_q=result_q,
        stopped=stopped,
        sender_q=send_record_q,
        writer_q=write_record_q,
        interface=publish_interface,
    )
    threads.append(record_handler_thread)

    process_check = ProcessCheck(settings=_settings, pid=parent_pid)

    for thread in threads:
        thread.start()

    interrupt_count = 0
    while not stopped.is_set():
        try:
            # wait for stop event
            while not stopped.is_set():
                time.sleep(1)
                if process_check.is_dead():
                    logger.error("Internal process shutdown.")
                    stopped.set()
        except KeyboardInterrupt:
            interrupt_count += 1
            logger.warning(
                "Internal process interrupt: {}".format(interrupt_count))
        finally:
            if interrupt_count >= 2:
                logger.error("Internal process interrupted.")
                stopped.set()

    for thread in threads:
        thread.join()

    for thread in threads:
        exc_info = thread.get_exception()
        if exc_info:
            logger.error("Thread {}:".format(thread.name), exc_info=exc_info)
            print("Thread {}:".format(thread.name), file=sys.stderr)
            traceback.print_exception(*exc_info)
            sentry_exc(exc_info, delay=True)
            sys.exit(-1)
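wandb_internal above chains queues: records arrive on record_q, a handler thread routes them to sender and writer queues, and dedicated threads drain those. A condensed sketch of that fan-out with illustrative names (not wandb's actual thread classes):

import queue
import threading

record_q = queue.Queue()
send_q = queue.Queue()
write_q = queue.Queue()
stopped = threading.Event()

def handler():
    while not stopped.is_set():
        try:
            record = record_q.get(timeout=0.1)
        except queue.Empty:
            continue
        send_q.put(record)    # forward to the sender thread
        write_q.put(record)   # and to the writer thread

threading.Thread(target=handler, daemon=True).start()
# record_q.put({"event": "example"})  # producers feed the inbound queue
# stopped.set()                       # signals the handler loop to exit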
Example #26
    def seek(self, offset, whence=None, partition=None):
        """
        Alter the current offset in the consumer, similar to fseek

        Arguments:
            offset: how much to modify the offset
            whence: where to modify it from, default is None

                * None is an absolute offset
                * 0    is relative to the earliest available offset (head)
                * 1    is relative to the current offset
                * 2    is relative to the latest known offset (tail)

            partition: modify which partition, default is None.
                If partition is None, would modify all partitions.
        """

        if whence is None: # set an absolute offset
            if partition is None:
                for tmp_partition in self.offsets:
                    self.offsets[tmp_partition] = offset
            else:
                self.offsets[partition] = offset
        elif whence == 1:  # relative to current position
            if partition is None:
                for tmp_partition, _offset in self.offsets.items():
                    self.offsets[tmp_partition] = _offset + offset
            else:
                self.offsets[partition] += offset
        elif whence in (0, 2):  # relative to beginning or end
            reqs = []
            deltas = {}
            if partition is None:
                # divide the request offset by the number of partitions,
                # distribute the remainder evenly
                (delta, rem) = divmod(offset, len(self.offsets))
                for tmp_partition, r in izip_longest(self.offsets.keys(),
                                                     repeat(1, rem),
                                                     fillvalue=0):
                    deltas[tmp_partition] = delta + r

                for tmp_partition in self.offsets.keys():
                    if whence == 0:
                        reqs.append(OffsetRequestPayload(self.topic, tmp_partition, -2, 1))
                    elif whence == 2:
                        reqs.append(OffsetRequestPayload(self.topic, tmp_partition, -1, 1))
                    else:
                        pass
            else:
                deltas[partition] = offset
                if whence == 0:
                    reqs.append(OffsetRequestPayload(self.topic, partition, -2, 1))
                elif whence == 2:
                    reqs.append(OffsetRequestPayload(self.topic, partition, -1, 1))
                else:
                    pass

            resps = self.client.send_offset_request(reqs)
            for resp in resps:
                self.offsets[resp.partition] = \
                    resp.offsets[0] + deltas[resp.partition]
        else:
            raise ValueError('Unexpected value for `whence`, %d' % whence)

        # Reset queue and fetch offsets since they are invalid
        self.fetch_offsets = self.offsets.copy()
        self.count_since_commit += 1
        if self.auto_commit:
            self.commit()

        self.queue = queue.Queue()
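A hypothetical usage sketch for the seek() method above, assuming a kafka-python SimpleConsumer-style object named consumer (construction not shown; the offsets are illustrative):

# consumer.seek(42)                        # whence=None: absolute offset 42 on every partition
# consumer.seek(0, whence=0)               # rewind every partition to the earliest offset
# consumer.seek(-10, whence=2)             # position 10 messages before the tail, spread across partitions
# consumer.seek(5, whence=1, partition=3)  # advance partition 3 by 5 messages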
Example #27
    def create():

        from six.moves import queue
        return queue.Queue()
Example #28
 def __init__(self, parent, on_error=None):
     super(TaskThread, self).__init__(parent)
     self.on_error = on_error
     self.tasks = queue.Queue()
     self.doneSig.connect(self.on_done)
     self.start()
Example #29
 def add_task(self, func, args=()):
     assert callable(func)  # must be a callable
     if func not in self.result_queues:
         self.result_queues[func] = Queue.Queue()
     self.task_queue.put((func, args, self.result_queues[func]))
Example #30
 def setUp(self):
     import six.moves.queue as queue
     self.queue = queue.Queue()
     self.writer = DataWriterContainer()