client_data.identity = (u"%s" % (os.getpid() + 6000)).encode('ascii')
        client_data.setsockopt(zmq.IDENTITY, client_data.identity)
        client_data.connect(fe_client_addr)

        client_hb = ctx.socket(zmq.DEALER)
        client_hb.identity = (u"%s" % (os.getpid() + 6000)).encode('ascii')
        client_hb.setsockopt(zmq.IDENTITY, client_hb.identity)
        client_hb.connect(fe_hb_addr)

        client_control = ctx.socket(zmq.PUSH)
        client_control.connect(fe_monitor_addr)

        print(fe_monitor_addr)
        print(fe_client_addr)

        poller_hb = zmq.Poller()
        poller_hb.register(client_hb, zmq.POLLIN)
        hb_thread = threading.Thread(target=manage_heartbeat,
                                     args=(
                                         client_hb,
                                         poller_hb,
                                     ))
        hb_thread.daemon = True
        hb_thread.start()

        host_name = socket.gethostname()
        print("Path to the image(s)")
        option = 1

        while option < 6:
            try:
Example #2
    def _run(self, receiver, frontend, sender):
        receiver_addr = auto_bind(receiver)
        frontend.connect(self.front_sink_addr)
        sender.bind('tcp://*:%d' % self.port)

        pending_jobs: Dict[str, SinkJob] = defaultdict(lambda: SinkJob())

        poller = zmq.Poller()
        poller.register(frontend, zmq.POLLIN)
        poller.register(receiver, zmq.POLLIN)

        # send worker receiver address back to frontend
        frontend.send(receiver_addr.encode('ascii'))

        # Windows does not support logger in MP environment, thus get a new logger
        # inside the process for better compatibility
        logger = set_logger(colored('SINK', 'green'), self.verbose)
        logger.info('ready')
        self.is_ready.set()

        while not self.exit_flag.is_set():
            socks = dict(poller.poll())
            if socks.get(receiver) == zmq.POLLIN:
                msg = receiver.recv_multipart()
                job_id = msg[0]
                # parsing job_id and partial_id
                job_info = job_id.split(b'@')
                job_id = job_info[0]
                partial_id = int(job_info[1]) if len(job_info) == 2 else 0

                if msg[2] == ServerCmd.data_embed:
                    x = jsonapi.loads(msg[1])
                    pending_jobs[job_id].add_output(x, partial_id)
                else:
                    logger.error(
                        'received a wrongly-formatted request (expected 4 frames, got %d)' % len(msg))
                    logger.error('\n'.join('field %d: %s' % (idx, k)
                                           for idx, k in enumerate(msg)), exc_info=True)

                logger.info('collect %s %s (E:%d/A:%d)' % (msg[2], job_id,
                                                           pending_jobs[job_id].progress_outputs,
                                                           pending_jobs[job_id].checksum))

                # check if there are finished jobs, then send it back to workers

                finished = [(k, v)
                            for k, v in pending_jobs.items() if v.is_done]
                for job_info, tmp in finished:
                    client_addr, req_id = job_info.split(b'#')
                    x = tmp.result
                    sender.send_multipart([client_addr, x, req_id])
                    logger.info('send back\tsize: %d\tjob id: %s' %
                                (tmp.checksum, job_info))
                    # release the job
                    tmp.clear()
                    pending_jobs.pop(job_info)

            if socks.get(frontend) == zmq.POLLIN:
                client_addr, msg_type, msg_info, req_id = frontend.recv_multipart()
                if msg_type == ServerCmd.new_job:
                    job_info = client_addr + b'#' + req_id
                    # register a new job
                    pending_jobs[job_info].checksum = int(msg_info)
                    logger.info('job register\tsize: %d\tjob id: %s' %
                                (int(msg_info), job_info))
                elif msg_type == ServerCmd.show_config:
                    # dirty fix of slow-joiner: sleep so that client receiver can connect.
                    time.sleep(0.1)
                    logger.info('send config\tclient %s' % client_addr)
                    sender.send_multipart([client_addr, msg_info, req_id])
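Note: the SinkJob container used by pending_jobs above is defined elsewhere in the project. Below is a minimal sketch of the interface this loop relies on; the attribute and method names come from the calls above, but the internals are assumptions rather than the project's actual implementation.

from zmq.utils import jsonapi

class SinkJob:
    """Hypothetical minimal job container matching the calls made in _run."""

    def __init__(self):
        self.checksum = 0    # expected total number of samples, set on new_job
        self._outputs = {}   # partial_id -> decoded partial output (a list)

    def add_output(self, x, partial_id):
        self._outputs[partial_id] = x

    @property
    def progress_outputs(self):
        # number of samples collected so far
        return sum(len(v) for v in self._outputs.values())

    @property
    def is_done(self):
        return self.checksum > 0 and self.progress_outputs >= self.checksum

    @property
    def result(self):
        # concatenate the partial outputs in partial_id order and re-serialize
        merged = []
        for pid in sorted(self._outputs):
            merged.extend(self._outputs[pid])
        return jsonapi.dumps(merged)

    def clear(self):
        self._outputs.clear()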
Example #3
def run():
    context = zmq.Context(1)

    restart_pull_socket = context.socket(zmq.REP)
    restart_pull_socket.bind('tcp://*:7000')

    churn_pull_socket = context.socket(zmq.PULL)
    churn_pull_socket.bind('tcp://*:7001')

    list_executors_socket = context.socket(zmq.REP)
    list_executors_socket.bind('tcp://*:7002')

    function_status_socket = context.socket(zmq.PULL)
    function_status_socket.bind('tcp://*:7003')

    list_schedulers_socket = context.socket(zmq.REP)
    list_schedulers_socket.bind('tcp://*:7004')

    executor_depart_socket = context.socket(zmq.PULL)
    executor_depart_socket.bind('tcp://*:7005')

    executor_statistics_socket = context.socket(zmq.PULL)
    executor_statistics_socket.bind('tcp://*:7006')

    poller = zmq.Poller()
    poller.register(restart_pull_socket, zmq.POLLIN)
    poller.register(churn_pull_socket, zmq.POLLIN)
    poller.register(function_status_socket, zmq.POLLIN)
    poller.register(list_executors_socket, zmq.POLLIN)
    poller.register(list_schedulers_socket, zmq.POLLIN)
    poller.register(executor_depart_socket, zmq.POLLIN)
    poller.register(executor_statistics_socket, zmq.POLLIN)

    add_push_socket = context.socket(zmq.PUSH)
    add_push_socket.connect('ipc:///tmp/node_add')

    remove_push_socket = context.socket(zmq.PUSH)
    remove_push_socket.connect('ipc:///tmp/node_remove')

    # waits until the kubecfg file gets copied into the pod -- this might be
    # brittle if we try to move to a non-Ubuntu setting, but I'm not worried
    # about that for now
    while not os.path.isfile('/root/.kube/config'):
        pass

    client, _ = util.init_k8s()

    # track the self-reported status of each function execution thread
    executor_statuses = {}
    departing_executors = {}
    function_frequencies = {}
    function_runtimes = {}
    latency_history = {}

    start = time.time()
    while True:
        socks = dict(poller.poll(timeout=1000))

        if churn_pull_socket in socks and socks[churn_pull_socket] == \
                zmq.POLLIN:
            msg = churn_pull_socket.recv_string()
            args = msg.split(':')

            if args[0] == 'add':
                logging.info('Received message: %s.' % (msg))
                msg = args[2] + ':' + args[1]
                add_push_socket.send_string(msg)
            elif args[0] == 'remove':
                msg = args[2] + ':' + args[1]
                remove_push_socket.send_string(msg)

        if restart_pull_socket in socks and socks[restart_pull_socket] == \
                zmq.POLLIN:
            msg = restart_pull_socket.recv_string()
            args = msg.split(':')

            ip = args[1]
            pod = util.get_pod_from_ip(client, ip)

            count = str(pod.status.container_statuses[0].restart_count)

            logging.info('Returning restart count %s for IP %s.' % (count, ip))
            restart_pull_socket.send_string(count)

        if list_executors_socket in socks and socks[list_executors_socket] == \
                zmq.POLLIN:
            # it doesn't matter what is in this message
            list_executors_socket.recv()

            ks = KeySet()
            for ip in util.get_pod_ips(client, 'role=function'):
                ks.keys.append(ip)

            list_executors_socket.send(ks.SerializeToString())

        if function_status_socket in socks and \
                socks[function_status_socket] == zmq.POLLIN:
            status = ThreadStatus()
            status.ParseFromString(function_status_socket.recv())

            key = (status.ip, status.tid)

            # if this executor is one of the ones that's currently departing,
            # we can just ignore its status updates since we don't want
            # utilization to be skewed downwards
            if key in departing_executors:
                continue

            executor_statuses[key] = status
            logging.info(('Received thread status update from %s:%d: %.4f ' +
                          'occupancy, %d functions pinned') %
                         (status.ip, status.tid, status.utilization,
                          len(status.functions)))

        if (list_schedulers_socket in socks
                and socks[list_schedulers_socket] == zmq.POLLIN):
            # It doesn't matter what is in this message
            msg = list_schedulers_socket.recv_string()

            ks = KeySet()
            for ip in util.get_pod_ips(client, 'role=scheduler'):
                ks.keys.append(ip)

            list_schedulers_socket.send(ks.SerializeToString())

        if executor_depart_socket in socks and \
                socks[executor_depart_socket] == zmq.POLLIN:
            ip = executor_depart_socket.recv_string()
            departing_executors[ip] -= 1

            # wait until all the executors on this IP have cleared their queues
            # and left; then we remove the node
            if departing_executors[ip] == 0:
                msg = 'function:' + ip
                remove_push_socket.send_string(msg)
                del departing_executors[ip]

        if executor_statistics_socket in socks and \
                socks[executor_statistics_socket] == zmq.POLLIN:
            stats = ExecutorStatistics()
            stats.ParseFromString(executor_statistics_socket.recv())

            for fstats in stats.statistics:
                fname = fstats.fname

                if fname not in function_frequencies:
                    function_frequencies[fname] = 0

                if fname not in function_runtimes:
                    function_runtimes[fname] = (0.0, 0)

                if fstats.HasField('runtime'):
                    old_latency = function_runtimes[fname]
                    function_runtimes[fname] = (old_latency[0] +
                                                fstats.runtime,
                                                old_latency[1] +
                                                fstats.call_count)
                else:
                    function_frequencies[fname] += fstats.call_count

        end = time.time()
        if end - start > REPORT_PERIOD:
            logging.info('Checking hash ring...')
            check_hash_ring(client, context)

            check_executor_utilization(context, executor_statuses,
                                       departing_executors, add_push_socket)

            check_function_load(context, function_frequencies,
                                function_runtimes, executor_statuses,
                                latency_history)

            function_runtimes.clear()
            function_frequencies.clear()
            start = time.time()
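For reference, the churn messages handled at the top of this loop are plain colon-delimited strings of the form 'action:ip:type', which the manager re-emits as 'type:ip'. A sender might look roughly like this; the host and payload values are illustrative, inferred from the parsing above rather than confirmed by the original.

import zmq

MGMT_HOST = '127.0.0.1'  # placeholder: the management server's address

ctx = zmq.Context()
churn = ctx.socket(zmq.PUSH)
churn.connect('tcp://%s:7001' % MGMT_HOST)

# 'add:<ip>:<node-type>' is re-emitted by the manager as '<node-type>:<ip>'
churn.send_string('add:10.0.0.12:memory')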
Example #4
    def start(self):
        self._init_plugins()

        context = zmq.Context()
        pull_s = context.socket(zmq.PULL)
        push_s = context.socket(zmq.PUSH)

        push_s.SNDTIMEO = SNDTIMEO

        logger.debug('connecting to sockets...')
        pull_s.connect(self.pull)
        push_s.connect(self.push)
        logger.debug('starting Gatherer')

        poller = zmq.Poller()
        poller.register(pull_s)

        while not self.exit.is_set():
            try:
                s = dict(poller.poll(1000))
            except Exception as e:
                self.logger.error(e)
                break

            if pull_s in s:
                id, token, mtype, data = Msg().recv(pull_s)

                data = json.loads(data)
                if isinstance(data, dict):
                    data = [data]

                rv = []
                start = time.time()
                for d in data:
                    try:
                        i = Indicator(**d)

                    except InvalidIndicator as e:
                        from pprint import pprint
                        pprint(d)

                        logger.error('invalid indicator: %s' % d)
                        logger.error(e)
                        traceback.print_exc()
                        continue

                    for g in self.gatherers:
                        try:
                            g.process(i)
                        except Exception as e:
                            from pprint import pprint
                            pprint(i)

                            logger.error('gatherer failed: %s' % g)
                            logger.error(e)
                            traceback.print_exc()

                    rv.append(i.__dict__())

                data = json.dumps(rv)
                logger.debug('sending back to router: %f' % (time.time() - start))
                Msg(id=id, mtype=mtype, token=token, data=data).send(push_s)

        logger.info('shutting down gatherer..')
Example #5
def main():
    """ main method """

    url_worker = "inproc://workers"
    url_client = "inproc://clients"
    client_nbr = NBR_CLIENTS

    # Prepare our context and sockets
    context = zmq.Context()
    frontend = context.socket(zmq.ROUTER)
    frontend.bind(url_client)
    backend = context.socket(zmq.ROUTER)
    backend.bind(url_worker)

    # create workers and clients threads
    for i in range(NBR_WORKERS):
        thread = threading.Thread(target=worker_thread,
                                  args=(
                                      url_worker,
                                      context,
                                      i,
                                  ))
        thread.start()

    for i in range(NBR_CLIENTS):
        thread_c = threading.Thread(target=client_thread,
                                    args=(
                                        url_client,
                                        context,
                                        i,
                                    ))
        thread_c.start()

    # Logic of LRU loop
    # - Poll backend always, frontend only if 1+ worker ready
    # - If worker replies, queue worker as ready and forward reply
    # to client if necessary
    # - If client requests, pop next worker and send request to it

    # Queue of available workers
    available_workers = 0
    workers_list = []

    # init poller
    poller = zmq.Poller()

    # Always poll for worker activity on backend
    poller.register(backend, zmq.POLLIN)

    # Poll front-end only if we have available workers
    poller.register(frontend, zmq.POLLIN)

    while True:

        socks = dict(poller.poll())

        # Handle worker activity on backend
        if (backend in socks and socks[backend] == zmq.POLLIN):

            # Queue worker address for LRU routing
            worker_addr = backend.recv()

            assert available_workers < NBR_WORKERS

            # add worker back to the list of workers
            available_workers += 1
            workers_list.append(worker_addr)

            #   Second frame is empty
            empty = backend.recv()
            assert empty == b""

            # Third frame is READY or else a client reply address
            client_addr = backend.recv()

            # If client reply, send rest back to frontend
            if client_addr != b"READY":

                # Following frame is empty
                empty = backend.recv()
                assert empty == b""

                reply = backend.recv()

                frontend.send(client_addr, zmq.SNDMORE)
                frontend.send(b"", zmq.SNDMORE)
                frontend.send(reply)

                client_nbr -= 1

                if client_nbr == 0:
                    break  # Exit after N messages

        # poll on frontend only if workers are available
        if available_workers > 0:

            if (frontend in socks and socks[frontend] == zmq.POLLIN):
                # Now get next client request, route to LRU worker
                # Client request is [address][empty][request]
                client_addr = frontend.recv()

                empty = frontend.recv()
                assert empty == b""

                request = frontend.recv()

                #  Dequeue and drop the next worker address
                available_workers -= 1
                worker_id = workers_list.pop()

                backend.send(worker_id, zmq.SNDMORE)
                backend.send(b"", zmq.SNDMORE)
                backend.send(client_addr, zmq.SNDMORE)
                backend.send(b"", zmq.SNDMORE)
                backend.send(request)

    # Out of infinite loop: do some housekeeping

    frontend.close()
    backend.close()
    context.term()
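The worker_thread and client_thread helpers started above are not shown in this example. Here is a minimal sketch of what they might look like, assuming REQ sockets over the same inproc endpoints; the identities, framing, and payloads are illustrative, following the standard LRU-queue pattern rather than the original code.

import zmq

def worker_thread(url_worker, context, i):
    """Hypothetical worker: announce READY, then answer forwarded requests."""
    socket = context.socket(zmq.REQ)
    socket.identity = (u"Worker-%d" % i).encode('ascii')
    socket.connect(url_worker)

    # tell the broker we are ready for work
    socket.send(b"READY")

    while True:
        # frames from the broker: [client address][empty][request]
        address, empty, request = socket.recv_multipart()
        socket.send_multipart([address, b"", b"OK"])

def client_thread(url_client, context, i):
    """Hypothetical client: send one request and wait for the reply."""
    socket = context.socket(zmq.REQ)
    socket.identity = (u"Client-%d" % i).encode('ascii')
    socket.connect(url_client)

    socket.send(b"HELLO")
    reply = socket.recv()
    print("%s received: %s" % (socket.identity.decode('ascii'), reply))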
Example #6
from twisted.internet.task import LoopingCall
from twisted.internet import reactor
from config import config
from socket import gethostname

config = config[gethostname()]

context = zmq.Context()
socket = context.socket(zmq.SUB)
socket.connect('tcp://localhost:9002')
#socket.connect('tcp://localhost:' + str(config.kmet1_port))
#socket.connect('tcp://' + config.kmet1_ip + ':' + str(config.kmet1_port))
#socket.connect('tcp://' + gethostname() + ':9002')
socket.setsockopt_string(zmq.SUBSCRIBE, u'kmet1_')

poller = zmq.Poller()
poller.register(socket, zmq.POLLIN)

D = {}  # latest samples of all received variables
Dt = {}  # one list of timestamps per variable, max list length = 10


def taskRecv():
    try:
        socks = dict(poller.poll(100))
        if socket in socks and zmq.POLLIN == socks[socket]:
            msg = socket.recv_string()
            if len(msg):
                msg = msg.split(',', 1)
                d = json.loads(msg[1])
                tag = d['tag']
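The visible portion of this example cuts off inside taskRecv, but the Twisted imports at the top suggest the function is meant to be driven periodically by a LoopingCall. A minimal sketch of that wiring, under that assumption (the interval is illustrative):

# hypothetical wiring, not part of the original excerpt
recv_task = LoopingCall(taskRecv)
recv_task.start(0.1)  # poll the SUB socket every 100 ms
reactor.run()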
Example #7
def executor(ip, mgmt_ip, schedulers, thread_id):
    logging.basicConfig(filename='log_executor.txt',
                        level=logging.INFO,
                        format='%(asctime)s %(message)s')

    ctx = zmq.Context(1)
    poller = zmq.Poller()

    pin_socket = ctx.socket(zmq.PULL)
    pin_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.PIN_PORT + thread_id))

    unpin_socket = ctx.socket(zmq.PULL)
    unpin_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                      (sutils.UNPIN_PORT + thread_id))

    exec_socket = ctx.socket(zmq.PULL)
    exec_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                     (sutils.FUNC_EXEC_PORT + thread_id))

    dag_queue_socket = ctx.socket(zmq.PULL)
    dag_queue_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                          (sutils.DAG_QUEUE_PORT + thread_id))

    dag_exec_socket = ctx.socket(zmq.PULL)
    dag_exec_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                         (sutils.DAG_EXEC_PORT + thread_id))

    self_depart_socket = ctx.socket(zmq.PULL)
    self_depart_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                            (sutils.SELF_DEPART_PORT + thread_id))

    pusher_cache = SocketCache(ctx, zmq.PUSH)

    poller = zmq.Poller()
    poller.register(pin_socket, zmq.POLLIN)
    poller.register(unpin_socket, zmq.POLLIN)
    poller.register(exec_socket, zmq.POLLIN)
    poller.register(dag_queue_socket, zmq.POLLIN)
    poller.register(dag_exec_socket, zmq.POLLIN)
    poller.register(self_depart_socket, zmq.POLLIN)

    client = IpcAnnaClient(thread_id)

    status = ThreadStatus()
    status.ip = ip
    status.tid = thread_id
    status.running = True
    utils._push_status(schedulers, pusher_cache, status)

    departing = False

    # for every function pinned on this executor, we track a map of
    # execution ids to DAG schedules
    queue = {}

    # track the actual function objects that we are storing here
    pinned_functions = {}

    # tracks runtime cost of executing a DAG function
    runtimes = {}

    # if multiple triggers are necessary for a function, track the triggers as
    # we receive them
    received_triggers = {}

    # track when we received a function request, so we can report e2e latency
    receive_times = {}

    # track how many functions we're executing
    exec_counts = {}

    # metadata to track thread utilization
    report_start = time.time()
    event_occupancy = {
        'pin': 0.0,
        'unpin': 0.0,
        'func_exec': 0.0,
        'dag_queue': 0.0,
        'dag_exec': 0.0
    }
    total_occupancy = 0.0

    while True:
        socks = dict(poller.poll(timeout=1000))

        if pin_socket in socks and socks[pin_socket] == zmq.POLLIN:
            work_start = time.time()
            pin(pin_socket, pusher_cache, client, status, pinned_functions,
                runtimes, exec_counts)
            utils._push_status(schedulers, pusher_cache, status)

            elapsed = time.time() - work_start
            event_occupancy['pin'] += elapsed
            total_occupancy += elapsed

        if unpin_socket in socks and socks[unpin_socket] == zmq.POLLIN:
            work_start = time.time()
            unpin(unpin_socket, status, pinned_functions, runtimes,
                  exec_counts)
            utils._push_status(schedulers, pusher_cache, status)

            elapsed = time.time() - work_start
            event_occupancy['unpin'] += elapsed
            total_occupancy += elapsed

        if exec_socket in socks and socks[exec_socket] == zmq.POLLIN:
            work_start = time.time()
            exec_function(exec_socket, client, status, ip, thread_id)
            utils._push_status(schedulers, pusher_cache, status)

            elapsed = time.time() - work_start
            event_occupancy['func_exec'] += elapsed
            total_occupancy += elapsed

        if dag_queue_socket in socks and socks[dag_queue_socket] == zmq.POLLIN:
            work_start = time.time()

            schedule = DagSchedule()
            schedule.ParseFromString(dag_queue_socket.recv())
            fname = schedule.target_function

            logging.info('Received a schedule for DAG %s (%s), function %s.' %
                         (schedule.dag.name, schedule.id, fname))

            if fname not in queue:
                queue[fname] = {}

            queue[fname][schedule.id] = schedule

            if (schedule.id, fname) not in receive_times:
                receive_times[(schedule.id, fname)] = time.time()

            # in case we receive the trigger before we receive the schedule, we
            # can trigger from this operation as well
            trkey = (schedule.id, fname)
            if trkey in received_triggers and \
                    len(received_triggers[trkey]) == \
                            len(schedule.triggers):
                exec_dag_function(pusher_cache, client,
                                  received_triggers[trkey],
                                  pinned_functions[fname], schedule, ip,
                                  thread_id)
                del received_triggers[trkey]
                del queue[fname][schedule.id]

                fend = time.time()
                fstart = receive_times[(schedule.id, fname)]
                runtimes[fname] += fend - fstart
                exec_counts[fname] += 1

            elapsed = time.time() - work_start
            event_occupancy['dag_queue'] += elapsed
            total_occupancy += elapsed

        if dag_exec_socket in socks and socks[dag_exec_socket] == zmq.POLLIN:
            work_start = time.time()
            trigger = DagTrigger()
            trigger.ParseFromString(dag_exec_socket.recv())

            fname = trigger.target_function
            logging.info('Received a trigger for schedule %s, function %s.' %
                         (trigger.id, fname))

            key = (trigger.id, fname)
            if key not in received_triggers:
                received_triggers[key] = {}

            if (trigger.id, fname) not in receive_times:
                receive_times[(trigger.id, fname)] = time.time()

            received_triggers[key][trigger.source] = trigger
            if fname in queue and trigger.id in queue[fname]:
                schedule = queue[fname][trigger.id]
                if len(received_triggers[key]) == len(schedule.triggers):
                    exec_dag_function(pusher_cache, client,
                                      received_triggers[key],
                                      pinned_functions[fname], schedule, ip,
                                      thread_id)
                    del received_triggers[key]
                    del queue[fname][trigger.id]

                    fend = time.time()
                    fstart = receive_times[(trigger.id, fname)]
                    runtimes[fname] += fend - fstart
                    exec_counts[fname] += 1

            elapsed = time.time() - work_start
            event_occupancy['dag_exec'] += elapsed
            total_occupancy += elapsed

        if self_depart_socket in socks and socks[self_depart_socket] == \
                zmq.POLLIN:
            # This message should not matter
            msg = self_depart_socket.recv()

            logging.info('Preparing to depart. No longer accepting requests ' +
                         'and clearing all queues.')

            status.ClearField('functions')
            status.running = False
            utils._push_status(schedulers, pusher_cache, status)

            departing = True

        # periodically report function occupancy
        report_end = time.time()
        if report_end - report_start > REPORT_THRESH:
            # periodically report my status to schedulers
            utils._push_status(schedulers, pusher_cache, status)

            utilization = total_occupancy / (report_end - report_start)
            status.utilization = utilization

            if utilization > 0.5:
                msg = ip + ':' + str(thread_id)
                for scheduler in schedulers:
                    sckt = pusher_cache.get(
                        sutils._get_backoff_addresss(scheduler))
                    sckt.send_string(msg)

            sckt = pusher_cache.get(utils._get_util_report_address(mgmt_ip))
            sckt.send(status.SerializeToString())

            logging.info('Total thread occupancy: %.6f' % (utilization))

            for event in event_occupancy:
                occ = event_occupancy[event] / (report_end - report_start)
                logging.info('Event %s occupancy: %.6f' % (event, occ))
                event_occupancy[event] = 0.0

            stats = ExecutorStatistics()
            for fname in runtimes:
                if exec_counts[fname] > 0:
                    fstats = stats.statistics.add()
                    fstats.fname = fname
                    fstats.runtime = runtimes[fname]
                    fstats.call_count = exec_counts[fname]

                runtimes[fname] = 0.0
                exec_counts[fname] = 0

            sckt = pusher_cache.get(
                sutils._get_statistics_report_address(mgmt_ip))
            sckt.send(stats.SerializeToString())

            report_start = time.time()
            total_occupancy = 0.0

            # periodically clear any old functions we have cached that we are
            # no longer accepting requests for
            for fname in list(queue):
                if len(queue[fname]) == 0 and fname not in status.functions:
                    del queue[fname]
                    del pinned_functions[fname]
                    del runtimes[fname]
                    del exec_counts[fname]

            # if we are departing and have cleared our queues, let the
            # management server know, and exit the process
            if departing and len(queue) == 0:
                sckt = pusher_cache.get(utils._get_depart_done_addr(mgmt_ip))
                sckt.send_string(ip)

                return 0
Example #8
 def __init__(self):
     self.context = zmq.Context(1)
     self.zmq_s = None
     self.poll = zmq.Poller()
     self.master_uri = config.get_conf().master_url  # type: str
     self._connect()
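The _connect call at the end of __init__ is not included in this excerpt. A minimal sketch of what such a method might do, assuming a DEALER socket to the master; the socket type and the reconnect handling are assumptions, not the project's actual code.

    def _connect(self):
        # hypothetical implementation: (re)create the socket, connect to the
        # master, and register it with the poller for incoming messages
        if self.zmq_s is not None:
            self.poll.unregister(self.zmq_s)
            self.zmq_s.close()

        self.zmq_s = self.context.socket(zmq.DEALER)
        self.zmq_s.connect(self.master_uri)
        self.poll.register(self.zmq_s, zmq.POLLIN)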
Example #9
def controlsd_thread(gctx, rate=100):
    # start the loop
    set_realtime_priority(3)

    context = zmq.Context()

    params = Params()

    # pub
    live100 = messaging.pub_sock(context, service_list['live100'].port)
    carstate = messaging.pub_sock(context, service_list['carState'].port)
    carcontrol = messaging.pub_sock(context, service_list['carControl'].port)
    livempc = messaging.pub_sock(context, service_list['liveMpc'].port)

    passive = params.get("Passive") != "0"
    if not passive:
        sendcan = messaging.pub_sock(context, service_list['sendcan'].port)
    else:
        sendcan = None

    # sub
    poller = zmq.Poller()
    thermal = messaging.sub_sock(context,
                                 service_list['thermal'].port,
                                 conflate=True,
                                 poller=poller)
    health = messaging.sub_sock(context,
                                service_list['health'].port,
                                conflate=True,
                                poller=poller)
    cal = messaging.sub_sock(context,
                             service_list['liveCalibration'].port,
                             conflate=True,
                             poller=poller)

    logcan = messaging.sub_sock(context, service_list['can'].port)

    CC = car.CarControl.new_message()

    CI, CP = get_car(logcan, sendcan, 1.0 if passive else None)

    if CI is None:
        raise Exception("unsupported car")

    if passive:
        CP.safetyModel = car.CarParams.SafetyModels.noOutput

    fcw_enabled = params.get("IsFcwEnabled") == "1"

    PL = Planner(CP, fcw_enabled)
    LoC = LongControl(CP, CI.compute_gb)
    VM = VehicleModel(CP)
    LaC = LatControl(VM)
    AM = AlertManager()

    if not passive:
        AM.add("startup", False)

    # write CarParams
    params.put("CarParams", CP.to_bytes())

    state = State.disabled
    soft_disable_timer = 0
    v_cruise_kph = 255
    overtemp = False
    free_space = False
    cal_status = Calibration.UNCALIBRATED
    rear_view_toggle = False
    rear_view_allowed = params.get("IsRearViewMirror") == "1"

    # 0.0 - 1.0
    awareness_status = 1.
    v_cruise_kph_last = 0

    rk = Ratekeeper(rate, print_delay_threshold=2. / 1000)

    # learned angle offset
    angle_offset = 1.5  # Default model bias
    calibration_params = params.get("CalibrationParams")
    if calibration_params:
        try:
            calibration_params = json.loads(calibration_params)
            angle_offset = calibration_params["angle_offset"]
        except (ValueError, KeyError):
            pass

    prof = Profiler(False)  # off by default

    while 1:

        prof.checkpoint("Ratekeeper", ignore=True)  # rk is here

        # sample data and compute car events
        CS, events, cal_status, overtemp, free_space = data_sample(
            CI, CC, thermal, cal, health, poller, cal_status, overtemp,
            free_space)
        prof.checkpoint("Sample")

        # define plan
        plan, plan_ts = calc_plan(CS, events, PL, LoC, v_cruise_kph,
                                  awareness_status)
        prof.checkpoint("Plan")

        if not passive:
            # update control state
            state, soft_disable_timer, v_cruise_kph, v_cruise_kph_last = state_transition(
                CS, CP, state, events, soft_disable_timer, v_cruise_kph, AM)
            prof.checkpoint("State transition")

        # compute actuators
        actuators, v_cruise_kph, awareness_status, angle_offset, rear_view_toggle = state_control(
            plan, CS, CP, state, events, v_cruise_kph, v_cruise_kph_last, AM,
            rk, awareness_status, PL, LaC, LoC, VM, angle_offset,
            rear_view_allowed, rear_view_toggle)
        prof.checkpoint("State Control")

        # publish data
        CC = data_send(plan, plan_ts, CS, CI, CP, VM, state, events, actuators,
                       v_cruise_kph, rk, carstate, carcontrol, live100,
                       livempc, AM, rear_view_allowed, rear_view_toggle,
                       awareness_status, LaC, LoC, angle_offset, passive)
        prof.checkpoint("Sent")

        # *** run loop at fixed rate ***
        rk.keep_time()

        prof.display()
Example #10
def executor(ip, mgmt_ip, schedulers, thread_id):
    logging.basicConfig(filename='log_executor.txt', level=logging.INFO,
                        format='%(asctime)s %(message)s')

    context = zmq.Context(1)
    poller = zmq.Poller()

    pin_socket = context.socket(zmq.PULL)
    pin_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.PIN_PORT + thread_id))

    unpin_socket = context.socket(zmq.PULL)
    unpin_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.UNPIN_PORT +
                                                   thread_id))

    exec_socket = context.socket(zmq.PULL)
    exec_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.FUNC_EXEC_PORT +
                                                  thread_id))

    dag_queue_socket = context.socket(zmq.PULL)
    dag_queue_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.DAG_QUEUE_PORT
                                                       + thread_id))

    dag_exec_socket = context.socket(zmq.PULL)
    dag_exec_socket.bind(sutils.BIND_ADDR_TEMPLATE % (sutils.DAG_EXEC_PORT
                                                      + thread_id))

    self_depart_socket = context.socket(zmq.PULL)
    self_depart_socket.bind(sutils.BIND_ADDR_TEMPLATE %
                            (sutils.SELF_DEPART_PORT + thread_id))

    pusher_cache = SocketCache(context, zmq.PUSH)

    poller = zmq.Poller()
    poller.register(pin_socket, zmq.POLLIN)
    poller.register(unpin_socket, zmq.POLLIN)
    poller.register(exec_socket, zmq.POLLIN)
    poller.register(dag_queue_socket, zmq.POLLIN)
    poller.register(dag_exec_socket, zmq.POLLIN)
    poller.register(self_depart_socket, zmq.POLLIN)

    # If the management IP is set to None, that means that we are running in
    # local mode, so we use a regular AnnaTcpClient rather than an IPC client.
    if mgmt_ip:
        client = AnnaIpcClient(thread_id, context)
    else:
        client = AnnaTcpClient('127.0.0.1', '127.0.0.1', local=True, offset=1)

    user_library = DropletUserLibrary(context, pusher_cache, ip, thread_id,
                                      client)

    status = ThreadStatus()
    status.ip = ip
    status.tid = thread_id
    status.running = True
    utils.push_status(schedulers, pusher_cache, status)

    departing = False

    # Maintains a request queue for each function pinned on this executor. Each
    # function will have a set of request IDs mapped to it, and this map stores
    # a schedule for each request ID.
    queue = {}

    # Tracks the actual function objects that are pinned to this executor.
    pinned_functions = {}

    # Tracks runtime cost of executing a DAG function.
    runtimes = {}

    # If multiple triggers are necessary for a function, track the triggers as
    # we receive them. This is also used if a trigger arrives before its
    # corresponding schedule.
    received_triggers = {}

    # Tracks when we received a function request, so we can report end-to-end
    # latency for the whole execution.
    receive_times = {}

    # Tracks the number of requests we are finishing for each function pinned
    # here.
    exec_counts = {}

    # Tracks the end-to-end runtime of each DAG request for which we are the
    # sink function.
    dag_runtimes = {}

    # A map with KVS keys and their corresponding deserialized payloads.
    cache = {}

    # Internal metadata to track thread utilization.
    report_start = time.time()
    event_occupancy = {'pin': 0.0,
                       'unpin': 0.0,
                       'func_exec': 0.0,
                       'dag_queue': 0.0,
                       'dag_exec': 0.0}
    total_occupancy = 0.0

    while True:
        socks = dict(poller.poll(timeout=1000))

        if pin_socket in socks and socks[pin_socket] == zmq.POLLIN:
            work_start = time.time()
            pin(pin_socket, pusher_cache, client, status, pinned_functions,
                runtimes, exec_counts)
            utils.push_status(schedulers, pusher_cache, status)

            elapsed = time.time() - work_start
            event_occupancy['pin'] += elapsed
            total_occupancy += elapsed

        if unpin_socket in socks and socks[unpin_socket] == zmq.POLLIN:
            work_start = time.time()
            unpin(unpin_socket, status, pinned_functions, runtimes,
                  exec_counts)
            utils.push_status(schedulers, pusher_cache, status)

            elapsed = time.time() - work_start
            event_occupancy['unpin'] += elapsed
            total_occupancy += elapsed

        if exec_socket in socks and socks[exec_socket] == zmq.POLLIN:
            work_start = time.time()
            exec_function(exec_socket, client, user_library, cache)
            user_library.close()

            utils.push_status(schedulers, pusher_cache, status)

            elapsed = time.time() - work_start
            event_occupancy['func_exec'] += elapsed
            total_occupancy += elapsed

        if dag_queue_socket in socks and socks[dag_queue_socket] == zmq.POLLIN:
            work_start = time.time()

            schedule = DagSchedule()
            schedule.ParseFromString(dag_queue_socket.recv())
            fname = schedule.target_function

            logging.info('Received a schedule for DAG %s (%s), function %s.' %
                         (schedule.dag.name, schedule.id, fname))

            if fname not in queue:
                queue[fname] = {}

            queue[fname][schedule.id] = schedule

            if (schedule.id, fname) not in receive_times:
                receive_times[(schedule.id, fname)] = time.time()

            # In case we receive the trigger before we receive the schedule, we
            # can trigger from this operation as well.
            trkey = (schedule.id, fname)
            if (trkey in received_triggers and (len(received_triggers[trkey])
                                                == len(schedule.triggers))):

                exec_dag_function(pusher_cache, client,
                                  received_triggers[trkey],
                                  pinned_functions[fname], schedule,
                                  user_library, dag_runtimes, cache)
                user_library.close()

                del received_triggers[trkey]
                del queue[fname][schedule.id]

                fend = time.time()
                fstart = receive_times[(schedule.id, fname)]
                runtimes[fname].append(fend - fstart)
                exec_counts[fname] += 1

            elapsed = time.time() - work_start
            event_occupancy['dag_queue'] += elapsed
            total_occupancy += elapsed

        if dag_exec_socket in socks and socks[dag_exec_socket] == zmq.POLLIN:
            work_start = time.time()
            trigger = DagTrigger()
            trigger.ParseFromString(dag_exec_socket.recv())

            fname = trigger.target_function
            logging.info('Received a trigger for schedule %s, function %s.' %
                         (trigger.id, fname))

            key = (trigger.id, fname)
            if key not in received_triggers:
                received_triggers[key] = {}

            if (trigger.id, fname) not in receive_times:
                receive_times[(trigger.id, fname)] = time.time()

            received_triggers[key][trigger.source] = trigger
            if fname in queue and trigger.id in queue[fname]:
                schedule = queue[fname][trigger.id]
                if len(received_triggers[key]) == len(schedule.triggers):
                    exec_dag_function(pusher_cache, client,
                                      received_triggers[key],
                                      pinned_functions[fname], schedule,
                                      user_library, dag_runtimes, cache)
                    user_library.close()

                    del received_triggers[key]
                    del queue[fname][trigger.id]

                    fend = time.time()
                    fstart = receive_times[(trigger.id, fname)]
                    runtimes[fname].append(fend - fstart)
                    exec_counts[fname] += 1

            elapsed = time.time() - work_start
            event_occupancy['dag_exec'] += elapsed
            total_occupancy += elapsed

        if self_depart_socket in socks and socks[self_depart_socket] == \
                zmq.POLLIN:
            # This message does not matter.
            self_depart_socket.recv()

            logging.info('Preparing to depart. No longer accepting requests ' +
                         'and clearing all queues.')

            status.ClearField('functions')
            status.running = False
            utils.push_status(schedulers, pusher_cache, status)

            departing = True

        # periodically report function occupancy
        report_end = time.time()
        if report_end - report_start > REPORT_THRESH:
            cache.clear()

            utilization = total_occupancy / (report_end - report_start)
            status.utilization = utilization

            # Periodically report my status to schedulers with the utilization
            # set.
            utils.push_status(schedulers, pusher_cache, status)

            logging.info('Total thread occupancy: %.6f' % (utilization))

            for event in event_occupancy:
                occ = event_occupancy[event] / (report_end - report_start)
                logging.info('\tEvent %s occupancy: %.6f' % (event, occ))
                event_occupancy[event] = 0.0

            stats = ExecutorStatistics()
            for fname in runtimes:
                if exec_counts[fname] > 0:
                    fstats = stats.functions.add()
                    fstats.name = fname
                    fstats.call_count = exec_counts[fname]
                    fstats.runtime.extend(runtimes[fname])

                runtimes[fname].clear()
                exec_counts[fname] = 0

            for dname in dag_runtimes:
                dstats = stats.dags.add()
                dstats.name = dname

                dstats.runtimes.extend(dag_runtimes[dname])

                dag_runtimes[dname].clear()

            # If we are running in cluster mode, mgmt_ip will be set, and we
            # will report our status and statistics to it. Otherwise, we log
            # the statistics locally.
            if mgmt_ip:
                sckt = pusher_cache.get(sutils.get_statistics_report_address
                                        (mgmt_ip))
                sckt.send(stats.SerializeToString())

                sckt = pusher_cache.get(utils.get_util_report_address(mgmt_ip))
                sckt.send(status.SerializeToString())
            else:
                logging.info(stats)

            status.ClearField('utilization')
            report_start = time.time()
            total_occupancy = 0.0

            # Periodically clear any old functions we have cached that we are
            # no longer accepting requests for.
            for fname in list(queue):
                if len(queue[fname]) == 0 and fname not in status.functions:
                    del queue[fname]
                    del pinned_functions[fname]
                    del runtimes[fname]
                    del exec_counts[fname]

            # If we are departing and have cleared our queues, let the
            # management server know, and exit the process.
            if departing and len(queue) == 0:
                sckt = pusher_cache.get(utils.get_depart_done_addr(mgmt_ip))
                sckt.send_string(ip)

                # We specifically pass 1 as the exit code when ending our
                # process so that the wrapper script does not restart us.
                os._exit(1)
Example #11
def main():
    # Prepare our context and subscriber
    ctx = zmq.Context()
    snapshot = ctx.socket(zmq.DEALER)
    snapshot.linger = 0
    snapshot.connect("tcp://localhost:5556")
    subscriber = ctx.socket(zmq.SUB)
    subscriber.linger = 0
    subscriber.setsockopt(zmq.SUBSCRIBE, SUBTREE)
    subscriber.connect("tcp://localhost:5557")
    publisher = ctx.socket(zmq.PUSH)
    publisher.linger = 0
    publisher.connect("tcp://localhost:5558")

    random.seed(time.time())
    kvmap = {}

    # Get state snapshot
    sequence = 0
    snapshot.send_multipart([b"ICANHAZ?", SUBTREE])
    while True:
        try:
            kvmsg = KVMsg.recv(snapshot)
        except:
            return  # Interrupted

        if kvmsg.key == b"KTHXBAI":
            sequence = kvmsg.sequence
            print(f"I: Received snapshot={sequence:d}")
            break  # Done
        kvmsg.store(kvmap)

    poller = zmq.Poller()
    poller.register(subscriber, zmq.POLLIN)

    alarm = time.time() + 1
    while True:
        tickless = 1000 * max(0, alarm - time.time())
        try:
            items = dict(poller.poll(tickless))
        except:
            break  # Interrupted

        if subscriber in items:
            kvmsg = KVMsg.recv(subscriber)

            # Discard out-of-sequence kvmsgs, incl. heartbeats
            if kvmsg.sequence > sequence:
                sequence = kvmsg.sequence
                kvmsg.store(kvmap)
                print(f"I: received update={sequence:d}")

        # If we timed-out, generate a random kvmsg
        if time.time() >= alarm:
            kvmsg = KVMsg(0)
            kvmsg.key = SUBTREE + str(random.randint(1, 10000)).encode()
            kvmsg.body = str(random.randint(1, 1000000)).encode()
            kvmsg.send(publisher)
            kvmsg.store(kvmap)
            alarm = time.time() + 1

    print(f"Interrupted\n{sequence:d} messages in")
Example #12
    def handle(self, *args, **options):
        # Initialize logging.
        self.setup_logging("lava-logs", options["level"], options["log_file"],
                           FORMAT)

        self.logger.info("[INIT] Starting lava-logs")
        self.logger.info("[INIT] Version %s", __version__)

        self.logger.info("[INIT] Dropping privileges")
        if not self.drop_privileges(options["user"], options["group"]):
            self.logger.error("[INIT] Unable to drop privileges")
            return

        filename = os.path.join(settings.MEDIA_ROOT, "lava-logs-config.yaml")
        self.logger.debug("[INIT] Dumping config to %s", filename)
        with open(filename, "w") as output:
            yaml.dump(options, output)

        # Create the sockets
        context = zmq.Context()
        self.log_socket = context.socket(zmq.PULL)
        self.controler = context.socket(zmq.ROUTER)
        self.controler.setsockopt(zmq.IDENTITY, b"lava-logs")
        # Limit the number of messages in the queue
        self.controler.setsockopt(zmq.SNDHWM, 2)
        # From http://api.zeromq.org/4-2:zmq-setsockopt#toc5
        # "Immediately readies that connection for data transfer with the master"
        self.controler.setsockopt(zmq.CONNECT_RID, b"master")

        if options["ipv6"]:
            self.logger.info("[INIT] Enabling IPv6")
            self.log_socket.setsockopt(zmq.IPV6, 1)
            self.controler.setsockopt(zmq.IPV6, 1)

        if options["encrypt"]:
            self.logger.info("[INIT] Starting encryption")
            try:
                self.auth = ThreadAuthenticator(context)
                self.auth.start()
                self.logger.debug("[INIT] Opening master certificate: %s",
                                  options["master_cert"])
                master_public, master_secret = zmq.auth.load_certificate(
                    options["master_cert"])
                self.logger.debug("[INIT] Using slaves certificates from: %s",
                                  options["slaves_certs"])
                self.auth.configure_curve(domain="*",
                                          location=options["slaves_certs"])
            except OSError as err:
                self.logger.error("[INIT] %s", err)
                self.auth.stop()
                return
            self.log_socket.curve_publickey = master_public
            self.log_socket.curve_secretkey = master_secret
            self.log_socket.curve_server = True
            self.controler.curve_publickey = master_public
            self.controler.curve_secretkey = master_secret
            self.controler.curve_serverkey = master_public

        self.logger.debug("[INIT] Watching %s", options["slaves_certs"])
        self.cert_dir_path = options["slaves_certs"]
        self.inotify_fd = watch_directory(options["slaves_certs"])
        if self.inotify_fd is None:
            self.logger.error("[INIT] Unable to start inotify")

        self.log_socket.bind(options["socket"])
        self.controler.connect(options["master_socket"])

        # Poll on the sockets. This allows us to have a
        # nice timeout along with polling.
        self.poller = zmq.Poller()
        self.poller.register(self.log_socket, zmq.POLLIN)
        self.poller.register(self.controler, zmq.POLLIN)
        if self.inotify_fd is not None:
            self.poller.register(os.fdopen(self.inotify_fd), zmq.POLLIN)

        # Translate signals into zmq messages
        (self.pipe_r, _) = self.setup_zmq_signal_handler()
        self.poller.register(self.pipe_r, zmq.POLLIN)

        self.logger.info("[INIT] listening for logs")
        # PING right now: the master is waiting for this message to start
        # scheduling.
        self.controler.send_multipart([b"master", b"PING"])

        try:
            self.main_loop()
        except BaseException as exc:
            self.logger.error("[EXIT] Unknown exception raised, leaving!")
            self.logger.exception(exc)

        # Close the controler socket
        self.controler.close(linger=0)
        self.poller.unregister(self.controler)

        # Carefully close the logging socket as we don't want to lose messages
        self.logger.info(
            "[EXIT] Disconnect logging socket and process messages")
        endpoint = u(self.log_socket.getsockopt(zmq.LAST_ENDPOINT))
        self.logger.debug("[EXIT] unbinding from '%s'", endpoint)
        self.log_socket.unbind(endpoint)

        # Empty the queue
        try:
            while self.wait_for_messages(True):
                # Flush test cases cache for every iteration because we might
                # get killed soon.
                self.flush_test_cases()
        except BaseException as exc:
            self.logger.error("[EXIT] Unknown exception raised, leaving!")
            self.logger.exception(exc)
        finally:
            # Last flush
            self.flush_test_cases()
            self.logger.info(
                "[EXIT] Closing the logging socket: the queue is empty")
            self.log_socket.close()
            if options["encrypt"]:
                self.auth.stop()
            context.term()
Example #13
    def handle(self, *args, **options):
        # Initialize logging.
        self.setup_logging("lava-master", options["level"],
                           options["log_file"], FORMAT)

        self.logger.info("[INIT] Dropping privileges")
        if not self.drop_privileges(options["user"], options["group"]):
            self.logger.error("[INIT] Unable to drop privileges")
            return

        filename = os.path.join(settings.MEDIA_ROOT, "lava-master-config.yaml")
        self.logger.debug("[INIT] Dumping config to %s", filename)
        with open(filename, "w") as output:
            yaml.dump(options, output)

        self.logger.info("[INIT] Marking all workers as offline")
        with transaction.atomic():
            for worker in Worker.objects.select_for_update().all():
                worker.go_state_offline()
                worker.save()

        # Create the sockets
        context = zmq.Context()
        self.controler = context.socket(zmq.ROUTER)
        self.event_socket = context.socket(zmq.SUB)

        if options["ipv6"]:
            self.logger.info("[INIT] Enabling IPv6")
            self.controler.setsockopt(zmq.IPV6, 1)
            self.event_socket.setsockopt(zmq.IPV6, 1)

        if options["encrypt"]:
            self.logger.info("[INIT] Starting encryption")
            try:
                self.auth = ThreadAuthenticator(context)
                self.auth.start()
                self.logger.debug("[INIT] Opening master certificate: %s",
                                  options["master_cert"])
                master_public, master_secret = zmq.auth.load_certificate(
                    options["master_cert"])
                self.logger.debug("[INIT] Using slaves certificates from: %s",
                                  options["slaves_certs"])
                self.auth.configure_curve(domain="*",
                                          location=options["slaves_certs"])
            except OSError as err:
                self.logger.error(err)
                self.auth.stop()
                return
            self.controler.curve_publickey = master_public
            self.controler.curve_secretkey = master_secret
            self.controler.curve_server = True

            self.logger.debug("[INIT] Watching %s", options["slaves_certs"])
            self.inotify_fd = watch_directory(options["slaves_certs"])
            if self.inotify_fd is None:
                self.logger.error("[INIT] Unable to start inotify")

        self.controler.setsockopt(zmq.IDENTITY, b"master")
        # From http://api.zeromq.org/4-2:zmq-setsockopt#toc42
        # "If two clients use the same identity when connecting to a ROUTER
        # [...] the ROUTER socket shall hand-over the connection to the new
        # client and disconnect the existing one."
        self.controler.setsockopt(zmq.ROUTER_HANDOVER, 1)
        self.controler.bind(options["master_socket"])

        # Set the topic and connect
        self.event_socket.setsockopt(zmq.SUBSCRIBE, b(settings.EVENT_TOPIC))
        self.event_socket.connect(options["event_url"])

        # Poll on the sockets. This allows us to have a
        # nice timeout along with polling.
        self.poller = zmq.Poller()
        self.poller.register(self.controler, zmq.POLLIN)
        self.poller.register(self.event_socket, zmq.POLLIN)
        if self.inotify_fd is not None:
            self.poller.register(os.fdopen(self.inotify_fd), zmq.POLLIN)

        # Translate signals into zmq messages
        (self.pipe_r, _) = self.setup_zmq_signal_handler()
        self.poller.register(self.pipe_r, zmq.POLLIN)

        self.logger.info("[INIT] LAVA master has started.")
        self.logger.info("[INIT] Using protocol version %d", PROTOCOL_VERSION)

        try:
            self.main_loop(options)
        except BaseException as exc:
            self.logger.error("[CLOSE] Unknown exception raised, leaving!")
            self.logger.exception(exc)
        finally:
            # Drop controler socket: the protocol does handle lost messages
            self.logger.info(
                "[CLOSE] Closing the controler socket and dropping messages")
            self.controler.close(linger=0)
            self.event_socket.close(linger=0)
            if options["encrypt"]:
                self.auth.stop()
            context.term()
Example #14
def client_task(i, img_paths, option, path_overlay=None):
    # def client_task(i):
    """Request-reply client using DEALER socket"""

    # img_dict = get_images(img_path)
    img_dict = dict()
    img_nt_comp = dict()
    img_recv_dict = dict()
    all_items = set()

    poller = zmq.Poller()
    poller.register(client_data, zmq.POLLIN)
    poller.register(client_control, zmq.POLLIN)

    numbr_msgs = 0
    img_number = 0

    for img_path in img_paths:
        img_dict[img_number] = img_path
        task_msg, img_recv_dict[img_number] = create_msg(
            client_data.identity, img_number, img_path, option, path_overlay)
        numbr_msgs = numbr_msgs + len(task_msg)

        img_nt_comp[img_number] = task_msg

        send_imgs_to_server(client_data, task_msg)
        print("Sending {}".format(img_number))
        img_number = img_number + 1

    len_all_items = len(all_items)
    while numbr_msgs - len_all_items:
        # wait max 10 seconds for a reply, then complain
        try:
            l = int(liveness)
            if liveness > 0:
                try:
                    events = dict(poller.poll(10))
                except zmq.ZMQError:
                    print("Exited {}".format(client_data.identity))
                    return  # interrupted

                if events:
                    reply = recv_msg_server(client_data)
                    # print('Client received string {} from server'.format(reply[0]))
                    # assert reply == img_number, "expected %s, got %s" % (img_number, reply)

                    img_number = reply[0]['img_number']
                    chunk = reply[0]['chunk']

                    if tuple([img_number, chunk]) not in all_items:
                        all_items.add(tuple([img_number, chunk]))
                        len_all_items = len(all_items)

                    img_recv_dict[img_number][chunk] = reply[1]
                    img_nt_comp[img_number][chunk] = None

                    # client_control.send_string('Client {} received img {} and chunk {} from server'.format(client_data.identity,reply[0]['img_number'], reply[0]['chunk']))
                    print('Client {} received img {} and chunk {} from server'.
                          format(client_data.identity, reply[0]['img_number'],
                                 reply[0]['chunk']))
            else:
                while l < 1:
                    # busy-wait until the heartbeat thread reports the
                    # server as alive again
                    try:
                        l = int(liveness)
                    except (TypeError, ValueError):
                        pass
                count = 0
                for key in img_nt_comp.keys():
                    print("Resending {}".format(key))
                    send_imgs_to_server(client_data, img_nt_comp[key])
                print("Liveness {} msgs {}".format(l, len(all_items)))
        except Exception:
            # ignore transient errors and keep retrying until every chunk
            # has been received
            pass

    print("Starting Stitching")
    for img_number in img_recv_dict.keys():
        img_rgb = ip.readImageRGB(img_dict[img_number])
        img = ip.stitchImage(img_rgb, img_recv_dict[img_number])
        ip.writeimage(ip.get_folder(option),
                      'D_' + img_dict[img_number].split('/')[-1][:-4], img)
        # new = readImageRGB(os.path.join(path, filename) + '.jpg')
        # cv2.cvtColor(new, cv2.COLOR_BGR2RGB)
        # cv2.imwrite(os.path.join(path, filename) + '.jpg', new)
    print("End Stitching")
Beispiel #15
0
    # For a server with a (local) pool of workers, see
    #   zmq-guide.html#The-Asynchronous-Client-Server-Pattern
    # ie, http://zguide.zeromq.org/py:asyncsrv

    # Backend socket, for talking to workers.  Note 'inproc' transport.
    backend = context.socket(zmq.DEALER)
    backend.bind('inproc://backend')

    workers = []
    for i in range(2):
        worker = Worker(context)
        worker.start()
        workers.append(worker)

    poll = zmq.Poller()
    poll.register(socket, zmq.POLLIN)
    poll.register(backend, zmq.POLLIN)

    while True:
        #  Wait for next request from client, or reply from Worker
        print('Polling...')
        timeout = None
        events = poll.poll(timeout)

        for s, flag in events:
            if s == backend:
                print('Received result from worker')
                msgs = backend.recv_multipart()
                client = msgs[0]
                msg = msgs[1]
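
The example above stops mid-loop; the asynchronous client-server pattern it points to (zguide asyncsrv) can also be written with zmq.proxy() shuttling frames between the ROUTER frontend and the inproc DEALER backend. A minimal sketch under that assumption follows; the worker logic and the client port are placeholders, not taken from the snippet.

import threading

import zmq


def worker_task(context, i):
    # Each worker talks DEALER<->DEALER over the inproc backend
    sock = context.socket(zmq.DEALER)
    sock.connect("inproc://backend")
    while True:
        ident, msg = sock.recv_multipart()
        sock.send_multipart([ident, b"reply to " + msg])


def main():
    context = zmq.Context.instance()

    frontend = context.socket(zmq.ROUTER)
    frontend.bind("tcp://*:5570")           # placeholder client port

    backend = context.socket(zmq.DEALER)
    backend.bind("inproc://backend")

    for i in range(2):
        threading.Thread(target=worker_task, args=(context, i),
                         daemon=True).start()

    # Blocks forever, forwarding frames in both directions
    zmq.proxy(frontend, backend)


if __name__ == "__main__":
    main()
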
Beispiel #16
0
def radard_thread(gctx=None):
    set_realtime_priority(2)

    ##### AS
    # # wait for stats about the car to come in from controls
    # cloudlog.info("radard is waiting for CarParams")
    # CP = car.CarParams.from_bytes(Params().get("CarParams", block=True))
    # mocked = CP.carName == "mock"
    # VM = VehicleModel(CP)
    # cloudlog.info("radard got CarParams")

    # # import the radar from the fingerprint
    # cloudlog.info("radard is importing %s", CP.carName)
    # RadarInterface = importlib.import_module('selfdrive.car.%s.radar_interface' % CP.carName).RadarInterface
    # context = zmq.Context()

    cloudlog.info("radard is waiting for CarParams")
    CP = ToyotaInterface.get_params("TOYOTA PRIUS 2017", {})
    CP.steerRatio = 1.0
    mocked = True
    VM = VehicleModel(CP)
    cloudlog.info("radard got CarParams")
    # import the radar from the fingerprint
    cloudlog.info(
        "radard is NOT importing %s but using mock radar instead (AS)",
        CP.carName)
    RadarInterface = importlib.import_module(
        'selfdrive.car.mock.radar_interface').RadarInterface
    context = zmq.Context()
    ##### AS

    # *** subscribe to features and model from visiond
    poller = zmq.Poller()
    model = messaging.sub_sock(context,
                               service_list['model'].port,
                               conflate=True,
                               poller=poller)
    live100 = messaging.sub_sock(context,
                                 service_list['live100'].port,
                                 conflate=True,
                                 poller=poller)

    MP = ModelParser()
    RI = RadarInterface(CP)

    last_md_ts = 0
    last_l100_ts = 0

    # *** publish live20 and liveTracks
    live20 = messaging.pub_sock(context, service_list['live20'].port)
    liveTracks = messaging.pub_sock(context, service_list['liveTracks'].port)

    path_x = np.arange(0.0, 140.0, 0.1)  # 140 meters is max

    # Time-alignment
    rate = 20.  # model and radar are both at 20Hz
    tsv = 1. / rate
    v_len = 20  # how many speed data points to remember for t alignment with rdr data

    active = 0
    steer_angle = 0.
    steer_override = False

    tracks = defaultdict(dict)

    # Kalman filter stuff:
    ekfv = EKFV1D()
    speedSensorV = SimpleSensor(XV, 1, 2)

    # v_ego
    v_ego = None
    v_ego_array = np.zeros([2, v_len])
    v_ego_t_aligned = 0.

    rk = Ratekeeper(rate, print_delay_threshold=np.inf)
    while 1:
        rr = RI.update()

        ar_pts = {}
        for pt in rr.points:
            ar_pts[pt.trackId] = [
                pt.dRel + RDR_TO_LDR, pt.yRel, pt.vRel, pt.measured
            ]

        # receive the live100s
        l100 = None
        md = None

        recv_live100 = False
        recv_model = False
        while not (recv_live100 and recv_model):
            for socket, event in poller.poll(0):
                if socket is live100:
                    l100 = messaging.recv_one(socket)
                    recv_live100 = True
                elif socket is model:
                    md = messaging.recv_one(socket)
                    recv_model = True

        if l100 is not None:
            active = l100.live100.active
            v_ego = l100.live100.vEgo
            steer_angle = l100.live100.angleSteers
            steer_override = l100.live100.steerOverride

            v_ego_array = np.append(v_ego_array,
                                    [[v_ego], [float(rk.frame) / rate]], 1)
            v_ego_array = v_ego_array[:, 1:]

            last_l100_ts = l100.logMonoTime

        if v_ego is None:
            continue

        if md is not None:
            last_md_ts = md.logMonoTime

        # *** get path prediction from the model ***
        MP.update(v_ego, md)

        # run kalman filter only if prob is high enough
        if MP.lead_prob > 0.7:
            reading = speedSensorV.read(MP.lead_dist,
                                        covar=np.matrix(MP.lead_var))
            ekfv.update_scalar(reading)
            ekfv.predict(tsv)

            # When changing lanes the distance to the lead car can suddenly change,
            # which makes the Kalman filter output large relative acceleration
            if mocked and abs(MP.lead_dist - ekfv.state[XV]) > 2.0:
                ekfv.state[XV] = MP.lead_dist
                ekfv.covar = (np.diag([MP.lead_var, ekfv.var_init]))
                ekfv.state[SPEEDV] = 0.

            ar_pts[VISION_POINT] = (float(ekfv.state[XV]),
                                    np.polyval(MP.d_poly,
                                               float(ekfv.state[XV])),
                                    float(ekfv.state[SPEEDV]), False)
        else:
            ekfv.state[XV] = MP.lead_dist
            ekfv.covar = (np.diag([MP.lead_var, ekfv.var_init]))
            ekfv.state[SPEEDV] = 0.

            if VISION_POINT in ar_pts:
                del ar_pts[VISION_POINT]

        # *** compute the likely path_y ***
        if (active and not steer_override) or mocked:
            # use path from model (always when mocking as steering is too noisy)
            path_y = np.polyval(MP.d_poly, path_x)
        else:
            # use path from steering angle; set angle_offset to 0 since it
            # does not only report the physical offset
            path_y = calc_lookahead_offset(v_ego,
                                           steer_angle,
                                           path_x,
                                           VM,
                                           angle_offset=0)[0]

        # *** remove missing points from meta data ***
        for ids in tracks.keys():
            if ids not in ar_pts:
                tracks.pop(ids, None)

        # *** compute the tracks ***
        for ids in ar_pts:
            # ignore standalone vision point, unless we are mocking the radar
            if ids == VISION_POINT and not mocked:
                continue
            rpt = ar_pts[ids]

            # align v_ego by a fixed time to align it with the radar measurement
            cur_time = float(rk.frame) / rate
            v_ego_t_aligned = np.interp(cur_time - RI.delay, v_ego_array[1],
                                        v_ego_array[0])
            d_path = np.sqrt(
                np.amin((path_x - rpt[0])**2 + (path_y - rpt[1])**2))
            # add sign
            d_path *= np.sign(rpt[1] - np.interp(rpt[0], path_x, path_y))

            # create the track if it doesn't exist or it's a new track
            if ids not in tracks:
                tracks[ids] = Track()
            tracks[ids].update(rpt[0], rpt[1], rpt[2], d_path, v_ego_t_aligned,
                               rpt[3], steer_override)

        # allow the vision model to remove the stationary flag if distance and rel speed roughly match
        if VISION_POINT in ar_pts:
            fused_id = None
            best_score = NO_FUSION_SCORE
            for ids in tracks:
                dist_to_vision = np.sqrt(
                    (0.5 * (ar_pts[VISION_POINT][0] - tracks[ids].dRel))**2 +
                    (2 * (ar_pts[VISION_POINT][1] - tracks[ids].yRel))**2)
                rel_speed_diff = abs(ar_pts[VISION_POINT][2] -
                                     tracks[ids].vRel)
                tracks[ids].update_vision_score(dist_to_vision, rel_speed_diff)
                if best_score > tracks[ids].vision_score:
                    fused_id = ids
                    best_score = tracks[ids].vision_score

            if fused_id is not None:
                tracks[fused_id].vision_cnt += 1
                tracks[fused_id].update_vision_fusion()

        if DEBUG:
            print("NEW CYCLE")
            if VISION_POINT in ar_pts:
                print("vision", ar_pts[VISION_POINT])

        idens = tracks.keys()
        track_pts = np.array(
            [tracks[iden].get_key_for_cluster() for iden in idens])

        # If we have multiple points, cluster them
        if len(track_pts) > 1:
            link = linkage_vector(track_pts, method='centroid')
            cluster_idxs = fcluster(link, 2.5, criterion='distance')
            clusters = [None] * max(cluster_idxs)

            for idx in xrange(len(track_pts)):
                cluster_i = cluster_idxs[idx] - 1

                if clusters[cluster_i] is None:
                    clusters[cluster_i] = Cluster()
                clusters[cluster_i].add(tracks[idens[idx]])
        elif len(track_pts) == 1:
            # TODO: why do we need this?
            clusters = [Cluster()]
            clusters[0].add(tracks[idens[0]])
        else:
            clusters = []

        if DEBUG:
            for i in clusters:
                print(i)
        # *** extract the lead car ***
        lead_clusters = [c for c in clusters if c.is_potential_lead(v_ego)]
        lead_clusters.sort(key=lambda x: x.dRel)
        lead_len = len(lead_clusters)

        # *** extract the second lead from the whole set of leads ***
        lead2_clusters = [
            c for c in lead_clusters if c.is_potential_lead2(lead_clusters)
        ]
        lead2_clusters.sort(key=lambda x: x.dRel)
        lead2_len = len(lead2_clusters)

        # *** publish live20 ***
        dat = messaging.new_message()
        dat.init('live20')
        dat.live20.mdMonoTime = last_md_ts
        dat.live20.canMonoTimes = list(rr.canMonoTimes)
        dat.live20.radarErrors = list(rr.errors)
        dat.live20.l100MonoTime = last_l100_ts
        if lead_len > 0:
            dat.live20.leadOne = lead_clusters[0].toLive20()
            if lead2_len > 0:
                dat.live20.leadTwo = lead2_clusters[0].toLive20()
            else:
                dat.live20.leadTwo.status = False
        else:
            dat.live20.leadOne.status = False

        dat.live20.cumLagMs = -rk.remaining * 1000.
        live20.send(dat.to_bytes())

        # *** publish tracks for UI debugging (keep last) ***
        dat = messaging.new_message()
        dat.init('liveTracks', len(tracks))

        for cnt, ids in enumerate(tracks.keys()):
            if DEBUG:
                print("id: %4.0f x:  %4.1f  y: %4.1f  vr: %4.1f d: %4.1f  va: %4.1f  vl: %4.1f  vlk: %4.1f alk: %4.1f  s: %1.0f  v: %1.0f" % \
                  (ids, tracks[ids].dRel, tracks[ids].yRel, tracks[ids].vRel,
                   tracks[ids].dPath, tracks[ids].vLat,
                   tracks[ids].vLead, tracks[ids].vLeadK,
                   tracks[ids].aLeadK,
                   tracks[ids].stationary,
                   tracks[ids].measured))
            dat.liveTracks[cnt] = {
                "trackId": ids,
                "dRel": float(tracks[ids].dRel),
                "yRel": float(tracks[ids].yRel),
                "vRel": float(tracks[ids].vRel),
                "aRel": float(tracks[ids].aRel),
                "stationary": bool(tracks[ids].stationary),
                "oncoming": bool(tracks[ids].oncoming),
            }
        liveTracks.send(dat.to_bytes())

        rk.monitor_time()
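
messaging.sub_sock(..., conflate=True, poller=poller) above is openpilot's wrapper around a SUB socket whose receive queue keeps only the newest message. A rough pyzmq-only equivalent is sketched below; the port is a placeholder and the real messaging helpers do more than this.

import zmq

ctx = zmq.Context.instance()

model = ctx.socket(zmq.SUB)
model.setsockopt(zmq.CONFLATE, 1)           # keep only the latest message
model.setsockopt(zmq.SUBSCRIBE, b"")        # subscribe to everything
model.connect("tcp://127.0.0.1:8009")       # placeholder port

poller = zmq.Poller()
poller.register(model, zmq.POLLIN)

while True:
    for sock, _ in poller.poll(timeout=100):
        if sock is model:
            # with CONFLATE set, this is always the freshest frame
            print(sock.recv())
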
Beispiel #17
0
def controlsd_thread(gctx=None, rate=100):
    gc.disable()

    # start the loop
    set_realtime_priority(3)

    context = zmq.Context()
    params = Params()

    # Pub Sockets
    live100 = messaging.pub_sock(context, service_list['live100'].port)
    carstate = messaging.pub_sock(context, service_list['carState'].port)
    carcontrol = messaging.pub_sock(context, service_list['carControl'].port)

    is_metric = params.get("IsMetric") == "1"
    passive = params.get("Passive") != "0"

    # No sendcan if passive
    if not passive:
        sendcan = messaging.pub_sock(context, service_list['sendcan'].port)
    else:
        sendcan = None

    # Sub sockets
    poller = zmq.Poller()
    thermal = messaging.sub_sock(context,
                                 service_list['thermal'].port,
                                 conflate=True,
                                 poller=poller)
    health = messaging.sub_sock(context,
                                service_list['health'].port,
                                conflate=True,
                                poller=poller)
    cal = messaging.sub_sock(context,
                             service_list['liveCalibration'].port,
                             conflate=True,
                             poller=poller)
    driver_monitor = messaging.sub_sock(context,
                                        service_list['driverMonitoring'].port,
                                        conflate=True,
                                        poller=poller)
    plan_sock = messaging.sub_sock(context,
                                   service_list['plan'].port,
                                   conflate=True,
                                   poller=poller)
    path_plan_sock = messaging.sub_sock(context,
                                        service_list['pathPlan'].port,
                                        conflate=True,
                                        poller=poller)
    logcan = messaging.sub_sock(context, service_list['can'].port)

    CC = car.CarControl.new_message()
    CI, CP = get_car(logcan, sendcan, 1.0 if passive else None)

    if CI is None:
        raise Exception("unsupported car")

    # if stock camera is connected, then force passive behavior
    if not CP.enableCamera:
        passive = True
        sendcan = None

    if passive:
        CP.safetyModel = car.CarParams.SafetyModels.noOutput

    LoC = LongControl(CP, CI.compute_gb)
    VM = VehicleModel(CP)
    LaC = LatControl(CP)
    AM = AlertManager()
    driver_status = DriverStatus()

    if not passive:
        AM.add("startup", False)

    # Write CarParams for radard and boardd safety mode
    params.put("CarParams", CP.to_bytes())
    params.put("LongitudinalControl",
               "1" if CP.openpilotLongitudinalControl else "0")

    state = State.disabled
    soft_disable_timer = 0
    v_cruise_kph = 255
    v_cruise_kph_last = 0
    overtemp = False
    free_space = False
    cal_status = Calibration.INVALID
    cal_perc = 0
    mismatch_counter = 0
    low_battery = False

    plan = messaging.new_message()
    plan.init('plan')
    path_plan = messaging.new_message()
    path_plan.init('pathPlan')

    rk = Ratekeeper(rate, print_delay_threshold=2. / 1000)
    controls_params = params.get("ControlsParams")

    # Read angle offset from previous drive
    angle_model_bias = 0.
    if controls_params is not None:
        try:
            controls_params = json.loads(controls_params)
            angle_model_bias = controls_params['angle_model_bias']
        except (ValueError, KeyError):
            pass

    prof = Profiler(False)  # off by default

    while True:
        start_time = int(sec_since_boot() * 1e9)
        prof.checkpoint("Ratekeeper", ignore=True)

        # Sample data and compute car events
        CS, events, cal_status, cal_perc, overtemp, free_space, low_battery, mismatch_counter, plan, path_plan  =\
          data_sample(CI, CC, plan_sock, path_plan_sock, thermal, cal, health, driver_monitor,
                      poller, cal_status, cal_perc, overtemp, free_space, low_battery, driver_status,
                      state, mismatch_counter, params, plan, path_plan)
        prof.checkpoint("Sample")

        path_plan_age = (start_time - path_plan.logMonoTime) / 1e9
        plan_age = (start_time - plan.logMonoTime) / 1e9
        if not path_plan.pathPlan.valid or plan_age > 0.5 or path_plan_age > 0.5:
            events.append(
                create_event('plannerError', [ET.NO_ENTRY, ET.SOFT_DISABLE]))
        if not path_plan.pathPlan.paramsValid:
            events.append(create_event('vehicleModelInvalid', [ET.WARNING]))
        events += list(plan.plan.events)

        # Only allow engagement with brake pressed when stopped behind another stopped car
        if CS.brakePressed and plan.plan.vTargetFuture >= STARTING_TARGET_SPEED and not CP.radarOffCan and CS.vEgo < 0.3:
            events.append(
                create_event('noTarget', [ET.NO_ENTRY, ET.IMMEDIATE_DISABLE]))

        if not passive:
            # update control state
            state, soft_disable_timer, v_cruise_kph, v_cruise_kph_last = \
              state_transition(CS, CP, state, events, soft_disable_timer, v_cruise_kph, AM)
            prof.checkpoint("State transition")

        # Compute actuators (runs PID loops and lateral MPC)
        actuators, v_cruise_kph, driver_status, angle_model_bias, v_acc, a_acc = \
          state_control(plan.plan, path_plan.pathPlan, CS, CP, state, events, v_cruise_kph,
                        v_cruise_kph_last, AM, rk, driver_status,
                        LaC, LoC, VM, angle_model_bias, passive, is_metric, cal_perc)

        prof.checkpoint("State Control")

        # Publish data
        CC = data_send(plan, path_plan, CS, CI, CP, VM, state, events,
                       actuators, v_cruise_kph, rk, carstate, carcontrol,
                       live100, AM, driver_status, LaC, LoC, angle_model_bias,
                       passive, start_time, params, v_acc, a_acc)
        prof.checkpoint("Sent")

        rk.keep_time()  # Run at 100Hz
        prof.display()
Beispiel #18
0
def main():
    # Address for each server to receive files
    servAddresses = []
    dataParts = {}
    dataIndex = {}
    dataOwner = {}

    context = zmq.Context()

    servers = context.socket(zmq.REP)
    servers.bind("tcp://*:5555")

    clients = context.socket(zmq.REP)
    clients.bind("tcp://*:6666")

    poller = zmq.Poller()
    poller.register(servers, zmq.POLLIN)
    poller.register(clients, zmq.POLLIN)

    while True:
        socks = dict(poller.poll(10))
        if clients in socks:
            print("Message from client")
            operation, *msg = clients.recv_multipart()
            if operation == b"availableServers":
                clients.send_multipart(servAddresses)
            elif operation == b"finished":
                dictSeg = eval(msg[0])
                ShaIn = msg[1]
                AddressIndexSha = msg[2]  # server that holds the file
                Owner = msg[3]  # owner
                filename = msg[4].decode('ascii')  # file name
                dataParts[ShaIn] = dictSeg
                dataIndex[ShaIn] = AddressIndexSha
                if Owner in dataOwner:
                    dataOwner[Owner].append(filename)
                else:
                    dataOwner[Owner] = [filename]
                clients.send(b"Ok")
                # print(dataParts)
                # print("########")
                # print(dataIndex)
                print("########")
                print(dataOwner)
            elif operation == b"serverIndex":
                shaIndex = msg[0]
                dictIndex = dataParts[shaIndex]
                locatIndex = dataIndex[shaIndex]
                clients.send_multipart([
                    shaIndex,
                    bytes(str(dictIndex), 'ascii'), locatIndex,
                    bytes(str(servAddresses), 'ascii')
                ])
            elif operation == b"tolist":
                username = msg[0]
                listFiles = dataOwner[username]
                clients.send_multipart([bytes(str(listFiles), 'ascii')])

        if servers in socks:
            print("Message from server")
            operation, *rest = servers.recv_multipart()
            if operation == b"newServer":
                servAddresses.append(rest[0])
                print(servAddresses)
                servers.send(b"Ok")
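
The broker above exposes two REP sockets: port 5555 for servers announcing themselves and port 6666 for client operations (availableServers, finished, serverIndex, tolist). A minimal sketch of the client side for two of those operations follows, assuming at least one server and the queried owner are already registered; the host is a placeholder.

import zmq

ctx = zmq.Context.instance()
broker = ctx.socket(zmq.REQ)
broker.connect("tcp://127.0.0.1:6666")      # placeholder broker address

# Ask which storage servers are registered right now
broker.send_multipart([b"availableServers"])
print("servers:", broker.recv_multipart())

# List the files stored by a given owner
broker.send_multipart([b"tolist", b"alice"])
print("files:", broker.recv_multipart())
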
Beispiel #19
0
 def __init__(self, socket=None, poller=None):
     self.socket = socket
     self.poller = zmq.Poller()
Beispiel #20
0
def console_monitor_service(port_pull=CONSOLE_MONITOR_PORT_PUSH, port_pub=CONSOLE_MONITOR_PORT_SUB, ip='127.0.0.1'):
    url_pull = "tcp://{ip}:{port_pull}".format(**locals())
    url_pub = "tcp://{ip}:{port_pub}".format(**locals())
    try:
        log.info('console_monitor_service starting')
        log.info('console_monitor_service pull url: {url_pull}'.format(**locals()))
        log.info('console_monitor_service pub url: {url_pub}'.format(**locals()))
        context = zmq.Context()

        receiver = context.socket(zmq.PULL)
        receiver.bind(url_pull)

        publisher = context.socket(zmq.XPUB)
        publisher.bind(url_pub)

        poller = zmq.Poller()
        poller.register(receiver, zmq.POLLIN)
        poller.register(publisher, zmq.POLLIN)

        # Cache the last 100 messages per cluster:
        cache = defaultdict(partial(deque, maxlen=100)) # cluster_name -> deque

        while True:
            events = dict(poller.poll(1000))
            
            if receiver in events:
                data = receiver.recv()
                topic, cluster, msg = data.split(' ', 2)
                cache[cluster].append(msg)
                # Mark message as realtime:
                msg = json.loads(msg)
                msg['realtime'] = True
                msg = json.dumps(msg)
                data = " ".join([topic, cluster, msg])
                log.debug("PUB - {msg}".format(msg=data))
                publisher.send(data)
            
            if publisher in events:
                event = publisher.recv()
                # Subscription events are one byte (0=unsubscribe, 1=subscribe)
                # followed by the topic:
                if event[0] == b'\x01':
                    topic, cluster = event[1:].strip().split(" ")
                    log.debug("SUBSCRIBE - {sub}".format(sub=event[1:]))
                    if topic == 'console':
                        # Client subscribed, send out previous messages:
                        log.debug("Sending backlog:")
                        for msg in cache[cluster]:
                            # Mark messages as non-realtime:
                            data = json.loads(msg)
                            data['realtime'] = False
                            msg = json.dumps(data)
                            data = "console {cluster} {msg}".format(cluster=cluster, msg=msg)
                            log.debug(data)
                            publisher.send(data)
                elif event[0] == b'\x00':
                    log.debug("UNSUBSCRIBE - {sub}".format(sub=event[1:]))
                    

    except Exception, e:
        # Log every error. If we're not running in the foreground, we
        # won't see the errors any other way:
        log.error(traceback.format_exc())
        log.info("console_monitor_service shutdown")
Beispiel #21
0
    def run(self):
        logger.debug('Id: %s. Starting a new Receiver for meta: %s' %
                     (self.identity, self.meta))
        self.msg = ('Top level exception in receiver')
        latest_snap = None
        with self._clean_exit_handler():
            self.law = APIWrapper()
            self.poll = zmq.Poller()
            self.dealer = self.ctx.socket(zmq.DEALER)
            self.dealer.setsockopt_string(zmq.IDENTITY, u'%s' % self.identity)
            self.dealer.set_hwm(10)
            self.dealer.connect('ipc://%s' %
                                settings.REPLICATION.get('ipc_socket'))
            self.poll.register(self.dealer, zmq.POLLIN)

            self.ack = True
            self.msg = ('Failed to get the sender ip for appliance: %s' %
                        self.sender_id)
            self.sender_ip = Appliance.objects.get(uuid=self.sender_id).ip

            if (not self.incremental):
                self.msg = ('Failed to verify/create share: %s.' % self.sname)
                self.create_share(self.sname, self.dest_pool)

                self.msg = ('Failed to create the replica metadata object '
                            'for share: %s.' % self.sname)
                data = {
                    'share': self.sname,
                    'appliance': self.sender_ip,
                    'src_share': self.src_share,
                }
                self.rid = self.create_rshare(data)
            else:
                self.msg = ('Failed to retrieve the replica metadata '
                            'object for share: %s.' % self.sname)
                rso = ReplicaShare.objects.get(share=self.sname)
                self.rid = rso.id
                # Find and send the current snapshot to the sender. This will
                # be used as the start by btrfs-send diff.
                self.msg = ('Failed to verify latest replication snapshot '
                            'on the system.')
                latest_snap = self._latest_snap(rso)

            self.msg = ('Failed to create receive trail for rid: %d' %
                        self.rid)
            data = {
                'snap_name': self.snap_name,
            }
            self.rtid = self.create_receive_trail(self.rid, data)

            # delete the share, move the oldest snap to share
            self.msg = ('Failed to promote the oldest Snapshot to Share.')
            oldest_snap = get_oldest_snap(self.snap_dir,
                                          self.num_retain_snaps,
                                          regex='_replication_')
            if (oldest_snap is not None):
                self.update_repclone(self.sname, oldest_snap)
                self.refresh_share_state()
                self.refresh_snapshot_state()

            self.msg = ('Failed to prune old Snapshots')
            self._delete_old_snaps(self.sname, self.snap_dir,
                                   self.num_retain_snaps + 1)

            # TODO: The following should be re-instantiated once we have a
            # TODO: working method for doing so. see validate_src_share.
            # self.msg = ('Failed to validate the source share(%s) on '
            #             'sender(uuid: %s '
            #             ') Did the ip of the sender change?' %
            #             (self.src_share, self.sender_id))
            # self.validate_src_share(self.sender_id, self.src_share)

            sub_vol = ('%s%s/%s' %
                       (settings.MNT_PT, self.dest_pool, self.sname))
            if (not is_subvol(sub_vol)):
                self.msg = ('Failed to create parent subvolume %s' % sub_vol)
                run_command([BTRFS, 'subvolume', 'create', sub_vol])

            self.msg = ('Failed to create snapshot directory: %s' %
                        self.snap_dir)
            run_command(['/usr/bin/mkdir', '-p', self.snap_dir])
            snap_fp = ('%s/%s' % (self.snap_dir, self.snap_name))

            # If the snapshot already exists, presumably from the previous
            # attempt and the sender tries to send the same, reply back with
            # snap_exists and do not start the btrfs-receive
            if (is_subvol(snap_fp)):
                logger.debug('Id: %s. Snapshot to be sent(%s) already '
                             'exists. Not starting a new receive process' %
                             (self.identity, snap_fp))
                self._send_recv('snap-exists')
                self._sys_exit(0)

            cmd = [BTRFS, 'receive', self.snap_dir]
            self.msg = ('Failed to start the low level btrfs receive '
                        'command(%s). Aborting.' % cmd)
            self.rp = subprocess.Popen(cmd,
                                       shell=False,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)

            self.msg = ('Failed to send receiver-ready')
            rcommand, rmsg = self._send_recv('receiver-ready', latest_snap
                                             or '')
            if (rcommand is None):
                logger.error('Id: %s. No response from the broker for '
                             'receiver-ready command. Aborting.' %
                             self.identity)
                self._sys_exit(3)

            term_commands = (
                'btrfs-send-init-error',
                'btrfs-send-unexpected-termination-error',
                'btrfs-send-nonzero-termination-error',
            )
            num_tries = 10
            poll_interval = 6000  # 6 seconds
            num_msgs = 0
            t0 = time.time()
            while (True):
                socks = dict(self.poll.poll(poll_interval))
                if (socks.get(self.dealer) == zmq.POLLIN):
                    # reset so we wait up to 60 seconds (poll_interval x
                    # num_tries milliseconds) for every message
                    num_tries = 10
                    command, message = self.dealer.recv_multipart()
                    if (command == 'btrfs-send-stream-finished'):
                        # this command concludes fsdata transfer. After this,
                        # btrfs-recev process should be
                        # terminated(.communicate).
                        if (self.rp.poll() is None):
                            self.msg = ('Failed to terminate btrfs-recv '
                                        'command')
                            out, err = self.rp.communicate()
                            out = out.split('\n')
                            err = err.split('\n')
                            logger.debug('Id: %s. Terminated btrfs-recv. '
                                         'cmd = %s out = %s err: %s rc: %s' %
                                         (self.identity, cmd, out, err,
                                          self.rp.returncode))
                        if (self.rp.returncode != 0):
                            self.msg = ('btrfs-recv exited with unexpected '
                                        'exitcode(%s). ' % self.rp.returncode)
                            raise Exception(self.msg)
                        data = {
                            'status': 'succeeded',
                            'kb_received': self.total_bytes_received / 1024,
                        }
                        self.msg = ('Failed to update receive trail for '
                                    'rtid: %d' % self.rtid)
                        self.update_receive_trail(self.rtid, data)

                        self._send_recv('btrfs-recv-finished')
                        self.refresh_share_state()
                        self.refresh_snapshot_state()

                        dsize, drate = self.size_report(
                            self.total_bytes_received, t0)
                        logger.debug('Id: %s. Receive complete. Total data '
                                     'transferred: %s. Rate: %s/sec.' %
                                     (self.identity, dsize, drate))
                        self._sys_exit(0)

                    if (command in term_commands):
                        self.msg = ('Terminal command(%s) received from the '
                                    'sender. Aborting.' % command)
                        raise Exception(self.msg)

                    if (self.rp.poll() is None):
                        self.rp.stdin.write(message)
                        self.rp.stdin.flush()
                        # @todo: implement advanced credit request system.
                        self.dealer.send_multipart([b'send-more', ''])
                        num_msgs += 1
                        self.total_bytes_received += len(message)
                        if (num_msgs == 1000):
                            num_msgs = 0
                            data = {
                                'status': 'pending',
                                'kb_received':
                                self.total_bytes_received / 1024,
                            }
                            self.update_receive_trail(self.rtid, data)

                            dsize, drate = self.size_report(
                                self.total_bytes_received, t0)
                            logger.debug('Id: %s. Receiver alive. Data '
                                         'transferred: %s. Rate: %s/sec.' %
                                         (self.identity, dsize, drate))
                    else:
                        out, err = self.rp.communicate()
                        out = out.split('\n')
                        err = err.split('\n')
                        logger.error('Id: %s. btrfs-recv died unexpectedly. '
                                     'cmd: %s out: %s. err: %s' %
                                     (self.identity, cmd, out, err))
                        msg = (
                            'Low level system error from btrfs receive '
                            'command. cmd: %s out: %s err: %s for rtid: %s' %
                            (cmd, out, err, self.rtid))
                        data = {
                            'status': 'failed',
                            'error': msg,
                        }
                        self.msg = ('Failed to update receive trail for '
                                    'rtid: %d.' % self.rtid)
                        self.update_receive_trail(self.rtid, data)
                        self.msg = msg
                        raise Exception(self.msg)
                else:
                    num_tries -= 1
                    msg = ('No response received from the broker. '
                           'remaining tries: %d' % num_tries)
                    logger.error('Id: %s. %s' % (self.identity, msg))
                    if (num_tries == 0):
                        self.msg = ('%s. Terminating the receiver.' % msg)
                        raise Exception(self.msg)
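
The receiver above acknowledges every btrfs stream chunk with a 'send-more' frame, which throttles the sender to one message in flight (the TODO mentions a proper credit system). A minimal sketch of both ends of that handshake over DEALER sockets follows; the endpoint and framing are assumptions, not Rockstor's actual replication protocol.

import zmq

IPC = "ipc:///tmp/replication-demo"         # placeholder endpoint


def sender(chunks):
    ctx = zmq.Context.instance()
    sock = ctx.socket(zmq.DEALER)
    sock.connect(IPC)
    for chunk in chunks:
        sock.send_multipart([b"btrfs-send-stream", chunk])
        command, _ = sock.recv_multipart()   # wait for the credit
        assert command == b"send-more"
    sock.send_multipart([b"btrfs-send-stream-finished", b""])


def receiver():
    ctx = zmq.Context.instance()
    sock = ctx.socket(zmq.DEALER)
    sock.bind(IPC)
    total = 0
    while True:
        command, payload = sock.recv_multipart()
        if command == b"btrfs-send-stream-finished":
            print("received %d bytes" % total)
            return
        total += len(payload)                # the real code pipes this into btrfs receive
        sock.send_multipart([b"send-more", b""])
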
Beispiel #22
0
 def init(self, install_sig_handler=True):  # type: (Optional[bool]) -> None
     super(Sink, self).init(install_sig_handler)
     self.poller = zmq.Poller()
     self.poller.register(self.sockets['worker_results'], zmq.POLLIN)
     self.poller.register(self.sockets['job_ids_to_sink'], zmq.POLLIN)
Beispiel #23
0
def main():

    ##########################
    # Input/output physical connections
    ao_wavelength_physchan = '/Dev6229/ao1'
    ctrin_mcp_physchan = '/Dev6229/ctr0'
    ai_physchan_list = ['/Dev6229/ai2', '/Dev6229/ai3']
    ai_physchan_description = ['hv_monitor', 'blue_power_monitor']
    wavemeter_address = 'tcp://192.168.68.43:5678'
    plotter_port = '5679'

    #########
    # Control of the ISEG power supplies
    #########
    # ip address of the iCS2
    ip = '10.100.12.27'  # Updated, this is on the UoM network, not atom
    # username for iCS2
    usr = '******'
    # password for the iCS2
    passwd = 'molly7802'  # How is that security?
    # Shorthand string for '/api/getItem/'
    apiget = 'http://' + ip + '/api/getItem/'
    # Shorthand string for '/api/setItem/'
    apiset = 'http://' + ip + '/api/setItem/'

    ########
    # Get API key to be used for the rest of session
    ########
    # Returns API Key to be identified for session
    r = requests.get('http://' + ip + '/api/login/' + usr + '/' + passwd)
    sessionid = r.json()['i']

    ##########################
    # Output parameters
    dwell_time = 0.075  # Time spent at a given wavelength
    wavelength_stabilisation_time = 0.01  # Time between ramp and data recording
    wavelenght_max_voltage_jump = 1E-3  # Can't remember exactly what this does
    wavelength_jump_period = 0.001  # Controls the sharpness of the transition between wavelength values
    default_ao_wavelength_val = 0  # Value to reset the wavelength ramping AO

    ##########################
    # HV parameters
    hv_min = 1000.0  # Minimum voltage
    hv_max = 1100.0  # Maximum voltage
    hv_numpoints = 2  # Number of voltages at which scans will be conducted
    hv_ramp = linspace(hv_min, hv_max, hv_numpoints)  # Make a list of voltages

    ##########################
    # wavelength parameters
    '''
        Wavelength is scanned by putting additional voltage onto the stack
        Hence one is defining a voltage,  not an actual wavelength
        This voltage must be between -1.5 and 1.5 V
    '''
    wavelength_min = -1.5  # Minimum value of the voltage ramp
    wavelength_max = 1.5  # Maximum value of the voltage ramp
    wavelength_numpoints = 100  # Number of discrete wavelength values
    wavelength_ramp = linspace(
        wavelength_min, wavelength_max,
        wavelength_numpoints)  # Make a list of wavelengths

    #Create hdf storage
    hdf_name = 'Stark_data.hdf'  # Name the output file

    # Initialise data file
    # Pretty self explanatory
    timestamp = datetime.datetime.fromtimestamp(time.time())
    dataset_name = 'data_slab_{:d}{:0>2d}{:0>2d}:{:0>2d}:{:0>2d}:{:0>2d}'.format(
        timestamp.year, timestamp.month, timestamp.day, timestamp.hour,
        timestamp.minute, timestamp.second)
    data_file = h5py.File(hdf_name, 'a')
    data_file.require_dataset(dataset_name,
                              (hv_numpoints, len(wavelength_ramp), 6),
                              'float64')
    dset = data_file[dataset_name]
    dset.attrs[
        'data_layout'] = '(voltage, wavelength, (act_voltage, act_wavelength, counts, time_for_counts, hv_monitor, measured_blue_power_input))'

    ##########################
    # Initialise tasks
    '''
        Note! Generally speaking, a single task can't use physical channels from different devices.
        Also, a single device can't run more than one task of a particular type at any one time.
        This means as if I add more ao channels, I will have to add them to one task or the other
        (probably, though it seems to be working just fine as it is).
    '''
    # Initialise HV to nominal value
    vsr = requests.get(
        apiget + sessionid +
        '/1/0/2/Control.voltageSet')  # Get the current set value
    # '/1/0/2/' corresponds to 30kV supply 1 channel 2, the plate which determines beam energy
    vset = float(vsr.json()[0]['c'][0]['d']['v'])
    # If it is not zero, make it zero
    if vset != hv_min:
        requests.get(apiset + sessionid + '/0/3/1/Control.voltageSet/' +
                     str(hv_min) + '/V')

    # Initialise wavelength ao
    ao_wavelength_val = 0
    ao_wavelength_task = Task()
    ao_wavelength_task.CreateAOVoltageChan(ao_wavelength_physchan, "",
                                           wavelength_min, wavelength_max,
                                           DAQmx_Val_Volts, None)
    ao_wavelength_task.WriteAnalogF64(1, 1, 10.0, DAQmx_Val_GroupByChannel,
                                      array(float(ao_wavelength_val)), None,
                                      None)

    # Initialise Counter
    ctrin_mcp_val_cytpe = (ctypes.c_ulong * 1)()
    ctypes.cast(ctrin_mcp_val_cytpe, ctypes.POINTER(ctypes.c_ulong))
    ctrin_mcp_task = Task()
    ctrin_mcp_task.CreateCICountEdgesChan(ctrin_mcp_physchan, "",
                                          DAQmx_Val_Rising, 0,
                                          DAQmx_Val_CountUp)

    # Initialise ai (analogue in)
    samps_per_chan = 100
    sample_rate = 10000
    buffer_size = 10000
    ai_data = numpy.zeros((samps_per_chan * len(ai_physchan_list), ),
                          dtype=numpy.float64)
    ai_data_dict = dict()
    ai_task = Task()
    read = int32()
    ai_task.CreateAIVoltageChan(','.join(ai_physchan_list), '',
                                DAQmx_Val_Cfg_Default, -10.0, 10.0,
                                DAQmx_Val_Volts, None)
    ai_task.CfgSampClkTiming('', sample_rate, DAQmx_Val_Rising,
                             DAQmx_Val_ContSamps, buffer_size)
    ai_task.SetReadRelativeTo(DAQmx_Val_MostRecentSamp)
    ai_task.StartTask()

    # Initialise wavemeter communications
    wavemeter_ctx = zmq.Context()
    wavemeter_soc = wavemeter_ctx.socket(zmq.SUB)
    wavemeter_soc.setsockopt(zmq.SUBSCRIBE, b'L1')
    wavemeter_soc.connect(wavemeter_address)
    poller = zmq.Poller()
    poller.register(wavemeter_soc, zmq.POLLIN)

    #Test wavemeter communications
    print('Checking communication with wavemeter...')
    wavelenght = float(wavemeter_soc.recv_multipart()[1])
    if wavelenght > 0:
        print('Communication OK. Wavelength ={:10.6f}nm'.format(wavelenght))
    else:
        print(
            'Communication OK, but invalid wavelength received. Check exposure level.'
        )

    #Initialise plotter server
    plotter_ctx = zmq.Context()
    plotter_soc = plotter_ctx.socket(zmq.PUB)
    plotter_soc.hwm = 1
    plotter_soc.bind('tcp://*:' + plotter_port)
    time.sleep(1)  #Gives subscribers time to bind

    ctrin_mcp_task.StartTask()

    ###############################################################################
    # Start to take the data
    last_ao_wavelength_val = default_ao_wavelength_val
    print('Generating ramps and receiving counts....')
    start_time = time.time()

    #    #If you just want to repeat the experiment over and over at one voltage, uncomment this, and comment out the for statement
    #    while True:
    #        hv_idx = 0
    #        ao_hv_val = 0.211
    for hv_idx, ao_hv_val in enumerate(hv_ramp):

        print('Electrode control voltage ={:.4f}, Ramp value {:d} of {:d}'.
              format(ao_hv_val, hv_idx + 1, len(hv_ramp)))
        online_plotter_refresh = 1

        #set the voltage
        #ao_hv_task.WriteAnalogF64(1,1,10.0,DAQmx_Val_GroupByChannel,array(ao_hv_val),None,None)
        requests.get(apiset + sessionid + '/0/3/1/Control.voltageSet/' +
                     str(ao_hv_val) + '/V')
        #sleep for 1 second to allow the voltage to change
        time.sleep(1)
        for wavelength_idx, ao_wavelength_val in enumerate(wavelength_ramp):

            #Ramp to the desired wavelength with small steps
            wavelenght_safety_ramp = arange(
                last_ao_wavelength_val, ao_wavelength_val,
                sign(ao_wavelength_val - last_ao_wavelength_val) *
                wavelenght_max_voltage_jump)
            for safety_ao_wavelength_val in wavelenght_safety_ramp:
                ao_wavelength_task.WriteAnalogF64(
                    1, 1, 10.0, DAQmx_Val_GroupByChannel,
                    array(safety_ao_wavelength_val), None, None)
                time.sleep(wavelength_jump_period)
            #Set the wavelength to the actual desired value
            ao_wavelength_task.WriteAnalogF64(1, 1, 10.0,
                                              DAQmx_Val_GroupByChannel,
                                              array(ao_wavelength_val), None,
                                              None)
            time.sleep(wavelength_stabilisation_time)

            #Read initial counts
            t0 = time.time()
            ctrin_mcp_task.ReadCounterScalarU32(10.0, ctrin_mcp_val_cytpe,
                                                None)
            ctrin_mcp_val0 = float(ctrin_mcp_val_cytpe[0])

            time.sleep(
                dwell_time / 2
            )  # to attempt to read wavelength in the middle of the acquisition period
            #Empty the wavelength queue, then read wavelength
            while True:
                poll_dict = dict(poller.poll(0))
                if wavemeter_soc in poll_dict and poll_dict[
                        wavemeter_soc] == zmq.POLLIN:
                    #receive message straight away just to remove it from the queue
                    wavemeter_soc.recv_multipart()
                else:
                    #when queue is empty, wait for the next message to come through, and use that one
                    wavelength = float(wavemeter_soc.recv_multipart()[1])
                    break

            #Read analogue voltages, store each channel in a dictionary
            ai_task.ReadAnalogF64(samps_per_chan, 10.0,
                                  DAQmx_Val_GroupByChannel, ai_data,
                                  len(ai_data), byref(read), None)
            for idx, chan_description in enumerate(ai_physchan_description):
                ai_data_dict[chan_description] = array(
                    ai_data[idx * samps_per_chan:(idx + 1) * samps_per_chan])

            time_remaining_in_dwell = dwell_time - (time.time() - t0)
            if time_remaining_in_dwell > 0:
                time.sleep(time_remaining_in_dwell)

            #Read final counts
            t1 = time.time()
            ctrin_mcp_task.ReadCounterScalarU32(10.0, ctrin_mcp_val_cytpe,
                                                None)
            ctrin_mcp_val1 = float(ctrin_mcp_val_cytpe[0])

            dt = t1 - t0
            dcounts = ctrin_mcp_val1 - ctrin_mcp_val0

            #Save data locally for later, and send it to a plotter for immediate visualisation
            #measured_hv_input = ai_data_dict['hv_monitor'].mean()
            #mhvr=requests.get(apiget+sessionid+'/0/3/1/Status.voltageMeasure')
            #measured_hv_input=float(mhvr.json()[0]['c'][0]['d']['v'])
            measured_hv_input = ao_hv_val
            measured_blue_power_input = ai_data_dict[
                'blue_power_monitor'].mean()
            dset[hv_idx, wavelength_idx, :] = array(
                (ao_hv_val, wavelength, dcounts, dt, measured_hv_input,
                 measured_blue_power_input))
            plotter_soc.send_multipart(
                ('data', str(measured_hv_input), str(wavelength), str(dcounts),
                 str(dt), str(online_plotter_refresh),
                 str(measured_blue_power_input)))

            online_plotter_refresh = 0
            last_ao_wavelength_val = ao_wavelength_val

        #Print update to the terminal every so often
        try:
            time_so_far = time.time() - start_time
            est_time_remaining = time_so_far / (
                (hv_idx + 1) / len(hv_ramp)) - time_so_far
            print('########################################################')
            print('Est. time remaining = ' +
                  str(int(floor(est_time_remaining / (60 * 60)))) + 'hrs ' +
                  str(int(floor(mod(est_time_remaining, 60 * 60) / 60))) +
                  'mins ' + str(int(mod(est_time_remaining, 60))) + 'secs')
        except ZeroDivisionError:
            pass

    #Return ao_wavelength and ao_hv to default values
    #Ramp with small steps to the desired wavelength
    print('Ramping back to default wavelength and electrode voltage')
    wavelenght_safety_ramp = arange(
        last_ao_wavelength_val, default_ao_wavelength_val,
        sign(default_ao_wavelength_val - last_ao_wavelength_val) *
        wavelenght_max_voltage_jump)
    for safety_ao_wavelength_val in wavelenght_safety_ramp:
        ao_wavelength_task.WriteAnalogF64(1, 1, 10.0, DAQmx_Val_GroupByChannel,
                                          array(safety_ao_wavelength_val),
                                          None, None)
        time.sleep(wavelength_jump_period)
    ao_wavelength_task.WriteAnalogF64(1, 1, 10.0, DAQmx_Val_GroupByChannel,
                                      array(float(default_ao_wavelength_val)),
                                      None, None)
    #ao_hv_task.WriteAnalogF64(1,1,10.0,DAQmx_Val_GroupByChannel,array(float(hv_min)),None,None)
    requests.get(apiset + sessionid + '/0/3/1/Control.voltageSet/' +
                 str(hv_min) + '/V')

    data_file.close()

    #ao_hv_task.StopTask()
    ao_wavelength_task.StopTask()
    ctrin_mcp_task.StopTask()

    #ao_hv_task.ClearTask()
    ao_wavelength_task.ClearTask()
    ctrin_mcp_task.ClearTask()

    time.sleep(0.5)
    print('Experiment complete.')
    total_time = time.time() - start_time
    print('Total time taken = ' + str(int(floor(total_time / (60 * 60)))) +
          'hrs ' + str(int(floor(mod(total_time, 60 * 60) / 60))) + 'mins ' +
          str(int(mod(total_time, 60))) + 'secs')
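
The scan loop above publishes each data point as a 7-frame multipart message of ASCII strings over the PUB socket bound to plotter_port. Below is a rough sketch of what a subscriber on the plotter side might look like; the host is a placeholder and the frame order simply mirrors the send_multipart() call in the scan loop.

import zmq

ctx = zmq.Context.instance()
plot_sub = ctx.socket(zmq.SUB)
plot_sub.connect("tcp://127.0.0.1:5679")    # placeholder host, port from plotter_port
plot_sub.setsockopt(zmq.SUBSCRIBE, b"data")

while True:
    frames = plot_sub.recv_multipart()
    # frame order mirrors plotter_soc.send_multipart(...) in the scan loop
    _, hv, wavelength, counts, dt, refresh, blue = (f.decode("ascii") for f in frames)
    print(float(hv), float(wavelength), float(counts), float(dt))
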
Beispiel #24
0
    def run(self):
        '''
        Main loop of the FSCache, checks schedule, retrieves result-data
        from the workers and answer requests with data from the cache
        '''
        context = zmq.Context()
        # the socket for incoming cache requests
        creq_in = context.socket(zmq.REP)
        creq_in.setsockopt(zmq.LINGER, 100)
        creq_in.bind('ipc:///' + self.cache_sock)

        # the socket for incoming cache-updates from workers
        cupd_in = context.socket(zmq.REP)
        cupd_in.setsockopt(zmq.LINGER, 100)
        cupd_in.bind('ipc:///' + self.update_sock)

        # wait for the timer to bind to its socket
        log.debug('wait 2 secs for the timer')
        time.sleep(2)

        # the socket for the timer-event
        timer_in = context.socket(zmq.PULL)
        timer_in.setsockopt(zmq.LINGER, 100)
        timer_in.connect('ipc:///' + self.upd_t_sock)

        poller = zmq.Poller()
        poller.register(creq_in, zmq.POLLIN)
        poller.register(cupd_in, zmq.POLLIN)
        poller.register(timer_in, zmq.POLLIN)

        # our serializer
        serial = salt.payload.Serial(self.opts.get('serial', ''))

        # register a signal handler
        signal.signal(signal.SIGINT, self.signal_handler)

        # secure the sockets from the world
        self.secure()

        log.info('FSCache started')
        log.debug('FSCache started')

        while self.running:

            # we check for new events with the poller
            try:
                socks = dict(poller.poll())
            except KeyboardInterrupt:
                self.stop()
            except zmq.ZMQError as t:
                self.stop()

            # check for next cache-request
            if socks.get(creq_in) == zmq.POLLIN:
                msg = serial.loads(creq_in.recv())
                log.debug('Received request: {0}'.format(msg))

                # we only accept requests as lists [req_id, <path>]
                if isinstance(msg, list):
                    # for now only one item is assumed to be requested
                    msgid, file_n = msg[:]
                    log.debug('Looking for {0}:{1}'.format(msgid, file_n))

                    fdata = self.path_data.get(file_n, None)

                    if fdata is not None:
                        log.debug('Cache HIT')
                    else:
                        log.debug('Cache MISS')

                    # simulate slow caches
                    #randsleep = random.randint(0,3)
                    #time.sleep(randsleep)

                    # Send reply back to client
                    reply = serial.dumps([msgid, fdata])
                    creq_in.send(reply)

                # wrong format, item not cached: no request id is available
                # here, so reply with None for both fields
                else:
                    reply = serial.dumps([None, None])
                    creq_in.send(reply)

            # check for next cache-update from workers
            elif socks.get(cupd_in) == zmq.POLLIN:
                new_c_data = serial.loads(cupd_in.recv())
                # tell the worker to exit
                cupd_in.send(serial.dumps('OK'))

                # check if the returned data is usable
                if not isinstance(new_c_data, dict):
                    log.error('Worker returned unusable result')
                    del new_c_data
                    continue

                # the workers will return differing data:
                # 1. '{'file1': <data1>, 'file2': <data2>,...}' - a cache update
                # 2. '{search-path: None}' -  job was not run, pre-checks failed
                # 3. '{}' - no files found, check the pattern if defined?
                # 4. anything else is considered malformed

                if len(new_c_data) == 0:
                    log.debug('Got empty update from worker')
                elif new_c_data.values()[0] is not None:
                    log.debug('Got cache update with {0} item(s)'.format(
                        len(new_c_data)))
                    self.path_data.update(new_c_data)
                else:
                    log.debug('Got malformed result dict from worker')

                log.info('{0} entries in cache'.format(len(self.path_data)))

            # check for next timer-event to start new jobs
            elif socks.get(timer_in) == zmq.POLLIN:
                sec_event = serial.loads(timer_in.recv())

                log.debug('Timer event: #{0}'.format(sec_event))

                # loop through the jobs and start if a jobs ival matches
                for item in self.jobs:
                    if sec_event in self.jobs[item]['ival']:
                        self.run_job(item)
        self.stop()
        creq_in.close()
        cupd_in.close()
        timer_in.close()
        context.term()
        log.debug('Shutting down')
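
The cache answers requests of the form [req_id, <path>] with [req_id, data-or-None] over the REP socket bound at ipc:///<cache_sock>. A hedged sketch of the client side follows; pickle stands in for salt's serializer here, and the socket path is a placeholder.

import pickle

import zmq

CACHE_SOCK = "ipc:///tmp/fscache_demo"      # placeholder socket path

ctx = zmq.Context.instance()
creq = ctx.socket(zmq.REQ)
creq.connect(CACHE_SOCK)

# requests are [request-id, path]; replies are [request-id, data or None]
creq.send(pickle.dumps(["req-1", "salt://top.sls"]))
msgid, fdata = pickle.loads(creq.recv())
print(msgid, "HIT" if fdata is not None else "MISS")
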
Beispiel #25
0
import zmq

LRU_READY = "\x01"

context = zmq.Context(1)

frontend = context.socket(zmq.ROUTER)  # ROUTER
backend = context.socket(zmq.ROUTER)  # ROUTER
frontend.bind("tcp://*:5555")  # For clients
backend.bind("tcp://*:5556")  # For workers

poll_workers = zmq.Poller()
poll_workers.register(backend, zmq.POLLIN)

poll_both = zmq.Poller()
poll_both.register(frontend, zmq.POLLIN)
poll_both.register(backend, zmq.POLLIN)

workers = []

while True:
    if workers:
        socks = dict(poll_both.poll())
    else:
        socks = dict(poll_workers.poll())

    # Handle worker activity on backend
    if socks.get(backend) == zmq.POLLIN:
        # Use worker address for LRU routing
        msg = backend.recv_multipart()
        if not msg:
Beispiel #26
0
 def __init__(self):
     self.context = zmq.Context()
     self.poller = zmq.Poller()
Beispiel #27
0
    def run(self):
        """
        Process messages for commands and data
        """
        # Warning: this might run even when ending (aka "in a __del__() state")
        # Which means: logging might be None, and zmq might not be working
        # normally (apparently zmq.POLLIN == None during this time).
        try:
            poller = zmq.Poller()
            poller.register(self._commands, zmq.POLLIN)
            poller.register(self._data, zmq.POLLIN)
            discarded = 0
            while True:
                socks = dict(poller.poll())

                # process commands
                if self._commands in socks:
                    message = self._commands.recv()
                    if message == "SUB":
                        self._data.setsockopt(zmq.SUBSCRIBE, '')
                        logging.debug("Subscribed to remote dataflow %s",
                                      self.uri)
                        self._commands.send("SUBD")
                    elif message == "UNSUB":
                        self._data.setsockopt(zmq.UNSUBSCRIBE, '')
                        if logging:
                            logging.debug(
                                "Unsubscribed from remote dataflow %s",
                                self.uri)
                        # no confirmation (async)
                    elif message == "STOP":
                        return
                    else:
                        logging.warning("Received unknown message %s", message)

                # receive data
                if self._data in socks:
                    # TODO: be more resilient if wrong data is received (can
                    # block forever)
                    array_format = self._data.recv_pyobj()
                    array_md = self._data.recv_pyobj()
                    array_buf = self._data.recv(copy=False)
                    # more fresh data already?
                    if (self._data.getsockopt(zmq.EVENTS) & zmq.POLLIN
                            and discarded < self.max_discard):
                        discarded += 1
                        continue
                    # TODO: only log the accumulated number every second, to avoid log flooding
                    # if discarded:
                    #     logging.debug("Dataflow %s dropped %d arrays", self.uri, discarded)
                    discarded = 0
                    # TODO: any need to use zmq.utils.rebuffer.array_from_buffer()?
                    array = numpy.frombuffer(array_buf,
                                             dtype=array_format["dtype"])
                    array.shape = array_format["shape"]
                    darray = DataArray(array, metadata=array_md)

                    try:
                        self.w_notifier(darray)
                    except WeakRefLostError:
                        return  # It's a sign there is nothing left to do
        except:
            if logging:
                logging.exception("Ending ZMQ thread due to exception")
        finally:
            try:
                self._commands.close()
            except:
                print "Exception closing ZMQ commands connection"
            try:
                self._data.close()
            except:
                print "Exception closing ZMQ data connection"
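
The receive path above expects three frames per update: a format dict (dtype and shape), a metadata dict, and the raw array buffer. A minimal sketch of a matching sender, assuming a PUB socket already bound to the dataflow URI; the function and parameter names are illustrative:

import numpy
import zmq

def publish_array(pub_socket, array, metadata):
    # frame 1: array format, frame 2: metadata, frame 3: raw buffer
    fmt = {"dtype": str(array.dtype), "shape": array.shape}
    pub_socket.send_pyobj(fmt, zmq.SNDMORE)
    pub_socket.send_pyobj(metadata, zmq.SNDMORE)
    pub_socket.send(numpy.ascontiguousarray(array), copy=False)
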
Beispiel #28
0
    def thread_loop(self, context, pipe):
        n = Pyre(self.name)
        n.join(self.group)
        n.start()

        poller = zmq.Poller()
        poller.register(pipe, zmq.POLLIN)
        logger.debug(n.socket())
        poller.register(n.socket(), zmq.POLLIN)
        while (True):
            try:
                # This should not fail, but it does sometimes. We need to clean this out.
                # I think we are not treating sockets correctly, as they are not thread-safe.
                items = dict(poller.poll())
            except zmq.ZMQError:
                logger.warning('Socket fail.')
                continue
            # print(n.socket(), items)
            if pipe in items and items[pipe] == zmq.POLLIN:
                message = pipe.recv()
                # message to quit
                if message.decode('utf-8') == "EXIT_THREAD":
                    break
                logger.debug("Emitting to '%s' to '%s' " %
                             (message, self.group))
                n.shouts(self.group, message)
            if n.socket() in items and items[n.socket()] == zmq.POLLIN:
                cmds = n.recv()
                msg_type = cmds.pop(0)
                msg_type = msg_type.decode('utf-8')
                if msg_type == "SHOUT":
                    uid, name, group, msg = cmds
                    logger.debug("'%s' shouts '%s'." % (name, msg))
                    if start_rec in msg:
                        session_name = msg.replace(start_rec, '')
                        self.notify_all({
                            'name': 'rec_should_start',
                            'session_name': session_name,
                            'network_propagate': False
                        })
                    elif stop_rec in msg:
                        self.notify_all({
                            'name': 'rec_should_stop',
                            'network_propagate': False
                        })
                    elif sync_time in msg:
                        offset = float(msg.replace(sync_time, ''))
                        if self.ok_to_set_timebase():
                            self.adjust_timebase(offset)

                elif msg_type == "ENTER":
                    uid, name, headers, ip = cmds
                elif msg_type == "JOIN":
                    uid, name, group = cmds
                    if group == self.group:
                        self.group_members[uid] = name
                        self.update_gui()
                elif msg_type == "EXIT":
                    uid, name = cmds
                    try:
                        del self.group_members[uid]
                    except KeyError:
                        pass
                    else:
                        self.update_gui()
                elif msg_type == "LEAVE":
                    uid, name, group = cmds
                elif msg_type == "WHISPER":
                    pass

        logger.debug('thread_loop closing.')
        self.thread_pipe = None
        n.stop()
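
The SHOUT handler above reacts to messages carrying the start_rec, stop_rec and sync_time prefixes, none of which are defined in this listing. A minimal sketch of a second Pyre node triggering the recording start, assuming start_rec is a plain string prefix and the node joins the same group:

from pyre import Pyre

def remote_start_recording(group, start_rec, session_name):
    node = Pyre("remote_trigger")
    node.join(group)
    node.start()
    # the receiver recovers the session name via msg.replace(start_rec, '')
    node.shouts(group, start_rec + session_name)
    node.stop()
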
Beispiel #29
0
    def execute_interactive(
        self,
        code,
        silent=False,
        store_history=True,
        user_expressions=None,
        allow_stdin=None,
        stop_on_error=True,
        timeout=None,
        output_hook=None,
        stdin_hook=None,
    ):
        """Execute code in the kernel interactively

        Output will be redisplayed, and stdin prompts will be relayed as well.
        If an IPython kernel is detected, rich output will be displayed.

        You can pass a custom output_hook callable that will be called
        with every IOPub message that is produced instead of the default redisplay.

        .. versionadded:: 5.0

        Parameters
        ----------
        code : str
            A string of code in the kernel's language.

        silent : bool, optional (default False)
            If set, the kernel will execute the code as quietly possible, and
            will force store_history to be False.

        store_history : bool, optional (default True)
            If set, the kernel will store command history.  This is forced
            to be False if silent is True.

        user_expressions : dict, optional
            A dict mapping names to expressions to be evaluated in the user's
            dict. The expression values are returned as strings formatted using
            :func:`repr`.

        allow_stdin : bool, optional (default self.allow_stdin)
            Flag for whether the kernel can send stdin requests to frontends.

            Some frontends (e.g. the Notebook) do not support stdin requests.
            If raw_input is called from code executed from such a frontend, a
            StdinNotImplementedError will be raised.

        stop_on_error: bool, optional (default True)
            Flag whether to abort the execution queue, if an exception is encountered.

        timeout: float or None (default: None)
            Timeout to use when waiting for a reply

        output_hook: callable(msg)
            Function to be called with output messages.
            If not specified, output will be redisplayed.

        stdin_hook: callable(msg)
            Function to be called with stdin_request messages.
            If not specified, input/getpass will be called.

        Returns
        -------
        reply: dict
            The reply message for this request
        """
        if not self.iopub_channel.is_alive():
            raise RuntimeError(
                "IOPub channel must be running to receive output")
        if allow_stdin is None:
            allow_stdin = self.allow_stdin
        if allow_stdin and not self.stdin_channel.is_alive():
            raise RuntimeError("stdin channel must be running to allow input")
        msg_id = self.execute(
            code,
            silent=silent,
            store_history=store_history,
            user_expressions=user_expressions,
            allow_stdin=allow_stdin,
            stop_on_error=stop_on_error,
        )
        if stdin_hook is None:
            stdin_hook = self._stdin_hook_default
        if output_hook is None:
            # detect IPython kernel
            if 'IPython' in sys.modules:
                from IPython import get_ipython
                ip = get_ipython()
                in_kernel = getattr(ip, 'kernel', False)
                if in_kernel:
                    output_hook = partial(
                        self._output_hook_kernel,
                        ip.display_pub.session,
                        ip.display_pub.pub_socket,
                        ip.display_pub.parent_header,
                    )
        if output_hook is None:
            # default: redisplay plain-text outputs
            output_hook = self._output_hook_default

        # set deadline based on timeout
        if timeout is not None:
            deadline = monotonic() + timeout
        else:
            timeout_ms = None

        poller = zmq.Poller()
        iopub_socket = self.iopub_channel.socket
        poller.register(iopub_socket, zmq.POLLIN)
        if allow_stdin:
            stdin_socket = self.stdin_channel.socket
            poller.register(stdin_socket, zmq.POLLIN)
        else:
            stdin_socket = None

        # wait for output and redisplay it
        while True:
            if timeout is not None:
                timeout = max(0, deadline - monotonic())
                timeout_ms = 1e3 * timeout
            events = dict(poller.poll(timeout_ms))
            if not events:
                raise TimeoutError("Timeout waiting for output")
            if stdin_socket in events:
                req = self.stdin_channel.get_msg(timeout=0)
                stdin_hook(req)
                continue
            if iopub_socket not in events:
                continue

            msg = self.iopub_channel.get_msg(timeout=0)

            if msg['parent_header'].get('msg_id') != msg_id:
                # not from my request
                continue
            output_hook(msg)

            # stop on idle
            if msg['header']['msg_type'] == 'status' and \
            msg['content']['execution_state'] == 'idle':
                break

        # output is done, get the reply
        if timeout is not None:
            timeout = max(0, deadline - monotonic())
        return self._recv_reply(msg_id, timeout=timeout)
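
A minimal usage sketch for the method above, assuming a blocking client obtained from jupyter_client's start_new_kernel helper; the kernel name and the snippet being executed are illustrative:

from jupyter_client.manager import start_new_kernel

km, kc = start_new_kernel(kernel_name='python3')
try:
    # output is redisplayed while the code runs; the reply arrives once the kernel is idle
    reply = kc.execute_interactive("print('hello from the kernel')", timeout=30)
    print(reply['content']['status'])   # 'ok' on success
finally:
    kc.stop_channels()
    km.shutdown_kernel()
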
Beispiel #30
0
    def start(self, timeout_sec=-1):
        # note: need to handle interrupt
        # Switch messages between sockets
        if self._server_verbosity > 1:
            print "Server: start"

        # Initialize pollers

        # poll with backend only when queue is empty
        poll_workers = zmq.Poller()
        poll_workers.register(self._backend, zmq.POLLIN)

        # poll for when workers available
        poll_both = zmq.Poller()
        poll_both.register(self._backend, zmq.POLLIN)
        poll_both.register(self._frontend, zmq.POLLIN)

        # time to send next heartbeat
        heartbeat_at = time.time() + self._heartbeat_interval

        tstart = time.time()

        while True:
            if self._server_verbosity > 1:
                print "Server: runtime=", time.time() - tstart, "secs"

            if len(self._workers) > 0:
                poller = poll_both
            else:
                poller = poll_workers

            #socks = dict(poller.poll( self._poller_timeout_secs*1000 ))
            socks = dict(poller.poll(10))
            if self._server_verbosity > 1:
                print "Server: finish poll."

            # Handle worker activity on backend
            if socks.get(self._backend) == zmq.POLLIN:
                # Use worker address for LRU routing
                frames = self._backend.recv_multipart(zmq.DONTWAIT)
                if not frames:
                    if self._server_verbosity >= 0:
                        print "Server: error in backend frame"
                    break

                address = frames[0]
                self._workers.ready(
                    Worker(address, self._heartbeat_interval,
                           self._heartbeat_liveness))
                if self._server_verbosity > 1:
                    print "Server: added to queue worker {}. In queue=".format(
                        address.decode("ascii")), len(self._workers)

                # Validate control message, or return reply to client
                msg = frames[1:]
                if len(msg) == 1:
                    if msg[0] not in (PPP_READY, PPP_HEARTBEAT):
                        print "Server: ERROR Invalid message from worker: %s" % msg
                    elif msg[0] == PPP_HEARTBEAT:
                        if self._server_verbosity > 1:
                            print "Server: got heartbeat from {}".format(
                                address.decode("ascii"))
                        pass
                else:
                    if self._server_verbosity > 1:
                        print "Server: route worker {} result back to client {}".format(
                            address.decode("ascii"), msg[0].decode("ascii"))
                    self._frontend.send_multipart(msg)

                # # Send heartbeats to idle workers if it's time
                # if time.time() >= heartbeat_at:
                #     for worker in self._workers.queue:
                #         print "Server: send heartbeat"
                #         msg = [worker, PPP_HEARTBEAT]
                #         self._backend.send_multipart(msg)
                #     heartbeat_at = time.time() + self._heartbeat_interval
                # else:
                #     print "time to next heartbeat: ",heartbeat_at-time.time()," secs"

            # Handle frontend requests
            if socks.get(self._frontend) == zmq.POLLIN:
                frames = self._frontend.recv_multipart(zmq.DONTWAIT)
                if not frames:
                    print "Server: error in frontend frame"
                    break
                frames.insert(0, self._workers.next())
                if self._server_verbosity > 1:
                    print "Server: send job for {} to {}".format(
                        frames[1], frames[0])
                self._backend.send_multipart(frames)

            # Send heartbeats to idle workers if it's time
            if time.time() >= heartbeat_at:
                for worker in self._workers.queue:
                    if self._server_verbosity > 1:
                        print "Server: send heartbeat"
                    msg = [worker, PPP_HEARTBEAT]
                    self._backend.send_multipart(msg)
                heartbeat_at = time.time() + self._heartbeat_interval
            else:
                if self._server_verbosity > 1:
                    print "time to next heartbeat: ", heartbeat_at - time.time(), " secs"
                pass

            # purge expired workers
            self._workers.purge()
            if self._server_verbosity > 1:
                print "Server: purged. in queue=", len(self._workers)

            # check for time-out condition.
            # change state to closing
            if timeout_sec > 0 and time.time() - tstart > timeout_sec:
                print "Server: at end of life. stopping."
                break

            sys.stdout.flush()

        # End of main loop
        # do we tell workers to stop?
        #for worker in self._workers:
        #    self._backend.send_multipart([worker,b"",b"NOCLIENT",b"",b"__BROKER_STOPPING__"])

        return
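
For context, a minimal worker matching the broker loop above (Paranoid Pirate style). The PPP_READY/PPP_HEARTBEAT byte values, the heartbeat interval and the backend address do not appear in this listing and are assumptions:

import time
import zmq

PPP_READY = b"\x01"       # assumed one-byte signals, not shown in the listing
PPP_HEARTBEAT = b"\x02"
HEARTBEAT_INTERVAL = 1.0

context = zmq.Context(1)
worker = context.socket(zmq.DEALER)
worker.connect("tcp://localhost:5556")    # illustrative backend address
worker.send(PPP_READY)                    # announce availability to the broker

poller = zmq.Poller()
poller.register(worker, zmq.POLLIN)
heartbeat_at = time.time() + HEARTBEAT_INTERVAL
while True:
    if dict(poller.poll(100)).get(worker) == zmq.POLLIN:
        frames = worker.recv_multipart()
        if frames == [PPP_HEARTBEAT]:
            pass                          # broker heartbeat, nothing to do
        else:
            # frames start with the client address; echo them back as the result
            worker.send_multipart(frames)
    if time.time() >= heartbeat_at:
        worker.send(PPP_HEARTBEAT)
        heartbeat_at = time.time() + HEARTBEAT_INTERVAL
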