class Scheduler(object):
    """Round-robin scheduler that repeatedly runs tasks from a ready queue.

    A task is any object created by ``Task(target)`` that exposes a ``tid``
    attribute and a ``run()`` method.  ``run()`` either returns a value
    (possibly a ``SystemCall`` to be serviced by the scheduler) or raises
    ``StopIteration`` to signal that the task has finished.
    """

    def __init__(self):
        # FIFO of tasks that are ready to be run by mainloop().
        self.ready = Queue()
        # Every live task, keyed by its tid.
        self.taskmap = {}

        # Tasks waiting for other tasks to exit: {awaited tid: [waiting tasks]}
        self.exit_waiting = {}

    def new(self, target):
        """Wrap ``target`` in a Task, register and schedule it; return its tid."""
        newtask = Task(target)
        self.taskmap[newtask.tid] = newtask
        self.schedule(newtask)
        return newtask.tid

    def exit(self, task):
        """Remove a finished task and reschedule every task waiting on it."""
        print("Task %d terminated" % task.tid)
        del self.taskmap[task.tid]
        # Notify other tasks waiting for exit of the exiting task.
        # A distinct loop name keeps the ``task`` parameter intact.
        for waiter in self.exit_waiting.pop(task.tid, []):
            print(waiter)
            self.schedule(waiter)

    def waitforexit(self, task, waittid):
        """Park ``task`` until the task with id ``waittid`` exits.

        Returns:
            True if ``waittid`` is a live task and ``task`` was parked,
            False if there is no such task (``task`` is not parked).
        """
        if waittid in self.taskmap:
            self.exit_waiting.setdefault(waittid, []).append(task)
            print(self.exit_waiting)
            return True
        else:
            return False

    def schedule(self, task):
        """Append ``task`` to the ready queue."""
        self.ready.put(task)

    def mainloop(self):
        """Run ready tasks until no live task remains.

        NOTE(review): if every remaining task is parked in ``exit_waiting``
        the ready queue is empty and ``get()`` blocks forever.
        """
        while self.taskmap:
            # qsize() is the public, lock-protected accessor.
            print(self.ready.qsize())
            task = self.ready.get()
            try:
                print(self.ready.qsize())
                result = task.run()
                if isinstance(result, SystemCall):
                    # Give the system call access to the task that issued it
                    # and to this scheduler, then let it decide what to
                    # schedule next (the task is NOT rescheduled here).
                    result.task = task
                    result.sched = self
                    result.handle()
                    continue
            except StopIteration:
                self.exit(task)
                continue
            self.schedule(task)
Example #2
0
    def __str__(self):
        """Return the tree rooted at this node as a level-order list string.

        Nodes are visited breadth-first through their ``left`` and ``right``
        attributes; each node contributes its ``val`` and each missing child
        contributes ``None``.  Trailing ``None`` entries are stripped, so a
        tree whose values are all ``None`` yields ``"[]"``.
        """
        ret = []

        q = Queue()
        q.put(self)

        while not q.empty():
            cur = q.get()
            # Identity check: a node class may override ``__eq__``/``__ne__``.
            if cur is not None:
                ret.append(cur.val)
                q.put(cur.left)
                q.put(cur.right)
            else:
                ret.append(None)

        # ``ret and`` prevents an IndexError when every entry is None.
        while ret and ret[-1] is None:
            ret.pop()

        return str(ret)
Example #3
0
class Worker:
    """Consumes queued tasks one at a time and publishes results via futures.

    ``add_task`` wraps each task in a ``TaskFuture`` and queues it; ``run``
    (intended to be executed by a worker thread) pops futures until it sees
    ``STOP_SENTINEL``.
    """

    def __init__(self):
        self._task_list = Queue()
        self._running = True

    def add_task(self, task):
        """Queue ``task`` and return the ``TaskFuture`` tracking it."""
        future = TaskFuture(task)
        self._task_list.put(future)
        return future

    def run(self):
        """Process queued futures until ``STOP_SENTINEL`` is dequeued.

        NOTE(review): an exception raised by ``task.callable`` propagates out
        of this loop, so the future never receives a result — confirm whether
        ``TaskFuture`` offers a way to record failures.
        """
        pool = self._task_list
        while True:
            future = pool.get()
            if future is STOP_SENTINEL:
                break
            task = future.get_task()
            future.ready()
            result = task.callable(*task.args, **task.kwargs)
            future.set_result(result)

    def stop(self):
        """Discard all pending items and ask ``run`` to stop."""
        # The underlying deque is shared with put()/get(); mutate it only
        # while holding the queue's own mutex.
        with self._task_list.mutex:
            self._task_list.queue.clear()
        self._task_list.put(STOP_SENTINEL)

    def task_count(self):
        """Return the approximate number of queued items."""
        return self._task_list.qsize()
Example #4
0
def single_run(preset_filename, robots, bind_addr):
    """Load a preset and run the simulation alongside its communication server.

    The preset YAML is located with ``find_abs``, ``robots`` is appended to
    the preset's own robot list, and two daemon threads are started: one
    running ``start_server_with_shared_data`` and one running
    ``runFromConfig``.  The call blocks until either thread posts to the
    result queue; a posted ``(name, exception)`` tuple is re-raised here.
    A ``KeyboardInterrupt`` while waiting is swallowed.
    """
    search_areas = ['local', 'local/presets/', 'package', 'package/presets/']
    preset_file = find_abs(preset_filename, allowed_areas=search_areas)
    with open(preset_file, 'r') as preset_handle:
        config = yaml.safe_load(preset_handle)

    preset_robots = config.get('robots', [])
    config['robots'] = preset_robots + robots

    shared_data = {
        # Current tick
        'tick': 0,
        # All write actions are processed through this
        'write_stack': deque(),
        # Simulation data for each bot
        'data_queue': {},
        # Keeps track of which code connection each bot has.
        'active_count': {},
        # Threading Locks and Conditions for each bot to wait for connection actions
        'bot_locks': {},
        # Buffers and information for all bot communications
        'bot_communications_data': {},
        # Static data pushed by the simulation tick, so the other methods
        # are aware of when the simulation has exited.
        'tick_updates': {},
    }

    # Single-slot mailbox: the first thread to finish (or fail) fills it.
    result_bucket = Queue(maxsize=1)

    from threading import Thread
    from ev3sim.simulation.communication import start_server_with_shared_data

    def run(state, outbox):
        try:
            runFromConfig(config, state)
        except Exception as failure:
            outbox.put(('Simulation', failure))
        else:
            outbox.put(True)

    comm_thread = Thread(
        target=start_server_with_shared_data,
        args=(shared_data, result_bucket, bind_addr),
        daemon=True,
    )
    sim_thread = Thread(
        target=run,
        args=(shared_data, result_bucket),
        daemon=True,
    )

    for worker in (comm_thread, sim_thread):
        worker.start()

    try:
        # Poll with a short timeout rather than blocking in get().
        # _qsize() (not qsize()) is required here: the queue mutex is already
        # held through ``not_empty``.
        bucket_filled = result_bucket.not_empty
        with bucket_filled:
            while result_bucket._qsize() == 0:
                bucket_filled.wait(0.1)
        outcome = result_bucket.get()
        # Chuck it back on the queue so that other threads know we are quitting.
        result_bucket.put(outcome)
        if outcome is not True:
            print(f"An error occured in the {outcome[0]} thread. Raising an error now...")
            time.sleep(1)
            raise outcome[1]
    except KeyboardInterrupt:
        pass
Example #5
0
class Scheduler(object):
    """Thin FIFO wrapper around a ``Queue`` of pending requests."""

    def __init__(self):
        self.q = Queue()

    def next_request(self):
        """Return the oldest queued request, or None if the queue is empty.

        Never blocks.
        """
        # Local import so this method does not depend on how the module
        # imported ``Queue``.
        from queue import Empty

        try:
            return self.q.get(block=False)
        except Empty:
            # Only "nothing queued" maps to None; any other error propagates
            # instead of being silently swallowed.
            return None

    def enqueue_request(self, req):
        """Append ``req`` to the queue."""
        self.q.put(req)

    def size(self):
        """Return the approximate number of queued requests."""
        return self.q.qsize()
Example #6
0
class Scheduler(Thread):
    """Thread that drains a task queue, then a retry queue, using a worker pool.

    Subclasses implement ``consume_queue``.  The pool is built by the
    module-level ``create(pool_size, headless)`` and torn down by the
    module-level ``dispose(pool)``.
    """

    def __init__(self, pool_size, headless=True):
        Thread.__init__(self)
        self.retry = Queue()
        self.tasks = Queue()
        self.pool_size = pool_size
        self.headless = headless
        self.pool = create(pool_size, headless)

    def __enter__(self):
        return self

    def __exit__(self, *arg):
        # NOTE(review): the pool is disposed immediately after the thread is
        # started, without join(); confirm that dispose() is safe to call
        # while run() may still be using the pool.
        self.start()
        dispose(self.pool)

    def schedule_task(self, task):
        """Enqueue a task.

        Raises:
            ValueError: if ``task`` is not an instance of ``Task``.
        """
        if not isinstance(task, Task):
            raise ValueError("task must be an instance of Task")
        self.tasks.put(task)

    def consume_queue(self, task_queue=None):
        """Process every item of ``task_queue``; must be overridden.

        The parameter matches how ``run`` invokes this hook, so an
        un-overridden call raises NotImplementedError rather than TypeError.
        """
        raise NotImplementedError

    def dispose(self):
        """Dispose of all workers."""
        dispose(self.pool)

    def run(self):
        """Consume the task queue once, then the retry queue until it is empty."""
        self.consume_queue(self.tasks)
        while self.retry.qsize() > 0:
            self.consume_queue(self.retry)
Example #7
0
    def run_simulation():
        class CommunicationsError(Exception):
            """Raised when a simulator communication request reports failure."""

        def comms(data, result):
            """Stream tick updates from the simulator into ``data``.

            For every streamed update this stores ``tick``, ``tick_rate`` and
            the JSON-decoded ``current_data``, releases the robot thread on
            the first update, wakes every registered tick handler, and
            notifies ``data['condition_updated']``.

            Args:
                data: Shared state dictionary used by all threads.
                result: Queue that receives ``('Communications', exception)``
                    when the stream fails with anything other than a
                    "Deadline Exceeded" rendezvous error.
            """
            data['thread_ids'][threading.get_ident()] = ev3sim.simulation.comm_schema_pb2.RobotLogSource.COMMS
            from grpc._channel import _MultiThreadedRendezvous
            logging.basicConfig()
            first_message = True
            while True:
                with grpc.insecure_channel(args.simulator_addr) as channel:
                    try:
                        stub = ev3sim.simulation.comm_schema_pb2_grpc.SimulationDealerStub(channel)
                        response = stub.RequestTickUpdates(ev3sim.simulation.comm_schema_pb2.RobotRequest(robot_id=robot_id))
                        for r in response:
                            data['tick'] = r.tick
                            data['tick_rate'] = r.tick_rate
                            data['current_data'] = json.loads(r.content)
                            if first_message:
                                print("Connection initialised.")
                                first_message = False
                                data['start_robot_queue'].put(True)
                            # Iterate a snapshot: the robot thread adds and
                            # removes handlers concurrently, and iterating the
                            # live dict could raise RuntimeError/KeyError and
                            # take this whole thread down.
                            for handler in list(data['active_data_handlers'].values()):
                                handler.put(True)
                            with data['condition_updating']:
                                data['condition_updated'].notify()
                    except Exception as e:
                        # https://github.com/MelbourneHighSchoolRobotics/ev3sim/issues/55 pygame window dragging will deadline.
                        if not (isinstance(e, _MultiThreadedRendezvous) and e._state.details == "Deadline Exceeded"):
                            result.put(('Communications', e))
                            break
                        # Deadline exceeded: fall through and reconnect.
                        # For some reason this needs to be done despite using the context manager.
                        channel.close()

        def write(data, result):
            """Forward queued robot actions to the simulator over gRPC.

            Actions are ``(action_type, info)`` tuples taken from
            ``data['actions_queue']``.  ``'write'`` and ``'send_log'`` are
            fire-and-forget; every action listed in ``replying_calls`` posts
            its reply to ``data['write_results']`` or raises
            ``CommunicationsError`` when the reply's ``result`` is falsy.
            Unknown action types are ignored.  Any exception ends the loop and
            is reported on ``result`` as ``('Communications', exception)``.
            """
            data['thread_ids'][threading.get_ident()] = ev3sim.simulation.comm_schema_pb2.RobotLogSource.WRITE
            # action type -> (stub method name, request message name)
            replying_calls = {
                'begin_server': ('RequestServer', 'ServerRequest'),
                'connect': ('RequestConnect', 'ClientRequest'),
                'accept_client': ('RequestGetClient', 'GetClientRequest'),
                'send_data': ('RequestSend', 'SendRequest'),
                'recv_data': ('RequestRecv', 'RecvRequest'),
                'close_server': ('CloseServerConnection', 'CloseServerRequest'),
                'close_client': ('CloseClientConnection', 'CloseClientRequest'),
            }
            with grpc.insecure_channel(args.simulator_addr) as channel:
                try:
                    stub = ev3sim.simulation.comm_schema_pb2_grpc.SimulationDealerStub(channel)
                    while True:
                        action_type, info = data['actions_queue'].get()
                        if action_type == 'write':
                            path, value = info
                            write_request = ev3sim.simulation.comm_schema_pb2.RobotWrite(
                                robot_id=robot_id, attribute_path=path, value=value)
                            stub.SendWriteInfo(write_request)
                        elif action_type == 'send_log':
                            message, source = info
                            log_request = ev3sim.simulation.comm_schema_pb2.RobotLogRequest(
                                robot_id=robot_id, log=message, source=source, print=args.send_logs)
                            stub.SendRobotLog(log_request)
                        elif action_type in replying_calls:
                            rpc_name, request_name = replying_calls[action_type]
                            rpc = getattr(stub, rpc_name)
                            request_cls = getattr(ev3sim.simulation.comm_schema_pb2, request_name)
                            d = rpc(request_cls(**info))
                            if not d.result:
                                raise CommunicationsError(d.msg)
                            data['write_results'].put(d)
                except Exception as e:
                    result.put(('Communications', e))

        def robot(filename, data, result):
            """Run the user's robot script with ev3dev2 redirected to the simulator.

            Patches ``time.time``/``time.sleep``, ``ev3dev2`` device access and
            the ``ev3sim.code_helpers`` communication classes, waits for the
            comms thread to post to ``data['start_robot_queue']``, then loads
            ``filename`` as ``__main__``.

            Args:
                filename: Path of the robot script to execute.
                data: Shared state dictionary (tick data, action/result
                    queues, conditions) used by all threads.
                result: Queue that receives ``True`` when the script finishes
                    or ``('Robots', exception)`` when anything raises.
            """
            data['thread_ids'][threading.get_ident()] = ev3sim.simulation.comm_schema_pb2.RobotLogSource.ROBOT
            try:
                from ev3dev2 import Device, DeviceNotFound

                def _await_write_result():
                    """Block until a reply appears on ``write_results`` and return it."""
                    replies = data['write_results']
                    # Poll with a short timeout instead of blocking in get().
                    # _qsize() is correct here: the queue mutex is already held
                    # through ``not_empty``.
                    with replies.not_empty:
                        while not replies._qsize():
                            replies.not_empty.wait(0.1)
                    return replies.get()

                class MockedFile:
                    """File-like view of one attribute in ``data['current_data']``."""

                    def __init__(self, data_path):
                        self.k2, self.k3, self.k4 = data_path
                        self.seek_point = 0

                    def read(self):
                        # Read once so every check sees the same snapshot.
                        value = data['current_data'][self.k2][self.k3][self.k4]
                        if isinstance(value, str):
                            # Strings honour the seek offset (offset 0 yields
                            # the whole string).
                            res = value[self.seek_point:]
                        else:
                            # Ints, and any other type (previously an
                            # UnboundLocalError), are rendered with str().
                            res = str(value)
                        return res.encode('utf-8')

                    def seek(self, i):
                        self.seek_point = i

                    def write(self, value):
                        data['actions_queue'].put(('write', (f'{self.k2} {self.k3} {self.k4}', value.decode())))

                    def flush(self):
                        pass

                def device__init__(self, class_name, name_pattern='*', name_exact=False, **kwargs):
                    """Replacement for ``Device.__init__`` that looks devices up in ``current_data``."""
                    self._path = [class_name]
                    self.kwargs = kwargs
                    self._attr_cache = {}

                    def get_index(file):
                        match = Device._DEVICE_INDEX.match(file)
                        if match:
                            return int(match.group(1))
                        else:
                            return None

                    if name_exact:
                        self._path.append(name_pattern)
                        self._device_index = get_index(name_pattern)
                    else:
                        # Pick the first device whose attributes satisfy every
                        # keyword filter; a list-valued filter matches any of
                        # its members.
                        for name in data['current_data'][self._path[0]].keys():
                            for k in kwargs:
                                if k not in data['current_data'][self._path[0]][name]:
                                    break
                                if isinstance(kwargs[k], list):
                                    if data['current_data'][self._path[0]][name][k] not in kwargs[k]:
                                        break
                                else:
                                    if data['current_data'][self._path[0]][name][k] != kwargs[k]:
                                        break
                            else:
                                # No filter rejected this device.
                                self._path.append(name)
                                self._device_index = get_index(name)
                                break
                        else:
                            # Every candidate was rejected.
                            print(kwargs, data['current_data'][self._path[0]])
                            self._device_index = None

                            raise DeviceNotFound("%s is not connected." % self)

                def _attribute_file_open(self, name):
                    return MockedFile((self._path[0], self._path[1], name))

                def wait(self, cond, timeout=None):
                    """Replacement for ``Motor.wait``: re-check ``cond`` once per tick.

                    ``timeout`` is divided by 1000 before being compared with
                    ``time.time()``.
                    """
                    import time
                    tic = time.time()
                    if cond(self.state):
                        return True
                    # Register to active_data_handlers so we can do something every tick without lagging.
                    handler_key = ' '.join(self._path)
                    data['active_data_handlers'][handler_key] = Queue(maxsize=0)
                    while True:
                        data['active_data_handlers'][handler_key].get()
                        res = cond(self.state)
                        if res or ((timeout is not None) and (time.time() >= tic + timeout / 1000)):
                            del data['active_data_handlers'][handler_key]
                            return cond(self.state)

                def get_time():
                    return data['tick'] / data['tick_rate']

                def sleep(seconds):
                    from time import time
                    cur = time()
                    with data['condition_updated']:
                        while True:
                            elapsed = time() - cur
                            if elapsed >= seconds:
                                return
                            data['condition_updated'].wait(0.1)

                def raiseEV3Error(*args, **kwargs):
                    raise ValueError("This simulator is not compatible with ev3dev. Please use ev3dev2: https://pypi.org/project/python-ev3dev2/")

                class MockedCommSocket:
                    """Socket-like endpoint whose traffic goes through the write thread."""

                    def __init__(self, hostaddr, port, sender_id):
                        self.hostaddr = hostaddr
                        self.port = str(port)
                        self.sender_id = sender_id

                    def send(self, d):
                        assert isinstance(d, str), "Can only send string data through simulator."
                        data['actions_queue'].put(('send_data', {
                            'robot_id': robot_id,
                            'client_id': self.sender_id,
                            'address': self.hostaddr,
                            'port': self.port,
                            'data': d,
                        }))
                        # Wait for it to be handled
                        _await_write_result()

                    def recv(self, buffer):
                        # At the moment the buffer is ignored.
                        data['actions_queue'].put(('recv_data', {
                            'robot_id': robot_id,
                            'client_id': self.sender_id,
                            'address': self.hostaddr,
                            'port': self.port,
                        }))
                        return _await_write_result().data

                    def close(self):
                        data['actions_queue'].put(('close_client', {
                            'robot_id': robot_id,
                            'address': self.hostaddr,
                            'port': self.port,
                            'server_id': self.sender_id,
                        }))
                        _await_write_result()

                class MockedCommClient(MockedCommSocket):
                    """Client endpoint: connects on construction and tracks itself as active."""

                    def __init__(self, hostaddr, port):
                        if hostaddr == 'aa:bb:cc:dd:ee:ff':
                            print(f"While this example will work, for competition bots please change the host address from {hostaddr} so competing bots can communicate separately.")
                        data['actions_queue'].put(('connect', {
                            'robot_id': robot_id,
                            'address': hostaddr,
                            'port': str(port),
                        }))
                        sender_id = _await_write_result().host_robot_id
                        super().__init__(hostaddr, port, sender_id)
                        data['active_connections'].append(self)

                    def close(self):
                        super().close()
                        data['active_connections'].remove(self)

                class MockedCommServer:
                    """Server endpoint: starts on construction and owns its accepted sockets."""

                    def __init__(self, hostaddr, port):
                        if hostaddr == 'aa:bb:cc:dd:ee:ff':
                            print(f"While this example will work, for competition bots please change the host address from {hostaddr} so competing bots can communicate separately.")
                        self.hostaddr = hostaddr
                        self.port = str(port)
                        data['actions_queue'].put(('begin_server', {
                            'robot_id': robot_id,
                            'address': self.hostaddr,
                            'port': self.port,
                        }))
                        _await_write_result()
                        self.sockets = []
                        data['active_connections'].append(self)

                    def accept_client(self):
                        data['actions_queue'].put(('accept_client', {
                            'robot_id': robot_id,
                            'address': self.hostaddr,
                            'port': self.port,
                        }))
                        client = _await_write_result()
                        self.sockets.append(MockedCommSocket(self.hostaddr, self.port, client.client_id))
                        return self.sockets[-1], (self.hostaddr, self.port)

                    def close(self):
                        # Close all clients, then close myself
                        for socket in self.sockets:
                            socket.close()
                        data['actions_queue'].put(('close_server', {
                            'robot_id': robot_id,
                            'address': self.hostaddr,
                            'port': self.port,
                        }))
                        _await_write_result()
                        data['active_connections'].remove(self)

                # Let the script import modules that live next to the caller.
                fake_path = sys.path.copy()
                fake_path.append(called_from)

                @mock.patch('time.time', get_time)
                @mock.patch('time.sleep', sleep)
                @mock.patch('ev3dev2.motor.Motor.wait', wait)
                @mock.patch('ev3dev2.Device.__init__', device__init__)
                @mock.patch('ev3dev2.Device._attribute_file_open', _attribute_file_open)
                @mock.patch('ev3sim.code_helpers.is_ev3', False)
                @mock.patch('ev3sim.code_helpers.is_sim', True)
                @mock.patch('ev3sim.code_helpers.CommServer', MockedCommServer)
                @mock.patch('ev3sim.code_helpers.CommClient', MockedCommClient)
                @mock.patch('sys.path', fake_path)
                def run_script(fname):
                    from importlib.machinery import SourceFileLoader
                    SourceFileLoader('__main__', fname).load_module()

                try:
                    import ev3dev
                    run_script = mock.patch('ev3dev.core.Device.__init__', raiseEV3Error)(run_script)
                except Exception:
                    # Best effort: ev3dev (v1) is optional, and importing or
                    # patching it may fail in several ways.
                    pass

                # Wait for the comms thread to report the first tick.  Written
                # as an explicit check so the blocking get() still runs under
                # ``python -O``, where ``assert`` statements are removed.
                if not data['start_robot_queue'].get():
                    raise AssertionError("Something went wrong...")
                run_script(filename)
            except Exception as e:
                result.put(('Robots', e))
                return
            result.put(True)

        # Single-slot mailbox: the first thread to finish (or fail) fills it.
        result_bucket = Queue(maxsize=1)

        from threading import Thread
        from ev3sim.file_helper import find_abs

        comm_thread = Thread(target=comms, args=(shared_data, result_bucket,), daemon=True)
        robot_thread = Thread(target=robot, args=(find_abs(args.filename, allowed_areas=['local', 'local/robots/', 'package', 'package/robots/']), shared_data, result_bucket,), daemon=True)
        write_thread = Thread(target=write, args=(shared_data, result_bucket,), daemon=True)

        comm_thread.start()
        write_thread.start()
        robot_thread.start()

        try:
            # Poll with a short timeout rather than blocking in get().
            # _qsize() is correct here: the queue mutex is already held
            # through ``not_empty``.
            with result_bucket.not_empty:
                while not result_bucket._qsize():
                    result_bucket.not_empty.wait(0.1)
            r = result_bucket.get()
            if r is not True:
                # Clear the actions queue IN PLACE.  The write thread may
                # already be blocked in get() on this queue object; swapping
                # in a new Queue would leave it waiting on the old one, so the
                # close() requests issued below would never be serviced.
                pending_actions = shared_data['actions_queue']
                with pending_actions.mutex:
                    pending_actions.queue.clear()
        except KeyboardInterrupt:
            # Treat Ctrl-C as a normal shutdown.
            r = True

        # Ensure all active connections are closed, provided the Communications thread is still running.
        if r is True or r[0] != 'Communications':
            for active_connection in shared_data['active_connections']:
                active_connection.close()

        with shared_data['condition_updated']:
            # Give the write thread a chance to drain any remaining actions.
            while shared_data['actions_queue']._qsize() > 0:
                shared_data['condition_updated'].wait(0.1)

            shared_data['condition_updated'].wait(0.5)

        if r is not True:
            print(f"An error occured in the {r[0]} thread. Raising an error now...")
            raise r[1]
Example #8
0
class Spider(object):
    """Multi-threaded scraper that parses queued URLs and writes results in chunks.

    Worker threads (``_scrap``) take URLs from ``self.urls``, parse them with
    ``self.parser`` and queue each newly named result on ``self.results``;
    one writer thread (``_write``) dumps results as JSON lines into
    ``<data_dir>/<chunk_num>.json``; a recurring timer (``_log``) appends
    progress lines to ``<log_dir>/log``.
    """

    def __init__(self,
                 worker_num=10,
                 chunk_size=10000,
                 log_interval=600,
                 data_dir='data',
                 log_dir='log'):
        """Create queues, caches, output directories and (unstarted) threads.

        Args:
            worker_num: Number of scraping threads.
            chunk_size: Number of results written to each output file.
            log_interval: Seconds between progress log entries.
            data_dir: Directory for result chunks (created if missing).
            log_dir: Directory for the progress log (created if missing).
        """
        self.chunk_size = chunk_size
        self.log_interval = log_interval
        self.urls = Queue()
        self.results = Queue()
        self.url_cache = Set()
        self.url_cache1 = Set()
        self.url_cache2 = Set()
        self.url_cache3 = Set()
        self.name_cache = Set()
        self.black_urls = Set()
        self.black_cache = Dict()
        self.chunk_num = 0
        self.parser = HtmlParser(home='http://www.a-hospital.com/')

        # Number of named results seen at the previous _log() call.
        self.last = 0
        # Loop flag for the worker and writer threads (truthy = keep going).
        self.state = 1

        if not os.path.exists(data_dir):
            os.mkdir(data_dir)
        if not os.path.exists(log_dir):
            os.mkdir(log_dir)
        self.data_dir = data_dir
        self.log_dir = log_dir

        self.writer = Thread(target=self._write)
        self.logger = Timer(log_interval, self._log)
        self.spiders = [Thread(target=self._scrap) for _ in range(worker_num)]

    def start(self, url):
        """Parse ``url``, queue every URL from ``blackset`` and start all threads.

        NOTE(review): ``blackset`` is a module-level collection defined
        outside this class; it is emptied here.  Links discovered on the
        start page are not followed.
        """
        _, new_data = self.parser.parse(url)
        self.results.put(new_data)
        self.url_cache1.add(url)
        self.url_cache.add(url)
        self.name_cache.add(new_data['name'])
        for _ in range(len(blackset)):
            seed_url = blackset.pop()
            self.urls.put(seed_url)
            self.url_cache1.add(seed_url)

        self.logger.start()
        self.writer.start()
        for spider in self.spiders:
            spider.start()

    def _write(self):
        """Write results to numbered JSON-lines files; only uses self.results."""
        while self.state:
            self.chunk_num += 1
            n = 0
            with open(
                    os.path.join(self.data_dir,
                                 '{}.json'.format(self.chunk_num)),
                    'wb') as fp:
                while n < self.chunk_size:
                    if not self.results.empty():
                        result = self.results.get()
                        line = json.dumps(result, ensure_ascii=False) + '\n'
                        fp.write(line.encode('utf8'))
                        n += 1
                    else:
                        sleep(10)

    def _log(self):
        """Append a progress line to the log and re-arm the timer."""
        now = len(self.name_cache)
        increase = now - self.last
        self.last = now
        if increase == 0:
            # NOTE(review): despite the message, state stays truthy, so
            # nothing actually stops — confirm whether this is intentional.
            self.state = 1
            print('Exit: no entities scraped in this round.')
        else:
            with open(os.path.join(self.log_dir, 'log'), 'ab+') as fp:
                message = '新增词条数量:{},已抓取词条数量:{};已获取url数量:{},缓存任务数量:{},缓存结果数量:{}.'.format(
                    increase,
                    now,
                    len(self.url_cache),
                    self.urls.qsize(),
                    self.results.qsize(),
                ) + '\n'
                fp.write(message.encode('utf8'))
        timer = Timer(self.log_interval, self._log)
        timer.start()

    def _scrap(self):
        """Worker loop: parse queued URLs and queue results with new names.

        URLs whose parse keeps failing are counted in ``black_cache`` and
        added to ``black_urls`` once the count reaches 10.  Links returned by
        the parser are currently not followed.
        """
        # Local import: the module may only import ``Queue`` itself.
        from queue import Empty

        while self.state:
            # get_nowait() avoids the empty()-then-get() race between
            # workers, where a blocking get() could hang forever after
            # another worker took the last URL.
            try:
                url = self.urls.get_nowait()
            except Empty:
                sleep(10)
                continue

            try:
                _, new_data = self.parser.parse(url)
            except Exception:
                print('unable to connnect')
                # Queued URLs are not necessarily present in url_cache.
                if url in self.url_cache:
                    self.url_cache.remove(url)
                # URLs that repeatedly fail are blacklisted; count each
                # failure exactly once.
                if url in self.black_cache:
                    self.black_cache[url] += 1
                else:
                    self.black_cache[url] = 1
                if self.black_cache[url] >= 10:
                    self.black_urls.add(url)
                    print(self.black_urls)
                continue

            name = new_data['name']
            if name not in self.name_cache:
                self.name_cache.add(name)
                self.results.put(new_data)
            sleep(3)
Example #9
0
def single_run(preset_filename,
               robots,
               bind_addr,
               seed,
               batch_file=None,
               override_settings=None):
    """Load a preset and run one simulation alongside its communication server.

    The preset is located with ``find_abs`` and parsed as YAML, ``robots`` is
    appended to the preset's ``"robots"`` list, and two daemon threads are
    started: the communication server and the simulation itself.  The calling
    thread then waits until either thread reports through a one-slot result
    queue.

    Args:
        preset_filename: Preset to look up in the local/package preset areas.
        robots: Extra robot entries appended to the preset's ``"robots"``.
        bind_addr: Address passed to ``start_server_with_shared_data``.
        seed: Seed for the global randomiser.
        batch_file: Optional batch file recorded on ``ScreenObjectManager``.
        override_settings: Optional mapping of patch target to value, applied
            on top of the preset's ``"settings"`` with ``mock.patch``.

    Raises:
        Exception: Whatever exception a worker thread reported.
    """
    # A ``{}`` default would be one dict shared by every call.
    if override_settings is None:
        override_settings = {}
    if batch_file:
        ScreenObjectManager.BATCH_FILE = batch_file
    ScreenObjectManager.PRESET_FILE = preset_filename
    import ev3sim

    try:
        latest_version = get_version_pypi("ev3sim")
        ScreenObjectManager.NEW_VERSION = latest_version != ev3sim.__version__
        if ScreenObjectManager.NEW_VERSION:
            update_message = f"""\

==========================================================================================
There is a new version of ev3sim available ({latest_version}).
Keeping an up to date version of ev3sim ensures you have the latest bugfixes and features.
Please update ev3sim by running the following command:
    python -m pip install -U ev3sim
==========================================================================================

"""
            print(update_message)
    except Exception:
        # The version check is best-effort, but a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit raised while it runs.
        ScreenObjectManager.NEW_VERSION = False

    Randomiser.createGlobalRandomiserWithSeed(seed)

    preset_file = find_abs(preset_filename,
                           allowed_areas=[
                               "local", "local/presets/", "package",
                               "package/presets/"
                           ])
    with open(preset_file, "r") as f:
        config = yaml.safe_load(f)

    config["robots"] = config.get("robots", []) + robots

    shared_data = {
        "tick": 0,  # Current tick.
        "write_stack":
        deque(),  # All write actions are processed through this.
        "data_queue": {},  # Simulation data for each bot.
        "active_count":
        {},  # Keeps track of which code connection each bot has.
        "bot_locks":
        {},  # Threading Locks and Conditions for each bot to wait for connection actions.
        "bot_communications_data":
        {},  # Buffers and information for all bot communications.
        "tick_updates":
        {},  # Simply a dictionary where the simulation tick will push static data, so the other methods are aware of when the simulation has exited.
        "events": {},  # Any events that should be sent to robots.
    }

    # One slot is enough: the first thread to finish or fail decides the run.
    result_bucket = Queue(maxsize=1)

    from threading import Thread
    from ev3sim.simulation.communication import start_server_with_shared_data

    def run(shared_data, result):
        # Report ``True`` on a clean exit, or ("Simulation", exception).
        try:
            runFromConfig(config, shared_data)
        except Exception as e:
            result.put(("Simulation", e))
            return
        result.put(True)

    # Handle any other settings modified by the preset.
    settings = config.get("settings", {})
    settings.update(override_settings)
    for keyword, value in settings.items():
        # Each setting wraps ``run`` in one more patch decorator.
        run = mock.patch(keyword, value)(run)

    comm_thread = Thread(target=start_server_with_shared_data,
                         args=(shared_data, result_bucket, bind_addr),
                         daemon=True)
    sim_thread = Thread(target=run,
                        args=(shared_data, result_bucket),
                        daemon=True)

    comm_thread.start()
    sim_thread.start()

    try:
        # Poll with a short timeout instead of one blocking ``get()``;
        # presumably so the main thread notices Ctrl-C promptly.
        with result_bucket.not_empty:
            while not result_bucket._qsize():
                result_bucket.not_empty.wait(0.1)
        r = result_bucket.get()
        # Chuck it back on the queue so that other threads know we are quitting.
        result_bucket.put(r)
        if r is not True:
            print(
                f"An error occurred in the {r[0]} thread. Raising an error now..."
            )
            time.sleep(1)
            raise r[1]
    except KeyboardInterrupt:
        pass
Example #10
0
class PooledConnection(object):
    """Connection pool.

    Idle connections are kept in a FIFO queue as
    ``{"connection": conn, "active_time": timestamp}`` entries, and
    ``_count`` tracks how many connections the pool has created and not yet
    closed.  Every queue operation is performed while holding ``_lock``.
    """
    def __init__(self,
                 connection_strings,
                 max_count=10,
                 min_free_count=1,
                 keep_conn_alive=False,
                 trace_sql=False,
                 log=None):
        """Create an empty pool.

        Args:
            connection_strings: Mapping read with ``.get`` for ``host``,
                ``port``, ``user``, ``password``, ``database`` and ``charset``.
            max_count: Upper bound on connections created by the pool.
            min_free_count: Number of idle connections kept queued; a
                connection released beyond this is closed instead.
            keep_conn_alive: Start the background keep-alive thread.
            trace_sql: Passed through to each ``Connection``.
            log: Logger to use; defaults to this module's logger.
        """
        self._max_count = max_count
        self._min_free_count = min_free_count
        self._connection_strings = connection_strings
        self._count = 0
        self._queue = Queue(max_count)
        self._lock = threading.Lock()
        self.trace = trace_sql

        self._logger = log if log is not None else logging.getLogger(__name__)

        if keep_conn_alive:
            self._ping_interval = 300
            self._run_ping()

    def __del__(self):
        """Close every connection still queued when the pool is collected."""
        while self._queue._qsize() > 0:
            with self._lock:
                try:
                    conn_info = self._queue.get(block=False)
                    conn = conn_info.get("connection") if conn_info else None
                except Empty:
                    conn = None

            # Close outside the lock.
            if conn:
                self._close_connection(conn)
            else:
                break

    def _run_ping(self):
        """Start a daemon thread that periodically pings idle connections.

        This keeps the pooled connections usable.
        """
        def ping_conn(per_seconds, log):
            # Once per interval (every 5 minutes), ping the connections that
            # have sat idle and remove the ones that no longer respond.
            pre_time = time.time()
            while True:
                if pre_time <= time.time() - per_seconds:
                    log.debug("pool connection count:(%s,%s)" %
                              (self._queue._qsize(), self._count))
                    try:
                        self._ping_idle_connections(per_seconds)
                    except Exception:
                        # One failed sweep must not end the keep-alive thread.
                        log.exception("mysql pool: keep-alive sweep failed")
                    pre_time = time.time()
                else:
                    time.sleep(5)

        thread = threading.Thread(target=ping_conn,
                                  args=(self._ping_interval, self._logger),
                                  daemon=True)
        thread.start()

    def _ping_idle_connections(self, per_seconds):
        """Ping each queued connection idle for at least ``per_seconds``.

        A connection that answers the ping is handed to ``conn.release()``;
        one that fails is handed to ``conn.drop()``.  The sweep stops at the
        first entry that is not yet due, and puts that entry back.
        """
        # ``_qsize`` is used, as elsewhere in this class, to avoid a deadlock
        # between the queue's own lock and the pool lock.
        while self._queue._qsize() > 0:
            conn = None
            usable = True
            with self._lock:
                try:
                    conn_info = self._queue.get(block=False)
                except Empty:
                    break
                if not conn_info:
                    continue
                if conn_info.get("active_time") > time.time() - per_seconds:
                    # Not due yet.  FIFO order means the entries behind it
                    # are not due either, so stop -- but put this one back:
                    # dropping it here would lose the connection while
                    # ``_count`` still counts it.  It re-enters at the tail,
                    # which at worst delays a later ping by one cycle.
                    self._queue.put(conn_info, block=False)
                    break
                conn = conn_info.get("connection")
                try:
                    conn._conn.ping()
                except Exception:
                    usable = False

            # Must stay outside the lock to avoid deadlocking in drop/release.
            if conn:
                if usable:
                    conn.release()
                else:
                    conn.drop()

    def _create_connection(self, auto_commit=True):
        """Create a new ``Connection`` and count it against ``max_count``.

        Raises:
            PoolError: If the pool has already created ``max_count``
                connections.
        """
        if self._count >= self._max_count:
            raise PoolError("Maximum number of connections exceeded!")

        conn = Connection(
            self,
            self.trace,
            self._logger,
            host=self._connection_strings.get("host"),
            port=self._connection_strings.get("port"),
            user=self._connection_strings.get("user"),
            password=self._connection_strings.get("password"),
            db=self._connection_strings.get("database"),
            charset=self._connection_strings.get("charset", "utf8"),
            autocommit=auto_commit,
            cursorclass=pymysql.cursors.DictCursor,
        )
        self._count += 1
        return conn

    def release_connection(self, connection):
        """Release a connection back to the pool.

        The connection is queued with the current time as its
        ``active_time``, unless ``min_free_count`` idle connections are
        already queued, in which case it is closed.
        """
        with self._lock:
            try:
                if self._queue._qsize() >= self._min_free_count:
                    self._close_connection(connection)
                else:
                    self._queue.put({
                        "connection": connection,
                        "active_time": time.time()
                    })
            except Exception:
                # Still best-effort, but no longer silent.
                self._logger.warning(
                    "mysql pool: failed to release connection", exc_info=True)

    def get_connection(self, timeout=15):
        """Get a connection, waiting up to ``timeout`` seconds for one.

        An idle connection is reused when available; otherwise a new one is
        created while fewer than ``max_count`` exist.  A falsy ``timeout``
        means a single attempt.

        Raises:
            PoolError: If no connection became available in time.
        """
        begin_time = time.time()

        def get_conn():
            """Make one attempt; return a connection or ``None``."""
            with self._lock:
                if self._queue._qsize() > 0:
                    try:
                        conn_info = self._queue.get(block=False)
                    except Empty:
                        return None
                    return conn_info.get("connection") if conn_info else None
                if self._count < self._max_count:
                    return self._create_connection()
                return None

        connection = get_conn()
        if connection:
            return connection

        if timeout:
            # Retry every 0.2 s until the deadline passes.
            while (time.time() - begin_time) < timeout:
                connection = get_conn()
                if connection:
                    break
                time.sleep(0.2)
        if not connection:
            raise PoolError(
                "mysql pool: get connection timeout, not enough connections"
                +
                " are available!(modify the maxConnections value maybe can fix it)"
            )
        return connection

    def _close_connection(self, connection):
        """Close a connection, decrementing ``_count`` if it reports success."""
        try:
            if connection._close():
                self._count -= 1
        except Exception:
            # Closing stays best-effort; record the failure for debugging.
            self._logger.debug("mysql pool: failed to close connection",
                               exc_info=True)
Example #11
0
    def run_simulation():
        class CommunicationsError(Exception):
            """Raised by the write thread when a request reports a false ``result``."""

        def comms(data, result):
            """Receive tick updates from the simulator and publish them in ``data``.

            Registers this thread as the COMMS log source, then repeatedly
            opens a channel and consumes the ``RequestTickUpdates`` stream.
            For every update it stores the tick, tick rate and decoded
            content, queues the update's events, puts ``True`` on each active
            data handler and notifies ``data["condition_updated"]``.

            A "Deadline Exceeded" rendezvous error closes the channel and
            reconnects; any other exception is reported on ``result`` as
            ``("Communications", exc)`` and ends the loop.
            """
            data["thread_ids"][threading.get_ident(
            )] = ev3sim.simulation.comm_schema_pb2.RobotLogSource.COMMS
            from grpc._channel import _MultiThreadedRendezvous

            logging.basicConfig()
            first_message = True
            while True:
                with grpc.insecure_channel(args.simulator_addr) as channel:
                    try:
                        stub = ev3sim.simulation.comm_schema_pb2_grpc.SimulationDealerStub(
                            channel)
                        response = stub.RequestTickUpdates(
                            ev3sim.simulation.comm_schema_pb2.RobotRequest(
                                robot_id=robot_id))
                        for r in response:
                            data["tick"] = r.tick
                            data["tick_rate"] = r.tick_rate
                            data["current_data"] = json.loads(r.content)
                            for e in data["current_data"]["events"]:
                                data["events"].put(e)
                            if first_message:
                                print("Connection initialised.")
                                first_message = False
                                data["start_robot_queue"].put(True)
                            # Iterate over a snapshot: the robot thread adds
                            # and removes handlers while waiting on a device,
                            # and iterating the live dict during such a change
                            # raises, which would end this thread.
                            for handler in list(
                                    data["active_data_handlers"].values()):
                                handler.put(True)
                            with data["condition_updating"]:
                                data["condition_updated"].notify()
                    except Exception as e:
                        # https://github.com/MelbourneHighSchoolRobotics/ev3sim/issues/55 pygame window dragging will deadline.
                        if not (isinstance(e, _MultiThreadedRendezvous)
                                and e._state.details == "Deadline Exceeded"):
                            result.put(("Communications", e))
                            break
                        # For some reason this needs to be done despite using the context manager.
                        channel.close()

        def write(data, result):
            """Forward queued robot actions to the simulator over one channel.

            Registers this thread as the WRITE log source, then loops forever
            taking ``(action_type, info)`` pairs from ``data["actions_queue"]``
            and issuing the matching stub call.  Replies the robot thread
            waits for are put on ``data["write_results"]``.  Any exception,
            including a ``CommunicationsError`` raised for a reply whose
            ``result`` is false, is reported on ``result`` as
            ``("Communications", exc)`` and ends the thread.
            """
            schema = ev3sim.simulation.comm_schema_pb2
            data["thread_ids"][threading.get_ident()] = schema.RobotLogSource.WRITE

            # Actions that share one shape: build the request from ``info``,
            # fail on a false ``result``, otherwise publish the reply.
            # action type -> (stub method name, request message name)
            acknowledged = {
                "begin_server": ("RequestServer", "ServerRequest"),
                "connect": ("RequestConnect", "ClientRequest"),
                "accept_client": ("RequestGetClient", "GetClientRequest"),
                "send_data": ("RequestSend", "SendRequest"),
                "recv_data": ("RequestRecv", "RecvRequest"),
                "close_server": ("CloseServerConnection",
                                 "CloseServerRequest"),
                "close_client": ("CloseClientConnection",
                                 "CloseClientRequest"),
            }

            with grpc.insecure_channel(args.simulator_addr) as channel:
                try:
                    stub = ev3sim.simulation.comm_schema_pb2_grpc.SimulationDealerStub(
                        channel)
                    while True:
                        action_type, info = data["actions_queue"].get()
                        if action_type == "write":
                            path, value = info
                            reply = stub.SendWriteInfo(
                                schema.RobotWrite(robot_id=robot_id,
                                                  attribute_path=path,
                                                  value=value))
                            # Only "mode" writes have a waiting reader.
                            if path.split()[-1] == "mode":
                                data["write_results"].put(reply)
                        elif action_type == "send_log":
                            message, source = info
                            stub.SendRobotLog(
                                schema.RobotLogRequest(robot_id=robot_id,
                                                       log=message,
                                                       source=source,
                                                       print=args.send_logs))
                        elif action_type in acknowledged:
                            method_name, request_name = acknowledged[
                                action_type]
                            reply = getattr(stub, method_name)(getattr(
                                schema, request_name)(**info))
                            if not reply.result:
                                raise CommunicationsError(reply.msg)
                            data["write_results"].put(reply)
                except Exception as e:
                    result.put(("Communications", e))

        def robot(filename, data, result):
            data["thread_ids"][threading.get_ident(
            )] = ev3sim.simulation.comm_schema_pb2.RobotLogSource.ROBOT
            try:
                from ev3dev2 import Device, DeviceNotFound

                @classmethod
                def handle_events(cls):
                    """Dispatch every queued event to the class handler of the same name.

                    Events can only be handled from inside mocked function
                    calls, so this drains whatever is pending at that point.
                    """
                    while True:
                        if not data["events"].qsize():
                            return
                        name, payload = data["events"].get()
                        getattr(cls, name)(payload)

                class MockedFile:
                    """File-like stand-in that reads and writes one simulated attribute.

                    ``data_path`` is a 3-tuple of keys; reads index
                    ``data["current_data"]`` with them and writes are queued
                    on ``data["actions_queue"]``.
                    """
                    def __init__(self, data_path):
                        self.k2, self.k3, self.k4 = data_path
                        if self.k4 == "mode":
                            # -1 is below any tick compared against it in read().
                            blocking_key = f"{self.k2} {self.k3} {self.k4}"
                            data["write_blocking_ticks"][blocking_key] = -1
                        self.seek_point = 0

                    def read(self):
                        """Return the attribute's current value as UTF-8 bytes."""
                        # If mode requires us to wait for values to update, then wait.
                        mode_key = f"{self.k2} {self.k3} mode"
                        if mode_key in data["write_blocking_ticks"]:
                            while not (data["write_blocking_ticks"][mode_key]
                                       < data["tick"]):
                                data["last_checked_tick"] = data["tick"]
                                wait_for_tick()
                        value = data["current_data"][self.k2][self.k3][
                            self.k4]
                        if isinstance(value, int):
                            # Integers ignore the seek position.
                            res = str(value)
                        if isinstance(value, str):
                            # Slicing from 0 yields the whole string.
                            res = value[self.seek_point:]
                        return res.encode("utf-8")

                    def seek(self, i):
                        """Remember the offset at which the next string read starts."""
                        self.seek_point = i

                    def write(self, value):
                        """Queue a write of ``value`` (bytes); "mode" writes wait for the reply."""
                        attribute_path = f"{self.k2} {self.k3} {self.k4}"
                        data["actions_queue"].put(
                            ("write", (attribute_path, value.decode())))
                        if self.k4 != "mode":
                            return
                        replies = data["write_results"]
                        with replies.not_empty:
                            while not replies._qsize():
                                replies.not_empty.wait(0.1)
                        # The reply is consumed but its content is not used.
                        replies.get()
                        data["write_blocking_ticks"][attribute_path] = data[
                            "tick"]

                    def flush(self):
                        """No-op: nothing is buffered."""
                        pass

                def device__init__(self,
                                   class_name,
                                   name_pattern="*",
                                   name_exact=False,
                                   **kwargs):
                    """Bind a device to an entry of the simulated device data.

                    With ``name_exact`` the given name is used as is.
                    Otherwise the first device of ``class_name`` whose
                    attributes satisfy every keyword filter is chosen; a list
                    filter matches any of its members.

                    Raises:
                        DeviceNotFound: If no device satisfies the filters.
                    """
                    self._path = [class_name]
                    self.kwargs = kwargs
                    self._attr_cache = {}

                    def get_index(device_name):
                        found = Device._DEVICE_INDEX.match(device_name)
                        return int(found.group(1)) if found else None

                    if name_exact:
                        self._path.append(name_pattern)
                        self._device_index = get_index(name_pattern)
                        return

                    def satisfies(candidate):
                        # True when the candidate meets every keyword filter.
                        for key, wanted in kwargs.items():
                            if key not in data["current_data"][
                                    self._path[0]][candidate]:
                                return False
                            actual = data["current_data"][
                                self._path[0]][candidate][key]
                            if isinstance(wanted, list):
                                if actual not in wanted:
                                    return False
                            elif actual != wanted:
                                return False
                        return True

                    for candidate in data["current_data"][
                            self._path[0]].keys():
                        if satisfies(candidate):
                            self._path.append(candidate)
                            self._device_index = get_index(candidate)
                            return

                    # Debug print for adding new devices.
                    # print(kwargs, data["current_data"][self._path[0]])
                    self._device_index = None

                    raise DeviceNotFound("%s is not connected." % self)

                def _attribute_file_open(self, name):
                    """Return a ``MockedFile`` for attribute ``name`` of this device."""
                    first_key, second_key = self._path[0], self._path[1]
                    return MockedFile((first_key, second_key, name))

                def wait(self, cond, timeout=None):
                    """Block until ``cond(self.state)`` holds or ``timeout`` elapses.

                    Args:
                        cond: Callable applied to ``self.state``.
                        timeout: Limit in milliseconds, or ``None`` for no
                            limit.  It is only checked when a tick update
                            arrives.

                    Returns:
                        ``True`` if the condition already holds on entry,
                        otherwise ``cond(self.state)`` evaluated once more
                        after the wait ends.
                    """
                    import time

                    tic = time.time()
                    if cond(self.state):
                        return True
                    # Register to active_data_handlers so we can do something every tick without lagging.
                    handler_key = " ".join(self._path)
                    data["active_data_handlers"][handler_key] = Queue(
                        maxsize=0)
                    try:
                        while True:
                            data["active_data_handlers"][handler_key].get()
                            if cond(self.state) or (
                                (timeout is not None) and
                                (time.time() >= tic + timeout / 1000)):
                                break
                    finally:
                        # Always unregister, even if ``cond`` or the state
                        # read raised; otherwise the comms thread keeps
                        # feeding a queue that nobody reads.
                        data["active_data_handlers"].pop(handler_key, None)
                    return cond(self.state)

                def get_time():
                    """Return the current tick divided by the tick rate (presumably seconds)."""
                    current_tick = data["tick"]
                    return current_tick / data["tick_rate"]

                def sleep(seconds):
                    """Return once at least ``seconds`` of wall-clock time have passed.

                    The time is re-checked whenever ``data["condition_updated"]``
                    is notified, or every 0.1 s.
                    """
                    from time import time

                    started = time()
                    with data["condition_updated"]:
                        while not (time() - started >= seconds):
                            data["condition_updated"].wait(0.1)

                data["last_checked_tick"] = data["tick"]

                def wait_for_tick():
                    """Wait for queued actions to drain, then for the tick to advance.

                    If the tick already differs from ``data["last_checked_tick"]``
                    once the action queue is empty, return immediately without
                    touching the marker; otherwise wait for the change and
                    record the new tick.
                    """
                    with data["condition_updated"]:
                        while data["actions_queue"].qsize():
                            data["condition_updated"].wait(0.1)
                        if data["last_checked_tick"] != data["tick"]:
                            return
                        while data["last_checked_tick"] == data["tick"]:
                            data["condition_updated"].wait(0.1)
                        data["last_checked_tick"] = data["tick"]

                def raiseEV3Error(*args, **kwargs):
                    """Always raise ``ValueError`` pointing users to ev3dev2; arguments are ignored."""
                    message = "This simulator is not compatible with ev3dev. Please use ev3dev2: https://pypi.org/project/python-ev3dev2/"
                    raise ValueError(message)

                class MockedCommSocket:
                    """Socket-like object whose operations are relayed through the action queue."""
                    def __init__(self, hostaddr, port, sender_id):
                        self.hostaddr = hostaddr
                        self.port = str(port)
                        self.sender_id = sender_id

                    def _round_trip(self, action, payload):
                        # Queue the action, then wait for its reply and return it.
                        data["actions_queue"].put((action, payload))
                        replies = data["write_results"]
                        with replies.not_empty:
                            while not replies._qsize():
                                replies.not_empty.wait(0.1)
                        return replies.get()

                    def send(self, d):
                        """Send string ``d`` and wait until the request has been handled."""
                        assert isinstance(
                            d, str
                        ), "Can only send string data through simulator."
                        self._round_trip(
                            "send_data",
                            {
                                "robot_id": robot_id,
                                "client_id": self.sender_id,
                                "address": self.hostaddr,
                                "port": self.port,
                                "data": d,
                            },
                        )

                    def recv(self, buffer):
                        """Request data and return the reply's ``data`` field."""
                        # At the moment the buffer is ignored.
                        reply = self._round_trip(
                            "recv_data",
                            {
                                "robot_id": robot_id,
                                "client_id": self.sender_id,
                                "address": self.hostaddr,
                                "port": self.port,
                            },
                        )
                        return reply.data

                    def close(self):
                        """Queue a "close_client" action and wait for its reply."""
                        self._round_trip(
                            "close_client",
                            {
                                "robot_id": robot_id,
                                "address": self.hostaddr,
                                "port": self.port,
                                "server_id": self.sender_id,
                            },
                        )

                class MockedCommClient(MockedCommSocket):
                    """Client socket: connects on construction and tracks itself in ``data["active_connections"]``."""
                    def __init__(self, hostaddr, port):
                        if hostaddr == "aa:bb:cc:dd:ee:ff":
                            print(
                                f"While this example will work, for competition bots please change the host address from {hostaddr} so competing bots can communicate separately."
                            )
                        request = {
                            "robot_id": robot_id,
                            "address": hostaddr,
                            "port": str(port),
                        }
                        data["actions_queue"].put(("connect", request))
                        # Wait for the reply to the connect request.
                        replies = data["write_results"]
                        with replies.not_empty:
                            while not replies._qsize():
                                replies.not_empty.wait(0.1)
                        reply = replies.get()
                        super().__init__(hostaddr, port, reply.host_robot_id)
                        data["active_connections"].append(self)

                    def close(self):
                        """Close the socket, then drop it from the active connections."""
                        super().close()
                        data["active_connections"].remove(self)

                class MockedCommServer:
                    """Server endpoint: begins a server on construction and hands out client sockets."""
                    def __init__(self, hostaddr, port):
                        if hostaddr == "aa:bb:cc:dd:ee:ff":
                            print(
                                f"While this example will work, for competition bots please change the host address from {hostaddr} so competing bots can communicate separately."
                            )
                        self.hostaddr = hostaddr
                        self.port = str(port)
                        data["actions_queue"].put(
                            ("begin_server", self._address_payload()))
                        # The reply is consumed but its content is not used.
                        self._next_result()
                        self.sockets = []
                        data["active_connections"].append(self)

                    def _address_payload(self):
                        # Fields shared by every request this server sends.
                        return {
                            "robot_id": robot_id,
                            "address": self.hostaddr,
                            "port": self.port,
                        }

                    @staticmethod
                    def _next_result():
                        # Wait until a reply is available, then take it.
                        replies = data["write_results"]
                        with replies.not_empty:
                            while not replies._qsize():
                                replies.not_empty.wait(0.1)
                        return replies.get()

                    def accept_client(self):
                        """Wait for a client; return ``(socket, (hostaddr, port))``."""
                        data["actions_queue"].put(
                            ("accept_client", self._address_payload()))
                        client = self._next_result()
                        accepted = MockedCommSocket(self.hostaddr, self.port,
                                                    client.client_id)
                        self.sockets.append(accepted)
                        return self.sockets[-1], (self.hostaddr, self.port)

                    def close(self):
                        """Close every accepted socket, then the server itself."""
                        # Close all clients, then close myself
                        for client_socket in self.sockets:
                            client_socket.close()
                        data["actions_queue"].put(
                            ("close_server", self._address_payload()))
                        self._next_result()
                        data["active_connections"].remove(self)

                class MockedButton:
                    """Simulator replacement for ``ev3dev2.button.Button``.

                    Every name in ``button_names`` is backed by a
                    ``MockedButtonSpecific`` device, or by ``None`` when that
                    device could not be created.  ``process`` compares the current
                    presses with the previously seen ones and fires the
                    ``on_<name>`` and ``on_change`` callbacks.
                    """

                    class MockedButtonSpecific(Device):
                        """A single brick button, read through ``get_attr_int``."""

                        _pressed = None

                        @property
                        def pressed(self):
                            """Return the current value of the ``pressed`` attribute."""
                            self._pressed, value = self.get_attr_int(
                                self._pressed, "pressed")
                            return value

                    button_names = [
                        "up", "down", "left", "right", "enter", "backspace"
                    ]

                    # Optional callbacks.  ``on_<name>`` is called with a bool
                    # (True when the button is now pressed); ``on_change`` is
                    # called with a list of ``(name, is_pressed)`` tuples.
                    on_up = None
                    on_down = None
                    on_left = None
                    on_right = None
                    on_enter = None
                    on_backspace = None
                    on_change = None

                    # Presses seen by the previous ``process`` call (None until
                    # the first call).
                    previous_presses = None

                    def __init__(self):
                        """Create one device per button name.

                        Raises:
                            Exception: whatever creating the "up" device raised;
                                failures for any other button are tolerated and
                                recorded as ``None``.
                        """
                        self.button_classes = {}
                        for name in self.button_names:
                            try:
                                self.button_classes[
                                    name] = MockedButton.MockedButtonSpecific(
                                        "brick_button", address=name)
                            except Exception:
                                if name == "up":
                                    raise
                                self.button_classes[name] = None

                    @property
                    def buttons_pressed(self):
                        """Return the names of all connected buttons that are pressed."""
                        return [
                            name for name, obj in self.button_classes.items()
                            if obj is not None and obj.pressed
                        ]

                    def _is_pressed(self, name):
                        """Return whether ``name`` is pressed; ValueError if not connected."""
                        if self.button_classes[name] is None:
                            raise ValueError(
                                f"{name.capitalize()} button not connected.")
                        return name in self.buttons_pressed

                    @property
                    def up(self):
                        """True when the up button is pressed."""
                        return self._is_pressed("up")

                    @property
                    def down(self):
                        """True when the down button is pressed."""
                        return self._is_pressed("down")

                    @property
                    def left(self):
                        """True when the left button is pressed."""
                        return self._is_pressed("left")

                    @property
                    def right(self):
                        """True when the right button is pressed."""
                        return self._is_pressed("right")

                    @property
                    def enter(self):
                        """True when the enter button is pressed."""
                        return self._is_pressed("enter")

                    @property
                    def backspace(self):
                        """True when the backspace button is pressed."""
                        return self._is_pressed("backspace")

                    def process(self, new_state=None):
                        """Fire callbacks for every button whose state changed.

                        Args:
                            new_state: set of currently pressed button names; when
                                None it is read from ``buttons_pressed``.
                        """
                        if new_state is None:
                            new_state = set(self.buttons_pressed)
                        if self.previous_presses is None:
                            # First call: nothing to compare against, so no
                            # button counts as changed.
                            self.previous_presses = new_state

                        changed_names = new_state.symmetric_difference(
                            self.previous_presses)
                        for name in changed_names:
                            bound_method = getattr(self, f"on_{name}")

                            if bound_method is not None:
                                bound_method(name in new_state)

                        # The original tested an undefined ``state_diff`` here,
                        # raising NameError whenever ``on_change`` was set.
                        if self.on_change is not None and changed_names:
                            self.on_change([(name, name in new_state)
                                            for name in changed_names])

                        self.previous_presses = new_state

                # Copy of sys.path with `called_from` appended; it is patched in
                # as "sys.path" by the decorators on run_script below.
                fake_path = sys.path.copy()
                fake_path.append(called_from)

                # Handle on the real importer, captured before
                # "builtins.__import__" is patched with import_mock.
                orig_import = __import__

                def import_mock(name, *args):
                    """Import ``name``, substituting a Mock for ``fcntl`` and ``evdev``.

                    Any other module name is forwarded, with the remaining
                    positional arguments, to the real ``__import__``.
                    """
                    needs_stub = name == "fcntl" or name == "evdev"
                    return mock.Mock() if needs_stub else orig_import(name, *args)

                @mock.patch("time.time", get_time)
                @mock.patch("time.sleep", sleep)
                @mock.patch("ev3dev2.motor.Motor.wait", wait)
                @mock.patch("ev3dev2.Device.__init__", device__init__)
                @mock.patch("ev3dev2.Device._attribute_file_open",
                            _attribute_file_open)
                @mock.patch("ev3dev2.button.Button", MockedButton)
                @mock.patch("ev3sim.code_helpers.is_ev3", False)
                @mock.patch("ev3sim.code_helpers.is_sim", True)
                @mock.patch("ev3sim.code_helpers.robot_id", robot_id)
                @mock.patch("ev3sim.code_helpers.CommServer", MockedCommServer)
                @mock.patch("ev3sim.code_helpers.CommClient", MockedCommClient)
                @mock.patch("ev3sim.code_helpers.wait_for_tick", wait_for_tick)
                @mock.patch("builtins.__import__", import_mock)
                @mock.patch("ev3sim.code_helpers.EventSystem.handle_events",
                            handle_events)
                @mock.patch("sys.path", fake_path)
                def run_script(fname):
                    """Execute the file ``fname`` as ``__main__`` with all patches active.

                    The patches listed in the decorators above are in effect only
                    while this function is running.
                    """
                    from importlib.machinery import SourceFileLoader

                    # NOTE(review): Loader.load_module() is deprecated in
                    # importlib; kept because it is what the original calls.
                    loader = SourceFileLoader("__main__", fname)
                    loader.load_module()

                # Optional extra patch: applied only when `import ev3dev`
                # succeeds; otherwise run_script is left unchanged.
                try:
                    import ev3dev

                    run_script = mock.patch("ev3dev.core.Device.__init__",
                                            raiseEV3Error)(run_script)
                except:
                    # NOTE(review): the bare except hides every failure in the
                    # block above, not just a missing ev3dev package.
                    pass

                # Block until an item arrives on start_robot_queue; a falsy item
                # fails the assertion, which the enclosing handler reports.
                assert data["start_robot_queue"].get(
                ), "Something went wrong..."
                run_script(filename)
            except Exception as e:
                result.put(("Robots", e))
                return
            result.put(True)

        # Single-slot mailbox for the outcome. The robot thread posts True on
        # success or ("Robots", exception) on failure; the code below expects
        # any other failure to use the same (thread name, exception) shape.
        result_bucket = Queue(maxsize=1)

        from threading import Thread
        from ev3sim.file_helper import find_abs

        comm_thread = Thread(
            target=comms,
            args=(
                shared_data,
                result_bucket,
            ),
            daemon=True,
        )
        robot_thread = Thread(
            target=robot,
            args=(
                find_abs(args.filename,
                         allowed_areas=[
                             "local", "local/robots/", "package",
                             "package/robots/"
                         ]),
                shared_data,
                result_bucket,
            ),
            daemon=True,
        )
        write_thread = Thread(
            target=write,
            args=(
                shared_data,
                result_bucket,
            ),
            daemon=True,
        )

        comm_thread.start()
        write_thread.start()
        robot_thread.start()

        try:
            # Wait in short slices rather than one indefinite blocking get().
            with result_bucket.not_empty:
                while not result_bucket._qsize():
                    result_bucket.not_empty.wait(0.1)
            r = result_bucket.get()
            if r is not True:
                # Clear the actions queue.
                shared_data["actions_queue"] = Queue()
        except KeyboardInterrupt:
            # A user interrupt is treated as a normal finish.
            r = True

        # Ensure all active connections are closed, provided the Communications thread is still running.
        if r is True or r[0] != "Communications":
            # close() removes the connection from this same collection, so
            # iterate over a snapshot; iterating it directly skips entries.
            for active_connection in list(shared_data["active_connections"]):
                # Skip anything an earlier close() already removed.
                if active_connection in shared_data["active_connections"]:
                    active_connection.close()

        # Give pending actions a chance to drain, then wait a final half second.
        with shared_data["condition_updated"]:
            while shared_data["actions_queue"]._qsize() > 0:
                shared_data["condition_updated"].wait(0.1)

            shared_data["condition_updated"].wait(0.5)

        if r is not True:
            print(
                f"An error occurred in the {r[0]} thread. Raising an error now..."
            )
            raise r[1]
Example #12
0
import sys

# Queue command interpreter: the first stdin line gives the number of commands,
# followed by one command per line (push X / pop / size / empty / front / back).
# Operations on an empty queue print -1.
from queue import Empty, Queue

N = int(sys.stdin.readline())
queue = Queue(N)
for _ in range(N):
    order = sys.stdin.readline().split()
    if order[0] == "push":
        queue.put_nowait(order[1])
    elif order[0] == "pop":
        try:
            print(queue.get_nowait())
        except Empty:
            print(-1)
    elif order[0] == "size":
        print(queue.qsize())
    elif order[0] == "empty":
        print(1 if queue.empty() else 0)
    elif order[0] == "front":
        # Peek at the underlying deque without removing the item.
        try:
            print(queue.queue[0])
        except IndexError:
            print(-1)
    elif order[0] == "back":
        # The original handler was cut off here; it mirrors "front".
        try:
            print(queue.queue[-1])
        except IndexError:
            print(-1)
Example #13
0
class Spider(object):
    def __init__(self, worker_num=10, chunk_size=10000, log_interval=600,
                 data_dir='data', log_dir='log'):
        """Create the queues, caches, output directories and unstarted threads.

        Args:
            worker_num: number of scraper threads to create (each runs ``_scrap``).
            chunk_size: number of results ``_write`` puts into one chunk file.
            log_interval: delay in seconds before each ``_log`` run.
            data_dir: directory for the JSON chunk files; created if missing.
            log_dir: directory for the log file; created if missing.
        """
        self.chunk_size = chunk_size
        self.log_interval = log_interval
        self.urls = Queue()
        self.results = Queue()
        self.url_cache = Set()
        self.name_cache = Set()
        self.black_urls = Set()
        self.black_cache = Dict()
        self.chunk_num = 0
        self.parser = HtmlParser(home='https://baike.baidu.com')

        # Size of name_cache at the previous _log run.
        self.last = 0
        # 1 while running; _log sets it to 0 to stop the worker loops.
        self.state = 1

        # exist_ok avoids the check-then-create race and also creates any
        # missing parent directories.
        os.makedirs(data_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)
        self.data_dir = data_dir
        self.log_dir = log_dir

        self.writer = Thread(target=self._write)
        self.logger = Timer(log_interval, self._log)
        self.spiders = [Thread(target=self._scrap) for _ in range(worker_num)]


    def start(self, url):
        new_urls, new_data = self.parser.parse(url)
        self.results.put(new_data)
        self.url_cache.add(url)
        self.name_cache.add(new_data['name'])
        for url in new_urls:
            self.urls.put(url)
        
        self.logger.start()
        self.writer.start()
        for spider in self.spiders:
            spider.start()
        
    def _write(self):
        """只使用self.results
           新增功能:保存爬取页面的html源代码
        """

        while self.state:
            self.chunk_num += 1
            n = 0
            with open(os.path.join(self.data_dir, '{}.json'.format(self.chunk_num)), 'wb') as fp:
                while n < self.chunk_size:
                    if not self.results.empty():
                        result = self.results.get()
                        line = json.dumps(result, ensure_ascii=False) + '\n'
                        fp.write(line.encode('utf8'))
                        print('写入词条成功')

                        n += 1
                    else:
                        sleep(10)

    def _log(self):
        now = len(self.name_cache)
        increase = now - self.last
        self.last = now
        if increase == 0:
            self.state = 0
            print('Exit: no entities scraped in this round.')
            exit()
        else:
            with open(os.path.join(self.log_dir, 'log'), 'ab+') as fp:
                message = '新增词条数量:{},已抓取词条数量:{};已获取url数量:{},缓存任务数量:{},缓存结果数量:{}.'.format(
                    increase, now, len(self.url_cache), self.urls._qsize(), self.results._qsize(),
                ) + '\n'
                fp.write(message.encode('utf8'))
        timer = Timer(self.log_interval, self._log)
        timer.start() 

    def _scrap(self):
        while self.state:
            if not self.urls.empty():
                url = self.urls.get()
                try:
                    new_urls, new_data = self.parser.parse(url) # TODO
                except:
                    print('url爬取失败')
                    self.url_cache.remove(url)
                    # 多次请求不成功的url加入黑名单
                    if url not in self.black_cache:
                        self.black_cache[url] = 1
                    self.black_cache[url] += 1
                    if self.black_cache[url] >= 3:
                        self.black_urls.add(url)
                        print('url黑名单已更新')
                    continue
                name = new_data['name']
                if name not in self.name_cache:
                    self.name_cache.add(name)
                    # TODO:设置筛选条件
                    label_related = False
                    summary_related = False
                    for label_keyword in label_keywords:
                        if label_keyword in new_data['labels']:
                            label_related = True
                            break
                    if not new_data['labels']:  # 针对无标签但与主题有关的特殊情况,设他们为True
                        label_related = True
                    for summary_keyword in summary_keywords:
                        if summary_keyword in new_data['summary']:
                            summary_related = True
                            break
                    if label_related or summary_related:
                        self.results.put(new_data)
                        print(('获取条目:' + name).encode('GBK', 'ignore').decode('GBk'))
                    else:
                        print("\033[0;31m%s\033[0m" % ('放弃条目 ' + name + ' :分类不符').encode('GBK', 'ignore').decode('GBk'))
                for url in new_urls:
                    if url not in self.url_cache and url not in self.black_urls:
                        self.url_cache.add(url)
                        self.urls.put(url)
            else:
                sleep(10)