Example #1
def start_callback():
    """
    Starts the collection callback

    """
    global getDataCallback
    if getDataCallback is None:
        getDataCallback = PeriodicCallback(process_data, conf.REFRESH_FREQ)
        getDataCallback.start()
    elif not getDataCallback.is_running():
        getDataCallback.start()
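A minimal, self-contained sketch of the same guard pattern; the process_data body and the 500 ms interval are placeholders (the original uses conf.REFRESH_FREQ):

from tornado.ioloop import IOLoop, PeriodicCallback

getDataCallback = None

def process_data():
    print("tick")  # stand-in for the real collection work

def start_callback():
    global getDataCallback
    if getDataCallback is None:
        # callback_time is given in milliseconds
        getDataCallback = PeriodicCallback(process_data, 500)
    if not getDataCallback.is_running():
        getDataCallback.start()

start_callback()
IOLoop.current().call_later(2, IOLoop.current().stop)  # run for ~2 s, then exit
IOLoop.current().start()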
Example #2
class Timer:
    __instance = None
    __timer = None
    __callback = None
    __repeat = True
    __args = []
    __kwargs = {}

    def __init__(self, func, repeat=True, timeout=10, args=None, kwargs=None):
        self.__callback = func
        self.__repeat = repeat
        self.__timeout = timeout
        self.__args = args if args else list()
        self.__kwargs = kwargs if kwargs else dict()
        if self.__repeat:
            self.__timer = PeriodicCallback(self.__callback_wrapper,
                                            self.__timeout * 1000)
            self.__timer.start()
        else:
            self.__timer = IOLoop.current().call_later(self.__timeout,
                                                       self.__callback_wrapper)

    def restart(self):
        if self.__timer is not None:
            self.__stop()
            self.__start()

    def start(self):
        if self.__timer is None:
            self.__start()

    def stop(self):
        if self.__timer is not None:
            self.__stop()

    def __start(self):
        if self.__repeat and not self.__timer.is_running():
            self.__timer.start()
        # else:
        #     print("calling later")
        #     self.__timer = IOLoop.current().call_later(self.__timeout, self.__callback_wrapper, self)

    def __stop(self):
        if self.__repeat:
            self.__timer.stop()
        else:
            IOLoop.current().remove_timeout(self.__timer)

    def __callback_wrapper(self):
        IOLoop.current().add_callback(self.__callback, *self.__args,
                                      **self.__kwargs)
        if self.__repeat:
            self.__start()
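A usage sketch for this Timer wrapper; the callback name and intervals are illustrative, and PeriodicCallback/IOLoop are assumed imported from tornado.ioloop as the class requires:

from tornado.ioloop import IOLoop

def heartbeat(name):
    print("heartbeat from", name)

# repeat=True starts a PeriodicCallback immediately, firing every 2 seconds
t = Timer(heartbeat, repeat=True, timeout=2, args=["worker-1"])
IOLoop.current().call_later(5, t.stop)  # stop the timer after ~5 s
IOLoop.current().call_later(6, IOLoop.current().stop)
IOLoop.current().start()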
Example #3
class ProcessWatchdog(object):
    def __init__(self, interval=1000):
        self.interval = interval
        self._process_to_check = {}
        self._periodic_callback = PeriodicCallback(self._check_processes, self.interval)

    def register_process(self, process, on_dead_callback):
        self._process_to_check[process] = on_dead_callback

    def start(self):
        if not self._periodic_callback.is_running():
            self._periodic_callback.start()

    def stop(self):
        if self._periodic_callback.is_running():
            self._periodic_callback.stop()

    def _check_processes(self):
        for process in self._process_to_check:
            if not process.is_running():
                self._process_to_check[process](process)
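Note that is_running() is called on two different kinds of objects here: the Tornado PeriodicCallback guarding start/stop, and the registered process objects, which are assumed to expose an is_running() method of their own (as psutil.Process does). A hypothetical usage sketch:

class FakeProcess:
    """Stand-in for e.g. a psutil.Process; only needs is_running()."""
    def __init__(self):
        self.alive = True

    def is_running(self):
        return self.alive

watchdog = ProcessWatchdog(interval=500)
proc = FakeProcess()
watchdog.register_process(proc, lambda p: print("process died:", p))
watchdog.start()  # idempotent: a second start() is a no-op while running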
Example #4
class CallbackHandler():
    def __init__(self):
        self.callback_fn = None

    def start_callback(self):
        if self.callback_fn is None:
            self.callback_fn = PeriodicCallback(process_data, conf.REFRESH_INTERVAL)
            self.callback_fn.start()
        elif not self.callback_fn.is_running():
            self.callback_fn.start()

    def stop_callback(self):
        self.callback_fn.stop()
Example #5
class Timer:
    __instance = None
    __timer = None
    __callback = None
    __repeat = True
    __args = []
    __kwargs = {}

    def __init__(self, func, repeat=True, timeout=10, args=None, kwargs=None):
        self.__callback = func
        self.__repeat = repeat
        self.__timeout = timeout
        self.__args = args if args else list()
        self.__kwargs = kwargs if kwargs else dict()
        if self.__repeat:
            self.__timer = PeriodicCallback(self.__callback_wrapper,
                                            self.__timeout * 1000)

    def restart(self):
        if self.__timer is not None:
            self.__stop()
            self.__start()

    def start(self):
        if self.__timer is None:
            self.__start()

    def stop(self):
        if self.__timer is not None:
            self.__stop()

    def __start(self):
        if self.__repeat and not self.__timer.is_running():
            self.__timer.start()
        else:
            self.__timer = IOLoop.current().call_later(self.__timeout,
                                                       self.__callback_wrapper)

    def __stop(self):
        if self.__repeat:
            self.__timer.stop()
        else:
            IOLoop.current().remove_timeout(self.__timer)

    def __callback_wrapper(self):
        try:
            self.__callback(*self.__args, **self.__kwargs)
        except Exception:
            # Swallow callback errors so the timer keeps running.
            pass
        if self.__repeat:
            self.__start()
Example #6
class Session:
    def __init__(self):
        self.cars = dict()
        self.timer = PeriodicCallback(self.on_timer, 50)
        self.users = set()

    def publish(self, type, data):
        msg = {"type": type, "data": data}
        for u in self.users:
            u.write_message(msg)

    def on_timer(self):
        for id in self.cars:
            car = self.cars[id]
            car.step()
            self.publish("timer", car.__dict__)

    def reset(self):
        self.stop()
        self.cars.clear()

    def start(self):
        if not self.timer.is_running():
            self.timer.start()
            if self.timer.is_running():
                print("session started")
            else:
                print("session failed to start")
        self.publish("cars number", len(self.cars))

    def stop(self):
        self.timer.stop()
        if not self.timer.is_running():
            print("session stopped")
        else:
            print("session failed to stop")
Example #7
def __init__(self, *args, **kwargs):
    super(EnvironmentKernelSpecManager, self).__init__(*args, **kwargs)
    self.log.info("Using EnvironmentKernelSpecManager...")
    self._env_data_cache = {}
    if self.refresh_interval > 0:
        try:
            from tornado.ioloop import PeriodicCallback, IOLoop
            # Initial loading NOW
            IOLoop.current().call_later(0, callback=self._update_env_data, initial=True)
            # Later updates
            updater = PeriodicCallback(callback=self._update_env_data,
                                       callback_time=1000 * 60 * self.refresh_interval)
            updater.start()
            if not updater.is_running():
                raise Exception()
            self._periodic_updater = updater
            self.log.info("Started periodic updates of the kernel list (every %s minutes).", self.refresh_interval)
        except Exception:
            self.log.exception("Error while trying to enable periodic updates of the kernel list.")
    else:
        self.log.info("Periodic updates of the kernel list are DISABLED.")
Example #8
class PollingHandler(BaseSocketHandler):
    """
    This class represents separate websocket connection.
    
    Attributes:
        tracker: tornado.ioloop.PeriodicCallback with get_location method as a
            callback. Starts when user pushes "track" button. When started, it
            runs every 5 seconds to find out and update character's location.
            
        q: tornado.queues.Queue used for running tasks successively.
        
        updating: A flag indicates if router is being updated or not. Required
            to avoid race conditions.
    """
    def __init__(self, *args, **kwargs):
        super(PollingHandler, self).__init__(*args, **kwargs)
        # Set up a Tornado PeriodicCallback with self.get_location; we
        # will start and stop it later on track/untrack commands
        self.tracker = PeriodicCallback(self.get_location, 5000)
        self.q = Queue(maxsize=5)
        self.updating = False

    async def get_location(self):
        """
        The callback for the `self.tracker`.
        
        Makes an API call, updates router and sends updated data to the
        front-end.
        """
        # Call API to find out current character location
        location = await self.character(self.user_id, '/location/', 'GET')

        if location:
            # Set `updating` flag to not accept periodic updates
            # from front-end, to not overwrite new data
            self.updating = True
            user = self.user
            graph_data = await user['router'].update(
                location['solarSystem']['name'])
            if graph_data:
                message = ['update', graph_data]
                logging.warning(graph_data)
                await self.safe_write(message)
            self.updating = False
        else:
            message = ['warning', 'Log into game to track your route']
            await self.safe_write(message)

    async def scheduler(self):
        """
        Scheduler for user tasks.
        
        Waits until there is new item in the queue, does task, resolves task.
        Tornado queues doc: http://www.tornadoweb.org/en/stable/queues.html
        
        Since we have no guarantee of the order of the incoming messages
        (new message from front-end can come before current is done),
        we need to ensure all tasks to run successively.
        Here comes the asynchronous generator.
        """
        logging.info(f"Scheduler started for {self.request.remote_ip}")

        # Wait on each iteration until there's actually an item available
        async for item in self.q:
            logging.debug(f"Started resolving task for {item}...")
            user = self.user
            try:
                if item == 'recover':
                    # Send saved route
                    await self.safe_write(['recover', user['router'].recovery])

                elif item == 'track':
                    # Start the PeriodicCallback
                    if not self.tracker.is_running():
                        self.tracker.start()

                elif item in ['stop', 'reset']:
                    # Stop the PeriodicCallback
                    if self.tracker.is_running():
                        self.tracker.stop()
                    # Clear all saved data
                    if item == 'reset':
                        await user['router'].reset()

                elif item[0] == 'backup':
                    # Do not overwrite user object while it's updating,
                    # just in case, to avoid race conditions.
                    if not self.updating:
                        await user['router'].backup(item[1])
            finally:
                self.q.task_done()
                logging.debug(f'Task "{item}" done.')

    async def task(self, item):
        """
        Intermediary between `self.on_message` and `self.scheduler`.
        
        Since we cannot do anything asynchronous in the `self.on_message`,
        this method can handle any additional non-blocking stuff if we need it.
        
        :argument item: item to pass to the `self.scheduler`.
        """
        await self.q.put(item)
        #await self.q.join()

    def open(self):
        """
        Triggers on successful websocket connection.
        
        Ensures user is authorized, spawns `self.scheduler` for user
        tasks, adds this websocket object to the connections pool,
        spawns the recovery of the saved route.
        """
        logging.info(f"Connection received from {self.request.remote_ip}")
        if self.user_id:
            self.spawn(self.scheduler)
            self.vagrants.append(self)

            self.spawn(self.task, 'recover')
        else:
            self.close()

    def on_message(self, message):
        """
        Triggers on receiving front-end message.
        
        :argument message: front-end message.
        
        Receives user commands and passes them
        to the `self.scheduler` via `self.task`.
        """
        self.spawn(self.task, json_decode(message))

    def on_close(self):
        """
        Triggers on closed websocket connection.
        
        Removes this websocket object from the connections pool,
        stops `self.tracker` if it is running.
        """
        self.vagrants.remove(self)
        if self.tracker.is_running():
            self.tracker.stop()
        logging.info("Connection closed, " + self.request.remote_ip)
Example #9
class Executor:
    """
    Main object communicate to server and control tasks running.
    """
    # heartbeat check interval, default: 10 seconds
    heartbeat_interval = 10 * 1000
    # check task interval, default: 10 seconds
    checktask_interval = 10 * 1000
    custom_ssl_cert = False

    def __init__(self, config=None):

        self.ioloop = IOLoop.current()
        self.node_id = None
        self.status = EXECUTOR_STATUS_OFFLINE
        if config is None:
            config = AgentConfig()
        self.config = config
        self.tags = config.get('tags')
        self.checktask_callback = None

        self.task_slots = TaskSlotContainer(config.getint('slots', 1))

        server_base = config.get('server')
        if urlparse(server_base).scheme == '':
            if config.getint('server_https_port'):
                server_https_port = config.getint('server_https_port')
                server_base = 'https://%s:%d' % (server_base,
                                                 server_https_port)
            else:
                server_base = 'http://%s:%d' % (server_base,
                                                config.getint('server_port'))
        self.service_base = server_base
        client_cert = config.get('client_cert') or None
        client_key = config.get('client_key') or None
        self.keep_job_files = config.getboolean('debug', False)
        LOGGER.debug('keep_job_files %s', self.keep_job_files)

        httpclient_defaults = {
            'request_timeout': config.getfloat('request_timeout', 60)
        }
        if client_cert:
            httpclient_defaults['client_cert'] = client_cert
        if client_key:
            httpclient_defaults['client_key'] = client_key
        if os.path.exists('keys/ca.crt'):
            self.custom_ssl_cert = True
            httpclient_defaults['ca_certs'] = 'keys/ca.crt'
            httpclient_defaults['validate_cert'] = True
        LOGGER.debug(httpclient_defaults)

        node_key = None
        secret_key = None
        if os.path.exists('conf/node.conf'):
            parser = ConfigParser()
            parser.read('conf/node.conf')
            node_key = parser.get('agent', 'node_key')
            secret_key = parser.get('agent', 'secret_key')

        self.httpclient = NodeAsyncHTTPClient(self.service_base,
                                              key=node_key,
                                              secret_key=secret_key,
                                              defaults=httpclient_defaults)
        self.runner_factory = RunnerFactory(config)

    def start(self):
        self.register_node()
        # init heartbeat periodic callback
        heartbeat_callback = PeriodicCallback(self.send_heartbeat,
                                              self.heartbeat_interval)
        heartbeat_callback.start()


        # init checktask periodic callback
        # the new version uses the HEARTBEAT response to tell the client
        # whether there is a new task in the server queue, so do not start
        # this periodic method. But if the server is an old version without
        # that header info, the client still needs to poll GET_TASK.
        self.checktask_callback = PeriodicCallback(self.check_task,
                                                   self.checktask_interval)

        self.ioloop.start()

    def check_header_new_task_on_server(self, headers):
        try:
            new_task_on_server = headers['X-DD-New-Task'] == 'True'
            if new_task_on_server:
                self.ioloop.call_later(0, self.check_task)
        except KeyError:
            # if the response contains no 'X-DD-New-Task' header, it might
            # be an old version server, so fall back to the old GET_TASK
            # polling mode
            if not self.checktask_callback.is_running():
                self.checktask_callback.start()

    @coroutine
    def send_heartbeat(self):
        if self.status == EXECUTOR_STATUS_OFFLINE:
            self.register_node()
            return
        url = urljoin(self.service_base, '/nodes/%d/heartbeat' % self.node_id)
        running_tasks = ','.join([task_executor.task.id for task_executor in
                                  self.task_slots.tasks()])
        request = HTTPRequest(url=url, method='POST', body='',
                              headers={'X-DD-RunningJobs': running_tasks})
        try:
            res = yield self.httpclient.fetch(request)
            if 'X-DD-KillJobs' in res.headers:
                LOGGER.info('received kill signal %s',
                            res.headers['X-DD-KillJobs'])
                for job_id in json.loads(res.headers['X-DD-KillJobs']):
                    task_to_kill = self.task_slots.get_task(job_id)
                    if task_to_kill:
                        LOGGER.info('%s', task_to_kill)
                        task_to_kill.kill()
                        self.task_slots.remove_task(task_to_kill)
            self.check_header_new_task_on_server(res.headers)
        except HTTPError as ex:
            if ex.code == 400:
                logging.warning('Node expired, register now.')
                self.status = EXECUTOR_STATUS_OFFLINE
                self.register_node()
        except URLError as ex:
            logging.warning('Cannot connect to server. %s', ex)
        except Exception as ex:
            logging.warning('Cannot connect to server. %s', ex)

    @coroutine
    def register_node(self):
        if self.custom_ssl_cert and \
                self.service_base.startswith('https') and \
                not os.path.exists('keys/ca.crt'):
            httpclient = AsyncHTTPClient(force_instance=True)
            cacertrequest = HTTPRequest(urljoin(self.service_base, 'ca.crt'),
                                        validate_cert=False)
            try:
                cacertresponse = yield httpclient.fetch(cacertrequest)
                if not os.path.exists('keys'):
                    os.mkdir('keys')
                open('keys/ca.crt', 'wb').write(cacertresponse.body)
            except HTTPError:
                LOGGER.info('Custom CA cert retrieval failed.')
        try:

            node_id = yield self.httpclient.node_online(self.tags)
            self.status = EXECUTOR_STATUS_ONLINE
            self.node_id = node_id
            LOGGER.info('node %d registered', self.node_id)
        except URLError as ex:
            logging.warning('Cannot connect to server, %s', ex)
        except socket.error as ex:
            logging.warning('Cannot connect to server, %s', ex)

    def on_new_task_reach(self, task):
        if task is not None:
            task_executor = self.execute_task(task)
            self.task_slots.put_task(task_executor)

    @staticmethod
    def parse_task_data(response_data):
        task = SpiderTask()
        task.id = response_data['data']['task']['task_id']
        task.spider_id = response_data['data']['task']['spider_id']
        task.project_name = response_data['data']['task']['project_name']
        task.project_version = response_data['data']['task']['version']
        task.spider_name = response_data['data']['task']['spider_name']
        if 'extra_requirements' in response_data['data']['task'] and \
                response_data['data']['task']['extra_requirements']:
            task.extra_requirements = response_data['data']['task']['extra_requirements']
        if 'spider_parameters' in response_data['data']['task']:
            task.spider_parameters = response_data['data']['task'][
                'spider_parameters']
        else:
            task.spider_parameters = {}
        task.settings = response_data['data']
        return task

    @coroutine
    def get_next_task(self):
        url = urljoin(self.service_base, '/executing/next_task')
        post_data = urlencode({'node_id': self.node_id})
        request = HTTPRequest(url=url, method='POST', body=post_data)
        try:
            response = yield self.httpclient.fetch(request)
            response_content = response.body
            response_data = json.loads(response_content)
            LOGGER.debug(url)
            LOGGER.debug(response_content)
            if response_data['data'] is not None:
                task = Executor.parse_task_data(response_data)
                self.on_new_task_reach(task)
        except URLError:
            LOGGER.warning('Cannot connect to server')

    def execute_task(self, task):
        egg_downloader = ProjectEggDownloader(service_base=self.service_base,
                                              client=self.httpclient)
        executor = TaskExecutor(task, egg_downloader=egg_downloader,
                                runner_factory=self.runner_factory,
                                keep_files=self.keep_job_files)
        pid = None
        future = executor.execute()
        self.post_start_task(task, pid)
        executor.on_subprocess_start = self.post_start_task
        self.ioloop.add_future(future, self.task_finished)
        return executor

    @coroutine
    def post_start_task(self, task, pid):
        url = urljoin(self.service_base, '/jobs/%s/start' % task.id)
        post_data = urlencode({'pid': pid or ''})
        try:
            request = HTTPRequest(url=url, method='POST', body=post_data)
            yield self.httpclient.fetch(request)
        except URLError as ex:
            LOGGER.error('Error when post_start_task: %s', ex)
        except HTTPError as ex:
            LOGGER.error('Error when post_start_task: %s', ex)

    def complete_task(self, task_executor, status):
        """
        @type task_executor: TaskExecutor
        """
        url = urljoin(self.service_base, '/executing/complete')

        log_file = open(task_executor.output_file, 'rb')

        post_data = {
            'task_id': task_executor.task.id,
            'status': status,
            'log': log_file,
        }

        items_file = None
        if task_executor.items_file and \
                os.path.exists(task_executor.items_file):
            post_data['items'] = items_file = open(task_executor.items_file,
                                                   "rb")
            if LOGGER.isEnabledFor(logging.DEBUG):
                item_file_size = os.path.getsize(task_executor.items_file)
                LOGGER.debug('item file size : %d', item_file_size)
        LOGGER.debug(post_data)
        datagen, headers = multipart_encode(post_data)
        headers['X-DD-Nodeid'] = str(self.node_id)
        body_producer = MultipartRequestBodyProducer(datagen)
        request = HTTPRequest(url, method='POST', headers=headers,
                              body_producer=body_producer)
        client = self.httpclient
        future = client.fetch(request, raise_error=False)
        self.ioloop.add_future(future, self.complete_task_done(task_executor,
                                                               log_file,
                                                               items_file))
        LOGGER.info('task %s finished', task_executor.task.id)

    def complete_task_done(self, task_executor, log_file, items_file):
        def complete_task_done_f(future):
            """
            The callback of the complete-job request; if a socket error
            occurred, retry committing the complete request in 10 seconds.
            If request is completed successfully, remove task from slots.
            Always close stream files.
            @type future: Future
            :return:
            """
            response = future.result()
            LOGGER.debug(response)
            if log_file:
                log_file.close()
            if items_file:
                items_file.close()
            if response.error and isinstance(response.error, socket.error):
                status = TASK_STATUS_SUCCESS if task_executor.ret_code == 0 \
                    else TASK_STATUS_FAIL
                self.ioloop.call_later(10, self.complete_task, task_executor,
                                       status)
                LOGGER.warning('Socket error when completing job, retry in '
                               '10 seconds.')
                return

            if response.error:
                LOGGER.warning('Error when post task complete request: %s',
                               response.error)
            self.task_slots.remove_task(task_executor)
            LOGGER.debug('complete_task_done')

        return complete_task_done_f

    def task_finished(self, future):
        task_executor = future.result()
        status = TASK_STATUS_SUCCESS if task_executor.ret_code == 0 \
            else TASK_STATUS_FAIL
        self.complete_task(task_executor, status)

    def check_task(self):
        if not self.task_slots.is_full():
            self.get_next_task()
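check_header_new_task_on_server is the interesting guard: the checktask PeriodicCallback is constructed up front but only started lazily, when a heartbeat response lacks the X-DD-New-Task header (an old server). A condensed sketch of that lazy fallback (header name from the example, the rest illustrative):

from tornado.ioloop import IOLoop, PeriodicCallback

poller = PeriodicCallback(lambda: print("polling GET_TASK"), 10 * 1000)

def on_heartbeat_headers(headers):
    if 'X-DD-New-Task' in headers:
        if headers['X-DD-New-Task'] == 'True':
            IOLoop.current().add_callback(lambda: print("fetch task now"))
    elif not poller.is_running():
        # Old server without the header: fall back to periodic polling.
        poller.start()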
Example #10
class MetadataStorage:
    def __init__(self, server):
        self.server = server
        self.metadata = {}
        self.pending_requests = {}
        self.events = {}
        self.script_response = None
        self.busy = False
        self.gc_path = os.path.expanduser("~")
        self.prune_cb = PeriodicCallback(
            self.prune_metadata, METADATA_PRUNE_TIME)

    def update_gcode_path(self, path):
        if path == self.gc_path:
            return
        self.metadata = {}
        self.gc_path = path
        if not self.prune_cb.is_running():
            self.prune_cb.start()

    def close(self):
        self.prune_cb.stop()

    def get(self, key, default=None):
        if key not in self.metadata:
            return default
        return dict(self.metadata[key])

    def __getitem__(self, key):
        return dict(self.metadata[key])

    def _handle_script_response(self, result):
        try:
            proc_resp = json.loads(result.strip())
        except Exception:
            logging.exception("file_manager: unable to load metadata")
            logging.debug(result)
            return
        proc_log = proc_resp.get('log', [])
        for log_msg in proc_log:
            logging.info(log_msg)
        if 'file' in proc_resp:
            self.script_response = proc_resp

    def prune_metadata(self):
        for fname in list(self.metadata.keys()):
            fpath = os.path.join(self.gc_path, fname)
            if not os.path.exists(fpath):
                del self.metadata[fname]
                logging.info(f"Pruned file: {fname}")
                continue

    def _has_valid_data(self, fname, fsize, modified):
        mdata = self.metadata.get(fname, {'size': "", 'modified': 0})
        return mdata['size'] == fsize and mdata['modified'] == modified

    def remove_file(self, fname):
        self.metadata.pop(fname)

    def parse_metadata(self, fname, fsize, modified, notify=False):
        evt = Event()
        if fname in self.pending_requests or \
                self._has_valid_data(fname, fsize, modified):
            # request already pending or not necessary
            evt.set()
            return evt
        self.pending_requests[fname] = (fsize, modified, notify, evt)
        if self.busy:
            return evt
        self.busy = True
        IOLoop.current().spawn_callback(self._process_metadata_update)
        return evt

    async def _process_metadata_update(self):
        while self.pending_requests:
            fname, (fsize, modified, notify, evt) = \
                self.pending_requests.popitem()
            if self._has_valid_data(fname, fsize, modified):
                evt.set()
                continue
            retries = 3
            while retries:
                try:
                    await self._run_extract_metadata(fname, notify)
                except Exception:
                    logging.exception("Error running extract_metadata.py")
                    retries -= 1
                else:
                    break
            else:
                self.metadata[fname] = {'size': fsize, 'modified': modified}
                logging.info(
                    f"Unable to extract metadata from file: {fname}")
            evt.set()
        self.busy = False

    async def _run_extract_metadata(self, filename, notify):
        # Escape double quotes in the file name so that it may be
        # properly loaded
        filename = filename.replace("\"", "\\\"")
        cmd = " ".join([sys.executable, METADATA_SCRIPT, "-p",
                        self.gc_path, "-f", f"\"{filename}\""])
        shell_command = self.server.lookup_plugin('shell_command')
        scmd = shell_command.build_shell_command(
            cmd, self._handle_script_response)
        self.script_response = None
        await scmd.run(timeout=10.)
        if self.script_response is None:
            raise self.server.error("Unable to extract metadata")
        path = self.script_response['file']
        metadata = self.script_response['metadata']
        if not metadata:
            # This indicates an error, do not add metadata for this
            raise self.server.error("Unable to extract metadata")
        self.metadata[path] = dict(metadata)
        metadata['filename'] = path
        if notify:
            self.server.send_event(
                "file_manager:metadata_update", metadata)
Example #11
class State(tornado.websocket.WebSocketHandler):
    def __init__(self, *args, **kwargs):
        self.stopped = False
        self.t = None
        self.car = kwargs.pop('car')
        self.rps_ms = kwargs.pop('rps_ms')  # request interval in milliseconds
        self.for_network = kwargs.pop('for_network')

        self.timer = [0, 0]
        self.timer_index = 0
        self.start_time = time()

        self.total_requests = 0

        self.inf_loop = PeriodicCallback(
            self.loop, callback_time=self.rps_ms
        )  # 50 ms ≈ 20 requests per second; 65 ms ≈ 15 requests per second

        super(State, self).__init__(*args, **kwargs)

    def check_origin(self, origin):
        return True

    def on_message(self, message):
        try:
            if message == "send_state":
                state = self.car.get_state()
                self.write_message(state, True)
                logging.debug("Sent send_state")
            elif message == "read_state":
                if not self.inf_loop.is_running():
                    self.start_time = time()
                    self.inf_loop.start()
            elif message == "stop_read_state":
                self.inf_loop.stop()
        except tornado.websocket.WebSocketClosedError:
            logging.debug("State WS closed, stopping PeriodicCallback")
            self.inf_loop.stop()

    def on_close(self):
        self.inf_loop.stop()
        logging.debug("WebSocket closed")

    def loop(self):
        self.timer[self.timer_index] += 1
        self.total_requests += 1

        elapsed = time() - self.start_time

        if elapsed > 1:
            self.start_time = time()
            self.timer_index = 1
            self.timer[0] = (self.timer[0] + self.timer[1]) / 2
            self.timer[1] = 0

        car_rps = self.timer[0]
        logging.debug("RPS: " + str(car_rps))
        state = self.car.get_state_vector(latest=True,
                                          for_network=self.for_network)
        state['car_rps'] = car_rps
        try:
            self.write_message(state, True)
        except tornado.websocket.WebSocketClosedError:
            logging.debug("State WS closed, stopping PeriodicCallback")
            self.inf_loop.stop()
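Stopping the PeriodicCallback when the websocket goes away, both in on_close and on WebSocketClosedError, keeps the loop from writing to a dead connection. A condensed sketch of that guard (the handler skeleton is illustrative):

import logging
import tornado.websocket
from tornado.ioloop import PeriodicCallback

class PushHandler(tornado.websocket.WebSocketHandler):
    def open(self):
        self.loop_cb = PeriodicCallback(self.push, 50)
        self.loop_cb.start()

    def push(self):
        try:
            self.write_message({"tick": True})
        except tornado.websocket.WebSocketClosedError:
            logging.debug("WS closed, stopping PeriodicCallback")
            self.loop_cb.stop()

    def on_close(self):
        if self.loop_cb.is_running():
            self.loop_cb.stop()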
Example #12
class FutureConnection(object):
    def _future_start(self):
        self._future_connecting = False
        self._future_closed = False
        self._future_periodic_connect.start()
        self._future_io_loop = self._future_periodic_connect.io_loop
        self._future_thread_id = thread.get_ident()

    @coroutine
    def _future_stream_close(self, shutdown=False):
        if shutdown:  # positive close issued by user.
            self._future_closed = True

        self._future_connecting = False

        LOG.warn('Closing: threadId:%s!', thread.get_ident())
        if self.future_connected():
            for callback in self._future_close_callbacks:
                if inspect.isgeneratorfunction(callback):
                    raise SyntaxError(
                        'wrap generator function with ``@tornado.gen.coroutine`` first :%s '
                        % callback)
                LOG.debug('calling close_callback: %s', callback)
                self.future_stream.io_loop.add_callback(callback, self)

            def _future_stream_close_callback(conn):
                conn.future_stream.close()
                conn.future_stream = None
                conn._future_packet_bulk = []
                conn._future_callback_queue.clear()  # registry

                if conn.future_stream is None:
                    if conn._future_closed is False:
                        conn._future_periodic_connect.start()
                    return

            self.future_stream.io_loop.add_callback(
                _future_stream_close_callback, self)

    @coroutine
    def _future_stream_connect(self,
                               host=None,
                               port=None,
                               af=socket.AF_UNSPEC,
                               ssl_options=None,
                               max_buffer_size=None,
                               source_ip=None,
                               source_port=None,
                               timeout=None):
        assert self._future_thread_id == thread.get_ident(), \
            'this should run in IO thread:%s' % self._future_thread_id
        if self.future_connected():
            if self._future_periodic_connect.is_running():
                self._future_periodic_connect.stop()
            raise Return(self)

        if self._future_connecting is True:
            return
        else:
            self._future_connecting = True

        host = host or self.host
        port = port or self.port
        LOG.warn('Connecting %s:%s, threadId:%s, id:%s' %
                 (host, port, thread.get_ident(), id(self)))
        try:
            self.future_stream = yield self._future_connector.connect(
                host,
                port,
                timeout=timeout,
                max_buffer_size=max_buffer_size,
                af=af,
                ssl_options=ssl_options,
                source_ip=source_ip,
                source_port=source_port)
            # Note: we first register the async Close|Read|Write callbacks
            # here, since the handshake may require async READ|WRITE
        except Exception as e:
            self._future_connecting = False
            LOG.exception('Error connect to %s:%s, retry.', host, port)
            if self.future_stream:
                self.future_stream.close()
            if not self._future_periodic_connect.is_running():
                self._future_periodic_connect.start()
            return

        self._future_callback_queue.clear()  # registry
        self._future_packet_bulk = []
        self._future_connecting = False
        self._future_chunk_size = stream_options(self.future_stream,
                                                 blocking=False,
                                                 keepalive=True)
        self._future_reader_buffer = bytearray(self._future_chunk_size)
        self.future_stream.set_close_callback(self._future_stream_close)
        self.future_stream.read_into(self._future_reader_buffer,
                                     callback=self._future_socket_read,
                                     partial=True)
        LOG.warn('Handshaking: %s:%s, threadId:%s, id:%s', host, port,
                 thread.get_ident(), id(self))
        try:
            for callback in self._future_handshake_callbacks:
                if not is_coroutine_function(callback):
                    raise SyntaxError(
                        'wrap generator function with ``@tornado.gen.coroutine`` first :%s '
                        % callback)
                LOG.debug('calling handshake_callback: %s', callback)
                r = yield callback(self)

            waiters = tuple(self._future_waiters)
            for fut in waiters:
                if not fut.done():
                    fut.set_result(self)
        except Exception as e:
            LOG.exception('Error Handshaking to %s:%s, retry.', host, port)
            self.future_stream.close()
            if not self._future_periodic_connect.is_running():
                self._future_periodic_connect.start()
            return
        finally:
            self._future_connecting = False

        LOG.warn('Connected: %s:%s, threadId:%s, id:%s', host, port,
                 thread.get_ident(), id(self))
        try:
            for callback in self._future_initial_callbacks:
                if inspect.isgeneratorfunction(callback):
                    raise SyntaxError(
                        'wrap generator function with ``@tornado.gen.coroutine`` first :%s '
                        % callback)
                LOG.debug('calling initial_callback: %s', callback)
                r = yield callback(self)
        except Exception as e:
            LOG.exception('Error connect to %s:%s, retry.', host, port)
            self.future_stream.close()
            if not self._future_periodic_connect.is_running():
                self._future_periodic_connect.start()
            return

        for callback in self._future_close_callbacks:
            if inspect.isgeneratorfunction(callback):
                raise SyntaxError(
                    'wrap generator function with ``@tornado.gen.coroutine`` first :%s '
                    % callback)

        LOG.warn('Initialized: %s:%s, threadId:%s, id:%s!', host, port,
                 thread.get_ident(), id(self))

    ###########################################################################
    def future_initialize(self):
        self.host = self.host if hasattr(self, 'host') else '127.0.0.1'
        self.port = self.port if hasattr(self, 'port') else '/tmp/redis.sock'

        self.future_stream = None
        self._future_thread_id = None
        self._future_io_loop = None
        self._future_connecting = None
        self._future_closed = None

        self._future_reader_buffer = None
        self._future_chunk_size = None

        self._future_periodic_connect = PeriodicCallback(
            self._future_stream_connect, 1000)
        self._future_connector = TCPClient(resolver=CachedOverrideResolver())

        self._future_handshake_callbacks = []  # connected, handshaking
        self._future_initial_callbacks = []  # connected, handshake passed, initializing
        self._future_waiters = []  # connected, handshake passed, initialization passed; wake up all waiters
        self._future_close_callbacks = []

        self._future_packet_bulk = []
        self._future_callback_queue = collections.deque()  # registry

    def future_connected(self):
        return (self._future_connecting is False
                and self.future_stream is not None
                and not self.future_stream.closed())

    def future_disconnect(self):
        if not self.future_connected():
            return

        self._future_connecting = False

        def safe_shutdown_stream_read(stream):
            if stream.socket is None:
                return

            while len(self._future_close_callbacks) > 0:
                callback = self._future_close_callbacks.pop(0)
                stream.io_loop.add_callback(callback, self)

            if stream.reading() is False:
                stream.io_loop.update_handler(stream.socket.fileno(),
                                              IOLoop.WRITE | IOLoop.ERROR)

                if stream.writing() is False:
                    stream.io_loop.remove_handler(stream.socket.fileno())
                    stream.close()
                    return

            stream.io_loop.add_callback(safe_shutdown_stream_read, stream)

        self.future_stream.io_loop.add_callback(safe_shutdown_stream_read,
                                                self.future_stream)

    def future_connect(self):
        connected_future = Future()
        if self.future_connected():
            if self._future_periodic_connect.is_running():
                self._future_periodic_connect.stop()

            connected_future.set_result(self)
            return connected_future

        # most time, Transport should be in connected state.
        connected_future.add_done_callback(
            lambda cf: self._future_waiters.remove(cf))

        for callback in self._future_initial_callbacks:
            connected_future.add_done_callback(callback)

        if connected_future not in self._future_waiters:
            self._future_waiters.append(connected_future)

        if not self._future_periodic_connect.is_running():
            if self._future_io_loop is None:
                self._future_start()
            else:
                self._future_io_loop.add_callback(
                    self._future_periodic_connect.start)

        return connected_future

    ###########################################################################
    def add_future(self, future, future_callback):
        self.future_stream.io_loop.add_future(future, future_callback)

    def add_callback(self, callback, *args, **kwargs):
        self.future_stream.io_loop.add_callback(callback, *args, **kwargs)

    def spawn_callback(self, callback, *args, **kwargs):
        self.future_stream.io_loop.spawn_callback(callback, *args, **kwargs)

    def add_handshake_callback(self, callback):
        self._future_handshake_callbacks.append(callback)

    def add_initial_callback(self, callback):
        self._future_initial_callbacks.append(callback)

    def add_close_callback(self, callback):
        self._future_close_callbacks.append(callback)

    def clear_connect_callbacks(self):
        self._future_close_callbacks = []

    def set_future_chunk_size(self, size=None):
        if size is None:
            size = self._future_chunk_size
        if size < 1:
            return
        cur_size = len(self._future_reader_buffer)
        if size > cur_size:
            self._future_reader_buffer.extend(bytearray(size - cur_size))

        elif size < cur_size:
            del self._future_reader_buffer[size:]

    ###########################################################################
    def _future_socket_read(self, num_bytes=0):
        if num_bytes < 1:
            return

        data = memoryview(
            self._future_reader_buffer)[:num_bytes] if num_bytes > 0 else None
        self.future_packet_read(data)
        # Note: read_into() must only be called after self.future_packet_read(data)!
        self.future_stream.read_into(self._future_reader_buffer,
                                     callback=self._future_socket_read,
                                     partial=True)

    def _future_packet_aggregator(self, packet_num, aggregator_callback,
                                  packet):
        self._future_packet_bulk.append(packet)
        if len(self._future_packet_bulk) == packet_num:
            future_packet_bulk = self._future_packet_bulk
            self._future_packet_bulk = []
            aggregator_callback(future_packet_bulk)

    def _future_socket_send(self,
                            buffer_or_partial,
                            packet_callbacks,
                            send_future=None,
                            connected_future=None):
        if packet_callbacks is None:
            packet_callbacks = []
        elif callable(packet_callbacks):
            packet_callbacks = [packet_callbacks]

        if callable(buffer_or_partial):
            buffer_or_partial = buffer_or_partial()

        if isinstance(buffer_or_partial, (tuple, list)):
            buffer_or_partial = BYTES_EMPTY.join(buffer_or_partial)

        if send_future is None:
            send_future = self.future_stream.write(buffer_or_partial)
        else:
            self.future_stream.write(buffer_or_partial).add_done_callback(
                lambda _: send_future.set_result(_.result()))

        self._future_callback_queue.extend(packet_callbacks)

        return send_future

    def future_packet_send(self, packets, callbacks=None):
        packet_send_future = Future()
        send_partial = partial(self._future_socket_send, packets, callbacks,
                               packet_send_future)
        if self.future_connected():
            return send_partial()
        else:
            self.future_connect().add_done_callback(send_partial)
        return packet_send_future

    def future_packet_read(self, data=None):
        raise NotImplementedError('Implement this method first !')
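The reconnect strategy in this class is a PeriodicCallback that keeps attempting _future_stream_connect every second and stops itself once connected. A condensed sketch of that periodic-reconnect idiom (the connect logic is a stub standing in for a real TCPClient.connect attempt):

from tornado.ioloop import IOLoop, PeriodicCallback

connected = False

def try_connect():
    global connected
    print("attempting connect...")
    connected = True  # stand-in for a real connection attempt
    if connected and reconnect.is_running():
        reconnect.stop()  # stop retrying once the stream is up

reconnect = PeriodicCallback(try_connect, 1000)
reconnect.start()
IOLoop.current().call_later(3, IOLoop.current().stop)
IOLoop.current().start()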
Example #13
class Executor():
    heartbeat_interval = 10
    checktask_interval = 10

    def __init__(self, config=None):
        self.ioloop = IOLoop.current()
        self.node_id = None
        self.status = EXECUTOR_STATUS_OFFLINE

        if config is None:
            config = AgentConfig()
        self.task_slots = TaskSlotContainer(config.getint('slots', 1))
        self.config = config
        # if server_https_port is configured, prefer to use it.
        if config.get('server_https_port'):
            self.service_base = 'https://%s:%d' % (config.get('server'), config.getint('server_https_port'))
        else:
            self.service_base = 'http://%s:%d' % (config.get('server'), config.getint('server_port'))
        client_cert = config.get('client_cert') or None
        client_key = config.get('client_key') or None

        httpclient_defaults = {
            'validate_cert': True,
            'ca_certs': 'keys/ca.crt',
            'client_cert': client_cert,
            'client_key': client_key,
            'request_timeout': config.getfloat('request_timeout', 60)
        }
        logger.debug(httpclient_defaults)
        self.httpclient = AsyncHTTPClient(defaults=httpclient_defaults)


    def start(self):
        self.register_node()
        # init heartbeat periodic callback
        heartbeat_callback = PeriodicCallback(self.send_heartbeat, self.heartbeat_interval*1000)
        heartbeat_callback.start()

        # init checktask periodic callback
        # the new version uses the HEARTBEAT response to tell the client whether there is a new task in the server queue,
        # so do not start this periodic method. But if the server is an old version without that header info,
        # the client still needs to poll GET_TASK.
        self.checktask_callback = PeriodicCallback(self.check_task, self.checktask_interval*1000)

        # code for debuging memory leak
        # import objgraph
        # def check_memory():
        #     logger.debug('Checking memory.')
        #     logger.debug(objgraph.by_type('SpiderTask'))
        #     logger.debug(objgraph.by_type('TaskExecutor'))
        #     logger.debug(objgraph.by_type('Future'))
        #     logger.debug(objgraph.by_type('PeriodicCallback'))
        #     future_objs = objgraph.by_type('Future')
        #     if future_objs:
        #         objgraph.show_chain(
        #             objgraph.find_backref_chain(future_objs[-1], objgraph.is_proper_module),
        #             filename='chain.png'
        #         )
        # self.check_memory_callback = PeriodicCallback(check_memory, 1*1000)
        # self.check_memory_callback.start()

        self.ioloop.start()

    def check_header_new_task_on_server(self, headers):
        try:
            new_task_on_server = headers['X-DD-New-Task'] == 'True'
            if new_task_on_server:
                self.ioloop.call_later(0, self.check_task)
        except KeyError:
            # if the response contains no 'X-DD-New-Task' header, it might be an old version server,
            # so fall back to the old GET_TASK polling mode
            if not self.checktask_callback.is_running():
                self.checktask_callback.start()

    @coroutine
    def send_heartbeat(self):
        if self.status == EXECUTOR_STATUS_OFFLINE:
            self.register_node()
            return
        url = urlparse.urljoin(self.service_base, '/nodes/%d/heartbeat' % self.node_id)
        running_tasks = ','.join([task_executor.task.id for task_executor in self.task_slots.tasks()])
        request = HTTPRequest(url=url, method='POST', body='', headers={'X-DD-RunningJobs': running_tasks})
        try:
            res = yield self.httpclient.fetch(request)
            if 'X-DD-KillJobs' in res.headers:
                logger.info('received kill signal %s' % res.headers['X-DD-KillJobs'])
                for job_id in json.loads(res.headers['X-DD-KillJobs']):
                    task_to_kill = self.task_slots.get_task(job_id)
                    if task_to_kill:
                        logger.info('%s' % task_to_kill)
                        task_to_kill.kill()
            self.check_header_new_task_on_server(res.headers)
        except urllib2.HTTPError as e:
            if e.code == 400:
                logging.warning('Node expired, register now.')
                self.status = EXECUTOR_STATUS_OFFLINE
                self.register_node()
        except urllib2.URLError as e:
            logging.warning('Cannot connect to server. %s' % e)
        except HTTPError as e:
            if e.code == 400:
                logging.warning('Node expired, register now.')
                self.status = EXECUTOR_STATUS_OFFLINE
                self.register_node()
        except Exception as e:
            logging.warning('Cannot connect to server. %s' % e)


    @coroutine
    def register_node(self):
        if self.service_base.startswith('https') and not os.path.exists('keys/ca.crt') :
            httpclient = AsyncHTTPClient(force_instance=True)
            cacertrequest = HTTPRequest(urlparse.urljoin(self.service_base, 'ca.crt'), validate_cert=False)
            cacertresponse = yield httpclient.fetch(cacertrequest)
            if not os.path.exists('keys'):
                os.mkdir('keys')
            open('keys/ca.crt', 'wb').write(cacertresponse.body)
        try:
            url = urlparse.urljoin(self.service_base, '/nodes')
            request = HTTPRequest(url = url, method='POST', body='')
            res = yield self.httpclient.fetch(request)
            self.status = EXECUTOR_STATUS_ONLINE
            self.node_id = json.loads(res.body)['id']
            logger.info('node %d registered' % self.node_id)
        except urllib2.URLError as e:
            logging.warning('Cannot connect to server, %s' % e )
        except socket.error as e:
            logging.warning('Cannot connect to server, %s' % e)

    def on_new_task_reach(self, task):
        if task is not None:
            task_executor = self.execute_task(task)
            self.task_slots.put_task(task_executor)


    @coroutine
    def get_next_task(self):
        url = urlparse.urljoin(self.service_base, '/executing/next_task')
        post_data = urllib.urlencode({'node_id': self.node_id})
        request = HTTPRequest(url=url, method='POST', body=post_data)
        try:
            response = yield self.httpclient.fetch(request)
            response_content = response.body
            response_data = json.loads(response_content)
            logger.debug(url)
            logger.debug(response_content)
            if response_data['data'] is not None:
                task = SpiderTask()
                task.id = response_data['data']['task']['task_id']
                task.spider_id = response_data['data']['task']['spider_id']
                task.project_name = response_data['data']['task']['project_name']
                task.project_version = response_data['data']['task']['version']
                task.spider_name = response_data['data']['task']['spider_name']
                if 'spider_parameters' in response_data['data']['task']:
                    task.spider_parameters = response_data['data']['task']['spider_parameters']
                else:
                    task.spider_parameters = {}
                self.on_new_task_reach(task)
        except urllib2.URLError:
            logger.warning('Cannot connect to server')

    def execute_task(self, task):
        executor = TaskExecutor(task, config=self.config)
        pid = None
        future = executor.execute()
        self.post_start_task(task, pid)
        executor.on_subprocess_start = self.post_start_task
        self.ioloop.add_future(future, self.task_finished)
        return executor

    @coroutine
    def post_start_task(self, task, pid):
        url = urlparse.urljoin(self.service_base, '/jobs/%s/start' % task.id)
        post_data = urllib.urlencode({'pid': pid or ''})
        try:
            request = HTTPRequest(url=url, method='POST', body=post_data)
            yield self.httpclient.fetch(request)
        except urllib2.URLError as e:
            logger.error('Error when post_start_task: %s' % e)
        except urllib2.HTTPError as e:
            logger.error('Error when post_start_task: %s' % e)

    def complete_task(self, task_executor, status):
        '''
        @type task_executor: TaskExecutor
        '''
        url = urlparse.urljoin(self.service_base, '/executing/complete')

        log_file = open(task_executor.output_file, 'rb')

        post_data = {
            'task_id': task_executor.task.id,
            'status': status,
            'log': log_file,
        }

        items_file = None
        if task_executor.items_file and os.path.exists(task_executor.items_file):
            post_data['items'] = items_file = open(task_executor.items_file, "rb")
            logger.debug('item file size : %d' % os.path.getsize(task_executor.items_file))
        logger.debug(post_data)
        datagen, headers = multipart_encode(post_data)
        headers['X-DD-Nodeid'] = str(self.node_id)
        request = HTTPRequest(url, method='POST', headers=headers, body_producer=MultipartRequestBodyProducer(datagen))
        client = self.httpclient
        future = client.fetch(request, raise_error=False)
        self.ioloop.add_future(future, self.complete_task_done(task_executor, log_file, items_file))
        logger.info('task %s finished' % task_executor.task.id)


    def complete_task_done(self, task_executor, log_file, items_file):
        def complete_task_done_f(future):
            '''
            The callback of the complete-job request; if a socket error occurred, retry committing the complete request in 10 seconds.
            If request is completed successfully, remove task from slots.
            Always close stream files.
            @type future: Future
            :return:
            '''
            response = future.result()
            logger.debug(response)
            if log_file:
                log_file.close()
            if items_file:
                items_file.close()
            if response.error and isinstance(response.error, socket.error):
                self.ioloop.call_later(10, self.complete_task, task_executor, TASK_STATUS_SUCCESS if task_executor.ret_code == 0 else TASK_STATUS_FAIL)
                logger.warning('Socket error when completing job, retry in 10 seconds.')
                return

            if response.error:
                logger.warning('Error when post task complete request: %s' % response.error)
            self.task_slots.remove_task(task_executor)
            logger.debug('complete_task_done')
        return complete_task_done_f

    def task_finished(self, future):
        task_executor = future.result()
        self.complete_task(task_executor, TASK_STATUS_SUCCESS if task_executor.ret_code == 0 else TASK_STATUS_FAIL)

    def check_task(self):
        if not self.task_slots.is_full():
            self.get_next_task()
Example #14
class UpdateChecker(BotRequestHandler):
    def __init__(self, manager, trackers, ioloop=None):
        BotRequestHandler.__init__(self)
        self.ioloop = ioloop or IOLoop.current()
        self.manager = manager
        self.trackers = trackers

        self.search_renderer = Renderer(
            u"<b>{{item.title|e}}</b>\n"
            u"Раздел: {{item.category|e}}\n"
            u"Размер: {% if item.size | int(-1) == -1 %}{{item.size | e}}{% else %}"
            u"{{ item.size | default(0) | int | filesizeformat }}{% endif %}\n"
            u"Добавлено: {{ item.added | default(0) | int | todatetime | datetimeformat('%d-%m-%Y') }}\n"
            u"Скачать: /download_{{item.id}}\n"
            u"\n")
        self.torrent_renderer = Renderer(
            u"<b>{{ item.name | e }}</b> - {{ \"%0.2f\" | format(item.done*100) }}% done\n\n"
        )
        self.cache = []
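        # 15-minute update check; created here but only started (or
        # rescheduled) via the /update command, see cmd_update below.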
        self.update_task = PeriodicCallback(self.do_update, 15 * 60 * 1000)
        self.version = telebots.version

    @PatternMessageHandler('/show( .*)?', authorized=True)
    def cmd_show(self, chat, text):
        data = text.split()
        if len(data) < 3:
            return True
        search_id = int(data[1])
        start = int(data[2])
        if 0 <= search_id < len(self.cache):
            chat_id = self.cache[search_id]['chat_id']
            message_id = self.cache[search_id]['message_id']
            results = self.cache[search_id]['results']

            self.show_results(search_id, chat_id, message_id, results, start)
        return True

    @PatternMessageHandler('/status', authorized=True)
    def cmd_status(self, message_id, chat):
        @gen.coroutine
        def execute():
            torrents = yield self.manager.get_torrents()
            text = [self.torrent_renderer.render(x) for x in torrents]
            self.bot.send_message(to=chat['id'],
                                  message="".join(text),
                                  reply_to_message_id=message_id,
                                  parse_mode='HTML')

        execute()
        return True

    @PatternMessageHandler('/update( .*)?', authorized=True)
    def cmd_update(self, chat, text):
        cmd = text.split()
        chat_id = chat['id']
        if len(cmd) == 1:
            buttons = [
                {
                    'text': 'Now',
                    'callback_data': '/update now'
                },
                {
                    'text': '15m',
                    'callback_data': '/update 15'
                },
                {
                    'text': '30m',
                    'callback_data': '/update 30'
                },
                {
                    'text': '60m',
                    'callback_data': '/update 60'
                },
                {
                    'text': 'Off',
                    'callback_data': '/update 0'
                },
            ]
            markup = {'inline_keyboard': [buttons]}
            self.bot.send_message(to=chat_id,
                                  message='Schedule update',
                                  reply_markup=markup)
        else:
            when = cmd[1]
            if when == 'now':
                self.do_update(chat_id)
            else:
                minutes = int(when)
                if self.update_task.is_running():
                    self.update_task.stop()

                if minutes > 0:
                    self.update_task = PeriodicCallback(
                        self.do_update, minutes * 60 * 1000)
                    self.update_task.start()
                    text = 'Schedule updated: Each %d minutes' % minutes
                else:
                    text = 'Schedule updated: off'

                self.bot.send_message(to=chat_id, message=text)
        return True

    def show_results(self,
                     search_id,
                     chat_id,
                     message_id,
                     results,
                     start,
                     page_size=3):
        message = 'Sorry, nothing was found'
        markup = None
        if len(results) > 0:
            data = map((lambda i: self.search_renderer.render(results[i])),
                       range(start - 1, min(start + page_size - 1,
                                            len(results))))
            message = u"\n".join(data)

            buttons = []
            if start > page_size:
                buttons.append({
                    'text':
                    "Prev %d/%d" % (page_size, start - 1),
                    'callback_data':
                    '/show %d %d' % (search_id, start - page_size)
                })
            if (start + page_size) < len(results):
                buttons.append({
                    'text':
                    "Next %d/%d" %
                    (page_size, len(results) - start - page_size + 1),
                    'callback_data':
                    '/show %d %d' % (search_id, start + page_size)
                })

            if len(buttons) > 0:
                markup = {'inline_keyboard': [buttons]}

        self.bot.edit_message_text(to=chat_id,
                                   message_id=message_id,
                                   text=message,
                                   parse_mode='HTML',
                                   reply_markup=markup)

    @PatternMessageHandler('[^/].*', authorized=True)
    def do_search(self, text, chat, message_id):
        query = text
        chat_id = chat['id']

        @gen.coroutine
        def execute():
            # Send reply "search in progress"
            msg = yield self.bot.send_message(to=chat_id,
                                              message="Search in progress...",
                                              reply_to_message_id=message_id)
            placeholder_message_id = json.loads(
                msg.body)['result']['message_id']
            self.async_search(None, chat_id, placeholder_message_id, query)

        execute()
        return True

    @PatternMessageHandler('/download_.*', authorized=True)
    def do_download(self, chat, text):
        query = text
        user_id = chat['id']

        @gen.coroutine
        def execute():
            item_id = query.split('_', 1)[1]
            logging.info("Search download url for id=%s", item_id)
            item = None
            for search in self.cache:
                for result in search['results']:
                    if result.id == item_id:
                        item = result
                        break
                if item is not None:
                    break

            if item is not None:
                url = item.link

                msg = yield self.bot.send_message(to=user_id,
                                                  message='Downloading...')
                message_id = json.loads(msg.body)['result']['message_id']
                try:
                    for tracker in self.trackers:
                        if tracker.check_url(url):
                            torrent_data = yield tracker.download(url)
                            tr_info = bencode.bdecode(torrent_data)
                            torrent_name = tr_info['info']['name']

                            yield self.manager.add_torrent(torrent_data)
                            self.bot.edit_message_text(
                                to=user_id,
                                message_id=message_id,
                                text='Torrent "%s" downloaded' % torrent_name)
                            break
                except Exception:
                    logging.exception("Error while download torrent data")
                    self.bot.edit_message_text(
                        to=user_id,
                        message_id=message_id,
                        text="Sorry, error occurred!\n%s" %
                        traceback.format_exc())
            else:
                logging.warning("Couldn't find download url for id %s", item_id)

        execute()
        return True

    @gen.coroutine
    def async_search(self, search_id, chat_id, message_id, query):
        if search_id is None:
            search_id = len(self.cache)
            self.cache.append({
                'chat_id': chat_id,
                'message_id': message_id,
                'query': query,
                'results': []
            })
        logging.info("Start search_id %d for query \"%s\"", search_id, query)

        responses = yield [
            tracker.do_search(query) for tracker in self.trackers
        ]
        results = []
        for response in responses:
            results = results + response
        logging.info("Found %d torrents", len(results))
        self.cache[search_id]['results'] = results
        self.show_results(search_id, chat_id, message_id, results, 1)

    @gen.coroutine
    def do_update(self, reply_chat_id=None):
        chat_id = reply_chat_id or self.bot.admins[0]
        updated = False
        torrents = yield self.manager.get_torrents()
        error = False

        for torrent in torrents:
            url = torrent['url']
            logging.info('Checking updates for %s', url)
            for tracker in self.trackers:
                if tracker.check_url(url):
                    logging.debug('Download using %s', tracker.__class__)
                    try:
                        torrent_data = yield tracker.download(url)
                        added = yield self.manager.add_torrent(
                            torrent_data, torrent)
                        if added:
                            updated = True
                            self.bot.send_message(
                                to=chat_id,
                                message='Torrent "%s" updated' %
                                torrent['name'])
                    except Exception:
                        logging.exception('Error while check updates')
                        if reply_chat_id is not None:
                            self.bot.send_message(
                                to=chat_id, message=traceback.format_exc())
                        error = True
                    continue

        if not error and not updated and reply_chat_id is not None:
            self.bot.send_message(to=chat_id, message='No updates')

    def do_notify(self, message):
        logging.info("Notify message: \"%s\"", message)
        chat_id = self.bot.admins[0]
        self.bot.send_message(to=chat_id, message=message)
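
A minimal sketch of the stop-recreate-start pattern that cmd_update relies on: the running PeriodicCallback is stopped and replaced rather than re-timed in place. `handler` is an assumed, already-constructed UpdateChecker.

from tornado.ioloop import PeriodicCallback

def reschedule(handler, minutes):
    # A started PeriodicCallback keeps its interval, so stop and replace it.
    if handler.update_task.is_running():
        handler.update_task.stop()
    if minutes > 0:
        handler.update_task = PeriodicCallback(handler.do_update,
                                               minutes * 60 * 1000)
        handler.update_task.start()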
Example #15
0
class MetadataStorage:
    def __init__(self, server, gc_path, database):
        self.server = server
        database.register_local_namespace(METADATA_NAMESPACE)
        self.mddb = database.wrap_namespace(METADATA_NAMESPACE,
                                            parse_keys=False)
        self.pending_requests = {}
        self.events = {}
        self.busy = False
        self.gc_path = gc_path
        self.prune_cb = PeriodicCallback(self.prune_metadata,
                                         METADATA_PRUNE_TIME)

    def update_gcode_path(self, path):
        if path == self.gc_path:
            return
        self.mddb.clear()
        self.gc_path = path
        if not self.prune_cb.is_running():
            self.prune_cb.start()

    def close(self):
        self.prune_cb.stop()

    def get(self, key, default=None):
        return self.mddb.get(key, default)

    def __getitem__(self, key):
        return self.mddb[key]

    def prune_metadata(self):
        for fname in list(self.mddb.keys()):
            fpath = os.path.join(self.gc_path, fname)
            if not os.path.exists(fpath):
                del self.mddb[fname]
                logging.info(f"Pruned file: {fname}")

    def _has_valid_data(self, fname, fsize, modified):
        mdata = self.mddb.get(fname, {'size': "", 'modified': 0})
        return mdata['size'] == fsize and mdata['modified'] == modified

    def remove_file(self, fname):
        try:
            del self.mddb[fname]
        except Exception:
            pass

    def parse_metadata(self, fname, fsize, modified, notify=False):
        evt = Event()
        if fname in self.pending_requests or \
                self._has_valid_data(fname, fsize, modified):
            # request already pending or not necessary
            evt.set()
            return evt
        self.pending_requests[fname] = (fsize, modified, notify, evt)
        if self.busy:
            return evt
        self.busy = True
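        # Start the single background worker; the busy flag above ensures
        # only one _process_metadata_update coroutine runs at a time.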
        IOLoop.current().spawn_callback(self._process_metadata_update)
        return evt

    async def _process_metadata_update(self):
        while self.pending_requests:
            fname, (fsize, modified, notify, evt) = \
                self.pending_requests.popitem()
            if self._has_valid_data(fname, fsize, modified):
                evt.set()
                continue
            retries = 3
            while retries:
                try:
                    await self._run_extract_metadata(fname, notify)
                except Exception:
                    logging.exception("Error running extract_metadata.py")
                    retries -= 1
                else:
                    break
            else:
                self.mddb[fname] = {'size': fsize, 'modified': modified}
                logging.info(
                    f"Unable to extract medatadata from file: {fname}")
            evt.set()
        self.busy = False

    async def _run_extract_metadata(self, filename, notify):
        # Escape double quotes in the file name so that it is passed
        # to the shell command intact
        filename = filename.replace("\"", "\\\"")
        cmd = " ".join([
            sys.executable, METADATA_SCRIPT, "-p", self.gc_path, "-f",
            f"\"{filename}\""
        ])
        shell_command = self.server.lookup_plugin('shell_command')
        scmd = shell_command.build_shell_command(cmd, log_stderr=True)
        result = await scmd.run_with_response(timeout=10.)
        if result is None:
            raise self.server.error(f"Metadata extraction error")
        try:
            decoded_resp = json.loads(result.strip())
        except Exception:
            logging.debug(f"Invalid metadata response:\n{result}")
            raise
        path = decoded_resp['file']
        metadata = decoded_resp['metadata']
        if not metadata:
            # This indicates an error, do not add metadata for this
            raise self.server.error("Unable to extract metadata")
        self.mddb[path] = dict(metadata)
        metadata['filename'] = path
        if notify:
            self.server.send_event("file_manager:metadata_update", metadata)
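
A minimal usage sketch for the class above, assuming Event is tornado.locks.Event (consistent with the async code in the original): callers await the returned event, then read the parsed metadata back out of the store.

async def wait_for_metadata(storage, fname, fsize, modified):
    # parse_metadata de-duplicates pending requests and returns an Event
    # that is set once the file's metadata is up to date.
    evt = storage.parse_metadata(fname, fsize, modified, notify=True)
    await evt.wait()
    return storage.get(fname)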
Example #16
0
class MongoExportPipeline(object):
    """
    This pipeline exports items to MongoDB using the async mongo
    driver (motor). Interaction with MongoDB doesn't block
    the event loop.

    On start it creates object in 'jobs' collection and sets
    spider.motor_job_id to the ID of this job.

    If MONGO_EXPORT_JOBID_KEY option is set, job id is added to
    each stored item under the specified key name.

    If MONGO_EXPORT_DUMP_PERIOD is non-zero then updated job stats are saved
    to Mongo periodically every ``MONGO_EXPORT_DUMP_PERIOD`` seconds
    (default is 60).
    """
    def __init__(self, crawler):
        self.crawler = crawler
        settings = self.crawler.settings
        if not settings.getbool('MONGO_EXPORT_ENABLED', False):
            raise NotConfigured

        self.job_id_key = settings.get('MONGO_EXPORT_JOBID_KEY')
        self.items_uri = settings.get('MONGO_EXPORT_ITEMS_URI')
        self.jobs_uri = settings.get('MONGO_EXPORT_JOBS_URI')
        self.items_client, _, _, _, self.items_col = \
            motor_from_uri(self.items_uri)
        self.jobs_client, _, _, _, self.jobs_col = \
            motor_from_uri(self.jobs_uri)

        # XXX: spider_closed is used instead of close_spider because
        # the latter doesn't provide a closing reason.
        crawler.signals.connect(self.spider_closed, signals.spider_closed)
        crawler.signals.connect(self.spider_closing, signals.spider_closing)

        self.dump_period = settings.getfloat('MONGO_EXPORT_DUMP_PERIOD', 60.0)
        self._dump_pc = None

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    @classmethod
    def get_spider_urls(cls, spider):
        options = getattr(spider.crawler, 'start_options', None)
        if options and "domain" in options:
            return [options["domain"]]
        else:
            return spider.start_urls

    @tt_coroutine
    def open_spider(self, spider):
        try:
            yield self.items_col.ensure_index(self.job_id_key)
            yield self.jobs_col.ensure_index('id', unique=True)

            job = yield self.jobs_col.find_and_modify(
                {
                    'id': spider.crawl_id,
                }, {
                    'id': spider.crawl_id,
                    'started_at': datetime.datetime.utcnow(),
                    'status': 'running',
                    'spider': spider.name,
                    "urls": self.get_spider_urls(spider),
                    'options': getattr(spider.crawler, 'start_options', {}),
                },
                upsert=True,
                new=True)
            self.job_id = str(job['_id'])
            spider.motor_job_id = str(self.job_id)
            logger.info("Crawl job generated id: %s",
                        self.job_id,
                        extra={'crawler': self.crawler})

            if self.dump_period:
                self._dump_pc = PeriodicCallback(self.dump_stats,
                                                 self.dump_period * 1000)
                self._dump_pc.start()

        except Exception:
            self.job_id = None
            logger.error(
                "Can't connect to %s. Items won't be stored.",
                self.items_uri,
                exc_info=True,
                extra={'crawler': self.crawler},
            )

    @tt_coroutine
    def spider_closed(self, spider, reason, **kwargs):
        self._stop_periodic_tasks()

        if self.job_id is None:  # open_spider failed; just close the clients
            self.jobs_client.close()
            self.items_client.close()
            return

        # update the job one more time because something might
        # have happened while the spider is closing
        yield self._update_finished_job(reason)

        self.jobs_client.close()
        self.items_client.close()
        logger.info("Info is saved for a closed job %s",
                    self.job_id,
                    extra={'crawler': self.crawler})

    @tt_coroutine
    def spider_closing(self, spider, reason, **kwargs):
        self._stop_periodic_tasks()
        if self.job_id is None:  # open_spider failed; nothing to update
            return
        yield self._update_finished_job(reason)
        logger.info("Info is saved for a closing job %s",
                    self.job_id,
                    extra={'crawler': self.crawler})

    @tt_coroutine
    def process_item(self, item, spider):
        mongo_item = scrapy_item_to_dict(item)
        if self.job_id_key:
            mongo_item[self.job_id_key] = self.job_id
        try:
            yield self.items_col.insert(mongo_item)
            self.crawler.stats.inc_value("mongo_export/items_stored_count")
        except Exception as e:
            self.crawler.stats.inc_value("mongo_export/store_error_count")
            self.crawler.stats.inc_value("mongo_export/store_error_count/" +
                                         e.__class__.__name__)
            logger.error("Error storing item",
                         exc_info=True,
                         extra={'crawler': self.crawler})
        raise gen.Return(item)

    def _update_finished_job(self, reason):
        status = 'finished'
        if reason == 'shutdown':
            status = 'shutdown'
        return self.jobs_col.update({'_id': ObjectId(self.job_id)}, {
            '$set': {
                'finished_at': datetime.datetime.utcnow(),
                'status': status,
                'stats': self._get_stats_json(),
                'stats_dict': self._get_stats_escaped(),
            }
        })

    def _stop_periodic_tasks(self):
        if self._dump_pc is not None and self._dump_pc.is_running():
            self._dump_pc.stop()

    def _get_stats_json(self):
        # json is to fix an issue with dots in key names
        return json_encode(self.crawler.stats.get_stats())

    def _get_stats_escaped(self):
        return replace_dots(copy.deepcopy(self.crawler.stats.get_stats()))

    @gen.coroutine
    def dump_stats(self):
        # json is to fix an issue with dots in key names
        stats = self._get_stats_json()
        yield self.jobs_col.update({'_id': ObjectId(self.job_id)}, {
            '$set': {
                'stats': stats,
                'stats_dict': self._get_stats_escaped(),
            }
        })
        logger.info("Stats are stored for job %s" % self.job_id,
                    extra={'crawler': self.crawler})
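
An illustrative Scrapy settings fragment for enabling the pipeline. The option names are taken from the class above; the URIs and the pipeline's import path are placeholders, and the exact URI layout depends on the motor_from_uri helper.

MONGO_EXPORT_ENABLED = True
MONGO_EXPORT_ITEMS_URI = 'mongodb://localhost:27017/scraping.items'  # placeholder
MONGO_EXPORT_JOBS_URI = 'mongodb://localhost:27017/scraping.jobs'    # placeholder
MONGO_EXPORT_JOBID_KEY = '_job_id'
MONGO_EXPORT_DUMP_PERIOD = 60.0  # dump stats every 60 s; 0 disables the PeriodicCallback

ITEM_PIPELINES = {
    'myproject.pipelines.MongoExportPipeline': 300,  # hypothetical path
}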
Example #17
0
class HTCondorCluster(htcondor.HTCondorCluster):
    job_cls = HTCondorJob
    machine_sep = "+"

    def __init__(self, *args, maintain=False, **kwargs):
        super().__init__(*args, **kwargs)
        self.pc_maintain = PeriodicCallback(self._correct_state, callback_time=30000)
        self.maintain = maintain

    @property
    def maintain(self):
        return self.pc_maintain.is_running()

    @maintain.setter
    def maintain(self, val):
        if bool(val) != self.maintain:
            if val:
                self.loop.add_callback(self.pc_maintain.start)
            else:
                self.pc_maintain.stop()

    @classmethod
    def worker2machine(cls, worker):
        if not isinstance(worker, str):
            worker = worker.name
        parts = worker.split(cls.machine_sep)
        return parts[0] if len(parts) == 2 else None

    def _machine_status(self, query="1"):
        ret = Counter()
        for line in (
            check_output(
                [
                    "condor_status",
                    "-constraint",
                    'SlotType!="Dynamic"',
                    "-af",
                    "Machine",
                    query,
                ]
            )
            .decode()
            .splitlines()
        ):
            key, value = line.split()
            ret[key] += int(value)
        return ret

    @property
    def machines_known(self):
        return self._machine_status("TotalSlotCpus")

    @property
    def machines_free(self):
        return self._machine_status("Cpus")

    @property
    def machines(self):
        ret = Counter(map(self.worker2machine, self.workers.keys()))
        ret.pop(None, None)
        return ret

    @property
    def machines_live(self):
        ret = Counter(map(self.worker2machine, self.scheduler.workers.values()))
        ret.pop(None, None)
        return ret

    @property
    def machines_pending(self):
        ml = self.machines_live
        return {m for m in self.machines.keys() if m not in ml}

    @property
    def workers_live(self):
        return set(w.name for w in self.scheduler.workers.values())

    @property
    def workers_pending(self):
        return set(self.workers.keys()) - self.workers_live

    @property
    def workers_machine_pending(self):
        mp = self.machines_pending
        return set(w for w in self.workers_pending if self.worker2machine(w) in mp)

    def status_table(self):
        from rich.table import Table, Column
        from rich import box

        info = (
            self.machines,
            self.machines_live,
            self.machines_free,
            self.machines_known,
        )

        b, l, f, t = [sum(i.values()) for i in info]

        def pct(a, b, digits=0):
            return "%.*f" % (
                max(0, digits),
                round(100 * a / (b or 1), digits),
            )

        tab = Table(
            Column("Machine \\ CPUs", "Total"),
            Column("live %", pct(l, b), justify="right"),
            Column("live", str(l), justify="right"),
            Column("booked", str(b), justify="right"),
            Column("total", str(t), justify="right"),
            Column("free", str(f), justify="right"),
            Column("free %", pct(f, t), justify="right"),
            show_footer=True,
            box=box.SIMPLE,
        )
        for m in sorted(info[-1]):
            b, l, f, t = [i.get(m, 0) for i in info]
            tab.add_row(m, pct(l, b), str(l), str(b), str(t), str(f), pct(f, t))

        return tab

    def scale_machines(
        self,
        machines,
        wait_machines=True,
        timeout=np.inf,
        poll=1,
        stop_pending=False,
    ):
        # remove excess
        max_target = make_target(self.machines, machines)
        for name in self.worker_spec.keys():
            if self.machine_sep not in str(name):
                continue
            machine, num = name.split(self.machine_sep)
            if machine not in max_target:
                continue
            # num comes from str.split(), so compare as an integer
            if max_target[machine] <= int(num):
                del self.worker_spec[name]

        # spawn new
        name_new = []
        for machine, num in make_target(self.machines_known, machines).items():
            for i in range(num):
                name = self.machine_sep.join((machine, str(i)))
                assert name not in self.worker_spec, name
                self.worker_spec[name] = self.new_machine_spec(machine)
                name_new.append(name)

        # sync internal state
        self.sync(self._correct_state)

        if not poll:
            return

        endtime = time() + timeout
        while time() < endtime and (
            self.machines_pending if wait_machines else self.workers_pending
        ):
            sleep(poll)

        if stop_pending:
            wp = self.workers_pending
            if stop_pending == "machine":
                wp = self.workers_machine_pending
            for w in wp:
                del self.worker_spec[w]

    def new_machine_spec(self, machine):
        spec = copy.deepcopy(self.new_spec)
        req = 'Machine=="%s"' % machine
        job_extra = spec.setdefault("options", {}).setdefault("job_extra", {})
        if "Requirements" in job_extra:
            req = "(%s)&&(%s)" % (req, job_extra["Requirements"])
        job_extra["Requirements"] = req
        return spec
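
A minimal usage sketch; the resource keywords are assumed to be the usual dask_jobqueue.HTCondorCluster options, and the machine name is a placeholder.

from rich.console import Console

cluster = HTCondorCluster(cores=1, memory="2 GB", disk="1 GB",
                          maintain=True)  # starts pc_maintain on the loop
cluster.scale_machines({"node1.example.org": 4}, timeout=300)
Console().print(cluster.status_table())
cluster.maintain = False  # stops the 30-second PeriodicCallback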