Example #1
def clone_with_timeout(src: str, dest: str, clone_func: Callable[[], None],
                       timeout: float) -> None:
    """Clone a repository with timeout.

    Args:
        src: clone source
        dest: clone destination
        clone_func: callable that does the actual cloning
        timeout: timeout in seconds
    """
    errors: Queue = Queue()
    process = Process(target=_clone_task, args=(clone_func, errors))
    process.start()
    process.join(timeout)

    if process.is_alive():
        process.terminate()
        # Give it literally a second (in successive steps of 0.1 second),
        # then kill it.
        # Can't use `process.join(1)` here, billiard appears to be bugged
        # https://github.com/celery/billiard/issues/270
        killed = False
        for _ in range(10):
            time.sleep(0.1)
            if not process.is_alive():
                break
        else:
            killed = True
            os.kill(process.pid, signal.SIGKILL)
        raise CloneTimeout(src, timeout, killed)

    if not errors.empty():
        raise CloneFailure(src, dest, errors.get())
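The snippet above relies on a `_clone_task` helper (and the `CloneTimeout`/`CloneFailure` exceptions) that are defined elsewhere and not shown. A minimal sketch of what such a helper could look like, assuming the parent only needs a textual error on the queue:

# Hypothetical sketch of the helper assumed above: run the clone callable
# in the child process and push any failure onto the shared error queue.
def _clone_task(clone_func, errors):
    try:
        clone_func()
    except Exception as exc:  # report the failure back to the parent process
        errors.put(str(exc))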
Example #2
    def _reflash(self, path):
        """
        this will execute the upgrade operation in another process
        because the SSH connection may hang indefinitely while reflashing
        and would block the program; setting a timeout on `exec_command`
        doesn't seem to take effect, so at least we can stop the process
        using `subprocess.join(timeout=self.UPGRADE_TIMEOUT)`
        """
        def upgrade(conn, path, timeout):
            conn.connect()
            conn.exec_command('sysupgrade -v -c {0}'.format(path),
                              timeout=timeout)
            conn.close()

        subprocess = Process(
            target=upgrade,
            args=[self, path, self.UPGRADE_TIMEOUT]
        )
        subprocess.start()
        self.log('Upgrade operation in progress...')
        subprocess.join(timeout=self.UPGRADE_TIMEOUT)
        self.log('SSH connection closed, will wait {0} seconds before '
                 'attempting to reconnect...'.format(self.SLEEP_TIME))
        sleep(self.SLEEP_TIME)
        # kill the subprocess if it has hung
        if subprocess.is_alive():
            subprocess.terminate()
            subprocess.join()
Example #3
    def _reflash(self, path):
        """
        this will execute the upgrade operation in another process
        because the SSH connection may hang indefinitely while reflashing
        and would block the program; setting a timeout on `exec_command`
        doesn't seem to take effect on some OpenWRT versions,
        so at least we can stop the process using
        `subprocess.join(timeout=self.UPGRADE_TIMEOUT)`
        """
        self.disconnect()
        command = self.get_upgrade_command(path)

        def upgrade(conn, path, timeout):
            conn.connect()
            conn.exec_command(command, timeout=timeout)
            conn.disconnect()

        subprocess = Process(target=upgrade,
                             args=[self, path, self.UPGRADE_TIMEOUT])
        subprocess.start()
        self.log('Upgrade operation in progress...')
        subprocess.join(timeout=self.UPGRADE_TIMEOUT)
        self.log(
            f'SSH connection closed, will wait {self.RECONNECT_DELAY} seconds before '
            'attempting to reconnect...')
        sleep(self.RECONNECT_DELAY)
        # kill the subprocess if it has hung
        if subprocess.is_alive():
            subprocess.terminate()
            subprocess.join()
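Both `_reflash` variants depend on attributes and helpers defined elsewhere on the connection class (`UPGRADE_TIMEOUT`, `SLEEP_TIME`/`RECONNECT_DELAY`, `log`, `get_upgrade_command`, and the connect/exec_command/disconnect methods). A rough, assumed skeleton of such a class, for orientation only:

# Hypothetical skeleton; names mirror what _reflash uses, not a real API.
class SshUpgradeConnection:
    UPGRADE_TIMEOUT = 90      # seconds to wait for sysupgrade before giving up
    SLEEP_TIME = 120          # first variant: seconds to wait before reconnecting
    RECONNECT_DELAY = 120     # second variant: same purpose, different name

    def log(self, message):
        print(message)

    def get_upgrade_command(self, path):
        return 'sysupgrade -v -c {0}'.format(path)

    # connect(), exec_command(), close() and disconnect() would wrap an SSH
    # client such as paramiko; they are omitted here.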
Example #4
    def screenshot_endpoint(
        self,
        ip_address=None,
        port=None,
        hostname=None,
        use_ssl=False,
        use_sni=False,
        path="/",
        in_separate_process=False,
    ):
        """
        Take a screenshot of the given endpoint, save it to a local temporary file, and return the local
        file path.
        :param ip_address: The IP address where the web service resides.
        :param port: The port where the web service resides.
        :param hostname: The hostname to request.
        :param use_ssl: Whether or not to use SSL to request the endpoint.
        :param use_sni: Whether or not the endpoint uses SNI.
        :param path: The path of the resource to screenshot.
        :param in_separate_process: Whether or not to take the screenshot in a separate process. This is to
        address the incredibly long time that the Selenium webdriver can take when it hangs.
        :return: A tuple containing (1) the local file path where the screenshot was saved and (2) whether or not
        the screenshot was taken successfully.
        """
        logger.debug(
            "Now attempting to take a screenshot of the web service at %s:%s (%s). Hostname is %s, SNI support is %s."
            % (ip_address, port, "using SSL" if use_ssl else "plain HTTP",
               hostname, use_sni))
        self.__set_endpoint(
            ip_address=ip_address,
            port=port,
            hostname=hostname,
            use_ssl=use_ssl,
            use_sni=use_sni,
            path=path,
        )
        self._output_file_path = self.get_temporary_file_path()
        if in_separate_process:
            process = Process(target=self.__take_screenshot)
            try:
                process.start()
                process.join(config.selenium_screenshot_delay +
                             config.inspection_screenshot_join_timeout)
            except IOError as e:
                if e.errno == errno.EINTR:
                    logger.warning("Interrupted system call error received.")
                else:
                    raise e
            finally:
                if process.is_alive():
                    print("PROCESS IS ALIVE - PID IS %s" % (process.pid, ))
                    os.kill(process.pid, signal.SIGTERM)
        else:
            self.__take_screenshot()
        return self.output_file_path, FilesystemHelper.does_file_exist(
            self.output_file_path)
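The `__take_screenshot` method itself is not shown; judging by the docstring it drives a Selenium webdriver that may hang, which is why it is pushed into a separate process. A minimal, assumed sketch of such a helper (driver choice and URL handling are simplifications, not the original implementation):

# Assumed sketch; the real __take_screenshot and its configuration are not shown above.
from selenium import webdriver

def take_screenshot(url, output_file_path):
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        driver.save_screenshot(output_file_path)  # writes a PNG to disk
    finally:
        driver.quit()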
Example #5
class LongCalculation(QProgressDialog):
    """
    Multiprocessing based worker for mesh and eigenvalue calculations.

    This is necessary to make sure GUI is not blocked while mesh is built,
    or when eigenvalue calculations are performed.

    Transformations do not need as much time, unless there is one implemented
    without numpy vectorized coordinate calculations.
    """

    res = None

    def __init__(self, fun, args, postprocess, job):
        """ Build multiprocessing queues and start worker. """
        super(LongCalculation, self).__init__(job, "Cancel", 0, 0)
        self.setModal(True)
        self.input = Queue()
        self.output = Queue()
        self.input.put((fun, args, postprocess))
        self.proc = Process(target=worker, args=(self.input, self.output))
        self.proc.start()
        self.timer = QTimer()
        self.timer.timeout.connect(self.update)
        self.timer.start(10)

    def update(self):
        """ Check if worker is done, and close dialog. """
        try:
            out = self.output.get(block=False)
            if isinstance(out, str):  # progress messages from the worker are plain strings
                self.setLabelText(out)
                return
            if out is None:
                self.done(0)
                return
            self.res = out
            self.timer.stop()
            self.proc.join()
            del self.proc
            self.done(1)
        except Exception:
            # the output queue is still empty; poll again on the next timer tick
            pass

    def cleanUp(self):
        """ Kill the running processes if cancelled/failed. """
        if self.proc:
            # terminate() is asynchronous, so keep signalling until the process exits
            while self.proc.is_alive():
                self.proc.terminate()
            del self.proc
        self.timer.stop()
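The `worker` target passed to Process is not part of the snippet. From the way `update` interprets the output queue (strings are progress labels, `None` means failure, anything else is the final result), it presumably looks roughly like this assumed sketch:

# Assumed sketch of the worker the dialog starts; it follows the queue protocol
# used by update(): status strings, then the postprocessed result (or None on error).
def worker(input_queue, output_queue):
    fun, args, postprocess = input_queue.get()
    try:
        output_queue.put("Calculating...")   # shown as the dialog label text
        result = fun(*args)
        output_queue.put(postprocess(result))
    except Exception:
        output_queue.put(None)               # signals failure; the dialog closes with done(0)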
Example #6
class MessageConsole(textconsole.SimpleConsole):
    '''
    a message console for MAVProxy
    '''
    def __init__(self, title='MAVProxy: console'):
        if platform.system() == 'Darwin':
            forking_enable(False)
        textconsole.SimpleConsole.__init__(self)
        self.title = title
        self.menu_callback = None
        self.parent_pipe_recv, self.child_pipe_send = Pipe(duplex=False)
        self.child_pipe_recv, self.parent_pipe_send = Pipe(duplex=False)
        self.close_event = Event()
        self.close_event.clear()
        self.child = Process(target=self.child_task)
        self.child.start()
        self.child_pipe_send.close()
        self.child_pipe_recv.close()
        t = threading.Thread(target=self.watch_thread)
        t.daemon = True
        t.start()

    def child_task(self):
        '''child process - this holds all the GUI elements'''
        self.parent_pipe_send.close()
        self.parent_pipe_recv.close()

        import wx_processguard
        from wx_loader import wx
        from wxconsole_ui import ConsoleFrame
        app = wx.App(False)
        app.frame = ConsoleFrame(state=self, title=self.title)
        app.frame.SetDoubleBuffered(True)
        app.frame.Show()
        app.MainLoop()

    def watch_thread(self):
        '''watch for menu events from child'''
        from mp_settings import MPSetting
        try:
            while True:
                msg = self.parent_pipe_recv.recv()
                if self.menu_callback is not None:
                    self.menu_callback(msg)
                time.sleep(0.1)
        except EOFError:
            pass

    def write(self, text, fg='black', bg='white'):
        '''write to the console'''
        try:
            self.parent_pipe_send.send(Text(text, fg, bg))
        except Exception:
            pass

    def set_status(self, name, text='', row=0, fg='black', bg='white'):
        '''set a status value'''
        if self.is_alive():
            self.parent_pipe_send.send(Value(name, text, row, fg, bg))

    def set_menu(self, menu, callback):
        '''set the console menu and the menu callback'''
        if self.is_alive():
            self.parent_pipe_send.send(menu)
            self.menu_callback = callback

    def close(self):
        '''close the console'''
        self.close_event.set()
        if self.is_alive():
            self.child.join(2)

    def is_alive(self):
        '''check if child is still going'''
        return self.child.is_alive()
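`Text` and `Value` are message containers defined elsewhere in MAVProxy and are not shown here; simple field holders along these lines would satisfy the way `write` and `set_status` use them (this is an assumed stand-in, not MAVProxy's actual definition):

# Assumed stand-ins; MAVProxy defines its own Text/Value message classes.
from collections import namedtuple

Text = namedtuple('Text', ['text', 'fg', 'bg'])
Value = namedtuple('Value', ['name', 'text', 'row', 'fg', 'bg'])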
Example #7
class CrawlProcess():

    _model = Task
    __instance = None
    count = 0

    @staticmethod
    def get_instance():
        """ Static access method. """
        print(CrawlProcess.__instance)
        if CrawlProcess.__instance is None:
            CrawlProcess()
        return CrawlProcess.__instance

    def __init__(self):
        if CrawlProcess.__instance is not None:
            raise Exception("This class is a singleton!")
        else:
            CrawlProcess.__instance = self
            #self.process = Process(target=CrawlProcess.crawl)
            self.process = None
            self.crawler_process = None
            self.task = None
            self.q = Queue()
            self.parent_conn, self.child_conn = Pipe()



    #@classmethod
    #def crawl(cls, q):
    #@classmethod
    #def crawl(cls, process, q):
    @classmethod
    def crawl(cls, q, conn):
        print()
        print()
        print('***************************************************************************************')
        print('crawl')

        def close(spider, reason):
            print(f'{multiprocessing.current_process().name}: *!!CLOSE')
            write_in_a_file('CrawlerProcess.signal.close', {'reason': reason}, 'task.txt')
            t = Task.objects.get_latest_crawler_task()
            d = datetime.today()
            t.description = f'spider closed with count: {CrawlProcess.count} at {str(d)}'
            t.result = CrawlProcess.count
            t.save()

        def open(spider):
            print(f'{multiprocessing.current_process().name}: *!!OPEN')
            try:
                name = spider.name
            except:
                name = str(spider)
            write_in_a_file('CrawlerProcess.signal.open', {'spider': name}, 'task.txt')
            CrawlProcess.count = 0
            try:
                t = Task.objects.get_latest_crawler_task()
                t.name = str(process.pid)
                t.save()
            except Exception as e:
                t.name = e
                t.save()
            #q.put_nowait()
            print()


        def scraped(item, response, spider):
            print(f'{multiprocessing.current_process().name}: *!!SCRAPED')

            print()
            CrawlProcess.count = CrawlProcess.count + 1
            n = CrawlProcess.count
            write_in_a_file('CrawlerProcess.signal.scraped_item', {'response': response, 'count': n}, 'task.txt')
            try:
                q.get_nowait()
                q.put_nowait(n)
            except:
                q.put_nowait(n)

        def stopped(*args, **kwargs):
            write_in_a_file('CrawlerProcess.signal.stopped', {'args': args, 'kwargs': kwargs}, 'task.txt')

        def error(*args, **kwargs):
            write_in_a_file('CrawlerProcess.signal.error', {'args': args, 'kwargs': kwargs}, 'task.txt')

        def send_by_pipe(item):
            try:
                conn.send(item)
                #conn.close()
            except Exception as e:
                write_in_a_file('CrawlProcess._crawl: error conn.send', {'conn error': e}, 'debug.txt')

        process = CrawlerProcess(get_project_settings())
        write_in_a_file('CrawlProcess.crawl: first', {'crawler_process': str(process), 'dir process': dir(process)},
                        'debug.txt')
        send_by_pipe(process)
        write_in_a_file('CrawlProcess.crawl: second', {'crawler_process': str(process), 'dir process': dir(process)},'debug.txt')
        process.crawl(InfoempleoSpider())
        write_in_a_file('CrawlProcess.crawl: third', {'crawler_process': str(process), 'dir process': dir(process)},'debug.txt')
        crawler = Crawler(InfoempleoSpider())
        crawler.signals.connect(open, signal=signals.spider_opened)
        crawler.signals.connect(scraped, signal=signals.item_scraped)
        crawler.signals.connect(close, signal=signals.spider_closed)
        crawler.signals.connect(stopped, signal=signals.engine_stopped)
        crawler.signals.connect(error, signal=signals.spider_error)

        write_in_a_file('CrawlProcess.crawl: before', {'crawler_process': str(process),'dir process': dir(process)},'debug.txt')

        process.crawl(crawler)
        write_in_a_file('CrawlProcess.crawl: after', {'crawler_process': str(process), 'dir process': dir(process)}, 'debug.txt')

        process.start()
        write_in_a_file('CrawlProcess._crawl: process started', {'crawler_process': str(process), 'dir process': dir(process)}, 'debug.txt')

        print('***************************************************************************************')
        print(f'CrawlerProcess: {process}')
        print(dir(process))
        print('***************************************************************************************')
        print()
        print()
        write_in_a_file('CrawlProcess.crawl', {'CrawlerProcess': str(process), 'dir(CrawlerProcess)': dir(process)}, 'task.txt')
        process.join()
        write_in_a_file('CrawlProcess.crawl: process.join', {}, 'task.txt')
        write_in_a_file('CrawlProcess.crawl: process.join', {}, 'spider.txt')

        print('Crawler Process has Finished!!!!!')



    @classmethod
    def crawl2(cls, q):
        while CrawlProcess.count < 15:
           # print(f'doing something: {CrawlProcess.count}')
            CrawlProcess.count = CrawlProcess.count + 1
            n = CrawlProcess.count
            try:
                q.get_nowait()
            except:
                pass
            q.put(n)
            if CrawlProcess.count % 5 == 0:
               # print(f'qsize: {q.qsize()}')
                time.sleep(5)


    def _clear_queue(self):
        while not self.q.empty():
            self.q.get_nowait()



    def _init_process(self, user):
        print(f'CrawlerProcess.init_process')
        self.q.put_nowait(0)
        self.process = Process(target=CrawlProcess.crawl, args=(self.q, self.child_conn,))
        self.task = Task(user=user, state=Task.STATE_PENDING, type=Task.TYPE_CRAWLER)



    def _start_process(self):
        print(f'CrawlerProcess._start_process')
        self.init_datetime = timezone.now()  # Before creating the task
        self.process.start()
        self.task.pid = self.process.pid
        write_in_a_file('CrawlProcess._start_process: process started', {'pid': self.process.pid}, 'debug.txt')
        self.task.state = Task.STATE_RUNNING
        self.task.save()
        self.crawler_process = self.parent_conn.recv()
        write_in_a_file('CrawlProcess._start_process: conn.recv', {'crawler_process':str(self.crawler_process), 'dir crawler_process':dir(self.crawler_process)}, 'debug.txt')
        write_in_a_file('CrawlProcess._start_process', {'CrawlerProcess': str(self.crawler_process), 'dir(CrawlerProcess)': dir(self.crawler_process)},'task.txt')


    def _reset_process(self, state=Task.STATE_FINISHED):
        print(f'CrawlerProcess._reset_process({state})')
        try:
            self.process.terminate()
            write_in_a_file('_reset_process terminated (from stop)', {'is_running': self.process.is_alive()}, 'debug.txt')
            self.task.result = CrawlProcess.count
            self.task.state = state
            self.task.save()
            self.process.join()  # ! IMPORTANT after .terminate -> .join
            write_in_a_file('_reset_process joined (from stop)', {'is_running': self.process.is_alive()}, 'debug.txt')
        except:
            pass
        try:
            self.result = self.q.get_nowait()
        except Exception as e:
            pass
        self._clear_queue()


    def _update_process(self):
        print('CrawlerProcess._update_process')
        print(f'process is alive: {self.process and self.process.is_alive()}')
        if self.process and not self.process.is_alive():
            self._reset_process()


    def start(self, user=None, **kwargs):
        """
        Si el proceso no está vivo es que o no se ha iniciado aún o que ya ha terminado, así que
        se guardan los datos almacenados y se ejecuta el proceso.
        Si el proceso está vivo no se hace nada.

        :param user: The uses that make the request
        :param kwargs:
        :return:
        """
        print(f'self.q.empty(): {self.q.empty()}')
        print(f'self.q.qsize(): {self.q.qsize()}')

        if not self.is_scrapping():
            if self.task and (self.task.state == Task.STATE_RUNNING):
                self._reset_process()
            self._init_process(user)
            self._start_process()

    def stop(self):
        print('CrawlProcess.stop')
        self._reset_process(Task.STATE_INCOMPLETE)
       # self.crawler_process.stop()
        #self.crawler_process.join()


    def join(self):
        self.process.join()

    def get_actual_task(self):
        self._update_process()
        return self.task

    def get_latest_task(self):
        last_task = Task.objects.get_latest_crawler_task()
        # If the latest task from the db is in STATE_RUNNING and is not the current task,
        # it is an incomplete task and its state has to be updated
        is_an_incomplete_task = (
                last_task and
                last_task.state == Task.STATE_RUNNING and
                (not self.task or self.task.pk != last_task.pk)
        )
        if is_an_incomplete_task:
            last_task.state = Task.STATE_INCOMPLETE
            last_task.save()
        return last_task

    def is_scrapping(self):
        print('CrawlProcess.is_scrapping')
        if self.process:
            return self.process.is_alive()
        else:
            return False

    def _get_scraped_jobs(self):
        latest_task = Task.objects.get_latest_crawler_task()
        return Job.objects.filter(Q(created_at__gte=latest_task.created_at) | Q(updated_at__gte=latest_task.created_at))

    def get_scraped_items_number(self):
        print()
        print('!!!! CrawlProcess.get_scraped_items_number')
        print()
        count = CrawlProcess.count
        try:
            print(self.q)
            #print(f'CrawlProcess.count: {CrawlProcess.count}')
            #print(f'qsize: {self.q.qsize()}')
            count = self.q.get(block=True, timeout=5)
            CrawlProcess.count = count
            print(f'q.count: {count}')
        except Exception as e:
            print(f'get_scraped_items_number')
           # save_error(e, {'count': count})
        return count


    def get_scraped_items_percentage(self):
        # Compute the total using the items scraped by the previous task
        count = self.get_scraped_items_number()
        task = Task.objects.get_latest_finished_crawler_task()
        if task:
            old_result = task.result or 20000
        else:
            old_result = 20000

        if count < old_result:
            total = old_result
        else:
            total = count
        db_count = self._get_scraped_jobs().count()

        try:
            percentage = round(db_count/total, 2)
        except:
            percentage = 0

        if percentage >= 0.95 and self.is_scrapping():
            percentage = 0.95

        return percentage
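Given the singleton accessor and the public methods shown above, driving the crawler from calling code would presumably look something like this usage sketch (the Django `request.user` and the polling interval are assumptions, not part of the snippet):

# Assumed usage sketch based on the public API shown above.
import time

crawler = CrawlProcess.get_instance()
crawler.start(user=request.user)          # e.g. from a Django view; spawns the scrapy process if idle

while crawler.is_scrapping():
    progress = crawler.get_scraped_items_percentage()
    print('progress: {:.0%}'.format(progress))
    time.sleep(10)

print('finished task:', crawler.get_actual_task())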