Example #1
def start():
    builder = cluster_crawler.ClusterCrawler()
    start_block_id = int(sys.argv[1])
    block_id = start_block_id
    process = None
    while builder.crawl_block(block_id):
        print("Block %d crawled" % block_id)

        if block_id - start_block_id > 0 and (block_id - start_block_id) % settings.block_crawling_limit == 0:
            builder.network_graph.check_integrity()
            while process is not None and process.is_alive():
                print("Waiting for insertion thread to complete...")
                process.join()

            if process is not None and process.exitcode > 0:  # error
                raise Exception("Errorcode %d in DB Sync Thread, aborting" % process.exitcode)
            process = Process(target=builder.network_graph.synchronize_mongo_db)
            process.start()
            builder.network_graph = cluster_network.ClusterNetwork(settings.db_server, settings.db_port)  # Starting a new graph while other graph data is inserted.
            builder.connect_to_bitcoind_rpc()

        if process is not None and not process.is_alive() and process.exitcode > 0:  # error
            raise Exception("Errorcode %d in DB Sync Thread, aborting" % process.exitcode)
        block_id += 1

    # Finished crawling, flushing to DB.
    # Waiting for any previous DB sync
    while process is not None and process.is_alive():
        print("Waiting for insertion thread to complete...")
        process.join()

    # Sync the rest
    process = Process(target=builder.network_graph.synchronize_mongo_db)
    process.start()
    process.join()
class MultiProcessRunner(BaseRunner):
    JOIN_TIMEOUT = 60

    def __init__(self, target, **kwargs):
        super(MultiProcessRunner, self).__init__(target, **kwargs)
        self.process = None  # type: Process

    @capture_monitor_exception
    def start(self):
        self.process = Process(target=self.target, kwargs=self.kwargs)
        self.process.start()

    @capture_monitor_exception
    def stop(self):
        if self.process and self.is_alive():
            self.process.terminate()
            self.process.join(MultiProcessRunner.JOIN_TIMEOUT)
            if self.process.is_alive():
                self.process.kill()

    @capture_monitor_exception
    def heartbeat(self):
        # do we want to do something here?
        pass

    @capture_monitor_exception
    def is_alive(self):
        return self.process.is_alive()

    def __str__(self):
        s = super(MultiProcessRunner, self).__str__()
        return f"{s}({self.process})"
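A brief usage sketch for the runner above. It assumes BaseRunner stores the target and keyword arguments as self.target and self.kwargs and that capture_monitor_exception is a pass-through decorator (both come from the surrounding project and are not shown here); sample_target is a hypothetical worker:

import time

def sample_target(message, repeat):
    # Hypothetical worker used only for illustration.
    for _ in range(repeat):
        print(message)
        time.sleep(1)

runner = MultiProcessRunner(sample_target, message="ping", repeat=3)
runner.start()              # spawns the child process
print(runner.is_alive())    # True while the child is still running
runner.stop()               # terminate, join with a timeout, then kill if still alive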
def process_file(file_path_input, file_path_output, i):
    if os.path.exists(file_path_output):
        return print('{}    {}  Already done'.format(now(), file_path_output))

    input_file = open(file_path_input, 'r')
    output_file = open(file_path_output, 'a')

    # every worker creates a separate output file for its input file
    writer = csv.writer(output_file, delimiter='\t')

    # for every file we create a separate driver (100 URLs)
    driver = webdriver.PhantomJS(executable_path=path_to_phantomjs)

    for line in input_file:
        splited = line.split('\t')
        # property_type = splited[0]
        url = splited[1]
        print('{}   Process={}  Current url: {}'.format(now(), i, url))

        # start process for getting microformat properties
        temp_queue = Queue()
        # p = Process(target=get_microformat_properties_by_type, args=(url, property_type, temp_queue, i))
        p = Process(target=get_element_features,
                    args=(url, driver, temp_queue, i))
        print("{}   {}  Process={}  {}  {}".format(now(), i, "Started: ",
                                                   "feature extraction", url))
        p.start()
        try:
            event_features = temp_queue.get(timeout=TIME_OUT_FEATURE)
        except Empty:  # queue.Empty, raised when the get() times out
            event_features = None
            print("{}   {}  Process={}  {}  {}".format(now(), i, "Timed out on: ",
                                                       "feature extraction", url))

        if p.is_alive():
            p.terminate()

        print("Event features:" + str(event_features))

        if event_features is not None:
            print("{}   Process={}  Got properties for  {}".format(
                now(), i, url))

            # start process for feature extraction and writing to separate file
            # p_event_features = Process(target=get_event_features_and_write,
            #                            args=(event_features, driver, writer, i, output_file))
            #
            p_event_features = Process(target=write_element_features,
                                       args=(event_features, writer, i,
                                             output_file))
            p_event_features.start()
            # join with a timeout before checking liveness (mirrors the commented-out
            # start_with_timeout(p_event_features, TIME_OUT_LOAD, "feature writing", url, i))
            p_event_features.join(TIME_OUT_LOAD)
            if p_event_features.is_alive():
                p_event_features.terminate()

    driver.service.process.send_signal(signal.SIGTERM)
    driver.quit()
    input_file.close()
    output_file.close()
    return 'done'
def main():
    print('Parent process starting')
    p = Process(target=test, args=(1, ))
    p.start()
    print(p.is_alive())
    print('Parent process ending')
    print(p.is_alive())
    print(p.is_alive())
    print(p.is_alive())
    print(p.is_alive())
    print(p.is_alive())
    print('Child process name:', p.name)
    print('Child process pid:', p.pid)
    p.terminate()
    p.join(20)
Example #5
class AIOProcess:
    """ Execute a coroutine on a separate process """
    def __init__(self,
                 coroutine: Callable = None,
                 *args,
                 daemon: bool = False,
                 target_override: Callable = None,
                 **kwargs):
        if not asyncio.iscoroutinefunction(coroutine):
            raise ValueError("target must be a coroutine function")

        self.aio_process = Process(target=target_override
                                   or partial(AIOProcess.run_async, coroutine),
                                   args=args,
                                   kwargs=kwargs,
                                   daemon=daemon)

    @staticmethod
    def run_async(coroutine: Callable, *args, **kwargs):
        try:
            loop = uvloop.new_event_loop()
            asyncio.set_event_loop(loop)
            result = loop.run_until_complete(coroutine(*args, **kwargs))

            return result
        except BaseException:
            log.exception(f"aio process {os.getpid()} failed")
            raise

    def start(self):
        self.aio_process.start()

    async def join(self, timeout=None):
        if not self.is_alive() and self.exit_code is None:
            raise ValueError("must start process before joining")

        if timeout is not None:
            return await asyncio.wait_for(self.join(), timeout)

        while self.exit_code is None:
            await asyncio.sleep(0.005)

    @property
    def pid(self):
        return self.aio_process.pid

    @property
    def daemon(self):
        return self.aio_process.daemon

    @property
    def exit_code(self):
        return self.aio_process.exitcode

    def is_alive(self):
        return self.aio_process.is_alive()

    def terminate(self):
        self.aio_process.terminate()
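A short usage sketch, assuming the module-level imports the class relies on (asyncio, os, uvloop, functools.partial, multiprocessing.Process, a log object) are in place; the demo coroutine below is hypothetical:

import asyncio

async def demo(seconds):
    # Hypothetical coroutine used only for illustration.
    await asyncio.sleep(seconds)

async def main():
    proc = AIOProcess(demo, 0.5)
    proc.start()
    await proc.join(timeout=5)   # polls the exit code without blocking the event loop
    print(proc.exit_code)        # 0 on a clean exit

asyncio.run(main())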
Example #6
def start():
    builder = cluster_crawler.ClusterCrawler()
    start_block_id = int(sys.argv[1])
    block_id = start_block_id
    process = None
    while builder.crawl_block(block_id):
        if settings.debug or block_id % 100 == 0:
            print("Block %d crawled" % block_id)

        if block_id - start_block_id > 0 and (
                block_id -
                start_block_id) % settings.block_crawling_limit == 0:
            builder.network_graph.check_integrity()
            while process is not None and process.is_alive():
                print("Waiting for insertion thread to complete...")
                process.join()

            if process is not None and process.exitcode > 0:  #error
                raise Exception("Errorcode %d in DB Sync Thread, aborting" %
                                process.exitcode)
            process = Process(
                target=builder.network_graph.synchronize_mongo_db)
            process.start()
            builder.network_graph = cluster_network.ClusterNetwork(
                settings.db_server, settings.db_port
            )  #Starting a new graph while other graph data is inserted.
            builder.connect_to_bitcoind_rpc()

        if process is not None and not process.is_alive(
        ) and process.exitcode > 0:  #error
            raise Exception("Errorcode %d in DB Sync Thread, aborting" %
                            process.exitcode)
        block_id += 1

    #Finished Crawling, Flushing to DB.
    #Waiting for any previous DB Sync
    while process is not None and process.is_alive():
        print("Waiting for insertion thread to complete...")
        process.join()

    #Sync the rest
    print("Inserting into the DB")
    process = Process(target=builder.network_graph.synchronize_mongo_db)
    process.start()
    process.join()
Example #7
def process_file(file_path_input, file_path_output, i):
    if os.path.exists(file_path_output):
        return print('{}    {}  Already done'.format(now(), file_path_output))

    input_file = open(file_path_input, 'r')
    output_file = open(file_path_output, 'a')

    # every worker creates a separate output file for its input file
    writer = csv.writer(output_file, delimiter='\t')

    # for every file we create a separate driver (100 URLs)
    driver = webdriver.PhantomJS(executable_path=path_to_phantomjs)

    for line in input_file:
        splited = line.split('\t')
        property_type = splited[0]
        url = splited[1]
        print('{}   Process={}  Current url: {}'.format(now(), i, url))

        # start process for getting microformat properties
        temp_queue = Queue()
        p = Process(target=get_microformat_properties_by_type,
                    args=(url, property_type, temp_queue, i))
        start_with_timeout(p, TIME_OUT_LOAD, "loading", url, i)

        event_properties = temp_queue.get() if not temp_queue.empty() else None
        if p.is_alive():
            p.terminate()
        if event_properties is not None:
            print("{}   Process={}  Got properties for  {}".format(
                now(), i, url))

            # start process for feature extraction and writing to separate file
            p_event_features = Process(target=get_event_features_and_write,
                                       args=(event_properties, driver, writer,
                                             i, output_file))
            start_with_timeout(p_event_features, TIME_OUT_FEATURE,
                               "feature extraction", url, i)
            if p_event_features.is_alive():
                p_event_features.terminate()

    return 'done'
def start():
    builder = GraphBuilder()
    start_block_id = int(sys.argv[1])
    block_id = start_block_id
    process = None
    try:
        while builder.crawl_block(block_id):
            print("Block %d crawled" % block_id)

            if block_id - start_block_id > 0 and (block_id - start_block_id) % Settings.block_crawling_limit == 0:
                builder.network_graph.check_integrity()
                while process is not None and process.is_alive():
                    print("Waiting for insertion thread to complete...")
                    process.join()

                if process is not None and process.exitcode > 0:  # error
                    raise Exception("Errorcode %d in DB Sync Thread, aborting" % process.exitcode)
                process = Process(target=builder.network_graph.synchronize_mongo_db)
                process.start()
                builder.network_graph = NetworkGraph.Network(Settings.db_server, Settings.db_port)  # Starting a new graph while other graph data is inserted.

            if process is not None and not process.is_alive() and process.exitcode > 0:  # error
                raise Exception("Errorcode %d in DB Sync Thread, aborting" % process.exitcode)
            block_id += 1

        # Finished crawling, flushing to DB.
        # Waiting for any previous DB sync
        while process is not None and process.is_alive():
            print("Waiting for insertion thread to complete...")
            process.join()

        # Sync the rest
        process = Process(target=builder.network_graph.synchronize_mongo_db)
        process.start()
        process.join()

        # DONE!

    # For debugging purposes
    except:
        input("An exception is about to be raised; press Enter to continue")
        raise
Example #9
    def start_up_app(self):
        self.show_splash_screen()

        # load db in separate process
        process_startup = Process(target=LucteriosRefreshAll)
        process_startup.start()

        while process_startup.is_alive():
            # print('updating')
            self.splash.update()

        self.remove_splash_screen()
Example #10
    def start_up_app(self):
        self.show_splash_screen()
        try:
            # load db in separate process
            process_startup = Process(target=LucteriosRefreshAll)
            process_startup.start()

            while process_startup.is_alive():
                # print('updating')
                self.splash.update()
        finally:
            self.remove_splash_screen()
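Both splash-screen examples busy-wait on is_alive(). A variant that yields the CPU between UI updates by joining with a short timeout could look like this (a sketch against the same LucteriosRefreshAll target and splash API as above, not the original code):

        # drop-in replacement for the while loop inside start_up_app
        process_startup = Process(target=LucteriosRefreshAll)
        process_startup.start()
        while process_startup.is_alive():
            process_startup.join(timeout=0.05)   # wait briefly instead of spinning
            self.splash.update()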
Example #11
    def test_keyboard_interrupt_on_linux(self) -> None:
        """
        - Keyboard interrupt should reach all descendant processes.
        - Keyboard interrupt should shut down ProcessTaskPoolExecutor gracefully.
        """
        process = Process(target=self.report_raises_keyboard_interrupt)
        process.start()
        LocalSocket.receive()
        time.sleep(SECOND_SLEEP_FOR_TEST_SHORT)
        self.simulate_ctrl_c_in_posix(process)
        assert LocalSocket.receive() == "Test succeed"
        process.join()
        assert process.exitcode == 0
        assert not process.is_alive()
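simulate_ctrl_c_in_posix is not included in the snippet; on POSIX a Ctrl-C is commonly simulated by delivering SIGINT to the child process, roughly like this (a sketch of an assumed helper, not the project's actual implementation):

import os
import signal
from multiprocessing import Process

def simulate_ctrl_c_in_posix(process: Process) -> None:
    # SIGINT is what the terminal sends on Ctrl-C; Python raises it in the child
    # as KeyboardInterrupt. Propagating to a whole process group would use os.killpg.
    os.kill(process.pid, signal.SIGINT)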
Example #12
def main():
    logging.info(f'App started')

    max = 2
    worker = Process(target=work, args=['Working', max], daemon=True, name='Worker')
    worker.start()

    time.sleep(5)

    # if the process is running, stop it
    if worker.is_alive():
        worker.terminate()   # kill the process with SIGTERM
    worker.join()

    # exitcode == 0 is good
    # anything else is an error
    logging.info(f'App finished: {worker.exitcode}')
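The work target and logging setup are not part of the snippet; a minimal sketch of what they might look like (names and format are assumptions):

import logging
import time

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(processName)s %(message)s')

def work(label, count):
    # Hypothetical worker: logs a message `count` times, pausing between iterations.
    for n in range(count):
        logging.info(f'{label} {n}')
        time.sleep(1)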
Example #13
from contextlib import contextmanager

@contextmanager  # implied by the docstring and the yield below
def run_server(launcher, wait_before_entering=0, verbose=False, **kwargs):
    """Context manager to launch server on entry, and shut it down on exit"""
    from warnings import warn

    clog = conditional_logger(verbose)
    server = None
    try:
        server = Process(target=launcher, kwargs=kwargs)
        clog(f'Starting server...')
        server.start()
        clog(f'... server started.')
        sleep(wait_before_entering)
        yield server
    finally:
        if server is not None and server.is_alive():
            clog(f'Terminating server...')
            server.terminate()
        clog(f'... server terminated')
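A brief usage sketch (the launcher below is hypothetical, and it relies on run_server being used as a contextlib.contextmanager, consistent with its docstring and yield):

import time

def dummy_launcher(port=8080):
    # Hypothetical stand-in for a real blocking server entry point.
    while True:
        time.sleep(1)

with run_server(dummy_launcher, wait_before_entering=1, verbose=True, port=8080) as server:
    print(server.pid)   # the server runs in its own process while inside the block
# leaving the block terminates the server process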
Example #14
def main():
    logging.info('Started')

    max = 2
    worker = Process(target=work,
                     args=['Working', max],
                     daemon=True,
                     name='Super Mario')
    worker.start()

    time.sleep(5)

    #if the process is running, stop it
    if worker.is_alive():
        worker.terminate()
    worker.join()

    #exitcode == 0
    #Anything else is an error
    logging.info(f'Finished: {worker.exitcode}')
Example #15
def run_load_test_clients(clients_count: int):
    shared_rows_counts = multiprocessing.Manager().dict()
    last_failed_rows_count = [0]

    def log_rows_counts():
        current_rows_counts = dict(shared_rows_counts)
        inserted_rows_count = sum(
            map(
                lambda x: x[1],
                filter(lambda x: x[0].startswith("inserted"),
                       current_rows_counts.items())))
        failed_rows_count = sum(
            map(
                lambda x: x[1],
                filter(lambda x: x[0].startswith("failed"),
                       current_rows_counts.items())))

        log_message = f"Inserted {inserted_rows_count} rows, failed to insert {failed_rows_count} rows"
        if failed_rows_count % 1000 > last_failed_rows_count[0]:
            logging.warning(log_message)
            last_failed_rows_count[0] = failed_rows_count
        else:
            logging.info(log_message)

    clients = []
    for client_number in range(clients_count):
        client = Process(target=main,
                         args=(True, client_number, shared_rows_counts))
        client.start()
        clients.append(client)

    alive_clients = clients
    while len(alive_clients) > 0:
        sleep(3)
        log_rows_counts()
        alive_clients = list(filter(lambda client: client.is_alive(), clients))
class TestRemoteSession(TestCase):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.server_process = None
        """:type: Process"""

    def setUp(self):
        self.test_file = TestFile(settings.test_file_size)

        self.original_data = self.test_file.get_content()

        self.ftp_root = self.test_file.path
        self.remote_filename = uuid.uuid4().hex

        tuned_server, self.port = server.get_tuned_server(self.ftp_root)

        def server_func():
            tuned_server.serve_forever(handle_exit=True)

        self.server_process = Process(target=server_func)
        """:type: Process"""
        self.server_process.start()

    def get_connected_client_in_binary_mode(self):
        client = Client()
        client.connect('localhost', self.port)
        client.login(settings.ftp_user, settings.ftp_pass)
        client.type('I')  # Binary mode ('I'mage)
        return client

    def tearDown(self):
        while self.server_process.is_alive():
            # send SIGINT to the process; pyftpd handles it and shuts down cleanly
            os.kill(self.server_process.pid, signal.SIGINT)
            self.server_process.join(timeout=1)

        unlink(self.test_file.full_filename)

    def test_receive_file(self):
        """
        тест на получение заранее сгенерированных и сохраненных на диске данных клиентом через подключение
        к локальному серверу.
        """
        client = self.get_connected_client_in_binary_mode()
        code, rest, data = client.retr(self.test_file.filename)
        q_code, q_rest = client.quit()

        self.assertEqual(code, 226)
        self.assertTrue(self.original_data == data)

    def test_send_file(self):
        """
        Тест на отправку файла клиентом на локальный сервер и сравнение содержимого файла на диске
        с отправленными данными
        """
        client = self.get_connected_client_in_binary_mode()
        client.stor(self.remote_filename, self.original_data)

        with open(os.path.join(self.ftp_root, self.remote_filename), 'rb') as f:
            stored_file_content = f.read()

        code, rest = client.quit()

        self.assertTrue(self.original_data == stored_file_content)
Example #17
class CreateProcess:
    """A context manager to launch a parallel process and close it on exit.
    """

    def __init__(
        self,
        proc_func: Callable,
        process_name=None,
        wait_before_entering=2,
        verbose=False,
        args=(),
        **kwargs,
    ):
        """
        Essentially, this context manager will call
        ```
            proc_func(*args, **kwargs)
        ```
        in an independent process.

        :param proc_func: A function that will be launched in the process
        :param process_name: The name of the process.
        :param wait_before_entering: A pause (in seconds) before returning from the enter phase.
            (in case the outside should wait before assuming everything is ready)
        :param verbose: If True, will print some info on the starting/stopping of the process
        :param args: args that will be given as arguments to the proc_func call
        :param kwargs: The kwargs that will be given as arguments to the proc_func call

        The following should print 'Hello console!' in the console.
        >>> with CreateProcess(print, verbose=True, args=('Hello console!',)) as p:
        ...     print("-------> Hello module!")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Starting process: print...
        ... print process started.
        -------> Hello module!
        ... print process terminated
        """
        self.proc_func = proc_func
        self.process_name = process_name or getattr(proc_func, '__name__', '')
        self.wait_before_entering = float(wait_before_entering)
        self.verbose = verbose
        self.args = args
        self.kwargs = kwargs
        self.clog = conditional_logger(verbose)
        self.process = None
        self.exception_info = None

    def process_is_running(self):
        return self.process is not None and self.process.is_alive()

    def __enter__(self):
        self.process = Process(
            target=self.proc_func,
            args=self.args,
            kwargs=self.kwargs,
            name=self.process_name,
        )
        self.clog(f'Starting process: {self.process_name}...')
        try:
            self.process.start()
            if self.process_is_running():
                self.clog(f'... {self.process_name} process started.')
                sleep(self.wait_before_entering)
                return self
            else:
                raise RuntimeError('Process is not running')
        except Exception:
            raise RuntimeError(
                f'Something went wrong when trying to launch process {self.process_name}'
            )

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.process is not None and self.process.is_alive():
            self.clog(f'Terminating process: {self.process_name}...')
            self.process.terminate()
        self.clog(f'... {self.process_name} process terminated')
        if exc_type is not None:
            self.exception_info = dict(
                exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb
            )

def child_2(interval):
    print('Child process (%s) started; parent process is (%s)' % (os.getpid(), os.getppid()))
    t_start = time.time()
    time.sleep(interval)
    t_end = time.time()
    print('Child process (%s) ran for %0.2f seconds' % (os.getpid(), t_end - t_start))


if __name__ == '__main__':
    print('------ Parent process starting ------')
    print('Parent process PID: %s' % os.getpid())
    p1 = Process(target=child_1, args=(1, ))
    p2 = Process(target=child_2, name='mrsoft', args=(2, ))
    p1.start()
    p2.start()

    print('p1.is_alive=%s' % p1.is_alive())
    print('p2.is_alive=%s' % p2.is_alive())

    print('p1.name=%s' % p1.name)
    print('p1.pid=%s' % p1.pid)

    print('p2.name=%s' % p2.name)
    print('p2.pid=%s' % p2.pid)
    print('------ Waiting for child processes ------')
    p1.join()
    p2.join()
    print('------ Parent process finished ------')
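child_1 is not shown in the snippet; it presumably mirrors child_2, e.g. (a hypothetical sketch):

def child_1(interval):
    # Hypothetical counterpart to child_2: sleep for `interval` seconds and report timing.
    print('Child process (%s) started; parent process is (%s)' % (os.getpid(), os.getppid()))
    t_start = time.time()
    time.sleep(interval)
    t_end = time.time()
    print('Child process (%s) ran for %0.2f seconds' % (os.getpid(), t_end - t_start))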
Example #19
                                    userData['playlist'],
                                    vnc,
                                    config.SQS_ENDPOINT,
                                    screenshotDir,
                                    runnerStats,
                                    processStates,
                                ))
                    p.start()
                    processes.append(p)
                except:
                    runnerStats[STAT_ERROR] += 1
                    console.exception()

            leftProcesses = []
            for p in processes:
                if p.is_alive():
                    leftProcesses.append(p)
                else:
                    p.join()
            processes = leftProcesses
            if showInfo:
                showStats(len(processes), systemStats, runnerStats)
            if len(processes) == 0:
                break
        except KeyboardInterrupt:
            shutdown(processes)
            break
        except:
            console.exception()
            shutdown(processes)
            break