def _run_worker_pool(task, queue, count):
    """Start ``count`` daemon processes running ``worker(task, queue)`` and wait for all of them."""
    procs = []
    for _ in range(count):
        proc = Process(target=worker, args=(task, queue))
        proc.daemon = True
        proc.start()
        procs.append(proc)
    for proc in procs:
        proc.join()


def main():
    """
    Refill today's proxy table if it is nearly empty, then test the stored proxies.

    Steps, in order:

    1. ``create_db()`` is called.
    2. The rows of ``proxys_<CURR_DATE>`` with ``speed > PROXY_TIMEOUT`` are
       counted. If there are fewer than 10, every URL returned by
       ``get_proxy_urls()`` is queued and ``PROXY_THREAD_FETCH_MAX``
       ``'fetch_proxy'`` worker processes are run to completion.
    3. The ``host``/``port`` pairs matching the same condition are queued and
       ``PROXY_THREAD_TEST_PROXY_MAX`` ``'proxy_test'`` worker processes are
       run to completion.

    Each connection is closed in a ``finally`` clause so it is released even
    when a query or a worker start-up raises.
    """
    proxy_queue = Queue()
    proxy_hosts = Queue()
    create_db()

    # Build each statement once so the logged text is exactly what is executed.
    # The table name cannot be bound as a query parameter, hence the string
    # formatting; both values are module-level constants, not user input.
    count_sql = r'''SELECT count(*) as `cnt` FROM `proxys_%s` where `speed` > %d;''' % (CURR_DATE, PROXY_TIMEOUT, )
    hosts_sql = r'''SELECT `host`,`port` FROM `proxys_%s` where `speed` > %d;''' % (CURR_DATE, PROXY_TIMEOUT, )

    # Count the stored proxies and fetch fresh ones if there are too few.
    conn = get_conn()
    try:
        cursor = conn.cursor()
        LazyFW.log(count_sql)
        cursor.execute(count_sql)
        count_row = cursor.fetchone()
        cursor.close()

        if count_row[0] < 10:
            for url in get_proxy_urls():
                proxy_queue.put_nowait(url)
            _run_worker_pool('fetch_proxy', proxy_queue, PROXY_THREAD_FETCH_MAX)

        conn.commit()
    finally:
        conn.close()

    # Query again so that any rows written by the fetch workers are included.
    conn = get_conn()
    try:
        LazyFW.log(hosts_sql)
        cursor = conn.cursor()
        cursor.execute(hosts_sql)
        for row in cursor.fetchall():
            proxy_hosts.put_nowait(row)
        cursor.close()
        conn.commit()
    finally:
        conn.close()

    _run_worker_pool('proxy_test', proxy_hosts, PROXY_THREAD_TEST_PROXY_MAX)
from multiprocessing.queues import Queue, context

# Bounded multiprocessing queue with room for two items, created on the
# default context.
q = Queue(
    2,
    ctx=context._default_context,
)

# Three non-blocking puts against a capacity of two: per the multiprocessing
# documentation the last one is expected to raise queue.Full rather than block.
for _ in range(3):
    q.put_nowait(None)
class Client(LoggerMixin, Process, FileSystemEventHandler):
    """
    Responsible for Listener and Processor components. Provides
    functions to start/stop both itself and its components. In addition, it is
    also capable of receiving file-system events via the 'watchdog' library.

    General procedure:

        1. Starts both the Process and Listener components.
        2. Listen and act upon exit/error notifications from components
        3. Listen for file-system events and acts accordingly.
    """

    def __init__(self, dsn, watch_path=None, failover_files=None):
        """
        To make the client listen for Postgres 'recovery.conf, recovery.done'
        events::

            from hermes.client import Client

            dsn = {'database': 'example_db',
                   'host': '127.0.0.1',
                   'port': 5432,
                   'user': '******',
                   'password': '******'}

            watch_path = '/var/lib/postgresql/9.4/main/'
            failover_files = ['recovery.done', 'recovery.conf']

            client = Client(dsn, watch_path, failover_files)

            # Add processor and listener
            ...

            # Start the client
            client.start()

        Or, if you decide you don't want to use a file watcher, then you
        can omit those parameters. However, the Client will still perform
        master/slave checks if a problem is encountered::

            from hermes.client import Client

            dsn = {'database': 'example_db',
                   'host': '127.0.0.1',
                   'port': 5432,
                   'user': '******',
                   'password': '******'}

            client = Client(dsn)

            # Add processor and listener
            ...

            # Start the client
            client.start()

        :param dsn: A Postgres-compatible DSN dictionary
        :param watch_path: The directory to monitor for filechanges. If None,
            then file monitoring is disabled.
        :param failover_files: A list of files which, when modified, will
            cause the client to call :func:`~execute_role_based_procedure`.
            If None, file-system events never trigger the procedure.
        """
        super(Client, self).__init__()

        self.directory_observer = Observer()

        self._processor = None
        self._listener = None

        self._watch_path = watch_path
        self._failover_files = failover_files
        self.master_pg_conn = PostgresConnector(dsn)

        # Loop/handler state flags, reset on every iteration of run().
        self._should_run = False
        self._child_interrupted = False
        self._exception_raised = False

        # Single-slot queue used by _handle_terminate to wake up run().
        self._exit_queue = Queue(1)

    def add_processor(self, processor):
        """
        :param processor: A :class:`~hermes.components.Component` object which
            will receive notifications and run the
            :func:`~hermes.components.Component.execute` method.

        :raises: :class:`~hermes.exceptions.InvalidConfigurationException` if
            the provided processor is not a subclass of
            :class:`~hermes.components.Component`
        """
        if not isinstance(processor, Component):
            raise InvalidConfigurationException(
                "Processor must of type Component"
            )
        self._processor = processor

    def add_listener(self, listener):
        """
        :param listener: A :class:`~hermes.components.Component` object which
            will listen for notifications from Postgres and pass an event down
            a queue.

        :raises: :class:`~hermes.exceptions.InvalidConfigurationException` if
            the provided listener is not a subclass of
            :class:`~hermes.components.Component`
        """
        if not isinstance(listener, Component):
            raise InvalidConfigurationException(
                "Listener must of type Component"
            )
        self._listener = listener

    def _validate_components(self):
        """
        Checks through a set of validation procedures to ensure the client is
        configured properly: a processor and a listener must both be set and
        they must share the very same error queue object.

        :raises: :class:`~hermes.exceptions.InvalidConfigurationException`
        """
        if not self._processor:
            raise InvalidConfigurationException("A processor must be defined")

        if not self._listener:
            raise InvalidConfigurationException("A listener must be defined")

        if self._processor.error_queue is not self._listener.error_queue:
            raise InvalidConfigurationException(
                "A processor and listener's error queue must be the same"
            )

    def start(self):
        """
        Installs the SIGINT/SIGTERM handlers, validates the configured
        components and then starts the Client process.

        :raises: :class:`~hermes.exceptions.InvalidConfigurationException`
        """
        signal(SIGINT, self._handle_terminate)
        signal(SIGTERM, self._handle_terminate)
        self._validate_components()
        super(Client, self).start()

    def run(self):
        """
        Main loop of the Client process.

        Starts the directory observer, installs the SIGCHLD handler, runs
        :func:`~execute_role_based_procedure` once and then blocks in
        :func:`~select.select` on the read end of the exit queue. When the
        exit queue becomes readable the client terminates itself.

        A ``select.error`` (e.g. the call being interrupted by a signal) ends
        the loop unless the SIGCHLD handler flagged, during this iteration,
        that it dealt with a child interruption or a component exception.
        """
        super(Client, self).run()
        self._start_observer()

        signal(SIGCHLD, self._handle_sigchld)
        self._should_run = True

        self.execute_role_based_procedure()
        while self._should_run:
            # Reset per-iteration flags; _handle_sigchld may set them while
            # select() is blocking.
            self._exception_raised = self._child_interrupted = False
            try:
                exit_pipe = self._exit_queue._reader

                ready_pipes, _, _ = select.select(
                    (exit_pipe, ), (), ()
                )

                if exit_pipe in ready_pipes:
                    self.terminate()

            except select.error:
                if not self._child_interrupted and not self._exception_raised:
                    self._should_run = False

    def _start_components(self, restart=False):
        """
        Starts the Processor and the Listener, each only if it is not
        currently alive.

        :param restart: When True, a component that has been started before
            (it has an ``ident``) is joined before being started again.
        """
        if not self._processor.is_alive():
            if restart and self._processor.ident:
                self._processor.join()
            self._processor.start()

        if not self._listener.is_alive():
            if restart and self._listener.ident:
                self._listener.join()
            self._listener.start()

    def _stop_components(self):
        """
        Terminates and joins the Listener and then the Processor, each only
        if it is set, has been started and is still alive.
        """
        if self._listener and self._listener.ident and self._listener.is_alive():
            self._listener.terminate()
            self._listener.join()

        if self._processor and self._processor.ident and self._processor.is_alive():
            self._processor.terminate()
            self._processor.join()

    def _start_observer(self):
        """
        Schedules this client as the (non-recursive) event handler for the
        configured watch path and starts the observer. Does nothing when no
        watch path was given.
        """
        if self._watch_path:
            self.directory_observer.schedule(
                self, self._watch_path, recursive=False
            )
            self.directory_observer.start()

    def _stop_observer(self):
        """
        Stops the observer if it is 'alive'
        """
        if self._watch_path and self.directory_observer:
            if self.directory_observer.is_alive():
                self.directory_observer.stop()

    def on_any_event(self, event):
        """
        Listens to an event passed by 'watchdog' and checks the current
        master/slave status

        :param event: A :class:`~watchdog.events.FileSystemEvent`
            object passed by 'watchdog' indicating an event change within the
            specified directory.
        """
        file_name = event.src_path.split('/')[-1]
        # failover_files is optional (defaults to None); without this guard a
        # watch path configured with no failover files raised TypeError on
        # the membership test.
        if self._failover_files and file_name in self._failover_files:
            self.execute_role_based_procedure()

    def execute_role_based_procedure(self):
        """
        Starts or stops components based on the role (Master/Slave) of the
        Postgres host.

        Implements a `binary exponential backoff
        <http://en.wikipedia.org/wiki/Exponential_backoff
        #Binary_exponential_backoff_.2F_truncated_exponential_backoff>`_
        if it encounters a FATAL connection error: the components are
        stopped and the check is retried after 1, 2, 4, ... up to 32 seconds,
        after which the delay wraps back to 1 second. The method only returns
        once the role could be determined.
        """
        backoff = 0
        while True:
            try:
                server_is_master = self.master_pg_conn.is_server_master()
                if server_is_master:
                    self.log.warning('Server is a master, starting components')
                    self._start_components(restart=True)
                else:
                    self.log.warning('Server is a slave, stopping components')
                    self._stop_components()
                break
            except OperationalError:
                self._stop_components()

                self.log.warning(
                    'Cannot connect to the DB, maybe it has been shutdown?',
                    exc_info=True
                )

                if backoff:  # pragma: no cover
                    backoff <<= 1
                    if backoff > 32:
                        backoff = 1
                else:
                    backoff = 1
                sleep(backoff)

    def _handle_sigchld(self, sig, frame):
        """
        A child process dying, and the client not shutting down, indicates
        a process has been shut down by some external caller.

        We must check both the processor and listener for 'liveness' and
        start those which have failed.

        If the shared error queue holds an ``(expected, action)`` entry, the
        death was reported by the component itself: an expected TERMINATE
        re-runs the role based procedure, an unexpected error shuts the
        client down. An empty queue means the child was interrupted from
        outside, so the components are restarted.
        """
        if sig == SIGCHLD and self._should_run and not self._exception_raised:
            try:
                expected, action = self._processor.error_queue.get_nowait()
                self._exception_raised = True
                if expected:
                    if action == TERMINATE:
                        self.execute_role_based_procedure()
                else:
                    self.log.critical(
                        'An unexpected error was raised - shutting down'
                    )
                    self._shutdown()
            except Empty:
                self._child_interrupted = True
                self._start_components(restart=True)

    def _handle_terminate(self, sig, frame):
        """
        Handles SIGINT and SIGTERM signals.

        If called from another process then puts to the exit queue, else
        calls _shutdown.
        """
        if self.ident != os.getpid():
            self._exit_queue.put_nowait(True)
        else:
            self._shutdown()

    def _shutdown(self):
        """
        Shuts down the Client:
            * Sets '_should_run' to False.
            * Stops the components.
            * Stops the observer.
        """
        self.log.warning('Shutting down...')
        self._should_run = False
        self._stop_components()
        self._stop_observer()
class WorkerThread(threading.Thread):
    """
    Thread that writes generated documents to memcached and keeps track of
    which keys were inserted and which were rejected.

    Keys have the form ``<base uuid>-<size>-<counter>``. Each entry of
    ``values_list`` is a dict read through the keys ``"how_many"`` (number of
    keys still to send), ``"size"`` (used in the key name and status logs)
    and ``"value"`` (an iterator yielding the values to store).

    Two knobs control error handling: the thread stops after more than
    ``ignore_how_many_errors`` rejected keys, and it backs off (sleeps) when
    the server answers with memcached status 134.
    """

    def __init__(
        self,
        serverInfo,
        name,
        values_list,
        ignore_how_many_errors=5000,
        override_vBucketId=-1,
        terminate_in_minutes=120,
        write_only=False,
        moxi=True,
        async_write=False,
        delete_ratio=0,
        expiry_ratio=0,
    ):
        """
        :param serverInfo: server description handed to ``RestConnection`` /
            ``MemcachedClientHelper``; either a dict with an ``"ip"`` key or
            an object with an ``ip`` attribute (used for error logging).
        :param name: thread name, also passed to the memcached client helpers.
        :param values_list: list of document generator dicts (deep-copied).
        :param ignore_how_many_errors: stop once more keys than this were
            rejected.
        :param override_vBucketId: when >= 0, assigned to ``client.vbucketId``
            before every set.
        :param terminate_in_minutes: stop loading after this many minutes.
        :param write_only: when False, a separate reader process is started.
        :param moxi: when True a single proxy client is used, otherwise a
            client is looked up per key through ``VBucketAwareMemcached``.
        :param async_write: use ``client.send_set`` instead of ``client.set``.
        :param delete_ratio: probability (compared against ``random()``) of
            also writing a ``<key>-del`` item that is deleted later.
        :param expiry_ratio: probability of also writing a ``<key>-exp`` item
            with a 30 second expiry.
        """
        threading.Thread.__init__(self)
        self.log = logger.Logger.get_logger()
        self.serverInfo = serverInfo
        self.name = name
        # Working copy: entries are consumed/removed while keys are sent.
        self.values_list = []
        self.values_list.extend(copy.deepcopy(values_list))
        # Pristine copy: used by keys_set() and handed to the reader process.
        self._value_list_copy = []
        self._value_list_copy.extend(copy.deepcopy(values_list))
        self._inserted_keys_count = 0
        self._rejected_keys = []
        self._rejected_keys_count = 0
        self._delete_ratio = delete_ratio
        self._expiry_ratio = expiry_ratio
        self._delete_count = 0
        self._expiry_count = 0
        self._delete = []
        self.ignore_how_many_errors = ignore_how_many_errors
        self.override_vBucketId = override_vBucketId
        self.terminate_in_minutes = terminate_in_minutes
        self._base_uuid = uuid.uuid4()
        self.queue = Queue()
        self.moxi = moxi
        # let's create a read_thread
        self.info = {"server": serverInfo, "name": self.name, "baseuuid": self._base_uuid}
        self.write_only = write_only
        self.aborted = False
        self.async_write = async_write

    def inserted_keys_count(self):
        """Return the number of keys successfully set so far."""
        return self._inserted_keys_count

    def rejected_keys_count(self):
        """Return the number of keys currently counted as rejected."""
        return self._rejected_keys_count

    # smart function that gives you something you can use to
    # get inserted keys
    # we should just expose an iterator instead which
    # generates the key,values on fly
    def keys_set(self):
        """
        Reconstruct the names of the inserted keys.

        Key names are regenerated from the pristine values list and every key
        recorded in ``self._rejected_keys`` is left out.

        :return: tuple ``(inserted_keys, rejected_keys)`` where
            ``inserted_keys`` is a list of key strings and ``rejected_keys``
            is ``self._rejected_keys`` (a list of ``{"key", "value"}`` dicts).
        """
        # _rejected_keys holds {"key": ..., "value": ...} dicts, so the key
        # strings have to be extracted before testing membership; comparing
        # a string against the dicts never matched. A set also makes each
        # lookup O(1).
        rejected_names = set(
            entry["key"] if isinstance(entry, dict) else entry
            for entry in self._rejected_keys
        )
        # let's construct the inserted keys set
        # TODO: hard limit , let's only populated up to 1 million keys
        inserted_keys = []
        for item in self._value_list_copy:
            for i in range(0, (int(item["how_many"]))):
                key = "{0}-{1}-{2}".format(self._base_uuid, item["size"], i)
                if key not in rejected_names:
                    inserted_keys.append(key)
                # NOTE: this only leaves the inner loop; later items can
                # still add one key each (kept as in the original).
                if len(inserted_keys) > 2 * 1024 * 1024:
                    break
        return inserted_keys, self._rejected_keys

    def run(self):
        """
        Send every key described by ``self.values_list`` to memcached.

        The loop picks a random generator, sets one key and records failures
        in ``self._rejected_keys``. It ends when all generators are
        exhausted, the thread is aborted, ``terminate_in_minutes`` has
        elapsed or too many keys were rejected. Afterwards the rejected keys
        are retried (up to three passes), pending ``-del`` keys are deleted
        and the reader process, if any, is told to stop and joined.
        """
        msg = "starting a thread to set keys mixed set-get ? {0} and using async_set ? {1}"
        msg += " with moxi ? {2}"
        msg = msg.format(self.write_only, self.async_write, self.moxi)
        self.log.info(msg)
        awareness = VBucketAwareMemcached(RestConnection(self.serverInfo), self.name)
        client = None
        if self.moxi:
            try:
                client = MemcachedClientHelper.proxy_client(self.serverInfo, self.name)
            except Exception as ex:
                self.log.info("unable to create memcached client due to {0}. stop thread...".format(ex))
                import traceback

                traceback.print_exc()
                return
        # keeping keys in the memory is not such a good idea because
        # we run out of memory so best is to just keep a counter ?
        # if someone asks for the keys we can give them the formula which is
        # baseuuid-{0}-{1} , size and counter , which is between n-0 except those
        # keys which were rejected
        # let's print out some status every 5 minutes..

        if not self.write_only:
            self.reader = Process(target=start_reader_process, args=(self.info, self._value_list_copy, self.queue))
            self.reader.start()
        start_time = time.time()
        last_reported = start_time
        backoff_count = 0
        while len(self.values_list) > 0 and not self.aborted:
            selected = MemcachedClientHelper.random_pick(self.values_list)
            selected["how_many"] -= 1
            if selected["how_many"] < 1:
                self.values_list.remove(selected)
            if (time.time() - start_time) > self.terminate_in_minutes * 60:
                self.log.info(
                    "its been more than {0} minutes loading data. stopping the process..".format(
                        self.terminate_in_minutes
                    )
                )
                break
            else:
                # every two minutes print the status
                if time.time() - last_reported > 2 * 60:
                    if not self.moxi:
                        awareness.done()
                        try:
                            awareness = VBucketAwareMemcached(RestConnection(self.serverInfo), self.name)
                        except Exception:
                            # vbucket map is changing . sleep 5 seconds
                            time.sleep(5)
                            awareness = VBucketAwareMemcached(RestConnection(self.serverInfo), self.name)
                        self.log.info("now connected to {0} memcacheds".format(len(awareness.memcacheds)))
                    last_reported = time.time()
                    for item in self.values_list:
                        self.log.info(
                            "{0} keys (each {1} bytes) more to send...".format(item["how_many"], item["size"])
                        )

            key = "{0}-{1}-{2}".format(self._base_uuid, selected["size"], int(selected["how_many"]))
            if not self.moxi:
                client = awareness.memcached(key)
                if not client:
                    self.log.error("client should not be null")
            value = "*"
            try:
                # builtin next() works on Python 2.6+ and Python 3, whereas
                # the .next() method only exists on Python 2 iterators.
                value = next(selected["value"])
            except StopIteration:
                pass
            try:
                if self.override_vBucketId >= 0:
                    client.vbucketId = self.override_vBucketId
                if self.async_write:
                    client.send_set(key, 0, 0, value)
                else:
                    client.set(key, 0, 0, value)
                self._inserted_keys_count += 1
                backoff_count = 0
                # do expiry sets, 30 second expiry time
                if Random().random() < self._expiry_ratio:
                    client.set(key + "-exp", 30, 0, value)
                    self._expiry_count += 1
                # do deletes if we have 100 pending
                # at the end delete the remaining
                if len(self._delete) >= 100:
                    for key_del in self._delete:
                        client.delete(key_del)
                    self._delete = []
                # do delete sets
                if Random().random() < self._delete_ratio:
                    client.set(key + "-del", 0, 0, value)
                    self._delete.append(key + "-del")
                    self._delete_count += 1
            except MemcachedError as error:
                if not self.moxi:
                    awareness.done()
                    try:
                        awareness = VBucketAwareMemcached(RestConnection(self.serverInfo), self.name)
                    except Exception:
                        # vbucket map is changing . sleep 5 seconds
                        time.sleep(5)
                        awareness = VBucketAwareMemcached(RestConnection(self.serverInfo), self.name)
                    self.log.info("now connected to {0} memcacheds".format(len(awareness.memcacheds)))
                # Always log the failure, whichever client mode is in use.
                if isinstance(self.serverInfo, dict):
                    self.log.error(
                        "memcached error {0} {1} from {2}".format(error.status, error.msg, self.serverInfo["ip"])
                    )
                else:
                    self.log.error(
                        "memcached error {0} {1} from {2}".format(error.status, error.msg, self.serverInfo.ip)
                    )
                if error.status == 134:
                    # Status 134: back off before sending more. The delay is
                    # 15s per consecutive failure for the first four, then
                    # 2s per consecutive failure.
                    backoff_count += 1
                    if backoff_count < 5:
                        backoff_seconds = 15 * backoff_count
                    else:
                        backoff_seconds = 2 * backoff_count
                    self.log.info("received error # 134. backing off for {0} sec".format(backoff_seconds))
                    time.sleep(backoff_seconds)

                self._rejected_keys_count += 1
                self._rejected_keys.append({"key": key, "value": value})
                if len(self._rejected_keys) > self.ignore_how_many_errors:
                    break
            except Exception as ex:
                if not self.moxi:
                    awareness.done()
                    try:
                        awareness = VBucketAwareMemcached(RestConnection(self.serverInfo), self.name)
                    except Exception:
                        awareness = VBucketAwareMemcached(RestConnection(self.serverInfo), self.name)
                    self.log.info("now connected to {0} memcacheds".format(len(awareness.memcacheds)))
                # Always log the failure, whichever client mode is in use.
                if isinstance(self.serverInfo, dict):
                    self.log.error("error {0} from {1}".format(ex, self.serverInfo["ip"]))
                    import traceback

                    traceback.print_exc()
                else:
                    self.log.error("error {0} from {1}".format(ex, self.serverInfo.ip))
                self._rejected_keys_count += 1
                self._rejected_keys.append({"key": key, "value": value})
                if len(self._rejected_keys) > self.ignore_how_many_errors:
                    break

        # before closing the session let's try sending those items again
        retry = 3
        while retry > 0 and self._rejected_keys_count > 0:
            rejected_after_retry = []
            self._rejected_keys_count = 0
            for item in self._rejected_keys:
                try:
                    if self.override_vBucketId >= 0:
                        client.vbucketId = self.override_vBucketId
                    if self.async_write:
                        client.send_set(item["key"], 0, 0, item["value"])
                    else:
                        client.set(item["key"], 0, 0, item["value"])
                    self._inserted_keys_count += 1
                except MemcachedError:
                    self._rejected_keys_count += 1
                    rejected_after_retry.append({"key": item["key"], "value": item["value"]})
                    if len(rejected_after_retry) > self.ignore_how_many_errors:
                        break
            self._rejected_keys = rejected_after_retry
            # Count down the remaining passes; the previous "retry = -1"
            # ended the loop after a single pass.
            retry -= 1
        # clean up the rest of the deleted keys
        if len(self._delete) > 0:
            for key_del in self._delete:
                client.delete(key_del)
            self._delete = []

        self.log.info("deleted {0} keys".format(self._delete_count))
        self.log.info("expiry {0} keys".format(self._expiry_count))
        awareness.done()
        if not self.write_only:
            self.queue.put_nowait("stop")
            self.reader.join()

    def _initialize_memcached(self):
        """Placeholder; currently does nothing."""
        pass

    def _set(self):
        """Placeholder; currently does nothing."""
        pass

    def _handle_error(self):
        """Placeholder; currently does nothing."""
        pass
        # if error is memcached error oom related let's do a sleep

    def _time_to_stop(self):
        """Return True when the thread was aborted or too many keys were rejected."""
        return self.aborted or len(self._rejected_keys) > self.ignore_how_many_errors