def __init__(self, stop_event: Event, input_queue: Queue = None,
             output_queue: Queue = None, max_worker: int = 10,
             dir_path="", is_debug=False, **kwargs):
    """Set up counters, sampling settings, the disk buffer and the logger.

    :param stop_event: external stop signal, stored as ``_stop_event``.
    :param input_queue: accepted but not stored by this constructor.
    :param output_queue: accepted but not stored; the output queue is
        obtained from ``get_queue_client`` instead.
    :param max_worker: worker count; also sizes the disk buffer
        (``max_worker * 50``).
    :param dir_path: forwarded to ``ExternalTempDataDiskBuffer``.
    :param is_debug: selects a smaller buffer-delete threshold and a
        shorter progress-log period.
    :param kwargs: forwarded to ``FeedbackInterface.__init__``.
    """
    self._is_debug = is_debug
    FeedbackInterface.__init__(self, **kwargs)
    ExternalTempInterface.__init__(self)

    # The queues passed in by the caller are intentionally ignored here.
    self._stop_event = stop_event
    self._internal_stop_event = Event()
    self._max_worker = max_worker

    # Job counters and the lock created alongside them.
    self._job_done = self._job_done_shadow = 0
    self._job_done_lock = RLock()

    # Sampling settings (original note on the period: "time to sample
    # data into the buffer").
    self._input_period = 0.0001
    self._max_sample_results = 100000
    self._min_sampling_duration = 0.0001
    self._sample_batch_size = 5000
    self._sample_batch_timeout = 60

    # 100000 is the default; debug runs use a tenth of it.
    self._min_buff_delete_threshold = 10000 if is_debug else 100000
    self._speed_penalty_count = 0
    self._finished = False

    # Only the queue half of the returned pair is kept.
    _, self._output_q = get_queue_client(
        QueueManager.MachineSettingCrawler,
        QueueManager.Method_Whois_Output)
    self._db_buffer = ExternalTempDataDiskBuffer(
        "whois_check.db", self, self._internal_stop_event,
        buf_size=self._max_worker * 50,
        terminate_callback=WhoisChecker.terminate_callback,
        dir_path=dir_path)
    self._populate_with_state()

    # Progress is logged every 10 units in debug mode, otherwise every 120
    # (presumably seconds -- confirm against ProgressLogger).
    log_period = 10 if is_debug else 120
    self._progress_logger = ProgressLogger(log_period, self,
                                           self._internal_stop_event)
def __init__(self, stop_event: Event, input_queue: Queue = None,
             output_queue: Queue = None, max_worker: int = 10,
             dir_path="", is_debug=False, **kwargs):
    """Initialise the checker's state, output queue, buffer and logger.

    ``input_queue`` and ``output_queue`` are part of the signature but are
    not stored; the output queue comes from ``get_queue_client``. Extra
    keyword arguments are passed to ``FeedbackInterface.__init__``.
    ``is_debug`` lowers both the buffer-delete threshold and the
    progress-log period.
    """
    # Pick the debug-dependent settings once, before any side effects.
    if is_debug:
        delete_threshold, log_period = 10000, 10
    else:
        delete_threshold, log_period = 100000, 120  # regular defaults

    self._is_debug = is_debug
    FeedbackInterface.__init__(self, **kwargs)
    ExternalTempInterface.__init__(self)

    # Predefined queues from the caller are not used here.
    self._stop_event = stop_event
    self._internal_stop_event = Event()
    self._max_worker = max_worker

    self._job_done = 0
    self._job_done_shadow = 0
    self._job_done_lock = RLock()

    self._input_period = 0.0001  # time to sample data into the buffer
    self._max_sample_results = 100000
    self._min_sampling_duration = 0.0001
    self._sample_batch_size = 5000
    self._sample_batch_timeout = 60
    self._min_buff_delete_threshold = delete_threshold
    self._speed_penalty_count = 0
    self._finished = False

    # get_queue_client yields a (manager, queue) pair; keep the queue only.
    client = get_queue_client(QueueManager.MachineSettingCrawler,
                              QueueManager.Method_Whois_Output)
    _manager, self._output_q = client

    internal_stop = self._internal_stop_event
    self._db_buffer = ExternalTempDataDiskBuffer(
        "whois_check.db",
        self,
        internal_stop,
        buf_size=self._max_worker * 50,
        terminate_callback=WhoisChecker.terminate_callback,
        dir_path=dir_path,
    )
    self._populate_with_state()

    self._progress_logger = ProgressLogger(log_period, self, internal_stop)
def __init__(self, db_ref: str, db_dir: str, input_queue: Queue,
             output_queue: Queue, stop_event: Event, matrix: CrawlMatrix,
             accounts: list, force_mode=False, force_mode_offset=0,
             force_mode_total=0, **kwargs):
    """Wire up the queues, the filter pool and the on-disk buffer.

    :param db_ref: first argument given to ``ExternalTempDataDiskBuffer``.
    :param db_dir: passed to the buffer as ``dir_path``.
    :param input_queue: stored as ``_input_queue``.
    :param output_queue: stored as ``_output_queue`` and handed to the pool.
    :param stop_event: shared by the pool and the disk buffer.
    :param matrix: stored as ``_matrix`` and handed to the pool.
    :param accounts: forwarded to ``FilterPool`` as ``accounts``.
    :param force_mode: when truthy, state is re-populated from
        ``force_mode_offset`` / ``force_mode_total`` after the normal load.
    :param kwargs: forwarded to ``FeedbackInterface.__init__``.
    """
    # FeedbackInterface is initialised first; ExternalTempInterface is
    # initialised only after the buffer exists. The order is kept as-is.
    FeedbackInterface.__init__(self, **kwargs)

    self._stop_event = stop_event
    self._matrix = matrix
    self._db_ref = db_ref
    self._input_queue = input_queue
    self._output_queue = output_queue

    pool_input = Queue()
    self._pool_input = pool_input
    self._pool = FilterPool(pool_input, output_queue, self._queue_lock,
                            stop_event, matrix, accounts=accounts)

    # Original author's note: these settings control the data-flow speed
    # and can keep the input:output ratio at 1:1 for up to 10 million
    # data rows per hour.
    self._db_buffer = ExternalTempDataDiskBuffer(
        db_ref, self, stop_event,
        dir_path=db_dir, buf_size=2500, output_f=5000)

    ExternalTempInterface.__init__(self)
    self._populate_with_state()

    if force_mode:
        forced = _FilterState(progress=force_mode_offset,
                              all_data=force_mode_total)
        self.populate_with_state(forced)
def __init__(self, db_ref: str, db_dir: str, input_queue: Queue,
             output_queue: Queue, stop_event: Event, matrix: CrawlMatrix,
             accounts: list, force_mode=False, force_mode_offset=0,
             force_mode_total=0, **kwargs):
    """Build the filter's pool and disk buffer, then load its state.

    Keyword arguments not named in the signature go to
    ``FeedbackInterface.__init__``. With ``force_mode`` set, a
    ``_FilterState`` built from ``force_mode_offset`` (progress) and
    ``force_mode_total`` (all_data) is applied after the regular state
    load.
    """
    FeedbackInterface.__init__(self, **kwargs)

    # Plain references handed in by the caller.
    self._stop_event = stop_event
    self._matrix = matrix
    self._db_ref = db_ref
    self._input_queue = input_queue
    self._output_queue = output_queue

    # The pool reads from its own private queue and writes to the
    # caller's output queue.
    self._pool_input = Queue()
    self._pool = FilterPool(
        self._pool_input,
        self._output_queue,
        self._queue_lock,
        self._stop_event,
        self._matrix,
        accounts=accounts,
    )

    # Buffer sizing per the original note: it controls the data-flow
    # speed and can keep input:output at 1:1 for at most 10 million data
    # rows per hour.
    self._db_buffer = ExternalTempDataDiskBuffer(
        self._db_ref,
        self,
        self._stop_event,
        dir_path=db_dir,
        buf_size=2500,
        output_f=5000,
    )

    # ExternalTempInterface is set up only after the buffer is created.
    ExternalTempInterface.__init__(self)
    self._populate_with_state()

    if not force_mode:
        return
    self.populate_with_state(
        _FilterState(progress=force_mode_offset, all_data=force_mode_total))