コード例 #1
0
 def __init__(self, stop_event: Event, input_queue: Queue=None, output_queue: Queue=None,
              max_worker: int=10, dir_path="", is_debug=False, **kwargs):
     """Set up counters, sampling settings, the output queue, the disk buffer and the logger.

     Args:
         stop_event: event supplied by the caller; stored as ``_stop_event``.
         input_queue: accepted for interface compatibility; not used here.
         output_queue: accepted for interface compatibility; not used here,
             the output queue comes from ``get_queue_client`` instead.
         max_worker: stored as ``_max_worker``; the disk buffer is sized at
             50 times this value.
         dir_path: forwarded to ``ExternalTempDataDiskBuffer`` as ``dir_path``.
         is_debug: when truthy, uses the smaller buffer-delete threshold and
             the shorter progress-log period.
         **kwargs: forwarded unchanged to ``FeedbackInterface.__init__``.
     """
     self._is_debug = is_debug
     FeedbackInterface.__init__(self, **kwargs)
     ExternalTempInterface.__init__(self)

     # The queues passed as arguments are intentionally not stored: the
     # output queue is looked up through the queue manager further down.
     self._stop_event = stop_event
     self._internal_stop_event = Event()  # shared with the buffer and the logger below
     self._max_worker = max_worker

     # Completed-job counters and the lock created alongside them.
     self._job_done = self._job_done_shadow = 0
     self._job_done_lock = RLock()

     # Sampling configuration.
     self._input_period = 0.0001  # time to sample data into the buffer
     self._max_sample_results = 100000
     self._min_sampling_duration = 0.0001
     self._sample_batch_size = 5000
     self._sample_batch_timeout = 60
     # 100000 is the default; debug runs use a tenth of that.
     self._min_buff_delete_threshold = 10000 if is_debug else 100000

     self._speed_penalty_count = 0
     self._finished = False

     # Only the queue half of the returned pair is kept.
     _manager, self._output_q = get_queue_client(QueueManager.MachineSettingCrawler,
                                                 QueueManager.Method_Whois_Output)

     buffer_capacity = self._max_worker * 50
     self._db_buffer = ExternalTempDataDiskBuffer(
         "whois_check.db", self, self._internal_stop_event,
         buf_size=buffer_capacity,
         terminate_callback=WhoisChecker.terminate_callback,
         dir_path=dir_path,
     )
     self._populate_with_state()  # FeedbackInterface

     report_interval = 10 if is_debug else 120
     self._progress_logger = ProgressLogger(report_interval, self, self._internal_stop_event)
コード例 #2
0
 def __init__(self,
              stop_event: Event,
              input_queue: Queue = None,
              output_queue: Queue = None,
              max_worker: int = 10,
              dir_path="",
              is_debug=False,
              **kwargs):
     """Prepare the checker: state, sampling limits, queue, buffer, logger.

     Args:
         stop_event: caller-provided event, kept as ``_stop_event``.
         input_queue: not used by this constructor.
         output_queue: not used by this constructor; ``_output_q`` is taken
             from ``get_queue_client`` instead.
         max_worker: kept as ``_max_worker``; the disk buffer size is
             ``max_worker * 50``.
         dir_path: passed to ``ExternalTempDataDiskBuffer`` as ``dir_path``.
         is_debug: truthy selects a buffer-delete threshold of 10000
             (instead of 100000) and a log period of 10 (instead of 120).
         **kwargs: passed through to ``FeedbackInterface.__init__``.
     """
     self._is_debug = is_debug
     FeedbackInterface.__init__(self, **kwargs)
     ExternalTempInterface.__init__(self)

     # Predefined queues are deliberately ignored here; see the
     # get_queue_client call below for where the output queue comes from.
     self._stop_event = stop_event
     self._internal_stop_event = Event()
     self._max_worker = max_worker

     self._job_done = self._job_done_shadow = 0
     self._job_done_lock = RLock()

     self._input_period = 0.0001  # time to sample data into the buffer
     self._max_sample_results = 100000
     self._min_sampling_duration = 0.0001
     self._sample_batch_size = 5000
     self._sample_batch_timeout = 60
     # default is 100000; debug mode lowers it
     self._min_buff_delete_threshold = 10000 if is_debug else 100000
     self._speed_penalty_count = 0
     self._finished = False

     # The first element of the returned pair is not needed afterwards.
     _manager, self._output_q = get_queue_client(
         QueueManager.MachineSettingCrawler,
         QueueManager.Method_Whois_Output)

     buffer_capacity = self._max_worker * 50
     self._db_buffer = ExternalTempDataDiskBuffer(
         "whois_check.db",
         self,
         self._internal_stop_event,
         buf_size=buffer_capacity,
         terminate_callback=WhoisChecker.terminate_callback,
         dir_path=dir_path)
     self._populate_with_state()  # FeedbackInterface

     log_every = 10 if is_debug else 120
     self._progress_logger = ProgressLogger(
         log_every, self, self._internal_stop_event)
コード例 #3
0
 def __init__(self, db_ref: str, db_dir: str, input_queue: Queue, output_queue: Queue, stop_event: Event,
              matrix: CrawlMatrix, accounts: list, force_mode=False, force_mode_offset=0, force_mode_total=0, **kwargs):
     """Wire up the queues, the filter pool and the disk buffer, then load state.

     Args:
         db_ref: stored as ``_db_ref`` and passed to ``ExternalTempDataDiskBuffer``.
         db_dir: passed to ``ExternalTempDataDiskBuffer`` as ``dir_path``.
         input_queue: stored as ``_input_queue``.
         output_queue: stored as ``_output_queue`` and handed to ``FilterPool``.
         stop_event: stored as ``_stop_event``; shared with the pool and the buffer.
         matrix: stored as ``_matrix`` and handed to ``FilterPool``.
         accounts: passed to ``FilterPool`` as ``accounts``.
         force_mode: when truthy, state is overwritten from the two values below.
         force_mode_offset: ``progress`` of the forced ``_FilterState``.
         force_mode_total: ``all_data`` of the forced ``_FilterState``.
         **kwargs: forwarded to ``FeedbackInterface.__init__``.
     """
     FeedbackInterface.__init__(self, **kwargs)

     self._stop_event = stop_event
     self._matrix = matrix
     self._db_ref = db_ref
     self._input_queue = input_queue
     self._output_queue = output_queue
     self._pool_input = Queue()

     # NOTE(review): self._queue_lock is read here but never assigned in this
     # constructor - presumably provided by a base class or class attribute; confirm.
     self._pool = FilterPool(
         self._pool_input, self._output_queue, self._queue_lock,
         self._stop_event, self._matrix,
         accounts=accounts,
     )

     # These settings control the data flow speed. Per the original author's
     # note, they can keep the input:output ratio at 1:1 for up to
     # 10 million data rows per hour.
     buffer_options = {"dir_path": db_dir, "buf_size": 2500, "output_f": 5000}
     self._db_buffer = ExternalTempDataDiskBuffer(self._db_ref, self, self._stop_event, **buffer_options)

     ExternalTempInterface.__init__(self)
     self._populate_with_state()

     if not force_mode:
         return
     # Forced mode: replace the state with the caller-supplied progress/total.
     forced_state = _FilterState(progress=force_mode_offset, all_data=force_mode_total)
     self.populate_with_state(forced_state)
コード例 #4
0
 def __init__(self,
              db_ref: str,
              db_dir: str,
              input_queue: Queue,
              output_queue: Queue,
              stop_event: Event,
              matrix: CrawlMatrix,
              accounts: list,
              force_mode=False,
              force_mode_offset=0,
              force_mode_total=0,
              **kwargs):
     """Store the collaborators, build the pool and disk buffer, load state.

     Args:
         db_ref: kept as ``_db_ref``; first argument of the disk buffer.
         db_dir: given to the disk buffer as ``dir_path``.
         input_queue: kept as ``_input_queue``.
         output_queue: kept as ``_output_queue``; also given to ``FilterPool``.
         stop_event: kept as ``_stop_event``; given to the pool and the buffer.
         matrix: kept as ``_matrix``; also given to ``FilterPool``.
         accounts: given to ``FilterPool`` as ``accounts``.
         force_mode: truthy means the state is replaced after loading.
         force_mode_offset: ``progress`` value of the replacement state.
         force_mode_total: ``all_data`` value of the replacement state.
         **kwargs: passed through to ``FeedbackInterface.__init__``.
     """
     FeedbackInterface.__init__(self, **kwargs)

     self._stop_event = stop_event
     self._matrix = matrix
     self._db_ref = db_ref
     self._input_queue = input_queue
     self._output_queue = output_queue
     self._pool_input = Queue()

     # NOTE(review): self._queue_lock is not assigned in this constructor -
     # presumably it comes from a base class or class attribute; confirm.
     self._pool = FilterPool(
         self._pool_input,
         self._output_queue,
         self._queue_lock,
         self._stop_event,
         self._matrix,
         accounts=accounts)

     # These settings control the data flow speed. Per the original author's
     # note, they can keep the input:output ratio at 1:1 for up to
     # 10 million data rows per hour.
     buffer_options = {
         "dir_path": db_dir,
         "buf_size": 2500,
         "output_f": 5000,
     }
     self._db_buffer = ExternalTempDataDiskBuffer(
         self._db_ref, self, self._stop_event, **buffer_options)

     ExternalTempInterface.__init__(self)
     self._populate_with_state()

     if not force_mode:
         return
     # Forced mode: overwrite the state with the supplied progress/total.
     forced_state = _FilterState(
         progress=force_mode_offset, all_data=force_mode_total)
     self.populate_with_state(forced_state)