def run_farm(self):
    try:
        self._start_sending_feedback()
        input_t = threading.Thread(target=self._sample_data)
        input_t.start()  # start sampling data
        self._progress_logger.start()
        self._db_buffer.start_input_output_cycle()  # start input and output data to/from file
        pool = ThreadPool(processes=self._max_worker)
        # iter(callable, sentinel): keep pulling from sample_gen until it returns None
        pool.imap_unordered(self._check_whois_with_dns, iter(self.sample_gen, None), chunksize=1)
        while not (self._stop_event.is_set() or self._internal_stop_event.is_set()):
            time.sleep(1)
        if self._stop_event.is_set():
            self._internal_stop_event.set()
        input_t.join()
        self._progress_logger.join()
        self._db_buffer.terminate()
        if self._stop_event.is_set():
            self._finished = True
        self._end_sending_feedback()
    except Exception as ex:
        if self._stop_event.is_set():
            self._finished = True
        ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex,
                              "run_farm() index at: " + str(self._job_done))
def force_clear(ref: str, dir_path="") -> bool:
    """
    Force-remove a buffered database file together with its SQLite side files.
    :param ref: the file name
    :param dir_path: directory holding the file; defaults to the buffer directory
    :return: True if removal succeeded, else False
    """
    if len(dir_path) == 0:
        dir_path = get_db_buffer_default_dir()
    remove_ok = False
    filename = dir_path + ref
    try:
        time.sleep(1)
        if os.path.exists(filename):
            os.remove(filename)
        temp_file = filename + TempDBInterface.sqlite_temp_suffix
        if os.path.exists(temp_file):
            os.remove(temp_file)
        wal_file = filename + TempDBInterface.sqlite_wal_suffix
        if os.path.exists(wal_file):
            os.remove(wal_file)
        remove_ok = True
    except Exception as ex:
        msg = "error in SiteTempDatabase.force_clear(), " + filename
        ErrorLogger.log_error("SiteTempDatabase", ex, msg)
    return remove_ok
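force_clear has to delete not only the database file but also the temp and WAL side files SQLite can leave behind. A standalone sketch of the same cleanup, assuming the common "-journal" and "-wal" suffixes (the actual suffix constants live on TempDBInterface):

import os

def remove_sqlite_files(db_path: str, suffixes=("", "-journal", "-wal")) -> bool:
    """Best-effort removal of a SQLite database and its side files (a sketch)."""
    ok = True
    for suffix in suffixes:
        target = db_path + suffix
        try:
            if os.path.exists(target):
                os.remove(target)
        except OSError:
            ok = False  # keep going and report partial failure to the caller
    return ok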
 def process_data(self, data: FilteredDomainData, **kwargs):
     account = kwargs.get("Account")
     try:
         if isinstance(data, FilteredDomainData) and isinstance(account, SiteAccount):
             if TldUtility.is_top_tld(data.domain):
                 sleep_time = random.randint(self._min_sleep_time, self._max_wait)
                 time.sleep(sleep_time)
                 moz = MozCom(account)
                 if not self._is_throughput_debug:
                     ranking = moz.get_ranking_data(data.domain)
                 else:
                     ranking = 100
                 data.da = ranking
         else:
             raise ValueError("invalid data or account in process_data()")
     except Exception as ex:
         ErrorLogger.log_error("MozFilter", ex, "process_data() " + str(data) +
                               " account: " + str(getattr(account, "userID", None)))
     finally:
         PrintLogger.print("Moz processed: " + str(data) + " with: " +
                           str(getattr(account, "userID", None)))
         if isinstance(data, FilteredDomainData):
             with self._sync_lock:
                 self._job_done += 1
                 if account is not None:
                     account.Available = True
             if data.da >= self._min_DA_value:
                 if not self._is_throughput_debug:
                     CsvLogger.log_to_file(self._log_file, [(data.domain, data.da)])  # log this to file
                 self._output_queue.put(data)
 def _check_whois(self, domain_data: OnSiteLink):
     root_domain = domain_data.link.lower()
     try:
         if not self._is_debug:
             if root_domain.startswith("http"):
                 root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
             is_available, is_redemption = LinkChecker.is_domain_available_whois(root_domain)  # check whois record
             if is_available or is_redemption:
                 if is_available:
                     real_response_code = ResponseCode.Expired
                 else:
                     real_response_code = ResponseCode.MightBeExpired
                 domain_data.link = root_domain
                 domain_data.response_code = real_response_code
                 self._put_output_result_in_queue(domain_data)
         else:
             self._put_output_result_in_queue(domain_data)
     except Exception as ex:
         ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex,
                               "_check_whois() " + root_domain)
     finally:
         self._add_job_done_one()
 def _check_whois_v1(self, domain_data: OnSiteLink):
     root_domain = domain_data.link
     try:
         if root_domain.startswith("http"):
             root_domain = LinkChecker.get_root_domain(domain_data.link)[1]
         real_response_code = domain_data.response_code
         whois = LinkChecker.check_whois(root_domain)  # check whois record
         if whois[0]:
             if whois[2]:  # domain is expired
                 real_response_code = ResponseCode.Expired
             else:
                 real_response_code = ResponseCode.MightBeExpired
         if real_response_code == ResponseCode.Expired:
             domain_data.link = root_domain
             domain_data.response_code = real_response_code
             with self._queue_lock:
                 self._output_q.put((domain_data.link, domain_data.response_code))
     except Exception as ex:
         ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker", ex,
                               "_check_whois_v1() " + root_domain)
     finally:
         self._add_job_done_one()
 def close(self):
     try:
         self.db.close()
     except Exception as ex:
         msg = "error in SiteTempDatabase.close(): failed to close db, " + self.filename
         ErrorLogger.log_error("SiteTempDatabase", ex, msg)
    def get_progress(self) -> list:
        """
        ProgressLogInterface: get the progress data in list format so that it can be compiled
        into the standard format.
        :return: list of progress data, with exactly the length of the column names in get_column_names()
        """
        total_record = self._db_buffer.get_total_record()

        if (self._job_done == self._job_done_shadow and self._job_done > 0) or \
                (self._job_done > self._min_buff_delete_threshold * 0.9
                 and total_record > self._min_buff_delete_threshold):
            self._speed_penalty_count += 1
            if self._speed_penalty_count >= 2:
                ErrorLogger.log_error(
                    "WhoisChecker.get_progress()",
                    TimeoutError("progress is stuck, restarting internals."),
                    self._db_buffer._file_name)
                print("going to clear cache")
                self._db_buffer.clear_cache()
                self.reset()
                total_record = 0
                self._db_buffer.start_input_output_cycle()
        else:
            print("no need to clear cache.")
            self._job_done_shadow = self._job_done
            self._speed_penalty_count = 0
        return [self._job_done, total_record]
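get_progress() returns [job_done, total_record] and is designed to be polled periodically by a progress logger. A minimal sketch of such a poller (ProgressPoller and the interval are illustrative, not part of the original code):

import threading

class ProgressPoller(threading.Thread):
    """Hypothetical poller that calls get_progress() until stopped."""

    def __init__(self, source, stop_event: threading.Event, interval=60):
        threading.Thread.__init__(self)
        self._source = source        # any object exposing get_progress() -> list
        self._stop_event = stop_event
        self._interval = interval

    def run(self):
        while not self._stop_event.is_set():
            job_done, total_record = self._source.get_progress()
            print("jobs done:", job_done, "records buffered:", total_record)
            self._stop_event.wait(self._interval)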
    def _check_whois_with_dns(self, page: OnSiteLink):
        real_response_code = ResponseCode.DNSError
        skip_whois_check = False
        try:
            if not self._is_debug:
                root_result = LinkChecker.get_root_domain(page.link)
                root_domain = root_result[1]
                sub_domain = root_result[4]
                suffix = root_result[5]

                if len(sub_domain) == 0 or suffix not in TldUtility.TOP_TLD_LIST:
                    skip_whois_check = True
                else:
                    if LinkChecker.is_domain_DNS_OK(sub_domain):  # check DNS first
                        real_response_code = ResponseCode.NoDNSError
                        skip_whois_check = True
                    elif not sub_domain.startswith("www."):
                        if LinkChecker.is_domain_DNS_OK("www." + root_domain):
                            real_response_code = ResponseCode.NoDNSError
                            skip_whois_check = True

                    page.response_code = real_response_code
                    page.link_type = OnSiteLink.TypeOutbound
                    page.link = root_domain
        except Exception as ex:
            ErrorLogger.log_error("WhoisChecker", ex, "_check_whois_with_dns() " + page.link)
            skip_whois_check = True
        finally:
            if not skip_whois_check and real_response_code == ResponseCode.DNSError:
                self._check_whois(page)
            else:
                self._add_job_done_one()
 def append_many(self, new_data_list, convert_tuple=True) -> bool:
     append_OK = False
     if new_data_list is not None and len(new_data_list) > 0:
         self.put_lock.acquire()
         try:
             tempdb = SiteTempDatabase(self.ref)
             try:
                 if convert_tuple:
                     to_tuple = [(x.link, x.response_code, x.link_level, x.link_type) for x in new_data_list]
                 else:
                     to_tuple = new_data_list
                 tempdb.cur.execute("BEGIN")
                 tempdb.cur.executemany("INSERT OR IGNORE INTO TEMP (LINK, RS_CODE, LEV, L_TYPE) "
                                        "VALUES (?, ?, ?, ?);", to_tuple)
                 tempdb.db.commit()
                 append_OK = True
             except OperationalError as ex:
                 msg = "error in SiteTempDataDisk.append_many(), operation failed. " + self.ref
                 ErrorLogger.log_error("SiteTempDataDisk", ex, msg)
         except Exception as outer_ex:
             msg = "error in SiteTempDataDisk.append_many(), " + self.ref
             ErrorLogger.log_error("SiteTempDataDisk", outer_ex, msg)
         finally:
             self.put_lock.release()
     return append_OK
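append_many gets its idempotence from INSERT OR IGNORE combined with executemany: duplicate rows are silently skipped, provided the table carries a uniqueness constraint to conflict on. A self-contained sketch against an in-memory database (the real TEMP schema is not shown above, so the UNIQUE constraint here is an assumption):

import sqlite3

db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE TEMP (LINK TEXT UNIQUE, RS_CODE INT, LEV INT, L_TYPE INT)")
rows = [("example.com", 404, 0, 1), ("example.com", 404, 0, 1)]  # duplicate on purpose
db.executemany("INSERT OR IGNORE INTO TEMP (LINK, RS_CODE, LEV, L_TYPE) VALUES (?, ?, ?, ?);", rows)
db.commit()
print(db.execute("SELECT COUNT(*) FROM TEMP").fetchone()[0])  # prints 1: the duplicate was ignored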
 def process_data_batch(self, data: collections.abc.Iterable, **kwargs):
     account = kwargs.get("Account")
     temp = []
     try:
         if isinstance(data, collections.abc.Iterable) and isinstance(account, SiteAccount):
             temp = [x for x in data if isinstance(x, FilteredDomainData) and TldUtility.is_top_tld(x.domain)]
             check_list = [y.domain for y in temp]
             sleep_time = random.randint(self._min_sleep_time, self._max_wait)
             time.sleep(sleep_time)
             moz = MozCom(account)
             if not self._is_throughput_debug:
                 rankings = moz.get_ranking_data_batch(check_list, limit=len(check_list))
             else:
                 rankings = [100] * len(temp)
             for i in range(len(temp)):
                 temp[i].da = rankings[i]
         else:
             raise ValueError("invalid data or account in process_data_batch()")
     except Exception as ex:
         ErrorLogger.log_error("MozFilter", ex, "process_data_batch() " + str(data) +
                               " account: " + str(getattr(account, "userID", None)))
     finally:
         PrintLogger.print("Moz processed: " + str(data) + " with: " +
                           str(getattr(account, "userID", None)))
         with self._sync_lock:
             job_done = [x for x in data if x is not None] if isinstance(data, collections.abc.Iterable) else []
             self._job_done += len(job_done)
             if account is not None:
                 account.Available = True
             for item in temp:
                 if isinstance(item, FilteredDomainData):
                     if item.da >= self._min_DA_value:
                         if not self._is_throughput_debug:
                             CsvLogger.log_to_file(self._log_file, [(item.domain, item.da)])  # log this to file
                         self._output_queue.put(item)
 def run(self):
     try:
         for item in self._filters:
             item.start()
         for item in self._filters:
             item.join()
     except Exception as ex:
         ErrorLogger.log_error("FilterPool", ex, "run()")
 def remove_slaves(self, s: MiningMasterController, data: SlaveOperationData):
     if data.slave_addrs is not None and len(data.slave_addrs) > 0:
         s.remove_slaves(data.slave_addrs)
     elif data.ref != "" and data.count > 0:
         print("init from cloud")
         EC2 = EC2Controller("") # test this
         s.add_slaves(EC2.shut_down_machines(data.ref, data.count))
     else:
         ErrorLogger.log_error("MasterRequestHandler.remove_slaves()", ValueError("Remove Slaves failed"))
 def _put_output_result_in_queue(self, domain_data: OnSiteLink):
     if not (self._stop_event.is_set() or self._internal_stop_event.is_set()):
         try:
             self._output_q.put((domain_data.link, domain_data.response_code))
         except Exception as inner_ex:
             if self._output_q is None:
                 manager, self._output_q = get_queue_client(QueueManager.MachineSettingCrawler, QueueManager.Method_Whois_Output)
             ErrorLogger.log_error("WhoisChecker", inner_ex, addtional="failed to put result in queue.")
             time.sleep(0.01)
             self._put_output_result_in_queue(domain_data)  # retry; note this recurses until the put succeeds
    def _sample_data(self):
        ref_time = time.time()
        manager, result_queue = get_queue_client(QueueManager.MachineSettingCrawler,
                                                 QueueManager.Method_Whois_Input)
        if result_queue is None:
            ErrorLogger.log_error("ExternalSiteChecker.WhoisChecker._sample_data()",
                                  ValueError("result queue is None, cannot get data."))
            if not (self._stop_event.is_set() or self._internal_stop_event.is_set()):
                self._sample_data()
        else:
            while not (self._stop_event.is_set() or self._internal_stop_event.is_set()):
                data_list = []
                counter = 0
                while not result_queue.empty():
                    data = None
                    try:
                        data = result_queue.get()
                    except Exception as ex:
                        ErrorLogger.log_error("WhoisChecker._sample_data", ex)
                        if result_queue is None:
                            manager, result_queue = get_queue_client(QueueManager.MachineSettingCrawler,
                                                                     QueueManager.Method_Whois_Input)
                    if isinstance(data, OnSiteLink):
                        counter += 1
                        data_list.append((data.link, data.response_code))
                    elif isinstance(data, tuple) and len(data) == 2:
                        counter += 1
                        data_list.append(data)
                    elif isinstance(data, list):
                        data_list += data
                        counter += 1
                    if counter >= self._sample_batch_size:
                        break
                    current_time = time.time()
                    if current_time - ref_time >= self._sample_batch_timeout:
                        break
                    time.sleep(self._min_sampling_duration)
                ref_time = time.time()
                if len(data_list) > 0:
                    self._db_buffer.append_to_buffer(data_list, convert_tuple=False)
                    data_list.clear()
                time.sleep(self._input_period)
    def check_whois(domain: str):
        """
        Check the whois record of a domain.
        :param domain: the domain name to check, e.g. google.com
        :return: a 3-tuple (might_be_available, expiration_date, is_certain):
                 True if the domain might be available to buy now, the expiry datetime if known,
                 and True if the answer is 100% certain
        """
        if domain is not None and len(domain) > 0:
            try:
                match = re.match(r"^[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,10}$", domain)
                if match is None:
                    raise ValueError("domain name error.")
                server = net.get_root_server(domain)
                raw = net.get_whois_raw(domain, server=server)
                parsed = parse.parse_raw_whois(raw_data=raw)
                expire_record = parsed.get("expiration_date")
                name_servers = parsed.get("nameservers")
                if len(parsed) <= 1:
                    return True, None, True
                else:
                    if expire_record is not None and len(expire_record) > 0:
                        temp = expire_record[0]
                    else:
                        if name_servers is None:
                            return True, None, True
                        else:
                            return False, None, False

                    expire_dates = len(expire_record)
                    if expire_dates > 1:  # use the latest expiry date on record
                        for i in range(1, expire_dates):
                            data = expire_record[i]
                            if data > temp:
                                temp = data
                    date = datetime.datetime.utcnow()
                    if temp is not None:
                        if date < temp:  # not expired yet
                            return False, temp, True
                        else:
                            if name_servers is None:
                                return True, temp, True
                            else:
                                return True, temp, False
                    else:
                        return True, None, False
            except Exception as ex:
                msg = "error in LinkChecker.check_whois(), checking " + domain
                ErrorLogger.log_error("LinkChecker", ex, msg)
                return False, None, False
        else:
            return False, None, True
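A small sketch of how a caller might read the 3-tuple that check_whois() returns; describe_whois is illustrative and not part of the original class:

def describe_whois(result: tuple) -> str:
    """Interpret (might_be_available, expire_date, is_certain) from check_whois()."""
    might_be_available, expires_on, is_certain = result
    if might_be_available and is_certain:
        return "available (expired or no usable whois record)"
    if might_be_available:
        return "possibly available (expired on %s); verify manually" % expires_on
    return "taken, expires %s" % expires_on

print(describe_whois((True, None, True)))    # available (expired or no usable whois record)
print(describe_whois((False, None, False)))  # taken, expires None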
 def delete_db_data(self, data=None, **kwargs) -> bool:
     success = False
     try:
         if isinstance(data, MiningList):
             with self._db_lock:
                 db = CategoryDomainSiteDB(self._db_addr)
                 db.delete_sites(data.data)
                 db.close()
                 success = True
     except Exception as ex:
         ErrorLogger.log_error("MarketplaceDBManager.delete_db_data", ex)
     return success
    def get_next(self, link_type: int=OnSiteLink.TypeAll, response_code: int=ResponseCode.All):
        counter = 0
        while True:
            if not self.can_continue():
                return  # data source is set not to continue; ends the generator (PEP 479)

            item = None
            self.get_lock.acquire()
            try:
                tempdb = SiteTempDatabase(self.ref)
                cur = tempdb.cur.execute("SELECT LINK, RS_CODE, LEV, L_TYPE, rowid FROM TEMP "
                                         "ORDER BY ID LIMIT 1 OFFSET {0:d};".format(counter))
                item = cur.fetchone()
                tempdb.close()
            except Exception as ex:
                msg = "error in SiteTempDataDisk.get_next(), " + self.ref
                ErrorLogger.log_error("SiteTempDataDisk", ex, msg)
            finally:
                self.get_lock.release()

            output_obj = None
            if item is not None and len(item) > 0:
                counter += 1
                link = item[0]
                rs_code = item[1]
                level = item[2]
                inner_link_type = item[3]
                obj = OnSiteLink(link, response_code=rs_code, link_level=level, link_type=inner_link_type)
                if link_type == OnSiteLink.TypeAll or inner_link_type == link_type:
                    if response_code == ResponseCode.All:
                        output_obj = obj
                    elif response_code == ResponseCode.LinkNotBroken and not ResponseCode.is_link_broken(rs_code):
                        output_obj = obj
                    elif response_code == ResponseCode.LinkBroken and ResponseCode.is_link_broken(rs_code):
                        output_obj = obj
                    elif rs_code == response_code:
                        output_obj = obj
                    else:
                        continue
                else:
                    continue
            else:
                return  # no more rows: end of data
            if output_obj is not None:
                yield output_obj
            else:
                return
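get_next above now ends with plain return statements: under PEP 479 (the default since Python 3.7), raising StopIteration inside a generator body is converted into a RuntimeError instead of ending the iteration. A tiny demonstration of the correct way to stop a generator:

def counts_to(limit):
    n = 0
    while True:
        if n >= limit:
            return  # ends the generator cleanly; "raise StopIteration" here
                    # would surface as RuntimeError under PEP 479
        yield n
        n += 1

print(list(counts_to(3)))  # [0, 1, 2]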
 def add_db_data(self, data=None, **kwargs) -> bool:
     success = False
     try:
         if isinstance(data, MiningList):
             with self._db_lock:
                 db = CategoryDomainSiteDB(self._db_addr)
                 db.add_sites(data.data)
                 db.close()
                 success = True
         else:
             raise ValueError("input value has incorrect type.")
     except Exception as ex:
         ErrorLogger.log_error("MarketplaceDBManager.add_db_data", ex)
     return success
 def begin_crawl(self, level=0):
     try:
         self.pool.imap_unordered(PageChecker.crawl_page_for_iter, self.data_source)
         while self.data_source.can_continue():
             time.sleep(0.1)
     except Exception as ex:
         msg = "begin_crawl() " + str(self.get_site_info())
         ErrorLogger.log_error("SiteThreadChecker", ex, msg)
        def wrap(*args, **kw):
            ts = time.time()
            result = method(*args, **kw)
            te = time.time()
            gap = te - ts
            if gap > log_if_longer > 0:
                PrintLogger.print('%r (%r, %r) %2.2f sec' %
                                  (method.__name__, args, kw, gap))
                ErrorLogger.log_error(ref,
                                      ValueError("Operation took too long."),
                                      "completed in " + str(gap))
            elif log_if_longer == 0:
                PrintLogger.print('%r took %2.2f sec' % (method.__name__, gap))

            return result
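wrap above is the inner function of a timing decorator; the enclosing def is not shown. A presumed shape for the whole decorator, as a sketch (print stands in for PrintLogger/ErrorLogger, and the parameter names mirror the fragment above):

import functools
import time

def timed(ref: str, log_if_longer: float = 0):
    def decorator(method):
        @functools.wraps(method)
        def wrap(*args, **kw):
            ts = time.time()
            result = method(*args, **kw)
            gap = time.time() - ts
            if gap > log_if_longer > 0:
                print('%r (%r, %r) took too long: %2.2f sec, ref=%s' % (method.__name__, args, kw, gap, ref))
            elif log_if_longer == 0:
                print('%r took %2.2f sec' % (method.__name__, gap))
            return result
        return wrap
    return decorator

@timed("demo", log_if_longer=0.5)
def slow_call():
    time.sleep(0.6)  # above the threshold, so the call gets logged

slow_call()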
 def __init__(self, stop_event: Event, input_queue: Queue=None, output_queue: Queue=None, worker_number: int=1,
              queue_lock: multiprocessing.RLock=None, throughput_debug=False, batch=1, batch_get_timeout=60, **kwargs):
     self._input_queue = input_queue
     self._output_queue = output_queue
     self._stop_event = stop_event
     if worker_number <= 0:
         worker_number = 1
         ErrorLogger.log_error("FilterInterface", ValueError("worker number <= 0, reset to 1"), "__init__")
     self._worker_number = worker_number
     self._process_queue_lock = queue_lock
     self._is_throughput_debug = throughput_debug
     self._sync_lock = threading.RLock()
     self._job_done = 0
     self._batch = batch
     self._batch_get_timeout = batch_get_timeout
     threading.Thread.__init__(self)
    def __init__(self, input_queue: Queue, output_queue: Queue, queue_lock: multiprocessing.RLock, stop_event: Event,
                 matrix: CrawlMatrix, accounts=None):
        self._input_queue = input_queue
        self._output_queue = output_queue
        self._queue_lock = queue_lock
        self._stop_event = stop_event
        self._maxtrix = matrix
        self._filters = []
        manager = AccountManager()
        self._proxies = ProxyManager().get_proxies()
        if accounts is None:
            ErrorLogger.log_error("FilterPool.__init__", ValueError("accounts is None"))
            accounts = []
        moz_batch = 50
        moz_batch_timeout = int(moz_batch * 2)

        moz_accounts = manager.get_accounts(AccountType.Moz) if len(accounts) == 0 \
            else [x for x in accounts if x.siteType == AccountType.Moz]
        majestic_accounts = manager.get_accounts(AccountType.Majestic) if len(accounts) == 0 \
            else [x for x in accounts if x.siteType == AccountType.Majestic]
        # input/output queues between filters are wired up below, once the filter list is final
        filter_moz = MozFilter(stop_event=self._stop_event, min_DA_value=self._maxtrix.da, manager=manager,
                               accounts=moz_accounts, proxies=self._proxies,
                               batch=moz_batch, batch_get_timeout=moz_batch_timeout)  # worker count depends on number of accounts

        workers_for_moz = len(moz_accounts)
        workers_for_archive = int(workers_for_moz / 32 * moz_batch)
        workers_for_majestic = int(workers_for_moz / 200 * moz_batch)
        filter_archive = ArchiveOrgFilter(stop_event=self._stop_event, queue_lock=self._queue_lock,
                                          worker_number=workers_for_archive,
                                          en_profile_check=matrix.en_archive_check)  # min one worker
        filter_maj = MajesticFilter(stop_event=self._stop_event, TF=self._maxtrix.tf, CF=self._maxtrix.cf,
                                    CF_TF_Deviation=self._maxtrix.tf_cf_deviation, Ref_Domains=self._maxtrix.ref_domains,
                                    manager=manager, worker_number=workers_for_majestic, en_tf_check=matrix.en_tf_check,
                                    en_spam_check=matrix.en_spam_check, accounts=majestic_accounts)
        if matrix.en_moz:
            self._filters.append(filter_moz)
        if matrix.archive_count:
            self._filters.append(filter_archive)
        if matrix.en_majestic:
            self._filters.append(filter_maj)
        filter_len = len(self._filters)
        if filter_len == 0:
            self._output_queue = self._input_queue  # todo: short circuit, needs testing
        else:
            # chain the filters: each filter's output queue feeds the next filter's input
            if filter_len > 1:
                for i in range(0, filter_len - 1):
                    new_queue = Queue()
                    self._filters[i]._output_queue = new_queue
                    self._filters[i + 1]._input_queue = new_queue
            self._filters[0]._input_queue = self._input_queue
            self._filters[filter_len - 1]._output_queue = self._output_queue

        threading.Thread.__init__(self)
 def terminate_callback():
     ErrorLogger.log_error("WhoisChecker", StopIteration("terminated."))
    def is_domain_available_whois(domain: str) -> (bool, bool):
        """
        Availability check with whois.
        :param domain: domain name to check, e.g. google.com;
        make sure the domain is in lower case in the first place.
        :return: (available, is_redemption): True if the domain is available, and True if the domain is in redemption
        """
        if domain is not None and len(domain) > 0:
            try:
                match = re.match(r"^[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,10}$", domain)
                if match is None:
                    raise ValueError("domain name error.")
                server = net.get_root_server(domain)
                raw_data = net.get_whois_raw(domain, server=server)
                available = False
                is_redemption = False
                force_break = False
                status = ""
                format_line = ""
                all_lines = []
                patterns = [status_pattern, available_pattern0, available_pattern1]
                raw_data = [segment.replace("\r", "") for segment in raw_data]  # carriage returns are the devil
                for segment in raw_data:
                    all_lines += str(segment).splitlines()

                for pattern in patterns:
                    if available or is_redemption or force_break:
                        break
                    for line in all_lines:
                        if len(line) == 0:
                            continue
                        temp = line.strip()
                        if temp.endswith(":"):
                            format_line = temp
                            continue
                        if format_line.endswith(":"):
                            format_line += temp
                        else:
                            format_line = temp
                        if format_line.startswith("%"):
                            continue
                        else:
                            format_line = format_line.lower()
                            if pattern is status_pattern:
                                match_status = re.search(status_pattern, format_line)
                                if match_status is not None:
                                    status = match_status.group(2)
                                    if status is not None and len(status) > 0:
                                        if re.search(status_value_pattern, status) is not None:
                                            available = True
                                            break
                                        elif re.search(redemption_pattern, status) is not None:
                                            is_redemption = True
                                            break
                                        elif re.search(other_official_status_pattern, status) is not None:
                                            force_break = True
                                            break
                            elif re.search(pattern, format_line) is not None:
                                available = True
                                break

                return available, is_redemption
            except ValueError:
                return False, False
            except Exception as ex:
                ErrorLogger.log_error("LinkChecker", ex,
                                      "is_domain_available_whois() " + domain)
                return False, False
        else:
            return False, False
    def send_and_receive(self):
        in_buffer = self.rfile
        out_buffer = self.wfile
        s = self.server.addtional_obj
        command = CommandProcessor.receive_command(in_buffer)
        #print("process cmd: ", command.cmd)
        if command is not None and isinstance(s, MiningMasterController):
            reply = CommandStruct(cmd=ServerCommand.Com_ReplyOK)
            if command.cmd == ServerCommand.Com_Start:
                #print("start conversation:")
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Stop:
                #print("end conversation:")
                return  # exit point

            elif command.cmd == ServerCommand.Com_Get_DB_DATA:
                data = command.data
                if isinstance(data, DBRequestFields):
                    try:
                        reply.data = s.get_db_results(db_type=data.db_type, db_name=data.db_name, index=data.index, length=data.length)
                    except Exception as ex:
                        ErrorLogger.log_error("MasterRequestHandler.send_and_receive()", ex,
                                              "cmd = ServerCommand.Com-Get-DB-DATA")
                        reply.cmd = ServerCommand.Com_ReplyError
                        reply.data = "Get DB data failed"
                else:
                    ErrorLogger.log_error("MasterRequestHandler.send_and_receive()",
                                          Exception("wrong data type recieved."), "cmd = ServerCommand.Com-Get-DB-DATA")
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Get DB data failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Remove_DB:
                data = command.data
                if isinstance(data, DBRequestFields):
                    try:
                        s.remove_db(db_type=data.db_type, db_name=data.db_name)
                    except Exception as ex:
                        ErrorLogger.log_error("MasterRequestHandler.send_and_receive()", ex,
                                              "cmd = ServerCommand.Com_DB-RM-DB")
                        reply.cmd = ServerCommand.Com_ReplyError
                        reply.data = "Remove DB failed"
                else:
                    ErrorLogger.log_error("MasterRequestHandler.send_and_receive()",
                                          Exception("wrong data type recieved."), "cmd = ServerCommand.Com_DB-RM-DB")
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Remove DB failed"
                CommandProcessor.send_command(out_buffer, reply)
            elif command.cmd == ServerCommand.Com_Start_Filter:
                data = command.data
                try:
                    if s.is_alive():
                        s.stop()
                        s.join()
                    if isinstance(data, FilteringSetupData):  # MatrixFilterControl.FilteringSetupData
                        self.server.addtional_obj = MiningMasterController(ref=data.ref, accounts=data.accounts,
                                                                           crawl_matrix=data.crawl_matrix,
                                                                           filtering_only_mode=True,
                                                                           filtering_offset=data.offset,
                                                                           filtering_total=data.total
                                                                           )
                        self.server.addtional_obj.start()
                except Exception as ex:
                    print(ex)
                    ErrorLogger.log_error("MasterRequestHandler.send_and_receive()", ex, "cmd = ServerCommand.Com_Start_Filter()")
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Com_Start_Filter failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Setup:  # test this
                data = command.data
                try:
                    if s.is_alive():
                        s.stop()
                        s.join()
                    if isinstance(data, SetupData):
                        self.server.addtional_obj = MiningMasterController(ref=data.ref, accounts=data.accounts,
                                                                           cap_slave=data.cap,
                                                                           cap_slave_process=data.cap2,
                                                                           cap_concurrent_page=data.cap3,
                                                                           all_job=data.total,
                                                                           offset=data.offset,
                                                                           max_page_level=data.max_page_level,
                                                                           max_page_limit=data.max_page_limit,
                                                                           loopback_database=data.loopback,
                                                                           refresh_rate=data.refresh_rate,
                                                                           filters=data.db_filter,
                                                                           crawl_matrix=data.crawl_matrix,
                                                                           )
                        if data.addtional_data is not None and isinstance(data.addtional_data, SlaveOperationData):
                            self.add_slaves(self.server.addtional_obj, data.addtional_data)
                            self.server.addtional_obj.setup_minging_slaves()
                        self.server.addtional_obj.start()
                    else:
                        raise NotImplementedError("other data type is not implemented.")
                except Exception as ex:
                    print(ex)
                    ErrorLogger.log_error("MasterRequestHandler.send_and_receive()", ex, "cmd = ServerCommand.Com_Setup()")
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Setup failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Clear_Cache:
                try:
                    if s.is_alive():
                        s.stop()
                        s.join()
                    s.clear_host_cache()
                    s.clear_slave_cache()
                except Exception as ex:
                    print(ex)
                    ErrorLogger.log_error("MasterRequestHandler.send_and_receive()", ex, "cmd = ServerCommand.Com_Clear_Cache()")
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Setup failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Add_Seed:
                data = command.data
                if isinstance(data, MiningList):
                    s.add_seeds(data)
                else:
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Add Seed Failed, format is wrong in server handler."
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Add_Slave: # test this
                try:
                    data = command.data
                    if isinstance(data, SlaveOperationData):
                        self.add_slaves(s, data)
                    else:
                        raise NotImplementedError("other data type is not implemented.")
                except Exception as ex:
                    print(ex)
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Add slave failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Del_Slave: # test this
                try:
                    data = command.data
                    if isinstance(data, SlaveOperationData):
                        self.remove_slaves(s, data)
                    else:
                        raise NotImplementedError("other data type is not implemented.")
                except Exception as ex:
                    print(ex)
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Add slave failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Begin_Mining:  # not implemented; use setup to begin mining
                reply.cmd = ServerCommand.Com_ReplyError
                reply.data = "begin mining is not implemented; use Com_Setup to begin mining"

            elif command.cmd == ServerCommand.Com_Stop_Mining:  # test this
                try:
                    EC2 = EC2Controller("")
                    addrs = [slave.address.address for slave in s.slaves if isinstance(slave, Server)]
                    s.pause()
                    #s.slaves.clear()
                    #if s.isAlive:
                    #    s.join(0)
                    #self.server.addtional_obj = MiningMasterController()

                    EC2.shut_down_machines_list(addrs)
                except Exception:
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "Stop mining failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Status:  # test this
                try:
                    CPU = MachineInfo.get_cpu(1)
                    MEM = MachineInfo.get_memory()
                    NET = MachineInfo.get_network(1)
                    slaveStatus = [slave.status for slave in s.slaves]
                    totalPage = sum([slave.total_page_done for slave in slaveStatus])
                    ave_page = 0
                    filter_progress = s.get_filter_progress()
                    if len(s.slaves) > 0:
                        ave_page = int(sum([slave.page_per_site for slave in slaveStatus])/len(s.slaves))
                    total_result = sum([slave.result for slave in slaveStatus])
                    total_cap_slave = sum([slave.cap_slave for slave in slaveStatus])
                    total_cap_process = sum([slave.cap_slave * slave.cap_process for slave in slaveStatus])
                    total_cap_page = sum([slave.cap_slave * slave.cap_process * slave.cap_concurrent_page for slave in slaveStatus])
                    status = ServerStatus(wait_job=s.job_all - s.job_done, done_job=s.job_done, all_job=s.job_all,
                                          total_page_done=totalPage, page_per_site=ave_page,
                                          result=total_result, cpu_cores=CPU[0], cpu_percent=CPU[1],
                                          toal_memory=MEM[0], memory_percent=MEM[1], net_recieved=NET[0], net_send=NET[1],
                                          cap_slave=total_cap_slave, cap_process= total_cap_process, cap_concurrent_page= total_cap_page,
                                          filter_done=filter_progress[0], filter_total=filter_progress[1])
                    server = Server(server_type=ServerType.ty_Host, status=status, address=ServerAddress("localhost", MiningTCPServer.DefaultListenPort))
                    servers = []
                    servers.append(server)
                    servers += s.slaves
                    reply.data = MiningList(s.ref, servers)
                except Exception:
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "getting status failed"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_DataBase_Status:  # test this
                reply.data = s.get_db_stats()  # send back a copy
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Set_DB_Filter:
                data = command.data
                if isinstance(data, DBFilterCollection):
                    if data != s.filter_shadow:
                        s.filter_shadow = data
                        s.update_db_stats(True)
                else:
                    reply.cmd = ServerCommand.Com_ReplyError
                    reply.data = "wrong data type for filters, should be DBFilterCollection"
                CommandProcessor.send_command(out_buffer, reply)

            elif command.cmd == ServerCommand.Com_Progress:  # test this
                reply.data = PrograssData(ref=s.ref, done=s.job_done, all_job=s.job_all,offset=s.offset,
                                          duration=s.end_time - s.start_time, in_progress=s.in_progress)
                CommandProcessor.send_command(out_buffer, reply)

            else:
                reply.cmd = ServerCommand.Com_ReplyError
                reply.data = "command is not valid, please try again"
                CommandProcessor.send_command(out_buffer, reply)

            self.send_and_receive()  # recurse to continue the conversation (one stack frame per command)
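send_and_receive recurses once per command, so a long-lived connection adds one stack frame per request and can eventually hit Python's recursion limit (about 1000 frames by default). A loop-based sketch of the same conversation shape, with generic stand-ins for the receive and dispatch steps:

def converse(receive, handle, stop_sentinel=None):
    """Drive a command conversation with a loop instead of recursion (a sketch)."""
    while True:
        command = receive()
        if command is None or command == stop_sentinel:
            return  # exit point, as in the recursive version
        handle(command)

# toy usage: three commands, then the end-of-stream marker
commands = iter(["status", "progress", "stop-mining", None])
converse(lambda: next(commands), print)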