Example #1
0
 def process_data(self, data: FilteredDomainData, **kwargs):
     """Count archive links for one domain and forward the item.

     The lookup is done with ``ArchiveOrg.get_url_info``; the number of
     links it returns is stored in ``data.archive``. Whatever happens
     during the lookup, ``self._job_done`` is incremented under
     ``self._sync_lock``.

     Outside throughput-debug mode, an item whose lookup succeeded is
     logged to ``self._log_file`` and put on ``self._output_queue``;
     an item whose lookup failed is dropped. In throughput-debug mode
     every item is put on the queue and nothing is logged.

     Args:
         data: Item to process. Anything that is not a
             ``FilteredDomainData`` instance is ignored.
         **kwargs: Accepted but not used by this method.

     Returns:
         Always ``None``; results are delivered through
         ``self._output_queue``.
     """
     if not isinstance(data, FilteredDomainData):
         return None

     succeeded = False
     try:
         # Fall back to the plain domain when ``domain_var`` is empty.
         if len(data.domain_var) == 0:
             data.domain_var = data.domain
         archived_links = ArchiveOrg.get_url_info(
             data.domain_var,
             min_size=self._min_page_size,
             limit=-100,
         )
         archive_count = len(archived_links)
         data.archive = archive_count
         # The minimum-profile threshold is evaluated but currently not
         # enforced: a count below it does not reject the item.
         if archive_count < self._min_profile:
             pass
         succeeded = True
     except Exception:
         # Best effort: a failed lookup only leaves ``succeeded`` False;
         # the error itself is not reported.
         pass
     finally:
         with self._sync_lock:
             self._job_done += 1
             debug_mode = self._is_throughput_debug
             if succeeded and not debug_mode:
                 # Record the successful result in the CSV log file.
                 CsvLogger.log_to_file(
                     self._log_file,
                     [(data.domain, data.da, data.archive)],
                 )
             if succeeded or debug_mode:
                 self._output_queue.put(data)