def testArchiveProfileFilter1(self):
    """Run the archive filter over every domain listed in the fixture file."""
    setup = {
        "input_queue": queue.Queue(),
        "output_queue": queue.Queue(),
        "stop_event": Event(),
        "throughput_debug": True,
        "worker_number": 2,
    }
    archive_filter = get_archive_filter(**setup)
    domains = FileIO.FileHandler.read_lines_from_file(
        "/Users/superCat/Desktop/PycharmProjectPortable/test/archive_domain_test.txt")
    for domain in domains:
        # Each fixture line is treated as one domain to filter.
        archive_filter.process_data(data=FilteredDomainData(domain=domain))
def testMajesticFilter(self):
    """Feed each fixture link's root domain through the Majestic filter.

    NOTE(review): the local previously named ``filter`` shadowed the
    built-in of the same name; renamed to ``majestic_filter``.
    """
    majestic_filter = get_majestic_filter(worker_number=1, input_queue=Queue(),
                                          output_queue=Queue(), stop_event=Event())
    param = {"Account": majestic_account}
    links = FileIO.FileHandler.read_lines_from_file(
        "/Users/superCat/Desktop/PycharmProjectPortable/test/spam_test2.txt")
    for link in links:
        # get_root_domain returns a sequence; index 1 is used here exactly as
        # in the original code — presumably the root-domain string.
        link = LinkChecker.get_root_domain(link)[1]
        print("doing link:", link)
        site = FilteredDomainData(domain=link)
        majestic_filter.process_data(data=site, **param)
def input_thread():
    """Stream up to ``site_count`` domains from the bad-results CSV into ``input_q``."""
    data_path = "/Users/superCat/Desktop/PycharmProjectPortable/test/03-09-2015-Bad-Results.csv"
    queued = 0
    with open(data_path, mode='r', newline='') as csv_file:
        for record in csv.reader(csv_file, delimiter=','):
            # Guard clause replaces the original if/else + break shape.
            if queued >= site_count:
                break
            input_q.put(FilteredDomainData(domain=record[0]))
            # Brief pause so the consumer side is not flooded.
            time.sleep(0.0001)
            queued += 1
def testFilter(self):
    """Run a MajesticFilter over every domain in the spam_test1 fixture.

    NOTE(review): the local previously named ``filter`` shadowed the
    built-in of the same name; renamed to ``majestic_filter``.
    """
    manager = AccountManager()
    manager.AccountList.append(majestic_account)
    input_param = {
        "input_queue": queue.Queue(),
        "output_queue": queue.Queue(),
        "stop_event": Event()
    }
    majestic_filter = MajesticFilter(manager=manager, **input_param)
    param = {"Account": majestic_account}
    links = FileIO.FileHandler.read_lines_from_file(
        "/Users/superCat/Desktop/PycharmProjectPortable/test/spam_test1.txt"
    )
    for link in links:
        site = FilteredDomainData(domain=link)
        majestic_filter.process_data(data=site, **param)
def testFilterAll1(self):
    """Chain the archive and Majestic filters over each Moz_filtering CSV row."""
    db_path = "/Users/superCat/Desktop/PycharmProjectPortable/sync/"
    # NOTE(review): `manager` is never passed to the filters below — kept in
    # case AccountManager(db_path) has required side effects; confirm.
    manager = AccountManager(db_path)
    shared_param = {
        "input_queue": queue.Queue(),
        "output_queue": queue.Queue(),
        "stop_event": Event(),
        "throughput_debug": True,
        "worker_number": 2,
    }
    file_path = "/Users/superCat/Desktop/PycharmProjectPortable/sync/Moz_filtering.csv"
    archive_filter = get_archive_filter(**shared_param)
    majestic_filter = get_majestic_filter(**shared_param)
    param = {"Account": majestic_account}
    with open(file_path, 'rt') as csvfile:
        for count, row in enumerate(csv.reader(csvfile, delimiter=',')):
            link, da = row
            site = FilteredDomainData(domain=link, da=int(da))
            print(count, " process:", link)
            # Only run the Majestic filter when the archive filter kept the site.
            if archive_filter.process_data(data=site) is not None:
                print(majestic_filter.process_data(data=site, **param))
def format_input(data):
    """Coerce supported inputs into a FilteredDomainData instance.

    Returns ``data`` unchanged if it already is a FilteredDomainData,
    builds one from the first element of a 2-tuple, and falls through
    (implicitly returning None) for anything else.
    """
    if isinstance(data, FilteredDomainData):
        return data
    if isinstance(data, tuple) and len(data) == 2:
        domain, _ = data
        return FilteredDomainData(domain=domain)
def input_thread():
    """Enqueue ``site_count`` synthetic domains, pausing briefly between puts."""
    for index in range(site_count):
        synthetic = "domain{0:d}.com".format(index)
        input_q.put(FilteredDomainData(domain=synthetic))
        # Brief pause so the consumer side is not flooded.
        time.sleep(0.0001)