Example #1
0
 def testArchiveProfileFilter1(self):
     """Push every line of the archive-domain fixture through the archive filter.

     The filter comes from ``get_archive_filter`` and is configured with fresh
     queues, a stop event, ``throughput_debug=True`` and ``worker_number=2``.
     The value returned by ``process_data`` is discarded and nothing is
     asserted, so this only fails if a call raises.
     """
     archive_filter = get_archive_filter(
         input_queue=queue.Queue(),
         output_queue=queue.Queue(),
         stop_event=Event(),
         throughput_debug=True,
         worker_number=2,
     )
     fixture_path = "/Users/superCat/Desktop/PycharmProjectPortable/test/archive_domain_test.txt"
     for domain in FileIO.FileHandler.read_lines_from_file(fixture_path):
         archive_filter.process_data(data=FilteredDomainData(domain=domain))
Example #2
0
    def testMajesticFilter(self):
        """Run each link of the spam fixture through the Majestic filter.

        Every line is reduced to element ``[1]`` of
        ``LinkChecker.get_root_domain`` (presumably the root domain -- confirm
        against that helper), echoed to stdout, wrapped in a
        ``FilteredDomainData`` and handed to ``process_data`` together with the
        Majestic account. Nothing is asserted.
        """
        majestic_filter = get_majestic_filter(
            worker_number=1,
            input_queue=Queue(),
            output_queue=Queue(),
            stop_event=Event(),
        )
        account_kwargs = {"Account": majestic_account}
        fixture_path = "/Users/superCat/Desktop/PycharmProjectPortable/test/spam_test2.txt"

        for raw_link in FileIO.FileHandler.read_lines_from_file(fixture_path):
            root_domain = LinkChecker.get_root_domain(raw_link)[1]
            print("doing link:", root_domain)
            majestic_filter.process_data(
                data=FilteredDomainData(domain=root_domain),
                **account_kwargs
            )
Example #3
0
        def input_thread():
            """Feed up to ``site_count`` domains from the bad-results CSV into ``input_q``.

            The first column of each row becomes the ``domain`` of a
            ``FilteredDomainData``. A 0.1 ms sleep follows every ``put``.
            ``site_count`` and ``input_q`` are taken from the enclosing scope.
            """
            data_path = "/Users/superCat/Desktop/PycharmProjectPortable/test/03-09-2015-Bad-Results.csv"
            with open(data_path, mode='r', newline='') as csv_file:
                rows = csv.reader(csv_file, delimiter=',')
                for queued, row in enumerate(rows):
                    # ``queued`` equals the number of items already put on the queue.
                    if queued >= site_count:
                        break
                    input_q.put(FilteredDomainData(domain=row[0]))
                    time.sleep(0.0001)
 def testFilter(self):
     """Run each line of the ``spam_test1.txt`` fixture through a ``MajesticFilter``.

     The filter is constructed directly with an ``AccountManager`` whose
     ``AccountList`` has the Majestic account appended, plus fresh queues and
     a stop event. Lines are passed to ``process_data`` unmodified, along
     with the account; nothing is asserted.
     """
     manager = AccountManager()
     manager.AccountList.append(majestic_account)
     majestic_filter = MajesticFilter(
         manager=manager,
         input_queue=queue.Queue(),
         output_queue=queue.Queue(),
         stop_event=Event(),
     )
     account_kwargs = {"Account": majestic_account}
     fixture_path = "/Users/superCat/Desktop/PycharmProjectPortable/test/spam_test1.txt"
     for domain in FileIO.FileHandler.read_lines_from_file(fixture_path):
         majestic_filter.process_data(
             data=FilteredDomainData(domain=domain),
             **account_kwargs
         )
Example #5
0
 def testFilterAll1(self):
     """Chain the archive filter and the Majestic filter over ``Moz_filtering.csv``.

     Every CSV row must contain exactly two fields, ``domain,da``; ``da`` is
     converted with ``int``. A row is forwarded to the Majestic filter only
     when the archive filter's ``process_data`` returns something other than
     ``None``, and the Majestic result is printed. Nothing is asserted.

     Raises:
         ValueError: if a row does not have exactly two fields or ``da`` is
             not an integer literal.
     """
     db_path = "/Users/superCat/Desktop/PycharmProjectPortable/sync/"
     # NOTE(review): ``manager`` is never used below; the construction is kept
     # in case ``AccountManager(db_path)`` has side effects -- confirm.
     manager = AccountManager(db_path)
     # The same dict is unpacked into both factories, so both filters are
     # handed the same queue and event objects.
     input_param = {
         "input_queue": queue.Queue(),
         "output_queue": queue.Queue(),
         "stop_event": Event(),
         "throughput_debug": True,
         "worker_number": 2,
     }
     file_path = "/Users/superCat/Desktop/PycharmProjectPortable/sync/Moz_filtering.csv"
     archive_filter = get_archive_filter(**input_param)
     majestic_filter = get_majestic_filter(**input_param)
     param = {"Account": majestic_account}
     # newline='' lets the csv module do its own newline handling, as its
     # documentation requires for file objects passed to csv.reader.
     with open(file_path, 'rt', newline='') as csvfile:
         reader = csv.reader(csvfile, delimiter=',')
         for count, (link, da) in enumerate(reader):
             site = FilteredDomainData(domain=link, da=int(da))
             print(count, " process:", link)
             archive_data = archive_filter.process_data(data=site)
             if archive_data is not None:
                 majestic_data = majestic_filter.process_data(data=site, **param)
                 print(majestic_data)
Example #6
0
 def format_input(data):
     """Coerce *data* into a ``FilteredDomainData`` where possible.

     * A ``FilteredDomainData`` instance is returned unchanged.
     * A 2-tuple yields a new ``FilteredDomainData`` built from its first
       element; the second element is ignored.
     * Anything else yields ``None``.
     """
     if isinstance(data, FilteredDomainData):
         return data
     is_pair = isinstance(data, tuple) and len(data) == 2
     return FilteredDomainData(domain=data[0]) if is_pair else None
Example #7
0
 def input_thread():
     """Queue ``site_count`` synthetic domains (``domain0.com``, ``domain1.com``, ...).

     Each name is wrapped in a ``FilteredDomainData`` and put on ``input_q``,
     followed by a 0.1 ms sleep. ``site_count`` and ``input_q`` are taken
     from the enclosing scope.
     """
     for index in range(site_count):
         domain_name = "domain{0:d}.com".format(index)
         input_q.put(FilteredDomainData(domain=domain_name))
         time.sleep(0.0001)