Beispiel #1
0
class TestGatherCrawlOutliers(unittest.TestCase):
    """Tests for ``Overwatch.gather_crawl_outliers`` against a mocked
    scrapyd ``listjobs.json`` endpoint fed from a JSON fixture file."""

    def create_file_path(self, file_name):
        """Return the path of *file_name* inside the test-data directory."""
        return os.path.join(self.data_file_path, file_name)

    def setUp(self):
        """Create an Overwatch whose HTTP response is served by a
        requests_mock adapter returning the fixture JSON."""
        arguments = parse_arguments(['-p',
                                     'harvestman',
                                     '-d',
                                     'http://192.168.124.30',
                                     '-P',
                                     '6800',
                                     '-s',
                                     '10'])

        self.overwatch = Overwatch(arguments)

        self.data_file_path = os.path.join(os.getcwd(), 'test_data')
        self.json_file_name = 'scrapyd_list_jobs_for_loop_json.json'
        self.json_path = self.create_file_path(self.json_file_name)

        # Close the fixture file deterministically; the original
        # ``open(...).read()`` leaked the handle until GC.
        with open(self.json_path, 'rb') as json_file:
            self.scrapyd_json = json_file.read()

        json_dict = json.loads(self.scrapyd_json)

        self.session = requests.Session()
        self.adapter = requests_mock.Adapter()
        self.session.mount('mock', self.adapter)
        self.adapter.register_uri(
            'GET',
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman',
            json=json_dict,
            status_code=200,
            )

        self.overwatch.response = self.session.get(
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman')
        self.outliers = self.overwatch.gather_crawl_outliers()

    def test_gather_crawl_outliers(self):
        """Outliers must contain the earliest start and the latest end."""
        expected = {'strt': datetime.datetime(2016, 4, 1, 1, 1, 59, 999999),
                    'end': datetime.datetime(2016, 4, 1, 23, 59, 59, 999999)}

        self.assertEqual(self.outliers, expected)
Beispiel #2
0
class TestGatherCrawlOutliers(unittest.TestCase):
    """Tests for ``Overwatch.gather_crawl_outliers`` against a mocked
    scrapyd ``listjobs.json`` endpoint fed from a JSON fixture file."""

    def create_file_path(self, file_name):
        """Return the path of *file_name* inside the test-data directory."""
        return os.path.join(self.data_file_path, file_name)

    def setUp(self):
        """Create an Overwatch whose HTTP response is served by a
        requests_mock adapter returning the fixture JSON."""
        arguments = parse_arguments([
            '-p', 'harvestman', '-d', 'http://192.168.124.30', '-P', '6800',
            '-s', '10'
        ])

        self.overwatch = Overwatch(arguments)

        self.data_file_path = os.path.join(os.getcwd(), 'test_data')
        self.json_file_name = 'scrapyd_list_jobs_for_loop_json.json'
        self.json_path = self.create_file_path(self.json_file_name)

        # Close the fixture file deterministically; the original
        # ``open(...).read()`` leaked the handle until GC.
        with open(self.json_path, 'rb') as json_file:
            self.scrapyd_json = json_file.read()

        json_dict = json.loads(self.scrapyd_json)

        self.session = requests.Session()
        self.adapter = requests_mock.Adapter()
        self.session.mount('mock', self.adapter)
        self.adapter.register_uri(
            'GET',
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman',
            json=json_dict,
            status_code=200,
        )

        self.overwatch.response = self.session.get(
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman')
        self.outliers = self.overwatch.gather_crawl_outliers()

    def test_gather_crawl_outliers(self):
        """Outliers must contain the earliest start and the latest end."""
        expected = {
            'strt': datetime.datetime(2016, 4, 1, 1, 1, 59, 999999),
            'end': datetime.datetime(2016, 4, 1, 23, 59, 59, 999999)
        }

        self.assertEqual(self.outliers, expected)
Beispiel #3
0
class TestOverwatch(unittest.TestCase):
    """Tests for Overwatch metric calculations, driven by a mocked scrapyd
    ``listjobs.json`` response built from the outliers fixture file."""

    def create_file_path(self, file_name):
        """Return the path of *file_name* inside the test-data directory."""
        return os.path.join(self.data_file_path, file_name)

    def setUp(self):
        """Build an Overwatch with 50 concurrent spiders and attach a
        mocked scrapyd response loaded from the fixture JSON."""
        today = datetime.datetime.today().strftime('%d-%m-%Y')
        arguments = parse_arguments(['-p',
                                     'harvestman',
                                     '-d',
                                     'http://192.168.124.30',
                                     '-P',
                                     '6800',
                                     '-s',
                                     '50'])

        self.overwatch = Overwatch(arguments)
        self.exp_url = 'http://192.168.124.30:6800/listjobs.json?project=harvestman'

        filename = '{}_{}.csv'.format(today,
                                      self.overwatch.arguments.project_name[0])

        self.output_file = os.path.join(settings.OUTPUT_PATH, filename)
        self.temp_dir = tempfile.mkdtemp(prefix='temp_test_dir')

        self.data_file_path = os.path.join(os.getcwd(), 'test_data')
        self.json_outliers_file_name = 'scrapyd_list_jobs_outliers_json.json'
        self.scrapyd_outliers_json_path = self.create_file_path(
            self.json_outliers_file_name)

        # Close the fixture file deterministically; the original
        # ``open(...).read()`` leaked the handle until GC.
        with open(self.scrapyd_outliers_json_path, 'rb') as json_file:
            self.scrapyd_outliers_json = json_file.read()

        outliers_json_dict = json.loads(self.scrapyd_outliers_json)
        self.session = requests.Session()
        self.adapter = requests_mock.Adapter()
        self.session.mount('mock', self.adapter)
        self.adapter.register_uri(
            'GET',
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman',
            json=outliers_json_dict,
            status_code=200,
            )

        self.overwatch.response = self.session.get(
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman')

    def tearDown(self):
        """Remove the temporary directory created in setUp; the original
        left one behind per test run."""
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test__init__(self):
        """Constructor must derive the query URL, spider count and output
        file from the parsed arguments."""
        self.assertEqual(self.overwatch.query_url, self.exp_url)
        self.assertEqual(self.overwatch.con_spiders, 50)
        self.assertEqual(self.overwatch.output_file, self.output_file)

    def test_gather_crawl_outliers(self):
        """Outliers must contain the earliest start and the latest end."""
        expected = {'strt': datetime.datetime(2016,
                                              4,
                                              29,
                                              10,
                                              28,
                                              8,
                                              4732),
                    'end': datetime.datetime(2016,
                                             4,
                                             29,
                                             10,
                                             33,
                                             51,
                                             420786)
                    }
        self.assertEqual(self.overwatch.gather_crawl_outliers(), expected)

    def test_gather_completed_crawl_count(self):
        """Fixture contains exactly three finished jobs."""
        self.assertEqual(self.overwatch.gather_completed_crawl_count(), 3)

    def test_calculate_total_duration(self):
        """Total duration is the span between earliest start and latest end."""
        expected = 343.416054
        self.assertEqual(self.overwatch.calculate_total_duration(), expected)

    def test_gather_crawl_durations(self):
        """One duration (seconds) per finished job, in fixture order."""
        expected = [241.547793, 233.489018, 258.652448]
        self.assertEqual(self.overwatch.gather_crawl_durations(), expected)

    def test_calculate_av_crawl_duration(self):
        """Average of the three fixture durations, rounded to 2 places."""
        expected = 244.56
        self.assertEqual(self.overwatch.calculate_av_crawl_duration(),
                         expected)

    def test_calculate_single_crawls_per_hour(self):
        expected = 14.72
        self.assertEqual(self.overwatch.calculate_single_crawls_per_hour(),
                         expected)

    def test_calculate_est_total_crawls_per_hour(self):
        # 14.72 crawls/hour * 50 concurrent spiders = 736.
        expected = 736
        self.assertEqual(self.overwatch.calculate_est_total_crawls_per_hour(),
                         expected)

    def test_calculate_single_crawls_per_day(self):
        expected = 353.28
        self.assertEqual(self.overwatch.calculate_single_crawls_per_day(),
                         expected)

    def test_calculate_est_total_crawls_per_day(self):
        expected = 17664
        self.assertEqual(self.overwatch.calculate_est_total_crawls_per_day(),
                         expected)

    def test_calculate_single_crawls_per_week(self):
        expected = 2472.96
        self.assertEqual(self.overwatch.calculate_single_crawls_per_week(),
                         expected)

    def test_calculate_est_total_crawls_per_week(self):
        expected = 123648
        self.assertEqual(self.overwatch.calculate_est_total_crawls_per_week(),
                         expected)

    def test_gather_scrapy_metrics(self):
        """The aggregate report must combine every individual metric."""
        expected = {
            'Av CR (S)': 244.56,
            'Longest CR (S)': 258.652448,
            'Shortest CR (S)': 233.489018,
            'Total Duration': 343.416054,
            'Single CR p/h': 14.72,
            'Max CR p/h': 736,
            'Single CR p/d': 353.28,
            'Max CR p/d': 17664,
            'Single CR p/7d': 2472.96,
            'Max CR p/7d': 123648,
            'Completed crawls': 3
        }
        self.assertEqual(self.overwatch.gather_scrapy_metrics(), expected)

    def test_write_to_csv(self):
        """write_to_csv must emit a header row plus one metrics row."""
        filename = 'test_ouput.csv'
        self.overwatch.output_file = os.path.join(self.temp_dir,
                                                  filename)
        os.mknod(self.overwatch.output_file)
        self.overwatch.write_to_csv()

        # NOTE(review): binary mode with csv.DictReader only works on
        # Python 2; Python 3 requires text mode — confirm target version.
        with open(self.overwatch.output_file, 'rb') as csvfile:
            reader = csv.DictReader(csvfile)
            csv_data = [row for row in reader]
            self.assertEqual(csv_data[0]['Total Duration'], '343.416054')
            self.assertEqual(csv_data[0]['Completed crawls'], '3')
Beispiel #4
0
class TestOverwatch(unittest.TestCase):
    """Tests for Overwatch metric calculations, driven by a mocked scrapyd
    ``listjobs.json`` response built from the outliers fixture file."""

    def create_file_path(self, file_name):
        """Return the path of *file_name* inside the test-data directory."""
        return os.path.join(self.data_file_path, file_name)

    def setUp(self):
        """Build an Overwatch with 50 concurrent spiders and attach a
        mocked scrapyd response loaded from the fixture JSON."""
        today = datetime.datetime.today().strftime('%d-%m-%Y')
        arguments = parse_arguments([
            '-p', 'harvestman', '-d', 'http://192.168.124.30', '-P', '6800',
            '-s', '50'
        ])

        self.overwatch = Overwatch(arguments)
        self.exp_url = 'http://192.168.124.30:6800/listjobs.json?project=harvestman'

        filename = '{}_{}.csv'.format(today,
                                      self.overwatch.arguments.project_name[0])

        self.output_file = os.path.join(settings.OUTPUT_PATH, filename)
        self.temp_dir = tempfile.mkdtemp(prefix='temp_test_dir')

        self.data_file_path = os.path.join(os.getcwd(), 'test_data')
        self.json_outliers_file_name = 'scrapyd_list_jobs_outliers_json.json'
        self.scrapyd_outliers_json_path = self.create_file_path(
            self.json_outliers_file_name)

        # Close the fixture file deterministically; the original
        # ``open(...).read()`` leaked the handle until GC.
        with open(self.scrapyd_outliers_json_path, 'rb') as json_file:
            self.scrapyd_outliers_json = json_file.read()

        outliers_json_dict = json.loads(self.scrapyd_outliers_json)
        self.session = requests.Session()
        self.adapter = requests_mock.Adapter()
        self.session.mount('mock', self.adapter)
        self.adapter.register_uri(
            'GET',
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman',
            json=outliers_json_dict,
            status_code=200,
        )

        self.overwatch.response = self.session.get(
            'mock://0.0.0.1:6800/listjobs.json?project=harvestman')

    def tearDown(self):
        """Remove the temporary directory created in setUp; the original
        left one behind per test run."""
        import shutil
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test__init__(self):
        """Constructor must derive the query URL, spider count and output
        file from the parsed arguments."""
        self.assertEqual(self.overwatch.query_url, self.exp_url)
        self.assertEqual(self.overwatch.con_spiders, 50)
        self.assertEqual(self.overwatch.output_file, self.output_file)

    def test_gather_crawl_outliers(self):
        """Outliers must contain the earliest start and the latest end."""
        expected = {
            'strt': datetime.datetime(2016, 4, 29, 10, 28, 8, 4732),
            'end': datetime.datetime(2016, 4, 29, 10, 33, 51, 420786)
        }
        self.assertEqual(self.overwatch.gather_crawl_outliers(), expected)

    def test_gather_completed_crawl_count(self):
        """Fixture contains exactly three finished jobs."""
        self.assertEqual(self.overwatch.gather_completed_crawl_count(), 3)

    def test_calculate_total_duration(self):
        """Total duration is the span between earliest start and latest end."""
        expected = 343.416054
        self.assertEqual(self.overwatch.calculate_total_duration(), expected)

    def test_gather_crawl_durations(self):
        """One duration (seconds) per finished job, in fixture order."""
        expected = [241.547793, 233.489018, 258.652448]
        self.assertEqual(self.overwatch.gather_crawl_durations(), expected)

    def test_calculate_av_crawl_duration(self):
        """Average of the three fixture durations, rounded to 2 places."""
        expected = 244.56
        self.assertEqual(self.overwatch.calculate_av_crawl_duration(),
                         expected)

    def test_calculate_single_crawls_per_hour(self):
        expected = 14.72
        self.assertEqual(self.overwatch.calculate_single_crawls_per_hour(),
                         expected)

    def test_calculate_est_total_crawls_per_hour(self):
        # 14.72 crawls/hour * 50 concurrent spiders = 736.
        expected = 736
        self.assertEqual(self.overwatch.calculate_est_total_crawls_per_hour(),
                         expected)

    def test_calculate_single_crawls_per_day(self):
        expected = 353.28
        self.assertEqual(self.overwatch.calculate_single_crawls_per_day(),
                         expected)

    def test_calculate_est_total_crawls_per_day(self):
        expected = 17664
        self.assertEqual(self.overwatch.calculate_est_total_crawls_per_day(),
                         expected)

    def test_calculate_single_crawls_per_week(self):
        expected = 2472.96
        self.assertEqual(self.overwatch.calculate_single_crawls_per_week(),
                         expected)

    def test_calculate_est_total_crawls_per_week(self):
        expected = 123648
        self.assertEqual(self.overwatch.calculate_est_total_crawls_per_week(),
                         expected)

    def test_gather_scrapy_metrics(self):
        """The aggregate report must combine every individual metric."""
        expected = {
            'Av CR (S)': 244.56,
            'Longest CR (S)': 258.652448,
            'Shortest CR (S)': 233.489018,
            'Total Duration': 343.416054,
            'Single CR p/h': 14.72,
            'Max CR p/h': 736,
            'Single CR p/d': 353.28,
            'Max CR p/d': 17664,
            'Single CR p/7d': 2472.96,
            'Max CR p/7d': 123648,
            'Completed crawls': 3
        }
        self.assertEqual(self.overwatch.gather_scrapy_metrics(), expected)

    def test_write_to_csv(self):
        """write_to_csv must emit a header row plus one metrics row."""
        filename = 'test_ouput.csv'
        self.overwatch.output_file = os.path.join(self.temp_dir, filename)
        os.mknod(self.overwatch.output_file)
        self.overwatch.write_to_csv()

        # NOTE(review): binary mode with csv.DictReader only works on
        # Python 2; Python 3 requires text mode — confirm target version.
        with open(self.overwatch.output_file, 'rb') as csvfile:
            reader = csv.DictReader(csvfile)
            csv_data = [row for row in reader]
            self.assertEqual(csv_data[0]['Total Duration'], '343.416054')
            self.assertEqual(csv_data[0]['Completed crawls'], '3')