Beispiel #1
0
  def test_add_ip_metadata_caida(self) -> None:
    """Test that _add_ip_metadata yields the expected metadata per IP."""
    scan_date = '2020-01-01'

    cloudflare_key: satellite.DateIpKey = (scan_date, '1.1.1.1')
    cloudflare_metadata: beam_tables.Row = {
        'netblock': '1.0.0.1/24',
        'asn': 13335,
        'as_name': 'CLOUDFLARENET',
        'as_full_name': 'Cloudflare Inc.',
        'as_class': 'Content',
        'country': 'US',
        'organization': 'Fake Cloudflare Sub-Org',
    }

    google_key: satellite.DateIpKey = (scan_date, '8.8.8.8')
    google_metadata: beam_tables.Row = {
        'netblock': '8.8.8.0/24',
        'asn': 15169,
        'as_name': 'GOOGLE',
        'as_full_name': 'Google LLC',
        'as_class': 'Content',
        'country': 'US',
        # No organization data is added since the ASN doesn't match dbip
    }

    expected = [
        (cloudflare_key, cloudflare_metadata),
        (google_key, google_metadata),
    ]

    pipeline_runner = beam_tables.ScanDataBeamPipelineRunner(
        '', '', '', '', FakeIpMetadataChooserFactory())
    # _add_ip_metadata's result is materialized so it can be compared
    # element by element, in order.
    actual = list(
        pipeline_runner._add_ip_metadata(scan_date, ['1.1.1.1', '8.8.8.8']))

    self.assertListEqual(actual, expected)
Beispiel #2
0
  def test_get_full_table_name(self) -> None:
    """Test that the full table name is '<project>:<dataset.table>'."""
    project_name = 'firehook-censoredplanet'
    pipeline_runner = beam_tables.ScanDataBeamPipelineRunner(
        project_name, '', '', '', FakeIpMetadataChooserFactory())

    self.assertEqual(
        pipeline_runner._get_full_table_name('prod.echo_scan'),
        'firehook-censoredplanet:prod.echo_scan')
Beispiel #3
0
    def test_add_ip_metadata(self) -> None:
        """Test that _add_ip_metadata yields the expected metadata per IP."""
        scan_date = '2020-01-01'

        cloudflare_key: beam_tables.DateIpKey = (scan_date, '1.1.1.1')
        cloudflare_metadata: beam_tables.Row = {
            'netblock': '1.0.0.1/24',
            'asn': 13335,
            'as_name': 'CLOUDFLARENET',
            'as_full_name': 'Cloudflare Inc.',
            'as_class': 'Content',
            'country': 'US',
        }

        google_key: beam_tables.DateIpKey = (scan_date, '8.8.8.8')
        google_metadata: beam_tables.Row = {
            'netblock': '8.8.8.0/24',
            'asn': 15169,
            'as_name': 'GOOGLE',
            'as_full_name': 'Google LLC',
            'as_class': 'Content',
            'country': 'US',
        }

        expected = [
            (cloudflare_key, cloudflare_metadata),
            (google_key, google_metadata),
        ]

        pipeline_runner = beam_tables.ScanDataBeamPipelineRunner(
            '', {}, '', '', '', FakeIpMetadata, '')
        # Materialize the result so it can be compared in order.
        actual = list(
            pipeline_runner._add_ip_metadata(scan_date,
                                             ['1.1.1.1', '8.8.8.8']))

        self.assertListEqual(actual, expected)
Beispiel #4
0
def get_firehook_beam_pipeline_runner(
) -> beam_tables.ScanDataBeamPipelineRunner:
    """Build a ScanDataBeamPipelineRunner from the firehook resource values.

    Returns:
      A runner constructed with the project name, BigQuery scan schema,
      input bucket, staging/temp locations, the IpMetadata class and the
      CAIDA file location taken from ``firehook_resources``.
    """
    # Imported inside the function to avoid beam pickling issues.
    import firehook_resources  # pylint: disable=import-outside-toplevel

    project = firehook_resources.PROJECT_NAME
    schema = beam_tables.SCAN_BIGQUERY_SCHEMA
    input_bucket = firehook_resources.INPUT_BUCKET
    staging_location = firehook_resources.BEAM_STAGING_LOCATION
    temp_location = firehook_resources.BEAM_TEMP_LOCATION
    caida_location = firehook_resources.CAIDA_FILE_LOCATION

    runner = beam_tables.ScanDataBeamPipelineRunner(
        project, schema, input_bucket, staging_location, temp_location,
        ip_metadata.IpMetadata, caida_location)
    return runner
def get_firehook_beam_pipeline_runner(
) -> beam_tables.ScanDataBeamPipelineRunner:
    """Build a ScanDataBeamPipelineRunner from the firehook resource values.

    Returns:
      A runner constructed with the project name, input bucket and
      staging/temp locations from ``firehook_resources``, plus an
      IpMetadataChooserFactory built from the CAIDA, Maxmind and DBIP
      file locations.
    """
    # Imported inside the function to avoid beam pickling issues.
    import firehook_resources  # pylint: disable=import-outside-toplevel

    caida_location = firehook_resources.CAIDA_FILE_LOCATION
    maxmind_location = firehook_resources.MAXMIND_FILE_LOCATION
    dbip_location = firehook_resources.DBIP_FILE_LOCATION
    metadata_chooser_factory = IpMetadataChooserFactory(
        caida_location, maxmind_location, dbip_location)

    runner = beam_tables.ScanDataBeamPipelineRunner(
        firehook_resources.PROJECT_NAME,
        firehook_resources.INPUT_BUCKET,
        firehook_resources.BEAM_STAGING_LOCATION,
        firehook_resources.BEAM_TEMP_LOCATION,
        metadata_chooser_factory)
    return runner
Beispiel #6
0
  def disabled_test_add_ip_metadata_maxmind(self) -> None:
    """Test that the country comes from Maxmind when it is otherwise missing.

    The ``disabled_`` name prefix keeps this method out of test discovery.
    """
    # TODO turn back on once maxmind is reenabled.
    scan_date = '2020-01-01'
    probe_ip = '1.1.1.3'

    expected_key = (scan_date, probe_ip)
    expected_metadata = {
        'netblock': '1.0.0.1/24',
        'asn': 13335,
        'as_name': 'CLOUDFLARENET',
        'as_full_name': 'Cloudflare Inc.',
        'as_class': 'Content',
        # Test Maxmind lookup when country data is missing
        # Cloudflare IPs return Australia
        'country': 'AU',
        'organization': 'Fake Cloudflare Sub-Org',
    }

    pipeline_runner = beam_tables.ScanDataBeamPipelineRunner(
        '', '', '', '', FakeIpMetadataChooserFactory())
    actual = list(pipeline_runner._add_ip_metadata(scan_date, [probe_ip]))

    self.assertListEqual(actual, [(expected_key, expected_metadata)])
Beispiel #7
0
  def test_add_metadata(self) -> None:  # pylint: disable=no-self-use
    """Test adding IP metadata to measurements.

    Four rows (two IPs, each on two dates) are passed through
    ``_add_metadata`` inside a test pipeline, and the output is compared
    against the same rows extended with the metadata fields for their IP.
    """
    rows: List[beam_tables.Row] = [{
        'domain': 'www.example.com',
        'ip': '8.8.8.8',
        'date': '2020-01-01',
        'success': True,
    }, {
        'domain': 'www.example.com',
        'ip': '1.1.1.1',
        'date': '2020-01-01',
        'success': False,
    }, {
        'domain': 'www.example.com',
        'ip': '8.8.8.8',
        'date': '2020-01-02',
        'success': False,
    }, {
        'domain': 'www.example.com',
        'ip': '1.1.1.1',
        'date': '2020-01-02',
        'success': True,
    }]

    runner = beam_tables.ScanDataBeamPipelineRunner(
        '', '', '', '', FakeIpMetadataChooserFactory())

    # assert_that only attaches a check to the pipeline graph; it is
    # evaluated when the pipeline runs. Using the pipeline as a context
    # manager runs it on exit, so the comparison below is really enforced.
    # NOTE(review): previously the pipeline was never run here, so these
    # expectations were not being checked - confirm them against the fake
    # metadata (e.g. whether 1.1.1.1 rows should carry 'organization').
    with TestPipeline() as p:
      # Kept under a separate name so `rows` stays a plain list of Row.
      row_pcoll = p | beam.Create(rows)

      rows_with_metadata = runner._add_metadata(row_pcoll)
      beam_test_util.assert_that(
          rows_with_metadata,
          beam_test_util.equal_to([{
              'domain': 'www.example.com',
              'ip': '8.8.8.8',
              'date': '2020-01-01',
              'success': True,
              'netblock': '8.8.8.0/24',
              'asn': 15169,
              'as_name': 'GOOGLE',
              'as_full_name': 'Google LLC',
              'as_class': 'Content',
              'country': 'US',
          }, {
              'domain': 'www.example.com',
              'ip': '1.1.1.1',
              'date': '2020-01-01',
              'success': False,
              'netblock': '1.0.0.1/24',
              'asn': 13335,
              'as_name': 'CLOUDFLARENET',
              'as_full_name': 'Cloudflare Inc.',
              'as_class': 'Content',
              'country': 'US',
          }, {
              'domain': 'www.example.com',
              'ip': '8.8.8.8',
              'date': '2020-01-02',
              'success': False,
              'netblock': '8.8.8.0/24',
              'asn': 15169,
              'as_name': 'GOOGLE',
              'as_full_name': 'Google LLC',
              'as_class': 'Content',
              'country': 'US',
          }, {
              'domain': 'www.example.com',
              'ip': '1.1.1.1',
              'date': '2020-01-02',
              'success': True,
              'netblock': '1.0.0.1/24',
              'asn': 13335,
              'as_name': 'CLOUDFLARENET',
              'as_full_name': 'Cloudflare Inc.',
              'as_class': 'Content',
              'country': 'US',
          }]))