Example 1
def test_happy_path(valid_build, client, elasticsearch):
    build = valid_build()
    Build.insert(build)
    elasticsearch.flush()

    url = reverse("api:search")
    response = client.get(url)
    assert response.status_code == 200
    result = response.json()
    assert result["hits"]["total"] == 1
    hit, = result["hits"]["hits"]
    assert hit["_source"]["target"]["version"] == build["target"]["version"]
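The elasticsearch.flush() call above matters because Elasticsearch indexes documents near-real-time: a freshly written document only becomes searchable once the index refreshes. The fixture itself is not shown in these examples; a minimal sketch of what it could do, assuming elasticsearch_dsl manages the default connection (the helper class and fixture body here are guesses):

import pytest
from elasticsearch_dsl.connections import connections


@pytest.fixture
def elasticsearch():
    class Helper:
        def flush(self):
            # Force a refresh so everything indexed so far is searchable.
            connections.get_connection().indices.refresh()

    return Helper()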
Example 2
def test_insert(settings, valid_build):
    build = valid_build()
    inserted = Build.insert(build)
    assert inserted.build_hash
    assert inserted.build == build
    assert inserted.created_at
    assert inserted.build_hash in repr(inserted)

    # It's idempotent.
    second_time = Build.insert(build)
    assert not second_time
    assert Build.objects.all().count() == 1
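test_insert relies on Build.insert() returning a falsy value the second time the same document arrives. A minimal sketch of content-addressed idempotency under that contract; the md5-based scheme is only inferred from the "v1:" plus 32 hex digits build_hash shown in test_model_serialization's docstring further down, and the in-memory store is purely illustrative:

import hashlib
import json


def get_build_hash(build):
    # Deterministic digest of the canonicalized JSON document.
    canonical = json.dumps(build, sort_keys=True).encode("utf-8")
    return "v1:" + hashlib.md5(canonical).hexdigest()


_store = {}


def insert(build):
    build_hash = get_build_hash(build)
    if build_hash in _store:
        return None  # duplicate: insert nothing, return something falsy
    _store[build_hash] = build
    return build_hash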
Example 3
def test_happy_path_records(valid_build, client, elasticsearch):
    url = reverse("api:records")
    response = client.get(url)
    assert response.status_code == 200
    result = response.json()
    assert result["builds"]["total"] == 0

    build = valid_build()
    Build.insert(build)
    response = client.get(url)
    assert response.status_code == 200
    result = response.json()
    assert result["builds"]["total"] == 1
Example 4
def test_insert_invalid(settings, valid_build):
    build = valid_build()
    # We can't completely mess with the schema to the point where it
    # breaks Elasticsearch writes.
    build["source"]["junk"] = True
    with pytest.raises(ValidationError) as exception:
        Build.insert(build)
    err_msg = "Additional properties are not allowed ('junk' was unexpected)"
    assert err_msg in str(exception.value)

    # The 'skip_validation' flag is kinda dumb, but it exists for when you're
    # super certain that the stuff you're inserting really is valid.
    Build.insert(build, skip_validation=True)
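The error message asserted in this test is the one the jsonschema library produces when a schema sets additionalProperties to false. A self-contained sketch of the validation step; the schema fragment and the validate_build() helper are illustrative, not the project's actual schema module:

import jsonschema

SCHEMA = {
    "type": "object",
    "properties": {
        "source": {
            "type": "object",
            "properties": {"product": {"type": "string"}},
            "additionalProperties": False,
        }
    },
}


def validate_build(build, skip_validation=False):
    if skip_validation:
        return
    # Raises jsonschema.ValidationError on extra keys such as 'junk'.
    jsonschema.validate(build, SCHEMA)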
Example 5
def test_happy_path(valid_build, client, elasticsearch):
    build = valid_build()
    Build.insert(build)
    elasticsearch.flush()

    url = reverse("api:search")
    response = client.get(url)
    assert response.status_code == 200
    result = response.json()
    assert result["hits"]["total"] == 1
    (hit, ) = result["hits"]["hits"]
    assert hit["_source"]["target"]["version"] == build["target"]["version"]

    # No CSP header for the API requests since they're always JSON.
    assert not response.has_header("Content-Security-Policy")
Example 6
def test_model_serialization(valid_build):
    """Example document:
    ```
    {
        "build_hash": "v1:465552ab2ea1b5039a086987b70c598c",
        "metadata": {
            "version": "Testing"
        },
        "build": {
            ...
        },
        "created_at": "2020-01-10T22:46:32.274Z",
        "s3_object_key": "",
        "s3_object_etag": ""
    }
    ```
    """
    build = valid_build()
    inserted = Build.insert(build)
    doc = inserted.to_dict()
    assert set(doc.keys()) == {
        "build_hash",
        "build",
        "metadata",
        "created_at",
        "s3_object_key",
        "s3_object_etag",
    }
Example 7
def test_rebuild_bigquery_command(bigquery_client, bigquery_testing_table,
                                  valid_build, settings):
    """Test that the fixture is created and insertion is successful.

    Note that streaming data into a recreated table does not work in testing due
    to caching (see salting in the bigquery fixture in conftest.py).
    """
    client = bigquery_client
    table = bigquery_testing_table

    # We insert data into the database that predates BigQuery functionality
    settings.BQ_ENABLED = False
    n_documents = 10
    build = valid_build()
    for i in range(n_documents):
        build["build"]["number"] = i
        inserted = Build.insert(build)
        assert inserted

    settings.BQ_ENABLED = True
    settings.BQ_DATASET_ID = table.dataset_id
    settings.BQ_TABLE_ID = table.table_id
    settings.BQ_REBUILD_MAX_ERROR_COUNT = 0
    # done in 4 chunks
    settings.BQ_REBUILD_CHUNK_SIZE = 3

    call_command("rebuild-bigquery", yes=True)

    table_id = f"{table.dataset_id}.{table.table_id}"
    query = f"SELECT COUNT(*) as n_rows FROM {table_id}"
    print(query)
    job = client.query(query)
    result = list(job.result())[0]
    assert result.n_rows == n_documents
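With BQ_REBUILD_CHUNK_SIZE = 3, the ten documents above stream to BigQuery in four batches (3 + 3 + 3 + 1), matching the "done in 4 chunks" comment. A sketch of the batching a rebuild command like this needs; the helper name is assumed, not taken from the project:

def chunked(iterable, size):
    # Yield lists of at most `size` items, preserving order.
    batch = []
    for item in iterable:
        batch.append(item)
        if len(batch) == size:
            yield batch
            batch = []
    if batch:
        yield batch


assert [len(c) for c in chunked(range(10), 3)] == [3, 3, 3, 1]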
Example 8
def test_insert_writes_to_elasticsearch(settings, elasticsearch, valid_build):
    build = valid_build()
    inserted = Build.insert(build)
    assert inserted

    # Because Elasticsearch is async, the content written won't be there
    # until we wait or flush.
    elasticsearch.flush()
    search = BuildDoc.search()
    response = search.execute()
    assert response.hits.total == 1
    (build_doc, ) = response
    assert build_doc.id == inserted.id
    as_dict = build_doc.to_dict()
    as_dict.pop("id")
    # Can't easily compare these because elasticsearch_dsl will convert
    # dates to datetime.datetime objects.
    # But if we convert dates from the Elasticsearch query to a string
    # we can compare.
    as_dict["build"]["date"] = as_dict["build"]["date"].isoformat()[:19]
    as_dict["download"]["date"] = as_dict["download"]["date"].isoformat()[:19]
    build = inserted.build
    build["build"]["date"] = build["build"]["date"][:19]
    build["download"]["date"] = build["download"]["date"][:19]
    assert as_dict == build
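The [:19] slices work because the first 19 characters of an ISO-8601 timestamp are exactly YYYY-MM-DDTHH:MM:SS, which drops microseconds and any timezone suffix, so both sides end up as second-resolution strings:

from datetime import datetime

iso = datetime(2020, 1, 10, 22, 46, 32, 274000).isoformat()
assert iso == "2020-01-10T22:46:32.274000"
assert iso[:19] == "2020-01-10T22:46:32"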
Example 9
def download_and_insert(obj, maybe=False):
    key = obj["Key"]
    with io.BytesIO() as f:
        # 'bucket_name' and 's3_client' are hoisted from the closure.
        s3_client.download_fileobj(bucket_name, key, f)
        # After it has been populated by download_fileobj() we need to
        # rewind it so we can send it to json.load().
        f.seek(0)
        # Before exiting this context (and freeing up the binary data),
        # we turn it into a Python dict.
        build = json.load(f)
    inserted = Build.insert(
        build=build, s3_object_key=obj["Key"], s3_object_etag=obj["ETag"]
    )
    if inserted:
        logger.info(f"New Build inserted from backfill ({key})")
        metrics.incr("backfill_inserted")
    else:
        logger.info(f"Key downloaded but not inserted again ({key})")
        metrics.incr("backfill_not_inserted")
    if maybe and not inserted:
        # If this happens, it means that a build with exactly this
        # build_hash already exists but its ETag isn't matching, so
        # update the s3_object_* attributes.
        found = Build.objects.filter(
            s3_object_key=key, build_hash=Build.get_build_hash(build)
        )
        found.update(s3_object_etag=obj["ETag"])
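A sketch of the loop that could feed download_and_insert(), written in the same closure style as the snippet above (s3_client and bucket_name hoisted); the driver name is illustrative, but the ContinuationToken / NextContinuationToken handling is boto3's standard list_objects_v2 pagination contract, which the backfill tests further down mock explicitly:

def backfill_bucket():
    kwargs = {"Bucket": bucket_name}
    while True:
        page = s3_client.list_objects_v2(**kwargs)
        for obj in page.get("Contents", []):
            if obj["Key"].endswith("buildhub.json"):
                # maybe=True lets an existing Build get its ETag refreshed.
                download_and_insert(obj, maybe=True)
        token = page.get("NextContinuationToken")
        if not token:
            break
        # Ask for the next page of keys.
        kwargs["ContinuationToken"] = token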
Example 10
def test_ingest_idempotently(
    mocked_boto3, settings, valid_build, itertools_count, mocker
):
    mocked_message = mocker.MagicMock()
    message = {
        "Message": json.dumps(
            {
                "Records": [
                    {
                        "s3": {
                            "object": {
                                "key": "some/path/to/buildhub.json",
                                "eTag": "e4eb6609382efd6b3bc9deec616ad5c0",
                            },
                            "bucket": {"name": "buildhubses"},
                        }
                    }
                ]
            }
        )
    }

    mocked_message.body = json.dumps(message)
    mocked_queue = mocker.MagicMock()
    mocked_queue.receive_messages().__iter__.return_value = [mocked_message]
    mocked_boto3.resource().get_queue_by_name.return_value = mocked_queue

    mocked_s3_client = mocker.MagicMock()
    mocked_boto3.client.return_value = mocked_s3_client

    build = valid_build()
    Build.insert(build)

    def mocked_download_fileobj(bucket_name, key_name, f):
        # Sanity checks that the mocking is right
        assert bucket_name == "buildhubses"
        assert key_name == "some/path/to/buildhub.json"
        f.write(json.dumps(build).encode("utf-8"))

    mocked_s3_client.download_fileobj.side_effect = mocked_download_fileobj
    start(settings.SQS_QUEUE_URL)
    mocked_boto3.resource().get_queue_by_name.assert_called_with(
        QueueName="buildhub-s3-events"
    )
    # It should have created no new Builds
    assert Build.objects.all().count() == 1
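Note the double JSON encoding in this test: the SQS message body is an SNS envelope whose "Message" field is itself a JSON string holding the S3 event. A sketch of the unwrapping the consumer has to do somewhere; the helper name is illustrative:

import json


def extract_s3_records(message_body):
    envelope = json.loads(message_body)      # the SNS wrapper
    event = json.loads(envelope["Message"])  # the embedded S3 event
    return event.get("Records", [])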
Example 11
def test_insert_skips_writes_to_bigquery_when_disabled(mocked_bigquery,
                                                       valid_build, settings):
    settings.BQ_ENABLED = False
    build = valid_build()
    inserted = Build.insert(build)
    assert inserted

    mocked_bigquery.assert_not_called()
Example 12
def process_buildhub_json_key(config, s3):
    logger.debug(f"S3 buildhub.json key {s3!r}")
    key_name = s3["object"]["key"]
    assert os.path.basename(key_name).endswith("buildhub.json"), key_name
    bucket_name = s3["bucket"]["name"]
    # We need an S3 connection client to be able to download this one.
    if bucket_name not in config:
        logger.debug("Creating a new BOTO3 S3 CLIENT")
        if settings.UNSIGNED_SQS_S3_CLIENT:
            config[bucket_name] = boto3.client(
                "s3",
                config["region_name"],
                config=Config(signature_version=UNSIGNED))
        else:
            config[bucket_name] = boto3.client("s3", config["region_name"])

    with io.BytesIO() as f:
        try:
            config[bucket_name].download_fileobj(bucket_name, key_name, f)
        except ClientError as exception:
            if exception.response["Error"]["Code"] == "404":
                logger.warning(
                    f"Tried to download {key_name} (in {bucket_name}) "
                    "but not found.")
                return
            raise

        # After it has been populated by download_fileobj() we need to
        # rewind it so we can send it to json.load().
        f.seek(0)
        # Before exiting this context (and freeing up the binary data),
        # we turn it into a Python dict.
        build = json.load(f)

    # XXX Needs to deal with how to avoid corrupt buildhub.json S3 keys
    # never leaving the system.
    try:
        inserted = Build.insert(
            build=build,
            s3_object_key=s3["object"]["key"],
            s3_object_etag=s3["object"]["eTag"],
        )
    except ValidationError as exc:
        # We're only doing a try:except ValidationError: here so we get a
        # chance to log a useful message about the S3 object and the
        # validation error message.
        logger.warning(
            "Failed to insert build because the build was not valid. "
            f"S3 key {key_name!r} (bucket {bucket_name!r}). "
            f"Validation error message: {exc.message}")
        raise
    if inserted:
        metrics.incr("sqs_inserted")
        logger.info(
            f"Inserted {key_name} as a valid Build ({inserted.build_hash})")
    else:
        metrics.incr("sqs_not_inserted")
        logger.info(f"Did not insert {key_name} because we already had it")
Example 13
def test_ingest_idempotently(
    mocked_boto3,
    settings,
    valid_build,
    itertools_count,
    mocker,
):
    mocked_message = mocker.MagicMock()
    mocked_message.body = json.dumps({
        'Records': [
            {
                's3': {
                    'object': {
                        'key': 'some/path/to/buildhub.json',
                        'eTag': 'e4eb6609382efd6b3bc9deec616ad5c0',
                    },
                    'bucket': {
                        'name': 'buildhubses',
                    }
                }
            },
        ]
    })
    mocked_queue = mocker.MagicMock()
    mocked_queue.receive_messages().__iter__.return_value = [mocked_message]
    mocked_boto3.resource().get_queue_by_name.return_value = mocked_queue

    mocked_s3_client = mocker.MagicMock()
    mocked_boto3.client.return_value = mocked_s3_client

    build = valid_build()
    Build.insert(build)

    def mocked_download_fileobj(bucket_name, key_name, f):
        # Sanity checks that the mocking is right
        assert bucket_name == 'buildhubses'
        assert key_name == 'some/path/to/buildhub.json'
        f.write(json.dumps(build).encode('utf-8'))

    mocked_s3_client.download_fileobj.side_effect = mocked_download_fileobj
    start(settings.SQS_QUEUE_URL)
    mocked_boto3.resource().get_queue_by_name.assert_called_with(
        QueueName='buildhub-s3-events')
    # It should have created no new Builds
    assert Build.objects.all().count() == 1
Example 14
def test_search_aggregations(valid_build, json_poster, elasticsearch):
    build = valid_build()
    build["target"]["version"] = "60.0.1"
    Build.insert(build)
    build = valid_build()
    build["target"]["version"] = "60.0.2"
    Build.insert(build)
    build = valid_build()
    build["target"]["version"] = "60.1"
    Build.insert(build)

    elasticsearch.flush()

    search = {
        "aggs": {
            "versions": {
                "filter": {"match_all": {}},
                "aggs": {
                    "target.version": {
                        "terms": {
                            "field": "target.version",
                            "size": 1000,
                            "order": {"_term": "desc"},
                            "include": "6.*",
                        }
                    },
                    "target.version_count": {
                        "cardinality": {"field": "target.version"}
                    },
                },
            }
        },
        "size": 0,
    }

    url = reverse("api:search")
    response = json_poster(url, search)
    assert response.status_code == 200
    result = response.json()
    assert result["hits"]["total"] == 3
    assert not result["hits"]["hits"]  # because only aggregations
    agg_key = "versions"
    buckets = result["aggregations"][agg_key]["target.version"]["buckets"]
    assert buckets == [
        {"key": "60.1", "doc_count": 1},
        {"key": "60.0.2", "doc_count": 1},
        {"key": "60.0.1", "doc_count": 1},
    ]

    # This time filter more
    search["aggs"][agg_key]["aggs"]["target.version"]["terms"]["include"] = "60.0.*"
    response = json_poster(url, search)
    assert response.status_code == 200
    result = response.json()

    buckets = result["aggregations"][agg_key]["target.version"]["buckets"]
    assert buckets == [
        {"key": "60.0.2", "doc_count": 1},
        {"key": "60.0.1", "doc_count": 1},
    ]
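The json_poster fixture is not shown in these examples. A plausible minimal version, assuming pytest-django's client fixture; the implementation is a guess based purely on how the tests call it:

import json

import pytest


@pytest.fixture
def json_poster(client):
    def poster(url, payload):
        # POST the payload as a JSON body, as the search endpoint expects.
        return client.post(
            url, json.dumps(payload), content_type="application/json"
        )

    return poster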
Example 15
def process_buildhub_json_key(config, s3):
    logger.debug(f"S3 buildhub.json key {s3!r}")
    key_name = s3['object']['key']
    assert os.path.basename(key_name) == 'buildhub.json', key_name
    bucket_name = s3['bucket']['name']
    # We need an S3 connection client to be able to download this one.
    if bucket_name not in config:
        logger.debug('Creating a new BOTO3 S3 CLIENT')
        config[bucket_name] = boto3.client('s3', config['region_name'])

    with io.BytesIO() as f:
        try:
            config[bucket_name].download_fileobj(bucket_name, key_name, f)
        except ClientError as exception:
            if exception.response['Error']['Code'] == '404':
                logger.warning(
                    f"Tried to download {key_name} (in {bucket_name}) "
                    "but not found.")
                return
            raise

        # After it has been populated by download_fileobj() we need to
        # rewind it so we can send it to json.load().
        f.seek(0)
        # Before exiting this context (and freeing up the binary data),
        # we turn it into a Python dict.
        build = json.load(f)

    # XXX Needs to deal with how to avoid corrupt buildhub.json S3 keys
    # never leaving the system.
    try:
        inserted = Build.insert(
            build=build,
            s3_object_key=s3['object']['key'],
            s3_object_etag=s3['object']['eTag'],
        )
    except ValidationError as exc:
        # We're only doing a try:except ValidationError: here so we get a
        # chance to log a useful message about the S3 object and the
        # validation error message.
        logger.warning(
            "Failed to insert build because the build was not valid. "
            f"S3 key {key_name!r} (bucket {bucket_name!r}). "
            f"Validation error message: {exc.message}")
        raise
    if inserted:
        logger.info(
            f"Inserted {key_name} as a valid Build ({inserted.build_hash})")
    else:
        logger.info(f"Did not insert {key_name} because we already had it")
Example 16
def test_serialized_instance_inserts_into_bigquery(bigquery_client,
                                                   bigquery_testing_table,
                                                   valid_build, settings):
    """Test that the fixture is created and insertion is successful."""
    # This test does not rely on auto-insertion
    settings.BQ_ENABLED = False
    client = bigquery_client
    table = bigquery_testing_table
    doc = Build.insert(valid_build()).to_dict()
    errors = client.insert_rows(table, [doc])
    assert errors == []

    table_id = f"{table.dataset_id}.{table.table_id}"
    job = client.query(f"SELECT COUNT(*) as n_rows FROM {table_id}")
    result = list(job.result())[0]
    assert result.n_rows == 1
Example 17
def test_insert_writes_to_bigquery_when_enabled(mocked_bigquery, valid_build,
                                                settings, mocker):
    mocked_client = mocker.MagicMock()
    mocked_bigquery.Client.return_value = mocked_client

    settings.BQ_ENABLED = True
    build = valid_build()
    inserted = Build.insert(build)
    assert inserted

    mocked_client.insert_rows.assert_called_once()
    args = mocked_client.insert_rows.call_args
    # takes a (table, document) tuple
    documents = args[0][1]
    assert len(documents) == 1
    assert documents[0]["build_hash"] == inserted.build_hash
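For reference, mock's call_args unpacks as an (args, kwargs) pair, which is why the row list lands at args[0][1] above: the rows are the second positional argument of insert_rows(table, rows). A tiny standalone demonstration:

from unittest import mock

m = mock.MagicMock()
m.insert_rows("some-table", [{"build_hash": "v1:abc"}])
args, kwargs = m.insert_rows.call_args
assert args[1] == [{"build_hash": "v1:abc"}]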
Example 18
def test_insert_writes_to_bigquery(bigquery_client, bigquery_testing_table,
                                   valid_build, settings):
    """Test that the fixture is created and insertion is successful."""
    client = bigquery_client
    table = bigquery_testing_table

    # mock settings to ensure callback sends data to the right place
    settings.BQ_DATASET_ID = table.dataset_id
    settings.BQ_TABLE_ID = table.table_id

    build = valid_build()
    inserted = Build.insert(build)
    assert inserted

    table_id = f"{table.dataset_id}.{table.table_id}"
    job = client.query(f"SELECT COUNT(*) as n_rows FROM {table_id}")
    result = list(job.result())[0]
    assert result.n_rows == 1
Example 19
def test_search_aggregations(valid_build, json_poster, elasticsearch):
    build = valid_build()
    build['target']['version'] = '60.0.1'
    Build.insert(build)
    build = valid_build()
    build['target']['version'] = '60.0.2'
    Build.insert(build)
    build = valid_build()
    build['target']['version'] = '60.1'
    Build.insert(build)

    elasticsearch.flush()

    search = {
        "aggs": {
            "versions": {
                "filter": {
                    "match_all": {}
                },
                "aggs": {
                    "target.version": {
                        "terms": {
                            "field": "target.version",
                            "size": 1000,
                            "order": {
                                "_term": "desc"
                            },
                            "include": "6.*"
                        }
                    },
                    "target.version_count": {
                        "cardinality": {
                            "field": "target.version"
                        }
                    }
                }
            }
        },
        "size": 0
    }

    url = reverse('api:search')
    response = json_poster(url, search)
    assert response.status_code == 200
    result = response.json()
    assert result['hits']['total'] == 3
    assert not result['hits']['hits']  # because only aggregations
    agg_key = 'versions'
    buckets = result['aggregations'][agg_key]['target.version']['buckets']
    assert buckets == [{
        'key': '60.1',
        'doc_count': 1
    }, {
        'key': '60.0.2',
        'doc_count': 1
    }, {
        'key': '60.0.1',
        'doc_count': 1
    }]

    # This time filter more
    search['aggs'][agg_key]['aggs']['target.version']['terms'][
        'include'] = r'60\.0.*'
    response = json_poster(url, search)
    assert response.status_code == 200
    result = response.json()

    buckets = result['aggregations'][agg_key]['target.version']['buckets']
    assert buckets == [{
        'key': '60.0.2',
        'doc_count': 1
    }, {
        'key': '60.0.1',
        'doc_count': 1
    }]
Example 20
def test_backfill_happy_path(
    mocked_boto3,
    settings,
    valid_build,
    itertools_count,
    mocker,
):

    # Create a ready build that is *exactly* like our mocked S3 thing is.
    build = valid_build()
    build['download']['mimetype'] = 'one/buildhub.json'
    Build.insert(
        build=build,
        s3_object_key='one/buildhub.json',
        s3_object_etag='abc123',
    )

    # Create one build that has the same build_hash as the second mocked
    # key but make the s3_object_etag mismatch.
    build = valid_build()
    build['download']['mimetype'] = 'two/buildhub.json'
    Build.insert(
        build=build,
        s3_object_key='two/buildhub.json',
        s3_object_etag='somethingdifferent',
    )

    mocked_s3_client = mocker.MagicMock()
    mocked_boto3.client.return_value = mocked_s3_client

    def mocked_download_fileobj(bucket_name, key_name, f):
        assert bucket_name == 'buildhubses'
        build = valid_build()
        # Just need to mess with the build a little bit so that it's
        # still valid to the schema but makes a different build_hash.
        if key_name == 'two/buildhub.json':
            build['download']['mimetype'] = key_name
        elif key_name == 'three/buildhub.json':
            build['download']['mimetype'] = key_name
        else:
            raise NotImplementedError(key_name)
        f.write(json.dumps(build).encode('utf-8'))

    mocked_s3_client.download_fileobj.side_effect = mocked_download_fileobj

    def mocked_list_objects(**kwargs):
        if kwargs.get('ContinuationToken'):  # you're on page 2
            return {
                'Contents': [
                    {
                        'Key': 'three/buildhub.json',
                        'ETag': 'ghi345',
                    },
                ]
            }
        else:
            return {
                'Contents': [
                    {
                        'Key': 'one/buildhub.json',
                        'ETag': 'abc123',
                    },
                    {
                        'Key': 'two/buildhub.json',
                        'ETag': 'def234',
                    },
                ],
                'NextContinuationToken': 'nextpageplease',
            }

    mocked_s3_client.list_objects_v2.side_effect = mocked_list_objects
    backfill(settings.S3_BUCKET_URL)

    # We had 2 before, this should have created 1 new and edited 1
    assert Build.objects.all().count() == 3
    # The second one should have had its etag updated
    assert not Build.objects.filter(
        s3_object_key='two/buildhub.json',
        s3_object_etag='somethingdifferent',
    )
    assert Build.objects.get(
        s3_object_key='two/buildhub.json',
        s3_object_etag='def234',
    )
Example 21
def test_backfill_happy_path(mocked_boto3, settings, valid_build,
                             itertools_count, mocker):

    # Create a ready build that is *exactly* like our mocked S3 thing is.
    build = valid_build()
    build["download"]["mimetype"] = "one/buildhub.json"
    Build.insert(build=build,
                 s3_object_key="one/buildhub.json",
                 s3_object_etag="abc123")

    # Create one build that has the same build_hash as the second mocked
    # key but make the s3_object_etag mismatch.
    build = valid_build()
    build["download"]["mimetype"] = "two/buildhub.json"
    Build.insert(
        build=build,
        s3_object_key="two/buildhub.json",
        s3_object_etag="somethingdifferent",
    )

    mocked_s3_client = mocker.MagicMock()
    mocked_boto3.client.return_value = mocked_s3_client

    def mocked_download_fileobj(bucket_name, key_name, f):
        assert bucket_name == "buildhubses"
        build = valid_build()
        # Just need to mess with the build a little bit so that it's
        # still valid to the schema but makes a different build_hash.
        if key_name == "two/buildhub.json":
            build["download"]["mimetype"] = key_name
        elif key_name == "three/buildhub.json":
            build["download"]["mimetype"] = key_name
        elif key_name == "three/Firefox-99-buildhub.json":
            build["download"]["mimetype"] = key_name
        else:
            raise NotImplementedError(key_name)
        f.write(json.dumps(build).encode("utf-8"))

    mocked_s3_client.download_fileobj.side_effect = mocked_download_fileobj

    def mocked_list_objects(**kwargs):
        if kwargs.get("ContinuationToken"):  # you're on page 2
            return {
                "Contents": [{
                    "Key": "three/buildhub.json",
                    "ETag": "ghi345"
                }]
            }
        else:
            return {
                "Contents": [
                    {
                        "Key": "one/buildhub.json",
                        "ETag": "abc123"
                    },
                    {
                        "Key": "two/buildhub.json",
                        "ETag": "def234"
                    },
                    {
                        "Key": "three/Firefox-99-buildhub.json",
                        "ETag": "xyz987"
                    },
                ],
                "NextContinuationToken":
                "nextpageplease",
            }

    mocked_s3_client.list_objects_v2.side_effect = mocked_list_objects
    backfill(settings.S3_BUCKET_URL)

    # We had 2 before, this should have created 2 new and edited 1
    assert Build.objects.all().count() == 4
    # The second one should have had its etag updated
    assert not Build.objects.filter(s3_object_key="two/buildhub.json",
                                    s3_object_etag="somethingdifferent")
    assert Build.objects.get(s3_object_key="two/buildhub.json",
                             s3_object_etag="def234")
    assert Build.objects.get(s3_object_key="three/Firefox-99-buildhub.json",
                             s3_object_etag="xyz987")
Example 22
def process_buildhub_json_key(config, s3):
    logger.debug(f"S3 buildhub.json key {s3!r}")
    key_name = s3["object"]["key"]
    assert os.path.basename(key_name).endswith("buildhub.json"), key_name
    bucket_name = s3["bucket"]["name"]
    # We need an S3 connection client to be able to download this one.
    if bucket_name not in config:
        logger.debug("Creating a new BOTO3 S3 CLIENT")
        connection_config = None
        if settings.UNSIGNED_S3_CLIENT:
            connection_config = Config(signature_version=UNSIGNED)
        config[bucket_name] = boto3.client("s3",
                                           config["region_name"],
                                           config=connection_config)

    with io.BytesIO() as f:
        try:
            config[bucket_name].download_fileobj(bucket_name, key_name, f)
        except ClientError as exception:
            if exception.response["Error"]["Code"] == "404":
                logger.warning(
                    f"Tried to download {key_name} (in {bucket_name}) "
                    "but not found.")
                return
            raise

        # After it has been populated by download_fileobj() we need to
        # rewind it so we can send it to json.load().
        f.seek(0)
        # Before exiting this context (and freeing up the binary data),
        # we turn it into a Python dict.
        build = json.load(f)

    # XXX Needs to deal with how to avoid corrupt buildhub.json S3 keys
    # never leaving the system.
    inserted = []
    try:
        ret = Build.insert(
            build=build,
            s3_object_key=s3["object"]["key"],
            s3_object_etag=s3["object"]["eTag"],
        )
        inserted.append(ret)
        # This is a hack to fix https://bugzilla.mozilla.org/show_bug.cgi?id=1470948
        # In some future world we might be able to architect buildhub in such
        # a way that this sort of transformation isn't buried deep in the code.
        if (build["source"]["product"] == "firefox"
                and build["target"]["channel"] == "release"):
            beta_build = deepcopy(build)
            beta_build["target"]["channel"] = "beta"
            ret = Build.insert(
                build=beta_build,
                s3_object_key=s3["object"]["key"],
                s3_object_etag=s3["object"]["eTag"],
            )
            inserted.append(ret)

    except ValidationError as exc:
        # We're only doing a try:except ValidationError: here so we get a
        # chance to log a useful message about the S3 object and the
        # validation error message.
        logger.warning(
            "Failed to insert build because the build was not valid. "
            f"S3 key {key_name!r} (bucket {bucket_name!r}). "
            f"Validation error message: {exc.message}")
        raise
    # Build.insert() above can return None (for Builds that already exist).
    # If anything was _actually_ inserted, log it.
    if any(inserted):
        for i in inserted:
            if not i:
                # This one was a duplicate; only log the actual inserts.
                continue
            metrics.incr("sqs_inserted")
            logger.info(
                f"Inserted {key_name} as a valid Build ({i.build_hash})")
    else:
        metrics.incr("sqs_not_inserted")
        logger.info(f"Did not insert {key_name} because we already had it")