Example #1
import logging

import run_isolated  # project-local module providing url_open() and MappingError


# encode_multipart_formdata() is a helper from the same module; a sketch is
# given after this example.
def upload_hash_content_to_blobstore(generate_upload_url, data, hash_key,
                                     content):
    """Uploads the given hash contents directly to the blobsotre via a generated
  url.

  Arguments:
    generate_upload_url: The url to get the new upload url from.
    data: extra POST data.
    hash_key: sha1 of the uncompressed version of content.
    content: The contents to upload. Must fit in memory for now.
  """
    logging.debug('Generating url to directly upload file to blobstore')
    assert isinstance(hash_key, str), hash_key
    assert isinstance(content, str), (hash_key, content)
    # TODO(maruel): Support large files. This would require streaming support.
    content_type, body = encode_multipart_formdata(
        data, [('content', hash_key, content)])
    for _ in range(run_isolated.MAX_URL_OPEN_ATTEMPTS):
        # Retry HTTP 50x here.
        response = run_isolated.url_open(generate_upload_url, data=data)
        if not response:
            raise run_isolated.MappingError('Unable to connect to server %s' %
                                            generate_upload_url)
        upload_url = response.read()

        # Do not retry this request on HTTP 50x. Regenerate an upload url each time
        # since uploading "consumes" the upload url.
        result = run_isolated.url_open(upload_url,
                                       data=body,
                                       content_type=content_type,
                                       retry_50x=False)
        if result:
            return result.read()
    raise run_isolated.MappingError('Unable to connect to server %s' %
                                    generate_upload_url)
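The helper encode_multipart_formdata() used above is defined elsewhere in the same module and is not shown. Below is a minimal sketch of what such a helper might look like, assuming a random boundary string and str-based (Python 2 style) bodies to match the snippet; it is an illustration, not the project's actual implementation.

import binascii
import os


def encode_multipart_formdata(fields, files):
    """Builds a multipart/form-data request body.

    fields: dict of plain POST fields.
    files: sequence of (field_name, file_name, content) tuples.
    Returns (content_type, body).
    """
    boundary = binascii.hexlify(os.urandom(16))
    lines = []
    for key, value in fields.items():
        lines.append('--' + boundary)
        lines.append('Content-Disposition: form-data; name="%s"' % key)
        lines.append('')
        lines.append(value)
    for key, filename, content in files:
        lines.append('--' + boundary)
        lines.append(
            'Content-Disposition: form-data; name="%s"; filename="%s"' %
            (key, filename))
        lines.append('Content-Type: application/octet-stream')
        lines.append('')
        lines.append(content)
    lines.append('--' + boundary + '--')
    lines.append('')
    body = '\r\n'.join(lines)
    content_type = 'multipart/form-data; boundary=%s' % boundary
    return content_type, body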
Example #2
import binascii
import logging

import run_isolated  # project-local module providing MappingError


# url_open() below is the raising wrapper shown in Example #3.
def update_files_to_upload(query_url, queries, upload):
    """Queries the server to see which files from this batch already exist there.

  Arguments:
    queries: The hash files to potential upload to the server.
    upload: Any new files that need to be upload are sent to this function.
  """
    body = ''.join(
        (binascii.unhexlify(meta_data['h']) for (_, meta_data) in queries))
    assert (len(body) % 20) == 0, repr(body)

    response = url_open(query_url,
                        data=body,
                        content_type='application/octet-stream').read()
    if len(queries) != len(response):
        raise run_isolated.MappingError(
            'Got an incorrect number of responses from the server. Expected %d, '
            'but got %d' % (len(queries), len(response)))

    hit = 0
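    # One response byte per queried file: chr(0) means the server does not
    # have it yet; any other value counts as a cache hit.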
    for i, flag in enumerate(response):
        if flag == chr(0):
            upload(queries[i])
        else:
            hit += 1
    logging.info('Queried %d files, %d cache hit', len(queries), hit)
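The wire format is compact: the request body is the raw concatenation of 20-byte binary SHA-1 digests, and the reply carries one byte per digest. A hedged usage sketch follows; the url and file contents are made up for illustration.

import hashlib

# (path, metadata) pairs, where 'h' is the hex sha1 of the file's contents.
queries = [
    ('a.txt', {'h': hashlib.sha1('first file').hexdigest()}),
    ('b.txt', {'h': hashlib.sha1('second file').hexdigest()}),
]

missing = []
update_files_to_upload('https://isolate.example.com/content/contains',
                       queries, missing.append)
# 'missing' now holds the pairs the server reported as absent.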
Example #3
import run_isolated  # project-local module providing url_open() and MappingError


def url_open(url, **kwargs):
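    """Wraps run_isolated.url_open() so that a failed request raises a
    MappingError instead of silently returning None."""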
    result = run_isolated.url_open(url, **kwargs)
    if not result:
        # If we get no response from the server, assume it is down and raise an
        # exception.
        raise run_isolated.MappingError('Unable to connect to server %s' % url)
    return result
Example #4
import binascii
import logging

import run_isolated  # project-local module providing MappingError


# url_open() below is the raising wrapper shown in Example #3.
def check_files_exist_on_server(query_url, queries):
    """Queries the server to see which files from this batch already exist there.

  Arguments:
    queries: The hash files to potential upload to the server.
  Returns:
    missing_files: list of files that are missing on the server.
  """
    logging.info('Checking existence of %d files...', len(queries))
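    # The request body is the raw concatenation of the 20-byte binary SHA-1
    # digests; the server answers with one byte per digest.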
    body = ''.join(
        (binascii.unhexlify(meta_data['h']) for (_, meta_data) in queries))
    assert (len(body) % 20) == 0, repr(body)

    response = url_open(query_url,
                        data=body,
                        content_type='application/octet-stream').read()
    if len(queries) != len(response):
        raise run_isolated.MappingError(
            'Got an incorrect number of responses from the server. Expected %d, '
            'but got %d' % (len(queries), len(response)))

    missing_files = [
        queries[i] for i, flag in enumerate(response) if flag == chr(0)
    ]
    logging.info('Queried %d files, %d cache hit', len(queries),
                 len(queries) - len(missing_files))
    return missing_files
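Taken together, the examples form a simple archive flow: first ask the server which digests it lacks, then push only those through the blobstore path from Example #1. A hedged sketch of how a caller might combine them; the read_file() helper and the empty extra-POST-data dict are assumptions for illustration.

def archive_missing(contains_url, generate_upload_url, queries):
    # Ask the server which files from the batch it already stores.
    missing = check_files_exist_on_server(contains_url, queries)
    for path, meta_data in missing:
        # read_file() is a hypothetical helper returning the file's
        # contents as a str.
        content = read_file(path)
        # The empty dict stands in for any extra POST fields the server
        # expects (an assumption for this sketch).
        upload_hash_content_to_blobstore(
            generate_upload_url, {}, meta_data['h'], content)
    return missing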