コード例 #1
0
ファイル: bundles.py プロジェクト: bkgoksel/codalab-cli
def _fetch_bundle_contents_info(uuid, path=''):
    """
    Return metadata about a bundle's contents, or about a subpath inside it.

    Query parameters:
    - `depth`: number of subdirectory levels to describe recursively.
      Defaults to 0. A negative value is rejected with 400 Bad Request.

    Response format:
    ```
    {
      "data": {
          "name": "<name of file or directory>",
          "link": "<string representing target if file is a symbolic link>",
          "type": "<file|directory|link>",
          "size": <size of file in bytes>,
          "perm": <unix permission integer>,
          "contents": [
              {
                "name": ...,
                <each file of directory represented recursively with the same schema>
              },
              ...
          ]
      }
    }
    ```

    Errors:
    - 404 Not Found when the lookup raises `NotFoundError`.
    - 400 Bad Request when the lookup raises any other exception.
    """
    max_depth = query_get_type(int, 'depth', default=0)
    if max_depth < 0:
        abort(httplib.BAD_REQUEST, "Depth must be at least 0")

    # Read access is verified before anything about the contents is looked up.
    check_bundles_have_read_permission(local.model, request.user, [uuid])

    try:
        target_info = local.download_manager.get_target_info(
            uuid, path, max_depth)
    except NotFoundError as err:
        abort(httplib.NOT_FOUND, err.message)
    except Exception as err:
        # Every other failure is reported to the client as a bad request.
        abort(httplib.BAD_REQUEST, err.message)

    return dict(data=target_info)
コード例 #2
0
def _fetch_bundles():
    """
    Fetch bundles in the following two ways:
    1. By bundle `specs` OR search `keywords`. Behavior is undefined
    when both `specs` and `keywords` are provided (the implementation
    checks `keywords` first, then `specs`, then `command`).

    Query parameters:

     - `worksheet`: UUID of the base worksheet. Required when fetching by specs.
     - `specs`: Bundle spec of bundle to fetch. May be provided multiple times
        to fetch multiple bundle specs. A bundle spec is either:
        1. a UUID (8 or 32 hex characters with a preceding '0x')
        2. a bundle name referring to the last bundle with that name on the
           given base worksheet
        3. or a reverse index of the form `^N` referring to the Nth-to-last
           bundle on the given base worksheet.
     - `keywords`: Search keyword. May be provided multiple times for multiple
        keywords. Bare keywords match the names and descriptions of bundles.
        Examples of other special keyword forms:
        - `name=<name>            ` : More targeted search of using metadata fields.
        - `size=.sort             ` : Sort by a particular field.
        - `size=.sort-            ` : Sort by a particular field in reverse.
        - `size=.sum              ` : Compute total of a particular field.
        - `.mine                  ` : Match only bundles I own.
        - `.floating              ` : Match bundles that aren't on any worksheet.
        - `.count                 ` : Count the number of bundles.
        - `.limit=10              ` : Limit the number of results to the top 10.
     - `depth`: optional integer. When given, the result is expanded to the
       matched bundles plus their descendants down to this depth.
     - `include_display_metadata`: `1` to include additional metadata helpful
       for displaying the bundle info, `0` to omit them. Default is `0`.
     - `include`: comma-separated list of related resources to include, such as "owner"

    When aggregation keywords such as `.count` are used, the resulting value
    is handed to `json_api_meta` under the key `result`, i.e. it is returned as:
    ```
    {
        "meta": {
            "result": <value>
        }
    }
    ```
    2. By bundle `command` and/or `dependencies` (for `--memoized` option in cl [run/mimic] command).
    When `dependencies` is not defined, the searching result will include bundles that match with command only.

    Query parameters:
     - `command`      : the command of a bundle in string
     - `dependencies` : the dependencies of a bundle in the format of
                        '[{"child_path":key1, "parent_uuid":UUID1},
                        {"child_path":key2, "parent_uuid":UUID2}]'
        1. a UUID should be in the format of 32 hex characters with a preceding '0x' (partial UUID is not allowed).
        2. the key should be able to uniquely identify a (child_path, parent_uuid) pair in the list.
    The returning result will be aggregated in the same way as 1.

    Responds with 400 Bad Request when none of `keywords`, `specs` or
    `command` is supplied.
    """
    keywords = query_get_list('keywords')
    specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    descendant_depth = query_get_type(int, 'depth', None)
    command = query_get_type(str, 'command', '')
    # Passed through to the model as the raw query string (default '[]').
    dependencies = query_get_type(str, 'dependencies', '[]')

    if keywords:
        # Handle search keywords
        keywords = resolve_owner_in_keywords(keywords)
        search_result = local.model.search_bundles(request.user.user_id,
                                                   keywords)
        # Return simple dict if scalar result (e.g. .sum or .count queries)
        if search_result['is_aggregate']:
            return json_api_meta({}, {'result': search_result['result']})
        # If not aggregate this is a list
        bundle_uuids = search_result['result']
    elif specs:
        # Resolve bundle specs
        bundle_uuids = canonicalize.get_bundle_uuids(local.model, request.user,
                                                     worksheet_uuid, specs)
    elif command:
        # Memoized lookup by command (and optional dependencies).
        bundle_uuids = local.model.get_memoized_bundles(
            request.user.user_id, command, dependencies)
    else:
        # 'command' is a valid selector too, so the message names all three.
        abort(
            http.client.BAD_REQUEST,
            "Request must include either 'keywords', "
            "'specs' or 'command' query parameter",
        )

    # Find all descendants down to the provided depth
    if descendant_depth is not None:
        bundle_uuids = local.model.get_self_and_descendants(
            bundle_uuids, depth=descendant_depth)

    return build_bundles_document(bundle_uuids)
コード例 #3
0
def _fetch_bundle_contents_blob(uuid, path=''):
    """
    API to download the contents of a bundle or a subpath within a bundle.

    For directories, this method always returns a tarred and gzipped archive of
    the directory.

    For files, if the request has an Accept-Encoding header containing gzip,
    then the returned file is gzipped. Otherwise, the file is returned as-is.

    Targets that are neither a directory nor a file (e.g. symlinks) are
    rejected with 403 Forbidden.

    HTTP Request headers:
    - `Range: bytes=<start>-<end>`: fetch bytes from the range
      `[<start>, <end>)`.
      NOTE(review): the implementation requests `end - start + 1` bytes,
      which treats `<end>` as inclusive -- confirm which is intended.
      Not supported for directories, nor together with `head`/`tail`.
    - `Accept-Encoding: <encoding>`: indicate that the client can accept
      encoding `<encoding>`. Currently only `gzip` encoding is supported.

    Query parameters:
    - `head`: number of lines to fetch from the beginning of the file.
      Default is 0, meaning to fetch the entire file.
    - `tail`: number of lines to fetch from the end of the file.
      Default is 0, meaning to fetch the entire file.
    - `max_line_length`: maximum number of characters to fetch from each line,
      if either `head` or `tail` is specified. Default is 128.
    - `truncation_text`: string forwarded to the file summarizer when `head`
      or `tail` is specified. Default is the empty string.

    HTTP Response headers (for single-file targets):
    - `Content-Disposition: inline; filename=<bundle name or target filename>`
    - `Content-Type: <guess of mimetype based on file extension>`
    - `Content-Encoding: [gzip|identity]`
    - `Target-Type: file`

    HTTP Response headers (for directories):
    - `Content-Disposition: attachment; filename=<bundle or directory name>.tar.gz`
    - `Content-Type: application/gzip`
    - `Content-Encoding: identity`
    - `Target-Type: directory`
    """
    byte_range = get_request_range()
    head_lines = query_get_type(int, 'head', default=0)
    tail_lines = query_get_type(int, 'tail', default=0)
    truncation_text = query_get_type(str, 'truncation_text', default='')
    max_line_length = query_get_type(int, 'max_line_length', default=128)
    check_bundles_have_read_permission(local.model, request.user, [uuid])
    target = BundleTarget(uuid, path)

    try:
        target_info = local.download_manager.get_target_info(target, 0)
        # The lookup may resolve to a different target than the one requested;
        # in that case read permission is re-checked on the resolved bundle.
        if target_info['resolved_target'] != target:
            check_bundles_have_read_permission(
                local.model, request.user,
                [target_info['resolved_target'].bundle_uuid])
        # From here on, operate on the resolved target.
        target = target_info['resolved_target']
    except NotFoundError as e:
        abort(http.client.NOT_FOUND, str(e))
    except Exception as e:
        # Any other failure inside the block above is reported as 400.
        abort(http.client.BAD_REQUEST, str(e))

    # Figure out the file name: the bundle's name when the whole bundle was
    # requested (empty path) and it has one, else the target's own name.
    bundle_name = local.model.get_bundle(target.bundle_uuid).metadata.name
    if not path and bundle_name:
        filename = bundle_name
    else:
        filename = target_info['name']

    if target_info['type'] == 'directory':
        if byte_range:
            abort(http.client.BAD_REQUEST,
                  'Range not supported for directory blobs.')
        if head_lines or tail_lines:
            abort(http.client.BAD_REQUEST,
                  'Head and tail not supported for directory blobs.')
        # Always tar and gzip directories
        gzipped_stream = False  # but don't set the encoding to 'gzip'
        mimetype = 'application/gzip'
        filename += '.tar.gz'
        fileobj = local.download_manager.stream_tarred_gzipped_directory(
            target)
    elif target_info['type'] == 'file':
        # Let's gzip to save bandwidth.
        # For simplicity, we do this even if the file is already a packed
        # archive (which should be relatively rare).
        # The browser will transparently decode the file.
        gzipped_stream = request_accepts_gzip_encoding()

        # Since guess_type() will interpret '.tar.gz' as an 'application/x-tar' file
        # with 'gzip' encoding, which would usually go into the Content-Encoding
        # header. But if the bundle contents is actually a packed archive, we don't
        # want the client to automatically decompress the file, so we don't want to
        # set the Content-Encoding header. Instead, if guess_type() detects an
        # archive, we just set mimetype to indicate an arbitrary binary file.
        mimetype, encoding = mimetypes.guess_type(filename, strict=False)
        if encoding is not None:
            mimetype = 'application/octet-stream'

        # Exactly one read strategy is used: byte range, head/tail summary,
        # or the whole file. Range and head/tail are mutually exclusive.
        if byte_range and (head_lines or tail_lines):
            abort(http.client.BAD_REQUEST,
                  'Head and range not supported on the same request.')
        elif byte_range:
            start, end = byte_range
            # Third argument is computed as `end - start + 1`, i.e. `end` is
            # counted as part of the range here.
            fileobj = local.download_manager.read_file_section(
                target, start, end - start + 1, gzipped_stream)
        elif head_lines or tail_lines:
            fileobj = local.download_manager.summarize_file(
                target, head_lines, tail_lines, max_line_length,
                truncation_text, gzipped_stream)
        else:
            fileobj = local.download_manager.stream_file(
                target, gzipped_stream)
    else:
        # Symlinks.
        abort(http.client.FORBIDDEN,
              'Cannot download files of this type (%s).' % target_info['type'])

    # Set headers.
    # Content-Type falls back to text/plain when no mimetype could be guessed.
    response.set_header('Content-Type', mimetype or 'text/plain')
    response.set_header('Content-Encoding',
                        'gzip' if gzipped_stream else 'identity')
    # Files are served inline; directory archives as attachments.
    if target_info['type'] == 'file':
        response.set_header('Content-Disposition',
                            'inline; filename="%s"' % filename)
    else:
        response.set_header('Content-Disposition',
                            'attachment; filename="%s"' % filename)
    response.set_header('Target-Type', target_info['type'])

    return fileobj
コード例 #4
0
ファイル: bundles.py プロジェクト: solar5/codalab-cli
def _fetch_bundles():
    """
    Fetch bundles by bundle `specs` OR search `keywords`. Behavior is undefined
    when both `specs` and `keywords` are provided (the implementation checks
    `keywords` first).

    Query parameters:

     - `worksheet`: UUID of the base worksheet. Required when fetching by specs.
     - `specs`: Bundle spec of bundle to fetch. May be provided multiple times
        to fetch multiple bundle specs. A bundle spec is either:
        1. a UUID (8 or 32 hex characters with a preceding '0x')
        2. a bundle name referring to the last bundle with that name on the
           given base worksheet
        3. or a reverse index of the form `^N` referring to the Nth-to-last
           bundle on the given base worksheet.
     - `keywords`: Search keyword. May be provided multiple times for multiple
        keywords. Bare keywords match the names and descriptions of bundles.
        Examples of other special keyword forms:
        - `name=<name>            ` : More targeted search of using metadata fields.
        - `size=.sort             ` : Sort by a particular field.
        - `size=.sort-            ` : Sort by a particular field in reverse.
        - `size=.sum              ` : Compute total of a particular field.
        - `.mine                  ` : Match only bundles I own.
        - `.floating              ` : Match bundles that aren't on any worksheet.
        - `.count                 ` : Count the number of bundles.
        - `.limit=10              ` : Limit the number of results to the top 10.
     - `depth`: optional integer. When given, the result is expanded to the
       matched bundles plus their descendants down to this depth. Ignored for
       aggregate (scalar) results.
     - `include_display_metadata`: `1` to include additional metadata helpful
       for displaying the bundle info, `0` to omit them. Default is `0`.
     - `include`: comma-separated list of related resources to include, such as "owner"

    When aggregation keywords such as `.count` are used, the resulting value
    is handed to `json_api_meta` under the key `result`, i.e. it is returned as:
    ```
    {
        "meta": {
            "result": <value>
        }
    }
    ```

    Responds with 400 Bad Request when neither `keywords` nor `specs` is
    supplied.
    """
    keywords = query_get_list('keywords')
    specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    descendant_depth = query_get_type(int, 'depth', None)

    if keywords:
        # Handle search keywords
        keywords = resolve_owner_in_keywords(keywords)
        bundle_uuids = local.model.search_bundle_uuids(request.user.user_id, keywords)
    elif specs:
        # Resolve bundle specs
        bundle_uuids = canonicalize.get_bundle_uuids(local.model, request.user, worksheet_uuid, specs)
    else:
        abort(httplib.BAD_REQUEST,
              "Request must include either 'keywords' "
              "or 'specs' query parameter")

    # Return simple dict if scalar result (e.g. .sum or .count queries).
    # This must happen before descendant expansion: a scalar aggregate is not
    # a list of UUIDs and must not be fed to get_self_and_descendants().
    if not isinstance(bundle_uuids, list):
        return json_api_meta({}, {'result': bundle_uuids})

    # Find all descendants down to the provided depth
    if descendant_depth is not None:
        bundle_uuids = local.model.get_self_and_descendants(bundle_uuids, depth=descendant_depth)

    return build_bundles_document(bundle_uuids)
コード例 #5
0
ファイル: bundles.py プロジェクト: Adama94/codalab-cli
def _fetch_bundle_contents_blob(uuid, path=''):
    """
    API to download the contents of a bundle or a subpath within a bundle.

    For directories this method always returns a tarred and gzipped archive of
    the directory.

    For files, if the request has an Accept-Encoding header containing gzip,
    then the returned file is gzipped (unless the file name already denotes
    an archive).

    Query parameters:
    - `head`: number of lines to fetch from the beginning of the file.
      Default is 0.
    - `tail`: number of lines to fetch from the end of the file.
      Default is 0.
    - `max_line_length`: per-line limit forwarded to the file summarizer when
      `head` or `tail` is given. Default is 128.

    A byte range (as returned by `get_request_range()`) may be requested for
    files, but not together with `head`/`tail`. Neither a byte range nor
    `head`/`tail` is supported for directories.

    Errors:
    - 404 Not Found when no target info exists for the path.
    - 400 Bad Request for unsupported range/head/tail combinations.
    - 403 Forbidden for targets that are neither file nor directory
      (e.g. symlinks).
    """
    byte_range = get_request_range()
    head_lines = query_get_type(int, 'head', default=0)
    tail_lines = query_get_type(int, 'tail', default=0)
    max_line_length = query_get_type(int, 'max_line_length', default=128)
    check_bundles_have_read_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)

    target_info = local.download_manager.get_target_info(uuid, path, 0)
    if target_info is None:
        abort(httplib.NOT_FOUND, 'Not found.')

    # Figure out the file name: the bundle's name when the whole bundle was
    # requested (empty path) and it has one, else the target's own name.
    if not path and bundle.metadata.name:
        filename = bundle.metadata.name
    else:
        filename = target_info['name']

    if target_info['type'] == 'directory':
        if byte_range:
            abort(httplib.BAD_REQUEST,
                  'Range not supported for directory blobs.')
        # Reject `tail` as well as `head`: previously a `tail` request on a
        # directory was silently ignored and the full archive was returned.
        if head_lines or tail_lines:
            abort(httplib.BAD_REQUEST,
                  'Head and tail not supported for directory blobs.')
        # Always tar and gzip directories.
        filename = filename + '.tar.gz'
        fileobj = local.download_manager.stream_tarred_gzipped_directory(
            uuid, path)
    elif target_info['type'] == 'file':
        gzipped = False
        if not zip_util.path_is_archive(
                filename) and request_accepts_gzip_encoding():
            # Let's gzip to save bandwidth. The browser will transparently decode
            # the file.
            filename = filename + '.gz'
            gzipped = True

        # Exactly one read strategy is used: byte range, head/tail summary,
        # or the whole file. Range and head/tail are mutually exclusive.
        if byte_range and (head_lines or tail_lines):
            abort(httplib.BAD_REQUEST,
                  'Head and range not supported on the same request.')
        elif byte_range:
            start, end = byte_range
            # Length is `end - start + 1`, i.e. `end` is counted as part of
            # the range.
            fileobj = local.download_manager.read_file_section(
                uuid, path, start, end - start + 1, gzipped)
        elif head_lines or tail_lines:
            fileobj = local.download_manager.summarize_file(
                uuid, path, head_lines, tail_lines, max_line_length, None,
                gzipped)
        else:
            fileobj = local.download_manager.stream_file(uuid, path, gzipped)
    else:
        # Symlinks.
        abort(httplib.FORBIDDEN, 'Cannot download files of this type.')

    # Set headers.
    # Content-Type is guessed from the (possibly suffixed) file name and
    # falls back to text/plain.
    mimetype, _ = mimetypes.guess_type(filename, strict=False)
    response.set_header('Content-Type', mimetype or 'text/plain')
    # When the name carries a '.gz' archive extension and the client accepts
    # gzip, advertise the payload as gzip-encoded and drop the extension from
    # the reported file name; otherwise send it as-is.
    if zip_util.get_archive_ext(
            filename) == '.gz' and request_accepts_gzip_encoding():
        filename = zip_util.strip_archive_ext(filename)
        response.set_header('Content-Encoding', 'gzip')
    else:
        response.set_header('Content-Encoding', 'identity')
    response.set_header('Content-Disposition', 'filename="%s"' % filename)

    return fileobj