def _fetch_bundles():
    """
    Fetch bundles either by resolving explicit bundle specs or by running a
    keyword search, optionally expanding the result to descendants.
    """
    search_terms = query_get_list('keywords')
    bundle_specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    depth = query_get_type(int, 'depth', None)

    if search_terms:
        # Keyword search takes precedence over explicit specs.
        search_terms = resolve_owner_in_keywords(search_terms)
        bundle_uuids = local.model.search_bundle_uuids(
            request.user.user_id, worksheet_uuid, search_terms)
    elif bundle_specs:
        # Resolve each spec against the base worksheet.
        bundle_uuids = canonicalize.get_bundle_uuids(
            local.model, request.user, worksheet_uuid, bundle_specs)
    else:
        abort(
            httplib.BAD_REQUEST,
            "Request must include either 'keywords' or 'specs' query parameter")

    # Expand the result set with descendants down to the requested depth.
    if depth is not None:
        bundle_uuids = local.model.get_self_and_descendants(bundle_uuids, depth=depth)

    # Aggregate queries (e.g. .sum or .count) yield a scalar, not a list.
    if isinstance(bundle_uuids, list):
        return build_bundles_document(bundle_uuids)
    return json_api_meta({}, {'result': bundle_uuids})
def fetch_worksheets():
    """
    Fetch worksheets by worksheet specs (names) OR search keywords.

    Query parameters:
    - `include`: comma-separated list of related resources to include,
      such as "owner"
    """
    search_terms = query_get_list('keywords')
    worksheet_specs = query_get_list('specs')
    base_uuid = request.query.get('base')
    include_set = query_get_json_api_include_set(
        supported={'owner', 'group_permissions'})

    if worksheet_specs:
        # Resolve each spec to a UUID (creating the worksheet when needed),
        # then load the matching rows without their items.
        resolved_uuids = [
            get_worksheet_uuid_or_create(base_uuid, spec)
            for spec in worksheet_specs
        ]
        records = local.model.batch_get_worksheets(fetch_items=False, uuid=resolved_uuids)
        worksheets = [record.to_dict() for record in records]
    else:
        worksheets = local.model.search_worksheets(
            request.user.user_id, resolve_owner_in_keywords(search_terms))

    # Build response document
    document = WorksheetSchema(many=True).dump(worksheets).data

    # Optionally attach the owning users.
    if 'owner' in include_set:
        owner_ids = {record['owner_id'] for record in worksheets}
        if owner_ids:
            json_api_include(
                document, UserSchema(),
                local.model.get_users(user_ids=owner_ids)['results'])

    # Optionally attach group permissions present on each worksheet.
    if 'group_permissions' in include_set:
        for record in worksheets:
            if 'group_permissions' in record:
                json_api_include(
                    document, WorksheetPermissionSchema(), record['group_permissions'])

    return document
def _get_parents(uuid):
    """
    Fetch the ancestors of the bundles identified by the 'specs' query
    parameter, grouped by level, up to `depth` levels away.

    Query parameters:
    - `specs`: bundle spec(s) to resolve. Required.
    - `worksheet`: UUID of the base worksheet used to resolve specs.
    - `depth`: how many ancestor levels to fetch; must be >= 0 (default 0).

    Returns {'data': [...]} where entry i is a one-element list holding the
    bundles document for the ancestors at level i.
    """
    # Removed leftover debug print ("REACHED HERE 3") from this handler.
    depth = query_get_type(int, 'depth', default=0)
    if depth < 0:
        abort(httplib.BAD_REQUEST, "Depth must be at least 0")

    specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    if specs:
        # Resolve bundle specs
        bundle_uuids = canonicalize.get_bundle_uuids(
            local.model, request.user, worksheet_uuid, specs)
    else:
        # Fixed error message: this endpoint only reads 'specs' — no
        # 'keywords' parameter is accepted above.
        abort(
            httplib.BAD_REQUEST,
            "Request must include 'specs' query parameter",
        )

    # NOTE: 'get_self_and_ancesters' [sic] is the model's method name; the
    # typo has to be fixed on the model before renaming this call.
    parent_uuids = local.model.get_self_and_ancesters(bundle_uuids, depth=depth)

    # One bundles document per ancestor level, each wrapped in a one-element
    # list to preserve the response shape expected by callers.
    final_result = [
        [build_bundles_document(level_uuids)] for level_uuids in parent_uuids
    ]
    return {'data': final_result}
def fetch_worksheets():
    """
    Fetch worksheets by worksheet specs (names) OR search keywords.

    (Docstring corrected: it previously said "Fetch bundles by bundle specs",
    but this handler resolves and returns worksheets.)

    Resolves 'specs' against the optional 'base' worksheet when provided;
    otherwise runs a keyword search over worksheets.
    """
    keywords = query_get_list('keywords')
    specs = query_get_list('specs')
    base_worksheet_uuid = request.query.get('base')
    if specs:
        # Resolve each spec to a worksheet UUID, creating it if necessary.
        uuids = [
            get_worksheet_uuid_or_create(base_worksheet_uuid, spec)
            for spec in specs
        ]
        # Load the worksheet rows without their items.
        worksheets = [
            w.to_dict()
            for w in local.model.batch_get_worksheets(fetch_items=False, uuid=uuids)
        ]
    else:
        keywords = resolve_owner_in_keywords(keywords)
        worksheets = local.model.search_worksheets(request.user.user_id, keywords)

    # Build response document
    document = WorksheetSchema(many=True).dump(worksheets).data

    # Include users (owners of the returned worksheets)
    owner_ids = {w['owner_id'] for w in worksheets}
    if owner_ids:
        json_api_include(document, UserSchema(), local.model.get_users(owner_ids))

    # Include permissions
    for w in worksheets:
        if 'group_permissions' in w:
            json_api_include(document, WorksheetPermissionSchema(), w['group_permissions'])

    return document
def fetch_worksheets():
    """
    Fetch worksheets by worksheet specs (names) OR search keywords.

    Query parameters:
    - `include`: comma-separated list of related resources to include,
      such as "owner"
    """
    search_terms = query_get_list('keywords')
    worksheet_specs = query_get_list('specs')
    base_uuid = request.query.get('base')
    include_set = query_get_json_api_include_set(supported={'owner', 'group_permissions'})

    if worksheet_specs:
        # Resolve every spec to a UUID (creating worksheets as needed),
        # then load the rows without their items.
        resolved = [get_worksheet_uuid_or_create(base_uuid, s) for s in worksheet_specs]
        rows = local.model.batch_get_worksheets(fetch_items=False, uuid=resolved)
        worksheets = [row.to_dict() for row in rows]
    else:
        worksheets = local.model.search_worksheets(
            request.user.user_id, resolve_owner_in_keywords(search_terms))

    document = WorksheetSchema(many=True).dump(worksheets).data

    if 'owner' in include_set:
        # Attach the owning users for the returned worksheets.
        owner_ids = {row['owner_id'] for row in worksheets}
        if owner_ids:
            json_api_include(document, UserSchema(), local.model.get_users(owner_ids))

    if 'group_permissions' in include_set:
        # Attach any group permissions carried by each worksheet.
        for row in worksheets:
            if 'group_permissions' in row:
                json_api_include(
                    document, WorksheetPermissionSchema(), row['group_permissions'])

    return document
def _fetch_locations():
    """
    Fetch locations of bundles.

    Query parameters:
    - `uuids`: List of bundle UUID's to get the locations for
    """
    bundle_uuids = query_get_list('uuids')
    link_urls = local.model.get_bundle_metadata(bundle_uuids, "link_url")

    locations = {}
    for bundle_uuid in bundle_uuids:
        # Prefer an explicit link URL; fall back to the bundle store path.
        locations[bundle_uuid] = (
            link_urls.get(bundle_uuid)
            or local.bundle_store.get_bundle_location(bundle_uuid)
        )
    return {'data': locations}
def fetch_users():
    """
    Fetch list of users, filterable by username and email.

    Takes the following query parameters:
        filter[user_name]=name1,name2,...
        filter[email]=email1,email2,...

    Query parameters:
    - `keywords`: Search keyword. May be provided multiple times for multiple
      keywords.
      Examples of other special keyword forms:
      - `name=<name>`        : More targeted search of using metadata fields.
      - `date_joined=.sort`  : Sort by a particular field.
      - `date_joined=.sort-` : Sort by a particular field in reverse.
      - `.count`             : Count the number of users.
      - `.limit=10`          : Limit the number of results to the top 10.
    """
    # Combine username and email filters
    usernames = set(request.query.get('filter[user_name]', '').split(','))
    usernames |= set(request.query.get('filter[email]', '').split(','))
    usernames.discard('')  # str.split(',') will return '' on empty strings

    keywords = query_get_list('keywords')

    # BUGFIX: `usernames` is always a set and `keywords` always a list, so the
    # previous `is None` checks could never fire; test for emptiness instead.
    if not usernames and not keywords:
        abort(http.client.BAD_REQUEST, "Request must include 'keywords' query parameter or usernames")

    if request.user.user_id != local.model.root_user_id:
        for key in keywords:
            # BUGFIX: a non-root keyword is allowed if it touches ANY of the
            # accessible fields. The previous `not all(...)` required a single
            # keyword to contain EVERY accessible field, rejecting essentially
            # all non-root searches.
            if not any(accessed_field in key for accessed_field in USER_ACCESSIBLE_KEYWORDS):
                abort(http.client.FORBIDDEN, "You don't have access to search for these fields")

    # Handle search keywords
    users = local.model.get_users(keywords=(keywords or None), usernames=(usernames or None))

    # Return simple dict if scalar result (e.g. .sum or .count queries)
    if users.get('is_aggregate'):
        return json_api_meta({}, {'results': users['results']})
    else:
        users = users['results']

    return allowed_user_schema()(many=True).dump(users).data
def _update_bundle_contents_blob(uuid):
    """
    Update the contents of the given running or uploading bundle.

    Query parameters:
    - `urls`: (optional) comma-separated list of URLs from which to fetch data
      to fill the bundle, using this option will ignore any uploaded file data
    - `git`: (optional) 1 if URL should be interpreted as git repos to clone
      or 0 otherwise, default is 0.
    - `filename`: (optional) filename of the uploaded file, used to indicate
      whether or not it is an archive, default is 'contents'
    - `unpack`: (optional) 1 if the uploaded file should be unpacked if it is
      an archive, or 0 otherwise, default is 1
    - `simplify`: (optional) 1 if the uploaded file should be 'simplified' if
      it is an archive, or 0 otherwise, default is 1.
    - `finalize_on_failure`: (optional) 1 if bundle state should be set to
      'failed' in the case of a failure during upload, or 0 if the bundle
      state should not change on failure. Default is 0.
    - `finalize_on_success`: (optional) 1 if bundle state should be set to
      'state_on_success' when the upload finishes successfully. Default is
      True
    - `state_on_success`: (optional) Update the bundle state to this state if
      the upload completes successfully. Must be either 'ready' or 'failed'.
      Default is 'ready'.
    """
    # Caller must have full permission on the bundle, and the bundle must not
    # already be in a final state.
    check_bundles_have_all_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)
    if bundle.state in State.FINAL_STATES:
        abort(http.client.FORBIDDEN, 'Contents cannot be modified, bundle already finalized.')

    # Get and validate query parameters
    finalize_on_failure = query_get_bool('finalize_on_failure', default=False)
    finalize_on_success = query_get_bool('finalize_on_success', default=True)
    final_state = request.query.get('state_on_success', default=State.READY)
    if finalize_on_success and final_state not in State.FINAL_STATES:
        abort(
            http.client.BAD_REQUEST,
            'state_on_success must be one of %s' % '|'.join(State.FINAL_STATES),
        )

    # If this bundle already has data, remove it.
    if local.upload_manager.has_contents(bundle):
        local.upload_manager.cleanup_existing_contents(bundle)

    # Store the data.
    try:
        sources = None
        if request.query.urls:
            sources = query_get_list('urls')
        # request without "filename" doesn't need to upload to bundle store
        if request.query.filename:
            # Uploaded file data overrides any 'urls' sources.
            filename = request.query.get('filename', default='contents')
            sources = [(filename, request['wsgi.input'])]
        if sources:
            local.upload_manager.upload_to_bundle_store(
                bundle,
                sources=sources,
                follow_symlinks=False,
                exclude_patterns=None,
                remove_sources=False,
                git=query_get_bool('git', default=False),
                unpack=query_get_bool('unpack', default=True),
                simplify_archives=query_get_bool('simplify', default=True),
            )  # See UploadManager for full explanation of 'simplify'

        # Record the bundle's on-disk location and usage (enforcing quota).
        # NOTE(review): reconstructed at try-level from the collapsed source —
        # nesting inside `if sources:` is also plausible; confirm upstream.
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        bundle_location = bundle_link_url or local.bundle_store.get_bundle_location(
            bundle.uuid)
        local.model.update_disk_metadata(bundle, bundle_location, enforce_disk_quota=True)
    except UsageError as err:
        # This is a user error (most likely disk quota overuser) so raise a client HTTP error
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)
        msg = "Upload failed: %s" % err
        local.model.update_bundle(bundle, {
            'state': State.FAILED,
            'metadata': {
                'failure_message': msg
            }
        })
        abort(http.client.BAD_REQUEST, msg)
    except Exception as e:
        # Upload failed: cleanup, update state if desired, and return HTTP error
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)
        msg = "Upload failed: %s" % e
        # The client may not want to finalize the bundle on failure, to keep
        # open the possibility of retrying the upload in the case of transient
        # failure.
        # Workers also use this API endpoint to upload partial contents of
        # running bundles, and they should use finalize_on_failure=0 to avoid
        # letting transient errors during upload fail the bundles prematurely.
        if finalize_on_failure:
            local.model.update_bundle(bundle, {
                'state': State.FAILED,
                'metadata': {
                    'failure_message': msg
                }
            })
        abort(http.client.INTERNAL_SERVER_ERROR, msg)
    else:
        if finalize_on_success:
            # Upload succeeded: update state
            local.model.update_bundle(bundle, {'state': final_state})
def _fetch_bundles():
    """
    Fetch bundles in the following two ways:
    1. By bundle `specs` OR search `keywords`. Behavior is undefined when both
       `specs` and `keywords` are provided.

    Query parameters:
    - `worksheet`: UUID of the base worksheet. Required when fetching by specs.
    - `specs`: Bundle spec of bundle to fetch. May be provided multiples times
      to fetch multiple bundle specs. A bundle spec is either:
      1. a UUID (8 or 32 hex characters with a preceding '0x')
      2. a bundle name referring to the last bundle with that name on the
         given base worksheet
      3. or a reverse index of the form `^N` referring to the Nth-to-last
         bundle on the given base worksheet.
    - `keywords`: Search keyword. May be provided multiple times for multiple
      keywords. Bare keywords match the names and descriptions of bundles.
      Examples of other special keyword forms:
      - `name=<name>`  : More targeted search of using metadata fields.
      - `size=.sort`   : Sort by a particular field.
      - `size=.sort-`  : Sort by a particular field in reverse.
      - `size=.sum`    : Compute total of a particular field.
      - `.mine`        : Match only bundles I own.
      - `.floating`    : Match bundles that aren't on any worksheet.
      - `.count`       : Count the number of bundles.
      - `.limit=10`    : Limit the number of results to the top 10.
    - `include_display_metadata`: `1` to include additional metadata helpful
      for displaying the bundle info, `0` to omit them. Default is `0`.
    - `include`: comma-separated list of related resources to include, such as
      "owner"

    When aggregation keywords such as `.count` are used, the resulting value
    is returned as:
    ```
    {
        "meta": {
            "results": <value>
        }
    }
    ```

    2. By bundle `command` and/or `dependencies` (for `--memoized` option in
       cl [run/mimic] command). When `dependencies` is not defined, the
       searching result will include bundles that match with command only.

    Query parameters:
    - `command`: the command of a bundle in string
    - `dependencies`: the dependencies of a bundle in the format of
      '[{"child_path":key1, "parent_uuid":UUID1},
        {"child_path":key2, "parent_uuid":UUID2}]'
      1. a UUID should be in the format of 32 hex characters with a preceding
         '0x' (partial UUID is not allowed).
      2. the key should be able to uniquely identify a
         (child_path, parent_uuid) pair in the list.
    The returning result will be aggregated in the same way as 1.
    """
    keywords = query_get_list('keywords')
    specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    descendant_depth = query_get_type(int, 'depth', None)
    command = query_get_type(str, 'command', '')
    dependencies = query_get_type(str, 'dependencies', '[]')

    if keywords:
        # Handle search keywords
        keywords = resolve_owner_in_keywords(keywords)
        search_result = local.model.search_bundles(request.user.user_id, keywords)
        # Return simple dict if scalar result (e.g. .sum or .count queries)
        if search_result['is_aggregate']:
            return json_api_meta({}, {'result': search_result['result']})
        # If not aggregate this is a list
        bundle_uuids = search_result['result']
    elif specs:
        # Resolve bundle specs
        bundle_uuids = canonicalize.get_bundle_uuids(local.model, request.user, worksheet_uuid, specs)
    elif command:
        # Memoized lookup: match bundles by command (and dependencies).
        bundle_uuids = local.model.get_memoized_bundles(
            request.user.user_id, command, dependencies)
    else:
        abort(
            http.client.BAD_REQUEST,
            "Request must include either 'keywords' " "or 'specs' query parameter",
        )

    # Find all descendants down to the provided depth
    if descendant_depth is not None:
        bundle_uuids = local.model.get_self_and_descendants(
            bundle_uuids, depth=descendant_depth)

    return build_bundles_document(bundle_uuids)
def _update_bundle_contents_blob(uuid):
    """
    Update the contents of the given running or uploading bundle.

    Query parameters:
    - `urls`: (optional) URL from which to fetch data to fill the bundle;
      using this option will ignore any uploaded file data. Only supports
      one URL.
    - `git`: (optional) 1 if URL should be interpreted as git repos to clone
      or 0 otherwise, default is 0.
    - `filename`: (optional) filename of the uploaded file, used to indicate
      whether or not it is an archive, default is 'contents'
    - `unpack`: (optional) 1 if the uploaded file should be unpacked if it is
      an archive, or 0 otherwise, default is 1
    - `finalize_on_failure`: (optional) 1 if bundle state should be set to
      'failed' in the case of a failure during upload, or 0 if the bundle
      state should not change on failure. Default is 0.
    - `finalize_on_success`: (optional) 1 if bundle state should be set to
      'state_on_success' when the upload finishes successfully. Default is
      True
    - `state_on_success`: (optional) Update the bundle state to this state if
      the upload completes successfully. Must be either 'ready' or 'failed'.
      Default is 'ready'.
    - `use_azure_blob_beta`: (optional) Use Azure Blob Storage to store the
      bundle. Default is False. If CODALAB_ALWAYS_USE_AZURE_BLOB_BETA is set,
      this parameter is disregarded, as Azure Blob Storage will always be
      used.
    """
    # Caller must have full permission, and the bundle must still be mutable.
    check_bundles_have_all_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)
    if bundle.state in State.FINAL_STATES:
        abort(http.client.FORBIDDEN, 'Contents cannot be modified, bundle already finalized.')

    # Get and validate query parameters
    finalize_on_failure = query_get_bool('finalize_on_failure', default=False)
    finalize_on_success = query_get_bool('finalize_on_success', default=True)
    # Environment variable forces Azure Blob usage regardless of query flag.
    use_azure_blob_beta = os.getenv("CODALAB_ALWAYS_USE_AZURE_BLOB_BETA") or query_get_bool(
        'use_azure_blob_beta', default=False
    )
    final_state = request.query.get('state_on_success', default=State.READY)
    if finalize_on_success and final_state not in State.FINAL_STATES:
        abort(
            http.client.BAD_REQUEST,
            'state_on_success must be one of %s' % '|'.join(State.FINAL_STATES),
        )

    # If this bundle already has data, remove it.
    if local.upload_manager.has_contents(bundle):
        local.upload_manager.cleanup_existing_contents(bundle)

    # Store the data.
    try:
        source = None
        if request.query.urls:
            # Exactly one URL is supported in this variant of the endpoint.
            sources = query_get_list('urls')
            if len(sources) != 1:
                abort(http.client.BAD_REQUEST, "Exactly one url must be provided.")
            source = sources[0]
        # request without "filename" doesn't need to upload to bundle store
        if request.query.filename:
            # Uploaded file data overrides any URL source.
            filename = request.query.get('filename', default='contents')
            source = (filename, request['wsgi.input'])
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        if bundle_link_url:
            # Don't upload to bundle store if using --link, as the path
            # already exists.
            pass
        elif source:
            local.upload_manager.upload_to_bundle_store(
                bundle,
                source=source,
                git=query_get_bool('git', default=False),
                unpack=query_get_bool('unpack', default=True),
                use_azure_blob_beta=use_azure_blob_beta,
            )
        # Record the bundle's location and disk usage (enforcing quota).
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        bundle_location = bundle_link_url or local.bundle_store.get_bundle_location(bundle.uuid)
        local.model.update_disk_metadata(bundle, bundle_location, enforce_disk_quota=True)
    except UsageError as err:
        # This is a user error (most likely disk quota overuser) so raise a client HTTP error
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)
        msg = "Upload failed: %s" % err
        local.model.update_bundle(
            bundle,
            {
                'state': State.FAILED,
                'metadata': {'failure_message': msg, 'error_traceback': traceback.format_exc()},
            },
        )
        abort(http.client.BAD_REQUEST, msg)
    except Exception as e:
        # Upload failed: cleanup, update state if desired, and return HTTP error
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)
        msg = "Upload failed: %s" % e
        # The client may not want to finalize the bundle on failure, to keep
        # open the possibility of retrying the upload in the case of transient
        # failure.
        # Workers also use this API endpoint to upload partial contents of
        # running bundles, and they should use finalize_on_failure=0 to avoid
        # letting transient errors during upload fail the bundles prematurely.
        if finalize_on_failure:
            local.model.update_bundle(
                bundle,
                {
                    'state': State.FAILED,
                    'metadata': {'failure_message': msg, 'error_traceback': traceback.format_exc()},
                },
            )
        abort(http.client.INTERNAL_SERVER_ERROR, msg)
    else:
        if finalize_on_success:
            # Upload succeeded: update state
            local.model.update_bundle(bundle, {'state': final_state})
def _fetch_bundles():
    """
    Fetch bundles by bundle `specs` OR search `keywords`. Behavior is
    undefined when both `specs` and `keywords` are provided.

    Query parameters:
    - `worksheet`: UUID of the base worksheet. Required when fetching by specs.
    - `specs`: Bundle spec of bundle to fetch. May be provided multiples times
      to fetch multiple bundle specs. A bundle spec is either:
      1. a UUID (8 or 32 hex characters with a preceding '0x')
      2. a bundle name referring to the last bundle with that name on the
         given base worksheet
      3. or a reverse index of the form `^N` referring to the Nth-to-last
         bundle on the given base worksheet.
    - `keywords`: Search keyword. May be provided multiples times for multiple
      keywords. Bare keywords match the names and descriptions of bundles.
      Examples of other special keyword forms:
      - `name=<name>`  : More targeted search of using metadata fields.
      - `size=.sort`   : Sort by a particular field.
      - `size=.sort-`  : Sort by a particular field in reverse.
      - `size=.sum`    : Compute total of a particular field.
      - `.mine`        : Match only bundles I own.
      - `.floating`    : Match bundles that aren't on any worksheet.
      - `.count`       : Count the number of bundles.
      - `.limit=10`    : Limit the number of results to the top 10.
    - `include_display_metadata`: `1` to include additional metadata helpful
      for displaying the bundle info, `0` to omit them. Default is `0`.
    - `include`: comma-separated list of related resources to include, such as
      "owner"

    When aggregation keywords such as `.count` are used, the resulting value
    is returned as:
    ```
    {
        "meta": {
            "results": <value>
        }
    }
    ```
    """
    keywords = query_get_list('keywords')
    specs = query_get_list('specs')
    worksheet_uuid = request.query.get('worksheet')
    descendant_depth = query_get_type(int, 'depth', None)

    if keywords:
        # Handle search keywords
        keywords = resolve_owner_in_keywords(keywords)
        bundle_uuids = local.model.search_bundle_uuids(request.user.user_id, keywords)
    elif specs:
        # Resolve bundle specs
        bundle_uuids = canonicalize.get_bundle_uuids(local.model, request.user, worksheet_uuid, specs)
    else:
        abort(httplib.BAD_REQUEST, "Request must include either 'keywords' " "or 'specs' query parameter")

    # Find all descendants down to the provided depth
    if descendant_depth is not None:
        bundle_uuids = local.model.get_self_and_descendants(bundle_uuids, depth=descendant_depth)

    # Return simple dict if scalar result (e.g. .sum or .count queries)
    if not isinstance(bundle_uuids, list):
        return json_api_meta({}, {'result': bundle_uuids})

    return build_bundles_document(bundle_uuids)
def _update_bundle_contents_blob(uuid):
    """
    Update the contents of the given running or uploading bundle.

    Query parameters:
        urls - comma-separated list of URLs from which to fetch data to fill
               the bundle, using this option will ignore any uploaded file
               data
        git - (optional) 1 if URL should be interpreted as git repos to clone
              or 0 otherwise, default is 0
    OR
        filename - (optional) filename of the uploaded file, used to indicate
                   whether or not it is an archive, default is 'contents'

    Query parameters that are always available:
        unpack - (optional) 1 if the uploaded file should be unpacked if it
                 is an archive, or 0 otherwise, default is 1
        simplify - (optional) 1 if the uploaded file should be 'simplified'
                   if it is an archive, or 0 otherwise, default is 1
                   (See UploadManager for full explanation of
                   'simplification')
        finalize_on_failure - (optional) True ('1') if bundle state should be
                              set to 'failed' in the case of a failure during
                              upload, or False ('0') if the bundle state
                              should not change on failure. Default is False.
        state_on_success - (optional) Update the bundle state to this state
                           if the upload completes successfully. Must be
                           either 'ready' or 'failed'. Default is 'ready'.
    """
    # Caller must have full permission, and the bundle must still be mutable.
    check_bundles_have_all_permission(local.model, request.user, [uuid])
    bundle = local.model.get_bundle(uuid)
    if bundle.state in State.FINAL_STATES:
        abort(httplib.FORBIDDEN, 'Contents cannot be modified, bundle already finalized.')

    # Get and validate query parameters
    finalize_on_failure = query_get_bool('finalize_on_failure', default=False)
    final_state = request.query.get('state_on_success', default=State.READY)
    if final_state not in State.FINAL_STATES:
        abort(
            httplib.BAD_REQUEST,
            'state_on_success must be one of %s' % '|'.join(State.FINAL_STATES))

    # If this bundle already has data, remove it.
    if local.upload_manager.has_contents(bundle):
        local.upload_manager.cleanup_existing_contents(bundle)

    # Store the data.
    try:
        if request.query.urls:
            sources = query_get_list('urls')
        else:
            # No URLs given: read the uploaded file from the request body.
            filename = request.query.get('filename', default='contents')
            sources = [(filename, request['wsgi.input'])]
        local.upload_manager.upload_to_bundle_store(
            bundle,
            sources=sources,
            follow_symlinks=False,
            exclude_patterns=None,
            remove_sources=False,
            git=query_get_bool('git', default=False),
            unpack=query_get_bool('unpack', default=True),
            simplify_archives=query_get_bool('simplify', default=True))
        local.upload_manager.update_metadata_and_save(bundle, new_bundle=False)
    except Exception as e:
        # Upload failed: cleanup, update state if desired, and return HTTP error
        if local.upload_manager.has_contents(bundle):
            local.upload_manager.cleanup_existing_contents(bundle)
        msg = "Upload failed: %s" % e
        # The client may not want to finalize the bundle on failure, to keep
        # open the possibility of retrying the upload in the case of transient
        # failure.
        # Workers also use this API endpoint to upload partial contents of
        # running bundles, and they should use finalize_on_failure=0 to avoid
        # letting transient errors during upload fail the bundles prematurely.
        if finalize_on_failure:
            local.model.update_bundle(bundle, {
                'state': State.FAILED,
                'metadata': {
                    'failure_message': msg
                },
            })
        abort(httplib.INTERNAL_SERVER_ERROR, msg)
    else:
        # Upload succeeded: update state
        local.model.update_bundle(bundle, {'state': final_state})