class UploadedBundle(NamedBundle):
    """Named bundle carrying `license` and `source_url` metadata.

    Instances are constructed directly in the READY state with no command and
    no dependencies, and they must never be run.
    """

    # Copy the parent's specs so the appends below do not touch NamedBundle.
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)
    METADATA_SPECS.append(
        MetadataSpec(
            'license',
            basestring,
            'which license this program/data is released under',
        )
    )
    METADATA_SPECS.append(
        MetadataSpec('source_url', basestring, 'where this data came from')
    )

    @classmethod
    def construct(cls, data_hash, metadata, owner_id, uuid=None):
        """Build the bundle row and hand it to the parent class's `construct`.

        :param data_hash: value stored under the row's 'data_hash' key.
        :param metadata: value stored under the row's 'metadata' key.
        :param owner_id: value stored under the row's 'owner_id' key.
        :param uuid: optional uuid; the 'uuid' key is only set when truthy.
        :return: whatever the parent class's `construct` returns for the row.
        """
        row = {
            'bundle_type': cls.BUNDLE_TYPE,
            'command': None,
            'data_hash': data_hash,
            'state': State.READY,
            'metadata': metadata,
            'dependencies': [],
            'owner_id': owner_id,
        }
        if uuid:
            row['uuid'] = uuid
        return super(UploadedBundle, cls).construct(row)

    def run(self, bundle_store, parent_dict):
        """Always raise AssertionError: this kind of bundle is never run."""
        # Raise explicitly rather than `assert False`: assert statements are
        # removed under `python -O`, which would turn this into a silent no-op.
        raise AssertionError(
            '%ss should never be run!' % (self.__class__.__name__,)
        )
class UploadedBundle(NamedBundle):
    """Named bundle carrying `license` and `source_url` metadata.

    Instances are constructed directly in the READY state with no command, no
    data hash and no dependencies, and they must never be run.
    """

    # Copy the parent's specs so the appends below do not touch NamedBundle.
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)
    # Don't format specs
    # fmt: off
    METADATA_SPECS.append(
        MetadataSpec(
            'license', str,
            'The license under which this program/dataset is released.'))
    METADATA_SPECS.append(
        MetadataSpec(
            'source_url', str,
            'URL corresponding to the original source of this bundle.'))
    # fmt: on

    @classmethod
    def construct(cls, metadata, owner_id, uuid=None):
        """Build the bundle row and hand it to the parent class's `construct`.

        :param metadata: value stored under the row's 'metadata' key.
        :param owner_id: value stored under the row's 'owner_id' key.
        :param uuid: optional uuid; the 'uuid' key is only set when truthy.
        :return: whatever the parent class's `construct` returns for the row.
        """
        row = {
            'bundle_type': cls.BUNDLE_TYPE,
            'command': None,
            'data_hash': None,
            'state': State.READY,
            'metadata': metadata,
            'dependencies': [],
            'owner_id': owner_id,
        }
        if uuid:
            row['uuid'] = uuid
        return super(UploadedBundle, cls).construct(row)

    def run(self, bundle_store, parent_dict):
        """Always raise AssertionError: this kind of bundle is never run."""
        # Raise explicitly rather than `assert False`: assert statements are
        # removed under `python -O`, which would turn this into a silent no-op.
        raise AssertionError(
            '%ss should never be run!' % (self.__class__.__name__,)
        )
class ProgramBundle(UploadedBundle):
    """Uploaded bundle of type 'program'.

    Adds the `architectures` and `language` metadata specs to those inherited
    from UploadedBundle.
    """

    BUNDLE_TYPE = 'program'

    # Build a fresh list so the parent's METADATA_SPECS is left untouched.
    METADATA_SPECS = list(UploadedBundle.METADATA_SPECS) + [
        MetadataSpec('architectures', list, 'viable architectures'),
        MetadataSpec('language', list, 'which programming language was used'),
    ]
class MockBundle(Bundle):
    """Bundle of type 'mock' declaring one str, one int and one list spec."""

    BUNDLE_TYPE = 'mock'
    METADATA_SPECS = (
        MetadataSpec('str_metadata', basestring, 'test str metadata'),
        MetadataSpec('int_metadata', int, 'test int metadata'),
        MetadataSpec('list_metadata', list, 'test list metadata'),
    )

    @classmethod
    def construct(cls, **kwargs):
        """Instantiate `cls` from keyword arguments, forcing the bundle type.

        Any caller-supplied `bundle_type` is overwritten with the mock type.
        """
        row = dict(kwargs)
        # NOTE(review): this pins MockBundle.BUNDLE_TYPE rather than
        # cls.BUNDLE_TYPE, so a subclass overriding BUNDLE_TYPE would still be
        # tagged 'mock' -- confirm this is intended.
        row['bundle_type'] = MockBundle.BUNDLE_TYPE
        return cls(row)
class UploadedBundle(NamedBundle):
    """Named bundle carrying license, source, link and store metadata.

    Instances are constructed directly in the READY state with no command, no
    data hash and no dependencies, and they must never be run.
    """

    # Copy the parent's specs so the appends below do not touch NamedBundle.
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)  # type: List
    # Don't format specs
    # fmt: off
    METADATA_SPECS.append(
        MetadataSpec(
            'license', str,
            'The license under which this program/dataset is released.'))
    METADATA_SPECS.append(
        MetadataSpec(
            'source_url', str,
            'URL corresponding to the original source of this bundle.'))
    METADATA_SPECS.append(
        MetadataSpec('link_url', str, 'Link URL of bundle.', optional=True))
    METADATA_SPECS.append(
        MetadataSpec(
            'link_format', str,
            # The trailing space matters: adjacent literals are concatenated
            # verbatim, and without it the text reads `equal to"raw"`.
            'Link format of bundle. Can be equal to '
            '"raw" or "zip" (only "raw" is supported as of now).',
            optional=True))
    METADATA_SPECS.append(
        MetadataSpec(
            'store', str,
            'The name of the bundle store where the bundle should be uploaded to. '
            'If unspecified, an optimal available bundle store will be chosen.',
            default=None, hidden=True, optional=True))
    # fmt: on

    @classmethod
    def construct(cls, metadata, owner_id, uuid=None):
        """Build the bundle row and hand it to the parent class's `construct`.

        :param metadata: value stored under the row's 'metadata' key.
        :param owner_id: value stored under the row's 'owner_id' key.
        :param uuid: optional uuid; the 'uuid' key is only set when truthy.
        :return: whatever the parent class's `construct` returns for the row.
        """
        row = {
            'bundle_type': cls.BUNDLE_TYPE,
            'command': None,
            'data_hash': None,
            'state': State.READY,
            'metadata': metadata,
            'dependencies': [],
            'owner_id': owner_id,
        }
        if uuid:
            row['uuid'] = uuid
        return super(UploadedBundle, cls).construct(row)

    def run(self, bundle_store, parent_dict):
        """Always raise AssertionError: this kind of bundle is never run."""
        # Raise explicitly rather than `assert False`: assert statements are
        # removed under `python -O`, which would turn this into a silent no-op.
        raise AssertionError(
            '%ss should never be run!' % (self.__class__.__name__,)
        )
class NamedBundle(Bundle):
    """Bundle whose metadata includes a mandatory, validated `name`.

    Declares the name/description/tags specs plus the generated `created`,
    `data_size` and `failure_message` specs.
    """

    NAME_LENGTH = 32

    METADATA_SPECS = (
        MetadataSpec(
            'name',
            basestring,
            'Short variable name (not necessarily unique); must conform to %s.'
            % spec_util.NAME_REGEX.pattern,
            short_key='n',
        ),
        MetadataSpec(
            'description',
            basestring,
            'Full description of the bundle.',
            short_key='d',
        ),
        MetadataSpec(
            'tags',
            list,
            'Space-separated list of tags used for search (e.g., machine-learning).',
            metavar='TAG',
        ),
        MetadataSpec(
            'created',
            int,
            'Time when this bundle was created.',
            generated=True,
            formatting='date',
        ),
        MetadataSpec(
            'data_size',
            int,
            'Size of this bundle (in bytes).',
            generated=True,
            formatting='size',
        ),
        MetadataSpec(
            'failure_message',
            basestring,
            'Error message if this run bundle failed.',
            generated=True,
        ),
    )

    @classmethod
    def construct(cls, row):
        """Instantiate `cls` from `row` after stamping the creation time.

        The row's 'metadata' entry is replaced (in the passed-in dict) by a
        copy that additionally holds `created`, the current time in whole
        seconds.
        """
        stamped_metadata = dict(row['metadata'])
        stamped_metadata['created'] = int(time.time())
        row['metadata'] = stamped_metadata
        return cls(row)

    def validate(self):
        """Run the parent validation, then require a non-empty, valid name.

        Raises UsageError when the name is empty; the name is then passed to
        `spec_util.check_name`.
        """
        super(NamedBundle, self).validate()
        type_label = self.bundle_type.title()
        name = self.metadata.name
        if not name:
            raise UsageError('%ss must have non-empty names' % (type_label,))
        spec_util.check_name(self.metadata.name)

    def __repr__(self):
        class_name = self.__class__.__name__
        uuid_text = str(self.uuid)
        name_text = str(self.metadata.name)
        return '%s(uuid=%r, name=%r)' % (class_name, uuid_text, name_text)

    def simple_str(self):
        """Return the bundle rendered as `name(uuid)`."""
        prefix = self.metadata.name + '('
        return prefix + self.uuid + ')'
class DerivedBundle(NamedBundle):
    """Named bundle built from targets, which become its dependencies.

    Adds the `allow_failed_dependencies` metadata spec to those inherited from
    NamedBundle.
    """

    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)  # type: List[ClassVar]
    # Don't format specs
    # fmt: off
    METADATA_SPECS.append(
        MetadataSpec(
            'allow_failed_dependencies',
            bool,
            'Whether to allow this bundle to have failed or killed dependencies.',
            default=False,
        ))
    # fmt: on

    @classmethod
    def construct(cls, targets, command, metadata, owner_id, uuid, data_hash, state):
        """Build the bundle row from `targets` and pass it to the parent `construct`.

        :param targets: iterable of `(child_path, (parent_uuid, parent_path))`.
        :param uuid: bundle uuid; a new one is generated when falsy.
        :raises UsageError: if there are several targets and any has an empty key.
        """
        uuid = uuid or spec_util.generate_uuid()

        # Keyed and anonymous targets must not be mixed.
        several_targets = len(targets) > 1
        if several_targets and any(key == '' for key, _ in targets):
            raise UsageError(
                'Must specify keys when packaging multiple targets!')

        # One dependency record per target, all pointing at this bundle.
        dependencies = [
            {
                'child_uuid': uuid,
                'child_path': child_path,
                'parent_uuid': parent_uuid,
                'parent_path': parent_path,
            }
            for (child_path, (parent_uuid, parent_path)) in targets
        ]

        row = {
            'uuid': uuid,
            'bundle_type': cls.BUNDLE_TYPE,
            'command': command,
            'data_hash': data_hash,
            'state': state,
            'metadata': metadata,
            'dependencies': dependencies,
            'owner_id': owner_id,
        }
        return super(DerivedBundle, cls).construct(row)
class RunBundle(DerivedBundle):
    """Derived bundle of type 'run'.

    Extends the DerivedBundle specs with user-supplied `request_*` and
    `exclude_patterns` specs and with generated specs describing the run.
    """

    BUNDLE_TYPE = 'run'
    METADATA_SPECS = list(DerivedBundle.METADATA_SPECS)  # type: List

    # Several request_* values are declared as strings and need to be parsed.
    # They request a machine with this many resources; the run must not
    # exceed them.
    # Don't format metadata specs
    # fmt: off
    METADATA_SPECS.extend([
        MetadataSpec(
            'request_docker_image', str,
            'Which docker image (either tag or digest, e.g., '
            'codalab/default-cpu:latest) we wish to use.',
            completer=DockerImagesCompleter, hide_when_anonymous=True, default=None),
        MetadataSpec(
            'request_time', str,
            'Amount of time (e.g., 3, 3m, 3h, 3d) allowed for this run. Defaults to user time quota left.',
            formatting='duration', default=None),
        MetadataSpec(
            'request_memory', str,
            'Amount of memory (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run.',
            formatting='size', default='2g'),
        MetadataSpec(
            'request_disk', str,
            'Amount of disk space (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run. Defaults to user disk quota left.',
            formatting='size', default=None),
        MetadataSpec(
            'request_cpus', int,
            'Number of CPUs allowed for this run.',
            default=1),
        MetadataSpec(
            'request_gpus', int,
            'Number of GPUs allowed for this run.',
            default=0),
        MetadataSpec(
            'request_queue', str,
            'Submit run to this job queue.',
            hide_when_anonymous=True, default=None),
        MetadataSpec(
            'request_priority', int,
            'Job priority (higher is more important). Negative '
            'priority bundles are queued behind bundles with no specified priority.',
            default=None),
        MetadataSpec(
            'request_network', bool,
            'Whether to allow network access.',
            default=False),
        MetadataSpec(
            'exclude_patterns', list,
            'Exclude these file patterns from being saved into the bundle contents.',
            default=[]),
        MetadataSpec(
            'actions', list,
            'Actions (e.g., kill) that were performed on this run.',
            generated=True),
        MetadataSpec(
            'time', float,
            'Amount of wall clock time (seconds) used by this run in total. '
            '[Runtime of the Docker container excluding CodaLab related '
            'steps such as preparing/uploading results]',
            generated=True, formatting='duration'),
        MetadataSpec(
            'time_user', float,
            'Amount of user time (seconds) used by this run.',
            generated=True, formatting='duration'),
        MetadataSpec(
            'time_system', float,
            'Amount of system time (seconds) used by this run.',
            generated=True, formatting='duration'),
        MetadataSpec(
            'memory', float,
            'Amount of memory (bytes) used by this run.',
            generated=True, formatting='size'),
        MetadataSpec(
            'memory_max', float,
            'Maximum amount of memory (bytes) used by this run at any time during execution.',
            generated=True, formatting='size'),
        MetadataSpec(
            'started', int,
            'Time when this bundle started executing.',
            generated=True, formatting='date'),
        MetadataSpec(
            'last_updated', int,
            'Time when information about this bundle was last updated.',
            generated=True, formatting='date'),
        MetadataSpec(
            'run_status', str,
            'Execution status of the bundle.',
            generated=True),
        MetadataSpec(
            'staged_status', str,
            'Information about the status of the staged bundle.',
            generated=True),

        # Information about running
        MetadataSpec(
            'docker_image', str,
            'Which docker image was used to run the process.',
            generated=True, hide_when_anonymous=True),
        MetadataSpec(
            'exitcode', int,
            'Exitcode of the process.',
            generated=True),
        MetadataSpec(
            'job_handle', str,
            'Identifies the job handle (internal).',
            generated=True, hide_when_anonymous=True),
        MetadataSpec(
            'remote', str,
            'Where this job is/was run (internal).',
            generated=True, hide_when_anonymous=True),
    ])
    # fmt: on

    @classmethod
    def construct(cls, targets, command, metadata, owner_id, uuid=None,
                  data_hash=None, state=State.CREATED):
        """Check that `command` is a string, then defer to the parent `construct`.

        :raises UsageError: if `command` is not a `str`.
        """
        if isinstance(command, str):
            return super(RunBundle, cls).construct(
                targets, command, metadata, owner_id, uuid, data_hash, state)
        raise UsageError('%r is not a valid command!' % (command, ))

    def validate(self):
        """Run the parent validation, then validate each dependency.

        Every dependency is validated with `require_child_path=True`.
        """
        super(RunBundle, self).validate()
        for dependency in self.dependencies:
            dependency.validate(require_child_path=True)
class RunBundle(DerivedBundle):
    """Derived bundle of type 'run'.

    Extends the DerivedBundle specs with user-supplied `request_*` specs and
    with generated specs describing the run.
    """

    BUNDLE_TYPE = 'run'
    METADATA_SPECS = list(DerivedBundle.METADATA_SPECS)

    # Several request_* values are declared as strings and need to be parsed.
    # They request a machine with this many resources; the run must not
    # exceed them.
    METADATA_SPECS.extend([
        MetadataSpec(
            'request_docker_image', basestring,
            'Which docker image (e.g., codalab/ubuntu:1.9) we wish to use.',
            completer=DockerImagesCompleter),
        MetadataSpec(
            'request_time', basestring,
            'Amount of time (e.g., 3, 3m, 3h, 3d) allowed for this run.',
            formatting='duration'),
        MetadataSpec(
            'request_memory', basestring,
            'Amount of memory (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run.',
            formatting='size'),
        MetadataSpec(
            'request_disk', basestring,
            'Amount of disk space (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run.',
            formatting='size'),
        MetadataSpec(
            'request_cpus', int,
            'Number of CPUs allowed for this run.'),
        MetadataSpec(
            'request_gpus', int,
            'Number of GPUs allowed for this run.'),
        MetadataSpec(
            'request_queue', basestring,
            'Submit run to this job queue.'),
        MetadataSpec(
            'request_priority', int,
            'Job priority (higher is more important).'),
        MetadataSpec(
            'request_network', bool,
            'Whether to allow network access.'),
        MetadataSpec(
            'actions', list,
            'Actions (e.g., kill) that were performed on this run.',
            generated=True),
        MetadataSpec(
            'time', float,
            'Amount of time (seconds) used by this run (total).',
            generated=True, formatting='duration'),
        MetadataSpec(
            'time_user', float,
            'Amount of time (seconds) by user.',
            generated=True, formatting='duration'),
        MetadataSpec(
            'time_system', float,
            'Amount of time (seconds) by the system.',
            generated=True, formatting='duration'),
        MetadataSpec(
            'memory', float,
            'Amount of memory (bytes) used by this run.',
            generated=True, formatting='size'),
        MetadataSpec(
            'memory_max', float,
            'Maximum amount of memory (bytes) used by this run at any time during execution.',
            generated=True, formatting='size'),
        MetadataSpec(
            'started', int,
            'Time when this bundle started executing.',
            generated=True, formatting='date'),
        MetadataSpec(
            'last_updated', int,
            'Time when information about this bundle was last updated.',
            generated=True, formatting='date'),
        MetadataSpec(
            'run_status', basestring,
            'Execution status of the bundle.',
            generated=True),

        # Information about running
        MetadataSpec(
            'docker_image', basestring,
            'Which docker image was used to run the process.',
            generated=True),
        MetadataSpec(
            'exitcode', int,
            'Exitcode of the process.',
            generated=True),
        MetadataSpec(
            'job_handle', basestring,
            'Identifies the job handle (internal).',
            generated=True),
        MetadataSpec(
            'remote', basestring,
            'Where this job is/was run (internal).',
            generated=True),
    ])

    @classmethod
    def construct(cls, targets, command, metadata, owner_id, uuid=None,
                  data_hash=None, state=State.CREATED):
        """Check that `command` is a string, then defer to the parent `construct`.

        :raises UsageError: if `command` is not a `basestring`.
        """
        if isinstance(command, basestring):
            return super(RunBundle, cls).construct(
                targets, command, metadata, owner_id, uuid, data_hash, state)
        raise UsageError('%r is not a valid command!' % (command, ))
class RunBundle(NamedBundle):
    """Named bundle of type 'run', built from targets and a command.

    Extends the NamedBundle specs with user-supplied `request_*` specs and
    with generated specs describing the run.
    """

    BUNDLE_TYPE = 'run'
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)

    # Several request_* values are declared as strings and need to be parsed.
    # They request a machine with this many resources; the run must not
    # exceed them.
    METADATA_SPECS.extend([
        MetadataSpec('request_docker_image', basestring,
                     'which docker container we wish to use'),
        MetadataSpec('request_time', basestring,
                     'amount of time (e.g. 3, 3m, 3h, 3d) allowed for this run'),
        MetadataSpec('request_memory', basestring,
                     'amount of memory (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run'),
        MetadataSpec('request_disk', basestring,
                     'amount of disk space (e.g. 3, 3k, 3m, 3g, 3t) allowed for this run'),
        MetadataSpec('request_cpus', int,
                     'number of CPUs allowed for this run'),
        MetadataSpec('request_gpus', int,
                     'number of GPUs allowed for this run'),
        MetadataSpec('request_queue', basestring,
                     'submit job to this queue'),
        MetadataSpec('request_priority', int,
                     'job priority (higher is more important)'),
        MetadataSpec('actions', list,
                     'actions performed on this run',
                     generated=True),
        MetadataSpec('time', float,
                     'amount of time (seconds) used by this run (total)',
                     generated=True, formatting='duration'),
        MetadataSpec('time_user', float,
                     'amount of time (seconds) by user',
                     generated=True, formatting='duration'),
        MetadataSpec('time_system', float,
                     'amount of time (seconds) by the system',
                     generated=True, formatting='duration'),
        MetadataSpec('memory', float,
                     'amount of memory (bytes) used by this run',
                     generated=True, formatting='size'),
        MetadataSpec('disk_read', float,
                     'number of bytes read',
                     generated=True, formatting='size'),
        MetadataSpec('disk_write', float,
                     'number of bytes written',
                     generated=True, formatting='size'),

        # Information about running
        MetadataSpec('docker_image', basestring,
                     'which docker container was used to run the process',
                     generated=True),
        MetadataSpec('exitcode', int,
                     'exitcode of the process',
                     generated=True),
        MetadataSpec('job_handle', basestring,
                     'identifies the job handle (internal)',
                     generated=True),
        MetadataSpec('remote', basestring,
                     'where this job was run',
                     generated=True),
        MetadataSpec('temp_dir', basestring,
                     'temporary directory where job is running (internal)',
                     generated=True),
    ])

    @classmethod
    def construct(cls, targets, command, metadata, owner_id, uuid=None,
                  data_hash=None, state=State.CREATED):
        """Build the bundle row from `targets` and pass it to the parent `construct`.

        :param targets: iterable of `(child_path, (parent_uuid, parent_path))`.
        :param uuid: bundle uuid; a new one is generated when falsy.
        :raises UsageError: if there are several targets and any has an empty
            key, or if `command` is not a `basestring`.
        """
        uuid = uuid or spec_util.generate_uuid()

        # Keyed and anonymous targets must not be mixed.
        several_targets = len(targets) > 1
        if several_targets and any(key == '' for key, _ in targets):
            raise UsageError('Must specify keys when packaging multiple targets!')
        if not isinstance(command, basestring):
            raise UsageError('%r is not a valid command!' % (command,))

        # One dependency record per target, all pointing at this bundle.
        dependencies = [
            {
                'child_uuid': uuid,
                'child_path': child_path,
                'parent_uuid': parent_uuid,
                'parent_path': parent_path,
            }
            for (child_path, (parent_uuid, parent_path)) in targets
        ]

        row = {
            'uuid': uuid,
            'bundle_type': cls.BUNDLE_TYPE,
            'command': command,
            'data_hash': data_hash,
            'state': state,
            'metadata': metadata,
            'dependencies': dependencies,
            'owner_id': owner_id,
        }
        return super(RunBundle, cls).construct(row)