Example #1
0
class UploadedBundle(NamedBundle):
    """Named bundle built directly from an existing data hash.

    Adds ``license`` and ``source_url`` to the metadata specs inherited from
    NamedBundle. Rows are created in ``State.READY`` with no command and no
    dependencies.
    """

    # list() makes a fresh list, so the appends below do not modify
    # NamedBundle.METADATA_SPECS.
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)
    METADATA_SPECS.append(
        MetadataSpec('license', basestring,
                     'which license this program/data is released under'))
    METADATA_SPECS.append(
        MetadataSpec('source_url', basestring, 'where this data came from'))

    @classmethod
    def construct(cls, data_hash, metadata, owner_id, uuid=None):
        """Build a bundle row and pass it to the parent ``construct``.

        Args:
            data_hash: value stored under the row's ``data_hash`` key.
            metadata: value stored under the row's ``metadata`` key.
            owner_id: value stored under the row's ``owner_id`` key.
            uuid: optional; only added to the row when truthy.

        Returns:
            Whatever the parent class's ``construct`` returns for the row.
        """
        row = {
            'bundle_type': cls.BUNDLE_TYPE,
            'command': None,
            'data_hash': data_hash,
            'state': State.READY,
            'metadata': metadata,
            'dependencies': [],
            'owner_id': owner_id
        }
        if uuid:
            row['uuid'] = uuid
        return super(UploadedBundle, cls).construct(row)

    def run(self, bundle_store, parent_dict):
        """Fail unconditionally; both arguments are ignored.

        Raises:
            AssertionError: always, naming the concrete class.
        """
        # Raise explicitly rather than ``assert False``: assert statements are
        # removed when Python runs with -O, which would turn this guard into a
        # silent no-op.
        raise AssertionError(
            '%ss should never be run!' % (self.__class__.__name__, ))
Example #2
0
class UploadedBundle(NamedBundle):
    """Named bundle whose row starts in ``State.READY`` with no data hash.

    Adds ``license`` and ``source_url`` to the metadata specs inherited from
    NamedBundle. Rows carry no command and no dependencies.
    """

    # list() makes a fresh list, so the appends below do not modify
    # NamedBundle.METADATA_SPECS.
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)
    # Don't format specs
    # fmt: off
    METADATA_SPECS.append(
        MetadataSpec(
            'license', str,
            'The license under which this program/dataset is released.'))
    METADATA_SPECS.append(
        MetadataSpec(
            'source_url', str,
            'URL corresponding to the original source of this bundle.'))
    # fmt: on

    @classmethod
    def construct(cls, metadata, owner_id, uuid=None):
        """Build a bundle row and pass it to the parent ``construct``.

        Args:
            metadata: value stored under the row's ``metadata`` key.
            owner_id: value stored under the row's ``owner_id`` key.
            uuid: optional; only added to the row when truthy.

        Returns:
            Whatever the parent class's ``construct`` returns for the row.
        """
        row = {
            'bundle_type': cls.BUNDLE_TYPE,
            'command': None,
            'data_hash': None,
            'state': State.READY,
            'metadata': metadata,
            'dependencies': [],
            'owner_id': owner_id,
        }
        if uuid:
            row['uuid'] = uuid
        return super(UploadedBundle, cls).construct(row)

    def run(self, bundle_store, parent_dict):
        """Fail unconditionally; both arguments are ignored.

        Raises:
            AssertionError: always, naming the concrete class.
        """
        # Raise explicitly rather than ``assert False``: assert statements are
        # removed when Python runs with -O, which would turn this guard into a
        # silent no-op.
        raise AssertionError(
            '%ss should never be run!' % (self.__class__.__name__, ))
class ProgramBundle(UploadedBundle):
    """Uploaded bundle of type ``'program'``.

    Extends the UploadedBundle metadata specs with ``architectures`` and
    ``language``, both list-typed.
    """

    BUNDLE_TYPE = 'program'
    # New list: parent specs first, then the program-specific ones.
    METADATA_SPECS = list(UploadedBundle.METADATA_SPECS) + [
        MetadataSpec('architectures', list, 'viable architectures'),
        MetadataSpec('language', list, 'which programming language was used'),
    ]
class MockBundle(Bundle):
    """Bundle of type ``'mock'`` with one str, one int and one list spec."""

    BUNDLE_TYPE = 'mock'
    METADATA_SPECS = (
        MetadataSpec(
            'str_metadata',
            basestring,
            'test str metadata',
        ),
        MetadataSpec(
            'int_metadata',
            int,
            'test int metadata',
        ),
        MetadataSpec(
            'list_metadata',
            list,
            'test list metadata',
        ),
    )

    @classmethod
    def construct(cls, **kwargs):
        """Instantiate ``cls`` from keyword arguments.

        The keyword arguments are copied into a new dict and its
        ``bundle_type`` entry is set to ``MockBundle.BUNDLE_TYPE``, replacing
        any value the caller supplied.
        """
        row = dict(kwargs)
        row['bundle_type'] = MockBundle.BUNDLE_TYPE
        return cls(row)
class UploadedBundle(NamedBundle):
    """Named bundle whose row starts in ``State.READY`` with no data hash.

    Adds ``license``, ``source_url``, ``link_url``, ``link_format`` and
    ``store`` to the metadata specs inherited from NamedBundle. Rows carry no
    command and no dependencies.
    """

    # list() makes a fresh list, so the appends below do not modify
    # NamedBundle.METADATA_SPECS.
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)  # type: List
    # Don't format specs
    # fmt: off
    METADATA_SPECS.append(
        MetadataSpec(
            'license', str,
            'The license under which this program/dataset is released.'))
    METADATA_SPECS.append(
        MetadataSpec(
            'source_url', str,
            'URL corresponding to the original source of this bundle.'))

    METADATA_SPECS.append(
        MetadataSpec('link_url', str, 'Link URL of bundle.', optional=True))
    # The two adjacent literals are concatenated, so the first one needs a
    # trailing space; without it the text read 'equal to"raw"'.
    METADATA_SPECS.append(
        MetadataSpec('link_format',
                     str, 'Link format of bundle. Can be equal to '
                     '"raw" or "zip" (only "raw" is supported as of now).',
                     optional=True))

    METADATA_SPECS.append(
        MetadataSpec(
            'store',
            str,
            'The name of the bundle store where the bundle should be uploaded to. '
            'If unspecified, an optimal available bundle store will be chosen.',
            default=None,
            hidden=True,
            optional=True))

    # fmt: on

    @classmethod
    def construct(cls, metadata, owner_id, uuid=None):
        """Build a bundle row and pass it to the parent ``construct``.

        Args:
            metadata: value stored under the row's ``metadata`` key.
            owner_id: value stored under the row's ``owner_id`` key.
            uuid: optional; only added to the row when truthy.

        Returns:
            Whatever the parent class's ``construct`` returns for the row.
        """
        row = {
            'bundle_type': cls.BUNDLE_TYPE,
            'command': None,
            'data_hash': None,
            'state': State.READY,
            'metadata': metadata,
            'dependencies': [],
            'owner_id': owner_id,
        }
        if uuid:
            row['uuid'] = uuid
        return super(UploadedBundle, cls).construct(row)

    def run(self, bundle_store, parent_dict):
        """Fail unconditionally; both arguments are ignored.

        Raises:
            AssertionError: always, naming the concrete class.
        """
        # Raise explicitly rather than ``assert False``: assert statements are
        # removed when Python runs with -O, which would turn this guard into a
        # silent no-op.
        raise AssertionError(
            '%ss should never be run!' % (self.__class__.__name__, ))
Example #6
0
class NamedBundle(Bundle):
    """Bundle whose metadata must contain a non-empty, well-formed name."""

    NAME_LENGTH = 32

    METADATA_SPECS = (
        MetadataSpec(
            'name',
            basestring,
            'Short variable name (not necessarily unique); must conform to %s.' % spec_util.NAME_REGEX.pattern,
            short_key='n',
        ),
        MetadataSpec(
            'description',
            basestring,
            'Full description of the bundle.',
            short_key='d',
        ),
        MetadataSpec(
            'tags',
            list,
            'Space-separated list of tags used for search (e.g., machine-learning).',
            metavar='TAG',
        ),
        MetadataSpec(
            'created',
            int,
            'Time when this bundle was created.',
            generated=True,
            formatting='date',
        ),
        MetadataSpec(
            'data_size',
            int,
            'Size of this bundle (in bytes).',
            generated=True,
            formatting='size',
        ),
        MetadataSpec(
            'failure_message',
            basestring,
            'Error message if this run bundle failed.',
            generated=True,
        ),
    )

    @classmethod
    def construct(cls, row):
        """Instantiate ``cls`` from ``row`` after stamping a creation time.

        ``row['metadata']`` is replaced by a copy that has ``created`` set to
        the current time in whole seconds; the passed-in ``row`` dict itself
        is updated in place.
        """
        stamped = dict(row['metadata'])
        stamped['created'] = int(time.time())
        row['metadata'] = stamped
        return cls(row)

    def validate(self):
        """Run the parent validation, then require a valid non-empty name.

        Raises:
            UsageError: if ``self.metadata.name`` is falsy.
        """
        super(NamedBundle, self).validate()
        type_label = self.bundle_type.title()
        name = self.metadata.name
        if name:
            spec_util.check_name(self.metadata.name)
            return
        raise UsageError('%ss must have non-empty names' % (type_label,))

    def __repr__(self):
        """Return ``ClassName(uuid=..., name=...)`` using str()-ed values."""
        class_name = self.__class__.__name__
        uuid_text = str(self.uuid)
        name_text = str(self.metadata.name)
        return '%s(uuid=%r, name=%r)' % (class_name, uuid_text, name_text)

    def simple_str(self):
        """Return the bundle name immediately followed by ``(uuid)``."""
        prefix = self.metadata.name + '('
        return prefix + self.uuid + ')'
class DerivedBundle(NamedBundle):
    """Named bundle whose dependencies are derived from a list of targets.

    Adds the boolean ``allow_failed_dependencies`` spec (default False) to
    the metadata specs inherited from NamedBundle.
    """

    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)  # type: List[ClassVar]
    # Don't format specs
    # fmt: off
    METADATA_SPECS += [
        MetadataSpec(
            'allow_failed_dependencies',
            bool,
            'Whether to allow this bundle to have failed or killed dependencies.',
            default=False,
        ),
    ]
    # fmt: on

    @classmethod
    def construct(cls, targets, command, metadata, owner_id, uuid, data_hash,
                  state):
        """Build a bundle row from ``targets`` and pass it to the parent.

        Args:
            targets: sequence of ``(child_path, (parent_uuid, parent_path))``
                entries; each becomes one dependency dict.
            command: value stored under the row's ``command`` key.
            metadata: value stored under the row's ``metadata`` key.
            owner_id: value stored under the row's ``owner_id`` key.
            uuid: bundle uuid; a new one is generated when this is falsy.
            data_hash: value stored under the row's ``data_hash`` key.
            state: value stored under the row's ``state`` key.

        Returns:
            Whatever the parent class's ``construct`` returns for the row.

        Raises:
            UsageError: if there is more than one target and any of them has
                an empty-string key.
        """
        bundle_uuid = uuid or spec_util.generate_uuid()

        # With several targets every one of them must be keyed.
        if len(targets) > 1:
            for key, _ in targets:
                if key == '':
                    raise UsageError(
                        'Must specify keys when packaging multiple targets!')

        # One dependency record per target, all pointing at this bundle.
        dependencies = [
            {
                'child_uuid': bundle_uuid,
                'child_path': child_path,
                'parent_uuid': parent_uuid,
                'parent_path': parent_path,
            }
            for (child_path, (parent_uuid, parent_path)) in targets
        ]

        row = {
            'uuid': bundle_uuid,
            'bundle_type': cls.BUNDLE_TYPE,
            'command': command,
            'data_hash': data_hash,
            'state': state,
            'metadata': metadata,
            'dependencies': dependencies,
            'owner_id': owner_id,
        }
        return super(DerivedBundle, cls).construct(row)
Example #8
0
class RunBundle(DerivedBundle):
    """Derived bundle of type ``'run'`` that requires a string command.

    Extends the DerivedBundle metadata specs with resource requests
    (``request_*``), ``exclude_patterns``, and a set of generated fields
    describing the run.
    """

    BUNDLE_TYPE = 'run'
    METADATA_SPECS = list(DerivedBundle.METADATA_SPECS)  # type: List
    # Don't format metadata specs
    # fmt: off
    METADATA_SPECS.extend([
        # Note that these are strings, which need to be parsed
        # Request a machine with this much resources and don't let run exceed these resources
        MetadataSpec(
            'request_docker_image',
            str,
            'Which docker image (either tag or digest, e.g., '
            'codalab/default-cpu:latest) we wish to use.',
            completer=DockerImagesCompleter,
            hide_when_anonymous=True,
            default=None,
        ),
        MetadataSpec(
            'request_time',
            str,
            'Amount of time (e.g., 3, 3m, 3h, 3d) allowed for this run. Defaults to user time quota left.',
            formatting='duration',
            default=None,
        ),
        MetadataSpec(
            'request_memory',
            str,
            'Amount of memory (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run.',
            formatting='size',
            default='2g',
        ),
        MetadataSpec(
            'request_disk',
            str,
            'Amount of disk space (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run. Defaults to user disk quota left.',
            formatting='size',
            default=None,
        ),
        MetadataSpec(
            'request_cpus',
            int,
            'Number of CPUs allowed for this run.',
            default=1,
        ),
        MetadataSpec(
            'request_gpus',
            int,
            'Number of GPUs allowed for this run.',
            default=0,
        ),
        MetadataSpec(
            'request_queue',
            str,
            'Submit run to this job queue.',
            hide_when_anonymous=True,
            default=None,
        ),
        MetadataSpec(
            'request_priority',
            int,
            'Job priority (higher is more important). Negative '
            'priority bundles are queued behind bundles with no specified priority.',
            default=None,
        ),
        MetadataSpec(
            'request_network',
            bool,
            'Whether to allow network access.',
            default=False,
        ),
        MetadataSpec(
            'exclude_patterns',
            list,
            'Exclude these file patterns from being saved into the bundle contents.',
            default=[],
        ),

        MetadataSpec(
            'actions',
            list,
            'Actions (e.g., kill) that were performed on this run.',
            generated=True,
        ),

        MetadataSpec(
            'time',
            float,
            'Amount of wall clock time (seconds) used by this run in total. '
            '[Runtime of the Docker container excluding CodaLab related '
            'steps such as preparing/uploading results]',
            generated=True,
            formatting='duration',
        ),
        MetadataSpec(
            'time_user',
            float,
            'Amount of user time (seconds) used by this run.',
            generated=True,
            formatting='duration',
        ),
        MetadataSpec(
            'time_system',
            float,
            'Amount of system time (seconds) used by this run.',
            generated=True,
            formatting='duration',
        ),
        MetadataSpec(
            'memory',
            float,
            'Amount of memory (bytes) used by this run.',
            generated=True,
            formatting='size',
        ),
        MetadataSpec(
            'memory_max',
            float,
            'Maximum amount of memory (bytes) used by this run at any time during execution.',
            generated=True,
            formatting='size',
        ),

        MetadataSpec(
            'started',
            int,
            'Time when this bundle started executing.',
            generated=True,
            formatting='date',
        ),
        MetadataSpec(
            'last_updated',
            int,
            'Time when information about this bundle was last updated.',
            generated=True,
            formatting='date',
        ),
        MetadataSpec(
            'run_status',
            str,
            'Execution status of the bundle.',
            generated=True,
        ),
        MetadataSpec(
            'staged_status',
            str,
            'Information about the status of the staged bundle.',
            generated=True,
        ),

        # Information about running
        MetadataSpec(
            'docker_image',
            str,
            'Which docker image was used to run the process.',
            generated=True,
            hide_when_anonymous=True,
        ),
        MetadataSpec(
            'exitcode',
            int,
            'Exitcode of the process.',
            generated=True,
        ),
        MetadataSpec(
            'job_handle',
            str,
            'Identifies the job handle (internal).',
            generated=True,
            hide_when_anonymous=True,
        ),
        MetadataSpec(
            'remote',
            str,
            'Where this job is/was run (internal).',
            generated=True,
            hide_when_anonymous=True,
        ),
    ])
    # fmt: on

    @classmethod
    def construct(cls,
                  targets,
                  command,
                  metadata,
                  owner_id,
                  uuid=None,
                  data_hash=None,
                  state=State.CREATED):
        """Check the command type, then delegate to the parent ``construct``.

        All arguments are forwarded positionally and unchanged to the parent
        class's ``construct``.

        Raises:
            UsageError: if ``command`` is not a ``str`` instance.
        """
        if isinstance(command, str):
            parent = super(RunBundle, cls)
            return parent.construct(targets, command, metadata, owner_id,
                                    uuid, data_hash, state)
        raise UsageError('%r is not a valid command!' % (command, ))

    def validate(self):
        """Run the parent validation, then validate every dependency.

        Each dependency is validated with ``require_child_path=True``.
        """
        super(RunBundle, self).validate()
        for dependency in self.dependencies:
            dependency.validate(require_child_path=True)
Example #9
0
class RunBundle(DerivedBundle):
    """Derived bundle of type ``'run'`` that requires a string command.

    Extends the DerivedBundle metadata specs with resource requests
    (``request_*``) and a set of generated fields describing the run.
    """

    BUNDLE_TYPE = 'run'
    METADATA_SPECS = list(DerivedBundle.METADATA_SPECS)
    METADATA_SPECS.extend([
        # Note that these are strings, which need to be parsed
        # Request a machine with this much resources and don't let run exceed these resources
        MetadataSpec(
            'request_docker_image',
            basestring,
            'Which docker image (e.g., codalab/ubuntu:1.9) we wish to use.',
            completer=DockerImagesCompleter,
        ),
        MetadataSpec(
            'request_time',
            basestring,
            'Amount of time (e.g., 3, 3m, 3h, 3d) allowed for this run.',
            formatting='duration',
        ),
        MetadataSpec(
            'request_memory',
            basestring,
            'Amount of memory (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run.',
            formatting='size',
        ),
        MetadataSpec(
            'request_disk',
            basestring,
            'Amount of disk space (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run.',
            formatting='size',
        ),
        MetadataSpec(
            'request_cpus',
            int,
            'Number of CPUs allowed for this run.',
        ),
        MetadataSpec(
            'request_gpus',
            int,
            'Number of GPUs allowed for this run.',
        ),
        MetadataSpec(
            'request_queue',
            basestring,
            'Submit run to this job queue.',
        ),
        MetadataSpec(
            'request_priority',
            int,
            'Job priority (higher is more important).',
        ),
        MetadataSpec(
            'request_network',
            bool,
            'Whether to allow network access.',
        ),

        MetadataSpec(
            'actions',
            list,
            'Actions (e.g., kill) that were performed on this run.',
            generated=True,
        ),

        MetadataSpec(
            'time',
            float,
            'Amount of time (seconds) used by this run (total).',
            generated=True,
            formatting='duration',
        ),
        MetadataSpec(
            'time_user',
            float,
            'Amount of time (seconds) by user.',
            generated=True,
            formatting='duration',
        ),
        MetadataSpec(
            'time_system',
            float,
            'Amount of time (seconds) by the system.',
            generated=True,
            formatting='duration',
        ),
        MetadataSpec(
            'memory',
            float,
            'Amount of memory (bytes) used by this run.',
            generated=True,
            formatting='size',
        ),
        MetadataSpec(
            'memory_max',
            float,
            'Maximum amount of memory (bytes) used by this run at any time during execution.',
            generated=True,
            formatting='size',
        ),

        MetadataSpec(
            'started',
            int,
            'Time when this bundle started executing.',
            generated=True,
            formatting='date',
        ),
        MetadataSpec(
            'last_updated',
            int,
            'Time when information about this bundle was last updated.',
            generated=True,
            formatting='date',
        ),
        MetadataSpec(
            'run_status',
            basestring,
            'Execution status of the bundle.',
            generated=True,
        ),

        # Information about running
        MetadataSpec(
            'docker_image',
            basestring,
            'Which docker image was used to run the process.',
            generated=True,
        ),
        MetadataSpec(
            'exitcode',
            int,
            'Exitcode of the process.',
            generated=True,
        ),
        MetadataSpec(
            'job_handle',
            basestring,
            'Identifies the job handle (internal).',
            generated=True,
        ),
        MetadataSpec(
            'remote',
            basestring,
            'Where this job is/was run (internal).',
            generated=True,
        ),
    ])

    @classmethod
    def construct(cls,
                  targets,
                  command,
                  metadata,
                  owner_id,
                  uuid=None,
                  data_hash=None,
                  state=State.CREATED):
        """Check the command type, then delegate to the parent ``construct``.

        All arguments are forwarded positionally and unchanged to the parent
        class's ``construct``.

        Raises:
            UsageError: if ``command`` is not a ``basestring`` instance.
        """
        if isinstance(command, basestring):
            parent = super(RunBundle, cls)
            return parent.construct(targets, command, metadata, owner_id,
                                    uuid, data_hash, state)
        raise UsageError('%r is not a valid command!' % (command, ))
Example #10
0
class RunBundle(NamedBundle):
    """Named bundle of type ``'run'`` built from targets and a command.

    Extends the NamedBundle metadata specs with resource requests
    (``request_*``) and a set of generated fields describing the run.
    """

    BUNDLE_TYPE = 'run'
    METADATA_SPECS = list(NamedBundle.METADATA_SPECS)
    METADATA_SPECS.extend([
        # Note that these are strings, which need to be parsed
        # Request a machine with this much resources and don't let run exceed these resources
        MetadataSpec('request_docker_image', basestring,
                     'which docker container we wish to use'),
        MetadataSpec('request_time', basestring,
                     'amount of time (e.g. 3, 3m, 3h, 3d) allowed for this run'),
        MetadataSpec('request_memory', basestring,
                     'amount of memory (e.g., 3, 3k, 3m, 3g, 3t) allowed for this run'),
        MetadataSpec('request_disk', basestring,
                     'amount of disk space (e.g. 3, 3k, 3m, 3g, 3t) allowed for this run'),
        MetadataSpec('request_cpus', int,
                     'number of CPUs allowed for this run'),
        MetadataSpec('request_gpus', int,
                     'number of GPUs allowed for this run'),
        MetadataSpec('request_queue', basestring,
                     'submit job to this queue'),
        MetadataSpec('request_priority', int,
                     'job priority (higher is more important)'),

        MetadataSpec('actions', list,
                     'actions performed on this run',
                     generated=True),

        MetadataSpec('time', float,
                     'amount of time (seconds) used by this run (total)',
                     generated=True, formatting='duration'),
        MetadataSpec('time_user', float,
                     'amount of time (seconds) by user',
                     generated=True, formatting='duration'),
        MetadataSpec('time_system', float,
                     'amount of time (seconds) by the system',
                     generated=True, formatting='duration'),
        MetadataSpec('memory', float,
                     'amount of memory (bytes) used by this run',
                     generated=True, formatting='size'),
        MetadataSpec('disk_read', float,
                     'number of bytes read',
                     generated=True, formatting='size'),
        MetadataSpec('disk_write', float,
                     'number of bytes written',
                     generated=True, formatting='size'),

        # Information about running
        MetadataSpec('docker_image', basestring,
                     'which docker container was used to run the process',
                     generated=True),
        MetadataSpec('exitcode', int,
                     'exitcode of the process',
                     generated=True),
        MetadataSpec('job_handle', basestring,
                     'identifies the job handle (internal)',
                     generated=True),
        MetadataSpec('remote', basestring,
                     'where this job was run',
                     generated=True),
        MetadataSpec('temp_dir', basestring,
                     'temporary directory where job is running (internal)',
                     generated=True),
    ])

    @classmethod
    def construct(cls, targets, command, metadata, owner_id, uuid=None, data_hash=None, state=State.CREATED):
        """Build a run-bundle row from ``targets`` and pass it to the parent.

        Args:
            targets: sequence of ``(child_path, (parent_uuid, parent_path))``
                entries; each becomes one dependency dict.
            command: must be a ``basestring`` instance.
            metadata: value stored under the row's ``metadata`` key.
            owner_id: value stored under the row's ``owner_id`` key.
            uuid: bundle uuid; a new one is generated when this is falsy.
            data_hash: value stored under the row's ``data_hash`` key.
            state: value stored under the row's ``state`` key.

        Returns:
            Whatever the parent class's ``construct`` returns for the row.

        Raises:
            UsageError: if there is more than one target and any of them has
                an empty-string key, or if ``command`` is not a string.
        """
        bundle_uuid = uuid or spec_util.generate_uuid()

        # With several targets every one of them must be keyed.
        if len(targets) > 1:
            for key, _ in targets:
                if key == '':
                    raise UsageError('Must specify keys when packaging multiple targets!')
        if not isinstance(command, basestring):
            raise UsageError('%r is not a valid command!' % (command,))

        # One dependency record per target, all pointing at this bundle.
        dependencies = [
            {
                'child_uuid': bundle_uuid,
                'child_path': child_path,
                'parent_uuid': parent_uuid,
                'parent_path': parent_path,
            }
            for (child_path, (parent_uuid, parent_path)) in targets
        ]

        row = {
            'uuid': bundle_uuid,
            'bundle_type': cls.BUNDLE_TYPE,
            'command': command,
            'data_hash': data_hash,
            'state': state,
            'metadata': metadata,
            'dependencies': dependencies,
            'owner_id': owner_id,
        }
        return super(RunBundle, cls).construct(row)