Beispiel #1
0
            print >> sys.stderr, """
Permissions on password file are too lax.
Only the user should be allowed to access the file.
On Linux, run:
chmod 600 %s""" % args.password_file
            exit(1)
        with open(args.password_file) as f:
            username = f.readline().strip()
            password = f.readline().strip()
    else:
        username = raw_input('Username: '******'%(asctime)s %(message)s',
                            level=logging.DEBUG)

    max_work_dir_size_bytes = parse_size(args.max_work_dir_size)
    worker = Worker(args.id, args.tag, args.work_dir, max_work_dir_size_bytes,
                    args.shared_file_system, args.slots,
                    BundleServiceClient(args.server, username, password),
                    DockerClient())

    # Register a signal handler to ensure safe shutdown.
    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]:
        signal.signal(sig, lambda signup, frame: worker.signal())

    print 'Worker started.'
    worker.run()
Beispiel #2
0
def main():
    """
    Entry point of the CodaLab worker.

    Parses the command line, obtains the bundle service credentials, builds
    a Worker backed by a local-filesystem/Docker run manager and starts it.

    Credentials are taken from --password-file when it is given (username on
    the first line, password on the second). Otherwise they come from the
    CODALAB_USERNAME / CODALAB_PASSWORD environment variables, prompting
    interactively for whichever of the two is not set.

    Exits with status 1 if the password file is accessible by group or
    others. Returns without starting the worker if the bundle service
    rejects the credentials.
    """
    parser = argparse.ArgumentParser(description='CodaLab worker.')
    parser.add_argument('--tag', help='Tag that allows for scheduling runs on specific ' 'workers.')
    parser.add_argument(
        '--server',
        default='https://worksheets.codalab.org',
        help='URL of the CodaLab server, in the format '
        '<http|https>://<hostname>[:<port>] (e.g., https://worksheets.codalab.org)',
    )
    parser.add_argument(
        '--work-dir',
        default='codalab-worker-scratch',
        help='Directory where to store temporary bundle data, '
        'including dependencies and the data from run '
        'bundles.',
    )
    parser.add_argument(
        '--network-prefix', default='codalab_worker_network', help='Docker network name prefix'
    )
    parser.add_argument(
        '--cpuset',
        type=str,
        metavar='CPUSET_STR',
        default='ALL',
        help='Comma-separated list of CPUs in which to allow bundle execution, '
        '(e.g., \"0,2,3\", \"1\").',
    )
    parser.add_argument(
        '--gpuset',
        type=str,
        metavar='GPUSET_STR',
        default='ALL',
        help='Comma-separated list of GPUs in which to allow bundle execution. '
        'Each GPU can be specified by its index or UUID '
        '(e.g., \"0,1\", \"1\", \"GPU-62casdfasd-asfas...\").',
    )
    parser.add_argument(
        '--max-work-dir-size',
        type=str,
        metavar='SIZE',
        default='10g',
        help='Maximum size of the temporary bundle data ' '(e.g., 3, 3k, 3m, 3g, 3t).',
    )
    parser.add_argument(
        '--max-image-cache-size',
        type=str,
        metavar='SIZE',
        help='Limit the disk space used to cache Docker images '
        'for worker jobs to the specified amount (e.g. '
        '3, 3k, 3m, 3g, 3t). If the limit is exceeded, '
        'the least recently used images are removed first. '
        'Worker will not remove any images if this option '
        'is not specified.',
    )
    parser.add_argument(
        '--password-file',
        help='Path to the file containing the username and '
        'password for logging into the bundle service, '
        'each on a separate line. If not specified, the '
        'password is read from standard input.',
    )
    parser.add_argument(
        '--verbose', action='store_true', help='Whether to output verbose log messages.'
    )
    parser.add_argument(
        '--exit-when-idle',
        action='store_true',
        help='If specified the worker quits if it finds itself with no jobs after a checkin',
    )
    parser.add_argument(
        '--id',
        default='%s(%d)' % (socket.gethostname(), os.getpid()),
        help='Internal use: ID to use for the worker.',
    )
    parser.add_argument(
        '--shared-file-system',
        action='store_true',
        help='Internal use: Whether the file system containing '
        'bundle data is shared between the bundle service '
        'and the worker.',
    )
    args = parser.parse_args()

    # Set up logging before the first log call, so that the 'Connecting to'
    # message below goes through the configured handler instead of being lost.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(message)s', level=log_level)

    # Get the username and password.
    logger.info('Connecting to %s', args.server)
    if args.password_file:
        # Refuse a password file that group or others may access in any way.
        if os.stat(args.password_file).st_mode & (stat.S_IRWXG | stat.S_IRWXO):
            # Written directly to stderr so the output does not depend on
            # whether `print` is a statement or a function in this module.
            sys.stderr.write(
                '\n'
                'Permissions on password file are too lax.\n'
                'Only the user should be allowed to access the file.\n'
                'On Linux, run:\n'
                'chmod 600 %s\n' % args.password_file
            )
            sys.exit(1)
        with open(args.password_file) as f:
            username = f.readline().strip()
            password = f.readline().strip()
    else:
        # Environment variables take precedence; prompt only for what is missing.
        username = os.environ.get('CODALAB_USERNAME')
        if username is None:
            username = raw_input('Username: ')
        password = os.environ.get('CODALAB_PASSWORD')
        if password is None:
            password = getpass.getpass()

    try:
        bundle_service = BundleServiceClient(args.server, username, password)
    except BundleAuthException as ex:
        logger.error('Cannot log into the bundle service. Please check your worker credentials.\n')
        logger.debug('Auth error: %s', ex)
        return

    max_work_dir_size_bytes = parse_size(args.max_work_dir_size)
    if args.max_image_cache_size is None:
        # No limit given: the image manager receives None as its size limit.
        max_images_bytes = None
    else:
        max_images_bytes = parse_size(args.max_image_cache_size)

    if not os.path.exists(args.work_dir):
        logging.debug('Work dir %s doesn\'t exist, creating.', args.work_dir)
        os.makedirs(args.work_dir, 0o770)

    def create_local_run_manager(worker):
        """
        Build the LocalFilesystem-Docker RunManager for the given worker.

        To avoid circular dependencies the Worker initializer takes a
        RunManager factory to initialize its run manager; this function is
        that factory. All state files are kept inside the work directory.
        """
        docker_runtime = docker_utils.get_available_runtime()
        cpuset = parse_cpuset_args(args.cpuset)
        gpuset = parse_gpuset_args(args.gpuset)

        dependency_manager = LocalFileSystemDependencyManager(
            os.path.join(args.work_dir, 'dependencies-state.json'),
            bundle_service,
            args.work_dir,
            max_work_dir_size_bytes,
        )

        image_manager = DockerImageManager(
            os.path.join(args.work_dir, 'images-state.json'), max_images_bytes
        )

        return LocalRunManager(
            worker,
            image_manager,
            dependency_manager,
            os.path.join(args.work_dir, 'run-state.json'),
            cpuset,
            gpuset,
            args.work_dir,
            docker_runtime=docker_runtime,
            docker_network_prefix=args.network_prefix,
        )

    worker = Worker(
        create_local_run_manager,
        os.path.join(args.work_dir, 'worker-state.json'),
        args.id,
        args.tag,
        args.work_dir,
        args.exit_when_idle,
        bundle_service,
    )

    # Register a signal handler to ensure safe shutdown.
    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]:
        signal.signal(sig, lambda signum, frame: worker.signal())

    # BEGIN: DO NOT CHANGE THIS LINE UNLESS YOU KNOW WHAT YOU ARE DOING
    # THIS IS HERE TO KEEP TEST-CLI FROM HANGING
    print('Worker started.')
    # END

    worker.start()
Beispiel #3
0
def main():
    """
    Entry point of the CodaLab worker.

    Parses the command line, obtains the bundle service credentials, builds
    a Worker that talks to the bundle service and to Docker, and runs it.

    Credentials are taken from --password-file when it is given (username on
    the first line, password on the second). Otherwise they come from the
    CODALAB_USERNAME / CODALAB_PASSWORD environment variables, prompting
    interactively for whichever of the two is not set.

    Exits with status 1 if the password file is accessible by group or
    others.
    """
    parser = argparse.ArgumentParser(description='CodaLab worker.')
    parser.add_argument('--tag',
                        help='Tag that allows for scheduling runs on specific '
                        'workers.')
    parser.add_argument(
        '--server',
        default='https://worksheets.codalab.org',
        help='URL of the CodaLab server, in the format '
        '<http|https>://<hostname>[:<port>] (e.g., https://worksheets.codalab.org)'
    )
    parser.add_argument('--work-dir',
                        default='codalab-worker-scratch',
                        help='Directory where to store temporary bundle data, '
                        'including dependencies and the data from run '
                        'bundles.')
    parser.add_argument('--max-work-dir-size',
                        type=str,
                        metavar='SIZE',
                        default='10g',
                        help='Maximum size of the temporary bundle data '
                        '(e.g., 3, 3k, 3m, 3g, 3t).')
    parser.add_argument(
        '--max-image-cache-size',
        type=str,
        metavar='SIZE',
        help='Limit the disk space used to cache Docker images '
        'for worker jobs to the specified amount (e.g. '
        '3, 3k, 3m, 3g, 3t). If the limit is exceeded, '
        'the least recently used images are removed first. '
        'Worker will not remove any images if this option '
        'is not specified.')
    parser.add_argument('--slots',
                        type=int,
                        default=1,
                        help='Number of slots to use for running bundles. '
                        'A single bundle takes up a single slot.')
    parser.add_argument('--password-file',
                        help='Path to the file containing the username and '
                        'password for logging into the bundle service, '
                        'each on a separate line. If not specified, the '
                        'password is read from standard input.')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Whether to output verbose log messages.')
    parser.add_argument('--id',
                        default='%s(%d)' % (socket.gethostname(), os.getpid()),
                        help='Internal use: ID to use for the worker.')
    parser.add_argument(
        '--shared-file-system',
        action='store_true',
        help='Internal use: Whether the file system containing '
        'bundle data is shared between the bundle service '
        'and the worker.')
    args = parser.parse_args()

    # Set up logging before the first log call, so that the 'Connecting to'
    # message below goes through the configured handler instead of being lost.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(message)s', level=log_level)

    # Get the username and password.
    logger.info('Connecting to %s', args.server)
    if args.password_file:
        # Refuse a password file that group or others may access in any way.
        if os.stat(args.password_file).st_mode & (stat.S_IRWXG | stat.S_IRWXO):
            # Written directly to stderr so the output does not depend on
            # whether `print` is a statement or a function in this module.
            sys.stderr.write(
                '\n'
                'Permissions on password file are too lax.\n'
                'Only the user should be allowed to access the file.\n'
                'On Linux, run:\n'
                'chmod 600 %s\n' % args.password_file
            )
            sys.exit(1)
        with open(args.password_file) as f:
            username = f.readline().strip()
            password = f.readline().strip()
    else:
        # Environment variables take precedence; prompt only for what is missing.
        username = os.environ.get('CODALAB_USERNAME')
        if username is None:
            username = raw_input('Username: ')
        password = os.environ.get('CODALAB_PASSWORD')
        if password is None:
            password = getpass.getpass()

    max_work_dir_size_bytes = parse_size(args.max_work_dir_size)
    if args.max_image_cache_size is None:
        # No limit given: the worker receives None as its image cache limit.
        max_images_bytes = None
    else:
        max_images_bytes = parse_size(args.max_image_cache_size)
    worker = Worker(args.id, args.tag, args.work_dir, max_work_dir_size_bytes,
                    max_images_bytes, args.shared_file_system, args.slots,
                    BundleServiceClient(args.server, username, password),
                    DockerClient())

    # Register a signal handler to ensure safe shutdown.
    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]:
        signal.signal(sig, lambda signum, frame: worker.signal())

    logger.info('Worker started.')
    worker.run()
Beispiel #4
0
def main():
    """
    Entry point of the CodaLab worker.

    Parses the command line, obtains the bundle service credentials, builds
    a Worker backed by a local-filesystem/Docker run manager and starts it.

    Credentials are taken from --password-file when it is given (username on
    the first line, password on the second). Otherwise they come from the
    CODALAB_USERNAME / CODALAB_PASSWORD environment variables, prompting
    interactively for whichever of the two is not set.

    Exits with status 1 if the password file is accessible by group or
    others. Returns without starting the worker if the bundle service
    rejects the credentials.
    """
    parser = argparse.ArgumentParser(description='CodaLab worker.')
    parser.add_argument('--tag', help='Tag that allows for scheduling runs on specific ' 'workers.')
    parser.add_argument(
        '--server',
        default='https://worksheets.codalab.org',
        help='URL of the CodaLab server, in the format '
        '<http|https>://<hostname>[:<port>] (e.g., https://worksheets.codalab.org)',
    )
    parser.add_argument(
        '--work-dir',
        default='codalab-worker-scratch',
        help='Directory where to store temporary bundle data, '
        'including dependencies and the data from run '
        'bundles.',
    )
    parser.add_argument(
        '--network-prefix', default='codalab_worker_network', help='Docker network name prefix'
    )
    parser.add_argument(
        '--cpuset',
        type=str,
        metavar='CPUSET_STR',
        default='ALL',
        help='Comma-separated list of CPUs in which to allow bundle execution, '
        '(e.g., \"0,2,3\", \"1\").',
    )
    parser.add_argument(
        '--gpuset',
        type=str,
        metavar='GPUSET_STR',
        default='ALL',
        help='Comma-separated list of GPUs in which to allow bundle execution. '
        'Each GPU can be specified by its index or UUID '
        '(e.g., \"0,1\", \"1\", \"GPU-62casdfasd-asfas...\").',
    )
    parser.add_argument(
        '--max-work-dir-size',
        type=str,
        metavar='SIZE',
        default='10g',
        help='Maximum size of the temporary bundle data ' '(e.g., 3, 3k, 3m, 3g, 3t).',
    )
    parser.add_argument(
        '--max-image-cache-size',
        type=str,
        metavar='SIZE',
        help='Limit the disk space used to cache Docker images '
        'for worker jobs to the specified amount (e.g. '
        '3, 3k, 3m, 3g, 3t). If the limit is exceeded, '
        'the least recently used images are removed first. '
        'Worker will not remove any images if this option '
        'is not specified.',
    )
    parser.add_argument(
        '--password-file',
        help='Path to the file containing the username and '
        'password for logging into the bundle service, '
        'each on a separate line. If not specified, the '
        'password is read from standard input.',
    )
    parser.add_argument(
        '--verbose', action='store_true', help='Whether to output verbose log messages.'
    )
    parser.add_argument(
        '--exit-when-idle',
        action='store_true',
        help='If specified the worker quits if it finds itself with no jobs after a checkin',
    )
    parser.add_argument(
        '--id',
        default='%s(%d)' % (socket.gethostname(), os.getpid()),
        help='Internal use: ID to use for the worker.',
    )
    parser.add_argument(
        '--shared-file-system',
        action='store_true',
        help='Internal use: Whether the file system containing '
        'bundle data is shared between the bundle service '
        'and the worker.',
    )
    args = parser.parse_args()

    # Set up logging before the first log call, so that the 'Connecting to'
    # message below goes through the configured handler instead of being lost.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(message)s', level=log_level)

    # Get the username and password.
    logger.info('Connecting to %s', args.server)
    if args.password_file:
        # Refuse a password file that group or others may access in any way.
        if os.stat(args.password_file).st_mode & (stat.S_IRWXG | stat.S_IRWXO):
            # Written directly to stderr so the output does not depend on
            # whether `print` is a statement or a function in this module.
            sys.stderr.write(
                '\n'
                'Permissions on password file are too lax.\n'
                'Only the user should be allowed to access the file.\n'
                'On Linux, run:\n'
                'chmod 600 %s\n' % args.password_file
            )
            sys.exit(1)
        with open(args.password_file) as f:
            username = f.readline().strip()
            password = f.readline().strip()
    else:
        # Environment variables take precedence; prompt only for what is missing.
        username = os.environ.get('CODALAB_USERNAME')
        if username is None:
            username = raw_input('Username: ')
        password = os.environ.get('CODALAB_PASSWORD')
        if password is None:
            password = getpass.getpass()

    try:
        bundle_service = BundleServiceClient(args.server, username, password)
    except BundleAuthException as ex:
        logger.error('Cannot log into the bundle service. Please check your worker credentials.\n')
        logger.debug('Auth error: %s', ex)
        return

    max_work_dir_size_bytes = parse_size(args.max_work_dir_size)
    if args.max_image_cache_size is None:
        # No limit given: the image manager receives None as its size limit.
        max_images_bytes = None
    else:
        max_images_bytes = parse_size(args.max_image_cache_size)

    if not os.path.exists(args.work_dir):
        logging.debug('Work dir %s doesn\'t exist, creating.', args.work_dir)
        os.makedirs(args.work_dir, 0o770)

    def create_local_run_manager(worker):
        """
        Build the LocalFilesystem-Docker RunManager for the given worker.

        To avoid circular dependencies the Worker initializer takes a
        RunManager factory to initialize its run manager; this function is
        that factory. All state files are kept inside the work directory.
        """
        docker_runtime = docker_utils.get_available_runtime()
        cpuset = parse_cpuset_args(args.cpuset)
        gpuset = parse_gpuset_args(args.gpuset)

        dependency_manager = LocalFileSystemDependencyManager(
            os.path.join(args.work_dir, 'dependencies-state.json'),
            bundle_service,
            args.work_dir,
            max_work_dir_size_bytes,
        )

        image_manager = DockerImageManager(
            os.path.join(args.work_dir, 'images-state.json'), max_images_bytes
        )

        return LocalRunManager(
            worker,
            image_manager,
            dependency_manager,
            os.path.join(args.work_dir, 'run-state.json'),
            cpuset,
            gpuset,
            args.work_dir,
            docker_runtime=docker_runtime,
            docker_network_prefix=args.network_prefix,
        )

    worker = Worker(
        create_local_run_manager,
        os.path.join(args.work_dir, 'worker-state.json'),
        args.id,
        args.tag,
        args.work_dir,
        args.exit_when_idle,
        bundle_service,
    )

    # Register a signal handler to ensure safe shutdown.
    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]:
        signal.signal(sig, lambda signum, frame: worker.signal())

    # BEGIN: DO NOT CHANGE THIS LINE UNLESS YOU KNOW WHAT YOU ARE DOING
    # THIS IS HERE TO KEEP TEST-CLI FROM HANGING
    logger.info('Worker started!')
    # END

    worker.start()
Beispiel #5
0
def start_bundle_container(
    bundle_path,
    uuid,
    dependencies,
    command,
    docker_image,
    network=None,
    cpuset=None,
    gpuset=None,
    memory_bytes=0,
    detach=True,
    tty=False,
    runtime=DEFAULT_RUNTIME,
):
    """
    Run a bundle's command inside a new Docker container and return the container.

    The command is wrapped in `bash -c` with its stdout and stderr redirected
    to files named `stdout` and `stderr` in the working directory, which is
    `/<uuid>` inside the container. The container runs as the user and group
    that own `bundle_path`, so files it creates are not owned by root.

    :param bundle_path: host path of the bundle directory; also stat'ed for its owner
    :param uuid: bundle UUID, used as the in-container bundle path `/<uuid>`
    :param dependencies: passed to get_bundle_container_volume_binds to build the volumes
    :param command: shell command to run; a trailing ';' is added when missing
    :param docker_image: image to run
    :param network: Docker network to attach the container to
    :param cpuset: iterable of CPU ids the container may use; empty/None sets no restriction string
    :param gpuset: iterable of GPU ids exposed under the NVIDIA runtime; empty/None means 'all'
    :param memory_bytes: hard memory limit; must be at least 4m
    :param detach: forwarded to the Docker client
    :param tty: forwarded as both `tty` and `stdin_open`
    :param runtime: Docker runtime to use
    :return: the object returned by `client.containers.run`
    :raises DockerException: if memory_bytes is below the 4m minimum
    """
    # Impose a minimum container request memory 4mb, same as docker's minimum allowed value
    # https://docs.docker.com/config/containers/resource_constraints/#limit-a-containers-access-to-memory
    # When using the REST api, it is allowed to set Memory to 0 but that means the container has unbounded
    # access to the host machine's memory, which we have decided to not allow
    min_memory_bytes = parse_size('4m')
    if memory_bytes < min_memory_bytes:
        raise DockerException('Minimum memory must be 4m ({} bytes)'.format(min_memory_bytes))
    if not command.endswith(';'):
        command = '{};'.format(command)
    docker_command = ['bash', '-c', '( %s ) >stdout 2>stderr' % command]
    docker_bundle_path = '/' + uuid
    volumes = get_bundle_container_volume_binds(bundle_path, docker_bundle_path, dependencies)
    environment = {'HOME': docker_bundle_path}
    working_dir = docker_bundle_path
    # Unset entrypoint regardless of image
    entrypoint = ''
    # Stringify each id so that integer CPU indices are accepted as well.
    cpuset_str = ','.join(str(cpu) for cpu in cpuset) if cpuset else ''
    # Get user/group that owns the bundle directory
    # Then we can ensure that any created files are owned by the user/group
    # that owns the bundle directory, not root.
    bundle_stat = os.stat(bundle_path)
    uid = bundle_stat.st_uid
    gid = bundle_stat.st_gid
    # TODO: Fix potential permissions issues arising from this setting
    # This can cause problems if users expect to run as a specific user
    # Docker expects the user as "<uid>:<gid>".
    user = '%s:%s' % (uid, gid)

    if runtime == NVIDIA_RUNTIME:
        # nvidia-docker runtime uses this env variable to allocate GPUs
        environment['NVIDIA_VISIBLE_DEVICES'] = (
            ','.join(str(gpu) for gpu in gpuset) if gpuset else 'all'
        )

    container = client.containers.run(
        image=docker_image,
        command=docker_command,
        network=network,
        mem_limit=memory_bytes,
        cpuset_cpus=cpuset_str,
        environment=environment,
        working_dir=working_dir,
        entrypoint=entrypoint,
        volumes=volumes,
        user=user,
        detach=detach,
        runtime=runtime,
        tty=tty,
        stdin_open=tty,
    )
    logger.debug('Started Docker container for UUID %s, container ID %s,', uuid, container.id)
    return container
Beispiel #6
0
def main():
    """
    Entry point of the CodaLab worker.

    Parses the command line, obtains the bundle service credentials, builds
    a Worker and runs it. Runs are submitted to the local Docker daemon by
    default, or to AWS Batch when --batch-queue is given.

    Credentials are taken from --password-file when it is given (username on
    the first line, password on the second). Otherwise they come from the
    CODALAB_USERNAME / CODALAB_PASSWORD environment variables, prompting
    interactively for whichever of the two is not set.

    Exits with status 1 if the password file is accessible by group or
    others, or if AWS Batch is requested but boto3 cannot be imported.
    """
    parser = argparse.ArgumentParser(description='CodaLab worker.')
    parser.add_argument('--tag',
                        help='Tag that allows for scheduling runs on specific '
                             'workers.')
    parser.add_argument('--server', default='https://worksheets.codalab.org',
                        help='URL of the CodaLab server, in the format '
                             '<http|https>://<hostname>[:<port>] (e.g., https://worksheets.codalab.org)')
    parser.add_argument('--work-dir', default='codalab-worker-scratch',
                        help='Directory where to store temporary bundle data, '
                             'including dependencies and the data from run '
                             'bundles.')
    parser.add_argument('--network-prefix', default='codalab_worker_network',
                        help='Docker network name prefix')
    parser.add_argument('--cpuset', type=str, metavar='CPUSET_STR', default='ALL',
                        help='Comma-separated list of CPUs in which to allow bundle execution, '
                             '(e.g., \"0,2,3\", \"1\").')
    parser.add_argument('--gpuset', type=str, metavar='GPUSET_STR', default='ALL',
                        help='Comma-separated list of GPUs in which to allow bundle execution '
                             '(e.g., \"0,1\", \"1\").')
    parser.add_argument('--max-work-dir-size', type=str, metavar='SIZE', default='10g',
                        help='Maximum size of the temporary bundle data '
                             '(e.g., 3, 3k, 3m, 3g, 3t).')
    parser.add_argument('--max-dependencies-serialized-length', type=int, default=60000,
                        help='Maximum length of serialized json of dependency list of worker '
                             '(e.g., 50, 30000, 60000).')
    parser.add_argument('--max-image-cache-size', type=str, metavar='SIZE',
                        help='Limit the disk space used to cache Docker images '
                             'for worker jobs to the specified amount (e.g. '
                             '3, 3k, 3m, 3g, 3t). If the limit is exceeded, '
                             'the least recently used images are removed first. '
                             'Worker will not remove any images if this option '
                             'is not specified.')
    parser.add_argument('--password-file',
                        help='Path to the file containing the username and '
                             'password for logging into the bundle service, '
                             'each on a separate line. If not specified, the '
                             'password is read from standard input.')
    parser.add_argument('--verbose', action='store_true',
                        help='Whether to output verbose log messages.')
    parser.add_argument('--id', default='%s(%d)' % (socket.gethostname(), os.getpid()),
                        help='Internal use: ID to use for the worker.')
    parser.add_argument('--shared-file-system', action='store_true',
                        help='Internal use: Whether the file system containing '
                             'bundle data is shared between the bundle service '
                             'and the worker.')
    parser.add_argument('--batch-queue',
                        help='Name of the AWS Batch queue to use for run submission. '
                             'Providing this option will cause runs to be submitted to Batch rather than local docker. '
                             'The queue must already exist and you must have AWS credentials to submit to it.'
                        )
    args = parser.parse_args()

    # Set up logging before the first log call, so that the 'Connecting to'
    # message below goes through the configured handler instead of being lost.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(message)s', level=log_level)

    # Get the username and password.
    logger.info('Connecting to %s', args.server)
    if args.password_file:
        # Refuse a password file that group or others may access in any way.
        if os.stat(args.password_file).st_mode & (stat.S_IRWXG | stat.S_IRWXO):
            # Written directly to stderr so the output does not depend on
            # whether `print` is a statement or a function in this module.
            sys.stderr.write(
                '\n'
                'Permissions on password file are too lax.\n'
                'Only the user should be allowed to access the file.\n'
                'On Linux, run:\n'
                'chmod 600 %s\n' % args.password_file
            )
            sys.exit(1)
        with open(args.password_file) as f:
            username = f.readline().strip()
            password = f.readline().strip()
    else:
        # Environment variables take precedence; prompt only for what is missing.
        username = os.environ.get('CODALAB_USERNAME')
        if username is None:
            username = raw_input('Username: ')
        password = os.environ.get('CODALAB_PASSWORD')
        if password is None:
            password = getpass.getpass()

    max_work_dir_size_bytes = parse_size(args.max_work_dir_size)
    max_dependencies_serialized_length = args.max_dependencies_serialized_length
    if args.max_image_cache_size is None:
        # No limit given: the image manager receives None as its size limit.
        max_images_bytes = None
    else:
        max_images_bytes = parse_size(args.max_image_cache_size)

    bundle_service = BundleServiceClient(args.server, username, password)

    # TODO Break the dependency of RunManagers on Worker to make this initialization nicer
    def create_run_manager(w):
        """
        Build the run manager for worker `w`: a DockerRunManager by default,
        or an AwsBatchRunManager when --batch-queue was given.
        """
        if args.batch_queue is None:
            # We defer importing the run managers so their dependencies are lazily loaded
            from docker_run import DockerRunManager
            from docker_client import DockerClient
            from docker_image_manager import DockerImageManager

            logging.info("Using local docker client for run submission.")

            docker = DockerClient()
            image_manager = DockerImageManager(docker, args.work_dir, max_images_bytes)
            cpuset = parse_cpuset_args(args.cpuset)
            gpuset = parse_gpuset_args(docker, args.gpuset)
            return DockerRunManager(docker, bundle_service, image_manager, w, args.network_prefix, cpuset, gpuset)
        else:
            try:
                import boto3
            except ImportError:
                logging.exception("Missing dependencies, please install boto3 to enable AWS support.")
                import sys
                sys.exit(1)

            from aws_batch import AwsBatchRunManager

            logging.info("Using AWS Batch queue %s for run submission.", args.batch_queue)

            batch_client = boto3.client('batch')
            return AwsBatchRunManager(batch_client, args.batch_queue, bundle_service, w)

    worker = Worker(args.id, args.tag, args.work_dir, max_work_dir_size_bytes, max_dependencies_serialized_length,
                    args.shared_file_system, bundle_service, create_run_manager)

    # Register a signal handler to ensure safe shutdown.
    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]:
        signal.signal(sig, lambda signum, frame: worker.signal())

    # BEGIN: DO NOT CHANGE THIS LINE UNLESS YOU KNOW WHAT YOU ARE DOING
    # THIS IS HERE TO KEEP TEST-CLI FROM HANGING
    print('Worker started.')
    # END

    worker.run()
    def start_container(self,
                        bundle_path,
                        uuid,
                        command,
                        docker_image,
                        network_name,
                        dependencies,
                        cpuset,
                        gpuset,
                        memory_bytes=0):
        """
        Create and start a Docker container for a bundle; return the container ID.

        The container is created with POST /containers/create and started
        with POST /containers/<id>/start. Its working directory and HOME are
        `/<uuid>`, and it runs as the user and group that own `bundle_path`,
        so files it creates are not owned by root.

        :param bundle_path: host path of the bundle directory; also stat'ed for its owner
        :param uuid: bundle UUID, used as the in-container bundle path `/<uuid>`
        :param command: command of the bundle, passed to the command/volume helpers
        :param docker_image: image to create the container from
        :param network_name: value for the container's NetworkMode
        :param dependencies: passed to the command/volume helpers
        :param cpuset: iterable of CPU ids the container may use
        :param gpuset: iterable of GPU ids, used only when nvidia-docker is enabled
        :param memory_bytes: hard memory limit; must be at least 4m
        :return: the ID of the started container
        :raises DockerException: if memory_bytes is below the 4m minimum, or if
            Docker does not answer 201 to the create or 204 to the start request
        """

        # Impose a minimum container request memory 4mb, same as docker's minimum allowed value
        # https://docs.docker.com/config/containers/resource_constraints/#limit-a-containers-access-to-memory
        # When using the REST api, it is allowed to set Memory to 0 but that means the container has unbounded
        # access to the host machine's memory, which we have decided to not allow
        min_memory_bytes = parse_size('4m')
        if memory_bytes < min_memory_bytes:
            raise DockerException(
                'Minimum memory must be 4m ({} bytes)'.format(min_memory_bytes))

        docker_commands = self._get_docker_commands(bundle_path, uuid, command,
                                                    docker_image, dependencies)

        volume_bindings = self._get_volume_bindings(bundle_path, uuid, command,
                                                    docker_image, dependencies)

        # Get user/group that owns the bundle directory
        # Then we can ensure that any created files are owned by the user/group
        # that owns the bundle directory, not root.
        bundle_stat = os.stat(bundle_path)
        uid = bundle_stat.st_uid
        gid = bundle_stat.st_gid

        docker_bundle_path = '/' + uuid

        # Create the container.
        create_request = {
            'Cmd': ['bash', '-c', '; '.join(docker_commands)],
            'Image': docker_image,
            'WorkingDir': docker_bundle_path,
            'Env': ['HOME=%s' % docker_bundle_path],
            'Entrypoint': [''],  # unset entry point regardless of image
            'HostConfig': {
                'Binds': volume_bindings,
                'NetworkMode': network_name,
                'Memory': memory_bytes,  # hard memory limit
                'CpusetCpus': ','.join([str(k) for k in cpuset]),
            },
            # TODO: Fix potential permissions issues arising from this setting
            # This can cause problems if users expect to run as a specific user
            # Docker expects the user as "<uid>:<gid>".
            'User': '%s:%s' % (uid, gid),
        }

        if self._use_nvidia_docker:
            # Allocate the requested number of GPUs and isolate
            self._add_nvidia_docker_arguments(create_request,
                                              [str(k) for k in gpuset])

        with closing(self._create_connection()) as create_conn:
            create_conn.request('POST', '/containers/create',
                                json.dumps(create_request),
                                {'Content-Type': 'application/json'})
            create_response = create_conn.getresponse()
            if create_response.status != 201:
                raise DockerException(create_response.read())
            container_id = json.loads(create_response.read())['Id']

        # Start the container.
        logger.debug(
            'Starting Docker container for UUID %s with command %s, container ID %s',
            uuid, command, container_id)
        with closing(self._create_connection()) as start_conn:
            start_conn.request('POST', '/containers/%s/start' % container_id)
            start_response = start_conn.getresponse()
            if start_response.status != 204:
                raise DockerException(start_response.read())

        return container_id
Beispiel #8
0
            print >> sys.stderr, """
Permissions on password file are too lax.
Only the user should be allowed to access the file.
On Linux, run:
chmod 600 %s""" % args.password_file
            exit(1)
        with open(args.password_file) as f:
            username = f.readline().strip()
            password = f.readline().strip()
    else:
        username = raw_input('Username: '******'%(asctime)s %(message)s',
                            level=logging.DEBUG)

    max_work_dir_size_bytes = parse_size(args.max_work_dir_size)
    worker = Worker(args.id, args.tag, args.work_dir, max_work_dir_size_bytes,
                    args.shared_file_system, args.slots,
                    BundleServiceClient(args.server, username, password),
                    DockerClient())

    # Register a signal handler to ensure safe shutdown.
    for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP]:
        signal.signal(sig, lambda signup, frame: worker.signal())

    print 'Worker started.'
    worker.run()
Beispiel #9
0
 def requested_memory_bytes(self):
     """
     Return the 'request_memory' resource, or the size of 4 megabytes when
     that value is falsy (kept for backwards compatibility).
     """
     requested = self._resources['request_memory']
     if requested:
         return requested
     return parse_size('4m')
Beispiel #10
0
 def requested_memory_bytes(self):
     """
     Return the 'request_memory' resource when it is present and truthy;
     otherwise fall back to the size of 4m (for backwards compatibility).
     """
     requested = self.resources.get('request_memory')
     if requested:
         return requested
     return parse_size('4m')