Esempio n. 1
0
 def task_put(self, task):
     """Upload *task* (and any file blobs it carries) to the kolejka server.

     Server-side configuration limits are merged into the task's limits
     before upload.  Every task file must either carry a valid blob
     reference or a local path from which the blob can be uploaded.

     Returns the server-side KolejkaTask on success (HTTP 200); on any
     other status the response is printed and None is returned.

     Raises:
         ValueError: if a file has neither a usable reference nor a path.
     """
     limits = KolejkaLimits()
     limits.cpus = self.config.cpus
     limits.memory = self.config.memory
     limits.pids = self.config.pids
     limits.storage = self.config.storage
     limits.time = self.config.time
     limits.network = self.config.network
     task.limits.update(limits)
     if not self.instance_session:
         self.login()
     for name, f in task.files.items():
         # Re-upload the blob when the reference is missing or no longer
         # known to the server.
         if not f.reference or not self.blob_check(
                 blob_reference=f.reference):
             f.reference = None
             if f.path:
                 f.reference = self.blob_put(os.path.join(
                     task.path, f.path))['key']
             else:
                 # Fix: the original bare `raise` had no active exception
                 # and produced an opaque RuntimeError; raise a
                 # descriptive error instead.
                 raise ValueError(
                     'File {} has no valid reference and no local path'
                     .format(name))
     info = self.post('/task/task/', data=json.dumps(task.dump()))
     if info.status_code == 200:
         task = KolejkaTask(None)
         task.load(info.json()['task'])
         return task
     else:
         print(info)
         print(info.text)
Esempio n. 2
0
def settings(request):
    """Report server configuration (blob hash algorithm and task limits).

    Only GET is handled; any other method falls through and yields None,
    exactly as the original implementation did.
    """
    if request.method != 'GET':
        return None
    conf = django.conf.settings
    payload = {
        'blob_hash_algorithm': conf.BLOB_HASH_ALGORITHM,
        'limits': KolejkaLimits(
            cpus=conf.LIMIT_CPUS,
            memory=conf.LIMIT_MEMORY,
            pids=conf.LIMIT_PIDS,
            storage=conf.LIMIT_STORAGE,
            network=conf.LIMIT_NETWORK,
            time=conf.LIMIT_TIME,
        ).dump(),
    }
    return JsonResponse(payload)
Esempio n. 3
0
 def limits(self, limits=None):
     """Apply resource limits to this control-group session.

     Args:
         limits: a KolejkaLimits; None (the default) means no limits and
             leaves every control group untouched.

     Fix: the original signature used the mutable default
     ``limits=KolejkaLimits()`` — a single instance shared across all
     calls; a None sentinel avoids that.
     """
     if limits is None:
         limits = KolejkaLimits()
     if limits.memory is not None:
         assert 'memory' in self.groups
         limit_file = self.group_path('memory',
                                      filename='memory.limit_in_bytes')
         with open(limit_file, 'w') as f:
             f.write(str(limits.memory))
         # Lazy %-args: formatting only happens when DEBUG is enabled.
         logging.debug('Limited session %s memory to %s bytes',
                       self.id, limits.memory)
     if limits.cpus is not None:
         assert 'cpuset' in self.groups
         cpuset_cpus = self.available_cpus()
         logging.debug('Available cpus: %s',
                       ','.join([str(c) for c in cpuset_cpus]))
         cpus_offset = limits.cpus_offset or 0
         # If the requested window does not fit, fall back to offset 0.
         if len(cpuset_cpus) < cpus_offset + limits.cpus:
             cpus_offset = 0
         if len(cpuset_cpus) > cpus_offset + limits.cpus:
             # NOTE(review): this slice starts at 0 and ignores
             # cpus_offset — presumably it should be
             # cpuset_cpus[cpus_offset:cpus_offset + limits.cpus]; kept
             # as-is, confirm intended behavior.
             cpuset_cpus = cpuset_cpus[0:limits.cpus]
         limit_file = self.group_path('cpuset', filename='cpuset.cpus')
         with open(limit_file, 'w') as f:
             f.write(','.join([str(c) for c in cpuset_cpus]))
         logging.debug('Limited session %s cpus to %s',
                       self.id, ','.join([str(c) for c in cpuset_cpus]))
     if limits.pids is not None:
         assert 'pids' in self.groups
         limit_file = self.group_path('pids', filename='pids.max')
         with open(limit_file, 'w') as f:
             f.write(str(limits.pids))
         logging.debug('Limited session %s pids to %s',
                       self.id, limits.pids)
Esempio n. 4
0
    def run_command(self, command, stdin: Optional[Path],
                    stdout: Optional[Path], stderr: Optional[Path], env, user,
                    group):
        """Run *command* under the kolejka observer with this object's limits.

        Each of stdin/stdout/stderr may be None (stream not redirected) or a
        Path that is opened for the duration of the call; the Path objects
        themselves are recorded on the returned execution status.
        """
        from kolejka import observer
        from kolejka.common import KolejkaLimits

        with ExitStack() as stack:

            def _handle(path, mode):
                # None stays None; an actual path becomes an open handle
                # whose lifetime is tied to the ExitStack.
                if not path:
                    return None
                return stack.enter_context(self.get_file_handle(path, mode))

            in_handle = _handle(stdin, 'r')
            out_handle = _handle(stdout, 'w')
            err_handle = _handle(stderr, 'w')
            execution_status = observer.run(
                command,
                stdin=in_handle,
                stdout=out_handle,
                stderr=err_handle,
                env=env,
                limits=KolejkaLimits(**self.limits),
                preexec_fn=self.get_change_user_function(user=user,
                                                         group=group))
            execution_status.stdout = stdout
            execution_status.stderr = stderr
            return execution_status
Esempio n. 5
0
def settings(request):
    """Report server configuration to an authenticated user (GET only)."""
    if not request.user.is_authenticated:
        return HttpResponseForbidden()
    if request.method != 'GET':
        return HttpResponseNotAllowed(['GET'])
    conf = django.conf.settings
    limits = KolejkaLimits(
        cpus=conf.LIMIT_CPUS,
        memory=conf.LIMIT_MEMORY,
        swap=conf.LIMIT_SWAP,
        pids=conf.LIMIT_PIDS,
        storage=conf.LIMIT_STORAGE,
        image=conf.LIMIT_IMAGE,
        workspace=conf.LIMIT_WORKSPACE,
        network=conf.LIMIT_NETWORK,
        time=conf.LIMIT_TIME,
    )
    return OKResponse({
        'blob_hash_algorithm': conf.BLOB_HASH_ALGORITHM,
        'limits': limits.dump(),
    })
Esempio n. 6
0
def run(args, limits=None, **kwargs):
    """Run *args* as a subprocess attached to the kolejka observer.

    The subprocess is launched from a separate worker process so the
    observer session covers only it; the CompletedProcess result is piped
    back to the caller.  *limits* defaults to an empty KolejkaLimits.
    """

    def _worker(conn, cmd, lims, run_kwargs):
        client = KolejkaObserverClient()
        client.attach()
        client.limit(lims)
        result = subprocess.run(cmd, **run_kwargs)
        stats = client.stats()
        client.close()
        # TODO: add stats to result, handle exceptions
        conn.send(result)
        conn.close()

    if limits is None:
        limits = KolejkaLimits()
    parent_conn, child_conn = multiprocessing.Pipe()
    worker = Process(target=_worker, args=(child_conn, args, limits, kwargs))
    worker.start()
    result = parent_conn.recv()
    worker.join()
    return result
Esempio n. 7
0
    def start_command(self, command, stdin_path, stdout_path, stdout_append,
                      stdout_max_bytes, stderr_path, stderr_append,
                      stderr_max_bytes, environment, work_path, user, group,
                      limits):
        """Start *command* under the kolejka observer runner.

        Standard streams are wired to the given paths (stdout/stderr may be
        appended and are size-capped by the max_bytes writers), the child
        runs as user/group in work_path with *environment*.

        Returns:
            (process, writers) — the started process plus the
            (stdout_writer, stderr_writer) pair that drains/limits output.
        """
        import kolejka.observer.runner
        from kolejka.common import KolejkaLimits

        # Resolve the numeric identity (uid, gid, supplementary groups)
        # the child process will run as.
        change_user, change_group, change_groups = self.get_user_group_groups(
            user, group)

        resources = self.get_resources(limits)
        # Translate the caller's limits into KolejkaLimits: cores -> cpus,
        # real_time -> wall-clock limit, swap explicitly disabled.
        limits = KolejkaLimits(
            cpus=limits.cores,
            memory=limits.memory,
            swap=0,
            pids=limits.pids,
            time=limits.real_time,
        )
        #TODO: cpu_time !!!!!

        # Open the child's standard streams; file_writer returns both the
        # raw handle passed to the child and a writer enforcing max_bytes.
        stdin_file = self.read_file(stdin_path)
        stdout_file, stdout_writer = self.file_writer(
            stdout_path, stdout_append, max_bytes=stdout_max_bytes)
        stderr_file, stderr_writer = self.file_writer(
            stderr_path, stderr_append, max_bytes=stderr_max_bytes)
        writers = (stdout_writer, stderr_writer)
        process = kolejka.observer.runner.start(
            command,
            limits=limits,
            user=change_user,
            group=change_group,
            groups=change_groups,
            resources=resources,
            stdin=stdin_file,
            stdout=stdout_file,
            stderr=stderr_file,
            env=environment,
            cwd=work_path,
        )
        # The child has inherited the handles; close the parent's copies.
        stdin_file.close()
        stdout_file.close()
        stderr_file.close()
        return (process, writers)
Esempio n. 8
0
 def __init__(self, params):
     """Initialize session state from the *params* mapping.

     Recognized keys: 'session_id', 'secret' (both optional, default
     None) and 'limits' (a serialized limits mapping, default empty).
     """
     self.session_id = params.get('session_id')
     self.secret = params.get('secret')
     self.limits = KolejkaLimits()
     self.limits.load(params.get('limits', dict()))
Esempio n. 9
0
 def limits(self, session_id, limits=None):
     """Apply *limits* to the session identified by *session_id*.

     Fix: the original default ``limits=KolejkaLimits()`` was a mutable
     default argument shared across all calls; a None sentinel with a
     fresh instance per call is equivalent and safe.
     """
     if limits is None:
         limits = KolejkaLimits()
     assert session_id in self.sessions
     return self.sessions[session_id].limits(limits=limits)
Esempio n. 10
0
def dequeue(request):
    """Assign up to `concurency` queued tasks to the calling worker (POST).

    The request body is JSON with optional keys: 'concurency' (max number
    of tasks, default 1), 'limits' (the worker's available resources) and
    'tags' (capabilities the worker offers).  Tasks are handed out oldest
    first; each accepted task's limits are subtracted from the remaining
    resources, and an exclusive task ends the batch.
    """
    if request.method == 'POST':
        # NOTE(review): is_authenticated is called here (pre-Django-1.10
        # style); on modern Django it is a property — confirm version.
        if not request.user.is_authenticated():
            return HttpResponseForbidden()
#TODO: Check that user can get tasks
        tasks = list()
        params = json.loads(str(request.read(), request.encoding or 'utf-8'))
        concurency = params.get('concurency', 1)
        limits = KolejkaLimits()
        limits.load(params.get('limits', dict()))
        tags = set(params.get('tags', list()))
        # Track the resources still available as tasks are assigned.
        resources = KolejkaLimits()
        resources.update(limits)

        available_tasks = Task.objects.filter(
            assignee=None).order_by('time_create')[0:100]
        for t in available_tasks:
            # Fix: use >= so at most `concurency` tasks are returned; the
            # original `>` allowed one task beyond the requested limit.
            if len(tasks) >= concurency:
                break
            tt = t.task()
            # An exclusive task can only start an empty batch.
            if len(tasks) > 0 and tt.exclusive:
                continue
            if not set(tt.requires).issubset(tags):
                continue
            # For each limited resource: skip tasks that declare no limit
            # of their own or would exceed what the worker has left.
            if resources.cpus is not None and (
                    tt.limits.cpus is None or tt.limits.cpus > resources.cpus):
                continue
            if resources.memory is not None and (
                    tt.limits.memory is None
                    or tt.limits.memory > resources.memory):
                continue
            if resources.pids is not None and (
                    tt.limits.pids is None or tt.limits.pids > resources.pids):
                continue
            if resources.storage is not None and (
                    tt.limits.storage is None
                    or tt.limits.storage > resources.storage):
                continue
            if resources.network is not None and (tt.limits.network is None
                                                  or tt.limits.network
                                                  and not resources.network):
                continue
            if resources.time is not None and (
                    tt.limits.time is None or tt.limits.time > resources.time):
                continue
            tasks.append(tt.dump())
            t.assignee = request.user
            t.save()
            # Subtract the accepted task's consumption from the pool.
            if resources.cpus is not None:
                resources.cpus -= tt.limits.cpus
            if resources.memory is not None:
                resources.memory -= tt.limits.memory
            if resources.pids is not None:
                resources.pids -= tt.limits.pids
            if resources.storage is not None:
                resources.storage -= tt.limits.storage
            if tt.exclusive:
                break

        response = dict()
        response['tasks'] = tasks
        return JsonResponse(response)
    return HttpResponseNotAllowed(['POST'])
Esempio n. 11
0
def dequeue(request):
    """Assign up to `concurency` queued tasks to the calling worker (POST).

    Extended variant: also accounts for gpus, swap, image and workspace
    limits.  Image storage is shared between tasks using the same image,
    so only the *increase* of the per-image high-water mark is charged
    against the worker's image resource.
    """
    if not request.user.has_perm('task.process_task'):
        return HttpResponseForbidden()
    if request.method != 'POST':
        return HttpResponseNotAllowed(['POST'])
    content_type = ContentType.objects.get_for_model(Task)
    tasks = list()
    params = json.loads(str(request.read(), request.encoding or 'utf-8'))
    concurency = params.get('concurency', 1)
    limits = KolejkaLimits()
    limits.load(params.get('limits', dict()))
    tags = set(params.get('tags', list()))
    # Track the resources still available as tasks are assigned.
    resources = KolejkaLimits()
    resources.update(limits)
    image_usage = dict()

    available_tasks = Task.objects.filter(assignee__isnull=True).order_by('time_create')[0:100]
    for t in available_tasks:
        # Fix: use >= so at most `concurency` tasks are returned; the
        # original `>` allowed one task beyond the requested limit.
        if len(tasks) >= concurency:
            break
        tt = t.task()
        # An exclusive task can only start an empty batch.
        # (Fix: removed leftover debug print.)
        if len(tasks) > 0 and tt.exclusive:
            continue
        if not set(tt.requires).issubset(tags):
            continue
        if resources.cpus is not None and (tt.limits.cpus is None or tt.limits.cpus > resources.cpus):
            continue
        # Gpus are only checked for tasks that actually request some.
        # (Fix: dropped the dead `tt.limits.gpus is None` test — the outer
        # guard already proved it is not None.)
        if tt.limits.gpus is not None and tt.limits.gpus > 0:
            if resources.gpus is not None and tt.limits.gpus > resources.gpus:
                continue
        if resources.memory is not None and (tt.limits.memory is None or tt.limits.memory > resources.memory):
            continue
        if resources.swap is not None and (tt.limits.swap is None or tt.limits.swap > resources.swap):
            continue
        if resources.pids is not None and (tt.limits.pids is None or tt.limits.pids > resources.pids):
            continue
        if resources.storage is not None and (tt.limits.storage is None or tt.limits.storage > resources.storage):
            continue
        if resources.image is not None:
            if tt.limits.image is None:
                continue
            # Charge only the growth of this image's high-water mark.
            image_usage_add = max(image_usage.get(tt.image, 0), tt.limits.image) - image_usage.get(tt.image, 0)
            if image_usage_add > resources.image:
                continue
        if resources.workspace is not None and (tt.limits.workspace is None or tt.limits.workspace > resources.workspace):
            continue
        if resources.network is not None and (tt.limits.network is None or tt.limits.network and not resources.network):
            continue
        if resources.time is not None and (tt.limits.time is None or tt.limits.time > resources.time):
            continue

        tasks.append(tt.dump())
        t.assignee = request.user
        t.time_assign = django.utils.timezone.now()
        t.save()
        # Subtract the accepted task's consumption from the pool.
        if resources.cpus is not None:
            resources.cpus -= tt.limits.cpus
        if resources.gpus is not None:
            # Fix: a task with gpus=None reached this line and raised
            # TypeError (int - None); treat a missing gpus limit as 0.
            resources.gpus -= tt.limits.gpus or 0
        if resources.memory is not None:
            resources.memory -= tt.limits.memory
        if resources.swap is not None:
            resources.swap -= tt.limits.swap
        if resources.pids is not None:
            resources.pids -= tt.limits.pids
        if resources.storage is not None:
            resources.storage -= tt.limits.storage
        if resources.image is not None:
            resources.image -= image_usage_add
            image_usage[tt.image] = max(image_usage.get(tt.image, 0), tt.limits.image)
        if resources.workspace is not None:
            resources.workspace -= tt.limits.workspace
        if tt.exclusive:
            break

    response = dict()
    response['tasks'] = tasks
    return OKResponse(response)
Esempio n. 12
0
def foreman():
    """Foreman main loop: dequeue tasks from the server and run them.

    Repeatedly asks the server for up to `concurency` tasks that fit the
    configured resource limits, then runs batches of them in threads,
    packing tasks into the available resources until one does not fit or
    an exclusive task is reached.
    """
    config = foreman_config()
    limits = KolejkaLimits()
    limits.cpus = config.cpus
    limits.memory = config.memory
    limits.pids = config.pids
    limits.storage = config.storage
    limits.time = config.time
    limits.network = config.network
    client = KolejkaClient()
    while True:
        try:
            tasks = client.dequeue(config.concurency, limits, config.tags)
            if len(tasks) == 0:
                time.sleep(config.interval)
            else:
                while len(tasks) > 0:
                    # Fresh resource pool for each batch.
                    resources = KolejkaLimits()
                    resources.update(limits)
                    processes = list()
                    cpus_offset = 0
                    for task in tasks:
                        if len(processes) >= config.concurency:
                            break
                        # An exclusive task can only start an empty batch.
                        if task.exclusive and len(processes) > 0:
                            break
                        task.limits.update(limits)
                        task.limits.cpus_offset = cpus_offset
                        ok = True
                        if resources.cpus is not None and task.limits.cpus > resources.cpus:
                            ok = False
                        if resources.memory is not None and task.limits.memory > resources.memory:
                            ok = False
                        if resources.pids is not None and task.limits.pids > resources.pids:
                            ok = False
                        if resources.storage is not None and task.limits.storage > resources.storage:
                            ok = False
                        if ok:
                            proc = Thread(target=foreman_single,
                                          args=(config.temp_path, client,
                                                task))
                            proc.start()
                            processes.append(proc)
                            cpus_offset += task.limits.cpus
                            if resources.cpus is not None:
                                resources.cpus -= task.limits.cpus
                            if resources.memory is not None:
                                resources.memory -= task.limits.memory
                            if resources.pids is not None:
                                resources.pids -= task.limits.pids
                            if resources.storage is not None:
                                resources.storage -= task.limits.storage
                            tasks = tasks[1:]
                            if task.exclusive:
                                break
                        else:
                            break
                    for proc in processes:
                        proc.join()
        except KeyboardInterrupt:
            # Fix: the original bare `except:` swallowed KeyboardInterrupt
            # (and SystemExit), making the daemon impossible to stop.
            raise
        except Exception:
            # Any other failure (network, server error): back off, retry.
            time.sleep(config.interval)
Esempio n. 13
0
def stage0(task_path, result_path, temp_path=None, consume_task_folder=False):
    """Worker stage 0: run a task inside a Docker container.

    Prepares a jailed copy of the task in a temporary directory, builds a
    `docker run` invocation enforcing the task's limits, runs the
    container while polling its state and collecting statistics, and
    finally commits the result (stats + produced files) to *result_path*.

    Args:
        task_path: directory holding the task spec and its files.
        result_path: output directory (recreated from scratch).
        temp_path: parent directory for the temporary jail.
        consume_task_folder: when True, task files are moved (not copied)
            into the jail and *task_path* is deleted afterwards.
    """
    config = worker_config()
    cgs = ControlGroupSystem()
    task = KolejkaTask(task_path)
    # Validate the task: it needs an id, an image, args and local files.
    if not task.id:
        task.id = uuid.uuid4().hex
        logging.warning('Assigned id {} to the task'.format(task.id))
    if not task.image:
        logging.error('Task does not define system image')
        sys.exit(1)
    if not task.args:
        logging.error('Task does not define args')
        sys.exit(1)
    if not task.files.is_local:
        logging.error('Task contains non-local files')
        sys.exit(1)
    # Merge worker-configured limits into the task's own limits.
    limits = KolejkaLimits()
    limits.cpus = config.cpus
    limits.memory = config.memory
    limits.swap = config.swap
    limits.pids = config.pids
    limits.storage = config.storage
    limits.image = config.image
    limits.workspace = config.workspace
    limits.time = config.time
    limits.network = config.network
    limits.gpus = config.gpus
    task.limits.update(limits)

    docker_task = 'kolejka_worker_{}'.format(task.id)

    # Commands run before and after the container to ensure no leftovers.
    docker_cleanup = [
        ['docker', 'kill', docker_task],
        ['docker', 'rm', docker_task],
    ]

    with tempfile.TemporaryDirectory(dir=temp_path) as jailed_path:
        #TODO jailed_path size remains unlimited?
        logging.debug('Using {} as temporary directory'.format(jailed_path))
        jailed_task_path = os.path.join(jailed_path, 'task')
        os.makedirs(jailed_task_path, exist_ok=True)
        jailed_result_path = os.path.join(jailed_path, 'result')
        os.makedirs(jailed_result_path, exist_ok=True)

        # Build the jailed task: same spec, files copied/moved into jail.
        jailed = KolejkaTask(os.path.join(jailed_path, 'task'))
        jailed.load(task.dump())
        jailed.files.clear()
        volumes = list()
        check_python_volume()
        if os.path.exists(OBSERVER_SOCKET):
            volumes.append((OBSERVER_SOCKET, OBSERVER_SOCKET, 'rw'))
        else:
            logging.warning('Observer is not running.')
        volumes.append(
            (jailed_result_path, os.path.join(WORKER_DIRECTORY,
                                              'result'), 'rw'))
        for key, val in task.files.items():
            if key != TASK_SPEC:
                src_path = os.path.join(task.path, val.path)
                dst_path = os.path.join(jailed_path, 'task', key)
                os.makedirs(os.path.dirname(dst_path), exist_ok=True)
                if consume_task_folder:
                    shutil.move(src_path, dst_path)
                else:
                    shutil.copy(src_path, dst_path)
                jailed.files.add(key)
        jailed.files.add(TASK_SPEC)
        #jailed.limits = KolejkaLimits() #TODO: Task is limited by docker, no need to limit it again?
        jailed.commit()
        volumes.append((jailed.path, os.path.join(WORKER_DIRECTORY,
                                                  'task'), 'rw'))
        if consume_task_folder:
            # Best effort: the jail already holds everything we need.
            try:
                shutil.rmtree(task_path)
            except:
                logging.warning('Failed to remove {}'.format(task_path))
                pass
        # Mount the stage1/stage2 helper scripts read-only from this package.
        for spath in [os.path.dirname(__file__)]:
            stage1 = os.path.join(spath, 'stage1.sh')
            if os.path.isfile(stage1):
                volumes.append(
                    (stage1, os.path.join(WORKER_DIRECTORY,
                                          'stage1.sh'), 'ro'))
                break
        for spath in [os.path.dirname(__file__)]:
            stage2 = os.path.join(spath, 'stage2.py')
            if os.path.isfile(stage2):
                volumes.append(
                    (stage2, os.path.join(WORKER_DIRECTORY,
                                          'stage2.py'), 'ro'))
                break

        # Assemble the `docker run` command line, translating task limits
        # into docker flags.
        docker_call = ['docker', 'run']
        docker_call += ['--detach']
        docker_call += ['--name', docker_task]
        docker_call += [
            '--entrypoint',
            os.path.join(WORKER_DIRECTORY, 'stage1.sh')
        ]
        for key, val in task.environment.items():
            docker_call += ['--env', '{}={}'.format(key, val)]
        docker_call += ['--hostname', WORKER_HOSTNAME]
        docker_call += ['--init']
        if task.limits.cpus is not None:
            # Pin the container to a bounded cpuset window.
            docker_call += [
                '--cpuset-cpus', ','.join([
                    str(c) for c in cgs.limited_cpuset(cgs.full_cpuset(
                    ), task.limits.cpus, task.limits.cpus_offset)
                ])
            ]

        if task.limits.gpus is not None and task.limits.gpus > 0:
            # GPU tasks require the nvidia runtime and an explicit device list.
            check_gpu_runtime_availability()
            gpus = ','.join(
                map(
                    str,
                    limited_gpuset(full_gpuset(), task.limits.gpus,
                                   task.limits.gpus_offset)))
            docker_call += [
                '--runtime=nvidia', '--shm-size=1g', '--gpus',
                f'"device={gpus}"'
            ]

        if task.limits.memory is not None:
            docker_call += ['--memory', str(task.limits.memory)]
            if task.limits.swap is not None:
                # --memory-swap is memory + swap in docker's semantics.
                docker_call += [
                    '--memory-swap',
                    str(task.limits.memory + task.limits.swap)
                ]
        if task.limits.storage is not None:
            # Storage limits only work on overlay2 backed by xfs.
            docker_info_run = subprocess.run(
                ['docker', 'system', 'info', '--format', '{{json .Driver}}'],
                stdout=subprocess.PIPE,
                check=True)
            storage_driver = str(
                json.loads(str(docker_info_run.stdout, 'utf-8')))
            if storage_driver == 'overlay2':
                docker_info_run = subprocess.run([
                    'docker', 'system', 'info', '--format',
                    '{{json .DriverStatus}}'
                ],
                                                 stdout=subprocess.PIPE,
                                                 check=True)
                storage_fs = dict(
                    json.loads(str(docker_info_run.stdout,
                                   'utf-8')))['Backing Filesystem']
                if storage_fs in ['xfs']:
                    storage_limit = task.limits.storage
                    docker_call += [
                        '--storage-opt', 'size=' + str(storage_limit)
                    ]
                else:
                    logging.warning(
                        "Storage limit on {} ({}) is not supported".format(
                            storage_driver, storage_fs))
            else:
                logging.warning("Storage limit on {} is not supported".format(
                    storage_driver))
        if task.limits.network is not None:
            if not task.limits.network:
                docker_call += ['--network=none']
        docker_call += ['--cap-add', 'SYS_NICE']
        if task.limits.pids is not None:
            docker_call += ['--pids-limit', str(task.limits.pids)]
        if task.limits.time is not None:
            docker_call += [
                '--stop-timeout',
                str(int(math.ceil(task.limits.time.total_seconds())))
            ]
        docker_call += [
            '--volume',
            '{}:{}:{}'.format(WORKER_PYTHON_VOLUME,
                              os.path.join(WORKER_DIRECTORY, 'python3'), 'ro')
        ]
        for v in volumes:
            docker_call += [
                '--volume', '{}:{}:{}'.format(os.path.realpath(v[0]), v[1],
                                              v[2])
            ]
        docker_call += ['--workdir', WORKER_DIRECTORY]
        docker_image = task.image
        docker_call += [docker_image]
        # Everything after the image name are stage1.sh arguments.
        docker_call += ['--consume']
        if config.debug:
            docker_call += ['--debug']
        if config.verbose:
            docker_call += ['--verbose']
        docker_call += [os.path.join(WORKER_DIRECTORY, 'task')]
        docker_call += [os.path.join(WORKER_DIRECTORY, 'result')]
        logging.debug('Docker call : {}'.format(docker_call))

        # Pull the image if requested, or if it is not present locally.
        pull_image = config.pull
        if not pull_image:
            docker_inspect_run = subprocess.run(
                ['docker', 'image', 'inspect', docker_image],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.STDOUT)
            if docker_inspect_run.returncode != 0:
                pull_image = True
        if pull_image:
            subprocess.run(['docker', 'pull', docker_image], check=True)

        for docker_clean in docker_cleanup:
            silent_call(docker_clean)

        # Prepare a clean result directory seeded from the task.
        if os.path.exists(result_path):
            shutil.rmtree(result_path)
        os.makedirs(result_path, exist_ok=True)
        result = KolejkaResult(result_path)
        result.id = task.id
        result.limits = task.limits
        result.stdout = task.stdout
        result.stderr = task.stderr

        # Start the container (detached) and remember its container id.
        start_time = datetime.datetime.now()
        docker_run = subprocess.run(docker_call, stdout=subprocess.PIPE)
        cid = str(docker_run.stdout, 'utf-8').strip()
        logging.info('Started container {}'.format(cid))

        # Best-effort initial GPU stats snapshot.
        try:
            if task.limits.gpus is not None and task.limits.gpus > 0:
                result.stats.update(
                    gpu_stats(gpus=limited_gpuset(full_gpuset(
                    ), task.limits.gpus, task.limits.gpus_offset)))
        except:
            pass
        time.sleep(0.1)

        # Poll the container: collect cgroup/GPU stats until it stops, and
        # kill it if it overruns its wall-clock limit (+2s grace).
        while True:
            try:
                docker_state_run = subprocess.run(
                    ['docker', 'inspect', '--format', '{{json .State}}', cid],
                    stdout=subprocess.PIPE)
                state = json.loads(str(docker_state_run.stdout, 'utf-8'))
            except:
                break
            try:
                result.stats.update(cgs.name_stats(cid))

                if task.limits.gpus is not None and task.limits.gpus > 0:
                    result.stats.update(
                        gpu_stats(gpus=limited_gpuset(full_gpuset(
                        ), task.limits.gpus, task.limits.gpus_offset)))
            except:
                pass
            time.sleep(0.1)
            if not state['Running']:
                result.result = state['ExitCode']
                try:
                    result.stats.time = dateutil.parser.parse(
                        state['FinishedAt']) - dateutil.parser.parse(
                            state['StartedAt'])
                except:
                    result.stats.time = None
                break
            if task.limits.time is not None and datetime.datetime.now(
            ) - start_time > task.limits.time + datetime.timedelta(seconds=2):
                docker_kill_run = subprocess.run(
                    ['docker', 'kill', docker_task])
        subprocess.run(['docker', 'logs', cid], stdout=subprocess.PIPE)
        # Merge stats the jailed stage2 reported, if any.
        try:
            summary = KolejkaResult(jailed_result_path)
            result.stats.update(summary.stats)
        except:
            pass

        stop_time = datetime.datetime.now()
        if result.stats.time is None:
            result.stats.time = stop_time - start_time
        # Instantaneous usage readings are meaningless after the container
        # has exited; clear them.
        result.stats.pids.usage = None
        result.stats.memory.usage = None
        result.stats.memory.swap = None

        # Copy produced files out of the jail, refusing anything that
        # resolves (via symlinks) outside the jailed result directory.
        for dirpath, dirnames, filenames in os.walk(jailed_result_path):
            for filename in filenames:
                abspath = os.path.join(dirpath, filename)
                realpath = os.path.realpath(abspath)
                if realpath.startswith(
                        os.path.realpath(jailed_result_path) + '/'):
                    relpath = abspath[len(jailed_result_path) + 1:]
                    if relpath != RESULT_SPEC:
                        destpath = os.path.join(result.path, relpath)
                        os.makedirs(os.path.dirname(destpath), exist_ok=True)
                        shutil.move(realpath, destpath)
                        os.chmod(destpath, 0o640)
                        result.files.add(relpath)
        result.commit()
        os.chmod(result.spec_path, 0o640)

        for docker_clean in docker_cleanup:
            silent_call(docker_clean)
Esempio n. 14
0
def foreman():
    """Foreman main loop (extended): dequeue tasks and run them in processes.

    Like the basic foreman, but additionally accounts for swap, image,
    workspace and gpu limits, manages local docker images between batches,
    and starts task processes only after image management completes.
    """
    config = foreman_config()
    limits = KolejkaLimits()
    limits.cpus = config.cpus
    limits.memory = config.memory
    limits.swap = config.swap
    limits.pids = config.pids
    limits.storage = config.storage
    limits.image = config.image
    limits.workspace = config.workspace
    limits.time = config.time
    limits.network = config.network
    limits.gpus = config.gpus
    client = KolejkaClient()
    while True:
        try:
            tasks = client.dequeue(config.concurency, limits, config.tags)
            if len(tasks) == 0:
                time.sleep(config.interval)
            else:
                check_python_volume()
                while len(tasks) > 0:
                    # Fresh resource pool and image accounting per batch.
                    resources = KolejkaLimits()
                    resources.update(limits)
                    image_usage = dict()
                    processes = list()
                    cpus_offset = 0
                    gpus_offset = 0

                    for task in tasks:
                        if len(processes) >= config.concurency:
                            break
                        # An exclusive task can only start an empty batch.
                        if task.exclusive and len(processes) > 0:
                            break
                        task.limits.update(limits)
                        task.limits.cpus_offset = cpus_offset
                        task.limits.gpus_offset = gpus_offset
                        ok = True
                        if resources.cpus is not None and task.limits.cpus > resources.cpus:
                            ok = False
                        # Fix: the original had a second, unconditional gpus
                        # check that raised TypeError (None > int) for tasks
                        # without a gpus limit; this single guarded check
                        # covers the gpus-requesting case.
                        if task.limits.gpus is not None and task.limits.gpus > 0:
                            if resources.gpus is None or task.limits.gpus > resources.gpus:
                                ok = False
                        if resources.memory is not None and task.limits.memory > resources.memory:
                            ok = False
                        if resources.swap is not None and task.limits.swap > resources.swap:
                            ok = False
                        if resources.pids is not None and task.limits.pids > resources.pids:
                            ok = False
                        if resources.storage is not None and task.limits.storage > resources.storage:
                            ok = False
                        if resources.image is not None:
                            # Charge only the growth of this image's
                            # high-water mark (images are shared).
                            image_usage_add = max(
                                image_usage.get(task.image, 0),
                                task.limits.image) - image_usage.get(
                                    task.image, 0)
                            if image_usage_add > resources.image:
                                ok = False
                        if resources.workspace is not None and task.limits.workspace > resources.workspace:
                            ok = False
                        if ok:
                            # Processes are created now but started only
                            # after image management below.
                            proc = Process(target=foreman_single,
                                           args=(config.temp_path, task))
                            processes.append(proc)
                            cpus_offset += task.limits.cpus
                            if resources.cpus is not None:
                                resources.cpus -= task.limits.cpus
                            # Fix: treat a missing gpus limit as 0 instead
                            # of crashing on int +/- None arithmetic.
                            gpus_offset += task.limits.gpus or 0
                            if resources.gpus is not None:
                                resources.gpus -= task.limits.gpus or 0
                            if resources.memory is not None:
                                resources.memory -= task.limits.memory
                            if resources.swap is not None:
                                resources.swap -= task.limits.swap
                            if resources.pids is not None:
                                resources.pids -= task.limits.pids
                            if resources.storage is not None:
                                resources.storage -= task.limits.storage
                            if resources.image is not None:
                                resources.image -= image_usage_add
                                image_usage[task.image] = max(
                                    image_usage.get(task.image, 0),
                                    task.limits.image)
                            if resources.workspace is not None:
                                resources.workspace -= task.limits.workspace
                            tasks = tasks[1:]
                            if task.exclusive:
                                break
                        else:
                            break
                    if config.image is not None:
                        manage_images(config.pull, config.image, image_usage,
                                      [task.image for task in tasks])
                    for proc in processes:
                        proc.start()
                    for proc in processes:
                        proc.join()
        except KeyboardInterrupt:
            raise
        except Exception:
            # Fix: narrowed from bare `except:` so SystemExit propagates;
            # other failures are logged and retried after a pause.
            traceback.print_exc()
            time.sleep(config.interval)
Esempio n. 15
0
def task(request, key=''):
    """REST endpoint for Kolejka tasks.

    POST (with empty ``key``) creates a task: rewrites and validates the
    image name against server policy, verifies that every task file is a
    known, readable blob reference, caps the task limits with the
    server-wide limits, optionally mirrors the image into the local
    registry, and stores the task.
    GET/HEAD/PUT return the stored task description; DELETE removes it.
    """
    if request.method == 'POST':
        # Creation posts to the collection URL; a non-empty key is invalid.
        if key != '':
            return HttpResponseForbidden()
        if not request.user.has_perm('task.add_task'):
            return HttpResponseForbidden()
        t = KolejkaTask(None)
        t.load(request.read())
        # Normalize the image name through the configured rewrite rules.
        for image_re, image_sub in settings.IMAGE_NAME_MAPS:
            t.image = re.sub(r'^' + image_re + r'$', image_sub, t.image)
        # The (rewritten) image must match at least one accepted pattern.
        accept_image = False
        for image_re in settings.LIMIT_IMAGE_NAME:
            if re.match(image_re, t.image):
                accept_image = True
                break
        if not accept_image:
            return FAILResponse(
                message='Image {} is not accepted by the server'.format(
                    t.image))
        # Images matching LOCAL_IMAGE_NAMES skip the registry mirror below.
        local_image = False
        for image_re in settings.LOCAL_IMAGE_NAMES:
            if re.match(image_re, t.image):
                local_image = True
                break
        t.id = uuid.uuid4().hex
        # Clients may only submit files by blob reference, never by path.
        for k, f in t.files.items():
            if not f.reference:
                return FAILResponse(
                    message='File {} does not have a reference'.format(k))
            f.path = None
        # Resolve every reference and check the requester may read it.
        refs = list()
        for k, f in t.files.items():
            try:
                ref = Reference.objects.get(key=f.reference)
            except Reference.DoesNotExist:
                return FAILResponse(
                    message='Reference for file {} is unknown'.format(k))
            if not ref.public:
                # A private, inaccessible reference is reported as unknown
                # so the response does not leak its existence.
                if not request.user.has_perm(
                        'blob.view_reference') and request.user != ref.user:
                    return FAILResponse(
                        message='Reference for file {} is unknown'.format(k))
            refs.append(ref)
        # Cap the task's requested limits with the server-wide limits.
        limits = KolejkaLimits(
            cpus=settings.LIMIT_CPUS,
            memory=settings.LIMIT_MEMORY,
            swap=settings.LIMIT_SWAP,
            pids=settings.LIMIT_PIDS,
            storage=settings.LIMIT_STORAGE,
            network=settings.LIMIT_NETWORK,
            time=settings.LIMIT_TIME,
            image=settings.LIMIT_IMAGE,
            workspace=settings.LIMIT_WORKSPACE,
        )
        t.limits.update(limits)

        # Mirror the image into the local registry so workers pull a pinned,
        # content-addressed copy instead of the client-supplied name.
        if settings.IMAGE_REGISTRY is not None and settings.IMAGE_REGISTRY_NAME is not None and not local_image:
            try:
                subprocess.run(['docker', 'pull', t.image], check=True)
                docker_inspect_run = subprocess.run([
                    'docker', 'image', 'inspect', '--format', '{{json .Id}}',
                    t.image
                ],
                                                    stdout=subprocess.PIPE,
                                                    check=True)
                # .Id is e.g. "sha256:abcd..."; keep only the hex digest.
                image_id = str(
                    json.loads(str(docker_inspect_run.stdout,
                                   'utf-8'))).split(':')[-1]
                logging.info(image_id)
                docker_inspect_run = subprocess.run([
                    'docker', 'image', 'inspect', '--format', '{{json .Size}}',
                    t.image
                ],
                                                    stdout=subprocess.PIPE,
                                                    check=True)
                image_size = int(
                    json.loads(str(docker_inspect_run.stdout, 'utf-8')))
            except Exception:
                # Narrowed from a bare ``except:`` so SystemExit and
                # KeyboardInterrupt are not swallowed; covers pull/inspect
                # failures and malformed inspect output alike.
                return FAILResponse(
                    message='Image {} could not be pulled'.format(t.image))
            if t.limits.image is not None and image_size > t.limits.image:
                return FAILResponse(
                    message='Image {} exceeds image size limit {}'.format(
                        t.image, t.limits.image))
            image_name = settings.IMAGE_REGISTRY + '/' + settings.IMAGE_REGISTRY_NAME + ':' + image_id
            try:
                subprocess.run(['docker', 'tag', t.image, image_name],
                               check=True)
                subprocess.run(['docker', 'push', image_name], check=True)
                subprocess.run(['docker', 'rmi', image_name], check=True)
            except Exception:
                # Narrowed from a bare ``except:`` (see above).
                return FAILResponse(
                    message='Image {} could not be pushed to local repository'.
                    format(t.image))
            t.image = image_name
            t.limits.image = image_size

        task = models.Task(user=request.user,
                           key=t.id,
                           description=json.dumps(t.dump()))
        task.save()
        for ref in refs:
            task.files.add(ref)
        response = dict()
        response['task'] = task.task().dump()
        return OKResponse(response)
    # All non-POST methods operate on an existing task and require login.
    if not request.user.is_authenticated:
        return HttpResponseForbidden()
    try:
        task = models.Task.objects.get(key=key)
    except models.Task.DoesNotExist:
        return HttpResponseNotFound()
    if not request.user.has_perm(
            'task.view_task'
    ) and request.user != task.user and request.user != task.assignee:
        return HttpResponseForbidden()
    if request.method == 'PUT':
        response = dict()
        response['task'] = task.task().dump()
        return OKResponse(response)
    if request.method == 'DELETE':
        if not request.user.has_perm(
                'task.delete_task') and request.user != task.user:
            return HttpResponseForbidden()
        task.delete()
        return OKResponse({'deleted': True})
    if request.method == 'GET' or request.method == 'HEAD':
        response = dict()
        response['task'] = task.task().dump()
        return OKResponse(response)
    return HttpResponseNotAllowed(['HEAD', 'GET', 'POST', 'PUT', 'DELETE'])
Example n. 16
0
 def task_put(self, task):
     """Upload *task* (and any not-yet-uploaded files) to the server.

     Client-configured resource limits are applied on top of the task's
     own limits before submission.  Every task file must either already
     exist on the server (valid blob reference) or carry a local path to
     upload from.

     Returns a new :class:`KolejkaTask` built from the server response.

     Raises:
         ValueError: if a file has neither a usable blob reference nor a
             local path.
     """
     # Cap the task limits with the client-configured limits.
     limits = KolejkaLimits()
     limits.cpus = self.config.cpus
     limits.memory = self.config.memory
     limits.swap = self.config.swap
     limits.pids = self.config.pids
     limits.storage = self.config.storage
     limits.image = self.config.image
     limits.workspace = self.config.workspace
     limits.time = self.config.time
     limits.network = self.config.network
     limits.gpus = self.config.gpus
     task.limits.update(limits)
     if not self.instance_session:
         self.login()
     for name, f in task.files.items():
         # Upload any file the server does not already know about.
         if not f.reference or not self.blob_check(
                 blob_reference=f.reference):
             if not f.path:
                 # Explicit check instead of the previous bare
                 # ``assert``: asserts are stripped under ``python -O``,
                 # silently letting an un-uploadable file through.
                 raise ValueError(
                     'File {} has no blob reference and no local path'
                     .format(name))
             f.reference = self.blob_put(os.path.join(task.path,
                                                      f.path))['key']
     response = self.post('/task/task/', data=json.dumps(task.dump()))
     # NOTE(review): no HTTP status check here — ``response.json()``
     # raises on a non-JSON error body; confirm this is the intended
     # failure mode.
     task = KolejkaTask(None)
     task.load(response.json()['task'])
     return task