Example #1
0
    def create_task(self):
        # type: () -> Task
        """
        Create the new populated Task

        The Task is either cloned from ``self.base_task_id`` or created from scratch,
        then its exported state is filled with the script section (repository / entry point /
        working directory), the requirements, an optional ``Task.init()`` patch and an
        optional base docker image, and finally written back with ``task.update_task()``.

        :return: newly created Task object
        :raises ValueError: if the local entry point script or the requested working directory
            cannot be found, if the entry point is not located under the requested working
            directory, or (when ``raise_on_missing_entries`` is set) if no requirements
            could be determined
        """
        local_entry_file = None
        repo_info = None
        if self.folder or (self.script and Path(self.script).is_file()
                           and not self.repo):
            # expand "~" and environment variables in the user supplied paths
            self.folder = os.path.expandvars(os.path.expanduser(
                self.folder)) if self.folder else None
            self.script = os.path.expandvars(os.path.expanduser(
                self.script)) if self.script else None
            self.cwd = os.path.expandvars(os.path.expanduser(
                self.cwd)) if self.cwd else None
            # the script is either a file by itself, or a path inside the folder
            if Path(self.script).is_file():
                entry_point = self.script
            else:
                entry_point = (Path(self.folder) / self.script).as_posix()
            entry_point = os.path.abspath(entry_point)
            if not os.path.isfile(entry_point):
                raise ValueError(
                    "Script entrypoint file '{}' could not be found".format(
                        entry_point))

            local_entry_file = entry_point
            # only the first returned value is used by this method
            repo_info, _ = ScriptInfo.get(
                filepaths=[entry_point],
                log=getLogger(),
                create_requirements=self.packages is True,
                uncommitted_from_remote=True,
                detect_jupyter_notebook=False)

        # check if we have no repository and no requirements raise error
        if self.raise_on_missing_entries and (not self.requirements_file and not self.packages) \
                and not self.repo and (
                not repo_info or not repo_info.script or not repo_info.script.get('repository')):
            raise ValueError(
                "Standalone script detected '{}', but no requirements provided"
                .format(self.script))

        if self.base_task_id:
            if self.verbose:
                print('Cloning task {}'.format(self.base_task_id))
            task = Task.clone(source_task=self.base_task_id,
                              project=Task.get_project_id(self.project_name))
        else:
            # noinspection PyProtectedMember
            task = Task._create(task_name=self.task_name,
                                project_name=self.project_name,
                                task_type=self.task_type
                                or Task.TaskTypes.training)

            # if there is nothing to populate, return
            # NOTE(review): this early return does not assign self.task - confirm it is intended
            if not any([
                    self.folder, self.commit, self.branch, self.repo,
                    self.script, self.cwd, self.packages,
                    self.requirements_file, self.base_task_id, self.docker
            ]):
                return task

        task_state = task.export_task()
        if 'script' not in task_state:
            task_state['script'] = {}
        script_state = task_state['script']

        if repo_info:
            # local mode: copy the detected script section
            for key in ('repository', 'version_num', 'branch'):
                script_state[key] = repo_info.script[key]
            script_state['diff'] = repo_info.script['diff'] or ''
            for key in ('working_dir', 'entry_point', 'binary'):
                script_state[key] = repo_info.script[key]
            script_state['requirements'] = repo_info.script.get(
                'requirements') or {}
            if self.cwd:
                repo_root = Path(repo_info.script['repo_root'])
                # use the given directory if it exists as is, otherwise look for it under the repo root
                cwd = self.cwd if Path(self.cwd).is_dir() else (
                    repo_root / self.cwd).as_posix()
                if not Path(cwd).is_dir():
                    raise ValueError(
                        "Working directory '{}' could not be found".format(
                            cwd))
                entry_point = \
                    repo_root / repo_info.script['working_dir'] / repo_info.script['entry_point']
                # resolve the entry point against the working directory BEFORE the working
                # directory is made relative to the repo root: relative_to() raises ValueError
                # when an absolute path is compared against a relative one
                entry_point = entry_point.relative_to(cwd).as_posix()
                # the stored working directory is relative to the repository root
                cwd = Path(cwd).relative_to(repo_root).as_posix()
                script_state['entry_point'] = entry_point
                script_state['working_dir'] = cwd
        elif self.repo:
            # drop empty and '.' path components (also removes any leading '/')
            entry_point = '/'.join(
                [p for p in self.script.split('/') if p and p != '.'])
            cwd = '/'.join(
                [p for p in (self.cwd or '.').split('/') if p and p != '.'])
            # the entry point is stored relative to the working directory
            if cwd and entry_point.startswith(cwd + '/'):
                entry_point = entry_point[len(cwd) + 1:]
            script_state['repository'] = self.repo
            script_state['version_num'] = self.commit or None
            script_state['branch'] = self.branch or None
            script_state['diff'] = ''
            script_state['working_dir'] = cwd or '.'
            script_state['entry_point'] = entry_point
        else:
            # standalone task
            script_state['entry_point'] = self.script
            script_state['working_dir'] = '.'

        # update requirements
        reqs = []
        if self.requirements_file:
            with open(self.requirements_file.as_posix(), 'rt') as f:
                reqs = [line.strip() for line in f]
        if self.packages and self.packages is not True:
            reqs += self.packages
        if reqs:
            # make sure we have clearml.
            # the package name is whatever precedes the first comment / marker / url /
            # version specifier / extras character, so "clearml[s3]" and "clearml!=1.0"
            # are recognized as well
            clearml_found = any(
                reduce(lambda a, b: a.split(b)[0], "#;@=~<>![", line).strip() == 'clearml'
                for line in reqs
                if not line.strip().startswith('#'))
            if not clearml_found:
                reqs.append('clearml')
            script_state['requirements'] = {'pip': '\n'.join(reqs)}
        elif not self.repo and repo_info and not repo_info.script.get(
                'requirements'):
            # we are in local mode, make sure we have "requirements.txt" it is a must
            reqs_txt_file = Path(
                repo_info.script['repo_root']) / "requirements.txt"
            if self.raise_on_missing_entries and not reqs_txt_file.is_file():
                raise ValueError("requirements.txt not found [{}] "
                                 "Use --requirements or --packages".format(
                                     reqs_txt_file.as_posix()))

        if self.add_task_init_call:
            # the patch header needs a normalized, '/' separated path rooted at '/'.
            # posixpath keeps it that way on every platform (os.path.abspath would add a
            # drive letter and backslashes on Windows)
            import posixpath
            script_entry = posixpath.normpath(
                '/' + script_state.get('working_dir', '.') + '/' +
                script_state['entry_point'])
            idx_a = 0
            # find the right entry for the patch if we have a local file (basically after __future__
            if local_entry_file:
                with open(local_entry_file, 'rt') as f:
                    lines = f.readlines()
                future_found = self._locate_future_import(lines)
                if future_found >= 0:
                    idx_a = future_found + 1

            task_init_patch = ''
            if self.repo or script_state.get('repository'):
                # if we do not have requirements, add clearml to the requirements.txt
                if not reqs:
                    task_init_patch += \
                        "diff --git a/requirements.txt b/requirements.txt\n" \
                        "--- a/requirements.txt\n" \
                        "+++ b/requirements.txt\n" \
                        "@@ -0,0 +1,1 @@\n" \
                        "+clearml\n"

                # Add Task.init call
                task_init_patch += \
                    "diff --git a{script_entry} b{script_entry}\n" \
                    "--- a{script_entry}\n" \
                    "+++ b{script_entry}\n" \
                    "@@ -{idx_a},0 +{idx_b},3 @@\n" \
                    "+from clearml import Task\n" \
                    "+Task.init()\n" \
                    "+\n".format(
                        script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1)
            else:
                # Add Task.init call
                task_init_patch += \
                    "from clearml import Task\n" \
                    "Task.init()\n\n"

            # make sure we add the diff at the end of the current diff
            # ("or ''" also covers a 'diff' key that is present but holds None)
            current_diff = script_state.get('diff') or ''
            if current_diff and not current_diff.endswith('\n'):
                current_diff += '\n'
            script_state['diff'] = current_diff + task_init_patch

        # set base docker image if provided
        if self.docker:
            task.set_base_docker(
                docker_cmd=self.docker.get('image'),
                docker_arguments=self.docker.get('args'),
                docker_setup_bash_script=self.docker.get('bash_script'),
            )

        if self.verbose:
            # .get(): a standalone task may have no 'repository' entry at all
            if script_state.get('repository'):
                repo_details = {
                    k: v
                    for k, v in script_state.items()
                    if v and k not in ('diff', 'requirements', 'binary')
                }
                print('Repository Detected\n{}'.format(
                    json.dumps(repo_details, indent=2)))
            else:
                print('Standalone script detected\n  Script: {}'.format(
                    self.script))

            if script_state.get('requirements') and \
                    script_state['requirements'].get('pip'):
                print('Requirements:{}{}'.format(
                    '\n  Using requirements.txt: {}'.format(
                        self.requirements_file.as_posix())
                    if self.requirements_file else '',
                    '\n  {}Packages: {}'.format(
                        'Additional ' if self.requirements_file else '',
                        self.packages) if self.packages else ''))
            if self.docker:
                print('Base docker image: {}'.format(self.docker))

        # update the Task
        task.update_task(task_state)
        self.task = task
        return task