def create_task(self):
    # type: () -> Task
    """
    Create the new populated Task

    The Task is either cloned from ``self.base_task_id`` or created from scratch. Its exported
    state is then populated with the ``script`` section (repository, commit, branch, diff,
    working directory, entry point and requirements), optionally extended with a patch that
    injects a ``Task.init()`` call, and finally written back with ``task.update_task``.

    Side effects: ``self.folder``, ``self.script`` and ``self.cwd`` are replaced by their
    user/environment-variable expanded values when a local folder/script is used, and the
    resulting Task is stored on ``self.task``.

    :return: newly created Task object
    :raises ValueError: if the script entry point or the requested working directory cannot be
        found (or the working directory / entry point are not located under the expected root),
        or, when ``self.raise_on_missing_entries`` is set, if neither a repository nor any
        requirements could be determined.
    """
    local_entry_file = None
    repo_info = None
    if self.folder or (self.script and Path(self.script).is_file() and not self.repo):
        # local mode: expand "~" and environment variables before touching the file system
        self.folder = os.path.expandvars(os.path.expanduser(self.folder)) if self.folder else None
        self.script = os.path.expandvars(os.path.expanduser(self.script)) if self.script else None
        self.cwd = os.path.expandvars(os.path.expanduser(self.cwd)) if self.cwd else None
        if Path(self.script).is_file():
            entry_point = self.script
        else:
            entry_point = (Path(self.folder) / self.script).as_posix()
        entry_point = os.path.abspath(entry_point)
        if not os.path.isfile(entry_point):
            raise ValueError(
                "Script entrypoint file \'{}\' could not be found".format(entry_point))

        local_entry_file = entry_point
        # the second returned value is not used here
        repo_info, _ = ScriptInfo.get(
            filepaths=[entry_point],
            log=getLogger(),
            create_requirements=self.packages is True,
            uncommitted_from_remote=True,
            detect_jupyter_notebook=False)

    # check if we have no repository and no requirements raise error
    if self.raise_on_missing_entries and (not self.requirements_file and not self.packages) \
            and not self.repo and (
            not repo_info or not repo_info.script or not repo_info.script.get('repository')):
        raise ValueError(
            "Standalone script detected \'{}\', but no requirements provided".format(self.script))

    if self.base_task_id:
        if self.verbose:
            print('Cloning task {}'.format(self.base_task_id))
        task = Task.clone(
            source_task=self.base_task_id, project=Task.get_project_id(self.project_name))
    else:
        # noinspection PyProtectedMember
        task = Task._create(
            task_name=self.task_name,
            project_name=self.project_name,
            task_type=self.task_type or Task.TaskTypes.training)

        # if there is nothing to populate, return
        if not any([
            self.folder, self.commit, self.branch, self.repo, self.script, self.cwd,
            self.packages, self.requirements_file, self.base_task_id, self.docker
        ]):
            return task

    task_state = task.export_task()
    if 'script' not in task_state:
        task_state['script'] = {}
    script_state = task_state['script']

    if repo_info:
        # local repository detected: copy the detected script section
        script_state['repository'] = repo_info.script['repository']
        script_state['version_num'] = repo_info.script['version_num']
        script_state['branch'] = repo_info.script['branch']
        script_state['diff'] = repo_info.script['diff'] or ''
        script_state['working_dir'] = repo_info.script['working_dir']
        script_state['entry_point'] = repo_info.script['entry_point']
        script_state['binary'] = repo_info.script['binary']
        script_state['requirements'] = repo_info.script.get('requirements') or {}
        if self.cwd:
            repo_root = Path(repo_info.script['repo_root'])
            # the working directory is resolved against the repository root; joining an
            # absolute `self.cwd` onto a Path yields that absolute path unchanged
            # NOTE(review): assumes repo_info.script['repo_root'] is an absolute path - confirm
            abs_cwd = repo_root / self.cwd
            if not abs_cwd.is_dir():
                raise ValueError(
                    "Working directory \'{}\' could not be found".format(abs_cwd.as_posix()))
            # stored working_dir must be relative to the repository root
            # (raises ValueError if the directory is outside of the repository)
            cwd = abs_cwd.relative_to(repo_root).as_posix()
            # the entry point has to be resolved against the *full* working directory path;
            # resolving it against the repo-relative `cwd` always fails, because
            # relative_to() cannot mix an absolute path with a relative one
            entry_point = \
                repo_root / repo_info.script['working_dir'] / repo_info.script['entry_point']
            entry_point = entry_point.relative_to(abs_cwd).as_posix()
            script_state['entry_point'] = entry_point
            script_state['working_dir'] = cwd
    elif self.repo:
        # remote repository: drop empty and '.' path components
        # (e.g. a leading './' or '/', or duplicated separators)
        entry_point = '/'.join([p for p in self.script.split('/') if p and p != '.'])
        cwd = '/'.join([p for p in (self.cwd or '.').split('/') if p and p != '.'])
        # the entry point is stored relative to the working directory
        if cwd and entry_point.startswith(cwd + '/'):
            entry_point = entry_point[len(cwd) + 1:]
        script_state['repository'] = self.repo
        script_state['version_num'] = self.commit or None
        script_state['branch'] = self.branch or None
        script_state['diff'] = ''
        script_state['working_dir'] = cwd or '.'
        script_state['entry_point'] = entry_point
    else:
        # standalone task
        script_state['entry_point'] = self.script
        script_state['working_dir'] = '.'

    # update requirements
    reqs = []
    if self.requirements_file:
        with open(self.requirements_file.as_posix(), 'rt') as f:
            reqs = [line.strip() for line in f]
    if self.packages and self.packages is not True:
        reqs += self.packages
    if reqs:
        # make sure we have clearml.
        # the bare package name ends at the first comment / marker / url / version-specifier /
        # extras separator, so e.g. "clearml[s3]>=1.0" is recognized as "clearml"
        clearml_found = any(
            reduce(lambda a, b: a.split(b)[0], "#;@=~<>[!", line).strip() == 'clearml'
            for line in reqs
            if not line.strip().startswith('#')
        )
        if not clearml_found:
            reqs.append('clearml')
        script_state['requirements'] = {'pip': '\n'.join(reqs)}
    elif not self.repo and repo_info and not repo_info.script.get('requirements'):
        # we are in local mode, make sure we have "requirements.txt" it is a must
        reqs_txt_file = Path(repo_info.script['repo_root']) / "requirements.txt"
        if self.raise_on_missing_entries and not reqs_txt_file.is_file():
            raise ValueError(
                "requirements.txt not found [{}] "
                "Use --requirements or --packages".format(reqs_txt_file.as_posix()))

    if self.add_task_init_call:
        script_entry = os.path.abspath(
            '/' + script_state.get('working_dir', '.') + '/' + script_state['entry_point'])
        idx_a = 0
        # find the right entry for the patch if we have a local file
        # (basically right after the `from __future__` import)
        if local_entry_file:
            with open(local_entry_file, 'rt') as f:
                lines = f.readlines()
            future_found = self._locate_future_import(lines)
            if future_found >= 0:
                idx_a = future_found + 1

        task_init_patch = ''
        if self.repo or script_state.get('repository'):
            # if we do not have requirements, add clearml to the requirements.txt
            if not reqs:
                task_init_patch += \
                    "diff --git a/requirements.txt b/requirements.txt\n" \
                    "--- a/requirements.txt\n" \
                    "+++ b/requirements.txt\n" \
                    "@@ -0,0 +1,1 @@\n" \
                    "+clearml\n"

            # Add Task.init call
            task_init_patch += (
                "diff --git a{script_entry} b{script_entry}\n"
                "--- a{script_entry}\n"
                "+++ b{script_entry}\n"
                "@@ -{idx_a},0 +{idx_b},3 @@\n"
                "+from clearml import Task\n"
                "+Task.init()\n"
                "+\n"
            ).format(script_entry=script_entry, idx_a=idx_a, idx_b=idx_a + 1)
        else:
            # Add Task.init call
            task_init_patch += \
                "from clearml import Task\n" \
                "Task.init()\n\n"

        # make sure we add the diff at the end of the current diff
        # (`or ''` also covers an existing 'diff' key holding None)
        current_diff = script_state.get('diff') or ''
        if current_diff and not current_diff.endswith('\n'):
            current_diff += '\n'
        script_state['diff'] = current_diff + task_init_patch

    # set base docker image if provided
    if self.docker:
        task.set_base_docker(
            docker_cmd=self.docker.get('image'),
            docker_arguments=self.docker.get('args'),
            docker_setup_bash_script=self.docker.get('bash_script'),
        )

    if self.verbose:
        # 'repository' may be missing altogether for a standalone script
        if script_state.get('repository'):
            repo_details = {
                k: v for k, v in script_state.items()
                if v and k not in ('diff', 'requirements', 'binary')
            }
            print('Repository Detected\n{}'.format(json.dumps(repo_details, indent=2)))
        else:
            print('Standalone script detected\n Script: {}'.format(self.script))

        if script_state.get('requirements') and script_state['requirements'].get('pip'):
            print('Requirements:{}{}'.format(
                '\n Using requirements.txt: {}'.format(
                    self.requirements_file.as_posix()) if self.requirements_file else '',
                '\n {}Packages: {}'.format(
                    'Additional ' if self.requirements_file else '',
                    self.packages) if self.packages else ''))
        if self.docker:
            print('Base docker image: {}'.format(self.docker))

    # update the Task
    task.update_task(task_state)
    self.task = task
    return task