def transmit_artifacts(self, id):
    """Upload the artifacts recorded for run ``id`` to its migrated task.

    The artifacts are read from ``self.info[id][self.artifacts]``, a mapping
    of artifact category to an iterable of ``(name, object)`` pairs. A run
    without that entry uploads nothing. The target task is obtained through
    ``self.call_func`` using the run name as the task name.

    :param id: identifier of the run whose artifacts should be uploaded.
    """
    # Categories that are uploaded. Entries filed under any other category
    # are skipped, which is what the former if/elif chain (six identical
    # branches) did as well.
    uploadable_types = (
        "folder",
        "text",
        "dataframe",
        "image",
        "dictionary",
        "storage-server",
    )

    artifacts = self.info[id].get(self.artifacts, {})
    task = self.call_func(
        "Task.get_task",
        id,
        lambda id_: Task.get_task(project_name="MLFlow Migration", task_name=id_),
        self.get_run_name_by_id(id),
    )

    for type_, entries in artifacts.items():
        if type_ not in uploadable_types:
            continue
        for name, obj in entries:
            task.upload_artifact(name=name, artifact_object=obj)
def transmit_information(self, id):
    """Copy parameters, timing, script details and tags of run ``id`` onto its task.

    The task is fetched through ``self.call_func`` (task name = run name),
    exported to a dictionary, patched with the values collected for the run
    and written back with ``task.update_task``. When the run has entries
    under the ``VALUETAG`` tag group they are attached as a configuration
    object named "MLflow Tags".

    :param id: identifier of the run being transmitted.
    """
    parameters = self.get_params(id)
    general_information = self.get_general_information(id)
    artifact = self.get_artifact(id)
    tags = self.get_tags(id)

    task = self.call_func(
        "Task.get_task",
        id,
        lambda id_: Task.get_task(project_name="MLFlow Migration", task_name=id_),
        self.get_run_name_by_id(id),
    )
    task_values = self.call_func(
        "task.export_task",
        id,
        lambda _: task.export_task(),
        self.get_run_name_by_id(id),
    )

    # Top-level fields of the exported task.
    task_values["comment"] = tags.get("note.content", "")
    task_values["hyperparams"]["Args"] = parameters
    task_values["started"] = general_information["started"]
    task_values["completed"] = general_information["completed"]

    # Source-control details; optional tags fall back to a default.
    script = task_values["script"]
    if "source.git.branch" in tags:
        script["branch"] = tags["source.git.branch"]
    else:
        script["branch"] = self.branch
    script["repository"] = tags.get("source.git.repoURL", "")
    script["version_num"] = tags.get("source.git.commit", "")
    # These two tags are mandatory: a missing one raises KeyError.
    script["entry_point"] = tags["entry_point"]
    script["working_dir"] = tags["working_dir"]
    if "project.env" in tags:
        script["requirements"][tags["project.env"]] = artifact.get("requirements", "")

    task_values["user"] = tags["user"]

    self.call_func(
        "task.update_task",
        id,
        lambda _task_values: task.update_task(_task_values),
        task_values,
    )

    value_tags = tags["VALUETAG"]
    if len(value_tags.keys()) > 0:
        self.call_func(
            "task.connect_configuration",
            id,
            lambda _dict: task.connect_configuration(_dict, name="MLflow Tags"),
            value_tags,
        )
def main():
    """Fetch and print every artifact stored on the 'Artifacts example' task.

    Each artifact is retrieved either as a deserialized object (``get()``)
    or as a path to a locally cached copy (``get_local_copy()``), and the
    result is printed.
    """
    # The task that holds the artifacts we want to read back.
    source_task = Task.get_task(project_name='examples', task_name='Artifacts example')

    def entry(name):
        # Look the artifact up on the task for every access, as the
        # straight-line version of this example did.
        return source_task.artifacts[name]

    # numpy object, first deserialized and then as a downloaded npz file
    eye_object = entry('Numpy Eye').get()
    print("numpy_artifact is:\n{}\n".format(eye_object))
    eye_file = entry('Numpy Eye').get_local_copy()
    print("download_numpy_artifact path is:\n{}\n".format(eye_file))

    # PIL Image object
    image_object = entry('pillow_image').get()
    print("pil_artifact is:\n{}\n".format(image_object))

    # pandas object
    pandas_object = entry('Pandas').get()
    print("pandas_artifact is:\n{}\n".format(pandas_object))

    # dictionary object
    dictionary_object = entry('dictionary').get()
    print("dictionary_artifact is:\n")
    pprint(dictionary_object)

    # train DataFrame, deserialized and as the gz-compressed csv shown in the UI
    train_frame = entry('train').get()
    print("df_artifact is:\n{}\n".format(train_frame))
    train_frame_gz = entry('train').get_local_copy()
    print("df_artifact_as_gz path is:\n{}\n".format(train_frame_gz))

    # wildcard jpegs: the zip is extracted into a cached folder and that
    # folder's path is returned
    jpegs_path = entry('wildcard jpegs').get()
    print("jpegs_artifact path is:\n{}\n".format(jpegs_path))

    # uploaded local folder: extracted into a cached folder, path returned
    folder_path = entry('local folder').get()
    print("local_folder_artifact path is:\n{}\n".format(folder_path))

    # same folder, but keep the zip file as is and return the zip's path
    folder_zip_path = entry('local folder').get_local_copy(extract_archive=False)
    print("local_folder_artifact_as_zip path is:\n{}\n".format(folder_zip_path))

    # uploaded local file: path of the cached copy is returned
    file_path = entry('local file').get()
    print("local_file_artifact path is:\n{}\n".format(file_path))
def main():
    """Export Clara models and upload the exported files as task artifacts.

    Steps, in order:

    1. Initialise the ClearML task and set its base docker image.
    2. Parse the export command-line arguments.
    3. When ``--models_task`` is given, download the output models of that
       task and move every file of each local copy to ``--model_file_path``.
    4. Run ``export_main()``.
    5. Upload the frozen (``--regular_frozen_file_ext``) and TRT
       (``--trt_file_ext``) files as artifacts. Uploading is best-effort:
       a failure is reported and does not abort the run.
    """
    # Imported locally so the block does not depend on a file-level import.
    import subprocess

    task = Task.init(project_name="Nvidia Clara examples with ClearML", task_name="Export models to Artifacts")
    task.set_base_docker(
        "nvcr.io/nvidia/clara-train-sdk:v3.1.01 --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864"
    )

    parser = ArgumentParser()
    parser.add_argument('--model_name', required=True)
    parser.add_argument('--model_file_path', required=True)
    parser.add_argument('--input_node_names', required=True)
    parser.add_argument('--output_node_names', required=True)
    parser.add_argument('--checkpoint_ext', default='.ckpt')
    parser.add_argument('--meta_file_ext', default='.meta')
    parser.add_argument('--regular_frozen_file_ext', default='.fzn.pb')
    parser.add_argument('--trt_file_ext', default='.trt.pb')
    parser.add_argument('--trt_precision_mode', default='FP32')
    parser.add_argument('--trt_dynamic_mode', action='store_true')
    parser.add_argument('--max_batch_size', type=int, default=4)
    parser.add_argument('--trt_min_seg_size', type=int, default=50)
    parser.add_argument('--model_file_format', default='CKPT')
    parser.add_argument('--trtis_export', action='store_true')
    parser.add_argument('--trtis_model_name', type=str, default='tlt_model')
    parser.add_argument('--trtis_input_shape', nargs='+', type=int,
                        help='Full input shape. '
                             'For example, --trtis_input_shape dim1 dim2 dim3 dim4 in CDHW case')
    parser.add_argument('--models_task', type=str, help='The training task id')
    set_env_vars()
    args = parser.parse_args()

    if args.models_task:
        m_task = Task.get_task(task_id=args.models_task)
        # A task without output models yields nothing to iterate over.
        output_models = m_task.get_models().get("output") or []
        for mdl in output_models:
            m_output = mdl.get_local_copy()
            for model in os.listdir(m_output):
                # Argument list instead of a shell string: paths containing
                # spaces or shell metacharacters are passed to `mv` intact.
                # Like os.system, a non-zero exit status does not raise.
                subprocess.call(["mv", os.path.join(m_output, model), args.model_file_path])

    export_main()

    exported_base = os.path.join(args.model_file_path, args.model_name)
    uploads = (
        ("fzn file", args.regular_frozen_file_ext, "frozen file uploaded as artifact"),
        ("trt file", args.trt_file_ext, "trt file uploaded as artifact"),
    )
    for artifact_name, extension, success_message in uploads:
        # Best-effort upload: report the failure instead of hiding it.
        # noinspection PyBroadException
        try:
            task.upload_artifact(name=artifact_name, artifact_object="{}{}".format(exported_base, extension))
            print(success_message)
        except Exception as ex:
            print("Could not upload {}: {}".format(artifact_name, ex))
def get_dataset(name_or_task_id) -> "Dataset":
    """Return a dataset for a registered name or for a task id.

    Resolution order:

    1. A name found in the module's name-to-class registry instantiates the
       registered class. This is checked first, so registered names never
       trigger a task lookup.
    2. Otherwise the value is treated as a task id; the task's ``dataset``
       artifact is downloaded and read as a comma-separated file.

    :param name_or_task_id: registered dataset name or id of a task that
        carries a ``dataset`` artifact.
    :return: the resolved ``Dataset``.
    :raises ValueError: if ``name_or_task_id`` is ``None`` or cannot be
        resolved either way. ``ValueError`` is a subclass of ``Exception``,
        so callers catching ``Exception`` are unaffected.
    """
    if name_or_task_id is None:
        raise ValueError("A dataset name or a task id is required, got None")

    if name_or_task_id in __name_to_class:
        return __name_to_class[name_or_task_id]()

    try:
        gen_task = Task.get_task(task_id=name_or_task_id)
    except ValueError:
        # Task lookup failures are reported as an unknown dataset below.
        gen_task = None

    if gen_task is not None:
        dataset_path = gen_task.artifacts["dataset"].get_local_copy()
        return Dataset.read(dataset_path, sep=",")

    raise ValueError("Unknown dataset " + str(name_or_task_id))
def model_prune(task_args):
    """Prune the unpruned weights of a training task and upload the result.

    Reads ``arch``, ``trains_model_task`` and ``output_file`` from
    ``task_args``. The pruned file path (environment variables expanded) is
    uploaded to the current task as the ``pruned_weights`` artifact.

    :param task_args: parsed arguments, also forwarded to ``tlt_prune``.
    """
    # Make sure the output directory exists before pruning.
    mkdir_command = "mkdir -p /home/{}/experiment_dir_pruned".format(task_args.arch)
    get_output(mkdir_command)

    weights_artifact = Task.get_task(task_id=task_args.trains_model_task).artifacts["unpruned_weights"]
    unpruned_weights = weights_artifact.get_local_copy()
    tlt_prune(task_args, unpruned_weights)

    current = Task.current_task()
    pruned_path = os.path.join(os.path.expandvars("{}".format(task_args.output_file)))
    current.upload_artifact(name="pruned_weights", artifact_object=pruned_path)
def transmit_metrics(self, id):
    """Report every metric point of run ``id`` as a scalar and mark the task completed.

    ``self.get_metrics(id)`` yields ``(graph_name, series_name, table)``
    triples. For each point in ``table``, element 0 is reported as the
    iteration and element 1, converted to ``float``, as the value.

    :param id: identifier of the run whose metrics are transmitted.
    """
    task = self.call_func(
        "Task.get_task",
        id,
        lambda id_: Task.get_task(project_name="MLFlow Migration", task_name=id_),
        self.get_run_name_by_id(id),
    )
    logger = task.get_logger()

    for graph_name, series_name, table in self.get_metrics(id):
        for point in table:
            iteration = point[0]
            value = float(point[1])
            logger.report_scalar(graph_name, series_name, iteration=iteration, value=value)

    task.completed()
def _load_task_params():
    """Load the remote task's hyper-parameters once and return the selected command name.

    On first use the task identified by ``get_remote_task_id()`` is fetched;
    its ``hyperparams`` property is cached on ``PatchClick`` together with a
    flat dictionary of the parameters stored under
    ``PatchClick._section_name`` (section prefix stripped from the keys).

    :return: the name of the first entry in the ``Args`` section whose type
        equals ``PatchClick._command_type`` and whose value casts to true,
        or ``None`` when there is no such entry or no ``Args`` section.
    """
    if not PatchClick.__remote_task_params:
        from clearml import Task

        t = Task.get_task(task_id=get_remote_task_id())
        # noinspection PyProtectedMember
        PatchClick.__remote_task_params = t._get_task_property('hyperparams') or {}
        params_dict = t.get_parameters(backwards_compatibility=False)
        section_prefix = PatchClick._section_name + '/'
        skip = len(section_prefix)
        PatchClick.__remote_task_params_dict = {
            k[skip:]: v for k, v in params_dict.items() if k.startswith(section_prefix)
        }

    params = PatchClick.__remote_task_params
    # The cache falls back to {} when the task has no hyper-parameters, and a
    # task may have sections but no 'Args'. Indexing params['Args'] directly
    # raised KeyError in both cases; there is simply no command to return.
    args_section = params.get('Args') if params else None
    if not args_section:
        return None

    command = [
        p.name
        for p in args_section.values()
        if p.type == PatchClick._command_type and cast_str_to_bool(p.value, strip=True)
    ]
    return command[0] if command else None
system_tags=["archived"], only_fields=["id"], order_by=["-last_update"], page_size=page_size, page=page, status_changed=[ "<{}".format(datetime.utcfromtimestamp(timestamp)) ], ) page += 1 if tasks: print("Deleting {} tasks".format(len(tasks))) # delete and cleanup tasks for task in tasks: # noinspection PyBroadException try: deleted_task = Task.get_task(task_id=task.id) deleted_task.delete(delete_artifacts_and_models=True, skip_models_used_by_other_tasks=True, raise_on_error=False) except Exception as ex: logging.warning("Could not delete Task ID={}, {}".format( task.id, ex.message if hasattr(ex, "message") else ex)) continue # sleep until the next day print("going to sleep for {} days".format(args["cleanup_period_in_days"])) sleep(60 * 60 * 24.0 * args["cleanup_period_in_days"])
def trigger_task_func(task_id):
    """Fetch the task with ``task_id`` and print a threshold notice for it.

    :param task_id: id of the task to look up.
    """
    triggered = Task.get_task(task_id=task_id)
    notice = 'Task ID {} metric above threshold'.format(triggered.id)
    print(notice)
# Register this process with ClearML; everything from here on is logged
# automatically.
task = Task.init(project_name="examples", task_name="Pipeline step 3 train model")

# Arguments exposed on the task (the dataset task id is a placeholder to replace)
args = {
    'dataset_task_id': 'REPLACE_WITH_DATASET_TASK_ID',
}
task.connect(args)

# Only create the task here; the actual execution happens later
task.execute_remotely()

print('Retrieving Iris dataset')
dataset_task = Task.get_task(task_id=args['dataset_task_id'])
# Fetch the four splits in the same order as the one-per-line version
X_train, X_test, y_train, y_test = (
    dataset_task.artifacts[split_name].get()
    for split_name in ('X_train', 'X_test', 'y_train', 'y_test')
)
print('Iris dataset loaded')

model = LogisticRegression(solver='liblinear', multi_class='auto')
model.fit(X_train, y_train)

# Store the model, then score the copy loaded back from disk
joblib.dump(model, 'model.pkl', compress=True)
loaded_model = joblib.load('model.pkl')
result = loaded_model.score(X_test, y_test)

print('model trained & stored')
def _patched_parse_args(original_parse_fn, self, args=None, namespace=None):
    """Parse arguments through ``original_parse_fn`` while syncing them with the current task.

    :param original_parse_fn: the wrapped parse function, called as
        ``original_parse_fn(self, args=args, namespace=namespace)``; may be falsy.
    :param self: the parser instance the call was made on.
    :param args: argument list forwarded to ``original_parse_fn``.
    :param namespace: namespace forwarded to ``original_parse_fn``.
    :return: the parsed arguments. When no parse function is available the
        result is the last stored parse result, the stored string form of the
        arguments, or an empty dict, depending on the branch taken.

    Flow:

    * Without a current task (and none obtainable while running remotely) the
      arguments are parsed and recorded on ``PatchArgumentParser``.
    * With a current task, ``PatchArgumentParser._calling_current_task`` acts as
      a re-entrancy flag: a nested call only parses and records, the outer call
      connects the parser to the task and, when running remotely, parses again
      after the connect step.
    """
    current_task = PatchArgumentParser._current_task
    # if we are running remotely, we always have a task id, so we better patch the argparser as soon as possible.
    if not current_task:
        from ..config import running_remotely, get_remote_task_id
        if running_remotely():
            # this will cause the current_task() to set PatchArgumentParser._current_task
            from clearml import Task
            # noinspection PyBroadException
            try:
                current_task = Task.get_task(task_id=get_remote_task_id())
                # make sure we do not store back the values
                # (we will do that when we actually call parse args)
                # this will make sure that if we have args we should not track we know them
                # noinspection PyProtectedMember
                current_task._arguments.exclude_parser_args({'*': True})
            except Exception:
                # best effort: on any failure continue without excluding the parser args
                pass
    # automatically connect to current task:
    if current_task:
        from ..config import running_remotely
        if PatchArgumentParser._calling_current_task:
            # re-entrant call (the flag is set by an outer invocation of this function):
            # if we are here and running remotely by now we should try to parse the arguments
            parsed_args = None
            if original_parse_fn:
                parsed_args = original_parse_fn(self, args=args, namespace=namespace)
                PatchArgumentParser._add_last_parsed_args(
                    self, parsed_args)
            # fall back to the most recently stored result when nothing was parsed
            return parsed_args or PatchArgumentParser._last_parsed_args[-1]
        PatchArgumentParser._calling_current_task = True
        # Store last instance and result
        PatchArgumentParser._add_last_arg_parser(self)
        parsed_args = parsed_args_str = None
        # parse if we are running in dev mode
        if not running_remotely() and original_parse_fn:
            parsed_args = original_parse_fn(self, args=args, namespace=namespace)
            parsed_args_str = PatchArgumentParser._add_last_parsed_args(
                self, parsed_args)
        # noinspection PyBroadException
        try:
            # sync to/from task
            # (when the stored value is a tuple only its first element is passed on)
            # noinspection PyProtectedMember
            current_task._connect_argparse(
                self, args=args, namespace=namespace, parsed_args=parsed_args_str[0] if isinstance(
                    parsed_args_str, tuple) else parsed_args_str)
        except Exception:
            # best effort: a failed sync must not prevent argument parsing
            pass
        # sync back and parse
        if running_remotely():
            if original_parse_fn:
                # if we are running python2 check if we have subparsers,
                # if we do we need to patch the args, because there is no default subparser
                if PY2:
                    import itertools

                    # NOTE(review): `prev` is a mutable default, but it is only passed
                    # along and never modified in the code shown here.
                    def _get_sub_parsers_defaults(subparser, prev=[]):
                        # collect, recursively, sub-parser actions that carry a truthy `default`
                        actions_grp = [
                            v._actions for v in subparser.choices.values()
                        ] if isinstance(
                            subparser, _SubParsersAction) else [subparser._actions]
                        _sub_parsers_defaults = [[subparser]] if hasattr(
                            subparser, 'default') and subparser.default else []
                        for actions in actions_grp:
                            _sub_parsers_defaults += [
                                _get_sub_parsers_defaults(v, prev) for v in actions if isinstance(v, _SubParsersAction) and hasattr(v, 'default') and v.default
                            ]
                        # flatten the nested lists into a single list of actions
                        return list(
                            itertools.chain.from_iterable(
                                _sub_parsers_defaults))
                    sub_parsers_defaults = _get_sub_parsers_defaults(self)
                    if sub_parsers_defaults:
                        if args is None:
                            # args default to the system args
                            import sys as _sys
                            args = _sys.argv[1:]
                        else:
                            # copy, so the caller's sequence is not modified by the appends below
                            args = list(args)
                        # make sure we append the subparsers
                        for a in sub_parsers_defaults:
                            if a.default not in args:
                                args.append(a.default)
                parsed_args = original_parse_fn(self, args=args, namespace=namespace)
                PatchArgumentParser._add_last_parsed_args(
                    self, parsed_args)
            else:
                # we should never get here
                parsed_args = parsed_args_str or {}
                PatchArgumentParser._add_last_parsed_args(
                    self, parsed_args)
        # clear the re-entrancy flag before handing the result back
        PatchArgumentParser._calling_current_task = False
        return parsed_args
    # no task to sync with: plain parse
    # Store last instance and result
    PatchArgumentParser._add_last_arg_parser(self)
    parsed_args = {} if not original_parse_fn else original_parse_fn(
        self, args=args, namespace=namespace)
    PatchArgumentParser._add_last_parsed_args(self, parsed_args)
    return parsed_args
def run_hyperparam_optim(project_name, task_name, lambda_min, lambda_max, lambda_step, reg_min, reg_max, reg_step,
                         check_exp_period, min_iter_per_job, max_iter_per_job, time_limit_per_job, task_id=None):
    """Run a hyper-parameter optimization over ``lambda_val`` and ``reg`` for a template task.

    :param project_name: project of the template task, used when ``task_id`` is not set.
    :param task_name: name of the template task, used when ``task_id`` is not set.
    :param lambda_min: lower bound of the ``lambda_val`` range.
    :param lambda_max: upper bound of the ``lambda_val`` range.
    :param lambda_step: step size of the ``lambda_val`` range.
    :param reg_min: lower bound of the ``reg`` range.
    :param reg_max: upper bound of the ``reg`` range.
    :param reg_step: step size of the ``reg`` range.
    :param check_exp_period: passed as the optimizer's ``pool_period_min`` and as its report period.
    :param min_iter_per_job: minimum iterations per experiment (``min_iteration_per_job``).
    :param max_iter_per_job: maximum iterations per experiment (``max_iteration_per_job``).
    :param time_limit_per_job: execution time limit of a single experiment, in minutes.
    :param task_id: id of the template task; looked up by project and name when falsy.
    """
    # Connecting CLEARML
    task = Task.init(project_name='BBcluster Hyper-Parameter Optimization',
                     task_name='Automatic Hyper-Parameter Optimization',
                     task_type=Task.TaskTypes.optimizer,
                     reuse_last_task_id=False)

    # experiment template to optimize in the hyper-parameter optimization
    args = task.connect({
        'template_task_id': task_id,
        'run_as_service': False,
    })

    # Resolve the template experiment by project/name when no id was supplied
    if not args['template_task_id']:
        template = Task.get_task(project_name=project_name, task_name=task_name)
        args['template_task_id'] = template.id

    # The two parameters being searched
    search_space = [
        UniformParameterRange('lambda_val', min_value=lambda_min, max_value=lambda_max, step_size=lambda_step),
        UniformParameterRange('reg', min_value=reg_min, max_value=reg_max, step_size=reg_step),
    ]

    an_optimizer = HyperParameterOptimizer(
        # the experiment we want to optimize
        base_task_id=args['template_task_id'],
        hyper_parameters=search_space,
        objective_metric_title='val_ARI',
        objective_metric_series='val_ARI',
        objective_metric_sign='max',
        max_number_of_concurrent_tasks=4,
        optimizer_class=search_strategy,
        execution_queue='default',
        # Optional: limit the execution time of a single experiment, in minutes
        # (ignored when using OptimizerBOHB)
        time_limit_per_job=time_limit_per_job,
        # how often the experiments are checked
        pool_period_min=check_exp_period,
        # maximum number of jobs to launch for the optimization, None means unlimited;
        # with OptimizerBOHB it defines the maximum budget in terms of full jobs
        total_max_jobs=None,
        # only applicable for OptimizerBOHB: minimum iterations before early stopping
        min_iteration_per_job=min_iter_per_job,
        # maximum iterations for an experiment (a must when using OptimizerBOHB)
        max_iteration_per_job=max_iter_per_job)

    an_optimizer.set_report_period(check_exp_period)
    # start the optimization; the callback is invoked every time an experiment completes
    # and this call returns immediately
    an_optimizer.start(job_complete_callback=job_complete_callback)
    # time limit for the whole optimization process
    an_optimizer.set_time_limit(in_minutes=1440.0)
    # block until the optimization (running in the background) is done
    an_optimizer.wait()

    # print the ids of the top performing experiments
    best_experiments = an_optimizer.get_top_experiments(top_k=5)
    print([experiment.id for experiment in best_experiments])

    # make sure background optimization stopped
    an_optimizer.stop()
    print('We are done, good bye')
Upload artifacts from a Task, and then a different Task can access and utilize the data from that artifact. """ from clearml import Task from time import sleep task1 = Task.init(project_name='examples', task_name='create artifact') # upload data file to the initialized task, inputting a name and file location task1.upload_artifact(name='data file', artifact_object='data_samples/sample.json') # close the task, to be able to initialize a new task task1.close() # initialize another task to use some other task's artifacts task2 = Task.init(project_name='examples', task_name='use artifact from other task') # get instance of Task that created artifact (task1), using Task's project and name. You could also use its ID number. preprocess_task = Task.get_task(project_name='examples', task_name='create artifact') # access artifact from task1, using the artifact's name # get_local_copy() caches the files for later use and returns a path to the cached file local_json = preprocess_task.artifacts['data file'].get_local_copy() # Doing some stuff with file from other Task in current Task with open(local_json) as data_file: file_text = data_file.read() print(file_text) # Simulate the work of a Task sleep(1.0) print('Finished doing stuff with some data :)')
def main():
    """Validate a Clara MMAR: stage configs, dataset and models, evaluate, upload outputs.

    Steps, in order:

    1. Initialise the ClearML task, set its base docker image, parse arguments.
    2. Connect the evaluate config and, when given, the environment file and
       the log config to the task.
    3. With an environment file: fetch the dataset (``--dataset_task`` or the
       "Example data" dataset), connect the dataset json and copy the
       dataset files under ``<mmar>/<DATA_ROOT>``.
    4. Copy the connected config files into the MMAR folder.
    5. With ``--models_task``: move the weights of that task's output models
       into the directory named by the ``MMAR_CKPT_DIR=...`` entry of ``--set``.
    6. Run ``evaluate_mmar()`` and upload every path found under
       ``<mmar>/<MMAR_EVAL_OUTPUT_PATH>`` as an artifact (best-effort).

    :raises ValueError: if ``--models_task`` is given without a
        ``MMAR_CKPT_DIR=...`` entry in ``--set``.
    """
    import json

    def make_dirs(path):
        # Best-effort directory creation (the directory may already exist).
        # noinspection PyBroadException
        try:
            os.makedirs(path)
        except Exception:
            pass

    def copy_tree(src, dst):
        # Recursive copy through the shell, exactly as before.
        os.system("cp -R {} {}".format(src, dst))

    task = Task.init(project_name="Nvidia Clara examples with ClearML", task_name="Validate Clara")
    task.set_base_docker(
        "nvcr.io/nvidia/clara-train-sdk:v3.1.01 --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864"
    )
    parser = argparse.ArgumentParser()
    parser.add_argument('--mmar', '-m', type=str, help='MMAR_ROOT folder', required=True)
    parser.add_argument('--config', '-c', type=str, help='evaluate config file', required=True)
    parser.add_argument('--env', '-e', type=str, help='environment file')
    parser.add_argument('--log_config', '-l', type=str, help='log config file')
    parser.add_argument('--set', metavar='KEY=VALUE', nargs='*')
    parser.add_argument('--models_task', type=str, help='The training task id')
    parser.add_argument("--dataset_task", type=str,
                        help="The dataset task id, if not provided, a task named `Example data` will be chosen")
    set_env_vars()
    args = parser.parse_args()

    mmar = args.mmar or os.environ["MMAR_ROOT"]
    evaluate_config = args.config
    env = args.env
    log_config = args.log_config
    # --set is optional: without it argparse yields None, which cannot be iterated.
    kv = args.set or []
    dataset_task = args.dataset_task
    # Stays empty when no environment file is given, so later lookups are safe.
    env_dict = {}

    evaluate_conf = task.connect_configuration(evaluate_config, name="evaluate", description="evaluate config file")

    if env:
        env_conf = task.connect_configuration(env, name="env", description="environment file")
        if dataset_task:
            dataset_task = Dataset.get(dataset_id=dataset_task)
        else:
            dataset_task = Dataset.get(dataset_project="Nvidia Clara examples with ClearML",
                                       dataset_name="Example data")

        with open(env_conf, "r") as env_file:
            env_dict = json.load(env_file)

        data_root = env_dict.get("DATA_ROOT", "/")
        make_dirs(os.path.join(mmar, data_root))

        dataset_json = env_dict.get("DATASET_JSON", "/")
        try:
            dataset_json_file = task.connect_configuration(os.path.join(mmar, dataset_json),
                                                           name="dataset_json",
                                                           description="dataset file")
            make_dirs(os.path.join(mmar, dataset_json.rpartition("/")[0]))
            copy_tree(dataset_json_file, os.path.join(mmar, dataset_json))
        except Exception as ex:
            print("Can not connect dataset config file {},\n{}".format(dataset_json, ex))

        local_data = dataset_task.get_local_copy()
        for artifact in os.listdir(local_data):
            copy_tree(os.path.join(local_data, artifact), str(os.path.join(mmar, data_root)))
            os.system("mv {} {}".format(os.path.join(local_data, artifact),
                                        os.path.join(mmar, data_root, artifact)))
    else:
        env_conf = env

    log_conf = task.connect_configuration(log_config, name="log config", description="log config file") \
        if log_config else log_config

    make_dirs(os.path.join(mmar, evaluate_config.rpartition("/")[0]))
    copy_tree(evaluate_conf, os.path.join(mmar, evaluate_config))

    # --env and --log_config are optional. Joining a path with None raised
    # TypeError, so only stage the files that were actually provided.
    if env:
        make_dirs(os.path.join(mmar, env.rpartition("/")[0]))
        copy_tree(env_conf, os.path.join(mmar, env))
    if log_config:
        make_dirs(os.path.join(mmar, log_config.rpartition("/")[0]))
        copy_tree(log_conf, os.path.join(mmar, log_config))

    if args.models_task:
        m_task = Task.get_task(task_id=args.models_task)
        output_models = m_task.get_models().get("output") or []
        script_path = Path(__file__).parent.absolute()
        ckpt_dirs = [elem.partition("=")[2] for elem in kv if elem.startswith("MMAR_CKPT_DIR")]
        if not ckpt_dirs:
            # Previously surfaced as a bare IndexError with no explanation.
            raise ValueError("--models_task requires a MMAR_CKPT_DIR=<dir> entry in --set")
        dest = ckpt_dirs[0]
        make_dirs(dest)
        for mdl in output_models:
            m_output = mdl.get_weights_package()
            for model in m_output:
                os.system("mv {} {}".format(os.path.join(script_path, model), dest))

    evaluate_mmar()

    # Only upload when the environment names an output path. The former "/"
    # default made os.path.join discard `mmar` and glob the filesystem root.
    eval_output = env_dict.get("MMAR_EVAL_OUTPUT_PATH")
    if eval_output:
        # Best-effort upload: report the failure instead of hiding it.
        # noinspection PyBroadException
        try:
            for f in Path(os.path.join(mmar, eval_output)).rglob('*'):
                task.upload_artifact(f.name, artifact_object=f)
        except Exception as ex:
            print("Could not upload evaluation outputs: {}".format(ex))
# The name of the next task is passed as a parameter in this example
param["next_task_name"] = "Toy Base Task"
# Name of the parameter to change in the next task
param["param_name"] = "Example_Param"
# New value for that parameter in the next task
param["param_name_new_value"] = 3
# Queue the cloned template task is sent to
param["execution_queue_name"] = "default"

# Simulate the work of a Task
print("Processing....")
sleep(2.0)
print("Done processing :)")

# Get a reference to the task to pipe to (same project as the current task).
next_task = Task.get_task(
    project_name=task.get_project_name(),
    task_name=param["next_task_name"],
)

# Clone it: the clone is created in Draft status, so its parameters can be modified.
cloned_task = Task.clone(source_task=next_task, name="Auto generated cloned task")

# Read the clone's parameters, override one value and write them back
cloned_task_parameters = cloned_task.get_parameters()
cloned_task_parameters[param["param_name"]] = param["param_name_new_value"]
cloned_task.set_parameters(cloned_task_parameters)

# Enqueue the clone for execution. The enqueued Task must already exist in the clearml platform
enqueue_notice = "Enqueue next step in pipeline to queue: {}".format(param["execution_queue_name"])
print(enqueue_notice)
Task.enqueue(cloned_task.id, queue_name=param["execution_queue_name"])
'layer_1': lambda: sample(range(*params['layer_1']), 1)[0], 'layer_2': lambda: sample(range(*params['layer_2']), 1)[0], } # number of random samples to test from 'space' params['total_number_of_experiments'] = 3 # execution queue to add experiments to params['execution_queue_name'] = 'default' # experiment template to optimize with random parameter search params['experiment_template_name'] = 'Keras HP optimization base' # Select base template task # Notice we can be more imaginative and use task_id which will eliminate the need to use project name template_task = Task.get_task(project_name='examples', task_name=params['experiment_template_name']) for i in range(params['total_number_of_experiments']): # clone the template task into a new write enabled task (where we can change parameters) cloned_task = Task.clone(source_task=template_task, name=template_task.name + ' {}'.format(i), parent=template_task.id) # get the original template parameters cloned_task_parameters = cloned_task.get_parameters() # override with random samples form grid for k in space.keys(): cloned_task_parameters[k] = space[k]() # put back into the new cloned task
def load_generator(task_id):
    """Download the ``generator`` artifact of a task and unpickle it.

    :param task_id: id of the task that stores the ``generator`` artifact.
    :return: the unpickled object (type-commented as
        ``SyntheticDataGenerator`` in the original code).
    """
    generator_artifact = Task.get_task(task_id=task_id).artifacts["generator"]
    local_path = generator_artifact.get_local_copy()
    # NOTE(review): pickle.load runs arbitrary code from the file; only use
    # this with artifacts from tasks you trust.
    with open(local_path, "rb") as stream:
        return pickle.load(stream)
def read(self):
    """Read general information, tags, artifacts, metrics and params for every run in ``self.paths``.

    Every reader is invoked through ``self.call_func``. When
    ``self.project_exist`` is set and a task already exists for the run
    without the archived tag, the run is dropped from ``self.info``, a
    message is appended to ``self.msgs["FAILED"]``, the progress bar is
    advanced and the remaining readers are skipped for that run.
    """
    def run_reader(reader_name, run_id, location):
        # The label given to call_func is the reader method's own name; the
        # method is looked up on self only when the callback is executed.
        self.call_func(
            reader_name,
            run_id,
            lambda id_, path_: getattr(self, reader_name)(id_, path_),
            run_id,
            location,
        )

    self.thread_id = threading.current_thread().ident

    for id, path in self.paths:
        self.info[id] = {}
        run_reader("read_general_information", id, path)
        run_reader("read_tags", id, path + self.tags)

        # Remember the run's display name when the tags provide one.
        run_tags = self.info[id][self.tags]
        if "runName" in run_tags.keys():
            self.ID_to_Name[id] = run_tags["runName"]

        if self.project_exist:
            task = self.call_func(
                "Task.get_task",
                id,
                lambda id_: Task.get_task(project_name="MLFlow Migration", task_name=id_),
                self.get_run_name_by_id(id),
            )
            if task:
                if hasattr(task.data, "system_tags"):
                    system_tags = task.data.system_tags
                else:
                    system_tags = task.data.tags
                if ARCHIVED_TAG not in system_tags:
                    # An existing, non-archived task blocks re-migration of this run.
                    del self.info[id]
                    self.msgs["FAILED"].append(
                        f"task {id} already exist, if you want to migrate it again, "
                        f"you can archive it in Allegro Trains")
                    self.pbar.update(1)
                    continue

        run_reader("read_artifacts", id, path + self.artifacts)
        run_reader("read_metrics", id, path + self.metrics)
        run_reader("read_params", id, path + self.params)
def main():
    """Evaluate an unpruned TLT model, taking the weights from a training task.

    When no model file option is present on the command line, the training
    task id is read directly from ``sys.argv`` before parsing, the task's
    ``unpruned_weights`` artifact is fetched and injected as ``-m``. After
    parsing, the config files are connected to the task, the dataset is
    converted, and, when running remotely, the weights are copied to the
    ``model_file`` location stored in the task parameters. Finally
    ``eval_unpruned()`` is run.
    """
    task = Task.init(project_name="TLT3", task_name="TLT eval")

    parser = ArgumentParser()
    parser.add_argument(
        "-a",
        "--arch",
        help="Architecture",
        default="classification",
        choices=[
            "classification",
            "detectnet_v2",
            "ssd",
            "dssd",
            "yolo",
            "faster_rcnn",
            "retinanet",
            "mask_rcnn",
        ],
    )
    parser.add_argument(
        "-e", "--experiment_spec_file", help="Path to configuration file", required=True
    )
    parser.add_argument(
        "-t",
        "--train-task",
        help="The training task id",
        required=True,
    )
    parser.add_argument(
        "--dataset-export-spec",
        help="Path to the detection dataset spec containing the config for exporting .tfrecord files",
        required=True,
    )
    parser.add_argument(
        "-d",
        "--dataset-task",
        help="The task id with dataset as artifact. Artifact name should be 'dataset'",
    )
    parser.add_argument(
        "-k",
        "--key",
        default=None,
        type=str,
        help="The key to load pretrained weights and save intermediate snapshopts and final model. "
        "If not provided, an OS environment named 'KEY' must be set.",
    )

    # Without an explicit model file, take the value that follows the first
    # -t / --train-task occurrence on the raw command line (None if absent).
    cmd_train_task = None
    if not ("-m" in sys.argv or "--model_file" in sys.argv):
        cmd_train_task = next(
            (
                following
                for option, following in zip(sys.argv, sys.argv[1:])
                if option in ("-t", "--train-task")
            ),
            None,
        )

    if cmd_train_task:
        weights_task = Task.get_task(task_id=cmd_train_task)
        unpruned_weights = weights_task.artifacts["unpruned_weights"].get()
        # Inject the weights path so the parser sees it as the model file.
        sys.argv.extend(["-m", str(unpruned_weights)])

    parser.add_argument(
        "-m",
        "--model_file",
        default=str(unpruned_weights) if cmd_train_task else None,
        type=str,
    )

    args = parser.parse_args()
    arch = args.arch
    config_file = args.experiment_spec_file
    train_task = args.train_task
    dataset_export_spec = args.dataset_export_spec
    key = args.key

    task.set_base_docker("nvcr.io/nvidia/tlt-streamanalytics:v3.0-dp-py3")

    config_file = task.connect_configuration(config_file, name="config file")
    get_converted_data(args.dataset_task, config_file)
    dataset_export_spec = task.connect_configuration(
        dataset_export_spec, name="dataset export spec"
    )
    kitti_to_tfrecord(dataset_export_spec, config_file)

    if train_task and running_remotely():
        # Fetch the weights again and place them where the task parameters
        # say the model file should be.
        unpruned_weights = Task.get_task(task_id=train_task).artifacts["unpruned_weights"].get()
        os.system(f"ls {str(unpruned_weights).rpartition('/')[0]}")
        params = task.get_parameters_as_dict()
        os.system(f"mkdir -p {params['Args']['model_file'].rpartition('/')[0]}")
        os.system(f"cp {unpruned_weights} {params['Args']['model_file']}")

    eval_unpruned()
# override versions for colab Task.add_requirements('pandas', '1.1.5') Task.add_requirements('numpy', '1.19.5') # Track everything on ClearML Free task = Task.init( project_name='R|D?R&D! Webinar 01', task_name='EDA example', output_uri=True, # auto save everything to Clearml Free ) cfg = EDAConf() task.connect(cfg, 'EDA Config') task.execute_remotely('colab') datasets_metadata_task = Task.get_task(cfg.dataset_metadata_id) artifact = datasets_metadata_task.artifacts[ cfg.dataset_metadata_artifact_name] metadata = artifact.get() for image_size, meta in metadata.items(): print(f'processing {image_size}...') # get augmentations - including mean pixel value norm_info = meta['norm_info'] # get dataset id's train_dataset_id = meta.get('train', "") valid_dataset_id = meta.get('val', "") if not len(train_dataset_id) or not len(valid_dataset_id): raise ValueError('Preprocess error: could not find' f' datasets for image size {image_size}') # download dataset (cached!)
value=epoch_loss) Logger.current_logger().report_scalar("test", "accuracy", iteration=epoch, value=(acc / len(test_loader.dataset))) task = Task.init(project_name="mushrooms", task_name="mushrooms step 2 train model") args = { 'stage_data_task_id': 'REPLACE_WITH_DATASET_TASK_ID', } task.connect(args) task.execute_remotely() dataset_task = Task.get_task(task_id=args["stage_data_task_id"]) dataset = MushRoomsDataset(dataset_task.artifacts["dataset"].get()) trainsize = int(0.8 * len(dataset)) testsize = len(dataset) - trainsize trainset, testset = random_split(dataset, [trainsize, testsize]) trainloader = DataLoader(trainset, batch_size=128, shuffle=True) testloader = DataLoader(testset, batch_size=32, shuffle=False) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") model = Net().to(device) criterion = nn.BCELoss() optimizer = optim.Adam(model.parameters(), lr=0.01) for epoch in range(1, 100): train(model, device, trainloader, criterion, optimizer, epoch) test(model, device, criterion, testloader, epoch)
# from here on everything is logged automatically task = Task.init(project_name='Hyper-Parameter Optimization', task_name='Automatic Hyper-Parameter Optimization', task_type=Task.TaskTypes.optimizer, reuse_last_task_id=False) # experiment template to optimize in the hyper-parameter optimization args = { 'template_task_id': None, 'run_as_service': False, } args = task.connect(args) # Get the template task experiment that we want to optimize if not args['template_task_id']: args['template_task_id'] = Task.get_task( project_name='examples', task_name='Keras HP optimization base').id # Set default queue name for the Training tasks themselves. # later can be overridden in the UI execution_queue = '1xGPU' # Example use case: an_optimizer = HyperParameterOptimizer( # This is the experiment we want to optimize base_task_id=args['template_task_id'], # here we define the hyper-parameters to optimize # Notice: The parameter name should exactly match what you see in the UI: <section_name>/<parameter> # For Example, here we see in the base experiment a section Named: "General" # under it a parameter named "batch_size", this becomes "General/batch_size" # If you have `argparse` for example, then arguments will appear under the "Args" section, # and you should instead pass "Args/batch_size"
# The name of the next task is passed as a parameter in this example
param['next_task_name'] = 'Toy Base Task'
# Name of the parameter to change in the next task
param['param_name'] = 'Example_Param'
# New value for that parameter in the next task
param['param_name_new_value'] = 3
# Queue the cloned template task is sent to
param['execution_queue_name'] = 'default'

# Simulate the work of a Task
print('Processing....')
sleep(2.0)
print('Done processing :)')

# Get a reference to the task to pipe to (same project as the current task).
next_task = Task.get_task(
    project_name=task.get_project_name(),
    task_name=param['next_task_name'],
)

# Clone it: the clone is created in Draft status, so its parameters can be modified.
cloned_task = Task.clone(source_task=next_task, name='Auto generated cloned task')

# Read the clone's parameters, override one value and write them back
cloned_task_parameters = cloned_task.get_parameters()
cloned_task_parameters[param['param_name']] = param['param_name_new_value']
cloned_task.set_parameters(cloned_task_parameters)

# Enqueue the clone for execution. The enqueued Task must already exist in the clearml platform
enqueue_notice = 'Enqueue next step in pipeline to queue: {}'.format(param['execution_queue_name'])
print(enqueue_notice)
Task.enqueue(cloned_task.id, queue_name=param['execution_queue_name'])