def _launch_remote_best_trial(self, best_trial):
    """Re-run the best trial as a remote execution and block until it succeeds.

    Args:
        best_trial: dict with an 'hp_values' entry holding the winning
            hyper-parameter values.

    Returns:
        The successful dl execution object.

    Raises:
        Exception: if the remote execution reaches the 'failed' state.
    """
    model_specs = self.optimal_model.unwrap()
    dataset_input = dl.FunctionIO(type='Dataset',
                                  name='dataset',
                                  value={"dataset_id": self.dataset_id})
    train_query_input = dl.FunctionIO(type='Json',
                                      name='train_query',
                                      value=self.train_query)
    val_query_input = dl.FunctionIO(type='Json',
                                    name='val_query',
                                    value=self.val_query)
    hp_value_input = dl.FunctionIO(type='Json',
                                   name='hp_values',
                                   value=best_trial['hp_values'])
    model_specs_input = dl.FunctionIO(type='Json',
                                      name='model_specs',
                                      value=model_specs)
    inputs = [
        dataset_input, train_query_input, val_query_input, hp_value_input,
        model_specs_input
    ]
    execution_obj = self._run_trial_remote_execution(inputs)
    # Poll until the execution reaches a terminal state.  Check 'failed'
    # BEFORE sleeping so an already-failed execution raises immediately
    # instead of after an extra 5-second wait (the original checked only
    # after the refetch).
    while execution_obj.latest_status['status'] != 'success':
        if execution_obj.latest_status['status'] == 'failed':
            raise Exception("package execution failed")
        time.sleep(5)
        execution_obj = dl.executions.get(execution_id=execution_obj.id)
    return execution_obj
def push_package(project):
    """Build and push the 'ObDetNet' inference package to *project*.

    The package exposes two functions from ``prediction_module.py``:
    single-item prediction and hot-swapping of the inference checkpoint.

    Returns the pushed package object.
    """
    # I/O declarations shared by the functions and the module init.
    item_io = dl.FunctionIO(type='Item', name='item')
    model_id_io = dl.FunctionIO(type='Json', name='model_id')
    checkpoint_id_io = dl.FunctionIO(type='Json', name='checkpoint_id')

    predict_fn = dl.PackageFunction(name='predict_single_item',
                                    inputs=[item_io],
                                    outputs=[],
                                    description='')
    load_checkpoint_fn = dl.PackageFunction(
        name='load_new_inference_checkpoint',
        inputs=[model_id_io, checkpoint_id_io],
        outputs=[],
        description='')

    module = dl.PackageModule(
        entry_point='prediction_module.py',
        name='predict_item_module',
        functions=[predict_fn, load_checkpoint_fn],
        init_inputs=[model_id_io, checkpoint_id_io])

    return project.packages.push(
        package_name='ObDetNet',
        src_path=os.path.join(os.getcwd(), 'dataloop_services'),
        modules=[module])
def maybe_do_deployment_stuff():
    """Deploy/update Dataloop services according to the parsed CLI flags.

    - ``--deploy``: push the package and launch the 'trial' and 'zazu' services.
    - ``--zazu_timer``: push the package and launch the timer service.
    - ``--update``: update the existing 'trial' and 'zazu' services.

    Reads project configuration from ``global_configs.json`` /
    ``configs.json`` in the working directory.
    """
    if args.deploy:
        logger.info('about to launch 2 deployments, zazu and trial')
        with open('global_configs.json', 'r') as fp:
            global_project_name = json.load(fp)['project']
        global_project = dl.projects.get(project_name=global_project_name)
        global_package_obj = push_package(global_project)
        # Pre-bind so the cleanup path never hits a NameError when the
        # first deploy call is the one that raised.
        trial_service = None
        zazu_service = None
        try:
            # predict_service = deploy_predict(package=global_package_obj)
            trial_service = deploy_model(package=global_package_obj)
            zazu_service = deploy_zazu(package=global_package_obj)
            logger.info('deployments launched successfully')
        except Exception:
            # Best-effort rollback of whatever was deployed before the
            # failure; the original bare `except:` also hid the error
            # entirely -- log it so the failure is visible.
            logger.exception('deployment failed, rolling back partial deployments')
            # predict_service.delete()
            if trial_service is not None:
                trial_service.delete()
            if zazu_service is not None:
                zazu_service.delete()
    if args.zazu_timer:
        logger.info('about to launch timer deployment')
        with open('global_configs.json', 'r') as fp:
            global_project_name = json.load(fp)['project']
        global_project = dl.projects.get(project_name=global_project_name)
        global_package_obj = push_package(global_project)
        with open('configs.json', 'r') as fp:
            configs = json.load(fp)
        configs_input = dl.FunctionIO(type='Json',
                                      name='configs',
                                      value=json.dumps(configs))
        # 15-minute cycle between timer runs.
        time_input = dl.FunctionIO(type='Json', name='time', value=3600 * 0.25)
        test_dataset_input = dl.FunctionIO(type='Json',
                                           name='test_dataset_id',
                                           value='5eb7e0bdd4eb9434c77d80b5')
        query_input = dl.FunctionIO(
            type='Json',
            name='query',
            value=json.dumps({
                "resource": "items",
                "sort": {},
                "page": 0,
                "pageSize": 1000,
                "filter": {
                    "$and": [{"dir": "/items/val*"},
                             {"hidden": False},
                             {"type": "file"}]
                }
            }))
        init_inputs = [configs_input, time_input, test_dataset_input, query_input]
        deploy_zazu_timer(package=global_package_obj, init_inputs=init_inputs)
        logger.info('timer deployment launched successfully')
    if args.update:
        with open('global_configs.json', 'r') as fp:
            # BUG FIX: the original bound the whole JSON dict to
            # global_project_name; extract the project name like the
            # other branches do.
            global_project_name = json.load(fp)['project']
        maybe_login()
        global_project = dl.projects.get(project_name=global_project_name)
        update_service(global_project, 'trial')
        update_service(global_project, 'zazu')
def maybe_do_deployment_stuff():
    """Deploy or update Dataloop services based on parsed CLI flags.

    Branches on ``args.deploy`` / ``args.zazu_timer`` (mutually exclusive
    here) and ``args.update``.  Reads project config from
    ``global_configs.json`` and ``configs.json`` in the working directory.
    """
    if args.deploy:
        with open('global_configs.json', 'r') as fp:
            global_project_name = json.load(fp)['project']
        global_project = dl.projects.get(project_name=global_project_name)
        global_package_obj = push_package(global_project)
        try:
            # predict_service = deploy_predict(package=global_package_obj)
            trial_service = deploy_model(package=global_package_obj)
            zazu_service = deploy_zazu(package=global_package_obj)
        # NOTE(review): bare except swallows the real error; and if
        # deploy_model() is what raised, `trial_service` is unbound and the
        # cleanup below raises NameError instead.
        except:
            # predict_service.delete()
            trial_service.delete()
            zazu_service.delete()
    elif args.zazu_timer:
        with open('global_configs.json', 'r') as fp:
            global_project_name = json.load(fp)['project']
        global_project = dl.projects.get(project_name=global_project_name)
        global_package_obj = push_package(global_project)
        with open('configs.json', 'r') as fp:
            configs = json.load(fp)
        configs_input = dl.FunctionIO(type='Json', name='configs', value=configs)
        # 3600 s = one-hour cycle between timer runs.
        time_input = dl.FunctionIO(type='Json', name='time', value=3600)
        # NOTE(review): `test_dataset_input` and `query_input` are never
        # defined in this function -- this branch raises NameError as
        # written.  A sibling version of this function builds them from a
        # hard-coded dataset id and item query; confirm intent and define
        # them before the call.
        deploy_zazu_timer(package=global_package_obj,
                          configs=configs_input,
                          time=time_input,
                          test_dataset=test_dataset_input,
                          query=query_input)
    if args.update:
        with open('global_configs.json', 'r') as fp:
            # NOTE(review): json.load(fp) returns the whole config dict;
            # other branches use json.load(fp)['project'].  As written,
            # a dict is passed as project_name below -- likely a bug.
            global_project_name = json.load(fp)
        maybe_login()
        global_project = dl.projects.get(project_name=global_project_name)
        update_service(global_project, 'trial')
        update_service(global_project, 'zazu')
def _launch_remote_trials(self):
    """Launch every ongoing trial remotely in parallel and record metrics.

    One worker thread is spawned per trial via ``ThreadManager``; each runs
    ``self._collect_metrics`` with that trial's inputs.  After all workers
    finish, their results are written back into ``self.ongoing_trials``.
    """
    pool = ThreadManager()
    specs = self.optimal_model.unwrap()
    logger.info('launching new set of trials')
    for trial_id, trial in self.ongoing_trials.trials.items():
        trial_inputs = [
            dl.FunctionIO(type='Dataset',
                          name='dataset',
                          value={"dataset_id": self.dataset_id}),
            dl.FunctionIO(type='Json',
                          name='hp_values',
                          value=trial['hp_values']),
            dl.FunctionIO(type='Json', name='model_specs', value=specs),
        ]
        pool.new_thread(target=self._collect_metrics,
                        inputs=trial_inputs,
                        trial_id=trial_id)
    pool.wait()
    # Fold each worker's metrics back into the shared trials registry.
    for trial_id, metrics in pool.results.items():
        self.ongoing_trials.update_metrics(trial_id, metrics)
def execute(service_name, item_id):
    """Synchronously run the 'run' function of a service on a single item.

    Streams the execution logs while waiting for completion.
    """
    item_input = dl.FunctionIO(type=dl.PackageInputType.ITEM,
                               name='item',
                               value={'item_id': item_id})
    service = dl.services.get(service_name=service_name)
    service.execute(function_name='run',
                    sync=True,
                    stream_logs=True,
                    execution_input=item_input)
def push_package(project_name, package_name):
    """Push a minimal one-function package (item input) to a project.

    The module/function are left with default names; the package source is
    the current working directory.
    """
    project = dl.projects.get(project_name=project_name)
    item_io = dl.FunctionIO(type='Item', name='item')
    function = dl.PackageFunction(inputs=item_io)
    module = dl.PackageModule(functions=function)
    package = project.packages.push(package_name=package_name,
                                    src_path=os.getcwd(),
                                    modules=module)
    print('Package pushed!')
    package.print()
def _launch_predict_remote(self, checkpoint_path):
    """Run remote prediction with *checkpoint_path* and download artifacts.

    Builds the execution inputs from the instance state, launches the
    remote execution via ``self._run_pred_remote_execution``, polls until
    it reaches a terminal state, then downloads the resulting artifacts
    into the working directory.

    Raises:
        Exception: if the remote execution fails or polling errors out.
    """
    self.service = self.global_project.services.get(service_name='predict')
    model_specs = self.optimal_model.unwrap()
    dataset_input = dl.FunctionIO(type='Dataset',
                                  name='dataset',
                                  value={"dataset_id": self.dataset_id})
    checkpoint_path_input = dl.FunctionIO(
        type='Json',
        name='checkpoint_path',
        value={"checkpoint_path": checkpoint_path})
    val_query_input = dl.FunctionIO(type='Json',
                                    name='val_query',
                                    value=self.val_query)
    model_specs_input = dl.FunctionIO(type='Json',
                                      name='model_specs',
                                      value=model_specs)
    inputs = [
        dataset_input, val_query_input, checkpoint_path_input,
        model_specs_input
    ]
    logger.info('checkpoint is type: ' + str(type(checkpoint_path)))
    try:
        logger.info("trying to get execution object")
        execution_obj = self._run_pred_remote_execution(inputs)
        logger.info("got execution object")
        # TODO: Turn execution_obj into metrics
        while execution_obj.latest_status['status'] != 'success':
            time.sleep(5)
            execution_obj = dl.executions.get(execution_id=execution_obj.id)
            if execution_obj.latest_status['status'] == 'failed':
                raise Exception("plugin execution failed")
        logger.info("execution object status is successful")
        # download artifacts, should contain dir with txt file annotations
        # TODO: download many different metrics then should have id hash as well..
        self.project.artifacts.download(package_name=self.package_name,
                                        execution_id=execution_obj.id,
                                        local_path=os.getcwd())
    except Exception as e:
        # BUG FIX: the original constructed `Exception(...)` without
        # `raise`, silently swallowing every failure.  Re-raise so callers
        # actually see the error, chained to the original cause.
        raise Exception(' had an exception: \n', repr(e)) from e
def _launch_remote_trials(self):
    """Launch all ongoing trials on the remote 'trial' service in parallel.

    Spawns one worker per trial through ``ThreadManager`` (each running
    ``self._collect_metrics``), waits for completion, then writes each
    worker's metrics-and-checkpoint result back into
    ``self.ongoing_trials``.
    """
    self.service = self.global_project.services.get(service_name='trial')
    pool = ThreadManager()
    specs = self.optimal_model.unwrap()
    logger.info('launching new set of trials')
    for trial_id, trial in self.ongoing_trials.trials.items():
        trial_inputs = [
            dl.FunctionIO(type='Dataset',
                          name='dataset',
                          value={"dataset_id": self.dataset_id}),
            dl.FunctionIO(type='Json',
                          name='train_query',
                          value=self.train_query),
            dl.FunctionIO(type='Json',
                          name='val_query',
                          value=self.val_query),
            dl.FunctionIO(type='Json',
                          name='hp_values',
                          value=trial['hp_values']),
            dl.FunctionIO(type='Json', name='model_specs', value=specs),
        ]
        pool.new_thread(target=self._collect_metrics,
                        inputs=trial_inputs,
                        trial_id=trial_id)
    pool.wait()
    # Record each trial's metrics/checkpoint under its trial id.
    for trial_id, metrics_and_checkpoint in pool.results.items():
        self.ongoing_trials.update_metrics(trial_id, metrics_and_checkpoint)
def deploy_service(project_name, package_name):
    """Deploy *package_name* from *project_name* as a GPU-backed service.

    The service is named after the package, pinned to SDK 1.15.6, and is
    initialized with the package name as a JSON input.  Prints the service
    details and its status after deployment.
    """
    package = dl.projects.get(project_name=project_name) \
                .packages.get(package_name=package_name)
    init_input = [
        dl.FunctionIO(type=dl.PackageInputType.JSON,
                      name='package_name',
                      value=package_name)
    ]
    runtime = {'gpu': True, 'numReplicas': 1, 'concurrency': 1}
    service = package.services.deploy(service_name=package.name,
                                      sdk_version='1.15.6',
                                      init_input=init_input,
                                      runtime=runtime)
    print('Service deployed!')
    service.print()
    print(service.status())
def __init__(self, configs, time, test_dataset, query):
    """Set up the timer: download validation data, load ground-truth
    annotations, then enter the periodic cycle.

    Args:
        configs: parsed configuration dict (expects
            configs['dataloop']['project']).
        time: cycle interval passed straight to ``self._circle``.
        test_dataset: dl dataset object used for evaluation.
        query: item query limiting which items are downloaded.
    """
    self.configs_input = dl.FunctionIO(type='Json',
                                       name='configs',
                                       value=configs)
    self.service = dl.services.get('zazu')
    self.project = dl.projects.get(configs['dataloop']['project'])
    maybe_download_pred_data(dataset_obj=test_dataset, val_query=query)
    # Restrict the local download to the items matched by the query.
    query_filters = dl.Filters()
    query_filters.custom_filter = query
    local_dataset_dir = os.path.join(os.getcwd(), test_dataset.name)
    download_and_organize(path_to_dataset=local_dataset_dir,
                          dataset_obj=test_dataset,
                          filters=query_filters)
    # Ground-truth annotations live under <dataset>/json after download.
    self.compute = precision_recall_compute()
    self.compute.add_dataloop_local_annotations(
        os.path.join(local_dataset_dir, 'json'))
    self._circle(time)
def push_package(project):
    """Build and push the 'zazuml' package (models + zazu modules).

    Declares the function I/O contracts, groups them into two modules
    (the per-trial model runner and the zazu train/search entry points),
    and pushes the package from the current working directory.

    Returns the pushed package object.
    """
    dataset_input = dl.FunctionIO(type='Dataset', name='dataset')
    hp_value_input = dl.FunctionIO(type='Json', name='hp_values')
    model_specs_input = dl.FunctionIO(type='Json', name='model_specs')
    package_name_input = dl.FunctionIO(type='Json', name='package_name')
    service_name_input = dl.FunctionIO(type='Json', name='service_name')
    configs_input = dl.FunctionIO(type='Json', name='configs')

    model_inputs = [dataset_input, hp_value_input, model_specs_input]
    zazu_inputs = [configs_input]

    model_function = dl.PackageFunction(name='run',
                                        inputs=model_inputs,
                                        outputs=[],
                                        description='')
    train_function = dl.PackageFunction(name='train',
                                        inputs=zazu_inputs,
                                        outputs=[],
                                        description='')
    search_function = dl.PackageFunction(name='search',
                                         inputs=zazu_inputs,
                                         outputs=[],
                                         description='')

    models_module = dl.PackageModule(
        entry_point='dataloop_services/service_executor.py',
        name='models_module',
        functions=[model_function],
        init_inputs=[package_name_input, service_name_input])
    zazu_module = dl.PackageModule(
        entry_point='dataloop_services/zazu_module.py',
        name='zazu_module',
        functions=[train_function, search_function],
        # Consistency fix: pass init_inputs as a list like the sibling
        # module above (the original passed the bare FunctionIO).
        init_inputs=[package_name_input])

    package_obj = project.packages.push(
        package_name='zazuml',
        src_path=os.getcwd(),
        modules=[models_module, zazu_module])
    return package_obj
def push_package(project):
    """Build and push the full 'zazuml' package with all five modules.

    Modules pushed: remote prediction, per-trial model runner, zazu
    search/predict entry points, the periodic timer, and single-item
    prediction with checkpoint hot-swap.  Source is the current working
    directory.

    Returns the pushed package object.
    """
    # --- shared I/O declarations -----------------------------------------
    dataset_input = dl.FunctionIO(type='Dataset', name='dataset')
    train_query_input = dl.FunctionIO(type='Json', name='train_query')
    val_query_input = dl.FunctionIO(type='Json', name='val_query')
    hp_value_input = dl.FunctionIO(type='Json', name='hp_values')
    model_specs_input = dl.FunctionIO(type='Json', name='model_specs')
    checkpoint_path_input = dl.FunctionIO(type='Json', name='checkpoint_path')
    package_name_input = dl.FunctionIO(type='Json', name='package_name')
    configs_input = dl.FunctionIO(type='Json', name='configs')
    time_input = dl.FunctionIO(type='Json', name='time')
    test_dataset_input = dl.FunctionIO(type='Json', name='test_dataset_id')
    query_input = dl.FunctionIO(type='Json', name='query')
    item_input = dl.FunctionIO(type='Item', name='item')
    model_input = dl.FunctionIO(type='Json', name='model_id')
    checkpoint_input = dl.FunctionIO(type='Json', name='checkpoint_id')

    predict_inputs = [
        dataset_input, val_query_input, checkpoint_path_input,
        model_specs_input
    ]
    model_inputs = [
        dataset_input, train_query_input, val_query_input, hp_value_input,
        model_specs_input
    ]
    zazu_inputs = [configs_input]

    # --- function contracts ----------------------------------------------
    predict_function = dl.PackageFunction(name='run',
                                          inputs=predict_inputs,
                                          outputs=[],
                                          description='')
    model_function = dl.PackageFunction(name='run',
                                        inputs=model_inputs,
                                        outputs=[],
                                        description='')
    zazu_search_function = dl.PackageFunction(name='search',
                                              inputs=zazu_inputs,
                                              outputs=[],
                                              description='')
    zazu_predict_function = dl.PackageFunction(name='predict',
                                               inputs=zazu_inputs,
                                               outputs=[],
                                               description='')
    timer_update_function = dl.PackageFunction(name='update_time',
                                               inputs=time_input,
                                               outputs=[],
                                               description='')
    predict_item_function = dl.PackageFunction(name='predict_single_item',
                                               inputs=[item_input],
                                               outputs=[],
                                               description='')
    load_checkpoint_function = dl.PackageFunction(
        name='load_new_inference_checkpoint',
        inputs=[model_input, checkpoint_input],
        outputs=[],
        description='')

    # --- module definitions ----------------------------------------------
    predict_module = dl.PackageModule(
        entry_point='dataloop_services/predict_module.py',
        name='predict_module',
        functions=[predict_function],
        init_inputs=[package_name_input])
    models_module = dl.PackageModule(
        entry_point='dataloop_services/trial_module.py',
        name='models_module',
        functions=[model_function],
        init_inputs=[package_name_input])
    zazu_module = dl.PackageModule(
        entry_point='dataloop_services/zazu_module.py',
        name='zazu_module',
        functions=[zazu_search_function, zazu_predict_function],
        # Consistency fix: pass init_inputs as a list like every other
        # module here (the original passed the bare FunctionIO).
        init_inputs=[package_name_input])
    zazu_timer_module = dl.PackageModule(
        entry_point='dataloop_services/zazu_timer_module.py',
        name='zazu_timer_module',
        functions=[timer_update_function],
        init_inputs=[
            configs_input, time_input, test_dataset_input, query_input
        ])
    predict_item_module = dl.PackageModule(
        entry_point='dataloop_services/prediction_module.py',
        name='predict_item_module',
        functions=[predict_item_function, load_checkpoint_function],
        init_inputs=[model_input, checkpoint_input])

    package_obj = project.packages.push(package_name='zazuml',
                                        src_path=os.getcwd(),
                                        modules=[
                                            predict_module, models_module,
                                            zazu_module, zazu_timer_module,
                                            predict_item_module
                                        ])
    return package_obj
# CLI entry script: parse flags, log in, maybe deploy services, then
# optionally trigger remote search/predict executions on the 'zazu' service.
# NOTE: `parser` is created earlier in the file (outside this chunk).
parser.add_argument("--predict_once", action='store_true', default=False)
parser.add_argument("--zazu_timer", action='store_true', default=False)
args = parser.parse_args()
with open('configs.json', 'r') as fp:
    configs = json.load(fp)
try:
    maybe_login(configs['dataloop']['setenv'])
except:
    # NOTE(review): bare except deliberately makes login best-effort, but
    # it also hides config errors (e.g. missing 'setenv' key) -- consider
    # narrowing.
    pass
maybe_do_deployment_stuff()
if args.remote:
    configs_input = dl.FunctionIO(type='Json', name='configs', value=configs)
    inputs = [configs_input]
    zazu_service = dl.services.get('zazu')
    # get project id for billing bla bla bla
    dataset_obj = get_dataset_obj(configs['dataloop'])
    # NOTE(review): `id` shadows the builtin; kept as-is since later code
    # outside this chunk may reference it.
    id = dataset_obj.project.id
    if args.search:
        zazu_service.execute(function_name='search',
                             execution_input=inputs,
                             project_id=id)
    if args.predict:
        zazu_service.execute(function_name='predict',
                             execution_input=inputs,
                             project_id=id)
def __init__(self, configs, time, test_dataset_id, query):
    """Timer service entry point: run search/evaluate/promote cycles forever.

    Each cycle: trigger a remote 'search' on the zazu service, wait for it,
    download its artifacts (a new checkpoint), evaluate the new checkpoint
    against the current best ('check0'), and promote it if its mAP is
    higher.  Sleeps ``time`` seconds between cycles.  This constructor
    never returns.

    Args:
        configs: JSON string of the configuration (parsed below).
        time: seconds between cycles (coerced to int).
        test_dataset_id: id of the evaluation dataset.
        query: JSON string item query selecting the evaluation items.
    """
    logger.info('dtlpy version: ' + str(dl.__version__))
    logger.info('dtlpy info: ' + str(dl.info()))
    time = int(time)
    # NOTE(review): environment is hard-coded to 'prod'.
    dl.setenv('prod')
    configs = json.loads(configs)
    query = json.loads(query)
    self.configs_input = dl.FunctionIO(type='Json', name='configs', value=configs)
    self.service = dl.services.get('zazu')
    project_name = configs['dataloop']['project']
    self.project = dl.projects.get(project_name)
    test_dataset = self.project.datasets.get(dataset_id=test_dataset_id)
    maybe_download_pred_data(dataset_obj=test_dataset, val_query=query)
    # add gt annotations
    filters = dl.Filters()
    filters.custom_filter = query
    dataset_name = test_dataset.name
    path_to_dataset = os.path.join(os.getcwd(), dataset_name)
    # only download if doesnt exist
    if not os.path.exists(path_to_dataset):
        download_and_organize(path_to_dataset=path_to_dataset,
                              dataset_obj=test_dataset,
                              filters=filters)
    json_file_path = os.path.join(path_to_dataset, 'json')
    # NOTE(review): model name is hard-coded to 'retinanet'.
    self.model_obj = self.project.models.get(model_name='retinanet')
    self.adapter = self.model_obj.build(local_path=os.getcwd())
    logger.info('model built')
    # Infinite search -> evaluate -> promote loop.
    while 1:
        # Fresh ground-truth metrics accumulator each cycle.
        self.compute = precision_recall_compute()
        self.compute.add_dataloop_local_annotations(json_file_path)
        logger.info("running new execution")
        # NOTE(review): project_id is a hard-coded UUID -- presumably the
        # billing project; confirm it matches self.project.id.
        execution_obj = self.service.execute(
            function_name='search',
            execution_input=[self.configs_input],
            project_id='72bb623f-517f-472b-ad69-104fed8ee94a')
        # Poll until the remote search finishes (5 s interval, no timeout).
        while execution_obj.latest_status['status'] != 'success':
            sleep(5)
            execution_obj = dl.executions.get(execution_id=execution_obj.id)
            if execution_obj.latest_status['status'] == 'failed':
                raise Exception("plugin execution failed")
        logger.info("execution object status is successful")
        self.project.artifacts.download(package_name='zazuml',
                                        execution_id=execution_obj.id,
                                        local_path=os.getcwd())
        logs_file_name = 'timer_logs_' + str(execution_obj.id) + '.conf'
        graph_file_name = 'precision_recall_' + str(execution_obj.id) + '.png'
        self.cycle_logger = init_logging(__name__, filename=logs_file_name)
        logger.info('artifact download finished')
        logger.info(str(os.listdir('.')))
        # load new checkpoint and change to unique name
        new_checkpoint_name = 'checkpoint_' + str(execution_obj.id) + '.pt'
        logger.info(str(os.listdir('.')))
        # NOTE(review): assumes the artifact download produced
        # 'checkpoint0.pt' in the cwd; raises FileNotFoundError otherwise.
        os.rename('checkpoint0.pt', new_checkpoint_name)
        # Strip the '.pt' suffix to derive the model name.
        new_model_name = new_checkpoint_name[:-3]
        logger.info(str(os.listdir('.')))
        new_checkpoint = torch.load(new_checkpoint_name,
                                    map_location=torch.device('cpu'))
        # self.model_obj = self.project.models.get(model_name=new_checkpoint['model_specs']['name'])
        # self.adapter = self.model_obj.build(local_path=os.getcwd())
        # logger.info('model built')
        self.new_home_path = new_checkpoint['model_specs']['data']['home_path']
        self._compute_predictions(checkpoint_path=new_checkpoint_name,
                                  model_name=new_model_name)
        if len(self.compute.by_model_name.keys()) < 2:
            # if the model cant predict anything then just skip it
            logger.info('''model couldn't make any predictions, trying to train again''')
            continue
        # if previous best checkpoint doesnt exist there must not be a service,
        # launch prediction service with new new_checkpoint and create trigger
        if 'check0' not in [checkp.name for checkp in self.model_obj.checkpoints.list()]:
            logger.info('there is no check0, will add upload new checkpoint as check0 and '
                        'deploy prediction service')
            new_checkpoint_obj = self.model_obj.checkpoints.upload(
                checkpoint_name='check0',
                local_path=new_checkpoint_name)
            logger.info('uploaded this checkpoint as the new check0 : ' + new_checkpoint_name[:-3])
            self._maybe_launch_predict(new_checkpoint_obj)
            continue
        logger.info('i guess check0 does exist')
        best_checkpoint = self.model_obj.checkpoints.get('check0')
        check0_path = best_checkpoint.download(local_path=os.getcwd())
        logger.info('downloading best checkpoint')
        logger.info(str(os.listdir('.')))
        logger.info('check0 path is: ' + str(check0_path))
        self._compute_predictions(checkpoint_path=check0_path,
                                  model_name=best_checkpoint.name)
        # compute metrics
        new_checkpoint_mAP = self.compute.get_metric(
            model_name=new_model_name, precision_to_recall_ratio=1.)
        best_checkpoint_mAP = self.compute.get_metric(
            model_name=best_checkpoint.name, precision_to_recall_ratio=1.)
        logger.info('best checkpoint: ' + str(best_checkpoint_mAP))
        logger.info('new checkpoint: ' + str(new_checkpoint_mAP))
        # if new checkpoint performs better switch out prediction
        if new_checkpoint_mAP > best_checkpoint_mAP:
            logger.info('new checkpoint is better')
            logger.info('uploading old best checkpoint under new name')
            # Archive the displaced best checkpoint under its execution id.
            self.model_obj.checkpoints.upload(
                checkpoint_name='checkpoint_' + check0_path.split('_')[-1][:-3],
                local_path=check0_path)
            logger.info('deleting old best checkpoint')
            best_checkpoint.delete()
            logger.info('uploading new best checkpoint as check0')
            new_best_checkpoint_obj = self.model_obj.checkpoints.upload(
                checkpoint_name='check0',
                local_path=new_checkpoint_name)
            if 'predict' not in [s.name for s in dl.services.list()]:
                self._maybe_launch_predict(new_best_checkpoint_obj)
            else:
                self._update_predict_service(new_best_checkpoint_obj)
            logger.info('switched with new checkpoint')
        self.compute.save_plot_metrics(save_path=graph_file_name)
        self.project.artifacts.upload(filepath=logs_file_name,
                                      package_name='zazuml',
                                      execution_id=execution_obj.id)
        self.project.artifacts.upload(filepath=graph_file_name,
                                      package_name='zazuml',
                                      execution_id=execution_obj.id)
        logger.info('waiting ' + str(time) + ' seconds for next execution . . . .')
        sleep(time)
# Script: declare the module/function layout for the 'video-tracker'
# package (a single bounding-box tracking function served from main.py).
import dtlpy as dl
import os

package_name = 'video-tracker'
# NOTE(review): placeholder project name -- must be replaced before running.
project_name = 'your_project_name'
project = dl.projects.get(project_name=project_name)

##########################
# define package modules #
##########################
modules = [
    dl.PackageModule(
        # Init-time configuration injected into the service entry point.
        init_inputs=[
            dl.FunctionIO(name='project_name', type=dl.PackageInputType.JSON),
            dl.FunctionIO(name='package_name', type=dl.PackageInputType.JSON)
        ],
        name='default_module',
        entry_point='main.py',
        functions=[
            dl.PackageFunction(
                inputs=[
                    dl.FunctionIO(name='item', type=dl.PackageInputType.ITEM),
                    dl.FunctionIO(name='annotation',
                                  type=dl.PackageInputType.ANNOTATION),
                    dl.FunctionIO(name='frame_duration',
                                  type=dl.PackageInputType.JSON)
                ],
                name='track_bounding_box',
                description='Tracks a bounding box annotation on video')
        ])
]
# Script tail: dispatch prediction / checkpoint-swap actions by CLI flag.
# `model`, `args`, `dl`, `logger`, `time` are defined earlier in the file.
if args.predict:
    model.load_inference(checkpoint_path='checkpoint.pt')
    model.predict()
if args.predict_single:
    # NOTE(review): hard-coded local image path -- developer-machine only.
    model.predict_single_image(image_path='/home/noam/0120122798.jpg')
if args.predict_item:
    project = dl.projects.get('buffs_project')
    dataset = project.datasets.get('tiny_mice_p')
    item = dataset.items.get('/items/253597.jpg')
    # filters = dl.Filters(field='filename', values='/items/253*')
    # pages = dataset.items.list(filters=filters)
    # items = [item for page in pages for item in page]
    items = [item]
    model.predict_items(items, 'checkpoint.pt')
if args.new_checkpoint:
    # Hot-swap the inference checkpoint on the running 'predict' service.
    service = dl.services.get('predict')
    # NOTE(review): model/checkpoint/project ids are hard-coded.
    model_id = '5e9d56bb7f6a015540d2efb4'
    checkpoint_id = '5e92e4b1e37a96cd28811a1a'
    model_input = dl.FunctionIO(type='Json', name='model_id', value=model_id)
    checkpoint_input = dl.FunctionIO(type='Json',
                                     name='checkpoint_id',
                                     value=checkpoint_id)
    inputs = [model_input, checkpoint_input]
    execution_obj = service.execute(
        execution_input=inputs,
        function_name='load_new_inference_checkpoint',
        project_id='fcdd792b-5146-4c62-8b27-029564f1b74e')
    # Poll until the remote execution finishes (5 s interval, no timeout).
    while execution_obj.latest_status['status'] != 'success':
        time.sleep(5)
        execution_obj = dl.executions.get(execution_id=execution_obj.id)
        if execution_obj.latest_status['status'] == 'failed':
            raise Exception("plugin execution failed")
    logger.info("execution object status is successful")
import dtlpy as dl package_name = 'annotation-automation' project_name = 'My project' project = dl.projects.get(project_name=project_name) ########################## # define package modules # ########################## modules = [ dl.PackageModule( init_inputs=[ dl.FunctionIO(type='Json', name='project_name'), dl.FunctionIO(type='Json', name='package_name') ], name='default', entry_point='main.py', functions=[ dl.PackageFunction( inputs=[ dl.FunctionIO(type="Item", name="item"), dl.FunctionIO(type="Json", name="annotations"), dl.FunctionIO(type="Json", name="config") ], name='bbox_to_segmentation', description='Converts a bounding box into a segmentation'), dl.PackageFunction( inputs=[ dl.FunctionIO(type="Item", name="item"), dl.FunctionIO(type="Json", name="annotations"), dl.FunctionIO(type="Json", name="config")