def _setup_datastore(self, blob_dataset_name, output_path=None):
    """
    Sets up the datastore in AzureML. Either retrieves a pre-existing datastore
    or registers a new one in the workspace.

    :param str blob_dataset_name: [required] name of the datastore registered with the
        workspace. If the datastore does not yet exist, the name it will be registered under.
    :param str output_path: [optional] if registering a datastore for inferencing,
        the output path for writing back predictions.
    """
    try:
        self.blob_ds = Datastore.get(self.ws, blob_dataset_name)
        print("Found Blob Datastore with name: %s" % blob_dataset_name)
    except HttpOperationError:
        self.blob_ds = Datastore.register_azure_blob_container(
            workspace=self.ws,
            datastore_name=blob_dataset_name,
            account_name=self.account_name,
            container_name=self.container_name,
            account_key=self.account_key,
            subscription_id=self.blob_sub_id,
        )
        print("Registered blob datastore with name: %s" % blob_dataset_name)
    if output_path is not None:
        self.output_dir = PipelineData(
            name="output",
            datastore=self.ws.get_default_datastore(),
            output_path_on_compute=output_path)
def __init__(self):
    self._parser = argparse.ArgumentParser("train")
    self._parser.add_argument(
        "--release_id", type=str,
        help="The ID of the release triggering this pipeline run")
    self._parser.add_argument("--model_name", type=str,
                              help="Name of the tf model")
    self._parser.add_argument("--ckpt_path", type=str,
                              help="Checkpoint path",
                              default="checkpoint/yolov3.ckpt")
    self._parser.add_argument("--datastore", type=str,
                              help="Name of the datastore",
                              default="epis_datastore")
    self._parser.add_argument("--storage_container", type=str,
                              help="Name of the storage container",
                              default="ppe")
    self._args = self._parser.parse_args()
    self._run = Run.get_context()
    self._exp = self._run.experiment
    self._ws = self._run.experiment.workspace
    self._tb = Tensorboard([self._run])
    self._datastore = Datastore.get(self._ws,
                                    datastore_name=self._args.datastore)
def __init__(self):
    self._parser = argparse.ArgumentParser("evaluate")
    self._parser.add_argument(
        "--release_id", type=str,
        help="The ID of the release triggering this pipeline run")
    self._parser.add_argument("--model_name", type=str,
                              help="Name of the tf model")
    self._parser.add_argument("--ckpt_path", type=str,
                              help="Checkpoint path",
                              default="checkpoint/yolov3.ckpt")
    self._parser.add_argument("--datastore", type=str,
                              help="Name of the datastore",
                              default="epis_datastore")
    self._parser.add_argument("--storage_container", type=str,
                              help="Name of the storage container",
                              default="ppe")
    self._args = self._parser.parse_args()
    self._run = Run.get_context()
    self._exp = self._run.experiment
    self._ws = self._run.experiment.workspace
    self._datastore = Datastore.get(self._ws,
                                    datastore_name=self._args.datastore)
    self._INPUT_SIZE = 416
    self._NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    self._CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    self._predicted_dir_path = 'mAP/predicted'
    self._ground_truth_dir_path = 'mAP/ground-truth'
def update_dataset(ws, datastore_name, dataset, time_stamp):
    datastore = Datastore.get(ws, datastore_name)
    # datastore = adlsgen2_datastore
    if dataset["dataset_name"] in ws.datasets:
        print("Dataset " + dataset["dataset_name"] + " already created in " +
              ws.name + ", will update to new version...")
    else:
        print("Dataset " + dataset["dataset_name"] + " is new and will be created in " +
              ws.name + "...")

    # Create a TabularDataset from the path in the datastore
    datastore_paths = [(datastore, dataset["dataset_path"])]
    retrieved_dataset = Dataset.Tabular.from_delimited_files(path=datastore_paths)

    # Register the dataset (and create a new version if needed).
    # The timestamp in the description makes it easier to see that the same dataset
    # was registered at the same time in different workspaces, if you want to filter.
    retrieved_dataset = retrieved_dataset.register(
        workspace=ws,
        name=dataset["dataset_name"],
        description='versioned data, timestamp: ' + time_stamp,
        create_new_version=True)
    print("Updated dataset " + dataset["dataset_name"] + " in workspace " +
          ws.name + " at timestamp " + time_stamp)
    return retrieved_dataset
def __init__(self):
    self.__parser = argparse.ArgumentParser("preprocessing")
    self.__parser.add_argument("--datastore", type=str,
                               help="Name of the datastore",
                               default="workspaceblobstore")
    self.__parser.add_argument("--dataset_name", type=str,
                               help="Name of the dataset")
    self.__parser.add_argument("--dataset_preprocessed_name", type=str,
                               help="Standard preprocessed dataset")
    self.__parser.add_argument("--output_preprocess_dataset", type=str,
                               help="Name of the PipelineData reference")
    self.__args = self.__parser.parse_args()
    self.__run = Run.get_context()
    self.__local_run = type(self.__run) == _OfflineRun
    if self.__local_run:
        self.__ws = Workspace.from_config('../../notebooks-settings')
        self.__exp = Experiment(self.__ws, 'exploratory_analysis')
        self.__run = self.__exp.start_logging()
    else:
        self.__ws = self.__run.experiment.workspace
        self.__exp = self.__run.experiment
    self.__datastore = Datastore.get(self.__ws,
                                     datastore_name=self.__args.datastore)
def write_results(df, cols, output_datastore, output_path, model, run):
    ws = run.experiment.workspace
    datastore = Datastore.get(ws, output_datastore)
    output_folder = tempfile.TemporaryDirectory(dir="/tmp")
    filename = os.path.join(output_folder.name, os.path.basename(output_path))
    print("Output filename: {}".format(filename))
    try:
        os.remove(filename)
    except OSError:
        pass

    df["ScoredLabels"] = model.predict(df[cols].astype(int).values)
    print("resultLabels", df["ScoredLabels"].iloc[:10])
    df["ScoredProbabilities"] = model.predict_proba(df[cols].astype(int).values)[:, 1]
    print("resultProbabilities", df["ScoredProbabilities"].iloc[:10])

    # Set CustomerId as the index so it is removed from the data columns
    df = df.set_index("CustomerId")
    directory_name = os.path.dirname(output_path)
    print("Extracting Directory {} from path {}".format(directory_name, output_path))
    df.to_csv(filename)

    # Datastore.upload() is still supported, but is being deprecated in favor of
    # Dataset.File.upload_directory():
    # datastore.upload(src_dir=output_folder.name, target_path=directory_name,
    #                  overwrite=False, show_progress=True)
    # Note: upload_directory can fail sometimes.
    output_dataset = Dataset.File.upload_directory(src_dir=output_folder.name,
                                                   target=(datastore, directory_name))
    return df
def create_pipeline(self):
    '''IRIS data training and validation'''
    self.datastore = Datastore.get(self.workspace,
                                   self.workspace.get_default_datastore().name)
    print("Received datastore")
    input_ds = self.get_files_from_datastore(self.args.container_name,
                                              self.args.input_csv)
    final_df = input_ds.to_pandas_dataframe()
    print("Input DF Info", final_df.info())
    print("Input DF Head", final_df.head())

    X = final_df[["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
    y = final_df[["Species"]]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                        random_state=1984)
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Model Score : ", model.score(X_test, y_test))

    joblib.dump(model, self.args.model_path)
    self.validate(y_test, y_pred, X_test)

    match = re.search(r'([^\/]*)$', self.args.model_path)

    # Upload the model to the run artifacts
    self.run.upload_file(name=self.args.artifact_loc + match.group(1),
                         path_or_stream=self.args.model_path)
    print("Run Files : ", self.run.get_file_names())
    self.run.complete()
def setup_azureml():
    """
    Get an Azure ML workspace from environment variables.

    Assumes the following are created outside of the code in this project:
        AML workspace
        AML datastore
        AML compute resource for training (can be blank for inferencing)
        AML compute resource for inferencing (can be blank for training)
    """
    subscription_id = os.environ['AML_SUBSCRIPTION']
    resource_group = os.environ['AML_RESOURCE_GROUP']
    workspace_name = os.environ['AML_WORKSPACE']
    datastore_name = os.environ['AML_DATASTORE']
    training_target_name = os.environ.get('AML_COMPUTE')
    inference_target_name = os.environ.get('AML_INFERENCE_COMPUTE')

    ws = Workspace(subscription_id, resource_group, workspace_name)
    ds = Datastore.get(ws, datastore_name=datastore_name)

    if training_target_name:
        training_target = ws.compute_targets[training_target_name]
    else:
        training_target = None
    if inference_target_name:
        inference_target = ws.compute_targets[inference_target_name]
    else:
        inference_target = None
    return ws, ds, training_target, inference_target
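# Usage sketch (hypothetical, not from the source): setup_azureml() reads all of its
# configuration from environment variables, so a caller only needs to set them before
# calling it. The values below are illustrative placeholders.
import os

os.environ['AML_SUBSCRIPTION'] = '<subscription-id>'
os.environ['AML_RESOURCE_GROUP'] = '<resource-group>'
os.environ['AML_WORKSPACE'] = '<workspace-name>'
os.environ['AML_DATASTORE'] = '<datastore-name>'

ws, ds, training_target, inference_target = setup_azureml()
print(ws.name, ds.name, training_target, inference_target)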
def main():
    # Connect to your AMLS workspace and set your datastore
    ws = run.experiment.workspace
    datastoreName = args.datastore_name
    datastore = Datastore.get(ws, datastoreName)
    print('Datastore Set')

    # Set your time zone
    timeZone = pytz.timezone(args.pytz_time_zone)
    timeLocal = dt.datetime.now(timeZone).strftime('%Y-%m-%d')
    print('Time Zone Set')

    # Specify your file names
    trainFile = timeLocal + '/' + args.train_file_name
    valFile = timeLocal + '/' + args.val_file_name
    print('File Names Set for Training and Validation Data.')

    # Set tags and description
    description = args.project_description
    trainTags = set_tags(['Project', 'Dataset Type', 'Date Created'],
                         [args.project_name, 'Training', timeLocal])
    valTags = set_tags(['Project', 'Dataset Type', 'Date Created'],
                       [args.project_name, 'Validation', timeLocal])
    print("Dataset Tags and Description Assigned")

    # Register your training data as an Azure Tabular Dataset
    register_dataset(ws, datastore, args.datastore_path, trainFile,
                     args.train_dataset_name, description, trainTags)
    print('Training Data Registered')

    # Register your validation data as an Azure Tabular Dataset
    register_dataset(ws, datastore, args.datastore_path, valFile,
                     args.val_dataset_name, description, valTags)
    print('Validation Data Registered')
def main():
    # Workspace
    ws = Workspace.from_config()

    # Compute
    compute = AmlCompute(workspace=ws, name='gandalf')

    # Data source
    datastore = Datastore.get(ws, datastore_name='surfrider')

    # Experiment
    script_params = {
        "--datastore": datastore.as_mount()
    }

    # Create and run the experiment
    estimator = Estimator(source_directory='./',
                          script_params=script_params,
                          compute_target=compute,
                          entry_script='train.py',
                          use_gpu=True,
                          pip_packages=['opencv-python>=4.1',
                                        'tensorpack==0.9.8',
                                        'tensorflow-gpu>=1.3,<2.0',
                                        'tqdm>=4.36.1',
                                        'cython>=0.29.13',
                                        'scipy>=1.3.1',
                                        'ffmpeg-python',
                                        'wget'])
    exp = Experiment(ws, 'surfrider_rcnn')
    run = exp.submit(estimator)
def convert_voc_annotation(ws, ds, data_type, anno_path, container_name,
                           use_difficult_bbox=True):
    classes = ['helmet', 'none']
    datastore = Datastore.get(ws, datastore_name=ds)

    voc_dataset_annotations = datastore.blob_service.list_blobs(
        container_name, prefix='VOC/Annotations')
    voc_dataset_images = datastore.blob_service.list_blobs(
        container_name, prefix='VOC/JPEGImages')
    voc_dataset_imagesets = datastore.blob_service.list_blobs(
        container_name, prefix=f'VOC/ImageSets/Main/{data_type}.txt')

    voc_list_annotations = list(voc_dataset_annotations)
    print("Successfully listed annotations")
    voc_list_images = list(voc_dataset_images)
    print("Successfully listed images")
    voc_list_imagesets = list(voc_dataset_imagesets)
    print("Successfully listed imagesets")

    txt = datastore.blob_service.get_blob_to_text(container_name,
                                                  voc_list_imagesets[0].name)
    txt_split = txt.content.splitlines()
    image_inds = [line.strip() for line in txt_split]

    with open(anno_path, 'a') as f:
        for image_ind in image_inds:
            image_path = datastore.blob_service.make_blob_url(
                container_name, 'VOC/JPEGImages/' + image_ind + '.jpg')
            annotation = image_path
            label_path = datastore.blob_service.get_blob_to_text(
                container_name, 'VOC/Annotations/' + image_ind + '.xml').content
            root = ET.fromstring(label_path)
            objects = root.findall('object')
            for obj in objects:
                difficult = obj.find('difficult').text.strip()
                if (not use_difficult_bbox) and (int(difficult) == 1):
                    continue
                bbox = obj.find('bndbox')
                class_ind = classes.index(obj.find('name').text.lower().strip())
                xmin = bbox.find('xmin').text.strip()
                xmax = bbox.find('xmax').text.strip()
                ymin = bbox.find('ymin').text.strip()
                ymax = bbox.find('ymax').text.strip()
                annotation += ' ' + ','.join([xmin, ymin, xmax, ymax, str(class_ind)])
            print(annotation)
            f.write(annotation + "\n")

    datastore.blob_service.create_blob_from_path(
        container_name, anno_path, anno_path,
        content_settings=ContentSettings(content_type=__get_mime_type(anno_path)))
def register_dataset(aml_workspace: Workspace,
                     dataset_name: str,
                     datastore_name: str,
                     file_path: str) -> Dataset:
    datastore = Datastore.get(aml_workspace, datastore_name)
    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    dataset = dataset.register(workspace=aml_workspace,
                               name=dataset_name,
                               create_new_version=True)
    return dataset
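# Usage sketch (hypothetical, not from the source): register a delimited file that already
# sits in a named datastore as a versioned TabularDataset. The datastore name, dataset
# name, and file path below are illustrative placeholders.
from azureml.core import Workspace

ws = Workspace.from_config()
dataset = register_dataset(aml_workspace=ws,
                           dataset_name="my_dataset",
                           datastore_name="workspaceblobstore",
                           file_path="data/my_data.csv")
print(dataset.name, dataset.version)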
def register_dataset(workspace, datastore_name, dataset_name, file_path):
    datastore = Datastore.get(workspace=workspace, datastore_name=datastore_name)
    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    # Create a new version if the dataset already exists (alternatively, pass exist_ok=True)
    dataset = dataset.register(
        workspace=workspace,
        name=dataset_name,
        create_new_version=True
    )
    return dataset
def main(_):
    # Export the trained model
    if not os.path.exists(FLAGS.export_dir):
        os.makedirs(FLAGS.export_dir)

    run.log('accuracy', float(0.91))
    run.log('val_accuracy', float(0.901))

    datastore = Datastore.get(ws, 'mtcseattle')
    datastore.download(FLAGS.export_dir, prefix="model")
def prepare_data(workspace):
    datastore = Datastore.get(workspace, TRAINING_DATASTORE)
    x_train = get_df_from_datastore_path(datastore, 'train/X_train.csv')
    y_train = get_df_from_datastore_path(datastore, 'train/y_train.csv')
    y_train = y_train['Target']
    x_test = get_df_from_datastore_path(datastore, 'test/X_test.csv')
    y_test = get_df_from_datastore_path(datastore, 'test/y_test.csv')
    y_test = y_test['Target']
    x_train = remove_collinear_cols(x_train)
    x_test = remove_collinear_cols(x_test)
    return x_train, y_train, x_test, y_test
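# get_df_from_datastore_path is not shown above. A minimal sketch of what such a helper
# might look like, assuming the path points at a single CSV blob in the datastore; this is
# an illustrative assumption, not the project's actual implementation.
import os
import tempfile

import pandas as pd


def get_df_from_datastore_path(datastore, path):
    # Download the blob matching the path into a temporary directory,
    # then read it into a pandas DataFrame.
    tmp_dir = tempfile.mkdtemp()
    datastore.download(target_path=tmp_dir, prefix=path, overwrite=True)
    return pd.read_csv(os.path.join(tmp_dir, path))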
def get_dataset(ws: Workspace) -> Dataset:
    if _dataset_name not in ws.datasets:
        datastore = Datastore.get(ws, _datastore_name)
        datastore_paths = [(datastore, f"{_dataset_name}/*.csv")]
        dataset = Dataset.Tabular.from_delimited_files(path=datastore_paths)
        dataset.register(workspace=ws,
                         name=_dataset_name,
                         description="Names with sentiment scores")
    else:
        dataset = ws.datasets[_dataset_name]
    return dataset
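# Usage sketch (hypothetical, not from the source): get_dataset() either registers the
# dataset on first use or returns the already-registered version, so callers can simply
# ask for it and materialise a DataFrame.
from azureml.core import Workspace

ws = Workspace.from_config()
names_dataset = get_dataset(ws)
df = names_dataset.to_pandas_dataframe()
print(df.head())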
def get_data(workspace):
    # Alternative: read the blob directly with a blob client, e.g.
    # blob_storage_client = __get_blob_storage_client()
    # blob_client = blob_storage_client.get_blob_client(
    #     "raw", "iris/year=2020/month=10/day=05/iris.csv")
    datastore = Datastore.get(workspace, "train")
    datastore_path = [(datastore, "iris/year=2020/month=10/day=05/iris.csv")]
    dataset = Dataset.Tabular.from_delimited_files(path=datastore_path)
    # content = blob_client.download_blob().readall().decode()
    # dataframe = pd.read_csv(StringIO(content))
    dataframe = dataset.to_pandas_dataframe()

    x = dataframe.values[:, 0:4]
    y = dataframe.values[:, -1]
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    return x_train, y_train, x_test, y_test, dataset
def __init__(self):
    self._parser = argparse.ArgumentParser("mAP")
    self._parser.add_argument(
        "--release_id", type=str,
        help="The ID of the release triggering this pipeline run")
    self._parser.add_argument("--model_name", type=str,
                              help="Name of the tf model")
    self._parser.add_argument("--ckpt_path", type=str,
                              help="Checkpoint path",
                              default="checkpoint/yolov3.ckpt")
    self._parser.add_argument("--datastore", type=str,
                              help="Name of the datastore",
                              default="epis_datastore")
    self._parser.add_argument("--storage_container", type=str,
                              help="Name of the storage container",
                              default="ppe")
    self._parser.add_argument('-na', '--no-animation',
                              help="no animation is shown.",
                              action="store_true")
    self._parser.add_argument('-np', '--no-plot',
                              help="no plot is shown.",
                              action="store_true")
    self._parser.add_argument('-q', '--quiet',
                              help="minimalistic console output.",
                              action="store_true")
    self._parser.add_argument('-i', '--ignore', nargs='+', type=str,
                              help="ignore a list of classes.")
    self._parser.add_argument('--set-class-iou', nargs='+', type=str,
                              help="set IoU for a specific class.")
    self._args = self._parser.parse_args()
    self._run = Run.get_context()
    self._exp = self._run.experiment
    self._ws = self._run.experiment.workspace
    self._datastore = Datastore.get(self._ws,
                                    datastore_name=self._args.datastore)
    self._MINOVERLAP = 0.5
def prepare_data(workspace):
    training_datastore = Datastore.get(workspace, TRAINING_DATASTORE)
    # validation_datastore = Datastore.get(workspace, SCORING_CONTAINER)
    x_train = get_df_from_datastore_path(training_datastore, 'train/X_train.csv')
    y_train = get_df_from_datastore_path(training_datastore, 'train/y_train.csv')
    y_train = y_train['class']
    x_test = get_df_from_datastore_path(training_datastore, 'valid/X_valid.csv')
    y_test = get_df_from_datastore_path(training_datastore, 'valid/y_valid.csv')
    y_test = y_test['class']
    return x_train, y_train, x_test, y_test
def load_image_files(dimension=(256, 256)):
    subscription_id = os.environ['AZURE_SUBSCRIPTION_ID']
    resource_group = 'ai-lab'
    workspace_name = 'ailabml'
    workspace = Workspace(subscription_id, resource_group, workspace_name)

    # Get dataset (online run):
    # run = Run.get_context()
    # dataset = run.input_datasets['Light Bulbs-2019-12-08 00:35:33']

    # Get dataset (offline run)
    ds = Dataset.get_by_name(workspace, name='Light Bulbs-2019-12-08 00:35:33')
    df = ds.to_pandas_dataframe()

    # Images
    descr = "Defect Detection Dataset"
    images = []
    flat_data = []
    target = []
    categories = set()
    for i in tqdm(range(df.shape[0])):
        si = df.loc[i].image_url.to_pod()
        if i == 0:
            datastore = Datastore.get(workspace, si['arguments']['datastoreName'])
        categories.add(df.loc[i].label[0])
        datastore.download(target_path='.',
                           prefix=si['resourceIdentifier'],
                           overwrite=True,
                           show_progress=False)
        img = imread(si['resourceIdentifier'], as_gray=True)
        img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
        flat_data.append(img_resized.flatten())
        images.append(img_resized)
        target.append(df.loc[i].label[0])

    categories = list(categories)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)
    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)
def register_dataset(aml_workspace: Workspace,
                     dataset_name: str,
                     datastore_name: str,
                     file_path: str) -> Dataset:
    if datastore_name:
        datastore = Datastore.get(aml_workspace, datastore_name)
    else:
        datastore = aml_workspace.get_default_datastore()

    # If the path is the same as the latest version, no new version will be registered.
    # However, run.input_datasets['name'] = dataset will not log the dataset in the run;
    # in that case, the dataset returned from Dataset.get_by_name does get logged.
    dataset = Dataset.File.from_files(path=(datastore, file_path))
    dataset = dataset.register(workspace=aml_workspace,
                               name=dataset_name,
                               create_new_version=True)
    return Dataset.get_by_name(aml_workspace, dataset_name)
def create_dataset(ws, name, datastore, data_path):
    '''Create the dataset object.'''
    # Get the datastore
    if datastore:
        datastore = Datastore.get(ws, datastore)
    else:
        datastore = ws.get_default_datastore()

    # Define the dataset
    dataset = Dataset.File.from_files(path=(datastore, data_path))

    # Register the dataset for future use
    dataset = dataset.register(workspace=ws,
                               name=name,
                               create_new_version=True)
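# Usage sketch (hypothetical, not from the source): register a folder of files as a
# FileDataset; passing datastore=None falls back to the workspace default datastore.
# The dataset name and data path are illustrative placeholders.
from azureml.core import Workspace

ws = Workspace.from_config()
create_dataset(ws, name="raw_images", datastore=None, data_path="images/raw/**")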
def register_dataset(
    aml_workspace: Workspace,
    dataset_name: str,
    datastore_name: str,
    file_path: str = "COVID19Articles.csv",
) -> Dataset:
    if datastore_name:
        datastore = Datastore.get(aml_workspace, datastore_name)
    else:
        datastore = Datastore.get_default(aml_workspace)

    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    dataset = dataset.register(workspace=aml_workspace,
                               name=dataset_name,
                               create_new_version=True)
    return dataset
def convert_voc_annotation(ws, ds, data_type, container_name,
                           use_difficult_bbox=True):
    classes = ['yellow', 'white', 'blue', 'red', 'hat']
    datastore = Datastore.get(ws, datastore_name=ds)

    voc_dataset_annotations = datastore.blob_service.list_blobs(
        container_name, prefix='VOC/Annotations')
    voc_dataset_images = datastore.blob_service.list_blobs(
        container_name, prefix='VOC/JPEGImages')
    voc_dataset_imagesets = datastore.blob_service.list_blobs(
        container_name, prefix=f'VOC/ImageSets/Main/{data_type}.txt')

    voc_list_annotations = list(voc_dataset_annotations)
    print("Successfully listed annotations")
    voc_list_images = list(voc_dataset_images)
    print("Successfully listed images")
    voc_list_imagesets = list(voc_dataset_imagesets)
    print("Successfully listed imagesets")

    txt = datastore.blob_service.get_blob_to_text(container_name,
                                                  voc_list_imagesets[0].name)
    txt_split = txt.content.splitlines()
    image_inds = [line.strip() for line in txt_split]

    for image_ind in image_inds:
        image_path = datastore.blob_service.make_blob_url(
            container_name, 'VOC/JPEGImages/' + image_ind + '.jpg')
        annotation = image_path
        label_path = datastore.blob_service.get_blob_to_text(
            container_name, 'VOC/Annotations/' + image_ind + '.xml').content
        print(f'XML {image_ind}')
        with open(f"./Test/{image_ind}.xml", 'w') as f:
            root = ET.fromstring(label_path)
            root.set('verified', '')
            root.find('path').text = f'{image_ind}.xml'
            root.find('folder').text = ''
            objects = root.findall('object')
            for obj in objects:
                class_ind = obj.find('name').text.lower().strip()
                if class_ind in classes:
                    obj.find('name').text = 'helmet'
                elif class_ind == "person":
                    obj.find('name').text = 'none'
            f.write(ET.tostring(root, encoding='unicode'))
def _resolve_path(self, item, is_query=False):
    if not is_query and isinstance(item, str):
        return item

    item_prop = self._prop_query if is_query else self._prop_path
    subitem_prop = self._prop_query if is_query else self._prop_relative_path

    datastoreName = self._json_utility.try_get_value(
        item, self._prop_datastore_name, None,
        lambda v: isinstance(v, str) and len(v) > 0,
        'Property "{}.{}" must be specified.'.format(item_prop,
                                                     self._prop_datastore_name))
    subitem = self._json_utility.try_get_value(
        item, subitem_prop, None,
        lambda v: isinstance(v, str) and len(v) > 0,
        'Property "{}.{}" must be specified.'.format(item_prop, subitem_prop))

    return (Datastore.get(self._workspace, datastoreName), subitem)
def get_pipeline_data(self, config):
    pipeline_data = []
    for c in config:
        if c["type"] == StepArgParser.ARG_TYPE_PIPELINE_DATA:
            pconfig = c["config"]
            pname = pconfig["name"]
            pds = pconfig.get("datastore") or "default"
            if pds == "default":
                use_ds = self.workspace.get_default_datastore()
            else:
                use_ds = Datastore.get(workspace=self.workspace,
                                       datastore_name=pds)
            pd = PipelineData(pname, datastore=use_ds)
            pipeline_data.append(pd)
    return pipeline_data
def get_datastore():
    env = EnvironmentVariables()
    datastore_name = env.datastore_name
    storage_account_name = env.storage_account_name
    storage_container_name = env.storage_container_name
    storage_account_key = env.storage_account_key
    workspace = get_workspace()
    try:
        datastore = Datastore.get(workspace=workspace,
                                  datastore_name=datastore_name)
    except HttpOperationError:
        datastore = Datastore.register_azure_blob_container(
            workspace=workspace,
            datastore_name=datastore_name,
            account_name=storage_account_name,
            container_name=storage_container_name,
            account_key=storage_account_key)
    return datastore
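# Usage sketch (hypothetical): with the get-or-register pattern above, the first call on a
# fresh workspace registers the blob container and every later call simply retrieves it.
datastore = get_datastore()
print(f"Using datastore '{datastore.name}' on container '{datastore.container_name}'")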
def run(input_path: str, output_path: str, datastore_name: str):
    """Run function.

    Args:
        input_path (str): path to raw text files in the datastore
        output_path (str): path to the output directory
        datastore_name (str): name of the datastore
    """
    logger.info("PREPARATION")
    logger.info(f"input files path: {input_path}")
    logger.info(f"output directory path: {output_path}")

    Path(output_path).mkdir(parents=True, exist_ok=True)

    # Download input datasets
    run = Run.get_context()
    workspace = run.experiment.workspace
    shared_blob_store = Datastore.get(workspace, datastore_name)
    shared_blob_store.download(target_path=output_path, prefix=input_path)
def create_sample_data_csv(aml_workspace: Workspace,
                           datastore_name: str,
                           file_name: str = "COVID19Articles.csv",
                           for_scoring: bool = False):
    url = ("https://solliancepublicdata.blob.core.windows.net"
           "/ai-in-a-day/lab-02/")
    df = pd.read_csv(url + file_name)
    if for_scoring:
        df = df.drop(columns=['cluster'])
    df.to_csv(file_name, index=False)

    if datastore_name:
        datastore = Datastore.get(aml_workspace, datastore_name)
    else:
        datastore = Datastore.get_default(aml_workspace)
    datastore.upload_files(
        files=[file_name],
        overwrite=True,
        show_progress=False,
    )
def __init__(self):
    self.__parser = argparse.ArgumentParser("preprocessing")
    self.__parser.add_argument("--datastore", type=str,
                               help="Name of the datastore",
                               default="workspaceblobstore")
    self.__parser.add_argument("--dataset_name", type=str,
                               help="Name of the dataset")
    self.__parser.add_argument("--dataset_preprocessed_name", type=str,
                               help="Standard preprocessed dataset")
    self.__parser.add_argument("--output_preprocess_dataset", type=str,
                               help="Name of the PipelineData reference")
    self.__parser.add_argument("--use_datadrift", type=distutils.util.strtobool,
                               help="Use datadrift (True/False). If true, we split "
                                    "the original dataset by sex")
    self.__parser.add_argument("--retrain_status", type=distutils.util.strtobool,
                               help="Retrain status")
    self.__args = self.__parser.parse_args()
    self.__run = Run.get_context()
    self.__local_run = type(self.__run) == _OfflineRun
    if self.__local_run:
        self.__ws = Workspace.from_config('../../notebooks-settings')
        self.__exp = Experiment(self.__ws, 'exploratory_analysis')
        self.__run = self.__exp.start_logging()
    else:
        self.__ws = self.__run.experiment.workspace
        self.__exp = self.__run.experiment
    self.__datastore = Datastore.get(self.__ws,
                                     datastore_name=self.__args.datastore)