Code Example #1
    def _setup_datastore(self, blob_dataset_name, output_path=None):
        """
        sets up the datastore in azureml. Either retrieves a pre-existing datastore
        or registers a new one in the workspace.

        :param str blob_dataset_name: [required] name of the datastore registered with the
                                 workspace. If the datastore does not yet exist, the
                                 name it will be registered under.
        :param str output_path: [optional] if registering a datastore for inferencing,
                                the output path for writing back predictions.
        """
        try:
            self.blob_ds = Datastore.get(self.ws, blob_dataset_name)
            print("Found Blob Datastore with name: %s" % blob_dataset_name)
        except HttpOperationError:
            self.blob_ds = Datastore.register_azure_blob_container(
                workspace=self.ws,
                datastore_name=blob_dataset_name,
                account_name=self.account_name,
                container_name=self.container_name,
                account_key=self.account_key,
                subscription_id=self.blob_sub_id,
            )

            print("Registered blob datastore with name: %s" %
                  blob_dataset_name)
        if output_path is not None:
            self.output_dir = PipelineData(
                name="output",
                datastore=self.ws.get_default_datastore(),
                output_path_on_compute=output_path)
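The snippet above relies on imports and instance attributes that are not shown. A minimal sketch of the surrounding setup, stated as an assumption (the attribute list and the sample call are placeholders, not part of the original code):

from azureml.core import Workspace, Datastore
from azureml.pipeline.core import PipelineData
from msrest.exceptions import HttpOperationError  # the exception type the snippet catches

# The method expects self.ws, self.account_name, self.container_name,
# self.account_key and self.blob_sub_id to be set beforehand, e.g. in __init__.
# Example call (placeholder names):
# self._setup_datastore("my_blob_store", output_path="predictions/")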
Code Example #2
File: upload.py Project: charlesCXK/FixMatch-pytorch
def prepare():
    ws = None
    try:
        print("Connecting to workspace '%s'..." % workspace_name)
        ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name)
    except Exception:
        print("Workspace not accessible.")
        return
    print(ws.get_details())

    ws.write_config()

    #
    # Register an existing datastore to the workspace.
    #
    if datastore_name not in ws.datastores:
        Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=datastore_name,
            container_name=blob_container_name,
            account_name=blob_account_name,
            account_key=blob_account_key
        )
        print("Datastore '%s' registered." % datastore_name)
    else:
        print("Datastore '%s' has already been regsitered." % datastore_name)
Code Example #3
    def upload_dataset(self, dataset_name: str, local_folder: str, datastore_name: str = None, overwrite: bool = False, tags: dict = None) -> FileDataset:
        '''
        Uploads data from a local directory into an AzureML Datastore that points to Azure Data lake
        Args:
            dataset_name (str): The name of the dataset to register
            local_folder (str): The location of the local directory to take files from
            datastore_name (str): The name of the Datastore that will contain the dataset
        Returns:
            FileDataset: The registered dataset, containing the files
        '''
        if not datastore_name:
            # No datastore name is given, so we'll take the default one
            datastore_name = self.__datastore_path

        # Connecting data store
        datastore = Datastore(self.__workspace, name=datastore_name)

        # TODO : check type of datastore
        datastore.upload(src_dir=local_folder, target_path=dataset_name, overwrite=overwrite, show_progress=True)
        
        datastore_paths = [(datastore, dataset_name)]
        file_ds = Dataset.File.from_files(path=datastore_paths)

        file_ds = file_ds.register(workspace=self.__workspace,
                                   name=dataset_name,
                                   description=dataset_name,
                                   tags=tags, create_new_version=True)

        return file_ds
Code Example #4
    def register_datastore(self, datastore_name, blob_container,
                           storage_acct_name, storage_acct_key):
        Datastore.register_azure_blob_container(workspace=self.workspace,
                                                datastore_name=datastore_name,
                                                container_name=blob_container,
                                                account_name=storage_acct_name,
                                                account_key=storage_acct_key)
Code Example #5
    def __enter__(self):
        """Download files for datastore.

        :return:
        """
        module_logger.debug("Enter __enter__ function of datastore cmgr")
        from azureml.core import Datastore, Dataset
        for key, value in self._config.items():
            df_config, _ = self._to_data_reference_config(value)
            if self._is_upload(df_config):
                if df_config.path_on_compute:
                    dir_to_create = os.path.normpath(
                        os.path.dirname(df_config.path_on_compute))
                    if dir_to_create:
                        _safe_mkdirs(dir_to_create)
            else:
                target_path = df_config.data_store_name
                if df_config.path_on_compute:
                    target_path = os.path.join(df_config.data_store_name,
                                               df_config.path_on_compute)
                    # The target_path is always set using the data store name with no way
                    # for the user to overwrite this behavior. The user might attempt to use ../ in
                    # the path on compute as a solution but this throws an exception
                    # because the path is not normalized.
                    # Normalizing the path to allow the user to use up-level references.
                    target_path = os.path.normpath(target_path)
                if self._is_download(df_config):
                    self._validate_config(df_config, key)
                    ds = Datastore(workspace=self._workspace,
                                   name=df_config.data_store_name)
                    if self._is_datastore_adlsgen1(ds):
                        _log_and_print(
                            "AzureDataLake Gen1 used as Datastore for download"
                        )
                        if df_config.path_on_data_store is None:
                            df_config.path_on_data_store = ""
                        Dataset.File.from_files(
                            (ds, df_config.path_on_data_store)).download(
                                os.path.join(target_path,
                                             df_config.path_on_data_store),
                                overwrite=df_config.overwrite)
                    else:
                        count = ds.download(
                            target_path=target_path,
                            prefix=df_config.path_on_data_store,
                            overwrite=df_config.overwrite)
                        if count == 0:
                            import warnings
                            warnings.warn(
                                "Downloaded 0 files from datastore {} with path {}."
                                .format(ds.name, df_config.path_on_data_store))
                else:
                    _safe_mkdirs(target_path)

        module_logger.debug("Exit __enter__ function of datastore cmgr")
Code Example #6
File: pipeline.py Project: naivelogic/seer
def get_datastore(ws: Workspace, datastore_name: str, container: str,
                  account_name: str, account_key: str) -> Datastore:
    if datastore_name not in ws.datastores:
        Datastore.register_azure_blob_container(workspace=ws,
                                                datastore_name=datastore_name,
                                                container_name=container,
                                                account_name=account_name,
                                                account_key=account_key,
                                                create_if_not_exists=True)

    return ws.datastores[datastore_name]
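A short usage sketch for get_datastore; the datastore name, container, and credential values below are illustrative placeholders, not values from the original project:

from azureml.core import Workspace

ws = Workspace.from_config()
images_store = get_datastore(ws, "seer_datastore", "images",
                             "mystorageaccount", "<storage-account-key>")
print(images_store.name)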
Code Example #7
File: workspace.py Project: mariamendoza/mlops
def register_datastore(workspace, ds_config):
  ds_name = ds_config.get("name")

  if not is_datastore_exists(workspace, ds_name):
    Datastore.register_azure_blob_container(
      workspace=workspace,
      datastore_name=ds_name,
      account_name=ds_config.get("account_name"),
      container_name=ds_config.get("container_name"),
      account_key=ds_config.get("account_key"),
      create_if_not_exists=ds_config.get("create_if_not_exists")
    )
Code Example #8
def main():
    # Connect to your AMLS Workspace and set your Datastore
    ws = run.experiment.workspace
    datastoreName = args.datastore_name
    datastore = Datastore.get(ws, datastoreName)
    print('Datastore Set')

    # Set your Time Zone
    timeZone = pytz.timezone(args.pytz_time_zone)
    timeLocal = dt.datetime.now(timeZone).strftime('%Y-%m-%d')
    print('Time Zone Set')

    # Specify your File Names
    trainFile = timeLocal + '/' + args.train_file_name
    valFile = timeLocal + '/' + args.val_file_name
    print('File Names Set for Training and Validation Data.')

    # Set Tags and Description
    description = args.project_description
    trainTags = set_tags(['Project', 'Dataset Type', 'Date Created'],\
                         [args.project_name, 'Training', timeLocal])
    valTags = set_tags(['Project', 'Dataset Type', 'Date Created'],\
                       [args.project_name, 'Validation', timeLocal])
    print("Dataset Tags and Description Assigned")

    # Register your Training data as an Azure Tabular Dataset
    register_dataset(ws, datastore, args.datastore_path, trainFile,
                     args.train_dataset_name, description, trainTags)
    print('Training Data Registered')

    # Register your Validation data as an Azure Tabular Dataset
    register_dataset(ws, datastore, args.datastore_path, valFile,
                     args.val_dataset_name, description, valTags)
    print('Validation Data Registered')
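The set_tags and register_dataset helpers used above are defined elsewhere in the project and not shown. A minimal sketch of what set_tags plausibly does, stated as an assumption rather than the project's actual implementation:

def set_tags(keys, values):
    # Hypothetical helper: zip tag names and tag values into a dictionary
    return dict(zip(keys, values))

# set_tags(['Project', 'Dataset Type'], ['Churn', 'Training'])
# -> {'Project': 'Churn', 'Dataset Type': 'Training'}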
Code Example #9
def update_dataset(ws, datastore_name, dataset, time_stamp):
    datastore = Datastore.get(ws, datastore_name)
    #datastore = adlsgen2_datastore

    if dataset["dataset_name"] in ws.datasets:
        print("Dataset " + dataset["dataset_name"] + " already created in " +
              ws.name + ", will update to new version...")
    else:
        print("Dataset " + dataset["dataset_name"] +
              " is new and will be created in " + ws.name + "...")

    # create a TabularDataset from the path in the datastore
    datastore_paths = [(datastore, dataset["dataset_path"])]
    retrieved_dataset = Dataset.Tabular.from_delimited_files(
        path=datastore_paths)

    # Register the dataset (and create a new version if needed).
    # The timestamp in the description makes it easier to see that the same
    # dataset was registered at the same time in different workspaces when filtering.
    retrieved_dataset = retrieved_dataset.register(
        workspace=ws,
        name=dataset["dataset_name"],
        description='versioned data, timestamp: ' + time_stamp,
        create_new_version=True)
    print("Updated dataset " + dataset["dataset_name"] + " in workspace " +
          ws.name + " at timestamp " + time_stamp)
    return retrieved_dataset
Code Example #10
def write_results(df, cols, output_datastore, output_path, model, run):

    ws = run.experiment.workspace
    datastore = Datastore.get(ws, output_datastore)
    output_folder = tempfile.TemporaryDirectory(dir="/tmp")
    filename = os.path.join(output_folder.name, os.path.basename(output_path))
    print("Output filename: {}".format(filename))

    try:
        os.remove(filename)
    except OSError:
        pass

    df["ScoredLabels"] = model.predict(df[cols].astype(int).values)
    print("resultLabels", df["ScoredLabels"].iloc[:10])
    df["ScoredProbabilities"] = model.predict_proba(
        df[cols].astype(int).values)[:, 1]
    print("resultProbabilities", df["ScoredProbabilities"].iloc[:10])

    # Set CustomerId as the index so the dataframe is not written out with an extra unnamed index column
    df = df.set_index("CustomerId")

    directory_name = os.path.dirname(output_path)
    print("Extracting Directory {} from path {}".format(
        directory_name, output_path))

    df.to_csv(filename)

    # Datastore.upload() still works but is being deprecated in favor of Dataset.File.upload_directory().
    # datastore.upload(src_dir=output_folder.name, target_path=directory_name, overwrite=False, show_progress=True)
    # Note: upload_directory can occasionally fail.
    output_dataset = Dataset.File.upload_directory(src_dir=output_folder.name,
                                                   target=(datastore,
                                                           directory_name))
    return df
Code Example #11
def setup_azureml():
    """
    Get an Azure ML workspace from environment variables.
    Assumes the following are created outside of the code in this project:
      AML workspace
      AML datastore
      AML compute resource for training (can be blank for inferencing)
      AML compute resource for inferencing (can be blank for training)
    """
    subscription_id = os.environ['AML_SUBSCRIPTION']
    resource_group = os.environ['AML_RESOURCE_GROUP']
    workspace_name = os.environ['AML_WORKSPACE']
    datastore_name = os.environ['AML_DATASTORE']
    training_target_name = os.environ.get('AML_COMPUTE')
    inference_target_name = os.environ.get('AML_INFERENCE_COMPUTE')
    ws = Workspace(subscription_id, resource_group, workspace_name)
    ds = Datastore.get(ws, datastore_name=datastore_name)
    if training_target_name:
        training_target = ws.compute_targets[training_target_name]
    else:
        training_target = None
    if inference_target_name:
        inference_target = ws.compute_targets[inference_target_name]
    else:
        inference_target = None
    return ws, ds, training_target, inference_target
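A hedged usage sketch showing the environment variables setup_azureml expects; all values are placeholders:

import os

os.environ['AML_SUBSCRIPTION'] = '<subscription-id>'
os.environ['AML_RESOURCE_GROUP'] = '<resource-group>'
os.environ['AML_WORKSPACE'] = '<workspace-name>'
os.environ['AML_DATASTORE'] = 'workspaceblobstore'
# AML_COMPUTE and AML_INFERENCE_COMPUTE are optional and may be left unset

ws, ds, training_target, inference_target = setup_azureml()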
Code Example #12
def register_sql_datastore(
    workspace: Workspace,
    sql_datastore_name: str,
    sql_server_name: str,
    sql_database_name: str,
    sql_username: str,
    sql_password: str,
) -> AzureSqlDatabaseDatastore:
    """
    Register an Azure SQL DB with the Azure Machine Learning Workspace

    :param workspace: Azure Machine Learning Workspace
    :param sql_datastore_name: Name used to id the SQL Datastore
    :param sql_server_name: Azure SQL Server Name
    :param sql_database_name: Azure SQL Database Name
    :param sql_username: Azure SQL Database Username
    :param sql_password: Azure SQL Database Password
    :return: Pointer to Azure Machine Learning SQL Datastore
    """
    return Datastore.register_azure_sql_database(
        workspace=workspace,
        datastore_name=sql_datastore_name,
        server_name=sql_server_name,
        database_name=sql_database_name,
        username=sql_username,
        password=sql_password,
    )
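A hedged usage sketch for register_sql_datastore; the server, database, and credential values are placeholders, not from the original snippet:

from azureml.core import Workspace

ws = Workspace.from_config()
sql_ds = register_sql_datastore(
    workspace=ws,
    sql_datastore_name="sales_sql",
    sql_server_name="my-sql-server",
    sql_database_name="salesdb",
    sql_username="aml_reader",
    sql_password="<password>",
)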
Code Example #13
File: test.py Project: tusharkalecam/MLOps-YoloV3
    def __init__(self):
        self._parser = argparse.ArgumentParser("evaluate")
        self._parser.add_argument(
            "--release_id",
            type=str,
            help="The ID of the release triggering this pipeline run")
        self._parser.add_argument("--model_name",
                                  type=str,
                                  help="Name of the tf model")
        self._parser.add_argument("--ckpt_path",
                                  type=str,
                                  help="Chekpoint path",
                                  default="checkpoint/yolov3.ckpt")
        self._parser.add_argument("--datastore",
                                  type=str,
                                  help="Name of the datastore",
                                  default="epis_datastore")
        self._parser.add_argument("--storage_container",
                                  type=str,
                                  help="Name of the storage container",
                                  default="ppe")

        self._args = self._parser.parse_args()
        self._run = Run.get_context()
        self._exp = self._run.experiment
        self._ws = self._run.experiment.workspace
        self._datastore = Datastore.get(self._ws,
                                        datastore_name=self._args.datastore)

        self._INPUT_SIZE = 416
        self._NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
        self._CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

        self._predicted_dir_path = 'mAP/predicted'
        self._ground_truth_dir_path = 'mAP/ground-truth'
Code Example #14
File: train.py Project: tusharkalecam/MLOps-YoloV3
    def __init__(self):
        self._parser = argparse.ArgumentParser("train")
        self._parser.add_argument(
            "--release_id",
            type=str,
            help="The ID of the release triggering this pipeline run")
        self._parser.add_argument("--model_name",
                                  type=str,
                                  help="Name of the tf model")
        self._parser.add_argument("--ckpt_path",
                                  type=str,
                                  help="Chekpoint path",
                                  default="checkpoint/yolov3.ckpt")
        self._parser.add_argument("--datastore",
                                  type=str,
                                  help="Name of the datastore",
                                  default="epis_datastore")
        self._parser.add_argument("--storage_container",
                                  type=str,
                                  help="Name of the storage container",
                                  default="ppe")

        self._args = self._parser.parse_args()
        self._run = Run.get_context()
        self._exp = self._run.experiment
        self._ws = self._run.experiment.workspace
        self._tb = Tensorboard([self._run])
        self._datastore = Datastore.get(self._ws,
                                        datastore_name=self._args.datastore)
Code Example #15
def main():
    # workspace
    ws = Workspace.from_config()

    #compute
    compute = AmlCompute(workspace=ws, name='gandalf')

    # datasource
    datastore = Datastore.get(ws, datastore_name='surfrider')

    # experiment
    script_params = {
        "--datastore": datastore.as_mount()
    }

    # Create and run experiment
    estimator = Estimator(source_directory='./',
                            script_params=script_params,
                            compute_target=compute,
                            entry_script='train.py',
                            use_gpu=True,
                            pip_packages=['opencv-python>=4.1',
                                            'tensorpack==0.9.8',
                                            'tensorflow-gpu>=1.3,<2.0',
                                            'tqdm>=4.36.1',
                                            'cython>=0.29.13',
                                            'scipy>=1.3.1',
                                            'ffmpeg-python',
                                            'wget'])

    
    exp = Experiment(ws, 'surfrider_rcnn')
    run = exp.submit(estimator)
Code Example #16
def register_dataset(
    aml_workspace: Workspace,
    dataset_name: str,
    datastore_name: str,
    file_path: str = "COVID19Articles.csv",
) -> Dataset:
    if datastore_name:
        datastore = Datastore.get(aml_workspace, datastore_name)
    else:
        datastore = Datastore.get_default(aml_workspace)
    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    dataset = dataset.register(workspace=aml_workspace,
                               name=dataset_name,
                               create_new_version=True)

    return dataset
Code Example #17
def _create_datastore(
    aml_workspace,
    datastore_name,
    container_name,
    account_name,
    account_key,
    create_if_not_exists=True,
):
    """Creates datastore

    Args:
        datastore_name (string): Name you wish to assign to your datastore.
        container_name (string): Name of your container.
        account_name (string): Storage account name.
        account_key (string): The storage account key.

    Returns:
        azureml.core.Datastore
    """
    logger = logging.getLogger(__name__)
    ds = Datastore.register_azure_blob_container(
        workspace=aml_workspace,
        datastore_name=datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        create_if_not_exists=create_if_not_exists,
    )
    logger.info(f"Registered existing blob storage: {ds.name}.")
    return ds
Code Example #18
    def __init__(self):
        self.__parser = argparse.ArgumentParser("preprocessing")
        self.__parser.add_argument("--datastore",
                                   type=str,
                                   help="Name of the datastore",
                                   default="workspaceblobstore")
        self.__parser.add_argument("--dataset_name",
                                   type=str,
                                   help="Name of the dataset")
        self.__parser.add_argument("--dataset_preprocessed_name",
                                   type=str,
                                   help="Standard preprocessed dataset")
        self.__parser.add_argument("--output_preprocess_dataset",
                                   type=str,
                                   help="Name of the PipelineData reference")

        self.__args = self.__parser.parse_args()
        self.__run = Run.get_context()
        self.__local_run = type(self.__run) == _OfflineRun

        if self.__local_run:
            self.__ws = Workspace.from_config('../../notebooks-settings')
            self.__exp = Experiment(self.__ws, 'exploratory_analysis')
            self.__run = self.__exp.start_logging()
        else:
            self.__ws = self.__run.experiment.workspace
            self.__exp = self.__run.experiment

        self.__datastore = Datastore.get(self.__ws,
                                         datastore_name=self.__args.datastore)
Code Example #19
    def create_pipeline(self):
        '''
        IRIS Data training and Validation
        '''        
        self.datastore = Datastore.get(self.workspace, self.workspace.get_default_datastore().name)
        print("Received datastore")
        input_ds = self.get_files_from_datastore(self.args.container_name,self.args.input_csv)
        final_df = input_ds.to_pandas_dataframe()
        print("Input DF Info",final_df.info())
        print("Input DF Head",final_df.head())

        X = final_df[["SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm"]]
        y = final_df[["Species"]]

        X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=1984)
        
        model = DecisionTreeClassifier()
        model.fit(X_train,y_train)
        y_pred = model.predict(X_test)
        print("Model Score : ", model.score(X_test,y_test))

        joblib.dump(model, self.args.model_path)

        self.validate(y_test, y_pred, X_test)

        match = re.search(r'([^/]*)$', self.args.model_path)
        # Upload Model to Run artifacts
        self.run.upload_file(name=self.args.artifact_loc + match.group(1),
                                path_or_stream=self.args.model_path)

        print("Run Files : ", self.run.get_file_names())
        self.run.complete()
Code Example #20
    @classmethod
    def get_by_data_reference(cls, workspace, path):
        data_store = Datastore(workspace, cls.DEFAULT_GLOBAL_DATASET_STORE)
        return DataReference(
            datastore=data_store,
            data_reference_name=cls.DEFAULT_DATA_REFERENCE_NAME,
            path_on_datastore=path,
        )
Code Example #21
def ConnectToAzure():
    """
        Connect to the Azure workspace, Compute Target, Datastore and Experiment
    """

    # Connect to workspace
    # config.json file expected in ./azureml directory
    # config.json can be generated from the azure portal while browsing the workspace
    global az_workspace
    az_workspace = Workspace.from_config()
    print("Workspace:", az_workspace.name)

    # Connect to compute for training
    # compute target must belong to the workspace AND compute targets are limited by the workspace region
    # there may be ability to do cross workspace compute targets in the future
    global az_computetarget
    az_computetarget = ComputeTarget(workspace=az_workspace,
                                     name="AzPytrch-NC6")
    print("Compute Target:", az_computetarget.name)

    # Connect to the datastore for the training images
    # datastore must be associated with storage account belonging to workspace
    global az_datastore
    az_datastore = Datastore.get_default(az_workspace)
    print("Datastore:", az_datastore.name)

    # Connect to the experiment
    global az_experiment
    az_experiment = Experiment(workspace=az_workspace, name='616_Final')
    print("Experiment:", az_experiment.name)
Code Example #22
def register_blob_datastore(
    workspace: Workspace,
    blob_datastore_name: str,
    container_name: str,
    account_name: str,
    account_key: str,
    datastore_rg: str,
) -> AzureBlobDatastore:
    """
    Register a Blob Storage Account with the Azure Machine Learning Workspace

    :param workspace: Azure Machine Learning Workspace
    :param blob_datastore_name: Name for blob datastore
    :param container_name: Name for blob container
    :param account_name: Name for blob account
    :param account_key: Blob Account Key used for auth
    :param datastore_rg: Resource Group containing Azure Storage Account
    :return: Pointer to Azure Machine Learning Blob Datastore
    """
    return Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=blob_datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        resource_group=datastore_rg,
        overwrite=True,
    )
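A hedged usage sketch for register_blob_datastore; none of the account details below come from the original snippet:

from azureml.core import Workspace

ws = Workspace.from_config()
blob_ds = register_blob_datastore(
    workspace=ws,
    blob_datastore_name="training_blob",
    container_name="datasets",
    account_name="mystorageaccount",
    account_key="<storage-account-key>",
    datastore_rg="my-resource-group",
)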
Code Example #23
    def load_tabular_partition(self,
                               partition_name: str,
                               datastore_name: str = None,
                               columns: np.array = None,
                               first_row_header: bool = False,
                               cloud_storage: bool = True) -> pd.DataFrame:
        '''
        Loads a partition from a tabular dataset.
            The implementation will connect to the Datastore and get all delimited files matching the partition_name.
            When configured locally, it will concatenate all files in the local folder whose names match {partition_name}.csv.
        Args:
            partition_name (str): The name of the partition as a wildcard filter.  Example: B* will take all files starting with B and ending with .csv
            columns (np.array): The column names to assign to the dataframe
            datastore_name (str): The name of a Datastore that contains Datasets
            cloud_storage (bool): When set to False, the dataset will be loaded from the local folder
        Returns:
            pd.DataFrame: The dataset, loaded as a DataFrame
        '''
        if not datastore_name:
            # No datastore name is given, so we'll take the default one
            datastore_name = self.__datastore_path

        if cloud_storage:
            # Connecting data store
            datastore = Datastore(self.__workspace, name=datastore_name)
            try:
                _header = PromoteHeadersBehavior.ALL_FILES_HAVE_SAME_HEADERS if first_row_header else False
                _aml_dataset = Dataset.Tabular.from_delimited_files(
                    header=_header,
                    path=DataPath(datastore, '/' + partition_name +
                                  '.csv'))  #, set_column_types=columns
                _df = _aml_dataset.to_pandas_dataframe()
            except DatasetValidationError as dsvalex:
                if 'provided path is not valid' in str(dsvalex):
                    return None
                else:
                    raise
        else:
            # Reading data from sub files in a folder
            _folder_path = datastore_name
            _partition_files = glob.glob(_folder_path + '/' + partition_name +
                                         '.csv')
            _record_found = False
            _df = None
            for filename in _partition_files:
                _header = 0 if first_row_header else None
                df = pd.read_csv(filename, index_col=None, header=_header)
                if not _record_found:
                    _df = df
                    _record_found = True
                else:
                    _df = pd.concat([_df, df])

            if not _record_found:
                return None

        if columns is not None:
            _df.columns = columns
        return _df
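A hedged usage sketch for load_tabular_partition; loader is assumed to be an instance of the surrounding class, already constructed with an AzureML Workspace and a default datastore name, and the partition, datastore, and column names are placeholders:

import numpy as np

df = loader.load_tabular_partition(
    partition_name="B*",
    datastore_name="machinedata",
    columns=np.array(["timestamp", "sensor", "value"]),
    first_row_header=False,
)
if df is not None:
    print(df.head())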
Code Example #24
def convert_voc_annotation(ws,
                           ds,
                           data_type,
                           anno_path,
                           container_name,
                           use_difficult_bbox=True):
    classes = ['helmet', 'none']

    datastore = Datastore.get(ws, datastore_name=ds)
    voc_dataset_annotations = datastore.blob_service.list_blobs(
        container_name, prefix='VOC/Annotations')
    voc_dataset_images = datastore.blob_service.list_blobs(
        container_name, prefix='VOC/JPEGImages')
    voc_dataset_imagesets = datastore.blob_service.list_blobs(
        container_name, prefix=f'VOC/ImageSets/Main/{data_type}.txt')

    voc_list_annotations = list(voc_dataset_annotations)
    print("Successfully listed annotations")
    voc_list_images = list(voc_dataset_images)
    print("Successfully listed images")
    voc_list_imagesets = list(voc_dataset_imagesets)
    print("Successfully listed imagesets")

    txt = datastore.blob_service.get_blob_to_text(container_name,
                                                  voc_list_imagesets[0].name)
    txt_split = txt.content.splitlines()
    image_inds = [line.strip() for line in txt_split]
    with open(anno_path, 'a') as f:
        for image_ind in image_inds:
            image_path = datastore.blob_service.make_blob_url(
                container_name, 'VOC/JPEGImages/' + image_ind + '.jpg')
            annotation = image_path
            label_path = datastore.blob_service.get_blob_to_text(
                container_name,
                'VOC/Annotations/' + image_ind + '.xml').content
            root = ET.fromstring(label_path)
            objects = root.findall('object')
            for obj in objects:
                difficult = obj.find('difficult').text.strip()
                if (not use_difficult_bbox) and (int(difficult) == 1):
                    continue
                bbox = obj.find('bndbox')
                class_ind = classes.index(
                    obj.find('name').text.lower().strip())
                xmin = bbox.find('xmin').text.strip()
                xmax = bbox.find('xmax').text.strip()
                ymin = bbox.find('ymin').text.strip()
                ymax = bbox.find('ymax').text.strip()
                annotation += ' ' + ','.join(
                    [xmin, ymin, xmax, ymax,
                     str(class_ind)])
            print(annotation)
            f.write(annotation + "\n")
    datastore.blob_service.create_blob_from_path(
        container_name,
        anno_path,
        anno_path,
        content_settings=ContentSettings(
            content_type=__get_mime_type(anno_path)))
Code Example #25
    def _get_datastore_and_path(self, config):
        from azureml.core import Datastore

        output_location = config["OutputLocation"]
        data_path = output_location["DataPath"]
        datastore = Datastore(self._workspace, data_path["DatastoreName"])

        return datastore, data_path["RelativePath"]
Code Example #26
def register_dataset(aml_workspace: Workspace, dataset_name: str,
                     datastore_name: str, file_path: str) -> Dataset:
    datastore = Datastore.get(aml_workspace, datastore_name)
    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    dataset = dataset.register(workspace=aml_workspace,
                               name=dataset_name,
                               create_new_version=True)

    return dataset
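A short usage sketch for register_dataset; the datastore, path, and dataset names below are placeholders:

from azureml.core import Workspace

ws = Workspace.from_config()
dataset = register_dataset(ws,
                           dataset_name="covid_articles",
                           datastore_name="workspaceblobstore",
                           file_path="data/COVID19Articles.csv")
print(dataset.name, dataset.version)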
Code Example #27
File: train.py Project: tcsong456/MLops
def register_dataset(workspace, datastore_name, dataset_name, file_path):
    datastore = Datastore.get(workspace=workspace,
                              datastore_name=datastore_name)
    dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    dataset = dataset.register(
        workspace=workspace, name=dataset_name, create_new_version=True
    )  # creates a new version if the dataset already exists

    return dataset
Code Example #28
File: datastore.py Project: pkaminskiPGS/MLOps.Azure
def get_datastore():
    env = EnvironmentVariables()
    datastore_name = env.datastore_name
    storage_account_name = env.storage_account_name
    storage_container_name = env.storage_container_name
    storage_account_key = env.storage_account_key
    workspace = get_workspace()

    try:
        datastore = Datastore.get(workspace=workspace, datastore_name=datastore_name)
    except HttpOperationError:
        datastore = Datastore.register_azure_blob_container(
            workspace=workspace,
            datastore_name=datastore_name,
            account_name=storage_account_name,
            container_name=storage_container_name,
            account_key=storage_account_key)

    return datastore
Code Example #29
def main(_):
    # Export the trained model
    if not os.path.exists(FLAGS.export_dir):
        os.makedirs(FLAGS.export_dir)

    run.log('accuracy', float(0.91))
    run.log('val_accuracy', float(0.901))

    datastore = Datastore.get(ws, 'mtcseattle')
    datastore.download(FLAGS.export_dir, prefix="model")
Code Example #30
File: train.py Project: rameezshaik/MLops
def prepare_data(workspace):
    datastore = Datastore.get(workspace, TRAINING_DATASTORE)
    x_train = get_df_from_datastore_path(datastore, 'train/X_train.csv')
    y_train = get_df_from_datastore_path(datastore, 'train/y_train.csv')
    y_train = y_train['Target']
    x_test = get_df_from_datastore_path(datastore, 'test/X_test.csv')
    y_test = get_df_from_datastore_path(datastore, 'test/y_test.csv')
    y_test = y_test['Target']
    x_train = remove_collinear_cols(x_train)
    x_test = remove_collinear_cols(x_test)
    return x_train, y_train, x_test, y_test
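The get_df_from_datastore_path helper is not shown in this snippet; a minimal sketch of what it might look like, stated as an assumption (delimited files read through a TabularDataset) rather than the project's actual implementation:

from azureml.core import Dataset

def get_df_from_datastore_path(datastore, relative_path):
    # Hypothetical helper: read a delimited file from the datastore into a pandas DataFrame
    tabular = Dataset.Tabular.from_delimited_files(path=[(datastore, relative_path)])
    return tabular.to_pandas_dataframe()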