def __init__(self, workspace_id, authorization_token, endpoint=Endpoints.default):
        """
        Initialize a workspace.

        Parameters
        ----------
        workspace_id : str
            Unique identifier for the existing workspace. Can be obtained from
            the URL in ML Studio when editing a workspace.
        authorization_token : str
            Access token for the workspace. Can be the primary or secondary
            token managed in ML Studio.
        endpoint : str
            URL of the endpoint to connect to. Specify this only if you host
            ML Studio on your own server(s).
        """
        _not_none_or_empty('workspace_id', workspace_id)
        _not_none_or_empty('authorization_token', authorization_token)
        _not_none_or_empty('endpoint', endpoint)

        self.workspace_id = workspace_id
        self.authorization_token = authorization_token
        self._rest = _RestClient(endpoint, authorization_token)
        self.datasets = Datasets(workspace=self)
        self.user_datasets = Datasets(workspace=self, example_filter=False)
        self.example_datasets = Datasets(workspace=self, example_filter=True)
        self.experiments = Experiments(workspace=self)
        self.user_experiments = Experiments(workspace=self, example_filter=False)
        self.example_experiments = Experiments(workspace=self, example_filter=True)
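A minimal usage sketch for this constructor; the id and token below are placeholders for the values shown in ML Studio:

from azureml import Workspace

# Placeholder credentials -- substitute the real values from ML Studio.
workspace = Workspace(
    workspace_id='0123456789abcdef0123456789abcdef',
    authorization_token='primary-or-secondary-token',
)
for dataset in workspace.example_datasets:
    print(dataset.name)   # enumerate the sample datasets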
Example #2
    def __init__(self, workspace, experiment, node_id, port_name,
                 data_type_id):
        """
        INTERNAL USE ONLY. Initialize an intermediate dataset.

        Parameters
        ----------
        workspace : Workspace
            Parent workspace of the dataset.
        experiment : Experiment
            Parent experiment of the dataset.
        node_id : str
            Module node id from the experiment graph.
        port_name : str
            Output port of the module.
        data_type_id : str
            Serialization format of the raw data.
            See the azureml.DataTypeIds class for constants.
        """
        _not_none('workspace', workspace)
        _not_none('experiment', experiment)
        _not_none_or_empty('node_id', node_id)
        _not_none_or_empty('port_name', port_name)
        _not_none_or_empty('data_type_id', data_type_id)

        self.workspace = workspace
        self.experiment = experiment
        self.node_id = node_id
        self.port_name = port_name
        self.data_type_id = data_type_id

        if is_supported(self.data_type_id):
            self.to_dataframe = self._to_dataframe
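The conditional assignment at the end is the detail to notice: to_dataframe is only bound when is_supported reports that the format has a deserializer, so callers can feature-test with hasattr. A hedged sketch of that pattern (the get_intermediate_dataset accessor and the id values are assumptions for illustration):

# Hypothetical node id and port name from an experiment graph.
dataset = experiment.get_intermediate_dataset(
    node_id='287d2cb0-f53c-4101-bdf8-104b137c8601-10',
    port_name='Results dataset',
    data_type_id='GenericCSV',
)
if hasattr(dataset, 'to_dataframe'):    # present only for supported formats
    frame = dataset.to_dataframe()
else:
    raw = dataset.read_as_binary()      # fall back to the raw serialized bytes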
Example #5
def serialize_dataframe(writer, data_type_id, dataframe):
    """
    Serialize a dataframe.

    Parameters
    ----------
    writer : file
        File-like object to write to. Must be opened in binary mode.
    data_type_id : str
        Serialization format to use.
        See the azureml.DataTypeIds class for constants.
    dataframe : pandas.DataFrame
        Dataframe to serialize.
    """
    _not_none('writer', writer)
    _not_none_or_empty('data_type_id', data_type_id)
    _not_none('dataframe', dataframe)

    serializer = _SERIALIZERS.get(data_type_id)
    if serializer is None:
        raise UnsupportedDatasetTypeError(data_type_id)
    # _SERIALIZERS maps a format id to a (serialize, deserialize) pair.
    serializer[0](writer=writer, dataframe=dataframe)
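A usage sketch for the serializer, assuming serialize_dataframe and DataTypeIds are importable from the azureml package as the docstring implies:

from io import BytesIO

import pandas as pd
from azureml import DataTypeIds, serialize_dataframe   # import path assumed

frame = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
buffer = BytesIO()                                     # binary file-like target
serialize_dataframe(buffer, DataTypeIds.GenericCSV, frame)
raw_bytes = buffer.getvalue()                          # serialized CSV payload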
Example #7
def deserialize_dataframe(reader, data_type_id):
    """
    Deserialize a dataframe.

    Parameters
    ----------
    reader : file
        File-like object to read from. Must be opened in binary mode.
    data_type_id : str
        Serialization format of the raw data.
        See the azureml.DataTypeIds class for constants.

    Returns
    -------
    pandas.DataFrame
        Dataframe object.
    """
    _not_none('reader', reader)
    _not_none_or_empty('data_type_id', data_type_id)

    serializer = _SERIALIZERS.get(data_type_id)
    if serializer is None:
        raise UnsupportedDatasetTypeError(data_type_id)
    # Index 1 of the (serialize, deserialize) pair is the deserializer.
    return serializer[1](reader=reader)
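And the inverse direction, as a self-contained round trip under the same import assumption:

from io import BytesIO

import pandas as pd
from azureml import DataTypeIds, deserialize_dataframe, serialize_dataframe

frame = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
buffer = BytesIO()
serialize_dataframe(buffer, DataTypeIds.GenericCSV, frame)
buffer.seek(0)                         # rewind so the reader starts at byte 0
restored = deserialize_dataframe(buffer, DataTypeIds.GenericCSV)
print(restored.equals(frame))          # True for a simple numeric frame like this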
Example #8
    def __init__(self,
                 workspace_id=None,
                 authorization_token=None,
                 endpoint=None):
        """
        Initialize a workspace.

        Parameters
        ----------
        workspace_id : str
            Unique identifier for the existing workspace. Can be obtained from
            the URL in ML Studio when editing a workspace.
        authorization_token : str
            Access token for the workspace. Can be the primary or secondary
            token managed in ML Studio.
        endpoint : str
            URL of the endpoint to connect to. Specify this only if you host
            ML Studio on your own server(s).

        Parameters that are omitted will be read from ~/.azureml/settings.ini:
        [workspace]
        id = abcd1234
        authorization_token = abcd1234
        endpoint = https://studio.azureml.net
        """
        workspace_id, authorization_token, endpoint, management_endpoint = _get_workspace_info(
            workspace_id, authorization_token, endpoint, None)

        _not_none_or_empty('workspace_id', workspace_id)
        _not_none_or_empty('authorization_token', authorization_token)
        _not_none_or_empty('endpoint', endpoint)

        self.workspace_id = workspace_id
        self.authorization_token = authorization_token
        self.api_endpoint = endpoint
        self.management_endpoint = management_endpoint
        self._rest = _RestClient(endpoint, authorization_token)
        self.datasets = Datasets(workspace=self)
        self.user_datasets = Datasets(workspace=self, example_filter=False)
        self.example_datasets = Datasets(workspace=self, example_filter=True)
        self.experiments = Experiments(workspace=self)
        self.user_experiments = Experiments(workspace=self,
                                            example_filter=False)
        self.example_experiments = Experiments(workspace=self,
                                               example_filter=True)
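Because omitted arguments fall back to ~/.azureml/settings.ini, this variant supports zero-argument construction; a sketch:

from azureml import Workspace

# id, token and endpoint are read from the [workspace] section of
# ~/.azureml/settings.ini when not passed explicitly.
workspace = Workspace()
print(workspace.workspace_id, workspace.api_endpoint)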
Example #9
    def add_from_dataframe(self, dataframe, data_type_id, name, description):
        """
        Serialize the specified DataFrame and upload it as a new dataset.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Data to serialize.
        data_type_id : str
            Format to serialize to.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
            See the azureml.DataTypeIds class for constants.
        name : str
            Name for the new dataset.
        description : str
            Description for the new dataset.

        Returns
        -------
        SourceDataset
            Dataset that was just created.
            Use open(), read_as_binary(), read_as_text() or to_dataframe() on
            the dataset object to get its contents as a stream, bytes, str or
            pandas DataFrame.
        """
        _not_none('dataframe', dataframe)
        _not_none_or_empty('data_type_id', data_type_id)
        _not_none_or_empty('name', name)
        _not_none_or_empty('description', description)

        output = BytesIO()
        try:
            serialize_dataframe(output, data_type_id, dataframe)
            raw_data = output.getvalue()
        finally:
            output.close()

        return self._upload(raw_data, data_type_id, name, description)
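A usage sketch for the DataFrame upload path (workspace as constructed above; the name and description are placeholders):

import pandas as pd

frame = pd.DataFrame({'feature': [1.0, 2.0, 3.0], 'label': [0, 1, 0]})
dataset = workspace.datasets.add_from_dataframe(
    dataframe=frame,
    data_type_id='GenericCSV',            # see azureml.DataTypeIds
    name='sample upload',                 # placeholder name
    description='uploaded from a pandas DataFrame',
)
print(dataset.to_dataframe().head())      # read the new dataset back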
Example #12
    def add_from_raw_data(self, raw_data, data_type_id, name, description):
        """
        Upload already serialized raw data as a new dataset.

        Parameters
        ----------
        raw_data : bytes
            Dataset contents to upload.
        data_type_id : str
            Serialization format of the raw data.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
                'ARFF'
            See the azureml.DataTypeIds class for constants.
        name : str
            Name for the new dataset.
        description : str
            Description for the new dataset.

        Returns
        -------
        SourceDataset
            Dataset that was just created.
            Use open(), read_as_binary(), read_as_text() or to_dataframe() on
            the dataset object to get its contents as a stream, bytes, str or
            pandas DataFrame.
        """
        _not_none('raw_data', raw_data)
        _not_none_or_empty('data_type_id', data_type_id)
        _not_none_or_empty('name', name)
        _not_none_or_empty('description', description)

        return self._upload(raw_data, data_type_id, name, description)
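The raw-bytes variant skips the pandas round trip entirely; a sketch with hand-written CSV bytes:

csv_bytes = b'a,b\n1,2\n3,4\n'            # payload already in GenericCSV form
dataset = workspace.datasets.add_from_raw_data(
    raw_data=csv_bytes,
    data_type_id='GenericCSV',
    name='raw csv upload',                # placeholder name
    description='pre-serialized CSV bytes',
)
print(dataset.read_as_text())             # round-trip the stored contents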
Example #15
def is_supported(data_type_id):
    """Return if a serializer is available for the specified format."""
    _not_none_or_empty('data_type_id', data_type_id)

    return _SERIALIZERS.get(data_type_id) is not None
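A capability-check sketch mirroring how the IntermediateDataset constructor above uses this helper (dataset stands for any object carrying a data_type_id):

if is_supported(dataset.data_type_id):
    frame = dataset.to_dataframe()        # a deserializer exists for this format
else:
    raw = dataset.read_as_binary()        # otherwise work with the raw bytes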