Example #1
0
    def _update_from_dataframe(self,
                               dataframe,
                               data_type_id=None,
                               name=None,
                               description=None):
        """
        Serialize the specified DataFrame and replace the existing dataset.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Data to serialize.
        data_type_id : str, optional
            Format to serialize to.
            If None, the existing format is preserved.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
            See the azureml.DataTypeIds class for constants.
        name : str, optional
            Name for the dataset.
            If None, the name of the existing dataset is used.
        description : str, optional
            Description for the dataset.
            If None, the name of the existing dataset is used.
        """
        _not_none('dataframe', dataframe)

        if data_type_id is None:
            data_type_id = self.data_type_id
        if name is None:
            name = self.name
        if description is None:
            description = self.description

        try:
            output = BytesIO()
            serialize_dataframe(output, data_type_id, dataframe)
            raw_data = output.getvalue()
        finally:
            output.close()

        self._upload_and_refresh(raw_data, data_type_id, name, description)
Example #2
0
    def add_from_dataframe(self, dataframe, data_type_id, name, description):
        """
        Serialize the specified DataFrame and upload it as a new dataset.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Data to serialize.
        data_type_id : str
            Format to serialize to.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
            See the azureml.DataTypeIds class for constants.
        name : str
            Name for the new dataset.
        description : str
            Description for the new dataset.

        Returns
        -------
        SourceDataset
            Dataset that was just created.
            Use open(), read_as_binary(), read_as_text() or to_dataframe() on
            the dataset object to get its contents as a stream, bytes, str or
            pandas DataFrame.
        """
        _not_none('dataframe', dataframe)
        _not_none_or_empty('data_type_id', data_type_id)
        _not_none_or_empty('name', name)
        _not_none_or_empty('description', description)

        try:
            output = BytesIO()
            serialize_dataframe(output, data_type_id, dataframe)
            raw_data = output.getvalue()
        finally:
            output.close()

        return self._upload(raw_data, data_type_id, name, description)
    def add_from_dataframe(self, dataframe, data_type_id, name, description):
        """
        Serialize the specified DataFrame and upload it as a new dataset.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Data to serialize.
        data_type_id : str
            Format to serialize to.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
            See the azureml.DataTypeIds class for constants.
        name : str
            Name for the new dataset.
        description : str
            Description for the new dataset.

        Returns
        -------
        SourceDataset
            Dataset that was just created.
            Use open(), read_as_binary(), read_as_text() or to_dataframe() on
            the dataset object to get its contents as a stream, bytes, str or
            pandas DataFrame.
        """
        _not_none('dataframe', dataframe)
        _not_none_or_empty('data_type_id', data_type_id)
        _not_none_or_empty('name', name)
        _not_none_or_empty('description', description)

        try:
            output = BytesIO()
            serialize_dataframe(output, data_type_id, dataframe)
            raw_data = output.getvalue()
        finally:
            output.close()

        return self._upload(raw_data, data_type_id, name, description)
    def _update_from_dataframe(self, dataframe, data_type_id=None, name=None,
                              description=None):
        """
        Serialize the specified DataFrame and replace the existing dataset.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Data to serialize.
        data_type_id : str, optional
            Format to serialize to.
            If None, the existing format is preserved.
            Supported formats are:
                'PlainText'
                'GenericCSV'
                'GenericTSV'
                'GenericCSVNoHeader'
                'GenericTSVNoHeader'
            See the azureml.DataTypeIds class for constants.
        name : str, optional
            Name for the dataset.
            If None, the name of the existing dataset is used.
        description : str, optional
            Description for the dataset.
            If None, the name of the existing dataset is used.
        """
        _not_none('dataframe', dataframe)

        if data_type_id is None:
            data_type_id = self.data_type_id
        if name is None:
            name = self.name
        if description is None:
            description = self.description

        try:
            output = BytesIO()
            serialize_dataframe(output, data_type_id, dataframe)
            raw_data = output.getvalue()
        finally:
            output.close()

        self._upload_and_refresh(raw_data, data_type_id, name, description)