Example #1
0
    def save_as(self, name, description=None, folder_id=None, table_name=None):
        """Create a new single-table cube from the data frame stored in this
        Cube instance (cube.dataframe).

        Make sure the data exists before saving.

        Args:
            name(str): Name of cube.
            description(str): Description of the cube.
            folder_id (str, optional): ID of the shared folder that the dataset
                should be created within. If `None`, defaults to the user's
                My Reports folder.
            table_name (str, optional): Name of the table. If None (default),
                the first table name of the original cube will be used.
        """
        # Guard clause: this shortcut only supports single-table cubes.
        if len(self._tables) > 1:
            helper.exception_handler(
                msg="""This feature works only for the single-table cubes.
                                            \rTo export multi-table cube use Dataset class."""
            )
            return

        # Fall back to the original cube's first table name when none is given.
        chosen_table = self._tables[0]["name"] if table_name is None else table_name

        new_dataset = Dataset(self._connection, name=name, description=description)
        new_dataset.add_table(name=chosen_table,
                              data_frame=self.dataframe,
                              update_policy="add")
        new_dataset.create(folder_id=folder_id)
Example #2
0
    def create_dataset(self,
                       data_frame,
                       dataset_name,
                       table_name,
                       to_metric=None,
                       to_attribute=None,
                       folder_id=None):
        """
        Create an in-memory MicroStrategy dataset from a Pandas Data Frame.

        :param data_frame: A Pandas Data Frame from which an in-memory dataset will be created
        :param dataset_name: Name of the in-memory dataset
        :param table_name: Name of the table to create within the dataset
        :param to_metric: (optional) A vector of column names from the Data.Frame to format as metrics
        in the dataset. By default, numeric types are formatted as metrics while character and date types are formatted
        as attributes. For example, a column of integer-like strings ("1", "2", "3") would
        appear as an attribute in the newly created dataset. If the intent is to format this data as a metric, provide
        the corresponding column name as \code{to_metric=c('myStringIntegers')}
        :param to_attribute: (optional) Logical opposite of to_metric. Helpful for formatting an integer-based row
        identifier as a primary key in the dataset
        :param folder_id: (optional) ID of the shared folder that the dataset should be created within. If `None`,
            defaults to the user's My Reports folder.
        :return: Unique identifier of the newly created dataset. Required for update_dataset()
        """

        # warning for future deprecation / replacement by Datasets class
        warnings.warn(
            "This method will be deprecated. The Dataset constructor is preferred and supports multi-table data.",
            DeprecationWarning)

        # Replace any leading/trailing whitespace in df names, replace '.' with '_'.
        # regex=False is required: with regex matching, the pattern "." matches
        # every character and would rewrite all column names to underscores.
        _df = data_frame.copy()
        _df.columns = _df.columns.str.replace(".", "_", regex=False)
        _df.columns = _df.columns.str.strip()

        # The REST layer expects an empty string (not None) for the default folder.
        if folder_id is None:
            folder_id = ""

        # create dataset instance
        ds = Dataset(connection=self, name=dataset_name)

        # add table to the dataset
        ds.add_table(name=table_name,
                     data_frame=_df,
                     update_policy='add',
                     to_metric=to_metric,
                     to_attribute=to_attribute)

        # publish the dataset
        ds.create(folder_id=folder_id)

        return ds.dataset_id
Example #3
0
          "location": ["New York", "Seattle", "Los Angeles"]}
# build the dimension table from the dict defined above
stores_df = pd.DataFrame(stores, columns=["store_id", "location"])

# sample fact data: one row per store, including a pre-formatted string
# column ("sales_fmt") that pandas would not infer as numeric
sales = {"store_id": [1, 2, 3],
         "category": ["TV", "Books", "Accessories"],
         "sales": [400, 200, 100],
         "sales_fmt": ["$400", "$200", "$100"]}
sales_df = pd.DataFrame(sales, columns=["store_id", "category", "sales", "sales_fmt"])

# add tables to the dataset and create it
# by default 'create()' will additionally upload data to the I-Server and publish it
# you can manipulate it by setting parameters `auto_upload` and `auto_publish`
ds = Dataset(connection=connection, name="Store Analysis")
ds.add_table(name="Stores", data_frame=stores_df, update_policy="add")
ds.add_table(name="Sales", data_frame=sales_df, update_policy="add")
ds.create()

# when using `Dataset.add_table()`, Pandas data types are mapped to MicroStrategy data types
# by default numeric data is modeled as MSTR metrics and non-numeric as attributes
# you can set manually which columns treat as attributes and which as metrics
# here "store_id" is forced to be an attribute so it acts as a join key
ds.add_table(name="Stores", data_frame=stores_df, update_policy="add",
             to_attribute=["store_id"])

# "sales_fmt" is a string column, so it must be forced to a metric explicitly
ds.add_table(name="Sales", data_frame=sales_df, update_policy="add",
             to_attribute=["store_id"],
             to_metric=["sales_fmt"])

# it is possible to update previously created dataset what looks really similar to creation
# you can use different update policies which are explained in the description of this script at the top
# by default `update()` is publishing data automatically, if you don't want to publish data, you have to
# set argument 'auto_publish` to False ; it is also possible to set chunksize for the update