Example #1
    def save_as(self, name, description=None, folder_id=None, table_name=None):
        """Creates a new single-table cube with the data frame stored in the
        Cube instance (cube.dataframe).

        Before the update, make sure that the data exists.
        Args:
            name(str): Name of cube.
            description(str): Description of the cube.
            folder_id (str, optional): ID of the shared folder that the dataset
                should be created within. If `None`, defaults to the user's
                My Reports folder.
            table_name (str, optional): Name of the table. If None (default),
                the first table name of the original cube will be used.
        """
        if len(self._tables) > 1:
            helper.exception_handler(
                msg="This feature works only for single-table cubes. "
                    "To export a multi-table cube, use the Dataset class."
            )
        else:
            if table_name is None:
                table_name = self._tables[0]["name"]

            dataset = Dataset(self._connection,
                              name=name,
                              description=description)
            dataset.add_table(name=table_name,
                              data_frame=self.dataframe,
                              update_policy="add")
            dataset.create(folder_id=folder_id)
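A minimal usage sketch for save_as(), assuming an already established mstrio Connection `conn` and a single-table cube; the cube ID and names below are placeholders, not values from the source.

from mstrio.cube import Cube

cube = Cube(connection=conn, cube_id="CUBE_ID_PLACEHOLDER")   # placeholder ID
cube.to_dataframe()                                           # populate cube.dataframe first
cube.save_as(name="Store Analysis copy",
             description="Copy created from cube.dataframe",
             folder_id=None)                                   # None -> user's My Reports folder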
Example #2
    def update_dataset(self, data_frame, dataset_id, table_name,
                       update_policy):
        """
        Update a previously created dataset with a Pandas Data Frame.

        :param data_frame: Pandas Data Frame to use to update an in-memory dataset
        :param dataset_id: Identifier of the dataset to update, provided by create_dataset()
        :param table_name: Name of the table to update within the dataset
        :param update_policy: Update operation to perform. One of 'add' (inserts new, unique
            rows), 'update' (updates data in existing rows and columns), 'upsert' (updates
            existing data and inserts new rows), or 'replace' (similar to truncate and load,
            replaces the existing data with new data)
        """

        # warning for future deprecation / replacement by Datasets class
        warnings.warn(
            "This method will be deprecated. The Dataset constructor is preferred and supports multi-table data.",
            DeprecationWarning)

        # Strip leading/trailing whitespace from column names and replace '.' with '_'
        _df = data_frame.copy()
        _df.columns = _df.columns.str.replace(".", "_", regex=False)
        _df.columns = _df.columns.str.strip()

        # create dataset instance, add table, then publish the updates to the dataset
        ds = Dataset(connection=self, dataset_id=dataset_id)
        ds.add_table(name=table_name,
                     data_frame=_df,
                     update_policy=update_policy)
        ds.update()
        ds.publish()
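A short usage sketch of update_dataset(), assuming an established mstrio Connection `conn` and a dataset_id previously returned by create_dataset(); the table and column names are illustrative.

import pandas as pd

new_rows = pd.DataFrame({"store_id": [4], "location": ["Chicago"]})
conn.update_dataset(data_frame=new_rows,
                    dataset_id=dataset_id,    # value returned earlier by create_dataset()
                    table_name="Stores",
                    update_policy="add")      # insert new, unique rows only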
Example #3
    def test_add_table(self):
        # Test that adding a table to the dataset increases the length of the tables property by one

        ds = Dataset(connection={}, name="test_name")

        ds.add_table(name="TEST1", data_frame=make_df(), update_policy="add")
        self.assertEqual(len(ds._tables), 1)

        ds.add_table(name="TEST2", data_frame=make_df(), update_policy="add")
        self.assertEqual(len(ds._tables), 2)
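The test above depends on a make_df() helper that is not shown in this snippet; a minimal sketch of such a helper (an assumption, not the project's actual fixture) could be:

import pandas as pd

def make_df():
    # tiny frame with one attribute-like column and one metric-like column
    return pd.DataFrame({"id": [1, 2, 3], "value": [10.0, 20.0, 30.0]})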
Example #4
    def create_dataset(self,
                       data_frame,
                       dataset_name,
                       table_name,
                       to_metric=None,
                       to_attribute=None,
                       folder_id=None):
        """
        Create an in-memory MicroStrategy dataset from a Pandas Data Frame

        :param data_frame: A Pandas Data Frame from which an in-memory dataset will be created
        :param dataset_name: Name of the in-memory dataset
        :param table_name: Name of the table to create within the dataset
        :param to_metric: (optional) A vector of column names from the Data.Frame to format as metrics
        in the dataset. By default, numeric types are formatted as metrics while character and date types are formatted
        as attributes. For example, a column of integer-like strings ("1", "2", "3") would
        appear as an attribute in the newly created dataset. If the intent is to format this data as a metric, provide
        the corresponding column name as \code{to_metric=c('myStringIntegers')}
        :param to_attribute: (optional) Logical opposite of to_metric. Helpful for formatting an integer-based row
        identifier as a primary key in the dataset
        :param folder_id: (optional) ID of the shared folder that the dataset should be created within. If `None`,
            defaults to the user's My Reports folder.
        :return: Unique identifiers of the dataset and table within the newly created dataset. Required for
        update_dataset()
        """

        # warning for future deprecation / replacement by Datasets class
        warnings.warn(
            "This method will be deprecated. The Dataset constructor is preferred and supports multi-table data.",
            DeprecationWarning)

        # Strip leading/trailing whitespace from column names and replace '.' with '_'
        _df = data_frame.copy()
        _df.columns = _df.columns.str.replace(".", "_", regex=False)
        _df.columns = _df.columns.str.strip()

        if folder_id is None:
            folder_id = ""

        # create dataset instance
        ds = Dataset(connection=self, name=dataset_name)

        # add table to the dataset
        ds.add_table(name=table_name,
                     data_frame=_df,
                     update_policy='add',
                     to_metric=to_metric,
                     to_attribute=to_attribute)

        # create the dataset (by default this also uploads and publishes the data)
        ds.create(folder_id=folder_id)

        return ds.dataset_id
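A brief usage sketch for create_dataset(), assuming an established mstrio Connection `conn`; the sample data and names are illustrative only.

import pandas as pd

stores_df = pd.DataFrame({"store_id": [1, 2, 3],
                          "location": ["New York", "Seattle", "Los Angeles"]})

dataset_id = conn.create_dataset(data_frame=stores_df,
                                 dataset_name="Store Analysis",
                                 table_name="Stores",
                                 to_attribute=["store_id"])   # model the id column as an attribute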
Example #5
    def update(self, update_policy='upsert'):
        """Update a single-table cube with the data frame stored in the Cube
        instance (cube.dataframe).

        Before the update, make sure that the data frame has been modified.

        Args:
            update_policy (str): Update operation to perform. One of 'add' (inserts new, unique
                rows), 'update' (updates data in existing rows and columns), 'upsert' (updates
                existing data and inserts new rows), or 'replace' (replaces the existing data
                with new data).
        """
        if len(self._tables) > 1:
            helper.exception_handler(
                msg="This feature works only for single-table cubes. "
                    "To update a multi-table cube, use the Dataset class."
            )
        else:
            table_name = self._tables[0]["name"]
            dataset = Dataset(self._connection, dataset_id=self._cube_id)
            dataset.add_table(name=table_name,
                              data_frame=self.dataframe,
                              update_policy=update_policy)
            dataset.update()
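A minimal usage sketch for Cube.update(), assuming an established mstrio Connection `conn`; the cube ID and the modified column are placeholders.

from mstrio.cube import Cube

cube = Cube(connection=conn, cube_id="CUBE_ID_PLACEHOLDER")
cube.to_dataframe()                                         # populate cube.dataframe with current data
cube.dataframe["sales"] = cube.dataframe["sales"] * 1.1     # example modification (column name assumed)
cube.update(update_policy="update")                         # push the modified rows back to the cube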
Example #6
# prepare Pandas DataFrames to add as tables of the dataset
stores = {"store_id": [1, 2, 3],
          "location": ["New York", "Seattle", "Los Angeles"]}
stores_df = pd.DataFrame(stores, columns=["store_id", "location"])

sales = {"store_id": [1, 2, 3],
         "category": ["TV", "Books", "Accessories"],
         "sales": [400, 200, 100],
         "sales_fmt": ["$400", "$200", "$100"]}
sales_df = pd.DataFrame(sales, columns=["store_id", "category", "sales", "sales_fmt"])

# add tables to the dataset and create it
# by default 'create()' will also upload the data to the I-Server and publish it
# you can control this behaviour with the `auto_upload` and `auto_publish` parameters
ds = Dataset(connection=connection, name="Store Analysis")
ds.add_table(name="Stores", data_frame=stores_df, update_policy="add")
ds.add_table(name="Sales", data_frame=sales_df, update_policy="add")
ds.create()

# when using `Dataset.add_table()`, Pandas data types are mapped to MicroStrategy data types
# by default, numeric data is modeled as MSTR metrics and non-numeric data as attributes
# you can manually set which columns to treat as attributes and which as metrics
ds.add_table(name="Stores", data_frame=stores_df, update_policy="add",
             to_attribute=["store_id"])

ds.add_table(name="Sales", data_frame=sales_df, update_policy="add",
             to_attribute=["store_id"],
             to_metric=["sales_fmt"])

# it is possible to update a previously created dataset, which looks very similar to creating one
# you can use different update policies, which are explained in the description at the top of this script
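# below is a sketch of that update flow (an illustration, not part of the original script);
# it reuses `connection`, the `sales_df` frame and the `ds.dataset_id` of the dataset created above
ds_update = Dataset(connection=connection, dataset_id=ds.dataset_id)
ds_update.add_table(name="Sales", data_frame=sales_df, update_policy="upsert")
ds_update.update()
ds_update.publish()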