Beispiel #1
0
def indexFromPandas(dataframe,
                    columnName=None,
                    removeEmpty=True,
                    indexName="new index"):
    """Return a cp.index built from a column of a pandas dataframe.

    dataframe: pandas dataframe
    columnName: column used to create the cp.index. By default the first
        column of the dataframe is used.
    removeEmpty: if True, drop NaN rows (and "" rows for string/object columns).
    indexName: name given to the created index.
        Ex.
            cp.indexFromPandas(df)
            cp.indexFromPandas(df,"column10")
    """
    if columnName is None:
        _serie = dataframe[dataframe.columns[0]]
    else:
        _serie = dataframe[columnName]

    if removeEmpty:
        # dropna() returns a new Series; the original used inplace=True on a
        # column view, which can raise SettingWithCopyWarning and silently
        # mutate the caller's dataframe.
        _serie = _serie.dropna()
        if kindToString(_serie.dtype.kind) in ("string", "object"):
            _serie = _serie[_serie != ""]

    return index(indexName, _serie.unique())
    def previewNode(self, nodeDic, nodeId):
        """Return a JSON preview of a node whose result is a cubepy Cube or Index.

        nodeDic: dict mapping node ids to node objects (with .result, .title
                 and .lastEvaluationConsole attributes).
        nodeId: id of the node to preview.
        Returns a JSON string with resultType, dims, console and preview keys.
        """
        # NOTE: removed unused local import of Helpers.
        from sys import getsizeof

        result = nodeDic[nodeId].result
        res = {
            "resultType": str(type(result)),
            "dims": [],
            "console": nodeDic[nodeId].lastEvaluationConsole,
            "preview": ""
        }
        if isinstance(result, cubepy.Cube):
            cube = result
            # Describe each axis as "title (name) [len]" when the axis name
            # maps to a known node with a title, otherwise "name [len]".
            for _axis in cube.axes:
                _nodeTitle = None
                if _axis.name in nodeDic:
                    _nodeTitle = nodeDic[_axis.name].title

                if _nodeTitle is None:
                    _item = _axis.name + " [" + str(len(_axis)) + "]"
                else:
                    _item = _nodeTitle + \
                        " ("+_axis.name+") [" + str(len(_axis)) + "]"
                res["dims"].append(_item)

            res["preview"] += "Dimensions: " + str(cube.ndim)
            res["preview"] += "\nShape: " + str(cube.shape)
            res["preview"] += "\nSize: " + str(cube.size)
            res["preview"] += "\nMemory: " + \
                str(round(getsizeof(cube)/1024/1024, 2)) + " Mb"
            if cube.values is not None:
                res["preview"] += "\nData type: " + \
                    str(cube.values.dtype) + \
                    " (" + kindToString(cube.values.dtype.kind)+")"
                # Truncate rendered values to keep the payload small.
                res["preview"] += "\nValues: \n\n" + str(cube.values)[:1000]

        elif isinstance(result, cubepy.Index):
            index = result

            res["preview"] += "Size: " + str(len(index))
            res["preview"] += "\nMemory: " + \
                str(round(getsizeof(index)/1024/1024, 2)) + " Mb"
            if index.values is not None:
                res["preview"] += "\nData type: " + \
                    str(index.values.dtype) + \
                    " (" + kindToString(index.values.dtype.kind)+")"
                res["preview"] += "\nValues: \n\t" + \
                    '\n\t'.join([''.join(str(row))
                                 for row in index.values[:100]])

        return json.dumps(res)
    def previewNode(self, nodeDic, nodeId):
        """Return a JSON preview of a node whose result is a numpy ndarray.

        nodeDic: dict mapping node ids to node objects (with .result and
                 .lastEvaluationConsole attributes).
        nodeId: id of the node to preview.
        Returns a JSON string with resultType, dims, console and preview keys.
        """
        # NOTE: removed unused local imports (Helpers, getsizeof); memory is
        # computed from result.nbytes below.
        result = nodeDic[nodeId].result
        res = {
            "resultType": str(type(result)),
            "dims": [],
            "console": nodeDic[nodeId].lastEvaluationConsole,
            "preview": ""
        }

        # Plain arrays carry no axis names: synthesize "<AXISNAME><n> [size]".
        for nn in range(result.ndim):
            _item = self.AXISNAME + str(nn) + " [" + str(
                result.shape[nn]) + "]"
            res["dims"].append(_item)

        res["preview"] += "Dimensions: " + str(result.ndim)
        res["preview"] += "\nShape: " + str(result.shape)
        res["preview"] += "\nSize: " + str(result.size)
        res["preview"] += "\nMemory: " + \
            str(round(result.nbytes/1024/1024, 2)) + " Mb"
        res["preview"] += "\nData type: " + \
            str(result.dtype) + " (" + kindToString(result.dtype.kind)+")"
        # Truncate rendered values to keep the payload small.
        res["preview"] += "\nValues: \n\n" + str(result)[:1000]

        result = None  # release the local reference before serializing

        return json.dumps(res)
Beispiel #4
0
def cube(axes, values=None, broadcast=True, dtype=None):
    """Create a cube object.

    axes: list of axes of the cube
    values: optional values of the cube. Can be a list of cubes to build a report.
    broadcast: passed to cubepy.stack when stacking a list of cubes.
    dtype: optional numpy dtype for the cube values.
    Ex.
        cp.cube([time])
        cp.cube([time,product])
        cp.cube([time,product],[10,20,30,40,50,60,70,80])
        cp.cube([time,product],cp.random)

        cp.cube([index_reports],[report_1,report_2])
    """
    if values is None:
        # No values given: empty strings for string dtypes, zeros otherwise.
        if dtype is not None:
            if dtype is str:
                return cubepy.Cube.full(axes, '', dtype='U25')
            elif kindToString(np.dtype(dtype).kind) == "string":
                return cubepy.Cube.full(axes, '', dtype=dtype)
        return cubepy.Cube.zeros(axes)

    if isinstance(values, (list, np.ndarray)):
        if len(values) > 0 and isinstance(values[0], cubepy.Cube):
            # List of cubes: stack them along the (single) given axis.
            if isinstance(axes, list):
                axes = axes[0]
            return cubepy.stack(values, axes, broadcast)
        return cubepy.Cube(axes, values, fillValues=True, dtype=dtype)
    elif isinstance(values, numbers.Number) and values == random:
        # cp.random sentinel: fill the cube with random ints in [0, 100).
        theSize = [len(x) for x in axes]
        return cube(axes,
                    np.random.randint(100, size=theSize),
                    dtype=dtype)
    else:
        return cubepy.Cube.full(axes, values, dtype=dtype)
Beispiel #5
0
    def previewNode(self, nodeDic, nodeId):
        """Return a JSON preview of a node whose result is a pandas object.

        Handles DataFrame, Series and Index results; fills resultType, dims,
        columns, console and a textual preview.
        """
        # NOTE: removed unused local imports (Helpers, getsizeof).
        res = {
            "resultType": str(type(nodeDic[nodeId].result)),
            "dims": [],
            "columns": [],
            "console": nodeDic[nodeId].lastEvaluationConsole,
            "preview": ""
        }
        if isinstance(nodeDic[nodeId].result, pd.DataFrame):
            cube = nodeDic[nodeId].result

            if self.isIndexed(cube):
                res["dims"] = list(cube.index.names)

            # Describe up to 500 columns as "name (kind)". Use positional
            # access (.iloc) on dtypes: plain [idx] with an int is label-based
            # and gives the wrong dtype (or fails) for integer-labeled columns.
            for idx, col in enumerate(cube.columns.values[:500]):
                res["columns"].append(
                    str(col) + " (" + kindToString(cube.dtypes.iloc[idx].kind) +
                    ")")

            res["preview"] += "Rows: " + str(len(cube.index))
            res["preview"] += "\nShape: " + str(cube.shape)
            res["preview"] += "\nMemory: " + \
                str(round(cube.memory_usage(deep=True).sum() / 1024/1024, 2)) + " Mb"
            res["preview"] += "\nValues: \n" + cube.head(20).to_string()
        elif isinstance(nodeDic[nodeId].result, pd.Series):
            serie = nodeDic[nodeId].result
            if self.isIndexed(serie):
                res["dims"] = list(serie.index.names)
            res["preview"] += "Rows: " + str(len(serie.index))
            res["preview"] += "\nMemory: " + \
                str(round(serie.memory_usage(deep=True) / 1024/1024, 2)) + " Mb"
            res["preview"] += "\nValues: \n" + serie.head(20).to_string()
        elif isinstance(nodeDic[nodeId].result, pd.Index):
            res["preview"] = str(nodeDic[nodeId].result)[:1000]

        return json.dumps(res)
Beispiel #6
0
def cubeFromPandas(dataframe,
                   cubeIndexes,
                   valueColumns,
                   indexColumnHeaders=None,
                   replaceByIndex=None):
    """Create a new cp.cube, converting pandas data to multidimensional form.

    dataframe: pandas dataframe
    cubeIndexes: cp.index objects used as the cube's axes
    valueColumns: column name (str) of the dataframe containing the values,
                  or a cp.index whose values are column names to be converted
                  into an extra axis
    indexColumnHeaders: (optional) dataframe column names matching cubeIndexes.
                  Used when the dataframe headers differ from the index
                  identifiers.
    replaceByIndex: (optional) index that replaces the one built from
                  valueColumns (via changeIndex).
    """
    import pandas as pd

    valueIndex = None
    if isinstance(valueColumns, cubepy.Index):
        valueIndex = valueColumns
        valueColumns = valueIndex.values
    elif isinstance(valueColumns, str):
        valueColumns = np.array([valueColumns])

    if indexColumnHeaders is None:
        indexColumnHeaders = [index.name for index in cubeIndexes]

    # Build the full axis list and the matching dataframe column names.
    # Copy cubeIndexes so appending the value index below does not mutate the
    # caller's list (the original aliased and appended to the argument).
    _allindexes = list(cubeIndexes)
    _allIndexNames = indexColumnHeaders[:]
    if valueIndex is not None:
        _allindexes.append(valueIndex)
        _allIndexNames.append("data_index")

    # Fill missing value columns to prevent melt errors.
    if isinstance(dataframe, pd.DataFrame):
        cols_not_in_df = [
            col for col in valueColumns if col not in dataframe.columns
        ]
        for col in cols_not_in_df:
            dataframe[col] = np.nan

    _full = dataframe.reset_index().melt(id_vars=indexColumnHeaders,
                                         value_vars=valueColumns,
                                         var_name="data_index",
                                         value_name="data_value")

    if _full.size == 0:
        _cube = cube(_allindexes, np.array([], dtype="O"))
    else:
        # Sum to accumulate over duplicate records.
        _full = _full.groupby(_allIndexNames, as_index=False).sum()
        _dtype = _full["data_value"].dtype

        # Drop empty values ("" for strings, 0 for numerics) and NaN rows so
        # they are not scattered into the cube.
        _dataType = kindToString(_dtype.kind)
        if _dataType == "string":
            _full = _full[(_full["data_value"] != "")
                          & (_full['data_value'].notna())]
        else:
            _full = _full[(_full["data_value"] != 0)
                          & (_full['data_value'].notna())]

        _size = [len(x) for x in _allindexes]
        _emptyData = np.zeros(_size, dtype=_dtype)
        # BUG FIX: dtype must be passed by keyword; the original passed it
        # positionally, where it landed in cube()'s 'broadcast' parameter.
        _cube = cube(_allindexes, _emptyData, dtype=_dtype)

        _valuePos = len(_full.columns)

        # Scatter each row's value into the cube at the coordinates given by
        # the axis positions; skip rows whose labels are unknown to the axes.
        for _row in _full.itertuples():
            _arr = []
            _isOK = True
            _value = _row[_valuePos]
            for nn in range(1, len(_allIndexNames) + 1):
                _indexValue = _row[nn]
                if _indexValue in _allindexes[nn - 1]._indices:
                    _pos = _allindexes[nn - 1]._indices[_indexValue]
                    _arr.append(_pos)
                else:
                    _isOK = False
                    break

            if _isOK:
                _cube._values[tuple(_arr)] = _value

        if (valueIndex is not None) and (replaceByIndex is not None):
            _cube = changeIndex(_cube, valueIndex, replaceByIndex, 2)

    return _cube
    def cubeEvaluate(self,
                     result,
                     nodeDic,
                     nodeId,
                     dims=None,
                     rows=None,
                     columns=None,
                     summaryBy="sum",
                     bottomTotal=False,
                     rightTotal=False,
                     fromRow=0,
                     toRow=0):
        """Evaluate a cube for display: filter, aggregate, pivot and paginate.

        result: cubepy Cube to evaluate.
        nodeDic / nodeId: node dictionary and id of the evaluated node.
        dims: dimensions used only as filters (not displayed).
        rows / columns: lists of dicts with a "field" key (plus filter data)
            selecting which dimensions become the result's rows/columns.
        summaryBy: aggregation name - "sum" (default), "avg", "max" or "min".
        bottomTotal / rightTotal: append a totals row / column when True.
        fromRow / toRow: 1-based pagination bounds for rows.
        Returns self.createResult(...) built from a dict with "columns",
        "index" and "data" keys.
        """
        # Map the aggregation name to the corresponding safe aggregation fn.
        sby = safesum
        if summaryBy == 'avg':
            sby = safemean
        elif summaryBy == 'max':
            sby = safemax
        elif summaryBy == 'min':
            sby = safemin

        # Normalize pagination bounds (1-based; defaults to rows 1..100).
        if (fromRow is None) or int(fromRow) <= 0:
            fromRow = 1
        if (toRow is None) or int(toRow) < 1:
            toRow = 100
        fromRow = int(fromRow)
        toRow = int(toRow)

        result = self.applyHierarchy(result, nodeDic, nodeId, dims, rows,
                                     columns, sby)

        # Collect the requested row/column fields that exist in the cube and
        # accumulate filters from rows, columns and dims selections.
        _filters = []
        _rows = []
        _columns = []
        if not rows is None:
            for row in rows:
                if self.hasDim(result, str(row["field"])):
                    _rows.append(str(row["field"]))
                    self.addToFilter(nodeDic, row, _filters)

        if not columns is None:
            for column in columns:
                if self.hasDim(result, str(column["field"])):
                    _columns.append(str(column["field"]))
                    self.addToFilter(nodeDic, column, _filters)

        if not dims is None:
            for dim in dims:
                if self.hasDim(result, str(dim["field"])):
                    self.addToFilter(nodeDic, dim, _filters)

        tmp = None
        if len(_rows) == 0 and len(_columns) == 0 and result.ndim > 0:
            # No visible dimensions: reduce everything to a scalar cube.
            #_rows.append( result.dims[0] )
            tmp = cubepy.Cube([], result.filter(_filters).reduce(sby))

        else:
            # Keep only row/column dimensions, reduce the rest, then order
            # the axes as rows followed by columns.
            tmp = result.filter(_filters).reduce(
                sby, keep=(_rows + _columns)).transpose(_rows + _columns)

        finalValues = tmp.values
        finalIndexes = []
        if tmp.ndim > 0:
            finalIndexes = tmp.axes[0].values
        finalColumns = ["Total"]
        if tmp.ndim == 2:
            finalColumns = tmp.axes[1].values

        # Add totals
        _totalRow = None
        if bottomTotal and len(_rows) > 0:
            # add total row (kept separate; appended to res["data"] below)
            #finalIndexes = np.append(finalIndexes,"Total")
            if tmp.ndim == 1:
                _totalRow = finalValues.sum(axis=0).reshape(1)
                #finalValues = np.append( finalValues, finalValues.sum(axis=0).reshape(1), axis=0)
            else:
                _totalRow = finalValues.sum(axis=0).reshape(
                    1, len(finalValues[0]))
                _totalRow = _totalRow[0]
                if rightTotal:
                    # Grand-total cell when a totals column is also requested.
                    _totalRow = np.append(_totalRow, finalValues.sum())

        if rightTotal and len(_columns) > 0:
            # add total column
            if tmp.ndim == 1:
                finalIndexes = np.append(finalIndexes, "Total")
                finalValues = np.append(finalValues,
                                        finalValues.sum(axis=0).reshape(1),
                                        axis=0)
            else:
                finalColumns = np.append(finalColumns, "Total")
                finalValues = np.append(finalValues,
                                        finalValues.sum(axis=1).reshape(
                                            len(finalValues), 1),
                                        axis=1)

            # with a single dimension

        # Check for inf values and replace them with None (serialized as null).
        if kindToString(finalValues.dtype.kind) == "numeric":
            if np.isinf(finalValues).any():
                finalValues[np.isinf(finalValues)] = None

        # Check for nan values and replace them with None.
        # if np.isnan(finalValues).any():
        if pd.isnull(finalValues).any():
            try:
                finalValues = np.where(np.isnan(finalValues), None,
                                       finalValues)
            except:
                # np.isnan fails on object/string dtypes; fall back to the
                # slower pd.isnull mask assignment.
                finalValues[pd.isnull(finalValues)] = None

        # Build the response shape depending on which dimensions are visible.
        res = {}
        pageInfo = None
        onRow = None
        onColumn = None
        if len(_rows) == 0 and len(_columns) == 0:
            # Scalar result.
            res = {
                "columns": [],
                "index": ["Total"],
                "data": [[finalValues.tolist()]]
            }
        elif len(_rows) == 0:
            # Columns only (capped at 300 entries).
            onColumn = _columns[0]
            res = {
                "columns": finalIndexes[:300].tolist(),
                "index": finalColumns,
                "data": [finalValues[:300].tolist()]
            }
        elif len(_columns) == 0:
            # Rows only, paginated when the row count exceeds PAGESIZE.
            if (len(finalIndexes) > self.PAGESIZE):
                pageInfo = {
                    "fromRow": int(fromRow),
                    "toRow": int(toRow),
                    "totalRows": len(finalIndexes)
                }

            onRow = _rows[0]
            res = {
                "columns": finalColumns,
                "index": finalIndexes[fromRow - 1:toRow].tolist(),
                "data": [[x] for x in finalValues[fromRow - 1:toRow].tolist()]
            }
            # add total rows
            if not _totalRow is None:
                res["index"].append("Total")
                res["data"].append(_totalRow.tolist())

        else:
            # Rows and columns: paginate rows, cap columns at 300.
            onColumn = _columns[0]
            onRow = _rows[0]

            if (len(finalIndexes) > self.PAGESIZE):
                pageInfo = {
                    "fromRow": int(fromRow),
                    "toRow": int(toRow),
                    "totalRows": len(finalIndexes)
                }

            res = {
                "columns": finalColumns[:300].tolist(),
                "index": finalIndexes[fromRow - 1:toRow].tolist(),
                "data": finalValues[fromRow - 1:toRow, :300].tolist()
            }

            # add total rows
            if not _totalRow is None:
                res["index"].append("Total")
                res["data"].append(_totalRow[:300].tolist())

        return self.createResult(res,
                                 type(tmp),
                                 onRow=onRow,
                                 onColumn=onColumn,
                                 node=nodeDic[nodeId],
                                 pageInfo=pageInfo)