def indexFromPandas(dataframe, columnName=None, removeEmpty=True, indexName="new index"):
    """Return a cp.index built from one column of a pandas dataframe.

    dataframe: pandas dataframe
    columnName: dataframe column name used to create the cp.index.
        By default the first column is used.
    removeEmpty: True to drop NaN rows (and "" values for string/object columns)
    indexName: name of the resulting index
    Ex.
        cp.indexFromPandas(df)
        cp.indexFromPandas(df,"column10")
    """
    if columnName is None:
        _serie = dataframe[dataframe.columns[0]]
    else:
        _serie = dataframe[columnName]
    if removeEmpty:
        # Work on a new series: dropna(inplace=True) on a column selection
        # mutates (or warns about mutating) the caller's dataframe.
        _serie = _serie.dropna()
        # single kindToString call, membership test instead of duplicated ==
        if kindToString(_serie.dtype.kind) in ("string", "object"):
            _serie = _serie[_serie != ""]
    return index(indexName, _serie.unique())
def previewNode(self, nodeDic, nodeId):
    """Build a JSON preview for a node whose result is a cubepy Cube or Index.

    nodeDic: dict mapping node id -> node object
    nodeId: id of the node to preview
    Returns a json string with keys: resultType, dims, console, preview.
    """
    from pyplan_engine.classes.Helpers import Helpers
    from sys import getsizeof
    res = {
        "resultType": str(type(nodeDic[nodeId].result)),
        "dims": [],
        "console": nodeDic[nodeId].lastEvaluationConsole,
        "preview": ""
    }
    if isinstance(nodeDic[nodeId].result, cubepy.Cube):
        cube = nodeDic[nodeId].result
        # Describe each axis as "title (id) [len]" when the axis id matches a
        # node in nodeDic that has a title, otherwise just "id [len]".
        for _axis in cube.axes:
            _nodeTitle = None
            if _axis.name in nodeDic:
                _nodeTitle = nodeDic[_axis.name].title
            if _nodeTitle is None:
                _item = _axis.name + " [" + str(len(_axis)) + "]"
            else:
                _item = _nodeTitle + \
                    " ("+_axis.name+") [" + str(len(_axis)) + "]"
            res["dims"].append(_item)
        res["preview"] += "Dimensions: " + str(cube.ndim)
        res["preview"] += "\nShape: " + str(cube.shape)
        res["preview"] += "\nSize: " + str(cube.size)
        # NOTE(review): getsizeof is a shallow size (does not follow the
        # underlying ndarray buffer), so this "Memory" figure may undercount.
        res["preview"] += "\nMemory: " + \
            str(round(getsizeof(cube)/1024/1024, 2)) + " Mb"
        if not cube.values is None:
            res["preview"] += "\nData type: " + \
                str(cube.values.dtype) + \
                " (" + kindToString(cube.values.dtype.kind)+")"
            # sample of the values, truncated to 1000 characters
            res["preview"] += "\nValues: \n\n" + str(cube.values)[:1000]
    elif isinstance(nodeDic[nodeId].result, cubepy.Index):
        index = nodeDic[nodeId].result
        res["preview"] += "Size: " + str(len(index))
        res["preview"] += "\nMemory: " + \
            str(round(getsizeof(index)/1024/1024, 2)) + " Mb"
        if not index.values is None:
            res["preview"] += "\nData type: " + \
                str(index.values.dtype) + \
                " (" + kindToString(index.values.dtype.kind)+")"
            #res["preview"] += "\nValues: \n" + '\n\t'.join([''.join(row) for row in index.values[:100]])
            # NOTE(review): ''.join(str(row)) is equivalent to str(row) for a
            # scalar row; kept as-is to avoid any behavior change.
            # First 100 index values, one per tab-indented line.
            res["preview"] += "\nValues: \n\t" + \
                '\n\t'.join([''.join(str(row)) for row in index.values[:100]])
    return json.dumps(res)
def previewNode(self, nodeDic, nodeId):
    """Build a JSON preview for a node whose result is a numpy ndarray.

    nodeDic: dict mapping node id -> node object
    nodeId: id of the node to preview
    Returns a json string with keys: resultType, dims, console, preview.
    """
    from pyplan_engine.classes.Helpers import Helpers
    from sys import getsizeof
    arr = nodeDic[nodeId].result
    # one entry per axis: "<AXISNAME><n> [<size>]"
    dims = [
        self.AXISNAME + str(axis) + " [" + str(arr.shape[axis]) + "]"
        for axis in range(arr.ndim)
    ]
    preview = "".join([
        "Dimensions: " + str(arr.ndim),
        "\nShape: " + str(arr.shape),
        "\nSize: " + str(arr.size),
        "\nMemory: " + str(round(arr.nbytes / 1024 / 1024, 2)) + " Mb",
        "\nData type: " + str(arr.dtype) + " (" + kindToString(arr.dtype.kind) + ")",
        "\nValues: \n\n" + str(arr)[:1000],
    ])
    res = {
        "resultType": str(type(arr)),
        "dims": dims,
        "console": nodeDic[nodeId].lastEvaluationConsole,
        "preview": preview
    }
    # drop the local reference to the (possibly large) array before returning
    arr = None
    return json.dumps(res)
def cube(axes, values=None, broadcast=True, dtype=None):
    """Create a cube object.

    axes: list of axis of the cube
    values: optional, list of values of the cube. Can be other cubes for
        building a report (they are stacked along the given axis).
    broadcast: passed through to cubepy.stack when stacking sub-cubes
    dtype: optional numpy dtype for the created cube
    Ex.
        cp.cube([time])
        cp.cube([time,product])
        cp.cube([time,product],[10,20,30,40,50,60,70,80])
        cp.cube([time,product],cp.random)
        cp.cube([index_reports],[report_1,report_2])
    """
    if values is None:
        # empty cube: string dtypes get '' fill, everything else zeros
        if dtype is not None:
            if dtype is str:
                return cubepy.Cube.full(axes, '', dtype='U25')
            elif kindToString(np.dtype(dtype).kind) == "string":
                return cubepy.Cube.full(axes, '', dtype=dtype)
        return cubepy.Cube.zeros(axes)
    if isinstance(values, (list, np.ndarray)):
        if len(values) > 0 and isinstance(values[0], cubepy.Cube):
            # list of cubes: stack them along the (single) given axis
            if isinstance(axes, list):
                axes = axes[0]
            return cubepy.stack(values, axes, broadcast)
        return cubepy.Cube(axes, values, fillValues=True, dtype=dtype)
    elif isinstance(values, numbers.Number) and values == random:
        # cp.random sentinel: fill with random integers in [0, 100)
        theSize = [len(x) for x in axes]
        return cube(axes, np.random.randint(100, size=theSize), dtype=dtype)
    else:
        # scalar value: full cube with that value
        return cubepy.Cube.full(axes, values, dtype=dtype)
def previewNode(self, nodeDic, nodeId):
    """Build a JSON preview for a node whose result is a pandas DataFrame,
    Series or Index.

    nodeDic: dict mapping node id -> node object
    nodeId: id of the node to preview
    Returns a json string with keys: resultType, dims, columns, console, preview.
    """
    res = {
        "resultType": str(type(nodeDic[nodeId].result)),
        "dims": [],
        "columns": [],
        "console": nodeDic[nodeId].lastEvaluationConsole,
        "preview": ""
    }
    if isinstance(nodeDic[nodeId].result, pd.DataFrame):
        cube = nodeDic[nodeId].result
        if self.isIndexed(cube):
            res["dims"] = list(cube.index.names)
        # First 500 columns as "name (kind)". Use .iloc for the dtype lookup:
        # idx is a position, and plain [idx] on the column-name-indexed dtypes
        # Series is label-based (deprecated/removed in modern pandas).
        for idx, col in enumerate(cube.columns.values[:500]):
            res["columns"].append(
                str(col) + " (" + kindToString(cube.dtypes.iloc[idx].kind) + ")")
        res["preview"] += "Rows: " + str(len(cube.index))
        res["preview"] += "\nShape: " + str(cube.shape)
        res["preview"] += "\nMemory: " + \
            str(round(cube.memory_usage(deep=True).sum() / 1024/1024, 2)) + " Mb"
        res["preview"] += "\nValues: \n" + cube.head(20).to_string()
    elif isinstance(nodeDic[nodeId].result, pd.Series):
        serie = nodeDic[nodeId].result
        if self.isIndexed(serie):
            res["dims"] = list(serie.index.names)
        res["preview"] += "Rows: " + str(len(serie.index))
        res["preview"] += "\nMemory: " + \
            str(round(serie.memory_usage(deep=True) / 1024/1024, 2)) + " Mb"
        res["preview"] += "\nValues: \n" + serie.head(20).to_string()
    elif isinstance(nodeDic[nodeId].result, pd.Index):
        # plain Index: just its repr, truncated to 1000 characters
        res["preview"] = str(nodeDic[nodeId].result)[:1000]
    return json.dumps(res)
def cubeFromPandas(dataframe, cubeIndexes, valueColumns, indexColumnHeaders=None, replaceByIndex=None):
    """Create a new cp.cube, converting pandas to multidimensional data,
    according to the parameters.

    dataframe: pandas dataframe
    cubeIndexes: list of cp.index objects used as the cube dimensions
    valueColumns: string with the dataframe column name that contains the
        values, or a cp.index whose values are column names (those columns
        become an extra "data_index" dimension)
    indexColumnHeaders: (optional) column names in pandas to match with
        cubeIndexes. Used if the dataframe headers differ from the index
        identifiers.
    replaceByIndex: (optional) replace the index used in valueColumns by this
        index (using changeIndex)
    """
    import pandas as pd
    valueIndex = None
    if isinstance(valueColumns, cubepy.Index):
        valueIndex = valueColumns
        valueColumns = valueIndex.values
    elif isinstance(valueColumns, str):
        valueColumns = np.array([valueColumns])
    if indexColumnHeaders is None:
        indexColumnHeaders = [index.name for index in cubeIndexes]
    # Build the full index/name lists on copies: appending to an alias of
    # cubeIndexes would mutate the caller's list.
    _allindexes = list(cubeIndexes)
    _allIndexNames = indexColumnHeaders[:]
    if valueIndex is not None:
        _allindexes.append(valueIndex)
        _allIndexNames.append("data_index")
    # fill missing value columns with NaN to prevent a melt error
    if isinstance(dataframe, pd.DataFrame):
        cols_not_in_df = [
            col for col in valueColumns if col not in dataframe.columns
        ]
        for col in cols_not_in_df:
            dataframe[col] = np.nan
    # unpivot: one row per (index values..., data_index, data_value)
    _full = dataframe.reset_index().melt(id_vars=indexColumnHeaders,
                                         value_vars=valueColumns,
                                         var_name="data_index",
                                         value_name="data_value")
    if _full.size == 0:
        _cube = cube(_allindexes, np.array([], dtype="O"))
    else:
        # sum to accumulate over duplicate records
        _full = _full.groupby(_allIndexNames, as_index=False).sum()
        _dtype = _full["data_value"].dtype
        _dataType = kindToString(_dtype.kind)
        # drop empty/zero cells so only meaningful values are written
        if _dataType == "string":
            _full = _full[(_full["data_value"] != "") &
                          (_full['data_value'].notna())]
        else:
            _full = _full[(_full["data_value"] != 0) &
                          (_full['data_value'].notna())]
        _size = [len(x) for x in _allindexes]
        _emptyData = np.zeros(_size, dtype=_dtype)
        # dtype must be passed by keyword: the third positional parameter of
        # cube() is 'broadcast', so cube(_allindexes, _emptyData, _dtype)
        # silently dropped the dtype.
        _cube = cube(_allindexes, _emptyData, dtype=_dtype)
        _valuePos = len(_full.columns)
        for _row in _full.itertuples():
            _arr = []
            _isOK = True
            _value = _row[_valuePos]
            # map each row's index labels to positions; skip rows whose
            # labels are not present in the target indexes
            for nn in range(1, len(_allIndexNames) + 1):
                _indexValue = _row[nn]
                if _indexValue in _allindexes[nn - 1]._indices:
                    _pos = _allindexes[nn - 1]._indices[_indexValue]
                    _arr.append(_pos)
                else:
                    _isOK = False
                    break
            if _isOK:
                _cube._values[tuple(_arr)] = _value
    if (valueIndex is not None) and (replaceByIndex is not None):
        _cube = changeIndex(_cube, valueIndex, replaceByIndex, 2)
    return _cube
def cubeEvaluate(self, result, nodeDic, nodeId, dims=None, rows=None, columns=None, summaryBy="sum", bottomTotal=False, rightTotal=False, fromRow=0, toRow=0):
    """Evaluate a cubepy cube for display: filter, aggregate to the requested
    row/column dimensions, optionally add total row/column, paginate, and
    return via self.createResult.

    result: cubepy cube to evaluate
    dims/rows/columns: lists of dicts with a "field" key (dimension ids),
        used for filtering and for choosing the kept dimensions
    summaryBy: aggregation - "sum" (default), "avg", "max" or "min"
    bottomTotal/rightTotal: append a totals row / column
    fromRow/toRow: 1-based pagination window over the row axis
    """
    # pick the aggregation function
    sby = safesum
    if summaryBy == 'avg':
        sby = safemean
    elif summaryBy == 'max':
        sby = safemax
    elif summaryBy == 'min':
        sby = safemin
    # normalize pagination bounds (1-based, defaults 1..100)
    if (fromRow is None) or int(fromRow) <= 0:
        fromRow = 1
    if (toRow is None) or int(toRow) < 1:
        toRow = 100
    fromRow = int(fromRow)
    toRow = int(toRow)
    result = self.applyHierarchy(result, nodeDic, nodeId, dims, rows,
                                 columns, sby)
    # collect the dimensions actually present on the cube, plus their filters
    _filters = []
    _rows = []
    _columns = []
    if not rows is None:
        for row in rows:
            if self.hasDim(result, str(row["field"])):
                _rows.append(str(row["field"]))
                self.addToFilter(nodeDic, row, _filters)
    if not columns is None:
        for column in columns:
            if self.hasDim(result, str(column["field"])):
                _columns.append(str(column["field"]))
                self.addToFilter(nodeDic, column, _filters)
    if not dims is None:
        for dim in dims:
            if self.hasDim(result, str(dim["field"])):
                self.addToFilter(nodeDic, dim, _filters)
    tmp = None
    if len(_rows) == 0 and len(_columns) == 0 and result.ndim > 0:
        # no visible dimensions: reduce everything to a single scalar cube
        #_rows.append( result.dims[0] )
        tmp = cubepy.Cube([], result.filter(_filters).reduce(sby))
    else:
        # keep only row/column dims, rows first
        tmp = result.filter(_filters).reduce(
            sby, keep=(_rows + _columns)).transpose(_rows + _columns)
    finalValues = tmp.values
    finalIndexes = []
    if tmp.ndim > 0:
        finalIndexes = tmp.axes[0].values
    finalColumns = ["Total"]
    if tmp.ndim == 2:
        finalColumns = tmp.axes[1].values
    # Add totals
    _totalRow = None
    if bottomTotal and len(_rows) > 0:
        # build (but don't yet append) the totals row
        #finalIndexes = np.append(finalIndexes,"Total")
        if tmp.ndim == 1:
            _totalRow = finalValues.sum(axis=0).reshape(1)
            #finalValues = np.append( finalValues, finalValues.sum(axis=0).reshape(1), axis=0)
        else:
            _totalRow = finalValues.sum(axis=0).reshape(
                1, len(finalValues[0]))
            _totalRow = _totalRow[0]
            if rightTotal:
                # grand total goes in the extra "Total" column
                _totalRow = np.append(_totalRow, finalValues.sum())
    if rightTotal and len(_columns) > 0:
        # append the totals column
        if tmp.ndim == 1:
            finalIndexes = np.append(finalIndexes, "Total")
            finalValues = np.append(finalValues,
                                    finalValues.sum(axis=0).reshape(1),
                                    axis=0)
        else:
            finalColumns = np.append(finalColumns, "Total")
            finalValues = np.append(finalValues,
                                    finalValues.sum(axis=1).reshape(
                                        len(finalValues), 1),
                                    axis=1)
    # check inf: replace +/-inf with None so the result is JSON-safe
    if kindToString(finalValues.dtype.kind) == "numeric":
        if np.isinf(finalValues).any():
            finalValues[np.isinf(finalValues)] = None
        # check if there are nan values
        # if np.isnan(finalValues).any():
        if pd.isnull(finalValues).any():
            try:
                finalValues = np.where(np.isnan(finalValues), None,
                                       finalValues)
            except:
                # np.isnan fails on object arrays; fall back to pd.isnull mask
                finalValues[pd.isnull(finalValues)] = None
    res = {}
    pageInfo = None
    onRow = None
    onColumn = None
    if len(_rows) == 0 and len(_columns) == 0:
        # scalar result
        res = {
            "columns": [],
            "index": ["Total"],
            "data": [[finalValues.tolist()]]
        }
    elif len(_rows) == 0:
        # columns only: single row, at most 300 columns
        onColumn = _columns[0]
        res = {
            "columns": finalIndexes[:300].tolist(),
            "index": finalColumns,
            "data": [finalValues[:300].tolist()]
        }
    elif len(_columns) == 0:
        # rows only: paginate rows, single "Total" column
        if (len(finalIndexes) > self.PAGESIZE):
            pageInfo = {
                "fromRow": int(fromRow),
                "toRow": int(toRow),
                "totalRows": len(finalIndexes)
            }
        onRow = _rows[0]
        res = {
            "columns": finalColumns,
            "index": finalIndexes[fromRow - 1:toRow].tolist(),
            "data": [[x] for x in finalValues[fromRow - 1:toRow].tolist()]
        }
        # add totals row
        if not _totalRow is None:
            res["index"].append("Total")
            res["data"].append(_totalRow.tolist())
    else:
        # rows and columns: paginate rows, at most 300 columns
        onColumn = _columns[0]
        onRow = _rows[0]
        if (len(finalIndexes) > self.PAGESIZE):
            pageInfo = {
                "fromRow": int(fromRow),
                "toRow": int(toRow),
                "totalRows": len(finalIndexes)
            }
        res = {
            "columns": finalColumns[:300].tolist(),
            "index": finalIndexes[fromRow - 1:toRow].tolist(),
            "data": finalValues[fromRow - 1:toRow, :300].tolist()
        }
        # add totals row
        if not _totalRow is None:
            res["index"].append("Total")
            res["data"].append(_totalRow[:300].tolist())
    return self.createResult(res,
                             type(tmp),
                             onRow=onRow,
                             onColumn=onColumn,
                             node=nodeDic[nodeId],
                             pageInfo=pageInfo)