def _get_dataframe_from_cursor(self, cursor):
    """
    Build a DataFrame from the given cursor

    The cursor is converted with ``cursor_to_dataframe`` using this
    instance's parser, then passed through ``_restore_dataframe_proper``.

    :param cursor: the cursor to read the documents from
    :return: the result of ``_restore_dataframe_proper``
    """
    raw = cursor_to_dataframe(cursor, parser=self._parser)
    return self._restore_dataframe_proper(raw)
def get_dataframe_documents(self, name, columns=None, lazy=False,
                            filter=None, version=-1, is_series=False,
                            **kwargs):
    """
    Internal method to return DataFrame from documents

    :param name: the name of the object (str)
    :param columns: the column projection as a list of column names
    :param lazy: if True returns a lazy representation as an MDataFrame.
       If False retrieves all data and returns a DataFrame (default)
    :param filter: the filter to be applied as a column__op=value dict
    :param version: the version to retrieve (not supported, the value
       is ignored)
    :param is_series: if True returns a Series instead of a DataFrame
    :param kwargs: remaining kwargs are used as a filter. A non-empty
       filter kwarg overrides these kwargs.
    :return: the retrieved object (DataFrame, Series or MDataFrame)
    """
    collection = self.collection(name)
    # an explicit, non-empty filter takes precedence over keyword filters
    filter = filter or kwargs
    if lazy:
        from ..mdataframe import MDataFrame
        df = MDataFrame(collection, columns=columns).query(**filter)
        if is_series:
            df = df[0]
        return df
    # TODO ensure the same processing is applied in MDataFrame
    # TODO this method should always use a MDataFrame disregarding lazy
    if filter:
        from .query import Filter
        query = Filter(collection, **filter).query
        cursor = collection.find(filter=query, projection=columns)
    else:
        cursor = collection.find(projection=columns)
    # restore dataframe
    df = cursor_to_dataframe(cursor)
    if '_id' in df.columns:
        del df['_id']
    meta = self.metadata(name)
    # read kind_meta once so that every restore step below tolerates
    # metadata without it, not just the dtype conversion
    kind_meta = getattr(meta, 'kind_meta', None)
    if kind_meta is None:
        kind_meta = {}
    else:
        df = convert_dtypes(df, kind_meta.get('dtypes', {}))
    # -- restore columns
    # 'columns' may be missing from the metadata; dict(None) would raise
    meta_columns = dict(kind_meta.get('columns') or {})
    if meta_columns:
        # apply projection, if any
        if columns:
            # get only projected columns
            # NOTE(review): the original comment states that meta_columns
            # is {origin_column: stored_column} -- confirm against the
            # code that writes this metadata
            orig_columns = {k: v for k, v in meta_columns.items()
                            if k in columns or v in columns}
        else:
            # restore columns to original name
            orig_columns = meta_columns
        df.rename(columns=orig_columns, inplace=True)
    # -- restore indexes
    idx_meta = kind_meta.get('idx_meta')
    if idx_meta:
        df = restore_index(df, idx_meta)
    if is_series:
        # reduce to the first column; a separate local keeps the ``name``
        # argument (the object name) from being overwritten
        index = df.index
        col_name = df.columns[0]
        df = df[col_name]
        df.index = index
        # a column labelled 'None' is turned back into an unnamed Series
        df.name = None if col_name == 'None' else col_name
    return df
def list_indexes(self):
    """
    List the indexes reported by the collection

    :return: the result of ``self.collection.list_indexes()`` converted
       by ``cursor_to_dataframe``
    """
    index_cursor = self.collection.list_indexes()
    return cursor_to_dataframe(index_cursor)