Ejemplo n.º 1
0
 def _get_dataframe_from_cursor(self, cursor):
     """
     Build a DataFrame from the given cursor.

     :param cursor: the cursor to convert
     :return: the result of cursor_to_dataframe (called with this
        object's parser) after it was passed through
        _restore_dataframe_proper
     """
     raw = cursor_to_dataframe(cursor, parser=self._parser)
     return self._restore_dataframe_proper(raw)
Ejemplo n.º 2
0
Archivo: base.py Proyecto: 0r0i/omegaml
    def get_dataframe_documents(self, name, columns=None, lazy=False,
                                filter=None, version=-1, is_series=False,
                                **kwargs):
        """
        Internal method to return DataFrame from documents

        :param name: the name of the object (str)
        :param columns: the column projection as a list of column names
        :param lazy: if True returns a lazy representation as an MDataFrame.
           If False retrieves all data and returns a DataFrame (default)
        :param filter: the filter to be applied as a column__op=value dict
        :param version: the version to retrieve (not supported)
        :param is_series: if True returns a Series instead of a DataFrame
        :param kwargs: remaining kwargs are used as a filter. The filter
           kwarg overrides other kwargs.
        :return: the retrieved object (DataFrame, Series or MDataFrame)

        """
        collection = self.collection(name)
        # an explicit filter takes precedence over loose keyword arguments
        filter = filter or kwargs
        if lazy:
            from ..mdataframe import MDataFrame
            df = MDataFrame(collection, columns=columns).query(**filter)
            if is_series:
                df = df[0]
            return df
        # TODO ensure the same processing is applied in MDataFrame
        # TODO this method should always use a MDataFrame disregarding lazy
        if filter:
            from .query import Filter
            query = Filter(collection, **filter).query
            cursor = collection.find(filter=query, projection=columns)
        else:
            cursor = collection.find(projection=columns)
        # restore dataframe
        df = cursor_to_dataframe(cursor)
        if '_id' in df.columns:
            del df['_id']
        meta = self.metadata(name)
        # read kind_meta once and tolerate metadata that lacks it, so every
        # restore step below degrades the same way instead of only the
        # dtype conversion being guarded
        kind_meta = getattr(meta, 'kind_meta', None) or {}
        if hasattr(meta, 'kind_meta'):
            df = convert_dtypes(df, kind_meta.get('dtypes', {}))
        # -- restore columns
        # a missing 'columns' entry means there is nothing to rename
        meta_columns = dict(kind_meta.get('columns') or {})
        if meta_columns:
            # apply projection, if any
            if columns:
                # get only projected columns
                # meta_columns is {origin_column: stored_column}
                orig_columns = {k: v for k, v in meta_columns.items()
                                if k in columns or v in columns}
            else:
                # restore columns to original name
                orig_columns = meta_columns
            df.rename(columns=orig_columns, inplace=True)
        # -- restore indexes
        idx_meta = kind_meta.get('idx_meta')
        if idx_meta:
            df = restore_index(df, idx_meta)
        # -- convert to a Series built from the first column
        if is_series:
            index = df.index
            # use a separate local so the `name` parameter is not rebound
            series_name = df.columns[0]
            df = df[series_name]
            df.index = index
            # a column literally named 'None' yields an unnamed Series
            df.name = None if series_name == 'None' else series_name
        return df
Ejemplo n.º 3
0
 def list_indexes(self):
     """
     Return the indexes reported by this object's collection.

     :return: the output of collection.list_indexes() converted by
        cursor_to_dataframe
     """
     index_cursor = self.collection.list_indexes()
     return cursor_to_dataframe(index_cursor)