def test_fromRecords_toRecords(self):
    """Exercise ``LongPanel.fromRecords`` / ``toRecords`` on several inputs.

    The same ten-row fixture is fed to ``fromRecords`` as a structured
    array, as a DataFrame and as a dict of arrays.  Field ``f0`` cycles
    through ``0..4`` twice and ``f1`` is ``0`` for the first five rows and
    ``1`` for the rest; both are passed as the two positional field
    arguments, leaving ``f2`` and ``f3`` as items.
    """
    # structured array
    K = 10

    recs = np.zeros(K, dtype='O,O,f8,f8')
    # Floor division and an explicit list keep the fixture identical on
    # Python 2 and Python 3: ``/`` yields floats on Python 3 and a
    # ``range`` object cannot be repeated with ``*``.
    recs['f0'] = list(range(K // 2)) * 2
    recs['f1'] = np.arange(K) // (K // 2)
    recs['f2'] = np.arange(K) * 2
    recs['f3'] = np.arange(K)

    lp = LongPanel.fromRecords(recs, 'f0', 'f1')
    self.assertEqual(len(lp.items), 2)

    lp = LongPanel.fromRecords(recs, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)

    # toRecords yields the items plus two additional fields
    torecs = lp.toRecords()
    self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

    # DataFrame
    df = DataFrame.from_records(recs)
    lp = LongPanel.fromRecords(df, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)

    # dict of arrays
    series = DataFrame.from_records(recs)._series
    lp = LongPanel.fromRecords(series, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)
    # the input mapping must still contain the excluded field afterwards
    self.assertIn('f2', series)

    # a plain 2-D array is rejected
    self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)),
                      0, 1)
def test_fromRecords_toRecords(self):
    """Exercise ``LongPanel.fromRecords`` / ``toRecords`` on several inputs.

    The same ten-row fixture is fed to ``fromRecords`` as a structured
    array, as a DataFrame and as a dict of arrays.  Field ``f0`` cycles
    through ``0..4`` twice and ``f1`` is ``0`` for the first five rows and
    ``1`` for the rest; both are passed as the two positional field
    arguments, leaving ``f2`` and ``f3`` as items.
    """
    # structured array
    K = 10

    recs = np.zeros(K, dtype="O,O,f8,f8")
    # Floor division and an explicit list keep the fixture identical on
    # Python 2 and Python 3: ``/`` yields floats on Python 3 and a
    # ``range`` object cannot be repeated with ``*``.
    recs["f0"] = list(range(K // 2)) * 2
    recs["f1"] = np.arange(K) // (K // 2)
    recs["f2"] = np.arange(K) * 2
    recs["f3"] = np.arange(K)

    lp = LongPanel.fromRecords(recs, "f0", "f1")
    self.assertEqual(len(lp.items), 2)

    lp = LongPanel.fromRecords(recs, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)

    # toRecords yields the items plus two additional fields
    torecs = lp.toRecords()
    self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

    # DataFrame
    df = DataFrame.from_records(recs)
    lp = LongPanel.fromRecords(df, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)

    # dict of arrays
    series = DataFrame.from_records(recs)._series
    lp = LongPanel.fromRecords(series, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)
    # the input mapping must still contain the excluded field afterwards
    self.assertIn("f2", series)

    # a plain 2-D array is rejected
    self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)),
                      0, 1)
def read(self, coerce_float=True, parse_dates=None, columns=None,
         chunksize=None):
    """Read the table into a DataFrame, optionally in chunks.

    Parameters
    ----------
    coerce_float : boolean, default True
        Passed through to ``DataFrame.from_records``.
    parse_dates : optional
        Forwarded to ``self._harmonize_columns``.
    columns : list, optional
        Names of the table columns to select.  When ``None`` or empty the
        whole table is selected.  If ``self.index`` is set, the index
        columns are selected as well, placed before ``columns``.
    chunksize : int, optional
        When given, the result of ``self._query_iterator`` is returned
        instead of a single frame.

    Returns
    -------
    DataFrame, or whatever ``self._query_iterator`` returns when
    ``chunksize`` is not None.  In the non-chunked case the frame is also
    stored on ``self.frame``.
    """
    if columns is not None and len(columns) > 0:
        from sqlalchemy import select
        cols = [self.table.c[n] for n in columns]
        if self.index is not None:
            # Prepend the index columns, preserving their order.
            cols = [self.table.c[idx] for idx in self.index] + cols
        sql_select = select(cols)
    else:
        sql_select = self.table.select()

    result = self.pd_sql.execute(sql_select)
    column_names = result.keys()

    if chunksize is not None:
        return self._query_iterator(result, chunksize, column_names,
                                    coerce_float=coerce_float,
                                    parse_dates=parse_dates)

    data = result.fetchall()
    self.frame = DataFrame.from_records(
        data, columns=column_names, coerce_float=coerce_float)

    self._harmonize_columns(parse_dates=parse_dates)

    if self.index is not None:
        self.frame.set_index(self.index, inplace=True)

    return self.frame
def read_frame(sql, eng, index_col=None, coerce_float=True):
    """
    Execute a query through an engine and return its rows as a DataFrame.

    When ``index_col`` is given, that column becomes the index of the
    returned frame; otherwise the frame keeps the default index created by
    ``DataFrame.from_records``.

    Parameters
    ----------
    sql: string
        SQL query to be executed
    eng: sqlalchemy engine
    index_col: string, optional
        column name to use for the returned DataFrame object.
    coerce_float: boolean, default True
        passed through to ``DataFrame.from_records``.
    """
    cursor = eng.execute(sql)
    fetched = _safe_fetch(cursor)
    names = cursor.keys()
    cursor.close()

    frame = DataFrame.from_records(fetched, columns=names,
                                   coerce_float=coerce_float)

    if index_col is None:
        return frame
    return frame.set_index(index_col)
def read_frame(sql, con, index_col=None, coerce_float=True, params=None):
    """
    Run a query on a DB connection and return its rows as a DataFrame.

    When ``index_col`` is given, that column becomes the index of the
    returned frame; otherwise the frame keeps the default index created by
    ``DataFrame.from_records``.

    Parameters
    ----------
    sql: string
        SQL query to be executed
    con: DB connection object, optional
    index_col: string, optional
        column name to use for the returned DataFrame object.
    coerce_float: boolean, default True
        passed through to ``DataFrame.from_records``.
    params: list or tuple, optional
        List of parameters to pass to execute method.
    """
    cursor = execute(sql, con, params=params)
    fetched = _safe_fetch(cursor)
    # the first entry of each description item is used as the column name
    names = [desc[0] for desc in cursor.description]

    cursor.close()
    con.commit()

    frame = DataFrame.from_records(fetched, columns=names,
                                   coerce_float=coerce_float)

    if index_col is None:
        return frame
    return frame.set_index(index_col)
def frame_query(sql, con, index_col=None):
    """
    Returns a DataFrame corresponding to the result set of the query
    string.

    Optionally provide an index_col parameter to use one of the
    columns as the index. Otherwise will be 0 to len(results) - 1.

    Parameters
    ----------
    sql: string
        SQL query to be executed
    con: DB connection object, optional
    index_col: string, optional
        column name to use for the returned DataFrame object.
    """
    cur = execute(sql, con)
    rows = _safe_fetch(cur)
    # Read the column names while the cursor is still open, then release
    # the cursor before committing so it is not left dangling.
    columns = [col_desc[0] for col_desc in cur.description]

    cur.close()
    con.commit()

    result = DataFrame.from_records(rows, columns=columns)

    if index_col is not None:
        result = result.set_index(index_col)

    return result
def read(self, coerce_float=True, parse_dates=None, columns=None):
    """Read the table into a DataFrame.

    Parameters
    ----------
    coerce_float : boolean, default True
        Passed through to ``DataFrame.from_records``.
    parse_dates : optional
        Forwarded to ``self._harmonize_columns``.
    columns : list, optional
        Names of the table columns to select.  When ``None`` or empty the
        whole table is selected.  If ``self.index`` is set, the index
        columns are selected as well, placed before ``columns``.

    Returns
    -------
    DataFrame
        The frame, which is also stored on ``self.frame``.
    """
    if columns is not None and len(columns) > 0:
        from sqlalchemy import select
        cols = [self.table.c[n] for n in columns]
        if self.index is not None:
            # Prepend the index columns, preserving their order.
            cols = [self.table.c[idx] for idx in self.index] + cols
        sql_select = select(cols)
    else:
        sql_select = self.table.select()

    result = self.pd_sql.execute(sql_select)
    data = result.fetchall()
    column_names = result.keys()

    self.frame = DataFrame.from_records(data, columns=column_names,
                                        coerce_float=coerce_float)

    self._harmonize_columns(parse_dates=parse_dates)

    if self.index is not None:
        self.frame.set_index(self.index, inplace=True)

    return self.frame
def read_frame(sql, con, index_col=None):
    """
    Returns a DataFrame corresponding to the result set of the query
    string.

    Optionally provide an index_col parameter to use one of the
    columns as the index. Otherwise will be 0 to len(results) - 1.

    Parameters
    ----------
    sql: string
        SQL query to be executed
    con: DB connection object, optional
    index_col: string, optional
        column name to use for the returned DataFrame object.
    """
    cur = execute(sql, con)
    rows = _safe_fetch(cur)
    # Read the column names while the cursor is still open, then release
    # the cursor before committing so it is not left dangling.
    columns = [col_desc[0] for col_desc in cur.description]

    cur.close()
    con.commit()

    result = DataFrame.from_records(rows, columns=columns)

    if index_col is not None:
        result = result.set_index(index_col)

    return result
def read_frame(sql, con, index_col=None, coerce_float=True, params=None):
    """
    Run a query on a DB connection and return its rows as a DataFrame.

    When ``index_col`` is given, that column becomes the index of the
    returned frame; otherwise the frame keeps the default index created by
    ``DataFrame.from_records``.

    Parameters
    ----------
    sql: string
        SQL query to be executed
    con: DB connection object, optional
    index_col: string, optional
        column name to use for the returned DataFrame object.
    coerce_float : boolean, default True
        Attempt to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point, useful for SQL result sets
    params: list or tuple, optional
        List of parameters to pass to execute method.
    """
    cursor = execute(sql, con, params=params)
    fetched = _safe_fetch(cursor)
    # the first entry of each description item is used as the column name
    names = [desc[0] for desc in cursor.description]

    cursor.close()
    con.commit()

    frame = DataFrame.from_records(fetched, columns=names,
                                   coerce_float=coerce_float)

    if index_col is None:
        return frame
    return frame.set_index(index_col)
def read(self, coerce_float=True, parse_dates=None, columns=None):
    """Read the table into a DataFrame and store it on ``self.frame``.

    ``columns`` restricts the selection to the named table columns (with
    the index column, if any, selected first); ``None`` or an empty
    sequence selects the whole table.  ``coerce_float`` is passed to
    ``DataFrame.from_records`` and ``parse_dates`` to
    ``self._harmonize_columns``.
    """
    if columns is None or len(columns) == 0:
        query = self.table.select()
    else:
        from sqlalchemy import select
        selected = [self.table.c[name] for name in columns]
        if self.index is not None:
            selected = [self.table.c[self.index]] + selected
        query = select(selected)

    result = self.pd_sql.execute(query)
    rows = result.fetchall()
    names = result.keys()

    self.frame = DataFrame.from_records(rows, columns=names,
                                        coerce_float=coerce_float)
    self._harmonize_columns(parse_dates=parse_dates)

    index_name = self.index
    if index_name is not None:
        self.frame.set_index(index_name, inplace=True)

    # Assume if the index in prefix_index format, we gave it a name
    # and should return it nameless
    if index_name == self.prefix + '_index':
        self.frame.index.name = None

    return self.frame
def read(self, coerce_float=True, parse_dates=None, columns=None):
    """Load the table as a DataFrame, keeping a copy on ``self.frame``.

    Parameters
    ----------
    coerce_float : boolean, default True
        Passed through to ``DataFrame.from_records``.
    parse_dates : optional
        Forwarded to ``self._harmonize_columns``.
    columns : list, optional
        Table columns to select; ``None`` or empty selects everything.
        The index column, when set, is selected ahead of them.
    """
    wants_subset = columns is not None and len(columns) > 0
    if wants_subset:
        from sqlalchemy import select
        picked = [self.table.c[col] for col in columns]
        if self.index is not None:
            picked[:0] = [self.table.c[self.index]]
        statement = select(picked)
    else:
        statement = self.table.select()

    proxy = self.pd_sql.execute(statement)
    records = proxy.fetchall()
    header = proxy.keys()

    self.frame = DataFrame.from_records(
        records, columns=header, coerce_float=coerce_float)

    self._harmonize_columns(parse_dates=parse_dates)

    if self.index is not None:
        self.frame.set_index(self.index, inplace=True)

    # Assume if the index in prefix_index format, we gave it a name
    # and should return it nameless
    generated_name = self.prefix + '_index'
    if self.index == generated_name:
        self.frame.index.name = None

    return self.frame
def _wrap_result(data, columns, index_col=None, coerce_float=True,
                 parse_dates=None):
    """Build a DataFrame from a query's result set.

    The frame is created with ``DataFrame.from_records``, handed to
    ``_parse_date_columns`` together with ``parse_dates`` (the return
    value of that call is not used), and finally re-indexed in place on
    ``index_col`` when one is given.
    """
    wrapped = DataFrame.from_records(data, columns=columns,
                                     coerce_float=coerce_float)
    _parse_date_columns(wrapped, parse_dates)

    if index_col is None:
        return wrapped

    wrapped.set_index(index_col, inplace=True)
    return wrapped
def read_sql(self, sql, index_col=None, coerce_float=True, params=None,
             parse_dates=None):
    """Execute ``sql`` and return the fetched rows as a DataFrame.

    ``sql`` and ``params`` are combined by ``_convert_params`` into the
    arguments for ``self.execute``.  Column names are the first entry of
    each item in the cursor's ``description``.  ``coerce_float`` is passed
    to ``DataFrame.from_records``, ``parse_dates`` to
    ``_parse_date_columns``, and ``index_col`` (when given) becomes the
    index of the returned frame.
    """
    query_args = _convert_params(sql, params)
    cur = self.execute(*query_args)
    names = [desc[0] for desc in cur.description]
    records = self._fetchall_as_list(cur)
    cur.close()

    frame = DataFrame.from_records(records, columns=names,
                                   coerce_float=coerce_float)
    _parse_date_columns(frame, parse_dates)

    if index_col is None:
        return frame

    frame.set_index(index_col, inplace=True)
    return frame
def read_sql(self, sql, index_col=None, coerce_float=True,
             parse_dates=None, params=None):
    """Execute ``sql`` and return the fetched rows as a DataFrame.

    ``sql`` and ``params`` are combined by ``_convert_params`` into the
    arguments for ``self.execute``.  Rows come from the result's
    ``fetchall()`` and column names from its ``keys()``.  ``coerce_float``
    is passed to ``DataFrame.from_records``, ``parse_dates`` to
    ``_parse_date_columns``, and ``index_col`` (when given) becomes the
    index of the returned frame.
    """
    query_args = _convert_params(sql, params)
    proxy = self.execute(*query_args)
    records = proxy.fetchall()
    names = proxy.keys()

    frame = DataFrame.from_records(records, columns=names,
                                   coerce_float=coerce_float)
    _parse_date_columns(frame, parse_dates)

    if index_col is None:
        return frame

    frame.set_index(index_col, inplace=True)
    return frame
def read_sql(self, sql, index_col=None, coerce_float=True, params=None,
             parse_dates=None):
    """Run a query through ``self.execute`` and wrap the rows in a frame.

    Parameters
    ----------
    sql : query to run; combined with ``params`` by ``_convert_params``.
    index_col : optional
        Column(s) set as the index of the returned frame.
    coerce_float : boolean, default True
        Passed through to ``DataFrame.from_records``.
    params : optional
        Query parameters handed to ``_convert_params``.
    parse_dates : optional
        Handed to ``_parse_date_columns`` along with the frame.
    """
    call_args = _convert_params(sql, params)
    db_cursor = self.execute(*call_args)
    # the first entry of each description item is used as the column name
    header = [item[0] for item in db_cursor.description]
    fetched = self._fetchall_as_list(db_cursor)
    db_cursor.close()

    out = DataFrame.from_records(
        fetched, columns=header, coerce_float=coerce_float)

    _parse_date_columns(out, parse_dates)

    if index_col is not None:
        out.set_index(index_col, inplace=True)
    return out
def read_sql(self, sql, index_col=None, coerce_float=True,
             parse_dates=None, params=None):
    """Run a query through ``self.execute`` and wrap the rows in a frame.

    Parameters
    ----------
    sql : query to run; combined with ``params`` by ``_convert_params``.
    index_col : optional
        Column(s) set as the index of the returned frame.
    coerce_float : boolean, default True
        Passed through to ``DataFrame.from_records``.
    parse_dates : optional
        Handed to ``_parse_date_columns`` along with the frame.
    params : optional
        Query parameters handed to ``_convert_params``.
    """
    call_args = _convert_params(sql, params)
    outcome = self.execute(*call_args)
    fetched = outcome.fetchall()
    header = outcome.keys()

    out = DataFrame.from_records(
        fetched, columns=header, coerce_float=coerce_float)

    _parse_date_columns(out, parse_dates)

    if index_col is not None:
        out.set_index(index_col, inplace=True)
    return out
def _query_iterator(self, result, chunksize, columns, coerce_float=True,
                    parse_dates=None):
    """Yield one DataFrame per ``result.fetchmany(chunksize)`` batch.

    Iteration stops at the first empty batch.  Each batch is turned into
    a frame stored on ``self.frame``, passed through
    ``self._harmonize_columns`` and, when ``self.index`` is set,
    re-indexed in place before being yielded.
    """
    chunk = result.fetchmany(chunksize)
    while chunk:
        self.frame = DataFrame.from_records(
            chunk, columns=columns, coerce_float=coerce_float)

        self._harmonize_columns(parse_dates=parse_dates)

        if self.index is not None:
            self.frame.set_index(self.index, inplace=True)

        yield self.frame

        # the next batch is fetched only once the consumer resumes us
        chunk = result.fetchmany(chunksize)