def frame_query(sql, con, indexField='Time', asDataMatrix=False):
    """
    Returns a DataFrame corresponding to the result set of the query
    string.

    Optionally provide an indexField parameter to use one of the
    columns as the index. Otherwise will be 0 to len(results) - 1.

    Parameters
    ----------
    sql : string
        SQL query to be executed
    con : DB connection object
    indexField : string, optional
        column name to use as the index of the returned DataFrame.
        Pass None to fall back to a default integer index.
    asDataMatrix : bool, default False
        if True, return a DataMatrix instead of a DataFrame

    Returns
    -------
    DataFrame or DataMatrix

    Raises
    ------
    KeyError
        if indexField is not one of the result set's columns
    """
    data = array_query(sql, con)
    if indexField is not None:
        try:
            idx = Index(data.pop(indexField))
        except KeyError:
            raise KeyError('indexField %s not found! %s' % (indexField, sql))
    else:
        # list(...) so this works on Python 3 as well, where
        # dict.values() returns a non-indexable view object.
        # NOTE(review): assumes the result set has at least one column;
        # an empty query result would raise IndexError here.
        idx = Index(np.arange(len(list(data.values())[0])))

    if asDataMatrix:
        return DataMatrix(data, index=idx)
    else:
        return DataFrame(data=data, index=idx)
def test_mi_access(self):
    # GH 4145: selecting a single (level-0, level-1) column pair from a
    # frame with MultiIndex columns should work both through .loc and
    # through plain __getitem__, and give identical results.
    data = """h1 main h3 sub h5
0 a A 1 A1 1
1 b B 2 B1 2
2 c B 3 A1 3
3 d A 4 B2 4
4 e A 5 B2 5
5 f B 6 A2 6
"""

    df = pd.read_csv(StringIO(data), sep='\s+', index_col=0)
    # transpose so that ('main', 'sub') becomes a column MultiIndex
    df2 = df.set_index(['main', 'sub']).T.sort_index(1)
    index = Index(['h1', 'h3', 'h5'])
    columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
    expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T

    result = df2.loc[:, ('A', 'A1')]
    assert_frame_equal(result, expected)

    result = df2[('A', 'A1')]
    assert_frame_equal(result, expected)

    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    expected = DataFrame([['a', 1, 1]], index=['A1'],
                         columns=['h1', 'h3', 'h5'],).T
    result = df2['A']['A1']
    assert_frame_equal(result, expected)

    # selecting a non_unique from the 2nd level
    expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
                         index=Index(['B2', 'B2'], name='sub'),
                         columns=['h1', 'h3', 'h5'],).T
    result = df2['A']['B2']
    assert_frame_equal(result, expected)
def test_multi_nan_indexing(self): # GH 3588 df = DataFrame({"a":['R1', 'R2', np.nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, np.nan , 20]}) result = df.set_index(['a','b'], drop=False) expected = DataFrame({"a":['R1', 'R2', np.nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, np.nan , 20]}, index = [Index(['R1','R2',np.nan,'R4'],name='a'),Index(['C1','C2','C3','C4'],name='b')]) assert_frame_equal(result,expected)
def testWithXEffectsAndDroppedDummies(self):
    # x1 is converted to dummy variables; category 30 (rather than the
    # default) is the one dropped to avoid collinearity.
    model = ols(y=self.panel_y2, x=self.panel_x2, x_effects=["x1"],
                dropped_dummies={"x1": 30})

    assert_almost_equal(model._y.values.flat, [1, 4, 5])

    expected_x = [[1, 0, 14, 1],
                  [0, 1, 17, 1],
                  [0, 0, 48, 1]]
    assert_almost_equal(model._x.values, expected_x)

    expected_items = Index(["x1_6", "x1_9", "x2", "intercept"])
    self.assertTrue(expected_items.equals(model._x.items))
def testWithEntityEffectsAndDroppedDummies(self):
    # Entity fixed effects with entity "B" chosen as the dropped dummy,
    # so the remaining indicator column is FE_A.
    model = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True,
                dropped_dummies={"entity": "B"})

    assert_almost_equal(model._y.values.flat, [1, 4, 5])
    assert_almost_equal(model._x.values,
                        [[6, 14, 1, 1], [9, 17, 1, 1], [30, 48, 0, 1]])

    expected_items = Index(["x1", "x2", "FE_A", "intercept"])
    self.assertTrue(expected_items.equals(model._x.items))
def testWithEntityEffects(self):
    # Default entity fixed effects: the first entity's dummy is dropped,
    # leaving an FE_B indicator column.
    res = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True)

    assert_almost_equal(res._y.values.flat, [1, 4, 5])

    exogenous = [[6, 14, 0, 1],
                 [9, 17, 0, 1],
                 [30, 48, 1, 1]]
    assert_almost_equal(res._x.values, exogenous)

    labels = Index(['x1', 'x2', 'FE_B', 'intercept'])
    self.assertTrue(labels.equals(res._x.items))
def testWithXEffects(self):
    # Convert x1 into dummy columns using the default dropped category.
    res = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'])

    assert_almost_equal(res._y.values.flat, [1, 4, 5])

    dummies = [[0, 0, 14, 1],
               [0, 1, 17, 1],
               [1, 0, 48, 1]]
    assert_almost_equal(res._x.values, dummies)

    labels = Index(['x1_30', 'x1_9', 'x2', 'intercept'])
    self.assertTrue(labels.equals(res._x.items))
def testWithXEffectsAndConversionAndDroppedDummies(self):
    # Both x1 and x2 become dummy variables; for x2 the "3.14" category
    # is the one dropped instead of the default.
    model = ols(y=self.panel_y3, x=self.panel_x3, x_effects=["x1", "x2"],
                dropped_dummies={"x2": "3.14"})

    assert_almost_equal(model._y.values.flat, [1, 2, 3, 4])
    assert_almost_equal(model._x.values,
                        [[0, 0, 0, 0, 1],
                         [1, 0, 1, 0, 1],
                         [0, 1, 0, 1, 1],
                         [0, 0, 0, 0, 1]])

    expected = Index(["x1_B", "x1_C", "x2_1.59", "x2_2.65", "intercept"])
    self.assertTrue(expected.equals(model._x.items))
def testWithEntityEffectsAndDroppedDummies(self):
    # Entity effects, dropping entity 'B' so the FE_A indicator remains.
    fitted = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True,
                 dropped_dummies={'entity': 'B'})

    assert_almost_equal(fitted._y.values.flat, [1, 4, 5])

    design = [[6, 14, 1, 1],
              [9, 17, 1, 1],
              [30, 48, 0, 1]]
    assert_almost_equal(fitted._x.values, design)

    names = Index(['x1', 'x2', 'FE_A', 'intercept'])
    self.assertTrue(names.equals(fitted._x.items))
def testWithXEffectsAndConversion(self):
    # x1 and x2 both converted to dummies with default dropped categories.
    fitted = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'])

    assert_almost_equal(fitted._y.values.flat, [1, 2, 3, 4])
    assert_almost_equal(fitted._x.values,
                        [[0, 0, 0, 1, 1],
                         [1, 0, 0, 0, 1],
                         [0, 1, 1, 0, 1],
                         [0, 0, 0, 1, 1]])

    names = Index(['x1_B', 'x1_C', 'x2_2.65', 'x2_3.14', 'intercept'])
    self.assertTrue(names.equals(fitted._x.items))
def testWithXEffectsAndConversionAndDroppedDummies(self):
    # Dummy-encode x1 and x2, dropping the 'foo' category of x2.
    model = ols(y=self.panel_y3, x=self.panel_x3, x_effects=['x1', 'x2'],
                dropped_dummies={'x2': 'foo'})

    assert_almost_equal(model._y.values.flat, [1, 2, 3, 4])

    design = [[0, 0, 0, 0, 1],
              [1, 0, 1, 0, 1],
              [0, 1, 0, 1, 1],
              [0, 0, 0, 0, 1]]
    assert_almost_equal(model._x.values, design)

    names = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept'])
    self.assertTrue(names.equals(model._x.items))
def testWithEntityEffectsAndDroppedDummies(self):
    # Entity effects with 'B' as the omitted entity dummy.
    output = ols(y=self.panel_y2, x=self.panel_x2, entity_effects=True,
                 dropped_dummies={'entity': 'B'})

    assert_almost_equal(output._y.values.flat, [1, 4, 5])
    assert_almost_equal(output._x.values,
                        [[6, 14, 1, 1], [9, 17, 1, 1], [30, 48, 0, 1]])

    wanted = Index(['x1', 'x2', 'FE_A', 'intercept'])
    self.assertTrue(wanted.equals(output._x.items))
def setUp(self):
    # Panel index fixtures: ``self.index`` has each (major, minor) label
    # pair at most once, while ``self.incon`` repeats the (1, 1) pair.
    majors = Index([1, 2, 3, 4])
    minors = Index([1, 2])

    self.index = LongPanelIndex(majors, minors,
                                np.array([0, 0, 1, 2, 3, 3]),
                                np.array([0, 1, 0, 1, 0, 1]))

    self.incon = LongPanelIndex(majors, minors,
                                np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]),
                                np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]))
def test_dups_fancy_indexing(self):
    # GH 3455: selecting columns from a frame with duplicate column
    # labels should return every matching column, in selection order.
    from pandas.util.testing import makeCustomDataframe as mkdf
    df = mkdf(10, 3)
    df.columns = ['a', 'a', 'b']
    cols = ['b', 'a']
    result = df[['b', 'a']].columns
    expected = Index(['b', 'a', 'a'])
    self.assert_(result.equals(expected))

    # across dtypes
    df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']],
                   columns=list('aaaaaaa'))
    # smoke-test repr paths on the duplicate-column frame
    df.head()
    str(df)
    result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']])
    result.columns = list('aaaaaaa')

    df_v = df.iloc[:, 4]
    res_v = result.iloc[:, 4]

    assert_frame_equal(df, result)

    # GH 3561, dups not in selected order
    ind = ['A', 'A', 'B', 'C']
    df = DataFrame({'test': range(len(ind))}, index=ind)
    rows = ['C', 'B']
    res = df.ix[rows]
    self.assert_(rows == list(res.index))
    res = df.ix[Index(rows)]
    self.assert_(Index(rows).equals(res.index))

    # a label ('E') that is missing from the index entirely
    rows = ['C', 'B', 'E']
    res = df.ix[rows]
    self.assert_(rows == list(res.index))

    # inconsistent returns for unique/duplicate indices when values are missing
    df = DataFrame(randn(4, 3), index=list('ABCD'))
    expected = df.ix[['E']]

    dfnu = DataFrame(randn(5, 3), index=list('AABCD'))
    result = dfnu.ix[['E']]
    assert_frame_equal(result, expected)
def test_reindex_corner(self):
    # Reindexing the columns of an (empty-row) frame should just install
    # the requested column Index.
    cols = Index(['a', 'b', 'c'])
    frame = self.empty.reindex(index=[1, 2, 3])
    reindexed = frame.reindex(columns=cols)
    self.assert_(reindexed.columns.equals(cols))

    # ints are weird
    smaller = self.intframe.reindex(columns=['A', 'B', 'E'])
    self.assert_(smaller['E'].dtype == np.float_)
def test_dups_fancy_indexing(self):
    # GH 3455: selecting columns by a list of labels on a frame with
    # duplicate column names returns all matching columns.
    from pandas.util.testing import makeCustomDataframe as mkdf
    frame = mkdf(10, 3)
    frame.columns = ['a', 'a', 'b']
    selected = frame[['b', 'a']].columns
    self.assert_(selected.equals(Index(['b', 'a', 'a'])))
def getMixedTypeDict():
    """Return an (index, data) pair for building a mixed-dtype frame fixture."""
    idx = Index(['a', 'b', 'c', 'd', 'e'])
    values = {}
    values['A'] = [0., 1., 2., 3., 4.]
    values['B'] = [0., 1., 0., 1., 0.]
    values['C'] = ['foo1', 'foo2', 'foo3', 'foo4', 'foo5']
    values['D'] = DateRange('1/1/2009', periods=5)
    return idx, values
def testFiltering(self):
    # ols should regress only on the fully-observed panel observations,
    # while keeping the larger unfiltered design matrix in _x_filtered.
    result = ols(y=self.panel_y2, x=self.panel_x2)

    x = result._x
    # recover the distinct dates actually used in the regression
    index = [x.major_axis[i] for i in x.index.major_labels]
    index = Index(sorted(set(index)))
    exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
    self.assertTrue(exp_index.equals(index))

    # recover the distinct entities actually used
    index = [x.minor_axis[i] for i in x.index.minor_labels]
    index = Index(sorted(set(index)))
    exp_index = Index(["A", "B"])
    self.assertTrue(exp_index.equals(index))

    x = result._x_filtered
    # the filtered design matrix retains the additional 2000-01-04 date
    index = [x.major_axis[i] for i in x.index.major_labels]
    index = Index(sorted(set(index)))
    exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3),
                       datetime(2000, 1, 4)])
    self.assertTrue(exp_index.equals(index))

    assert_almost_equal(result._y.values.flat, [1, 4, 5])

    exp_x = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
    assert_almost_equal(exp_x, result._x.values)

    exp_x_filtered = [[6, 14, 1], [9, 17, 1], [30, 48, 1],
                      [11, 20, 1], [12, 21, 1]]
    assert_almost_equal(exp_x_filtered, result._x_filtered.values)

    # fitted values should line up with the filtered major axis
    self.assertTrue(result._x_filtered.major_axis.equals(
        result.y_fitted.index))
def test_operators_corner(self):
    series = self.ts

    # adding an all-empty series produces NaN everywhere
    empty = Series([], index=Index([]))
    summed = series + empty
    self.assert_(np.isnan(summed).all())

    # empty + empty stays empty
    summed = empty + Series([], index=Index([]))
    self.assert_(len(summed) == 0)

    # timedelta arithmetic with partially-overlapping indexes
    deltas = Series([timedelta(1)] * 5, index=np.arange(5))
    sub_deltas = deltas[::2]
    deltas5 = deltas * 5
    deltas = deltas + sub_deltas

    # float + int
    int_ts = self.ts.astype(int)[:-5]
    added = self.ts + int_ts
    expected = self.ts.values[:-5] + int_ts.values
    self.assert_(np.array_equal(added[:-5], expected))
def test_constructor_dict(self):
    # Construct frames from dicts of Series, with and without an explicit
    # column list, plus assorted invalid-input corner cases.
    frame = self.klass({'col1': self.ts1,
                        'col2': self.ts2})

    common.assert_dict_equal(self.ts1, frame['col1'], compare_keys=False)
    common.assert_dict_equal(self.ts2, frame['col2'], compare_keys=False)

    frame = self.klass({'col1': self.ts1,
                        'col2': self.ts2},
                       columns=['col2', 'col3', 'col4'])

    # length is governed by the data, not the requested column list
    self.assertEqual(len(frame), len(self.ts2))
    self.assert_('col1' not in frame)
    self.assert_(np.isnan(frame['col3']).all())

    # Corner cases
    self.assertEqual(len(self.klass({})), 0)

    # BUG FIX: was ``lambda x: ...`` -- assertRaises invokes the callable
    # with no arguments, so the old one-argument lambda raised TypeError
    # before the constructor was ever exercised.
    self.assertRaises(Exception, lambda: self.klass([self.ts1, self.ts2]))

    # mixing a dict column with an array column is not allowed
    self.assertRaises(Exception, self.klass, {'A': {'a': 'a', 'b': 'b'},
                                              'B': ['a', 'b']})

    # can I rely on the order?
    self.assertRaises(Exception, self.klass, {'A': ['a', 'b'],
                                              'B': {'a': 'a', 'b': 'b'}})
    self.assertRaises(Exception, self.klass, {'A': ['a', 'b'],
                                              'B': Series(['a', 'b'],
                                                          index=['a', 'b'])})

    # Length-one dict micro-optimization
    frame = self.klass({'A': {'1': 1, '2': 2}})
    self.assert_(np.array_equal(frame.index, ['1', '2']))

    # empty dict plus index: the passed Index is used by reference
    idx = Index([0, 1, 2])
    frame = self.klass({}, index=idx)
    self.assert_(frame.index is idx)
def test_reindex(self):
    # Reindex to the index of ts1: values present in both frames must
    # match the originals, and labels new to the frame must be NaN.
    newFrame = self.frame.reindex(self.ts1.index)

    for col in newFrame.cols():
        for idx, val in newFrame[col].iteritems():
            if idx in self.frame.index:
                if np.isnan(val):
                    self.assert_(np.isnan(self.frame[col][idx]))
                else:
                    self.assertEqual(val, self.frame[col][idx])
            else:
                self.assert_(np.isnan(val))

    # every column shares the reindexed frame's index
    for col, series in newFrame.iteritems():
        self.assert_(common.equalContents(series.index, newFrame.index))

    # reindexing to an empty Index yields an empty frame
    emptyFrame = self.frame.reindex(Index([]))
    self.assert_(len(emptyFrame.index) == 0)

    # Cython code should be unit-tested directly
    # non-contiguous target index (every other date)
    nonContigFrame = self.frame.reindex(self.ts1.index[::2])

    for col in nonContigFrame.cols():
        for idx, val in nonContigFrame[col].iteritems():
            if idx in self.frame.index:
                if np.isnan(val):
                    self.assert_(np.isnan(self.frame[col][idx]))
                else:
                    self.assertEqual(val, self.frame[col][idx])
            else:
                self.assert_(np.isnan(val))

    for col, series in nonContigFrame.iteritems():
        self.assert_(common.equalContents(series.index,
                                          nonContigFrame.index))

    # corner cases

    # Same index, copies values; the Index object itself is reused
    newFrame = self.frame.reindex(self.frame.index)
    self.assert_(newFrame.index is self.frame.index)

    # length zero: frame is falsy but keeps all its columns
    newFrame = self.frame.reindex([])
    self.assert_(not newFrame)
    self.assertEqual(len(newFrame.cols()), len(self.frame.cols()))

    # pass non-Index
    newFrame = self.frame.reindex(list(self.ts1.index))
    self.assert_(newFrame.index.equals(self.ts1.index))
def test_dups_fancy_indexing(self):
    # GH 3455: selecting from duplicate column labels returns every match
    from pandas.util.testing import makeCustomDataframe as mkdf
    frame = mkdf(10, 3)
    frame.columns = ['a', 'a', 'b']
    selected = frame[['b', 'a']].columns
    self.assert_(selected.equals(Index(['b', 'a', 'a'])))

    # duplicate labels across mixed dtypes
    row = [[1, 2, 1., 2., 3., 'foo', 'bar']]
    df = DataFrame(row, columns=list('aaaaaaa'))
    # smoke-test repr paths
    df.head()
    str(df)
    other = DataFrame(row)
    other.columns = list('aaaaaaa')

    df_v = df.iloc[:, 4]
    res_v = other.iloc[:, 4]

    assert_frame_equal(df, other)
def testFiltering(self):
    # ols should regress only on the fully-observed panel observations,
    # while keeping the larger unfiltered design matrix in _x_filtered.
    result = ols(y=self.panel_y2, x=self.panel_x2)

    x = result._x
    # recover the distinct dates actually used in the regression
    index = [x.major_axis[i] for i in x.index.major_labels]
    index = Index(sorted(set(index)))
    exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
    self.assertTrue(exp_index.equals(index))

    # recover the distinct entities actually used
    index = [x.minor_axis[i] for i in x.index.minor_labels]
    index = Index(sorted(set(index)))
    exp_index = Index(['A', 'B'])
    self.assertTrue(exp_index.equals(index))

    x = result._x_filtered
    # the filtered design matrix retains the additional 2000-01-04 date
    index = [x.major_axis[i] for i in x.index.major_labels]
    index = Index(sorted(set(index)))
    exp_index = Index(
        [datetime(2000, 1, 1), datetime(2000, 1, 3), datetime(2000, 1, 4)])
    self.assertTrue(exp_index.equals(index))

    assert_almost_equal(result._y.values.flat, [1, 4, 5])

    exp_x = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
    assert_almost_equal(exp_x, result._x.values)

    exp_x_filtered = [[6, 14, 1], [9, 17, 1], [30, 48, 1], [11, 20, 1],
                      [12, 21, 1]]
    assert_almost_equal(exp_x_filtered, result._x_filtered.values)

    # fitted values should line up with the filtered major axis
    self.assertTrue(
        result._x_filtered.major_axis.equals(result.y_fitted.index))
def makeDateIndex(k):
    """Return an Index of k consecutive dates starting 2000-01-01."""
    rng = DateRange(datetime(2000, 1, 1), periods=k)
    return Index(list(rng))
def makeIntIndex(k):
    """Return an Index of the integers 0 through k - 1."""
    values = np.arange(k)
    return Index(values)
def test_set_index(self):
    # Assigning a new Index object stores it by reference, not by copy.
    new_index = Index(np.arange(len(self.mixed_frame)))
    self.mixed_frame.index = new_index
    self.assert_(self.mixed_frame['foo'].index is new_index)
def test_parsers(self):
    # Exercise every parsing entry point (parse_time_string, to_datetime,
    # Timestamp, DatetimeIndex, date_range) against a table of date
    # strings and their expected datetimes, then check NaT handling.

    # https://github.com/dateutil/dateutil/issues/217
    import dateutil
    yearfirst = dateutil.__version__ >= LooseVersion('2.5.0')

    cases = {'2011-01-01': datetime.datetime(2011, 1, 1),
             '2Q2005': datetime.datetime(2005, 4, 1),
             '2Q05': datetime.datetime(2005, 4, 1),
             '2005Q1': datetime.datetime(2005, 1, 1),
             '05Q1': datetime.datetime(2005, 1, 1),
             '2011Q3': datetime.datetime(2011, 7, 1),
             '11Q3': datetime.datetime(2011, 7, 1),
             '3Q2011': datetime.datetime(2011, 7, 1),
             '3Q11': datetime.datetime(2011, 7, 1),

             # quarterly without space
             '2000Q4': datetime.datetime(2000, 10, 1),
             '00Q4': datetime.datetime(2000, 10, 1),
             '4Q2000': datetime.datetime(2000, 10, 1),
             '4Q00': datetime.datetime(2000, 10, 1),
             '2000q4': datetime.datetime(2000, 10, 1),
             '2000-Q4': datetime.datetime(2000, 10, 1),
             '00-Q4': datetime.datetime(2000, 10, 1),
             '4Q-2000': datetime.datetime(2000, 10, 1),
             '4Q-00': datetime.datetime(2000, 10, 1),
             '00q4': datetime.datetime(2000, 10, 1),
             '2005': datetime.datetime(2005, 1, 1),
             '2005-11': datetime.datetime(2005, 11, 1),
             '2005 11': datetime.datetime(2005, 11, 1),
             '11-2005': datetime.datetime(2005, 11, 1),
             '11 2005': datetime.datetime(2005, 11, 1),
             '200511': datetime.datetime(2020, 5, 11),
             '20051109': datetime.datetime(2005, 11, 9),
             '20051109 10:15': datetime.datetime(2005, 11, 9, 10, 15),
             '20051109 08H': datetime.datetime(2005, 11, 9, 8, 0),
             '2005-11-09 10:15': datetime.datetime(2005, 11, 9, 10, 15),
             '2005-11-09 08H': datetime.datetime(2005, 11, 9, 8, 0),
             '2005/11/09 10:15': datetime.datetime(2005, 11, 9, 10, 15),
             '2005/11/09 08H': datetime.datetime(2005, 11, 9, 8, 0),
             "Thu Sep 25 10:36:28 2003": datetime.datetime(2003, 9, 25,
                                                           10, 36, 28),
             "Thu Sep 25 2003": datetime.datetime(2003, 9, 25),
             "Sep 25 2003": datetime.datetime(2003, 9, 25),
             "January 1 2014": datetime.datetime(2014, 1, 1),

             # GH 10537
             '2014-06': datetime.datetime(2014, 6, 1),
             '06-2014': datetime.datetime(2014, 6, 1),
             '2014-6': datetime.datetime(2014, 6, 1),
             '6-2014': datetime.datetime(2014, 6, 1),

             '20010101 12': datetime.datetime(2001, 1, 1, 12),
             '20010101 1234': datetime.datetime(2001, 1, 1, 12, 34),
             '20010101 123456': datetime.datetime(2001, 1, 1, 12, 34, 56),
             }

    for date_str, expected in compat.iteritems(cases):
        result1, _, _ = tools.parse_time_string(date_str,
                                                yearfirst=yearfirst)
        result2 = to_datetime(date_str, yearfirst=yearfirst)
        result3 = to_datetime([date_str], yearfirst=yearfirst)
        # result5 is used below
        result4 = to_datetime(np.array([date_str], dtype=object),
                              yearfirst=yearfirst)
        result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
        # result7 is used below
        result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
        result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst)

        for res in [result1, result2]:
            self.assertEqual(res, expected)
        for res in [result3, result4, result6, result8, result9]:
            exp = DatetimeIndex([pd.Timestamp(expected)])
            tm.assert_index_equal(res, exp)

        # these really need to have yearfirst, but we don't support
        if not yearfirst:
            result5 = Timestamp(date_str)
            self.assertEqual(result5, expected)
            result7 = date_range(date_str, freq='S', periods=1,
                                 yearfirst=yearfirst)
            self.assertEqual(result7, expected)

    # NaT
    result1, _, _ = tools.parse_time_string('NaT')
    result2 = to_datetime('NaT')
    result3 = Timestamp('NaT')
    result4 = DatetimeIndex(['NaT'])[0]
    # BUG FIX: previously all four assertions re-checked result1, so
    # results 2-4 were never verified.
    self.assertTrue(result1 is tslib.NaT)
    self.assertTrue(result2 is tslib.NaT)
    self.assertTrue(result3 is tslib.NaT)
    self.assertTrue(result4 is tslib.NaT)
def makeStringIndex(k):
    """Return an Index of k random 10-character strings.

    Uses range() rather than xrange() so the helper also works on
    Python 3, where xrange was removed; on Python 2 the behavior is
    identical since the sequence is only iterated once.
    """
    return Index([rands(10) for _ in range(k)])