def test_to_records_dt64(self):
    """Datetime index values are reachable through the ``index`` record field."""
    frame = DataFrame(
        [["one", "two", "three"], ["four", "five", "six"]],
        index=date_range("2012-01-01", "2012-01-02"),
    )

    # Default call: the first record's index field equals the first label.
    default_records = frame.to_records()
    assert default_records['index'][0] == frame.index[0]

    # Conversion switched off: compare against the raw index value instead.
    rs = frame.to_records(convert_datetime64=False)
    first_raw = frame.index.values[0]
    assert rs['index'][0] == first_raw
def test_to_records_dtype(self, kwargs, expected): # see gh-18146 df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]}) if not isinstance(expected, np.recarray): with pytest.raises(expected[0], match=expected[1]): df.to_records(**kwargs) else: result = df.to_records(**kwargs) tm.assert_almost_equal(result, expected)
def test_to_records_index_name(self):
    """Index names, or their fallbacks, show up as record field names."""
    # A named index contributes a field carrying that name.
    named = DataFrame(np.random.randn(3, 3))
    named.index.name = 'X'
    assert 'X' in named.to_records().dtype.fields

    # An unnamed index is stored under 'index'.
    df = DataFrame(np.random.randn(3, 3))
    fields = df.to_records().dtype.fields
    assert 'index' in fields

    # MultiIndex with one unnamed level: a 'level_0' field is expected.
    df.index = MultiIndex.from_tuples([('a', 'x'), ('a', 'y'), ('b', 'z')])
    df.index.names = ['A', None]
    fields = df.to_records().dtype.fields
    assert 'level_0' in fields
def test_to_records_dict_like(self): # see gh-18146 class DictLike(object): def __init__(self, **kwargs): self.d = kwargs.copy() def __getitem__(self, key): return self.d.__getitem__(key) def __contains__(self, key): return key in self.d def keys(self): return self.d.keys() df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]}) dtype_mappings = dict(column_dtypes=DictLike(**{"A": np.int8, "B": np.float32}), index_dtypes="<U2") result = df.to_records(**dtype_mappings) expected = np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")], dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")]) tm.assert_almost_equal(result, expected)
def test_to_records_with_multindex(self): # GH3189 index = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] data = np.zeros((8, 4)) df = DataFrame(data, index=index) r = df.to_records(index=True)['level_0'] assert 'bar' in r assert 'one' not in r
def test_to_records_datetimeindex_with_tz(self, tz): # GH13937 dr = date_range('2016-01-01', periods=10, freq='S', tz=tz) df = DataFrame({'datetime': dr}, index=dr) expected = df.to_records() result = df.tz_convert("UTC").to_records() # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected)
def test_to_records_dt64(self):
    """Passing ``convert_datetime64`` explicitly must emit a FutureWarning."""
    frame = DataFrame(
        [["one", "two", "three"], ["four", "five", "six"]],
        index=date_range("2012-01-01", "2012-01-02"),
    )

    # convert_datetime64 defaults to None
    default_first = frame.to_records()['index'][0]
    assert frame.index.values[0] == default_first

    # check for FutureWarning if convert_datetime64=False is passed
    with tm.assert_produces_warning(FutureWarning):
        raw_first = frame.index.values[0]
        records = frame.to_records(convert_datetime64=False)
        assert raw_first == records['index'][0]

    # check for FutureWarning if convert_datetime64=True is passed
    with tm.assert_produces_warning(FutureWarning):
        label_first = frame.index[0]
        records = frame.to_records(convert_datetime64=True)
        assert label_first == records['index'][0]
def ColorTransform(data):
    """Append SDSS-transformed magnitude columns to a catalogue.

    ``add_SDSS_colors`` is run once for ``MAG_DETMODEL`` and once for every
    tag in ``magTaglist``.  The returned frames are joined column-wise, in
    that order, behind a frame built from ``data.data``.

    Parameters
    ----------
    data
        Input catalogue.  It is handed to ``add_SDSS_colors`` and must expose
        a ``.data`` attribute that ``DataFrame`` accepts.
        NOTE(review): presumably a masked/structured array -- confirm.

    Returns
    -------
    numpy.recarray
        Result of ``DataFrame.to_records()``; the frame index is therefore
        included as the leading field.
    """
    from pandas import DataFrame, concat

    # Magnitude types transformed in addition to MAG_DETMODEL.  Earlier
    # revisions also listed MAG_DETMODEL, MAG_PETRO, MAG_HYBRID, MAG_PSF,
    # MAG_APER_2 and MAG_APER_7..MAG_APER_10 here.
    magTaglist = ['MAG_MODEL', 'MAG_AUTO', 'MAG_APER_3', 'MAG_APER_4',
                  'MAG_APER_5', 'MAG_APER_6']

    color_frames = [add_SDSS_colors(data, magTag_template='MAG_DETMODEL')]
    color_frames.extend(
        add_SDSS_colors(data, magTag_template=magTag) for magTag in magTaglist
    )

    # One concat over all pieces instead of re-copying a growing frame on
    # every loop iteration.
    merged = concat([DataFrame(data.data)] + color_frames, axis=1)
    return merged.to_records()
def test_to_records_with_categorical(self): # GH8626 # dict creation df = DataFrame({'A': list('abc')}, dtype='category') expected = Series(list('abc'), dtype='category', name='A') tm.assert_series_equal(df['A'], expected) # list-like creation df = DataFrame(list('abc'), dtype='category') expected = Series(list('abc'), dtype='category', name=0) tm.assert_series_equal(df[0], expected) # to record array # this coerces result = df.to_records() expected = np.rec.array([(0, 'a'), (1, 'b'), (2, 'c')], dtype=[('index', '=i8'), ('0', 'O')]) tm.assert_almost_equal(result, expected)
def add_SDSS_colors(data, magTag_template='MAG_DETMODEL', independent=None):
    """Transform the G/R/I/Z magnitudes of one magnitude type to SDSS.

    For each filter the column ``<magTag_template>_<FILTER>`` is read from
    ``data``; the four arrays are passed to ``transform_DES_to_SDSS`` and the
    results are labelled ``<magTag_template>_<FILTER>_SDSS``.

    Parameters
    ----------
    data
        Catalogue supporting ``len()`` and lookup by column name.
    magTag_template : str
        Prefix of the magnitude columns to transform.
    independent
        When equal to ``'yes'`` the transformed columns are appended to the
        input catalogue and a record array is returned; for any other value
        only the transformed columns are returned.

    Returns
    -------
    pandas.DataFrame or numpy.recarray
        A frame with one column per filter, or (``independent == 'yes'``)
        the records of the input catalogue plus those columns.
    """
    from pandas import DataFrame, concat

    # Single-argument call form prints identically on Python 2 and 3.
    print("Doing des->sdss color transforms for " + magTag_template)

    filters = ['G', 'R', 'I', 'Z']
    des_tags = [magTag_template + '_' + thisFilter for thisFilter in filters]
    magTags = [tag + '_SDSS' for tag in des_tags]

    desMags = np.empty([len(filters), len(data)])
    for i, tag in enumerate(des_tags):
        desMags[i, :] = data[tag]

    sdssMags = transform_DES_to_SDSS(desMags[0, :], desMags[1, :],
                                     desMags[2, :], desMags[3, :])
    sdss_frame = DataFrame(sdssMags, index=magTags).T

    if independent != 'yes':
        return sdss_frame

    # Build the full frame from the *input* catalogue.  The previous code had
    # already rebound ``data`` to the SDSS frame at this point, so it joined
    # the SDSS columns with themselves and dropped the catalogue.
    # NOTE(review): assumes DataFrame() accepts ``data`` directly -- confirm.
    fulldata = DataFrame(data)
    return concat([fulldata, sdss_frame], axis=1).to_records()
def slide_9():
    """Print a walk-through of long <-> wide reshaping on the macro data.

    Reads the CSV at ``MACRODATAPATH``, keeps the ``realgdp``, ``infl`` and
    ``unemp`` columns on a timestamp index derived from year/quarter, stacks
    them into long format and prints several pivoted/unstacked views.
    """
    data = pd.read_csv(MACRODATAPATH)
    periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
    data = DataFrame(data.to_records(),
                     columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                     index=periods.to_timestamp('D', 'end'))

    # Long format: one row per (date, item) pair with the reading in 'value'.
    ldata = data.stack().reset_index().rename(columns={0: 'value'})
    print(ldata[:10])

    # pivot() arguments are passed by keyword: newer pandas releases no
    # longer accept them positionally, and older ones accept both.
    pivoted = ldata.pivot(index='date', columns='item', values='value')
    print(pivoted.head())

    ldata['value2'] = np.random.randn(len(ldata))
    print(ldata[:10])

    # Without ``values`` every remaining column is pivoted, which yields
    # hierarchical columns.
    pivoted = ldata.pivot(index='date', columns='item')
    print(pivoted[:5])
    print(pivoted['value'][:5])

    unstacked = ldata.set_index(['date', 'item']).unstack('item')
    print(unstacked[:7])
def add_df(self, df: pd.DataFrame):
    """Add every row of a pandas DataFrame via ``add_rows``.

    Rows are passed as a list of tuples taken from ``df.to_records()``, so
    each tuple starts with the row's index value.
    """
    records = df.to_records()
    self.add_rows(records.tolist())
'line_01':['a','b','c','d'], 'line_02':[1,2,3,4], 'line_03':[4,3,2,1] } f = DataFrame(data) # 列旋转为行,DataFrame->Series f1 = f.stack() # print f1 # 行旋转为列,Series->DataFrame f2 = f1.unstack() # print f2 f3 = pd.read_csv('rcs/macrodata.csv') periods = pd.PeriodIndex(year=f3.year, quarter=f3.quarter, name='date') f3 = DataFrame(f3.to_records(), columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'), index=periods.to_timestamp('D', 'end')) ldata = f3.stack().reset_index().rename(columns={0: 'value'}) wdata = ldata.pivot('date', 'item', 'value') # print ldata # print wdata data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4, 'k2': [1, 1, 2, 3, 3, 4, 4]}) # 去除重复值 # data.duplicated()会返回一个Bool的Series,表示各行是否是重复值 s1 = data.duplicated() f4 = data.ix[np.logical_not(s1),] # print f4 # drop_dumplicates会直接返回一个除去重复值的DataFrame
def df_to_bytes_np_records_(df: pd.DataFrame) -> bytes:
    """Serialise ``df`` to the bytes ``np.save`` writes for its record array.

    The frame is converted with ``to_records()`` (index included) and saved
    with ``allow_pickle=False``.
    """
    records = df.to_records()
    with BytesIO() as buffer:
        np.save(buffer, records, allow_pickle=False)
        return buffer.getvalue()
def __add_dataframe(self, name: str, data: pandas.DataFrame, desc: str):
    """Create a table named ``name`` from ``data`` and flush it.

    The frame is converted with ``to_records(index=False)`` and passed to
    ``self._file.create_table`` together with ``self._folder`` and ``desc``.
    """
    records = data.to_records(index=False)
    new_table = self._file.create_table(self._folder, name, records, desc)
    new_table.flush()
def test_from_records_sequencelike(self): df = DataFrame( { "A": np.array(np.random.randn(6), dtype=np.float64), "A1": np.array(np.random.randn(6), dtype=np.float64), "B": np.array(np.arange(6), dtype=np.int64), "C": ["foo"] * 6, "D": np.array([True, False] * 3, dtype=bool), "E": np.array(np.random.randn(6), dtype=np.float32), "E1": np.array(np.random.randn(6), dtype=np.float32), "F": np.array(np.arange(6), dtype=np.int32), } ) # this is actually tricky to create the recordlike arrays and # have the dtypes be intact blocks = df._to_dict_of_blocks() tuples = [] columns = [] dtypes = [] for dtype, b in blocks.items(): columns.extend(b.columns) dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns]) for i in range(len(df.index)): tup = [] for _, b in blocks.items(): tup.extend(b.iloc[i].values) tuples.append(tuple(tup)) recarray = np.array(tuples, dtype=dtypes).view(np.recarray) recarray2 = df.to_records() lists = [list(x) for x in tuples] # tuples (lose the dtype info) result = DataFrame.from_records(tuples, columns=columns).reindex( columns=df.columns ) # created recarray and with to_records recarray (have dtype info) result2 = DataFrame.from_records(recarray, columns=columns).reindex( columns=df.columns ) result3 = DataFrame.from_records(recarray2, columns=columns).reindex( columns=df.columns ) # list of tupels (no dtype info) result4 = DataFrame.from_records(lists, columns=columns).reindex( columns=df.columns ) tm.assert_frame_equal(result, df, check_dtype=False) tm.assert_frame_equal(result2, df) tm.assert_frame_equal(result3, df) tm.assert_frame_equal(result4, df, check_dtype=False) # tuples is in the order of the columns result = DataFrame.from_records(tuples) tm.assert_index_equal(result.columns, RangeIndex(8)) # test exclude parameter & we are casting the results here (as we don't # have dtype info to recover) columns_to_test = [columns.index("C"), columns.index("E1")] exclude = list(set(range(8)) - set(columns_to_test)) result = 
DataFrame.from_records(tuples, exclude=exclude) result.columns = [columns[i] for i in sorted(columns_to_test)] tm.assert_series_equal(result["C"], df["C"]) tm.assert_series_equal(result["E1"], df["E1"].astype("float64"))
def test_to_records_floats(self): df = DataFrame(np.random.rand(10, 10)) df.to_records()
# Interactive-style walkthrough: bare expressions are kept as written; they
# only display a value when run in a REPL or notebook.
# NOTE(review): ``result`` is defined outside this excerpt.
df = DataFrame({
    'left': result,
    'right': result + 5
}, columns=pd.Index(['left', 'right'], name='side'))
df
df.unstack('state')
df.unstack('state').stack('side')

### Converting between long and wide formats
#1
data = pd.read_csv('d:data/macrodata.csv')
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
data = DataFrame(data.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))
# Long format: one row per (date, item) pair with the reading in 'value'.
ldata = data.stack().reset_index().rename(columns={0: 'value'})
# pivot() arguments are passed by keyword: newer pandas releases no longer
# accept them positionally, and older ones accept both.
wdata = ldata.pivot(index='date', columns='item', values='value')

#2
ldata[:10]
pivoted = ldata.pivot(index='date', columns='item', values='value')
pivoted.head()
ldata['value2'] = np.random.randn(len(ldata))
ldata[:10]
#3
# Interactive-style walkthrough: bare expressions are kept as written; they
# only display a value when run in a REPL or notebook.
# NOTE(review): ``result`` is defined outside this excerpt.
df = DataFrame({'left': result, 'right': result + 5},
               columns=pd.Index(['left', 'right'], name='side'))
df
df.unstack('state')
df.unstack('state').stack('side')

### Converting between long and wide formats
#1
data = pd.read_csv('d:data/macrodata.csv')
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
data = DataFrame(data.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))
# Long format: one row per (date, item) pair with the reading in 'value'.
ldata = data.stack().reset_index().rename(columns={0: 'value'})
# pivot() arguments are passed by keyword: newer pandas releases no longer
# accept them positionally, and older ones accept both.
wdata = ldata.pivot(index='date', columns='item', values='value')

#2
ldata[:10]
pivoted = ldata.pivot(index='date', columns='item', values='value')
pivoted.head()
ldata['value2'] = np.random.randn(len(ldata))
ldata[:10]
# Interactive-style walkthrough: bare expressions are kept as written; they
# only display a value when run in a REPL or notebook.
# NOTE(review): ``df1`` is defined outside this excerpt.
df2 = DataFrame({'a': [5., 4., np.nan, 3., 7.],
                 'b': [np.nan, 3., 4., 6., 8.]})
df1.combine_first(df2)

# Reshaping with hierarchical indexing=========================================
data = DataFrame(np.arange(6).reshape((2, 3)),
                 index=pd.Index(['Ohio', 'Colorado'], name='state'),
                 columns=pd.Index(['one', 'two', 'three'], name='number'))
result = data.stack()  # stack a DataFrame into a multi-index Series; unstack inverts it
result.unstack(0)
result.unstack('state')

# Pivoting "long" to "wide" format
data = pd.read_csv('macrodata.csv')
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
data = DataFrame(data.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))
ldata = data.stack().reset_index().rename(columns={0: 'value'})
# pivot() arguments are passed by keyword: newer pandas releases no longer
# accept them positionally, and older ones accept both.
wdata = ldata.pivot(index='date', columns='item', values='value')

# Removing duplicates===============================
data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4,
                  'k2': [1, 1, 2, 3, 3, 4, 4]})
data.duplicated()
data.drop_duplicates()
data['v1'] = range(7)
data.drop_duplicates(['k1'])
# ``keep='last'`` replaces the removed ``take_last=True`` keyword.
data.drop_duplicates(['k1', 'k2'], keep='last')

# Replacing values-------------------------------
data = Series([1., -999., 2., -999., -1000., 3.])
def DESmag_to_SDSSmag(sdss_data, des_data):
    """Fit DES magnitudes to SDSS ones and append them to the DES catalogue.

    The two inputs are first paired with ``match``.  One worker process per
    (colour kind, filter) combination runs ``scikitfitting``; each result is
    tagged ``<COLOURKIND>_<FILTER>_DES``.  The fitted columns and the output
    of ``DESfib2mag_to_SDSSfib2mag`` (stored as ``FIBER2MAG_I_DES``) are
    joined column-wise to the matched DES catalogue.

    Parameters
    ----------
    sdss_data, des_data
        Catalogues handed to ``match``.
        NOTE(review): the matched DES catalogue must contain an ``index``
        column, which is dropped before merging -- confirm against ``match``.

    Returns
    -------
    tuple
        ``(sdss, des)``: the matched SDSS catalogue as returned by ``match``
        and the extended DES catalogue as ``DataFrame.to_records()`` output.
    """
    import time
    from multiprocessing import Process, Queue
    from pandas import DataFrame, concat

    sdss, des = match(sdss_data, des_data)
    sys.stdout.write('DESmag to SDSS mag ')
    filters = ['G', 'R', 'I', 'Z']
    Scolorkind = ['MODELMAG', 'CMODELMAG']
    t1 = time.time()

    # NOTE: a nested function cannot be pickled, so using it as a Process
    # target relies on the fork start method.
    def multiprocessing_mag(q, sdss, des, thisfilter, Scolor):
        magTag = Scolor + '_' + thisfilter + '_DES'
        q.put((magTag, scikitfitting(sdss, des, filter=thisfilter,
                                     Scolorkind=Scolor)))
        sys.stdout.write('.')

    d_queue = Queue()
    d_processes = [
        Process(target=multiprocessing_mag,
                args=(d_queue, sdss, des, thisfilter, Scolor))
        for Scolor in Scolorkind
        for thisfilter in filters
    ]
    for p in d_processes:
        p.start()

    # Results arrive in completion order; each carries its own tag, so the
    # order does not matter.  Drain the queue *before* joining: a child that
    # still has queued data cannot exit.
    result = [d_queue.get() for _ in d_processes]
    for p in d_processes:
        p.join()  # reap the workers instead of leaving them behind

    magTaglist = [tag for tag, _ in result]
    desMags = [fitted for _, fitted in result]
    fib2mag = DESfib2mag_to_SDSSfib2mag(sdss, des)

    # Merge with pandas: one column per fitted magnitude plus the fibre mag.
    data2 = DataFrame(desMags, index=magTaglist).T
    fib2mag = DataFrame(fib2mag, columns=['FIBER2MAG_I_DES'])
    des = DataFrame(des)
    del des['index']
    des = concat([des, data2, fib2mag], axis=1)
    des = des.to_records()

    # Single-argument call form prints identically on Python 2 and 3.
    print('\ntime : %s' % (time.time() - t1))
    return sdss, des
def _latest_partition_from_df(cls, df: pd.DataFrame) -> Optional[List[str]]:
    """Return the first row of ``df`` as plain Python values.

    Gives ``None`` when ``df`` is empty; otherwise the first record of
    ``df.to_records(index=False)`` converted with ``.item()``.
    """
    if df.empty:
        return None
    first_record = df.to_records(index=False)[0]
    return first_record.item()