Beispiel #1
0
    def test_to_records_dt64(self):
        df = DataFrame([["one", "two", "three"],
                        ["four", "five", "six"]],
                       index=date_range("2012-01-01", "2012-01-02"))
        assert df.to_records()['index'][0] == df.index[0]

        rs = df.to_records(convert_datetime64=False)
        assert rs['index'][0] == df.index.values[0]
Beispiel #2
0
    def test_to_records_dtype(self, kwargs, expected):
        # see gh-18146
        df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]})

        if not isinstance(expected, np.recarray):
            with pytest.raises(expected[0], match=expected[1]):
                df.to_records(**kwargs)
        else:
            result = df.to_records(**kwargs)
            tm.assert_almost_equal(result, expected)
Beispiel #3
0
    def test_to_records_index_name(self):
        df = DataFrame(np.random.randn(3, 3))
        df.index.name = 'X'
        rs = df.to_records()
        assert 'X' in rs.dtype.fields

        df = DataFrame(np.random.randn(3, 3))
        rs = df.to_records()
        assert 'index' in rs.dtype.fields

        df.index = MultiIndex.from_tuples([('a', 'x'), ('a', 'y'), ('b', 'z')])
        df.index.names = ['A', None]
        rs = df.to_records()
        assert 'level_0' in rs.dtype.fields
Beispiel #4
0
    def test_to_records_dict_like(self):
        # see gh-18146
        class DictLike(object):
            def __init__(self, **kwargs):
                self.d = kwargs.copy()

            def __getitem__(self, key):
                return self.d.__getitem__(key)

            def __contains__(self, key):
                return key in self.d

            def keys(self):
                return self.d.keys()

        df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]})

        dtype_mappings = dict(column_dtypes=DictLike(**{"A": np.int8,
                                                        "B": np.float32}),
                              index_dtypes="<U2")

        result = df.to_records(**dtype_mappings)
        expected = np.rec.array([("0", "1", "0.2", "a"),
                                 ("1", "2", "1.5", "bc")],
                                dtype=[("index", "<U2"), ("A", "i1"),
                                       ("B", "<f4"), ("C", "O")])
        tm.assert_almost_equal(result, expected)
Beispiel #5
0
 def test_to_records_with_multindex(self):
     # GH3189
     index = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
     data = np.zeros((8, 4))
     df = DataFrame(data, index=index)
     r = df.to_records(index=True)['level_0']
     assert 'bar' in r
     assert 'one' not in r
Beispiel #6
0
    def test_to_records_datetimeindex_with_tz(self, tz):
        # GH13937
        dr = date_range('2016-01-01', periods=10,
                        freq='S', tz=tz)

        df = DataFrame({'datetime': dr}, index=dr)

        expected = df.to_records()
        result = df.tz_convert("UTC").to_records()

        # both converted to UTC, so they are equal
        tm.assert_numpy_array_equal(result, expected)
Beispiel #7
0
    def test_to_records_dt64(self):
        df = DataFrame([["one", "two", "three"],
                        ["four", "five", "six"]],
                       index=date_range("2012-01-01", "2012-01-02"))

        # convert_datetime64 defaults to None
        expected = df.index.values[0]
        result = df.to_records()['index'][0]
        assert expected == result

        # check for FutureWarning if convert_datetime64=False is passed
        with tm.assert_produces_warning(FutureWarning):
            expected = df.index.values[0]
            result = df.to_records(convert_datetime64=False)['index'][0]
            assert expected == result

        # check for FutureWarning if convert_datetime64=True is passed
        with tm.assert_produces_warning(FutureWarning):
            expected = df.index[0]
            result = df.to_records(convert_datetime64=True)['index'][0]
            assert expected == result
Beispiel #8
0
def ColorTransform(data):
    """Append DES->SDSS transformed magnitude columns to ``data``.

    ``add_SDSS_colors`` is run for ``'MAG_DETMODEL'`` and then for every
    other template listed below. All resulting frames are joined
    column-wise to ``DataFrame(data.data)``.

    Returns the combined frame converted with ``DataFrame.to_records()``.
    """
    from pandas import DataFrame, concat

    # 'MAG_DETMODEL' stays first so the output column order is unchanged.
    magTaglist = ['MAG_DETMODEL', 'MAG_MODEL', 'MAG_AUTO', 'MAG_APER_3',
                  'MAG_APER_4', 'MAG_APER_5', 'MAG_APER_6']

    # Collect every per-template frame and join them in a single concat;
    # concatenating inside the loop re-copies the accumulated columns on
    # each iteration.
    strips = [add_SDSS_colors(data, magTag_template=magTag)
              for magTag in magTaglist]
    combine = concat(strips, axis=1)

    frame = concat([DataFrame(data.data), combine], axis=1)
    return frame.to_records()
Beispiel #9
0
    def test_to_records_with_categorical(self):

        # GH8626

        # dict creation
        df = DataFrame({'A': list('abc')}, dtype='category')
        expected = Series(list('abc'), dtype='category', name='A')
        tm.assert_series_equal(df['A'], expected)

        # list-like creation
        df = DataFrame(list('abc'), dtype='category')
        expected = Series(list('abc'), dtype='category', name=0)
        tm.assert_series_equal(df[0], expected)

        # to record array
        # this coerces
        result = df.to_records()
        expected = np.rec.array([(0, 'a'), (1, 'b'), (2, 'c')],
                                dtype=[('index', '=i8'), ('0', 'O')])
        tm.assert_almost_equal(result, expected)
Beispiel #10
0
def add_SDSS_colors(data, magTag_template = 'MAG_DETMODEL', independent = None):
    """Build SDSS-transformed G/R/I/Z magnitude columns for one template.

    Parameters
    ----------
    data : table-like
        Must support ``len(data)`` and ``data['<template>_<filter>']``
        lookups for each of the filters G, R, I and Z.
    magTag_template : str
        Prefix of the DES magnitude columns to read.
    independent : str or None
        When ``'yes'``, the new columns are joined column-wise to
        ``DataFrame(data)`` and the result is returned as a record array.
        Any other value returns only the new columns as a DataFrame.

    Returns
    -------
    DataFrame or numpy record array
        Columns are named ``'<template>_<filter>_SDSS'``.
    """
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print("Doing des->sdss color transforms for "+magTag_template)
    filters = ['G','R','I','Z']
    magTags = []

    # One row per filter, one column per input row.
    desMags = np.empty([len(filters),len(data)])
    for i,thisFilter in enumerate(filters):
        magTag = magTag_template+'_'+thisFilter
        desMags[i,:] = data[magTag]
        magTags.append(magTag+'_SDSS')
    # presumably returns one magnitude array per filter, in G/R/I/Z order;
    # verify against transform_DES_to_SDSS
    sdssMags = transform_DES_to_SDSS(desMags[0,:], desMags[1,:], desMags[2,:], desMags[3,:])

    from pandas import DataFrame, concat
    # Kept under its own name so the caller's ``data`` stays available below.
    sdss_frame = DataFrame( sdssMags, index = magTags).T

    if independent == 'yes':
        # Join against the caller's table. Previously ``data`` had already
        # been rebound to the SDSS frame at this point, so this branch
        # concatenated the SDSS columns with themselves.
        fulldata = DataFrame(data)
        return concat([fulldata, sdss_frame], axis=1).to_records()

    return sdss_frame
def slide_9():
    """Walk through long-to-wide pivoting of the macro data, printing each step.

    Reads the CSV at ``MACRODATAPATH``, reshapes it to long format and prints
    several pivoted / unstacked views of it.
    """
    data = pd.read_csv(MACRODATAPATH)
    periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
    data = DataFrame(data.to_records(),
                     columns=pd.Index(['realgdp', 'infl', 'unemp'],
                                      name='item'),
                     index=periods.to_timestamp('D', 'end'))

    # Long format: one row per (date, item) pair.
    ldata = data.stack().reset_index().rename(columns={0: 'value'})
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(ldata[:10])
    # pivot arguments are given by keyword: pandas 2.0+ rejects the
    # positional form, and the keywords work on older versions too.
    pivoted = ldata.pivot(index='date', columns='item', values='value')
    print(pivoted.head())

    ldata['value2'] = np.random.randn(len(ldata))
    print(ldata[:10])

    # Without ``values`` every remaining column is pivoted.
    pivoted = ldata.pivot(index='date', columns='item')
    print(pivoted[:5])
    print(pivoted['value'][:5])

    unstacked = ldata.set_index(['date', 'item']).unstack('item')
    print(unstacked[:7])
Beispiel #12
0
 def add_df(self, df: pd.DataFrame):
     """Add every row of a pandas DataFrame via ``add_rows``.

     Rows are the tuples produced by ``df.to_records().tolist()``.
     """
     self.add_rows(df.to_records().tolist())
Beispiel #13
0
    'line_01':['a','b','c','d'],
    'line_02':[1,2,3,4],
    'line_03':[4,3,2,1]
}
f = DataFrame(data)

# Pivot columns into rows: DataFrame -> Series
f1 = f.stack()
# print f1
# Pivot rows back into columns: Series -> DataFrame
f2 = f1.unstack()
# print f2

f3 = pd.read_csv('rcs/macrodata.csv')
periods = pd.PeriodIndex(year=f3.year, quarter=f3.quarter, name='date')
f3 = DataFrame(f3.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))
ldata = f3.stack().reset_index().rename(columns={0: 'value'})
# pivot arguments are given by keyword: pandas 2.0+ rejects the positional
# form, and the keywords work on older versions too.
wdata = ldata.pivot(index='date', columns='item', values='value')
# print ldata
# print wdata

data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4,
                  'k2': [1, 1, 2, 3, 3, 4, 4]})
# Remove duplicate rows.
# data.duplicated() returns a boolean Series telling, for each row,
# whether it is a duplicate.
s1 = data.duplicated()
# Keep the rows not flagged as duplicates. ``.loc`` replaces the ``.ix``
# indexer, which was removed in pandas 1.0.
f4 = data.loc[np.logical_not(s1)]
# print f4
# drop_duplicates directly returns a DataFrame with the duplicates removed
Beispiel #14
0
def df_to_bytes_np_records_(df: pd.DataFrame) -> bytes:
    """Serialize ``df`` as a NumPy record array and return the saved bytes.

    The frame is converted with ``to_records()`` and written by ``np.save``
    with pickling disabled.
    """
    with BytesIO() as buffer:
        np.save(buffer, df.to_records(), allow_pickle=False)
        return buffer.getvalue()
Beispiel #15
0
 def __add_dataframe(self, name: str, data: pandas.DataFrame, desc: str):
     """Create table ``name`` from ``data`` (index excluded) and flush it.

     The table is created through ``self._file.create_table`` under
     ``self._folder`` with ``desc`` passed as the final argument.
     """
     self._file.create_table(
         self._folder, name, data.to_records(index=False), desc
     ).flush()
Beispiel #16
0
    def test_from_records_sequencelike(self):
        """Round-trip a mixed-dtype frame through several record-like inputs.

        ``DataFrame.from_records`` is fed tuples, lists, a hand-built
        recarray and the recarray from ``to_records``; each result is
        compared with the original frame. The ``exclude`` parameter is
        checked at the end.
        """
        df = DataFrame(
            {
                "A": np.array(np.random.randn(6), dtype=np.float64),
                "A1": np.array(np.random.randn(6), dtype=np.float64),
                "B": np.array(np.arange(6), dtype=np.int64),
                "C": ["foo"] * 6,
                "D": np.array([True, False] * 3, dtype=bool),
                "E": np.array(np.random.randn(6), dtype=np.float32),
                "E1": np.array(np.random.randn(6), dtype=np.float32),
                "F": np.array(np.arange(6), dtype=np.int32),
            }
        )

        # this is actually tricky to create the recordlike arrays and
        # have the dtypes be intact
        blocks = df._to_dict_of_blocks()
        tuples = []
        columns = []
        dtypes = []
        # Column names and (name, dtype string) pairs are gathered in block
        # iteration order, which is also the order used for the row tuples.
        for dtype, b in blocks.items():
            columns.extend(b.columns)
            dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns])
        # Build one tuple per row by walking the blocks in that same order.
        for i in range(len(df.index)):
            tup = []
            for _, b in blocks.items():
                tup.extend(b.iloc[i].values)
            tuples.append(tuple(tup))

        recarray = np.array(tuples, dtype=dtypes).view(np.recarray)
        recarray2 = df.to_records()
        lists = [list(x) for x in tuples]

        # tuples (lose the dtype info)
        result = DataFrame.from_records(tuples, columns=columns).reindex(
            columns=df.columns
        )

        # created recarray and with to_records recarray (have dtype info)
        result2 = DataFrame.from_records(recarray, columns=columns).reindex(
            columns=df.columns
        )
        result3 = DataFrame.from_records(recarray2, columns=columns).reindex(
            columns=df.columns
        )

        # list of tuples (no dtype info)
        result4 = DataFrame.from_records(lists, columns=columns).reindex(
            columns=df.columns
        )

        # Inputs without dtype info are compared with check_dtype=False.
        tm.assert_frame_equal(result, df, check_dtype=False)
        tm.assert_frame_equal(result2, df)
        tm.assert_frame_equal(result3, df)
        tm.assert_frame_equal(result4, df, check_dtype=False)

        # tuples is in the order of the columns
        result = DataFrame.from_records(tuples)
        tm.assert_index_equal(result.columns, RangeIndex(8))

        # test exclude parameter & we are casting the results here (as we don't
        # have dtype info to recover)
        columns_to_test = [columns.index("C"), columns.index("E1")]

        # Exclude every position except those of "C" and "E1".
        exclude = list(set(range(8)) - set(columns_to_test))
        result = DataFrame.from_records(tuples, exclude=exclude)
        result.columns = [columns[i] for i in sorted(columns_to_test)]
        tm.assert_series_equal(result["C"], df["C"])
        tm.assert_series_equal(result["E1"], df["E1"].astype("float64"))
Beispiel #17
0
 def test_to_records_floats(self):
     df = DataFrame(np.random.rand(10, 10))
     df.to_records()
# Frame with a named column axis ('side') built from the existing ``result``.
df = DataFrame({
    'left': result,
    'right': result + 5
},
               columns=pd.Index(['left', 'right'], name='side'))
df

df.unstack('state')

df.unstack('state').stack('side')

### Converting between long and wide format
#1
data = pd.read_csv('d:data/macrodata.csv')
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
data = DataFrame(data.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))

ldata = data.stack().reset_index().rename(columns={0: 'value'})
# pivot arguments are given by keyword: pandas 2.0+ rejects the positional
# form, and the keywords work on older versions too.
wdata = ldata.pivot(index='date', columns='item', values='value')

#2
ldata[:10]

pivoted = ldata.pivot(index='date', columns='item', values='value')
pivoted.head()

ldata['value2'] = np.random.randn(len(ldata))
ldata[:10]
Beispiel #19
0
 def test_to_records_floats(self):
     df = DataFrame(np.random.rand(10, 10))
     df.to_records()
Beispiel #20
0
#3
# Frame with a named column axis ('side') built from the existing ``result``.
df = DataFrame({'left': result, 'right': result + 5},
               columns=pd.Index(['left', 'right'], name='side'))
df

df.unstack('state')

df.unstack('state').stack('side')


### Converting between long and wide format
#1
data = pd.read_csv('d:data/macrodata.csv')
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
data = DataFrame(data.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))

ldata = data.stack().reset_index().rename(columns={0: 'value'})
# pivot arguments are given by keyword: pandas 2.0+ rejects the positional
# form, and the keywords work on older versions too.
wdata = ldata.pivot(index='date', columns='item', values='value')

#2
ldata[:10]

pivoted = ldata.pivot(index='date', columns='item', values='value')
pivoted.head()

ldata['value2'] = np.random.randn(len(ldata))
ldata[:10]
Beispiel #21
0
df2 = DataFrame({'a': [5., 4., np.nan, 3., 7.],
                 'b': [np.nan, 3., 4., 6., 8.]})
df1.combine_first(df2)

# Reshaping with hierarchical indexing=========================================
data = DataFrame(np.arange(6).reshape((2, 3)),
                 index=pd.Index(['Ohio', 'Colorado'], name='state'),
                 columns=pd.Index(['one', 'two', 'three'], name='number'))
result = data.stack()    # stack a DataFrame into a MultiIndex Series; unstack inverts it
result.unstack(0)
result.unstack('state')

# Pivoting "long" to "wide" format
data = pd.read_csv('macrodata.csv')
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')
data = DataFrame(data.to_records(),
                 columns=pd.Index(['realgdp', 'infl', 'unemp'], name='item'),
                 index=periods.to_timestamp('D', 'end'))

ldata = data.stack().reset_index().rename(columns={0: 'value'})
# pivot arguments are given by keyword: pandas 2.0+ rejects the positional
# form, and the keywords work on older versions too.
wdata = ldata.pivot(index='date', columns='item', values='value')

# Removing duplicates===============================
data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4,'k2': [1, 1, 2, 3, 3, 4, 4]})

data.duplicated()
data.drop_duplicates()
data['v1'] = range(7)
data.drop_duplicates(['k1'])
# keep='last' is the replacement for the ``take_last=True`` flag, which
# current pandas no longer accepts.
data.drop_duplicates(['k1', 'k2'], keep='last')

# Replacing values-------------------------------
data = Series([1., -999., 2., -999., -1000., 3.])
Beispiel #22
0
def DESmag_to_SDSSmag(sdss_data, des_data):
    """Fit DES->SDSS magnitudes for matched catalogs and attach them to DES.

    The two inputs are passed through ``match``. For every combination of
    color kind (``MODELMAG``, ``CMODELMAG``) and filter (G, R, I, Z) one
    worker process runs ``scikitfitting``; the results become columns named
    ``'<kind>_<filter>_DES'``. ``DESfib2mag_to_SDSSfib2mag`` supplies a
    further ``'FIBER2MAG_I_DES'`` column.

    Returns
    -------
    tuple
        ``(sdss, des)``: the matched SDSS table as returned by ``match`` and
        the DES table, with the new columns appended, as a record array.
    """
    sdss, des = match(sdss_data, des_data)

    sys.stdout.write('DESmag to SDSS mag ')

    filters = ['G','R','I','Z']
    Scolorkind = ['MODELMAG', 'CMODELMAG' ]
    import time
    t1 = time.time()
    from multiprocessing import Process, Queue

    def multiprocessing_mag(q,sdss, des, thisfilter, Scolor):
        # Worker body: fit one (color kind, filter) pair and hand the tagged
        # result back through the queue.
        magTag = Scolor+'_'+thisfilter+'_DES'
        q.put(( magTag, scikitfitting(sdss, des, filter = thisfilter, Scolorkind =  Scolor )))
        sys.stdout.write('.')

    d_queue = Queue()
    d_processes = [
        Process(target=multiprocessing_mag,
                args=(d_queue, sdss, des, thisfilter, Scolor))
        for Scolor in Scolorkind
        for thisfilter in filters
    ]

    for p in d_processes:
        p.start()

    # Exactly one result is expected per worker; they arrive in completion
    # order, not submission order.
    result = [d_queue.get() for _ in d_processes]

    # Join only after the queue has been drained, so no worker is still
    # blocked on put(); this reaps the children instead of leaving them
    # un-joined.
    for p in d_processes:
        p.join()

    desMags = [D[1] for D in result]
    magTaglist = [D[0] for D in result]

    fib2mag = DESfib2mag_to_SDSSfib2mag(sdss, des)

    # Merge the new columns with pandas.
    from pandas import DataFrame, concat
    data2 = DataFrame( desMags, index = magTaglist ).T
    fib2mag = DataFrame( fib2mag, columns = ['FIBER2MAG_I_DES'] )
    des = DataFrame(des)

    # Drop the 'index' column carried by the DES table before joining.
    del des['index']
    des = concat([des, data2, fib2mag], axis=1)

    des = des.to_records()

    # Written through sys.stdout so the text is identical on Python 2 and 3
    # (equivalent to the former ``print '\ntime :', elapsed``).
    sys.stdout.write('\ntime : %s\n' % (time.time()-t1))

    return sdss, des
Beispiel #23
0
 def _latest_partition_from_df(cls,
                               df: pd.DataFrame) -> Optional[List[str]]:
     if not df.empty:
         return df.to_records(index=False)[0].item()
     return None