Example #1
    def test_combine_series(self):
        s = self.panel['ItemA'][:10]
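        # axis=0 aligns the Series against the frame's row index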
        result = self.panel.add(s, axis=0)
        expected = DataFrame.add(self.panel, s, axis=0)
        assert_frame_equal(result, expected)

        s = self.panel.ix[5]
        result = self.panel + s
        expected = DataFrame.add(self.panel, s, axis=1)
        assert_frame_equal(result, expected)
Example #2
    def test_fill_value_when_combine_const(self):
        # GH12723
        dat = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
        df = DataFrame({'foo': dat}, index=range(6))

        exp = df.fillna(0).add(2)
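        # fill_value=0 replaces NaN in df before the scalar is added, so the result matches fillna(0).add(2)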
        res = df.add(2, fill_value=0)
        assert_frame_equal(res, exp)
Example #3
def sens_to_zero_rates(contract, market, curve_ccy, rate_key, reporting_ccy):
    """Sensitivity of each cashflow to the curve specified by currency and key

    A leg that pays IBOR is sensitive to both the discount and tenor curve
     of the currency in which the cash flows (coupons) are paid.
    """
    df_sens = DataFrame(columns=['ttm', 'sens', 'ccy', 'curve'])
    if curve_ccy == contract.currency:

        forwards = ibor_rate(contract, market)
        # replace rate with forwards for any fixing date after valuation date
        a = contract.frame
        a.rate = a.rate.where(a.fixing < market.dt_valuation, forwards)

        zcb_pay = market.discount_factor(a.pay, currency=contract.currency)

        if rate_key == 'discount':
            unpaid = a.pay >= market.dt_valuation
            crv = market.discount_curve(curve_ccy)
            pay_dates = a.pay[unpaid]
            ttm_pay = crv.daycount_fn(market.dt_valuation, pay_dates)
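            # assuming continuously compounded zero rates, d(DF)/dr = -ttm * DF, so each discounted cashflow's sensitivity is -ttm times its PV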
            sens = -ttm_pay * (zcb_pay * a.notional * a.rate * a.period).loc[unpaid]
            if contract.notl_exchange and unpaid.any():
                sens.iloc[-1] += a.notional.iloc[-1]
            if reporting_ccy != contract.currency:
                sens *= market.fx(reporting_ccy, contract.currency)
            df_sens = DataFrame({'ttm': ttm_pay, 'sens': sens,
                                 'ccy': curve_ccy, 'curve': 'discount'})
        elif rate_key == contract.frequency:  # TODO - Review and add comments
            crv, crv_key = market.curve(contract.currency, contract.frequency)
            unfixed = a.fixing >= market.dt_valuation
            pay_dates = a.pay.loc[unfixed]
            ttm_pay = crv.daycount_fn(market.dt_valuation, pay_dates)
            zcbi_pay = crv.discount_factor(pay_dates)

            fix_dates = a.fixing.loc[unfixed]
            ttm_fix = crv.daycount_fn(market.dt_valuation, fix_dates)
            zcbi_fix = crv.discount_factor(fix_dates)

            scale_factor = zcbi_fix / zcbi_pay * (a.notional * zcb_pay).loc[unfixed]
            sens_pay = ttm_pay * scale_factor
            sens_fix = -ttm_fix * scale_factor

            if reporting_ccy != contract.currency:
                fx = market.fx(reporting_ccy, contract.currency)
                sens_pay *= fx
                sens_fix *= fx

            df_pay = DataFrame({'ttm': ttm_pay, 'sens': sens_pay}).set_index('ttm')
            df_fix = DataFrame({'ttm': ttm_fix, 'sens': sens_fix}).set_index('ttm')
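            # combine pay-date and fixing-date sensitivities on the shared ttm index; fill_value=0 keeps points present in only one frame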
            df_sens = df_pay.add(df_fix, fill_value=0)

            df_sens['ttm'] = df_sens.index
            df_sens['ccy'] = curve_ccy
            df_sens['curve'] = crv_key

    return df_sens
Example #4
File: combo.py Project: ageek/ramp
 def combine(self, datas):
     count = DataFrame(np.zeros(len(datas[0])), index=datas[0].index)
     eps = 1.0e-8
     col_names = []
     for data in datas:
         for col in data.columns:
             d = data[col]
             m = d.mean()
             s = d.std()
             if s < eps:
                 continue
             d = d.map(lambda x: self.is_outlier(x, m, s))
             col_names.append(col)
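             # axis=0 aligns d on the row index, so each row accumulates its outlier count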
             count = count.add(d, axis=0)
     count.columns = [','.join(col_names)]
     return count
Example #5
print obj5["one"]
print obj5[:2]
obj5[obj5 < 5] = 3
print obj5
print obj5.ix["Ohio", ["one", "two"]]

s1 = Series([7.3, -2.5, 3.4, 1.5], index=["a", "c", "d", "e"])
s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=["a", "c", "e", "f", "g"])

print s1 + s2

df1 = DataFrame(np.arange(9).reshape((3, 3)), columns=list("bcd"), index=["Ohio", "Texas", "Colorado"])
df2 = DataFrame(np.arange(12).reshape((4, 3)), columns=list("bcd"), index=["Utah", "Ohio", "Texas", "Colorado"])

print df1 + df2
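# fill_value=0 treats labels that appear in only one frame as 0; labels missing from both would still be NaN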
print df1.add(df2, fill_value=0)

series2 = df2.ix[0]

print df2 - series2

ff = lambda x: x.max() - x.min()

print df2.apply(ff)
print df2.apply(ff, axis=1)

df3 = DataFrame(np.random.randn(3, 3), columns=list("bcd"), index=["Ohio", "Texas", "Colorado"])
ff2 = lambda x: "%.2f" % x
print df3
print df3.applymap(ff2)
print df3
Example #6
data.ix['Colorado',['two','three']]
data.ix['Colorado',[3,0,1]]
data.ix['Colorado']
data['two']
data.two

## Arithmetic alignment
s1 = Series([7.3,-2.5,3.4,1.5], index=['a','b','d','e'])
s2 = Series([-2.1,3.6,-1.5,4,3.1], index=['a','c','e','f','g'])
s1 + s2
df1 = DataFrame(np.arange(9).reshape((3,3)), columns=list('bcd'), index=['Ohio','Texas','Colorado'])
df2 = DataFrame(np.arange(12).reshape((4,3)),columns=list('bde'), index=['Utah','Ohio','Texas','Oregon'])
df1 + df2
# Alignment means values with the same index labels are combined; labels that don't match produce NaN
# fill_value substitutes a special value for labels that don't line up
df1.add(df2, fill_value=0)
# fill_value applies to labels present in only one of the frames; labels missing from both would still be NaN

## Arithmetic between a DataFrame and a Series
frame = DataFrame(np.arange(12).reshape((4,3)), columns=list('bde'), index=['Utah','Utahs','Texas','Oregon'])
series = frame.ix[0]
frame - series
# The Series is aligned with the columns and subtracted from every row ("broadcasting down the rows");
# to broadcast down the columns instead, select a column and use sub with axis=0
series3 = frame['d']
series3 = frame.ix[:, 'd']  # equivalent column selection via .ix
frame.sub(series3, axis=0)

## Function application and mapping
frame = DataFrame(np.random.randn(4,3), columns=list('bde'),
	index=['Utah','Ohio','Texas','Oregon'])
f = lambda x :x.max()-x.min()
Example #7
import numpy as np

from pandas import DataFrame
from pandas import Series


data = np.arange(100,80,-1)
s = Series(data)
dic = {
    'line01':np.arange(20,40),
    'line03':np.linspace(30,35,20),
    'line02':np.arange(80,60,-1)
}
f = DataFrame(dic,index=np.arange(100,80,-1))
f2 = DataFrame({'line03':np.linspace(30,35,10),'line04':np.arange(10)},index=np.arange(100,90,-1));
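# without fill_value, f3 is NaN everywhere except the overlap: rows 100-91 of column 'line03'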
f3 = f.add(f2)
# Sums
# Sum of each column
sum = f.sum()
# Sum of selected columns
sum1 = f[['line01','line02']].sum()
# Sum of each row
sum2 = f.sum(axis=1)
# print sum
# print sum1
# print sum2
# With skipna=False, any NaN in the row/column makes the result NaN; the default skipna=True ignores NaN
sum4 = f3.sum(skipna=False)
# print sum4

# Get all the values
Example #8
from pandas import Series, DataFrame
import numpy as np
import pandas as pd

s1 = Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])
print(s1 + s2)

df1 = DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))
df2 = DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))
print(df1)
print('')
print(df2)

print('without fill value')
print(df1.add(df2))

print('with fill value')
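# labels present in only one frame are treated as 0, so no NaN remains here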
print(df1.add(df2, fill_value=0))

# set fill value when reindexing
print(df1.reindex(columns=df2.columns, fill_value=0))

Example #9
Basic DataFrame operations
'''

from pandas import DataFrame
import numpy as np

# Create the DataFrames
frame1 = DataFrame(np.arange(0,9).reshape(3,3),
                   columns=list('abc'))
frame2 = DataFrame(np.arange(1,10).reshape(3,3),
                   columns=list('abc'))
print(frame1)
print(frame2)

# Addition
add = frame1.add(frame2)
print(add)

# Subtraction
sub = frame2.sub(frame1)
print(sub)

# Division: div = frame2 / frame1
div = frame2.div(frame1)
print(div) # inf where the denominator is 0

# Multiplication
mul = frame1.mul(frame2)
print(mul)

# Row/column sums, means, maximums and minimums
dframe1 = DataFrame(np.arange(4).reshape((2,2)),
                    columns = list('AB'),
                    index = ['NYC','LA'])
dframe1


dframe2 = DataFrame(np.arange(9).reshape((3,3)),
                    columns = list('ADC'),
                    index = ['NYC','SF','LA'])
dframe2

# adding dataframes
dframe1 + dframe2 # only adds where both row and column match, everything else will be null

dframe1
dframe1.add(dframe2, fill_value = 0) # missing entries are treated as 0; NaN remains only where a label pair is absent from both frames


# operations between Series and DataFrame
ser3 = dframe2.ix[0]
ser3

dframe2 - ser3


class YieldReader (object):
    def __init__ (self, yields = None, scale = 1.0):
        self.masses = [] if yields is None else [ind [0] for ind in yields.index]
        self.isotopes = [] if yields is None else [Isotope (iso) for iso in yields.columns]
        self.isotopes.sort ()
        self.yields = DataFrame () if yields is None else yields
        self.yields = self.yields.fillna (0.0)
        self.yields *= scale

    @classmethod
    def from_file (cls, filename, mass, **kwargs):
        self = cls ()
        self.add_file (filename, mass, **kwargs)
        return self

    @classmethod
    def from_directory (cls, directory = "yields/wh07/", mass_file = "masses", **kwargs):
        self = cls ()
        mass_file = open (directory + "/" + mass_file, "r")
        for line in mass_file:
            if line == "\n":
                continue
            line = line.rstrip ("\n").split (" ")
            try:
                self.add_file (directory + "/" + line [1], float (line [0]) * u.solMass, **kwargs)
            except IndexError:
                self.yields = self.yields.append (DataFrame ([{"mass": float (line [0]) * u.solMass, "file": directory + "/"}]).set_index (["mass", "file"]))
                self.masses.append (float (line [0]) * u.solMass)
        return self

    @classmethod
    def combine (cls, yield_readers):
        self = cls ()
        self.masses = u.Quantity (np.array (np.concatenate ([yr.masses for yr in yield_readers])))
        for yr in yield_readers:
            isotopeArray = yr.isotopes
            for iso in isotopeArray:
                if iso not in self.isotopes:
                    self.isotopes.append (iso)
        self.isotopes.sort ()
        for yr in yield_readers:
            dataframe = yr.yields
            self.yields = self.yields.append (dataframe)
        self.yields = self.yields.fillna (0.0)
        return self

    def add_file (self, filename, mass, winds = True, explosions = True, keplerYield = True, totalYieldName = "yieldall", windYieldName = "yieldwind", expYieldName = None, isotopeName = "isotope", table = 1):
        self.masses.append (mass)
        if keplerYield:
            i = fromKeplerYield (filename, table)
        else:
            i = 0
        result = np.genfromtxt (filename, skip_header = i, names = True, dtype = None)

        yieldDF = {}
        yieldDF ["mass"] = mass
        yieldDF ["file"] = filename
        for row in result:
            if row [isotopeName] == "total" or row [isotopeName] == b"total":
                break
            isotope = Isotope (row [isotopeName])
            if isotope not in self.isotopes:
                self.isotopes.append (isotope)
            yieldDF [isotope.string] = 0.0
            if winds and explosions and totalYieldName is not None:
                yieldDF [isotope.string] += float (row [totalYieldName])
            else:
                if winds:
                    yieldDF [isotope.string] += float (row [windYieldName])
                if explosions:
                    if expYieldName is None:
                        yieldDF [isotope.string] += float (row [totalYieldName]) - float (row [windYieldName])
                    else:
                        yieldDF [isotope.string] += row [expYieldName]
        self.yields = self.yields.append (DataFrame ([yieldDF]).set_index (["mass", "file"]))
        self.yields = self.yields.fillna (0.0)
        self.isotopes.sort ()
  
    def get_yield (self, isotope, massArray = None, tolerance = 0.0001):
        if isinstance (isotope, Isotope):
            isotope = isotope.string
        if isotope not in self.yields:
            if massArray is None:
                return u.Quantity ([0.0] * len (self.yields), u.solMass)
            return u.Quantity ([0.0] * len (massArray), u.solMass)
                
        if massArray is None:
            return u.Quantity (self.yields [isotope], u.solMass)
        return u.Quantity (self.yields [isotope].iloc [massArray], u.solMass)
        
    def get_masses (self):
        return self.masses

    def get_keys (self):
        return [i for i in self.yields.index]
        
    def __add__ (self, other):
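        # element-wise sum of the two yield tables; fill_value=0 keeps entries present in only one reader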
        return YieldReader (self.yields.add (other.yields, fill_value = 0.0))
        
    def __mul__ (self, scalar):
        return YieldReader (self.yields, scalar)
        
    __rmul__ = __mul__
    
    def __div__ (self, scalar):
        return self * (1.0 / scalar)

    def __getitem__ (self, i):
        if isinstance (i, slice):
            return YieldReader (self.yields [i])
        return YieldReader (self.yields [i:i+1])
def main():
    # reindex
    obj = Series(range(4), index="a b c d".split(" ")[::-1])
    print obj

    obj2 = obj.reindex("a b c d e".split(" "))
    print obj2

    # Change NaN
    print obj.reindex("a b c d e".split(" "), fill_value=0)
    colors = ["blue", "purple", "yellow"]
    index = [0, 2, 4]
    obj3 = Series(colors, index=index)
    print obj3.reindex(range(6))
    print obj3.reindex(range(6), method="ffill")  # forward-fill labels that are not found
    print obj3.reindex(range(6), method="backfill")  # bfill

    # DataFrame
    states = ["Ohio", "Texas", "California"]
    frame = DataFrame(np.arange(9).reshape((3, 3)), index="a b c".split(" "), columns=["Ohio", "Texas", "California"])
    print frame
    frame2 = frame.reindex("a b c d".split(" "))
    print frame2
    states[0] = "Utah"
    states[1], states[0] = states[:2]
    print frame.reindex(columns=states)
    # fill
    print frame.reindex("a b c d".split(" "), method="ffill", columns=states)
    print frame.ix["a b c d".split(" ")]
    print frame.ix["a b c d".split(" "), states]

    # Delete column
    print "", ""
    obj = Series(range(5), index="a b c d e".split(" "))
    new_obj = obj.drop("c")
    print new_obj
    print obj

    # Index reference
    print "", ""
    obj = Series(np.arange(4.0), index="a b c d".split(" "))
    print obj["b"]
    print obj[1]  # same
    print obj[2:4]
    print obj[["b", "a", "c"]]
    print obj[[1, 3]]
    print obj[obj < 2]
    # Slice with label
    print obj["b":"c"]  # include 'c'
    obj["b":"c"] = 5
    print obj

    data = DataFrame(
        np.arange(16).reshape((4, 4)),
        index=["Ohio", "Colorado", "Utah", "New York"],
        columns=["one", "two", "three", "four"],
    )
    print data
    # column
    print data["two"]
    print data[["three", "one"]]
    # row
    print data[:2]
    print data[data["three"] > 5]
    # all values
    print data < 5
    data[data < 5] = 0
    print data
    # row and column
    print data.ix[["Colorado"], ["two", "three"]]
    print data.ix[["Colorado", "Utah"], [3, 0, 1]]
    # row
    print data.ix[2]
    # label row and column, return column
    print data.ix[:"Utah", "two"]
    # xs
    # row
    print data.xs("Utah")
    print data.xs("Utah", axis=0)
    # rows
    print data.xs("two", axis=1)
    # icol/irow: 'i' means integer position
    print data.icol(1)
    print data.irow(1)

    # Union
    print "", ""
    s1 = Series([7.3, -2.5, 3.4, 1.5], index=["a", "c", "d", "e"])
    s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=["a", "c", "e", "f", "g"])
    print s1
    print s2
    # index is union, but d, f, g are NaN
    print s1 + s2
    df1 = DataFrame(np.arange(9.0).reshape((3, 3)), columns=list("bcd"), index=["Ohio", "Texas", "Colorado"])
    df2 = DataFrame(np.arange(12.0).reshape((4, 3)), columns=list("bde"), index=["Utah", "Ohio", "Texas", "Oregon"])
    print df1
    print df2
    print df1 + df2

    # arithmetic method
    print "", ""
    df1 = DataFrame(np.arange(12.0).reshape((3, 4)), columns=list("abcd"))
    df2 = DataFrame(np.arange(20.0).reshape((4, 5)), columns=list("abcde"))
    print df1
    print df2
    print df1.add(df2, fill_value=0)
    # reindex has fill_value argument
    # other arithmetic methods are sub/div/mul

    # Calculation in a DataFrame and Series
    print "", ""
    # subtract from each row (broadcasting)
    arr = np.arange(12.0).reshape((3, 4))
    print arr
    print arr[0]
    print arr - arr[0]
    frame = DataFrame(np.arange(12.0).reshape((4, 3)), columns=list("bde"), index=["Utah", "Ohio", "Texas", "Oregon"])
    series = frame.ix[0]
    print frame
    print series
    print frame - series

    series2 = Series(range(3), index=list("bef"))
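    # the Series index is matched against the frame's columns; unmatched labels ('d' in the frame, 'f' in the Series) become all-NaN columns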
    print frame + series2

    series3 = frame["d"]
    series4 = frame.ix[0]
    print frame
    print series3
    print series4
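    # axis=0 aligns series3 on the row index and broadcasts across columns; the default (axis=1) aligns series4 on the columns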
    print frame.sub(series3, axis=0)
    print frame.sub(series4, axis=1)

    # apply function and mapping
    print "", ""
    frame = DataFrame(np.arange(12.0).reshape((4, 3)), columns=list("bde"), index=["Utah", "Ohio", "Texas", "Oregon"])
    print frame
    f = lambda x: x.max() - x.min()
    print frame.apply(f)
    print frame.apply(f, axis=1)

    f = lambda x: Series([x.min(), x.max()], index=["min", "max"])
    print frame.apply(f)

    format = lambda x: "{0:.2f}".format(x)
    print frame.applymap(format)  # frame
    print frame["e"].map(format)  # series

    # sort and rank
    print "", ""
    obj = Series(range(4), index=list("dabc"))
    print obj
    print obj.sort_index()

    frame = DataFrame(np.arange(8).reshape((2, 4)), index=["three", "one"], columns=list("dabc"))
    print frame
    print frame.sort_index()
    print frame.sort_index(axis=1)
    print frame.sort_index(axis=1, ascending=False)

    # Sorting series
    print "", ""
    obj = Series([4, 7, -3, 2])
    print obj.order()
    obj = Series([4, np.nan, 7, np.nan, -3, 2])
    print obj.order()
    print obj.order(ascending=False)

    # order by multi columns
    print "", ""
    frame = DataFrame({"b": [4, 7, -3, 2], "a": [0, 1, 0, 1]})
    print frame.sort_index(by=["a", "b"])

    # rank
    print "", ""
    obj = Series([7, -5, 7, 4, 2, 0, 4])
    print obj.rank()  # method is average
    print obj.rank(method="first")  # ties broken by order of appearance, so no duplicate ranks
    print obj.rank(ascending=False, method="min")
    print obj.rank(ascending=False, method="max")
    f1 = DataFrame(obj, columns=["data"])
    f2 = DataFrame(obj.rank(), columns=["rank"])
    # merge by each index
    print pd.merge(f1, f2, left_index=True, right_index=True)

    # Index of the axis with duplicate values
    print "", ""
    obj = Series(range(5), index=list("aaabc"))
    print obj
    print obj.index.is_unique
    print obj["a"]
    print obj["c"]

    df = DataFrame(np.arange(12.0).reshape((4, 3)), index=list("aabb"), columns=list("ccd"))
    print df
    print df.ix["b"]
    print df["c"]
Example #13
# DataFrame alignment
# Works like Series alignment, except rows and columns are aligned at the same time
df1 = DataFrame(np.arange(9.).reshape((3, 3)), columns=list('bcd'), index=['Ohio', 'Texas', "Colorado"])
df2 = DataFrame(np.arange(12.).reshape((4,3)), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
# print df1
# print df2
df1_add_df2 = df1 + df2
# print df1_add_df2

# Fill in a default value for missing entries so they take part in the calculation
df1_default = DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))
df2_default = DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))
# Without a default value
df1_add_df2_without = df1_default + df2_default
# With a default value, missing entries participate in the calculation as 0
df1_add_df2_default = df1_default.add(df2_default, fill_value=0)
# print df1_add_df2_without
# print df1_add_df2_default

# The corresponding arithmetic methods:
# add : addition
# sub : subtraction
# div : division
# mul : multiplication

# Arithmetic between a DataFrame and a Series
arr = np.arange(12.).reshape((3, 4))
# broadcasting
# print arr - arr[0]
# DataFrame/Series arithmetic behaves like broadcasting
frame = DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
data.drop('three',axis=1)

# Use ix to address individual elements directly
data.ix['Colorado',['one','four']]


#***************************
# Adding two Series adds the values whose index labels match; labels missing from one side come out as NaN
# list('abcd') is equivalent to ['a','b','c','d']

df1=DataFrame(np.arange(12).reshape(3,4),columns=list('abcd'))

df2=DataFrame(np.arange(20).reshape(4,5),columns=list('abcde'))

df1.add(df2,fill_value=0)  # substitute 0 for the missing values (NaN) when adding

#add #sub #div #mul  (addition, subtraction, division, multiplication)
#***********************************************
frame=DataFrame(np.random.randn(4,3),columns=list('bde'),index=['Utah','Ohio','Texas','Oregon'])
frame.abs()

# custom function
f=lambda x:x.max()-x.min()
# run it with apply
frame.apply(f) # applied to each column
frame.apply(f,axis=1) # applied to each row

# define a function
def f(x):
    return Series([x.min(),x.max()],index=['min','max'])
# -*- coding: utf-8 -*-
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                 Arithmetic rules for DataFrame and Series
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

#%%
from pandas import DataFrame,Series
from string import letters
s1=Series(range(3),list(letters[:3]))
d1=DataFrame(
    {'a':range(0,3)},
    index=list(letters[:3])
)
d2=DataFrame(
    {'a':range(0,10),'b':range(10,20),'c':range(20,30)},
    index=list(letters[:10])
)

#%%  DataFrame + DataFrame aligns on labels and adds element-wise
d1+d2
#%%  DataFrame + Series aligns the Series on the columns and broadcasts down the rows
d2+s1
#%% Use the add method to align on the rows instead
# axis says which axis's labels are used for alignment, not the direction of broadcasting
d2.add(s1,axis=0)  #  {0, 1, 'index', 'columns'}
#%%
d2.add(s1,axis=1)


test_person=list(test_person)
test_person.sort()
Test_Dataset=[]
for i in test_person:
    X=Dataset_originall[Dataset_originall['subject#']==i]  # extract every row for subject i as its own DataFrame
    X1=[X.iloc[:,4].values,X.iloc[:,5].values,X.iloc[:,6:].values]  # split that DataFrame into the labels y and the features X
    Test_Dataset.append(X1)

#
# Next, pool each person's data together without distinguishing between individuals
#
training_person_set=set(training_person)

TRAININGDATA=DataFrame()
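# NOTE: the per-subject frames presumably have disjoint row indices, so add(..., fill_value=0) effectively stacks them rather than summing overlapping rows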
for i in training_person:
    TRAININGDATA =TRAININGDATA.add(Dataset_originall[Dataset_originall['subject#']==i],fill_value=0)
X_training=TRAININGDATA.iloc[:,6:].values
y1_training=TRAININGDATA.iloc[:,4].values
y2_training=TRAININGDATA.iloc[:,5].values
                           
                             
TESTDATA=DataFrame()
for i in test_person:
    TESTDATA =TESTDATA.add(Dataset_originall[Dataset_originall['subject#']==i],fill_value=0)
X_test=TESTDATA.iloc[:,6:].values
y1_test=TESTDATA.iloc[:,4].values
y2_test=TESTDATA.iloc[:,5].values

# Feature standardization: ideally it would be done this way, but here the standardization statistics are computed over the whole dataset
def Scaler(X,mean,variance):
    return (X-mean)/variance
Example #17
# print b
# print c
# print d
# print e
# print g
# print h
# print i

# Get the number of rows
len(f.index)



# Arithmetic (works the same way for DataFrame)
s1 = Series(np.arange(10,20),index=np.arange(0,10))
s2 = Series(np.arange(50,60),index=np.arange(5,15))
s3 = s1 + s2
# Fill in values that are missing from one of the operands
s4 = s1.add(s2,fill_value = 0)
# print s3
# print s4

# Arithmetic between a Series and a DataFrame
s1 = f.ix[0,:]
# Each row of the DataFrame subtracts the matching value from the Series, aligned on the labels
f1 = f - s1
# print f1
# To operate along the columns instead, specify the axis
s1 = f.ix[:,0]
f1 = f.add(s1,axis = 0)
print f1