Example #1
0
def pd_03():
    """Demonstrate forward-fill and mean-fill of missing values.

    Builds a 6x3 frame of random normals, overwrites the tail of columns 1
    and 2 with NaN, then prints the frame followed by several filled
    variants. Nothing is returned; all output goes to stdout.
    """
    df = DataFrame(np.random.randn(6, 3))
    # .loc slices by label and includes the end point; with the default
    # integer index it selects the same rows the removed .ix accessor did.
    df.loc[2:, 1] = np.nan
    df.loc[4:, 2] = np.nan
    print(df)
    # ffill() is the supported spelling of fillna(method='ffill').
    print(df.ffill())
    # limit=2 fills at most two consecutive NaNs per gap.
    print(df.ffill(limit=2))
    data = Series([1., None, 3.5, None, 7])
    print(data.fillna(data.mean()))
    # A Series of per-column means fills each column with its own mean.
    print(df.fillna(df.mean()))
Example #2
0
def main():
    """
    Handling of not applicable values

    Walks through detecting (isnull/notnull), excluding (dropna) and
    filling (fillna/ffill) missing values on small Series and DataFrame
    examples, printing every intermediate result to stdout.
    """

    string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
    print(string_data)
    print(string_data.isnull())
    string_data[0] = None
    print(string_data.isnull())
    # None is neither identical nor equal to NaN. Formatted as one string so
    # the output is the same whether print is a statement or a function.
    print('%s %s' % (None is np.nan, None == np.nan))

    # Exclude N/A
    print(' ')  # separator line (a single space, as before)
    NA = np.nan
    data = Series([1, NA, 3.5, NA, 7])
    print(data.dropna())
    print(data[data.notnull()])

    data = DataFrame([
        [1., 6.5, 3.],
        [1., NA, NA],
        [NA, NA, NA],
        [NA, 6.5, 3.]
    ])
    cleaned = data.dropna()  # keeps only rows that contain no NA at all
    print(data)
    print(cleaned)
    print(data.dropna(how='all'))  # drops only rows that are entirely NA
    data[4] = None
    print(data.dropna(axis=1, how='all'))  # drops the all-NA column added above
    print(data.dropna(thresh=2))  # keeps rows with at least 2 non-NA values

    # Fill NA
    print(' ')
    print(data.fillna(0))
    print(data.fillna({1: 0.5, 2: -1}))  # per-column fill values
    # The in-place call's return value carries no data, so it is not bound.
    data.fillna(0, inplace=True)
    print(data)
    print(' ')
    df = DataFrame(np.arange(18).reshape((6, 3)))
    # Columns 1 and 2 are about to receive NaN. Cast them to float up front:
    # newer pandas refuses to silently upcast an integer column on assignment.
    # Column 0 stays integer, so the resulting dtypes match the old behaviour.
    df = df.astype({1: 'float64', 2: 'float64'})
    # .loc is label-based and end-inclusive, like the removed .ix accessor was
    # for an integer index.
    df.loc[2:, 1] = NA
    df.loc[4:, 2] = NA
    print(df)
    # ffill() is the supported spelling of fillna(method='ffill').
    print(df.ffill())
    print(df.ffill(limit=2))
    data = Series([1., NA, 3.5, NA, 7])
    print(data.fillna(data.mean()))
Example #3
0
# Interactive-session transcript. A bare expression only displays a value in
# a REPL and has no effect when run as a script. `data`, `df` and `NA` must
# already be defined before this point.
data
data.dropna(axis=1, how='all')
## Filling in Missing Data
df
df.fillna(0)
df = DataFrame(np.random.randn(7, 3))
df
# .loc is label-based and end-inclusive (rows 0 through 4 here), matching
# what the removed .ix accessor did on an integer index.
df.loc[:4, 1]
df.loc[:4, 1] = NA
df.loc[:4, 1]
df.loc[:2, 2] = NA
df
df.dropna(thresh=3)
df
# NOTE(review): key 3 matches no column of this 7x3 frame (columns 0-2), so
# only column 1 is filled by this call -- confirm whether 2 was intended.
df.fillna({1: 0.5, 3: -1})
df
_ = df.fillna(0, inplace=True)
df
# Scratch check that assigning an expression to `_` leaves `a` unchanged.
a = 5
_ = a + 3
a
df = DataFrame(np.random.randn(6, 3))
df.loc[2:, 1] = NA
df.loc[4:, 2] = NA
df
# ffill() is the supported spelling of fillna(method='ffill').
df.ffill()
df
df.ffill(limit=2)
data = Series([1., NA, 3.5, NA, 7])
# Mean of the non-missing values of `data`, computed by hand.
(1. + 3.5 + 7) / 3
Example #4
0
# Filling in missing data

# Use fillna to fill missing values
# print(data_nan_dataframe_time.fillna(0))
# A dict can be passed to fillna to use a different fill value per column
# print(data_nan_dataframe_time.fillna({1: 0.5, 3: -1}))
# By default fillna returns a new object and leaves the original unmodified;
# set inplace to choose whether the original object is modified instead.
_ = data_nan_dataframe_time.fillna(0, inplace=True)
# print(data_nan_dataframe_time)

# Interpolation methods that work for reindex can also be used when filling
df_reindex_fillna = DataFrame(np.random.randn(6, 3))
# .loc is label-based and end-inclusive, matching what the removed .ix
# accessor did on the default integer index.
df_reindex_fillna.loc[2:, 1] = np.nan
df_reindex_fillna.loc[4:, 2] = np.nan
# print(df_reindex_fillna)
# print(df_reindex_fillna.ffill())
# print(df_reindex_fillna.ffill(limit=2))

# Fill with the mean
data_mean_fill = Series([1., np.nan, 3.5, np.nan, 7])
# print(data_mean_fill.fillna(data_mean_fill.mean()))
# print(data_mean_fill.fillna(value=2))

# Parameters of fillna
# value    scalar value or dict-like object used to fill missing values
# method   interpolation method, e.g. 'ffill'. The original note says it
#          defaults to 'ffill' when no other argument is given.
#          NOTE(review): verify against the installed pandas version; newer
#          releases favour the dedicated ffill()/bfill() methods.
# axis     axis to fill along, default axis=0
# inplace  modify the calling object instead of producing a copy
# limit    for forward or backward filling, the maximum number of
#          consecutive values to fill