Ejemplo n.º 1
0
def count_missing_data():
    """Print per-column counts of present and missing entries in the Ebola data.

    Loads the data set through ``ds.load_ebola()`` (presumably a pandas
    DataFrame) and prints, in order: the first rows, the number of non-missing
    entries per column, and the number of missing entries per column.
    """
    ebola = ds.load_ebola()
    print('Ebola data:', '\n', ebola.head(), '\n')

    # count() tallies only the non-missing cells of each column; compute it
    # once and reuse it for both reports.
    present = ebola.count()
    print('# of entries per column:', '\n', present, '\n')

    # Whatever falls short of the total row count is missing in that column.
    num_rows = ebola.shape[0]
    num_missing = num_rows - present
    print('# of missing entries per column:', '\n', num_missing, '\n')
Ejemplo n.º 2
0
def backward_fill_missing_values():
    """Print the head and tail of the Ebola data after a backward fill.

    Only the first five columns are shown. A backward fill replaces each
    missing value with the NEXT valid observation further down the same
    column; a missing value with no valid observation after it stays NaN
    (visible in the tail of the output).
    """
    ebola = ds.load_ebola()
    # `.ix` was removed in pandas 1.0 and `fillna(method=...)` is deprecated;
    # `bfill()` plus positional `.iloc` is the supported equivalent.
    # Fill once and reuse the result for both views.
    filled = ebola.bfill().iloc[:, :5]
    # Observe the column 'Cases_Liberia' and the rows 2 and 3
    print('Ebola data:', '\n', filled.head(), '\n')
    print('Ebola data:', '\n', filled.tail(), '\n')
Ejemplo n.º 3
0
def arithmetic_ops_missing_values():
    """Show how NaN propagates through element-wise column addition.

    Adds the three per-country case columns into a new ``'sum_columns'``
    column on the loaded frame and prints the first ten rows of the inputs
    next to their sum. Any row where at least one input is NaN yields NaN in
    the sum.
    """
    ebola = ds.load_ebola()

    ebola['sum_columns'] = (
        ebola['Cases_Guinea']
        + ebola['Cases_Liberia']
        + ebola['Cases_SierraLeone']
    )
    # `.ix` was removed in pandas 1.0; `.loc` is the label-based replacement.
    # The computed sum is included in the subset so the printed table
    # actually shows the result of the arithmetic.
    ebola_subset = ebola.loc[:, [
        'Cases_Guinea', 'Cases_Liberia', 'Cases_SierraLeone', 'sum_columns'
    ]]
    print('Summing column with NaN values', '\n', ebola_subset.head(n=10),
          '\n')
Ejemplo n.º 4
0
def count_missing_values_numpy():
    """Print three different counts of missing values in the Ebola data.

    Prints, in order: the first rows of the data, the total number of missing
    cells in the whole frame, the number of missing cells in the
    ``'Cases_Guinea'`` column, and the most frequent values of that column
    with NaN counted as a value.
    """
    ebola = ds.load_ebola()
    print('Ebola data:', '\n', ebola.head(), '\n')

    # isnull() gives a boolean mask; counting its non-zero (True) entries
    # counts the missing cells.
    missing_values = np.count_nonzero(ebola.isnull())
    print('Missing values in DataFrame:', '\n', missing_values, '\n')

    missing_values = np.count_nonzero(ebola['Cases_Guinea'].isnull())
    print('Missing values in Series:', '\n', missing_values, '\n')

    # value_counts() orders by frequency, so NaN shows up in head() only when
    # it is among the most frequent values (expected for this data set --
    # TODO confirm against the data).
    missing_values = ebola['Cases_Guinea'].value_counts(dropna=False).head()
    print('Missing values in Series II:', '\n', missing_values, '\n')
Ejemplo n.º 5
0
def fill_missing_values():
    """Print the first rows and columns of the Ebola data with NaN set to 0.

    ``fillna(0)`` returns a new frame in which every missing value is replaced
    by the constant 0; the loaded frame itself is left untouched.
    ``fillna(0, inplace=True)`` would modify the frame in place instead.
    """
    ebola = ds.load_ebola()
    # `.ix` was removed in pandas 1.0. On an integer-labelled index,
    # `.ix[0:10]` was label-based and therefore inclusive of row 10, so the
    # positional equivalent takes 11 rows (assumes the default 0..n-1 index
    # -- TODO confirm against ds.load_ebola).
    print('Ebola data:', '\n', ebola.fillna(0).iloc[:11, :5], '\n')
Ejemplo n.º 6
0
def drop_nan_values():
    """Print the shape of the Ebola data after dropping rows that contain NaN.

    Dropping rows must be done with caution: according to the original note,
    removing every row with a NaN from this data set leaves only a single
    row. ``dropna()`` returns a new frame unless ``inplace=True`` is passed.
    """
    frame = ds.load_ebola()
    complete_rows = frame.dropna()
    print('Dropping NaN values:', '\n', complete_rows.shape)
Ejemplo n.º 7
0
def forward_fill_missing_values():
    """Print the first rows and columns of the Ebola data after a forward fill.

    A forward fill replaces each missing value with the LAST valid
    observation above it in the same column; a missing value with no valid
    observation before it stays NaN.
    """
    ebola = ds.load_ebola()
    # `.ix` was removed in pandas 1.0 and `fillna(method=...)` is deprecated;
    # `ffill()` plus positional `.iloc` is the supported equivalent.
    # `.ix[0:10]` on an integer-labelled index included row 10, hence 11 rows
    # here (assumes the default 0..n-1 index -- TODO confirm).
    # Observe the column 'Cases_Liberia' and the rows 2 and 3
    print('Ebola data:', '\n', ebola.ffill().iloc[:11, :5], '\n')