Ejemplo n.º 1
0
def proportion_of_features_with_na(X: numpy.ndarray,
                                   y: numpy.ndarray = None) -> float:
    '''
    Proportion of features (columns) that contain at least one missing value.

    Parameters
    ----------
    X : numpy.ndarray or pandas.DataFrame
        2d-array with one feature per column. Anything that is not already
        a ``pandas.DataFrame`` is passed to the ``pandas.DataFrame``
        constructor.
    y : numpy.ndarray, optional
        Array of response values. Not used by this function; it is kept in
        the signature so the call shape ``f(X, y)`` keeps working.

    Returns
    -------
    float
        Number of features with at least one missing value divided by the
        total number of features. NaN when ``X`` has no features, because
        the proportion is undefined in that case.
    '''

    # Work on a DataFrame so missing-value detection is uniform across
    # input types (isnull() flags NaN and None alike).
    frame = X if isinstance(X, pandas.DataFrame) else pandas.DataFrame(X)

    n_features = frame.shape[1]
    if n_features == 0:
        # Guard against 0 / 0: without it the division either emits a
        # RuntimeWarning or raises, depending on the scalar types involved.
        return numpy.float64(numpy.nan)

    # One boolean per column: True when that column has any missing entry.
    column_has_na = frame.isnull().any(axis=0)

    return column_has_na.sum() / n_features