Example #1
def describe(data):
    """ Describe the missingness of the input data.

    Parameters
    ----------
    data: numpy.ndarray
        The data you want to get a description from

    Returns
    -------
    dict
        null_xy: list of tuples
            Indices of all null points
        null_n: int
            Total number of null values
        pmissing_n: float
            Percentage of missing values in dataset

    Planned but not yet implemented: missingness (confidence of the data
    being MCAR, MAR or MNAR), null_rows/null_cols (indices of completely
    null rows/columns), mean_rows/mean_cols, std_dev and min_max.

    """
    null_xy = find_null(data)
    null_n = len(null_xy)
    pmissing_n = null_n / data.size
    description = {
        "null_xy": null_xy,
        "null_n": null_n,
        "pmissing_n": pmissing_n
    }
    return description
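Every example on this page leans on a find_null helper that is never shown. Judging from how it is used (iterated as (row, col) pairs, transposed with .T, measured with len and .size), a minimal sketch consistent with that usage would be:

import numpy as np

def find_null(data):
    """ Return an array of (row, col) index pairs, one per NaN in data."""
    return np.argwhere(np.isnan(data))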
Example #2
def arima(data, p, d, q, axis=0):
    """Autoregressive Integrated Moving Average Imputation

    Stationary model

    Parameters
    ----------
    data: numpy.ndarray
        The matrix with missing values that you want to impute
    p: int
        Number of autoregressive terms. Ex (p,d,q)=(1,0,0).
    d: int
        Number of nonseasonal differences needed for stationarity
    q: int
        Number of lagged forecast errors in the prediction equation
    axis: int (optional)
        0 if time series is in row format (Ex. data[0][:] is 1st data point).
        1 if time series is in col format (Ex. data[:][0] is 1st data point).

    Returns
    -------
    numpy.ndarray
    """
    assert isinstance(p, int), "Parameter `p` must be an integer"
    assert isinstance(d, int), "Parameter `d` must be an integer"
    assert isinstance(q, int), "Parameter `q` must be an integer"

    null_xy = find_null(data)
    # Placeholder: the ARIMA fit itself is not implemented yet; for now the
    # function only reports the locations of the missing values.
    for x, y in null_xy:
        print(x, y)
    return data
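The body above is only a stub. As a hedged sketch of how it could be completed (not the library's actual implementation): statsmodels' state-space ARIMA tolerates NaNs in the input series, so its in-sample predictions can fill them. The function name and the column-per-series layout here are assumptions for illustration.

import numpy as np
from statsmodels.tsa.arima.model import ARIMA

def arima_impute_sketch(data, p, d, q):
    # Assumes each column of `data` holds one time series
    data = data.copy()
    for col in range(data.shape[1]):
        series = data[:, col]
        mask = np.isnan(series)
        if mask.any():
            # The state-space ARIMA handles missing observations internally
            fitted = ARIMA(series, order=(p, d, q)).fit()
            preds = fitted.predict(start=0, end=len(series) - 1)
            data[mask, col] = preds[mask]
    return data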
Example #3
def mode(data):
    """ Substitute missing values with the mode of that column(most frequent).

    In the case that there is a tie (there are multiple, most frequent values)
    for a column randomly pick one of them.

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)
    modes = []
    for y_i in range(np.shape(data)[1]):
        unique_counts = np.unique(data[:, [y_i]], return_counts=True)
        max_count = np.max(unique_counts[1])
        mode_y = [
            unique for unique, count in np.transpose(unique_counts)
            if count == max_count and not np.isnan(unique)
        ]
        modes.append(mode_y)  # Append the list of modes for this column
    for x_i, y_i in null_xy:
        data[x_i][y_i] = np.random.choice(modes[y_i])
    return data
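A quick usage sketch (NumPy and the find_null helper assumed in scope); with a unique mode per column the result is deterministic:

data = np.array([[1., 2.],
                 [1., np.nan],
                 [3., 2.]])
mode(data)
# array([[1., 2.],
#        [1., 2.],
#        [3., 2.]])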
Example #4
def count_missing(data):
    """ Calculate the total percentage of missing values and also the
    percentage in each column.

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.

    Returns
    -------
    dict
        Percentage of missing values in total and in each column.

    """
    size = len(data.flatten())
    null_xy = find_null(data)
    counter = {y: 0. for y in np.unique(null_xy.T[1])}
    change_in_percentage = 1. / size
    for _, y in null_xy:
        counter[y] += change_in_percentage
    total_missing = len(null_xy) / size
    counter["total"] = total_missing

    return counter
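For example (the per-column keys are NumPy integer column indices, so plain int lookups also work):

data = np.array([[1., np.nan, 3.],
                 [4., 5., np.nan]])
counts = count_missing(data)
counts["total"]  # 2 of 6 values missing -> 0.333...
counts[1]        # one missing value in column 1 -> 0.1666...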
Example #5
def locf(data, axis=0):
    """ Last Observation Carried Forward

    For each set of missing indices, use the value of the row before (same
    column). If the missing value is in the first row, look one row ahead
    instead. If that next row is also NaN, keep moving to the following row
    until a non-NaN value is found in this column; all the rows before it
    are filled with this value.

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.
    axis: int (optional)
        0 if time series is in row format (Ex. data[0][:] is 1st data point).
        1 if time series is in col format (Ex. data[:][0] is 1st data point).

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    if axis == 0:
        data = np.transpose(data)
    elif axis == 1:
        pass
    else:
        raise BadInputError(
            "Error: Axis value is invalid, please use either 0 (row format) or 1 (column format)"
        )

    null_xy = find_null(data)
    for x_i, y_i in null_xy:
        # Simplest scenario, look one row back
        if x_i > 0:
            data[x_i][y_i] = data[x_i - 1][y_i]
        # Look n rows forward
        else:
            x_residuals = np.shape(data)[0] - x_i - 1  # n datapoints left
            val_found = False
            for i in range(1, x_residuals + 1):
                if not np.isnan(data[x_i + i][y_i]):
                    val_found = True
                    break
            if val_found:
                # pylint: disable=undefined-loop-variable
                for x_nan in range(i):
                    data[x_i + x_nan][y_i] = data[x_i + i][y_i]
            else:
                raise Exception("Error: Entire Column is NaN")
    # Restore the original orientation before returning
    return np.transpose(data) if axis == 0 else data
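A small usage sketch with each series stored column-wise (axis=1): the leading NaN is backfilled from the first observed value below it, and the later NaN carries the previous observation forward:

data = np.array([[np.nan, 1.],
                 [2., np.nan],
                 [3., 4.]])
locf(data, axis=1)
# array([[2., 1.],
#        [2., 1.],
#        [3., 4.]])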
Example #6
def em(data, loops=50):
    """ Imputes given data using expectation maximization.

    E-step: Calculates the expected complete data log likelihood ratio.
    M-step: Finds the parameters that maximize the log likelihood of the
    complete data.

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.
    loops: int
        Number of em iterations to run before breaking.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)
    for x_i, y_i in null_xy:
        col = data[:, int(y_i)]
        mu = col[~np.isnan(col)].mean()
        std = col[~np.isnan(col)].std()
        col[x_i] = np.random.normal(loc=mu, scale=std)
        previous = 1
        for i in range(loops):
            # Expectation
            mu = col[~np.isnan(col)].mean()
            std = col[~np.isnan(col)].std()
            # Maximization
            col[x_i] = np.random.normal(loc=mu, scale=std)
            # Break out of loop if likelihood doesn't change at least 10%
            # and has run at least 5 times
            delta = abs((col[x_i] - previous) / previous)
            if i > 5 and delta < 0.1:
                data[x_i][y_i] = col[x_i]
                break
            data[x_i][y_i] = col[x_i]
            previous = col[x_i]
    return data
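Usage sketch; seeding NumPy makes the draws reproducible, but the imputed value is still a random draw around the observed column statistics:

np.random.seed(0)
data = np.array([[1., 2.],
                 [np.nan, 4.],
                 [5., 6.]])
em(data, loops=50)  # NaN replaced by a draw from N(3.0, 2.0), the column's stats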
Example #7
def impute_SOM(data, n):
    """ Impute missing values using a self-organising map (SOM).

    Missing entries are zero-filled so the SOM can be trained on a complete
    matrix; each missing cell is then replaced by the mean of the SOM weight
    vectors in the neighbourhood of that row's winning node. Relies on a
    custom `SOM` class (an (n, n) grid of nodes) from the surrounding code.
    """
    # Zero-fill the missing values to get a complete training matrix
    imputer = SimpleImputer(missing_values=np.nan,
                            strategy='constant',
                            fill_value=0)
    data_fill0 = imputer.fit_transform(data)
    data_miss = data.copy()

    som = SOM(data_fill0, (n, n), 3, 300)
    som.train()
    # Winning node index for each input row
    res = np.array(som.train_result()).reshape(-1, 1)
    null_set = find_null(data)
    for i1, i2 in null_set:
        # Union the members of the nodes around the winner for row i1
        neighbor = som.getneighbor(res[i1][0], 1)
        activation_group = neighbor[0]
        for j in neighbor:
            activation_group = activation_group | j
        # Impute with the column mean over the group's weight vectors
        activation_group = np.array([som.W.T[p] for p in activation_group])
        data_miss[i1, i2] = activation_group.mean(axis=0)[i2]
    return data_miss
Example #8
def mean(data):
    """ Substitute missing values with the mean of that column.

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)
    for x_i, y_i in null_xy:
        col_wo_nan = data[:, [y_i]][~np.isnan(data[:, [y_i]])]
        new_value = np.mean(col_wo_nan)
        data[x_i][y_i] = new_value
    return data
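Quick check (column 1 has a single observed value, 4., so that is the imputed mean):

data = np.array([[1., np.nan],
                 [3., 4.]])
mean(data)
# array([[1., 4.],
#        [3., 4.]])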
Example #9
def random(data):
    """ Fill missing values in with a randomly selected value from the same
    column.

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)
    for x, y in null_xy:
        uniques = np.unique(data[:, y])
        uniques = uniques[~np.isnan(uniques)]
        data[x][y] = np.random.choice(uniques)
    return data
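Usage sketch; with only one distinct non-NaN value in the affected column the draw is deterministic:

data = np.array([[1., np.nan],
                 [3., 4.],
                 [5., 4.]])
random(data)
# array([[1., 4.],
#        [3., 4.],
#        [5., 4.]])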
Example #10
def fast_knn(data, k=3, **kwargs):
    """ Impute using a variant of the nearest neighbours approach

    Basic idea: Impute array and then use the resulting complete
    array to construct a KDTree. Use this KDTree to compute nearest neighbours.
    After finding `k` nearest neighbours, take the weighted average of them.

    This approach is much, much faster than the other implementation (fit+transform
    for each subset) which is almost prohibitively expensive.

    Parameters
    ----------
    data: numpy.ndarray
        2D matrix to impute.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)
    data_c = mean(data)
    kdtree = KDTree(data_c)

    for x_i, y_i in null_xy:
        distances, indices = kdtree.query(data_c[x_i], k=k + 1)
        # Will always return itself in the first index. Delete it.
        distances, indices = distances[1:], indices[1:]
        weights = (np.sum(distances) - distances) / np.sum(distances)
        # Make weights sum to 1
        weights_unit = weights / np.sum(weights)
        # Assign missing value the weighted average of `k` nearest neighbours
        data[x_i][y_i] = np.dot(weights_unit,
                                [data_c[ind][y_i] for ind in indices])
    return data
Example #11
def median(data):
    """ Substitute missing values with the median of that column(middle).

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)
    cols_missing = set(null_xy.T[1])
    medians = {}
    for y_i in cols_missing:
        cols_wo_nan = data[:, [y_i]][~np.isnan(data[:, [y_i]])]
        median_y = np.median(cols_wo_nan)
        medians[str(y_i)] = median_y
    for x_i, y_i in null_xy:
        data[x_i][y_i] = medians[str(y_i)]
    return data
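Quick check (column 1's observed values are 2. and 8., so the median 5. fills the gap):

data = np.array([[1., np.nan],
                 [3., 2.],
                 [5., 8.]])
median(data)
# array([[1., 5.],
#        [3., 2.],
#        [5., 8.]])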
Example #12
def fast_knn(data,
             k=3,
             eps=0,
             p=2,
             distance_upper_bound=np.inf,
             leafsize=10,
             **kwargs):
    """ Impute using a variant of the nearest neighbours approach

    Basic idea: Impute array with a basic mean impute and then use the resulting complete
    array to construct a KDTree. Use this KDTree to compute nearest neighbours.
    After finding `k` nearest neighbours, take the weighted average of them.
    Basically, find the nearest rows in terms of distance.

    This approach is much, much faster than the other implementation (fit+transform
    for each subset) which is almost prohibitively expensive.


    Parameters
    ----------
    data: numpy.ndarray
        2D matrix to impute.

    k: int, optional
        Parameter used for method querying the KDTree class object. Number of
        neighbours used in the KNN query. Refer to the docs for
        [`scipy.spatial.KDTree.query`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.KDTree.query.html).

    eps: nonnegative float, optional
        Parameter used for method querying the KDTree class object. From the
        SciPy docs: "Return approximate nearest neighbors; the kth returned
        value is guaranteed to be no further than (1+eps) times the distance to
        the real kth nearest neighbor". Refer to the docs for
        [`scipy.spatial.KDTree.query`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.KDTree.query.html).

    p : float, 1<=p<=infinity, optional
        Parameter used for method querying the KDTree class object. Straight from the
        SciPy docs: "Which Minkowski p-norm to use. 1 is the
        sum-of-absolute-values Manhattan distance, 2 is the usual Euclidean
        distance, and infinity is the maximum-coordinate-difference distance". Refer to
        the docs for
        [`scipy.spatial.KDTree.query`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.KDTree.query.html).

    distance_upper_bound : nonnegative float, optional
        Parameter used for method querying the KDTree class object. Straight
        from the SciPy docs: "Return only neighbors within this distance. This
        is used to prune tree searches, so if you are doing a series of
        nearest-neighbor queries, it may help to supply the distance to the
        nearest neighbor of the most recent point." Refer to the docs for
        [`scipy.spatial.KDTree.query`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.KDTree.query.html).

    leafsize: int, optional
        Parameter used for construction of the `KDTree` class object. Straight from
        the SciPy docs: "The number of points at which the algorithm switches
        over to brute-force. Has to be positive". Refer to the docs for
        [`scipy.spatial.KDTree`](https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.spatial.KDTree.html)
        for more information.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    Examples
    --------

        >>> data = np.arange(25).reshape((5, 5)).astype(float)
        >>> data[0][2] =  np.nan
        >>> data
        array([[ 0.,  1., nan,  3.,  4.],
               [ 5.,  6.,  7.,  8.,  9.],
               [10., 11., 12., 13., 14.],
               [15., 16., 17., 18., 19.],
               [20., 21., 22., 23., 24.]])
        >> fast_knn(data, k=1) # Weighted average (by distance) of nearest 1 neighbour
        array([[ 0.,  1.,  7.,  3.,  4.],
               [ 5.,  6.,  7.,  8.,  9.],
               [10., 11., 12., 13., 14.],
               [15., 16., 17., 18., 19.],
               [20., 21., 22., 23., 24.]])
        >> fast_knn(data, k=2) # Weighted average of nearest 2 neighbours
        array([[ 0.        ,  1.        , 10.08608891,  3.        ,  4.        ],
               [ 5.        ,  6.        ,  7.        ,  8.        ,  9.        ],
               [10.        , 11.        , 12.        , 13.        , 14.        ],
               [15.        , 16.        , 17.        , 18.        , 19.        ],
               [20.        , 21.        , 22.        , 23.        , 24.        ]])
        >> fast_knn(data, k=3)
        array([[ 0.        ,  1.        , 13.40249283,  3.        ,  4.        ],
               [ 5.        ,  6.        ,  7.        ,  8.        ,  9.        ],
               [10.        , 11.        , 12.        , 13.        , 14.        ],
               [15.        , 16.        , 17.        , 18.        , 19.        ],
               [20.        , 21.        , 22.        , 23.        , 24.        ]])
        >> fast_knn(data, k=5) # Only 4 other rows exist, so k=5 raises an error
        ...
        IndexError: index 5 is out of bounds for axis 0 with size 5

    """
    null_xy = find_null(data)
    data_c = mean(data)
    kdtree = KDTree(data_c, leafsize=leafsize)

    for x_i, y_i in null_xy:
        distances, indices = kdtree.query(
            data_c[x_i],
            k=k + 1,
            eps=eps,
            p=p,
            distance_upper_bound=distance_upper_bound)
        # Will always return itself in the first index. Delete it.
        distances, indices = distances[1:], indices[1:]
        weights = distances / np.sum(distances)
        # Assign missing value the weighted average of `k` nearest neighbours
        data[x_i][y_i] = np.dot(weights, [data_c[ind][y_i] for ind in indices])
    return data
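Note that weights = distances / np.sum(distances) gives farther neighbours larger weights; the doctest values above were produced by exactly this scheme. If you prefer weighting where closer neighbours count more (as in the shorter variant earlier on this page), the weight line could be swapped for something like:

# Alternative weighting, not what the doctest outputs were computed with:
inv = 1.0 / np.maximum(distances, 1e-12)  # guard against zero distances
weights = inv / np.sum(inv)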
Example #13
def buck_iterative(data):
    """ Iterative variant of buck's method

    - Variable to regress on is chosen at random.
    - EM type infinite regression loop stops after change in prediction from
      previous prediction < 10% for all columns with missing values

    A Method of Estimation of Missing Values in Multivariate Data Suitable for
    use with an Electronic Computer S. F. Buck Journal of the Royal Statistical
    Society. Series B (Methodological) Vol. 22, No. 2 (1960), pp. 302-306

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)

    # Add a column of zeros to the index values
    null_xyv = np.append(null_xy, np.zeros((np.shape(null_xy)[0], 1)), axis=1)

    null_xyv = [[int(x), int(y), v] for x, y, v in null_xyv]
    temp = []
    cols_missing = {y for _, y, _ in null_xyv}

    # Step 1: Simple Imputation, these are just placeholders
    for x_i, y_i, value in null_xyv:
        # Column containing nan value without the nan value
        col = data[:, [y_i]][~np.isnan(data[:, [y_i]])]

        new_value = np.mean(col)
        data[x_i][y_i] = new_value
        temp.append([x_i, y_i, new_value])
    null_xyv = temp

    # Step 5: Repeat steps 2 - 4 until convergence

    converged = [False] * len(null_xyv)
    while not all(converged):
        # Step 2: Placeholders are set back to missing for one variable/column
        dependent_col = int(np.random.choice(list(cols_missing)))
        missing_xs = [int(x) for x, y, value in null_xyv if y == dependent_col]

        # Step 3: Perform linear regression using the other variables
        x_train, y_train = [], []
        for x_i in (x_i for x_i in range(len(data)) if x_i not in missing_xs):
            x_train.append(np.delete(data[x_i], dependent_col))
            y_train.append(data[x_i][dependent_col])
        model = LinearRegression()
        model.fit(x_train, y_train)

        # Step 4: Missing values for the missing variable/column are replaced
        # with predictions from our new linear regression model
        # For null indices with the dependent column that was randomly chosen
        for i, z in enumerate(null_xyv):
            x_i, y_i = z[0], z[1]
            value = data[x_i, y_i]
            if y_i == dependent_col:
                # Predict from the row with the dependent column removed
                new_value = model.predict(
                    [np.delete(data[x_i], dependent_col)])[0]
                data[x_i][y_i] = new_value
                # Guard against division by zero in the relative change
                if value == 0.0:
                    delta = (new_value - value) / 0.01
                else:
                    delta = (new_value - value) / value
                converged[i] = abs(delta) < 0.1
    return data
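Usage sketch (scikit-learn's LinearRegression and find_null assumed in scope); with the second column an exact multiple of the first, the regression recovers the missing value and the loop converges quickly:

import numpy as np
from sklearn.linear_model import LinearRegression

data = np.array([[1., 2.],
                 [2., np.nan],
                 [3., 6.],
                 [4., 8.]])
buck_iterative(data)  # the NaN converges to 4.0 (= 2 * 2.0)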
Example #14
def test_missing_values_present():
    """ Check that the dataset is corrupted (missing values present)"""
    assert find_null(data).size != 0
Example #15
def mice(data, **kwargs):
    """Multivariate Imputation by Chained Equations

    Reference:
        Buuren, S. V., & Groothuis-Oudshoorn, K. (2011). Mice: Multivariate
        Imputation by Chained Equations in R. Journal of Statistical Software,
        45(3). doi:10.18637/jss.v045.i03

    Implementation follows the main idea from the paper above. It differs in
    the choice of which variable to regress on (here it is chosen at random)
    and in the stopping criterion (here the model stops once the change in
    prediction from the previous prediction is less than 10%).

    Parameters
    ----------
    data: numpy.ndarray
        Data to impute.

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    null_xy = find_null(data)

    # Add a column of zeros to the index values
    null_xyv = np.append(null_xy, np.zeros((np.shape(null_xy)[0], 1)), axis=1)

    null_xyv = [[int(x), int(y), v] for x, y, v in null_xyv]
    temp = []
    cols_missing = {y for _, y, _ in null_xyv}

    # Step 1: Simple Imputation, these are just placeholders
    for x_i, y_i, value in null_xyv:
        # Column containing nan value without the nan value
        col = data[:, [y_i]][~np.isnan(data[:, [y_i]])]

        new_value = np.mean(col)
        data[x_i][y_i] = new_value
        temp.append([x_i, y_i, new_value])
    null_xyv = temp

    # Step 5: Repeat steps 2 - 4 until convergence

    converged = [False] * len(null_xyv)
    while not all(converged):
        # Step 2: Placeholders are set back to missing for one variable/column
        dependent_col = int(np.random.choice(list(cols_missing)))
        missing_xs = [int(x) for x, y, value in null_xyv if y == dependent_col]

        # Step 3: Perform linear regression using the other variables
        x_train, y_train = [], []
        for x_i in (x_i for x_i in range(len(data)) if x_i not in missing_xs):
            x_train.append(np.delete(data[x_i], dependent_col))
            y_train.append(data[x_i][dependent_col])
        model = LinearRegression()
        model.fit(x_train, y_train)

        # Step 4: Missing values for the missing variable/column are replaced
        # with predictions from our new linear regression model
        # For null indices with the dependent column that was randomly chosen
        for i, (x_i, y_i, value) in enumerate(null_xyv):
            if y_i == dependent_col:
                # Predict from the row with the dependent column removed
                new_value = model.predict(
                    [np.delete(data[x_i], dependent_col)])[0]
                data[x_i][y_i] = new_value
                null_xyv[i] = [x_i, y_i, new_value]
                # Guard against division by zero in the relative change
                if value == 0.0:
                    delta = (new_value - value) / 0.01
                else:
                    delta = (new_value - value) / value
                converged[i] = abs(delta) < 0.1
    return data
Example #16
def test_missing_values_present(self):
    """ Check that the dataset is corrupted (missing values present)"""
    self.assertTrue(find_null(self.data).size != 0)
Example #17
def _nan_exists(data):
    """ True if there is at least one np.nan in the array"""
    null_xy = find_null(data)
    return len(null_xy) > 0
Example #18
def moving_window(data, nindex=None, wsize=5, errors="coerce", func=np.mean,
        inplace=False, **kwargs):
    """ Interpolate the missing values based on nearby values.

    For example, with an array like this:

        array([[-1.24940, -1.38673, -0.03214945,  0.08255145, -0.007415],
               [ 2.14662,  0.32758 , -0.82601414,  1.78124027,  0.873998],
               [-0.41400, -0.977629,         nan, -1.39255344,  1.680435],
               [ 0.40975,  1.067599,  0.29152388, -1.70160145, -0.565226],
               [-0.54592, -1.126187,  2.04004377,  0.16664863, -0.010677]])

    Using a window size (`wsize`) of 3, the one missing value would be set
    to -1.18509122. The window operates on the horizontal axis.

    Usage
    -----

    The parameters default the function to a moving mean. You may want to change
    the default window size:
        
        moving_window(data, wsize=10)

    To only look at past data (null value is at the rightmost index in the window):

        moving_window(data, nindex=-1)

    To use a custom function:
    
        moving_window(data, func=np.median)

    You can also do something like take 1.5x the max of previous values in the window:

        moving_window(data, func=lambda arr: max(arr) * 1.50, nindex=-1)
    

    Parameters
    ----------
    data: numpy.ndarray
        2D matrix to impute.
    nindex: int
        Null index. Index of the null value inside the moving average window.
        Use case: say you wanted the imputed value skewed toward the left or
        right side; 0 only takes the average of values from the right, and
        -1 only takes the average of values from the left.
    wsize: int
        Window size. Size of the moving average window/area of values being used
        for each local imputation. This number includes the missing value.
    errors: {"raise", "coerce", "ignore"}
        Errors will occur with the indexing of the windows - for example if there
        is a nan at data[x][0] and `nindex` is set to -1 or there is a nan at
        data[x][-1] and `nindex` is set to 0. `"raise"` will raise an error,
        `"coerce"` will try again using an nindex set to the middle and `"ignore"`
        will just leave it as a nan.
    inplace: {True, False}
        Whether to return a copy or run on the passed-in array

    Returns
    -------
    numpy.ndarray
        Imputed data.

    """
    if errors == "ignore":
        raise Exception("`errors` value `ignore` not implemented yet. Sorry!")

    if not inplace:
        data = data.copy()

    if nindex is None: # If using equal window side lengths
        assert wsize % 2 == 1, "The parameter `wsize` should not be even "\
        "if the value `nindex` is not set since it defaults to the midpoint "\
        "and an even `wsize` makes the midpoint ambiguous"
        wside_left = wsize // 2
        wside_right = wsize // 2
    else: # If using custom window side lengths
        assert nindex < wsize, "The null index must be smaller than the window size"
        if nindex == -1:
            wside_left = wsize - 1
            wside_right = 0
        else:
            wside_left = nindex
            wside_right = wsize - nindex - 1

    while True:
        null_xy = find_null(data)
        n_null_prev = len(null_xy)
        for x_i, y_i in null_xy:
            left_i = max(0, y_i - wside_left)
            right_i = min(np.shape(data)[1], y_i + wside_right + 1)
            window = data[x_i, left_i: right_i]
            window_not_null = window[~np.isnan(window)]

            if len(window_not_null) > 0:
                try:
                    data[x_i][y_i] = func(window_not_null)
                    continue
                except Exception as e:
                    if errors == "raise":
                        raise e
                    else:
                        pass

            # Aggregate function didn't work; retry with a centred window
            if errors == "coerce":
                left_i = max(0, y_i - wsize // 2)
                right_i = min(np.shape(data)[1], y_i + wsize // 2 + 1)
                window = data[x_i, left_i: right_i]
                window_not_null = window[~np.isnan(window)]
                try:
                    data[x_i][y_i] = func(window_not_null)
                except Exception:
                    pass
        if n_null_prev == len(find_null(data)):
            break

    return data
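Usage sketch with the centred default window (wsize=3 keeps the arithmetic obvious):

data = np.array([[1., np.nan, 3., 4., 5.]])
moving_window(data, wsize=3)
# array([[1., 2., 3., 4., 5.]])  # nan -> mean(1., 3.)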