예제 #1
0
def get_non_outliers(v, MAD_threshold=2.5):
    """Return the positions, mean and std of the non-zero, non-outlier values in `v`.

    Outliers are flagged by `get_list_of_outliers`, which receives
    `MAD_threshold` as its `threshold` argument (presumably a MAD-based
    cutoff, going by the parameter name -- confirm against that helper).
    Every position where `v` equals zero is additionally flagged as an outlier.

    Returns a tuple `(indices, mean, std)`: the integer positions that were
    not flagged, and the `np.mean` / `np.std` of `v` at those positions.
    There is no special handling for the case where nothing remains; the
    statistics are then computed over an empty selection.
    """

    outlier_flags = get_list_of_outliers(v, threshold=MAD_threshold)

    # Zero positions are always treated as outliers, even when the outlier
    # test itself would have accepted a value of zero.
    zero_positions = np.where(v == 0)
    outlier_flags[zero_positions] = 1

    # Whatever is left unflagged is a non-zero, non-outlier position.
    kept_positions = np.where(np.logical_not(outlier_flags))[0]
    kept_values = v[kept_positions]

    return kept_positions, np.mean(kept_values), np.std(kept_values)
예제 #2
0
def get_non_outliers_information(v, MAD_threshold=2.5):
    """Return the non-outlier positions of `v` and their coverage statistics.

    Outliers are flagged by `get_list_of_outliers`, which receives
    `MAD_threshold` as its `threshold` argument.

    Returns a tuple `(non_outlier_indices, d)`:

      - `non_outlier_indices`: integer positions in `v` that were not flagged
        as outliers, or an empty array when there are none.
      - `d`: a `pd.Series` indexed by `columns_for_samples_coverage_stats_dict`
        in which 'non_outlier_mean_coverage' and 'non_outlier_coverage_std'
        are set to the `np.mean` / `np.std` of `v` at those positions, or to
        0.0 when no position survives. Other entries are left as created.
    """

    d = pd.Series(index=columns_for_samples_coverage_stats_dict)
    outliers = get_list_of_outliers(v, threshold=MAD_threshold)
    non_outliers = np.logical_not(outliers)

    # Resolve the mask to positions *before* testing for emptiness: the mask
    # always has one entry per element of `v`, so its length says nothing
    # about how many positions actually survived the outlier filter.
    non_outlier_indices = np.where(non_outliers)[0]

    if not len(non_outlier_indices):
        # Nothing survived (or `v` was empty): report zeros rather than
        # taking the mean/std of an empty selection.
        non_outlier_indices = np.array([])
        d['non_outlier_mean_coverage'] = 0.0
        d['non_outlier_coverage_std'] = 0.0

    else:
        d['non_outlier_mean_coverage'] = np.mean(v[non_outlier_indices])
        d['non_outlier_coverage_std'] = np.std(v[non_outlier_indices])

    return non_outlier_indices, d
예제 #3
0
def get_non_outliers_information(v, MAD_threshold=2.5, zeros_are_outliers=False):
    """Return the non-outlier positions of `v` and their coverage statistics.

    Outlier detection is delegated to `get_list_of_outliers`, which receives
    `MAD_threshold` (as `threshold`) and `zeros_are_outliers` unchanged.

    Returns a tuple `(non_outlier_indices, d)` where `d` is a `pd.Series`
    indexed by `columns_for_samples_coverage_stats_dict`. Its
    'non_outlier_mean_coverage' and 'non_outlier_coverage_std' entries hold
    the `np.mean` / `np.std` of `v` at the non-outlier positions, or 0.0 for
    both when no position survives (the indices are then an empty array).
    """

    stats = pd.Series(index=columns_for_samples_coverage_stats_dict)
    outlier_mask = get_list_of_outliers(v, threshold=MAD_threshold, zeros_are_outliers=zeros_are_outliers)
    kept_positions = np.where(np.logical_not(outlier_mask))[0]

    if len(kept_positions):
        # At least one position survived: summarize the values found there.
        kept_values = v[kept_positions]
        stats['non_outlier_mean_coverage'] = np.mean(kept_values)
        stats['non_outlier_coverage_std'] = np.std(kept_values)
        return kept_positions, stats

    # Everything was flagged: report zeros together with a fresh empty array.
    stats['non_outlier_mean_coverage'] = 0.0
    stats['non_outlier_coverage_std'] = 0.0
    return np.array([]), stats