def get_non_outliers(v, MAD_threshold=2.5):
    """Return the positions, mean and std of the non-zero, non-outlier values of `v`.

    Outliers are flagged by `get_list_of_outliers` (called with
    `threshold=MAD_threshold`); every position where `v == 0` is then also
    marked as an outlier, regardless of what the outlier test decided.

    Parameters
    ----------
    v
        The values to inspect. Must support `v == 0` and indexing with an
        integer array (the original docstring describes it as a pandas
        series -- TODO confirm against callers).
    MAD_threshold
        Forwarded to `get_list_of_outliers` as its `threshold` argument.

    Returns
    -------
    tuple
        `(non_outliers_indices, mean, std)` where `non_outliers_indices` are
        the positions kept, and `mean` / `std` are computed over `v` at those
        positions. When nothing is kept, `mean` and `std` are both 0.0.
    """
    outliers = get_list_of_outliers(v, threshold=MAD_threshold)

    # Zero positions are treated as outliers even when the outlier test
    # itself did not flag them.
    outliers[np.where(v == 0)] = 1

    non_outliers_indices = np.where(np.logical_not(outliers))[0]

    if not len(non_outliers_indices):
        # Nothing survived: the mean/std of an empty selection would be nan,
        # so report 0.0 like get_non_outliers_information does.
        return non_outliers_indices, 0.0, 0.0

    kept_values = v[non_outliers_indices]
    return non_outliers_indices, np.mean(kept_values), np.std(kept_values)
def get_non_outliers_information(v, MAD_threshold=2.5):
    """Return the non-outlier positions of `v` and their coverage statistics.

    Outliers are flagged by `get_list_of_outliers` (called with
    `threshold=MAD_threshold`); the remaining positions are the non-outliers.

    Parameters
    ----------
    v
        The values to inspect. Must support indexing with an integer array
        (the original docstring describes it as a pandas series -- TODO
        confirm against callers).
    MAD_threshold
        Forwarded to `get_list_of_outliers` as its `threshold` argument.

    Returns
    -------
    tuple
        `(non_outlier_indices, d)` where `d` is a `pd.Series` indexed by
        `columns_for_samples_coverage_stats_dict` with
        `'non_outlier_mean_coverage'` and `'non_outlier_coverage_std'` set.
        When there are no non-outliers, both statistics are 0.0 and
        `non_outlier_indices` is an empty array.

    NOTE(review): this file appears to define `get_non_outliers_information`
    again further down; if so, that later definition is the one callers get.
    """
    d = pd.Series(index=columns_for_samples_coverage_stats_dict)

    outliers = get_list_of_outliers(v, threshold=MAD_threshold)
    non_outlier_indices = np.where(np.logical_not(outliers))[0]

    # Test the number of surviving positions, not the length of the boolean
    # mask: the mask is as long as the input even when every value is an
    # outlier, which would send an empty selection to np.mean / np.std.
    if not len(non_outlier_indices):
        non_outlier_indices = np.array([])
        d['non_outlier_mean_coverage'] = 0.0
        d['non_outlier_coverage_std'] = 0.0
    else:
        non_outlier_values = v[non_outlier_indices]
        d['non_outlier_mean_coverage'] = np.mean(non_outlier_values)
        d['non_outlier_coverage_std'] = np.std(non_outlier_values)

    return non_outlier_indices, d
def get_non_outliers_information(v, MAD_threshold=2.5, zeros_are_outliers=False):
    """Locate the non-outlier positions of `v` and summarize their coverage.

    `get_list_of_outliers` decides which positions are outliers; it receives
    `MAD_threshold` as `threshold` and `zeros_are_outliers` unchanged.

    Returns a tuple `(non_outlier_indices, d)`: the positions that were not
    flagged, and a `pd.Series` indexed by
    `columns_for_samples_coverage_stats_dict` whose
    `'non_outlier_mean_coverage'` and `'non_outlier_coverage_std'` entries hold
    the mean and standard deviation of `v` at those positions. If no position
    survives, both entries are 0.0 and the indices are an empty array.
    """
    stats = pd.Series(index=columns_for_samples_coverage_stats_dict)

    outlier_mask = get_list_of_outliers(
        v,
        threshold=MAD_threshold,
        zeros_are_outliers=zeros_are_outliers,
    )
    kept_positions = np.where(np.logical_not(outlier_mask))[0]

    if len(kept_positions):
        kept_values = v[kept_positions]
        stats['non_outlier_mean_coverage'] = np.mean(kept_values)
        stats['non_outlier_coverage_std'] = np.std(kept_values)
        return kept_positions, stats

    # Every position was flagged: report zeros rather than statistics of an
    # empty selection, and hand back a fresh empty array.
    stats['non_outlier_mean_coverage'] = 0.0
    stats['non_outlier_coverage_std'] = 0.0
    return np.array([]), stats