Example #1
def get_mutual_information_correlation(spn, context):
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)

    correlation_matrix = []

    for x in range(num_features):
        if x not in categoricals:
            correlation_matrix.append(np.full((num_features), np.nan))
        else:
            x_correlation = [np.nan] * num_features
            x_range = context.get_domains_by_scope([x])[0]
            spn_x = marginalize(spn, [x])
            query_x = np.array([[np.nan] * num_features] * len(x_range))
            query_x[:, x] = x_range
            for y in categoricals:
                if x == y:
                    x_correlation[x] = 1
                    continue
                spn_y = marginalize(spn, [y])
                spn_xy = marginalize(spn, [x, y])
                y_range = context.get_domains_by_scope([y])[0]
                query_y = np.array([[np.nan] * num_features] * len(y_range))
                query_y[:, y] = y_range
                query_xy = np.array([[np.nan] * num_features] *
                                    (len(x_range) * len(y_range)))
                xy = np.mgrid[x_range[0]:x_range[-1]:len(x_range) * 1j,
                              y_range[0]:y_range[-1]:len(y_range) * 1j]
                xy = xy.reshape(2, -1)
                query_xy[:, x] = xy[0, :]
                query_xy[:, y] = xy[1, :]
                results_xy = likelihood(spn_xy, query_xy)
                results_xy = results_xy.reshape(len(x_range), len(y_range))
                results_x = likelihood(spn_x, query_x)
                results_y = likelihood(spn_y, query_y)
                xx, yy = np.mgrid[0:len(x_range) - 1:len(x_range) * 1j,
                                  0:len(y_range) - 1:len(y_range) * 1j]
                xx = xx.astype(int)
                yy = yy.astype(int)

                grid_results_x = results_x[xx]
                grid_results_y = results_y[yy]
                grid_results_xy = results_xy

                log = np.log(
                    grid_results_xy /
                    (np.multiply(grid_results_x, grid_results_y).squeeze()))
                prod = np.prod(np.array([log, grid_results_xy]), axis=0)

                log_x = np.log(results_x)
                log_y = np.log(results_y)

                entropy_x = -1 * np.sum(np.multiply(log_x, results_x))
                entropy_y = -1 * np.sum(np.multiply(log_y, results_y))

                x_correlation[y] = (np.sum(prod) /
                                    np.sqrt(entropy_x * entropy_y))
            correlation_matrix.append(np.array(x_correlation))
    return np.array(correlation_matrix)
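A quick way to sanity-check the normalization used above (mutual information divided by the geometric mean of the entropies) is to compute it from an explicit joint table; the following is a plain-numpy sketch with made-up probabilities and no SPN involved:

import numpy as np

# Hypothetical 2x3 joint distribution of two categorical variables (made-up numbers).
p_xy = np.array([[0.20, 0.10, 0.05],
                 [0.05, 0.25, 0.35]])
p_x = p_xy.sum(axis=1, keepdims=True)  # marginal of X
p_y = p_xy.sum(axis=0, keepdims=True)  # marginal of Y

# Mutual information I(X;Y) = sum_xy p(x,y) * log(p(x,y) / (p(x) p(y)))
mi = np.sum(p_xy * np.log(p_xy / (p_x * p_y)))

# Entropies H(X) and H(Y)
h_x = -np.sum(p_x * np.log(p_x))
h_y = -np.sum(p_y * np.log(p_y))

# Same normalization as get_mutual_information_correlation: I(X;Y) / sqrt(H(X) H(Y))
print(mi / np.sqrt(h_x * h_y))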
Example #2
def get_full_correlation(spn, context):
    categoricals = get_categoricals(spn, context)
    full_corr = get_correlation_matrix(spn)
    for cat in categoricals:
        full_corr[:, cat] = np.nan
        full_corr[cat, :] = np.nan
    cat_corr = get_categorical_correlation(spn, context)
    cat_cat_corr = get_mutual_information_correlation(spn, context)
    result = np.nansum([full_corr, cat_corr, cat_cat_corr], axis=0)
    return result
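np.nansum is used here to overlay the three partial matrices, each of which marks the entries it does not cover with NaN; a tiny standalone illustration with made-up 2x2 matrices:

import numpy as np

# Two partial matrices covering disjoint entries (made-up values); NaN = "not computed".
a = np.array([[1.0, np.nan],
              [np.nan, np.nan]])
b = np.array([[np.nan, 0.4],
              [0.4, 1.0]])

# Summing over the stacked matrices while ignoring NaNs merges them entry-wise.
print(np.nansum([a, b], axis=0))
# [[1.  0.4]
#  [0.4 1. ]]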
Example #3
def categorical_correlations(spn, dictionary):
    context = dictionary['context']
    categoricals = get_categoricals(spn, context)
    corr = get_full_correlation(spn, context)
    num_features = len(spn.scope)
    feature_names = context.feature_names

    all_combinations = [
        (i, j)
        for i, j in itertools.product(range(num_features), range(num_features))
        if i > j and np.abs(corr[i, j]) > correlation_threshold
    ]
    if isinstance(feature_combinations, int):
        num_choices = min(feature_combinations, len(all_combinations))
        shown_combinations = random.sample(all_combinations, k=num_choices)
    elif feature_combinations == 'all':
        shown_combinations = all_combinations
    else:
        shown_combinations = feature_combinations

    for cat_counter, cat in enumerate(
            set([combination[0] for combination in shown_combinations])):
        for i in [
                combination[1] for combination in shown_combinations
                if combination[0] == cat
        ]:
            phrase = get_nlg_phrase(*CORRELATION_NLG)
            while '{z}' in phrase or 'As' in phrase or 'linear' in phrase:
                phrase = get_nlg_phrase(*CORRELATION_NLG)
            strength = ['weak', 'moderate', 'strong', 'very strong', 'perfect']
            strength_values = [0.3, 0.6, 0.8, 0.99]
            strength_descr = strength[threshold(strength_values,
                                                np.abs(corr[cat, i]))]
            strength_adv = strength_descr + 'ly'
            if show_conditional:
                iplot(
                    p.plot_related_features(spn, i, cat,
                                            dictionary=dictionary))
            printmd(
                phrase.format(x=feature_names[cat],
                              y=feature_names[i],
                              strength=strength_descr,
                              strength_adv=strength_adv,
                              direction='',
                              neg_pos=''))
Example #4
def classification(spn, numerical_data, dictionary):
    context = dictionary['context']
    categoricals = get_categoricals(spn, context)
    misclassified = {}
    data_dict = {}
    for i in categoricals:
        y_true = numerical_data[:, i].reshape(-1, 1)
        query = np.copy(numerical_data)
        y_pred = predict_mpe(spn, i, query, context).reshape(-1, 1)
        misclassified[i] = np.where(y_true != y_pred)[0]
        misclassified_instances = misclassified[i].shape[0]
        data_dict[i] = np.concatenate((query[:, :i], y_pred, query[:, i + 1:]),
                                      axis=1)
        printmd(
            'For feature "{}" the SPN misclassifies {} instances, resulting in an accuracy of {}%.'
            .format(
                context.feature_names[i], misclassified_instances,
                np.round(
                    100 * (1 - misclassified_instances / len(numerical_data)),
                    2)))
    return misclassified, data_dict
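The reported percentage is simply one minus the misclassification rate; a minimal numpy sketch of that bookkeeping with made-up labels:

import numpy as np

# Made-up ground truth and predictions for one categorical feature.
y_true = np.array([0, 1, 1, 2, 0, 2]).reshape(-1, 1)
y_pred = np.array([0, 1, 2, 2, 0, 0]).reshape(-1, 1)

misclassified = np.where(y_true != y_pred)[0]       # rows 2 and 5 disagree
score = 100 * (1 - misclassified.shape[0] / len(y_true))
print(misclassified, np.round(score, 2))            # [2 5] 66.67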
Example #5
def get_categorical_data(spn,
                         df,
                         dictionary,
                         header=1,
                         types=False,
                         date=False,
                         assert_nan=False):
    """

    :param spn:
    :param df:
    :param dictionary:
    :param header:
    :param types:
    :param date:
    :return:
    """
    context = dictionary['context']
    categoricals = get_categoricals(spn, context)
    df_numerical = df.copy(deep=True)
    for i in categoricals:
        if df_numerical.iloc[:, i].isnull().values.any():
            non_nan = np.where(~np.isnan(df_numerical.iloc[:, i]))
        else:
            non_nan = np.arange(df_numerical.iloc[:, i].size)
        transformed = dictionary['features'][i]['encoder'].transform(
            df_numerical.values[non_nan, i].squeeze())
        df_numerical.iloc[non_nan, i] = transformed

    numerical_data = df_numerical.values.astype(float)

    categorical_data = {}
    for i in categoricals:
        non_nan = np.where(~np.isnan(df_numerical.iloc[:, i]))
        data = df_numerical.iloc[non_nan].groupby(context.feature_names[i])
        data = [data.get_group(x).values.astype(float) for x in data.groups]
        categorical_data[i] = data

    return numerical_data, categorical_data
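The per-feature 'encoder' is assumed to expose the scikit-learn LabelEncoder interface (only transform and inverse_transform are used here and in the description functions); a minimal sketch of that interface:

from sklearn.preprocessing import LabelEncoder

# Hypothetical encoder for one categorical column.
enc = LabelEncoder().fit(['blue', 'green', 'red'])
codes = enc.transform(['red', 'blue', 'red'])     # array([2, 0, 2])
print(codes, enc.inverse_transform([0, 2]))       # [2 0 2] ['blue' 'red']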
Example #6
def categorical_nodes_description(spn, context):
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)
    total_analysis = {}
    for cat in categoricals:
        marg_total = marginalize(spn, [cat])
        categorical_probabilities = []
        for i, n in enumerate(spn.children):
            node_weight = np.log(spn.weights[i])
            node_probabilities = []
            for cat_instance in context.get_domains_by_scope([cat])[0]:
                marg = marginalize(n, [cat])
                query = np.zeros((1, num_features))
                query[:, :] = np.nan
                query[:, cat] = cat_instance
                proba = np.exp(
                    log_likelihood(marg, query) + node_weight -
                    log_likelihood(marg_total, query)).reshape(-1)
                node_probabilities.append(proba)
            categorical_probabilities.append(np.array(node_probabilities))
        total_analysis[cat] = np.sum(np.array(categorical_probabilities),
                                     axis=2)

    node_categoricals = {}
    for cat in categoricals:
        node_categoricals[cat] = {}
        node_categoricals[cat]['contrib'] = []
        node_categoricals[cat]['explained'] = []
        for cat_instance in [
                int(c) for c in context.get_domains_by_scope([cat])[0]
        ]:
            probs = total_analysis[cat]
            # TODO: That threshold needs some evidence or theoretical grounding
            contrib_nodes = np.where(probs[:, cat_instance] /
                                     (np.sum(probs, axis=1)) > 0.4)
            explained_probs = np.sum(probs[contrib_nodes], axis=0)
            node_categoricals[cat]['contrib'].append(contrib_nodes)
            node_categoricals[cat]['explained'].append(explained_probs)
    return node_categoricals, total_analysis
Example #7
def get_categorical_correlation(spn, context):
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)
    var = get_variance(spn)
    full_matrix = np.zeros((num_features, num_features))
    full_matrix[:] = np.nan

    # OLD CODE
    for cat in categoricals:
        all_probs = []
        cat_vars = []
        query = np.full((1, num_features), np.nan)
        domain = context.get_domains_by_scope([cat])[0]
        for value in domain:
            query[:, cat] = value
            cond_spn = condition(spn, query)
            prob = likelihood(spn, query)
            cond_var = get_variance(cond_spn)
            cat_vars.append(cond_var)
            all_probs.append(prob)
        cat_vars = np.array(cat_vars)
        cat_vars = np.insert(cat_vars, cat, values=np.nan, axis=2)
        cat_vars = cat_vars.reshape((cat_vars.shape[0], cat_vars.shape[2]))
        all_probs = np.array(all_probs).reshape(-1, 1)
        all_probs /= np.sum(all_probs)
        total_var = np.sum(cat_vars * all_probs, axis=0)
        result = 1 - (total_var / var)
        full_matrix[:, cat] = result
        full_matrix[cat, :] = result
        for cat2 in categoricals:
            if cat != cat2:
                full_matrix[cat, cat2] = np.nan
                full_matrix[cat2, cat] = np.nan
            else:
                full_matrix[cat, cat] = 1
    assert np.all(np.logical_or(full_matrix > -0.0001, np.isnan(full_matrix)))
    full_matrix[full_matrix < 0] = 0
    return np.sqrt(full_matrix)
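The quantity computed above is a correlation ratio, sqrt(1 - E[Var(X | cat)] / Var(X)); a plain-numpy sketch of the same formula on made-up grouped data, with weights taken from group counts instead of SPN likelihoods:

import numpy as np

# Made-up continuous feature split by a 3-valued categorical variable.
groups = [np.array([1.0, 1.2, 0.9]),
          np.array([3.1, 2.9, 3.0, 3.2]),
          np.array([5.0, 5.1, 4.9])]

x = np.concatenate(groups)
weights = np.array([len(g) for g in groups]) / x.size   # P(cat = value)
cond_vars = np.array([g.var() for g in groups])         # Var(X | cat = value)

# Correlation ratio: close to 1 because the groups are well separated.
eta = np.sqrt(1 - np.sum(weights * cond_vars) / x.var())
print(np.round(eta, 3))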
Example #8
def node_categorical_description(spn, dictionary):
    context = dictionary['context']
    categoricals = get_categoricals(spn, context)
    feature_names = context.feature_names

    enc = [dictionary['features'][cat]['encoder'] for cat in categoricals]
    summarized, contributions = categorical_nodes_description(spn, context)

    for i, cat in enumerate(categoricals):
        printmd('#### Distribution of {}'.format(feature_names[cat]))
        for cat_instance in [
                int(c) for c in context.get_domains_by_scope([cat])[0]
        ]:
            name = enc[i].inverse_transform([cat_instance])
            contrib_nodes = summarized[cat]['contrib'][cat_instance][0]
            prop_of_instance = summarized[cat]['explained'][cat_instance][
                cat_instance]
            prop_of_nodes = prop_of_instance / np.sum(
                summarized[cat]['explained'][cat_instance])
            if prop_of_instance < 0.7:
                printmd(
                    'The feature "{}" is not separated well along the primary '
                    'clusters.'.format(feature_names[cat]))
                break
            else:
                desc = ('{}% of "{}" is captured by the nodes {}. '
                        'The probability of "{}" for this group of nodes is {}%')

                printmd(
                    desc.format(
                        np.round(prop_of_instance * 100, 2),
                        name,
                        ', '.join([str(n) for n in contrib_nodes]),
                        name,
                        np.round(prop_of_nodes * 100, 2),
                    ))
Example #9
def explanation_vector_description(spn,
                                   dictionary,
                                   data_dict,
                                   cat_features,
                                   use_shap=False):
    context = dictionary['context']
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)
    feature_types = context.parametric_types
    domains = context.get_domains_by_scope(spn.scope)
    feature_names = context.feature_names
    all_combinations = list(
        itertools.product(categoricals, list(range(num_features))))
    if explanation_vectors_show == 'all':
        shown_combinations = all_combinations
    elif isinstance(explanation_vectors_show, int):
        num_choices = min(explanation_vectors_show, len(all_combinations))
        shown_combinations = random.sample(all_combinations, k=num_choices)
    else:
        shown_combinations = explanation_vectors_show

    if explanation_vector_classes:
        shown_classes = explanation_vector_classes
    else:
        shown_classes = categoricals

    def plot_query(query, data, query_dict):
        if len(query[0]) == 0:
            return None
        conditional_evidence = np.full((1, num_features), np.nan)
        conditional_evidence[:, i] = data[0, i]
        gradients = fast_conditional_gradient(spn, conditional_evidence,
                                              data[query])
        gradients_norm = np.linalg.norm(gradients, axis=1).reshape(-1, 1)
        _gradients = (gradients / gradients_norm)[:, k]
        discretize = np.histogram(_gradients, range=(-1, 1), bins=20)
        binsize = discretize[1][1] - discretize[1][0]
        if np.abs(_gradients.mean()) < explanation_vector_threshold:
            return _gradients
        header, description, plot = explanation_vector(_gradients, discretize,
                                                       data, query, query_dict)
        if not header:
            return _gradients
        printmd(header)
        iplot(plot)
        printmd(description)
        return _gradients

    all_gradients = {}
    for i in shown_classes:
        all_gradients[i] = {}
        for j in domains[i]:
            all_gradients[i][j] = {}
            printmd('#### Class "{}": "{}"'.format(
                feature_names[i],
                dictionary['features'][i]['encoder'].inverse_transform(
                    [int(j)])))
            test_query = np.where((data_dict[i][:, i] == j))
            if len(test_query[0]) == 0:
                printmd(
                    'For this particular class instance, no instances of the predicted data were found. \
                This might be because the predictive precision of the network was not high enough.'
                )
                continue
            #if use_shap:
            # shapley_values = shap_sampling(spn, )
            for k in range(num_features - 1):
                all_gradients[i][j][k] = {}
                this_range = [x for x in range(num_features) if x != i]
                instance = this_range[k]
                if (i, k) not in shown_combinations:
                    continue
                if instance in categoricals:
                    plot_data = []
                    for l in domains[instance]:
                        query = np.where((data_dict[i][:, i] == j)
                                         & (data_dict[i][:, instance] == l))
                        query_dict = {
                            'type': 'categorical',
                            'class': feature_names[i],
                            'class_instance': dictionary['features'][i][
                                'encoder'].inverse_transform([int(j)]),
                            'feature': feature_names[instance],
                            'feature_instance': dictionary['features'][instance][
                                'encoder'].inverse_transform([int(l)]),
                            'feature_idx': instance,
                            'class_idx': i
                        }

                        data = data_dict[i][query]

                        if data.size == 0:
                            continue

                        evidence = np.full((1, data.shape[1]), np.nan)
                        evidence[:, i] = data[0, i]
                        if use_shap:
                            gradients = shap_sampling(spn, data, i, N=10)
                        else:
                            gradients = fast_conditional_gradient(
                                spn, evidence, data)
                        gradients_norm = np.linalg.norm(gradients,
                                                        axis=1).reshape(-1, 1)
                        _gradients = (gradients / gradients_norm)[:, k]

                        discretize = np.histogram(_gradients,
                                                  range=(-1, 1),
                                                  bins=10)
                        binsize = discretize[1][1] - discretize[1][0]
                        plot_data.append((_gradients, discretize,
                                          query_dict['feature_instance']))
                    plot = p.plot_cat_explanation_vector(plot_data)
                    header = '##### Predictive categorical feature "{}"\n\n'.format(
                        query_dict['feature'])
                    printmd(header)
                    iplot(plot)

                    if _gradients is None:
                        all_gradients[i][j][k][l] = 0
                    else:
                        all_gradients[i][j][k][l] = _gradients.mean()
                else:
                    plot_data = []
                    cmap = IndexColormap('viridis', 20)
                    discretization_bins = np.linspace(domains[instance][0],
                                                      domains[instance][1], 20)
                    dataset_binning = find_nearest(discretization_bins,
                                                   data_dict[i][:, instance])
                    for discretized_bin, l in enumerate(
                            np.linspace(domains[instance][0],
                                        domains[instance][1], 20)):
                        query = np.where((dataset_binning == discretized_bin)
                                         & (data_dict[i][:, i] == j))
                        query_dict = {
                            'type': 'categorical',
                            'class': feature_names[i],
                            'class_instance': dictionary['features'][i][
                                'encoder'].inverse_transform([int(j)]),
                            'feature': feature_names[instance],
                            'feature_instance': np.round(l, 2),
                            'feature_idx': instance,
                            'class_idx': i
                        }

                        data = data_dict[i][query]

                        if data.size == 0:
                            # printmd('No fitting instances found.')
                            continue

                        evidence = np.full((1, data.shape[1]), np.nan)
                        evidence[:, i] = data[0, i]
                        if use_shap:
                            gradients = shap_sampling(spn, data, i, N=10)
                        else:
                            gradients = fast_conditional_gradient(
                                spn, evidence, data)
                        gradients_norm = np.linalg.norm(gradients,
                                                        axis=1).reshape(-1, 1)
                        _gradients = (gradients / gradients_norm)[:, k]

                        discretize = np.histogram(_gradients,
                                                  range=(-1, 1),
                                                  bins=10)
                        binsize = discretize[1][1] - discretize[1][0]
                        plot_data.append((_gradients, discretize,
                                          query_dict['feature_instance']))
                    plot = p.plot_cat_explanation_vector(plot_data, color=cmap)
                    header = '##### Predictive continuous feature "{}": "{}"\n\n'.format(
                        query_dict['feature'], query_dict['feature_instance'])
                    printmd(header)
                    iplot(plot)

                    if _gradients is None:
                        all_gradients[i][j][k][l] = 0
                    else:
                        all_gradients[i][j][k][l] = _gradients.mean()
    return all_gradients
Example #10
def describe_misclassified(spn, dictionary, misclassified, data_dict,
                           numerical_data):
    context = dictionary['context']
    categoricals = get_categoricals(spn, context)
    empty = np.array([[np.nan] * len(spn.scope)])
    for i in categoricals:
        if use_shapley:
            raise NotImplementedError
        else:
            if misclassified_explanations == 'all':
                show_misclassified = misclassified[i]
            elif isinstance(misclassified_explanations, int):
                num_choices = min(misclassified_explanations,
                                  len(misclassified[i]))
                show_misclassified = random.sample(misclassified[i].tolist(),
                                                   k=num_choices)
            else:
                show_misclassified = misclassified_explanations
            for inst_num in show_misclassified:
                instance = data_dict[i][inst_num:inst_num + 1]
                evidence = instance.copy()
                evidence[:, i] = np.nan
                prior = log_likelihood(spn, evidence)
                posterior = log_likelihood(spn, instance)
                total = 0
                all_nodes = []
                for j, node in enumerate(spn.children):
                    node_prob = np.exp(
                        np.log(spn.weights[j]) +
                        log_likelihood(node, instance) - posterior)
                    total += node_prob
                    all_nodes.append((node_prob, j))
                all_nodes.sort()
                all_nodes.reverse()
                needed_nodes = []
                all_reps = []
                total_prob = 0
                for prob, idx in all_nodes:
                    node = Copy(spn.children[idx])
                    assign_ids(node)
                    total_prob += prob
                    needed_nodes.append(idx)
                    all_reps.append(mpe(node, empty)[0])
                    if total_prob > 0.9:
                        break
                real_value = dictionary['features'][i][
                    'encoder'].inverse_transform(
                        [int(numerical_data[inst_num, i])])
                pred_value = dictionary['features'][i][
                    'encoder'].inverse_transform(
                        [int(data_dict[i][inst_num, i])])
                printmd(
                    'Instance {} was predicted as "{}", even though it is "{}", because it was most similar to the following clusters: {}'
                    .format(inst_num, pred_value, real_value,
                            ', '.join(map(str, needed_nodes))))
                all_reps = np.array(all_reps).reshape(len(needed_nodes),
                                                      len(spn.scope))
                table = np.round(np.concatenate([instance, all_reps], axis=0),
                                 2)
                node_nums = np.array(['instance'] + needed_nodes).reshape(
                    -1, 1)
                table = np.append(node_nums, table, axis=1)

                iplot(
                    p.plot_table([''] + context.feature_names,
                                 table.transpose()))
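The per-child probability computed in the loop above is the usual mixture responsibility w_j * L_j(x) / L(x); a made-up three-node example of the same arithmetic:

import numpy as np

# Hypothetical mixture weights and per-child likelihoods of one instance.
weights = np.array([0.5, 0.3, 0.2])
child_lik = np.array([0.02, 0.10, 0.01])

responsibility = weights * child_lik / np.sum(weights * child_lik)
print(np.round(responsibility, 3))   # [0.238 0.714 0.048], sums to 1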
Example #11
def show_node_separation(spn, nodes, context):
    categoricals = get_categoricals(spn, context)
    all_features = spn.scope
    feature_names = context.feature_names

    if features_shown == 'all':
        shown_features = all_features
    elif isinstance(features_shown, int):
        num_choices = min(features_shown, len(all_features))
        shown_features = random.sample(all_features, k=num_choices)
    else:
        shown_features = features_shown

    node_means = np.array([get_mean(node).reshape(-1) for node in nodes])
    node_vars = np.array([get_variance(node).reshape(-1) for node in nodes])
    node_stds = np.sqrt(node_vars)
    names = np.arange(1, len(nodes) + 1, 1)
    strength_separation = cluster_anova(spn)
    node_var, node_mean = cluster_mean_var_distance(nodes, spn)
    all_seps = {
        i: separation
        for i, separation in zip(shown_features, strength_separation)
    }
    for i in shown_features:
        if i not in categoricals:
            description_string = ''
            plot = p.plot_error_bar(names, node_means[:, i], node_vars[:, i],
                                    feature_names[i])
            strength = ['weak', 'moderate', 'strong', 'very strong', 'perfect']
            strength_values = [0.3, 0.6, 0.8, 0.99]
            strength_adv = strength[threshold(strength_values,
                                              strength_separation[i])] + 'ly'
            var_outliers = np.where(node_var[:, i] > variance_threshold)[0]
            if len(var_outliers) == 1:
                node_string = ', '.join([str(v) for v in var_outliers])
                description_string += 'The variance of node {} is significantly larger than the average node. '.format(
                    node_string)
            elif len(var_outliers) > 0:
                node_string = ', '.join([str(v) for v in var_outliers])
                description_string += 'The variances of the nodes {} are significantly larger than the average node. '.format(
                    node_string)
            mean_high_outliers = np.where(node_mean[:, i] > mean_threshold)[0]
            mean_low_outliers = np.where(node_mean[:, i] < -mean_threshold)[0]
            if len(mean_high_outliers) == 1:
                node_string = ', '.join([str(v) for v in mean_high_outliers])
                description_string += 'The mean of node {} is significantly larger than the average node. '.format(
                    node_string)
            elif len(mean_high_outliers) > 0:
                node_string = ', '.join([str(v) for v in mean_high_outliers])
                description_string += 'The means of the nodes {} are significantly larger than the average node. '.format(
                    node_string)
            if len(mean_low_outliers) == 1:
                node_string = ', '.join([str(v) for v in mean_low_outliers])
                description_string += 'The mean of node {} is significantly smaller than the average node.'.format(
                    node_string)
            elif len(mean_low_outliers) > 0:
                node_string = ', '.join([str(v) for v in mean_low_outliers])
                description_string += 'The means of the nodes {} are significantly smaller than the average node.'.format(
                    node_string)
            if description_string or strength_separation[
                    i] > separation_threshold:
                description_string = 'The feature "{}" is {} separated by the clustering. '.format(
                    feature_names[i], strength_adv) + description_string
                iplot(plot)
                printmd(description_string)
    return all_seps
Example #12
def correlation_description(spn, dictionary):
    context = dictionary['context']
    features = context.feature_names
    high_correlation = correlation_threshold
    categoricals = get_categoricals(spn, context)
    non_categoricals = [i for i in spn.scope if i not in categoricals]
    corr = get_full_correlation(spn, context)
    labels = features
    iplot(p.matshow(corr, x_labels=labels, y_labels=labels))

    idx = np.where(np.abs(corr) > high_correlation)

    phrases = []
    for i in range(corr.shape[0]):
        correlated_features = [
            j for j in range(corr.shape[1])
            if i > j and np.abs(corr[i, j]) > high_correlation
        ]
        modifiers = [
            get_correlation_modifier(corr[i, j]) for j in correlated_features
        ]
        counter = 0
        while counter < len(modifiers):
            x = labels[i]
            y = labels[correlated_features[counter]]
            phrase = get_nlg_phrase(*CORRELATION_NLG)
            if '{z}' in phrase:
                if counter == len(modifiers) - 1:
                    continue
                z = labels[correlated_features[counter + 1]]
                mod1 = modifiers[counter]
                mod2 = modifiers[counter + 1]
                if ('but' in phrase or 'while'
                        in phrase) and mod1.strength == mod2.strength:
                    phrase = phrase.replace(', but', ', and')
                    phrase = phrase.replace(', while', ', and')
                if 'On the other hand' in phrase and mod1.strength == mod2.strength:
                    continue
                phrase = phrase.format(x=x,
                                       y=y,
                                       z=z,
                                       strength=mod1.strength,
                                       strength_adv=mod1.strength_adv,
                                       strength_2=mod2.strength,
                                       strength_2_adv=mod2.strength_adv,
                                       direction=mod1.direction,
                                       neg_pos_1=mod1.neg_pos,
                                       neg_pos_2=mod2.neg_pos)
                counter += 2
            else:
                mod1 = modifiers[counter]
                phrase = phrase.format(x=x,
                                       y=y,
                                       strength=mod1.strength,
                                       strength_adv=mod1.strength_adv,
                                       direction=mod1.direction,
                                       neg_pos=mod1.neg_pos)
                counter += 1
            phrases.append(phrase)
    if not phrases:
        printmd('No features show more than a very weak correlation.')
    else:
        printmd(
            deep_join(phrases, ' ') +
            '\n\nAll other features do not have more than a very weak correlation.'
        )
    return corr