def categorical_correlations(spn, dictionary):
    """Describe strongly correlated feature pairs in Markdown, optionally with plots.

    Candidate pairs are all ``(i, j)`` with ``i > j`` whose absolute
    correlation (taken from ``f.get_full_correlation``) exceeds
    ``correlation_threshold``.  Which candidates are reported depends on
    ``feature_combinations``: an ``int`` draws that many pairs at random
    (capped at the number of candidates), ``'all'`` reports every candidate,
    and any other value is used directly as the collection of pairs.

    ``correlation_threshold``, ``feature_combinations`` and
    ``show_conditional`` are not parameters of this function; they are read
    from the surrounding scope.

    Args:
        spn: Model object exposing ``numFeatures`` and ``featureNames``.
        dictionary: Forwarded unchanged to ``p.plot_related_features``.

    Returns:
        None.  Output is emitted through ``printmd`` (and through ``iplot``
        when ``show_conditional`` is truthy).
    """
    corr = f.get_full_correlation(spn)

    # Enumerate only the strict lower triangle (i > j), in the same order a
    # full product filtered on ``i > j`` would produce.
    all_combinations = [
        (i, j)
        for i in range(spn.numFeatures)
        for j in range(i)
        if np.abs(corr[i, j]) > correlation_threshold
    ]
    if isinstance(feature_combinations, int):
        num_choices = min(feature_combinations, len(all_combinations))
        shown_combinations = random.sample(all_combinations, k=num_choices)
    elif feature_combinations == 'all':
        shown_combinations = all_combinations
    else:
        shown_combinations = feature_combinations

    # Loop-invariant lookup tables: threshold() picks an index into
    # ``strength`` from the cut points in ``strength_values``.
    strength = ['weak', 'moderate', 'strong', 'very strong', 'perfect']
    strength_values = [0.3, 0.6, 0.8, 0.99]

    # Group the output by the first feature of each pair.
    for cat in {combination[0] for combination in shown_combinations}:
        partners = [combination[1] for combination in shown_combinations
                    if combination[0] == cat]
        for i in partners:
            # Redraw until the template needs no third feature ('{z}') and
            # contains neither 'As' nor 'linear'.
            phrase = get_nlg_phrase(*CORRELATION_NLG)
            while '{z}' in phrase or 'As' in phrase or 'linear' in phrase:
                phrase = get_nlg_phrase(*CORRELATION_NLG)
            strength_descr = strength[threshold(strength_values, np.abs(corr[cat, i]))]
            strength_adv = strength_descr + 'ly'
            if show_conditional:
                iplot(p.plot_related_features(spn, i, cat, dictionary=dictionary))
            # Direction placeholders are deliberately left blank here.
            printmd(phrase.format(
                x=spn.featureNames[cat],
                y=spn.featureNames[i],
                strength=strength_descr,
                strength_adv=strength_adv,
                direction='',
                neg_pos=''))
 def compute_strength(self, gradients):
     """Summarise ``gradients`` as a strength label and a direction label.

     Sets ``self.ready`` to True, then stores in ``self.strength`` the label
     selected by ``threshold`` from the absolute mean of ``gradients``, and
     in ``self.direction`` the string ``'positive'`` when that mean is
     greater than zero and ``'negative'`` otherwise (a mean of exactly zero
     is therefore reported as ``'negative'``).

     Args:
         gradients: Values accepted by ``np.mean``.
     """
     self.ready = True
     mean_gradient = np.mean(gradients)
     strength_labels = ['very weak', 'weak', 'moderate', 'strong', 'very strong']
     # Cut points handed to threshold(); presumably it returns the index of
     # the bucket the value falls into -- confirm against its definition.
     strength_bounds = [0.05, 0.15, 0.3, 0.5]
     self.strength = strength_labels[threshold(strength_bounds, np.abs(mean_gradient))]
     self.direction = 'positive' if mean_gradient > 0 else 'negative'
def get_correlation_modifier(corr):
    """Build the wording fragments that describe a correlation value.

    Args:
        corr: Correlation value; its absolute value selects the strength
            label and its sign selects the direction words.

    Returns:
        ``Modifier(strength, strength_adv, direction, neg_pos)`` where
        ``strength_adv`` is the strength label with ``'ly'`` appended, and
        the last two fields are ``'decrease'``/``'negative'`` for
        ``corr < 0`` and ``'increase'``/``'positive'`` otherwise.
    """
    labels = ['weak', 'moderate', 'strong', 'very strong', 'perfect']
    cut_points = [0.3, 0.6, 0.8, 0.99]
    # threshold() supplies the index into ``labels``.
    label = labels[threshold(cut_points, np.abs(corr))]
    adverb = label + 'ly'

    if corr < 0:
        trend, sign = 'decrease', 'negative'
    else:
        trend, sign = 'increase', 'positive'
    return Modifier(label, adverb, trend, sign)
def _describe_outliers(outliers, singular, plural):
    """Return a sentence naming the outlier nodes, or '' when there are none.

    ``singular`` is used for exactly one outlier and ``plural`` for more;
    both are ``str.format`` templates with a single ``{}`` slot that receives
    the comma-separated node indices.
    """
    if len(outliers) == 0:
        return ''
    template = singular if len(outliers) == 1 else plural
    return template.format(', '.join(str(v) for v in outliers))


def show_node_separation(spn, nodes):
    """Plot and describe how well the given nodes separate each feature.

    For every shown, non-categorical feature an error-bar plot is built and
    a description is assembled from nodes whose variance or mean distance
    (from ``f.cluster_mean_var_distance``) exceeds ``variance_threshold`` /
    ``mean_threshold``.  The plot and text are emitted only when at least
    one outlier sentence was produced or the feature's separation exceeds
    ``separation_threshold``.

    ``features_shown``, ``variance_threshold``, ``mean_threshold`` and
    ``separation_threshold`` are not parameters; they are read from the
    surrounding scope.  ``features_shown`` may be ``'all'``, an ``int``
    (that many features are sampled at random) or an explicit collection of
    feature indices.

    Args:
        spn: Model object exposing ``numFeatures`` and ``featureNames``.
        nodes: Objects providing ``moment(order, numFeatures)``.

    Returns:
        dict mapping each shown feature index to its entry in
        ``f.cluster_variance_separation(spn)``.
    """
    categoricals = f.get_categoricals(spn)
    all_features = list(range(spn.numFeatures))

    if features_shown == 'all':
        shown_features = all_features
    elif isinstance(features_shown, int):
        num_choices = min(features_shown, len(all_features))
        shown_features = random.sample(all_features, k=num_choices)
    else:
        shown_features = features_shown

    # Evaluate the first moment once per node and reuse it for the variance
    # (second moment minus squared first moment).
    first_moments = [node.moment(1, spn.numFeatures) for node in nodes]
    node_means = np.array(first_moments)
    node_vars = np.array([node.moment(2, spn.numFeatures) - first ** 2
                          for node, first in zip(nodes, first_moments)])
    names = np.arange(1, len(nodes) + 1, 1)
    strength_separation = f.cluster_variance_separation(spn)
    node_var, node_mean = f.cluster_mean_var_distance(nodes, spn)

    # Key each separation by the feature index it belongs to.  Pairing the
    # two sequences positionally would attach values to the wrong features
    # whenever shown_features is a random sample or a custom subset.
    all_seps = {i: strength_separation[i] for i in shown_features}

    strength = ['weak', 'moderate', 'strong', 'very strong', 'perfect']
    strength_values = [0.3, 0.6, 0.8, 0.99]

    for i in shown_features:
        if i in categoricals:
            continue
        # NOTE(review): the error bars are fed variances, not standard
        # deviations -- confirm this is what plot_error_bar expects.
        plot = p.plot_error_bar(names, node_means[:, i], node_vars[:, i], spn.featureNames[i])
        strength_adv = strength[threshold(strength_values, strength_separation[i])] + 'ly'

        # NOTE(review): np.where yields 0-based positions, while ``names``
        # labels the nodes starting at 1 -- confirm which numbering the text
        # should use.
        var_outliers = np.where(node_var[:, i] > variance_threshold)[0]
        mean_high_outliers = np.where(node_mean[:, i] > mean_threshold)[0]
        mean_low_outliers = np.where(node_mean[:, i] < -mean_threshold)[0]

        description_string = (
            _describe_outliers(
                var_outliers,
                'The variance of node {} is significantly larger than the average node. ',
                'The variances of the nodes {} are significantly larger than the average node. ')
            + _describe_outliers(
                mean_high_outliers,
                'The mean of node {} is significantly larger than the average node. ',
                'The means of the nodes {} are significantly larger than the average node. ')
            + _describe_outliers(
                mean_low_outliers,
                'The mean of node {} is significantly smaller than the average node.',
                'The means of the nodes {} are significantly smaller than the average node.')
        )

        if description_string or strength_separation[i] > separation_threshold:
            description_string = 'The feature "{}" is {} separated by the clustering. '.format(
                spn.featureNames[i], strength_adv) + description_string
            iplot(plot)
            f.printmd(description_string)
    return all_seps
def correlation_statement(corr, feature1, feature2):
    """Return a randomly worded sentence describing a correlation.

    Args:
        corr: Correlation value between the two features.
        feature1: Name substituted for ``{fx}`` in the sentence.
        feature2: Name substituted for ``{fy}`` in the sentence.

    Returns:
        An empty string unless ``abs(corr)`` is greater than 0.25; otherwise
        one of several sentence templates, chosen with ``random`` and filled
        in, followed by a single trailing space.
    """
    # Weak correlations are not reported.  Written as ``not (... > ...)`` so
    # that a NaN correlation also yields the empty string.
    if not np.abs(corr) > 0.25:
        return ''

    strength = ['weak', 'moderate', 'strong', 'very strong', 'perfect']
    strength_values = [0.3, 0.6, 0.8, 0.99]
    # threshold() supplies the index into ``strength``.
    level = strength[threshold(strength_values, np.abs(corr))]
    is_negative = corr < 0

    description = dict(
        strength=level,
        strength_adv=level + 'ly',
        direction='decrease' if is_negative else 'increase',
        neg_pos='negative' if is_negative else 'positive',
        fx=feature1,
        fy=feature2,
    )
    sentences = [
        '"{fx}" and "{fy}" influence each other {strength_adv}. As one increases, the other {direction}s.',
        'There is a {strength} {neg_pos} dependency between "{fx}" and "{fy}".',
        'There is a {strength} linear relation between "{fx}" and "{fy}".',
        'The model shows a {strength} linear relation between "{fx}" and "{fy}".',
        'The features "{fx}" and "{fy}" have a {strength} dependency between them.',
    ]
    return random.choice(sentences).format(**description) + ' '