def get_gini_of_split(self, Y1, Y2):
    """Return the size-weighted ``gini`` value of a two-way split.

    Each side's ``gini`` value is weighted by the fraction of all samples
    that ended up on that side.

    Args:
        Y1: Sized collection of labels on the first side of the split.
        Y2: Sized collection of labels on the second side of the split.

    Returns:
        ``len(Y1) / n * gini(Y1) + len(Y2) / n * gini(Y2)`` where
        ``n = len(Y1) + len(Y2)``, or ``0.0`` when both sides are empty.
    """
    n1, n2 = len(Y1), len(Y2)
    total = n1 + n2
    if total == 0:
        # Nothing to weigh: return early instead of dividing by zero.
        return 0.0
    # float() keeps the weights fractional even where ``/`` on two ints
    # would truncate (Python 2 without ``from __future__ import division``).
    total = float(total)
    return n1 / total * gini(Y1) + n2 / total * gini(Y2)
def occupation_gini(matrix, motif, G):
    """Return the ``gini`` value of the normalised per-site weights.

    Every site of ``motif`` is scored with ``score_seq(matrix, site)`` and
    turned into the weight ``exp(-score)``.  The weights are divided by
    their sum plus ``Zb_from_matrix(matrix, G)`` and the resulting list is
    handed to ``gini``.
    """
    weights = []
    for site in motif:
        weights.append(exp(-score_seq(matrix, site)))

    background = Zb_from_matrix(matrix, G)
    normaliser = sum(weights) + background

    occupancies = [weight / normaliser for weight in weights]
    return gini(occupancies)
    def run_once(self, times):
        """Run the simulation for ``times`` periods and collect statistics.

        After ``self.reset()``, each period updates wages, resets the
        government accounts, collects taxes and redistributes revenue using
        ``self.param.bias``.  It then pays the negative income tax and/or
        the basic income when the matching flag on ``self.param`` is set.
        The period's statistics are recorded from the overall incomes before
        the population is updated.

        Returns:
            A ``DataFrame`` built column-wise from the recorded series, with
            one entry per period in each of the columns ``alpha``,
            ``x_min``, ``error``, ``gini``, ``mean``, ``pop``, ``gdp``,
            ``rev``, ``exp``, ``btm`` and ``top``.
        """
        # Column order matters: it is the insertion order of the result dict.
        column_names = ('alpha', 'x_min', 'error', 'gini', 'mean', 'pop',
                        'gdp', 'rev', 'exp', 'btm', 'top')
        res = {name: [] for name in column_names}

        self.reset()

        for _ in range(times):
            self.society.update_all_wages()
            self.government.reset_accounts()

            self.government.collect_taxes()
            self.government.redistribute_revenue(self.param.bias)

            if self.param.negative_tax:
                self.government.pay_nit(self.param.exemption_quantile,
                                        self.param.subsidy_rate)
            if self.param.basic_income:
                self.government.pay_ubi(self.param.fixed_benefit,
                                        self.param.variable_benefit)

            incomes = self.society.get_overall_incomes()

            # Fitted distribution parameters for this period's incomes.
            alpha, x_min, error = IncomeDist.estimate_params(incomes)
            res['alpha'].append(alpha)
            res['x_min'].append(x_min)
            res['error'].append(error)

            res['gini'].append(utils.gini(incomes))
            total_income = sum(incomes)
            res['gdp'].append(total_income)
            res['mean'].append(total_income / self.society.size)
            res['rev'].append(self.government.revenue)
            res['exp'].append(self.government.expenses)
            res['pop'].append(self.society.size)

            bottom, top = utils.bottom_top(incomes)
            res['btm'].append(bottom)
            res['top'].append(top)

            # Population changes only after the period has been recorded.
            self.society.update_population()

        return DataFrame.from_dict(res, orient='columns')
Beispiel #4
0
    def split(self, x, y, depth, tree_idx):
        """Recursively grow the subtree stored at ``self.tree[tree_idx]``.

        The node becomes a leaf ``[None, None, None, None, majority_label]``
        when ``depth`` exceeds ``self.max_depth``, when
        ``self.check_terminate`` accepts the labels, or when no feature of
        ``x`` offers a threshold that leaves samples on both sides.
        Otherwise the node is stored as
        ``[feature, threshold, greater_idx, others_idx, None]``, two empty
        child slots are appended to ``self.tree`` and both children are
        split recursively.

        Candidate thresholds are the midpoints between consecutive sorted
        values of each feature returned by ``self.pickup_feature_names``.
        The candidate with the lowest size-weighted impurity wins, and the
        first one found wins ties.

        Args:
            x: Feature table, indexed by feature name and by boolean mask
                (used here like a pandas DataFrame).
            y: Labels aligned with the rows of ``x`` (used here like a
                pandas Series).
            depth: Depth of the node being built.
            tree_idx: Index in ``self.tree`` of the node to fill in.

        Raises:
            ValueError: If a candidate has to be scored and
                ``self.criterion`` is neither ``'gini'`` nor ``'entropy'``.
        """

        def make_leaf():
            # A leaf predicts the most common label among its samples.
            cnt = Counter(y.to_numpy())
            self.tree[tree_idx] = \
                [None, None, None, None, cnt.most_common(1)[0][0]]

        def impurity(labels):
            if self.criterion == 'gini':
                return utils.gini(labels.to_numpy())
            if self.criterion == 'entropy':
                return utils.entropy(labels.to_numpy())
            raise ValueError(
                'unsupported criterion: {!r}'.format(self.criterion))

        def find_best(feature_names):
            """Return the best ``(feature, threshold)`` or ``(None, None)``."""
            best_feature = best_value = best_score = None
            for feature_name in feature_names:
                column = x[feature_name]
                ordered = column.sort_values().to_numpy()
                for idx in range(1, len(ordered)):
                    split_value = (ordered[idx - 1] + ordered[idx]) / 2
                    larger_y = y[column > split_value]
                    others_y = y[column <= split_value]
                    larger_n, others_n = len(larger_y), len(others_y)
                    if larger_n == 0 or others_n == 0:
                        # A split with an empty side separates nothing.
                        continue
                    score = (larger_n * impurity(larger_y)
                             + others_n * impurity(others_y))
                    score /= (larger_n + others_n)
                    if best_score is None or score < best_score:
                        best_score = score
                        best_feature = feature_name
                        best_value = split_value
            return best_feature, best_value

        if (self.max_depth is not None
                and depth > self.max_depth) or self.check_terminate(
                    y.to_numpy()):
            make_leaf()
            return

        while True:
            best_split_feature, best_split_value = find_best(
                self.pickup_feature_names(x.keys()))
            if best_split_feature is not None:
                break
            # The picked features offered no usable split.  Retrying only
            # helps if some feature can separate the samples; otherwise the
            # loop would never end, so close the node as a leaf instead.
            # NOTE(review): this assumes pickup_feature_names may return a
            # different subset on each call - confirm.
            if find_best(x.keys())[0] is None:
                make_leaf()
                return

        larger_mask = x[best_split_feature] > best_split_value
        others_mask = x[best_split_feature] <= best_split_value

        greater_idx = len(self.tree)
        others_idx = greater_idx + 1
        self.tree[tree_idx] = [
            best_split_feature, best_split_value, greater_idx, others_idx, None
        ]
        self.tree.append([])
        self.tree.append([])
        self.split(x[larger_mask], y[larger_mask], depth + 1, greater_idx)
        self.split(x[others_mask], y[others_mask], depth + 1, others_idx)
Beispiel #5
0
            corr = sess.run(model.num_correct,
                            feed_dict={
                                model.input: [test_image],
                                model.label: [original_label]
                            })
            total_corr += corr

            IG = integrated_gradients(sess,
                                      reference_image,
                                      test_image,
                                      original_label,
                                      model,
                                      gradient_func='output_input_gradient',
                                      steps=num_steps)

            IG_vector = IG.flatten()

            gini_v = gini(IG_vector)
            total_gini += gini_v

            log_file.write('%d %.4f\n' % (corr, gini_v))

        acc = total_corr / num_eval_examples
        adv_gini = total_gini / num_eval_examples

        print('Accuracy: {:.2f}%'.format(100 * acc))
        print('Average Gini: {:.4f}'.format(adv_gini))

        log_file.close()
Beispiel #6
0
        saver.restore(sess, checkpoint)

        e = shap.DeepExplainer((model.input, model.output), reference_images)

        for i in range(num_eval_examples):
            test_image = X[i]
            original_label = y[i]

            corr = sess.run(model.num_correct,
                            feed_dict={
                                model.input: [test_image],
                                model.label: [original_label]
                            })
            total_corr += corr

            shap_value = e.shap_values(X[i:i + 1])[0]

            shap_value_vector = shap_value.flatten()
            gini_v = gini(shap_value_vector)
            total_gini += gini_v

            log_file.write('%d %.4f\n' % (corr, gini_v))

        acc = total_corr / num_eval_examples
        avg_gini = total_gini / num_eval_examples

        print('Accuracy: {:.2f}%'.format(100 * acc))
        print('Average Gini: {:.4f}'.format(avg_gini))

        log_file.close()
Beispiel #7
0
import dash
from dash.dependencies import Input, Output
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
import plotly.figure_factory as ff

import numpy as np
from utils import gini

# Dash application object (created with an empty name).
app = dash.Dash('')

# Two random samples to compare: 500 draws from a power distribution with
# exponent 50, and 10000 draws from the uniform distribution on [0, 1).
data1 = np.random.power(50, 500)
data2 = np.random.uniform(0, 1, 10000)

# Single-argument print(...) calls produce the same text under Python 2 and
# Python 3; the former ``print a, b`` statements are a SyntaxError on Python 3.
print('Gini of Data1:  {}'.format(gini(data1)))
print('Gini of Data2:  {}'.format(gini(data2)))

# Diagonal reference line from (0, 0) to (1, 1).
trace0 = go.Scatter(x=[0, 1],
                    y=[0, 1],
                    name='Line of Ideal Centralization',
                    mode='lines')
# Cumulative, density-normalised histogram of data1.
trace1 = go.Histogram(x=data1,
                      histnorm='probability density',
                      name='Lorenz Curve',
                      cumulative=dict(enabled=True))

data = [trace0, trace1]

layout = go.Layout(title='ETH Mining Decentralization (by Block Reward)',
                   showlegend=True,
Beispiel #8
0
# Class labels as a plain array.
y = data['Class'].values

print('Testing entropy, information gain, gain ratio...')
# (labels, expected entropy) pairs.
for _labels, _expected in (
        ([1, 0, 0, 1, 0, 1], 1),
        ([1, 1, 1], 0),
        ([0], 0),
):
    assert utils.entropy(_labels) == _expected
outlook_index = np.where(original_attributes == 'Outlook')[0][0]
Xs, ys, d = utils.split_categ(
    X, y, outlook_index, list(set(X[:, outlook_index])))
assert np.isclose(utils.information_gain(y, ys), 0.246, rtol=1e-2)
assert np.isclose(utils.gain_ratio(y, ys, y), 0.156, rtol=1e-2)

print('Testing gini index...')
assert utils.gini_impurity([1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0]) == 0.5
assert utils.gini_impurity([0] * 5) == 0
print('Testing gini...')
# Parent labels and the two children obtained by cutting after 4 items.
_parent = [0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1]
assert utils.gini(_parent, [_parent[:4], _parent[4:]]) == 0.0625

print('Testing Decision Tree...')
m = dt.DecisionTreeClassifier(missing_branch=False)
m.fit(X, y)
m.to_pdf(original_attributes, out='tree1.pdf')
# (sample, expected prediction) pairs, checked in this order.
for _sample, _expected in (
        (['OVERCAST', 80, 90, 'T'], 'PLAY'),
        (['RAIN', 80, 50, 'F'], 'PLAY'),
        (['RAIN', 80, 70, 'T'], "DON'T PLAY"),
        (['SUNNY', 50, 50, 'T'], 'PLAY'),
        (['SUNNY', 50, 91, 'T'], "DON'T PLAY"),
        ([np.nan, 50, 91, 'T'], "DON'T PLAY"),
):
    assert m.predict(_sample) == _expected

print('Testing Decision Tree with missing values (branch_nan = True)...')
m = dt.DecisionTreeClassifier(missing_branch=True)
# data, attributes, categories  = read.readData(data_path = '../Dados/Test_with_nan.csv', class_name='Class',