def get_prim(n_clusters, f_value, data, xstring, ystring, inputs, data_norm):
    """Cluster the data and fit one PRIM box per cluster class.

    The clustering arguments are forwarded to ``clustering``, whose result
    must expose an integer ``class`` column. For every label ``i`` in
    ``1..max(class)`` a ``prim.Prim`` is built on ``inputs`` with the boolean
    response ``df['class'] == i`` and a single box is peeled from it. On each
    box, the peeling-trajectory entry minimising
    ``(f_value * coverage - (1 - f_value) * density) ** 2`` is selected,
    ignoring the first entry whose coverage is exactly 1.

    Args:
        n_clusters: Forwarded to ``clustering``.
        f_value: Weight applied to coverage; ``1 - f_value`` weights density.
        data: Forwarded to ``clustering``.
        xstring: Forwarded to ``clustering``.
        ystring: Forwarded to ``clustering``.
        inputs: Passed as the first argument of every ``prim.Prim``.
        data_norm: Forwarded to ``clustering``.

    Returns:
        list: ``[boxes, df]`` where ``boxes`` holds one box per class label
        (in label order) and ``df`` is the frame returned by ``clustering``.
    """
    df = clustering(n_clusters, data, xstring, ystring, data_norm)
    classes = df['class'].drop_duplicates()

    # Labels are taken to be the integers 1..max(class); a label missing from
    # that range still gets a Prim (with an all-False response).
    list_prim = [
        prim.Prim(inputs, (df['class'] == label),
                  threshold=0.5,
                  threshold_type=">")
        for label in range(1, max(classes) + 1)
    ]

    boxes = []

    for model in list_prim:
        box = model.find_box()
        boxes.append(box)

        trajectory = box.peeling_trajectory
        coverage = trajectory['coverage']
        obj = (f_value * coverage -
               (1 - f_value) * trajectory['density'])**2

        # `1 in series` would test the index labels, not the values, so look
        # at the values explicitly and drop the first full-coverage entry.
        full_coverage = np.where(coverage == 1)[0]
        if full_coverage.size:
            obj = obj.drop(obj.index[[full_coverage[0]]])

        if obj.empty:
            # Nothing left to choose from; keep the box's current selection.
            continue

        # idxmin() yields the trajectory label. argmin() is positional and
        # would be shifted by the row dropped above.
        box.select(int(obj.idxmin()))

    return [boxes, df]
    def compute_box_memberships(self, kde_noise_variance=.0001):
        """Run PRIM on a KDE of the training data and store box memberships.

        Concatenates ``self.x_tr`` into ``self.catted_x_tr``, fits a Gaussian
        KDE to a jittered copy of it, evaluates the KDE on the grid returned
        by ``self.get_grid_for_prim()``, and runs ``prim.Prim`` on that grid
        with the density as the response. The threshold is the maximum
        density times ``self.gate_init_params['prim_threshold_percent']``.
        The memberships computed by ``self.get_box_memberships_tr`` from the
        found boxes are stored in ``self.box_memberships_tr``.

        Args:
            kde_noise_variance: Variance of the isotropic Gaussian jitter
                added to every training point before fitting the KDE.

        Returns:
            None. Results are stored on ``self``; two debug lines are printed.
        """
        self.catted_x_tr = np.concatenate(self.x_tr)
        n_points, n_dims = self.catted_x_tr.shape

        # Jitter each point independently so identical/overplotted points do
        # not make the KDE covariance singular. A single shared noise vector
        # would merely shift the whole data set and leave duplicates intact.
        noise = np.random.multivariate_normal(
            np.zeros(n_dims),
            kde_noise_variance * np.eye(n_dims),
            size=n_points)
        # gaussian_kde expects (n_dims, n_points); this must be a transpose,
        # a reshape to that shape would mix coordinates of different points.
        kde_data = (self.catted_x_tr + noise).T
        kde = gaussian_kde(kde_data)

        # Rows of the grid are points: the same array is used as the PRIM
        # input frame below, one row per density value.
        grid_for_prim = self.get_grid_for_prim()
        density_estimate = kde(grid_for_prim.T)
        max_density = np.amax(density_estimate)
        print(density_estimate[0:20])
        primmer = prim.Prim(
            pd.DataFrame(grid_for_prim),
            density_estimate,
            threshold=max_density * self.gate_init_params['prim_threshold_percent']
        )
        # find_all() is used instead of calling find_box() once per
        # self.gate_init_params['n_boxes'].
        primmer.find_all()
        print(primmer.limits, 'bark')
        box_memberships_tr = self.get_box_memberships_tr(primmer._boxes)
        self.box_memberships_tr = box_memberships_tr
Beispiel #3
0
import glob as gb
import re
from os import rename

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import prim

# Configuration: solution number, utility whose decision variables are used
# as PRIM inputs, and the column of the satisficing file used as response.
solution_num = '1713'
utility = 'Cary'
metric = 'C_all'

# Response values for the chosen solution (one column per metric).
perturbed = pd.read_csv('../Data/satisficing_' + str(solution_num) +
                        '_prim.csv')

# Decision variables of all perturbations of the chosen solution.
DVs = pd.read_csv('../Data/all_perturbations' + solution_num + '.csv')

# Keep only the selected utility's decision variables. Any value other than
# "Durham" or "Raleigh" falls through to the Cary columns, which have no
# *_TT entry in this selection.
if utility == "Durham":
    util_DVs = DVs[['D_RT', 'D_TT', 'D_Cont', 'D_Ins', 'D_JLA']]
elif utility == "Raleigh":
    util_DVs = DVs[['R_RT', 'R_TT', 'R_Cont', 'R_Ins', 'R_JLA']]
else:
    util_DVs = DVs[['C_RT', 'C_Cont', 'C_Ins', 'C_JLA']]

# Cases of interest are those whose metric value exceeds 819.
p = prim.Prim(util_DVs, perturbed[metric], threshold=819, threshold_type=">")
box = p.find_box()
# Parenthesised single-argument form prints the same under Python 2 and 3.
print(box)

box.show_tradeoff()
plt.show()

#%% scenario discovery -  PRIM

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ema_workbench.analysis import prim
from ema_workbench import ema_logging
ema_logging.log_to_stderr(ema_logging.INFO)

# PRIM inputs are the first 50 columns of the experiments; the cases of
# interest are the runs with fewer than 1.0 expected deaths.
x = experiments1.iloc[:, 0:50]
outcomesdf = pd.DataFrame.from_dict(outcomes1)
y = outcomesdf['Expected Number of Deaths'] < 1.0

prim_alg = prim.Prim(x, y, threshold=0.842, peel_alpha=0.1)
box1 = prim_alg.find_box()

box1.show_tradeoff()
plt.show()

# A fresh Prim instance is built so that find_box() starts again from the
# full data set instead of continuing after box1.
prim_alg = prim.Prim(x, y, threshold=0.842, peel_alpha=0.1)
box2 = prim_alg.find_box()

box2.show_pairs_scatter(10)
# Save before show(): once the window shown by show() is closed, the current
# figure is gone and savefig() would write an empty image.
plt.savefig('prim expected deaths<1.png')
plt.show()

#%% feature scoring

from ema_workbench.analysis import feature_scoring
Beispiel #5
0
from __future__ import print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import prim

def _response(x):
    """Return the demo response x1 * x2 + 0.3 * x3 for the rows of *x*."""
    return x["x1"]*x["x2"] + 0.3*x["x3"]


# 1000 uniformly random samples of three input variables.
df = pd.DataFrame(np.random.rand(1000, 3), columns=["x1", "x2", "x3"])

# Cases of interest: rows whose response is greater than 0.5.
p = prim.Prim(df, _response, threshold=0.5, threshold_type=">")
box = p.find_box()

# Plot the box's trade-off curve.
box.show_tradeoff()
plt.show()
Beispiel #6
0
# Distribution of AAPE and pairwise plots of all outputs.
sns.distplot(outputs['AAPE'])
sns.pairplot(outputs)

# Limits below which a run is flagged as a case of interest.
AAD_LIMIT = 3700000
AAPE_LIMIT = 94

# Assign binary flags: 1 where the output is below its limit, otherwise 0.
outputs['AAD_bin'] = np.where(outputs['AAD'] < AAD_LIMIT, 1, 0)
sum(outputs['AAD_bin'])  # number of flagged runs (value is not stored)

outputs['AAPE_bin'] = np.where(outputs['AAPE'] < AAPE_LIMIT, 1, 0)
sum(outputs['AAPE_bin'])  # number of flagged runs (value is not stored)
# Look at the smaller number of pass/fails, i.e. only a small number of runs
# are under the threshold, so interrogate those.
# With '<', y values less than the threshold become 1 (cases of interest) and
# all greater values become 0.

# See https://github.com/Project-Platypus/PRIM/blob/master/README.md
# AAPE objective.
# The response is the 0/1 flag, so "greater than 0.5" (the "threshold" and
# "threshold_type" arguments) picks out the flagged runs.
p = prim.Prim(inputs2, outputs['AAPE_bin'],
              threshold=0.5,
              threshold_type=">")
box = p.find_box()
box.show_tradeoff()
plt.show()

# Choose entry 42 of the peeling trajectory and report it.
box.select(42)
print(box)
box.show_details()

# AAD objective: same procedure on the other input set.
p = prim.Prim(inputs1, outputs['AAD_bin'],
              threshold=0.5,
              threshold_type=">")
box = p.find_box()
box.show_tradeoff()
plt.show()

box.select(42)
Beispiel #7
0
# Histogram of the response computed through R, converted back to Python and
# wrapped in a DataFrame for inspection.
resp = robjects.FloatVector(response)
h = r.hist(resp)
hf = robjects.conversion.converter.rpy2py(h)
pf = pd.DataFrame(hf)
for shown in (h, pf, pf.attrs):
    print(shown)

# Disabled: calling R's prim.box directly.
#print(prim_response)
#thr = robjects.FloatVector([1.0,2.0])
#rprim = r['prim.box']
#prim_res = rprim(x=qf,y=prim_response,threshold=thr)
#print(prim_res)


# Python PRIM on the same response; cases of interest are values above 1.0.
# Only the Prim object is built here, the box search below is disabled.
p = prim.Prim(df, response, threshold=1.0, threshold_type=">")
#box = p.find_box()
#box.show_tradeoff()
#plt.show()


from sklearn.datasets import load_iris
from sklearn import tree
# Load the iris features and labels and echo both arrays.
X, y = load_iris(return_X_y=True)
for arr in (X, y):
    print(arr)

# Fit a decision tree with default settings and draw it.
clf = tree.DecisionTreeClassifier().fit(X, y)
tree.plot_tree(clf)

Beispiel #8
0
from __future__ import print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import prim

# Load the full experiment table.
df_all = pd.read_csv("data_comp_exp_ML.csv")

# Per-column count of missing values (the result is not stored or printed).
df_all.isnull().sum()

# Restrict the analysis to a single region.
region = "asia"
df = df_all.query("region =='{}'".format(region))

# Feature matrix: everything except the result, goal and identifier columns.
non_feature_columns = [
    "resultados_2_c",
    "resultados_3_c",
    "region",
    "policy",
    "parameter_set",
    "gcm",
    "key",
    "goal_2_c",
    "goal_3_c",
]
X = df.drop(columns=non_feature_columns)
# Target variable.
response = df["goal_2_c"]

# NOTE(review): threshold_type is "=" here; confirm the installed prim
# version accepts anything other than ">" or "<".
p = prim.Prim(X, response, threshold=1, threshold_type="=")
box = p.find_box()
box.show_tradeoff()
plt.show()
Beispiel #9
0
            matriz[i][j] = coluna
            matriz[j][i] = coluna
        j = j + 1
    i = i + 1
# Convert the matrix into adjacency-list form: for every row k, store a dict
# mapping the column index (as a string) to each non-zero entry of that row.
for k, linha in enumerate(matriz):
    lista_encadeada[str(k)] = {
        str(j): peso for j, peso in enumerate(linha) if peso != 0
    }

# Print every vertex with its neighbour dict. The single-argument print(...)
# form produces the same output under Python 2 and Python 3.
for vertice, vizinhos in lista_encadeada.items():
    print(vertice + "-" + str(vizinhos))
#print(matriz)

grafo = grafos.Grafo(lista_encadeada)

# Disabled alternative: Kruskal's algorithm, summing int(i[0]) over the
# entries of the resulting tree.
#kruskal = kruskal.Kruskal(grafo)
#valor = 0
#print(kruskal.arvore_minima())
#for i in kruskal.arvore_minima():
#    valor = valor + int(i[0])
#print(valor)

# NOTE: this rebinds the name `prim` from the module to the Prim instance, so
# the module is no longer reachable under that name afterwards.
prim = prim.Prim(grafo)
print(prim.arvore_minima('0'))