import numpy as np
import prim


def get_prim(n_clusters, f_value, data, xstring, ystring, inputs, data_norm):
    # clustering() is defined elsewhere in this module; it labels each row with a 'class'.
    df = clustering(n_clusters, data, xstring, ystring, data_norm)
    classes = sorted(df[['class']].drop_duplicates()['class'])

    # Fit one PRIM object per cluster label.
    list_prim = []
    for i in range(1, max(classes) + 1):
        list_prim.append(
            prim.Prim(inputs, (df['class'] == i),
                      threshold=0.5, threshold_type=">"))

    boxes = []
    for i in range(len(list_prim)):
        boxes.append(list_prim[i].find_box())
        # Pick the point on the peeling trajectory that balances coverage and
        # density according to the weight f_value.
        obj = (f_value * boxes[i].peeling_trajectory['coverage'] -
               (1 - f_value) * boxes[i].peeling_trajectory['density']) ** 2
        # Drop the trivial entry with coverage == 1 (the full input space), if present.
        if (boxes[i].peeling_trajectory['coverage'] == 1).any():
            coverage1 = np.where(
                boxes[i].peeling_trajectory['coverage'] == 1)[0][0]
            obj = obj.drop(obj.index[[coverage1]])
        k = obj.argmin()
        boxes[i].select(k)

    return [boxes, df]
def compute_box_memberships(self, kde_noise_variance=.0001):
    # Requires numpy as np, pandas as pd, prim, and scipy.stats.gaussian_kde at module level.
    self.catted_x_tr = np.concatenate(self.x_tr)
    n_points, n_dims = self.catted_x_tr.shape

    # Jitter every point to prevent a singular matrix; some data points are
    # identical/overplotted, so each point gets its own noise vector.
    noise = np.random.multivariate_normal(
        np.zeros(n_dims), kde_noise_variance * np.eye(n_dims), size=n_points)
    kde_data = self.catted_x_tr + noise

    # gaussian_kde expects data of shape (n_dims, n_points), so transpose.
    kde = gaussian_kde(kde_data.T)

    grid_for_prim = self.get_grid_for_prim()
    density_estimate = kde(grid_for_prim.T)
    max_density = np.amax(density_estimate)
    print(density_estimate[0:20])  # debug

    primmer = prim.Prim(
        pd.DataFrame(grid_for_prim),
        density_estimate,
        threshold=max_density * self.gate_init_params['prim_threshold_percent']
    )
    #for i in range(self.gate_init_params['n_boxes']):
    #    primmer.find_box()
    primmer.find_all()
    print(primmer.limits)  # debug

    box_memberships_tr = self.get_box_memberships_tr(primmer._boxes)
    self.box_memberships_tr = box_memberships_tr
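# For reference, scipy.stats.gaussian_kde takes both its training data and its
# evaluation points with shape (n_dims, n_points), which is why the transposes
# above matter. A minimal, self-contained sketch with made-up 2-D data (not the
# class's x_tr):
import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(0)
points = rng.normal(size=(500, 2))           # 500 samples, 2 dimensions

kde = gaussian_kde(points.T)                 # training data as (n_dims, n_points)

grid = np.array([[0.0, 0.0], [1.0, 1.0]])    # two evaluation points
densities = kde(grid.T)                      # evaluation points as (n_dims, n_points)
print(densities.shape)                       # -> (2,)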
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from os import rename
import glob as gb
import prim

# set solution number
solution_num = '1713'
utility = 'Cary'
metric = 'C_all'

# load objectives and establish regional criteria
perturbed = pd.read_csv('../Data/satisficing_' + str(solution_num) + '_prim.csv')
DVs = pd.read_csv('../Data/all_perturbations' + solution_num + '.csv')

if utility == "Durham":
    util_DVs = DVs[['D_RT', 'D_TT', 'D_Cont', 'D_Ins', 'D_JLA']]
elif utility == "Raleigh":
    util_DVs = DVs[['R_RT', 'R_TT', 'R_Cont', 'R_Ins', 'R_JLA']]
else:
    util_DVs = DVs[['C_RT', 'C_Cont', 'C_Ins', 'C_JLA']]

p = prim.Prim(util_DVs, perturbed[metric], threshold=819, threshold_type=">")
box = p.find_box()
print(box)
box.show_tradeoff()
plt.show()
plt.show()

#%% scenario discovery - PRIM
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from ema_workbench.analysis import prim
from ema_workbench import ema_logging

ema_logging.log_to_stderr(ema_logging.INFO)

x = experiments1.iloc[:, 0:50]
outcomesdf = pd.DataFrame.from_dict(outcomes1)
y = outcomesdf['Expected Number of Deaths'] < 1.0

prim_alg = prim.Prim(x, y, threshold=0.842, peel_alpha=0.1)
box1 = prim_alg.find_box()
box1.show_tradeoff()
plt.show()

prim_alg = prim.Prim(x, y, threshold=0.842, peel_alpha=0.1)
box2 = prim_alg.find_box()
box2.show_pairs_scatter(10)
# save before show(), otherwise the cleared figure is written to an empty file
plt.savefig('prim expected deaths<1.png')
plt.show()

#%% feature scoring
from ema_workbench.analysis import feature_scoring
from __future__ import print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import prim

df = pd.DataFrame(np.random.rand(1000, 3), columns=["x1", "x2", "x3"])

p = prim.Prim(df, lambda x: x["x1"]*x["x2"] + 0.3*x["x3"],
              threshold=0.5, threshold_type=">")
box = p.find_box()
box.show_tradeoff()
plt.show()
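# Later snippets in this section go one step further and pick a specific point on
# the peeling trajectory. A short, hypothetical continuation of the example above,
# using the same PRIM API calls that appear below; the index 20 is arbitrary and
# only valid if the trajectory has at least that many entries:
box.select(20)       # choose one point on the coverage/density trade-off
print(box)           # prints the box limits plus coverage/density statistics
box.show_details()
plt.show()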
sns.distplot(outputs.AAPE)
sns.pairplot(outputs)

outputs['AAD_bin'] = np.where(outputs['AAD'] < 3700000, 1, 0)  # assign binary label
sum(outputs['AAD_bin'])
outputs['AAPE_bin'] = np.where(outputs['AAPE'] < 94, 1, 0)  # assign binary label
sum(outputs['AAPE_bin'])
# Look at the smaller number of pass/fails - i.e. only a small number of cases are
# under the threshold, so interrogate these.
# If '<' is used, y values less than the threshold become 1's (cases of interest)
# and all greater values become 0's.
## https://github.com/Project-Platypus/PRIM/blob/master/README.md

# AAPE OBJECTIVE
# In this instance we are interested in cases where the response is greater than 0.5
# (as indicated by the "threshold" and "threshold_type" arguments)
p = prim.Prim(inputs2, outputs.AAPE_bin, threshold=0.5, threshold_type=">")
box = p.find_box()
box.show_tradeoff()
plt.show()
box.select(42)
print(box)
box.show_details()

# AAD OBJECTIVE
p = prim.Prim(inputs1, outputs.AAD_bin, threshold=0.5, threshold_type=">")
box = p.find_box()
box.show_tradeoff()
plt.show()
box.select(42)
resp = robjects.FloatVector(response)
h = r.hist(resp)
hf = robjects.conversion.converter.rpy2py(h)
pf = pd.DataFrame(hf)
print(h)
print(pf)
print(pf.attrs)
#print(prim_response)
#thr = robjects.FloatVector([1.0, 2.0])
#rprim = r['prim.box']
#prim_res = rprim(x=qf, y=prim_response, threshold=thr)
#print(prim_res)

p = prim.Prim(df, response, threshold=1.0, threshold_type=">")
#box = p.find_box()
#box.show_tradeoff()
#plt.show()

from sklearn.datasets import load_iris
from sklearn import tree

X, y = load_iris(return_X_y=True)
print(X)
print(y)
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X, y)
tree.plot_tree(clf)
from __future__ import print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import prim

# Import dataset
df_all = pd.read_csv("data_comp_exp_ML.csv")

# Check for missing values
df_all.isnull().sum()

region = "asia"
df = df_all.query("region == '{}'".format(region))

# Declare feature vector and target variable
X = df.drop(columns=["resultados_2_c", "resultados_3_c", "region", "policy",
                     "parameter_set", "gcm", "key", "goal_2_c", "goal_3_c"])
response = df["goal_2_c"]

p = prim.Prim(X, response, threshold=1, threshold_type="=")
box = p.find_box()
box.show_tradeoff()
plt.show()
        matriz[i][j] = coluna
        matriz[j][i] = coluna
        j = j + 1
    i = i + 1

# Convert the adjacency matrix into an adjacency dict keyed by vertex name.
k = 0
for linha in matriz:
    vetor = {}
    for j in range(len(linha)):
        if linha[j] != 0:
            vetor[str(j)] = linha[j]
    lista_encadeada[str(k)] = vetor
    k = k + 1

for i, k in lista_encadeada.items():
    print(i + "-" + str(k))

#print(matriz)

grafo = grafos.Grafo(lista_encadeada)

#kruskal = kruskal.Kruskal(grafo)
#valor = 0
#print(kruskal.arvore_minima())
#for i in kruskal.arvore_minima():
#    valor = valor + int(i[0])
#print(valor)

prim = prim.Prim(grafo)
print(prim.arvore_minima('0'))
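# The grafos.Grafo and prim.Prim classes above are defined elsewhere in that
# project and are not shown here. As a rough, self-contained sketch of what
# arvore_minima('0') presumably computes - Prim's minimum spanning tree grown
# from vertex '0' over the same adjacency-dict format built above - here is a
# standard heap-based version; the function name and return format are
# illustrative, not the project's API:
import heapq

def prim_mst(adj, start='0'):
    """Prim's algorithm over an adjacency dict like {'0': {'1': 2, '2': 3}, ...}.

    Returns a list of (weight, parent, vertex) edges forming the spanning tree.
    """
    visited = {start}
    # Heap of candidate edges (weight, from_vertex, to_vertex) leaving the tree.
    heap = [(w, start, v) for v, w in adj[start].items()]
    heapq.heapify(heap)
    tree = []

    while heap and len(visited) < len(adj):
        weight, parent, vertex = heapq.heappop(heap)
        if vertex in visited:
            continue
        visited.add(vertex)
        tree.append((weight, parent, vertex))
        for neighbour, w in adj[vertex].items():
            if neighbour not in visited:
                heapq.heappush(heap, (w, vertex, neighbour))
    return tree

# Example with a small symmetric graph:
adj = {
    '0': {'1': 2, '2': 3},
    '1': {'0': 2, '2': 1},
    '2': {'0': 3, '1': 1},
}
print(prim_mst(adj, '0'))  # -> [(2, '0', '1'), (1, '1', '2')]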