# NOTE(review): the statements down to plt.show() are the tail of a plotting
# routine whose header (and the loop that defines `i`) lie before this chunk.
# The indentation shown for them is a best guess -- confirm against the
# enclosing definition.
        # Undo the normalization of tick label i using the second column's
        # sigma and mu, then round to the nearest ten.
        labels[i] = int(round((labels[i] * sigma[0, 1]) + mu[0, 1], -1))
    ax.yaxis.set_ticklabels(labels)
    # Paint every type, or only the requested ones.
    if show_allTypes:
        paint_pkmTypes(X, y)
    else:
        paint_pkmTypes(X, y, types)
    figure.legend()
    plt.show()


# Attributes to load from the dataset and the types to highlight.
attr_names = ["percentage_male", "sp_attack"]
types_to_paint = ["fire"]
X, y = Data_Management.load_csv_types_features("pokemon.csv", attr_names)

# Normalize the feature matrix; mu and sigma are kept from the call.
X, mu, sigma = Normalization.normalize_data_matrix(X)

num_entradas = np.shape(X)[1]  # number of inputs: one per column of X
num_ocultas = 25  # presumably the hidden-layer size -- confirm
num_etiquetas = 18  # number of labels, passed to transform_y below

# Starts at -inf so that any real score compares greater.
# NOTE(review): these look like best-score / best-weights trackers -- confirm.
true_score_max = float("-inf")
thetaTrueMin1 = None
thetaTrueMin2 = None

y_transformed = transform_y(y, num_etiquetas)
# NOTE(review): the call below continues past the end of this chunk.
trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups(
predicted_type = i return max_security, predicted_type def polynomial_features(X, grado): poly = pf(grado) return (poly.fit_transform(X)) # X, y = Data_Management.load_csv_types_features("pokemon.csv",['against_bug', 'against_dark','against_dragon','against_electric', # 'against_fairy','against_fight','against_fire','against_flying', # 'against_ghost','against_grass','against_ground','against_ice','against_normal', # 'against_poison','against_psychic','against_rock','against_steel','against_water']) X, y = Data_Management.load_csv_types_features("pokemon.csv", [feature1, feature2]) # TODO: usar el tipo2 para sacar el score tambien (si mi svm predice 1 y una de las dos y es 1, es truePositive++) y dar el resultado con solo # 1 tipo, todo lo del entrenamiento se queda igual (se entrena para un solo tipo). Luego en el score se hace eso y para predecir el tipo se queda igual. # Tambien puedo sacar dos svm, tipo primario y tipo secundario pero mas lio ? X = polynomial_features(X, grado) X, mu, sigma = Normalization.normalize_data_matrix(X[:, 1:]) X = Data_Management.add_column_left_of_matrix(X) trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups( X, y) svms = [] for j in range(18): currentTrainY = (trainY == j) * 1
import numpy as np
from boruta import BorutaPy
from sklearn.ensemble import RandomForestClassifier

from ML_UtilsModule import Data_Management, Normalization

# Boruta feature selection over the numeric Pokemon attributes: fit a random
# forest based selector, report which features it keeps, and build the
# filtered feature matrix.

# Load X and y.
# NOTE: BorutaPy accepts numpy arrays only; X and y are used exactly as the
# loader returns them (assumes it returns numpy arrays -- confirm).
X, y = Data_Management.load_csv_types_features("pokemon.csv", [
    "hp",
    "attack",
    "defense",
    "sp_attack",
    "sp_defense",
    "speed",
    "height_m",
    "weight_kg",
    "percentage_male",
    "generation",
])
# Flatten the labels to a 1-D vector.
y = y.ravel()

# Random forest classifier using all cores and weighting classes in
# proportion to the y labels.
rf = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5)

# Boruta feature selection method wrapped around the forest.
feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=1)

# Find all relevant features.
feat_selector.fit(X, y)

# Report the outcome. These were previously bare expressions whose values
# were discarded, so the script never showed which features were selected.
# support_ is the boolean mask of kept features, one entry per column of X.
print("Selected feature mask:", feat_selector.support_)
# ranking_ gives each feature's rank (rank 1 = selected).
print("Feature ranking:", feat_selector.ranking_)

# Filter X down to the selected features.
X_filtered = feat_selector.transform(X)