labels[i] = int(round((labels[i] * sigma[0, 1]) + mu[0, 1], -1))
    ax.yaxis.set_ticklabels(labels)

    if show_allTypes:
        paint_pkmTypes(X, y)
    else:
        paint_pkmTypes(X, y, types)

    figure.legend()

    plt.show()


# --- Neural-network experiment setup ---
# Two pokemon attributes are used as input features for this run.
# Data_Management / Normalization / np / transform_y are imported or defined
# elsewhere in the file (outside this excerpt).
attr_names = ["percentage_male", "sp_attack"]
# Type(s) to highlight when plotting — presumably consumed by paint_pkmTypes
# further up the file; verify against the plotting code.
types_to_paint = ["fire"]
X, y = Data_Management.load_csv_types_features("pokemon.csv", attr_names)

#normalize
# mu/sigma are kept so later predictions can be mapped back to raw units.
X, mu, sigma = Normalization.normalize_data_matrix(X)

num_entradas = np.shape(X)[1]   # input-layer size = number of features
num_ocultas = 25                # hidden-layer units
num_etiquetas = 18              # output classes (18 pokemon types)
true_score_max = float("-inf")  # best score seen so far across training runs

# Best parameter sets found so far; filled in by the training code below
# this excerpt.
thetaTrueMin1 = None
thetaTrueMin2 = None

# Presumably one-hot encodes y into an (m, num_etiquetas) matrix — confirm
# against transform_y's definition.
y_transformed = transform_y(y, num_etiquetas)
trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups(
            predicted_type = i

    return max_security, predicted_type


def polynomial_features(X, grado):
    """Return the polynomial feature expansion of X up to degree `grado`.

    `pf` is an alias imported elsewhere in the file — presumably
    sklearn's PolynomialFeatures; confirm at the import site.
    """
    expander = pf(grado)
    return expander.fit_transform(X)


# X, y = Data_Management.load_csv_types_features("pokemon.csv",['against_bug', 'against_dark','against_dragon','against_electric',
#                          'against_fairy','against_fight','against_fire','against_flying',
#                          'against_ghost','against_grass','against_ground','against_ice','against_normal',
#                          'against_poison','against_psychic','against_rock','against_steel','against_water'])

# Load the two selected attributes. NOTE(review): feature1/feature2 and
# grado are defined outside this excerpt — confirm before reuse.
X, y = Data_Management.load_csv_types_features("pokemon.csv",
                                               [feature1, feature2])
# TODO: also use type2 for the score (if my SVM predicts 1 and either of the
# two y labels is 1, count it as truePositive++) and report the result with
# just one type; everything about training stays the same (trained for a
# single type). The scoring then applies that rule and type prediction stays
# the same. Alternatively, train two SVMs — primary type and secondary
# type — but that is more hassle.

# Expand features polynomially, then normalize every column except the
# leading one (dropped here and re-added below, presumably as the
# bias/intercept column — verify add_column_left_of_matrix).
X = polynomial_features(X, grado)
X, mu, sigma = Normalization.normalize_data_matrix(X[:, 1:])
X = Data_Management.add_column_left_of_matrix(X)

# Random split into train / validation / test partitions.
trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups(
    X, y)

# One-vs-all classifiers, one per pokemon type; filled by the loop below.
svms = []
for j in range(18):
    currentTrainY = (trainY == j) * 1
# 예제 #3 ("Example #3" — scrape artifact separating pasted snippets;
# commented out so the file parses)
# 0 (scrape artifact)
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from ML_UtilsModule import Data_Management, Normalization
from boruta import BorutaPy

# Load X and y for the Boruta feature-selection run over the base stats.
# NOTE(review): BorutaPy accepts numpy arrays only; load_csv_types_features
# presumably returns ndarrays already, since no .values conversion is done
# here — confirm against its definition.
X, y = Data_Management.load_csv_types_features("pokemon.csv", [
    "hp", "attack", "defense", "sp_attack", "sp_defense", "speed", "height_m",
    "weight_kg", "percentage_male", "generation"
])
y = y.ravel()  # flatten target to the 1-D vector the estimators expect

# Random-forest base estimator: all cores (n_jobs=-1), class weights in
# proportion to y label frequencies, shallow trees (max_depth=5).
rf = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5)

# Boruta wrapper: n_estimators='auto' lets Boruta size the forest itself;
# random_state=1 fixes the seed for reproducibility; verbose=2 logs each
# iteration.
feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=1)

# Run the selection to find all relevant features.
feat_selector.fit(X, y)

# Boolean mask of selected features. Bare expression: only shows output in
# an interactive session — it has no effect when run as a script.
feat_selector.support_

# Integer ranking of every feature (bare expression, same caveat as above).
feat_selector.ranking_

# Reduce X to only the columns Boruta selected.
X_filtered = feat_selector.transform(X)