def propagation(a1, theta1, theta2):
    # Forward pass that keeps every activation (needed later by backpropagation).
    a1 = Data_Management.add_column_left_of_matrix(a1)  # bias column for the input layer
    a2 = g(np.dot(a1, np.transpose(theta1)))
    a2 = Data_Management.add_column_left_of_matrix(a2)  # bias column for the hidden layer
    a3 = g(np.dot(a2, np.transpose(theta2)))
    return a1, a2, a3
def propagation(X, theta1, theta2):
    # Forward pass that only returns the output layer (X already carries its bias column).
    hiddenLayer = g(np.dot(X, np.transpose(theta1)))
    hiddenLayer = Data_Management.add_column_left_of_matrix(hiddenLayer)
    outputLayer = g(np.dot(hiddenLayer, np.transpose(theta2)))
    return outputLayer
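# g() and derivada_de_G() are called throughout but defined elsewhere in the
# repo; given how they are used (activations in (0, 1), paired together in
# backpropagation), they are presumably the sigmoid and its derivative.
# A minimal sketch under that assumption, not the repo's actual code:
import numpy as np

def g(z):
    # Element-wise sigmoid activation.
    return 1.0 / (1.0 + np.exp(-z))

def derivada_de_G(z):
    # Sigmoid derivative: g(z) * (1 - g(z)).
    s = g(z)
    return s * (1 - s)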
def draw_decision_boundary(theta, X, Y, orX, mu, sigma):
    plt.figure()
    x0_min, x0_max = np.min(orX), np.max(orX)
    arrayX = np.arange(x0_min, x0_max, 0.05)
    arrayX = np.reshape(arrayX, (np.shape(arrayX)[0], 1))
    arrayXaux = Normalization.normalize2(generate_polynom_data(arrayX, 8), mu, sigma)
    arrayXaux = Data_Management.add_column_left_of_matrix(arrayXaux)
    theta = np.reshape(theta, (np.shape(theta)[0], 1))
    arrayY = h(arrayXaux, theta.T)
    plt.plot(arrayX, arrayY)
    plt.scatter(orX, Y, 20, marker='$F$', color="red")
    plt.show()
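# h() is also defined elsewhere in the repo; from its call sites (a
# bias-augmented design matrix dotted with theta) it is presumably the linear
# hypothesis. A minimal sketch under that assumption:
def h(X, theta):
    # X: (m, n+1) with bias column, theta: (1, n+1) -> (m, 1) predictions.
    return np.dot(X, np.transpose(theta))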
def backdrop(params_rn, num_entradas, num_ocultas, num_etiquetas, X, y, reg):
    """Return the cost and gradient of a two-layer neural network."""
    theta1 = np.reshape(params_rn[:num_ocultas * (num_entradas + 1)],
                        (num_ocultas, (num_entradas + 1)))
    theta2 = np.reshape(params_rn[num_ocultas * (num_entradas + 1):],
                        (num_etiquetas, (num_ocultas + 1)))

    # -------------------- STEP 1: forward propagation ----------------------
    a1, a2, a3 = propagation(X, theta1, theta2)
    m = np.shape(X)[0]
    delta_3 = a3 - y  # (5000, 10)

    # -------------------- STEP 2: hidden-layer error ------------------------
    delta_matrix_1 = np.zeros(np.shape(theta1))
    delta_matrix_2 = np.zeros(np.shape(theta2))
    aux1 = np.dot(delta_3, theta2)  # (5000, 26)
    aux2 = Data_Management.add_column_left_of_matrix(
        derivada_de_G(np.dot(a1, np.transpose(theta1))))
    delta_2 = aux1 * aux2  # (5000, 26)
    delta_2 = np.delete(delta_2, [0], axis=1)  # drop the bias error -> (5000, 25)

    # -------------------- STEP 4: accumulate gradients ----------------------
    delta_matrix_1 = delta_matrix_1 + np.transpose(np.dot(np.transpose(a1), delta_2))  # (25, 401)
    delta_matrix_2 = delta_matrix_2 + np.transpose(np.dot(np.transpose(a2), delta_3))  # (10, 26)

    # -------------------- STEP 6: average and regularize --------------------
    delta_matrix_1 = (1 / m) * delta_matrix_1
    delta_matrix_1[:, 1:] = delta_matrix_1[:, 1:] + (reg / m) * theta1[:, 1:]
    delta_matrix_2 = (1 / m) * delta_matrix_2
    delta_matrix_2[:, 1:] = delta_matrix_2[:, 1:] + (reg / m) * theta2[:, 1:]

    cost = J(X, y, a3, num_etiquetas, theta1, theta2)
    gradient = np.concatenate((np.ravel(delta_matrix_1), np.ravel(delta_matrix_2)))
    return cost, gradient
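# A quick way to validate the analytic gradient that backdrop returns is
# numerical gradient checking. This is a sketch, not part of the repo; it
# assumes backdrop, X, y (one-hot) and the layer sizes are already in scope.
def numerical_gradient_check(params, num_entradas, num_ocultas, num_etiquetas,
                             X, y, reg, eps=1e-4):
    _, analytic = backdrop(params, num_entradas, num_ocultas, num_etiquetas, X, y, reg)
    for i in np.random.choice(params.size, 10, replace=False):  # spot-check 10 entries
        bumped = params.copy()
        bumped[i] += eps
        cost_plus, _ = backdrop(bumped, num_entradas, num_ocultas, num_etiquetas, X, y, reg)
        bumped[i] -= 2 * eps
        cost_minus, _ = backdrop(bumped, num_entradas, num_ocultas, num_etiquetas, X, y, reg)
        numeric = (cost_plus - cost_minus) / (2 * eps)
        print(i, analytic[i], numeric)  # the two values should agree to ~4 decimals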
def divideRandomGroups(X, y):
    X, y = Data_Management.shuffle_in_unison_scary(X, y)
    # ----------------------------------------------------------------------
    percent_train = 0.6
    percent_valid = 0.2
    percent_test = 0.2
    # ----------------------------------------------------------------------
    # TRAINING GROUP
    t = int(np.shape(X)[0] * percent_train)
    trainX = X[:t]
    trainY = y[:t]
    # ----------------------------------------------------------------------
    # VALIDATION GROUP
    v = int(np.shape(trainX)[0] + np.shape(X)[0] * percent_valid)
    validationX = X[np.shape(trainX)[0]:v]
    validationY = y[np.shape(trainY)[0]:v]
    # ----------------------------------------------------------------------
    # TESTING GROUP
    testingX = X[np.shape(trainX)[0] + np.shape(validationX)[0]:]
    testingY = y[np.shape(trainY)[0] + np.shape(validationY)[0]:]

    return trainX, trainY, validationX, validationY, testingX, testingY
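# Quick sanity check of the 60/20/20 split on synthetic data (illustration
# only; assumes Data_Management.shuffle_in_unison_scary is importable):
demoX = np.arange(20).reshape(10, 2)
demoY = np.arange(10).reshape(10, 1)
tX, tY, vX, vY, sX, sY = divideRandomGroups(demoX, demoY)
print(tX.shape, vX.shape, sX.shape)  # expected: (6, 2) (2, 2) (2, 2)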
            predicted_type = i
    return max_security, predicted_type


def polynomial_features(X, grado):
    poly = pf(grado)
    return poly.fit_transform(X)


# X, y = Data_Management.load_csv_types_features("pokemon.csv",
#     ['against_bug', 'against_dark', 'against_dragon', 'against_electric',
#      'against_fairy', 'against_fight', 'against_fire', 'against_flying',
#      'against_ghost', 'against_grass', 'against_ground', 'against_ice', 'against_normal',
#      'against_poison', 'against_psychic', 'against_rock', 'against_steel', 'against_water'])
X, y = Data_Management.load_csv_types_features("pokemon.csv", [feature1, feature2])

# TODO: use type2 for the score as well (if my SVM predicts 1 and either of the
# two y values is 1, count it as truePositive++) and report the result with a
# single type; everything about training stays the same (it trains for one type
# only). Then the score is computed that way and type prediction stays the same.
# Two SVMs (primary type and secondary type) would also work, but it is messier.

X = polynomial_features(X, grado)
X, mu, sigma = Normalization.normalize_data_matrix(X[:, 1:])
X = Data_Management.add_column_left_of_matrix(X)

trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups(X, y)

svms = []
for j in range(18):
    currentTrainY = (trainY == j) * 1
        labels[i] = int(round((labels[i] * sigma[0, 1]) + mu[0, 1], -1))
    ax.yaxis.set_ticklabels(labels)

    if show_allTypes:
        paint_pkmTypes(X, y)
    else:
        paint_pkmTypes(X, y, types)

    figure.legend()
    plt.show()


attr_names = ["percentage_male", "sp_attack"]
types_to_paint = ["fire"]

X, y = Data_Management.load_csv_types_features("pokemon.csv", attr_names)

# normalize
X, mu, sigma = Normalization.normalize_data_matrix(X)

num_entradas = np.shape(X)[1]
num_ocultas = 25
num_etiquetas = 18

true_score_max = float("-inf")
thetaTrueMin1 = None
thetaTrueMin2 = None

y_transformed = transform_y(y, num_etiquetas)

trainX, trainY, validationX, validationY, testingX, testingY = divideRandomGroups(
        sigma = possible_values[j]
        svm = SVC(kernel='rbf', C=C_value, gamma=(1 / (2 * sigma**2)))
        svm.fit(X, y.ravel())
        # Compute the score on the validation examples (higher score = better SVM).
        current_score = true_score(Xval, yval, svm)

        if current_score > max_score:
            max_score = current_score
            best_svm = svm
            selected_C = C_value
            selected_Sigma = sigma

    return best_svm, selected_C, selected_Sigma


X, y = Data_Management.load_csv_svm("pokemon.csv", [feature1, feature2])
X, mu, sigma = Normalization.normalize_data_matrix(X)
X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)

max_score = float("-inf")
best_svm = None

for i in range(NUM_TRIES):
    # THIS IS GIVING THE SAME RESULT, ALWAYS (MAYBE SELECT C AND SIGMA RANDOMLY)
    seed = np.random.seed()  # note: np.random.seed() returns None, so seed is never a usable value
    current_svm, C, s = eleccion_parametros_C_y_Sigma(trainX, trainY,
                                                      validationX, validationY,
                                                      mu, sigma)
    current_score = true_score(testingX, testingY, current_svm)
    draw_decisition_boundary(testingX, testingY, current_svm, current_score,
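# Regarding the all-caps note above: one way to stop every try from giving the
# same result is to sample C and sigma at random (log-uniformly) instead of
# sweeping the same fixed grid each time. A sketch; the helper name and the
# range are made up for illustration:
def random_C_and_sigma(low=0.01, high=30.0):
    log_low, log_high = np.log10(low), np.log10(high)
    C_value = 10 ** np.random.uniform(log_low, log_high)
    sigma_value = 10 ** np.random.uniform(log_low, log_high)
    return C_value, sigma_value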
from ML_UtilsModule import Data_Management

X, y = Data_Management.load_csv("pokemon.csv")  # not usable as-is
# print(X[:, 0])
    arrayY = h(arrayXaux, theta.T)
    plt.plot(arrayX, arrayY)
    plt.scatter(orX, Y, 20, marker='$F$', color="red")
    plt.show()


def draw_plot(X, Y):
    plt.plot(X, Y)


data = loadmat('ex5data1.mat')
X, y, Xval, yval, Xtest, ytest = (data['X'], data['y'], data['Xval'],
                                  data['yval'], data['Xtest'], data['ytest'])

XPoly = generate_polynom_data(X, 8)
XPoly, mu, sigma = normalize_matrix(XPoly)
mu = np.reshape(mu, (1, np.shape(mu)[0]))
sigma = np.reshape(sigma, (1, np.shape(sigma)[0]))
XPoly = Data_Management.add_column_left_of_matrix(XPoly)

XPolyVal = Normalization.normalize2(generate_polynom_data(Xval, 8), mu, sigma)
XPolyVal = Data_Management.add_column_left_of_matrix(XPolyVal)

XPolyTest = Normalization.normalize2(generate_polynom_data(Xtest, 8), mu, sigma)
XPolyTest = Data_Management.add_column_left_of_matrix(XPolyTest)

lambdaAux = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]

error_array = np.array([], dtype=float)
error_array_val = np.array([], dtype=float)
thetas = np.array([], dtype=float)

for l in range(len(lambdaAux)):
    theta = np.ones(XPoly.shape[1], dtype=float)
    theta_min = sciMin(fun=minimizar, x0=theta,
                       args=(XPoly, y, lambdaAux[l]),
            falsePositives += 1
        elif checker[i] == 0 and y[i] == 1:
            falseNegatives += 1

    if truePositives == 0:
        return 0

    recall = truePositives / (truePositives + falseNegatives)
    precision = truePositives / (truePositives + falsePositives)
    score = 2 * (precision * recall / (precision + recall))
    return score


graphic_attr_names = ["capture_rate", "base_egg_steps"]

X, y = Data_Management.load_csv_svm("pokemon.csv", graphic_attr_names)
X, mu, sigma = Normalization.normalize_data_matrix(X)
X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)

# -------------------------------------------------------------------------
allMaxPercent = []
allMaxElev = []
allMaxPoly = []
allMaxThetas = []

Xused = validationX
Yused = validationY
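# Tiny worked example of the F1 score computed above (illustration only):
# predictions = [1, 1, 0, 0] against truth = [1, 0, 1, 0]
# -> TP = 1, FP = 1, FN = 1
# -> precision = 1/2, recall = 1/2, F1 = 2 * (0.5 * 0.5) / (0.5 + 0.5) = 0.5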
    # Set the tick labels back to non-normalized values.
    figure.canvas.draw()
    labels = [item for item in plt.xticks()[0]]
    for i in range(len(labels)):
        labels[i] = int(round((labels[i] * sigma[0, 0]) + mu[0, 0], -1))
    ax.xaxis.set_ticklabels(labels)

    labels = [item for item in plt.yticks()[0]]
    for i in range(len(labels)):
        labels[i] = int(round((labels[i] * sigma[0, 1]) + mu[0, 1], -1))
    ax.yaxis.set_ticklabels(labels)

    plt.show()


X, y = Data_Management.load_csv_svm("pokemon.csv", ["base_egg_steps", "base_happiness"])
nX, ny, ntrainX, ntrainY, nvalidationX, nvalidationY, ntestingX, ntestingY = Data_Management.divide_legendary_groups(X, y)

# normalize
p, X = polinomial_features(X, 5)
X, mu, sigma = Normalization.normalize_data_matrix(X[:, 1:])
# X = Data_Management.add_column_left_of_matrix(X)

X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)

num_entradas = np.shape(X)[1]
num_ocultas = 25
num_etiquetas = 1

true_score_max = float("-inf")
def propagation(X, theta1, theta2):
    hiddenLayer = g(np.dot(X, np.transpose(theta1)))
    hiddenLayer = Data_Management.add_column_left_of_matrix(hiddenLayer)
    outputLayer = g(np.dot(hiddenLayer, np.transpose(theta2)))
    return outputLayer


def checkLearned(y, outputLayer):
    # Labels in ex3data1.mat are 1-indexed (digit 0 is stored as 10), hence the "+ 1".
    maxIndexV = np.argmax(outputLayer, axis=1) + 1
    checker = (y[:, 0] == maxIndexV)
    count = np.size(np.where(checker == True))
    fin = count / np.shape(y)[0] * 100
    return fin


weights = loadmat('ex3weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']

X, y = Data_Management.load_mat("ex3data1.mat")
X = Data_Management.add_column_left_of_matrix(X)  # add the column of 1s

outputLayer = propagation(X, theta1, theta2)
print("Neural network accuracy: " + str(checkLearned(y, outputLayer)) + " %")
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from ML_UtilsModule import Data_Management, Normalization
from boruta import BorutaPy

# Load X and y.
# NOTE: BorutaPy accepts numpy arrays only.
X, y = Data_Management.load_csv_types_features("pokemon.csv", [
    "hp", "attack", "defense", "sp_attack", "sp_defense", "speed",
    "height_m", "weight_kg", "percentage_male", "generation"
])
y = y.ravel()

# Define the random forest classifier, using all cores and
# sampling in proportion to the y labels.
rf = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5)

# Define the Boruta feature selection method.
feat_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=1)

# Find all relevant features.
feat_selector.fit(X, y)

# Boolean mask of the selected features.
feat_selector.support_

# Ranking of the features (1 = best).
feat_selector.ranking_

# Call transform() on X to filter it down to the selected features.
X_filtered = feat_selector.transform(X)
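# Optional follow-up: map the boolean support mask back to the feature names
# passed above (feature_names is defined here for illustration, not by boruta):
feature_names = np.array(["hp", "attack", "defense", "sp_attack", "sp_defense",
                          "speed", "height_m", "weight_kg", "percentage_male",
                          "generation"])
print(feature_names[feat_selector.support_])              # names of the selected features
print(feature_names[np.argsort(feat_selector.ranking_)])  # best-ranked first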
def checkLearned(X, y, clasificadores):
    result = checkNumber(X, clasificadores)
    maxIndexV = np.argmax(result, axis=1)
    checker = ((y[:, 0] % np.shape(clasificadores)[0]) == maxIndexV)
    count = np.size(np.where(checker == True))
    fin = count / np.shape(y)[0] * 100
    return fin


def checkNumber(X, clasificadores):
    result = np.zeros((np.shape(X)[0], np.shape(clasificadores)[0]))
    # result[:, k] = what classifier k thinks about each example
    # (e.g. result[:, 0] is each example's probability of being a 0).
    result[:] = g(np.dot(X, np.transpose(clasificadores[:])))
    return result


X, y = Data_Management.load_mat("ex3data1.mat")

clasificadores = oneVSAll(X, y, 10, 2)

print("Accuracy: " + str(checkLearned(X, y, clasificadores)) + " %")

Data_Management.draw_random_examples(X)
plt.show()
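# oneVSAll is defined elsewhere in the repo. As a reference for readers, this
# is a minimal self-contained sketch of the one-vs-all idea it implements
# (one regularized logistic classifier per label); it uses
# scipy.optimize.minimize rather than the repo's own J/gradient helpers:
from scipy.optimize import minimize

def one_vs_all_sketch(X, y, num_labels, reg):
    m, n = X.shape
    classifiers = np.zeros((num_labels, n))
    for label in range(num_labels):
        target = ((y[:, 0] % num_labels) == label) * 1  # binary labels for this class
        def cost(theta):
            hyp = g(np.dot(X, theta))
            eps = 1e-9  # avoid log(0)
            j = -(np.dot(target, np.log(hyp + eps)) +
                  np.dot(1 - target, np.log(1 - hyp + eps))) / m
            return j + reg * np.sum(theta[1:] ** 2) / (2 * m)
        classifiers[label] = minimize(cost, np.zeros(n), method="TNC").x
    return classifiers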
    # -------------------- STEP 6: average and regularize --------------------
    delta_matrix_1 = (1 / m) * delta_matrix_1
    delta_matrix_1[:, 1:] = delta_matrix_1[:, 1:] + (reg / m) * theta1[:, 1:]
    delta_matrix_2 = (1 / m) * delta_matrix_2
    delta_matrix_2[:, 1:] = delta_matrix_2[:, 1:] + (reg / m) * theta2[:, 1:]

    cost = J(X, y, a3, num_etiquetas, theta1, theta2)
    gradient = np.concatenate((np.ravel(delta_matrix_1), np.ravel(delta_matrix_2)))
    return cost, gradient


X, y = Data_Management.load_mat("ex4data1.mat")

# indexRand = np.random.randint(0, 5001, 100)
# displayData.displayData(X[indexRand[:]])
# plt.show()

weights = loadmat('ex4weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']

theta1 = pesos_aleat(np.shape(theta1)[1] - 1, np.shape(theta1)[0])
theta2 = pesos_aleat(np.shape(theta2)[1] - 1, np.shape(theta2)[0])

# a1, a2, a3 = propagation(X, theta1, theta2)

theta_vector = np.concatenate((np.ravel(theta1), np.ravel(theta2)))
        xx1.ravel(), xx2.ravel()].dot(theta))
    h = h.reshape(xx1.shape)

    # The fourth parameter is the z value whose boundary is to be drawn.
    plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='b')
    plt.show()


def training_examples_test_with_theta(training_examples, Y, theta):
    test = g(np.dot(training_examples, np.transpose(theta)))
    test = np.around(test)
    test = np.reshape(test, (np.shape(test)[0], 1))
    mask = (Y == test)
    return (len(Y[mask]) / len(Y)) * 100


data = Data_Management.load_csv(sys.argv[1])
X, Y, m, n = Data_Management.get_parameters_from_training_examples(data)

draw_data(X, Y)

theta = np.zeros([1, n + 1], dtype=float)
print(theta.shape)
# theta = np.ravel(theta)
X = Data_Management.add_column_left_of_matrix(X)  # convention from linear regression

theta = tnc(func=J, x0=theta, fprime=gradient, args=(X, Y))[0]

pinta_frontera_recta(X, Y, theta)
print("Percentage of correct predictions: " +
      str(training_examples_test_with_theta(X, Y, theta)) + "%")
    pos = np.where(Y == 1)  # indices where Y == 1
    plt.scatter(X[pos, 0], X[pos, 1], marker='*', c='y')
    pos = np.where(Y == 0)  # indices where Y == 0
    plt.scatter(X[pos, 0], X[pos, 1], marker='$F$', c='r')


def draw_decision_boundary(theta, X, Y, poly):
    x0_min, x0_max = X[:, 0].min(), X[:, 0].max()
    x1_min, x1_max = X[:, 1].min(), X[:, 1].max()
    xx1, xx2 = np.meshgrid(np.linspace(x0_min, x0_max),
                           np.linspace(x1_min, x1_max))
    sigm = g(poly.fit_transform(np.c_[xx1.ravel(), xx2.ravel()]).dot(theta))
    sigm = sigm.reshape(xx1.shape)
    plt.contour(xx1, xx2, sigm, [0.5], linewidths=1, colors='g')


def draw(theta, X, Y, poly):
    plt.figure()
    draw_data(X, Y)
    draw_decision_boundary(theta, X, Y, poly)
    plt.show()


data = Data_Management.load_csv(sys.argv[1])
X, Y, m, n = Data_Management.get_parameters_from_training_examples(data)

poly, X_poly = polinomial_features(X, int(sys.argv[2]))
theta = np.zeros([1, np.shape(X_poly)[1]], dtype=float)
theta = tnc(func=J, x0=theta, fprime=gradient, args=(X_poly, Y))[0]

draw(theta, X, Y, poly)
        plot_decision_boundary(X, y, model, self.epoch_count, self.count, cmap='RdBu')
        score, acc = model.evaluate(X, y, verbose=0)
        print("error " + str(score) + ", accuracy " + str(acc))
        self.count = self.count + 1
        self.epoch_count = self.epoch_count + 1
        return self.epoch_count


if __name__ == "__main__":
    X, y = datasets.make_moons(n_samples=1000, noise=0.1, random_state=0)  # overwritten by the next line
    X, y = Data_Management.load_csv_svm("pokemon.csv", ['weight_kg', 'height_m'])
    X, y, trainX, trainY, validationX, validationY, testingX, testingY = Data_Management.divide_legendary_groups(X, y)
    y = y.ravel()
    trainY = trainY.ravel()

    # Create a directory where images will be saved.
    os.makedirs("images_new", exist_ok=True)

    # Define our model object.
    model = Sequential()

    # Add layers to our model.
    model.add(
        Dense(units=25,
              input_shape=(2, ),