def c_optimal_parameters_supposed():
    """Train a 2x50-unit MLP on SARCOS with fixed, hand-picked
    hyperparameters, report per-dimension nMSE on the test set, and
    pickle the trained model.

    Side effects: loads the SARCOS data via ``load_sarcos`` and writes
    ``sarcos_model.pickle`` to the working directory.
    """
    batch_size = 50
    alpha = 0.01          # initial learning rate
    alpha_decay = 0.95    # multiplicative decay applied per epoch (presumably)
    min_alpha = 0.00005   # floor for the decayed learning rate
    eta = 0.0001          # presumably a momentum-style term — see MiniBatchSGD
    eta_inc = 0.01
    max_eta = 0.95
    # Two fully connected hidden layers, 50 units each.
    layers = [
        {"type": "fully_connected", "num_nodes": 50},
        {"type": "fully_connected", "num_nodes": 50},
    ]

    X, Y = load_sarcos("train")
    X_test, Y_test = load_sarcos("test")

    # Scale targets; the scaler fitted on the training targets is reused on
    # the test targets so both live in the same standardized space.
    target_scaler = StandardScaler()
    Y = target_scaler.fit_transform(Y)
    Y_test = target_scaler.transform(Y_test)

    D = (X.shape[1], )
    F = Y.shape[1]
    model = MultilayerNeuralNetwork(D, F, layers, training="regression",
                                    std_dev=0.001, verbose=True)
    mbsgd = MiniBatchSGD(net=model, epochs=100, batch_size=batch_size,
                         alpha=alpha, alpha_decay=alpha_decay,
                         min_alpha=min_alpha, eta=eta, eta_inc=eta_inc,
                         max_eta=max_eta, random_state=0, verbose=2)
    mbsgd.fit(X, Y)

    # Print nMSE on test set
    Y_pred = model.predict(X_test)
    for f in range(F):
        print("Dimension %d: nMSE = %.2f %%"
              % (f + 1, 100 * nMSE(Y_pred[:, f], Y_test[:, f])))

    # Store learned model; you can restore it with
    #   model = pickle.load(open("sarcos_model.pickle", "rb"))
    # and use it in your evaluation script.
    # FIX: use a context manager so the file handle is closed
    # deterministically (the original left open(...)'s handle dangling).
    with open("sarcos_model.pickle", "wb") as model_file:
        pickle.dump(model, model_file)
def MBSGD(trial):
    """Optuna objective: sample an MLP architecture and SGD hyperparameters,
    10-fold cross-validate on the module-level X/Y, and return the mean score.

    NOTE(review): relies on module-level ``X`` and ``Y`` being defined by the
    surrounding script — confirm before running in isolation.
    """
    np.random.seed(0)

    # Sample the architecture. suggest_int already returns an int, so the
    # original int(...) cast was redundant.
    layers = []
    for i in range(trial.suggest_int('num_layers', 2, 4)):
        # FIX: the original reused the single parameter name 'num_nodes' for
        # every layer. Optuna returns the same value for repeated suggestions
        # of one name within a trial, so all layers were forced to the same
        # width and the per-layer search space was degenerate. Give each
        # layer its own parameter name instead.
        num_nodes = trial.suggest_discrete_uniform(
            'num_nodes_l%d' % i, 40, 200, 10)
        layers.append({"type": "fully_connected",
                       "num_nodes": int(num_nodes)})

    D = (X.shape[1],)
    F = Y.shape[1]
    model = MultilayerNeuralNetwork(D, F, layers, training="regression",
                                    std_dev=0.001, verbose=True)
    mbsgd = MiniBatchSGD(
        net=model,
        epochs=100,
        batch_size=int(trial.suggest_discrete_uniform('batch_size',
                                                      32, 256, 32)),
        alpha=trial.suggest_uniform('alpha', 0.09, 0.1),
        alpha_decay=trial.suggest_uniform('alpha_decay', 0.9, 0.995),
        min_alpha=trial.suggest_uniform('min_alpha', 0.00001, 0.0001),
        eta=trial.suggest_uniform('eta', 0.0001, 0.0002),
        eta_inc=trial.suggest_uniform('eta_inc', 1e-5, 1e-4),
        max_eta=trial.suggest_uniform('max_eta', 0.9, 0.95),
        random_state=0)

    ############################################################################
    all_accuracies = cross_val_score(estimator=mbsgd, X=X, y=Y, cv=10,
                                     n_jobs=-1)
    return all_accuracies.mean()
def make_best_model(X, Y):
    """Run the architecture search helpers and return an (untrained)
    MultilayerNeuralNetwork built from the best configuration found.

    Prints each intermediate search result as it is found.
    """
    # Stage 1: depth of the network.
    num_layers = find_opt_num_layers(X, Y, 5)
    print("found best number of layers:" + str(num_layers))
    print()

    # Stage 2: per-layer share of the total node budget (in percent).
    structure = find_opt_node_distro(X, Y, num_layers, random_seed=426)
    print("found best structure:")
    for idx, share in enumerate(structure):
        print("layer " + str(idx + 1) + ", nodes: " + str(share)
              + "% of total nodes")
    print()

    # Stage 3: total node budget, given the distribution found above.
    num_nodes = find_opt_num_nodes(X, Y, 300, structure)
    print("found best number of nodes:")
    print(num_nodes)
    print()

    D = (X.shape[1], )
    F = Y.shape[1]
    # Translate each percentage share into an absolute layer width.
    layers = [
        {"type": "fully_connected",
         "num_nodes": int(num_nodes * structure[i] / 100)}
        for i in range(num_layers)
    ]
    best_model = MultilayerNeuralNetwork(D, F, layers, training="regression",
                                         std_dev=0.001, verbose=False)
    return best_model
def MBSGD(hyperparameters):
    """Cross-validate a fixed 39/61-unit network trained with the given SGD
    hyperparameters and return the negated mean CV score (lower is better,
    suitable for a minimizer).

    NOTE(review): relies on module-level ``X`` and ``Y`` — confirm.
    """
    np.random.seed(0)

    # Fixed network structure: two fully connected hidden layers.
    layers = [
        {"type": "fully_connected", "num_nodes": 39},
        {"type": "fully_connected", "num_nodes": 61},
    ]

    D = (X.shape[1], )
    F = Y.shape[1]
    net = MultilayerNeuralNetwork(D, F, layers, training="regression",
                                  std_dev=0.001, verbose=True)
    trainer = MiniBatchSGD(net=net,
                           epochs=100,
                           batch_size=32,
                           alpha=hyperparameters['alpha'],
                           alpha_decay=hyperparameters['alpha_decay'],
                           min_alpha=hyperparameters['min_alpha'],
                           eta=hyperparameters['eta'],
                           eta_inc=hyperparameters['eta_inc'],
                           max_eta=hyperparameters['max_eta'],
                           random_state=0)

    fold_scores = cross_val_score(estimator=trainer, X=X, y=Y, cv=15,
                                  n_jobs=-1)
    return -fold_scores.mean()
def check_gradient(D, F, layers, training):
    """Verify the network's analytical gradient against a numerical estimate.

    Builds a MultilayerNeuralNetwork with the given input shape ``D``, output
    dimension ``F``, ``layers`` spec and ``training`` mode, then compares
    ``gradient`` with ``numerical_gradient`` on random data up to a precision
    derived from the finite-difference step. Raises AssertionError on mismatch.
    """
    np.random.seed(0)
    n_samples = 50
    eps = 1e-6  # finite-difference step; also sets the comparison precision
    decimal = int(np.log10(1 / eps))

    net = MultilayerNeuralNetwork(D, F, layers, training=training,
                                  std_dev=0.01, verbose=True)
    net.initialize_weights(np.random.RandomState(1))

    inputs = np.random.rand(n_samples, *D)
    # Note that the components of a row of the targets have to lie within
    # [0, 1] and sum up to unity, otherwise the gradient will not be correct
    # for softmax + CE!
    targets = np.random.rand(n_samples, F)
    targets /= targets.sum(axis=1)[:, np.newaxis]

    # Calculate analytical and numerical gradients.
    analytical = net.gradient(inputs, targets)
    numerical = net.numerical_gradient(inputs, targets, eps=eps)

    print("Checking gradients up to %d positions after decimal point..."
          % decimal, end="")
    np.testing.assert_almost_equal(analytical, numerical, decimal=decimal)
    print("OK")
def create_optimized_net(hyperparameters):
    """Train a fixed 156/244-unit network with the given SGD hyperparameters
    on the module-level X/Y, pickle it to ``sarcos_model.pickle``, and return
    the trained model.

    NOTE(review): reads module-level ``X``, ``Y``, ``D``, ``F`` — confirm they
    are set by the surrounding script. The original declared
    ``global X, Y, X_test, Y_test, D, F``, which is redundant for names that
    are only read (never assigned), so it has been dropped.
    """
    batch_size = 32
    alpha = hyperparameters['alpha']
    alpha_decay = hyperparameters['alpha_decay']
    min_alpha = hyperparameters['min_alpha']
    eta = hyperparameters['eta']
    eta_inc = hyperparameters['eta_inc']
    max_eta = hyperparameters['max_eta']

    # Fixed architecture found by an earlier hyperparameter search.
    layers = [
        {"type": "fully_connected", "num_nodes": 156},
        {"type": "fully_connected", "num_nodes": 244},
    ]

    model = MultilayerNeuralNetwork(D, F, layers, training="regression",
                                    std_dev=0.001, verbose=True)
    mbsgd = MiniBatchSGD(net=model, epochs=100, batch_size=batch_size,
                         alpha=alpha, alpha_decay=alpha_decay,
                         min_alpha=min_alpha, eta=eta, eta_inc=eta_inc,
                         max_eta=max_eta, random_state=0, verbose=2)
    mbsgd.fit(X, Y)

    # Store learned model.
    # FIX: use a context manager so the file handle is closed
    # deterministically (the original left open(...)'s handle dangling).
    with open("sarcos_model.pickle", "wb") as model_file:
        pickle.dump(model, model_file)
    return model
def test_params(D, F, X, Y, best_mean, best_model, layers):
    """Cross-validate one fixed hyperparameter configuration on (X, Y) and
    keep whichever of (candidate, incumbent) has the lower mean error.

    Returns ``(best_model, best_mean)`` — updated if the candidate's negated
    mean CV score beats the incumbent ``best_mean``, unchanged otherwise.
    """
    # Fixed configuration under test (values from an earlier search).
    candidate = MultilayerNeuralNetwork(D, F, layers, training="regression",
                                        std_dev=0.001, verbose=False)
    trainer = MiniBatchSGD(net=candidate,
                           epochs=10,
                           batch_size=32,
                           alpha=0.06,
                           alpha_decay=0.9852968567173417,
                           min_alpha=1.944958755272318e-05,
                           eta=0.00010531183971652664,
                           eta_inc=0.0001955608234671532,
                           max_eta=0.982612873047571,
                           random_state=0,
                           verbose=0)

    fold_scores = cross_val_score(estimator=trainer, X=X, y=Y, cv=10,
                                  n_jobs=8, verbose=0)
    mean_error = -fold_scores.mean()

    # Lower mean error wins; otherwise keep the incumbent.
    if mean_error < best_mean:
        return candidate, mean_error
    return best_model, best_mean
# Script section: train a small MLP on a 1-D regression task and evaluate it
# on a dense grid over [0, 1] against sin(2*pi*x).
# NOTE(review): X and Y (training inputs/targets) are assumed to be defined
# earlier in the file — confirm against the surrounding script.

# Network architecture: two fully connected hidden layers (50 and 20 units).
layers = \
    [
        {
            "type": "fully_connected",
            "num_nodes": 50
        },
        {
            "type": "fully_connected",
            "num_nodes": 20
        }
    ]
epochs = 150
# D=(1,): one input feature; F=1: one output dimension.
mlnn = MultilayerNeuralNetwork(D=(1, ), F=1, layers=layers,
                               training="regression", std_dev=0.01,
                               verbose=1)
# alpha=0.1 is the learning rate; eta=0.5 is presumably a momentum-style
# term — confirm against MiniBatchSGD's documentation.
mbsgd = MiniBatchSGD(net=mlnn, epochs=epochs, batch_size=16, alpha=0.1,
                     eta=0.5, random_state=0, verbose=0)
mbsgd.fit(X, Y)
# Evaluate on 100 evenly spaced points in [0, 1]; ground truth is
# sin(2*pi*x).
X_test = np.linspace(0, 1, 100)[:, np.newaxis]
Y_test = np.sin(2 * np.pi * X_test)
Y_test_prediction = mlnn.predict(X_test)
# Train model (code for exercise 10.2 1/2/3) ############################################################################ # Train neural network D = (X.shape[1], ) F = Y.shape[1] layers = \ [ { "type": "fully_connected", "num_nodes": 10 } ] model = MultilayerNeuralNetwork(D, F, layers, training="regression", std_dev=0.01, verbose=True) mbsgd = MiniBatchSGD(net=model, epochs=100, batch_size=32, alpha=0.005, eta=0.5, random_state=0, verbose=2) mbsgd.fit(X, Y) ############################################################################ # Print nMSE on test set Y_pred = model.predict(X_test) for f in range(F):
    # Tail of a plotting helper whose definition lies above this chunk
    # (not visible here): hand the matplotlib module/figure back to the
    # caller.
    return plt


if __name__ == '__main__':
    download_sarcos()
    # Load SARCOS train/test splits.
    X, Y = load_sarcos("train")
    X_test, Y_test = load_sarcos("test")

    # Standardize regression targets; the scaler fitted on the training
    # targets is reused on the test targets.
    target_scaler = StandardScaler()
    Y = target_scaler.fit_transform(Y)
    Y_test = target_scaler.transform(Y_test)

    D = (X.shape[1], )
    F = Y.shape[1]
    # NOTE(review): `layers`, `batch_size`, `alpha`, `alpha_decay`,
    # `min_alpha`, `eta`, `eta_inc`, `max_eta` are not defined in this
    # chunk — they must come from module scope; confirm against the rest
    # of the file.
    model = MultilayerNeuralNetwork(D, F, layers, training="regression",
                                    std_dev=0.001, verbose=True)
    mbsgd = MiniBatchSGD(net=model, epochs=100, batch_size=batch_size,
                         alpha=alpha, alpha_decay=alpha_decay,
                         min_alpha=min_alpha, eta=eta, eta_inc=eta_inc,
                         max_eta=max_eta, random_state=0, verbose=2)
    plot_learning_curve(mbsgd, X, Y, cv=None, n_jobs=4)