def test_handle_errors(self):
    """PNN must reject malformed training/prediction data and refuse
    to predict before training."""
    with self.assertRaises(ValueError):
        # size of target data not the same as
        # size of input data.
        pnnet = algorithms.PNN(verbose=False)
        pnnet.train(np.array([[0], [0]]), np.array([0]))

    with self.assertRaises(ValueError):
        # 2-D target vector (must be 1-D)
        pnnet = algorithms.PNN(verbose=False)
        pnnet.train(np.array([[0]]), np.array([[0, 0]]))

    with self.assertRaises(ValueError):
        # invalid feature size for prediction data
        pnnet = algorithms.PNN(verbose=False)
        pnnet.train(np.array([[0], [0]]), np.array([0]))
        pnnet.predict(np.array([[0]]))

    msg = "hasn't been trained"
    # Fix: assertRaisesRegexp is a deprecated alias (removed in
    # Python 3.12); assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(NotTrained, msg):
        # predict without training
        pnnet = algorithms.PNN(verbose=False)
        pnnet.predict(np.array([[0]]))

    with self.assertRaises(ValueError):
        # different number of features for
        # train and test data
        grnet = algorithms.PNN(verbose=False)
        grnet.train(np.array([[0]]), np.array([0]))
        grnet.predict(np.array([[0, 0]]))
def test_predict_probability(self):
    """predict_proba returns per-class probabilities that sum to one
    and leaves its input arrays untouched."""
    digits = datasets.load_digits()
    x_train, x_test, y_train, y_test = train_test_split(
        digits.data, digits.target, train_size=0.7
    )

    saved_x_train = x_train.copy()
    saved_x_test = x_test.copy()
    saved_y_train = y_train.copy()

    n_classes = len(np.unique(digits.target))

    network = algorithms.PNN(verbose=False, std=10)
    network.train(x_train, y_train)

    probabilities = network.predict_proba(x_test)
    n_samples = x_test.shape[0]
    self.assertEqual(probabilities.shape, (n_samples, n_classes))

    row_sums = np.round(probabilities.sum(axis=1), 10)
    np.testing.assert_array_equal(row_sums, np.ones(n_samples))

    previous_probabilities = probabilities.copy()

    # Prediction must not mutate the arrays it was given.
    np.testing.assert_array_equal(x_train, saved_x_train)
    np.testing.assert_array_equal(x_test, saved_x_test)
    np.testing.assert_array_equal(y_train, saved_y_train)

    # Overwriting the training matrix afterwards must not change the
    # predictions of the already-trained network (no aliasing).
    x_train[:, :] = 0
    probabilities = network.predict_proba(x_test)
    row_sums = np.round(probabilities.sum(axis=1), 10)
    np.testing.assert_array_almost_equal(probabilities, previous_probabilities)
def test_basic_storage(self):
    """A pickled-and-restored PNN keeps its parameters, its logging
    machinery, and produces identical predictions."""
    input_data = np.random.random((100, 2))
    target_data = np.random.random(100) > 0.5

    pnn = algorithms.PNN(std=0.123, verbose=True)
    pnn.train(input_data, target_data)

    stored_pnn = pickle.dumps(pnn)
    loaded_pnn = pickle.loads(stored_pnn)

    testcases = [
        ('pnn', pnn),
        ('loaded_pnn', loaded_pnn),
    ]

    for name, network in testcases:
        print("Test case name: {}".format(name))

        self.assertAlmostEqual(network.std, 0.123)
        # Fix: `verbose` is a boolean flag, not a number —
        # assertTrue states the intent (assertAlmostEqual only
        # happened to pass because True == True).
        self.assertTrue(network.verbose)

        with catch_stdout() as out:
            network.logs.stdout = out
            network.logs.write("Test message")
            terminal_output = out.getvalue()
            self.assertIn("Test message", terminal_output)

    pnn_prediction = pnn.predict(input_data)
    loaded_pnn_prediction = loaded_pnn.predict(input_data)
    np.testing.assert_array_almost_equal(loaded_pnn_prediction, pnn_prediction)
def test_with_noise():
    """Train a PNN on the first 10000 samples, check predictions on
    hand-drawn GIMP digit images, then measure accuracy on test data
    corrupted with noise."""
    (x_train, y_train), (x_test, y_test) = dataset8.load_data(mode=0)
    pnn = algorithms.PNN(std=1, batch_size=128, verbose=True)
    pnn.train(x_train[0:10000], y_train[0:10000])
    y_predicted = pnn.predict(x_test)
    local_path = 'MY_IMAGES_GIMP/'

    for nTest in np.arange(0, 10, 1):
        # convert to numpy array
        x = get_pxs(local_path + str(nTest) + '.png')
        # Inverting and normalizing image.  Fix: use out-of-place
        # division — `x /= 255` raises TypeError when get_pxs returns
        # an integer (e.g. uint8) array, because numpy forbids
        # in-place true division of integer arrays.
        x = (255 - x) / 255.0
        x = np.expand_dims(x, axis=0)
        x.shape = (1, 784)
        prediction = pnn.predict(x)
        print('REAL \t\tPREDICTED')
        print(str(nTest) + '\t\t\t' + str(prediction[0]))

    print("accuracy = %.2f" % (metrics.accuracy_score(y_predicted, y_test)))

    # Corrupt every test sample in place, then re-evaluate.
    for i in range(0, 10000):
        x_test[i] = noise(x_test[i], 500)

    y_predicted = pnn.predict(x_test)
    print("accuracy on noise data = %.2f" % (metrics.accuracy_score(y_predicted, y_test)))
def __init__(self, std=10):
    """
    Wrap a neupy PNN classifier.

    :param std: float
        standard deviation (spread) for the PDF function,
        defaults to 10.
    :return: None
    """
    self.pnn = algorithms.PNN(std=std, verbose=False)
def clasification():
    """Flask view: label places as above/below the mean price, train a
    PNN on one-hot encoded name/address features, and classify a
    hard-coded test sample."""
    df = pd.DataFrame()
    for name, adress, price in db.session.query(Place.place_name,
                                                Place.place_adress,
                                                Place.place_price):
        print(name, adress, price)
        df = df.append({"name": name, "adress": adress, "price": price},
                       ignore_index=True)
    # db.session.close()
    X = pd.get_dummies(data=df[['name', 'adress']])
    mean_price = df['price'].mean()
    # quality = 1 for places priced at or above the mean, else 0.
    df.loc[df['price'] < mean_price, 'quality'] = 0
    df.loc[df['price'] >= mean_price, 'quality'] = 1
    print(df)
    print(X)
    pnn = algorithms.PNN(std=10, verbose=False)
    pnn.train(X, df['quality'])
    test_str = ['club', 'Ковальський провулок']
    count_columns = len(X.columns)
    test_list = np.array([0] * count_columns)
    test_list[0] = 1
    test_list[-2] = 1
    test_list = np.reshape(test_list, (1, len(test_list)))
    print(test_list)
    y_predicted = pnn.predict(test_list)
    result = "Ні"
    # Bug fix: the original test `y_predicted - 1 < eps` was also true
    # for class 0 (0 - 1 = -1 < eps), so the answer was always "Так".
    # Compare the distance to class 1 instead.
    if abs(y_predicted - 1) < 0.0000000000001:
        result = "Так"
    return render_template('clasification.html', y_predicted=result,
                           test_data=test_list[0], test_str=test_str)
def clasification():
    """Flask view: label vacancies as above/below the mean salary,
    train a PNN on one-hot encoded name/company features, and classify
    a hard-coded test sample."""
    df = pd.DataFrame()
    for name, company, salary in db.session.query(OrmVacancy.vacancy_name,
                                                  OrmVacancy.vacancy_company,
                                                  OrmVacancy.vacancy_salary):
        print(name, company, salary)
        df = df.append({"name": name, "company": company, "salary": salary},
                       ignore_index=True)
    mean_p = df['salary'].mean()
    # quality = 1 for vacancies paying at or above the mean, else 0.
    df.loc[df['salary'] < mean_p, 'quality'] = 0
    df.loc[df['salary'] >= mean_p, 'quality'] = 1
    X = pd.get_dummies(data=df[['name', 'company']])
    print(df)
    print(X)
    pnn = algorithms.PNN(std=10, verbose=False)
    pnn.train(X, df['quality'])
    count_columns = len(X.columns)
    string_test = ['frontend', 'google']
    test_list = [0] * count_columns
    test_list[0] = 1
    test_list[-1] = 1
    print(test_list)
    y_predicted = pnn.predict([test_list])
    result = "Ні"
    # Bug fix: the original test `y_predicted - 1 < eps` was also true
    # for class 0 (0 - 1 = -1 < eps), so the answer was always "Так".
    # Compare the distance to class 1 instead.
    if abs(y_predicted - 1) < 0.0000001:
        result = "Так"
    return render_template('clasification.html', y_predicted=result,
                           test_data=test_list, string_test=string_test)
def fromPN(cls, pos_train, neg_train, z_trains, y_train, models=None):
    """Build a classifier from positive/negative samples and fit the
    per-feature-group models on ``z_trains`` / ``y_train``.

    Arguments:
        pos_train, neg_train: same as in the super class.
        z_trains {List[DataFrame]}: one frame per feature group.
        y_train {Array}: target labels.
        models: optional list of model names (looked up in
            ``model_dict`` and shallow-copied) or model instances.

    Returns:
        ZeroOneSemiNaiveBayesClassifier
    """
    classifier = super(ZeroOneHemiNaiveBayesClassifier, cls).fromPN(
        pos_train, neg_train)

    if models is None:
        # Default: one PNN per feature group; the spread is the
        # group's empirical standard deviation.
        classifier.models = [
            algorithms.PNN(std=np.std(group.values), verbose=False)
            for group in z_trains
        ]
    else:
        resolved = []
        for model in models:
            if isinstance(model, str):
                resolved.append(copy.copy(model_dict[model]))
            else:
                resolved.append(model)
        classifier.models = resolved

    classifier.features2 = [group.columns for group in z_trains]
    classifier.fit(z_trains, y_train)
    return classifier
def test_basic_storage(self):
    """A pickled-and-restored PNN keeps its parameters, its logging
    machinery, and produces identical predictions."""
    X = np.random.random((100, 2))
    y = np.random.random(100) > 0.5

    # We keep verbose=True in order to see if value will
    # be True when we restore it from the pickle object.
    pnn = algorithms.PNN(std=0.123, verbose=True)
    pnn.train(X, y)

    stored_pnn = pickle.dumps(pnn)
    loaded_pnn = pickle.loads(stored_pnn)

    testcases = [
        ('pnn', pnn),
        ('loaded_pnn', loaded_pnn),
    ]

    for name, network in testcases:
        print("Test case name: {}".format(name))

        self.assertAlmostEqual(network.std, 0.123)
        # Fix: `verbose` is a boolean flag, not a number —
        # assertTrue states the intent (assertAlmostEqual only
        # happened to pass because True == True).
        self.assertTrue(network.verbose)

        with catch_stdout() as out:
            network.logs.stdout = out
            network.logs.write("Test message")
            terminal_output = out.getvalue()
            self.assertIn("Test message", terminal_output)

    pnn_prediction = pnn.predict(X)
    loaded_pnn_prediction = loaded_pnn.predict(X)
    np.testing.assert_array_almost_equal(loaded_pnn_prediction, pnn_prediction)
def clasification():
    """Flask view: train a PNN on text lengths vs. rating-derived
    quality labels, then classify a hard-coded test string by its
    symbol count."""
    df = pd.DataFrame()
    for file_text, rating in db.session.query(ormFiles.file_text, ormFiles.rating):
        print(file_text, rating)
        df = df.append({
            "file_name": file_text,
            "rating": float(rating)
        }, ignore_index=True)
    # db.session.close()
    df['count_symbols'] = df['file_name'].apply(len)
    # quality = 1 for ratings at or above 0.33, else 0.
    df.loc[df['rating'] < 0.33, 'quality'] = 0
    df.loc[df['rating'] >= 0.33, 'quality'] = 1
    print(df)
    pnn = algorithms.PNN(std=10, verbose=False)
    pnn.train(df['count_symbols'], df['quality'])
    test_data = 'ewij weioh uia guu aweg'
    t_test_data = len(test_data)
    y_predicted = pnn.predict([t_test_data])
    result = "Ні"
    # Bug fix: the original test `y_predicted - 1 < eps` was also true
    # for class 0 (0 - 1 = -1 < eps), so the answer was always "Так".
    # Compare the distance to class 1 instead.
    if abs(y_predicted - 1) < 0.0000000000001:
        result = "Так"
    return render_template('clasification.html', y_predicted=result,
                           test_data=test_data)
def test_cutting_exceptions(self):
    """surgery.cut must raise ValueError for invalid arguments."""
    invalid_calls = [
        (algorithms.PNN(), 0, 1),   # PNN fresh from the constructor
        (self.network, 0, 10),      # end index presumably out of range
        (self.network, 0, 0),       # empty cut range
    ]
    for network, start, end in invalid_calls:
        with self.assertRaises(ValueError):
            surgery.cut(network, start, end)
def PNN(X_train, X_test, y_train, y_test, X_dummy):
    """Train a PNN and return predictions for the dummy set, the test
    set, and the test accuracy."""
    environment.reproducible()
    network = algorithms.PNN(std=0.1, verbose=False)
    network.train(X_train, y_train)
    y_predicted = network.predict(X_test)
    y_dummy = network.predict(X_dummy)
    accuracy = metrics.accuracy_score(y_test, y_predicted)
    return y_dummy, y_predicted, accuracy
def test_predict_different_inputs(self):
    """Prediction must accept differently-shaped input vectors."""
    network = algorithms.PNN(verbose=False)

    features = np.array([[1, 2, 3]]).T
    labels = np.array([[1, 0, 1]]).T

    network.train(features, labels)
    self.assertInvalidVectorPred(
        network, features.ravel(), labels.ravel(), decimal=2)
def pnnTrainTestNoNorm(X, Y):
    """Fit a PNN on a 70/30 split of the raw (unnormalized) data and
    report its test accuracy."""
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.7)

    network = algorithms.PNN(std=10, verbose=False)
    network.train(X_train, Y_train)

    predictions = network.predict(X_test)
    accuracy = metrics.accuracy_score(Y_test, predictions)
    print("PNN score: ", accuracy)
    return accuracy, predictions
def test_pnn_non_trivial_class_names_as_strings(self):
    """PNN must handle string class labels.

    Issue #177: https://github.com/itdxer/neupy/issues/177
    """
    features = np.array([10] * 10 + [20] * 10 + [30] * 10)
    labels = np.array(['cat'] * 10 + ['dog'] * 10 + ['horse'] * 10)

    network = algorithms.PNN(std=1)
    network.train(features, labels)

    predictions = network.predict(features)
    np.testing.assert_array_equal(labels, predictions)
    self.assertEqual(sorted(network.classes), ['cat', 'dog', 'horse'])
def test0():
    """Train a PNN on a digits train/test split and print accuracy.

    :return: None
    """
    dataset = datasets.load_digits()
    x_train, x_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, train_size=0.7)
    nw = algorithms.PNN(std=10, verbose=False)
    nw.train(x_train, y_train)
    result = nw.predict(x_test)
    # Fix: call print as a function so the code runs on both
    # Python 2 and Python 3 (the original used the py2-only statement).
    print(metrics.accuracy_score(y_test, result))
def test_pnn_non_trivial_class_names(self):
    """PNN must handle arbitrary (non-consecutive) integer class labels.

    Issue #177: https://github.com/itdxer/neupy/issues/177
    """
    features = np.array([10] * 10 + [20] * 10 + [30] * 10)
    labels = np.array([1] * 10 + [2] * 10 + [3] * 10)

    network = algorithms.PNN(std=1)
    network.train(features, labels)

    predictions = network.predict(features)
    np.testing.assert_array_almost_equal(labels, predictions)
    self.assertEqual(sorted(network.classes), [1, 2, 3])
def test_digit_prediction(self):
    """PNN reaches the expected accuracy on the digits dataset.

    NOTE(review): the exact 0.9889 assertion assumes the random seed
    is fixed elsewhere (e.g. in the test base class) — confirm;
    otherwise train_test_split makes this test flaky.
    """
    digits = datasets.load_digits()
    x_train, x_test, y_train, y_test = train_test_split(
        digits.data, digits.target, train_size=0.7
    )

    network = algorithms.PNN(verbose=False, std=10)
    network.train(x_train, y_train)

    accuracy = metrics.accuracy_score(y_test, network.predict(x_test))
    self.assertAlmostEqual(accuracy, 0.9889, places=4)
def PNN(self):
    """Split the prepared data, train a PNN, and return the predicted
    and true labels for the test set."""
    features, labels = self.prepareData()
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=1)

    X_train = np.array(X_train, dtype=np.float32)
    X_test = np.array(X_test, dtype=np.float32)
    y_test = np.array(y_test, dtype=np.int32)

    network = algorithms.PNN(std=10, verbose=False)
    network.train(X_train, y_train)
    return network.predict(X_test), y_test
def test_handle_errors(self):
    """PNN must raise on malformed inputs and unsupported training modes."""
    # Wrong: size of target data not the same as size of input data.
    with self.assertRaises(ValueError):
        algorithms.PNN(verbose=False).train(
            np.array([[0], [0]]), np.array([0]))

    # Wrong: 2-D target vector (must be 1-D)
    with self.assertRaises(ValueError):
        algorithms.PNN(verbose=False).train(
            np.array([[0], [0]]), np.array([[0]]))

    # Wrong: can't use iterative learning process for this algorithm
    with self.assertRaises(AttributeError):
        algorithms.PNN(verbose=False).train_epoch()

    # Wrong: invalid feature size for prediction data
    with self.assertRaises(ValueError):
        network = algorithms.PNN(verbose=False)
        network.train(np.array([[0], [0]]), np.array([0]))
        network.predict(np.array([[0]]))
def test1():
    """Split the digits dataset 70/30 by position, train a PNN and
    print the test accuracy (numeric class labels).

    :return: None
    """
    dataset = datasets.load_digits()
    n_samples = dataset.data.shape[0]
    ratio = 0.7
    # Fix: slice indices must be integers — `n_samples * ratio` is a
    # float and numpy rejects float indices (TypeError since 1.12).
    split = int(n_samples * ratio)
    x_train, y_train = dataset.data[0:split, :], dataset.target[0:split]
    x_test, y_test = dataset.data[split:, :], dataset.target[split:]
    nw = algorithms.PNN(std=10, verbose=False)
    nw.train(x_train, y_train)
    result = nw.predict(x_test)
    # Fix: use the print function for Python 2/3 compatibility.
    print(metrics.accuracy_score(y_test, result))
def pnnTrainTestNorm(X, Y):
    """Standardize the features, fit a PNN on a 70/30 split and report
    the test accuracy."""
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.7)

    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    network = algorithms.PNN(std=10, verbose=False)
    network.train(X_train_scaled, Y_train)

    predictions = network.predict(X_test_scaled)
    accuracy = metrics.accuracy_score(Y_test, predictions)
    print("PNN score: ", accuracy)
    return accuracy, predictions
def diff_train():
    """Train PNNs on progressively smaller datasets (each paired with
    a matching spread), plot the predicted class regions, and collect
    the mean absolute errors.

    Returns:
        (train_size, std, maes): the configuration lists and the MAE
        measured for each of the four runs.
    """
    maes = [0, 0, 0, 0]
    train_size = [24000, 12000, 5000, 2000]
    std = [0.00035, 0.001, 0.0035, 0.01]

    for j in range(0, 4):
        (x_train, y_train), (x_test, y_test) = dataset3.load_data(
            train_size=train_size[j], show=False)

        pnn = algorithms.PNN(std=std[j], verbose=True)
        pnn.train(x_train, y_train)
        y_predicted = pnn.predict(x_test)
        mae = (np.abs(y_test - y_predicted)).mean()

        # Fix: replace the per-element np.append loop (quadratic
        # reallocation) with vectorized boolean masks; also drop the
        # unused `acc` accumulator from the original.
        predicted_one = np.ravel(y_predicted) >= 0.5
        plt_x_zero = x_test[~predicted_one, 0]
        plt_y_zero = x_test[~predicted_one, 1]
        plt_x_one = x_test[predicted_one, 0]
        plt_y_one = x_test[predicted_one, 1]

        plt.plot(plt_x_zero, plt_y_zero, '.')
        plt.plot(plt_x_one, plt_y_one, '.')
        plt.title('2 class classification\ntrain size = %d\nstd = %.4f\nmae =%.4f'
                  % (train_size[j], std[j], mae))
        maes[j] = mae
        plt.xlim(0, 1.3)
        plt.ylim(0, 1)
        plt.legend(('0 class', '1 class'), loc='upper right', shadow=True)
        plt.show()
        plt.close()

    return train_size, std, maes
def diff_train_size():
    """Evaluate PNN accuracy for several training-set sizes, pairing
    each size with a matching spread (std)."""
    train_size = [15000, 10000, 5000, 2000]
    std = [2, 1, 0.5, 0.25]
    for size, spread in zip(train_size, std):
        (x_train, y_train), (x_test, y_test) = dataset8.load_data(mode=0)
        pnn = algorithms.PNN(std=spread, batch_size=128, verbose=True)
        pnn.train(x_train[0:size], y_train[0:size])
        y_predicted = pnn.predict(x_test)
        accuracy = metrics.accuracy_score(y_predicted, y_test)
        print("accuracy = %.2f" % (accuracy))
def PNN(X, y):
    """Standardize the features, train a PNN on an 80/20 split and
    return (predicted labels, true labels) for the test set."""
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1)

    X_train = np.array(X_train, dtype=np.float32)
    X_test = np.array(X_test, dtype=np.float32)
    y_test = np.array(y_test, dtype=np.int32)

    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    network = algorithms.PNN(std=10, verbose=False)
    network.train(X_train, y_train)
    return network.predict(X_test), y_test
def test_pnn_mini_batches(self):
    """Batched prediction keeps the expected output shapes."""
    digits = datasets.load_digits()
    n_classes = len(np.unique(digits.target))
    x_train, x_test, y_train, y_test = train_test_split(
        digits.data, digits.target, train_size=0.7
    )

    network = algorithms.PNN(verbose=False, batch_size=100)
    network.train(x_train, y_train)

    labels = network.predict(x_test)
    self.assertEqual(labels.shape, y_test.shape)

    probabilities = network.predict_proba(x_test)
    self.assertEqual(probabilities.shape, (y_test.shape[0], n_classes))
def fromPN(cls, pos_train, neg_train, z_trains, y_train, models=None):
    '''
    A neural network will be trained with z_trains, y_train.

    Arguments:
        pos_train, neg_train: same as in the super class.
        z_trains {List[DataFrame]}: one frame per feature group.
        y_train {Array}: target labels.
        models: selects the per-group model family —
            None or 'grnn': one GRNN per group;
            'pnn': one PNN per group;
            'svm': RBF-kernel SVC per group;
            'lasso': LassoLars per group;
            otherwise: a list of model names (looked up in model_dict)
            or model instances, deep-copied one per group.

    Returns:
        ZeroOneSemiNaiveBayesClassifier
    '''
    sbc = super(ZeroOneHemiNaiveBayesClassifier, cls).fromPN(pos_train, neg_train)
    if models is None or models == 'grnn':
        # The spread (std) is estimated from the group's flattened
        # values, skipping NaNs and exact zeros.
        sbc.models = [
            algorithms.GRNN(std=np.std([
                a for a in z_train.values.ravel()
                if str(a) != 'nan' and a != 0
            ]), verbose=False)
            for z_train in z_trains
        ]
    elif models == 'pnn':
        # Same NaN/zero-excluding spread estimate, but with a PNN.
        sbc.models = [
            algorithms.PNN(std=np.std([
                a for a in z_train.values.ravel()
                if str(a) != 'nan' and a != 0
            ]), verbose=False)
            for z_train in z_trains
        ]
    elif models == 'svm':
        sbc.models = [svm.SVC(kernel='rbf') for z_train in z_trains]
    elif models == 'lasso':
        sbc.models = [LassoLars() for z_train in z_trains]
    else:
        # String entries name prototypes in model_dict; everything is
        # deep-copied so feature groups never share fitted state.
        sbc.models = [
            copy.deepcopy(model_dict[model]) if isinstance(model, str)
            else copy.deepcopy(model)
            for model in models
        ]
    sbc.features2 = [z_train.columns for z_train in z_trains]
    sbc.fit(z_trains, y_train)
    return sbc
def test_simple_pnn(self):
    """Stratified 10-fold cross-validation on iris must average 14.4
    correct predictions per fold."""
    iris = datasets.load_iris()
    features = iris.data
    labels = iris.target

    n_folds = 10
    skfold = StratifiedKFold(n_splits=n_folds)
    total_correct = 0

    for train_index, test_index in skfold.split(features, labels):
        x_train, x_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]

        network = algorithms.PNN(verbose=False, std=0.1)
        network.train(x_train, y_train)
        total_correct += sum(y_test == network.predict(x_test))

    self.assertEqual(total_correct / n_folds, 14.4)
def diff_std():
    """Plot PNN classification results for three spread values:
    too large, optimal, and too small."""
    (x_train, y_train), (x_test, y_test) = dataset3.load_data(
        train_size=12000, show=True)

    titles = ["\n\nspread greater than necessary",
              "\n\nspread optimal",
              "\n\nspread less than necessary"]
    spreads = [0.1, 0.001, 0.0001]

    for spread, title in zip(spreads, titles):
        pnn = algorithms.PNN(std=spread, verbose=True)
        pnn.train(x_train, y_train)
        y_predicted = pnn.predict(x_test)
        mae = (np.abs(y_test - y_predicted)).mean()

        # Split test points by predicted class for plotting.
        xs_zero, ys_zero, xs_one, ys_one = [], [], [], []
        for index, coord in enumerate(x_test):
            if y_predicted[index] < 0.5:
                xs_zero.append(coord[0])
                ys_zero.append(coord[1])
            elif y_predicted[index] >= 0.5:
                xs_one.append(coord[0])
                ys_one.append(coord[1])

        plt.plot(xs_zero, ys_zero, '.')
        plt.plot(xs_one, ys_one, '.')
        plt.title(title + '\n2 class classification\nstd = %.4f\nmae =%.4f'
                  % (spread, mae))
        plt.xlim(0, 1.3)
        plt.ylim(0, 1)
        plt.legend(('0 class', '1 class'), loc='upper right', shadow=True)
        plt.show()
        plt.close()
def clasification():
    """Flask view: label products as above/below the mean recommended
    price, train a PNN on one-hot encoded name/model features, and
    classify a hard-coded test sample."""
    df = pd.DataFrame()
    for name, model, price in db.session.query(Products.product_name,
                                               Products.product_model,
                                               Recommendation.recommendation_price)\
            .join(Recommendation,
                  Recommendation.recommendation_id == Products.recommendation_id):
        print(name, model, price)
        df = df.append({
            "name": name,
            "model": model,
            "price": float(price)
        }, ignore_index=True)
    # db.session.close()
    mean_p = df['price'].mean()
    # quality = 1 for products priced at or above the mean, else 0.
    df.loc[df['price'] < mean_p, 'quality'] = 0
    df.loc[df['price'] >= mean_p, 'quality'] = 1
    X = pd.get_dummies(data=df[['name', 'model']])
    print(df)
    print(X)
    pnn = algorithms.PNN(std=10, verbose=False)
    pnn.train(X, df['quality'])
    test_str = ['BMW', 'X5']
    count_columns = len(X.columns)
    test_list = [0] * count_columns
    test_list[1] = 1
    test_list[-1] = 1
    print(test_list)
    y_predicted = pnn.predict([test_list])
    result = "Ні"
    # Bug fix: the original test `y_predicted - 1 < eps` was also true
    # for class 0 (0 - 1 = -1 < eps), so the answer was always "Так".
    # Compare the distance to class 1 instead.
    if abs(y_predicted - 1) < 0.0000001:
        result = "Так"
    return render_template('clasification.html', y_predicted=result,
                           test_data=test_list, test_str=test_str)