def test_multi_log_reg(self):
    """Train multiLogReg on transform-encoded frames and check the confusion matrix.

    The feature frames from ``self.d`` are encoded with the spec returned by
    ``self.d.get_jspec``; the test frame reuses the metadata produced while
    encoding the training frame. Labels are encoded separately with a
    ``recode`` spec on the ``income`` column. The second output of
    ``confusionMatrix`` is then compared against fixed thresholds.
    """
    # Reduced because we want the tests to finish a bit faster.
    train_count = 10000
    test_count = 500

    # Features: encode the training slice, then apply the same metadata to
    # the test slice so both use one consistent encoding.
    jspec_data = self.d.get_jspec(self.sds)
    train_x_frame = self.d.get_train_data(self.sds)[0:train_count]
    train_x, M1 = train_x_frame.transform_encode(spec=jspec_data)
    test_x_frame = self.d.get_test_data(self.sds)[0:test_count]
    test_x = test_x_frame.transform_apply(spec=jspec_data, meta=M1)

    # Labels: the spec is handed over as a string scalar holding the dict's
    # text representation wrapped in double quotes.
    jspec_dict = {"recode": ["income"]}
    jspec_labels = self.sds.scalar(f'"{jspec_dict}"')
    train_y_frame = self.d.get_train_labels(self.sds)[0:train_count]
    train_y, M2 = train_y_frame.transform_encode(spec=jspec_labels)
    test_y_frame = self.d.get_test_labels(self.sds)[0:test_count]
    test_y = test_y_frame.transform_apply(spec=jspec_labels, meta=M2)

    betas = multiLogReg(train_x, train_y)
    # Only the predictions are needed; the other outputs are discarded.
    [_, y_pred, _] = multiLogRegPredict(test_x, betas, test_y)
    [_, conf_avg] = confusionMatrix(y_pred, test_y)
    confusion_numpy = conf_avg.compute()

    # assertGreater/assertLess report the offending value on failure,
    # unlike assertTrue on a pre-evaluated comparison.
    self.assertGreater(confusion_numpy[0][0], 0.8)
    self.assertLess(confusion_numpy[0][1], 0.5)
    self.assertGreater(confusion_numpy[1][1], 0.5)
    self.assertLess(confusion_numpy[1][0], 0.2)
def test_using_predict(self):
    """
    Test the algorithm using the predict function.
    With builtin classification

    The model is trained and evaluated on the same data returned by
    ``self.gen_data()``; the accuracy reported by ``multiLogRegPredict``
    must exceed 98.
    """
    # The middle element returned by gen_data is not needed here.
    [X, _, Y] = self.gen_data()

    # Call algorithm
    bias = multiLogReg(Matrix(self.sds, X), Matrix(self.sds, Y)).compute()

    # Only the accuracy (third output) is checked.
    [_, _, acc] = multiLogRegPredict(
        Matrix(self.sds, X), Matrix(self.sds, bias), Matrix(self.sds, Y)
    ).compute()

    # assertGreater shows the actual accuracy if the check fails.
    self.assertGreater(acc, 98)
def test_simple(self):
    """
    Test simple, if the log reg splits a dataset where everything over 1 is
    label 2 and under 1 is 1.
    With manual classification.

    The coefficients returned by ``multiLogReg`` are applied by hand: the
    first ``len(X[0])`` rows are used as feature weights and the following
    row as the intercept. A negative score is classified as 2, anything
    else as 1, and the resulting accuracy (in percent) must exceed 98.
    """
    [X, labels, Y] = self.gen_data()

    # Call algorithm
    bias = multiLogReg(Matrix(self.sds, X), Matrix(self.sds, Y)).compute()

    # Derive the sizes from the data instead of hard-coding them.
    num_samples = len(X)
    num_features = len(X[0])

    # Calculate result.
    scores = np.reshape(
        np.dot(X, bias[:num_features]) + bias[num_features], (num_samples,)
    )
    # Boolean + 1 maps False -> 1 and True -> 2.
    predicted = (scores < 0) + 1
    accuracy = np.sum(labels == predicted) / num_samples * 100

    # assertGreater shows the actual accuracy if the check fails.
    self.assertGreater(accuracy, 98)
def test_multi_log_reg(self):
    """Train multiLogReg on a data subset and require more than 80 accuracy.

    Train and test data from ``self.d`` are reshaped to rows of 28*28 values
    and truncated before being wrapped in SystemDS matrices. Both label
    vectors are shifted by 1.0 (presumably because the algorithm expects
    labels starting at 1 -- confirm against the multiLogReg documentation).
    """
    # Only a subset is used so that the test finishes a bit faster.
    n_train = 15000
    n_test = 5000
    row_width = 28 * 28

    # Training features and labels.
    train_rows = self.d.get_train_data().reshape((60000, row_width))
    train_x = Matrix(self.sds, train_rows[:n_train])
    train_y = Matrix(self.sds, self.d.get_train_labels()[:n_train])
    train_y = train_y + 1.0

    # Test features and labels.
    test_rows = self.d.get_test_data().reshape((10000, row_width))
    test_x = Matrix(self.sds, test_rows[:n_test])
    test_y = Matrix(self.sds, self.d.get_test_labels()[:n_test])
    test_y = test_y + 1.0

    weights = multiLogReg(train_x, train_y)
    prediction = multiLogRegPredict(test_x, weights, test_y)
    # Only the third output, the accuracy, is checked.
    [_, _, acc] = prediction.compute()

    self.assertGreater(acc, 80)
def test_multi_log_reg_with_read(self):
    """Train multiLogReg on data written to disk and read back again.

    The first 100 training rows and labels are written under
    ``self.base_path``, read back through ``self.sds.read`` and used for
    training. The model is then evaluated on the first 100 test rows, and
    the reported accuracy must exceed 70.
    """
    n_train = 100
    n_test = 100
    row_width = 28 * 28

    # Write the training features, then the (shifted) labels, to disk.
    train_rows = self.d.get_train_data().reshape((60000, row_width))
    train_x = Matrix(self.sds, train_rows[:n_train])
    train_x.write(self.base_path + "train_data").compute()

    train_y = Matrix(self.sds, self.d.get_train_labels()[:n_train]) + 1
    train_y.write(self.base_path + "train_labels").compute()

    # Train on the matrices read back from the files just written.
    train_x_read = self.sds.read(self.base_path + "train_data")
    train_y_read = self.sds.read(self.base_path + "train_labels")
    weights = multiLogReg(train_x_read, train_y_read, verbose=False)

    # Test data
    test_rows = self.d.get_test_data().reshape((10000, row_width))
    test_x = Matrix(self.sds, test_rows[:n_test])
    test_y = Matrix(self.sds, self.d.get_test_labels()[:n_test])
    test_y = test_y + 1.0

    prediction = multiLogRegPredict(test_x, weights, test_y)
    # Only the third output, the accuracy, is checked.
    [_, _, acc] = prediction.compute(verbose=True)

    self.assertGreater(acc, 70)
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
from systemds.context import SystemDSContext
from systemds.matrix import Matrix
from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
from systemds.examples.tutorials.mnist import DataManager

# Each sample is flattened into a single row of 28 * 28 values.
ROW_WIDTH = 28 * 28

d = DataManager()

with SystemDSContext() as sds:
    # Training features and labels; the labels are shifted by 1.0.
    train_rows = d.get_train_data().reshape((60000, ROW_WIDTH))
    X = Matrix(sds, train_rows)
    Y = Matrix(sds, d.get_train_labels()) + 1.0
    bias = multiLogReg(X, Y, tol=0.0001, verbose=False)

    # Test features and labels, shifted the same way.
    test_rows = d.get_test_data().reshape((10000, ROW_WIDTH))
    Xt = Matrix(sds, test_rows)
    Yt = Matrix(sds, d.get_test_labels()) + 1.0

    # Only the third output, the accuracy, is reported.
    prediction = multiLogRegPredict(Xt, bias, Yt)
    [_, _, acc] = prediction.compute()

print(acc)
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------
from systemds.context import SystemDSContext
from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
from systemds.examples.tutorials.mnist import DataManager

# Each sample is flattened into a single row of 28 * 28 values.
ROW_WIDTH = 28 * 28

d = DataManager()

# Load everything as numpy arrays before the SystemDS context is opened.
X = d.get_train_data().reshape((60000, ROW_WIDTH))
Y = d.get_train_labels()
Xt = d.get_test_data().reshape((10000, ROW_WIDTH))
Yt = d.get_test_labels()

with SystemDSContext() as sds:
    # Training data; the labels are shifted by 1.0.
    X_ds = sds.from_numpy(X)
    Y_ds = sds.from_numpy(Y) + 1.0
    bias = multiLogReg(X_ds, Y_ds, maxi=30)

    # Test data, shifted the same way.
    Xt_ds = sds.from_numpy(Xt)
    Yt_ds = sds.from_numpy(Yt) + 1.0

    # Only the third output, the accuracy, is reported.
    prediction = multiLogRegPredict(Xt_ds, bias, Yt_ds)
    [_, _, acc] = prediction.compute()

print(acc)