Example #1
    def test_multi_log_reg(self):
        # Reduced because we want the tests to finish a bit faster.
        train_count = 10000
        test_count = 500

        # Encode the training features and apply the same encoding (M1)
        # to the test features.
        jspec_data = self.d.get_jspec(self.sds)
        train_x_frame = self.d.get_train_data(self.sds)[0:train_count]
        train_x, M1 = train_x_frame.transform_encode(spec=jspec_data)
        test_x_frame = self.d.get_test_data(self.sds)[0:test_count]
        test_x = test_x_frame.transform_apply(spec=jspec_data, meta=M1)

        # Recode the label column ("income") and apply the same recoding (M2)
        # to the test labels.
        jspec_dict = {"recode": ["income"]}
        jspec_labels = self.sds.scalar(f'"{jspec_dict}"')
        train_y_frame = self.d.get_train_labels(self.sds)[0:train_count]
        train_y, M2 = train_y_frame.transform_encode(spec=jspec_labels)
        test_y_frame = self.d.get_test_labels(self.sds)[0:test_count]
        test_y = test_y_frame.transform_apply(spec=jspec_labels, meta=M2)

        betas = multiLogReg(train_x, train_y)
        [_, y_pred, acc] = multiLogRegPredict(test_x, betas, test_y)

        # The second output of confusionMatrix is the averaged confusion
        # matrix, whose entries are checked below.
        [_, conf_avg] = confusionMatrix(y_pred, test_y)
        confusion_numpy = conf_avg.compute()

        self.assertTrue(confusion_numpy[0][0] > 0.8)
        self.assertTrue(confusion_numpy[0][1] < 0.5)
        self.assertTrue(confusion_numpy[1][1] > 0.5)
        self.assertTrue(confusion_numpy[1][0] < 0.2)

    def test_using_predict(self):
        """
        Test the algorithm using the built-in multiLogRegPredict function
        for classification.
        """
        [X, labels, Y] = self.gen_data()

        # Train the model.
        bias = multiLogReg(Matrix(self.sds, X), Matrix(self.sds, Y)).compute()

        # Predict on the same data and check the reported accuracy.
        [m, y_pred, acc] = multiLogRegPredict(
            Matrix(self.sds, X), Matrix(self.sds, bias), Matrix(self.sds, Y)).compute()

        self.assertTrue(acc > 98)

    def test_simple(self):
        """
        Simple test: check that multiLogReg separates a dataset in which
        every value above 1 gets label 2 and every value below 1 gets label 1.
        The classification step is done manually from the returned weights.
        """
        [X, labels, Y] = self.gen_data()

        # Train the model.
        bias = multiLogReg(Matrix(self.sds, X), Matrix(self.sds, Y)).compute()

        # Manually classify: the code treats bias[:len(X[0])] as the feature
        # weights and bias[len(X[0])] as the intercept.
        res = np.reshape(np.dot(X, bias[:len(X[0])]) + bias[len(X[0])], (250))
        f2 = lambda x: (x < 0) + 1  # negative score -> label 2, otherwise label 1
        accuracy = np.sum(labels == f2(res)) / 250 * 100

        self.assertTrue(accuracy > 98)
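The gen_data helper used by test_using_predict and test_simple above is not shown in this excerpt. A hypothetical stand-in might look like the sketch below; the 250-sample count and the {1, 2} label encoding are taken from test_simple, while the feature layout, threshold, and random generation are assumptions for illustration only.

import numpy as np

    def gen_data(self, n=250, d=3, seed=7):
        # Hypothetical data generator: rows whose mean is above 1 get label 2,
        # the rest get label 1 (multiLogReg expects 1-based labels).
        rng = np.random.default_rng(seed)
        X = rng.uniform(0, 2, size=(n, d))
        labels = (X.mean(axis=1) > 1).astype(int) + 1
        Y = labels.reshape(-1, 1).astype(np.float64)
        return X, labels, Y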
Example #4
    def test_multi_log_reg(self):
        # Reduced because we want the tests to finish a bit faster.
        train_count = 15000
        test_count = 5000
        # Train data
        X = Matrix(self.sds, self.d.get_train_data().reshape(
            (60000, 28*28))[:train_count])
        # MNIST labels are 0-9; shift to 1-10 because multiLogReg expects
        # 1-based class labels.
        Y = Matrix(self.sds, self.d.get_train_labels()[:train_count])
        Y = Y + 1.0

        # Test data
        Xt = Matrix(self.sds, self.d.get_test_data().reshape(
            (10000, 28*28))[:test_count])
        Yt = Matrix(self.sds, self.d.get_test_labels()[:test_count])
        Yt = Yt + 1.0  # same 1-based label shift as for the training labels

        bias = multiLogReg(X, Y)

        [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute()

        self.assertGreater(acc, 80)
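Example #4 only checks the overall accuracy. If a per-class breakdown is wanted, the predictions returned by multiLogRegPredict can be fed to the confusionMatrix builtin in the same way as Example #1 does. A minimal sketch, assuming the X, Y, Xt, Yt built above and that confusionMatrix is importable from systemds.operator.algorithm like the other builtins:

from systemds.operator.algorithm import confusionMatrix

bias = multiLogReg(X, Y)
[_, y_pred, acc] = multiLogRegPredict(Xt, bias, Yt)
# The second output is the averaged confusion matrix, as used in Example #1.
[_, conf_avg] = confusionMatrix(y_pred, Yt)
print(conf_avg.compute())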
Example #5
    def test_multi_log_reg_with_read(self):
        train_count = 100
        test_count = 100
        # Persist the training data and the 1-based labels to disk, then read
        # them back so that training runs on the written files.
        X = Matrix(self.sds, self.d.get_train_data().reshape(
            (60000, 28*28))[:train_count])
        X.write(self.base_path + "train_data").compute()
        Y = Matrix(self.sds, self.d.get_train_labels()[:train_count]) + 1
        Y.write(self.base_path + "train_labels").compute()

        Xr = self.sds.read(self.base_path + "train_data")
        Yr = self.sds.read(self.base_path + "train_labels")

        bias = multiLogReg(Xr, Yr, verbose=False)
        # Test data
        Xt = Matrix(self.sds, self.d.get_test_data().reshape(
            (10000, 28*28))[:test_count])
        Yt = Matrix(self.sds, self.d.get_test_labels()[:test_count])
        Yt = Yt + 1.0

        [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute(verbose=True)
        
        self.assertGreater(acc, 70)
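Example #5 only round-trips the training split through write() and read(). The same pattern should work for the test split as well; a sketch continuing the test method above, assuming the same self.sds, self.base_path, Xt, Yt, and bias:

        # Persist the test split and read it back, so prediction also runs
        # on data loaded from disk.
        Xt.write(self.base_path + "test_data").compute()
        Yt.write(self.base_path + "test_labels").compute()

        Xtr = self.sds.read(self.base_path + "test_data")
        Ytr = self.sds.read(self.base_path + "test_labels")

        [_, _, acc] = multiLogRegPredict(Xtr, bias, Ytr).compute()
        self.assertGreater(acc, 70)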
Example #6

from systemds.context import SystemDSContext
from systemds.matrix import Matrix
from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
from systemds.examples.tutorials.mnist import DataManager

d = DataManager()

with SystemDSContext() as sds:
    # Train data: flatten the 28x28 MNIST images and shift the labels to
    # 1-based, as multiLogReg expects.
    X = Matrix(sds, d.get_train_data().reshape((60000, 28 * 28)))
    Y = Matrix(sds, d.get_train_labels()) + 1.0
    bias = multiLogReg(X, Y, tol=0.0001, verbose=False)
    # Test data
    Xt = Matrix(sds, d.get_test_data().reshape((10000, 28 * 28)))
    Yt = Matrix(sds, d.get_test_labels()) + 1.0
    [_, _, acc] = multiLogRegPredict(Xt, bias, Yt).compute()

print(acc)
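multiLogRegPredict also returns the per-sample predictions, which can be pulled back as a numpy array for inspection. A sketch of lines that could be added inside the with block of Example #6 (before the context closes), assuming the same sds, bias, Xt, and Yt and that elements of the returned MultiReturn can be computed individually, as Example #1 does with conf_avg:

    # Pull the per-sample predictions back as a numpy array for inspection.
    [_, y_pred, _] = multiLogRegPredict(Xt, bias, Yt)
    y_pred_np = y_pred.compute()  # predicted labels, 1-based like Yt
    print(y_pred_np.shape, y_pred_np[:10].flatten())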
Example #7
from systemds.context import SystemDSContext
from systemds.operator.algorithm import multiLogReg, multiLogRegPredict
from systemds.examples.tutorials.mnist import DataManager

d = DataManager()

X = d.get_train_data().reshape((60000, 28 * 28))
Y = d.get_train_labels()
Xt = d.get_test_data().reshape((10000, 28 * 28))
Yt = d.get_test_labels()

with SystemDSContext() as sds:
    # Train data: move the numpy arrays into SystemDS and shift the labels
    # to 1-based; maxi=30 caps the number of outer iterations.
    X_ds = sds.from_numpy(X)
    Y_ds = sds.from_numpy(Y) + 1.0
    bias = multiLogReg(X_ds, Y_ds, maxi=30)
    # Test data
    Xt_ds = sds.from_numpy(Xt)
    Yt_ds = sds.from_numpy(Yt) + 1.0
    [m, y_pred, acc] = multiLogRegPredict(Xt_ds, bias, Yt_ds).compute()

print(acc)
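Because Example #7 keeps the raw numpy arrays X, Y, Xt, Yt around, the reported accuracy can also be cross-checked locally. A sketch, assuming .compute() on the MultiReturn hands back numpy arrays (as the unpacking above suggests) and that y_pred is 1-based due to the +1.0 label shift:

import numpy as np

# y_pred is 1-based; shift back before comparing with the raw MNIST labels.
local_acc = np.mean((y_pred.flatten() - 1) == Yt.flatten()) * 100
print(local_acc)  # should be close to the acc printed above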