Exemple #1
0
 def test_concatenate_produces_right_shape(self):
     """Concatenating a frame with an identical one doubles N to 20 and
     keeps the feature count at 3 (data shape (20, 3))."""
     newdata = CData((self.X_, self.y_), cross_val=0)
     newdata.concatenate(self.data)
     self.assertEqual(newdata.N, 20,
                      "Split after concatenation went wrong!")
     self.assertEqual(newdata.data.shape, (20, 3),
                      "Shapes went haywire after concatenation!")
Exemple #2
0
    def setUp(self):
        """Build a standardized, unfolded MNIST frame and a small sigmoid net."""
        self.data = CData(mnist_tolearningtable(roots["misc"] + "mnist.pkl.gz",
                                                fold=False),
                          headers=None)
        self.data.transformation = "std"
        # 5 fixed (unshuffled) testing samples used for the gradient check
        self.X, self.Y = self.data.table("testing", m=5, shuff=False)

        self.net = BackpropNetwork(self.data.neurons_required[0],
                                   name="NumGradTestNetwork")
        self.net.add(DenseLayer(30, activation="sigmoid"))
Exemple #3
0
def pull_data(crossval):
    """Load sum_ntab.csv, keep the last 11 feature columns and the last
    label column (cast to float32), and return a standardized CData frame.

    Parameters:
        crossval: forwarded to CData's cross_val split parameter.
    """
    from csxdata import CData, roots
    from csxdata.utilities.parser import parse_csv

    X, indeps, _ = parse_csv(roots["csvs"] + "sum_ntab.csv", indeps=7)
    target = indeps[..., -1].astype("float32")

    frame = CData((X[..., -11:], target), cross_val=crossval)
    frame.transformation = "std"
    return frame
Exemple #4
0
def run():
    """Train a net on the fruits frame, report testing/validation accuracy.

    Returns:
        (tacc, vacc): accuracy on the fruits testing split and on the
        independent zsind validation set.
    """
    fruits = CData(gyumpath, gyumindeps, feature=TAXLEVEL, cross_val=CROSSVAL)
    fruits.transformation = (TRANSFORMATION, TRANSFORMATION_PARAM)

    network = build_net(*fruits.neurons_required)

    testing = fruits.table("testing")
    # independent validation frame: no split of its own; its inputs/labels
    # are projected with the transformation/embedding fitted on <fruits>
    zsind = CData(zsindpath, zsindeps, cross_val=0.0, feature=TAXLEVEL)
    vx, vy = zsind.learning, zsind.lindeps
    vx = fruits.transformation(vx)
    vy = fruits.embed(vy)

    # baseline metrics before any training
    initc, initacc = network.evaluate(*testing, verbose=0)
    initc, initacc = round(initc, 5), round(initacc, 5)
    print("Initial cost: {}\tacc: {}".format(initc, initacc))

    X, y = fruits.table("learning")
    network.fit(X,
                y,
                batch_size=20,
                nb_epoch=400,
                validation_data=testing,
                verbose=0)
    # [-1] picks the accuracy from evaluate()'s (cost, acc) output
    tacc = network.evaluate(*testing, batch_size=fruits.n_testing,
                            verbose=0)[-1]
    vacc = network.evaluate(vx, vy, verbose=0)[-1]
    # batchgen = fruits.batchgen(100, infinite=True)
    # log = network.fit_generator(batchgen, fruits.N, nb_epoch=15, validation_data=valid, verbose=verbose)
    print("T: {}\tV: {}".format(tacc, vacc))
    return tacc, vacc
Exemple #5
0
class TestNetwork(unittest.TestCase):
    """Numerical gradient checks for BackpropNetwork on a tiny MNIST batch.

    Each test appends an output layer with a given activation, finalizes
    the net with a given cost, then compares analytical gradients against
    numerically estimated ones at tiered tolerances.
    """
    def setUp(self):
        # standardized, unfolded MNIST frame
        self.data = CData(mnist_tolearningtable(roots["misc"] + "mnist.pkl.gz",
                                                fold=False),
                          headers=None)
        self.data.transformation = "std"
        # 5 fixed (unshuffled) testing samples used for the gradient check
        self.X, self.Y = self.data.table("testing", m=5, shuff=False)

        self.net = BackpropNetwork(self.data.neurons_required[0],
                                   name="NumGradTestNetwork")
        self.net.add(DenseLayer(30, activation="sigmoid"))

    def test_mse_with_sigmoid_output(self):
        """MSE cost with a sigmoid output layer."""
        self.net.add(
            DenseLayer(self.data.neurons_required[1], activation="sigmoid"))
        self.net.finalize(cost="mse", optimizer="sgd")
        self._run_numerical_gradient_test()

    def test_xent_with_sigmoid_output(self):
        """Cross-entropy cost with a sigmoid output layer."""
        self.net.add(
            DenseLayer(self.data.neurons_required[1], activation="sigmoid"))
        self.net.finalize(cost="xent", optimizer="sgd")
        self._run_numerical_gradient_test()

    def test_xent_with_softmax_output(self):
        """Cross-entropy cost with a softmax output layer."""
        self.net.add(
            DenseLayer(self.data.neurons_required[1], activation="softmax"))
        self.net.finalize(cost="xent", optimizer="sgd")
        self._run_numerical_gradient_test()

    def _run_numerical_gradient_test(self):
        """Fit one epoch on 20 samples, then assert the relative gradient
        error at three severity thresholds (fatal/error/suspicious)."""
        self.net.fit(*self.data.table("learning", m=20),
                     batch_size=20,
                     epochs=1,
                     verbose=0)

        numerical = numerical_gradients(self.net, self.X, self.Y)
        analytical = analytical_gradients(self.net, self.X, self.Y)
        diff = analytical - numerical
        # relative error: ||a - n|| / max(||n||, ||a||)
        error = norm(diff) / max(norm(numerical), norm(analytical))

        dfstr = "{0: .4f}".format(error)

        self.assertLess(error, 1e-2,
                        "FATAL ERROR, {} (relerr) >= 1e-2".format(dfstr))
        self.assertLess(error, 1e-4,
                        "ERROR, 1e-2 > {} (relerr) >= 1e-4".format(dfstr))
        self.assertLess(error, 1e-7,
                        "SUSPICIOUS, 1e-4 > {} (relerr) >= 1e-7".format(dfstr))
def experiment(mode):
    """Run an MNIST classification experiment.

    Parameters:
        mode: a string whose first letter (case-insensitive) selects the
            network: "c..." -> Convolutional, "f..." -> Fully Connected.

    Raises:
        RuntimeError: if the mode does not start with "c" or "f".
    """
    mode = mode.lower()[0]
    # <mode> is already a lowered single character; no need to re-lower it
    if mode == "c":
        chain = "Convolutional"
    elif mode == "f":
        chain = "Fully Connected"
    else:
        raise RuntimeError("Wrong mode definition!")

    print("Experiment: MNIST classification with {} Neural Network!".format(chain))
    net = get_fcnn() if mode == "f" else get_cnn()
    # fold the flat MNIST vectors back into images only for the conv net
    mnist = CData(mnist_to_lt(miscpath, mode == "c"))
    mnist.transformation = "standardization"

    net.fit(mnist.data, mnist.indeps, batch_size=20, nb_epoch=30, verbose=1,
            validation_split=0.2, show_accuracy=True)
Exemple #7
0
def pull_data(path):
    """Deserialize a gzipped, pickled learning table and wrap it in CData.

    Parameters:
        path: filesystem path to the .pkl.gz learning table.

    Returns:
        A CData frame with 20% of the samples held out (cross_val=.2).
    """
    import pickle
    import gzip
    # the context manager closes the file; the redundant explicit close()
    # inside the with-block was removed
    with gzip.open(path, "rb") as ltfl:
        lt = pickle.load(ltfl)

    return CData(lt, .2)
Exemple #8
0
 def get_data():
     """Return (lX, lY, tX, tY, vX, vY): the fruits learning/testing splits
     plus the zsind validation set projected into the fruits feature space."""
     fruits = CData(gyumpath,
                    gyumindeps,
                    feature=TAXLEVEL,
                    cross_val=CROSSVAL)
     fruits.transformation = (AFTER_TREATMENT, TRANSFORM_PARAM)
     # validation frame: no internal split, no transformation of its own
     zsind = CData(zsindpath,
                   zsindeps,
                   feature=TAXLEVEL,
                   cross_val=0,
                   transformation=None,
                   param=None)
     lX, lY = fruits.learning, fruits.lindeps
     tX, tY = fruits.testing, fruits.tindeps
     vX, vY = zsind.learning, zsind.lindeps
     # project validation inputs with the transformation fitted on <fruits>
     vX = fruits.transformation(vX)
     return lX, lY, tX, tY, vX, vY
Exemple #9
0
def full_training(validate=True, dump_weights=False):
    """Train a net on the full fruits frame (no cross-validation split).

    Parameters:
        validate: when True, evaluate on the independent zsind set and
            return (probs, preds, vy, fruits); otherwise the function
            returns None.
        dump_weights: when True, dump the first layer's weights to
            weights.csv (tab-separated, decimal commas).
    """
    fruits = CData(gyumpath, gyumindeps, feature=TAXLEVEL, cross_val=0.0)
    fruits.transformation = (TRANSFORMATION, TRANSFORMATION_PARAM)

    network = build_net(*fruits.neurons_required)

    X, y = fruits.table("learning")
    network.fit(X, y, batch_size=30, nb_epoch=500, verbose=0)

    if dump_weights:
        weights = network.layers[0].get_weights()
        # with-statement guarantees the file is closed even on error
        with open("weights.csv", "w") as wghts:
            wghts.write("\n".join([
                "\t".join([str(float(cell)) for cell in line])
                for line in weights[0].T
            ]).replace(".", ","))

    if validate:
        # BUG FIX: the CData frame itself was unpacked into (vx, vy);
        # take the learning matrix and labels instead (cf. run()).
        zsind = CData(zsindpath, zsindeps, cross_val=0.0, feature=TAXLEVEL)
        vx, vy = zsind.learning, zsind.lindeps
        vx = fruits.transformation(vx)
        vy = fruits.embed(vy)
        vacc = network.evaluate(vx, vy, batch_size=len(vy), verbose=0)[-1]
        probs = network.predict_proba(vx, verbose=0)
        preds = network.predict_classes(vx, verbose=0)
        print("ANN validation accuracy:", vacc)
        return probs, preds, vy, fruits
Exemple #10
0
def pull_data(feature, path=DEFAULTPATH, **kw):
    """Build a CData frame from <path> keyed by <feature>.

    Fixed loader settings: 6 independent columns, 1 header row, no
    cross-validation split, Hungarian-accent stripping and decimal-comma
    conversion enabled. Extra keyword arguments are forwarded to CData.
    """
    frame = CData(path,
                  indeps=6,
                  headers=1,
                  cross_val=0,
                  dehungarize=True,
                  decimal=True,
                  feature=feature,
                  **kw)
    return frame
Exemple #11
0
class TestEmbedding(unittest.TestCase):
    """
    Dear Embedding Wrapper Classes,

    I would like you to:
    + create embeddings from categories
    ++ create OneHot embedding
    ++ create random embedding into n dimensions
    + transform any category label into the appropriate embedding
    - translate an embedding back to readable label or dummycode
    """
    def setUp(self):
        # 10-sample etalon set, no cross-validation split
        self.X_, self.y_, headers = parse_csv(etalonroot + "/input.csv")

        self.data = CData((self.X_, self.y_), cross_val=0)

    def test_embedding_then_reverting_to_onehot_doesnt_break_shapes(self):
        """Setting <embedding> to 10 yields (10, 10) labels; deleting it
        reverts to one-hot over the 3 categories, i.e. (10, 3)."""
        self.data.reset_data(shuff=False)
        self.data.crossval = 0
        self.data.embedding = 10
        self.assertEqual(
            self.data.embedding, "embedding",
            "<embedding> setter is faulty! (got {})".format(
                self.data.embedding))
        X, y = self.data.table()
        self.assertEqual(
            y.shape, (10, 10),
            "Embedding of independent variables went wrong! (got shape {})".
            format(y.shape))

        # deleting the property falls back to the default one-hot scheme
        del self.data.embedding
        self.assertEqual(
            self.data.embedding, "onehot",
            "<embedding> deleter is faulty! (got {})".format(
                self.data.embedding))
        X, y = self.data.table()
        self.assertEqual(
            y.shape, (10, 3),
            "OneHot of independent variables went wrong! (got shape {})".
            format(y.shape))
Exemple #12
0
def neural_switcharoo(grapes):
    """Train a small classifier on the testing split, then reuse its last
    hidden layer's representation of the learning split as a brand new
    CData frame fed to full_run."""
    net = Network(input_shape=grapes.neurons_required[0], name="GrapesNet")
    for layer in (DenseLayer(60, activation="tanh"),
                  DenseLayer(grapes.neurons_required[1], activation="sigmoid")):
        net.add(layer)
    net.finalize(cost="xent", optimizer="adam")
    net.describe(1)

    net.fit(*grapes.table("testing"), epochs=300, monitor=["acc"], verbose=0)

    # forward-pass the learning split and grab the hidden representation
    net.prediction(grapes.learning)
    hidden = net.layers[-2].output

    full_run(CData((hidden, grapes.lindeps), headers=None))
Exemple #13
0
def get_brainforged_net():
    """Build a small sigmoid MLP (one 30-unit hidden layer, MSE cost,
    eta=3.0, no regularization or momentum) over the MNIST frame.

    Depends on csxnet and csxdata, both available on my github
    (but there are no install scripts for them :) )
    """
    from csxnet import Network
    from csxdata import CData, roots

    # 18% of the MNIST samples are held out for testing
    model = Network(CData(roots["misc"] + "mnist.pkl.gz", 0.18),
                    eta=3.0,
                    lmbd1=0,
                    lmbd2=0,
                    mu=0,
                    cost="mse")
    model.add_fc(30, activation="sigmoid")
    model.finalize_architecture(activation="sigmoid")

    return model  # THIS IS ALSO A NETWORK OBJECT!
Exemple #14
0
 def test_initialization_on_etalon_with_given_parameters(self):
     """cross_val=0.5 splits the 10 etalon samples 5/5 and defaults the
     embedding to one-hot over the 3 categories."""
     new_data = CData((self.X_, self.y_), cross_val=0.5)
     self.assertEqual(
         new_data.embedding, "onehot",
         "<embedding> property is faulty after initialization!")
     self.assertEqual(
         len(new_data.categories), 3,
         "Invalid determination of categories! (got {})".format(
             new_data.categories))
     self.assertEqual(new_data.crossval, 0.5,
                      "Wrong <crossval> value in data!")
     self.assertEqual(new_data.N, 5)
     self.assertEqual(new_data.N, new_data.learning.shape[0],
                      "Validation data splitting went wrong @ learning!")
     self.assertEqual(new_data.n_testing, 5)
     self.assertEqual(new_data.n_testing, new_data.testing.shape[0],
                      "Validation data splitting went wrong @ testing!")
Exemple #15
0
    def test_reset(self):
        """reset_data must undo embedding/transformation changes so the
        frame's data sum matches a freshly constructed one."""
        data2 = CData((self.X_, self.y_), cross_val=0)
        er = "Difference detected in data shapes"

        # mutate the frame heavily before resetting
        self.data.crossval = 3
        self.data.embedding = 10
        self.data.transformation = ("pca", 1)

        self.data.reset_data(shuff=False)
        sm1, sm2 = np.sum(self.data.data), np.sum(data2.data)

        # (7, 3): 7 learning samples of 10 remain -- presumably crossval=3
        # survives the reset; TODO confirm against CData semantics
        self.assertEqual(self.data.learning.shape, (7, 3), msg=er)
        self.assertEqual(
            sm1,
            sm2,
            msg="The sums of learning data differ by {}!\n{}\n{}".format(
                abs(sm1 - sm2), sm1, sm2))
Exemple #16
0
def autoencoder(grapes):
    """Fit a 60-30-30-60 tanh autoencoder on the testing split, then run
    full_run on the bottleneck codes of the learning split."""
    ae = Network(input_shape=grapes.neurons_required[0],
                 name="TestAutoEncoder")
    ae.add(DenseLayer(60, activation="tanh"))
    ae.add(DenseLayer(30, activation="tanh"))
    ae.add(DenseLayer(30, activation="tanh"))
    ae.add(DenseLayer(60, activation="tanh"))
    # linear output layer reconstructs the original input space
    ae.add(DenseLayer(grapes.neurons_required[0], activation="linear"))
    ae.finalize(cost="mse", optimizer="adam")
    ae.describe(1)

    # autoencoder target == input (reconstruction)
    ae.fit(grapes.testing, grapes.testing, epochs=300, verbose=0)
    ae.prediction(grapes.learning)
    # layer index 2: the first 30-unit bottleneck layer's activations
    eX = ae.layers[2].output

    trGrapes = CData((eX, grapes.lindeps), headers=None)

    full_run(trGrapes)
Exemple #17
0
from csxdata import CData

from SciProjects.grapes.classical import full_run
from SciProjects.grapes.ann import run_forged, run_keras
from SciProjects.grapes.combined import autoencoder, neural_switcharoo
from SciProjects.grapes import path, indepsn

# Run the full battery of experiments on the Grapes/Wines database,
# classifying samples by production region ("borregio").
grapes = CData(path, indepsn, headers=1, lower=True, feature="borregio")

print("Running all experiments on Grapes/Wines database!")
print("Classifying by wine production region!")
print("*" * 50)
print("PHASE 1: classic algorithms")
full_run(grapes)
print("*" * 50)
print("PHASE 2: neural network classification")
print("PHASE 2A: Keras")
run_keras(grapes)
print("PHASE 2B: Brainforge")
run_forged(grapes)
print("*" * 50)
print("PHASE 3: combined models")
print("PHASE 3A: autoencoder")
autoencoder(grapes)
print("PHASE 3B: neural switcharoo")
neural_switcharoo(grapes)
Exemple #18
0
from csxdata import CData
from csxdata.utilities.highlevel import plot, transform
from SciProjects.generic import paths

# Vintage ("evjarat") plotting setup: whole frame used as learning data
# (no test split), labels lowercased.
FEATURE = "evjarat"

grapes = CData(*paths["grapes"], feature=FEATURE, cross_val=0, lower=True)
X, Y = grapes.learning, grapes.lindeps


def plot_raw():
    """Scatter every other raw input column (labelled DH1 vs D13C) by Y."""
    plot(X[:, ::2], Y, ["DH1", "D13C"], 1)


def plot_pca():
    """Scatter the first two principal components of X, colored by Y."""
    lX = transform(X, factors=2, get_model=False, method="pca")
    plot(lX, Y, ["PC1", "PC2"], 1)


def plot_lda():
    """Scatter the first two LDA discriminants (supervised by Y)."""
    lX = transform(X, factors=2, get_model=False, method="lda", y=Y)
    plot(lX, Y, ["LD1", "LD2"], 1)


def plot_ica():
    """Scatter the first two independent components of X, colored by Y."""
    lX = transform(X, factors=2, get_model=False, method="ica")
    plot(lX, Y, ["IC1", "IC2"], 1)


# def plot_ae():
#     lX = transform(X, 2, False, "ae", y=Y)
Exemple #19
0
def get_frame(transf, params):
    """Load the grapes frame keyed by FEATURE and set its pretreatment.

    Parameters:
        transf: transformation name forwarded to CData.
        params: the transformation's parameter(s).
    """
    csv_path, n_indeps = paths["grapes"]
    frame = CData(csv_path, n_indeps, headers=1, feature=FEATURE, lower=True)
    frame.transformation = (transf, params)
    return frame
Exemple #20
0
from sklearn.ensemble import RandomForestClassifier as RF

from csxdata import CData

from SciProjects.fruits import *

# Random-forest baseline on the fruits frame (family-level labels),
# validated on the independent zsind set.
LABEL = "Familia"
rf = RF(n_estimators=100, n_jobs=1)

fruits = CData(gyumpath, gyumindeps, cross_val=0.2, feature=LABEL)
fruits.transformation = ("lda", 5)

zsind = CData(zsindpath, zsindeps, cross_val=0.0)
# NOTE(review): transformation is called with (X, y) and indexed below as
# valid[0]/valid[1] -- presumably it returns the transformed pair; confirm
# against csxdata's API
valid = fruits.transformation(zsind.learning, zsind.lindeps)
rf.fit(fruits.learning, fruits.lindeps)

tpredict = rf.predict(fruits.testing)
vpredict = rf.predict(valid[0])
# element-wise hit lists for testing and validation accuracy
tacc = [right == left for right, left in zip(tpredict, fruits.tindeps)]
vacc = [right == left for right, left in zip(vpredict, valid[1])]
print("TPredictions ({}%):\n{}".format(int(100 * sum(tacc) / len(tacc)),
                                       tpredict))
print("TestingY:\n", fruits.tindeps)
print("VPredictions ({}%):\n{}".format(int(100 * sum(vacc) / len(vacc)),
                                       vpredict))
print("ValidY:\n", valid[1])
Exemple #21
0
class TestTransformations(unittest.TestCase):
    """
    Dear Transformation Wrapper Classes,

    I would like you to:
    + standardize / whiten (PCA, LDA, ICA) / autoencode the learning data
      and reproduce precomputed etalon results.
    """
    def setUp(self):
        # 10-sample etalon set, no cross-validation split
        self.X_, self.y_, headers = parse_csv(etalonroot + "/input.csv")

        self.data = CData((self.X_, self.y_), cross_val=0)

    def test_standardization_on_etalon(self):
        """Standardized learning data must equal the precomputed std.csv."""
        self.data.reset_data(shuff=False)

        # NOTE(review): other etalon paths use "/input.csv"; this one relies
        # on etalonroot ending with a separator -- confirm
        calcme = parse_csv(etalonroot + "std.csv", dtype="float64")[0]
        calcme = np.sort(calcme.ravel())

        self.data.transformation = "std"
        X = np.round(self.data.learning.astype("float64"), 3)
        X = np.sort(X.ravel())

        self.assertEqual(self.data.transformation, "std",
                         "The transformation property is faulty!")
        self.assertTrue(np.all(np.equal(X, calcme)),
                        "Standardization is faulty!")

    def test_pca_on_etalon(self):
        """PCA output must match pca.csv; compared as sorted absolute
        values because component signs and order are arbitrary."""
        self.data.reset_data(shuff=False)

        calcme = parse_csv(etalonroot + "pca.csv", dtype="float64")[0]
        calcme = np.round(np.sort(np.abs(calcme.ravel())), 1)

        self.data.transformation = "pca"
        X = self.data.learning.astype("float64")
        X = np.round(np.sort(np.abs(X.ravel())), 1)
        eq = np.isclose(X, calcme)

        self.assertEqual(self.data.transformation, "pca",
                         "The transformation property is faulty!")
        self.assertTrue(np.all(eq), "PCA is faulty!")

    def test_lda_on_etalon(self):
        """LDA output must match lda.csv (sorted absolute values)."""
        self.data.reset_data(shuff=False)

        calcme = parse_csv(etalonroot + "lda.csv", dtype="float64")[0]
        calcme = np.round(np.sort(np.abs(calcme.ravel())), 1)

        self.data.transformation = "lda"
        X = self.data.learning.astype("float64")
        X = np.round(np.sort(np.abs(X.ravel())), 1)
        eq = np.isclose(X, calcme)

        self.assertEqual(self.data.transformation, "lda",
                         "The transformation property is faulty!")
        self.assertTrue(np.all(eq), "LDA is faulty!")

    def test_ica_on_etalon(self):
        """ICA output must match ica.csv within a looser tolerance."""
        self.data.reset_data(shuff=False)

        calcme = parse_csv(etalonroot + "ica.csv", dtype="float64")[0]
        calcme = np.round(np.sort(np.abs(calcme.ravel())), 1)

        self.data.transformation = "ica"
        X = self.data.learning.astype("float64")
        X = np.round(np.sort(np.abs(X.ravel())), 1)

        self.assertEqual(self.data.transformation, "ica",
                         "The transformation property is faulty!")
        self.assertTrue(np.allclose(X, calcme, rtol=1.e-4, atol=1.e-7),
                        "ICA is faulty!")

    def test_autoencoding_on_etalon(self):
        """Autoencoding to 10 units reshapes the learning data to (10, 10)."""
        self.data.reset_data(shuff=False)
        self.data.transformation = ("ae", 10)
        self.assertEqual(
            self.data.transformation, "autoencoding",
            "Autoencoding failed on the <transformation> property assertion!")
        self.assertEqual(self.data.learning.shape, (10, 10),
                         "Autoencoding failed on the output shape test!")
Exemple #22
0
    def setUp(self):
        """Parse the etalon CSV and wrap it in an unsplit CData frame."""
        self.X_, self.y_, headers = parse_csv(etalonroot + "/input.csv")

        self.data = CData((self.X_, self.y_), cross_val=0)
from matplotlib import pyplot as plt

from csxdata import CData
from csxdata.visual import Plotter2D

from SciProjects.matt import projectroot

# Project the Spitzke spreadsheet to two PLS components and scatter the
# samples grouped by ORIGIN.
data = CData(projectroot + "Spitzke.data.xlsx",
             indeps=4,
             feature="ORIGIN",
             cross_val=0,
             headers=1,
             dropna=True)
data.set_transformation("pls", 2)

# NOTE(review): the transformation is PLS but the title/axis labels say
# PCA / PC01-PC02 -- likely copy-pasted; confirm the intended labels
plot = Plotter2D(X=data.learning,
                 y=data.lindeps,
                 fig=plt.gcf(),
                 title="Spitzke PCA",
                 axlabels=("PC01", "PC02"))
plot.split_scatter(center=True, label=True)
plt.show()
Exemple #24
0
def pull_mnist_data():
    """Load the pickled MNIST archive into a standardized CData frame,
    holding out 18% of the samples for testing."""
    frame = CData(roots["misc"] + "mnist.pkl.gz", cross_val=0.18)
    frame.transformation = "std"
    return frame
Exemple #25
0
# from sklearn.ensemble import AdaBoostClassifier as Boost
from sklearn.ensemble.bagging import BaggingClassifier as Boost
from sklearn.naive_bayes import GaussianNB

from csxdata import CData

from SciProjects.grapes import path, indepsn

if __name__ == '__main__':

    # Bagged Gaussian naive-Bayes ensemble on the grapes frame:
    # vintage ("evjarat") classification, 20% test split, standardized inputs.
    data = CData(path,
                 indepsn,
                 feature="evjarat",
                 headers=1,
                 cross_val=0.2,
                 lower=True)
    data.transformation = "std"
    model = Boost(GaussianNB(), n_estimators=100)

    model.fit(data.learning, data.lindeps)
    preds = model.predict(data.testing)
    # element-wise hit list -> mean accuracy
    eq = [left == right for left, right in zip(preds, data.tindeps)]
    print("Acc:", sum(eq) / len(eq))
Exemple #26
0
def get_frame(pretreat="std", param=0):
    """Build the grapes CData frame keyed by "borregio" and set its
    pretreatment transformation.

    Parameters:
        pretreat: transformation name (default standardization).
        param: the transformation's parameter.
    """
    csv_path, n_indeps = paths["grapes"]
    data = CData(csv_path, n_indeps, headers=1, feature="borregio", lower=True)
    data.transformation = (pretreat, param)
    return data
Exemple #27
0
    ae.fit(grapes.testing, grapes.testing, epochs=300, verbose=0)
    ae.prediction(grapes.learning)
    eX = ae.layers[2].output

    trGrapes = CData((eX, grapes.lindeps), headers=None)

    full_run(trGrapes)


def neural_switcharoo(grapes):
    """Train a small classifier on the testing split, then reuse its last
    hidden layer's representation of the learning split as a new CData
    frame fed to full_run."""
    model = Network(input_shape=grapes.neurons_required[0], name="GrapesNet")
    model.add(DenseLayer(60, activation="tanh"))
    model.add(DenseLayer(grapes.neurons_required[1], activation="sigmoid"))
    model.finalize(cost="xent", optimizer="adam")
    model.describe(1)

    model.fit(*grapes.table("testing"), epochs=300, monitor=["acc"], verbose=0)

    # forward-pass the learning split; grab the last hidden layer's output
    model.prediction(grapes.learning)
    trX = model.layers[-2].output

    trGrapes = CData((trX, grapes.lindeps), headers=None)

    full_run(trGrapes)


if __name__ == '__main__':
    # standardize the grapes frame and run the autoencoder experiment
    dframe = CData(path, indepsn, headers=1, feature="borregio", lower=True)
    dframe.transformation = "std"
    autoencoder(dframe)
Exemple #28
0
from csxdata import roots, CData

from brainforge import BackpropNetwork
from brainforge.layers import DenseLayer
from brainforge.optimization import SGD

# Dense MNIST classifier (30-unit sigmoid hidden layer, softmax output)
# trained with plain SGD at eta=3.; 10000 samples held out for testing and
# images kept flat (fold=False).
mnist = CData(roots["misc"] + "mnist.pkl.gz", cross_val=10000, fold=False)
inshape, outshape = mnist.neurons_required

network = BackpropNetwork(input_shape=inshape,
                          layerstack=[
                              DenseLayer(30, activation="sigmoid"),
                              DenseLayer(outshape, activation="softmax")
                          ],
                          cost="xent",
                          optimizer=SGD(eta=3.))

network.fit(*mnist.table("learning"), validation=mnist.table("testing"))
Exemple #29
0
from matplotlib.mlab import normpdf

from csxdata import CData

from SciProjects.generic import paths

# Vintage ("evjarat") statistics setup: whole frame as learning data,
# optionally filtered by WHERE/EQUALS (both disabled here).
FEATURE = "evjarat"
WHERE = None
EQUALS = None

grapes = CData(*paths["grapes"],
               cross_val=0,
               lower=True,
               feature=FEATURE,
               filterby=WHERE,
               selection=EQUALS)
X, Y = (grapes.learning, grapes.lindeps)
# column names used for reporting/plots
names = ["DH1", "DH2", "D13C"]


def category_frequencies():
    """Report the label distribution of Y via csxdata's inspection helper."""
    # the local import intentionally shadows this function's own name
    from csxdata.stats.inspection import category_frequencies
    category_frequencies(Y)


def normality_tests(hist=True):
    from csxdata.stats.normaltest import full
    full(X)
    if hist:
        from matplotlib import pyplot as plt
        f, axes = plt.subplots(1, 3)
Exemple #30
0
class TestCData(unittest.TestCase):
    """
    Dear Categorical Dataframe,

    I would like you to:
    + hold categorical data for me.
    + partition the data to learning and testing cases
    + be able to generate weights based on the representation ratio of different classes
    + transform (whiten, autoencode, standardize) the independent variables
     and adjust the <inputs_required> accordingly.
     These transformations should fitted only on the learning data!
    + dummycode/embed the categorical variable:
     create the one-hot vector representations of categories OR
     embed the categorical variable into N-space,
     adjust <outputs_required> accordingly,
     and be able to translate the network output back to human readable class names
    + be able to reset transformations and embeddings if this is desirable
     without the loss of information.
    + create a learning table from the data
    + generate random batches from the data
    - Handle multiple labels and be able to average similarily labelled samples
    """
    def setUp(self):
        # 10-sample etalon set with 3 features and 3 categories
        self.X_, self.y_, headers = parse_csv(etalonroot + "/input.csv")

        self.data = CData((self.X_, self.y_), cross_val=0)

    def test_initialization_on_etalon_with_given_parameters(self):
        """cross_val=0.5 splits the 10 etalon samples 5/5 and defaults
        the embedding to one-hot over the 3 categories."""
        new_data = CData((self.X_, self.y_), cross_val=0.5)
        self.assertEqual(
            new_data.embedding, "onehot",
            "<embedding> property is faulty after initialization!")
        self.assertEqual(
            len(new_data.categories), 3,
            "Invalid determination of categories! (got {})".format(
                new_data.categories))
        self.assertEqual(new_data.crossval, 0.5,
                         "Wrong <crossval> value in data!")
        self.assertEqual(new_data.N, 5)
        self.assertEqual(new_data.N, new_data.learning.shape[0],
                         "Validation data splitting went wrong @ learning!")
        self.assertEqual(new_data.n_testing, 5)
        self.assertEqual(new_data.n_testing, new_data.testing.shape[0],
                         "Validation data splitting went wrong @ testing!")

    def test_reset(self):
        """reset_data must undo embedding/transformation changes so the
        data sum matches a freshly built frame."""
        data2 = CData((self.X_, self.y_), cross_val=0)
        er = "Difference detected in data shapes"

        # mutate the frame heavily before resetting
        self.data.crossval = 3
        self.data.embedding = 10
        self.data.transformation = ("pca", 1)

        self.data.reset_data(shuff=False)
        sm1, sm2 = np.sum(self.data.data), np.sum(data2.data)

        self.assertEqual(self.data.learning.shape, (7, 3), msg=er)
        self.assertEqual(
            sm1,
            sm2,
            msg="The sums of learning data differ by {}!\n{}\n{}".format(
                abs(sm1 - sm2), sm1, sm2))

    def test_core_data_is_readonly(self):
        """The underlying array must reject in-place writes."""
        with self.assertRaises(ValueError):
            self.data.data[0][0] = 2.0

    def test_setter_sets_crossval_getter_right(self):
        """Setting crossval to a sample count (5 of 10) reads back as the
        ratio 0.5 and splits the frame 5/5."""
        self.data.crossval = 5
        self.assertEqual(self.data.crossval, 0.5,
                         "Wrong <crossval> value in data!")
        self.assertEqual(self.data.N, 5)
        self.assertEqual(self.data.N, self.data.learning.shape[0],
                         "Validation data splitting went wrong @ learning!")
        self.assertEqual(self.data.n_testing, 5)
        self.assertEqual(self.data.n_testing, self.data.testing.shape[0],
                         "Validation data splitting went wrong @ testing!")

    def test_weights_sum_to_N(self):
        """Per-sample class-balancing weights must sum to the sample count."""
        w = self.data.sample_weights
        self.assertEqual(round(w.sum()), self.data.N)

    def test_concatenate_produces_right_shape(self):
        """Concatenating two identical 10-sample frames yields 20 rows."""
        newdata = CData((self.X_, self.y_), cross_val=0)
        newdata.concatenate(self.data)
        self.assertEqual(newdata.N, 20,
                         "Split after concatenation went wrong!")
        self.assertEqual(newdata.data.shape, (20, 3),
                         "Shapes went haywire after concatenation!")

    def test_batches_from_generator_are_shaped_and_distributed_right(self):
        """Batches of 2 over 15 autoencoded features and a 3-dim embedding
        must come out (2, 15)/(2, 3)/(2,) and number exactly 4."""
        i = 0
        self.data.crossval = 2
        self.data.transformation = ("ae", 15)
        self.data.embedding = 3
        for X, y, w in self.data.batchgen(2, weigh=True):
            self.assertEqual(X.shape, (2, 15))
            self.assertEqual(y.shape, (2, 3))
            self.assertEqual(w.shape, (2, ))
            i += 1
        self.assertEqual(
            i,
            4,
            msg="Number of batches differ. Got {} expected {}".format(i, 4))

    def test_neurons_required_property_on_untransformed_data(self):
        """Raw etalon data needs (3,) inputs and (3,) one-hot outputs."""
        inshape, outputs = self.data.neurons_required
        self.assertIsInstance(inshape, tuple, "Input shape is not in a tuple!")
        # BUG FIX: this assertion previously re-checked <inshape>; it now
        # checks <outputs>, as its message always claimed (cf. the correct
        # pattern in the transformation test below).
        self.assertIsInstance(outputs, tuple,
                              "Output shape is not in a tuple!")
        self.assertEqual(inshape, (3, ))
        self.assertEqual(outputs, (3, ))

    def test_neurons_required_proprety_after_heavy_transformation_then_resetting(
            self):
        """PCA(2) + 10-dim embedding changes the shapes to (2,)/(10,);
        reset_data restores (3,)/(3,)."""
        self.data.embedding = 10
        self.data.transformation = ("pca", 2)
        inshape, outputs = self.data.neurons_required
        self.assertIsInstance(inshape, tuple, "Input shape is not in a tuple!")
        self.assertIsInstance(outputs, tuple,
                              "Output shape is not in a tuple!")
        self.assertEqual(inshape, (2, ),
                         "Wrong input shape after transformation/embedding!")
        self.assertEqual(outputs, (10, ),
                         "Wrong output shape after transformation/embedding!")
        self.data.reset_data(shuff=False)
        inshape, outputs = self.data.neurons_required
        self.assertIsInstance(inshape, tuple, "Input shape is not in a tuple!")
        self.assertIsInstance(outputs, tuple,
                              "Output shape is not in a tuple!")
        self.assertEqual(inshape, (3, ), "Wrong input shape after resetting!")
        self.assertEqual(outputs, (3, ), "Wrong output shape after resetting!")
Exemple #31
0
from csxdata import CData, roots

from brainforge import BackpropNetwork
from brainforge.layers import ConvLayer, PoolLayer, Flatten, DenseLayer, Activation
from brainforge.optimization import RMSprop

# Small ConvNet on folded (image-shaped) MNIST trained with RMSprop from an
# infinite batch generator; 10000 samples held out for validation.
data = CData(roots["misc"] + "mnist.pkl.gz", cross_val=10000, fold=True)
ins, ous = data.neurons_required
net = BackpropNetwork(input_shape=ins, layerstack=[
    ConvLayer(3, 8, 8, compiled=False),
    PoolLayer(3, compiled=False), Activation("tanh"),
    Flatten(), DenseLayer(60, activation="tanh"),
    DenseLayer(ous, activation="softmax")
], cost="xent", optimizer=RMSprop(eta=0.01))

net.fit_generator(data.batchgen(bsize=20, infinite=True), lessons_per_epoch=60000, epochs=30,
                  validation=data.table("testing"))