Example #1
def test_encode_periodiccases_deep():
    """test periodiccases can be encoded
    """
    import numpy as np
    from crpm.setup_periodiccases import setup_periodiccases_deep
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.dynamics import computecost

    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_periodiccases_deep()
    nx = data.shape[0]
    nsample = data.shape[1]

    #remove discriminating layer
    prototype = model[0:-1]

    #partition training and validation data
    valid = data[1:nx, 0:nsample // 3]
    #validtargets = data[0,0:nsample//3]
    train = data[1:nx, nsample // 3:nsample]
    #targets =data[0,nsample//3:nsample]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, icost = computecost(autoencoder, valid, valid, "mse")

    #train prototype
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=5,
                                           maxepoch=500,
                                           momentum=.05)

    #calculate final reconstruction error
    pred, cost = computecost(autoencoder, valid, valid, "mse")

    #diagnostic
    print(icost)
    print(cost)

    #assert learning is taking place
    assert icost > cost
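
The assertion above passes only if training lowers the reconstruction error. For reference, a minimal numpy sketch of the mean squared error that the "mse" cost name presumably denotes; reconstruction_mse is a hypothetical helper, not part of crpm:

import numpy as np

def reconstruction_mse(pred, target):
    """Hypothetical helper: mean of elementwise squared differences."""
    return np.mean((pred - target) ** 2)

x = np.random.rand(2, 5)
noisy = x + 0.1 * np.random.randn(2, 5)
assert reconstruction_mse(x, x) == 0.0     #perfect reconstruction
assert reconstruction_mse(noisy, x) > 0.0  #imperfect reconstruction
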
Example #2
def test_encode_nestedcs():
    """test nested cs data can be encoded
    """

    import numpy as np
    from crpm.setup_nestedcs import setup_nestedcs
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.gradientdecent import gradientdecent

    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_nestedcs()

    #remove discriminating layer
    prototype = model[0:-1]

    #explicitly remove labels from data
    #labels = data[2, :]
    data = data[0:2, :]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, data, maxepoch=0)

    #calculate initial mean squared error
    pred, icost = computecost(autoencoder, data, data, "mse")

    #train model
    _, autoencoder = contrastivedivergence(prototype,
                                           data,
                                           ncd=10,
                                           nadj=10,
                                           maxepoch=1000,
                                           momentum=0.1,
                                           batchsize=10,
                                           finetune=6)

    #calculate final mean squared error
    pred, cost = computecost(autoencoder, data, data, "mse")

    #assert learning is taking place
    assert icost > cost
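
These tests share a data convention: one row per feature, one column per sample, with class labels stored on a dedicated row that is sliced away before unsupervised training. A standalone sketch of that layout (the arrays are made up for illustration; the labels sit on the last row here, matching test_encode_nestedcs, while other tests keep them on the first row):

import numpy as np

nfeat, nsample = 2, 6
features = np.random.rand(nfeat, nsample)       #one column per sample
labels = np.random.randint(0, 2, (1, nsample))  #one label per sample
data = np.vstack((features, labels))            #labels on the last row

unlabeled = data[0:nfeat, :]                    #drop the label row
assert unlabeled.shape == (nfeat, nsample)
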
Example #3
def r_test_pretrain_periodiccases_deep():
    """test pretained periodiccases model encodes better than non pretrained model
    """
    import numpy as np
    from crpm.setup_periodiccases import setup_periodiccases_deep
    from crpm.dynamics import computecost

    from crpm.gradientdecent import gradientdecent
    from crpm.ffn_bodyplan import reinit_ffn
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    #from crpm.analyzebinaryclassifier import plotroc

    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_periodiccases_deep()
    nx = data.shape[0]
    nsample = data.shape[1]

    #partition training and validation data
    valid = data[1:nx, 0:nsample // 3]
    validtargets = data[0, 0:nsample // 3]
    train = data[1:nx, nsample // 3:nsample]
    targets = data[0, nsample // 3:nsample]

    #remove discriminating layer
    prototype = model[0:-1]

    #re-init prototype
    prototype = reinit_ffn(prototype)

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, icost_encoder = computecost(autoencoder, valid, valid, "mse")

    #conventional training autoencoder
    pred, gcost_encoder, _ = gradientdecent(autoencoder,
                                            train,
                                            train,
                                            "mse",
                                            valid,
                                            valid,
                                            maxepoch=1E6,
                                            earlystop=True,
                                            healforces=False,
                                            finetune=9)

    #assert autoencoder can be conventionally trained
    assert gcost_encoder < icost_encoder

    #re-init prototype
    prototype = reinit_ffn(prototype)

    #CD train autoencoder
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=10,
                                           batchsize=20,
                                           nadj=10,
                                           maxepoch=500,
                                           momentum=0.05,
                                           finetune=6)

    #calculate reconstruction error
    pred, cost_encoder = computecost(autoencoder, valid, valid, "mse")
    print(cost_encoder)

    #assert reconstruction error is less than initial recon error
    assert cost_encoder < icost_encoder

    #fine-tune autoencoder
    pred, fcost_encoder, _ = gradientdecent(autoencoder,
                                            train,
                                            train,
                                            "mse",
                                            valid,
                                            valid,
                                            maxepoch=1E6,
                                            earlystop=True,
                                            healforces=False,
                                            finetune=9)

    #assert final reconstruction error is not greater than previous recon error
    assert fcost_encoder <= cost_encoder

    #assert final reconstruction error is not greater than with conventional training
    assert fcost_encoder <= gcost_encoder
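
The partition used above reserves the first third of the columns for validation and trains on the remaining two thirds, with the label row stripped off. The same integer arithmetic in isolation, on a made-up array:

import numpy as np

data = np.random.rand(5, 9)               #label row plus 4 feature rows, 9 samples
nx, nsample = data.shape
valid = data[1:nx, 0:nsample // 3]        #first third of the columns
train = data[1:nx, nsample // 3:nsample]  #remaining two thirds
assert valid.shape[1] + train.shape[1] == nsample
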
Example #4
def r_test_encode_spectra2():
    """test spectra2 can be encoded
    """

    import numpy as np
    from crpm.setup_spectra2 import setup_spectra2
    from crpm.lossfunctions import loss
    from crpm.gradientdecent import gradientdecent
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    from crpm.contrastivedivergence import contrastivedivergence
    from crpm.ffn import FFN
    from crpm.dynamics import computecost

    from crpm.fwdprop import fwdprop

    #init numpy seed
    np.random.seed(40017)

    #setup model
    model, data = setup_spectra2()

    #remove discriminating layer
    prototype = model[0:-1]

    #partition data (labels on first row)
    nobv = data.shape[1]
    cutoff = 2 * nobv // 3
    #target = data[0, :cutoff]
    train = data[1:, :cutoff]
    #vtarget = data[0, cutoff:]
    valid = data[1:, cutoff:]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, icost = computecost(autoencoder, valid, valid, "mse")
    print("init recon error = " + str(icost))

    #train prototype
    #_, autoencoder = contrastivedivergence(prototype, train, validata=valid,
    #                                       ncd=1,
    #                                       batchsize=50,
    #                                       nadj=10,
    #                                       maxepoch=100,
    #                                       momentum=0.0)
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=1,
                                           batchsize=10,
                                           nadj=10,
                                           maxepoch=1000,
                                           momentum=0.9,
                                           finetune=7)

    #calculate final reconstruction error
    pred, cost = computecost(autoencoder, valid, valid, "mse")
    print("pretrained recon error = " + str(cost))

    #assert learning is taking place
    assert icost > cost
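
The momentum argument varies widely across these tests (0.05, 0.1, 0.9). Under the usual convention it is the coefficient on a velocity term that smooths successive weight updates; here is a hedged sketch of the classical rule, with no claim that contrastivedivergence implements exactly this:

import numpy as np

def momentum_step(weight, velocity, grad, lrate=0.01, momentum=0.9):
    """Classical momentum: v <- m*v - lr*g, then w <- w + v."""
    velocity = momentum * velocity - lrate * grad
    return weight + velocity, velocity

w, v = np.zeros(3), np.zeros(3)
for _ in range(100):
    w, v = momentum_step(w, v, grad=w - 1.0)  #toy gradient pulling w toward 1
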
Example #5
def r_test_spectra2():
    """test spectra2 can be encoded and generated
    """

    import numpy as np
    from crpm.setup_spectra2 import setup_spectra2
    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    #from crpm.lossfunctions import loss
    #from crpm.analyzebinaryclassifier import plotroc
    from crpm.gradientdecent import gradientdecent
    from crpm.contrastivedivergence import contrastivedivergence
    #from crpm.ffn import FFN
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.fwdprop import fwdprop
    from crpm.backprop import backprop
    #from crpm.dynamics import computeforces
    #from crpm.dynamics import maxforce

    from crpm.gan import gan
    #import matplotlib
    #matplotlib.use('TkAgg')
    #import matplotlib.pyplot as plt

    #init numpy seed
    np.random.seed(40017)

    #setup model
    prototype, data = setup_spectra2()

    #get prototype depth
    nlayer = len(prototype)

    #get data dimensions
    nfeat = data.shape[0]
    nobv = data.shape[1]

    #z-score data
    tdata = np.divide(data - np.mean(data, axis=1, keepdims=True),
                      np.std(data, axis=1, keepdims=True))

    #transform features into Boltzmann-like probs
    #tdata = np.exp(-data)
    #partfunc = np.sum(tdata, axis=1, keepdims = True)
    #tdata = np.divide(tdata,partfunc) #normalize
    #tdata = np.divide(tdata, np.max(tdata, axis=1, keepdims=True))#scale features by maxintensity

    #plt.plot(data[:,0])
    #plt.show()
    #plt.plot(tdata[:,0])
    #plt.show()
    #data = tdata

    #partition data (labels on first row)
    ntrain = 2 * nobv // 3
    target = data[0, :ntrain]
    train = data[1:, :ntrain]
    vtarget = data[0, ntrain:]
    valid = data[1:, ntrain:]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    #calculate initial reconstruction error
    pred, ireconerr = computecost(autoencoder, valid, valid, "mse")
    print("init recon error = " + str(ireconerr))

    ##train prototype
    #_, autoencoder = contrastivedivergence(prototype, train,
    #                                       ncd=2,
    #                                       batchsize=50,
    #                                       nadj=10,
    #                                       maxepoch=100,
    #                                       momentum=0.1)
    #train prototype
    _, autoencoder = contrastivedivergence(prototype,
                                           train,
                                           validata=valid,
                                           ncd=1,
                                           batchsize=50,
                                           nadj=10,
                                           maxepoch=100,
                                           momentum=0.0)

    #calculate final reconstruction error
    pred, reconerr = computecost(autoencoder, valid, valid, "mse")
    print("pretrained recon error = " + str(reconerr))

    #assert learning is taking place by reduced recon error.
    assert ireconerr > reconerr

    # ----- Discriminator -----
    #create discriminator
    discriminator = copy_ffn(autoencoder[0:len(prototype)])
    discriminator = stack_new_layer(discriminator, n=1, activation="logistic")
    #analyze trained binary classifier
    pred, icost = computecost(discriminator, valid, vtarget, "bce")
    roc, ireport = analyzebinaryclassifier(pred, vtarget)
    if ireport["AreaUnderCurve"] < .5:
        #flip labels
        pred, icost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, ireport = analyzebinaryclassifier(pred, 1 - vtarget)
    print(ireport)
    #plotroc(roc)

    #train discriminator
    pred, cost, _ = gradientdecent(discriminator,
                                   train,
                                   target,
                                   "bce",
                                   valid,
                                   vtarget,
                                   earlystop=True,
                                   finetune=6)

    #analyze trained binary classifier
    pred, cost = computecost(discriminator, valid, vtarget, "bce")
    roc, report = analyzebinaryclassifier(pred, vtarget)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - vtarget)
    print(report)
    #plotroc(roc)

    #assert discriminator can be trained by binary cross entropy error
    assert icost > cost

    #assert discriminator has potential to identify two classes
    assert report["AreaUnderCurve"] > ireport["AreaUnderCurve"]
    #assert report["AreaUnderCurve"] > .6

    # ----- generator -----

    #create generator from decoder
    generator = copy_ffn(autoencoder[len(prototype):len(autoencoder)])

    #adjust regularization
    for layer in generator:
        layer["regval"] = 0  #.00001

    #correct the layer index labels
    for idx, layer in enumerate(generator):
        layer["layer"] = idx

    #generate fake samples
    nfake = 600
    ncode = generator[0]["n"]
    fake, _ = fwdprop(np.random.rand(ncode, nfake), generator)

    #calculate initial reconstruction error
    pred, fkreconerr = computecost(autoencoder, fake, fake, "mse")
    print("init fake recon error = " + str(fkreconerr))

    #assert fake data recon error is better than untrained recon error
    assert fkreconerr < ireconerr

    #-- Start GAN training---

    ganerr = gan(generator,
                 discriminator,
                 train,
                 maxepoch=20000,
                 batchsize=50,
                 finetune=6.3)

    #assert generator fools discriminator at least some of the time (bce < 0.8)
    assert ganerr[-1, 1] < .8

    #def moving_average(a, n=3) :
    #    ret = np.cumsum(a, dtype=float)
    #    ret[n:] = ret[n:] - ret[:-n]
    #    return ret[n - 1:] / n

    #fig = plt.figure()
    #plt.plot(ganerr[:, 0], ganerr[:, 1])
    #plt.plot(moving_average(ganerr[:, 0], n=20), moving_average(ganerr[:, 1], n=20))
    #plt.plot(ganerr[0, 0], ganerr[0, 1], marker="D", color="green", markersize=10)
    #plt.plot(ganerr[-1, 0], ganerr[-1, 1], marker="8", color="red", markersize=10)
    #plt.xlabel("discriminator error")
    #plt.ylabel("generator error")
    #plt.show()

    #print("final report")
    #print(report)
    #plotroc(roc)

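One quirk worth noting in this test: tdata is z-scored near the top but, with data = tdata left commented out, never actually used. The z-score transform itself standardizes each feature row to zero mean and unit variance across samples:

import numpy as np

data = np.random.rand(4, 10)
tdata = np.divide(data - np.mean(data, axis=1, keepdims=True),
                  np.std(data, axis=1, keepdims=True))
assert np.allclose(np.mean(tdata, axis=1), 0.0)
assert np.allclose(np.std(tdata, axis=1), 1.0)
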
Example #6
def test_afnetwork():
    """test AF network patients can be encoded and generated
    """
    #import matplotlib
    #matplotlib.use('TkAgg')
    #import matplotlib.pyplot as plt
    #import matplotlib.patches as mpatches

    import numpy as np
    from crpm.setup_afmodel import setup_afmodel

    from crpm.dynamics import computecost
    from crpm.analyzebinaryclassifier import analyzebinaryclassifier
    #from crpm.lossfunctions import loss
    from crpm.analyzebinaryclassifier import plotroc
    from crpm.gradientdecent import gradientdecent
    from crpm.contrastivedivergence import contrastivedivergence
    #from crpm.ffn import FFN
    from crpm.ffn_bodyplan import stack_new_layer
    from crpm.ffn_bodyplan import copy_ffn
    from crpm.fwdprop import fwdprop
    #from crpm.backprop import backprop
    #from crpm.dynamics import computeforces
    #from crpm.dynamics import maxforce

    from crpm.gan import gan

    #init numpy seed
    np.random.seed(40017)

    #setup model
    prototype, train, target, valid, vtarget = setup_afmodel()

    #trim data
    #maxobv = 150
    #train = train[:,:maxobv]
    #valid = valid[:,:maxobv]
    #target = target[:maxobv]
    #vtarget = vtarget[:maxobv]

    #get prototype depth
    nlayer = len(prototype)

    #get data dimensions
    nfeat = train.shape[0]
    nobv = train.shape[1]

    #return untrained autoencoder
    _, autoencoder = contrastivedivergence(prototype, train, maxepoch=0)

    # ----- Discriminator -----

    #create discriminator
    discriminator = copy_ffn(autoencoder[0:len(prototype)])
    discriminator = stack_new_layer(discriminator, n=1, activation="logistic")

    print("analyze untrained discriminator to iden subtype")
    pred, icost = computecost(discriminator, valid, vtarget, "bce")
    roc, ireport = analyzebinaryclassifier(pred, vtarget)
    if ireport["AreaUnderCurve"] < .5:
        #flip labels
        pred, icost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, ireport = analyzebinaryclassifier(pred, 1 - vtarget)
    print(ireport)
    #plotroc(roc)

    #train discriminator
    pred, cost, _ = gradientdecent(discriminator,
                                   train,
                                   target,
                                   "bce",
                                   valid,
                                   vtarget,
                                   earlystop=True,
                                   finetune=7)

    print("analyze trained discriminator to iden subtype")
    pred, cost = computecost(discriminator, valid, vtarget, "bce")
    roc, report = analyzebinaryclassifier(pred, vtarget)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, valid, 1 - vtarget, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - vtarget)
    print(report)
    #plotroc(roc)

    #assert discriminator can be trained by binary cross entropy error
    #assert icost > cost

    #assert discriminator has potential to identify two classes
    #assert report["AreaUnderCurve"] > ireport["AreaUnderCurve"]
    #assert report["AreaUnderCurve"] > .55

    # ----- GENERATOR -----

    #create generator from decoder
    generator = copy_ffn(autoencoder[len(prototype) - 1:len(autoencoder)])

    #correct the layer index labels
    for idx, layer in enumerate(generator):
        layer["layer"] = idx

    #assert False
    #-- Main GAN training---
    ganerr = gan(generator,
                 discriminator,
                 train,
                 maxepoch=100000,
                 batchsize=1,
                 finetune=6)

    #def moving_average(a, n=3) :
    #    ret = np.cumsum(a, dtype=float)
    #    ret[n:] = ret[n:] - ret[:-n]
    #    return ret[n - 1:] / n

    #ganerr[:,2] = np.log(ganerr[:,2]) #plot density error on logscale
    #discerrbar = moving_average(ganerr[:, 0], n=20)
    #generrbar = moving_average(ganerr[:, 1], n=20)
    #autoerrbar = moving_average(ganerr[:, 2], n=20)

    #assert generator fools discriminator at least some of the time (bce < 0.65)
    print(ganerr[-1, 1])
    assert ganerr[-1, 1] < .65

    #fig = plt.figure()
    #plt.plot(ganerr[:, 0], ganerr[:, 1])
    #plt.plot(discerrbar, generrbar)
    #plt.plot(discerrbar[0], generrbar[0], marker="D", color="green", markersize=10)
    #plt.plot(discerrbar[-1], generrbar[-1], marker="8", color="red", markersize=10)
    #plt.xlabel("discriminator error")
    #plt.ylabel("generator error")
    #plt.show()

    #fig = plt.figure()
    #plt.plot(ganerr[:, 0], ganerr[:, 2])
    #plt.plot(discerrbar, autoerrbar)
    #plt.plot(discerrbar[0], autoerrbar[0], marker="D", color="green", markersize=10)
    #plt.plot(discerrbar[-1], autoerrbar[-1], marker="8", color="red", markersize=10)
    #plt.xlabel("discriminator error")
    #plt.ylabel("encoder error")
    #plt.show()

    #generate fake data for every training sample
    nsample = train.shape[1]
    fake, _ = fwdprop(np.random.rand(generator[0]["n"], nsample), generator)
    #merge training and fake data
    gandata = np.hstack((train, fake))
    ganlabels = np.hstack((np.repeat(1, nsample), np.repeat(0, nsample)))

    print("analyze trained discriminator on fake vs training set")
    pred, cost = computecost(discriminator, gandata, ganlabels, "bce")
    roc, report = analyzebinaryclassifier(pred, ganlabels)
    if report["AreaUnderCurve"] < .5:
        #flip labels
        pred, cost = computecost(discriminator, gandata, 1 - ganlabels, "bce")
        roc, report = analyzebinaryclassifier(pred, 1 - ganlabels)
    print(report)
    #plotroc(roc)

    #gen fake data for every validation sample
    nsample = valid.shape[1]
    fake, _ = fwdprop(np.random.rand(generator[0]["n"], nsample), generator)
    #merge validation and fake data
    gandata = np.hstack((valid, fake))
    ganlabels = np.hstack((np.repeat(1, nsample), np.repeat(0, nsample)))

    print("analyze trained discriminator on fake vs vaidation set")
    pred, costv = computecost(discriminator, gandata, ganlabels, "bce")
    roc, reportv = analyzebinaryclassifier(pred, ganlabels)
    if reportv["AreaUnderCurve"] < .5:
        #flip labels
        pred, costv = computecost(discriminator, gandata, 1 - ganlabels, "bce")
        roc, reportv = analyzebinaryclassifier(pred, 1 - ganlabels)
    print(reportv)
    #plotroc(roc)

    #assert discriminator has poor potential to identify fake data
    assert reportv["AreaUnderCurve"] < .55

    #get fake data the discriminator thinks is real
    pred, _ = fwdprop(fake, discriminator)
    spoof = fake[:, pred[0, :] > report["OptimalThreshold"]]
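
A note on the label-flipping idiom that recurs in these tests: an AUC below 0.5 means the classifier ranks the two classes backwards, so rescoring against the complemented labels gives 1 - AUC, which is at least 0.5. A minimal sketch with a stand-in pairwise AUC (auc here is hypothetical; the tests use analyzebinaryclassifier):

import numpy as np

def auc(scores, labels):
    """Stand-in AUC: fraction of (positive, negative) pairs ranked correctly."""
    pos, neg = scores[labels == 1], scores[labels == 0]
    return np.mean(pos[:, None] > neg[None, :])

scores = np.array([0.9, 0.8, 0.2, 0.1])
labels = np.array([0, 0, 1, 1])  #scores rank these backwards
if auc(scores, labels) < 0.5:
    labels = 1 - labels          #flip labels, as the tests do
assert auc(scores, labels) >= 0.5
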
Example #7
    def pretrain(self, state, validation=None):
        """ will pretrain deep network model by contrastive divergence """

        #make sure inputs all have the same number of observations
        nobv = state.shape[1]
        if validation is not None and validation.shape[0] != nobv:
            print("runtime error in pretrain: inconsistent number of observations!")
            return

        #get network input size
        nfeat = state.shape[0]  #network input size

        if validation is None:
            #manually set validation mask to all False (no held-out observations)
            validation = np.full(nobv, False)

        #partition out validation patients from dataset
        intrain = ~validation
        nobv = np.sum(intrain)
        #exit if too few participants remain
        if nobv < 1:
            print("too few participants found for training")
            return
        #otherwise proceed with training
        data = state[:, intrain].reshape((nfeat, nobv))

        #TODO: pop off the last layer in the model and add random weights to the target and prediction nets

        #return untrained autoencoder
        _, autoencoder = contrastivedivergence(self.prednet, data, maxepoch=0)
        print(autoencoder)

        #calculate initial mean squared error
        pred, _ = fwdprop(data, autoencoder)
        icost, _ = loss("mse", pred, data)
        print(icost)

        #train model
        _, autoencoder = contrastivedivergence(self.prednet,
                                               data,
                                               maxepoch=100)

        #calculate final mean squared error
        pred, _ = fwdprop(data, autoencoder)
        cost, _ = loss("mse", pred, data)

        #print(autoencoder)
        print(icost)
        print(cost)

        #reinit the target network(s)
        #with the prediction network
        #self.targetnet = copy_ffn(self.prednet)
        self.targetnet1 = copy_ffn(self.prednet)
        self.targetnet2 = copy_ffn(self.prednet)
        self.targetnet3 = copy_ffn(self.prednet)
        self.targetnet4 = copy_ffn(self.prednet)
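
The boolean-mask partition pretrain performs can be seen in isolation below: validation marks held-out observations (columns) and ~validation selects the training columns. Shapes are illustrative:

import numpy as np

state = np.random.rand(3, 8)                      #nfeat x nobv
validation = np.zeros(state.shape[1], dtype=bool)
validation[:2] = True                             #hold out the first two columns
intrain = ~validation
train = state[:, intrain]
assert train.shape == (3, int(np.sum(intrain)))
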