def __init__(self,
             discount=0.95,
             exploration_rate=1.0,
             exploration_rate_decay=.99,
             target_every=2):
    """Store the learning hyperparameters and build the agent's networks.

    Args:
        discount: how much future rewards are valued w.r.t. current ones.
        exploration_rate: initial exploration rate.
        exploration_rate_decay: controls the transition from exploration to
            exploitation.
        target_every: how many iterations to skip before the prediction
            network is swapped with the target network.
    """
    # --- hyperparameters -------------------------------------------------
    self.discount = discount
    self.exploration_rate = exploration_rate
    self.exploration_rate_decay = exploration_rate_decay
    self.target_every = target_every

    # --- body plan shared by every network -------------------------------
    # input has 6 neurons, one for each metabolite conc. and one for the
    # time horizon; output has 1 neuron, representing the only action and
    # its value function approximation
    self.bodyplan = read_bodyplan("crpm/data/abbc_bodyplan.csv")

    # --- prediction network and its current prediction error -------------
    self.prednet = init_ffn(self.bodyplan)
    self.loss = None

    # --- four independently initialized target networks ------------------
    (self.targetnet1,
     self.targetnet2,
     self.targetnet3,
     self.targetnet4) = [init_ffn(self.bodyplan) for _ in range(4)]

    # counter used to decide when the target network gets updated with the
    # prediction network
    self.iteration = 0
def test_solve_numberadder():
    """Gradient descent must bring number-adder weights from 1.1 back to 1.

    The layer-1 weights are all set to 1.1, the model is trained on
    numberadder.csv with mean squared error, and the trained weights are
    required to be within a relative tolerance of .005 of 1.0.
    """
    import numpy as np
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset
    from crpm.gradientdecent import gradientdecent

    # shallow model: 5 inputs and 1 output for the number adder data
    model = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # overwrite the layer-1 weights with a constant 1.1
    adder_layer = model[1]
    adder_layer["weight"] = np.full(adder_layer["weight"].shape, 1.1)

    # first five rows are the inputs, the last row is the target
    _, table = load_dataset("crpm/data/numberadder.csv")
    inputs = table[:5]
    targets = table[-1]

    # train with mean squared error; the three return values are not needed
    _, _, _ = gradientdecent(model, inputs, targets, "mse")

    trained = model[1]["weight"]
    print(trained)
    assert np.allclose(trained, 1.0, rtol=.005)
def test_fwdprop_numberadder():
    """Forward propagation with unit weights must reproduce the number adder.

    With every layer-1 weight equal to one, the prediction for the first five
    rows of numberadder.csv has to match the last row to a relative tolerance
    of 1E-7.
    """
    import numpy as np
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop

    # shallow model: 5 inputs and 1 output for the number adder data
    model = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # overwrite the layer-1 weights with ones
    model[1]["weight"] = np.full(model[1]["weight"].shape, 1.0)

    # split the example data into inputs (rows 0-4) and target (last row)
    _, table = load_dataset("crpm/data/numberadder.csv")
    inputs, expected = table[:5], table[-1]

    # only the prediction is needed; the propagation state is dropped
    predicted, _ = fwdprop(inputs, model)

    assert np.allclose(expected, predicted, rtol=1E-7)
def test_backprop_numberadder():
    """A solved number adder must yield zero forces of the proper shape.

    With unit layer-1 weights the mean-squared-error forces on the top layer
    are expected to vanish; weight forces must have shape (1, 5) and bias
    forces shape (1, 1).
    """
    import numpy as np
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop

    # shallow model for the numberadder.csv data with unit layer-1 weights
    addermodel = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))
    addermodel[1]["weight"] = np.full(addermodel[1]["weight"].shape, 1.0)

    # forward pass, loss derivative and backward pass on the example data
    _, table = load_dataset("crpm/data/numberadder.csv")
    pred, state = fwdprop(table[:5], addermodel)
    _, dloss = loss("mse", pred, table[-1])
    forces, _ = backprop(addermodel, state, dloss)

    # check shape first, then that the force is numerically zero; forces are
    # shifted by one so the relative tolerance is meaningful near zero
    top_forces = forces[-1]
    for key, shape in (("fweight", (1, 5)), ("fbias", (1, 1))):
        assert top_forces[key].shape == shape
        assert np.allclose(1 + top_forces[key], 1, rtol=1E-7)
def test_numadd_forcedir():
    """A number adder with initial weights > 1 must have negative forces.

    Every layer-1 weight is set to 1.1 and all top-layer weight forces computed
    with mean squared error are required to be strictly negative.
    """
    import numpy as np
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.backprop import backprop

    # shallow model for the numberadder.csv data with layer-1 weights of 1.1
    addermodel = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))
    addermodel[1]["weight"] = np.full(addermodel[1]["weight"].shape, 1.1)

    # forward pass, loss derivative and backward pass on the example data
    _, table = load_dataset("crpm/data/numberadder.csv")
    pred, state = fwdprop(table[:5], addermodel)
    _, dloss = loss("mse", pred, table[-1])
    forces, _ = backprop(addermodel, state, dloss)

    weight_forces = forces[-1]["fweight"]
    assert np.all(weight_forces < 0)
def __init__(self, desc, std=None, pre=None, post=None):
    """Define the model from a description, with optional pre/post processors.

    Args:
        desc: either a str, which is passed to read_bodyplan, or a list,
            which is used directly as the bodyplan.
        std: initial weight distribution parameter; stored as
            self.weightstd and forwarded to init_ffn as weightstd.
        pre: optional iterable of layer dicts used as the static
            pre-processing body. It is stored by reference and each layer's
            "desc" entry is tagged with ' static pre-processor'.
        post: optional iterable of layer dicts used as the static
            post-processing body. It is stored by reference and each layer's
            "desc" entry is tagged with ' static post-processor'.

    Raises:
        TypeError: if desc is neither a str nor a list. Previously this case
            fell through and failed later when self.bodyplan was read.
    """
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.ffn_bodyplan import init_ffn

    def tag_layers(layers, indicator):
        """Append indicator to each layer description exactly once."""
        if layers is None:
            return
        for layer in layers:
            # the layers belong to the caller and may be shared between
            # several models, so never stack the indicator a second time
            if not layer["desc"].endswith(indicator):
                layer["desc"] = layer["desc"] + indicator

    #save weight variance parameter
    self.weightstd = std

    #get bodyplan from a file description or from a list description
    if isinstance(desc, str):
        self.bodyplan = read_bodyplan(desc)
    elif isinstance(desc, list):
        self.bodyplan = desc
    else:
        raise TypeError(
            "desc must be a bodyplan file path (str) or a bodyplan (list), "
            "got " + type(desc).__name__)

    #define model from bodyplan
    self.body = init_ffn(self.bodyplan, weightstd=self.weightstd)

    #link static pre-processing body and mark its layers in the description
    self.pre = pre
    tag_layers(self.pre, ' static pre-processor')

    #link static post-processing body and mark its layers in the description
    self.post = post
    tag_layers(self.post, ' static post-processor')
def setup_afmodel():
    """Return the model prototype and the data stored in afmodel.npz.

    The prototype is built with init_ffn from afmodel_bodyplan.csv. The first
    four arrays of the archive (in the archive's key order) are returned; per
    the original notes they are cohort1 data, cohort1 labels, cohort2 data and
    cohort2 labels.

    Returns:
        tuple of (prototype, array0, array1, array2, array3).

    Raises:
        IndexError: if the archive holds fewer than four arrays.
    """
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.ffn_bodyplan import init_ffn

    #create model from bodyplan file
    bodyplan = read_bodyplan("crpm/data/afmodel_bodyplan.csv")
    prototype = init_ffn(bodyplan)

    #read the arrays inside a context manager so the archive's file handle
    #is closed instead of being leaked
    with np.load("crpm/data/afmodel.npz") as data:
        #keys of the archive represent the individual arrays
        keylist = list(data.keys())
        arrays = [data[key] for key in keylist[:4]]

    #return encoder prototype, cohort1 data, cohort1 labels, cohort2 data,
    #cohort2 labels
    return prototype, arrays[0], arrays[1], arrays[2], arrays[3]
def setup_toruscases_deep():
    """Build the deep torus model and load the torus data.

    Returns:
        tuple of (model, data): the model initialized from
        intorus_deep_bodyplan.csv and the second element returned by
        load_dataset for intorus.csv.
    """
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset

    # model first, straight from the deep bodyplan file
    deep_model = init_ffn(read_bodyplan("crpm/data/intorus_deep_bodyplan.csv"))

    # the first element of the loaded pair is not needed here
    _unused, samples = load_dataset("crpm/data/intorus.csv")

    return deep_model, samples
def setup_numberadder():
    """Build the number adder model and load its dataset.

    Returns:
        tuple of (model, keys, data): the model initialized from
        numberadder_bodyplan.csv followed by both elements returned by
        load_dataset for numberadder.csv.
    """
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset

    # model first, straight from the bodyplan file
    adder = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))

    # both parts of the dataset are handed back to the caller
    header, table = load_dataset("crpm/data/numberadder.csv")

    return adder, header, table
def setup_periodiccases():
    """Build the periodic-cases model and load its data.

    Returns:
        tuple of (model, data): the model initialized from
        periodiccases_bodyplan.csv and the second element returned by
        load_dataset for periodiccases.csv.
    """
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset

    # model first, straight from the bodyplan file
    periodic_model = init_ffn(
        read_bodyplan("crpm/data/periodiccases_bodyplan.csv"))

    # the first element of the loaded pair is not needed here
    _unused, samples = load_dataset("crpm/data/periodiccases.csv")

    return periodic_model, samples
def setup_multicorrel_deep_c():
    """Build the deep multicorrel model and load the nested Cs data.

    Returns:
        tuple of (model, data): the model initialized from
        multicorrel_deep_bodyplan.csv and the second element returned by
        load_dataset for multicorrel_C.csv.
    """
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset

    # model first, straight from the deep bodyplan file
    deep_model = init_ffn(
        read_bodyplan("crpm/data/multicorrel_deep_bodyplan.csv"))

    # the first element of the loaded pair is not needed here
    _unused, samples = load_dataset("crpm/data/multicorrel_C.csv")

    return deep_model, samples
def setup_overfitting_shallow():
    """Build the shallow overfitting model and load training/validation data.

    Returns:
        tuple of (model, keys, traindata, validdata). The keys come from the
        validation file. The first key and the first row of both data sets are
        dropped before returning.
    """
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset

    # model first, straight from the shallow bodyplan file
    shallow_model = init_ffn(
        read_bodyplan("crpm/data/overfitting_shallow_bodyplan.csv"))

    # training file is read before the validation file; only the validation
    # keys are kept
    _unused, training = load_dataset("crpm/data/overfitting_training.csv")
    header, validation = load_dataset("crpm/data/overfitting_validation.csv")

    # strip the leading key / leading row from everything that is returned
    trimmed_header = header[1:]
    trimmed_training = training[1:, :]
    trimmed_validation = validation[1:, :]
    return shallow_model, trimmed_header, trimmed_training, trimmed_validation
def reinit(self, std=None):
    """Reinitialize the FFN body from the stored bodyplan.

    self.body is rebuilt by calling init_ffn on self.bodyplan, replacing the
    previous body. A notice is always printed.

    Args:
        std: optional new weight distribution parameter. When given it
            replaces self.weightstd before the body is rebuilt; when None
            the stored self.weightstd is reused.

    Returns:
        None. The object is modified in place.
    """
    # imported locally so the name is in scope; the original body used
    # init_ffn without importing it in this method
    from crpm.ffn_bodyplan import init_ffn

    #always inform user model is being reinitialized
    print("Reinitialing FFN body!")

    #reset weight distribution if given
    if std is not None:
        self.weightstd = std

    #define model from bodyplan
    self.body = init_ffn(self.bodyplan, weightstd=self.weightstd)
def test_init_ffn_types():
    """Check the element types stored in every layer dictionary.

    Every layer must carry an int "layer", an int "n" and a str "activation".
    Layers above the input layer must additionally carry a float "regval" and
    ndarray "weight" and "bias" entries.
    """
    import numpy as np
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan

    # expected type per key, in the order the checks are made
    common_types = (("layer", int), ("n", int), ("activation", str))
    trainable_types = (("regval", float),
                       ("weight", np.ndarray),
                       ("bias", np.ndarray))

    model = init_ffn(read_bodyplan("crpm/data/example_ffn_bodyplan.csv"))

    for layer in model:
        for key, expected in common_types:
            assert isinstance(layer[key], expected)
        # the input layer (layer 0) is exempt from the remaining checks
        if layer["layer"] > 0:
            for key, expected in trainable_types:
                assert isinstance(layer[key], expected)
def test_solve_numberadder():
    """Langevin dynamics must solve the number adder from preset weights.

    The layer-1 weights start at 1.5. After training with mean squared error
    the cost returned by langevindynamics must be lower than the initial cost
    and the weights must be within a relative tolerance of .005 of 1.0.
    """
    import numpy as np
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan
    from crpm.dataset import load_dataset
    from crpm.fwdprop import fwdprop
    from crpm.lossfunctions import loss
    from crpm.langevindynamics import langevindynamics

    # training and test tables: rows 0-4 are inputs, the last row is the target
    _, train = load_dataset("crpm/data/numberadder.csv")
    _, test = load_dataset("crpm/data/numberadder_test.csv")
    train_x, train_y = train[:5], train[-1]
    test_x, test_y = test[:5], test[-1]

    # shallow model: 5 inputs and 1 output, layer-1 weights preset to 1.5
    model = init_ffn(read_bodyplan("crpm/data/numberadder_bodyplan.csv"))
    model[1]["weight"] = np.full(model[1]["weight"].shape, 1.5)

    # mean squared error before any training
    pred, _ = fwdprop(train_x, model)
    icost, _ = loss("mse", pred, train_y)
    print("icost = " + str(icost))
    print(model[1]["weight"])

    # train with mean squared error, validating against the test table
    _, cost = langevindynamics(model, train_x, train_y, "mse",
                               test_x, test_y,
                               maxepoch=300000, maxbuffer=1000)
    print("cost =" + str(cost))
    print(model[1]["weight"])

    assert icost > cost
    assert np.allclose(model[1]["weight"], 1.0, rtol=.005)
def setup_spectra2():
    """Return the spectra2 model and the first array stored in spectra2.npz.

    The model is built with init_ffn from spectra2_bodyplan.csv. The archive's
    keys represent individual arrays; only the array under the first key is
    returned.

    Returns:
        tuple of (model, array).

    Raises:
        IndexError: if the archive holds no arrays.
    """
    import numpy as np
    from crpm.ffn_bodyplan import read_bodyplan
    from crpm.ffn_bodyplan import init_ffn

    #create model from bodyplan file
    bodyplan = read_bodyplan("crpm/data/spectra2_bodyplan.csv")
    model = init_ffn(bodyplan)

    #read the array inside a context manager so the archive's file handle
    #is closed instead of being leaked
    with np.load("crpm/data/spectra2.npz") as data:
        keylist = list(data.keys())
        spectra = data[keylist[0]]

    return model, spectra
def test_init_ffn():
    """Check the model built from example_ffn_bodyplan.csv layer by layer.

    Verifies, in this order: layer indices, node counts, weight shapes, bias
    shapes and activation names of the five layers.
    """
    from crpm.ffn_bodyplan import init_ffn, read_bodyplan

    # expected node count and activation of layers 0..4
    nodes = (2, 3, 5, 7, 1)
    activations = ('linear', 'relu', 'relu', 'relu', 'logistic')

    model = init_ffn(read_bodyplan("crpm/data/example_ffn_bodyplan.csv"))

    # every layer knows its own index
    for index in range(5):
        assert model[index]["layer"] == index

    # node counts
    for index, count in enumerate(nodes):
        assert model[index]["n"] == count

    # weights connect the previous layer (columns) to this layer (rows);
    # layer 0 is not checked
    for index in range(1, 5):
        assert model[index]["weight"].shape == (nodes[index],
                                                nodes[index - 1])

    # biases are column vectors with one entry per node
    for index in range(1, 5):
        assert model[index]["bias"].shape == (nodes[index], 1)

    # activation names
    for index, name in enumerate(activations):
        assert model[index]["activation"] == name
def init_som(model, state, n=100, nx=None, ny=None, hcp=False):
    """Initialize a map from an ffn classifier model.

    The map is a copy of the model's bodyplan whose top layer is replaced by
    n "gaussian" mapping nodes. Node weights are initialized on the plane
    spanned by the leading principal components of the penultimate layer
    activity.

    Args:
        model: FFN model whose final layer is mapped. Its top layer must use
            the "logistic" or "softmax" activation.
        state: per-layer state; state[-2]["activity"] is read as the
            penultimate layer activity with features in rows (statistics are
            taken along axis 1).
        n: number of mapping nodes, default is 100.
        nx: number of nodes in x direction (forwarded to coords).
        ny: number of nodes in y direction (forwarded to coords).
        hcp: boolean indicating use of hexagonal close packing, default is
            False (forwarded to coords).

    Returns:
        tuple of (map, nclass) where nclass is the size of the model's top
        layer, but at least 2.

    Raises:
        ValueError: if the top layer of model is not a classifier layer. The
            original code called the undefined name stop() here.
    """
    import numpy as np
    from scipy.spatial import distance_matrix
    from crpm.ffn_bodyplan import get_bodyplan
    from crpm.ffn_bodyplan import init_ffn

    #make sure ffn top layer has logistic or softmax activation
    if model[-1]["activation"] not in ("logistic", "softmax"):
        raise ValueError("som::init_map - input model is not a classifier.")

    #define number of clusters from size of top layer
    nclass = max(model[-1]["n"], 2)

    #get model bodyplan
    bodyplan = get_bodyplan(model)

    #edit bodyplan toplayer to reflect number of mapping nodes
    bodyplan[-1]["n"] = n
    bodyplan[-1]["activation"] = "gaussian"

    #create map (named sommap so the builtin map is not shadowed)
    sommap = init_ffn(bodyplan)

    #add node geometry to top layer and save unit cell scale factor
    sommap[-1]["coord"], scale = coords(n, nx, ny, hcp)

    #calculate node pair distances in mapping space for given geometry
    sommap[-1]["nodedist"] = distance_matrix(sommap[-1]["coord"],
                                             sommap[-1]["coord"])

    #halve the scale factor; the coordinates are divided by it below
    scale = np.multiply(scale, 0.5)

    #initialize node weights based on the
    #first 3 principal components of the penultimate layer activity

    #matrix with penultimate features in rows
    act = state[-2]["activity"]
    #mean of each feature
    mact = np.mean(act, axis=1)
    #mean center the features (observations in rows after the transpose)
    cact = act.T - mact
    #covariance matrix of centered features; np.cov returns a 0-d array for
    #a single feature, so force it to be a matrix
    vact = np.atleast_2d(np.cov(cact.T))
    #eigendecomposition of the symmetric covariance matrix; eigh returns
    #real eigenvalues in ascending order with eigenvectors in columns
    values, vectors = np.linalg.eigh(vact)
    #reorder so the largest variance direction comes first
    order = np.argsort(values)[::-1]
    values = values[order]
    vectors = vectors[:, order]
    #feature standard deviation for scaling
    sig = np.std(act, axis=1)[:, None]
    print(mact)
    print(sig)
    print(values)
    print(vectors)

    #principal components as rows: eigenvectors are the COLUMNS of vectors
    components = vectors.T

    #add zero vectors if number of features is less than 3
    if components.shape[0] < 3:
        zerovectors = np.zeros((3 - components.shape[0],
                                components.shape[1]))
        components = np.vstack((components, zerovectors))

    #project node coordinates onto first 3 principal coordinates:
    #unit scale coordinates then scale by feature stdev then translate to
    #feature mean
    sommap[-1]["weight"] = (
        (sommap[-1]["coord"] / scale).dot(components[0:3, :])
    ) * sig.T + mact[:, None].T

    return sommap, nclass
def contrastivedivergence(model, data, validata=None, ncd=1, maxepoch=100,
                          nadj=10, momentum=.5, batchsize=10, finetune=6):
    """Unfold an FFN model and train it layer by layer by contrastive divergence.

    The model's bodyplan is mirrored into a symmetric ("unfolded") bodyplan.
    For every layer above the input, the layer and its mirrored counterpart
    are treated as the hidden and visible layer of an RBM, which is trained
    on the activity of the previously trained layer.

    Args:
        model: deep FFN model. Training will modify model: each trained
            hidden layer is written back into model[layerindex].
        data: features in rows, observations in columns.
            NOTE(review): the columns are shuffled through a transposed
            view, which for an ndarray also permutes the caller's array in
            place - confirm this is intended.
        validata: optional validation data. When None, the last 20% of the
            minibatches (nbatch // 5) of the shuffled data are held out
            for validation.
        ncd: number of contrastive divergence (Gibbs sampling) steps; at
            least one step is always taken.
        maxepoch: hard limit of learning iterations, default is 100. When
            smaller than 1 the unfolded model is returned untrained.
        nadj: period of learning rate adjustment in units of epochs; also the
            length of the free energy history used for the trend checks.
        momentum: fraction of previous change in weight carried over to next
            weight update step.
        batchsize: number of observations per minibatch (cast to int).
        finetune: the learning rate scale alpha_norm is 10**(-finetune).

    Returns:
        exit condition and trained unfolded model. Exit conditions are
        0) learning converged, 1) learning not converged, and
        -1) learning cannot be performed.
        In the code 0 is the initial value, 1 is set when the epoch counter
        exceeds maxepoch and -1 when no RBM type matches a layer pair.
    """
    import numpy as np
    from crpm.activationfunctions import activation
    from crpm.ffn_bodyplan import get_bodyplan
    from crpm.ffn_bodyplan import copy_bodyplan
    from crpm.ffn_bodyplan import push_bodyplanlayer
    from crpm.ffn_bodyplan import init_ffn

    #init exit condition to default
    exitcond = 0

    #get model bodyplan
    bodyplan = get_bodyplan(model)

    #get number of model layers
    nlayer = len(model)

    #copy bodyplan
    unfolded_bodyplan = copy_bodyplan(bodyplan)

    #push layers in reversed order to create a symmetric bodyplan
    #(the top layer is skipped so it appears only once, in the middle)
    for layer in reversed(bodyplan[:-1]):
        push_bodyplanlayer(unfolded_bodyplan, layer)

    #create unfolded model from symmetric bodyplan
    smodel = init_ffn(unfolded_bodyplan)

    #return symmetric model if maxepoch = 0
    if maxepoch < 1:
        return exitcond, smodel

    #define minibatches
    #get number of observations in data
    nobv = data.shape[1]
    #calculate number of minibatches needed
    #(observations beyond nbatch * batchsize do not form a full minibatch)
    batchsize = int(batchsize)
    nbatch = nobv // batchsize
    #randomize observation order: shuffle acts on the first axis, so the
    #data is transposed to put observations in rows and transposed back
    data = data.T
    np.random.shuffle(data)
    data = data.T

    #alpha norm scales learning rate by max force relative to weight
    alpha_norm = 10**(-finetune)

    #initialize previous layer activity with input data for layer 0
    prevlayeractivity = data
    #do the same for the validation data
    validprevlayeractivity = validata
    if validata is None:
        #use last 20% of batches for validation
        vbatch = nbatch // 5
        nbatch = nbatch - vbatch
        prevlayeractivity = data[:, 0:nbatch * batchsize]
        validprevlayeractivity = data[:, nbatch * batchsize:]

    # loop over first half of symmetric model beginning with layer 1
    for layerindex in range(1, nlayer):

        #encoding index is layerindex
        #decoding index is the mirrored position in the unfolded model
        decodeindex = 2 * nlayer - (layerindex + 1)

        #define layers
        vislayer = smodel[decodeindex]
        hidlayer = smodel[layerindex]

        #get number of nodes per layer
        nv = vislayer["n"]
        nh = hidlayer["n"]

        #initialize connecting weights uniformly within
        #+/- 4 * sqrt(6 / (nv + nh))
        hidlayer["weight"] = ((np.random.rand(nh, nv) - 1 / 2) * 8 *
                              np.sqrt(6 / (nh + nv)))

        #determine appropriate RBM type
        vtype = vislayer["activation"]
        htype = hidlayer["activation"]
        rbmtype = None

        # The nested vsample/hsample functions below take no arguments: they
        # read vact and hstate from this enclosing function at call time, so
        # the training loop sets those names before calling them.

        #1. binary
        if vtype == "logistic" and htype == "logistic":
            rbmtype = "binary"

            #define activity for visible layer
            def vsample():
                """returns logistic visible layer activity given hidden layer state"""
                stimulus = np.add(hidlayer["weight"].T.dot(hstate),
                                  vislayer["bias"])
                return activation("logistic", stimulus)

            #define activity for hidden layer
            def hsample():
                """returns logistic hidden layer activity and stochastic binary
                state given visible layer activity"""
                stimulus = np.add(hidlayer["weight"].dot(vact),
                                  hidlayer["bias"])
                hact = activation("logistic", stimulus)
                return hact, hact > np.random.random(hact.shape)

            #define free energy equation for binary-binary RBM
            def feng(act):
                #visible bias term: dim (1,m)
                vbterm = -vislayer["bias"].T.dot(act)
                #hidden layer stimulus : dim (nh,m)
                stimulus = np.add(hidlayer["weight"].dot(act),
                                  hidlayer["bias"])
                #hidden term log(1+exp(stimulus)) : dim (nh,m)
                #written as stimulus + log(1+exp(-stimulus)) where the
                #stimulus is not negative, for numerical stability
                # NOTE(review): np.where evaluates both branch expressions
                # for every element, so np.exp can still overflow (with a
                # RuntimeWarning) in the branch that is not selected.
                hidden_term = np.where(
                    stimulus < 0, np.log(1 + np.exp(stimulus)),
                    stimulus + np.log(1 + np.exp(-stimulus)))
                #sum over hidden units to get true hidden_term : dim (1,m)
                hidden_term = np.sum(hidden_term, axis=0)
                #free energy = sum over samples (visible_bias_term - hidden_term)
                return np.sum(vbterm - hidden_term)

        #2. Gaussian-Bernoulli
        if vtype == "linear" and htype == "logistic":
            rbmtype = "gaussian-bernoulli"

            #Get standard deviation for real-valued visible units
            #(computed from the training activity only)
            sigma = np.std(prevlayeractivity, axis=1, keepdims=True)

            #define activity for visible layer
            def vsample():
                """returns linear plus gaussian noise visible layer activity
                given hidden layer state"""
                stimulus = np.add(hidlayer["weight"].T.dot(hstate) * sigma,
                                  vislayer["bias"])
                return np.random.normal(loc=stimulus, scale=sigma)

            #define activity for hidden layer
            def hsample():
                """returns logistic hidden layer activity and stochastic binary
                state given scaled visible layer activity"""
                stimulus = np.add(hidlayer["weight"].dot(vact / sigma),
                                  hidlayer["bias"])
                act = activation("logistic", stimulus)
                return act, act > np.random.random(act.shape)

            #define free energy equation for Gaussian - Bernoulli RBM
            def feng(act):
                #hidden layer stimulus : dim (nh,m)
                # NOTE(review): hsample divides the visible activity by sigma
                # but this stimulus uses act unscaled - confirm intended.
                stimulus = np.add(hidlayer["weight"].dot(act),
                                  hidlayer["bias"])
                #hidden term log(1+exp(stimulus)) : dim (nh,m)
                #same stable form as in the binary case
                hidden_term = np.where(
                    stimulus < 0, np.log(1 + np.exp(stimulus)),
                    stimulus + np.log(1 + np.exp(-stimulus)))
                #sum over hidden units to get true hidden_term : dim (1,m)
                hidden_term = np.sum(hidden_term, axis=0)
                #visible bias term: dim (1,m)
                vbterm = -vislayer["bias"].T.dot(act)
                #square term
                sqterm = np.trace(
                    act.T.dot(act) +
                    vislayer["bias"].T.dot(vislayer["bias"])) / 2
                #free energy = vbterm +[act^2 +vbias^2]/2 - hidden_term)
                return np.sum(vbterm - hidden_term) + sqterm

        #3. Bernoulli-Gaussian
        if vtype == "logistic" and htype == "linear":
            rbmtype = "bernoulli-gaussian"

            #define activity for visible layer
            def vsample():
                """returns logistic visible layer activity given unit scaled
                hidden layer activity"""
                stimulus = np.add(hidlayer["weight"].T.dot(hstate),
                                  vislayer["bias"])
                return activation("logistic", stimulus)

            #define activity for hidden layer
            def hsample():
                """returns linear plus unit var gaussian noise hidden layer
                activity and stochastic state given vislayer activity"""
                stimulus = np.add(hidlayer["weight"].dot(vact),
                                  hidlayer["bias"])
                return stimulus, np.random.normal(loc=stimulus)

            #the free energy below is known to be unfinished for this RBM
            #type, so the user is warned every time it is selected
            print(
                "free energy function is not properly defined for Bernouli-Gaussian RBM"
            )

            def feng(act):
                stimulus = np.add(hidlayer["weight"].dot(act),
                                  hidlayer["bias"])
                #visible bias term
                # NOTE(review): bias.dot(bias) without a transpose looks
                # shape-incompatible if the bias is a column vector with more
                # than one entry - confirm before relying on this branch.
                vbterm = -np.transpose(act).dot(vislayer["bias"])
                vbtemp = np.add(
                    np.transpose(act).dot(act),
                    np.transpose(vislayer["bias"].dot(vislayer["bias"])))
                vbterm = np.add(vbterm, vbtemp / 2).T
                # init hidden term
                hidden_term = activation("vacuum", stimulus)
                #for exp(stim) term numerical stability
                #first calc where stimulus is negative
                xidx = np.where(stimulus < 0)
                #hidden term function for negative stimulus
                hidden_term[xidx] = np.log(1 + np.exp(stimulus[xidx]))
                #then calc where stimulus is not negative
                xidx = np.where(stimulus >= 0)
                #hidden term function for not negative stimulus
                hidden_term[xidx] = stimulus[xidx] + np.log(
                    1 + np.exp(-stimulus[xidx]))
                #free energy = visible_bias_term - hidden_term
                return np.sum(vbterm - np.sum(hidden_term, axis=0))

        #4. exit if unknown RBM type
        if rbmtype == None:
            exitcond = -1  #cannot run contrastive divergence on this model
            print(
                "Error in contrastivedivergence.py: cannot find appropriate RBM type."
            )
            print("Ensure model has only logistic or linear layers.")
            print(
                "Also ensure linear layers are not adjacent - that would be pointless btw."
            )
            return exitcond, smodel

        # continuous loop over learning steps (use exit conditions)
        print("training " + rbmtype + " RBM in layer " + str(layerindex))
        continuelearning = True
        momentum_adj = 0
        epoch = 0
        # NOTE(review): err is accumulated below but never read in this
        # function.
        err = 0
        dweight = np.zeros(hidlayer["weight"].shape)
        dhbias = np.zeros(hidlayer["bias"].shape)
        dvbias = np.zeros(vislayer["bias"].shape)
        #free energy histories of length nadj, pre-filled with the free
        #energy before any training
        valid_feng = np.full(nadj, feng(validprevlayeractivity))
        train_feng = np.full(nadj, feng(prevlayeractivity))
        earlystop = False
        while continuelearning:

            #increment epoch counter
            epoch += 1

            #loop over minibatches
            for batch in range(nbatch):

                #get minibatch
                minibatch = prevlayeractivity[:, batch *
                                              batchsize:(batch + 1) *
                                              batchsize]

                # --- positive phase ---
                # get visible layer activity
                vact = minibatch
                # get hidden layer activity and poshidstates
                hact, hstate = hsample()
                # get product of visible layer and hidden layer activities
                pprod = hact.dot(vact.T)
                # get sum of visible layer activity
                pvsum = np.sum(vact, axis=1, keepdims=True)
                # get sum of hidden layer activity
                phsum = np.sum(hact, axis=1, keepdims=True)

                # --- negative phase ---
                # loop over ncd Gibbs sampling iterations (at least one iteration)
                continuegibbs = True
                gibbs = 0
                while continuegibbs:
                    #increment gibbs counter
                    gibbs += 1
                    # get visible layer activity | hidden layer states
                    vact = vsample()
                    # sample hidden layer state | visible layer activity
                    hact, _ = hsample()
                    # use hidden layer activity instead of state for subsequent
                    # iterations so we overwrite hstate with the activity
                    hstate = np.copy(hact)
                    #exit condition
                    if gibbs >= ncd:
                        continuegibbs = False

                # get product of visible layer and hidden layer activities
                nprod = hact.dot(vact.T)
                # get sum of visible layer activity
                nvsum = np.sum(vact, axis=1, keepdims=True)
                # get sum of hidden layer activity
                nhsum = np.sum(hact, axis=1, keepdims=True)

                # accumulate error (squared reconstruction difference)
                err += np.sum(np.square(minibatch - vact))

                # keep the previous forces for the momentum terms below
                # get forces on visible layer biases
                dvbias0 = dvbias
                dvbias = (pvsum - nvsum) / batchsize
                # get forces on the hidden layer biases
                dhbias0 = dhbias
                dhbias = (phsum - nhsum) / batchsize
                #calculate forces on weights
                dweight0 = dweight
                dweight = (pprod - nprod) / batchsize

                #add regularization penalty term if specified by layer
                #(lreg 1 penalizes by the weight sign, lreg 2 by the weight)
                if hidlayer["regval"] > 0:
                    if hidlayer["lreg"] == 1:
                        dweight -= hidlayer["regval"] * np.sign(
                            hidlayer["weight"])
                    if hidlayer["lreg"] == 2:
                        dweight -= hidlayer["regval"] * hidlayer["weight"]

                #adjust learning rate to ensure integrator doesn't break;
                #the rate is only recomputed when no force is numerically
                #zero, otherwise the previous rate is reused
                # NOTE(review): if this condition is false on the very first
                # minibatch, alpha has not been assigned yet and the updates
                # below would fail - confirm and consider a default.
                if np.all(abs(dweight) >= np.finfo(float).eps):
                    alpha = alpha_norm * np.max(
                        np.divide(hidlayer["weight"], dweight))

                #update weights with momentum term
                #(the momentum term uses the previous force, not scaled by alpha)
                hidlayer["weight"] += momentum_adj * dweight0 + alpha * dweight
                # update visible layer biases with momentum term
                vislayer["bias"] += momentum_adj * dvbias0 + alpha * dvbias
                # update hidden layer biases with momentum term
                hidlayer["bias"] += momentum_adj * dhbias0 + alpha * dhbias

            # record free energy every epoch in a circular history
            valid_feng[epoch % nadj] = feng(validprevlayeractivity)
            train_feng[epoch % nadj] = feng(prevlayeractivity)

            # periodically check free energy for overfitting
            if epoch % nadj == (nadj - 1):
                #default turn off momentum
                momentum_adj = 0
                #if train_feng is increasing (non-negative fitted slope)
                #then keep momentum off and continue training, else
                if np.polyfit(np.arange(nadj), train_feng, 1)[0] < 0:
                    # if valid_feng is inc then initiate earlystopping
                    if np.polyfit(np.arange(nadj), valid_feng, 1)[0] > 0:
                        earlystop = True
                    # else turn on momentum and continue training
                    else:
                        momentum_adj = momentum

            # - EXIT CONDITIONS -
            #exit if learning is taking too long
            #(checked after the epoch ran, so maxepoch + 1 epochs complete
            #unless early stopping ends training first)
            if epoch > int(maxepoch):
                print(
                    "Warning contrastivedivergence.py: Training is taking a long time!"
                    + " - Try increasing maxepoch - Training will end")
                exitcond = 1
                continuelearning = False
            #exit if naive earlystopping has been engaged
            if earlystop:
                print(
                    "Warning contrastivedivergence.py: early stopping after " +
                    str(epoch) + " epochs")
                continuelearning = False

        #symmetrize weights: the decoding layer gets the transposed
        #encoding weights
        vislayer["weight"] = hidlayer["weight"].T

        #copy trained hidlayer to original model
        model[layerindex] = hidlayer

        #promote prevlayeractivity to current hidlayer activity
        #(vact is set first because hsample reads it from this scope)
        vact = np.copy(prevlayeractivity)
        prevlayeractivity, _ = hsample()
        #promote validation data to current hidden layer too
        vact = np.copy(validprevlayeractivity)
        validprevlayeractivity, _ = hsample()

    # return exit condition
    return exitcond, smodel