# Shared imports for the functions in this section. `avar` is assumed to be an
# alias for torch.autograd.Variable; project-local modules (SeqData, Node, Tree,
# NavigationTask, generateTask, LSTMForwardModel, SimulationPolicy,
# GreedyValuePredictor, greedy_valueF, greedy_cont_valueF, Utils) come from
# elsewhere in the repository.
import os
import sys
import pickle

import numpy as np
import numpy.random as npr
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable as avar


def train(self, trainSeq, validSeq, nEpochs=800, epochLen=175,
          validateEvery=25, vbs=500, printEvery=5, noiseSigma=0.4):
    print('-- Starting Training (nE=' + str(nEpochs) + ',eL=' + str(epochLen) + ') --')
    optimizer = optim.Adam(self.parameters(), lr=0.03 * epochLen / 150.0)
    ns, na, tenv = self.stateSize, self.actionSize, trainSeq.env
    for epoch in range(nEpochs):
        if epoch % printEvery == 0: print('Epoch:', epoch, end='')
        loss = 0.0
        self.zero_grad()  # Zero out gradients
        for i in range(epochLen):
            self.reInitialize()  # Reset LSTM hidden state
            seq, label = trainSeq.randomTrainingPair()  # Current value
            # Add Gaussian noise, then renormalize the state/action blocks
            seq = [s + npr.randn(len(s)) * noiseSigma for s in seq]
            seq = [avar(torch.from_numpy(s).float()) for s in seq]
            seq = [torch.cat([self.stateSoftmax(sa[0:ns], tenv),
                              F.softmax(sa[ns:ns + na], dim=0)]) for sa in seq]
            seqn = torch.cat(seq).view(len(seq), 1, -1)  # [seqlen x batchlen x featureLen]
            prediction = self.forward(seqn)  # [-1,:]
            label = avar(torch.from_numpy(label).float())
            loss += self._lossFunction(prediction, label, env=tenv)
        loss.backward()
        optimizer.step()
        if epoch % printEvery == 0:
            print(" -> AvgLoss", str(loss.data[0] / epochLen))
        if epoch % validateEvery == 0:
            # acc1 is estimated on the validation set, acc2 on the training set
            bdata, blabels, bseqlen = validSeq.next(vbs, nopad=True)
            acc1, _ = self._accuracyBatch(bdata, blabels, validSeq.env)
            bdata, blabels, bseqlen = trainSeq.next(vbs, nopad=True)
            acc2, _ = self._accuracyBatch(bdata, blabels, tenv)
            print('\tCurrent Training Acc (est) =', acc2)
            print('\tCurrent Validation Acc (est) =', acc1)
    # Check training & final validation accuracy
    print('----')
    nmax = 5000  # Num from total to check at the end
    totalTrainAcc, nt = self._accuracyBatch(trainSeq.unpaddedData()[0:nmax],
                                            trainSeq.labels[0:nmax], trainSeq.env)
    print('Final Train Acc (' + str(nt) + '):', totalTrainAcc)
    totalValidAcc, nv = self._accuracyBatch(validSeq.unpaddedData()[0:nmax],
                                            validSeq.labels[0:nmax], validSeq.env)
    print('Final Validation Acc (' + str(nv) + '):', totalValidAcc)
def getLossFromAllNodes(self, alpha=0.5, lambda_h=-0.025, useHolder=False,
                        holderp=-5.0, useOnlyLeaves=False, gamma=0.01):
    targetNodes = self.allNodes
    if useOnlyLeaves: targetNodes = self.leaves
    totalInverseValue = avar(torch.FloatTensor([0.0]))
    totalEntropy = avar(torch.FloatTensor([0.0]))
    totalBranching = avar(torch.FloatTensor([0.0]))
    if not useHolder: holderp = 1.0  # p = 1 reduces the Holder mean to the plain mean
    nNodes = len(targetNodes)
    for i, node in enumerate(targetNodes):
        if i == 0:  # Root has no action; assign it an infinite loss
            node.loss = avar(torch.FloatTensor([float('inf')]))
            continue
        if node.branchingBreadth is not None:
            totalBranching += node.branchingBreadth.type(torch.FloatTensor)  # IGNORES PARENT TODO
        node.loss = -self.valueF(node.state)
        totalInverseValue += node.loss.pow(holderp)
        if node.action is not None:
            totalEntropy += -torch.sum(node.action[0] * torch.log(node.action[0]))
    # Penalize negative reward and entropy
    totalLosses = alpha * (totalInverseValue / nNodes).pow(1.0 / holderp) \
                  + lambda_h * totalEntropy
    # Penalize too many branches
    totalLosses += gamma * totalBranching / nNodes
    return totalLosses
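# Why the Holder exponent above matters: with p = holderp, the value term is
# the power (Holder) mean M_p = ((1/n) * sum_i l_i^p)^(1/p) over per-node
# losses l_i. At p = 1 this is the plain average; as p -> -inf it approaches
# min_i l_i, so a negative p (e.g. -5) acts as a soft-min that concentrates
# the gradient on the best node found in the tree. A minimal standalone
# sketch (NumPy only; the losses are made-up positive numbers, not model
# outputs):
import numpy as np

def holder_mean(losses, p):
    losses = np.asarray(losses, dtype=np.float64)
    return np.mean(losses ** p) ** (1.0 / p)

node_losses = [4.0, 2.0, 0.5]           # hypothetical per-node losses
print(holder_mean(node_losses, 1.0))    # plain mean, ~2.17
print(holder_mean(node_losses, -5.0))   # soft-min, ~0.62 (near the best node)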
def train(self, trainSet, validSet, nEpochs=1500, batch_size=200,
          validateEvery=200, vbs=500, printEvery=200):
    optimizer = optim.Adam(self.parameters(), lr=0.0003)
    lossFunction = nn.BCELoss()
    train_x, train_y = trainSet
    train_x = avar(torch.FloatTensor(train_x), requires_grad=False)
    train_y = avar(torch.FloatTensor(train_y), requires_grad=False)
    valid_x, valid_y = validSet
    valid_x = avar(torch.FloatTensor(valid_x), requires_grad=False)
    valid_y = avar(torch.FloatTensor(valid_y), requires_grad=False)
    ntrain, nvalid = len(train_x), len(valid_x)

    def getRandomMiniBatch(dsx, dsy, mbs, nmax):
        choices = torch.LongTensor(np.random.choice(nmax, size=mbs, replace=False))
        return dsx[choices], dsy[choices]

    for epoch in range(nEpochs):
        if epoch % printEvery == 0: print('Epoch:', epoch, end='')
        self.zero_grad()  # Zero out gradients
        batch_x, batch_y = getRandomMiniBatch(train_x, train_y, batch_size, ntrain)
        prediction = self.forward(batch_x)
        label = batch_y.unsqueeze(dim=1)
        loss = lossFunction(prediction, label)
        loss.backward()
        optimizer.step()
        if epoch % printEvery == 0:
            print(" -> AvgLoss", str(loss.data[0] / batch_size))
        if epoch % validateEvery == 0:
            batch_vx, batch_vy = getRandomMiniBatch(valid_x, valid_y, batch_size, nvalid)
            predv = self.forward(batch_vx)
            vy = batch_vy.unsqueeze(dim=1)
            acc = self._accuracyBatch(vy, predv)
            print("VACC (noiseless) =", '%.4f' % acc, end=', ')
            print('\n')
def train(self, trainSet, validSet, minibatch_size=200, maxIters=30000,
          testEvery=250, noiseSigma=0.2, noisyDataSetTxLoc=None, f_model_name=None):
    optimizer = optim.Adam(self.parameters(), lr=0.0000025 * minibatch_size)
    lossf = nn.MSELoss()  # nn.L1Loss() # nn.MSELoss()
    train_x, train_y = trainSet
    np.set_printoptions(precision=3)
    # Load cached noised data if available; otherwise noisify and cache it
    if noisyDataSetTxLoc is not None and os.path.exists(noisyDataSetTxLoc):
        print('Loading noised data (Note this ignores any changes to sigma)')
        with open(noisyDataSetTxLoc, 'rb') as fff:
            train_x_noisy = pickle.load(fff)
    else:
        print('Noisifying data')
        train_x_noisy = self.noisify(train_x, noiseSigma)
        if noisyDataSetTxLoc is not None:
            print('Saving noised data to', noisyDataSetTxLoc)
            with open(noisyDataSetTxLoc, 'wb') as fff:
                pickle.dump(train_x_noisy, fff)
    np.set_printoptions()
    train_x = avar(torch.FloatTensor(train_x), requires_grad=False)
    train_x_noisy = avar(torch.FloatTensor(train_x_noisy), requires_grad=False)
    train_y = avar(torch.FloatTensor(train_y), requires_grad=False)
    valid_x, valid_y = validSet
    valid_x = avar(torch.FloatTensor(valid_x), requires_grad=False)
    valid_y = avar(torch.FloatTensor(valid_y), requires_grad=False)
    ntrain, nvalid = len(train_x), len(valid_x)

    def getRandomMiniBatch(dsx, dsy, mbs, nmax):
        choices = torch.LongTensor(np.random.choice(nmax, size=mbs, replace=False))
        return dsx[choices], dsy[choices]

    print('Starting training')
    switchTime = 0
    noiselessProb = 0.1  # Probability of drawing a clean minibatch after the switch
    for i in range(0, maxIters):
        self.zero_grad()
        if i == switchTime: print('Changing to noisy dataset')
        train = train_x_noisy if i >= switchTime and npr.uniform() > noiselessProb else train_x
        x, y = getRandomMiniBatch(train, train_y, minibatch_size, ntrain)
        y_hat = self.forward(x)
        loss = lossf(y_hat, y)
        loss.backward()
        optimizer.step()
        if i % testEvery == 0:
            print('Epoch', str(i) + ': L_t =', '%.4f' % loss.data[0], end=', ')
            vx, vy = getRandomMiniBatch(valid_x, valid_y, 2000, nvalid)
            predv = self.forward(vx)
            lossv = lossf(predv, vy)
            print('L_v =', '%.4f' % lossv.data[0], end=', ')
            acc = self._accuracyBatch(vy, predv)
            print("VACC (noiseless) =", '%.4f' % acc, end=', ')
            tx, ty = getRandomMiniBatch(train_x_noisy, train_y, 2000, ntrain)
            predt = self.forward(tx)
            acctn = self._accuracyBatch(ty, predt)
            print("TACC (noisy) =", '%.4f' % acctn)
            if f_model_name is not None:  # Periodic checkpoint
                torch.save(self.state_dict(), f_model_name)
def getLossFromLeaves(self, lambda_h=-0.0):
    totalLosses = avar(torch.FloatTensor([0.0]))
    totalEntropy = avar(torch.FloatTensor([0.0]))
    for leaf in self.leaves:
        totalLosses += -self.valueF(leaf.state)
        totalEntropy += -torch.sum(leaf.action[0] * torch.log(leaf.action[0]))
    loss = totalLosses + lambda_h * totalEntropy
    return loss / len(self.leaves)
def getRandomMiniBatch(dsx, dsy, mbs, nmax, noiseType=1,
                       maxUniformNoiseLevel=0.001, gaussianSigma=0.001):
    # Nested helper (closes over self); rebuilds each minibatch with one of
    # three augmentation modes applied to the one-hot state block.
    choices = npr.choice(nmax, size=mbs, replace=False)
    xs, ys = dsx[choices], dsy[choices]
    newMB_x = np.zeros((mbs, self.inputSize))
    newMB_y = np.zeros(mbs, dtype=np.int64)  # int64 so the LongTensor conversion works
    if noiseType == 0:
        # Noiseless: clean one-hot state plus the appended action block
        for i in range(mbs):
            jx, jy = int(xs[i, 0]), int(ys[i, 0])
            newMB_x[i, jx] = 1.0
            newMB_x[i, -10:] = xs[i, -10:]
            newMB_y[i] = jy
    elif noiseType == 1:
        # Uniform floor under the one-hot spike, renormalized to sum to 1
        if self.stateSize * maxUniformNoiseLevel > 1:
            print('Noise level is untenable! Max =', 1.0 / self.stateSize)
            sys.exit(0)
        for i in range(mbs):
            unifNoisedState = npr.uniform(low=0.0, high=maxUniformNoiseLevel,
                                          size=self.stateSize)
            jx, jy = int(xs[i, 0]), int(ys[i, 0])
            spikeValue = 1.0 - np.sum(unifNoisedState) + unifNoisedState[jx]
            newMB_x[i, 0:self.stateSize] = unifNoisedState
            newMB_x[i, jx] = spikeValue
            newMB_x[i, 0:self.stateSize] /= np.sum(newMB_x[i, 0:self.stateSize])
            newMB_x[i, -10:] = xs[i, -10:]
            newMB_y[i] = jy
    elif noiseType == 2:
        # Gaussian perturbation of the one-hot state, softmaxed back to a distribution
        for i in range(mbs):
            jx, jy = int(xs[i, 0]), int(ys[i, 0])
            noise = gaussianSigma * npr.normal(size=self.stateSize)
            newMB_x[i, jx] = 1.0
            newMB_x[i, 0:self.stateSize] += noise
            newMB_x[i, 0:self.stateSize] = Utils.softmax(newMB_x[i, 0:self.stateSize])
            newMB_x[i, -10:] = xs[i, -10:]
            newMB_y[i] = jy
    newMB_x = avar(torch.FloatTensor(newMB_x), requires_grad=False)
    newMB_y = avar(torch.LongTensor(newMB_y), requires_grad=False)
    return newMB_x, newMB_y
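# A standalone illustration of the three augmentation modes above, applied to
# a single one-hot state (NumPy only; the size and noise levels are made up).
# Mode 0 leaves the spike clean, mode 1 spreads a small uniform floor under it
# and renormalizes, mode 2 adds Gaussian noise and pushes the result through a
# softmax so it stays a distribution. In all modes the argmax survives.
import numpy as np
import numpy.random as npr

def softmax(v):
    e = np.exp(v - np.max(v))
    return e / np.sum(e)

state_size, j = 8, 3                         # hypothetical size / hot index
clean = np.zeros(state_size); clean[j] = 1.0  # mode 0: clean one-hot

# Mode 1: uniform floor + renormalize (the spike keeps most of the mass)
floor = npr.uniform(0.0, 0.001, size=state_size)
unif = floor.copy(); unif[j] = 1.0 - np.sum(floor) + floor[j]
unif /= np.sum(unif)

# Mode 2: Gaussian perturbation + softmax
gauss = softmax(clean + 0.001 * npr.normal(size=state_size))

print(np.argmax(unif) == j, np.argmax(gauss) == j)  # True True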
def grow(self, node, d, b, verbose=False):
    if verbose:
        print('Grow depth: ', d)
        self.env.printState(node.state[0].data.numpy())
    if d == self.maxDepth: return node
    if type(b) is int: b = avar(torch.LongTensor([b]))
    i = 0
    while (i < b.data).all():
        # Sample the current action
        hard_action, soft_a_s, new_branching_breadth, softBranching = \
            self.simPolicy.sample(node.state)
        a_s = [torch.squeeze(hard_action)]
        initial_state = torch.squeeze(node.state)
        self.forwardModel.setHiddenState(node.hidden)
        current_state, _, current_hidden = self.forwardModel.forward(
            initial_state, a_s, 1)
        # Build the next subtree
        current_state = current_state.unsqueeze(dim=0)
        self.allStates.append(current_state)
        self.allActions.append(a_s)
        if verbose:
            print("int_state at depth", d)
            self.env.printState(node.state[0].data.numpy())
            print("a_s at depth", d, "and breadth", i)
            self.env.printAction(a_s[0])
            print("curr_state at depth", d)
            self.env.printState(current_state[0].data.numpy())
        childNode = Node(node, current_state, [soft_a_s], [hard_action],
                         current_hidden)
        self.allNodes.append(childNode)
        childNode.branchingBreadth = new_branching_breadth
        childNode.softBranching = F.softmax(softBranching, dim=1)
        node.addChild(self.grow(childNode, d + 1, new_branching_breadth,
                                verbose=verbose))
        i += 1
    return node
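# Skeleton of the recursion above with the model calls stripped out (plain
# Python; SketchNode, step_fn, and breadth_fn are hypothetical stand-ins for
# the real Node, forward model, and branching sampler): each node samples
# `breadth` children, and each child recursively grows to `max_depth`, so the
# tree has a learned, state-dependent branching factor rather than a fixed one.
class SketchNode:
    def __init__(self, state):
        self.state = state
        self.children = []

def grow_sketch(node, depth, breadth, max_depth, step_fn, breadth_fn):
    # step_fn(state) -> child state; breadth_fn(state) -> int branching breadth
    if depth == max_depth:
        return node
    for _ in range(breadth):
        child = SketchNode(step_fn(node.state))
        node.children.append(
            grow_sketch(child, depth + 1, breadth_fn(child.state),
                        max_depth, step_fn, breadth_fn))
    return node

root = grow_sketch(SketchNode(0), 0, 2, 3,
                   step_fn=lambda s: s + 1, breadth_fn=lambda s: 2)
print(len(root.children))  # 2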
def measureLossAtTestTime(self, useOnlyLeaves=False):
    targetNodes = self.allNodes
    if useOnlyLeaves: targetNodes = self.leaves
    for i, node in enumerate(targetNodes):
        if i == 0:  # Root has no action to evaluate
            node.loss = avar(torch.FloatTensor([float('inf')]))
            continue
        node.loss = -self.valueF(node.state)
def test(self, x, y=None):
    if not isinstance(x, avar): x = avar(torch.FloatTensor(x))
    print('Input State')
    s_0 = x[0:-10]
    self.printState(s_0, '\t')
    print('Input Action')
    self.printAction(x[-10:], '\t')
    print('Predicted Final State')
    yhat = self.forward(x)
    self.printState(yhat, '\t')
    if y is not None:
        if not isinstance(y, avar): y = avar(torch.FloatTensor(y))
        print('Actual Final State')
        self.printState(y, '\t')
        print('Acc: ', self._accuracySingle(y, yhat))
def getBestPlanFromLeaves(self):
    bestInd, bestVal = 0, avar(torch.FloatTensor([float('-inf')]))
    for i, leaf in enumerate(self.leaves):
        currVal = self.valueF(leaf.state)
        if currVal.data.numpy() > bestVal.data.numpy():
            bestInd = i
            bestVal = currVal
    return self.getPathFromLeaf(bestInd)
def getLossFromLeaves(self, lambda_h=0.001):
    totalLosses = avar(torch.FloatTensor([0.0]))
    for i, leaf in enumerate(self.leaves):
        # Value term plus a (negative-)entropy regularizer on the soft action
        totalLosses += -self.valueF(leaf.state) + lambda_h * torch.sum(
            leaf.action[0] * torch.log(leaf.action[0]))
    return totalLosses / len(self.leaves)
def main():
    runTraining = True
    f_model_name = 'LSTM_FM_1_99'
    s = 'navigation'  # 'transport'
    # Read training/validation data
    print('Reading Data')
    trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
    train, valid = SeqData(trainf), SeqData(validf)
    # Load forward model
    ForwardModel = LSTMForwardModel(train.lenOfInput, train.lenOfState)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    # Initialize forward policy
    exampleEnv = generateTask(0, 0, 0, 3, 0)  # Takes about 10 sec to train & solve on my comp
    SimPolicy = SimulationPolicy(exampleEnv)
    # Run training (maxDepth is needed below even when training is skipped)
    maxDepth = 3
    if runTraining:
        SimPolicy.trainSad(
            exampleEnv,
            ForwardModel,
            printActions=True,
            maxDepth=maxDepth,
            # treeBreadth=2,
            eta_lr=0.001,  # 0.000375,
            trainIters=500,
            alpha=0.5,
            lambda_h=-0.005,  # -0.0125; negative = encourage entropy
            useHolder=True,
            holderp=-2.0,
            useOnlyLeaves=False,
            gamma=0.9)  # 1.5
    # NOTE: the branching factor parameter here is merely the branching level AT THE PARENT
    # It has no effect anywhere else
    s_0 = torch.unsqueeze(avar(torch.FloatTensor(exampleEnv.getStateRep())), dim=0)
    tree = Tree(s_0, ForwardModel, SimPolicy, greedy_valueF, exampleEnv,
                maxDepth=maxDepth)  # , branchingFactor=2)
    tree.measureLossAtTestTime()
    states, actions = tree.getBestPlan()
    print('Final Actions')
    for i in range(len(actions)):
        jq = actions[i][0].data.numpy().argmax()
        print('A' + str(i) + ':', jq, NavigationTask.actions[jq])
def getBestPlan(self, useOnlyLeaves=False):
    bestInd, bestVal = 0, avar(torch.FloatTensor([float('inf')]))
    targetNodes = self.allNodes
    if useOnlyLeaves: targetNodes = self.leaves
    for i, node in enumerate(targetNodes):
        currVal = node.loss
        if currVal.data.numpy() < bestVal.data.numpy():
            bestInd = i
            bestVal = currVal
    return self.getPathFromNode(bestInd)
def runOnActionSequence(self, start_state, actions, hidden=None):
    steps = len(actions)
    outputs = avar(torch.zeros(steps, 1, self.stateSize))  # [seqlen x batchlen x stateSize]
    output = start_state
    for i in range(steps):
        action = actions[i]
        inputv = torch.cat([output, action.unsqueeze(0)], dim=1)
        output, hidden = self.step(inputv, hidden)
        outputs[i] = output
    return outputs, hidden
def train(self, trainSet, validSet, minibatch_size=120, maxIters=4000, testEvery=150):
    optimizer = optim.Adam(self.parameters(), lr=0.000002 * minibatch_size)
    lossf = nn.MSELoss()  # nn.L1Loss() # nn.MSELoss()
    train_x, train_y = trainSet
    train_x = avar(torch.FloatTensor(train_x), requires_grad=False)
    train_y = avar(torch.FloatTensor(train_y), requires_grad=False)
    valid_x, valid_y = validSet
    valid_x = avar(torch.FloatTensor(valid_x), requires_grad=False)
    valid_y = avar(torch.FloatTensor(valid_y), requires_grad=False)
    ntrain, nvalid = len(train_x), len(valid_x)

    def getRandomMiniBatch(dsx, dsy, mbs, nmax):
        choices = torch.LongTensor(np.random.choice(nmax, size=mbs, replace=False))
        return dsx[choices], dsy[choices]

    print('Starting training')
    for i in range(0, maxIters):
        self.zero_grad()
        x, y = getRandomMiniBatch(train_x, train_y, minibatch_size, ntrain)
        y_hat = self.forward(x)
        loss = lossf(y_hat, y)
        loss.backward()
        optimizer.step()
        if i % testEvery == 0:
            print('Epoch', str(i) + ': L_t =', '%.4f' % loss.data[0], end=', ')
            vx, vy = getRandomMiniBatch(valid_x, valid_y, 2000, nvalid)
            predv = self.forward(vx)
            lossv = lossf(predv, vy)
            print('L_v =', '%.4f' % lossv.data[0], end=', ')
            acc = self._accuracyBatch(vy, predv)
            print("VACC =", '%.4f' % acc)
def generatePlanOld(self, start_state, env, eta=0.05, niters=None):
    # Action logits to optimize: one row of size action_size per plan step
    x_t = avar(torch.randn(self.nacts, self.action_size) * self.start_sigma,
               requires_grad=True)
    deconStartState = env.deconcatenateOneHotStateVector(start_state)
    lossf = nn.CrossEntropyLoss()
    gx = avar(torch.FloatTensor(deconStartState[-2]))
    gy = avar(torch.FloatTensor(deconStartState[-1]))
    _, sindx = avar(torch.FloatTensor(deconStartState[0])).max(0)
    _, sindy = avar(torch.FloatTensor(deconStartState[1])).max(0)
    _, indx = gx.max(0)
    _, indy = gy.max(0)
    niters = self.niters if niters is None else niters
    for i in range(niters):
        # Generate soft action sequence (perturbed logits -> softmax)
        epsilon = avar(torch.randn(self.nacts, self.action_size) * self.sigma)
        y_t = x_t + epsilon
        a_t = F.softmax(y_t, dim=1)
        # Compute predicted state by rolling out the forward model
        self.f.reInitialize()  # Reset LSTM hidden state
        currState = avar(torch.FloatTensor(start_state))
        for k in range(0, self.nacts):
            action = a_t[k, :]
            currState = self.f.stateSoftmax(currState, env)
            currInput = torch.cat([currState, action], 0)
            currInput = currInput.view(1, 1, -1)  # [seqlen x batchlen x feat_size]
            lstm_out, self.f.hidden = self.f.lstm(currInput, self.f.hidden)
            currState = self.f.hiddenToState(lstm_out[-1, 0, :])  # [seqlen x batchlen x hidden_size]
        # Compute loss between predicted final position and the goal
        predFinal = env.deconcatenateOneHotStateVector(self.f.stateSoftmax(currState, env))
        pvx = predFinal[0]
        pvy = predFinal[1]
        lossx = lossf(pvx.view(1, len(pvx)), indx)
        lossy = lossf(pvy.view(1, len(pvy)), indy)
        loss = lossx + lossy
        # print(i, '-> L =', lossx.data[0], ' + ', lossy.data[0])
        print(indx.data[0], indy.data[0], end=' ### ')
        print(pvx.max(0)[1].data[0], pvy.max(0)[1].data[0])
        print('--')
        loss.backward()
        x_t.data -= eta * x_t.grad.data  # Manual gradient step on the logits
        print('g_t', x_t.grad.data)
        print('x_t', x_t.data)
        print('Predicted End:', pvx.max(0)[1].data[0], pvy.max(0)[1].data[0])
        x_t.grad.data.zero_()
    print('\nEnd\n')
    print(F.softmax(x_t, dim=1))
    for k in range(0, self.nacts):
        action = x_t[k, :]
        print(action.max(0)[1].data[0], end=' -> ')
        print(NavigationTask.actions[action.max(0)[1].data[0]])
    print('--')
    print('START     ', sindx.data[0], sindy.data[0])
    print('TARGET END', indx.data[0], indy.data[0])
    print('--')
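# The planner above is planning by gradient descent: hold a matrix of action
# logits, relax discrete actions with a softmax, roll the frozen forward model
# out, and descend the goal loss with respect to the logits only. A toy
# version with a linear "dynamics" standing in for the LSTM (assumed, not the
# project's model; modern PyTorch API rather than the Variable-era one used
# elsewhere in this file):
import torch
import torch.nn.functional as F

n_steps, n_actions = 4, 3
moves = torch.tensor([[1.0], [-1.0], [0.0]])   # effect of each action on a 1-D state
x_t = torch.zeros(n_steps, n_actions, requires_grad=True)  # action logits
goal = torch.tensor([3.0])

for it in range(200):
    a_t = F.softmax(x_t, dim=1)           # soft (differentiable) actions
    state = torch.zeros(1)
    for k in range(n_steps):
        state = state + a_t[k] @ moves    # toy forward-model step
    loss = (state - goal).pow(2).sum()
    loss.backward()
    with torch.no_grad():                 # plain SGD on the logits
        x_t -= 0.5 * x_t.grad
        x_t.grad.zero_()

print(x_t.argmax(dim=1))  # logits now favor moves whose effects sum to ~goal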
def trainSad(self, taskEnv, forwardModel, printActions=False, maxDepth=5,
             treeBreadth=2, eta_lr=0.0005, trainIters=500, alpha=0.5,
             lambda_h=-0.025, useHolder=False, holderp=-6.0,
             useOnlyLeaves=False, gamma=0.01, temperature=2,
             branching_temperature=1):
    optimizer = optim.Adam(self.parameters(), lr=eta_lr)
    # Freeze the forward model: only the simulation policy is trained
    for p in forwardModel.parameters():
        p.requires_grad = False
    s0 = avar(torch.FloatTensor([self.env.getStateRep()]), requires_grad=False)
    for i in range(0, trainIters):
        tree = Tree(s0, forwardModel, self, greedy_valueF, self.env,
                    maxDepth, treeBreadth, temperature=temperature,
                    branching_temperature=branching_temperature)
        loss = tree.getLossFromAllNodes(alpha=alpha, lambda_h=lambda_h,
                                        useHolder=useHolder, holderp=holderp,
                                        useOnlyLeaves=useOnlyLeaves, gamma=gamma)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if i % 50 == 0 and printActions:
            plan = tree.getBestPlan()
            print("\n".join([
                "A" + str(qi) + ": " + ",".join([
                    ",".join(["%.3f" % q for q in qq])
                    for qq in a[0].data.numpy()
                ]) for qi, a in enumerate(plan[1])
            ]))
def getBestPlan(self):
    bestInd, bestVal = 0, avar(torch.FloatTensor([float('-inf')]))
    for i, leaf in enumerate(self.leaves):
        currVal = self.valueF(leaf.state)
        if currVal.data.numpy() > bestVal.data.numpy():
            bestInd = i
            bestVal = currVal
    return self.getPathFromLeaf(bestInd)
def _lossFunction(self, outputs, targets, useMSE=False, env=None):
    if useMSE:
        loss = nn.MSELoss()
        return loss(outputs, targets)
    else:  # Use cross-entropy over each one-hot block of the state
        loss = nn.CrossEntropyLoss()
        cost = avar(torch.FloatTensor([0]))
        predVec = env.deconcatenateOneHotStateVector(outputs)
        labelVec = env.deconcatenateOneHotStateVector(targets)
        for pv, lv in zip(predVec, labelVec):
            _, ind = lv.max(0)
            cost += loss(pv.view(1, len(pv)), ind)
        return cost / len(predVec)
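# The loss above treats the state vector as a concatenation of independent
# one-hot blocks (e.g. x-position, y-position, ...) and averages a
# cross-entropy per block against the argmax of the corresponding label
# block. A standalone sketch with two hypothetical blocks of sizes 4 and 3
# (modern PyTorch API):
import torch
import torch.nn as nn

ce = nn.CrossEntropyLoss()
block_sizes = [4, 3]                                  # hypothetical block layout
logits = torch.randn(sum(block_sizes))                # fake network output
label = torch.tensor([0., 0., 1., 0., 1., 0., 0.])    # one-hot per block

cost, off = 0.0, 0
for size in block_sizes:
    pv = logits[off:off + size]          # predicted block (logits)
    lv = label[off:off + size]           # one-hot label block
    cost = cost + ce(pv.view(1, size), lv.argmax().view(1))
    off += size
print(cost / len(block_sizes))           # mean per-block cross-entropy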
def _accuracySingle(self, seq, label, env):
    seq = [avar(torch.from_numpy(s).float()) for s in seq]
    seq = torch.cat(seq).view(len(seq), 1, -1)  # [seqlen x batchlen x featureLen]
    self.reInitialize()  # Reset LSTM hidden state
    prediction = self.forward(seq)  # Only retrieves final time state
    predVec = env.deconcatenateOneHotStateVector(prediction)
    labelVec = env.deconcatenateOneHotStateVector(label)
    locAcc = 0.0
    for pv, lv in zip(predVec, labelVec):
        _, ind_pred = pv.max(0)
        ind_label = np.argmax(lv)
        locAcc += 1.0 if ind_pred.data[0] == ind_label else 0.0
    return locAcc / len(predVec)
def forward(self, inputs, hidden=None, force=True, steps=0):
    if force or steps == 0: steps = len(inputs)
    outputs = avar(torch.zeros(steps, 1, self.stateSize))
    for i in range(steps):
        if force or i == 0:
            inputv = inputs[i]
        else:
            # Even without teacher forcing, still take the true action,
            # appended to the model's own previous state prediction
            trueInput = inputs[i]
            inputv = torch.cat(
                [output, trueInput[-self.actionSize:].unsqueeze(0)], dim=1)
        output, hidden = self.step(inputv, hidden)
        outputs[i] = output
    return outputs, hidden
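# Teacher forcing vs. free-running, in miniature: with forcing the model sees
# the ground-truth state at every step; without it, only the true action is
# appended to the model's own previous prediction, so errors can compound.
# Plain-Python toy recurrence standing in for self.step (assumed, not the
# real model):
def rollout(states, actions, step_fn, force):
    out, prev = [], states[0]
    for s, a in zip(states, actions):
        inp = s if force else prev     # true state vs. own prediction
        prev = step_fn(inp, a)         # true action either way
        out.append(prev)
    return out

step = lambda s, a: 0.9 * s + a        # toy dynamics
print(rollout([1.0, 2.0, 3.0], [0.1, 0.1, 0.1], step, force=True))   # tracks the data
print(rollout([1.0, 2.0, 3.0], [0.1, 0.1, 0.1], step, force=False))  # drifts from it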
def train(self, trainSeq, validSeq, nEpochs=1500, epochLen=500,
          validateEvery=20, vbs=500, printEvery=5, noiseSigma=0.4):
    optimizer = optim.Adam(self.parameters(), lr=0.003)
    state_size, action_size, tenv = self.stateSize, self.actionSize, trainSeq.env
    for epoch in range(nEpochs):
        if epoch % printEvery == 0: print('Epoch:', epoch, end='')
        loss = 0.0
        self.zero_grad()  # Zero out gradients
        for i in range(epochLen):
            self.reInitialize(1)  # Reset LSTM hidden state
            seq, label = trainSeq.randomTrainingPair()  # Current value
            # Hard-coded split: state is dims 0..63, action is dims 64..73
            actions = [s[64:74] for s in seq]
            actions = [avar(torch.from_numpy(s).float()) for s in actions]
            initial_state = seq[0][0:64]
            seqn = len(seq)
            prediction, _ = self.forward(initial_state, actions, seqn)
            label = avar(torch.from_numpy(label).float())
            loss += self._lossFunction(prediction, label, env=tenv)
        loss.backward()
        optimizer.step()
        if epoch % printEvery == 0:
            print(" -> AvgLoss", str(loss.data[0] / epochLen))
        if epoch % validateEvery == 0:
            # acc1 is estimated on the validation set, acc2 on the training set
            bdata, blabels, bseqlen = validSeq.next(vbs, nopad=True)
            acc1, _ = self._accuracyBatch(bdata, blabels, validSeq.env)
            bdata, blabels, bseqlen = trainSeq.next(vbs, nopad=True)
            acc2, _ = self._accuracyBatch(bdata, blabels, tenv)
            print('\tCurrent Training Acc (est) =', acc2)
            print('\tCurrent Validation Acc (est) =', acc1)
def main():
    f_model_name = 'LSTM_FM_1_99'
    gvp_model_name = "greedy_value_predictor_3"
    numRepeats = 5
    tasks = [[6, generateTask(0, 0, 0, 12, 10)]]  # [maxDepth, task] pairs
    exampleEnv = NavigationTask()
    ForwardModel = LSTMForwardModel(74, 64)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    GreedyVP = GreedyValuePredictor(exampleEnv)
    GreedyVP.load_state_dict(torch.load(gvp_model_name))
    print("Running the tasks")
    for i, task in enumerate(tasks):
        for j in range(numRepeats):
            task_state = task[1].getStateRep(oneHotOutput=False)
            px = int(task_state[0])
            py = int(task_state[1])
            orien = np.argmax(task_state[2:6])
            gx = int(task_state[-2])
            gy = int(task_state[-1])
            print("###############################")
            print("Repeat " + str(j) + " for " + str(gx) + " , " + str(gy))
            cenv = generateTask(px, py, orien, gx, gy)
            SimPolicy = SimulationPolicy(cenv)
            SimPolicy.trainSad(ForwardModel, GreedyVP, maxDepth=task[0], niters=2000)
            s_0 = torch.unsqueeze(avar(torch.FloatTensor(cenv.getStateRep())), dim=0)
            tree = Tree(s_0, ForwardModel, SimPolicy, greedy_valueF, cenv, task[0], 2)
            states, actions = tree.getBestPlan()
            # Use 'k' for the inner loop to avoid shadowing the task index 'i'
            for k in range(len(actions)):
                cenv.performAction(actions[k][0].data.numpy().argmax())
            r = cenv.getReward()
            correct = (r == 1)
            if correct:
                print('Correct final state', str(gx), str(gy))
                torch.save(SimPolicy.state_dict(),
                           "SimPolicy_solve_" + str(gx) + "_" + str(gy) + "_" + str(j))
def __init__(self, env, layerSizes=[100, 100], maxBranchingFactor=3):
    super(SimulationPolicy, self).__init__()
    self.actionSize = len(env.actions)
    self.stateSize = len(env.getStateRep(oneHotOutput=True))
    self.env = env
    self.maxBranchingFactor = maxBranchingFactor
    # Integer candidates 0..maxBranchingFactor for the branching sampler
    self.intvec = avar(torch.LongTensor(list(range(maxBranchingFactor + 1)))).unsqueeze(0)
    # Input space: [Batch, observations], output: [Batch, action_space]
    self.layer1 = nn.Linear(self.stateSize, layerSizes[0])
    self.layer2 = nn.Linear(layerSizes[0], layerSizes[1])
    self.layer3 = nn.Linear(layerSizes[1], self.actionSize)
    # Layer to sample branching factor
    self.intSamplingLayer = nn.Linear(layerSizes[1], self.maxBranchingFactor + 1)
def getLossFromAllNodes(self, alpha=0.5, lambda_h=-0.025, useHolder=False,
                        holderp=-5.0, useOnlyLeaves=False, gamma=0.01, xi=0.01):
    targetNodes = self.allNodes
    if useOnlyLeaves: targetNodes = self.leaves
    totalInverseValue = avar(torch.FloatTensor([0.0])).unsqueeze(0)
    totalEntropy = avar(torch.FloatTensor([0.0]))
    totalBranching = avar(torch.FloatTensor([0.0]))
    totalEntropyB = avar(torch.FloatTensor([0.0]))  # For branching sampler
    if not useHolder: holderp = 1.0
    nNodes = len(targetNodes)
    # Candidate breadths 0..maxBranchingFactor, matching the support of
    # intvec and of the intSamplingLayer's softmax output
    mbf = avar(torch.FloatTensor(
        np.arange(self.simPolicy.maxBranchingFactor + 1, dtype=np.float32)))
    for i, node in enumerate(targetNodes):
        if i == 0:  # Root: no action, infinite loss
            node.loss = avar(torch.FloatTensor([float('inf')]))
            continue
        if node.branchingBreadth is not None:
            # Expected breadth under the softmax keeps the penalty differentiable
            expectedBranching = torch.sum(node.softBranching * mbf)
            totalBranching += expectedBranching
            # totalBranching += node.branchingBreadth.type(torch.FloatTensor) # IGNORES PARENT TODO
        currloss = -self.valueF(node.state)
        node.loss = currloss
        totalInverseValue += currloss.pow(holderp)
        if node.action is not None:
            totalEntropy += -torch.sum(node.action[0] * torch.log(node.action[0]))
        if node.softBranching is not None:
            totalEntropyB += -torch.sum(node.softBranching * torch.log(node.softBranching))
    # Penalize negative reward and entropy
    totalLosses = alpha * (totalInverseValue / nNodes).pow(1.0 / holderp) \
                  + lambda_h * totalEntropy / nNodes
    # Penalize too many branches
    totalLosses += gamma * totalBranching / nNodes
    # Penalize entropy in the branching sampler
    totalLosses += xi * totalEntropyB / nNodes
    return totalLosses
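# The branching penalty above uses the *expected* breadth under the softmax,
# E[b] = sum_k p_k * k, so the discrete sampled breadth stays differentiable
# through its distribution. Standalone check (NumPy only; the probabilities
# are made up):
import numpy as np
probs = np.array([0.1, 0.2, 0.4, 0.3])        # softmax over breadths 0..3
print(np.sum(probs * np.arange(len(probs))))  # expected breadth = 1.9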
def main():
    f_model_name = 'LSTM_FM_1_99'
    s = 'navigation'  # 'transport'
    trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
    print('Reading Data')
    train, valid = SeqData(trainf), SeqData(validf)
    exampleEnv = generateTask(0, 0, 0, 14, 14)
    ForwardModel = LSTMForwardModel(train.lenOfInput, train.lenOfState)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    SimPolicy = SimulationPolicy(exampleEnv)
    SimPolicy.trainSad(ForwardModel)
    s_0 = torch.unsqueeze(avar(torch.FloatTensor(exampleEnv.getStateRep())), dim=0)
    tree = Tree(s_0, ForwardModel, SimPolicy, greedy_cont_valueF, exampleEnv, 5, 2)
    states, actions = tree.getBestPlan()
    for i in range(len(actions)):
        print(actions[i][0].data.numpy().argmax())
def forward(self, initial_state, actions, seqn):
    # initial_state: [state_size] NumPy array;
    # actions: list of seqn tensors of shape [action_size]
    int_states = []
    current_state = avar(torch.from_numpy(initial_state).float())
    for i in range(seqn):
        concat_vec = torch.cat((current_state, actions[i]), 0).view(1, 1, -1)
        lstm_out, self.hidden = self.lstm(concat_vec, self.hidden)
        output_state = self.hiddenToState(lstm_out[0, 0, :])
        int_states.append(output_state)
        current_state = output_state  # Feed prediction back in as the next state
    return current_state, int_states
def getBestPlan(self):
    bestInd, bestVal = 0, avar(torch.FloatTensor([float('inf')]))
    currpath = None
    for i, node in enumerate(self.allNodes):
        if i == 0: continue  # Skip the root
        currVal = node.loss
        if currVal.data.numpy() < bestVal.data.numpy():
            bestInd = i
            bestVal = currVal
            currpath = self.getPathFromNode_branchingAndActions(i)
        elif currVal.data.numpy() == bestVal.data.numpy():
            # On ties, prefer the shorter plan
            putPath = self.getPathFromNode_branchingAndActions(i)
            if (currpath is None) or (len(putPath) < len(currpath)):
                bestInd = i
                bestVal = currVal
                currpath = putPath
    return currpath
def _accuracySingle(self, seq, label, env):
    seq = [avar(torch.from_numpy(s).float()) for s in seq]
    seq = torch.cat(seq).view(len(seq), 1, -1)  # [seqlen x batchlen x featureLen]
    self.reInitialize(1)  # Reset LSTM hidden state
    # Hard-coded split: state is dims 0..63, action is dims 64..73
    actions = [s[0][64:74] for s in seq]
    initial_state = seq[0][0][0:64].data.numpy()
    seqn = len(seq)
    prediction, _ = self.forward(initial_state, actions, seqn)
    predVec = env.deconcatenateOneHotStateVector(prediction)
    labelVec = env.deconcatenateOneHotStateVector(label)
    locAcc = 0.0
    for pv, lv in zip(predVec, labelVec):
        _, ind_pred = pv.max(0)
        ind_label = np.argmax(lv)
        locAcc += 1.0 if ind_pred.data[0] == ind_label else 0.0
    return locAcc / len(predVec)
def main():
    ts = "navigation-data-state_to_reward-train.pickle"
    vs = "navigation-data-state_to_reward-valid.pickle"
    print('Reading Data')
    with open(ts, 'rb') as inFile:
        print('\tReading', ts)
        trainSet = pickle.load(inFile)
    with open(vs, 'rb') as inFile:
        print('\tReading', vs)
        validSet = pickle.load(inFile)
    env = NavigationTask()
    greedyvp = GreedyValuePredictor(env)
    greedyvp.train(trainSet, validSet)

    def generateTask(px, py, orien, gx, gy):
        direction = NavigationTask.oriens[orien]
        gs = np.array([gx, gy])
        return NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                              goal_pos=gs)

    # Sanity-check the trained predictor on a single state
    env = generateTask(0, 1, 2, 3, 2)
    state = avar(torch.FloatTensor(env.getStateRep()), requires_grad=False).view(1, -1)
    print(state.shape)
    print(greedyvp.forward(state).data.numpy())
    torch.save(greedyvp.state_dict(), "greedy_value_predictor")