def simForward(steps=10):
    """Run a single interactive POMCP pursuit simulation and plot progress.

    Builds the road network and an initial particle belief over target
    locations, then alternates POMCP action selection, state transition,
    observation, and particle-filter belief update for up to `steps`
    steps, drawing the belief and true state each step. After the run
    (or capture), plots the belief mean with 2-sigma bounds against the
    true target track.

    Parameters
    ----------
    steps : int
        Maximum number of simulation steps.
    """
    # Make problem: fresh search tree root and solver per run
    h = Node()
    solver = POMCP()

    # Make belief: one particle per sampled target point.
    # State layout (7 entries): [agentX, agentY, targetX, targetY, cur, goal, mode]
    network = readInNetwork('../common/flyovertonNetwork.yaml')
    setNetworkNodes(network)
    target, curs, goals = populatePoints(network, maxTreeQueries)
    pickInd = np.random.randint(0, len(target))
    trueS = [np.random.random()*8, np.random.random()*8,
             target[pickInd][0], target[pickInd][1],
             curs[pickInd], goals[pickInd], 0]
    sSet = []
    for i in range(0, len(target)):
        sSet.append([trueS[0], trueS[1], target[i][0], target[i][1],
                     curs[i], goals[i], 0])

    fig, ax1 = plt.subplots()

    # Per-step histories. NOTE: rows must be width 7 to match the 7-entry
    # state (was 6, which made np.array(allPrevs) ragged on early capture).
    allPrevs = np.zeros(shape=(steps, 7)).tolist()
    allRewards = []
    allMeans = np.zeros(shape=(steps, 2))
    allVars = np.zeros(shape=(steps, 2))

    # get action
    for step in range(0, steps):
        fig, ax1 = displayNetworkMap('../common/flyovertonNetwork.yaml',
                                     fig, ax1, False, redraw=True)
        allPrevs[step] = trueS
        act = solver.search(sSet, h, False)
        # NOTE(review): reward is evaluated on the pre-transition state here,
        # while runSims evaluates it post-transition — confirm which is intended.
        r = generate_r(trueS, act)
        trueS = generate_s(trueS, act)
        o = generate_o(trueS, act)
        allRewards.append(r)

        # Descend the tree along (action, observation) if that child exists
        # and holds particles; otherwise fall back to the action node.
        tmpHAct = h.getChildByID(act)
        tmpHObs = tmpHAct.getChildByID(o)
        if(tmpHObs != -1 and len(tmpHObs.data) > 0):
            h = tmpHObs
        else:
            h = tmpHAct[0]
            print("Error: Child Node Not Found!!!")

        # Particle-filter update of the belief, then resample
        sSet = propogateAndMeasure(sSet, act, o)
        sSet = solver.resampleSet(sSet)
        tmpBel = np.array(sSet)
        allMeans[step] = [np.mean(tmpBel[:, 2]), np.mean(tmpBel[:, 3])]
        allVars[step] = [np.std(tmpBel[:, 2]), np.std(tmpBel[:, 3])]

        # Overlay belief particles, agent (blue) and target (red) markers,
        # plus motion arrows, on the network map.
        ax2 = fig.add_subplot(111, label='belief')
        sp = [tmpBel[:, 2], tmpBel[:, 3]]
        ax2.scatter(sp[0], sp[1], c='k', zorder=2)
        ax2.set_xlim([-0.2, 8.2])
        ax2.set_ylim([-0.2, 8.2])
        ax2.scatter(allPrevs[step][0], allPrevs[step][1], c=[0, 0, 1], zorder=3)
        ax2.scatter(allPrevs[step][2], allPrevs[step][3], c=[1, 0, 0], zorder=3)
        ax2.arrow(allPrevs[step][0], allPrevs[step][1],
                  trueS[0]-allPrevs[step][0], trueS[1]-allPrevs[step][1],
                  edgecolor=[0, 0, 1], head_width=0.25,
                  facecolor=[0, 0, .5], zorder=3)
        ax2.arrow(allPrevs[step][2], allPrevs[step][3],
                  trueS[2]-allPrevs[step][2], trueS[3]-allPrevs[step][3],
                  edgecolor=[1, 0, 0], head_width=0.25,
                  facecolor=[.5, 0, 0], zorder=3)
        ax1.set_xlim([-0.2, 8.2])
        ax1.set_ylim([-0.2, 8.2])
        plt.axis('off')
        ax1.axis('off')
        ax2.axis('off')
        plt.pause(0.01)

        print("Step: {} of {}".format(step+1, steps))
        print("State: {}".format(trueS))
        print("Action: {}".format(act))
        print("Observation: {}".format(o))
        print("Ac Reward: {}".format(sum(allRewards)))
        # Fixed format string: was "{0:.2f},{0:.2f}", which printed the
        # x-mean twice and never showed the y-mean.
        print("Belief Mean: {0:.2f},{1:.2f}".format(np.mean(tmpBel[:, 2]),
                                                    np.mean(tmpBel[:, 3])))
        print("Belief Length: {}".format(len(tmpBel)))
        print("")

        if(isTerminal(trueS, act)):
            print("Captured after: {} steps".format(step))
            break
        ax2.remove()

    # Summary plot: estimate vs truth with 2-sigma bounds per axis
    fig, axarr = plt.subplots(2)
    x = range(0, steps)
    allPrevs = np.array(allPrevs)
    axarr[0].plot(allMeans[:, 0], c='g')
    axarr[0].plot(allMeans[:, 0] + 2*allVars[:, 0], c='g', linestyle='--')
    axarr[0].plot(allMeans[:, 0] - 2*allVars[:, 0], c='g', linestyle='--')
    axarr[0].plot(allPrevs[:, 2], c='k', linestyle='--')
    axarr[0].fill_between(x, allMeans[:, 0] - 2*allVars[:, 0],
                          allMeans[:, 0] + 2*allVars[:, 0],
                          alpha=0.25, color='g')
    axarr[0].set_ylim([-0.5, 10.5])
    axarr[0].set_ylabel('North Estimate')
    axarr[1].plot(allMeans[:, 1], c='g')
    axarr[1].plot(allMeans[:, 1] + 2*allVars[:, 1], c='g', linestyle='--')
    axarr[1].plot(allMeans[:, 1] - 2*allVars[:, 1], c='g', linestyle='--')
    axarr[1].plot(allPrevs[:, 3], c='k', linestyle='--')
    axarr[1].fill_between(x, allMeans[:, 1] - 2*allVars[:, 1],
                          allMeans[:, 1] + 2*allVars[:, 1],
                          alpha=0.25, color='g')
    axarr[1].set_ylim([-0.5, 10.5])
    axarr[1].set_ylabel('East Estimate')
    fig.suptitle("Estimates with 2 sigma bounds when caught at: {}".format(step))
    plt.show()
def runSims(sims=10, steps=10, verbosity=2, simIdent='Test'):
    """Run batch POMCP pursuit simulations and save the results.

    For each of `sims` runs, rebuilds the problem from scratch, steps the
    simulation up to `steps` times (or until capture), and records per-step
    belief summaries (mean/sd only, for storage), states, actions,
    observations, rewards, tree info, and mode particle counts. Results
    are saved via np.save under the given identifier.

    Parameters
    ----------
    sims : int
        Number of independent simulations.
    steps : int
        Maximum steps per simulation.
    verbosity : int
        0 silent; >=1 run banner; >=2 per-sim progress; >=3 per-step
        progress; >=4 live plotting.
    simIdent : str
        Tag appended to the output filename.
    """
    # Set up data collection: solver/problem metadata plus one record per sim
    dataPackage = {'Meta': {'NumActs': numActs, 'maxDepth': maxDepth, 'c': c,
                            'maxTreeQueries': maxTreeQueries,
                            'maxTime': maxTime, 'gamma': gamma,
                            'numObs': numObs, 'problemName': problemName,
                            'agentSpeed': agentSpeed},
                   'Data': []}
    for i in range(0, sims):
        dataPackage['Data'].append({'Beliefs': [], 'ModeBels': [],
                                    'States': [], 'Actions': [],
                                    'Observations': [], 'Rewards': [],
                                    'TreeInfo': []})

    if(verbosity >= 1):
        print("Starting Data Collection Run: {}".format(simIdent))
        print("Running {} simulations of {} steps each".format(sims, steps))

    # run individual sims
    for count in range(0, sims):
        if(verbosity >= 2):
            print("Simulation: {} of {}".format(count+1, sims))

        # Make Problem: fresh search tree and solver per run
        h = Node()
        solver = POMCP()

        # Initialize Belief and State
        # State layout (7 entries): [agentX, agentY, targetX, targetY, cur, goal, mode]
        network = readInNetwork('../common/flyovertonNetwork.yaml')
        setNetworkNodes(network)
        target, curs, goals = populatePoints(network, maxTreeQueries)
        pickInd = np.random.randint(0, len(target))
        trueS = [np.random.random()*8, np.random.random()*8,
                 target[pickInd][0], target[pickInd][1],
                 curs[pickInd], goals[pickInd], 0]
        sSet = []
        for i in range(0, len(target)):
            sSet.append([trueS[0], trueS[1], target[i][0], target[i][1],
                         curs[i], goals[i], 0])

        # For storage purposes, only the mean and sd of the belief are kept.
        # (A redundant list-comprehension mean that was immediately
        # overwritten by the numpy version has been removed.)
        tmpBel = np.array(sSet)
        mean = [np.mean(tmpBel[:, 2]), np.mean(tmpBel[:, 3])]
        sd = [np.std(tmpBel[:, 2]), np.std(tmpBel[:, 3])]
        dataPackage['Data'][count]['Beliefs'].append([mean, sd])
        dataPackage['Data'][count]['States'].append(trueS)

        if(verbosity >= 4):
            fig, ax1 = plt.subplots()

        for step in range(0, steps):
            if(verbosity >= 3):
                print("Step: {}".format(step))
            if(verbosity >= 4):
                fig, ax1 = displayNetworkMap('../common/flyovertonNetwork.yaml',
                                             fig, ax1, False, redraw=True)

            # POMCP search; depth shrinks as the horizon approaches
            act, info = solver.search(sSet, h,
                                      depth=min(maxDepth, steps-step+1),
                                      inform=True)
            trueS = generate_s(trueS, act)
            r = generate_r(trueS, act)
            o = generate_o(trueS, act)

            # Descend the tree along (action, observation) when that child
            # exists and holds particles; otherwise fall back silently.
            tmpHAct = h.getChildByID(act)
            tmpHObs = tmpHAct.getChildByID(o)
            if(tmpHObs != -1 and len(tmpHObs.data) > 0):
                h = tmpHObs
            else:
                h = tmpHAct[0]

            # Particle-filter update of the belief, then resample
            sSet = propogateAndMeasure(sSet, act, o)
            sSet = solver.resampleSet(sSet)
            tmpBel = np.array(sSet)
            mean = [np.mean(tmpBel[:, 2]), np.mean(tmpBel[:, 3])]
            sd = [np.std(tmpBel[:, 2]), np.std(tmpBel[:, 3])]
            # Particle counts per behavior mode (column 6: 0 or 1)
            modeBels = [len(np.where(tmpBel[:, 6] == 0)[0]),
                        len(np.where(tmpBel[:, 6] == 1)[0])]

            # Live plotting of belief + truth (verbosity >= 4 only)
            if(verbosity >= 4):
                ax2 = fig.add_subplot(111, label='belief')
                sp = [tmpBel[:, 2], tmpBel[:, 3]]
                ax2.scatter(sp[0], sp[1], c='k', zorder=2)
                ax2.set_xlim([-0.2, 8.2])
                ax2.set_ylim([-0.2, 8.2])
                ax2.scatter(trueS[0], trueS[1], c=[0, 0, 1], zorder=3)
                ax2.scatter(trueS[2], trueS[3], c=[1, 0, 0], zorder=3)
                ax2.add_patch(Circle([trueS[0], trueS[1]], 1,
                                     alpha=0.25, edgecolor='b'))
                ax2.set_aspect('equal')
                ax1.set_aspect('equal')
                ax1.set_xlim([-0.2, 8.2])
                ax1.set_ylim([-0.2, 8.2])
                plt.axis('off')
                ax1.axis('off')
                ax2.axis('off')
                plt.pause(0.01)
                ax2.remove()

            dataPackage['Data'][count]['Beliefs'].append([mean, sd])
            dataPackage['Data'][count]['States'].append(trueS)
            dataPackage['Data'][count]['Actions'].append(act)
            dataPackage['Data'][count]['Observations'].append(o)
            dataPackage['Data'][count]['Rewards'].append(r)
            dataPackage['Data'][count]['TreeInfo'].append(info)
            dataPackage['Data'][count]['ModeBels'].append(modeBels)

            if(isTerminal(trueS, act)):
                if(verbosity >= 2):
                    print("Captured after: {} steps".format(step))
                break

        print("Capture Time: {}".format(
            len(dataPackage['Data'][count]['Rewards'])-1))
        print("Average Capture Time: {}".format(
            sum([len(dataPackage['Data'][i]['Rewards'])-1
                 for i in range(0, count+1)])/(count+1)))
        print("")

    np.save('../../data/dataGridNaive_E2_{}'.format(simIdent), dataPackage)
def simulate(verbosity=0):
    """Run one human-assisted POMCP drone pursuit simulation.

    Sets up the road network, a particle belief over target state, and a
    shuffled queue of human landmark sketches that arrive at exponentially
    distributed times. Then loops until the target is captured or the
    flight-time budget (maxFlightTime) is exhausted, logging states,
    actions, observations, beliefs, and timing into a dict that is
    returned to the caller.

    State layout (8 entries):
        [droneX, droneY, targetX, targetY, cur, goal, mode, droneNode]

    Parameters
    ----------
    verbosity : int
        0 silent; >0 step banners; >1 action/observation/sketch prints;
        ==2 additionally draws a live plot every step.

    Returns
    -------
    dict
        Per-step logs ('States', 'Actions', 'Human_Obs', 'Drone_Obs',
        'Beliefs', 'ModeBels', timing lists, 'Sketches') plus the solver
        configuration used and a 'Captured' flag.
    """
    # Initialize network
    # ------------------------------------------------------
    network = readInNetwork('../yaml/flyovertonShift.yaml')

    h = Node()
    solver = POMCP('graphSpec')
    maxFlightTime = 600  #10 minutes
    human_sketch_chance = 1 / 60  #about once a minute
    pmean = 3  #poisson mean
    amult = 10  #area multiplier
    #human_sketch_chance = 0;

    # Initialize belief and state
    # ------------------------------------------------------
    target, curs, goals = populatePoints(network, solver.sampleCount)
    pickInd = np.random.randint(0, len(target))
    # Drone starts at a random road node; actions are built for that node.
    trueNode = np.random.choice(network)
    solver.buildActionSet(trueNode)
    trueS = [
        trueNode.loc[0], trueNode.loc[1], target[pickInd][0],
        target[pickInd][1], curs[pickInd], goals[pickInd], 0, trueNode
    ]

    # One particle per sampled target hypothesis; drone state is shared.
    sSet = []
    for i in range(0, len(target)):
        sSet.append([
            trueS[0], trueS[1], target[i][0], target[i][1], curs[i], goals[i],
            0, trueS[7]
        ])

    fig, ax = plt.subplots()

    # DEBUG: Add initial sketch
    # ------------------------------------------------------
    params = {
        'centroid': [500, 500],
        'dist_nom': 50,
        'angle_noise': .3,
        'dist_noise': .25,
        'pois_mean': 4,
        'area_multiplier': 5,
        'name': "Test",
        'steepness': 20,
        'points': None
    }
    ske = Sketch(params)
    solver.addSketch(trueS[7], ske)

    # Set up sketches: one per landmark from the yaml file
    # ------------------------------------------------------
    with open("../yaml/landmarks.yaml", 'r') as stream:
        fi = yaml.safe_load(stream)

    params = {
        'centroid': [4, 5],
        'dist_nom': 2,
        'dist_noise': .25,
        'angle_noise': .2,
        'pois_mean': pmean,
        'area_multiplier': amult,
        'name': "Test",
        'steepness': 7
    }
    allSketches = []
    #seedCount = 0
    for k, v in fi['Landmarks'].items():
        #for k in fi.keys:
        v = fi['Landmarks'][k]
        params['name'] = k
        params['dist_nom'] = v['radius']
        params['centroid'] = v['loc']
        params['dist_noise'] = v['radius'] / 4
        params['points'] = None
        allSketches.append(Sketch(params))
        #seedCount += 1
    np.random.shuffle(allSketches)
    sketchQueue = deque(allSketches)

    # Sketch arrival times: cumulative sums of exponential inter-arrival
    # draws with mean 1/human_sketch_chance seconds.
    sketchTimes = []
    expCount = 0
    for i in range(0, len(sketchQueue)):
        expCount += expon.rvs(scale=1 / human_sketch_chance)
        sketchTimes.append(expCount)
    # print(sketchTimes);
    # sketchTimes.pop(0);
    # print(sketchTimes);

    # Set up data collection
    # ----------------------------------------------------------
    # Note: Beliefs should only be s[2:4]
    # to capture target state beliefs and s[6] to capture mode
    data = {
        'States': [],
        'Actions': [],
        'Human_Obs': [],
        'Drone_Obs': [],
        'Beliefs': [],
        'ModeBels': [],
        'TotalTime': 0,
        'DecisionTimes': [],
        'GivenTimes': [],
        'Sketches': []
    }
    # Record the solver/experiment configuration alongside the logs.
    data['maxDepth'] = solver.maxDepth
    data['c'] = solver.c
    data['maxTreeQueries'] = solver.maxTreeQueries
    data['maxTime'] = solver.maxTime
    data['gamma'] = solver.gamma
    data['agentSpeed'] = solver.agentSpeed
    data['sampleCount'] = solver.sampleCount
    data['human_class_thresh'] = solver.human_class_thresh
    data['human_accuracy'] = solver.human_accuracy
    data['capture_length'] = solver.capture_length
    data['detect_length'] = solver.detect_length
    data['drone_falseNeg'] = solver.drone_falseNeg
    data['drone_falsePos'] = solver.drone_falsePos
    data['targetSpeed'] = solver.targetSpeed
    data['targetDev'] = solver.targetDev
    data['offRoadSpeed'] = solver.offRoadSpeed
    data['offRoadDev'] = solver.offRoadDev
    data['leaveRoadChance'] = solver.leaveRoadChance
    data['human_availability'] = solver.human_availability
    data['maxFlightTime'] = maxFlightTime
    data['human_sketch_chance'] = human_sketch_chance
    data['assumed_availability'] = solver.assumed_availability
    data['assumed_accuracy'] = solver.assumed_accuracy
    data['Captured'] = False
    data['pois_mean'] = pmean
    data['area_multiplier'] = amult
    #print(data['maxTime']);

    endFlag = False
    totalTime = 0
    step = 0
    curDecTime = 5  # initial decision-time budget (seconds)
    decCounts = 0
    sketchesMade = []

    # Simulate until captured or out of time
    # ------------------------------------------------------
    #for step in range(0, maxSteps):
    newSet = sSet
    while (totalTime < maxFlightTime):

        data['States'].append(trueS)
        bel = np.array(sSet)[:, 2:4]
        modebel = np.array(sSet)[:, 7]
        data['Beliefs'].append(bel)
        data['ModeBels'].append(modebel)

        # POMCP makes decision — only when the drone has arrived exactly
        # at its current node's location (i.e. finished the last leg).
        decisionFlag = False
        # -----------------------------------------------------
        if (trueS[0] == trueS[7].loc[0] and trueS[1] == trueS[7].loc[1]):
            if (verbosity > 0):
                print(
                    "Starting step: {} with Decision Time: {:0.2f}s at Total Time: {:0.2f}s"
                    .format(step + 1, min(solver.maxTime, curDecTime),
                            totalTime))
            decCounts = 0
            decisionFlag = True

            #act,info = solver.search(sSet, h, depth=min(solver.maxDepth, maxSteps-step+1), maxTime = min(curDecTime,solver.maxTime),inform=True)
            act, info = solver.search(newSet,
                                      h,
                                      depth=solver.maxDepth,
                                      maxTime=min(curDecTime, solver.maxTime),
                                      inform=True)
            newSet = sSet
            solver.buildActionSet(trueS[7])
            if (verbosity > 1):
                print("Action: {}".format(solver.actionSet[act]))
            try:
                data['Actions'].append(solver.actionSet[act])
            except Exception:
                #print("FAILURE")
                #print(len(solver.actionSet),act);
                #raise;
                # Fall back to action 0 when the searched index is out of
                # range for the rebuilt action set.
                act = 0

            #Disable To Blind Plan
            ########################################################
            if (solver.actionSet[act][1][0] is not None):
                #If a question is asked regarding a sketch
                # print(solver.actionSet[act],solver.actionSet[act][1],solver.actionSet[act][1][0])
                o = solver.generate_o(trueS, solver.actionSet[act])
                if (verbosity > 1):
                    [o1, o2] = o.split()
                    print("Human observation: {}".format(o2))
                [o1, o2] = o.split()
                data['Human_Obs'].append(o2)
                # Fuse the human answer into the belief, then resample.
                newSet = np.array(newSet)
                newSet = solver.measurementUpdate(newSet,
                                                  solver.actionSet[act], o)
                newSet = solver.measurementUpdate_time(newSet,
                                                       solver.actionSet[act],
                                                       o)
                newSet = solver.resampleSet(newSet)
                sSet = newSet

            # Travel time to the chosen node becomes the next decision budget.
            curDecTime = dist(trueS,
                              solver.actionSet[act][0].loc) / solver.agentSpeed
            totalTime += curDecTime
            data['GivenTimes'].append(min(curDecTime, solver.maxTime))
            data['DecisionTimes'].append(totalTime)
            step += 1
            # Roll the planning belief forward one dynamics step per second
            # of travel.
            for i in range(0, int(np.ceil(curDecTime))):
                newSet = solver.dynamicsUpdate(newSet, solver.actionSet[act])
                decCounts += 1
            if (decCounts + totalTime > maxFlightTime):
                # NOTE(review): adds the dynamics-step count (a counter) to
                # totalTime, which otherwise accumulates seconds — confirm
                # the intended units here.
                totalTime += decCounts
                endFlag = True

        # Drone-only observation: keep the movement part of the action but
        # blank out the question part.
        fakeAct = [solver.actionSet[act][0], [None, None]]
        #if(verbosity > 1):
        #print("Action: {}".format(solver.actionSet[act]));

        # propagate state
        # ------------------------------------------------------
        #solver.buildActionSet(trueS[7]);
        trueS = solver.generate_s(trueS, solver.actionSet[act])
        r = solver.generate_r(trueS, solver.actionSet[act])
        sSet = solver.dynamicsUpdate(sSet, solver.actionSet[act])
        #o = solver.generate_o(trueS,solver.actionSet[act]);
        o = solver.generate_o(trueS, fakeAct)
        #o = solver.generate_o(trueS,[solver.actionSet[act][0],[solver.actionSet[act][1][0],None]]);
        #o = "Null No"
        [o1, o2] = o.split()
        data['Drone_Obs'].append(o1)
        # Keep only the drone half of the observation; human half is "Null".
        o = o1 + " Null"
        if ("Captured" in o):
            endFlag = True
            data['Captured'] = True
        if (verbosity > 1):
            print("Drone Observation: {}".format(o1))

        # if question was asked, see if human answered
        # ------------------------------------------------------
        sSet = solver.measurementUpdate(sSet, fakeAct, o)
        sSet = solver.resampleSet(sSet)

        # Descend the search tree along (action, observation) after a
        # decision step, falling back to the action node if no matching
        # observation child with particles exists.
        if (decisionFlag):
            tmpHAct = h.getChildByID(act)
            tmpHObs = tmpHAct.getChildByID(o)
            if (tmpHObs != -1 and len(tmpHObs.data) > 0):
                h = tmpHObs
                #sSet = solver.resampleNode(h);
            else:
                h = tmpHAct[0]

        # Live visualization (verbosity == 2 only): particles by mode,
        # drone/target markers, detection and capture cones, sketches.
        if (verbosity == 2):
            ax.clear()
            sSetNp = np.array(sSet)
            sSetOff = sSetNp[sSetNp[:, 6] == 1]
            sSetOn = sSetNp[sSetNp[:, 6] == 0]
            ax.scatter(sSetOn[:, 2],
                       sSetOn[:, 3],
                       color='magenta',
                       alpha=0.1,
                       edgecolor='none')
            ax.scatter(sSetOff[:, 2],
                       sSetOff[:, 3],
                       color='red',
                       alpha=0.3,
                       edgecolor='none')
            ax.scatter(trueS[0], trueS[1], color='blue', alpha=1, s=100)
            ax.scatter(trueS[2], trueS[3], color='black', alpha=1, s=100)
            # if(solver.actionSet[act][0] is not None):
            #     #theta_options = [180,0,90,270,135,45,315,225]
            #     theta = theta_options[solver.actionSet[act][0]];
            # else:
            #     theta = 90;
            #theta = np.arctan2(trueS[7].loc[1],trueS[7].loc[0])-np.arctan2(trueS[1],trueS[0])
            # theta = np.arctan2(trueS[7].loc[1],trueS[7].loc[0])-np.arctan2(trueS[1],trueS[0])
            # theta = np.degrees(theta);
            theta = computeTheta([trueS[0], trueS[1]], trueS[7].loc)
            #print(theta)
            # Detection cone: +/- 0.523598 rad (30 deg) about heading theta.
            detect_length = solver.detect_length
            detect_points = [[trueS[0], trueS[1]],
                             [
                                 trueS[0] + detect_length *
                                 math.cos(2 * -0.261799 + math.radians(theta)),
                                 trueS[1] + detect_length *
                                 math.sin(2 * -0.261799 + math.radians(theta))
                             ],
                             [
                                 trueS[0] + detect_length *
                                 math.cos(2 * 0.261799 + math.radians(theta)),
                                 trueS[1] + detect_length *
                                 math.sin(2 * 0.261799 + math.radians(theta))
                             ]]
            detect_poly = Polygon(detect_points)
            x, y = detect_poly.exterior.xy
            ax.plot(x, y, color='blue')
            capture_length = solver.capture_length
            capture_points = [
                [trueS[0], trueS[1]],
                [
                    trueS[0] + capture_length *
                    math.cos(2 * -0.261799 + math.radians(theta)),
                    trueS[1] + capture_length *
                    math.sin(2 * -0.261799 + math.radians(theta))
                ],
                [
                    trueS[0] + capture_length *
                    math.cos(2 * 0.261799 + math.radians(theta)),
                    trueS[1] + capture_length *
                    math.sin(2 * 0.261799 + math.radians(theta))
                ]
            ]
            capture_poly = Polygon(capture_points)
            x, y = capture_poly.exterior.xy
            ax.plot(x, y, color='gold')

            #Show Sketches
            for s in range(0, len(sketchesMade)):
                sketch_Poly = Polygon(sketchesMade[s].points)
                x, y = sketch_Poly.exterior.xy
                ax.plot(x, y, color='green')

            #plt.axis('equal')
            ax.set_xlim([0, 1000])
            ax.set_ylim([0, 1000])
            plt.pause(0.1)

        # check if human volunteered anything
        # ------------------------------------------------------
        # checkVolunteer()

        # check if human sketched anything
        # ------------------------------------------------------
        if (decisionFlag):
            # coin = np.random.random();
            # cTest = expon.cdf(min(curDecTime,solver.maxTime), scale=1/human_sketch_chance)
            # #print(coin,cTest);
            if (len(sketchTimes) > 0 and totalTime > sketchTimes[0]):
                sketchTimes.pop(0)
                #print("Sketch Made: {}".format(ske.name));
                # NOTE(review): deque.pop() takes from the right end while
                # sketchTimes were accumulated left-to-right — popleft() may
                # be intended (order is random after the shuffle either way).
                ske = sketchQueue.pop()
                sketchesMade.append(ske)
                solver.addSketch(trueS[7], ske)
                if (verbosity > 1):
                    print("Sketch Made: {}".format(ske.name))
                data['Sketches'].append(ske)

        # save everything
        # ------------------------------------------------------

        # repeat
        # ------------------------------------------------------
        # NOTE(review): resetting the tree root here discards the descent
        # performed in the decisionFlag block above — confirm the per-step
        # tree reset is intentional.
        h = Node()

        # repeat
        if (verbosity > 1):
            print("")
        if (endFlag):
            break

    data['TotalTime'] = totalTime

    return data