# Module-level imports used by the methods in this excerpt (the enclosing
# classes are not shown):
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import tiling

def updateAllActions(self, dataStructureIndex):
    state = self.getState(dataStructureIndex, self.stateScene)
    state_codings = tiling.get_tile_coding(state, self.tilings)
    self.dataStructure[dataStructureIndex]["action"] = {}
    for action in self.actions.keys():
        action_idx = self.actions.index(action)
        # Finds all entries matching this pose and action.
        # TODO: how many records are included (currently 5)?
        indexList = self.getStateListDependingOnAction(dataStructureIndex, action, 5)
        self.dataStructure[dataStructureIndex]["action"][action] = None
        if indexList is not None:
            reward = 0
            for index in indexList:
                reward += self.dataStructure[index]['score'][self.stateScene]
            reward /= len(indexList)  # TODO: weighting of the individual values
            for coding, q_table in zip(state_codings, self.q_tables):
                # A non-zero cell is overwritten with the new reward (lr = 1);
                # an untouched cell only moves towards it by self.lr.
                if q_table[tuple(coding) + (action_idx,)] != 0:
                    lr = 1
                else:
                    lr = self.lr
                delta = reward - q_table[tuple(coding) + (action_idx,)]
                q_table[tuple(coding) + (action_idx,)] += lr * delta  # TODO: set learning rate
                self.dataStructure[dataStructureIndex]["action"][action] = (
                    q_table[tuple(coding) + (action_idx,)] * 20 + 60)  # for plotting only
def update(self, state, action, target):
    state_codings = tiling.get_tile_coding(state, self.tilings)  # e.g. [[5, 1], [4, 0], [3, 0]], ...
    action_idx = self.actions.index(action)
    for coding, q_table in zip(state_codings, self.q_tables):
        delta = target - q_table[tuple(coding) + (action_idx,)]
        q_table[tuple(coding) + (action_idx,)] += self.lr * delta
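# --- Hedged sketch: how `tiling.get_tile_coding` is assumed to behave. ---
# The project's `tiling` module is not part of this excerpt. The minimal,
# self-contained stand-in below only illustrates the assumed contract used by
# the methods here: for one continuous state it returns one integer tile index
# per dimension, once per (offset) tiling, e.g. [[5, 1], [4, 0], [3, 0]].
# Names, bin layout, and offsets are illustrative assumptions, not the actual
# implementation.
def _get_tile_coding_sketch(state, tilings):
    """tilings: one list of per-dimension bin edges per tiling."""
    return [[int(np.digitize(s, edges)) for s, edges in zip(state, tiling_grid)]
            for tiling_grid in tilings]

# Example: two tilings over a 2-D state, the second offset by 0.05.
# _example_tilings = [
#     [np.linspace(0.0, 1.0, 11)[1:-1]] * 2,
#     [np.linspace(0.05, 1.05, 11)[1:-1]] * 2,
# ]
# _get_tile_coding_sketch([0.42, 0.73], _example_tilings)  # -> [[4, 7], [3, 6]]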
def value(self, state, action):
    # Value from the Q-tables: average the matching cell of every tiling.
    state_codings = tiling.get_tile_coding(state, self.tilings)  # e.g. [[5, 1], [4, 0], [3, 0]], ...
    action_idx = self.actions.index(action)
    value = 0
    for coding, q_table in zip(state_codings, self.q_tables):
        value += q_table[tuple(coding) + (action_idx,)]
    return value / self.num_tilings
def valuePoseActionTables(self, pose, action):
    # Reads the value for a pose and an action from the pose_action_tables.
    pose_codings = tiling.get_tile_coding(pose, self.tilings)  # e.g. [[5, 1], [4, 0], [3, 0]], ...
    action_idx = self.qValue.actions.index(action)
    value = 0
    for coding, pose_action_table in zip(pose_codings, self.pose_action_tables):
        value += pose_action_table[tuple(coding) + (action_idx,)]
    return value / self.num_tilings
def getScalarField(self, dataStructure, pose6D):
    dataStructure = [{"jointAvg": dataStructure}]
    self.pose_tables = self.resetPoseTable()
    print("self.pose_tables: ", self.pose_tables)
    state = self.qValue.getState(0, self.qValue.stateScene, dataStructure)
    valueVectorPairs = self.qValue.valueVectorPairs(state)
    print("valueVectorPairs: ", valueVectorPairs)
    originPose = self.pose6Dto3D(pose6D)
    codingTable = []
    for element in valueVectorPairs:
        displacementVector = np.array(element["displacementVector"])
        newPose = originPose + displacementVector  # TODO: is this correct?
        pose_codings = tiling.get_tile_coding(newPose, self.tilings)
        value = element["value"]
        for coding, pose_table in zip(pose_codings, self.pose_tables):
            # Collapse the 3-D coding into a scalar key to detect duplicates.
            key = coding[0] + coding[1] * 100 + coding[2] * 10000
            if key in codingTable:
                print("already present... choose a different sampling: ", coding)
            else:
                codingTable.append(key)
                pose_table[tuple(coding)] = value
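# Hedged aside on the duplicate check above: the scalar key
# c[0] + c[1] * 100 + c[2] * 10000 is only collision-free as long as every
# tile index stays below 100, which the tilings here are assumed to satisfy.
# Using the coding tuple itself in a set removes that assumption and makes
# the membership test O(1); a minimal sketch of the alternative:
#
#     seen = set()
#     key = tuple(coding)
#     if key in seen:
#         print("already present... choose a different sampling: ", coding)
#     else:
#         seen.add(key)
#         pose_table[key] = value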
def updateAllActionsMax(self, dataStructureIndex):
    state = self.getState(dataStructureIndex, self.stateScene)
    state_codings = tiling.get_tile_coding(state, self.tilings)
    self.dataStructure[dataStructureIndex]["action"] = {}
    # TODO: how many records are included (currently ALL)?
    rewardListAction = self.getStateList3(dataStructureIndex, len(self.dataStructure))
    for action in self.actions.keys():
        action_idx = self.actions.index(action)
        reward = rewardListAction[action]
        self.dataStructure[dataStructureIndex]["action"][action] = None
        if reward is not None:
            for coding, q_table in zip(state_codings, self.q_tables):  # MAXIMUM
                if q_table[tuple(coding) + (action_idx,)] < reward:
                    q_table[tuple(coding) + (action_idx,)] = reward
                # self.dataStructure[dataStructureIndex]["action"][action] = q_table[tuple(coding) + (action_idx,)] * 20 + 60  # for plotting only
            for coding, q_tableList in zip(state_codings, self.q_tablesList):  # MEAN & used in trainWithAllData
                flat_idx = np.ravel_multi_index(
                    tuple(coding) + (action_idx,),
                    self.state_sizes[0] + (self.actions.length,))
                q_tableList[flat_idx].append(reward)
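# Aside on the flat indexing above: `np.ravel_multi_index` maps the
# multi-dimensional tile coding plus the action index onto a single flat
# position, so each entry of `q_tablesList` can be a plain Python list of
# collected rewards. Minimal illustration with made-up shapes (not the
# project's actual state sizes):
#
#     # a 3x4 state grid with 2 actions flattens to 24 slots
#     np.ravel_multi_index((2, 1, 0), (3, 4, 2))  # -> 2*(4*2) + 1*2 + 0 == 18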
def getDecisionScalarField(self, dataStructure):
    print("len: ", len(dataStructure))
    # Writes every element of the data structure into the pose_action_tables.
    self.pose_action_tables = self.resetPoseActionTable()
    for el in dataStructure:
        state = self.qValue.getState(0, self.qValue.stateScene, [el])
        valueVectorPairs = self.qValue.valueVectorPairs(state)
        # valueVectorPairs = [{'displacementVector': array([0., 0., 0.]), 'value': nan, 'actionIdx': 0},
        #                     {'displacementVector': array([-0.1, 0., 0.]), 'value': nan, 'actionIdx': 1}, ...]
        # el = {'handCenterAvg':
        #           {'depositBox': {'timestamp': 398.0, 'x': -607.092, 'y': 209.03591775, 'z': 1231.0925},
        #            'getBox': {'timestamp': 286.0, 'x': -506.46900000000005, 'y': -272.51025, 'z': 1341.4375}},
        #       'jointAvg':
        #           {'depositBox': {'HANDDISTANCE': 415.4124850699847, 'LELBW': 116.20698380983922, 'LFOREARM': 225.95153472018518, 'LSHOULDER2D': 48.9902370141013, 'LUPPERARM': 413.29950395888295, 'OUTOFMIDDLEDISTANCE': 183.42403653492383, 'RELBW': 71.18893798968479, 'RFOREARM': 236.50801522081682, 'RSHOULDER2D': 13.73358209986485, 'RUPPERARM': 418.8775285707044, 'SHOULDERDISTANCE': 340.91170633138495, 'timestamp': 398.0},
        #            'getBox': {'HANDDISTANCE': 414.63783559934245, 'LELBW': 123.27435898724582, 'LFOREARM': 215.563763188211, 'LSHOULDER2D': 65.05766101371253, 'LUPPERARM': 400.0593395108174, 'OUTOFMIDDLEDISTANCE': 160.31681215348596, 'RELBW': 122.6475539529318, 'RFOREARM': 232.98312677688182, 'RSHOULDER2D': 65.59852835250899, 'RUPPERARM': 382.69829045613574, 'SHOULDERDISTANCE': 302.20686433016147, 'timestamp': 286.0}},
        #       'nameIndex': '390',
        #       'path': [{'distance': 504.1703271997581, 'duration': 112.0, 'end': 'depositBox', 'path': array([-100.623, 481.54616775, -110.345]), 'start': 'getBox'}],
        #       'pose': array([-0.1, 0.4, 0., 0., 0., 0.]), 'poseIndex': 387, 'score': {'depositBox': 0.26981563316141316, 'getBox': 0.0}}
        # For each pose this records whether an action is recommended.
        originPose = self.pose6Dto3D(el["pose"])
        pose_codings = tiling.get_tile_coding(originPose, self.tilings)
        for element in valueVectorPairs:
            if not np.isnan(element["value"]):
                for coding, pose_table in zip(pose_codings, self.pose_action_tables):
                    if pose_table[tuple(coding) + (element["actionIdx"],)] == 0:
                        pose_table[tuple(coding) + (element["actionIdx"],)] = element["value"]
                    else:
                        # Pairwise average with the existing entry.
                        pose_table[tuple(coding) + (element["actionIdx"],)] = (
                            element["value"] +
                            pose_table[tuple(coding) + (element["actionIdx"],)]) / 2
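# Note on the averaging above: `(new + old) / 2` is a pairwise update, so
# later samples carry more weight than earlier ones (the n-th sample keeps
# weight 1/2 while the first decays to 1/2**(n-1)). If an equally weighted
# mean is wanted instead, a count-based running mean would provide it; hedged
# sketch using a hypothetical parallel count table `pose_action_counts`
# (not present in the project):
#
#     idx = tuple(coding) + (element["actionIdx"],)
#     pose_action_counts[idx] += 1
#     pose_table[idx] += (element["value"] - pose_table[idx]) / pose_action_counts[idx]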
def setValue(self, state, value):
    # Writes a value into the distance tables.
    state_codings = tiling.get_tile_coding(state, self.tilings)
    for coding, distance_table in zip(state_codings, self.distance_tables):
        distance_table[tuple(coding)] = value
def value(self, state):
    # Value from the distance tables: average the matching cell of every tiling.
    state_codings = tiling.get_tile_coding(state, self.tilings)
    value = 0
    for coding, distance_table in zip(state_codings, self.distance_tables):
        value += distance_table[tuple(coding)]
    return value / self.num_tilings
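# Hedged round-trip sketch: `setValue` writes the same scalar into one cell of
# every tiling's table and `value` averages those cells back, so re-reading a
# state right after writing it returns the written value. Self-contained demo
# with stand-in tables and codings (the real distance_tables and tilings are
# not shown in this excerpt):
def _distance_table_roundtrip_demo():
    tables = [np.zeros((10, 10)) for _ in range(3)]  # 3 tilings, 10x10 each
    codings = [(5, 1), (4, 0), (3, 0)]               # assumed coding of one state
    for coding, table in zip(codings, tables):       # setValue
        table[coding] = 7.5
    return sum(t[c] for c, t in zip(codings, tables)) / 3  # value -> 7.5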
def printPoseTable3D(self):
    # Plots all actions for one pose.
    plt.close('all')
    fig = plt.figure()
    ax = []
    boxMarkerN = []
    gridValMin = 1
    gridValMax = 0
    for plotNumber in range(1):
        nxj = np.shape(self.pose_tables)[1] * 1j
        nyj = np.shape(self.pose_tables)[2] * 1j
        nzj = np.shape(self.pose_tables)[3] * 1j
        minimumPose = []
        maximumPose = []
        for i in range(3):
            minimumPose.append(self.tilingsShape["boundingBox"][i][0])
            maximumPose.append(self.tilingsShape["boundingBox"][i][1])
        minimumPose = np.array(minimumPose)
        maximumPose = np.array(maximumPose)
        grid_x, grid_y, grid_z = np.mgrid[minimumPose[0]:maximumPose[0]:nxj,
                                          minimumPose[1]:maximumPose[1]:nyj,
                                          minimumPose[2]:maximumPose[2]:nzj]
        grid_x = grid_x.flatten()
        grid_y = grid_y.flatten()
        grid_z = grid_z.flatten()
        gridVal = np.zeros(np.shape(grid_x)[0])
        for i in range(np.shape(grid_x)[0]):
            pose_codings = tiling.get_tile_coding(
                [grid_x[i], grid_y[i], grid_z[i]], self.tilings)
            for coding, pose_table in zip(pose_codings, self.pose_tables):
                gridVal[i] += pose_table[tuple(coding)]
            gridVal[i] /= self.num_tilings
        gridVal = np.where(gridVal == 0, np.nan, gridVal)
        ax.append(fig.add_subplot(111 + plotNumber, projection='3d'))
        gridValMin = min([gridValMin, np.nanmin(gridVal)])
        gridValMax = max([gridValMax, np.nanmax(gridVal)])
        norm = mpl.colors.Normalize(vmin=gridValMin, vmax=gridValMax)
        print("No: ", plotNumber, ' m: ', np.nanmin(gridVal), ' M: ', np.nanmax(gridVal))
        boxMarkerN.append(ax[plotNumber].scatter(
            grid_x, grid_y, grid_z, marker='o', c=gridVal, s=30, norm=norm))
        # ax[plotNumber].set_aspect('equal')
        ax[plotNumber].set_xlim(minimumPose[0], maximumPose[0])
        ax[plotNumber].set_ylim(minimumPose[1], maximumPose[1])
        ax[plotNumber].set_zlim(minimumPose[2], maximumPose[2])
        ax[plotNumber].set_xlabel('X')
        ax[plotNumber].set_ylabel('Y')
        ax[plotNumber].set_zlabel('Z')
        plt.title("Where to go")
        # Customize the view angle; azim rotates about z (left-handed).
        ax[plotNumber].view_init(elev=-5., azim=-30)
    fig.colorbar(boxMarkerN[0], shrink=0.5, aspect=5)
    plt.show()