Example #1
0
 def updateAllActions(self, dataStructureIndex):
     state = self.getState(dataStructureIndex, self.stateScene)
     state_codings = tiling.get_tile_coding(state, self.tilings)
     self.dataStructure[dataStructureIndex]["action"] = {}
     for action in self.actions.keys():
         action_idx = self.actions.index(action)
         indexList = self.getStateListDependingOnAction(
             dataStructureIndex, action, 5
         )  # sucht alle zu der Pose und Action passenden Actions #TODO wieviele Datensätze werden einbezogen (aktuell 5)
         self.dataStructure[dataStructureIndex]["action"][action] = None
         if indexList is not None:
             reward = 0
             for index in indexList:
                 reward += self.dataStructure[index]['score'][
                     self.stateScene]
             reward /= len(
                 indexList)  # TODO Gewichtung der verschiedenen Werte
             for coding, q_table in zip(state_codings, self.q_tables):
                 if not (q_table[tuple(coding) + (action_idx, )] == 0):
                     lr = 1
                 else:
                     lr = self.lr
                 delta = reward - q_table[tuple(coding) + (action_idx, )]
                 q_table[tuple(coding) + (action_idx, )] += lr * (
                     delta)  #TODO set learning rate
                 self.dataStructure[dataStructureIndex]["action"][
                     action] = q_table[tuple(coding) + (
                         action_idx, )] * 20 + 60  #nur zum ploten
Example #2
0
    def update(self, state, action, target):
        state_codings = tiling.get_tile_coding(
            state, self.tilings)  # [[5, 1], [4, 0], [3, 0]] ...
        action_idx = self.actions.index(action)

        for coding, q_table in zip(state_codings, self.q_tables):
            delta = target - q_table[tuple(coding) + (action_idx, )]
            q_table[tuple(coding) + (action_idx, )] += self.lr * (delta)
Example #3
0
    def value(self, state, action):  #value der Q-Table
        state_codings = tiling.get_tile_coding(
            state, self.tilings)  # [[5, 1], [4, 0], [3, 0]] ...
        action_idx = self.actions.index(action)

        value = 0
        for coding, q_table in zip(state_codings, self.q_tables):
            value += q_table[tuple(coding) + (action_idx, )]
        return value / self.num_tilings
Example #4
0
 def valuePoseActionTables(self, pose, action):
     #liest eine pose mit iener action aus der pose_action_tables
     pose_codings = tiling.get_tile_coding(
         pose, self.tilings)  # [[5, 1], [4, 0], [3, 0]] ...
     action_idx = self.qValue.actions.index(action)
     value = 0
     for coding, pose_action_table in zip(pose_codings,
                                          self.pose_action_tables):
         value += pose_action_table[tuple(coding) + (action_idx, )]
     return value / self.num_tilings
Example #5
0
 def getScalarField(self, dataStructure, pose6D):
     dataStructure = [{"jointAvg": dataStructure}]
     self.pose_tables = self.resetPoseTable()
     print("self.pose_tables: ", self.pose_tables)
     state = self.qValue.getState(0, self.qValue.stateScene, dataStructure)
     valueVectorPairs = self.qValue.valueVectorPairs(state)
     print("valueVectorPairs: ", valueVectorPairs)
     originPose = self.pose6Dto3D(pose6D)
     pose_codings = tiling.get_tile_coding(originPose, self.tilings)
     codingTable = []
     for element in valueVectorPairs:
         displacementVector = np.array(element["displacementVector"])
         newPose = originPose + displacementVector  #TODO: ist das hier richtig?
         pose_codings = tiling.get_tile_coding(newPose, self.tilings)
         value = element["value"]
         for coding, pose_table in zip(pose_codings, self.pose_tables):
             if np.any(coding[0] + coding[1] * 100 +
                       coding[2] * 10000 in codingTable):
                 print("bereits vorhanden... wähle anderes sampling: ",
                       coding)
             else:
                 codingTable.append(coding[0] + coding[1] * 100 +
                                    coding[2] * 10000)
             pose_table[tuple(coding)] = value
Example #6
0
 def updateAllActionsMax(self, dataStructureIndex):
     state = self.getState(dataStructureIndex, self.stateScene)
     state_codings = tiling.get_tile_coding(state, self.tilings)
     self.dataStructure[dataStructureIndex]["action"] = {}
     rewardListAction = self.getStateList3(
         dataStructureIndex, len(self.dataStructure)
     )  #TODO wieviele Datensätze werden einbezogen (aktuell ALLE)
     for action in self.actions.keys():
         action_idx = self.actions.index(action)
         reward = rewardListAction[action]
         self.dataStructure[dataStructureIndex]["action"][action] = None
         if reward is not None:
             for coding, q_table in zip(state_codings,
                                        self.q_tables):  #MAXIMUM
                 if q_table[tuple(coding) + (action_idx, )] < reward:
                     q_table[tuple(coding) + (action_idx, )] = reward
                     #self.dataStructure[dataStructureIndex]["action"][action] = q_table[tuple(coding) + (action_idx,)] * 20 + 60 #nur zum ploten
             for coding, q_tableList in zip(
                     state_codings,
                     self.q_tablesList):  #MEAN & in trainWithAllData
                 q_tableList[np.ravel_multi_index(
                     tuple(coding) + (action_idx, ),
                     (self.state_sizes[0] +
                      (self.actions.length, )))].append(reward)
Example #7
0
 def getDecisionScalarField(self, dataStructure):
     print("len: ", len(dataStructure))
     #schreibt alle elemente der Datastructure in die pose_action_tables
     self.pose_action_tables = self.resetPoseActionTable()
     for el in dataStructure:
         state = self.qValue.getState(0, self.qValue.stateScene, [el])
         valueVectorPairs = self.qValue.valueVectorPairs(state)
         #valueVectorPairs =  [{'displacementVector': array([0., 0., 0.]), 'value': nan, 'actionIdx': 0},
         # {'displacementVector': array([-0.1,  0. ,  0. ]), 'value': nan, 'actionIdx': 1},....]
         # el = {'handCenterAvg':
         # {'depositBox': {'timestamp': 398.0, 'x': -607.092, 'y': 209.03591775, 'z': 1231.0925},
         #  'getBox': {'timestamp': 286.0, 'x': -506.46900000000005, 'y': -272.51025, 'z': 1341.4375}},
         # 'jointAvg':
         #  {'depositBox': {'HANDDISTANCE': 415.4124850699847, 'LELBW': 116.20698380983922, 'LFOREARM': 225.95153472018518, 'LSHOULDER2D': 48.9902370141013, 'LUPPERARM': 413.29950395888295, 'OUTOFMIDDLEDISTANCE': 183.42403653492383, 'RELBW': 71.18893798968479, 'RFOREARM': 236.50801522081682, 'RSHOULDER2D': 13.73358209986485, 'RUPPERARM': 418.8775285707044, 'SHOULDERDISTANCE': 340.91170633138495, 'timestamp': 398.0},
         # 'getBox': {'HANDDISTANCE': 414.63783559934245, 'LELBW': 123.27435898724582, 'LFOREARM': 215.563763188211, 'LSHOULDER2D': 65.05766101371253, 'LUPPERARM': 400.0593395108174, 'OUTOFMIDDLEDISTANCE': 160.31681215348596, 'RELBW': 122.6475539529318, 'RFOREARM': 232.98312677688182, 'RSHOULDER2D': 65.59852835250899, 'RUPPERARM': 382.69829045613574, 'SHOULDERDISTANCE': 302.20686433016147, 'timestamp': 286.0}},
         # 'nameIndex': '390',
         # 'path': [{'distance': 504.1703271997581, 'duration': 112.0, 'end': 'depositBox', 'path': array([-100.623     ,  481.54616775, -110.345     ]), 'start': 'getBox'}],
         # 'pose': array([-0.1,  0.4,  0. ,  0. ,  0. ,  0. ]), 'poseIndex': 387, 'score': {'depositBox': 0.26981563316141316,'getBox': 0.0}}
         originPose = self.pose6Dto3D(
             el["pose"]
         )  #es wird für die jeweilige Pose angegeben, ob eine Action empfohlen wird
         pose_codings = tiling.get_tile_coding(originPose, self.tilings)
         for element in valueVectorPairs:
             if not np.isnan(element["value"]):
                 for coding, pose_table in zip(pose_codings,
                                               self.pose_action_tables):
                     if pose_table[tuple(coding) +
                                   (element["actionIdx"], )] == 0:
                         pose_table[tuple(coding) + (
                             element["actionIdx"], )] = element["value"]
                     else:
                         pose_table[
                             tuple(coding) + (element["actionIdx"], )] = (
                                 element["value"] +
                                 pose_table[tuple(coding) +
                                            (element["actionIdx"], )]) / 2
 def setValue(self, state, value):  #value der Distance-Table
     state_codings = tiling.get_tile_coding(state, self.tilings)
     for coding, distance_table in zip(state_codings, self.distance_tables):
         distance_table[tuple(coding)] = value
 def value(self, state):  #value der Distance-Table
     state_codings = tiling.get_tile_coding(state, self.tilings)
     value = 0
     for coding, distance_table in zip(state_codings, self.distance_tables):
         value += distance_table[tuple(coding)]
     return value / self.num_tilings
Example #10
0
    def printPoseTable3D(self):
        # für eine pose alle actions
        plt.close('all')
        fig = plt.figure()
        ax = []
        boxMarkerN = []
        gridValMin = 1
        gridValMax = 0
        for plotNumber in range(1):
            nxj = np.shape(self.pose_tables)[1] * 1j
            nyj = np.shape(self.pose_tables)[2] * 1j
            nzj = np.shape(self.pose_tables)[3] * 1j
            minimumPose = []
            maximumPose = []
            for i in range(3):
                minimumPose.append(self.tilingsShape["boundingBox"][i][0])
                maximumPose.append(self.tilingsShape["boundingBox"][i][1])
            minimumPose = np.array(minimumPose)
            maximumPose = np.array(maximumPose)

            grid_x, grid_y, grid_z = np.mgrid[
                minimumPose[0]:maximumPose[0]:nxj,
                minimumPose[1]:maximumPose[1]:nyj,
                minimumPose[2]:maximumPose[2]:nzj]
            grid_x = grid_x.flatten()
            grid_y = grid_y.flatten()
            grid_z = grid_z.flatten()
            gridVal = np.zeros(np.shape(grid_x)[0])
            for i in range(np.shape(grid_x)[0]):
                pose_codings = tiling.get_tile_coding(
                    [grid_x[i], grid_y[i], grid_z[i]], self.tilings)
                for coding, pose_table in zip(pose_codings, self.pose_tables):
                    #print("coding: ", coding, " pose; ", pose_table[tuple(coding)])
                    gridVal[i] += pose_table[tuple(coding)]
                gridVal[i] /= self.num_tilings
            gridVal = np.where(gridVal == 0, np.NaN, gridVal)

            ax.append(fig.add_subplot(111 + plotNumber, projection='3d'))
            gridValMin = min([gridValMin, np.nanmin(gridVal)])
            gridValMax = max([gridValMax, np.nanmax(gridVal)])
            # print( " gridValMin: ", gridValMin, "gridValMax: ", gridValMax)
            norm = mpl.colors.Normalize(vmin=gridValMin, vmax=gridValMax)
            print("No: ", plotNumber, ' m: ', np.nanmin(gridVal), ' M: ',
                  np.nanmax(gridVal))
            boxMarkerN.append(ax[plotNumber].scatter(grid_x,
                                                     grid_y,
                                                     grid_z,
                                                     marker='o',
                                                     c=gridVal,
                                                     s=30,
                                                     norm=norm))

            #ax[plotNumber].set_aspect('equal')
            ax[plotNumber].set_xlim(minimumPose[1], maximumPose[1])
            ax[plotNumber].set_ylim(minimumPose[1], maximumPose[1])
            ax[plotNumber].set_zlim(minimumPose[2], maximumPose[2])
            ax[plotNumber].set_xlabel('X')
            ax[plotNumber].set_ylabel('Y')
            ax[plotNumber].set_zlabel('Z')
            plt.title("Where to go")
            # Customize the view angle
            ax[plotNumber].view_init(elev=-5.,
                                     azim=-30)  #azim um z, links händisch
        fig.colorbar(boxMarkerN[0], shrink=0.5, aspect=5)
        plt.show()