Example #1
    def calcRewardFunction(self,
                           goal=42.0,
                           start=0.0,
                           obstacle=-7.0,
                           edge=0,
                           ret=False,
                           vrs=-1,
                           legVal=0.0,
                           legIdx=None):
        if vrs < 1:
            vrs = cmn._CALC_REWARD_VRS
        self.rewards = {}
        obsRateThresh = 1.0
        critRate = np.tan(np.pi / 20)  # i.e. slopes shallower than 9 deg are acceptable
        negCritRate = np.tan(np.pi / 15)  # looser 12 deg threshold for negative (downhill) rates

        startPos = np.array(self.start)
        goalPos = np.array(self.goal)

        #For use later
        goalRwrds = np.zeros(shape=(self.rows, self.cols))
        maxRad = (self.rows + self.cols) / np.sqrt(2)
        fid = cmn._sim_res * 5
        nns = cmn.neighbors(mat=goalRwrds,
                            rad=maxRad,
                            r=self.goal[0],
                            c=self.goal[1])
        for nn in zip(list(nns[0]), list(nns[1])):
            tmpR = np.linalg.norm([nn[0] - goalPos[0], nn[1] - goalPos[1]])
            if np.isnan(tmpR) or tmpR < fid:
                tmpR = fid  # EPS
            goalRwrds[nn] = goal * ((1.0 / maxRad) * np.abs(tmpR - maxRad) +
                                    1.0 / np.sqrt(tmpR))
        #goalRwrds[self.goal] = goal

        if legIdx is None:
            legRwrds = np.zeros(shape=(self.rows, self.cols))
        else:
            print('calculating leg reward using {}'.format(
                self.dsg.shortestRoverPathNodes[legIdx]))
            legRwrds = np.zeros(shape=(self.rows, self.cols))
            tmpPath = copy.copy(self.dsg.shortestRoverPathNodes[legIdx])
            tmpPath = [cmn.convert_VREPXY(tmpNode) for tmpNode in tmpPath]
            for tmpNode in tmpPath:
                maxRad = 5.0
                nns = cmn.neighbors(mat=legRwrds,
                                    rad=maxRad,
                                    r=tmpNode[0],
                                    c=tmpNode[1])
                for nn in zip(list(nns[0]), list(nns[1])):
                    tmpR = np.linalg.norm(
                        [nn[0] - tmpNode[0], nn[1] - tmpNode[1]])
                    legRwrds[nn] += legVal * (np.abs(tmpR - maxRad) / maxRad)

            cmap = cm.Spectral
            cmap.set_bad(color='k')
            cmn.createArrImg(
                legRwrds,
                cmap=cmap,
                plotTitle='Base State Rewards for Leg {}'.format(legIdx),
                fn=os.path.join(cmn._IMG_FLDR,
                                'base_rewards_leg{}.svg'.format(legIdx)),
                show=False)

        maxEdge = np.max([self.rows, self.cols])
        for i in range(len(cmn._actions)):
            tmpRewardMap = np.full(shape=(self.rows, self.cols),
                                   fill_value=0.0,
                                   dtype=float)

            obsRwrds = np.zeros(shape=tmpRewardMap.shape)
            # Rewards based on self.rates first.
            for r in range(self.rows):
                for c in range(self.cols):
                    rate = self.rates[0][cmn._actions[i]][(r, c)]
                    if vrs == 1:
                        # Hard threshold: any rate steeper than the
                        # obstacle threshold gets the full penalty.
                        if np.abs(rate.item()) > obsRateThresh:
                            obsRwrds[(r, c)] = obstacle
                    elif vrs == 2:
                        # Quadratic penalty on positive (uphill) rates only.
                        if rate <= 0:
                            obsRwrds[(r, c)] = 0.0
                        else:
                            obsRwrds[(r, c)] = obstacle * np.power(
                                rate.item(), 2)
                    elif vrs == 3:
                        # Small bonus below the critical rate, exponential
                        # penalty above it.
                        if np.abs(rate) <= critRate:
                            obsRwrds[(r, c)] = 2.0
                        else:
                            obsRwrds[(r, c)] = obstacle * (
                                np.exp(np.abs(rate)) - np.exp(critRate))
                    elif vrs == 4:
                        # Direction-dependent critical rate: linear penalty
                        # above it, sigmoid ramp below it.
                        absRate = np.abs(rate)
                        tmpCritRate = negCritRate if rate < 0 else critRate
                        if absRate > tmpCritRate:
                            obsRwrds[(r, c)] = obstacle - np.abs(
                                absRate - tmpCritRate)
                        else:
                            obsRwrds[(r, c)] = obstacle * cmn.sigmoid(
                                cmn.scaleRange(
                                    [0, float(absRate) / tmpCritRate, 1],
                                    -6, 6)[1])

            maxReward = np.nanmax(obsRwrds)
            minReward = np.nanmin(obsRwrds)
            maxAbReward = np.nanmax([np.abs(maxReward), np.abs(minReward)])
            '''
            goalSlopeRad = (maxEdge / 2.0) * np.sqrt(maxEdge)
            nns = cmn.neighbors(mat=tmpRewardMap, rad=goalSlopeRad, r=self.goal[0], c=self.goal[1])
            for nn in zip(list(nns[0]), list(nns[1])):
                npNN = np.array(nn)
                goalRwrds[nn] = (1 / 5.0) * goal * (np.linalg.norm(npNN - self.goal) - goalSlopeRad)
            maxRad = 5
            fid = 0.4
            for rad in np.arange(maxRad, 0, -fid):
                nns = cmn.neighbors(mat=tmpRewardMap, rad=rad, r=self.goal[0], c=self.goal[1])
                for nn in zip(list(nns[0]), list(nns[1])):
                    if obsRwrds[nn] < 0:
                        priorInfluence = -1 * cmn.sigmoid(
                            cmn.scaleRange([0, float(obsRwrds[nn] / minReward), 1], -6, 6)[1])
                    else:
                        priorInfluence = cmn.sigmoid(
                            cmn.scaleRange([0, float(obsRwrds[nn] / maxReward), 1], -6, 6)[1])
                    goalRwrds[nn] = goal * (
                            priorInfluence + cmn.sigmoid(cmn.scaleRange([0, maxRad - rad, maxRad], -6, 6)[1]))
            goalRwrds[self.goal] = goal
            '''

            tmpRewardMap = np.add(goalRwrds, obsRwrds)
            tmpRewardMap = np.add(legRwrds, tmpRewardMap)

            # EDGES: assign the edge reward along all four borders
            tmpRewardMap[0, :] = tmpRewardMap[-1, :] = edge
            tmpRewardMap[:, 0] = tmpRewardMap[:, -1] = edge

            cmap = cm.Spectral
            cmap.set_bad(color='k')
            cmn.createArrImg(
                obsRwrds,
                cmap=cmap,
                plotTitle='Obstacle Rewards for Leg {} via {}'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_obs.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            cmn.overlayArrImgs(
                arr1=self.height_map,
                arr2=obsRwrds,
                cmap1=cm.gray,
                cmap2=cmap,
                alpha1=1,
                alpha2=.8,
                arr2Masked=False,
                plotTitle=('Obstacle Rewards for Leg {} via {} '
                           '(with heightmap)').format(legIdx,
                                                      cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_obs_ovr.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            cmn.createArrImg(
                goalRwrds,
                cmap=cmap,
                plotTitle='Goal Rewards for Leg {} via {}'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_goal.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            cmn.overlayArrImgs(
                arr1=self.height_map,
                arr2=goalRwrds,
                cmap1=cm.gray,
                cmap2=cmap,
                alpha1=1,
                alpha2=.8,
                arr2Masked=False,
                plotTitle=('Goal Rewards for Leg {} via {} '
                           '(with heightmap)').format(legIdx,
                                                      cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_goal_ovr.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            #GOAL AND START
            maxRad = 5.0
            fid = 0.4
            for rad in np.arange(maxRad, 0, -fid):
                tmpRewardMap[cmn.neighbors(
                    mat=tmpRewardMap,
                    rad=rad,
                    r=self.start[0],
                    c=self.start[1])] = start * cmn.sigmoid(
                        cmn.scaleRange([0, maxRad - rad, maxRad], -6, 6)[1])

            cmap = cm.Spectral
            cmap.set_bad(color='k')
            cmn.createArrImg(
                tmpRewardMap,
                cmap=cmap,
                plotTitle='Base State Rewards for Leg {} via {}'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR,
                    'base_rewards_l{}_{}.svg'.format(legIdx, cmn._actions[i])),
                show=False)

            cmn.overlayArrImgs(
                arr1=self.height_map,
                arr2=tmpRewardMap,
                cmap1=cm.gray,
                cmap2=cmap,
                alpha1=1,
                alpha2=.8,
                arr2Masked=False,
                plotTitle=
                'Base State Rewards for Leg {} via {} (with heightmap)'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_ovr.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            self.rewards.update({cmn._actions[i]: tmpRewardMap.copy()})

        shutil.copy('graph_search.py',
                    os.path.join(cmn._OUT_FLDR, 'graph_search.py'))
        if ret:
            return self.rewards.copy()
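
Example #1 calls several helpers from the `cmn` module that are not shown. The sketch below is a guess at their behavior from the call sites alone, not the actual implementations: `neighbors` must return an np.where-style index tuple (the code indexes arrays with its result directly), `sigmoid` is assumed to be the standard logistic, and `scaleRange` must map a list linearly onto a new interval (call sites pass [0, x, 1] with x in [0, 1] and read element [1]).

import numpy as np


def neighbors(mat, rad, r, c):
    # Hypothetical: index tuple of all cells of `mat` within Euclidean
    # distance `rad` of cell (r, c), usable as mat[neighbors(...)].
    rows, cols = np.indices(mat.shape)
    return np.where(np.hypot(rows - r, cols - c) <= rad)


def sigmoid(x):
    # Standard logistic function.
    return 1.0 / (1.0 + np.exp(-x))


def scaleRange(vals, lo, hi):
    # Hypothetical: rescale `vals` linearly so min(vals) -> lo and
    # max(vals) -> hi; scaleRange([0, x, 1], -6, 6)[1] is then -6 + 12x.
    vals = np.asarray(vals, dtype=float)
    return lo + (vals - vals.min()) * (hi - lo) / (vals.max() - vals.min())

Under this reading, indexing tmpRewardMap with the result of cmn.neighbors selects a disc of cells around a center, which is how the start and goal shaping above builds its concentric sigmoid rings.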
Example #2
def test_neighbors():
    neighbors_list = neighbors('ACG', 1)
    assert neighbors_list == {
        'CCG', 'TCG', 'GCG', 'AAG', 'ATG', 'AGG', 'ACA', 'ACC', 'ACT', 'ACG'
    }
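
Examples #2 through #4 test a different neighbors: the d-neighborhood of a DNA pattern, i.e. every string within Hamming distance d of it. The implementation is not shown; below is a minimal recursive sketch consistent with the expected output in the test above (the `hamming_distance` helper is an assumed name).

def hamming_distance(p, q):
    # Number of positions at which two equal-length strings differ.
    return sum(a != b for a, b in zip(p, q))


def neighbors(pattern, d):
    # All strings whose Hamming distance from `pattern` is at most d.
    if d == 0:
        return {pattern}
    if len(pattern) == 1:
        return {'A', 'C', 'G', 'T'}
    neighborhood = set()
    for suffix_neighbor in neighbors(pattern[1:], d):
        if hamming_distance(pattern[1:], suffix_neighbor) < d:
            # Mismatch budget left over: any nucleotide may lead.
            for nucleotide in 'ACGT':
                neighborhood.add(nucleotide + suffix_neighbor)
        else:
            # Budget exhausted: the first symbol must stay fixed.
            neighborhood.add(pattern[0] + suffix_neighbor)
    return neighborhood

For neighbors('ACG', 1) this yields exactly the ten strings asserted above: the pattern itself plus its nine single-substitution variants.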
Example #3
def neighbors_challenge():
    with open('../data/challenges/dataset_3014_3.txt', 'r') as f:
        lines = f.read().splitlines()
    neighbors_list = neighbors(lines[0], int(lines[1]))
    for neighbor in neighbors_list:
        print(neighbor)
Example #4
def test_neighbors_big_example():
    with open('../data/tests/Neighbors.txt', 'r') as f:
        lines = f.read().splitlines()
    supposed_result = set(lines[4:])
    neighbors_list = neighbors(lines[1], int(lines[2]))
    assert neighbors_list == supposed_result