def update_value(moved, state_value, ag_cars, params):
    """Return the expected value of one state after moving ``moved`` cars.

    Args:
        moved: Number of cars moved; it is added to agency 1's count and
            subtracted from agency 2's count. Each moved car costs 2.
        state_value: Current value estimates, multiplied element-wise with
            the state probabilities (assumed to broadcast against a
            ``(total_cars + 1, total_cars + 1)`` grid -- TODO confirm).
        ag_cars: Pair unpacked as ``(ag2, ag1)``, i.e. the count for
            agency 2 comes first.
        params: Mapping read through ``.get`` for the keys ``'total_cars'``,
            ``'carflow'``, ``'jp1sum'``, ``'jp2sum'`` and ``'gamma'``.

    Returns:
        The sum over all next states of
        ``probability * (reward + gamma * state_value)``.
    """
    total_cars = params.get('total_cars')
    n_counts = total_cars + 1
    # carflow may be a plain list; converting it up front lets the scalar
    # offsets below broadcast even when the car counts are Python ints.
    carflow = np.asanyarray(params.get('carflow'))

    # Convert state to nb of cars in each agency
    ag2, ag1 = ag_cars

    # Probability of rents and returns - agency 1
    nex_state1 = ag1 + moved + carflow
    # NOTE(review): ut_closest.main presumably returns the positions in
    # nex_state1 closest to 0 and total_cars -- confirm against the helper.
    idX = ut_closest.main([0, total_cars], nex_state1)
    proba_ag1 = params.get('jp1sum')[0, [range(idX[0], idX[1] + 1)]]

    # Probability of rents and returns - agency 2
    nex_state2 = ag2 - moved + carflow
    idX = ut_closest.main([0, total_cars], nex_state2)
    proba_ag2 = params.get('jp2sum')[0, [range(idX[0], idX[1] + 1)]]

    # State probabilities: column (agency 2) times row (agency 1)
    stateProba = np.reshape(proba_ag2, [n_counts, 1]) * proba_ag1

    # State rewards: 10 per unit of max(cars after move - k, 0)
    # for k = 0 .. total_cars.  An ndarray is required here: subtracting a
    # Python list from a Python int raises TypeError.
    counts = np.arange(n_counts)
    poss_rent1 = np.maximum(ag1 + moved - counts, 0)
    reward1 = poss_rent1 * 10
    poss_rent2 = np.maximum(ag2 - moved - counts, 0)
    reward2 = poss_rent2 * 10
    state_reward = np.reshape(reward2, [n_counts, 1]) + reward1 - np.abs(moved) * 2

    # Issue state value
    v = np.sum(np.multiply(stateProba, state_reward + state_value * params.get('gamma')))
    return v
Example #2
0
def initialize_centers(dataset, k, method):
    """Pick ``k`` initial cluster centers from ``dataset``.

    Args:
        dataset: Indexable collection of points.
        k: Number of centers requested.
        method: One of
            ``'random'`` -- the first ``k`` points of a random shuffle;
            ``'kmpp'``   -- sequential weighted sampling, where after each
                            pick every point is weighted by
                            ``distances[cids[0]]`` as returned by
                            ``closest_clusters(centers, point)``;
            ``'hockey'`` -- element ``[1]`` of ``ut_closest.main`` applied to
                            three reference points and the dataset;
            a list of length ``k`` -- returned unchanged.

    Returns:
        The chosen centers, or ``None`` when ``method`` matches none of the
        options above (including a list whose length is not ``k``).

    Raises:
        ValueError: if ``'kmpp'`` is asked for at least one center from an
            empty dataset.
    """
    if method == 'random':
        ids = list(range(len(dataset)))
        random.shuffle(ids)
        return [dataset[i] for i in ids[:k]]

    if method == 'kmpp':
        if k > 0 and len(dataset) == 0:
            raise ValueError("cannot pick centers from an empty dataset")

        chances = [1] * len(dataset)
        centers = []

        for _ in range(k):
            # Normalise once per round; summing inside the comprehension
            # would make this step quadratic in the dataset size.
            total = sum(chances)
            if total > 0:
                chances = [x / total for x in chances]
            else:
                # Every weight is zero (all points coincide with a chosen
                # center): fall back to a uniform draw instead of dividing
                # by zero.
                chances = [1 / len(chances)] * len(chances)

            # Roulette-wheel selection; if rounding keeps the running total
            # below r, the loop ends on the last point.
            r = random.random()
            acc = 0.0
            for index, chance in enumerate(chances):
                if acc + chance >= r:
                    break
                acc += chance
            centers.append(dataset[index])

            # Re-weight every point by its distance to the nearest center.
            for index, point in enumerate(dataset):
                cids, distances = closest_clusters(centers, point)
                chances[index] = distances[cids[0]]
        return centers

    if method == 'hockey':
        pts = [[0, 1], list(mean(dataset, axis=0)), [1, 0]]
        return ut_closest.main(pts, dataset)[1]

    if isinstance(method, list) and len(method) == k:
        return method

    # Unrecognised method: keep the historical behaviour of returning None.
    return None
def update_value(moved, state_value, ag_cars, params):
    """Expected state value with the free-shuttle and parking-penalty rules.

    Args:
        moved: Number of cars moved; added to agency 1's count and
            subtracted from agency 2's count.
        state_value: Current value estimates, combined element-wise with the
            state probabilities.
        ag_cars: Pair unpacked as ``(ag2, ag1)``.
        params: Mapping read through ``.get`` for ``'total_cars'``,
            ``'carflow'``, ``'jp1sum'``, ``'jp2sum'``, ``'mat_sign'``,
            ``'jointProba1'``, ``'jointProba2'`` and ``'gamma'``.

    Returns:
        Sum of ``probability * (reward - penalty + gamma * state_value)``.
    """
    # Convert state to nb of cars in each agency
    cars_ag2, cars_ag1 = ag_cars
    total_cars = params.get('total_cars')
    n_counts = total_cars + 1
    flow = params.get('carflow')

    # Probability of rents and returns - agency 1
    candidates1 = cars_ag1 + moved + flow
    span1 = ut_closest.main([0, total_cars], candidates1)
    proba_ag1 = params.get('jp1sum')[0, [range(span1[0], span1[1] + 1)]]

    # Probability of rents and returns - agency 2
    candidates2 = cars_ag2 - moved + flow
    span2 = ut_closest.main([0, total_cars], candidates2)
    proba_ag2 = params.get('jp2sum')[0, [range(span2[0], span2[1] + 1)]]

    # State probabilities: column (agency 2) times row (agency 1)
    stateProba = np.reshape(proba_ag2, [n_counts, 1]) * proba_ag1

    # State rewards: for every offset, shift the sign matrix, clamp it to
    # [-20, 20], scale by 10 and weight it with each joint probability table
    sign_matrix = params.get('mat_sign')
    joint1 = params.get('jointProba1')
    joint2 = params.get('jointProba2')
    reward_row1 = np.zeros([1, n_counts])
    reward_row2 = np.zeros([1, n_counts])
    for offset in range(n_counts):
        clamped = np.maximum(sign_matrix + offset, -20)
        clamped = np.minimum(clamped, 20)
        gain = clamped * 10
        reward_row1[0, offset] = np.sum(np.multiply(gain, joint1))
        reward_row2[0, offset] = np.sum(np.multiply(gain, joint2))

    # New condition1: one car moved in the positive direction is free,
    # so it is removed before the moving cost is charged
    charged_moves = moved - 1 if moved > 0 else moved
    moving_cost = np.abs(charged_moves) * 2
    state_reward = np.reshape(reward_row2, [n_counts, 1]) + reward_row1 - moving_cost

    # New condition2: counts above 10 at a location cost 4 per night;
    # the penalties of both locations are added on the state grid
    penalty_row = [0] * 11 + [4] * (total_cars - 10)
    penalty_col = np.reshape(penalty_row, [n_counts, 1])
    state_penalty = penalty_col + penalty_row

    # Issue state value
    discounted = state_value * params.get('gamma')
    weighted = np.multiply(stateProba, state_reward - state_penalty + discounted)
    return np.sum(weighted)
Example #4
0
def ut_cumsum_thresh(vec, thresh):
    """Locate where the normalised cumulative sum of ``vec`` meets ``thresh``.

    The values are sorted in descending order first, so the returned
    position refers to the sorted vector, not to the original ordering.

    Args:
        vec: Numeric vector.
        thresh: Target value for the cumulative fraction.

    Returns:
        Element ``[0]`` of ``ut_closest.main([thresh], cumulative_fractions)``
        (presumably the index of the closest cumulative value -- confirm
        against the helper).
    """
    # Make sure it is sorted (largest first)
    so_vec = np.sort(vec)[::-1]
    # Cumulative sum of the *sorted* vector, as a fraction of the total.
    # The previous code accumulated the unsorted input and never used so_vec.
    cs_vec = np.cumsum(so_vec) / np.sum(so_vec)
    # Find closest value to threshold
    idV = ut_closest.main([thresh], cs_vec)[0]
    return idV
def evaluate_policy(optimal_policy, state_value, params):
    """Compute an expected value from rent/return probabilities built here.

    NOTE(review): this function reads ``ag1``, ``ag2`` and ``moved`` but never
    defines them, so it raises NameError unless they exist as module-level
    globals. ``optimal_policy`` is accepted but not used. The body looks like
    an unfinished adaptation of ``update_value`` -- confirm the intent before
    relying on it.

    Args:
        optimal_policy: Unused in the visible code.
        state_value: Value estimates multiplied by ``gamma`` and combined with
            the rewards.
        params: Mapping read through ``.get`` for ``'total_cars'``,
            ``'expected_rent'``, ``'expected_return'`` and ``'gamma'``.

    Returns:
        Sum of ``probability * (reward + gamma * state_value)``.
    """
    # initialize change

    # Net change in car count considered: every integer from
    # -(total_cars + 5) to total_cars + 5 inclusive
    carflow     =   list(range(-params.get('total_cars')-5, 0, 1)) + list(range(params.get('total_cars') + 6))

    # Probability of rents and returns - agency 1
    # (ut_poisson_proba.main presumably evaluates a Poisson pmf with the given
    # mean at each listed count -- confirm against the helper)
    proba1_rent =   ut_poisson_proba.main(params.get('expected_rent')[0], list(range(params.get('total_cars') + 1 + 5)))
    proba1_rtrn =   ut_poisson_proba.main(params.get('expected_return')[0], list(range(params.get('total_cars') + 1 + 5)))
    # Outer product: rows follow the rent counts, columns the return counts
    jointProba1 =   np.reshape(proba1_rent, [params.get('total_cars') + 1 + 5, 1]) * proba1_rtrn
    jp1sum      =   ut_sum_diagonals.main(jointProba1)
    # NOTE(review): ag1 and moved are not defined in this function
    nex_state1  =   ag1 + moved + carflow
    idX         =   ut_closest.main([0, params.get('total_cars')], nex_state1)
    proba_ag1   =   jp1sum[0, [range(idX[0], idX[1] + 1)]]

    # Probability of rents and returns - agency 2
    proba2_rent =   ut_poisson_proba.main(params.get('expected_rent')[1], range(params.get('total_cars') + 1 + 5))
    proba2_rtrn =   ut_poisson_proba.main(params.get('expected_return')[1], range(params.get('total_cars') + 1 + 5))
    jointProba2 =   np.reshape(proba2_rent, [params.get('total_cars') + 1 + 5, 1]) * proba2_rtrn
    jp2sum      =   ut_sum_diagonals.main(jointProba2)
    # NOTE(review): ag2 and moved are not defined in this function
    nex_state2  =   ag2 - moved + carflow
    idX         =   ut_closest.main([0, params.get('total_cars')], nex_state2)
    proba_ag2   =   jp2sum[0, [range(idX[0], idX[1] + 1)]]

    # State probabilities: column (agency 2) times row (agency 1)
    stateProba  =   np.reshape(proba_ag2, [params.get('total_cars') + 1, 1]) * proba_ag1

    # State rewards
    # NOTE(review): subtracting a Python list only works when the left operand
    # is a NumPy scalar/array; with plain ints this raises TypeError
    poss_rent1  =   ag1 + moved - list(range(params.get('total_cars') + 1))
    poss_rent1  =   np.maximum(poss_rent1, 0)
    reward1     =   poss_rent1 * 10
    poss_rent2  =   ag2 - moved - list(range(params.get('total_cars') + 1))
    poss_rent2  =   np.maximum(poss_rent2, 0)
    reward2     =   poss_rent2 * 10
    # Each moved car costs 2, regardless of direction
    state_reward=   np.reshape(reward2, [params.get('total_cars') + 1, 1]) + reward1 - np.abs(moved) * 2

    # Issue state value
    v = np.sum(np.multiply(stateProba, state_reward + state_value * params.get('gamma')))
    return v
def update_value(moved, state_value, ag_cars, params):
    """Expected state value using rewards from the joint probability tables.

    Args:
        moved: Number of cars moved; added to agency 1's count and
            subtracted from agency 2's count. Each moved car costs 2.
        state_value: Current value estimates, combined element-wise with the
            state probabilities.
        ag_cars: Pair unpacked as ``(ag2, ag1)``.
        params: Mapping read through ``.get`` for ``'total_cars'``,
            ``'carflow'``, ``'jp1sum'``, ``'jp2sum'``, ``'mat_sign'``,
            ``'jointProba1'``, ``'jointProba2'`` and ``'gamma'``.

    Returns:
        Sum of ``probability * (reward + gamma * state_value)``.
    """
    # Convert state to nb of cars in each agency
    cars_ag2, cars_ag1 = ag_cars
    total_cars = params.get('total_cars')
    n_counts = total_cars + 1
    flow = params.get('carflow')

    # Probability of rents and returns - agency 1
    reachable1 = cars_ag1 + moved + flow
    edges1 = ut_closest.main([0, total_cars], reachable1)
    proba_ag1 = params.get('jp1sum')[0, [range(edges1[0], edges1[1] + 1)]]

    # Probability of rents and returns - agency 2
    reachable2 = cars_ag2 - moved + flow
    edges2 = ut_closest.main([0, total_cars], reachable2)
    proba_ag2 = params.get('jp2sum')[0, [range(edges2[0], edges2[1] + 1)]]

    # State probabilities: column (agency 2) times row (agency 1)
    stateProba = np.reshape(proba_ag2, [n_counts, 1]) * proba_ag1

    # State rewards: shift the sign matrix by each offset, clamp it to
    # [-20, 20], scale by 10 and weight it with each joint probability table
    sign_matrix = params.get('mat_sign')
    joint1 = params.get('jointProba1')
    joint2 = params.get('jointProba2')
    row_ag1 = np.zeros([1, n_counts])
    row_ag2 = np.zeros([1, n_counts])
    for offset in range(n_counts):
        bounded = np.maximum(sign_matrix + offset, -20)
        bounded = np.minimum(bounded, 20)
        payoff = bounded * 10
        row_ag1[0, offset] = np.sum(np.multiply(payoff, joint1))
        row_ag2[0, offset] = np.sum(np.multiply(payoff, joint2))

    moving_cost = np.abs(moved) * 2
    state_reward = np.reshape(row_ag2, [n_counts, 1]) + row_ag1 - moving_cost

    # Issue state value
    discounted = state_value * params.get('gamma')
    return np.sum(np.multiply(stateProba, state_reward + discounted))