def update_value(moved, state_value, ag_cars, params):
    """Return the expected value of applying ``moved`` in the state ``ag_cars``.

    A probability vector is looked up for each agency, the two vectors are
    combined into a joint table by an outer product, and every cell is
    weighted by its reward plus the discounted ``state_value`` before the
    table is summed.

    Args:
        moved: Number of cars transferred. It is added to agency 1 and
            subtracted from agency 2; each transferred car costs 2.
        state_value: Current value estimates. Must broadcast against a
            ``(total_cars + 1, total_cars + 1)`` table.
        ag_cars: Pair unpacked as ``(ag2, ag1)``.
        params: Mapping providing ``'carflow'``, ``'total_cars'``,
            ``'jp1sum'``, ``'jp2sum'`` and ``'gamma'``.

    Returns:
        The sum over the joint table of
        ``probability * (reward + gamma * state_value)``.
    """
    total_cars = params.get('total_cars')
    carflow = params.get('carflow')
    n_states = total_cars + 1

    # Convert state to nb of cars in each agency
    ag2, ag1 = ag_cars
    cars1 = ag1 + moved
    cars2 = ag2 - moved

    # Probability of rents and returns - agency 1
    # np.add (instead of ``+``) keeps this working when the counts are plain
    # Python ints and carflow is a list, where ``int + list`` would raise.
    nex_state1 = np.add(cars1, carflow)
    # NOTE(review): idX is presumably the positions in nex_state1 closest to
    # 0 and total_cars -- confirm against ut_closest.main.
    idX = ut_closest.main([0, total_cars], nex_state1)
    proba_ag1 = params.get('jp1sum')[0, [range(idX[0], idX[1] + 1)]]

    # Probability of rents and returns - agency 2
    nex_state2 = np.add(cars2, carflow)
    idX = ut_closest.main([0, total_cars], nex_state2)
    proba_ag2 = params.get('jp2sum')[0, [range(idX[0], idX[1] + 1)]]

    # State probabilities: agency 2 along the rows, agency 1 along the columns
    stateProba = np.reshape(proba_ag2, [n_states, 1]) * proba_ag1

    # State rewards: 10 per car left after subtracting each candidate count,
    # floored at zero. np.arange makes the subtraction valid for plain ints.
    counts = np.arange(n_states)
    reward1 = np.maximum(cars1 - counts, 0) * 10
    reward2 = np.maximum(cars2 - counts, 0) * 10
    state_reward = (np.reshape(reward2, [n_states, 1]) + reward1
                    - np.abs(moved) * 2)

    # Issue state value
    v = np.sum(np.multiply(stateProba,
                           state_reward + state_value * params.get('gamma')))
    return v
def initialize_centers(dataset, k, method):
    """Pick ``k`` initial cluster centers from ``dataset``.

    Args:
        dataset: Indexable, sized collection of points.
        k: Number of centers requested.
        method: Selection strategy:

            * ``'random'`` - the first ``k`` points of a random shuffle
              (fewer when the dataset holds fewer than ``k`` points).
            * ``'kmpp'`` - points are drawn one at a time; after each draw
              every point is re-weighted with the distance that
              ``closest_clusters`` reports for the first cluster id it
              returns (presumably the nearest center -- confirm).
            * ``'hockey'`` - delegates to ``ut_closest.main`` with the
              reference points ``[0, 1]``, the dataset mean and ``[1, 0]``
              and returns the second element of its result.
            * a list of length ``k`` - returned unchanged.

    Returns:
        The chosen centers, or ``None`` when ``method`` matches none of the
        options above.

    Raises:
        ValueError: If ``'kmpp'`` is asked for centers from an empty dataset.
    """
    if method == 'random':
        ids = list(range(len(dataset)))
        random.shuffle(ids)
        return [dataset[i] for i in ids[:k]]

    if method == 'kmpp':
        if k > 0 and len(dataset) == 0:
            raise ValueError('cannot draw kmpp centers from an empty dataset')
        chances = [1] * len(dataset)
        centers = []
        for _ in range(k):
            # Normalise once per draw instead of re-summing for every element.
            total = sum(chances)
            if total > 0:
                chances = [x / total for x in chances]
            else:
                # Every point sits on an already chosen center, so the
                # weights carry no information: draw uniformly instead of
                # dividing by zero.
                chances = [1.0 / len(chances)] * len(chances)
            r = random.random()
            acc = 0.0
            # Walk the cumulative weights; if rounding keeps the total just
            # below r the loop ends on the last point, which is then used.
            for index, chance in enumerate(chances):
                if acc + chance >= r:
                    break
                acc += chance
            centers.append(dataset[index])
            for index, point in enumerate(dataset):
                cids, distances = closest_clusters(centers, point)
                chances[index] = distances[cids[0]]
        return centers

    if method == 'hockey':
        pts = [[0, 1], list(mean(dataset, axis=0)), [1, 0]]
        return ut_closest.main(pts, dataset)[1]

    if isinstance(method, list) and len(method) == k:
        return method

    # Unrecognised method (or a preset list of the wrong length).
    return None
def update_value(moved, state_value, ag_cars, params):
    """Expected value of an action, with a free shuttle move and parking fees.

    Args:
        moved: Number of cars transferred; added to agency 1 and subtracted
            from agency 2. When positive, one of the moves is not charged.
        state_value: Current value estimates, broadcast against the
            ``(total_cars + 1, total_cars + 1)`` joint table.
        ag_cars: Pair unpacked as ``(ag2, ag1)``.
        params: Mapping providing ``'carflow'``, ``'total_cars'``,
            ``'jp1sum'``, ``'jp2sum'``, ``'mat_sign'``, ``'jointProba1'``,
            ``'jointProba2'`` and ``'gamma'``.

    Returns:
        Sum over the joint table of
        ``probability * (reward - penalty + gamma * state_value)``.
    """
    n_cars = params.get('total_cars')
    size = n_cars + 1
    flow = params.get('carflow')
    bounds = [0, n_cars]

    # Split the state into the two agencies
    ag2, ag1 = ag_cars

    # Agency 1: probability of each reachable car count
    after_move1 = ag1 + moved
    span1 = ut_closest.main(bounds, after_move1 + flow)
    proba_ag1 = params.get('jp1sum')[0, [range(span1[0], span1[1] + 1)]]

    # Agency 2: probability of each reachable car count
    after_move2 = ag2 - moved
    span2 = ut_closest.main(bounds, after_move2 + flow)
    proba_ag2 = params.get('jp2sum')[0, [range(span2[0], span2[1] + 1)]]

    # Joint table: agency 2 down the rows, agency 1 across the columns
    joint = np.reshape(proba_ag2, [size, 1]) * proba_ag1

    # Expected reward for every shift of the sign matrix
    sign_matrix = params.get('mat_sign')
    joint1 = params.get('jointProba1')
    joint2 = params.get('jointProba2')
    expected1 = np.zeros([1, size])
    expected2 = np.zeros([1, size])
    for shift in range(size):
        # Shift the sign matrix, keep it inside [-20, 20], pay 10 per unit
        shifted = np.minimum(np.maximum(sign_matrix + shift, -20), 20)
        payout = shifted * 10
        expected1[0, shift] = np.sum(np.multiply(payout, joint1))
        expected2[0, shift] = np.sum(np.multiply(payout, joint2))

    # Condition 1: an employee drives one car for free, so a positive move
    # is billed for one car less. The state transition above still uses the
    # full number of moved cars.
    billed = moved - 1 if moved > 0 else moved
    rewards = np.reshape(expected2, [size, 1]) + expected1 - np.abs(billed) * 2

    # Condition 2: indices above 10 cost 4 per agency, summed over both axes
    fee_per_count = [0] * 11 + [4] * (n_cars - 10)
    fees = np.reshape(fee_per_count, [size, 1]) + fee_per_count

    # Probability-weighted total of reward, fees and discounted value
    weighted = np.multiply(
        joint, rewards - fees + state_value * params.get('gamma'))
    return np.sum(weighted)
def ut_cumsum_thresh(vec, thresh):
    """Locate where the normalised cumulative sum of ``vec`` reaches ``thresh``.

    The values are sorted in descending order before accumulating, so the
    cumulative curve starts with the largest contribution. Previously the
    sorted copy was computed but ignored and the curve was built from the
    input order; results are unchanged for input that is already sorted in
    descending order.

    Args:
        vec: Values to accumulate (assumed one-dimensional -- confirm).
        thresh: Target fraction of the total.

    Returns:
        The first element of ``ut_closest.main([thresh], curve)``,
        presumably the position in the descending-sorted order whose
        cumulative share is closest to ``thresh`` -- confirm against
        ``ut_closest.main``.
    """
    # Make sure it is sorted (largest first)
    so_vec = np.sort(vec)[::-1]
    # Cumulative share of the total, accumulated over the sorted values
    cs_vec = np.cumsum(so_vec) / np.sum(so_vec)
    # Find closest value to threshold
    idV = ut_closest.main([thresh], cs_vec)[0]
    return idV
def evaluate_policy(optimal_policy, state_value, params):
    """Return the expected value of one state/action pair.

    Builds the rent/return probability tables for both agencies, combines
    the per-agency probability vectors into a joint table and sums
    ``probability * (reward + gamma * state_value)`` over it.

    NOTE(review): ``ag1``, ``ag2`` and ``moved`` are read below but never
    assigned in this function, so they must exist at module level for it to
    run, and ``optimal_policy`` is never used. Presumably the state and the
    action were meant to be derived from ``optimal_policy`` -- confirm the
    intended source before relying on this function.

    Args:
        optimal_policy: Currently unused.
        state_value: Current value estimates. Must broadcast against a
            ``(total_cars + 1, total_cars + 1)`` table.
        params: Mapping providing ``'total_cars'``, ``'expected_rent'``,
            ``'expected_return'`` (each indexed with 0 and 1) and
            ``'gamma'``.

    Returns:
        The scalar sum described above.
    """
    total_cars = params.get('total_cars')
    n_states = total_cars + 1
    n_counts = total_cars + 1 + 5

    # Net car-flow values from -(total_cars + 5) to total_cars + 5. Kept as
    # an array so adding plain Python ints to it works (int + list raises).
    carflow = np.arange(-total_cars - 5, total_cars + 6)
    # Candidate counts subtracted from the cars on site for the rewards.
    counts = np.arange(n_states)

    # Probability of rents and returns - agency 1
    proba1_rent = ut_poisson_proba.main(params.get('expected_rent')[0],
                                        list(range(n_counts)))
    proba1_rtrn = ut_poisson_proba.main(params.get('expected_return')[0],
                                        list(range(n_counts)))
    jointProba1 = np.reshape(proba1_rent, [n_counts, 1]) * proba1_rtrn
    jp1sum = ut_sum_diagonals.main(jointProba1)
    nex_state1 = ag1 + moved + carflow
    # NOTE(review): idX is presumably the positions in nex_state1 closest to
    # 0 and total_cars -- confirm against ut_closest.main.
    idX = ut_closest.main([0, total_cars], nex_state1)
    proba_ag1 = jp1sum[0, [range(idX[0], idX[1] + 1)]]

    # Probability of rents and returns - agency 2
    proba2_rent = ut_poisson_proba.main(params.get('expected_rent')[1],
                                        list(range(n_counts)))
    proba2_rtrn = ut_poisson_proba.main(params.get('expected_return')[1],
                                        list(range(n_counts)))
    jointProba2 = np.reshape(proba2_rent, [n_counts, 1]) * proba2_rtrn
    jp2sum = ut_sum_diagonals.main(jointProba2)
    nex_state2 = ag2 - moved + carflow
    idX = ut_closest.main([0, total_cars], nex_state2)
    proba_ag2 = jp2sum[0, [range(idX[0], idX[1] + 1)]]

    # State probabilities: agency 2 along the rows, agency 1 along the columns
    stateProba = np.reshape(proba_ag2, [n_states, 1]) * proba_ag1

    # State rewards: 10 per remaining car (floored at zero), 2 per moved car
    reward1 = np.maximum(ag1 + moved - counts, 0) * 10
    reward2 = np.maximum(ag2 - moved - counts, 0) * 10
    state_reward = (np.reshape(reward2, [n_states, 1]) + reward1
                    - np.abs(moved) * 2)

    # Issue state value
    v = np.sum(np.multiply(stateProba,
                           state_reward + state_value * params.get('gamma')))
    return v
def update_value(moved, state_value, ag_cars, params):
    """Expected value of an action using precomputed rent/return tables.

    Args:
        moved: Number of cars transferred; added to agency 1 and subtracted
            from agency 2. Every transferred car costs 2.
        state_value: Current value estimates, broadcast against the
            ``(total_cars + 1, total_cars + 1)`` joint table.
        ag_cars: Pair unpacked as ``(ag2, ag1)``.
        params: Mapping providing ``'carflow'``, ``'total_cars'``,
            ``'jp1sum'``, ``'jp2sum'``, ``'mat_sign'``, ``'jointProba1'``,
            ``'jointProba2'`` and ``'gamma'``.

    Returns:
        Sum over the joint table of
        ``probability * (reward + gamma * state_value)``.
    """
    max_cars = params.get('total_cars')
    width = max_cars + 1
    net_flow = params.get('carflow')
    limits = [0, max_cars]

    # State -> cars held by each agency
    ag2, ag1 = ag_cars

    # Agency 1 probabilities
    reach1 = (ag1 + moved) + net_flow
    edges = ut_closest.main(limits, reach1)
    proba_ag1 = params.get('jp1sum')[0, [range(edges[0], edges[1] + 1)]]

    # Agency 2 probabilities
    reach2 = (ag2 - moved) + net_flow
    edges = ut_closest.main(limits, reach2)
    proba_ag2 = params.get('jp2sum')[0, [range(edges[0], edges[1] + 1)]]

    # Joint table: agency 2 down the rows, agency 1 across the columns
    table = np.reshape(proba_ag2, [width, 1]) * proba_ag1

    # Expected reward per shift of the sign matrix, one row vector per agency
    signs = params.get('mat_sign')
    tables = (params.get('jointProba1'), params.get('jointProba2'))
    gains = (np.zeros([1, width]), np.zeros([1, width]))
    for step in range(width):
        # Shifted sign matrix clamped to [-20, 20], worth 10 per unit
        clamped = np.minimum(np.maximum(signs + step, -20), 20)
        for gain, weights in zip(gains, tables):
            gain[0, step] = np.sum(np.multiply(clamped * 10, weights))
    gain1, gain2 = gains

    reward = np.reshape(gain2, [width, 1]) + gain1 - np.abs(moved) * 2

    # Probability-weighted reward plus discounted value
    return np.sum(
        np.multiply(table, reward + state_value * params.get('gamma')))