Code Example #1
File: sim.py  Project: smowton/scan
 def credit_reward(self, state):
     # Book the reward for a finished job from its elapsed time and record count.
     self.reward = params.reward(state.now - self.start_time, self.nrecords)
     if state.debug:
         print "Job", str(self), "done; credit", self.reward
     state.total_reward += self.reward
     self.actual_finish_time = state.now
     state.completed_jobs.append(self)
Code Example #2
File: value_methods.py  Project: okkhoy/gabe-and-joh
def max_action(state, V, gamma=0.9, debug=False):
    """ Compute the best (action, value) pair from a state 
    
    Returns a tuple: (action, value)
    """
    a_max = (0, 0, 0)
    v_max = 0
    
    # Loop through all possible actions to determine max value
    for a in actions(state):
        # Choose action a and reach afterstate sa
        sa = afterstate(state, a)
        
        # Milk cows for reward
        r = reward(state, a)
        
        # Calculate the value of afterstate
        vn = value(sa, r, V, gamma)
        
        if vn > v_max:
            v_max = vn
            a_max = a
    
    return (a_max, v_max)
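
The excerpt above relies on four helpers defined elsewhere in value_methods.py: actions(), afterstate(), reward() and value(). They are not shown on this page, so the stand-ins below are only a guess at their interfaces (a deterministic afterstate model with a one-step discounted backup), sketched so max_action can be exercised in isolation; the project's real definitions will differ.

# Hypothetical stand-ins for the helpers max_action expects; the real
# definitions live elsewhere in value_methods.py.
def actions(state):
    # Enumerate the actions available from `state` (here just a fixed toy set).
    return [(0, 0, 0), (1, 0, 0), (0, 1, 0)]

def afterstate(state, a):
    # Deterministic transition: apply action `a` componentwise to `state`.
    return tuple(s + x for (s, x) in zip(state, a))

def reward(state, a):
    # Immediate reward for taking action `a` in `state` (toy: one unit per component acted on).
    return sum(a)

def value(sa, r, V, gamma):
    # One-step backup: immediate reward plus the discounted value of the afterstate.
    return r + gamma * V.get(sa, 0)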
Code Example #3
File: value_methods.py  Project: okkhoy/gabe-and-joh
import copy
import sys

def policy_iteration(gamma=0.9, theta=0.01, sweeps=None, value_list=None):
    """ Policy iteration
    
    gamma -- discount factor
    theta -- stop when delta < theta
    sweeps -- stop after N sweeps
    value_list -- passing a list here will populate it with the value functions
                  generated after each policy evaluation step
    
    Returns a tuple (V*, pi*) where
        V*[s] = value
        pi*[s] = action
    """
    
    # Initialize value function to 0
    V = dict((s, 0) for s in states())
    
    # Initialize policy to (0, 0, 0)
    pi = dict((s, (0, 0, 0)) for s in states())
    
    # Assume a stable policy
    policy_stable = False
    
    while not policy_stable:
        # 
        # Policy Evaluation
        #
        print "Policy Evaluation..."
        sweep = 0
        while True:
            sweep += 1
            delta = 0
            
            # Report progress!
            print '\tSweep', sweep, '...',
            sys.stdout.flush()
            
            # Loop through every possible state
            for s in states():
                # Store old value of state
                v = V[s]
                
                # Act according to policy
                sa = afterstate(s, pi[s])
                V[s] = value(sa, reward(s, pi[s]), V, gamma)
                
                # Update delta
                delta = max(delta, abs(v - V[s]))
            
            print 'delta =', delta
            
            #raw_input('Hit enter to continue')
            
            if theta and delta < theta:
                break
            
            if sweeps and sweep == sweeps:
                break
        
        
        if isinstance(value_list, list):
            value_list.append(copy.deepcopy(V))
        
        #
        # Policy Improvement
        #
        print "Policy Improvement..."
        
        policy_stable = True
        
        # Go through every state
        for s in states():
            b = pi[s]
            
            an, vn = max_action(s, V, gamma)
            pi[s] = an
            
            #print "pi[%s] = %s" % (s, pi[s])
            
            if b != pi[s]:
                policy_stable = False
        
        
    # Return the value function and policy
    return V, pi
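
Given stand-ins like the ones sketched after the max_action example, plus an enumerator of the state space, the function can be driven end to end. A minimal usage sketch under those toy assumptions (not code from the project):

# Hypothetical driver for policy_iteration, assuming the toy helpers above.
def states():
    # Toy state space: every triple with components in {0, 1}.
    return [(i, j, k) for i in (0, 1) for j in (0, 1) for k in (0, 1)]

value_functions = []
V, pi = policy_iteration(gamma=0.9, theta=0.01, value_list=value_functions)
# V maps each state to its estimated value; pi maps each state to the chosen action.
# value_functions holds a snapshot of V after every policy-evaluation phase.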
Code Example #4
File: analyse_params.py  Project: smowton/scan
print "Assumed job size:", job_size

def plan_times(plan):
    # Per-phase processing time for a plan that assigns `cores` cores to each phase.
    return [params.processing_time(job_size, cores, 1, phase, False) for (phase, cores) in enumerate(plan)]

def plan_cost(plan, times, costpercore):
    # Total compute cost: cores * time summed over phases, priced per core-time unit.
    return costpercore * sum([x * y for (x, y) in zip(plan, times)])

for (i, tier) in enumerate(params.core_cost_tiers):

    print "Tier", i+1

    baseline_times = plan_times([1] * 7)
    baseline_cost = plan_cost([1] * 7, baseline_times, tier["cost"])
    print "Expected cost of single-threaded run:", baseline_cost
    baseline_profit = params.reward(sum(baseline_times), job_size) - baseline_cost
    print "Base profit:", baseline_profit

    for stage in range(7):

        best_profit = baseline_profit
        best_cores = 1

        for cores in params.dynamic_core_choices[1:]:

            plan = [1] * 7
            plan[stage] = cores
            times = plan_times(plan)
            cost = plan_cost(plan, times, tier["cost"])
            profit = params.reward(sum(times), job_size) - cost
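
The excerpt ends mid-loop; presumably the remaining lines of analyse_params.py compare each candidate's profit against the baseline and report the best core count per stage. A purely hypothetical continuation, for illustration only:

            # Hypothetical continuation (not from the original file): keep the
            # most profitable core count seen so far.
            if profit > best_profit:
                best_profit = profit
                best_cores = cores

        print "Best cores for stage", stage, ":", best_cores, "profit:", best_profit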
Code Example #5
File: sim.py  Project: smowton/scan
 def predict_config_reward_profit(config):
     # Estimate the net profit of a configuration: predicted reward minus total core cost.
     stage_times = config_stage_times(config)
     core_time_units = stage_core_time_units(config)
     total_cost = params.core_cost_tiers[0]["cost"] * sum(core_time_units)
     total_reward = params.reward(sum(stage_times) * self.vscale_params["queuefactor"], self.arrival_process.mean_records)
     return float(total_reward) - total_cost
Code Example #6
File: sim.py  Project: smowton/scan
 def estimate_reward(self, running_time):
     return params.reward(running_time, self.nrecords)
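
Several of the snippets above ultimately call params.reward(elapsed_time, nrecords) from the project's params module, which is not shown on this page. The stand-in below is only a guess at its shape for local experimentation (a per-record payment that decays with elapsed time); the real reward model in smowton/scan is defined in params.py.

# Hypothetical stand-in for params.reward; the real definition lives in the
# project's params.py and is not reproduced on this page.
def reward(elapsed_time, nrecords):
    # Pay a fixed amount per record, decaying linearly to zero over an hour.
    per_record = 10.0
    decay = max(0.0, 1.0 - elapsed_time / 3600.0)
    return nrecords * per_record * decay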