Python reorder_rewards_by_quartile 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: reorder_samples_in_rewards

메소드/함수: reorder_rewards_by_quartile

hotexamples.com에서의 예제들: 3

Python reorder_rewards_by_quartile - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 reorder_samples_in_rewards.reorder_rewards_by_quartile에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: run_switch_to_best_simulations.py 프로젝트: SIGKDDanon/SIGKDD2021DeAnonV2

def run_simulations(num_sims,
                    mean_list,
                    variance,
                    step_sizes,
                    outfile_directory,
                    softmax_beta=None,
                    reordering_fn=None,
                    prior_mean=0,
                    forceActions=0):
    '''
    Runs num_sims bandit simulations with several different sample sizes (those in the list step_sizes). 
    Bandit uses the thompson_ng sampling policy.
    '''

    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                print("Forcing actions:", forceActions)
                forced = make_forced_actions(len(mean_list), num_steps,
                                             forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory,
                                                   num_steps, i)
            # Check if they've passed in one variance for everything or multiple variances
            if not hasattr(variance, '__len__'):
                # only one variance - turn into a list
                variances = [variance] * len(mean_list)
            else:
                # multiple variances - pass straight through
                variances = variance

            generate_single_bandit.generate_normal_distribution_file(
                mean_list, variances, num_steps, cur_reward_file)
            if softmax_beta != None:
                # reorder rewards
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn,
                    softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps,
                                                  i)
            models = [
                ng_normal.NGNormal(mu=prior_mean, k=1, alpha=1, beta=1)
                for _ in range(len(mean_list))
            ]
            thompson_ng_policy.calculate_thompson_single_bandit(
                reordered_reward_file,
                num_actions=len(mean_list),
                dest=cur_output_file,
                models=models,
                action_mode=thompson_ng_policy.ActionSelectionMode.
                prob_is_best,
                relearn=True,
                forced=forced)

예제 #2

파일 보기

파일: run_effect_size_simulations_beta.py 프로젝트: SIGKDDanon/SIGKDD2021DeAnonV2

def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory, successPrior = 1, failurePrior = 1, softmax_beta = None, \
    reordering_fn = None, forceActions = 0, batch_size = 1, burn_in_size = 1):
    '''
    Runs num_sims bandit simulations with several different sample sizes (those in the list step_sizes). 
    Bandit uses the thompson_ng sampling policy.
    '''

    for i in range(num_sims):
      #  num_steps_prev = 0
        for num_steps in step_sizes:
            if forceActions != 0:
#                 print("Forcing actions:", forceActions)
                forced = run_effect_size_simulations.make_forced_actions(len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory, num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps,        
                                                 cur_reward_file)
            if softmax_beta != None:
                # reorder rewards
                reordered_reward_file = get_reordered_rewards_filename(outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(cur_reward_file, 
                                                                       reordered_reward_file, 
                                                                       reordering_fn, 
                                                                       softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps, i)
            models = [beta_bernoulli.BetaBern(success=successPrior, failure=failurePrior) for _ in range(len(prob_per_arm))]


            '''thompson_policy.calculate_thompson_single_bandit(reordered_reward_file, 
                                         num_actions=len(prob_per_arm), 
                                         dest= cur_output_file, 
                                         models=models, 
                                         action_mode=thompson_policy.ActionSelectionMode.prob_is_best, 
                                         relearn=True,
                                         forced = forced,
                                         batch_size = batch_size, 
                                         burn_in_size = burn_in_size)
            '''
            # num_steps_prev = num_steps
            thompson_policy.old_two_phase_random_thompson_policy(reordered_reward_file, 
                                         num_actions=len(prob_per_arm), 
                                         dest= cur_output_file, 
                                         random_dur=0,
                                         models=models,
                                         random_start=0,
                                         action_mode=thompson_policy.ActionSelectionMode.prob_is_best, 
                                         relearn=True,
                                         forced = forced,
                                         batch_size = batch_size, 
                                         burn_in_size = burn_in_size)

예제 #3

파일 보기

def run_simulations(num_sims, prob_per_arm, step_sizes, outfile_directory, successPrior = 1, failurePrior = 1, softmax_beta = None, \
    reordering_fn = None, forceActions = 0, batch_size = 1, burn_in_size = 1, c = 0.1, resample = True):
    '''
    Runs num_sims bandit simulations with several different sample sizes (those in the list step_sizes). 
    Bandit uses the thompson_ng sampling policy.
    '''

    for i in range(num_sims):
        #  num_steps_prev = 0
        for num_steps in step_sizes:
            if forceActions != 0:
                #                 print("Forcing actions:", forceActions)
                forced = run_effect_size_simulations.make_forced_actions(
                    len(prob_per_arm), num_steps, forceActions)
            else:
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory,
                                                   num_steps, i)
            generate_single_bandit.generate_file(np.array(prob_per_arm),
                                                 num_steps, cur_reward_file)
            if softmax_beta != None:
                # reorder rewards
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn,
                    softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps,
                                                  i)
            models = [
                beta_bernoulli.BetaBern(success=successPrior,
                                        failure=failurePrior)
                for _ in range(len(prob_per_arm))
            ]

            #if don't pass model, then will be Greedy
            #thresh = 0.03
            #        thresh = 0.1 # for small effect, es = 0.1, 0.55 - 0.45 = 0.10
            ppd.calculate_epsilon_single_bandit(reordered_reward_file,
                                                models=models,
                                                num_actions=len(prob_per_arm),
                                                dest=cur_output_file,
                                                forced=forced,
                                                c=c,
                                                resample=resample)