def moe_experiment_from_sample_arms(sample_arms):
    """Make a MOE experiment with all historical data (i.e., parameter value, CTR, noise variance triples).

    :param sample_arms: all arms from prev and current cohorts, keyed by coordinate-tuples
      Arm refers specifically to a :class:`moe.bandit.data_containers.SampleArm`
    :type sample_arms: dict
    :return: MOE Experiment object usable with GP endpoints like ``gp_mean_var`` and ``gp_next_points``
    :rtype: :class:`moe.easy_interface.experiment.Experiment`

    """
    experiment = Experiment(EXPERIMENT_DOMAIN)
    for sample_arm_point, sample_arm in sample_arms.items():
        arm_value, arm_variance = objective_function(
                sample_arm,
                sample_arms[tuple(STATUS_QUO_PARAMETER)],
                )
        # MOE *minimizes* and we want to *maximize* CTR, so
        # we multiply the objective (``arm_value``) by -1.0
        experiment.historical_data.append_sample_points([
            [
                sample_arm_point,
                -arm_value,
                arm_variance,
            ]])

    return experiment
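
A minimal usage sketch (hypothetical driver: it assumes ``sample_arms`` and the ``gp_next_points`` endpoint are available as in the rest of this listing):

experiment = moe_experiment_from_sample_arms(sample_arms)
next_points = gp_next_points(experiment)  # returns a list of suggested parameter values
print next_points[0]  # the next cohort's parameter value to try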
Example #2
def find_accurate_policy(trials, alpha_0=10, beta_0=10, num_exs=250, students=None,
                         test_path=TEST_PATH, make_plot=False):
    """
    Find the best alpha/beta for the teaching policy.
    Args:
        trials: Number of teaching plans to try out (including first plan).
        alpha_0: Starting alpha.
        beta_0: Starting beta.
        num_exs: The number of examples given to students per teaching plan.
        students: The students to teach; defaults to STUDENTS if none given.
        test_path: The path of the file with test qs/answers.
        make_plot: Whether to make a scatter plot of the history.
    Returns: The best alpha/beta found.
    """
    if students is None:
        students = STUDENTS
    test_qs, test_ans = plan_eval.read_test(test_path)
    history = []
    eval_policy = _create_evaluator(num_exs, students, test_qs, test_ans, history)

    experiment = Experiment([[0, ALPHA_MAX], [0, BETA_MAX]])
    # Run the start experiment and evaluate.
    experiment.historical_data.append_sample_points([eval_policy(alpha_0, beta_0)])
    for i in xrange(trials-1):
        print '--------TRIAL %d DONE--------' % (i + 1)
        alpha, beta = gp_next_points(experiment)[0]
        experiment.historical_data.append_sample_points([eval_policy(alpha, beta)])
    best = max(history)
    if make_plot:
        plot_history(best, history)
    return best
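
Both policy-search snippets rely on evaluator factories that are not shown. A plausible sketch of ``_create_evaluator`` (hypothetical reconstruction; ``plan_eval.teach_and_test`` is an assumed scorer, not a confirmed API):

def _create_evaluator(num_exs, students, test_qs, test_ans, history):
    # Hypothetical: returns eval_policy(alpha, beta) yielding a
    # [point, objective, variance] triple that append_sample_points accepts.
    def eval_policy(alpha, beta):
        scores = [plan_eval.teach_and_test(s, alpha, beta, num_exs, test_qs, test_ans)
                  for s in students]
        mean_score = sum(scores) / float(len(scores))
        history.append((mean_score, (alpha, beta)))  # so max(history) recovers the best pair
        return [(alpha, beta), -mean_score, 0.01]  # negated because MOE minimizes
    return eval_policy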
Example #3
def run_example(num_points_to_sample=20, verbose=True, **kwargs):
    """Run the example, aksing MOE for ``num_points_to_sample`` optimal points to sample."""
    exp = Experiment([[0, 2], [0, 4]])  # 2D experiment, we build a tensor product domain
    # Bootstrap with some known or already sampled point(s)
    exp.historical_data.append_sample_points([
        SamplePoint(
            [0, 0], function_to_minimize([0, 0]), 0.05
        ),  # Iterables of the form [point, f_val, f_var] are also allowed
    ])

    # Sample num_points_to_sample points
    for _ in range(num_points_to_sample):
        # Use MOE to determine the point with the highest Expected Improvement to sample next
        next_point_to_sample = gp_next_points(
            exp, **kwargs)[0]  # By default we only ask for one point
        # Sample the point from our objective function, we can replace this with any function
        value_of_next_point = function_to_minimize(next_point_to_sample)

        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(
                str(next_point_to_sample), value_of_next_point)

        # Add the information about the point to the experiment historical data to inform the GP
        exp.historical_data.append_sample_points(
            [SamplePoint(next_point_to_sample, value_of_next_point,
                         0.01)])  # We can add some noise
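
The example never defines ``function_to_minimize``; any real-valued function on the domain works. A minimal stand-in (hypothetical, chosen to roughly match the 0.01-0.05 noise variances above):

import random

def function_to_minimize(point):
    # Smooth bowl with its minimum at (1, 2) inside [0, 2] x [0, 4], plus Gaussian
    # observation noise with variance 0.01 (std 0.1).
    return (point[0] - 1.0) ** 2 + (point[1] - 2.0) ** 2 + random.gauss(0.0, 0.1)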
Example #4
def find_fastest_policy(trials, alpha_0=10, beta_0=10, ex_cutoff=250, perf_thresh=0.93,
                        students=None, test_path=TEST_PATH, make_plot=True):
    """
    Find the best alpha/beta for the teaching policy.
    Args:
        trials: Number of teaching plans to try out (including first plan).
        alpha_0: Starting alpha.
        beta_0: Starting beta.
        ex_cutoff: Max number of examples to show.
        perf_thresh: The threshold of what is considered perfect.
        students: The students to teach; defaults to STUDENTS if none given.
        test_path: The path of the file with test qs/answers.
        make_plot: Whether to make a scatter plot of the history.
    Returns: The best alpha/beta found.
    """
    if students is None:
        students = STUDENTS
    test_qs, test_ans = plan_eval.read_test(test_path)
    history = []
    eval_policy = _create_perf_evaluator(ex_cutoff, perf_thresh, students, test_qs, test_ans, history)

    experiment = Experiment([[0, ALPHA_MAX], [0, BETA_MAX]])
    # Run the start experiment and evaluate.
    experiment.historical_data.append_sample_points([eval_policy(alpha_0, beta_0)])
    for i in xrange(trials-1):
        print '--------TRIAL %d DONE--------' % (i + 1)
        alpha, beta = gp_next_points(experiment)[0]
        experiment.historical_data.append_sample_points([eval_policy(alpha, beta)])
    best = min(history)
    print len(history)

    if make_plot:
        plot_history(best, history)
    return best
def do_rfc_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    exp_rfc = Experiment([[0.005, 1], [0.04, 1], [0.1, 1], [0.1, 1]])  # n_estimators_range = [5, 1000], max_features_range = [2, 50],
                                                                       # max_depth_range = [1, 10], and min_samples_leaf_range = [1, 10], all normalized
    best_point = []
    best_point_value = 0.    
    for _ in range(num_points_to_sample):
        # Use MOE to determine the point with the highest Expected Improvement to sample next
        next_point_to_sample = gp_next_points(exp_rfc, rest_host='localhost', rest_port=6543, **kwargs)[0]  # By default we only ask for one point
        # Sample the point from objective function
        n_estimators = int(round(next_point_to_sample[0] * 1000.0))
        max_features = int(round(next_point_to_sample[1] * 50))
        max_depth = int(round(next_point_to_sample[2] * 10))
        min_samples_leaf = int(round(next_point_to_sample[3] * 10))
        rfc = RandomForestClassifier(n_estimators=n_estimators, criterion='gini', 
            max_depth=max_depth, min_samples_split=2, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=0.0,            
            max_features=max_features, max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=-1, 
            random_state=None, verbose=0, warm_start=False, class_weight=None)
        score_cv = cross_validation.cross_val_score(rfc, X_train, y_train, cv=10, scoring='accuracy')
        value_of_next_point = np.mean(score_cv)
        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample          
        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point)
        # Add the information about the point to the experiment historical data to inform the GP
        exp_rfc.historical_data.append_sample_points([SamplePoint(next_point_to_sample, -value_of_next_point, 0.0001)])  # negated because MOE minimizes; we can add some noise
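
``do_rfc_MOE`` tracks ``best_point`` in normalized coordinates; a small helper (hypothetical, mirroring the scaling inside the loop) maps such a point back to RandomForest hyperparameters:

def rfc_point_to_params(point):
    # Undo the unit-interval normalization used in do_rfc_MOE above.
    return {
        'n_estimators': int(round(point[0] * 1000.0)),
        'max_features': int(round(point[1] * 50)),
        'max_depth': int(round(point[2] * 10)),
        'min_samples_leaf': int(round(point[3] * 10)),
    }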
    def __init__(self, grid):
        # MOE's easy_interface ships for Python 2 only, so import lazily and fail loudly otherwise.
        if sys.version_info[0] == 2:
            from httplib import BadStatusLine
            from moe.easy_interface.experiment import Experiment
            from moe.easy_interface.simple_endpoint import gp_next_points
            from moe.optimal_learning.python.data_containers import SamplePoint
        else:
            raise RuntimeError("MOESearch requires Python2!")
        self.experiment = Experiment(grid)
    def __init__(self, benchmark, settings_list):
        # Run the experiment once to get the initial value

        self.benchmark = benchmark
        self.settings = settings_list
        self.results = []

        self.iteration_count = 0

        bounds = []

        for setting in self.settings:
            bounds.append([setting.get_minimum(), setting.get_maximum()])

        self.moe = Experiment(bounds)
def run_example(num_points_to_sample=200, verbose=False, **kwargs):
    b = Branin()
    bounds = b.get_meta_information()['bounds']
    dimensions = len(bounds)
    lower = np.array([i[0] for i in bounds])
    upper = np.array([i[1] for i in bounds])
    start_point = lower + (upper - lower) / 2  # midpoint of the domain
    exp = Experiment(bounds)  # bounds is already a list of [min, max] pairs, one per dimension
    exp.historical_data.append_sample_points([
        SamplePoint(start_point, wrapper(start_point, b), 0.6)])
    for _ in range(num_points_to_sample):
        next_point_to_sample = gp_next_points(exp, **kwargs)[0]
        value_of_next_point = wrapper(next_point_to_sample, b)
        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point)
        exp.historical_data.append_sample_points([SamplePoint(next_point_to_sample, value_of_next_point, 0.6)])
def do_xgb_train_MOE(num_points_to_sample,
                     X_train,
                     y_train,
                     verbose=True,
                     **kwargs):
    # Finding Best XGB parameters using MOE
    xgb_parameters = {}
    xgb_parameters['objective'] = 'multi:softmax'
    xgb_parameters['silent'] = 1
    xgb_parameters['nthread'] = 4
    xgb_parameters['num_class'] = 6
    # Range of XGBoost parameters that are optimized
    exp_xgb = Experiment([[0.1, 1], [0.02, 1]])  # eta_range = [0.1, 1]; max_depth_range = [2, 100], normalized to [0.02, 1]

    num_round = 5
    n_folds = 10
    cv_folds = cross_validation.StratifiedKFold(y_train, n_folds=n_folds)

    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Use MOE to determine the point with the highest Expected Improvement to sample next
        next_point_to_sample = gp_next_points(
            exp_xgb, rest_host='localhost', rest_port=6543,
            **kwargs)[0]  # By default we only ask for one point

        # Sample the point from objective function
        xgb_parameters['eta'] = next_point_to_sample[0]
        xgb_parameters['max_depth'] = int(round(next_point_to_sample[1] * 100))
        acc_cv, prec_cv, rec_cv, cm_cv, cm_full_cv = xgboost_train_cross_validation(
            X_train, y_train, xgb_parameters, num_round, cv_folds)
        value_of_next_point = acc_cv
        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample
        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(
                str(next_point_to_sample), value_of_next_point)
        # Add the information about the point to the experiment historical data to inform the GP
        exp_xgb.historical_data.append_sample_points(
            [SamplePoint(next_point_to_sample, -value_of_next_point,
                         0.0001)])  # We can add some noise
    best_point[1] = int(round(best_point[1] * 100))
    return best_point, best_point_value
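
A sketch of how this might be driven (assumes a MOE REST server on localhost:6543 and a labeled training set, as in the other HAR snippets; not a confirmed entry point):

best_point, best_acc = do_xgb_train_MOE(30, X_train, y_train, verbose=False)
print "Best (eta, max_depth) = {0:s} with CV accuracy {1:.4f}".format(str(best_point), best_acc)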
def do_svc_linear_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    exp_svc_linear = Experiment([[1.0000e-05, 1.0]])  # C_range = [0.1, 10000], normalized to [1e-05, 1] (C = point * 10000)
    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Use MOE to determine the point with the highest Expected Improvement to sample next
        next_point_to_sample = gp_next_points(exp_svc_linear, rest_host='localhost', rest_port=6543, **kwargs)[0]  # By default we only ask for one point
        # Sample the point from objective function
        C = next_point_to_sample[0] * 10000.0
        svc_linear = svm.LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=C, multi_class='ovr',
            fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000)
        score_cv = cross_validation.cross_val_score(svc_linear, X_train, y_train, cv=10, scoring='accuracy')
        value_of_next_point = np.mean(score_cv) 
        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample
        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point)
        # Add the information about the point to the experiment historical data to inform the GP;
        # the minus in front of value_of_next_point is because MOE minimizes while we want to maximize accuracy in HAR classification
        exp_svc_linear.historical_data.append_sample_points([SamplePoint(next_point_to_sample, -value_of_next_point, .000001)])  # We can add some noise
def do_svc_rbf_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    exp_svc_rbf = Experiment([[1.0000e-05, 1], [1.0000e-08, 1]])  # C_range = [0.1, 10000] normalized to [1e-05, 1]; gamma_range = [1e-08, 1]
    best_point = []
    best_point_value = 0.
    for _ in range(num_points_to_sample):
        # Use MOE to determine the point with the highest Expected Improvement to sample next
        next_point_to_sample = gp_next_points(exp_svc_rbf, rest_host='localhost', rest_port=6543, **kwargs)[0]  # By default we only ask for one point
        # Sample the point from objective function
        C = next_point_to_sample[0] * 10000.0
        gamma = next_point_to_sample[1]
        svc_rbf = svm.SVC(C=C, kernel='rbf', degree=3, gamma=gamma, coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200,
                 class_weight=None, verbose=False, max_iter=-1, random_state=None)
        score_cv = cross_validation.cross_val_score(svc_rbf, X_train, y_train, cv=10, scoring='accuracy')
        value_of_next_point = np.mean(score_cv)
        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample
        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point)
        # Add the information about the point to the experiment historical data to inform the GP
        exp_svc_rbf.historical_data.append_sample_points([SamplePoint(next_point_to_sample, -value_of_next_point, 0.0001)])  # We can add some noise
def do_abc_MOE(num_points_to_sample, X_train, y_train, verbose=True, **kwargs):
    exp_abc = Experiment([[0.005, 1], [0.1, 1]])  # n_estimators_range = [5, 1000] normalized to [0.005, 1]; learning_rate_range = [0.1, 1]
    best_point = []
    best_point_value = 0.    
    for _ in range(num_points_to_sample):
        # Use MOE to determine the point with the highest Expected Improvement to sample next
        next_point_to_sample = gp_next_points(exp_abc, rest_host='localhost', rest_port=6543, **kwargs)[0]  # By default we only ask for one point
        # Sample the point from objective function
        n_estimators = int(round(next_point_to_sample[0] * 1000.0))
        learning_rate = next_point_to_sample[1]
        abc = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2), n_estimators=n_estimators, learning_rate=learning_rate)
        score_cv = cross_validation.cross_val_score(abc, X_train, y_train, cv=10, scoring='accuracy')
        value_of_next_point = np.mean(score_cv)
        if value_of_next_point > best_point_value:
            best_point_value = value_of_next_point
            best_point = next_point_to_sample          
        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point)
        # Add the information about the point to the experiment historical data to inform the GP
        exp_abc.historical_data.append_sample_points([SamplePoint(next_point_to_sample, -value_of_next_point, 0.0001)])  # We can add some noise
    best_point[0] = int(round(best_point[0] * 1000))        
    return best_point, best_point_value
Example #13
    score = float(result.search(res).group(1))
    print score

    # We want to maximize the score
    return score


# Variance estimator around a point
#values = []
#x = [1.5]
#while True:
#	print values, numpy.mean(values), numpy.var(values)
#	values.append(function_to_minimize(x))

# Experiment tensor product domain
exp = Experiment([[0.000001, 3.0]])

# Bootstrap with some known or already sampled point(s)
samplepoints = []
coords = []
scores = []
with open('TreeNode.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        row = [float(x) for x in row]
        x = row[:-1]
        score = row[-1]
        coords.append(x[0])
        scores.append(score)
        print x, score
        samplepoints.append(SamplePoint(x, offset + score * scale, variance))
exp.historical_data.append_sample_points(samplepoints)  # register the bootstrap points with the experiment
Example #14
 def __init__(self, grid):
     self.experiment = Experiment(grid)
Example #15
def run_example(
        num_to_sample=20,
        verbose=True,
        testapp=None,
        gp_next_points_kwargs=None,
        gp_hyper_opt_kwargs=None,
        gp_mean_var_kwargs=None,
        **kwargs
):
    """Run the combined example.

    :param num_to_sample: Number of points for MOE to suggest and then sample [20]
    :type num_to_sample: int > 0
    :param verbose: Whether to print information to the screen [True]
    :type verbose: bool
    :param testapp: Whether to use a supplied test pyramid application or a rest server [None]
    :type testapp: Pyramid test application
    :param gp_next_points_kwargs: Optional kwargs to pass to the gp_next_points endpoint
    :type gp_next_points_kwargs: dict
    :param gp_hyper_opt_kwargs: Optional kwargs to pass to the gp_hyper_opt endpoint
    :type gp_hyper_opt_kwargs: dict
    :param gp_mean_var_kwargs: Optional kwargs to pass to the gp_mean_var endpoint
    :type gp_mean_var_kwargs: dict
    :param kwargs: Optional kwargs to pass to all endpoints
    :type kwargs: dict

    """
    # Set and combine all optional kwargs
    # Note that the more specific kwargs take precedence (and will override general kwargs)
    if gp_next_points_kwargs is None:
        gp_next_points_kwargs = {}
    gp_next_points_kwargs = dict(kwargs.items() + gp_next_points_kwargs.items())

    if gp_hyper_opt_kwargs is None:
        gp_hyper_opt_kwargs = {}
    gp_hyper_opt_kwargs = dict(kwargs.items() + gp_hyper_opt_kwargs.items())

    if gp_mean_var_kwargs is None:
        gp_mean_var_kwargs = {}
    gp_mean_var_kwargs = dict(kwargs.items() + gp_mean_var_kwargs.items())

    exp = Experiment([[0, 2], [0, 4]])
    # Bootstrap with some known or already sampled point(s)
    exp.historical_data.append_sample_points([
        [[0, 0], function_to_minimize([0, 0]), 0.01],  # sampled points have the form [point_as_a_list, objective_function_value, value_variance]
        ])

    # Sample points
    for i in range(num_to_sample):
        covariance_info = {}
        if i > 0 and i % 5 == 0:
            covariance_info = gp_hyper_opt(exp.historical_data.to_list_of_sample_points(), testapp=testapp, **gp_hyper_opt_kwargs)

            if verbose:
                print "Updated covariance_info with {0:s}".format(str(covariance_info))
        # Use MOE to determine the point with the highest Expected Improvement to sample next
        next_point_to_sample = gp_next_points(
                exp,
                covariance_info=covariance_info,
                testapp=testapp,
                **gp_next_points_kwargs
                )[0]  # By default we only ask for one point
        # Sample the point from our objective function, we can replace this with any function
        value_of_next_point = function_to_minimize(next_point_to_sample)

        if verbose:
            print "Sampled f({0:s}) = {1:.18E}".format(str(next_point_to_sample), value_of_next_point)

        # Add the information about the point to the experiment historical data to inform the GP
        exp.historical_data.append_sample_points([[next_point_to_sample, value_of_next_point, 0.01]])  # We can add some noise

    points_to_evaluate = [[x, x] for x in numpy.arange(0, 1, 0.1)]  # uniform grid of points
    mean, var = gp_mean_var(
            exp.historical_data.to_list_of_sample_points(),  # Historical data to inform Gaussian Process
            points_to_evaluate,  # We will calculate the mean and variance of the GP at these points
            testapp=testapp,
            **gp_mean_var_kwargs
            )

    if verbose:
        print "GP mean at (0, 0), (0.1, 0.1), ...: {0:s}".format(str(mean))
Example #16
    print score

    # We want to maximize the score
    return score


# Variance estimator around a point
#values = []
#x = [0.2585045229, -0.8168569930, 0.9809096927, 0.9944286241, -0.6304206655, 0.0525435460]
#while True:
#	print values, numpy.mean(values), numpy.var(values)
#	y = [n * argscale + argoffset for n in x]
#	values.append(function_to_minimize(y))

# 6D experiment, we build a tensor product domain
exp = Experiment([[-argscale + argoffset, argscale + argoffset]] * 6)

print [[-argscale + argoffset, argscale + argoffset]] * 6

# Bootstrap with some known or already sampled point(s)
samplepoints = []
with open('moveheuristic.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        row = [float(x) for x in row]
        y = row[-1]
        print row[:-1], y, offset + y * scale
        x = [n * argscale + argoffset for n in row[:-1]]
        samplepoints.append(SamplePoint(x, offset + y * scale, variance))
exp.historical_data.append_sample_points(samplepoints)
Example #17
                          mu=predicted,
                          tau=tau,
                          observed=True,
                          value=observed)
        print(predicted, observed, tau, var.logp)
        variables.append(var)

    model = pymc.MCMC(variables)
    return model.logp


a, b = data[keys].iloc[0].values
logp = objective(a, b)

get_bounds = lambda variable: (variable.parents["lower"], variable.parents["upper"])

experiment_bounds = [get_bounds(q0), get_bounds(sigma0)]
exp = Experiment(experiment_bounds)

for (q0_val, sigma0_val) in data.set_index(keys).index:
    value = objective(q0_val, sigma0_val)
    print(q0_val, sigma0_val, value)
    error = 0.001
    exp.historical_data.append_sample_points([[(q0_val, sigma0_val), value,
                                               error]])

covariance_info = gp_hyper_opt(exp.historical_data.to_list_of_sample_points())
next_point_to_sample = gp_next_points(exp, covariance_info=covariance_info)
print next_point_to_sample