Beispiel #1
 def test_for_log(self):
     """
     Test the total-time estimate for log-time data.

     The integer-truncated estimate for TIMES_LOG must lie strictly
     between EXPT_TIME_LOG_RANGE[0] and EXPT_TIME_LOG_RANGE[1].
     """
     calc_log = int(total_time_component.find_total_time(TIMES_LOG)[0])
     # Both assertions are strict: the bounds themselves are rejected.
     self.assertLess(EXPT_TIME_LOG_RANGE[0], calc_log)
     self.assertGreater(EXPT_TIME_LOG_RANGE[1], calc_log)
Beispiel #2
 def test_for_nlogn(self):
     """
     Test the total-time estimate for nlogn-time data.

     The integer-truncated estimate for TIMES_NLOGN must lie strictly
     between EXPT_TIME_NLOGN_RANGE[0] and EXPT_TIME_NLOGN_RANGE[1].
     """
     calc_nlogn = int(total_time_component.find_total_time(TIMES_NLOGN)[0])
     # Both assertions are strict: the bounds themselves are rejected.
     self.assertLess(EXPT_TIME_NLOGN_RANGE[0], calc_nlogn)
     self.assertGreater(EXPT_TIME_NLOGN_RANGE[1], calc_nlogn)
Beispiel #3
 def test_for_squared(self):
     """
     Test the total-time estimate for squared-time data.

     The integer-truncated estimate for TIMES_SQRD must match
     EXPT_TIME_SQRD.
     """
     calc_sqrd = int(total_time_component.find_total_time(TIMES_SQRD)[0])
     self.assertAlmostEqual(EXPT_TIME_SQRD, calc_sqrd)
 def test_for_complete_df_empty(self):
     """
     Test that the benchmark dataframe is not empty after the estimated
     times column has been added.
     """
     benchmark_df = rc.get_benchmark_data()
     times, percents = [2, 4, 6], [1, 5, 10]
     est_time_user = tt.find_total_time(times, percents)
     user_benchmark = br.run_benchmark()
     # Scale the 'runtime' column by the ratio of the fitted total time
     # to the value returned by br.run_benchmark().
     est_time_aws = benchmark_df[['runtime']] \
         / user_benchmark * est_time_user[0]
     benchmark_df["estimated_time_aws"] = est_time_aws
     # Passes as long as the dataframe has at least one row.
     self.assertGreater(benchmark_df.shape[0], 0)
Beispiel #5
def add_estimated_time_aws(dataframe, python_call, module_name):
    """
    Estimate the time required to run the user's algorithm on each instance
    and add it to the dataframe as the ``estimated_time_aws`` column.

    The passed dataframe is modified in place and also returned.

    :param dataframe: the benchmark dataframe output from get_benchmark_data()
    :param python_call: str python string calling the algorithm to be timed
    :param module_name: str name of module from which function is called
    :return: dataframe with added estimated times
    """
    times, percents = ar.run_algo(python_call, module_name)
    est_time_user = tt.find_total_time(times, percents)
    user_benchmark = br.run_benchmark()
    # Scale each row's 'runtime' by the fitted total time relative to the
    # value returned by br.run_benchmark().
    est_time_aws = dataframe[['runtime']] / user_benchmark * est_time_user[0]
    dataframe["estimated_time_aws"] = est_time_aws
    return dataframe
Beispiel #6
 def test_add_estimated_price(self):
     """
     Test adding the spot and on-demand pricing to the dataframe.

     Builds the estimated-time column, merges in the pricing for every
     instance type and checks that the merged dataframe is not empty.
     """
     benchmark_df = rc.get_benchmark_data()
     times, percents = [2, 4, 6], [1, 5, 10]
     est_time_user = tt.find_total_time(times, percents)
     user_benchmark = br.run_benchmark()
     est_time_aws = benchmark_df[['runtime']] \
         / user_benchmark * est_time_user[0]
     benchmark_df["estimated_time_aws"] = est_time_aws

     instance_types = benchmark_df["instance_type"].tolist()
     price = ap.get_instance_pricing(instance_types)
     complete_df = pd.merge(benchmark_df, price, on="instance_type")

     # NOTE(review): dividing by 3600 presumably converts an estimated
     # time in seconds to hours for an hourly price -- confirm units.
     seconds_per_hour = 3600
     estimated_time = complete_df["estimated_time_aws"]
     complete_df["est_cost_spot_price"] = \
         estimated_time * complete_df["spot_price"] / seconds_per_hour
     complete_df["est_cost_on_demand_price"] = \
         estimated_time * complete_df["on_demand_price"] / seconds_per_hour
     # Passes as long as the merge produced at least one row.
     self.assertGreater(complete_df.shape[0], 0)
Beispiel #7
def demo(num_pts=3, num_iter=3):
    """
    A demonstration of the time-prediction components of our program.

    Loads the MNIST dataset, runs the algorithm runner on the keras MNIST
    test module, fits a total-time model to the measured times and plots
    the measurements together with the fitted curve.

    :param num_pts: number of points to consider
    :param num_iter: number of iterations over points
    :return: None; the estimated total time is printed.
    :raises ValueError: if the fitted model type is not one of linear,
        nlogn, sqrd or log
    """
    # Download MNIST data:
    print('Loading mnist dataset...')
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    print('Finished loading mnist dataset.')

    # flatten 28*28 images to a 784 vector for each image
    num_pixels = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

    data = pd.concat([pd.DataFrame(X_train), pd.DataFrame(X_test)], axis=0)
    target = pd.concat([pd.DataFrame(y_train), pd.DataFrame(y_test)], axis=0)

    this_dir = os.path.dirname(os.path.abspath(__file__))

    if not os.path.exists(this_dir + '/data'):
        os.mkdir(this_dir + '/data')

    # Data and target to csv
    # NOTE(review): no write call exists between these two messages, so
    # `data` and `target` are never saved here. The calls were probably
    # lost from this snippet -- restore them once the CSV layout expected
    # by run_mnist is confirmed.
    print('Saving data to disk.')
    print('Finished saving data to disk.')

    # Create python call
    # NOTE(review): the end of this string was cut off in the original;
    # the target path used below is an assumption -- confirm it.
    PYTHON_CALL = "run_mnist(data_loc = 'data/mnist_data.csv', target_loc = " \
                  "'data/mnist_target.csv')"
    MODULE_NAME = 'awsforyou.tests.test_keras_mnist'

    print('Running algo_runner, this may take a few moments.')
    times, percents = algo_runner.run_algo(PYTHON_CALL, MODULE_NAME, num_pts,
                                           num_iter)
    print('Finished algo_runner.')
    model = total_time_component.find_total_time(times, percents)
    tot_time = model[0]

    # NOTE(review): nothing is actually removed here; the cleanup calls
    # appear to be missing as well.
    print('Removing MNIST data files.')

    model_type = model[1]
    # x_plot starts at 0, so the log-based curves are -inf/nan at the
    # first sample.
    x_plot = np.arange(0, 100, 0.1)
    a_factor = model[2]
    y_int = model[3]

    if model_type == "linear":
        y_plot = a_factor * x_plot + y_int
    elif model_type == "nlogn":
        y_plot = a_factor * np.multiply(x_plot, np.log(x_plot)) + y_int
    elif model_type == "sqrd":
        y_plot = a_factor * np.multiply(x_plot, x_plot) + y_int
    elif model_type == "log":
        y_plot = a_factor * np.log(x_plot) + y_int
    else:
        # Only reached for an unknown model type; without this `else` the
        # log branch raised unconditionally.
        raise ValueError("model type must be linear, nlogn, sqrd, or log")

    print('Plotting results.')
    plt.plot(percents, times, 'ro')
    plt.plot(x_plot, y_plot, 'b', linewidth=2)
    # Highlight the last point of the fitted curve.
    plt.plot(x_plot[-1], y_plot[-1], 'go', markersize=10)
    plt.xlim(0, 110)
    plt.ylim(0, max(tot_time + 0.1 * tot_time, 100))
    plt.gca().set_aspect('equal', adjustable='box')
    print("The estimated total time is: " + str(tot_time[0]) + " seconds.")
    return None
Beispiel #8
 def test_for_linear(self):
     """
     Test the total-time estimate for linear-time data.

     The integer-truncated estimate for TIMES_LIN must match
     EXPT_TIME_LIN.
     """
     calc_linear = int(total_time_component.find_total_time(TIMES_LIN)[0])
     self.assertAlmostEqual(calc_linear, EXPT_TIME_LIN)