Esempio n. 1
0
    def test_run_algo(self):
        """
        Test the run_algo function in algo_runner.py.

        Loads the MNIST dataset, flattens every image into one row, writes
        the combined data and target frames as CSV files into a ``data``
        directory next to this file, and then checks that run_algo returns
        two lists whose items are all floats.
        :return: None
        """

        print('Loading mnist dataset...')
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        print('Finished loading mnist dataset.')

        # flatten 28*28 images to a 784 vector for each image
        num_pixels = X_train.shape[1] * X_train.shape[2]
        X_train = X_train.reshape(X_train.shape[0],
                                  num_pixels).astype('float32')
        X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

        # Train and test splits are stacked row-wise into a single frame.
        data = pd.concat([pd.DataFrame(X_train), pd.DataFrame(X_test)], axis=0)
        target = pd.concat([pd.DataFrame(y_train),
                            pd.DataFrame(y_test)],
                           axis=0)

        # Keep the CSV files beside this test module, independent of the
        # current working directory.
        this_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(this_dir, 'data')
        if not os.path.exists(data_dir):
            os.mkdir(data_dir)

        data_path = os.path.join(data_dir, 'mnist_data.csv')
        target_path = os.path.join(data_dir, 'mnist_target.csv')

        # Data and target to csv
        print('Saving data to disk.')
        data.to_csv(data_path)
        target.to_csv(target_path)
        print('Finished saving data to disk.')

        # repr() quotes and escapes the paths so the generated call stays a
        # valid Python expression even when a path contains backslashes or
        # quote characters.
        run_string = "run_mnist(data_loc={!r}, target_loc={!r})".format(
            data_path, target_path)

        times, percents = algo_runner.run_algo(
            run_string, 'awsforyou.tests.test_keras_mnist')

        # assertIsInstance reports the offending type when a check fails.
        self.assertIsInstance(times, list)
        for item in times:
            self.assertIsInstance(item, float)

        self.assertIsInstance(percents, list)
        for item in percents:
            self.assertIsInstance(item, float)
Esempio n. 2
0
def add_estimated_time_aws(dataframe, python_call, module_name):
    """
    Estimate the time required to run the user's algorithm on each instance
    and add it to the dataframe as the "estimated_time_aws" column.

    The estimate is each row's "runtime" value divided by the local
    benchmark result, multiplied by the locally estimated total time.
    The dataframe passed in is modified in place and also returned.
    :param dataframe: the benchmark dataframe output from get_benchmark_data()
    :param python_call: str python string calling the algorithm to be timed
    :param module_name: str name of module from which function is called
    :return: dataframe with added estimated times
    """
    times, percents = ar.run_algo(python_call, module_name)
    est_time_user = tt.find_total_time(times, percents)
    user_benchmark = br.run_benchmark()
    # Scale the local estimate by the ratio of each instance's benchmark
    # runtime to the local benchmark. (The original referenced an undefined
    # name here, which raised NameError on every call.)
    est_time_aws = dataframe[['runtime']] / user_benchmark * est_time_user[0]
    dataframe["estimated_time_aws"] = est_time_aws
    return dataframe
Esempio n. 3
0
def demo(num_pts=3, num_iter=3):
    """
    A demonstration of the time-prediction components of our program.

    Loads the MNIST dataset, writes it as CSV files into a ``data``
    directory next to this file, passes a run_mnist call on those files to
    algo_runner.run_algo, fits a total-time model to the measured times,
    removes the CSV files again, and plots the measurements together with
    the fitted curve. The predicted time for 100% of the data is printed.
    :param num_pts: number of points to consider
    :param num_iter: number of iterations over points
    :return: None
    :raises ValueError: if the fitted model type is not linear, nlogn,
        sqrd, or log
    """
    # Download MNIST data:
    print('Loading mnist dataset...')
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    print('Finished loading mnist dataset.')

    # flatten 28*28 images to a 784 vector for each image
    num_pixels = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

    # Train and test splits are stacked row-wise into a single frame.
    data = pd.concat([pd.DataFrame(X_train), pd.DataFrame(X_test)], axis=0)
    target = pd.concat([pd.DataFrame(y_train), pd.DataFrame(y_test)], axis=0)

    # Use absolute paths inside the directory that is actually created, so
    # the demo works regardless of the current working directory.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(this_dir, 'data')
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)

    data_path = os.path.join(data_dir, 'mnist_data.csv')
    target_path = os.path.join(data_dir, 'mnist_target.csv')

    # Create python call; repr() keeps the paths valid Python literals even
    # if they contain backslashes or quote characters.
    python_call = "run_mnist(data_loc={!r}, target_loc={!r})".format(
        data_path, target_path)
    module_name = 'awsforyou.tests.test_keras_mnist'

    try:
        # Data and target to csv
        print('Saving data to disk.')
        data.to_csv(data_path)
        target.to_csv(target_path)
        print('Finished saving data to disk.')

        print('Running algo_runner, this may take a few moments.')
        times, percents = algo_runner.run_algo(python_call, module_name,
                                               num_pts, num_iter)
        print('Finished algo_runner.')
        model = total_time_component.find_total_time(times, percents)
    finally:
        # Always clean up the CSV files, even when the run above fails.
        print('Removing MNIST data files.')
        for csv_path in (data_path, target_path):
            if os.path.exists(csv_path):
                os.remove(csv_path)

    # model is indexed as (predicted total time, model type, factor,
    # intercept) by the code below.
    tot_time = model[0]
    model_type = model[1]
    a_factor = model[2]
    y_int = model[3]

    # Start just above 0 so the log-based models never evaluate log(0), and
    # end exactly at 100 so the last plotted point is the full-data
    # prediction.
    x_plot = np.linspace(0.1, 100, 1000)

    if model_type == "linear":
        y_plot = a_factor * x_plot + y_int
    elif model_type == "nlogn":
        y_plot = a_factor * np.multiply(x_plot, np.log(x_plot)) + y_int
    elif model_type == "sqrd":
        y_plot = a_factor * np.multiply(x_plot, x_plot) + y_int
    elif model_type == "log":
        y_plot = a_factor * np.log(x_plot) + y_int
    else:
        raise ValueError("model type must be linear, nlogn, sqrd, or log")

    print('Plotting results.')
    plt.plot(percents, times, 'ro')             # measured points
    plt.plot(x_plot, y_plot, 'b', linewidth=2)  # fitted curve
    plt.plot(x_plot[-1], y_plot[-1], 'go', markersize=10)  # value at 100%
    plt.xlim(0, 110)
    plt.ylim(0, max(tot_time + 0.1 * tot_time, 100))
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()
    print("The estimated total time is: " + str(tot_time[0]) + " seconds.")
    return None