def run_inference(num_observations: int = 1000):
    """Benchmark scikit-learn LogisticRegression *training* on a data sample.

    Loads ``num_observations`` feature rows and class labels via ``common``,
    fits a fresh ``LogisticRegression`` ``NUM_LOOPS`` times, and records a
    scaled per-row time for each fit.

    Args:
        num_observations: number of rows to draw from the test data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings.
    """
    # Load data
    train_x_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    train_y = common.get_test_data_yc(size=num_observations)
    num_rows = len(train_x_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        reg = LogisticRegression().fit(train_x_df, train_y)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3 == 1e4, not the usual 1e3 s->ms factor; kept
        # as-is because every sibling benchmark in this file uses the same
        # scale, so the numbers stay comparable.
        run_times.append(total_time * 10e3)

        # Scaled per-row time (10e6 == 1e7; same caveat as above).
        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
# 예제 #2 (Example #2)
def run_inference(num_observations: int = 1000):
    """Benchmark scikit-learn DBSCAN clustering on a data sample.

    Loads ``num_observations`` feature rows via ``common`` and runs
    ``DBSCAN(eps=0.3, min_samples=10).fit`` ``NUM_LOOPS`` times, recording a
    scaled per-row time for each run.

    Args:
        num_observations: number of rows to draw from the test data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()

        cluster = DBSCAN(eps=0.3, min_samples=10)
        cluster.fit(test_df)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3/10e6 scale factors match the sibling benchmarks.
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
# 예제 #3 (Example #3)
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py gradient-boosted-tree regression *prediction*.

    Loads ``num_observations`` rows via ``common.get_test_data`` and, for each
    of ``NUM_LOOPS`` iterations, builds a ``gbt_regression_prediction``
    algorithm and runs it against the module-level ``daal_model``, recording a
    scaled per-row time.

    Args:
        num_observations: number of rows to draw from the test data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings (added for consistency with the sibling
        benchmarks; callers that ignore the return value are unaffected).
    """
    # Load data
    test_df = common.get_test_data(num_observations)
    num_rows = len(test_df)

    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        # Algorithm construction is deliberately inside the timed region so
        # the per-iteration setup cost is included in the benchmark.
        daal_predict_algo = d4p.gbt_regression_prediction(fptype='float')
        daal_prediction = daal_predict_algo.compute(test_df, daal_model)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3/10e6 scale factors match the sibling benchmarks.
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
# 예제 #4 (Example #4)
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py logistic-regression *prediction*.

    Loads ``num_observations`` feature rows via ``common`` and, for each of
    ``NUM_LOOPS`` iterations, builds a two-class
    ``logistic_regression_prediction`` algorithm and runs it against the
    module-level ``train_result.model``, recording a scaled per-row time.

    Args:
        num_observations: number of rows to draw from the test data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        # Request labels plus (log-)probabilities so the timing reflects the
        # full prediction workload.
        predict_algo = d4p.logistic_regression_prediction(
            nClasses=2,
            resultsToEvaluate=
            "computeClassLabels|computeClassProbabilities|computeClassLogProbabilities"
        )
        predict_result = predict_algo.compute(test_df, train_result.model)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3/10e6 scale factors match the sibling benchmarks.
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)
    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
# 예제 #5 (Example #5)
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py k-means clustering (init + fit) on a data sample.

    Loads ``num_observations`` feature rows via ``common`` and, for each of
    ``NUM_LOOPS`` iterations, computes random-dense initial centroids and runs
    a 5-cluster k-means (up to 100 iterations), recording a scaled per-row
    time.

    Args:
        num_observations: number of rows to draw from the test data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        # Both centroid initialization and the k-means run are timed.
        init_alg = d4p.kmeans_init(nClusters=5, fptype="float",
                                   method="randomDense")
        centroids = init_alg.compute(test_df).centroids
        alg = d4p.kmeans(nClusters=5, maxIterations=100,
                         fptype="float", accuracyThreshold=0,
                         assignFlag=False)
        result = alg.compute(test_df, centroids)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3/10e6 scale factors match the sibling benchmarks.
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark daal4py decision-forest regression *training*.

    Loads ``num_observations`` feature rows and targets via ``common`` and,
    for each of ``NUM_LOOPS`` iterations, trains a 100-tree
    ``decision_forest_regression_training`` model, recording a scaled per-row
    time.

    Args:
        num_observations: number of rows to draw from the training data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings.
    """
    # Load data
    train_x_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    train_y_df = common.get_test_data_df(X=common.y_df, size=num_observations)
    num_rows = len(train_x_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        # Model construction and training are both timed.
        MODEL = d4p.decision_forest_regression_training(nTrees=100)
        train_result = MODEL.compute(train_x_df, train_y_df)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3/10e6 scale factors match the sibling benchmarks.
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
def run_inference(num_observations: int = 1000):
    """Benchmark scikit-learn KMeans clustering on a data sample.

    Loads ``num_observations`` feature rows via ``common`` and runs
    ``KMeans(n_clusters=5, **kmeans_kwargs).fit`` ``NUM_LOOPS`` times,
    recording a scaled per-row time for each run.

    Args:
        num_observations: number of rows to draw from the test data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_dfc, size=num_observations)
    num_rows = len(test_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()

        # Extra KMeans options come from the module-level kmeans_kwargs dict.
        cluster = KMeans(n_clusters=5, **kmeans_kwargs)
        cluster.fit(test_df)

        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3/10e6 scale factors match the sibling benchmarks.
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
# 예제 #8 (Example #8)
def run_inference(num_observations: int = 1000):
    """Benchmark XGBoost prediction on a data sample.

    Loads ``num_observations`` rows via ``common.get_test_data`` and, for each
    of ``NUM_LOOPS`` iterations, wraps the data in an ``xgb.DMatrix`` and runs
    the module-level ``MODEL.predict``, recording a scaled per-row time.
    DMatrix construction is inside the timed region, so its cost is included.

    Args:
        num_observations: number of rows to draw from the test data.

    Returns:
        The aggregate statistics produced by ``common.calculate_stats``
        over the per-row timings (added for consistency with the sibling
        benchmarks; callers that ignore the return value are unaffected).
    """
    # Load data
    test_df = common.get_test_data(num_observations)
    num_rows = len(test_df)

    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):
        start_time = timer()
        data = xgb.DMatrix(test_df)
        MODEL.predict(data)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e3/10e6 scale factors match the sibling benchmarks.
        run_times.append(total_time * 10e3)

        inference_time = total_time * (10e6) / num_rows
        inference_times.append(inference_time)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
# 예제 #9 (Example #9)
# main module: dynamically loads the benchmark named on the command line
# (via the module-level `args`) and runs its training loop over growing
# batch sizes.
if __name__ == '__main__':
    # Resolve the requested benchmark module under the `models` package.
    try:
        logging.info(f"Loading model {args.model}")
        model = importlib.import_module('models.' + args.model)
    except ModuleNotFoundError:
        logging.error(f"Model {args.model} not found.")
        # Exit with a failure status; `quit()` is meant for interactive use.
        raise SystemExit(1)

    if args.train:
        logging.info(f"Running training benchmark for {args.model}...")
        logging.info(common.get_header())
        logging.info(common.get_underline())
        # Sweep batch sizes 10, 100, 1000, ... up to args.observations.
        batch_size = 10
        while batch_size <= args.observations:
            total_times, observation_times = model.run_training(batch_size)
            stats = common.calculate_stats(observation_times)
            logging.info(common.format_stats(batch_size, stats))
            batch_size *= 10
    # TODO(review): an inference sweep (model.run_inference over the same
    # batch sizes) was previously sketched here as commented-out code.
else:
    # BUG FIX: the original logged f"Could not find benchmark for {model}"
    # here, but `model` is only bound inside the __main__ branch, so merely
    # importing this file raised NameError. Log a static message instead.
    logging.error("Benchmark runner imported as a module; nothing executed.")