Ejemplo n.º 1
0
class TestEngine(unittest.TestCase):
    """Sanity tests for ``Engine.train``."""

    def setUp(self):
        """Give every test the shared dataset and a freshly built ``Engine``."""
        self.data = test_utils.load_test_dataset()
        self.engine = Engine()

    def test_trainer(self):
        """Run one training pass and verify each part of the returned tuple."""
        with self.subTest("Sanity - everything just works"):
            outcome = self.engine.train(
                data=self.data,
                cpu_time_limit=60,
                max_evals=20,
            )
            # ``train`` is expected to hand back exactly four values.
            best_algo, best_params, best_score, tasks = outcome

            # Each of these must be truthy (non-empty / non-zero).
            for returned_value in (best_algo, best_score, tasks):
                self.assertTrue(returned_value)

            # The task names must match FULL_ALGO_LIST, ignoring order.
            task_names = list(tasks.keys())
            self.assertCountEqual(task_names, FULL_ALGO_LIST)
Ejemplo n.º 2
0
                     sep=';',
                     error_bad_lines=False,
                     encoding="latin-1")
    df.columns = ['user', 'item', 'rating']

    reader = Reader(rating_scale=(0, 10))
    data = Dataset.load_from_df(df.sample(n=100000, random_state=134),
                                reader=reader)
    del (df)

    benchmark_results = {'Algorithm': [], 'RMSE': [], 'MAE': [], 'Time': []}

    # Evaluate AutoSurprise
    start_time = time.time()
    time_limt = 60 * 60 * 12  # Run for 12 hours
    engine = Engine(verbose=False)
    best_model, best_params, best_score, tasks = engine.train(
        data=data,
        target_metric='test_rmse',
        quick_compute=False,
        cpu_time_limit=time_limt,
        max_evals=10000,
        hpo_algo=hyperopt.atpe.suggest)

    cv_time = str(datetime.timedelta(seconds=int(time.time() - start_time)))
    cv_results = cross_validate(engine.build_model(best_model, best_params),
                                data, ['rmse', 'mae'])
    mean_rmse = '{:.4f}'.format(np.mean(cv_results['test_rmse']))
    mean_mae = '{:.4f}'.format(np.mean(cv_results['test_mae']))

    print("--------- Done ----------")
Ejemplo n.º 3
0
                datetime.timedelta(seconds=int(time.time() - start_time)))
            mean_rmse = '{:.3f}'.format(np.mean(cv_results['test_rmse']))
            mean_mae = '{:.3f}'.format(np.mean(cv_results['test_mae']))

            benchmark_results['Algorithm'].append(algo_name)
            benchmark_results['RMSE'].append(mean_rmse)
            benchmark_results['MAE'].append(mean_mae)
            benchmark_results['Best params'].append({})
            benchmark_results['Time'].append(cv_time)

        except Exception as exc:
            print('Exception : ', exc)

    # Evaluate AutoSurprise
    start_time = time.time()
    engine = Engine(verbose=False)
    best_model, best_params, best_score, tasks = engine.train(
        data=data,
        target_metric='test_rmse',
        quick_compute=False,
        cpu_time_limit=3600,
        max_evals=500)
    cv_time = str(datetime.timedelta(seconds=int(time.time() - start_time)))

    print("--------- Done ----------")
    print("Best model: ", best_model)
    print("Best params: ", best_params)
    print("Best score: ", best_score)
    print("All tasks: ", tasks)

    benchmark_results['Algorithm'].append('AutoSurprise')
Ejemplo n.º 4
0
import time
import datetime
import os
import sys
from surprise import Dataset
from auto_surprise.engine import Engine

if __name__ == "__main__":
    # Demo script: train on the builtin "ml-100k" dataset and print what
    # the engine returns.
    dataset = Dataset.load_builtin("ml-100k")

    # Run auto surprise
    started_at = time.time()
    engine = Engine(verbose=True)
    # This is just a demo configuration. You'd ideally want to change the time limit
    search_result = engine.train(
        data=dataset,
        target_metric="test_rmse",
        cpu_time_limit=720,
        max_evals=100,
    )
    best_algo, best_params, best_score, tasks = search_result

    # Wall-clock duration of the run, truncated to whole seconds.
    elapsed = datetime.timedelta(seconds=int(time.time() - started_at))
    cv_time = str(elapsed)

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best algorithm: ", best_algo),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
        ("All tasks: ", tasks),
    )
    for label, value in summary:
        print(label, value)
Ejemplo n.º 5
0
# Seed both the stdlib and the numpy global generators with the same value.
random.seed(1)
numpy.random.seed(1)

if __name__ == "__main__":
    # Load the MovieLens 100k ratings file (tab-separated, ratings from 1 to 5).
    ratings_path = os.path.expanduser("../datasets/ml-100k/u.data")
    line_reader = Reader(
        line_format="user item rating timestamp",
        sep="\t",
        rating_scale=(1, 5),
    )
    data = Dataset.load_from_file(ratings_path, reader=line_reader)

    # Run auto surprise
    start_time = time.time()
    # Only these two algorithms are passed to the engine for this run.
    seeded_state = numpy.random.RandomState(1)
    candidate_algorithms = ["baseline_only", "knn_basic"]
    engine = Engine(
        verbose=True,
        random_state=seeded_state,
        algorithms=candidate_algorithms,
    )
    search_result = engine.train(
        data=data,
        target_metric="test_rmse",
        cpu_time_limit=180,
        max_evals=100,
        hpo_algo=hyperopt.tpe.suggest,
    )
    best_model, best_params, best_score, tasks = search_result

    # Wall-clock duration of the run, truncated to whole seconds.
    elapsed = datetime.timedelta(seconds=int(time.time() - start_time))
    cv_time = str(elapsed)

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best model: ", best_model),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
    )
    for label, value in summary:
        print(label, value)
Ejemplo n.º 6
0
 def setUp(self):
     """Attach the test dataset and a new ``Engine`` to the test instance."""
     dataset = test_utils.load_test_dataset()
     self.data = dataset
     self.engine = Engine()
Ejemplo n.º 7
0
sys.path.insert(1, './')

from auto_surprise.engine import Engine

if __name__ == '__main__':
    # Load the MovieLens 100k ratings file (tab-separated, ratings from 1 to 5).
    ratings_path = os.path.expanduser('../datasets/ml-100k/u.data')
    line_reader = Reader(
        line_format='user item rating timestamp',
        sep='\t',
        rating_scale=(1, 5),
    )
    data = Dataset.load_from_file(ratings_path, reader=line_reader)

    # Run auto surprise
    started_at = time.time()
    engine = Engine(debug=True)
    search_result = engine.train(
        data=data,
        target_metric='test_rmse',
        cpu_time_limit=180,
        max_evals=100,
        hpo_algo=hyperopt.atpe.suggest,
    )
    best_model, best_params, best_score, tasks = search_result

    # Wall-clock duration of the run, truncated to whole seconds.
    elapsed = datetime.timedelta(seconds=int(time.time() - started_at))
    cv_time = str(elapsed)

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best model: ", best_model),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
        ("All tasks: ", tasks),
    )
    for label, value in summary:
        print(label, value)
Ejemplo n.º 8
0
import time
import datetime
import os
import sys
from surprise import Dataset
from auto_surprise.engine import Engine

if __name__ == '__main__':
    # Demo script: train on the builtin 'ml-100k' dataset and print what
    # the engine returns.
    dataset = Dataset.load_builtin('ml-100k')

    # Run auto surprise
    started_at = time.time()
    engine = Engine(debug=False)
    search_result = engine.train(
        data=dataset,
        target_metric='test_rmse',
        cpu_time_limit=720,
        max_evals=100,
    )
    best_model, best_params, best_score, tasks = search_result

    # Wall-clock duration of the run, truncated to whole seconds.
    elapsed = datetime.timedelta(seconds=int(time.time() - started_at))
    cv_time = str(elapsed)

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best model: ", best_model),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
        ("All tasks: ", tasks),
    )
    for label, value in summary:
        print(label, value)