i.e. remove the features that have the same value in all samples.

    Examples
    --------
    >>> X = [[0, 2, 0, 3],
    ...      [0, 1, 4, 3],
    ...      [0, 1, 1, 3]]
    >>> selector = VarianceThreshold(0.0)
    >>> selector.fit_transform(X)
    array([[2, 0],
           [1, 4],
           [1, 1]])
    """
    def __init__(self, threshold=0.0):
        """Store the selection threshold.

        Parameters
        ----------
        threshold : float, optional (default=0.0)
            Cut-off used by `fit_transform`, which keeps a column only when
            its standard deviation is greater than ``threshold + 1e-8``.
        """
        self.threshold = threshold

    def _check_X(self, X):
        if not isinstance(X, np.ndarray):
            X = np.asarray(X)
        return X

    def fit_transform(self, X):
        """Drop the low-spread columns of `X`.

        Parameters
        ----------
        X : array-like
            Input data; converted by `_check_X` and then indexed as a
            two-dimensional array (rows first, columns second).

        Returns
        -------
        np.ndarray
            The columns of `X` whose standard deviation along axis 0 is
            strictly greater than ``self.threshold + 1e-8``.
        """
        data = self._check_X(X)
        # NOTE(review): the statistic compared against the threshold is the
        # standard deviation, not the variance -- confirm this is intended.
        spread = np.std(data, axis=0)
        # The small constant raises the cut-off slightly, so a column whose
        # spread is within 1e-8 of the threshold is dropped (presumably to
        # absorb floating-point noise).
        keep = spread > self.threshold + 1e-8
        return data[:, keep]


if __name__ == '__main__':
    # When executed as a script, hand this file's path to the project's
    # test runner (presumably it locates and runs the matching tests).
    from utils.testing import run_tests
    run_tests(__file__)
                            self.best_model_ = self.model
                            if self.save_models:
                                self.best_model_.save(filepath=os.path.join(
                                    self.dirpath, self._best_model_name()),
                                                      **self.save_params)
                        # verbosing
                        if self.verbose:
                            print_inline(
                                " - best acc.: {0:.4f} +/- 2 * {1:.3f} at {2}\n"
                                .format(self.best_score_, self.best_std_,
                                        self.best_params_))

        # convert lists to np.ndarray
        for key in (['mean_score', 'std_score', 'params'] + [
                'split{0}_{1}'.format(k, s) for k in xrange(self.n_splits)
                for s in ('score', 'train_time', 'test_time')
        ]):
            self.cv_results_[key] = np.asarray(self.cv_results_[key])
        return self

    def to_df(self):
        """Return the collected `cv_results_` as a pandas DataFrame.

        Missing values in the resulting frame are replaced with empty
        strings.
        """
        import pandas as pd  # imported only when a DataFrame is requested
        frame = pd.DataFrame.from_dict(self.cv_results_)
        return frame.fillna('')


if __name__ == '__main__':
    # When executed as a script, pass this file's path together with the
    # imported `tests.test_model_selection` module to the project's test
    # runner.
    import tests.test_model_selection as t
    from utils.testing import run_tests
    run_tests(__file__, t)
Ejemplo n.º 3
0
                               MULTI_THREADED_SOLUTIONS, 1, args.max_thds + 1,
                               CombingRunner, build_logger, SOLUTIONS_FOLDER,
                               CXX_COMPILER_PATH)

    # Index every file in the tests directory by the integer stored on its
    # second line (the first line is read and discarded).
    test_cases = []
    for name in listdir(args.tests):
        path = os.path.join(args.tests, name)
        with open(path, 'r') as f:
            f.readline()
            size = int(f.readline())
        test_cases.append((size, path))

    # Size combinations that should be benchmarked against each other.
    if args.real_data:
        tests_pairs = [(74437, 91721), (124884, 134226)]
    else:
        tests_pairs = [(100000, 100000), (50000, 50000)]

    # Every ordered pair of distinct files whose sizes form a wanted
    # combination, tagged with the first file's size for sorting.
    sized_tests = [
        (size_a, CombingTest(path_a, path_b))
        for size_a, path_a in test_cases
        for size_b, path_b in test_cases
        if path_a != path_b and (size_a, size_b) in tests_pairs
    ]

    # Fail with a readable message instead of the opaque unpacking error
    # that ``zip(*[])`` would produce when nothing matched.
    if not sized_tests:
        raise SystemExit(
            'no pair of files in {0} matches the expected sizes {1}'.format(
                args.tests, tests_pairs))

    # Stable sort: smaller first inputs run first, ties keep discovery order.
    sized_tests.sort(key=lambda item: item[0])
    tests = [test for _, test in sized_tests]

    run_tests(runners, tests, args.repeats, args.resultcsvfile, default_logger,
              True)
# Solution names passed as the first argument to compile_programs in the
# script entry point below.
SINGLE_THREADED_SOLUTIONS = [
    'braid_multiplication_sequential_non_optimized',
    'braid_multiplication_sequential_memory',
    'braid_multiplication_sequential_precompute'
]
# Solution names passed as the second argument to compile_programs.
MULTI_THREADED_SOLUTIONS = [
    'braid_multiplication_parallel',
]

SOLUTIONS_FOLDER = 'braid_mults_solutions'  # where we put our ready to run implementations

# File name handed to run_tests (presumably where the results are written).
CSV_FILE = 'braid_mul.csv'

# Repeat count handed to run_tests (presumably runs per test -- confirm).
REPEATS = 10
# Logger passed to run_tests.
default_logger = Logger('log.txt')
# Logger passed to compile_programs.
build_logger = Logger('build_log.txt')

if __name__ == '__main__':
    # Command line: one positional integer, ``depth``.
    parser = ArgumentParser()
    parser.add_argument('depth', type=int, help='Depth for level paralellism')
    args = parser.parse_args()

    # Build the runners; the numeric bounds passed are 0 and depth + 1.
    runners = compile_programs(
        SINGLE_THREADED_SOLUTIONS,
        MULTI_THREADED_SOLUTIONS,
        0,
        args.depth + 1,
        BraidMulRunner,
        build_logger,
        SOLUTIONS_FOLDER,
        CXX_COMPILER_PATH,
    )

    # Input sizes: two small ones, 10k..100k in steps of 10k, 150k..1M in
    # steps of 50k, then four multi-million sizes.
    sizes = (
        [1000, 5000]
        + list(range(10000, 100001, 10000))
        + list(range(150000, 1000001, 50000))
        + [2000000, 5000000, 7500000, 10000000]
    )
    # Each test receives its size as a string plus the fixed string '42'.
    tests = [BraidMulTest(str(size), '42') for size in sizes]

    run_tests(runners, tests, REPEATS, CSV_FILE, default_logger, False)