コード例 #1
0
ファイル: knn.py プロジェクト: wyslatitude/FunnyPyML
        return pred


if __name__ == '__main__':
    from base.time_scheduler import TimeScheduler

    # Demo script: fit a KNNClassifier with each search mode on the same
    # train/test split, run predict() through scheduler.tic_tac (presumably a
    # timing wrapper -- confirm in base.time_scheduler), then print the
    # recorded schedules and both accuracies.
    scheduler = TimeScheduler()

    # KNN for classification task
    # The dataset path is resolved relative to the current working directory.
    path = os.getcwd() + '/../dataset/electricity-normalized.arff'
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='class')
    trainset, testset = dataset.cross_split()

    predictions = {}
    for mode in ('kd_tree', 'brutal'):
        knn = KNNClassifier(search_mode=mode)
        knn.fit(trainset[0], trainset[1])
        # The value returned by tic_tac is used as this mode's prediction.
        predictions[mode] = scheduler.tic_tac(mode, knn.predict, X=testset[0])

    for mode in ('brutal', 'kd_tree'):
        scheduler.print_task_schedule(mode)

    accuracy_brutal = accuracy_score(testset[1], predictions['brutal'])
    accuracy_kd_tree = accuracy_score(testset[1], predictions['kd_tree'])
    # A parenthesised, pre-formatted string prints the same "a b" line under
    # both the Python 2 print statement and the Python 3 print function.
    print('%s %s' % (accuracy_brutal, accuracy_kd_tree))

    # KNN for regression task
    # path = os.getcwd() + '/../dataset/winequality-white.csv'
    # loader = DataLoader(path)
    # dataset = loader.load(target_col_name='quality')
    # trainset, testset = dataset.cross_split()
    # knn = KNNRegressor(search_mode='brutal')
    # knn.fit(trainset[0], trainset[1])
    # predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])
コード例 #2
0
        return pred


if __name__ == '__main__':
    from base.time_scheduler import TimeScheduler

    # Demo script: fit a KNNClassifier with each search mode on the same
    # train/test split, run predict() through scheduler.tic_tac (presumably a
    # timing wrapper -- confirm in base.time_scheduler), then print the
    # recorded schedules and both accuracies.
    scheduler = TimeScheduler()

    # KNN for classification task
    # The dataset path is resolved relative to the current working directory.
    path = os.getcwd() + '/../dataset/electricity-normalized.arff'
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='class')
    trainset, testset = dataset.cross_split()

    predictions = {}
    for mode in ('kd_tree', 'brutal'):
        knn = KNNClassifier(search_mode=mode)
        knn.fit(trainset[0], trainset[1])
        # The value returned by tic_tac is used as this mode's prediction.
        predictions[mode] = scheduler.tic_tac(mode, knn.predict, X=testset[0])

    for mode in ('brutal', 'kd_tree'):
        scheduler.print_task_schedule(mode)

    accuracy_brutal = accuracy_score(testset[1], predictions['brutal'])
    accuracy_kd_tree = accuracy_score(testset[1], predictions['kd_tree'])
    # A parenthesised, pre-formatted string prints the same "a b" line under
    # both the Python 2 print statement and the Python 3 print function.
    print('%s %s' % (accuracy_brutal, accuracy_kd_tree))

    # KNN for regression task
    # path = os.getcwd() + '/../dataset/winequality-white.csv'
    # loader = DataLoader(path)
    # dataset = loader.load(target_col_name='quality')
    # trainset, testset = dataset.cross_split()
    # knn = KNNRegressor(search_mode='brutal')
    # knn.fit(trainset[0], trainset[1])
コード例 #3
0
ファイル: knn.py プロジェクト: Guangweidao/FunnyPyML
# Module-level logger created via get_logger("KNN").
logger = get_logger("KNN")

if __name__ == "__main__":
    from base.time_scheduler import TimeScheduler

    # Demo script: fit a KNNClassifier with each search mode on the same
    # train/test split, run predict() through scheduler.tic_tac (presumably a
    # timing wrapper -- confirm in base.time_scheduler), then print the
    # recorded schedules and both accuracies.
    scheduler = TimeScheduler()

    # KNN for classification task
    # The dataset path is resolved relative to the current working directory.
    path = os.getcwd() + "/../dataset/electricity-normalized.arff"
    loader = DataLoader(path)
    dataset = loader.load(target_col_name="class")
    trainset, testset = dataset.cross_split()

    predictions = {}
    for mode in ("kd_tree", "brutal"):
        knn = KNNClassifier(search_mode=mode)
        knn.fit(trainset[0], trainset[1])
        # The value returned by tic_tac is used as this mode's prediction.
        predictions[mode] = scheduler.tic_tac(mode, knn.predict, X=testset[0])

    for mode in ("brutal", "kd_tree"):
        scheduler.print_task_schedule(mode)

    accuracy_brutal = accuracy_score(testset[1], predictions["brutal"])
    accuracy_kd_tree = accuracy_score(testset[1], predictions["kd_tree"])
    # A parenthesised, pre-formatted string prints the same "a b" line under
    # both the Python 2 print statement and the Python 3 print function.
    print("%s %s" % (accuracy_brutal, accuracy_kd_tree))

    # KNN for regression task
    # path = os.getcwd() + '/../dataset/winequality-white.csv'
    # loader = DataLoader(path)
    # dataset = loader.load(target_col_name='quality')
    # trainset, testset = dataset.cross_split()
    # knn = KNNRegressor(search_mode='brutal')
    # knn.fit(trainset[0], trainset[1])
    # predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])
コード例 #4
0
ファイル: sampler.py プロジェクト: Guangweidao/FunnyPyML
                    break
        return np.array(result)

    def _fast_sample_by_proba(self, sample_weight):
        """Draw ``self._K`` item indices roughly in proportion to ``sample_weight``.

        A lookup table is built in which item ``i`` occupies
        ``floor(sample_weight[i] * 1e5) + 1`` consecutive slots; a sample is
        then a uniform draw over the slots of that table.

        :param sample_weight: sequence holding at least ``self._N`` weights;
            only the first ``self._N`` entries are used.
        :return: numpy array of ``self._K`` indices taken from ``[0, self._N)``.
        """
        resolution = 1e5
        # NOTE(review): the demo below builds Sampler(1e5, 1e3, ...), so _N and
        # _K are presumably floats at this point; coerce them before they are
        # used as an array length and a draw count.
        n_items = int(self._N)
        n_draws = int(self._K)

        weights = np.asarray(sample_weight, dtype=float)[:n_items]
        # One extra slot per item keeps every item drawable even when its
        # weight is below 1 / resolution; negative weights contribute no slots.
        slots = np.maximum(np.floor(weights * resolution).astype(int) + 1, 0)
        cdf_map = np.repeat(np.arange(n_items), slots)

        # Draw over the real table length. The table can hold up to n_items
        # more slots than `resolution`, so a fixed upper bound of `resolution`
        # would never reach the slots of the trailing items.
        indices = np.random.randint(0, len(cdf_map), n_draws)
        return cdf_map[indices]


if __name__ == '__main__':
    from base.time_scheduler import TimeScheduler

    # Run the 'fast' and 'normal' sampler modes with identical constructor
    # arguments through scheduler.tic_tac, then print each task's schedule.
    scheduler = TimeScheduler()
    tasks = (
        ('sample_fast', Sampler(1e5, 1e3, mode='fast')),
        ('sample_normal', Sampler(1e5, 1e3, mode='normal')),
    )
    for task_name, sampler in tasks:
        scheduler.tic_tac(task_name, sampler.sample)
    for task_name, _ in tasks:
        scheduler.print_task_schedule(task_name)
コード例 #5
0
                    break
        return np.array(result)

    def _fast_sample_by_proba(self, sample_weight):
        """Draw ``self._K`` item indices roughly in proportion to ``sample_weight``.

        A lookup table is built in which item ``i`` occupies
        ``floor(sample_weight[i] * 1e5) + 1`` consecutive slots; a sample is
        then a uniform draw over the slots of that table.

        :param sample_weight: sequence holding at least ``self._N`` weights;
            only the first ``self._N`` entries are used.
        :return: numpy array of ``self._K`` indices taken from ``[0, self._N)``.
        """
        resolution = 1e5
        # NOTE(review): the demo below builds Sampler(1e5, 1e3, ...), so _N and
        # _K are presumably floats at this point; coerce them before they are
        # used as an array length and a draw count.
        n_items = int(self._N)
        n_draws = int(self._K)

        weights = np.asarray(sample_weight, dtype=float)[:n_items]
        # One extra slot per item keeps every item drawable even when its
        # weight is below 1 / resolution; negative weights contribute no slots.
        slots = np.maximum(np.floor(weights * resolution).astype(int) + 1, 0)
        cdf_map = np.repeat(np.arange(n_items), slots)

        # Draw over the real table length. The table can hold up to n_items
        # more slots than `resolution`, so a fixed upper bound of `resolution`
        # would never reach the slots of the trailing items.
        indices = np.random.randint(0, len(cdf_map), n_draws)
        return cdf_map[indices]


if __name__ == '__main__':
    from base.time_scheduler import TimeScheduler

    # Run the 'fast' and 'normal' sampler modes with identical constructor
    # arguments through scheduler.tic_tac, then print each task's schedule.
    scheduler = TimeScheduler()
    tasks = (
        ('sample_fast', Sampler(1e5, 1e3, mode='fast')),
        ('sample_normal', Sampler(1e5, 1e3, mode='normal')),
    )
    for task_name, sampler in tasks:
        scheduler.tic_tac(task_name, sampler.sample)
    for task_name, _ in tasks:
        scheduler.print_task_schedule(task_name)