Example #1
0
            for i in xrange(X.shape[0]):
                dist = list()
                for irow in range(self._parameter['neighbor_X'].shape[0]):
                    dist.append(np.linalg.norm(X[i, :] - self._parameter['neighbor_X'][irow, :]))
                indices = np.argsort(dist)[:K]
                pred.append(np.mean(self._parameter['neighbor_y'][indices]))
                self._logger.info('progress: %.2f %%' % (float(i) / X.shape[0] * 100))
        else:
            raise ValueError
        return pred


if __name__ == '__main__':
    # Script entry point: fit a KNNClassifier once per search mode on the same
    # split, time each predict call through the scheduler, then print the
    # recorded tasks and both accuracy scores.
    from base.time_scheduler import TimeScheduler

    scheduler = TimeScheduler()

    # KNN for classification task
    arff_path = os.getcwd() + '/../dataset/electricity-normalized.arff'
    dataset = DataLoader(arff_path).load(target_col_name='class')
    trainset, testset = dataset.cross_split()
    train_X, train_y = trainset[0], trainset[1]
    test_X, test_y = testset[0], testset[1]

    # Predictions keyed by search mode; kd_tree runs first, then brutal.
    predictions = {}
    for mode in ('kd_tree', 'brutal'):
        knn = KNNClassifier(search_mode=mode)
        knn.fit(train_X, train_y)
        predictions[mode] = scheduler.tic_tac(mode, knn.predict, X=test_X)

    # Report order is brutal first, then kd_tree.
    for mode in ('brutal', 'kd_tree'):
        scheduler.print_task_schedule(mode)

    print('%s %s' % (accuracy_score(test_y, predictions['brutal']),
                     accuracy_score(test_y, predictions['kd_tree'])))
Example #2
0
                    dist.append(
                        np.linalg.norm(X[i, :] -
                                       self._parameter['neighbor_X'][irow, :]))
                indices = np.argsort(dist)[:K]
                pred.append(np.mean(self._parameter['neighbor_y'][indices]))
                self._logger.info('progress: %.2f %%' %
                                  (float(i) / X.shape[0] * 100))
        else:
            raise ValueError
        return pred


if __name__ == '__main__':
    from base.time_scheduler import TimeScheduler

    scheduler = TimeScheduler()

    # KNN for classification task
    path = os.getcwd() + '/../dataset/electricity-normalized.arff'
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='class')
    trainset, testset = dataset.cross_split()
    knn = KNNClassifier(search_mode='kd_tree')
    knn.fit(trainset[0], trainset[1])
    predict_kd_tree = scheduler.tic_tac('kd_tree', knn.predict, X=testset[0])
    knn = KNNClassifier(search_mode='brutal')
    knn.fit(trainset[0], trainset[1])
    predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])
    scheduler.print_task_schedule('brutal')
    scheduler.print_task_schedule('kd_tree')
    print accuracy_score(testset[1], predict_brutal), accuracy_score(
Example #3
0
                for irow in range(self._parameter["neighbor_X"].shape[0]):
                    dist.append(np.linalg.norm(X[i, :] - self._parameter["neighbor_X"][irow, :]))
                indices = np.argsort(dist)[:K]
                pred.append(np.mean(self._parameter["neighbor_y"][indices]))
                logger.info("progress: %.2f %%" % (float(i) / X.shape[0] * 100))
        else:
            raise ValueError
        return pred


# Module-level logger; the prediction code above reports progress through it.
logger = get_logger("KNN")

if __name__ == "__main__":
    # Script entry point: fit a KNNClassifier once per search mode on the same
    # split, time each predict call through the scheduler, then print the
    # recorded tasks and both accuracy scores.
    from base.time_scheduler import TimeScheduler

    scheduler = TimeScheduler()

    # KNN for classification task
    arff_path = os.getcwd() + "/../dataset/electricity-normalized.arff"
    dataset = DataLoader(arff_path).load(target_col_name="class")
    trainset, testset = dataset.cross_split()
    train_X, train_y = trainset[0], trainset[1]
    test_X, test_y = testset[0], testset[1]

    # Predictions keyed by search mode; kd_tree runs first, then brutal.
    predictions = {}
    for mode in ("kd_tree", "brutal"):
        knn = KNNClassifier(search_mode=mode)
        knn.fit(train_X, train_y)
        predictions[mode] = scheduler.tic_tac(mode, knn.predict, X=test_X)

    # Report order is brutal first, then kd_tree.
    for mode in ("brutal", "kd_tree"):
        scheduler.print_task_schedule(mode)

    print("%s %s" % (accuracy_score(test_y, predictions["brutal"]),
                     accuracy_score(test_y, predictions["kd_tree"])))
Example #4
0
                    break
        return np.array(result)

    def _fast_sample_by_proba(self, sample_weight):
        nArray = 1e5
        cdf_map = list()
        start_ix = 0
        end_ix = 0
        for i in xrange(self._N):
            if i == self._N:
                cdf_map.extend([i] * (self._N - start_ix + 1))
                break
            end_ix += int(math.floor(sample_weight[i] * nArray))
            cdf_map.extend([i] * (end_ix - start_ix + 1))
            start_ix = end_ix
        cdf_map = np.array(cdf_map)
        indices = np.random.randint(0, nArray, self._K)
        return cdf_map[indices]


if __name__ == '__main__':
    # Script entry point: time one sample() call for each sampler mode and
    # print what the scheduler recorded for each task.
    from base.time_scheduler import TimeScheduler

    scheduler = TimeScheduler()

    # (task name, sampler) pairs; both samplers are built before any timing.
    timed_samplers = [
        ('sample_fast', Sampler(1e5, 1e3, mode='fast')),
        ('sample_normal', Sampler(1e5, 1e3, mode='normal')),
    ]

    for task_name, sampler in timed_samplers:
        scheduler.tic_tac(task_name, sampler.sample)

    for task_name, _ in timed_samplers:
        scheduler.print_task_schedule(task_name)
Example #5
0
                    break
        return np.array(result)

    def _fast_sample_by_proba(self, sample_weight):
        nArray = 1e5
        cdf_map = list()
        start_ix = 0
        end_ix = 0
        for i in xrange(self._N):
            if i == self._N:
                cdf_map.extend([i] * (self._N - start_ix + 1))
                break
            end_ix += int(math.floor(sample_weight[i] * nArray))
            cdf_map.extend([i] * (end_ix - start_ix + 1))
            start_ix = end_ix
        cdf_map = np.array(cdf_map)
        indices = np.random.randint(0, nArray, self._K)
        return cdf_map[indices]


if __name__ == '__main__':
    # Script entry point: time one sample() call for each sampler mode and
    # print what the scheduler recorded for each task.
    from base.time_scheduler import TimeScheduler

    scheduler = TimeScheduler()

    # (task name, sampler) pairs; both samplers are built before any timing.
    timed_samplers = [
        ('sample_fast', Sampler(1e5, 1e3, mode='fast')),
        ('sample_normal', Sampler(1e5, 1e3, mode='normal')),
    ]

    for task_name, sampler in timed_samplers:
        scheduler.tic_tac(task_name, sampler.sample)

    for task_name, _ in timed_samplers:
        scheduler.print_task_schedule(task_name)