from base.time_scheduler import TimeScheduler

scheduler = TimeScheduler()

# KNN for classification task
# The dataset is looked up relative to the current working directory
# (../dataset/), so the script must be launched from the expected folder.
path = os.path.join(os.getcwd(), os.pardir, 'dataset',
                    'electricity-normalized.arff')
loader = DataLoader(path)
dataset = loader.load(target_col_name='class')
trainset, testset = dataset.cross_split()

# For both splits, element [0] is what gets passed to fit/predict and
# element [1] is what the predictions are scored against.
# The value returned by tic_tac is used as the prediction of knn.predict;
# the first argument is the task name later given to print_task_schedule.
knn = KNNClassifier(search_mode='kd_tree')
knn.fit(trainset[0], trainset[1])
predict_kd_tree = scheduler.tic_tac('kd_tree', knn.predict, X=testset[0])

knn = KNNClassifier(search_mode='brutal')
knn.fit(trainset[0], trainset[1])
predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])

scheduler.print_task_schedule('brutal')
scheduler.print_task_schedule('kd_tree')

# Accuracy of the brute-force search first, then of the kd-tree search.
# A single pre-formatted string keeps the output identical to the former
# ``print a, b`` statement while also being valid syntax on Python 3.
print('%s %s' % (accuracy_score(testset[1], predict_brutal),
                 accuracy_score(testset[1], predict_kd_tree)))

# KNN for regression task
# path = os.getcwd() + '/../dataset/winequality-white.csv'
# loader = DataLoader(path)
# dataset = loader.load(target_col_name='quality')
# trainset, testset = dataset.cross_split()
# knn = KNNRegressor(search_mode='brutal')
# knn.fit(trainset[0], trainset[1])
# predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])
# knn = KNNRegressor(search_mode='kd_tree')
# knn.fit(trainset[0], trainset[1])
# predict_kd_tree = scheduler.tic_tac('kd_tree', knn.predict, X=testset[0])
# scheduler.print_task_schedule('brutal')
from base.time_scheduler import TimeScheduler

scheduler = TimeScheduler()

# KNN for classification task
# The ARFF file is resolved against the current working directory
# (one level up, inside dataset/).
path = os.path.join(os.getcwd(), os.pardir, 'dataset',
                    'electricity-normalized.arff')
loader = DataLoader(path)
dataset = loader.load(target_col_name='class')
trainset, testset = dataset.cross_split()

# Same data, two neighbour-search back ends. Each predict call goes through
# scheduler.tic_tac under its own task name, and the value tic_tac returns
# is what gets scored below.
knn = KNNClassifier(search_mode='kd_tree')
knn.fit(trainset[0], trainset[1])
predict_kd_tree = scheduler.tic_tac('kd_tree', knn.predict, X=testset[0])

knn = KNNClassifier(search_mode='brutal')
knn.fit(trainset[0], trainset[1])
predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])

scheduler.print_task_schedule('brutal')
scheduler.print_task_schedule('kd_tree')

# Print "<brutal accuracy> <kd_tree accuracy>" on one line. Formatting the
# string first reproduces the output of the old ``print a, b`` statement
# and keeps the line parseable by Python 3 as well.
print('%s %s' % (accuracy_score(testset[1], predict_brutal),
                 accuracy_score(testset[1], predict_kd_tree)))

# KNN for regression task
# path = os.getcwd() + '/../dataset/winequality-white.csv'
# loader = DataLoader(path)
# dataset = loader.load(target_col_name='quality')
# trainset, testset = dataset.cross_split()
# knn = KNNRegressor(search_mode='brutal')
# knn.fit(trainset[0], trainset[1])
# predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])
# knn = KNNRegressor(search_mode='kd_tree')
# knn.fit(trainset[0], trainset[1])
# predict_kd_tree = scheduler.tic_tac('kd_tree', knn.predict, X=testset[0])
from base.time_scheduler import TimeScheduler

scheduler = TimeScheduler()

# KNN for classification task
# Build the dataset location with os.path so no path separator is
# hard-coded; it is still relative to the current working directory.
path = os.path.join(os.getcwd(), os.pardir, "dataset",
                    "electricity-normalized.arff")
loader = DataLoader(path)
dataset = loader.load(target_col_name="class")
trainset, testset = dataset.cross_split()

# Index [0] of a split is handed to fit/predict, index [1] is the reference
# the predictions are compared with.
knn = KNNClassifier(search_mode="kd_tree")
knn.fit(trainset[0], trainset[1])
# tic_tac is given the task name and the callable; its return value is
# treated as the predictions.
predict_kd_tree = scheduler.tic_tac("kd_tree", knn.predict, X=testset[0])

knn = KNNClassifier(search_mode="brutal")
knn.fit(trainset[0], trainset[1])
predict_brutal = scheduler.tic_tac("brutal", knn.predict, X=testset[0])

scheduler.print_task_schedule("brutal")
scheduler.print_task_schedule("kd_tree")

# Brute-force accuracy, then kd-tree accuracy, separated by one space --
# the same text the Python 2 ``print a, b`` statement produced, written so
# that it is also valid on Python 3.
print("%s %s" % (accuracy_score(testset[1], predict_brutal),
                 accuracy_score(testset[1], predict_kd_tree)))

# KNN for regression task
# path = os.getcwd() + '/../dataset/winequality-white.csv'
# loader = DataLoader(path)
# dataset = loader.load(target_col_name='quality')
# trainset, testset = dataset.cross_split()
# knn = KNNRegressor(search_mode='brutal')
# knn.fit(trainset[0], trainset[1])
# predict_brutal = scheduler.tic_tac('brutal', knn.predict, X=testset[0])
# knn = KNNRegressor(search_mode='kd_tree')
# knn.fit(trainset[0], trainset[1])
# predict_kd_tree = scheduler.tic_tac('kd_tree', knn.predict, X=testset[0])
# scheduler.print_task_schedule('brutal')
break return np.array(result) def _fast_sample_by_proba(self, sample_weight): nArray = 1e5 cdf_map = list() start_ix = 0 end_ix = 0 for i in xrange(self._N): if i == self._N: cdf_map.extend([i] * (self._N - start_ix + 1)) break end_ix += int(math.floor(sample_weight[i] * nArray)) cdf_map.extend([i] * (end_ix - start_ix + 1)) start_ix = end_ix cdf_map = np.array(cdf_map) indices = np.random.randint(0, nArray, self._K) return cdf_map[indices] if __name__ == '__main__': from base.time_scheduler import TimeScheduler scheduler = TimeScheduler() fast_sampler = Sampler(1e5, 1e3, mode='fast') normal_sampler = Sampler(1e5, 1e3, mode='normal') scheduler.tic_tac('sample_fast', fast_sampler.sample) scheduler.tic_tac('sample_normal', normal_sampler.sample) scheduler.print_task_schedule('sample_fast') scheduler.print_task_schedule('sample_normal')