def KL_method(X, y, k_for_meaning=5, epsilon=0.01, begin=None, end=None,
              num=None, progress_bar=None, multiprocess=False):
    """Pick a sample size by thresholding the ``ut.klFunction`` statistic.

    A grid of candidate subset sizes ``np.arange(begin, end, num)`` is built,
    ``X`` and ``y`` are passed through ``shuffle``, and ``ut.calculater``
    evaluates ``ut.klFunction`` over the grid.  The reported size ``m*`` is
    the smallest grid value whose ``E`` entry is below ``epsilon``; when no
    entry qualifies, ``m*`` stays at ``end``.

    Parameters
    ----------
    X : array with a two-dimensional ``shape``
        ``X.shape[0]`` and ``X.shape[1]`` supply the defaults for ``end`` and
        ``begin`` (presumably samples x features -- confirm with callers).
    y : array-like
        Shuffled together with ``X`` and forwarded to ``ut.calculater``.
    k_for_meaning : int, default 5
        Forwarded unchanged to ``ut.calculater`` (presumably the number of
        repetitions averaged per subset size -- confirm against ``ut``).
    epsilon : float, default 0.01
        Threshold compared against each entry of ``E``.
    begin : int, optional
        First subset size; defaults to ``2 * X.shape[1]``.
    end : int, optional
        Exclusive upper bound of the grid and the fallback value of ``m*``;
        defaults to ``X.shape[0] - 1``.
    num : int, optional
        Step between consecutive subset sizes (not a count); defaults to 5.
    progress_bar : bool, optional
        Truthy values wrap iteration in ``tqdm(..., leave=False)``; ``None``
        or any other falsy value disables the bar.
    multiprocess : bool, default False
        Forwarded unchanged to ``ut.calculater``.

    Returns
    -------
    dict
        ``'m*'`` -- the selected size; ``'E'`` and ``'S'`` -- the two result
        sequences returned by ``ut.calculater`` as arrays (presumably mean and
        spread per subset size); ``'m'`` -- the grid of subset sizes.
    """
    # Only truthy values enable tqdm, so an explicit ``progress_bar=False``
    # really switches the bar off instead of being treated like ``True``.
    if progress_bar:
        def wrap_progress(iterable):
            return tqdm(iterable, leave=False)
    else:
        wrap_progress = list

    if end is None:
        end = X.shape[0] - 1
    if begin is None:
        begin = 2 * X.shape[1]
    if num is None:
        num = 5

    subset_sizes = np.arange(begin, end, num, dtype=np.int64)
    m_size = end

    X, y = shuffle(X, y)
    list_of_E, list_of_S = ut.calculater(
        X, y, subset_sizes, k_for_meaning, ut.klFunction, multiprocess,
        wrap_progress)

    # Walk from the largest size down and keep overwriting: the value left at
    # the end is the smallest subset size whose E entry is below epsilon.
    for size, mean in zip(reversed(subset_sizes), reversed(list_of_E)):
        if mean < epsilon:
            m_size = size

    return {
        'm*': m_size,
        'E': np.array(list_of_E),
        'S': np.array(list_of_S),
        'm': np.array(subset_sizes),
    }
def ALC(X, y, k_for_meaning=100, l=0.5, alpha=0.05, begin=None, end=None,
        num=None, progress_bar=None, multiprocess=False):
    """Pick a sample size by thresholding the ``ut.aDistribution`` statistic.

    A grid of candidate subset sizes ``np.arange(begin, end, num)`` is built,
    ``X`` and ``y`` are passed through ``shuffle``, and ``ut.calculater``
    evaluates ``ut.aDistribution`` over the grid with ``alpha`` as an extra
    argument.  The reported size ``m*`` is the smallest grid value whose ``E``
    entry is below ``l``; when no entry qualifies, ``m*`` stays at ``end``.

    Parameters
    ----------
    X : array with a two-dimensional ``shape``
        ``X.shape[0]`` and ``X.shape[1]`` supply the defaults for ``end`` and
        ``begin`` (presumably samples x features -- confirm with callers).
    y : array-like
        Shuffled together with ``X`` and forwarded to ``ut.calculater``.
    k_for_meaning : int, default 100
        Forwarded unchanged to ``ut.calculater`` (presumably the number of
        repetitions averaged per subset size -- confirm against ``ut``).
    l : float, default 0.5
        Threshold compared against each entry of ``E``.
    alpha : float, default 0.05
        Forwarded to ``ut.calculater`` as a trailing positional argument
        (presumably consumed by ``ut.aDistribution`` -- confirm against ``ut``).
    begin : int, optional
        First subset size; defaults to ``2 * X.shape[1]``.
    end : int, optional
        Exclusive upper bound of the grid and the fallback value of ``m*``;
        defaults to ``X.shape[0] - 1``.
    num : int, optional
        Step between consecutive subset sizes (not a count); defaults to 5.
    progress_bar : bool, optional
        Truthy values wrap iteration in ``tqdm(..., leave=False)``; ``None``
        or any other falsy value disables the bar.
    multiprocess : bool, default False
        Forwarded unchanged to ``ut.calculater``.

    Returns
    -------
    dict
        ``'m*'`` -- the selected size; ``'E'`` and ``'S'`` -- the two result
        sequences returned by ``ut.calculater`` as arrays (presumably mean and
        spread per subset size); ``'m'`` -- the grid of subset sizes.
    """
    # Only truthy values enable tqdm, so an explicit ``progress_bar=False``
    # really switches the bar off instead of being treated like ``True``.
    if progress_bar:
        def wrap_progress(iterable):
            return tqdm(iterable, leave=False)
    else:
        wrap_progress = list

    if end is None:
        end = X.shape[0] - 1
    if begin is None:
        begin = 2 * X.shape[1]
    if num is None:
        num = 5

    subset_sizes = np.arange(begin, end, num, dtype=np.int64)
    m_size = end

    X, y = shuffle(X, y)
    list_of_E, list_of_S = ut.calculater(
        X, y, subset_sizes, k_for_meaning, ut.aDistribution, multiprocess,
        wrap_progress, alpha)

    # Walk from the largest size down and keep overwriting: the value left at
    # the end is the smallest subset size whose E entry is below ``l``.
    # Only E takes part in the decision; S is just reported back.
    for size, mean in zip(reversed(subset_sizes), reversed(list_of_E)):
        if mean < l:
            m_size = size

    return {
        'm*': m_size,
        'E': np.array(list_of_E),
        'S': np.array(list_of_S),
        'm': np.array(subset_sizes),
    }
def MAX_U(X, y, k_for_meaning=100, c=0.005, begin=None, end=None, num=None,
          progress_bar=None, multiprocess=False):
    """Pick the sample size that maximises the ``ut.uFunction`` statistic.

    A grid of candidate subset sizes ``np.arange(begin, end, num)`` is built,
    ``X`` and ``y`` are passed through ``shuffle``, and ``ut.calculater``
    evaluates ``ut.uFunction`` over the grid with ``c`` as an extra argument.
    The reported size ``m*`` is the grid value at the position of the largest
    ``E`` entry.

    Parameters
    ----------
    X : array with a two-dimensional ``shape``
        ``X.shape[0]`` and ``X.shape[1]`` supply the defaults for ``end``,
        ``num`` and ``begin`` (presumably samples x features -- confirm with
        callers).
    y : array-like
        Shuffled together with ``X`` and forwarded to ``ut.calculater``.
    k_for_meaning : int, default 100
        Forwarded unchanged to ``ut.calculater`` (presumably the number of
        repetitions averaged per subset size -- confirm against ``ut``).
    c : float, default 0.005
        Forwarded to ``ut.calculater`` as a trailing positional argument
        (presumably consumed by ``ut.uFunction`` -- confirm against ``ut``).
    begin : int, optional
        First subset size; defaults to ``2 * X.shape[1]``.
    end : int, optional
        Exclusive upper bound of the grid; defaults to ``X.shape[0] - 1``.
    num : int, optional
        Step between consecutive subset sizes (not a count); defaults to
        ``max(5, X.shape[0] // 20)``.
    progress_bar : bool, optional
        Truthy values wrap iteration in ``tqdm(..., leave=False)``; ``None``
        or any other falsy value disables the bar.
    multiprocess : bool, default False
        Forwarded unchanged to ``ut.calculater``.

    Returns
    -------
    dict
        ``'m*'`` -- the selected size; ``'E'`` and ``'S'`` -- the two result
        sequences returned by ``ut.calculater`` as arrays (presumably mean and
        spread per subset size); ``'m'`` -- the grid of subset sizes.

    Raises
    ------
    ValueError
        If the grid of subset sizes is empty, so there is nothing to maximise.
    """
    # Only truthy values enable tqdm, so an explicit ``progress_bar=False``
    # really switches the bar off instead of being treated like ``True``.
    if progress_bar:
        def wrap_progress(iterable):
            return tqdm(iterable, leave=False)
    else:
        wrap_progress = list

    if end is None:
        end = X.shape[0] - 1
    if begin is None:
        begin = 2 * X.shape[1]
    if num is None:
        num = max(5, X.shape[0] // 20)

    subset_sizes = np.arange(begin, end, num, dtype=np.int64)
    if subset_sizes.size == 0:
        # Fail before any computation: np.argmax on an empty result would
        # raise a ValueError anyway, but without saying which inputs are off.
        raise ValueError(
            "empty grid of subset sizes: np.arange(begin={}, end={}, num={}) "
            "yields no values".format(begin, end, num))

    X, y = shuffle(X, y)
    list_of_E, list_of_S = ut.calculater(
        X, y, subset_sizes, k_for_meaning, ut.uFunction, multiprocess,
        wrap_progress, c)

    means = np.array(list_of_E)
    return {
        'm*': subset_sizes[np.argmax(means)],
        'E': means,
        'S': np.array(list_of_S),
        'm': np.array(subset_sizes),
    }