def __init__(self, *args, **kwargs):
    """Query-by-committee: build a committee of student models.

    Pops ``disagreement`` ('vote' or 'kl_divergence'), the required
    ``models`` list, and ``random_state`` from *kwargs*, instantiates any
    model given by name, then trains every student on the labeled pool.

    Raises
    ------
    TypeError
        If ``models`` is missing, or 'kl_divergence' is requested with a
        non-probabilistic model.
    ValueError
        If ``models`` is an empty list.
    """
    super(QueryByCommittee, self).__init__(*args, **kwargs)

    self.disagreement = kwargs.pop('disagreement', 'vote')

    models = kwargs.pop('models', None)
    if models is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'models'"
        )
    elif not models:
        raise ValueError("models list is empty")

    if self.disagreement == 'kl_divergence':
        # KL-divergence disagreement needs per-class probabilities.
        for model in models:
            if not isinstance(model, ProbabilisticModel):
                # BUG FIX: the two adjacent literals previously joined
                # without a space ("...modelsshould...").
                raise TypeError(
                    "Given disagreement set as 'kl_divergence', all models "
                    "should be ProbabilisticModel."
                )

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    # Models may be passed as instances or as class names to be looked up
    # in libact.models and instantiated with default arguments.
    self.students = list()
    for model in models:
        if isinstance(model, str):
            self.students.append(getattr(libact.models, model)())
        else:
            self.students.append(model)
    self.n_students = len(self.students)
    self.teach_students()
def __init__(self, dataset, model=None, loss='log', random_state=None,
             random_sampling=None):
    """Expected-error-reduction strategy.

    Parameters
    ----------
    dataset : Dataset
        Pool the strategy queries from.
    model : ProbabilisticModel (required despite the None default)
    loss : {'01', 'log'}
    random_state : seed for the internal RNG
    random_sampling : None, int or float subsampling control

    Raises
    ------
    TypeError
        On a missing/invalid model, unsupported loss, or an invalid
        ``random_sampling`` type.
    """
    super(EER, self).__init__(dataset)

    self.model = model
    if self.model is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'model'")
    if not isinstance(self.model, ProbabilisticModel):
        raise TypeError("model has to be a ProbabilisticModel")

    self.loss = loss
    if self.loss not in ['01', 'log']:
        # BUG FIX: previously read the non-existent attribute
        # ``self.method``, so this branch raised AttributeError instead
        # of the intended TypeError message.
        raise TypeError(
            "supported methods are ['01', 'log'], the given one "
            "is: " + self.loss)

    self.random_sampling = random_sampling
    if not (self.random_sampling is None
            or isinstance(self.random_sampling, (int, float))):
        # BUG FIX: previously read the non-existent attribute
        # ``self.random_sampling_type`` (AttributeError).
        raise TypeError(
            "random_sampling parameter has to be either None, int or float "
            "is: " + str(self.random_sampling))

    self.random_state_ = seed_random_state(random_state)
def __init__(self, dataset, base_query_strategy, similarity_metric=None,
             clustering_method=None, beta=1.0, random_state=None):
    """Density-weighted meta strategy wrapping *base_query_strategy*.

    Defaults to 5-cluster KMeans for density estimation and cosine
    similarity when the caller supplies neither.

    Raises
    ------
    TypeError
        If ``base_query_strategy`` is not a QueryStrategy.
    ValueError
        If it does not share this strategy's dataset instance.
    """
    super(DensityWeightedMeta, self).__init__(dataset=dataset)
    if not isinstance(base_query_strategy, QueryStrategy):
        raise TypeError(
            "'base_query_strategy' has to be an instance of 'QueryStrategy'"
        )
    if base_query_strategy.dataset != self.dataset:
        # BUG FIX: adjacent literals previously joined without a space
        # ("...samedataset...").
        raise ValueError("base_query_strategy should share the same "
                         "dataset instance with DensityWeightedMeta")

    self.base_query_strategy = base_query_strategy
    self.beta = beta
    self.random_state_ = seed_random_state(random_state)

    if clustering_method is not None:
        self.clustering_method = clustering_method
    else:
        # Default clusterer used to estimate sample density.
        self.clustering_method = KMeans(
            n_clusters=5, random_state=self.random_state_)

    if similarity_metric is not None:
        self.similarity_metric = similarity_metric
    else:
        self.similarity_metric = cosine_similarity
def __init__(self, dataset, **kwargs):
    """Initialise score-cache bookkeeping for *dataset*.

    Registers ``self.update`` as the dataset's on-update callback so the
    cached scores can be invalidated when labels change, and seeds the
    internal RNG with a fixed default seed.
    """
    self._dataset = dataset

    # Score caches start empty and invalid until first computed.
    self.scores_dict = None
    self.real_scores_dict = None
    self.scores_valid = False

    # presumably ``self.update`` refreshes/invalidates the caches above —
    # TODO confirm against the method definition elsewhere in this class.
    dataset.on_update(self.update)

    # default random state: fixed seed keeps behaviour reproducible
    self.random_state_ = seed_random_state(5)
def __init__(self, *args, **kwargs):
    """Hinted SVM strategy; validates weights and collects solver options.

    Raises
    ------
    ValueError
        If ``Cl`` or ``Ch`` is non-positive, or ``p`` is outside [0, 1].
    """
    super(HintSVM, self).__init__(*args, **kwargs)

    # Weight on labeled data's classification error
    self.cl = kwargs.pop('Cl', 0.1)
    if self.cl <= 0:
        raise ValueError('Parameter Cl should be greater than 0.')

    # Weight on hinted data's classification error
    self.ch = kwargs.pop('Ch', 0.1)
    if self.ch <= 0:
        # BUG FIX: this message previously named Cl although the check
        # validates Ch.
        raise ValueError('Parameter Ch should be greater than 0.')

    # Probability of sampling a data point from the unlabeled pool into
    # the hinted pool.
    self.p = kwargs.pop('p', 0.5)
    if self.p > 1.0 or self.p < 0.0:
        raise ValueError(
            'Parameter p should be greater than or equal to 0 and less '
            'than or equal to 1.')

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    # svm solver parameters (passed through to the hint-SVM solver)
    self.svm_params = {}
    self.svm_params['kernel'] = kwargs.pop('kernel', 'linear')
    self.svm_params['degree'] = kwargs.pop('degree', 3)
    self.svm_params['gamma'] = kwargs.pop('gamma', 0.1)
    self.svm_params['coef0'] = kwargs.pop('coef0', 0.)
    self.svm_params['tol'] = kwargs.pop('tol', 1e-3)
    self.svm_params['shrinking'] = kwargs.pop('shrinking', 1)
    self.svm_params['cache_size'] = kwargs.pop('cache_size', 100.)
    self.svm_params['verbose'] = kwargs.pop('verbose', 0)
    # The solver's C is the labeled-error weight.
    self.svm_params['C'] = self.cl
def __init__(self, *args, **kwargs):
    """Query-by-committee: assemble and train the student committee.

    Pops ``disagreement``, the required ``models`` list, and
    ``random_state`` from *kwargs*; names in ``models`` are resolved
    against ``libact.models`` and instantiated with defaults.

    Raises
    ------
    TypeError
        If ``models`` is missing, or 'kl_divergence' is used with a
        non-probabilistic model.
    ValueError
        If ``models`` is empty.
    """
    super().__init__(*args, **kwargs)

    self.disagreement = kwargs.pop('disagreement', 'vote')

    models = kwargs.pop('models', None)
    if models is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'models'")
    elif not models:
        raise ValueError("models list is empty")

    if self.disagreement == 'kl_divergence':
        for model in models:
            if not isinstance(model, ProbabilisticModel):
                # BUG FIX: adjacent literals previously concatenated
                # without a space ("...modelsshould...").
                raise TypeError(
                    "Given disagreement set as 'kl_divergence', all models "
                    "should be ProbabilisticModel.")

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    self.students = list()
    for model in models:
        if isinstance(model, str):
            self.students.append(getattr(libact.models, model)())
        else:
            self.students.append(model)
    self.n_students = len(self.students)
    self.teach_students()
def __init__(self, *args, **kwargs):
    """MMC strategy: set up the label counter, RNG and base learners.

    Builds the logistic-regression label-count predictor and the
    binary-relevance base learner (a linear SVC with probability
    estimates) unless overridden via ``logreg_param`` / ``br_base``.
    """
    super(MaximumLossReductionMaximalConfidence, self).__init__(
        *args, **kwargs)

    # The number of labels is the length of the first labeled entry's
    # target vector.
    self.n_labels = len(self.dataset.get_labeled_entries()[1][0])

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    default_logreg_param = {
        'multi_class': 'multinomial',
        'solver': 'newton-cg',
        'random_state': random_state,
    }
    self.logreg_param = kwargs.pop('logreg_param', default_logreg_param)
    self.logistic_regression_ = LogisticRegression(**self.logreg_param)

    default_br_base = SklearnProbaAdapter(
        SVC(kernel='linear', probability=True, gamma="auto",
            random_state=random_state))
    self.br_base = kwargs.pop('br_base', default_br_base)
def __init__(self, dataset, base_clf, random_state=None):
    """Binary-minimization multilabel strategy.

    Stores the base classifier and seeds the RNG; the label-vector
    length of the first sample defines the number of labels.
    """
    super(BinaryMinimization, self).__init__(dataset)

    first_target = self.dataset.data[0][1]
    self.n_labels = len(first_target)
    self.base_clf = base_clf
    self.random_state_ = seed_random_state(random_state)
def __init__(self, dataset, classes, active_selecting=True,
             subsample_qs=None, random_state=None):
    """Hierarchical sampling over an agglomerative cluster tree.

    Builds the binary merge tree over all samples, precomputes per-node
    size/depth/parent/child tables, initialises the label-count and
    bound arrays used by the pruning algorithm, and replays any labels
    already present in the dataset.

    Raises
    ------
    TypeError
        If ``subsample_qs`` is given but is not a QueryStrategy.
    """
    super(HierarchicalSampling, self).__init__(dataset)

    # Feature matrix of every entry (labeled or not).
    X = np.array(next(zip(*self.dataset.get_entries())))
    cluster = AgglomerativeClustering()
    cluster.fit(X)
    # children_[i] holds the two nodes merged to form node n + i.
    children = cluster.children_

    if subsample_qs is not None:
        if not isinstance(subsample_qs, QueryStrategy):
            raise TypeError("subsample_qs has to be a QueryStrategy")
        self.sub_qs = subsample_qs
    else:
        self.sub_qs = None

    self.active_selecting = active_selecting
    self.random_state_ = seed_random_state(random_state)
    # n leaves (samples); m = 2n - 1 total tree nodes.
    self.n = len(children) + 1
    self.m = self.n * 2 - 1
    self.num_class = len(classes)
    self.classes = list(classes)
    self.class_id = dict(zip(self.classes, range(self.num_class)))

    self.parent = np.full(self.m, NO_NODE, dtype=int)
    self.size = np.zeros(self.m, dtype=int)
    self.depth = np.zeros(self.m, dtype=int)
    for i, (left_child, right_child) in enumerate(children):
        parent = i + self.n
        self.parent[left_child] = parent
        self.parent[right_child] = parent
    # Leaves have no children; internal node n + i has children[i].
    self.left_child = np.concatenate(
        [np.full(self.n, NO_NODE), children[:, 0]]).astype(int)
    self.right_child = np.concatenate(
        [np.full(self.n, NO_NODE), children[:, 1]]).astype(int)

    # Walk from every leaf to the root, accumulating subtree sizes and
    # node depths (depth = max distance to a leaf below the node).
    for i in range(self.n):
        node = i
        cur_depth = 0
        while node != NO_NODE:
            assert 0 <= node < self.m
            self.size[node] += 1
            self.depth[node] = max(self.depth[node], cur_depth)
            cur_depth += 1
            node = self.parent[node]

    self.count = np.zeros((self.m, self.num_class), dtype=int)
    self.total = np.zeros(self.m, dtype=int)
    self.upper_bound = np.ones((self.m, self.num_class), dtype=float)
    self.lower_bound = np.zeros((self.m, self.num_class), dtype=float)
    self.admissible = np.zeros((self.m, self.num_class), dtype=bool)
    self.best_label = np.full(self.m, NO_LABEL, dtype=int)
    self.split = np.zeros(self.m, dtype=bool)
    self.cost = self.size.copy()
    # Start with a single pruning at the root node.
    self.prunings = [self.m - 1]

    # Replay labels already present in the dataset.
    for i, entry in enumerate(self.dataset.data):
        # BUG FIX: was "entry[1] != None" — identity comparison with
        # "is not None" is the correct (and safe) form.
        if entry[1] is not None:
            self.update(i, entry[1])
def __init__(self, *args, **kwargs):
    """Active Learning By Learning: multi-armed-bandit strategy blender.

    Pops ``query_strategies`` (required), ``delta``, ``T`` (required
    budget), ``uniform_sampler``, ``pmin``, ``model`` (required) and
    ``random_state`` from *kwargs*, then builds the Exp4.P learner.

    Raises
    ------
    TypeError
        If ``query_strategies``, ``T`` or ``model`` is missing.
    ValueError
        On an empty strategy list, a strategy bound to another dataset,
        a non-bool ``uniform_sampler``, or an out-of-range ``pmin``.
    """
    super(ActiveLearningByLearning, self).__init__(*args, **kwargs)
    self.query_strategies_ = kwargs.pop("query_strategies", None)
    if self.query_strategies_ is None:
        raise TypeError("__init__() missing required keyword-only argument: "
                        "'query_strategies'")
    elif not self.query_strategies_:
        raise ValueError("query_strategies list is empty")

    # check if query_strategies share the same dataset with albl
    for qs in self.query_strategies_:
        if qs.dataset != self.dataset:
            # BUG FIX: adjacent literals previously joined without a
            # space ("...samedataset...").
            raise ValueError("query_strategies should share the same "
                             "dataset instance with albl")

    # parameters for Exp4.p
    self.delta = kwargs.pop("delta", 0.1)

    # query budget
    self.T = kwargs.pop("T", None)
    if self.T is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'T'")

    self.unlabeled_entry_ids, _ = zip(*self.dataset.get_unlabeled_entries())
    self.unlabeled_invert_id_idx = {}
    for i, entry in enumerate(self.dataset.get_unlabeled_entries()):
        self.unlabeled_invert_id_idx[entry[0]] = i

    self.uniform_sampler = kwargs.pop("uniform_sampler", True)
    if not isinstance(self.uniform_sampler, bool):
        raise ValueError("'uniform_sampler' should be {True, False}")

    self.pmin = kwargs.pop("pmin", None)
    n_algorithms = len(self.query_strategies_) + self.uniform_sampler
    # BUG FIX: the upper-bound test used "<" and so rejected every valid
    # pmin below 1/n_algorithms; the constraint is 0 <= pmin <= 1/n.
    if self.pmin and (self.pmin > (1.0 / n_algorithms) or self.pmin < 0):
        raise ValueError("'pmin' should be 0 < pmin < "
                         "1/len(n_active_algorithm)")

    self.exp4p_ = Exp4P(
        query_strategies=self.query_strategies_,
        T=self.T,
        delta=self.delta,
        pmin=self.pmin,
        unlabeled_invert_id_idx=self.unlabeled_invert_id_idx,
        uniform_sampler=self.uniform_sampler,
    )
    self.budget_used = 0

    # classifier instance
    self.model = kwargs.pop("model", None)
    if self.model is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'model'")

    random_state = kwargs.pop("random_state", None)
    self.random_state_ = seed_random_state(random_state)

    self.query_dist = None
    self.W = []
    self.queried_hist_ = []
def __init__(self, *args, **kwargs):
    """Batch-mode query strategy.

    Pops ``random_state`` and the required ``batch_size`` from *kwargs*.

    Raises
    ------
    TypeError
        If ``batch_size`` is not supplied.
    """
    super(BatchQuery, self).__init__(*args, **kwargs)

    self.random_state_ = seed_random_state(kwargs.pop('random_state', None))

    batch_size = kwargs.pop('batch_size', None)
    if batch_size is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'batch_size'"
        )
    self.batch_size_ = batch_size
def __init__(self, *args, **kwargs):
    # Density-Weighted Uncertainty Sampling: cluster the whole pool with
    # KMeans, then fit a mixture density over the samples whose cluster
    # priors are estimated with a short EM loop.
    super(DWUS, self).__init__(*args, **kwargs)
    self.n_clusts = kwargs.pop('n_clusters', 5)
    self.sigma = kwargs.pop('sigma', 0.1)       # Gaussian kernel width
    self.max_iter = kwargs.pop('max_iter', 100)  # EM iteration budget
    # NOTE(review): self.tol is stored but the EM loop below runs a fixed
    # max_iter with no convergence check — confirm whether tol is used
    # elsewhere in the class.
    self.tol = kwargs.pop('tol', 1e-4)
    self.C = kwargs.pop('C', 1.)
    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)
    kmeans_param = kwargs.pop('kmeans_param', {})
    # Propagate our seeded RNG to KMeans unless the caller set one.
    if 'random_state' not in kmeans_param:
        kmeans_param['random_state'] = self.random_state_
    self.kmeans_ = KMeans(n_clusters=self.n_clusts, **kmeans_param)
    # Feature vectors of every entry, labeled or not.
    all_x = np.array([xy[0] for xy in self.dataset.data])

    # Cluster the data.
    self.kmeans_.fit(all_x)
    d = len(all_x[0])
    centers = self.kmeans_.cluster_centers_
    # Uniform initial cluster prior P(k).
    P_k = np.ones(self.n_clusts) / float(self.n_clusts)

    dis = np.zeros((len(all_x), self.n_clusts))
    for i in range(self.n_clusts):
        # Unnormalized Gaussian affinity of every sample to center i;
        # the einsum computes the row-wise squared distance.
        dis[:, i] = np.exp(-np.einsum('ij,ji->i', (all_x - centers[i]),
                                      (all_x - centers[i]).T) / 2 / self.sigma)

    # EM procedure to estimate the cluster prior P(k).
    for _ in range(self.max_iter):
        # E-step: responsibilities P(k|x)
        temp = dis * np.tile(P_k, (len(all_x), 1))
        # P_k_x, shape = (len(all_x), n_clusts)
        P_k_x = temp / np.tile(np.sum(temp, axis=1), (self.n_clusts, 1)).T
        # M-step: re-estimate the prior from the responsibilities
        P_k = 1. / len(all_x) * np.sum(P_k_x, axis=0)

    self.P_k_x = P_k_x

    # Per-cluster Gaussian likelihoods p(x|k) with isotropic covariance.
    p_x_k = np.zeros((len(all_x), self.n_clusts))
    for i in range(self.n_clusts):
        p_x_k[:, i] = multivariate_normal.pdf(all_x, mean=centers[i],
                                              cov=np.ones(d) * np.sqrt(self.sigma))

    # Marginal density p(x) = sum_k p(x|k) P(k) for every sample.
    self.p_x = np.dot(p_x_k, P_k).reshape(-1)
def __init__(self, dataset, cost_matrix, base_regressor, embed_dim=None,
             mds_params=None, nn_params=None, random_state=None):
    """Cost-embedding active learning (ALCE).

    Embeds the classes into ``embed_dim`` dimensions (defaulting to the
    number of classes) via non-metric MDS on the cost matrix, and fits a
    nearest-neighbour index over the ground-truth half of the embedding.

    BUG FIX: ``mds_params`` and ``nn_params`` previously used mutable
    dict defaults; they now default to ``None`` (same behaviour, no
    shared-state hazard).
    """
    super(ActiveLearningWithCostEmbedding, self).__init__(dataset)

    self.cost_matrix = cost_matrix
    self.base_regressor = base_regressor
    self.n_classes = len(cost_matrix)
    self.embed_dim = self.n_classes if embed_dim is None else embed_dim
    # One regressor per embedding dimension.
    self.regressors = [
        copy.deepcopy(self.base_regressor) for _ in range(self.embed_dim)
    ]
    self.random_state_ = seed_random_state(random_state)

    self.mds_params = {
        'metric': False,
        'n_components': self.embed_dim,
        'n_uq': self.n_classes,
        'max_iter': 300,
        'eps': 1e-6,
        'dissimilarity': "precomputed",
        'n_init': 8,
        'n_jobs': 1,
        'random_state': self.random_state_
    }
    self.mds_params.update(mds_params or {})

    self.nn_params = {}
    self.nn_params.update(nn_params or {})
    self.nn_ = NearestNeighbors(n_neighbors=1, **self.nn_params)

    # Bipartite dissimilarity: "prediction" roles vs "truth" roles of
    # the classes; each half sees the cost matrix to the other half.
    dissimilarity = np.zeros((2 * self.n_classes, 2 * self.n_classes))
    dissimilarity[:self.n_classes, self.n_classes:] = self.cost_matrix
    dissimilarity[self.n_classes:, :self.n_classes] = self.cost_matrix.T
    mds_ = MDSP(**self.mds_params)
    embedding = mds_.fit(dissimilarity).embedding_

    self.class_embed = embedding[:self.n_classes, :]
    self.nn_.fit(embedding[self.n_classes:, :])
def __init__(self, dataset, base_clf, betas=None, n_jobs=1,
             random_state=None):
    """Adaptive active learning for multilabel data.

    Deep-copies the base classifier, fills in the default beta grid
    0.0–1.0 (step 0.1) when none is given, and seeds the RNG.
    """
    super(AdaptiveActiveLearning, self).__init__(dataset)

    self.n_labels = len(self.dataset.data[0][1])
    self.base_clf = copy.deepcopy(base_clf)

    # TODO check beta value
    self.betas = betas
    if self.betas is None:
        # Default grid of trade-off values: 0.0, 0.1, ..., 1.0
        self.betas = [step / 10. for step in range(0, 11)]

    self.n_jobs = n_jobs
    self.random_state_ = seed_random_state(random_state)
def __init__(self, dataset, model=None, loss='log', random_state=None):
    """Expected-error-reduction strategy.

    Requires a ProbabilisticModel (despite the None default, which only
    exists to produce a uniform error message) and a supported loss.

    Raises
    ------
    TypeError
        If the model is missing/invalid or the loss is unsupported.
    """
    super(EER, self).__init__(dataset)

    self.model = model
    if self.model is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'model'")
    if not isinstance(self.model, ProbabilisticModel):
        raise TypeError("model has to be a ProbabilisticModel")

    self.loss = loss
    if self.loss not in ('01', 'log'):
        raise TypeError(
            "supported methods are ['01', 'log'], the given one "
            "is: " + self.loss)

    self.random_state_ = seed_random_state(random_state)
def __init__(self, dataset, major_learner, auxiliary_learner,
             criterion='hlr', b=1., random_state=None):
    """Multilabel strategy pairing a major and an auxiliary learner.

    Raises
    ------
    TypeError
        If ``criterion`` is not one of 'hlr', 'shlr', 'mmr'.
    """
    super(MultilabelWithAuxiliaryLearner, self).__init__(dataset)

    self.n_labels = len(self.dataset.data[0][1])
    self.major_learner = major_learner
    self.auxiliary_learner = auxiliary_learner
    self.b = b
    self.random_state_ = seed_random_state(random_state)

    self.criterion = criterion
    if self.criterion not in ('hlr', 'shlr', 'mmr'):
        raise TypeError(
            "supported criterion are ['hlr', 'shlr', 'mmr'], the given "
            "one is: " + self.criterion
        )
def __init__(self, *args, **kwargs):
    """Query-by-committee: build and train the student committee.

    Pops the required ``models`` list and ``random_state`` from
    *kwargs*; string entries are resolved against ``libact.models``.

    Raises
    ------
    TypeError
        If ``models`` is not supplied.
    ValueError
        If ``models`` is empty.
    """
    super(QueryByCommittee, self).__init__(*args, **kwargs)

    models = kwargs.pop('models', None)
    if models is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'models'")
    elif not models:
        raise ValueError("models list is empty")

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    self.students = list()
    for model in models:
        # IDIOM FIX: isinstance() instead of "type(model) is str" — also
        # accepts str subclasses.
        if isinstance(model, str):
            self.students.append(getattr(libact.models, model)())
        else:
            self.students.append(model)
    self.n_students = len(self.students)
    self.teach_students()
def __init__(self, *args, **kwargs):
    """Query-by-committee: instantiate and train the committee.

    Pops the required ``models`` list and ``random_state`` from
    *kwargs*; model names are looked up in ``libact.models``.

    Raises
    ------
    TypeError
        If ``models`` is not supplied.
    ValueError
        If ``models`` is empty.
    """
    super(QueryByCommittee, self).__init__(*args, **kwargs)

    models = kwargs.pop('models', None)
    if models is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'models'"
        )
    elif not models:
        raise ValueError("models list is empty")

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    self.students = list()
    for model in models:
        # IDIOM FIX: isinstance() instead of "type(model) is str".
        if isinstance(model, str):
            self.students.append(getattr(libact.models, model)())
        else:
            self.students.append(model)
    self.n_students = len(self.students)
    self.teach_students()
def __init__(self, dataset, args, **kwargs):
    """Random-sampling arm used inside the ALBL bandit.

    Requires ``model`` and ``T`` in *kwargs*; initialises the
    importance-weight and query-history bookkeeping.

    Raises
    ------
    TypeError
        If ``model`` or ``T`` is not supplied.
    """
    super(RandomSampling, self).__init__(dataset, args, **kwargs)

    self.random_state_ = seed_random_state(kwargs.pop('random_state', None))

    model = kwargs.pop('model', None)
    if model is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'model'"
        )
    self.model = model

    budget = kwargs.pop('T', None)
    if budget is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'T'"
        )
    self.T = budget

    # Importance weights, query history and accumulated raw reward.
    self.W = [1]
    self.queried_hist_ = []
    self.raw_rw = 0
def __init__(self, dataset, major_learner, auxiliary_learner,
             criterion='hlr', b=1., random_state=None):
    """Multilabel strategy with a major and an auxiliary learner.

    Raises
    ------
    TypeError
        If ``criterion`` is not a supported value.
    """
    super(MultilabelWithAuxiliaryLearner, self).__init__(dataset)

    # Label-vector length of the first sample defines n_labels.
    self.n_labels = len(self.dataset.data[0][1])
    self.major_learner = major_learner
    self.auxiliary_learner = auxiliary_learner
    self.b = b
    self.random_state_ = seed_random_state(random_state)

    supported = frozenset(['hlr', 'shlr', 'mmr'])
    self.criterion = criterion
    if self.criterion not in supported:
        raise TypeError(
            "supported criterion are ['hlr', 'shlr', 'mmr'], the given "
            "one is: " + self.criterion)
def __init__(self, *args, **kwargs):
    """Hinted SVM strategy; validates weights and gathers solver options.

    Raises
    ------
    ValueError
        If ``Cl`` or ``Ch`` is non-positive, or ``p`` lies outside [0, 1].
    """
    super(HintSVM, self).__init__(*args, **kwargs)

    # Weight on labeled data's classification error
    self.cl = kwargs.pop('Cl', 0.1)
    if self.cl <= 0:
        raise ValueError('Parameter Cl should be greater than 0.')

    # Weight on hinted data's classification error
    self.ch = kwargs.pop('Ch', 0.1)
    if self.ch <= 0:
        # BUG FIX: this message previously named Cl although it
        # validates Ch.
        raise ValueError('Parameter Ch should be greater than 0.')

    # Probability of sampling a data point from the unlabeled pool into
    # the hinted pool.
    self.p = kwargs.pop('p', 0.5)
    if self.p > 1.0 or self.p < 0.0:
        raise ValueError(
            'Parameter p should be greater than or equal to 0 and less '
            'than or equal to 1.'
        )

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    # svm solver parameters (forwarded to the hint-SVM solver)
    self.svm_params = {}
    self.svm_params['kernel'] = kwargs.pop('kernel', 'linear')
    self.svm_params['degree'] = kwargs.pop('degree', 3)
    self.svm_params['gamma'] = kwargs.pop('gamma', 0.1)
    self.svm_params['coef0'] = kwargs.pop('coef0', 0.)
    self.svm_params['tol'] = kwargs.pop('tol', 1e-3)
    self.svm_params['shrinking'] = kwargs.pop('shrinking', 1)
    self.svm_params['cache_size'] = kwargs.pop('cache_size', 100.)
    self.svm_params['verbose'] = kwargs.pop('verbose', 0)
    # The solver's C is the labeled-error weight.
    self.svm_params['C'] = self.cl
def __init__(self, dataset, **kwargs):
    """Random-sampling baseline strategy.

    Only seeds the internal RNG from an optional ``random_state``.
    """
    super(RandomSampling, self).__init__(dataset, **kwargs)

    seed = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(seed)
def __init__(self, dataset, args, **kwargs):
    """Active Learning By Learning: Exp4.P-driven strategy blender.

    Pops ``query_strategies`` (required), ``delta``, ``T`` (required),
    ``uniform_sampler``, ``pmin``, ``reward_fn``, ``model`` (required)
    and ``random_state`` from *kwargs*, then builds the Exp4.P learner
    and the importance-weight bookkeeping.

    Raises
    ------
    TypeError
        If ``query_strategies``, ``T`` or ``model`` is missing.
    ValueError
        On an empty strategy list, a strategy bound to another dataset,
        a non-bool ``uniform_sampler``, or an out-of-range ``pmin``.
    """
    super(ActiveLearningByLearning, self).__init__(dataset, args, **kwargs)
    self.query_strategies_ = kwargs.pop('query_strategies', None)
    if self.query_strategies_ is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: "
            "'query_strategies'"
        )
    elif not self.query_strategies_:
        raise ValueError("query_strategies list is empty")

    # check if query_strategies share the same dataset with albl
    for qs in self.query_strategies_:
        if qs.dataset != self.dataset:
            # BUG FIX: adjacent literals previously joined without a
            # space ("...samedataset...").
            raise ValueError("query_strategies should share the same "
                             "dataset instance with albl")

    # parameters for Exp4.p
    self.delta = kwargs.pop('delta', 0.1)

    # query budget
    self.T = kwargs.pop('T', None)
    if self.T is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'T'"
        )

    _, self.unlabeled_entry_ids = self.dataset.get_unlabeled_entries()
    # Map entry id -> position in the unlabeled pool.
    self.unlabeled_invert_id_idx = {}
    for i, idx in enumerate(self.dataset.get_unlabeled_entries()[1]):
        self.unlabeled_invert_id_idx[idx] = i

    self.uniform_sampler = kwargs.pop('uniform_sampler', True)
    if not isinstance(self.uniform_sampler, bool):
        raise ValueError("'uniform_sampler' should be {True, False}")

    self.pmin = kwargs.pop('pmin', None)
    # The uniform sampler counts as one extra algorithm when enabled.
    n_algorithms = (len(self.query_strategies_) + self.uniform_sampler)
    if self.pmin and (self.pmin > (1. / n_algorithms) or self.pmin < 0):
        raise ValueError("'pmin' should be 0 < pmin < "
                         "1/len(n_active_algorithm)")

    # Optional custom reward function for the bandit.
    self.reward_fn = kwargs.pop('reward_fn', None)

    self.exp4p_ = Exp4P(
        query_strategies=self.query_strategies_,
        T=self.T,
        delta=self.delta,
        pmin=self.pmin,
        unlabeled_invert_id_idx=self.unlabeled_invert_id_idx,
        uniform_sampler=self.uniform_sampler
    )
    self.budget_used = 0

    # classifier instance
    self.model = kwargs.pop('model', None)
    if self.model is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'model'"
        )

    random_state = kwargs.pop('random_state', None)
    self.random_state_ = seed_random_state(random_state)

    self.query_dist = None

    # Importance weights, query history and accumulated raw reward.
    self.W = []
    self.queried_hist_ = []
    self.raw_rw = 0