def __init__(
    self,
    adata_manager: AnnDataManager,
    unlabeled_category,
    train_size: float = 0.9,
    validation_size: Optional[float] = None,
    n_samples_per_label: Optional[int] = None,
    use_gpu: bool = False,
    **kwargs,
):
    """Store the split configuration and partition observations by label.

    Parameters
    ----------
    adata_manager
        Manager whose ``adata.obs`` holds the label column; the column name
        is read from the ``REGISTRY_KEYS.LABELS_KEY`` state registry.
    unlabeled_category
        Label value marking an observation as unlabeled.
    train_size
        Stored on the instance after conversion to ``float``.
    validation_size
        Stored on the instance unchanged.
    n_samples_per_label
        Stored on the instance unchanged.
    use_gpu
        Stored on the instance unchanged.
    **kwargs
        Stored as ``self.data_loader_kwargs`` (presumably forwarded to the
        data loaders built elsewhere in the class -- confirm against the
        rest of the class).
    """
    super().__init__()
    self.adata_manager = adata_manager
    self.unlabeled_category = unlabeled_category
    self.train_size = float(train_size)
    self.validation_size = validation_size
    self.n_samples_per_label = n_samples_per_label
    self.use_gpu = use_gpu
    # Assigned once; the original set this attribute twice to the same value.
    self.data_loader_kwargs = kwargs

    original_key = adata_manager.get_state_registry(
        REGISTRY_KEYS.LABELS_KEY
    ).original_key
    labels = np.asarray(adata_manager.adata.obs[original_key]).ravel()

    # Positions (into the flattened label column) of unlabeled and labeled
    # observations respectively.
    self._unlabeled_indices = np.flatnonzero(labels == unlabeled_category)
    self._labeled_indices = np.flatnonzero(labels != unlabeled_category)
def _get_batch_code_from_category(
    adata_manager: AnnDataManager, category: Sequence[Union[Number, str]]
):
    """Translate batch categories into their positions in the batch mapping.

    Parameters
    ----------
    adata_manager
        Manager whose ``REGISTRY_KEYS.BATCH_KEY`` state registry provides
        the ``categorical_mapping`` to look categories up in.
    category
        One category or an iterable of categories. A string or any
        non-iterable value is treated as a single category.

    Returns
    -------
    list
        One entry per requested category: ``None`` where the input was
        ``None``, otherwise the index of the first mapping entry equal to
        the category.

    Raises
    ------
    ValueError
        If a non-``None`` category is not present in the mapping.
    """
    # Strings are iterable but must be handled as one category, not as
    # a sequence of characters.
    is_single_value = isinstance(category, str) or not isinstance(
        category, IterableClass
    )
    requested = [category] if is_single_value else category

    registry = adata_manager.get_state_registry(REGISTRY_KEYS.BATCH_KEY)
    mapping = registry.categorical_mapping

    def _encode(cat):
        # None is passed through untouched.
        if cat is None:
            return None
        if cat not in mapping:
            raise ValueError('"{}" not a valid batch category.'.format(cat))
        matches = np.where(mapping == cat)[0]
        return matches[0]

    return [_encode(cat) for cat in requested]
def __init__(
    self,
    adata_manager: AnnDataManager,
    unlabeled_category: str,
    n_samples_per_label: Optional[int] = None,
    indices: Optional[List[int]] = None,
    shuffle: bool = False,
    batch_size: int = 128,
    data_and_attributes: Optional[dict] = None,
    drop_last: Union[bool, int] = False,
    **data_loader_kwargs,
):
    """Build the loader from all selected indices plus the labeled subset.

    Parameters
    ----------
    adata_manager
        Manager providing ``adata`` and the ``REGISTRY_KEYS.LABELS_KEY``
        state registry that names the label column in ``adata.obs``.
    unlabeled_category
        Label value marking an observation as unlabeled; it is excluded
        when collecting per-label locations.
    n_samples_per_label
        Stored on the instance before ``self.subsample_labels()`` is called.
    indices
        Observation indices to load from. Defaults to every observation
        (``np.arange(adata.n_obs)``). Any integer sequence is accepted and
        converted to a NumPy array.
    shuffle, batch_size, data_and_attributes, drop_last, **data_loader_kwargs
        Forwarded unchanged to the parent class constructor.
    """
    adata = adata_manager.adata
    if indices is None:
        indices = np.arange(adata.n_obs)
    # The signature allows a plain list, but the fancy indexing below
    # (``indices[label_loc_idx]``) only works on an ndarray.
    indices = np.asarray(indices)
    self.indices = indices

    # Nothing to load: leave the instance without parent initialisation,
    # exactly as the original did.
    if len(indices) == 0:
        return None

    self.n_samples_per_label = n_samples_per_label

    labels_obs_key = adata_manager.get_state_registry(
        REGISTRY_KEYS.LABELS_KEY
    ).original_key
    labels = np.asarray(adata.obs[labels_obs_key]).ravel()
    # Labels of the selected observations; computed once, not per category.
    selected_labels = labels[indices]

    # save a nested list of the indices per labeled category
    self.labeled_locs = []
    for label in np.unique(labels):
        if label != unlabeled_category:
            label_loc_idx = np.where(selected_labels == label)[0]
            self.labeled_locs.append(indices[label_loc_idx])

    labelled_idx = self.subsample_labels()
    super().__init__(
        adata_manager=adata_manager,
        indices_list=[indices, labelled_idx],
        shuffle=shuffle,
        batch_size=batch_size,
        data_and_attributes=data_and_attributes,
        drop_last=drop_last,
        **data_loader_kwargs,
    )