def _split_tcga(tcga_metadataset, counts):
    """Split a TCGA meta-dataset into disjoint sub-datasets of given sizes.

    Parameters
    ----------
    tcga_metadataset : TcgaMetaDataset-like
        Source meta-dataset; its ``task_ids`` mapping defines the allowed
        samples per task.  It is closed, deep-copied per split, and
        re-opened on each copy if it was preloaded.
    counts : sequence of int
        Number of tasks to place in each split (consumed in order).

    Returns
    -------
    list
        One meta-dataset copy per entry in ``counts``, with ``task_ids``
        restricted to that split (expanded with reusable unassigned
        samples wherever possible).
    """
    all_allowed_samples = tcga_metadataset.task_ids
    # First, uniquely assign every sample to a task.
    sample_to_task_assignment = _assign_samples(tcga_metadataset)

    keys = list(all_allowed_samples)
    allowed_keys = set(keys)
    # Collect samples whose task is not among the allowed tasks.  Iterate the
    # assignment mapping directly (dict insertion order) instead of an
    # unordered set difference, so the resulting OrderedSet — and therefore
    # the final split — is deterministic for a fixed torch seed.
    unassigned_samples = OrderedSet()
    for key in sample_to_task_assignment:
        if key not in allowed_keys:
            unassigned_samples.update(sample_to_task_assignment[key])

    # Second, split the meta-dataset with a torch-based random permutation
    # of the task keys, taking consecutive chunks of size `counts[j]`.
    permutation = torch.randperm(len(keys)).numpy()
    metadatasets = []
    start = 0
    end = 0
    for count in counts:
        end += count
        current_keys = [keys[index] for index in permutation[start:end]]
        metadatasets.append(
            {key: sample_to_task_assignment[key] for key in current_keys})
        start = end

    # Finally, expand the tasks by reusing unassigned samples wherever
    # possible.  Smallest splits go first (argsort) so they get priority on
    # the shared pool; `blacklist` tracks samples already handed out.
    expanded_metadatasets = [None] * len(metadatasets)
    order = np.argsort([len(metadataset) for metadataset in metadatasets])
    blacklist = OrderedSet()
    for i in order:
        additional_samples = unassigned_samples.difference(blacklist)
        expanded_metadataset, used_additional_samples = _expand_sample_usage(
            metadatasets[i], all_allowed_samples, additional_samples)
        expanded_metadatasets[i] = expanded_metadataset
        blacklist.update(used_additional_samples)

    # Close before deep-copying so no open file handles are duplicated;
    # re-open each copy only if the source was preloaded.
    tcga_metadatasets = []
    tcga_metadataset.close()
    preloaded = tcga_metadataset.preloaded
    for metadataset in expanded_metadatasets:
        current_tcga_metadataset = copy.deepcopy(tcga_metadataset)
        current_tcga_metadataset.task_ids = metadataset
        if preloaded:
            current_tcga_metadataset.open()
        tcga_metadatasets.append(current_tcga_metadataset)
    return tcga_metadatasets
def move_to(self, pos, force=False):
    """Move this entity to *pos*, firing pre/post move, enter and leave signals.

    Any pre-signal receiver may veto the move by returning
    ``MoveValidationResult.reject`` (also fires ``entity_move_rejected``)
    or ``MoveValidationResult.cancel``; both abort the move and return
    ``False``.  With ``force=True`` all pre-validation is skipped.

    Parameters
    ----------
    pos : position
        Target position, forwarded to the map intersection query.
    force : bool, optional
        Skip pre-move/enter/leave validation when True.

    Returns
    -------
    False when the move was rejected or cancelled; None otherwise
    (preserved from the original contract — callers treat a falsy
    non-``False`` result as success).
    """

    def _validate(signal, **kwargs):
        # Shared reject/cancel handling for the three pre-signals.
        for _func, ret in signal.send(self, **kwargs):
            if ret is MoveValidationResult.reject:
                entity_move_rejected.send(self)
                return False
            if ret is MoveValidationResult.cancel:
                return False
        return True

    if not force and entity_pre_move.has_receivers_for(self):
        if not _validate(entity_pre_move, new_pos=pos):
            return False

    with measure("Inside of query"):
        new_inside_of = OrderedSet(
            entity for entity in self.map.intersections_at_position(
                pos, self.current_effective_width))

    # BUG FIX: `enters` and `leaves` were previously computed only when
    # signal receivers existed, but they are needed unconditionally below
    # to update `is_inside_of` — without receivers the original raised
    # NameError.  Compute both differences always.
    enters = new_inside_of.difference(self.is_inside_of)
    leaves = self.is_inside_of.difference(new_inside_of)

    if enters and not force and entity_pre_enter.has_receivers_for(self):
        if not _validate(entity_pre_enter, enters=enters):
            return False

    if leaves and not force and entity_pre_leave.has_receivers_for(self):
        if not _validate(entity_pre_leave, leaves=leaves):
            return False

    self.position = pos
    self.was_inside_of = OrderedSet(self.is_inside_of)
    # Note that we unfortunately can not assign the new_inside_of set
    # directly as it has no defined ordering from the database, or rather,
    # it has likely the wrong one.
    for leaving in leaves:
        self.is_inside_of.remove(leaving)
    for entering in enters:
        self.is_inside_of.add(entering)

    if leaves and entity_post_leave.has_receivers_for(self):
        entity_post_leave.send(self, leaves=leaves, enters=enters)
    if enters and entity_post_enter.has_receivers_for(self):
        entity_post_enter.send(self, enters=enters)
    entity_post_move.send(self)
def missing_indices(array1, array2):
    """
    Set difference between array2 and array1

    Order-preserving: the result keeps the first-occurrence order of
    elements in ``array2``, with duplicates removed.  Implemented with the
    standard library (``set`` membership + ``dict.fromkeys`` dedup) instead
    of the third-party ``OrderedSet``, preserving the original behavior.

    Parameters
    ----------
    array1: np.ndarray
        first array
    array2: np.ndarray
        second array

    Returns
    ----------
    list
        set difference
    """
    exclude = set(array1)
    return [item for item in dict.fromkeys(array2) if item not in exclude]