def testGetInclusizeSubsets(self):
    # Ask for the inclusive subsets of {A, C} and compare the result against
    # the two singletons followed by the pair itself.
    wanted = (
        frozenset(["A"]),
        frozenset(["C"]),
        frozenset(["A", "C"]),
    )
    query = frozenset(["A", "C"])
    found = Subsets(SubsetsTests.testSet).getInclusiveSubsets(query)
    self.assertEqual(found, wanted)
def testGetSubsets(self):
    # Expected result of getSubsets() for the shared test set: the three
    # singletons, then the three pairs, then the full triple.
    wanted = (
        frozenset(["A"]),
        frozenset(["B"]),
        frozenset(["C"]),
        frozenset(["A", "B"]),
        frozenset(["A", "C"]),
        frozenset(["B", "C"]),
        frozenset(["A", "B", "C"]),
    )
    found = Subsets(SubsetsTests.testSet).getSubsets()
    self.assertEqual(found, wanted)
def __init__(self, subsets=None, isInclusive=False):
    """Create a new Venn.

    subsets -- object tracking the set names; when None, a private
        Subsets object is created for this instance.
    isInclusive -- False for a standard Venn, True for an inclusive one.
    """
    # Standard or inclusive flavour.
    self.isInclusive = isInclusive

    # Use the caller's Subsets object if given, otherwise a private one.
    self.subsets = Subsets() if subsets is None else subsets

    # Two-way lookup tables: set name -> items, and item -> set names.
    self.nameToItems = SetDict()
    self.itemToNames = SetDict()

    # Venn table, indexed by name, of items; None until it is built.
    self.venn = None
class Venn(object):
    """Generate Venn diagram set intersections.

    Items (ids) are registered under named sets with addItem() and
    addItems().  The table mapping each subset of set names to its items is
    built lazily on the first query and discarded whenever items are added.
    """

    def __init__(self, subsets=None, isInclusive=False):
        """Create a new Venn.

        subsets -- object tracking the set names; when None, a private
            Subsets object is created.
        isInclusive -- False for a standard Venn, True for an inclusive one.
        """
        # is this a standard venn or inclusive?
        self.isInclusive = isInclusive
        self.subsets = subsets
        if self.subsets is None:
            self.subsets = Subsets()

        # Tables mapping set name to items and items to set names
        self.nameToItems = SetDict()
        self.itemToNames = SetDict()

        # Venn table, dict indexed by name, of items (lazy build)
        self.venn = None

    def addItem(self, setName, item):
        "add a single item from a named set"
        self.subsets.add(setName)
        self.nameToItems.add(setName, item)
        self.itemToNames.add(item, setName)
        self.venn = None  # force a rebuild on the next query

    def addItems(self, setName, items):
        "add items from a named set"
        # The set name is registered even when items is empty.
        self.subsets.add(setName)
        for item in items:
            self.nameToItems.add(setName, item)
            self.itemToNames.add(item, setName)
        self.venn = None  # force a rebuild on the next query

    def getNumItems(self):
        "get the number of distinct items that have been added"
        return len(self.itemToNames)

    def _buildVenn(self):
        "build Venn table; each item goes in the subset of exactly its set names"
        # NOTE(review): presumably seeding SetDict with the subsets creates an
        # entry per subset -- confirm against SetDict.
        self.venn = SetDict(self.subsets.getSubsets())
        # Only self.venn is modified in the loop, so no copy of the keys is needed.
        for item in self.itemToNames:
            nameSet = frozenset(self.itemToNames[item])
            self.venn.add(nameSet, item)

    def _buildInclusive(self):
        "build as inclusive subsets"
        self.venn = SetDict(self.subsets.getSubsets())
        for item in self.itemToNames:
            setName = frozenset(self.itemToNames[item])
            # The item is recorded under every subset that
            # getInclusiveSubsets() returns for its set names.
            for iss in self.subsets.getInclusiveSubsets(setName):
                self.venn.add(iss, item)

    def _update(self):
        "build venn or inclusive venn, if it doesn't exist"
        if self.venn is None:
            if self.isInclusive:
                self._buildInclusive()
            else:
                self._buildVenn()

    def getSubsetIds(self, subset):
        """Get ids for the specified subset.

        Returns the table entry for the subset, or an empty list when the
        table has no entry for it.
        """
        self._update()
        ids = self.venn.get(subset)
        if ids is None:
            ids = []
        return ids

    def getSubsetCounts(self, subset):
        "get counts for the specified subset"
        return len(self.getSubsetIds(subset))

    def getTotalCounts(self):
        "get total of counts for all subsets (meaningless on inclusive)"
        return sum(self.getSubsetCounts(subset)
                   for subset in self.subsets.getSubsets())

    @staticmethod
    def formatSubsetName(subset, subsetNameSeparator=" ", setNameFormatter=str):
        """Format a subset as its sorted, formatted set names joined by
        subsetNameSeparator."""
        return subsetNameSeparator.join(
            sorted([setNameFormatter(s) for s in subset]))

    @staticmethod
    def _formatIds(ids):
        "format ids as one column value: sorted string forms joined by commas"
        return ",".join(sorted(str(i) for i in ids))

    def writeCounts(self, fh, subsetNameSeparator=" ", setNameFormatter=str):
        "write TSV of subset counts to an open file"
        fileOps.prRowv(fh, "subset", "count")
        for subset in self.subsets.getSubsets():
            fileOps.prRowv(
                fh,
                self.formatSubsetName(subset, subsetNameSeparator,
                                      setNameFormatter),
                self.getSubsetCounts(subset))

    def writeSets(self, fh, subsetNameSeparator=" ", setNameFormatter=str):
        """Write TSV of subsets and ids to an open file.

        One row per subset; the "ids" column holds the subset's ids as a
        sorted, comma-separated list (empty when the subset has no ids).
        """
        fileOps.prRowv(fh, "subset", "ids")
        for subset in self.subsets.getSubsets():
            # Write the ids themselves; the count belongs to writeCounts().
            fileOps.prRowv(
                fh,
                self.formatSubsetName(subset, subsetNameSeparator,
                                      setNameFormatter),
                self._formatIds(self.getSubsetIds(subset)))
class Venn(object):
    """Generate Venn diagram set intersections.

    Items (ids) are registered under named sets with addItem() and
    addItems().  The table mapping each subset of set names to its items is
    built lazily on the first query and discarded whenever items are added.
    """

    def __init__(self, subsets=None, isInclusive=False):
        """Create a new Venn.

        subsets -- object tracking the set names; when None, a private
            Subsets object is created.
        isInclusive -- False for a standard Venn, True for an inclusive one.
        """
        # is this a standard venn or inclusive?
        self.isInclusive = isInclusive
        self.subsets = subsets
        if self.subsets is None:
            self.subsets = Subsets()

        # Tables mapping set name to items and items to set names
        self.nameToItems = SetDict()
        self.itemToNames = SetDict()

        # Venn table, dict indexed by name, of items (lazy build)
        self.venn = None

    def addItem(self, setName, item):
        "add a single item from a named set"
        self.subsets.add(setName)
        self.nameToItems.add(setName, item)
        self.itemToNames.add(item, setName)
        self.venn = None  # force a rebuild on the next query

    def addItems(self, setName, items):
        "add items from a named set"
        # The set name is registered even when items is empty.
        self.subsets.add(setName)
        for item in items:
            self.nameToItems.add(setName, item)
            self.itemToNames.add(item, setName)
        self.venn = None  # force a rebuild on the next query

    def getNumItems(self):
        "get the number of distinct items that have been added"
        return len(self.itemToNames)

    def __buildVenn(self):
        "build Venn table; each item goes in the subset of exactly its set names"
        # NOTE(review): presumably seeding SetDict with the subsets creates an
        # entry per subset -- confirm against SetDict.
        self.venn = SetDict(self.subsets.getSubsets())
        # Iterate the mapping directly: dict.iterkeys() exists only on
        # Python 2, while plain iteration yields the keys on 2 and 3 alike.
        for item in self.itemToNames:
            nameSet = frozenset(self.itemToNames[item])
            self.venn.add(nameSet, item)

    def __buildInclusive(self):
        "build as inclusive subsets"
        self.venn = SetDict(self.subsets.getSubsets())
        for item in self.itemToNames:
            setName = frozenset(self.itemToNames[item])
            # The item is recorded under every subset that
            # getInclusiveSubsets() returns for its set names.
            for iss in self.subsets.getInclusiveSubsets(setName):
                self.venn.add(iss, item)

    def __update(self):
        "build venn or inclusive venn, if it doesn't exist"
        if self.venn is None:
            if self.isInclusive:
                self.__buildInclusive()
            else:
                self.__buildVenn()

    def getSubsetIds(self, subset):
        """Get ids for the specified subset.

        Returns the table entry for the subset, or an empty list when the
        table has no entry for it.
        """
        self.__update()
        ids = self.venn.get(subset)
        if ids is None:
            ids = []
        return ids

    def getSubsetCounts(self, subset):
        "get counts for the specified subset"
        return len(self.getSubsetIds(subset))

    def getTotalCounts(self):
        "get total of counts for all subsets (meaningless on inclusive)"
        return sum(self.getSubsetCounts(subset)
                   for subset in self.subsets.getSubsets())

    @staticmethod
    def formatSubsetName(subset, subsetNameSeparator=" ", setNameFormatter=str):
        """Format a subset as its sorted, formatted set names joined by
        subsetNameSeparator."""
        return subsetNameSeparator.join(
            sorted([setNameFormatter(s) for s in subset]))

    @staticmethod
    def _formatIds(ids):
        "format ids as one column value: sorted string forms joined by commas"
        return ",".join(sorted(str(i) for i in ids))

    def writeCounts(self, fh, subsetNameSeparator=" ", setNameFormatter=str):
        "write TSV of subset counts to an open file"
        fileOps.prRowv(fh, "subset", "count")
        for subset in self.subsets.getSubsets():
            fileOps.prRowv(
                fh,
                self.formatSubsetName(subset, subsetNameSeparator,
                                      setNameFormatter),
                self.getSubsetCounts(subset))

    def writeSets(self, fh, subsetNameSeparator=" ", setNameFormatter=str):
        """Write TSV of subsets and ids to an open file.

        One row per subset; the "ids" column holds the subset's ids as a
        sorted, comma-separated list (empty when the subset has no ids).
        """
        fileOps.prRowv(fh, "subset", "ids")
        for subset in self.subsets.getSubsets():
            # Write the ids themselves; the count belongs to writeCounts().
            fileOps.prRowv(
                fh,
                self.formatSubsetName(subset, subsetNameSeparator,
                                      setNameFormatter),
                self._formatIds(self.getSubsetIds(subset)))