def testOneRelationship(self):
    """Check the dependencies generated for one relationship at hop thresholds 0 to 3.

    The schema has entities A and B joined by relationship AB (declared with
    Schema.ONE for A and Schema.MANY for B), with one attribute on each item.
    Each hop threshold must yield every dependency listed for that threshold
    and for all smaller ones, with existence dependencies included.
    """
    schema = Schema()
    for entityName in ('A', 'B'):
        schema.addEntity(entityName)
    schema.addRelationship('AB', ('A', Schema.ONE), ('B', Schema.MANY))
    for itemName, attrName in (('A', 'X'), ('B', 'Y'), ('AB', 'XY')):
        schema.addAttribute(itemName, attrName)

    # Dependencies that first appear at each hop threshold (list index == threshold).
    newDepsByHop = [
        [],
        [
            '[A, AB].XY -> [A].X',
            '[A, AB].exists -> [A].X',
            '[AB, A].X -> [AB].XY',
            '[AB, A].X -> [AB].exists',
            '[AB, B].Y -> [AB].XY',
            '[AB, B].Y -> [AB].exists',
            '[B, AB].XY -> [B].Y',
            '[B, AB].exists -> [B].Y',
        ],
        [
            '[A, AB, B].Y -> [A].X',
            '[AB, B, AB].exists -> [AB].XY',
            '[B, AB, A].X -> [B].Y',
        ],
        [
            '[A, AB, B, AB].XY -> [A].X',
            '[A, AB, B, AB].exists -> [A].X',
            '[AB, B, AB, A].X -> [AB].XY',
        ],
    ]

    expectedSoFar = []
    for hopThreshold, newDeps in enumerate(newDepsByHop):
        # Expectations accumulate: a larger threshold keeps all earlier dependencies.
        expectedSoFar = expectedSoFar + newDeps
        relDeps = RelationalSpace.getRelationalDependencies(
            schema, hopThreshold, includeExistence=True)
        self.assertTrue(
            all(isinstance(relDep, RelationalDependency) for relDep in relDeps))
        TestUtil.assertUnorderedListEqual(
            self, expectedSoFar, [str(relDep) for relDep in relDeps])
def identifyUndirectedDependencies(self):
    '''
    Phase I of RCD-Light: prune candidate dependencies down to the undirected ones.

    Every candidate dependency up to the hop threshold is tested with
    separating sets of growing size. When a separating set is found, the
    dependency and its reverse are both dropped. The loop ends once no
    candidate is left to test.

    Side effects: sets self._causes (effect -> set of remaining causes) and
    self.undirectedDependencies.

    Returns a fresh set holding the surviving dependencies.
    '''
    candidates = RelationalSpace.getRelationalDependencies(self._schema,
                                                           self._hop_threshold)

    def effect_of(dependency):
        return dependency.relVar2

    # groupby only merges adjacent items, so the candidates are sorted by the same key first.
    causes_by_effect = {}
    for effect, members in itertools.groupby(sorted(candidates, key=effect_of),
                                             key=effect_of):
        causes_by_effect[effect] = {member.relVar1 for member in members}
    self._causes = causes_by_effect

    pending = set(candidates)
    sepset_size = 0
    while True:
        # Iterate over a snapshot because `pending` shrinks inside the loop.
        for dep in tuple(pending):
            if dep not in pending:
                # Already dropped as the reverse of an earlier dependency.
                continue
            sepset, tested = self._find_sepset_with_size(
                dep.relVar1, dep.relVar2, sepset_size, 'Phase I')
            if not tested:
                # NOTE(review): presumably no test of this size was possible,
                # so the dependency is not retried -- confirm against
                # _find_sepset_with_size.
                pending.remove(dep)
            if sepset is not None:
                mirrored = dep.reverse()
                pending.difference_update((dep, mirrored))
                self._causes[dep.relVar2].remove(dep.relVar1)
                self._causes[mirrored.relVar2].remove(mirrored.relVar1)
        if not pending:
            break
        sepset_size += 1

    skeleton = set()
    for effect, causes in self._causes.items():
        for cause in causes:
            skeleton.add(RelationalDependency(cause, effect))
    self.undirectedDependencies = skeleton
    return set(self.undirectedDependencies)
def testTwoRelationships(self):
    """Check the dependencies generated at hop threshold 3 for a chain A--AB--B--BC--C.

    Every entity and relationship carries one attribute, and existence
    dependencies are included. The expected strings are grouped by the hop
    count at which they are listed.
    """
    schema = Schema()
    for entityName in ('A', 'B', 'C'):
        schema.addEntity(entityName)
    schema.addRelationship('AB', ('A', Schema.MANY), ('B', Schema.ONE))
    schema.addRelationship('BC', ('B', Schema.ONE), ('C', Schema.MANY))
    for itemName, attrName in (('A', 'X'), ('B', 'Y'), ('C', 'Z'),
                               ('AB', 'XY'), ('BC', 'YZ')):
        schema.addAttribute(itemName, attrName)

    zeroHops = []
    oneHop = [
        '[A, AB].exists -> [A].X',
        '[B, AB].exists -> [B].Y',
        '[B, BC].exists -> [B].Y',
        '[C, BC].exists -> [C].Z',
        '[A, AB].XY -> [A].X',
        '[B, AB].XY -> [B].Y',
        '[B, BC].YZ -> [B].Y',
        '[C, BC].YZ -> [C].Z',
        '[AB, A].X -> [AB].exists',
        '[AB, A].X -> [AB].XY',
        '[AB, B].Y -> [AB].exists',
        '[AB, B].Y -> [AB].XY',
        '[BC, C].Z -> [BC].exists',
        '[BC, C].Z -> [BC].YZ',
        '[BC, B].Y -> [BC].exists',
        '[BC, B].Y -> [BC].YZ',
    ]
    twoHops = [
        '[AB, A, AB].exists -> [AB].XY',
        '[AB, B, BC].exists -> [AB].exists',
        '[AB, B, BC].exists -> [AB].XY',
        '[BC, C, BC].exists -> [BC].YZ',
        '[BC, B, AB].exists -> [BC].exists',
        '[BC, B, AB].exists -> [BC].YZ',
        '[A, AB, B].Y -> [A].X',
        '[B, AB, A].X -> [B].Y',
        '[B, BC, C].Z -> [B].Y',
        '[C, BC, B].Y -> [C].Z',
        '[AB, B, BC].YZ -> [AB].exists',
        '[AB, B, BC].YZ -> [AB].XY',
        '[BC, B, AB].XY -> [BC].exists',
        '[BC, B, AB].XY -> [BC].YZ',
    ]
    threeHops = [
        '[A, AB, B, BC].exists -> [A].X',
        '[B, AB, A, AB].exists -> [B].Y',
        '[B, BC, C, BC].exists -> [B].Y',
        '[C, BC, B, AB].exists -> [C].Z',
        '[A, AB, B, BC].YZ -> [A].X',
        '[B, AB, A, AB].XY -> [B].Y',
        '[B, BC, C, BC].YZ -> [B].Y',
        '[C, BC, B, AB].XY -> [C].Z',
        '[AB, A, AB, B].Y -> [AB].XY',
        '[AB, B, BC, C].Z -> [AB].exists',
        '[AB, B, BC, C].Z -> [AB].XY',
        '[BC, C, BC, B].Y -> [BC].YZ',
        '[BC, B, AB, A].X -> [BC].exists',
        '[BC, B, AB, A].X -> [BC].YZ',
    ]

    # A threshold of 3 must produce the dependencies of every smaller hop count too.
    expected = []
    for group in (zeroHops, oneHop, twoHops, threeHops):
        expected.extend(group)

    relDeps = RelationalSpace.getRelationalDependencies(
        schema, 3, includeExistence=True)
    self.assertTrue(
        all(isinstance(relDep, RelationalDependency) for relDep in relDeps))
    TestUtil.assertUnorderedListEqual(
        self, expected, [str(relDep) for relDep in relDeps])
def identifyUndirectedDependencies(self, orderIndependentSkeleton=False, times=2):
    """Phase I: remove candidate dependencies for which a separating set is found.

    Starts from all relational dependencies up to self.hopThreshold (without
    existence dependencies), builds the AGGs from them, then searches for
    separating sets of size 0 .. self.depth. A dependency and its reverse
    are dropped together once a separating set is found.

    Parameters:
        orderIndependentSkeleton: when true, removals from the underlying
            AGGs are postponed until all dependencies at the current
            conditioning-set size have been examined.
        times: forwarded unchanged to constructAggsFromDependencies.

    Side effects: sets full_num_agg_nodes, full_num_agg_edges, sepsets,
    maxDepthReached, undirectedDependencies, and depth (only when it was None).
    """
    logger.info('Phase I: identifying undirected dependencies')

    # Create fully connected undirected AGG
    candidates = self.potentialDependencySorter(
        RelationalSpace.getRelationalDependencies(
            self.schema, self.hopThreshold, includeExistence=False))
    self.constructAggsFromDependencies(candidates, times)
    aggs = self.perspectiveToAgg.values()
    self.full_num_agg_nodes = sum(len(agg.nodes()) for agg in aggs)
    self.full_num_agg_edges = sum(len(agg.edges()) for agg in aggs)

    # Keep track of separating sets
    self.sepsets = {}
    self.maxDepthReached = -1
    if self.depth is None:
        # Default depth is derived from the largest AGG (its node count minus two).
        self.depth = max(len(agg.nodes()) - 2 for agg in aggs)
    logger.info("Number of potentialDeps %d", len(candidates))

    surviving = candidates[:]
    deferredRemovals = []

    # Check for independencies
    for setSize in range(self.depth + 1):
        self.maxDepthReached = setSize
        anyTested = False
        logger.info("Conditioning set size %d", setSize)
        logger.debug("remaining dependencies %s", surviving)

        for dep in candidates:
            logger.debug("potential dependency %s", dep)
            if dep not in surviving:
                # Already removed as the reverse of an earlier dependency.
                continue
            relVar1 = dep.relVar1
            relVar2 = dep.relVar2
            sepset, testedNow = self.findSepset(
                relVar1, relVar2, setSize, phaseForRecording='Phase I')
            if not anyTested:
                anyTested = testedNow
            if sepset is None:
                continue

            logger.debug("removing edge %s -- %s", relVar1, relVar2)
            # Store the separating set under both orientations of the pair.
            for pair in ((relVar1, relVar2), (relVar2, relVar1)):
                self.sepsets[pair] = set(sepset)
            surviving.remove(dep)
            surviving.remove(dep.reverse())
            if orderIndependentSkeleton:
                # delay removal in underlying AGGs until after current depth
                deferredRemovals.append(dep)
            else:
                self.removeDependency(dep)

        if orderIndependentSkeleton:
            for dep in deferredRemovals:
                self.removeDependency(dep)
            deferredRemovals = []

        if not anyTested:
            # exit early, no possible sepsets of a larger size
            break
        candidates = surviving[:]

    self.undirectedDependencies = surviving
    logger.info("Undirected dependencies: %s", self.undirectedDependencies)
    logger.info(self.ciRecord)
def testTooManyDependencies(self):
    """Check that generateModel raises when asked for more dependencies than it can place.

    Three cases are exercised: a single-attribute schema asked for one
    dependency, a two-attribute schema asked for two, and a one-relationship
    schema asked for seven out of an explicitly supplied candidate list.
    """
    def expectFailure(message, *args, **kwargs):
        # Every case expects a plain Exception carrying the given message.
        TestUtil.assertRaisesMessage(
            self, Exception, message, ModelGenerator.generateModel, *args, **kwargs)

    singleEntity = Schema()
    singleEntity.addEntity('A')
    singleEntity.addAttribute('A', 'X1')
    expectFailure(
        "Could not generate a model: not enough dependencies to draw from",
        singleEntity, 0, 1)

    singleEntity.addAttribute('A', 'X2')
    expectFailure(
        "Could not generate a model: failed to find a model with 2 dependenc[y|ies]",
        singleEntity, 0, 2)

    relational = Schema()
    for entityName in ('A', 'B'):
        relational.addEntity(entityName)
    relational.addRelationship('AB', ('A', Schema.ONE), ('B', Schema.MANY))
    for itemName, attrName in (('A', 'X'), ('B', 'Y'), ('AB', 'XY')):
        relational.addAttribute(itemName, attrName)
    candidateDeps = RelationalSpace.getRelationalDependencies(
        relational, 1, includeExistence=True)
    expectFailure(
        "Could not generate a model: failed to find a model with 7 dependenc[y|ies]",
        relational, 2, 7, dependencies=candidateDeps)
def identifyUndirectedDependencies(self):
    '''
    Run Phase I of RCD-Light and return the undirected dependencies.

    All candidate dependencies up to the hop threshold are gathered and
    recorded per effect in self._causes. Each one is then tested against
    separating sets of size 0, 1, 2, ... and discarded together with its
    reverse as soon as a separating set exists. The search stops when
    nothing is left to test.

    Side effects: sets self._causes and self.undirectedDependencies.

    Returns a new set holding the surviving dependencies.
    '''
    all_deps = RelationalSpace.getRelationalDependencies(
        self._schema, self._hop_threshold)

    def by_effect(dependency):
        return dependency.relVar2

    # Sorting first is required: groupby only merges neighbouring items.
    ordered = sorted(all_deps, key=by_effect)
    grouped = {}
    for effect, deps_for_effect in itertools.groupby(ordered, key=by_effect):
        grouped[effect] = set(d.relVar1 for d in deps_for_effect)
    self._causes = grouped

    untested = set(all_deps)
    for size in itertools.count():
        # Work on a snapshot so entries can be dropped from `untested` safely.
        snapshot = list(untested)
        for dep in snapshot:
            if dep not in untested:
                continue
            cause = dep.relVar1
            effect = dep.relVar2
            sepset, tested = self._find_sepset_with_size(
                cause, effect, size, 'Phase I')
            if not tested:
                # NOTE(review): `tested` looks like a flag saying whether a
                # test at this size was possible -- confirm in
                # _find_sepset_with_size.
                untested.remove(dep)
            if sepset is None:
                continue
            flipped = dep.reverse()
            untested.difference_update({dep, flipped})
            self._causes[dep.relVar2].remove(dep.relVar1)
            self._causes[flipped.relVar2].remove(flipped.relVar1)
        if not untested:
            break

    survivors = set()
    for effect, causes in self._causes.items():
        survivors.update(RelationalDependency(cause, effect) for cause in causes)
    self.undirectedDependencies = survivors
    return set(self.undirectedDependencies)
# Random Schema schema = SchemaGenerator.generateSchema(numEntities, numRelationships, entityAttrDistribution=ConstantDistribution(2), relationshipAttrDistribution=ConstantDistribution(1), allowCycles=True, oneRelationshipPerPair=False) # Random Model try: model = ModelGenerator.generateModel(schema, hopThreshold, numDependencies, maxNumParents=maxNumParents) except Exception: continue # Some RCD algorithm takes too much time. # This limits generated models to be 'easy' if len(RelationalSpace.getRelationalDependencies(schema, hopThreshold)) > 100: continue # This oracle uses AGGs. oracle = Oracle(model, 2 * hopThreshold) # Since CI-tests are cached, comparing time spent on RCD and RCDL directly should be avoided. rcdl = RCDLight(schema, oracle, hopThreshold) rcdl.identifyUndirectedDependencies() rcdl.orientDependencies() rcd = RCD(schema, oracle, hopThreshold, depth=rcdDepth) rcd.identifyUndirectedDependencies() rcd.orientDependencies() assert ModelEvaluation.orientedPrecision(model, rcdl.orientedDependencies) == 1.0
def testOneEntity(self):
    """Check the dependencies generated for schemas that contain only entities.

    Attributes and a second entity are added step by step. After each step
    the generated dependencies must be exactly the ordered pairs of distinct
    attributes listed for that step.
    """
    schema = Schema()
    schema.addEntity('A')

    def verify(hopThreshold, expected):
        # Query the current schema and compare the string forms, ignoring order.
        relDeps = RelationalSpace.getRelationalDependencies(schema, hopThreshold)
        self.assertTrue(
            all(isinstance(relDep, RelationalDependency) for relDep in relDeps))
        TestUtil.assertUnorderedListEqual(
            self, expected, [str(relDep) for relDep in relDeps])

    # No attributes yet, then a single attribute: nothing to relate.
    verify(0, [])
    schema.addAttribute('A', 'X1')
    verify(0, [])

    schema.addAttribute('A', 'X2')
    verify(0, ['[A].X1 -> [A].X2', '[A].X2 -> [A].X1'])

    schema.addAttribute('A', 'X3')
    withinA = [
        '[A].X1 -> [A].X2',
        '[A].X2 -> [A].X1',
        '[A].X1 -> [A].X3',
        '[A].X3 -> [A].X1',
        '[A].X2 -> [A].X3',
        '[A].X3 -> [A].X2',
    ]
    verify(0, list(withinA))
    # The same six dependencies are expected at hop threshold 2.
    verify(2, list(withinA))

    # An attribute-less second entity leaves the expected list unchanged.
    schema.addEntity('B')
    verify(0, list(withinA))

    schema.addAttribute('B', 'Y1')
    schema.addAttribute('B', 'Y2')
    verify(0, withinA + ['[B].Y1 -> [B].Y2', '[B].Y2 -> [B].Y1'])