def testAllowCyclesFalse(self):
    # Checks generateSchema(..., allowCycles=False) against scripted pair pickers:
    # exactly three relationships must come out, one for each listed entity pair.

    # Scenario 1: the scripted picks include ('A', 'C'); the assertions below
    # require the three relationships to be A-B, B-C and C-D instead.
    firstPicks = [('A', 'B'), ('B', 'C'), ('A', 'C'), ('C', 'D')]
    generated = SchemaGenerator.generateSchema(
        4, 3,
        entityPairPicker=makePairPickerWithList(firstPicks),
        oneRelationshipPerPair=True,
        allowCycles=False)
    self.assertEqual(3, len(generated.getRelationships()))
    for entity1, entity2 in (('A', 'B'), ('B', 'C'), ('C', 'D')):
        self.assertEqual(
            1, len(generated.getRelationshipsBetweenEntities(entity1, entity2)))

    # Scenario 2: the picks include both ('B', 'D') and ('D', 'B'); the expected
    # relationships are C-D, B-C and A-D, queried here with the entity names
    # in the opposite order from the picks.
    secondPicks = [('C', 'D'), ('B', 'C'), ('B', 'D'), ('D', 'B'), ('A', 'D')]
    generated = SchemaGenerator.generateSchema(
        4, 3,
        entityPairPicker=makePairPickerWithList(secondPicks),
        oneRelationshipPerPair=True,
        allowCycles=False)
    self.assertEqual(3, len(generated.getRelationships()))
    for entity1, entity2 in (('D', 'C'), ('C', 'B'), ('D', 'A')):
        self.assertEqual(
            1, len(generated.getRelationshipsBetweenEntities(entity1, entity2)))
def testCardinalities(self):
    # A constant cardinality distribution must be applied to both ends of the
    # single generated relationship; checked for Schema.ONE, then Schema.MANY.
    for cardinality in (Schema.ONE, Schema.MANY):
        generated = SchemaGenerator.generateSchema(
            2, 1,
            entityAttrDistribution=ConstantDistribution(0),
            relationshipAttrDistribution=ConstantDistribution(0),
            cardinalityDistribution=ConstantDistribution(cardinality))

        # Hand-built reference schema: entities A and B joined by 'AB'.
        reference = Schema()
        reference.addEntity('A')
        reference.addEntity('B')
        reference.addRelationship('AB', ('A', cardinality), ('B', cardinality))

        self.assertEqual(generated, reference)
def testGenerateRelationshipAttributes(self):
    # Compares generated relationship attributes (names and counts) with a
    # reference schema that is built up incrementally across two scenarios.

    def generate(numRelationships, attrsPerRelationship):
        # Two attribute-free entities; every relationship gets a fixed number
        # of attributes and ONE-to-ONE cardinality.
        return SchemaGenerator.generateSchema(
            2, numRelationships,
            entityAttrDistribution=ConstantDistribution(0),
            relationshipAttrDistribution=ConstantDistribution(attrsPerRelationship),
            cardinalityDistribution=ConstantDistribution(Schema.ONE))

    # Scenario 1: one relationship carrying one attribute.
    generated = generate(1, 1)
    reference = Schema()
    reference.addEntity('A')
    reference.addEntity('B')
    reference.addRelationship('AB', ('A', Schema.ONE), ('B', Schema.ONE))
    reference.addAttribute('AB', 'XY1')
    self.assertEqual(generated, reference)

    # Scenario 2: two relationships with two attributes each; the reference
    # from scenario 1 is extended rather than rebuilt.
    generated = generate(2, 2)
    reference.addRelationship('AB2', ('A', Schema.ONE), ('B', Schema.ONE))
    for relationshipName, attributeName in (
            ('AB', 'XY2'), ('AB2', 'XY2_1'), ('AB2', 'XY2_2')):
        reference.addAttribute(relationshipName, attributeName)
    self.assertEqual(generated, reference)
def testOneRelationshipPerPairFlag(self):
    # With oneRelationshipPerPair=True, each listed entity pair must end up
    # with exactly the one expected relationship, even when the scripted
    # picker offers the same pair more than once.

    def assertSoleRelationships(generated, expectations):
        # expectations: (relationshipName, entity1, entity2) triples looked up
        # in the enclosing `reference` schema.
        for relationshipName, entity1, entity2 in expectations:
            self.assertEqual(
                [reference.getRelationship(relationshipName)],
                generated.getRelationshipsBetweenEntities(entity1, entity2))

    # Scenario 1: two entities, one relationship, default pair picker.
    generated = SchemaGenerator.generateSchema(
        2, 1,
        relationshipAttrDistribution=ConstantDistribution(0),
        cardinalityDistribution=ConstantDistribution(Schema.ONE),
        oneRelationshipPerPair=True)
    reference = Schema()
    reference.addEntity('A')
    reference.addEntity('B')
    reference.addRelationship('AB', ('A', Schema.ONE), ('B', Schema.ONE))
    assertSoleRelationships(generated, [('AB', 'A', 'B')])

    # Scenario 2: ('A', 'B') is offered twice before ('A', 'C').
    generated = SchemaGenerator.generateSchema(
        3, 2,
        relationshipAttrDistribution=ConstantDistribution(0),
        entityPairPicker=makePairPickerWithList(
            [('A', 'B'), ('A', 'B'), ('A', 'C')]),
        cardinalityDistribution=ConstantDistribution(Schema.ONE),
        oneRelationshipPerPair=True)
    reference.addEntity('C')
    reference.addRelationship('AC', ('A', Schema.ONE), ('C', Schema.ONE))
    assertSoleRelationships(generated, [('AB', 'A', 'B'), ('AC', 'A', 'C')])

    # Scenario 3: both ('A', 'B') and ('A', 'D') are offered twice.
    generated = SchemaGenerator.generateSchema(
        4, 4,
        relationshipAttrDistribution=ConstantDistribution(0),
        entityPairPicker=makePairPickerWithList(
            [('A', 'B'), ('A', 'B'), ('A', 'C'),
             ('A', 'D'), ('A', 'D'), ('B', 'C')]),
        cardinalityDistribution=ConstantDistribution(Schema.ONE),
        oneRelationshipPerPair=True)
    reference.addEntity('D')
    reference.addRelationship('AD', ('A', Schema.ONE), ('D', Schema.ONE))
    reference.addRelationship('BC', ('B', Schema.ONE), ('C', Schema.ONE))
    assertSoleRelationships(
        generated,
        [('AB', 'A', 'B'), ('AC', 'A', 'C'), ('AD', 'A', 'D'), ('BC', 'B', 'C')])
def testGenerateEntities(self):
    # Generated schemas with 0, 1 and 2 entities (and no relationships) must
    # equal a reference schema grown one entity at a time ('A', then 'B').

    # Zero entities: the result must equal an empty Schema.
    generated = SchemaGenerator.generateSchema(0, 0)
    reference = Schema()
    self.assertEqual(generated, reference)

    # One entity, then two; entities are generated without attributes.
    for numEntities, newEntityName in enumerate(('A', 'B'), start=1):
        generated = SchemaGenerator.generateSchema(
            numEntities, 0,
            entityAttrDistribution=ConstantDistribution(0))
        reference.addEntity(newEntityName)
        self.assertEqual(generated, reference)
def testGenerateEntityAttributes(self):
    # Compares generated entity attributes (names and counts) with a
    # reference schema that is extended between the two scenarios.

    # Scenario 1: a single entity with a single attribute.
    generated = SchemaGenerator.generateSchema(
        1, 0,
        entityAttrDistribution=ConstantDistribution(1))
    reference = Schema()
    reference.addEntity('A')
    reference.addAttribute('A', 'X1')
    self.assertEqual(generated, reference)

    # Scenario 2: two entities with two attributes each.
    generated = SchemaGenerator.generateSchema(
        2, 0,
        entityAttrDistribution=ConstantDistribution(2))
    reference.addEntity('B')
    for entityName, attributeName in (('A', 'X2'), ('B', 'Y1'), ('B', 'Y2')):
        reference.addAttribute(entityName, attributeName)
    self.assertEqual(generated, reference)
from shlee.RCDLight import RCDLight

# Generates random schemas and models and compares the learners' theoretical
# performance, based on conditional independence tests answered from
# Abstract Ground Graphs (AGGs).
# One can see that Improved RCD-Light can identify more orientations than RCD.
while True:
    # Randomly sized problem instance.
    numEntities = random.randint(2, 3)
    numRelationships = random.randint(2, 3)
    numDependencies = random.randint(5, 10)
    hopThreshold = random.randint(2, 5)
    rcdDepth = 4
    maxNumParents = rcdDepth

    # Random schema.
    schema = SchemaGenerator.generateSchema(
        numEntities,
        numRelationships,
        entityAttrDistribution=ConstantDistribution(2),
        relationshipAttrDistribution=ConstantDistribution(1),
        allowCycles=True,
        oneRelationshipPerPair=False)

    # Random model; if generation fails for this draw, start over with a new one.
    try:
        model = ModelGenerator.generateModel(
            schema, hopThreshold, numDependencies, maxNumParents=maxNumParents)
    except Exception:
        continue

    # Some RCD algorithms take too much time, so only 'easy' instances are
    # kept: skip any draw with more than 100 candidate relational dependencies.
    numCandidateDependencies = len(
        RelationalSpace.getRelationalDependencies(schema, hopThreshold))
    if numCandidateDependencies > 100:
        continue

    # This oracle uses AGGs.
    oracle = Oracle(model, 2 * hopThreshold)
def testGenerateRelationships(self):
    # Checks relationship generation: default pair picking for two entities,
    # custom pickers that always choose the first or last pair in name order,
    # and the relationship count for a fully default run.

    def orderedNamePair(entPair):
        # Sort key: the two entity names concatenated, smaller name first.
        name0 = entPair[0].name
        name1 = entPair[1].name
        if name0 < name1:
            return name0 + name1
        return name1 + name0

    def makeSortedPicker(position):
        # Builds a picker returning a one-element list holding the pair found
        # at `position` after sorting the candidate pairs by orderedNamePair.
        def picker(entityPairs, constOne):
            return [sorted(entityPairs, key=orderedNamePair)[position]]
        return picker

    def generate(numEntities, numRelationships, **extraOptions):
        # Attribute-free entities and relationships, ONE-to-ONE cardinality.
        return SchemaGenerator.generateSchema(
            numEntities, numRelationships,
            entityAttrDistribution=ConstantDistribution(0),
            relationshipAttrDistribution=ConstantDistribution(0),
            cardinalityDistribution=ConstantDistribution(Schema.ONE),
            **extraOptions)

    # Two entities, one relationship.
    generated = generate(2, 1)
    reference = Schema()
    reference.addEntity('A')
    reference.addEntity('B')
    reference.addRelationship('AB', ('A', Schema.ONE), ('B', Schema.ONE))
    self.assertEqual(generated, reference)

    # Two entities, two relationships between the same pair.
    generated = generate(2, 2)
    reference.addRelationship('AB2', ('B', Schema.ONE), ('A', Schema.ONE))
    self.assertEqual(generated, reference)

    # Three entities; the picker always chooses the first pair in name order.
    generated = generate(3, 2, entityPairPicker=makeSortedPicker(0))
    reference = Schema()
    for entityName in ('A', 'B', 'C'):
        reference.addEntity(entityName)
    reference.addRelationship('AB', ('A', Schema.ONE), ('B', Schema.ONE))
    reference.addRelationship('AB2', ('A', Schema.ONE), ('B', Schema.ONE))
    self.assertEqual(generated, reference)

    # Three entities; the picker always chooses the last pair in name order.
    generated = generate(3, 2, entityPairPicker=makeSortedPicker(-1))
    reference = Schema()
    for entityName in ('A', 'B', 'C'):
        reference.addEntity(entityName)
    reference.addRelationship('BC', ('B', Schema.ONE), ('C', Schema.ONE))
    reference.addRelationship('BC2', ('B', Schema.ONE), ('C', Schema.ONE))
    self.assertEqual(generated, reference)

    # All defaults: only the number of relationships is checked.
    generated = SchemaGenerator.generateSchema(3, 5)
    self.assertEqual(5, len(generated.getRelationships()))
from causality.modelspace import SchemaGenerator import logging logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) # Parameters numEntities = 3 numRelationships = 2 numDependencies = 10 hopThreshold = 4 maxNumParents = rcdDepth = 3 # Parameters schema = SchemaGenerator.generateSchema(numEntities, numRelationships, allowCycles=False, oneRelationshipPerPair=True) logger.info(schema) model = ModelGenerator.generateModel(schema, hopThreshold, numDependencies, maxNumParents=maxNumParents) logger.info('Model: %s', model.dependencies) oracle = Oracle(model, 2 * hopThreshold) # Run RCD algorithm and collect statistics on learned model rcd = RCD(schema, oracle, hopThreshold, depth=rcdDepth) rcd.identifyUndirectedDependencies() rcd.orientDependencies() logger.info(