def testOrientedRecall(self):
    """Check ModelEvaluation.orientedRecall against hand-computed values.

    A single-entity schema ('A' with attributes X1..X4) is shared by every
    scenario.  Each scenario pairs the dependencies of a "true" model with
    a series of learned dependency lists and the recall expected for each.
    The expectations encode, among other things, that:

    * a true model without dependencies always yields a recall of 1.0;
    * a true dependency that is learned in both directions is not counted
      as recalled;
    * learned dependencies that are absent from the true model do not
      lower the recall.
    """
    schema = Schema()
    schema.addEntity('A')
    for attrName in ('X1', 'X2', 'X3', 'X4'):
        schema.addAttribute('A', attrName)

    # Every dependency string that appears in the scenarios below.
    x1_x2 = '[A].X1 -> [A].X2'
    x2_x1 = '[A].X2 -> [A].X1'
    x2_x3 = '[A].X2 -> [A].X3'
    x3_x2 = '[A].X3 -> [A].X2'
    x4_x1 = '[A].X4 -> [A].X1'
    x1_x4 = '[A].X1 -> [A].X4'
    x1_x3 = '[A].X1 -> [A].X3'
    x2_x4 = '[A].X2 -> [A].X4'
    x4_x2 = '[A].X4 -> [A].X2'

    # (true dependencies, [(learned dependencies, expected recall), ...])
    scenarios = [
        # true model has no dependencies
        ([], [
            ([], 1.0),
            ([x1_x2], 1.0),
            ([x1_x2, x2_x1], 1.0),
            ([x1_x2, x2_x3], 1.0),
        ]),
        # true model has one dependency
        ([x1_x2], [
            ([], 0.0),
            ([x1_x2, x2_x1], 0.0),
            ([x2_x1], 0.0),
            ([x1_x2, x2_x3], 1.0),
            ([x1_x2, x2_x3, x3_x2], 1.0),
        ]),
        # true model has two dependencies
        ([x1_x2, x3_x2], [
            ([x3_x2], 0.5),
            ([x2_x3], 0.0),
            ([x3_x2, x2_x3], 0.0),
            ([x3_x2, x4_x1], 0.5),
            ([x3_x2, x2_x3, x4_x1, x1_x4], 0.0),
            ([x3_x2, x4_x1, x1_x4], 0.5),
            ([x3_x2, x4_x1, x1_x4, x1_x2], 1.0),
            ([x3_x2, x2_x3, x4_x1, x1_x4, x1_x3, x2_x1], 0.0),
            ([x3_x2, x2_x3, x4_x1, x1_x4, x1_x3, x1_x2], 0.5),
        ]),
        # true model has three dependencies
        ([x1_x2, x3_x2, x2_x4], [
            ([x3_x2, x2_x4, x1_x4], 2 / 3),
            ([x3_x2, x2_x4, x4_x2, x1_x4, x4_x1], 1 / 3),
        ]),
    ]

    for dependencies, expectations in scenarios:
        # One model per scenario, reused for all of its learned lists.
        model = Model(schema, dependencies)
        for learnedDependencies, expectedRecall in expectations:
            self.assertEqual(
                expectedRecall,
                ModelEvaluation.orientedRecall(model, learnedDependencies))
except Exception: continue # Some RCD algorithm takes too much time. # This limits generated models to be 'easy' if len(RelationalSpace.getRelationalDependencies(schema, hopThreshold)) > 100: continue # This oracle uses AGGs. oracle = Oracle(model, 2 * hopThreshold) # Since CI-tests are cached, comparing time spent on RCD and RCDL directly should be avoided. rcdl = RCDLight(schema, oracle, hopThreshold) rcdl.identifyUndirectedDependencies() rcdl.orientDependencies() rcd = RCD(schema, oracle, hopThreshold, depth=rcdDepth) rcd.identifyUndirectedDependencies() rcd.orientDependencies() assert ModelEvaluation.orientedPrecision(model, rcdl.orientedDependencies) == 1.0 assert ModelEvaluation.skeletonPrecision(model, rcdl.undirectedDependencies) == 1.0 assert ModelEvaluation.skeletonRecall(model, rcdl.undirectedDependencies) == 1.0 rcdl_ori_recall = ModelEvaluation.orientedRecall(model, rcdl.orientedDependencies) rcd_ori_recall = ModelEvaluation.orientedRecall(model, rcd.orientedDependencies) assert rcdl_ori_recall >= rcd_ori_recall print('.', end='', flush=True) if rcdl_ori_recall > rcd_ori_recall: print('\nRCDL beats RCD:', rcdl_ori_recall, '>', rcd_ori_recall)
schema, model = shlee.RCDLight.incompleteness_example() logger.info('Model: %s', model.dependencies) hopThreshold = max(len(d.relVar1.path) + 1 for d in model.dependencies) oracle = Oracle(model, 3 * hopThreshold) rcd = RCD(schema, oracle, hopThreshold, depth=2) rcd.identifyUndirectedDependencies() rcd.orientDependencies() print('Skeleton precision:', ModelEvaluation.skeletonPrecision(model, rcd.undirectedDependencies)) print('Skeleton recall:', ModelEvaluation.skeletonRecall(model, rcd.undirectedDependencies)) precision = ModelEvaluation.orientedPrecision(model, rcd.orientedDependencies) print('Oriented precision:', precision) print('Oriented recall:', ModelEvaluation.orientedRecall(model, rcd.orientedDependencies)) rcdl = shlee.RCDLight.RCDLight(schema, oracle, hopThreshold) rcdl.identifyUndirectedDependencies() rcdl.orientDependencies() print('Skeleton precision:', ModelEvaluation.skeletonPrecision(model, rcdl.undirectedDependencies)) print('Skeleton recall:', ModelEvaluation.skeletonRecall(model, rcdl.undirectedDependencies)) precision = ModelEvaluation.orientedPrecision(model, rcdl.orientedDependencies) print('Oriented precision:', precision) print('Oriented recall:', ModelEvaluation.orientedRecall(model, rcdl.orientedDependencies)) assert ModelEvaluation.orientedRecall(model, rcdl.orientedDependencies) == \ ModelEvaluation.orientedRecall(model, rcd.orientedDependencies) == \
# Parameters schema, model = shlee.RCDLight.incompleteness_example() logger.info('Model: %s', model.dependencies) hopThreshold = max(len(d.relVar1.path) + 1 for d in model.dependencies) oracle = Oracle(model, 3 * hopThreshold) rcd = RCD(schema, oracle, hopThreshold, depth=2) rcd.identifyUndirectedDependencies() rcd.orientDependencies() print('Skeleton precision:', ModelEvaluation.skeletonPrecision(model, rcd.undirectedDependencies)) print('Skeleton recall:', ModelEvaluation.skeletonRecall(model, rcd.undirectedDependencies)) precision = ModelEvaluation.orientedPrecision(model, rcd.orientedDependencies) print('Oriented precision:', precision) print('Oriented recall:', ModelEvaluation.orientedRecall(model, rcd.orientedDependencies)) rcdl = shlee.RCDLight.RCDLight(schema, oracle, hopThreshold) rcdl.identifyUndirectedDependencies() rcdl.orientDependencies() print('Skeleton precision:', ModelEvaluation.skeletonPrecision(model, rcdl.undirectedDependencies)) print('Skeleton recall:', ModelEvaluation.skeletonRecall(model, rcdl.undirectedDependencies)) precision = ModelEvaluation.orientedPrecision(model, rcdl.orientedDependencies) print('Oriented precision:', precision) print('Oriented recall:', ModelEvaluation.orientedRecall(model, rcdl.orientedDependencies)) assert ModelEvaluation.orientedRecall(model, rcdl.orientedDependencies) == \ ModelEvaluation.orientedRecall(model, rcd.orientedDependencies) == \ 0.0 # Demonstrate that there is no 'unshielded triple' in AGGs for the counter-example.
hopThreshold = 4 maxNumParents = rcdDepth = 3 # Parameters schema = SchemaGenerator.generateSchema(numEntities, numRelationships, allowCycles=False, oneRelationshipPerPair=True) logger.info(schema) model = ModelGenerator.generateModel(schema, hopThreshold, numDependencies, maxNumParents=maxNumParents) logger.info('Model: %s', model.dependencies) oracle = Oracle(model, 2 * hopThreshold) # Run RCD algorithm and collect statistics on learned model rcd = RCD(schema, oracle, hopThreshold, depth=rcdDepth) rcd.identifyUndirectedDependencies() rcd.orientDependencies() logger.info( 'Skeleton precision: %s', ModelEvaluation.skeletonPrecision(model, rcd.undirectedDependencies)) logger.info('Skeleton recall: %s', ModelEvaluation.skeletonRecall(model, rcd.undirectedDependencies)) logger.info('Oriented precision: %s', ModelEvaluation.orientedPrecision(model, rcd.orientedDependencies)) logger.info('Oriented recall: %s', ModelEvaluation.orientedRecall(model, rcd.orientedDependencies))