class Connectivity(FeatureInterface):
    """Feature exposing one column's connectivity value within its table."""

    def __init__(self):
        # Collaborator that produces the per-column connectivity values.
        self.connectivityIdentifier = ConnectivityIdentifier()

    def calculate(self, column, columnIndex, table):
        """Return the connectivity entry for the column at ``columnIndex``.

        The connectivity of the whole ``table`` is requested from the
        identifier on every call and the entry at ``columnIndex`` is
        returned.

        Args:
            column: Not used by this implementation; kept so the signature
                matches the other features (presumably required by
                FeatureInterface -- confirm).
            columnIndex: Position used to index the connectivity result.
            table: Table handed to ``ConnectivityIdentifier.getConnectivity``.

        Returns:
            The element of the connectivity result at ``columnIndex``.
        """
        perColumnConnectivity = self.connectivityIdentifier.getConnectivity(table)
        return perColumnConnectivity[columnIndex]
class ConnectivityIdentifierTestCase(unittest.TestCase):
    """Reports subject-column accuracy for a sweep of connectivity floors."""

    def setUp(self):
        """Load the sample tables and build the identifier under test."""
        sampler = T2DSampler()
        self.testTable = sampler.getTestTable()
        self.scIdentifier = ConnectivityIdentifier()
        self.testTables = sampler.getTablesSubjectIdentificationGoldStandard()

    def testConnectivityIdentifier(self):
        """Print hit count and accuracy for each connectivity floor.

        For every floor in ``np.arange(0, 1.0, 0.1)`` (ceiling fixed at
        0.32) each gold-standard table is run through
        ``identifySubjectColumn`` with weights disabled, and the number of
        tables whose returned column passes ``table.isSubjectColumn`` is
        printed together with the resulting ratio.

        The method only reports numbers; it makes no assertions.

        NOTE(review): the original docstring consisted solely of the value
        0.409836065574 -- presumably an accuracy observed in an earlier
        run; confirm.
        """
        connectivityCeil = 0.32
        connectivityFloors = np.arange(0, 1.0, 0.1)
        # Loop-invariant: the gold-standard set does not change per floor.
        tableCount = len(self.testTables)

        for connectivityFloor in connectivityFloors:
            correctly = sum(
                1
                for table in self.testTables
                if table.isSubjectColumn(
                    self.scIdentifier.identifySubjectColumn(
                        table,
                        applyWeights=False,
                        connectivityFloor=connectivityFloor,
                        connectivityCeil=connectivityCeil,
                    )
                )
            )
            # Single-argument print(...) emits the same text under Python 2
            # (parenthesised expression) and Python 3 (function call); the
            # bare print statement is a syntax error on Python 3.
            print("connectivity floor: %s" % connectivityFloor)
            print("connectivity ceil: %s" % connectivityCeil)
            print(correctly)
            print(float(correctly) / tableCount)
class SupportConnectivityIdentifier(object):
    """Chooses a subject column from blended support/connectivity scores."""

    def __init__(self):
        self.logger = Logger().getLogger(__name__)
        self.connectivityIdentifier = ConnectivityIdentifier()
        self.supportIdentifier = SupportIdentifier()

    def identifySubjectColumn(self, table, supportFloor=10, supportCeil=70,
                              connectivityThreshold=0.01, alpha=0.5):
        """Return the index of the column with the highest blended score.

        Args:
            table: Table passed to the connectivity and support identifiers.
            supportFloor: Supports not strictly above this value count as 0.
            supportCeil: Supports not strictly below this value count as 0.
            connectivityThreshold: Connectivities not strictly above this
                value count as 0.
            alpha: Weight of the support term; the connectivity term is
                weighted by ``1 - alpha``.

        Returns:
            Index of the first column holding the maximum blended score.

        Raises:
            ValueError: If there are no connectivity values (``max`` of an
                empty list).
            IndexError: If fewer supports than connectivities are returned.
        """
        rawConnectivities = self.connectivityIdentifier.getConnectivity(
            table, applyWeights=False)
        rawSupports = self.supportIdentifier.getSupport(table)

        # Keep only supports strictly inside (supportFloor, supportCeil).
        gatedSupports = [
            value if supportFloor < value < supportCeil else 0
            for value in rawSupports
        ]

        # Zero out weak connectivities and multiply the rest by 100 so they
        # sit on the same scale as the supports (supports stay unscaled).
        scaledConnectivities = [
            value * 100 if value > connectivityThreshold else 0
            for value in rawConnectivities
        ]

        # Supports are looked up by position so a short support list still
        # fails loudly instead of being silently truncated.
        blendedScores = [
            alpha * gatedSupports[position] + (1 - alpha) * scaled
            for position, scaled in enumerate(scaledConnectivities)
        ]
        return blendedScores.index(max(blendedScores))
def __init__(self):
    """Create the ConnectivityIdentifier this object delegates to."""
    self.connectivityIdentifier = ConnectivityIdentifier()
def setUp(self):
    """Load the sample tables and build the identifier under test."""
    tableSampler = T2DSampler()
    # Single sample table plus the gold-standard set, both from one sampler.
    self.testTable = tableSampler.getTestTable()
    self.scIdentifier = ConnectivityIdentifier()
    self.testTables = tableSampler.getTablesSubjectIdentificationGoldStandard()
def __init__(self):
    """Set up the logger and the two identifiers whose scores are combined."""
    # Logger keyed by this module's name.
    self.logger = Logger().getLogger(__name__)
    self.connectivityIdentifier = ConnectivityIdentifier()
    self.supportIdentifier = SupportIdentifier()