Beispiel #1
0
class Connectivity(FeatureInterface):
    """Feature that looks up one column's connectivity value for a table."""

    def __init__(self):
        # The identifier is created once and reused by every calculate() call.
        self.connectivityIdentifier = ConnectivityIdentifier()

    def calculate(self, column, columnIndex, table):
        """Return the connectivity entry at ``columnIndex`` for ``table``.

        ``column`` is accepted but not read by this method; the value comes
        from indexing the result of ``getConnectivity(table)``.
        """
        return self.connectivityIdentifier.getConnectivity(table)[columnIndex]
class ConnectivityIdentifierTestCase(unittest.TestCase):
    """Report subject-column accuracy while sweeping the connectivity floor.

    The test method makes no assertions; it only prints the hit count and
    hit ratio for every floor value so the numbers can be inspected.
    """

    def setUp(self):
        """Create the identifier and load the sample and gold-standard tables."""
        sampler = T2DSampler()
        self.testTable = sampler.getTestTable()
        self.scIdentifier = ConnectivityIdentifier()
        self.testTables = sampler.getTablesSubjectIdentificationGoldStandard()

    def testConnectivityIdentifier(self):
        """
            Print, per connectivity floor, how many tables were identified
            correctly and the resulting ratio.

            Value kept from the original docstring: 0.409836065574
            NOTE(review): presumably a previously observed ratio - confirm.
        """
        connectivityCeil = 0.32
        connectivityFloors = np.arange(0, 1.0, 0.1)
        for connectivityFloor in connectivityFloors:
            correctly = 0
            for table in self.testTables:
                subjectColumn = self.scIdentifier.identifySubjectColumn(
                    table,
                    applyWeights=False,
                    connectivityFloor=connectivityFloor,
                    connectivityCeil=connectivityCeil,
                )
                if table.isSubjectColumn(subjectColumn):
                    correctly += 1
            # Single-argument parenthesised print emits the same text on
            # Python 2 and is also valid syntax on Python 3.
            print("connectivity floor: %s" % connectivityFloor)
            print("connectivity ceil: %s" % connectivityCeil)
            print(correctly)
            print(float(correctly) / len(self.testTables))
class SupportConnectivityIdentifier(object):
    """Choose a subject column from a weighted blend of support and connectivity."""

    def __init__(self):
        """Create the logger and the two identifiers used for scoring."""
        self.logger = Logger().getLogger(__name__)
        self.connectivityIdentifier = ConnectivityIdentifier()
        self.supportIdentifier = SupportIdentifier()

    def identifySubjectColumn(self, table, supportFloor=10, supportCeil=70, connectivityThreshold=0.01, alpha=0.5):
        """Return the index of the column with the highest blended score.

        Args:
            table: Passed unchanged to ``getConnectivity`` and ``getSupport``.
            supportFloor: Supports less than or equal to this count as 0.
            supportCeil: Supports greater than or equal to this count as 0.
            connectivityThreshold: Connectivities less than or equal to this
                count as 0.
            alpha: Weight given to support; connectivity gets ``1 - alpha``.

        Returns:
            Index of the first column whose blended score is maximal.

        Raises:
            ValueError: If there are no connectivity values to score.
        """
        connectivities = self.connectivityIdentifier.getConnectivity(table, applyWeights=False)
        supports = self.supportIdentifier.getSupport(table)

        # Only supports strictly inside (supportFloor, supportCeil) contribute.
        supports = [support if supportFloor < support < supportCeil else 0 for support in supports]

        # Drop connectivities at or below the threshold, then multiply by 100
        # to bring them onto the same scale as the supports.
        connectivities = [
            connectivity * 100 if connectivity > connectivityThreshold else 0
            for connectivity in connectivities
        ]

        if not connectivities:
            # Same exception type max() would raise, with an actionable message.
            raise ValueError("cannot identify a subject column: table has no columns to score")

        # Scores are indexed by connectivity position, one per column.
        consups = [
            alpha * supports[columnIndex] + (1 - alpha) * connectivity
            for columnIndex, connectivity in enumerate(connectivities)
        ]

        return consups.index(max(consups))
Beispiel #4
0
 def __init__(self):
     """Create a ConnectivityIdentifier and keep it on the instance."""
     identifier = ConnectivityIdentifier()
     self.connectivityIdentifier = identifier
 def setUp(self):
     """Prepare the fixtures: a test table, an identifier and the gold-standard tables."""
     tableSampler = T2DSampler()
     self.testTable = tableSampler.getTestTable()
     self.scIdentifier = ConnectivityIdentifier()
     goldStandardTables = tableSampler.getTablesSubjectIdentificationGoldStandard()
     self.testTables = goldStandardTables
 def __init__(self):
     """Create the module-named logger and the connectivity and support identifiers."""
     loggerFactory = Logger()
     self.logger = loggerFactory.getLogger(__name__)
     self.connectivityIdentifier = ConnectivityIdentifier()
     self.supportIdentifier = SupportIdentifier()