class SimplePropertyRecommenderTestCase(unittest.TestCase):
    """Smoke tests for ``SimplePropertyRecommender``.

    The tests only exercise the recommender's entry points; they make no
    assertions about the returned values and fail only when a call raises.
    """

    def setUp(self):
        """Fetch the sample tables and build the recommender under test."""
        sampler = T2DSampler()
        self.testTables = sampler.getTablesPropertyAnnotation()
        self.spr = SimplePropertyRecommender()

    def testRecommendPropertiesForTable(self):
        """Call ``recommendPropertiesForTable`` on every sampled table."""
        for num, table in enumerate(self.testTables):
            # Single parenthesised argument: prints the same text whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print("table %s" % (num,))
            # No interactive breakpoint here: a debugger stop would hang any
            # unattended test run on the first table.
            self.spr.recommendPropertiesForTable(table)

    def testLookupPropertiesLOV(self):
        """Call ``lookupPropertiesLOV`` with a fixed sample column header."""
        columnHeader = "city"
        self.spr.lookupPropertiesLOV(columnHeader)
Example #2
0
    def mapProperties(self, table):
        """Pick at most one LOV property per column of ``table``.

        Each header item is looked up through
        ``SimplePropertyRecommender.lookupPropertiesLOV``. The first returned
        candidate whose ``"score"`` is strictly greater than
        ``self.scoreThreshold`` is kept for that column.

        :param table: object providing ``getHeader()``; the header is
            iterated to obtain one item per column.
        :returns: list of ``{"columnIndex": ..., "uri": ...}`` dicts in column
            order. Columns whose lookup returns ``None`` or that have no
            candidate above the threshold are omitted.
        """
        properties = []

        header = table.getHeader()
        propertyRecommender = SimplePropertyRecommender()
        for columnIndex, headerItem in enumerate(header):
            lovProperties = propertyRecommender.lookupPropertiesLOV(headerItem)
            # Identity test: ``== None`` could invoke an overloaded ``__eq__``
            # on the lookup result instead of checking for a missing result.
            if lovProperties is None:
                continue

            # First candidate above the threshold, in the order returned.
            firstMatch = next(
                (candidate for candidate in lovProperties
                 if candidate["score"] > self.scoreThreshold),
                None,
            )
            if firstMatch is not None:
                properties.append({"columnIndex": columnIndex, "uri": firstMatch["uri"]})

        return properties
Example #3
0
    def mapProperties(self, table):
        """Pick the best-scoring LOV property for each column of ``table``.

        For every header item the LOV candidates are fetched through
        ``SimplePropertyRecommender.lookupPropertiesLOV``. Each candidate gets
        a combined score: its LOV ``"score"`` plus ten times the value found
        for its URI in ``self.getScores(table)[columnIndex]`` (0 when the URI
        is absent or when ``getScores`` returns an empty collection).

        Only candidates whose LOV score alone is strictly greater than
        ``self.scoreThreshold`` are considered; the combined score is used
        solely to rank the candidates that pass.
        NOTE(review): confirm that thresholding on the LOV score rather than
        the combined score is intended.

        :param table: object providing ``getHeader()``; also passed to
            ``self.getScores``.
        :returns: list with at most one dict per column, in column order, with
            keys ``"columnIndex"``, ``"uri"``, ``"lovScore"``,
            ``"taipanScore"`` and ``"score"``. On equal combined scores the
            candidate returned first by the lookup wins.
        """
        scores = self.getScores(table)

        header = table.getHeader()
        propertyRecommender = SimplePropertyRecommender()
        propertiesRanked = []
        for columnIndex, headerItem in enumerate(header):
            lovProperties = propertyRecommender.lookupPropertiesLOV(headerItem)
            # Identity test: ``== None`` could invoke an overloaded ``__eq__``
            # on the lookup result instead of checking for a missing result.
            if lovProperties is None:
                continue

            # Candidates of this column only; ranking them here avoids
            # rescanning every collected candidate once per column afterwards.
            columnCandidates = []
            for _property in lovProperties:
                if len(scores) > 0:
                    taipanScore = scores[columnIndex].get(_property['uri'], 0) * 10
                else:
                    taipanScore = 0
                lovScore = _property['score']
                score = float(taipanScore) + lovScore
                if lovScore > self.scoreThreshold:
                    columnCandidates.append({
                        "columnIndex": columnIndex,
                        "uri": _property["uri"],
                        "lovScore": lovScore,
                        "taipanScore": taipanScore,
                        "score": score,
                    })

            if columnCandidates:
                # max() returns the first maximal element, so ties resolve to
                # the earliest candidate in lookup order.
                propertiesRanked.append(max(columnCandidates, key=lambda x: x['score']))

        return propertiesRanked
 def setUp(self):
     """Store the sampler's property-annotation tables and a fresh recommender on the instance."""
     self.testTables = T2DSampler().getTablesPropertyAnnotation()
     self.spr = SimplePropertyRecommender()