def setUp(self):
     """Create the logger, the property mapper and the sampled test tables.

     Sets ``self.logger``, ``self.simplePropertyMapper``, ``self.testTable``
     (from ``getTestTable()``) and ``self.testTables`` (from
     ``getTablesSubjectIdentification()``).
     """
     tableSampler = T2DSampler()
     self.logger = Logger().getLogger(__name__)
     self.simplePropertyMapper = SimplePropertyMapper()
     self.testTable = tableSampler.getTestTable()
     self.testTables = tableSampler.getTablesSubjectIdentification()
class SimplePropertyMappingTestCase(unittest.TestCase):
    """Report how many gold-standard properties SimplePropertyMapper scores.

    For every table returned by
    ``T2DSampler.getTablesPropertyAnnotationDbpediaGoldStandard()`` the test
    prints the number of gold properties that received a score next to the
    total number of gold properties of that table.
    """

    def setUp(self):
        """Load the gold-standard tables and create the mapper under test."""
        sampler = T2DSampler()
        self.testTables = sampler.getTablesPropertyAnnotationDbpediaGoldStandard()
        self.propertyMapper = SimplePropertyMapper()

    def testMapProperties(self):
        """Print ``<retrieved>, <gold total>`` for each test table.

        Log records up to INFO are suppressed while the tables are processed
        and logging is re-enabled afterwards, even if a table raises.
        """
        # logging.disable() keeps a single process-wide threshold, so one call
        # at INFO already silences DEBUG as well.
        logging.disable(logging.INFO)
        try:
            for table in self.testTables:
                propertyScores = self.propertyMapper.getScores(table)
                propertiesRetrieved = self.propertiesRetrievedByTP(
                    propertyScores, table.propertiesGold)
                # Single parenthesised argument: prints the same text whether
                # print is a statement or a function.
                print("%s, %s" % (propertiesRetrieved, len(table.propertiesGold)))
        finally:
            # Undo the global suppression so later tests log normally.
            logging.disable(logging.NOTSET)

    def propertiesRetrievedByTP(self, propertyScores, propertiesGold):
        """Count the gold properties that have a score in their column.

        :param propertyScores: container indexed by column index; each entry
            must offer ``.get(uri)`` (e.g. a dict of uri -> score).
        :param propertiesGold: iterable of dicts with the keys
            ``'columnIndex'`` and ``'uri'``.
        :return: number of gold properties whose uri has a non-``None`` score
            in the column it is annotated on; ``0`` when ``propertyScores``
            is empty or ``None``.
        """
        if not propertyScores:
            return 0

        count = 0
        for _property in propertiesGold:
            columnScores = propertyScores[_property['columnIndex']]
            if columnScores.get(_property['uri']) is not None:
                count += 1
        return count

    def diffProperties(self, propertiesMapped, propertiesGold):
        """Compare mapped properties with the gold standard.

        :param propertiesMapped: sequence of dicts with ``'columnIndex'`` and
            ``'uri'`` keys.
        :param propertiesGold: iterable of dicts with the same keys.
        :return: tuple ``(overall, correct)`` where ``overall`` is the number
            of mapped properties and ``correct`` the number of (mapped, gold)
            pairs that agree on both column index and uri.
        """
        overall = len(propertiesMapped)
        correct = 0
        for propertyMapped in propertiesMapped:
            for propertyGold in propertiesGold:
                sameColumn = propertyMapped['columnIndex'] == propertyGold['columnIndex']
                # uri is only compared for entries of the same column.
                if sameColumn and propertyMapped['uri'] == propertyGold['uri']:
                    correct += 1
        return (overall, correct)
 def setUp(self):
     """Load the DBpedia gold-standard tables and create the property mapper."""
     self.testTables = T2DSampler().getTablesPropertyAnnotationDbpediaGoldStandard()
     self.propertyMapper = SimplePropertyMapper()
Beispiel #4
0
 def getScores(self, table):
     """Return the scores a freshly created SimplePropertyMapper gives ``table``."""
     return SimplePropertyMapper().getScores(table)
class SimpleCachePropertyMappingBenchTestCase(unittest.TestCase):
    """Benchmark SimplePropertyMapper.mapProperties and log results as CSV.

    One CSV row per table is appended to a file below ``results/``; the file
    name encodes the ``rowsToDisambiguate`` and ``threshold`` settings and
    ends in a numeric counter so earlier runs are not overwritten.
    """

    def setUp(self):
        """Create the logger, the mapper under test and the sampled tables."""
        sampler = T2DSampler()
        self.logger = Logger().getLogger(__name__)
        self.simplePropertyMapper = SimplePropertyMapper()
        self.testTable = sampler.getTestTable()
        self.testTables = sampler.getTablesSubjectIdentification()

    def determineResultsFilename(self, filename):
        """Return an unused path below ``results/`` derived from ``filename``.

        While ``results/<filename>`` exists, the integer after the last dot
        is incremented (``x.csv.1`` -> ``x.csv.2``). A name whose last
        component is not an integer gets ``.1`` appended instead of raising
        ``ValueError``.

        :param filename: bare file name (no directory part).
        :return: ``os.path.join("results", <free name>)``.
        """
        import os
        while os.path.exists(os.path.join("results", filename)):
            head, sep, counter = filename.rpartition(".")
            try:
                bumped = str(int(counter) + 1)
            except ValueError:
                # No numeric counter yet: start one.
                filename = filename + ".1"
            else:
                filename = head + sep + bumped

        return os.path.join("results", filename)

    def resultsIterativePrinter(self, row, filename):
        """Append ``row`` as one CSV record to ``filename``.

        The parent directory is created first when it is missing, so a fresh
        checkout without ``results/`` does not fail on the first write.

        :param row: sequence of values for one CSV line.
        :param filename: path of the CSV file to append to.
        """
        import csv
        import os
        directory = os.path.dirname(filename)
        if directory and not os.path.isdir(directory):
            os.makedirs(directory)
        with open(filename, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter=',',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerow(row)

    def simplePropertyMapping(self, tables, rowsToDisambiguate=20, threshold=10):
        """Map properties for every table and append one result row each.

        :param tables: iterable of tables offering ``id``,
            ``isProperty(property)`` and ``getNumberOfProperties()``.
        :param rowsToDisambiguate: forwarded to ``mapProperties``; also part
            of the results file name.
        :param threshold: forwarded to ``mapProperties``; also part of the
            results file name.
        """
        resultsFilename = "%srows.%sthreshold.propertyMapping.results.csv.1" %(rowsToDisambiguate, threshold)
        resultsFilename = self.determineResultsFilename(resultsFilename)
        header = ["tableId","subjectColumn","subjectColumnCorrect","propertyIdentified","correctly","falsePositives","notIdentified","seedListContains","executionTimeFull","executionTimePure","disambiguationTime","classSearchTime","propertySearchTime"]
        self.resultsIterativePrinter(header, resultsFilename)

        mapper = self.simplePropertyMapper
        for table in tables:
            properties = mapper.mapProperties(table, rowsToDisambiguate, threshold)

            correct = 0
            falsePositives = 0
            uris = []
            for _property in properties:
                # Each mapped property is a (uri, column index) pair; only the
                # uri goes into the report.
                uri, _columnIndex = _property
                uris.append(uri)
                if table.isProperty(_property):
                    correct += 1
                else:
                    falsePositives += 1
            # Every correctly mapped property is one fewer left unidentified.
            falseNegatives = table.getNumberOfProperties() - correct

            # The mapper attributes are read after mapProperties() has run for
            # this table; the order below follows the header columns.
            result = [
                table.id,
                mapper.subjectColumn,
                mapper.subjectColumnCorrect,
                u"|".join(uris),
                correct,
                falsePositives,
                falseNegatives,
                mapper.seedListContains,
                mapper.executionTimeFull,
                mapper.executionTimePure,
                mapper.disambiguationTime,
                mapper.classSearchTime,
                mapper.propertySearchTime,
            ]
            self.resultsIterativePrinter(result, resultsFilename)

    def testMapProperties(self):
        """Run the benchmark over ``self.testTables`` with default settings."""
        # Use [self.testTable] instead to benchmark the single sample table.
        tables = self.testTables
        self.simplePropertyMapping(tables)