def setUp(self):
    """Build the per-test fixtures: logger, property mapper and sampled tables."""
    tableSampler = T2DSampler()
    self.logger = Logger().getLogger(__name__)
    self.simplePropertyMapper = SimplePropertyMapper()
    # A single table plus the table set returned for subject identification.
    self.testTable = tableSampler.getTestTable()
    self.testTables = tableSampler.getTablesSubjectIdentification()
class SimplePropertyMappingTestCase(unittest.TestCase):
    """Report how many gold properties receive a score from SimplePropertyMapper.

    For every table returned by
    ``T2DSampler.getTablesPropertyAnnotationDbpediaGoldStandard()`` the test
    prints ``<gold properties that have a score>, <number of gold properties>``.
    It is a reporting run only: nothing is asserted.
    """

    def setUp(self):
        """Load the test tables and create the mapper under test."""
        sampler = T2DSampler()
        self.testTables = sampler.getTablesPropertyAnnotationDbpediaGoldStandard()
        self.propertyMapper = SimplePropertyMapper()

    def testMapProperties(self):
        """Print one ``retrieved, gold`` line per test table."""
        # logging.disable() only remembers the level of its latest call, so a
        # single INFO call (which also silences DEBUG) is what the previous
        # DEBUG-then-INFO pair actually left in effect.
        logging.disable(logging.INFO)
        # The setting is process-wide; restore it so later tests log normally.
        self.addCleanup(logging.disable, logging.NOTSET)

        for table in self.testTables:
            propertyScores = self.propertyMapper.getScores(table)
            propertiesRetrieved = self.propertiesRetrievedByTP(
                propertyScores, table.propertiesGold)
            # Parenthesised single argument: prints the same text on
            # Python 2 and Python 3.
            print("%s, %s" % (propertiesRetrieved, len(table.propertiesGold)))

    def propertiesRetrievedByTP(self, propertyScores, propertiesGold):
        """Count the gold properties that have a score in their own column.

        Args:
            propertyScores: sequence indexed by column index; each entry is
                queried with ``.get(uri)``.
            propertiesGold: iterable of dicts with the keys ``'columnIndex'``
                and ``'uri'``.

        Returns:
            The number of gold properties whose URI maps to a non-None score
            in ``propertyScores[columnIndex]``; 0 when ``propertyScores`` is
            empty or None.
        """
        if not propertyScores:
            return 0
        return sum(
            1
            for _property in propertiesGold
            if propertyScores[_property['columnIndex']].get(_property['uri']) is not None
        )

    def diffProperties(self, propertiesMapped, propertiesGold):
        """Compare mapped properties with the gold ones column by column.

        Args:
            propertiesMapped: sequence of dicts with ``'columnIndex'`` and
                ``'uri'`` keys.
            propertiesGold: iterable of dicts with the same two keys.

        Returns:
            A tuple ``(overall, correct)``: the number of mapped properties
            and the number of (mapped, gold) pairs that agree on both the
            column index and the URI.
        """
        overall = len(propertiesMapped)
        correct = 0
        for propertyMapped in propertiesMapped:
            # Look for gold entries annotated on the same column.
            for propertyGold in propertiesGold:
                sameColumn = propertyMapped['columnIndex'] == propertyGold['columnIndex']
                if sameColumn and propertyMapped['uri'] == propertyGold['uri']:
                    correct += 1
        return (overall, correct)
def setUp(self):
    """Fetch the property-annotation test tables and create the mapper under test."""
    self.testTables = T2DSampler().getTablesPropertyAnnotationDbpediaGoldStandard()
    self.propertyMapper = SimplePropertyMapper()
def getScores(self, table):
    """Return the scores a freshly created SimplePropertyMapper computes for ``table``."""
    # A new mapper is built on every call; nothing is kept on ``self``.
    return SimplePropertyMapper().getScores(table)
class SimpleCachePropertyMappingBenchTestCase(unittest.TestCase):
    """Run SimplePropertyMapper.mapProperties over the test tables and log a CSV.

    One row per table is appended to a file under ``results/``; the row holds
    the identified properties, hit/miss counters and the timing attributes the
    mapper exposes after each run. Nothing is asserted.
    """

    def setUp(self):
        """Create the logger, the mapper under test and the sampled tables."""
        sampler = T2DSampler()
        self.logger = Logger().getLogger(__name__)
        self.simplePropertyMapper = SimplePropertyMapper()
        self.testTable = sampler.getTestTable()
        self.testTables = sampler.getTablesSubjectIdentification()

    def determineResultsFilename(self, filename):
        """Return a path under ``results/`` that does not exist yet.

        While ``results/<filename>`` exists, the integer in the last
        dot-separated component of ``filename`` is incremented. If that
        component is not an integer, a new ``.1`` counter is appended instead
        of failing.

        Args:
            filename: bare file name, expected to end in ``.<integer>``.

        Returns:
            ``os.path.join("results", <unused file name>)``.
        """
        import os
        while os.path.exists(os.path.join("results", filename)):
            parts = filename.split(".")
            try:
                parts[-1] = str(int(parts[-1]) + 1)
            except ValueError:
                # No numeric counter to bump: start one.
                parts.append("1")
            filename = ".".join(parts)
        return os.path.join("results", filename)

    def resultsIterativePrinter(self, row, filename):
        """Append ``row`` as one CSV record to ``filename``.

        The parent directory is created when missing, so the first write of a
        run does not fail just because ``results/`` is absent.

        Args:
            row: sequence of cell values for ``csv.writer.writerow``.
            filename: path of the CSV file to append to.
        """
        import csv
        import os
        directory = os.path.dirname(filename)
        if directory and not os.path.isdir(directory):
            os.makedirs(directory)
        # NOTE(review): on Python 2 the csv module does not accept non-ASCII
        # unicode cells; confirm the property URIs written here are ASCII.
        with open(filename, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerow(row)

    def simplePropertyMapping(self, tables, rowsToDisambiguate=20, threshold=10):
        """Map the properties of every table and append one result row per table.

        Args:
            tables: iterable of tables offering ``id``, ``isProperty()`` and
                ``getNumberOfProperties()``.
            rowsToDisambiguate: forwarded to ``mapProperties``; also part of
                the results file name.
            threshold: forwarded to ``mapProperties``; also part of the
                results file name.
        """
        resultsFilename = "%srows.%sthreshold.propertyMapping.results.csv.1" % (rowsToDisambiguate, threshold)
        resultsFilename = self.determineResultsFilename(resultsFilename)
        header = ["tableId", "subjectColumn", "subjectColumnCorrect", "propertyIdentified", "correctly", "falsePositives", "notIdentified", "seedListContains", "executionTimeFull", "executionTimePure", "disambiguationTime", "classSearchTime", "propertySearchTime"]
        self.resultsIterativePrinter(header, resultsFilename)

        mapper = self.simplePropertyMapper
        for table in tables:
            properties = mapper.mapProperties(table, rowsToDisambiguate, threshold)

            # The mapper publishes the details of its latest run as attributes.
            subjectColumn = mapper.subjectColumn
            subjectColumnCorrect = mapper.subjectColumnCorrect
            executionTimeFull = mapper.executionTimeFull
            executionTimePure = mapper.executionTimePure
            disambiguationTime = mapper.disambiguationTime
            classSearchTime = mapper.classSearchTime
            propertySearchTime = mapper.propertySearchTime
            seedListContains = mapper.seedListContains

            falsePositives = 0
            # Start from every property of the table and subtract each hit;
            # the remainder is reported in the "notIdentified" column.
            falseNegatives = table.getNumberOfProperties()
            correct = 0
            uris = []
            for _property in properties:
                # Each mapped property is a (uri, column index) pair.
                (uri, _columnIndex) = _property
                uris.append(uri)
                if table.isProperty(_property):
                    correct += 1
                    falseNegatives -= 1
                else:
                    falsePositives += 1
            propertiesString = u"|".join(uris)

            result = [table.id, subjectColumn, subjectColumnCorrect, propertiesString, correct, falsePositives, falseNegatives, seedListContains, executionTimeFull, executionTimePure, disambiguationTime, classSearchTime, propertySearchTime]
            self.resultsIterativePrinter(result, resultsFilename)

    def testMapProperties(self):
        """Run the benchmark over all tables loaded in setUp."""
        self.simplePropertyMapping(self.testTables)