def getRowsNumber(self):
    """Return the number of rows of the dataset stored at ``self.datasetPath``.

    The ``1`` and ``[2]`` passed to ``Dataset`` are hard-coded; according to
    the original comment they predefine where the subject and numerical
    columns are (presumably subject column 1 and numerical columns [2] --
    confirm against the ``Dataset`` constructor).

    Returns:
        The number of rows in ``dataset.df``, or ``-1`` when
        ``self.checkDataset(dataset.df)`` returns ``False``.
    """
    dataset = Dataset(self.datasetPath, 1, [2])
    if self.checkDataset(dataset.df) == False:
        return -1
    # Assuming ``dataset.df`` is a pandas DataFrame: ``df.size`` is the total
    # number of cells (rows * columns), not the row count this method is
    # named for, so count the index entries instead.
    return len(dataset.df.index)
def getScores(self):
    """Run the semantification over the dataset and gather its results.

    The ``1`` and ``[2]`` passed to ``Dataset`` are hard-coded; according to
    the original comment they predefine where the subject and numerical
    columns are (presumably subject column 1 and numerical columns [2] --
    confirm against the ``Dataset`` constructor).

    Returns:
        ``-1`` when ``self.checkDataset`` returns ``False`` for the loaded
        frame; otherwise a dict with the keys ``'columnResults'``,
        ``'rowResults'`` and ``'finalResults'`` taken from the
        ``Semantification`` object.
    """
    loaded = Dataset(self.datasetPath, 1, [2])
    frameIsUsable = self.checkDataset(loaded.df)
    if frameIsUsable == False:
        return -1

    semantification = Semantification(loaded)
    return {
        'columnResults': semantification.columnsResultsKS,
        'rowResults': semantification.rowPredictions,
        'finalResults': semantification.finalResults,
    }
def getScores(self):
    """Run the semantification with the subject column forced to a known type.

    Before ``Semantification`` is constructed, the subject column's type list
    is replaced by the value from ``self.getCorrectPrediction()`` and every
    cell prediction of that column is rewritten to match.

    The ``0`` and ``[2]`` passed to ``Dataset`` are hard-coded; according to
    the original comment they predefine where the subject and numerical
    columns are (presumably subject column 0 and numerical columns [2] --
    confirm against the ``Dataset`` constructor).

    Returns:
        ``-1`` when ``self.checkDataset`` returns ``False`` for the loaded
        frame; otherwise a dict with the keys ``'columnResults'``,
        ``'rowResults'`` and ``'finalResults'`` taken from the
        ``Semantification`` object.
    """
    loaded = Dataset(self.datasetPath, 0, [2])
    if self.checkDataset(loaded.df) == False:
        return -1

    knownProperty = self.getCorrectPrediction()

    # Overwrite the column type with the correct type (paired with 1) ...
    loaded.subjectColumn.columnTypes = [(knownProperty, 1)]
    # ... and do the same for every cell: the cell's own value becomes its
    # uri and its types become the known property.
    for prediction in loaded.subjectColumn.cellPredictions:
        prediction.uri = prediction.cell
        prediction.types = knownProperty

    semantification = Semantification(loaded)
    # NOTE: a follow-up call to self.checkCorrect on the final results was
    # disabled in the original and remains disabled here.
    return {
        'columnResults': semantification.columnsResultsKS,
        'rowResults': semantification.rowPredictions,
        'finalResults': semantification.finalResults,
    }
def getColumnMapping(self):
    """Return the column labels of the dataset at ``self.datasetPath``.

    Returns:
        ``-1`` when ``self.checkDataset`` returns ``False`` for the loaded
        frame; otherwise a list built from ``dataset.df.columns.values``.
    """
    loaded = Dataset(self.datasetPath, 0, [2])
    if self.checkDataset(loaded.df) == False:
        return -1

    columnLabels = loaded.df.columns.values
    return list(columnLabels)
from approach.Dataset import Dataset

# Scratch script: load three sample tables and print, for each one, the
# detected subject column id, the numerical column ids and the ids of the
# columns that have mappings.
#
# Each entry pairs a file name with the label printed before its numerical
# column ids (the first label intentionally differs from the other two, as in
# the original output).
sampleTables = (
    ('96960685_0_6886906070865701391.csv', "numerical column are [2] "),
    ('10786782_0_7941448888047609465.csv', "numerical column is "),
    ('52340077_0_7623033473986759010.csv', "numerical column is "),
)

# Single-argument parenthesised prints produce the same output as the
# Python 2 print statement.
for csvName, numericalLabel in sampleTables:
    dataset = Dataset(csvName)
    print("subject columns is " + str(dataset.subjectColumnId))
    print(numericalLabel)
    print(dataset.numericalColumnsIds)
    print(dataset.columnsWithMappingsIds)
# Scratch script: construct a single Dataset.
from approach.Dataset import Dataset
from approach.config.paths import *
from approach.config.imports import *

# `path` is not defined in this script; presumably one of the star imports
# above provides it -- TODO confirm.
d = Dataset(path)

# Disabled inspection output, kept for reference:
#print d.name
#print "Rows: " + str(d.noRows)
#print "Column: " + str(d.noColumns)
#print "Headers are: "
#print d.printHeaders()
#for column in d.columns:
    #print column.name
#    print column.type
from approach.Dataset import Dataset
from approach.config.paths import *
from approach.config.imports import *

# Scratch script: construct a Dataset for every entry of the datasets
# directory, printing a separator and the entry name first.
# `os` and `datasetsPath` are not imported explicitly here; presumably the
# star imports above provide them -- TODO confirm.
datasetFiles = os.listdir(datasetsPath)
for csvName in datasetFiles:
    # Single-argument parenthesised prints match the Python 2 print statement.
    print("______________________________________________")
    print(csvName)
    dataset = Dataset(csvName)
from approach.Dataset import Dataset

# Scratch script: construct a Dataset for one sample table.
# Tables used in earlier runs (currently disabled):
#   '10786782_0_7941448888047609465.csv'
#   '96960685_0_6886906070865701391.csv'
sampleTable = '56834172_0_5710924050177414995.csv'
dataset = Dataset(sampleTable)
from approach.Dataset import Dataset
from approach.Semantification import Semantification
from approach.config.paths import *
from approach.config.imports import *

# Scratch script: run the semantification on one test table and print the
# column results, the row predictions and the final results, in that order.

# Disabled alternatives, kept for reference:
#for fn in os.listdir(datasetsPath):
#    sem = Semantification(fn)
#    sem.getRowPredictions()
#sem = Semantification('56834172_0_5710924050177414995.csv')

# NOTE(review): absolute, machine-specific path; the 0 and [1] are presumably
# the subject column index and the numerical column indices -- confirm
# against the Dataset constructor.
testTablePath = '/Users/emiliakacprzak/Code/papers/semantification/data/dbpediaTest.csv'
dataset = Dataset(testTablePath, 0, [1])
sem = Semantification(dataset)

# Single-argument parenthesised prints match the Python 2 print statement.
divider = "___"
print(sem.columnsResultsKS)
print(divider)
print(sem.rowPredictions)
print(divider)
print(sem.finalResults)