Example #1
0
    def getRowsNumber(self):
        """Return the number of rows in the dataset at ``self.datasetPath``.

        Returns:
            The row count of the loaded data frame, or -1 when
            ``self.checkDataset`` reports the frame as invalid.
        """
        # predefined where are numerical and subject columns
        # (presumably subject column 1 and numerical columns [2] -- confirm
        # against the Dataset constructor)
        dataset = Dataset(self.datasetPath, 1, [2])
        if self.checkDataset(dataset.df) == False:
            return -1

        # DataFrame.size is rows * columns (the total cell count); the length
        # of the index is the actual number of rows.
        return len(dataset.df.index)
Example #2
0
    def getScores(self):
        """Run the semantification over the dataset and collect its results.

        Returns:
            -1 when ``self.checkDataset`` reports the data frame as invalid;
            otherwise a dict with the keys ``'columnResults'``,
            ``'rowResults'`` and ``'finalResults'`` taken from the
            ``Semantification`` object.
        """
        # The subject / numerical column positions are fixed here
        # (presumably subject column 1, numerical columns [2] -- confirm).
        dataset = Dataset(self.datasetPath, 1, [2])
        datasetIsValid = self.checkDataset(dataset.df)
        if datasetIsValid == False:
            return -1

        semantification = Semantification(dataset)
        return {
            'columnResults': semantification.columnsResultsKS,
            'rowResults': semantification.rowPredictions,
            'finalResults': semantification.finalResults,
        }
Example #3
0
    def getScores(self):
        """Score the dataset after forcing the subject column to the known type.

        The subject column's type list and every cell prediction are
        overwritten with the value from ``self.getCorrectPrediction()``
        before the semantification runs.

        Returns:
            -1 when ``self.checkDataset`` reports the data frame as invalid;
            otherwise a dict with the keys ``'columnResults'``,
            ``'rowResults'`` and ``'finalResults'`` taken from the
            ``Semantification`` object.
        """
        # The subject / numerical column positions are fixed here
        # (presumably subject column 0, numerical columns [2] -- confirm).
        dataset = Dataset(self.datasetPath, 0, [2])
        datasetIsValid = self.checkDataset(dataset.df)
        if datasetIsValid == False:
            return -1

        correctProperty = self.getCorrectPrediction()

        # Replace the predicted column type and per-cell types with the
        # correct one; each cell's own value is used as its uri.
        subjectColumn = dataset.subjectColumn
        subjectColumn.columnTypes = [(correctProperty, 1)]
        for prediction in subjectColumn.cellPredictions:
            prediction.uri = prediction.cell
            prediction.types = correctProperty

        semantification = Semantification(dataset)
        return {
            'columnResults': semantification.columnsResultsKS,
            'rowResults': semantification.rowPredictions,
            'finalResults': semantification.finalResults,
        }
Example #4
0
 def getColumnMapping(self):
     """Return the column labels of the dataset as a list.

     Returns:
         -1 when ``self.checkDataset`` reports the data frame as invalid;
         otherwise ``list(df.columns.values)`` for the loaded frame.
     """
     dataset = Dataset(self.datasetPath, 0, [2])
     frame = dataset.df
     datasetIsValid = self.checkDataset(frame)
     if datasetIsValid == False:
         return -1

     columnLabels = frame.columns.values
     return list(columnLabels)
Example #5
0
from approach.Dataset import Dataset

# Load each sample table in turn and print the column ids Dataset detected.
# Every entry pairs a file name with the label printed before its numerical
# column ids (the first table uses a different label than the other two).
for csvName, numericalLabel in (
        ('96960685_0_6886906070865701391.csv', "numerical column are [2] "),
        ('10786782_0_7941448888047609465.csv', "numerical column is "),
        ('52340077_0_7623033473986759010.csv', "numerical column is "),
):
    dataset = Dataset(csvName)
    print("subject columns is " + str(dataset.subjectColumnId))
    print(numericalLabel)
    print(dataset.numericalColumnsIds)
    print(dataset.columnsWithMappingsIds)
Example #6
0
from approach.Dataset import Dataset

from approach.config.paths import *
from approach.config.imports import *

# Build a Dataset from `path` (not defined in this snippet; presumably
# provided by one of the wildcard imports above -- confirm).
d = Dataset(path)

# Disabled debug output: dataset name, dimensions, headers and per-column
# name/type.
#print d.name
#print "Rows: " + str(d.noRows)
#print "Column: " + str(d.noColumns)
#print "Headers are: "
#print d.printHeaders()

#for column in d.columns:
#    print column.name
#    print column.type
Example #7
0
from approach.Dataset import Dataset
from approach.config.paths import *
from approach.config.imports import *

# Construct a Dataset for every entry listed in datasetsPath, printing a
# separator line and the entry name before each one.
separator = "______________________________________________"
for fn in os.listdir(datasetsPath):
    print(separator)
    print(fn)
    dataset = Dataset(fn)
from approach.Dataset import Dataset

# Load a single sample table; the commented-out lines are alternative
# input files that can be swapped in.
#dataset = Dataset('10786782_0_7941448888047609465.csv')
#dataset = Dataset('96960685_0_6886906070865701391.csv')
dataset = Dataset('56834172_0_5710924050177414995.csv')
Example #9
0
from approach.Dataset import Dataset
from approach.Semantification import Semantification

from approach.config.paths import *
from approach.config.imports import *

#for fn in os.listdir(datasetsPath):
#    sem = Semantification(fn)
#    sem.getRowPredictions()

#sem = Semantification('56834172_0_5710924050177414995.csv')

# Run the semantification on the DBpedia test table and print its three
# result sets, separated by "___" lines.
dbpediaTestCsv = '/Users/emiliakacprzak/Code/papers/semantification/data/dbpediaTest.csv'
dataset = Dataset(dbpediaTestCsv, 0, [1])
sem = Semantification(dataset)

divider = "___"
print(sem.columnsResultsKS)
print(divider)
print(sem.rowPredictions)
print(divider)
print(sem.finalResults)