Ejemplo n.º 1
0
    def run(self):
        """Link this worker's column to every class found for ``self.item``.

        ``self.item`` is looked up with ``sparqlQuerypy.findBottomUp``. For
        each returned row ``r``:

        * a "cc" relationship is inserted from the column node
          (``self.columnNames[self.column]``) to the node named ``r[2]``;
        * that node's ``incoming`` counter is started at 1 or incremented,
          its ``type`` is set to ``'type'`` and it is pushed;
        * the first element of the insert result is tagged with
          ``rel_class='cc'`` and ``support = support[item] /
          totalNumberOfValues`` and pushed.
        """
        support = self.support
        totalNumberOfValues = self.totalNumberOfValues

        column = self.column
        columnNames = self.columnNames
        item = self.item

        for r in sparqlQuerypy.findBottomUp(item):
            target_name = r[2]
            rel_data = Neo4jDrive.insertNodeAndRelationship(
                columnNames[column], "cc", target_name)

            node = Neo4jDrive.findNodeByName(target_name)
            # A property that was never set reads back as None; compare by
            # identity rather than with ``==``.
            incoming = node.properties['incoming']
            node.properties['incoming'] = 1 if incoming is None else incoming + 1
            node.properties['type'] = 'type'
            node.push()

            relationship = rel_data[0]
            relationship.properties['rel_class'] = 'cc'
            # ``* 1.0`` forces float division even when both operands are ints.
            relationship.properties['support'] = support[item] / (
                totalNumberOfValues * 1.0)
            relationship.push()
Ejemplo n.º 2
0
    def run(self):
        """Insert ``self.item`` as a data item and link it to its classes.

        Nothing happens when a node named ``self.item`` already exists.
        Otherwise:

        1. a "dataItems" relationship is inserted from the column node
           (``self.columnNames[self.column]``) to the item, and the item
           node's ``fvalue`` is set to ``support[item]``;
        2. for every row ``r`` returned by ``sparqlQuerypy.findBottomUp``,
           a "cc" relationship item -> ``r[0]`` and a "dd" relationship
           ``r[0]`` -> ``r[2]`` are inserted, the ``r[2]`` node's
           ``incoming`` counter is bumped and its ``type`` set to
           ``'type'``;
        3. the "cc" relationship is tagged with ``rel_class='cc'`` and
           ``support = support[item] / totalNumberOfValues``.

        Failures while processing a row are reported on stdout and do not
        abort the remaining rows.
        """
        support = self.support
        totalNumberOfValues = self.totalNumberOfValues

        column = self.column
        columnNames = self.columnNames
        item = self.item

        if Neo4jDrive.findNodeByName(item) is not None:
            return

        Neo4jDrive.insertNodeAndRelationship(columnNames[column], 'dataItems', item)
        item_node = Neo4jDrive.findNodeByName(item)
        item_node.properties['fvalue'] = support[item]
        item_node.push()

        for r in sparqlQuerypy.findBottomUp(item):
            # Stays None if the "cc" insert itself fails, so that a failed row
            # never reuses the relationship left over from an earlier row.
            rel_data = None
            try:
                rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                type_node = Neo4jDrive.findNodeByName(r[2])
                incoming = type_node.properties['incoming']
                type_node.properties['incoming'] = 1 if incoming is None else incoming + 1
                type_node.properties['type'] = 'type'
                type_node.push()
            except Exception:
                # Best effort: report the row and keep going. A single
                # parenthesised string prints the same text under Python 2
                # and Python 3.
                print('%s cc %s' % (columnNames[column], r[0]))

            if rel_data is None:
                # No "cc" relationship was created for this row.
                continue

            relationship = rel_data[0]
            relationship.properties['rel_class'] = 'cc'
            # ``* 1.0`` forces float division even when both operands are ints.
            relationship.properties['support'] = support[item] / (totalNumberOfValues * 1.0)
            relationship.push()
Ejemplo n.º 3
0
    def ccScores(self):
        """Compute per-column class ("cc") scores and push them to Neo4j.

        Phase 1 - counting. For every column in ``self.columnNames`` and
        every row of ``self.data``, the stripped cell value is looked up with
        ``sparqlQuerypy.findBottomUp``. Each returned row ``r`` bumps
        ``self.relationships[column][r[0]]['incoming']`` and refreshes
        ``['cc'] = incoming / self.totalSize``. ``self.relationships[column]``
        is reset to an empty dict before counting.

        Phase 2 - relationships. One "cc" relationship column -> class is
        inserted per counted class; its ``fk`` property is the score from
        phase 1.

        Phase 3 - class nodes. Each class node gets ``ccs`` = the mean of
        ``fk`` over the links returned by ``Neo4jDrive.findIncomingCCLinks``
        (0.0 when there are none) and ``type = 'cc'``.
        """
        data = self.data
        columnNames = self.columnNames
        totalSize = self.totalSize
        relationships = self.relationships

        # Phase 1: count how often each class is returned, per column.
        # NOTE(review): a class is counted once per returned row, so a class
        # returned several times for the same cell is counted several times.
        # Confirm this is intended.
        for i, column in enumerate(columnNames):
            column_scores = relationships[column] = {}
            for element in data:
                value = element[i].strip()
                rlist = sparqlQuerypy.findBottomUp(value)
                # Single-string print: same output under Python 2 and 3.
                print('number of nodes for %s  is  %s' % (value, len(rlist)))
                for r in rlist:
                    entry = column_scores.setdefault(r[0], {})
                    entry['name'] = 'cc'
                    entry['incoming'] = entry.get('incoming', 0) + 1
                    entry['cc'] = entry['incoming'] * 1.0 / totalSize

        # Phase 2: push the relationships, remembering every class seen so
        # that each class node is updated only once in phase 3.
        classSet = set()
        for column in columnNames:
            for class_name, entry in relationships[column].items():
                classSet.add(class_name)
                rel_data = Neo4jDrive.insertNodeAndRelationship(column, 'cc', class_name)[0]
                rel_data.properties['rel_class'] = 'cc'
                rel_data.properties['fk'] = entry['cc']
                rel_data.push()

        # Phase 3: CCS = sum(fk) / count(fk) over the incoming cc links.
        for class_name in classSet:
            print(class_name)
            fk_values = [link[0].properties['fk']
                         for link in Neo4jDrive.findIncomingCCLinks(class_name)]
            node = Neo4jDrive.findNodeByName(class_name)
            # Guard the empty case instead of dividing by zero.
            if fk_values:
                node.properties['ccs'] = sum(fk_values) * 1.0 / len(fk_values)
            else:
                node.properties['ccs'] = 0.0
            node.properties['type'] = 'cc'
            node.push()
Ejemplo n.º 4
0
    def run(self):
        """Link this worker's column to the classes found for ``self.item``.

        The stripped item is looked up with ``sparqlQuerypy.findBottomUp``;
        the number of results is printed and also written to ``log`` (a name
        defined outside this method). For each returned row ``r``:

        * a "cc" relationship is inserted from the column node
          (``self.columnNames[self.column]``) to the node named ``r[2]``;
          the first element of the insert result is used as the relationship;
        * the relationship's ``incoming`` / ``ccs`` properties are
          initialised or updated (see the inline notes);
        * the ``r[2]`` node gets ``type = 'cc'`` and ``ccs`` = the mean of
          ``ccs`` over the links returned by
          ``Neo4jDrive.findIncomingCCLinks`` (0 when there are none);
        * the relationship is tagged with ``rel_class = 'cc'`` and pushed.
        """
        # NOTE: ``support`` is read here but not used anywhere below.
        support=self.support
        # ``*1.0`` makes this a float so the divisions below are not integer
        # divisions.
        totalNumberOfValues=self.totalNumberOfValues*1.0

        column=self.column
        columnNames=self.columnNames
        item=self.item
        rlist=sparqlQuerypy.findBottomUp(item.strip())

        print 'number of nodes for', item.strip(), " is ", len(rlist)
        log.write('number of nodes for'+str( item.strip())+ " is "+ str(len(rlist))+'\n')
        # Lets the "increment" branch below run at most once per call to
        # run(), no matter how many rows take the else path.
        flag=0
        for r in rlist:
            rel_data=Neo4jDrive.insertNodeAndRelationship(columnNames[column],"cc",r[2])       
            rel_data=rel_data[0]
            node=Neo4jDrive.findNodeByName(r[2])
            # Debug output for one specific class URI.
            if r[2]=='http://dbpedia.org/ontology/PopulatedPlace':
                print columnNames[column], 'Happening'
            
                print 'potato',rel_data
            # First time this relationship is seen: start the counter at 1.
            if rel_data.properties['incoming']==None: # TODO: find out why this branch is not being taken
                rel_data.properties['incoming']=1
                rel_data.properties['ccs']=1/totalNumberOfValues
                rel_data.push()
                #print 'tomato',rel_data
            else:
                if flag==0:
                    rel_data.properties['incoming']+=1
                    # This push happens before 'ccs' is updated; the new
                    # 'ccs' value is only pushed by the final
                    # rel_data.push() at the end of the iteration.
                    rel_data.push()
                    # NOTE(review): this reads the *node's* 'incoming'
                    # property, not the relationship's counter incremented
                    # just above, and this method never sets the node's
                    # 'incoming' -- confirm which one was intended.
                    rel_data.properties['ccs']=node.properties['incoming']/totalNumberOfValues
                    flag=1
            node.properties['type']='cc'
            # Node score: average of 'ccs' over the incoming cc links; stays
            # 0 when no links are returned.
            node.properties['ccs']=0
            numberOfLinks=0
            for link in Neo4jDrive.findIncomingCCLinks(r[2]):
                node.properties['ccs']+=link[0].properties['ccs']
                numberOfLinks+=1
            if numberOfLinks>0: node.properties['ccs']/=numberOfLinks
            node.push()
            
            
            
            rel_data.properties['rel_class'] = 'cc'
            #rel_data.properties['ccs']=node.proper/(totalNumberOfValues*1.0)
            rel_data.push()
Ejemplo n.º 5
0
def main():
    """Load the CSV named by ``nameOfFile`` into Neo4j and link items to classes.

    ``nameOfFile`` is a name defined outside this function. Steps:

    1. a node is inserted for the file, plus one "Column" relationship per
       column name returned by ``CSVRead.readCSV``;
    2. for each column index (as many as the file node has "Column"
       relationships), the support table from ``CSVRead.getSupport`` is
       walked; every item that has no node yet is inserted under its column
       with ``fvalue = support[item]``;
    3. for each row ``r`` that ``sparqlQuerypy.findBottomUp`` returns for a
       new item, a "cc" relationship item -> ``r[0]`` and a "dd" relationship
       ``r[0]`` -> ``r[2]`` are inserted, the ``r[2]`` node's ``incoming``
       counter is bumped, and the "cc" relationship is tagged with
       ``rel_class='cc'`` and ``support = support[item] /
       totalNumberOfValues``.

    Failures while processing a row are reported on stdout and do not abort
    the remaining rows.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    column_count = sum(1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"))
    for column in range(column_count):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)

        for item in support.keys():
            if Neo4jDrive.findNodeByName(item) is not None:
                # Item already in the graph; nothing to add.
                continue

            Neo4jDrive.insertNodeAndRelationship(columnNames[column], 'dataItems', item)
            item_node = Neo4jDrive.findNodeByName(item)
            item_node.properties['fvalue'] = support[item]
            item_node.push()

            for r in sparqlQuerypy.findBottomUp(item):
                # Stays None if the "cc" insert itself fails, so that a failed
                # row never reuses the relationship from an earlier row.
                rel_data = None
                try:
                    rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                    Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                    type_node = Neo4jDrive.findNodeByName(r[2])
                    incoming = type_node.properties['incoming']
                    type_node.properties['incoming'] = 1 if incoming is None else incoming + 1
                    type_node.properties['type'] = 'type'
                    type_node.push()
                except Exception:
                    # Best effort: report the row and keep going. A single
                    # parenthesised string prints the same text under
                    # Python 2 and Python 3.
                    print('%s cc %s' % (columnNames[column], r[0]))

                if rel_data is None:
                    # No "cc" relationship was created for this row.
                    continue

                relationship = rel_data[0]
                relationship.properties['rel_class'] = 'cc'
                # ``* 1.0`` forces float division even for int operands.
                relationship.properties['support'] = support[item] / (totalNumberOfValues * 1.0)
                relationship.push()
Ejemplo n.º 6
0
def main():
    """Load the CSV named by ``nameOfFile`` into Neo4j and link items to classes.

    ``nameOfFile`` is a name defined outside this function. Steps:

    1. a node is inserted for the file, plus one "Column" relationship per
       column name returned by ``CSVRead.readCSV``;
    2. for each column index (as many as the file node has "Column"
       relationships), the support table from ``CSVRead.getSupport`` is
       walked; every item that has no node yet is inserted under its column
       with ``fvalue = support[item]``;
    3. for each row ``r`` that ``sparqlQuerypy.findBottomUp`` returns for a
       new item, a "cc" relationship item -> ``r[0]`` and a "dd" relationship
       ``r[0]`` -> ``r[2]`` are inserted, the ``r[2]`` node's ``incoming``
       counter is bumped, and the "cc" relationship is tagged with
       ``rel_class="cc"`` and ``support = support[item] /
       totalNumberOfValues``.

    Failures while processing a row are reported on stdout and do not abort
    the remaining rows.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    column_count = sum(1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"))
    for column in range(column_count):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)

        for item in support.keys():
            if Neo4jDrive.findNodeByName(item) is not None:
                # Item already in the graph; nothing to add.
                continue

            Neo4jDrive.insertNodeAndRelationship(columnNames[column], "dataItems", item)
            item_node = Neo4jDrive.findNodeByName(item)
            item_node.properties["fvalue"] = support[item]
            item_node.push()

            for r in sparqlQuerypy.findBottomUp(item):
                # Stays None if the "cc" insert itself fails, so that a failed
                # row never reuses the relationship from an earlier row.
                rel_data = None
                try:
                    rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                    Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                    type_node = Neo4jDrive.findNodeByName(r[2])
                    incoming = type_node.properties["incoming"]
                    type_node.properties["incoming"] = 1 if incoming is None else incoming + 1
                    type_node.properties["type"] = "type"
                    type_node.push()
                except Exception:
                    # Best effort: report the row and keep going. A single
                    # parenthesised string prints the same text under
                    # Python 2 and Python 3.
                    print("%s cc %s" % (columnNames[column], r[0]))

                if rel_data is None:
                    # No "cc" relationship was created for this row.
                    continue

                relationship = rel_data[0]
                relationship.properties["rel_class"] = "cc"
                # ``* 1.0`` forces float division even for int operands.
                relationship.properties["support"] = support[item] / (totalNumberOfValues * 1.0)
                relationship.push()
Ejemplo n.º 7
0
    def run(self):
        """Link this worker's column to every class found for ``self.item``.

        ``self.item`` is looked up with ``sparqlQuerypy.findBottomUp``. For
        each returned row ``r``:

        * a "cc" relationship is inserted from the column node
          (``self.columnNames[self.column]``) to the node named ``r[2]``;
        * that node's ``incoming`` counter is started at 1 or incremented,
          its ``type`` is set to ``"type"`` and it is pushed;
        * the first element of the insert result is tagged with
          ``rel_class="cc"`` and ``support = support[item] /
          totalNumberOfValues`` and pushed.
        """
        support = self.support
        totalNumberOfValues = self.totalNumberOfValues

        column = self.column
        columnNames = self.columnNames
        item = self.item

        for r in sparqlQuerypy.findBottomUp(item):
            target_name = r[2]
            rel_data = Neo4jDrive.insertNodeAndRelationship(columnNames[column], "cc", target_name)

            node = Neo4jDrive.findNodeByName(target_name)
            # A property that was never set reads back as None; compare by
            # identity rather than with ``==``.
            incoming = node.properties["incoming"]
            if incoming is None:
                node.properties["incoming"] = 1
            else:
                node.properties["incoming"] = incoming + 1
            node.properties["type"] = "type"
            node.push()

            relationship = rel_data[0]
            relationship.properties["rel_class"] = "cc"
            # ``* 1.0`` forces float division even when both operands are ints.
            relationship.properties["support"] = support[item] / (totalNumberOfValues * 1.0)
            relationship.push()