def main():
    """Register the file and its columns in Neo4j, then process each value.

    A node for ``nameOfFile`` is inserted, followed by one "Column"
    relationship per entry returned by ``CSVRead.readCSV``.  Afterwards, for
    every "Column" relationship found on the file node, the support mapping
    of the matching column index is fetched and one ``itemThread`` is run per
    key of that mapping.
    """
    Neo4jDrive.insertNode(nameOfFile)
    headers = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for header in headers:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", header)

    # The lookup result is only iterated, so count it by exhausting it.
    column_total = len(
        list(Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")))

    for column_index in range(column_total):
        support = CSVRead.getSupport(nameOfFile, column_index)
        value_total = CSVRead.numberOfItems(support)

        for value in support.keys():
            worker = itemThread(value, headers, column_index, support,
                                value_total)
            # Each worker is joined before the next one is created.
            worker.start()
            worker.join()
def run(data, tables, size):
    """Create column nodes for every table, then run the analysis workers.

    Phase 1 reads the header of each table, strips the names, inserts a
    "Column" relationship from the table to each name, tags both the node
    and the relationship with ``type='Column'`` and collects the per-column
    support.  Phase 2 runs one ``ccThread`` per cell, phase 3 one
    ``dmsThread`` per ordered pair of distinct columns in each row, and
    phase 4 one ``topDownThread`` per column name.

    Args:
        data: iterable of pieces; ``piece[0]`` is a row indexable by column
            position and ``piece[1]`` is the index of its table in
            ``tables``.
        tables: sequence of CSV file names.
        size: indexable by table position; ``size[i]`` is passed unchanged
            to the workers (presumably the table's row count -- TODO
            confirm against the caller).

    An empty ``tables`` no longer fails: the previous implementation read
    the table loop variable after the loop to compute a value that was
    never used.
    """
    columnNames = []
    support = []
    for nameOfFile in tables:
        header = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1])
        names = [c.strip() for c in header]
        columnNames.append(names)

        tableSupport = []
        support.append(tableSupport)
        for j, name in enumerate(names):
            # insertNodeAndRelationship returns a sequence; its first
            # element is the object that gets tagged and pushed below.
            z = Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)[0]
            node = Neo4jDrive.findNodeByName(name)
            node.properties['type'] = 'Column'
            node.push()
            z.properties['type'] = "Column"
            z.push()
            tableSupport.append(CSVRead.getSupport(nameOfFile, j))

    hyplock = Lock()
    stypelock = Lock()

    # Phase 2: one worker per cell; each is joined before the next starts.
    for itemPiece in data:
        indexOfFile = itemPiece[1]
        item = itemPiece[0]
        names = columnNames[indexOfFile]
        for column in range(len(names)):
            k = ccThread(item[column], names, column,
                         support[indexOfFile], size[indexOfFile])
            k.start()
            k.join()

    # Phase 3: one worker per ordered pair of distinct columns in a row.
    for itemPiece in data:
        indexOfFile = itemPiece[1]
        item = itemPiece[0]
        names = columnNames[indexOfFile]
        for column in range(len(names)):
            for perm_column in range(len(names)):
                if perm_column == column:
                    continue
                k = dmsThread(item[column], item[perm_column],
                              size[indexOfFile], names, column, perm_column)
                k.start()
                k.join()

    # Phase 4: one worker per column name, sharing the two locks and the
    # set of CC nodes fetched once up front.
    allCC = set(Neo4jDrive.findAllCCNodes())
    for s, c in enumerate(columnNames):
        for column in c:
            k = topDownThread(column, hyplock, stypelock, allCC, size[s])
            k.start()
            k.join()
def main():
    """Insert the file and its columns into Neo4j and run a worker per value.

    After inserting the ``nameOfFile`` node and a "Column" relationship for
    each name returned by ``CSVRead.readCSV``, the function walks the column
    indices (as many as there are "Column" relationships on the file node),
    loads each column's support mapping and runs one ``itemThread`` for
    every key in it.
    """
    Neo4jDrive.insertNode(nameOfFile)
    column_headers = CSVRead.readCSV(
        nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for column_header in column_headers:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", column_header)

    relationships = Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")
    relationship_count = sum(1 for _ in relationships)

    for position in range(relationship_count):
        support = CSVRead.getSupport(nameOfFile, position)
        item_count = CSVRead.numberOfItems(support)

        for entry in support.keys():
            thread = itemThread(entry, column_headers, position, support,
                                item_count)
            # start() is immediately followed by join(), so the next worker
            # is not created until this one has finished.
            thread.start()
            thread.join()
Beispiel #4
0
def main():
    Neo4jDrive.insertNode(nameOfFile)
    columnNames=CSVRead.readCSV(nameOfFile,firstRow=True, choice=[0,1,2,3,4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile,"Column",name)
    
    #support=CSVRead.getSupport(nameOfFile,0)
    #totalNumberOfValues=CSVRead.numberOfItems(support)
    for column in range(sum([1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile,"Column")])):
        support=CSVRead.getSupport(nameOfFile,column)
        totalNumberOfValues=CSVRead.numberOfItems(support)
        
        #print i.end_node
        #cNode=Neo4jDrive.findNodeByName(columnNames[column])
         
        for item in support.keys():
            node=Neo4jDrive.findNodeByName(item)
            if  node== None:
                Neo4jDrive.insertNodeAndRelationship(columnNames[column],'dataItems',item)
                node=Neo4jDrive.findNodeByName(item)
                node.properties['fvalue']=support[item]
                node.push()
                rlist=sparqlQuerypy.findBottomUp(item)
                for r in rlist:
                    try:
                        rel_data=Neo4jDrive.insertNodeAndRelationship(item,"cc",r[0])
                        rel_data1=Neo4jDrive.insertNodeAndRelationship(r[0],"dd",r[2])          
                        node=node=Neo4jDrive.findNodeByName(r[2])
                        if node.properties['incoming']==None:
                            node.properties['incoming']=1
                        else:
                            node.properties['incoming']+=1
                        node.properties['type']='type'
                        node.push()
                    except :
                        
                        print columnNames[column],'cc',r[0]

                    rel_data=rel_data[0]
                    rel_data.properties['rel_class'] = 'cc'
                    rel_data.properties['support']=support[item]/(totalNumberOfValues*1.0)
                    rel_data.push()
Beispiel #5
0
def main():
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    # support=CSVRead.getSupport(nameOfFile,0)
    # totalNumberOfValues=CSVRead.numberOfItems(support)
    for column in range(sum([1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")])):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)

        # print i.end_node
        # cNode=Neo4jDrive.findNodeByName(columnNames[column])

        for item in support.keys():
            node = Neo4jDrive.findNodeByName(item)
            if node == None:
                Neo4jDrive.insertNodeAndRelationship(columnNames[column], "dataItems", item)
                node = Neo4jDrive.findNodeByName(item)
                node.properties["fvalue"] = support[item]
                node.push()
                rlist = sparqlQuerypy.findBottomUp(item)
                for r in rlist:
                    try:
                        rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                        rel_data1 = Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                        node = node = Neo4jDrive.findNodeByName(r[2])
                        if node.properties["incoming"] == None:
                            node.properties["incoming"] = 1
                        else:
                            node.properties["incoming"] += 1
                        node.properties["type"] = "type"
                        node.push()
                    except:

                        print columnNames[column], "cc", r[0]

                    rel_data = rel_data[0]
                    rel_data.properties["rel_class"] = "cc"
                    rel_data.properties["support"] = support[item] / (totalNumberOfValues * 1.0)
                    rel_data.push()