def main():
    """Load the CSV named by ``nameOfFile`` into the graph, value by value.

    Steps, in order:
      1. Insert a node for the file itself.
      2. Insert one "Column" node/relationship per name returned by
         ``CSVRead.readCSV(..., firstRow=True)``.
      3. For every column index, fetch that column's support mapping and run
         one ``itemThread`` per key of the mapping.
    """
    Neo4jDrive.insertNode(nameOfFile)
    headerNames = CSVRead.readCSV(
        nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for header in headerNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", header)

    # The column count is read back from the graph: it is the number of
    # entries yielded by findRelationshipsOfNode for the "Column" label.
    columnRelationships = Neo4jDrive.findRelationshipsOfNode(
        nameOfFile, "Column")
    columnCount = sum(1 for _ in columnRelationships)

    for columnIndex in range(columnCount):
        columnSupport = CSVRead.getSupport(nameOfFile, columnIndex)
        valueCount = CSVRead.numberOfItems(columnSupport)

        for value in columnSupport.keys():
            worker = itemThread(value, headerNames, columnIndex,
                                columnSupport, valueCount)
            # start() is followed immediately by join(), so the workers run
            # strictly one after another.
            worker.start()
            worker.join()
def run(data,tables,size):
    """Register the columns of every table and run the three analysis passes.

    Parameters
    ----------
    data : iterable of entries where ``entry[0]`` is a row (indexable by
        column position) and ``entry[1]`` is the index of the row's table
        in ``tables``.
    tables : sequence of CSV file names; each is passed to ``CSVRead`` and
        used as the name of the table node in the graph.
    size : sequence indexed by table position; ``size[i]`` is handed
        unchanged to the worker threads for table ``i``.

    Passes (each worker is started and joined before the next one starts):
      1. ``ccThread`` for every (row, column) cell in ``data``.
      2. ``dmsThread`` for every ordered pair of distinct columns per row.
      3. ``topDownThread`` for every column name of every table.
    """
    columnNames=[]   # columnNames[i] -> stripped header names of tables[i]
    support=[]       # support[i][j]  -> CSVRead.getSupport(tables[i], j)
    for nameOfFile in tables:
        names=[c.strip() for c in CSVRead.readCSV(nameOfFile,firstRow=True, choice=[0,1])]
        columnNames.append(names)

        tableSupport=[]
        for j,name in enumerate(names):
            # Tag both the column node and the table->column relationship
            # with type "Column".
            z=Neo4jDrive.insertNodeAndRelationship(nameOfFile,"Column",name)[0]
            node=Neo4jDrive.findNodeByName(name)
            node.properties['type']='Column'
            node.push()
            z.properties['type']="Column"
            z.push()
            tableSupport.append(CSVRead.getSupport(nameOfFile,j))
        support.append(tableSupport)

    hyplock=Lock()
    stypelock=Lock()

    # Pass 1: one ccThread per cell.
    for itemPiece in data:
        item=itemPiece[0]
        indexOfFile=itemPiece[1]
        names=columnNames[indexOfFile]
        for column in range(len(names)):
            k=ccThread(item[column],names,column,support[indexOfFile],size[indexOfFile])
            k.start()
            k.join()

    # Pass 2: one dmsThread per ordered pair of distinct columns in a row.
    # Kept as a separate loop so that every ccThread has finished first.
    for itemPiece in data:
        item=itemPiece[0]
        indexOfFile=itemPiece[1]
        names=columnNames[indexOfFile]
        for column in range(len(names)):
            for perm_column in range(len(names)):
                if perm_column==column:
                    continue
                k=dmsThread(item[column],item[perm_column],size[indexOfFile],names,column,perm_column)
                k.start()
                k.join()

    # Pass 3: one topDownThread per column name, sharing the two locks and
    # the set of nodes returned by findAllCCNodes.
    allCC=set(Neo4jDrive.findAllCCNodes())
    for s,c in enumerate(columnNames):
        for column in c:
            k=topDownThread(column,hyplock,stypelock,allCC,size[s])
            k.start()
            k.join()
def _interleaveSamples(csvitems,sampleSize):
    """Merge per-table row lists round-robin, ``sampleSize`` rows at a time.

    Returns a list of ``[row, tableIndex]`` pairs where ``tableIndex`` is the
    position of the row's table in ``csvitems``.  Tables that run out of
    rows simply stop contributing; the indices of the remaining tables are
    not affected.

    Raises ValueError if ``sampleSize`` is not positive (the loop could
    never make progress).
    """
    if sampleSize<=0:
        raise ValueError("sample must be a positive integer")

    data=[]
    # Carry the original index along with each table so that dropping an
    # exhausted table never renumbers the others.
    pending=list(enumerate(csvitems))
    offset=0
    while pending:
        unfinished=[]
        for tableIndex,rows in pending:
            data.extend([row,tableIndex] for row in rows[offset:offset+sampleSize])
            if offset+sampleSize<len(rows):
                unfinished.append((tableIndex,rows))
        pending=unfinished
        offset+=sampleSize
    return data


def main():
    """Insert the two CSV tables into the graph and hand their rows to ``run``.

    For each table a node of type 'table' is created, the rows (everything
    after the first row returned by ``CSVRead.readCSV``) are shuffled, and
    the row count is recorded.  The rows of all tables are then interleaved
    in chunks of the module-level ``sample`` and passed to ``run`` together
    with the table names and sizes.
    """
    tables=["StatesandCapitals.csv","RiversandSourceState.csv"]
    csvitems=[]
    size=[]

    for nameOfFile in tables:
        Neo4jDrive.insertNode(nameOfFile)
        node=Neo4jDrive.findNodeByName(nameOfFile)
        node.properties['type']='table'
        node.push()

        # [1:] drops the first row -- presumably the header line.
        rows=CSVRead.readCSV(nameOfFile,firstRow=False, choice=[0,1])[1:]
        size.append(len(rows))
        random.shuffle(rows)
        csvitems.append(rows)

    data=_interleaveSamples(csvitems,sample)
    run(data,tables,size)
def main():
    """Push the CSV named by ``nameOfFile`` into the graph and process its values.

    A node is inserted for the file, a "Column" node/relationship is inserted
    for each header name, and then every distinct key of each column's
    support mapping is processed by its own ``itemThread``.
    """
    Neo4jDrive.insertNode(nameOfFile)

    headers=CSVRead.readCSV(nameOfFile,firstRow=True, choice=[0,1,2,3,4])
    for header in headers:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile,"Column",header)

    # Number of columns = number of entries findRelationshipsOfNode yields
    # for the "Column" label on the file node.
    numberOfColumns=sum(1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile,"Column"))

    for columnIndex in range(numberOfColumns):
        columnSupport=CSVRead.getSupport(nameOfFile,columnIndex)
        itemTotal=CSVRead.numberOfItems(columnSupport)

        for value in columnSupport.keys():
            thread=itemThread(value,headers,columnIndex,columnSupport,itemTotal)
            # Joined right after starting: the threads execute sequentially.
            thread.start()
            thread.join()
Beispiel #5
0
def main():
    Neo4jDrive.insertNode(nameOfFile)
    columnNames=CSVRead.readCSV(nameOfFile,firstRow=True, choice=[0,1,2,3,4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile,"Column",name)
    
    #support=CSVRead.getSupport(nameOfFile,0)
    #totalNumberOfValues=CSVRead.numberOfItems(support)
    for column in range(sum([1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile,"Column")])):
        support=CSVRead.getSupport(nameOfFile,column)
        totalNumberOfValues=CSVRead.numberOfItems(support)
        
        #print i.end_node
        #cNode=Neo4jDrive.findNodeByName(columnNames[column])
         
        for item in support.keys():
            node=Neo4jDrive.findNodeByName(item)
            if  node== None:
                Neo4jDrive.insertNodeAndRelationship(columnNames[column],'dataItems',item)
                node=Neo4jDrive.findNodeByName(item)
                node.properties['fvalue']=support[item]
                node.push()
                rlist=sparqlQuerypy.findBottomUp(item)
                for r in rlist:
                    try:
                        rel_data=Neo4jDrive.insertNodeAndRelationship(item,"cc",r[0])
                        rel_data1=Neo4jDrive.insertNodeAndRelationship(r[0],"dd",r[2])          
                        node=node=Neo4jDrive.findNodeByName(r[2])
                        if node.properties['incoming']==None:
                            node.properties['incoming']=1
                        else:
                            node.properties['incoming']+=1
                        node.properties['type']='type'
                        node.push()
                    except :
                        
                        print columnNames[column],'cc',r[0]

                    rel_data=rel_data[0]
                    rel_data.properties['rel_class'] = 'cc'
                    rel_data.properties['support']=support[item]/(totalNumberOfValues*1.0)
                    rel_data.push()
Beispiel #6
0
def main():
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    # support=CSVRead.getSupport(nameOfFile,0)
    # totalNumberOfValues=CSVRead.numberOfItems(support)
    for column in range(sum([1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")])):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)

        # print i.end_node
        # cNode=Neo4jDrive.findNodeByName(columnNames[column])

        for item in support.keys():
            node = Neo4jDrive.findNodeByName(item)
            if node == None:
                Neo4jDrive.insertNodeAndRelationship(columnNames[column], "dataItems", item)
                node = Neo4jDrive.findNodeByName(item)
                node.properties["fvalue"] = support[item]
                node.push()
                rlist = sparqlQuerypy.findBottomUp(item)
                for r in rlist:
                    try:
                        rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                        rel_data1 = Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                        node = node = Neo4jDrive.findNodeByName(r[2])
                        if node.properties["incoming"] == None:
                            node.properties["incoming"] = 1
                        else:
                            node.properties["incoming"] += 1
                        node.properties["type"] = "type"
                        node.push()
                    except:

                        print columnNames[column], "cc", r[0]

                    rel_data = rel_data[0]
                    rel_data.properties["rel_class"] = "cc"
                    rel_data.properties["support"] = support[item] / (totalNumberOfValues * 1.0)
                    rel_data.push()
Beispiel #7
0
def main():
    """Register both CSV tables in the graph and process them sample by sample.

    Phase 1 (per table): insert a node of type 'table', insert one node and
    relationship of type 'Column' per stripped header name, then read the
    data rows (everything after the first row), record their count and
    shuffle them in place.

    Phase 2: repeatedly hand the next window of ``sample`` rows of every
    table to a ``runThread`` (started and joined one at a time), sharing a
    single ``relationships`` dict, until the window's end index exceeds 5.
    """
    tables=["StatesandCapitals.csv","RiversandSourceState.csv"]
    columnNames=[]   # stripped header names, one list per table (by position)
    colNam={}        # table name -> its own copy of the stripped header names
    csvitems={}      # table name -> shuffled data rows
    size={}          # table name -> one-element list holding the row count

    for tableName in tables:
        # Table node.
        Neo4jDrive.insertNode(tableName)
        tableNode=Neo4jDrive.findNodeByName(tableName)
        tableNode.properties['type']='table'
        tableNode.push()

        # Column nodes and the table->column relationships.
        header=[c.strip() for c in CSVRead.readCSV(tableName,firstRow=True, choice=[0,1])]
        columnNames.append(header)
        colNam[tableName]=list(header)
        for columnName in header:
            relationship=Neo4jDrive.insertNodeAndRelationship(tableName,"Column",columnName)[0]
            columnNode=Neo4jDrive.findNodeByName(columnName)
            columnNode.properties['type']='Column'
            columnNode.push()
            relationship.properties['type']="Column"
            relationship.push()

        # Data rows: the first row is dropped, the rest shuffled in place.
        rows=CSVRead.readCSV(tableName,firstRow=False,choice=[0,1])[1:]
        csvitems[tableName]=rows
        size[tableName]=[len(rows)]
        random.shuffle(rows)

    relationships={}
    iterations=1
    while True:
        for table in tables:
            start=sample*(iterations-1)
            end=sample*iterations
            worker=runThread(table, csvitems[table][start:end], colNam[table],end,relationships)
            worker.start()
            worker.join()
        iterations+=1
        # Stop condition ("convergence"): the window end has passed 5.
        if end>5:
            break