def main():
    """Register the table and its columns, then process every distinct value.

    Inserts a node for the module-level ``nameOfFile``, links one "Column"
    node per header returned by ``CSVRead.readCSV``, and finally hands every
    key of each column's support mapping to an ``itemThread``.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True,
                                  choice=[0, 1, 2, 3, 4])
    for header in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", header)

    # The number of columns is taken from the graph (count of "Column"
    # relationships on the table node), not from len(columnNames).
    column_count = len(list(
        Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")))

    for column in range(column_count):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)
        for item in support.keys():
            worker = itemThread(item, columnNames, column, support,
                                totalNumberOfValues)
            worker.start()
            # NOTE(review): the original indentation was lost; join() is
            # assumed to sit next to start(), so each worker finishes before
            # the next one is created -- confirm.
            worker.join()
def run(data, tables, size):
    """Populate the graph for ``tables`` and run the three analysis passes.

    Parameters, as used by the code below:
        data:   sequence of pairs; element 0 is a row (indexed by column
                number), element 1 is the index of the row's table.
        tables: sequence of table names passed to CSVRead / Neo4jDrive.
        size:   sequence indexed by table index; forwarded to the threads.

    Passes, in order: one ``ccThread`` per cell, one ``dmsThread`` per ordered
    pair of distinct columns in a row, one ``topDownThread`` per column.

    NOTE(review): the original indentation was lost; every join() is assumed
    to follow its start() directly, so threads run one at a time -- confirm.
    """
    columnNames = []
    support = []
    for nameOfFile in tables:
        raw_header = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1])
        header = [c.strip() for c in raw_header]
        columnNames.append(header)

        table_support = []
        for j, name in enumerate(header):
            relationship = Neo4jDrive.insertNodeAndRelationship(
                nameOfFile, "Column", name)[0]
            column_node = Neo4jDrive.findNodeByName(name)
            column_node.properties['type'] = 'Column'
            column_node.push()
            relationship.properties['type'] = "Column"
            relationship.push()
            table_support.append(CSVRead.getSupport(nameOfFile, j))
        support.append(table_support)

    # The result is never read; the call is kept because its side effects
    # (if any) are not visible from here. It uses the last table name.
    totalNumberOfValues = CSVRead.getSize(nameOfFile, 0)

    # Pass 1: one ccThread per cell of every sampled row.
    for itemPiece in data:
        item, indexOfFile = itemPiece[0], itemPiece[1]
        names = columnNames[indexOfFile]
        for column, _ in enumerate(names):
            worker = ccThread(item[column], names, column,
                              support[indexOfFile], size[indexOfFile])
            worker.start()
            worker.join()

    # Pass 2: one dmsThread per ordered pair of distinct columns in a row.
    for itemPiece in data:
        item, indexOfFile = itemPiece[0], itemPiece[1]
        names = columnNames[indexOfFile]
        width = len(names)
        for column in range(width):
            for perm_column in range(width):
                if perm_column == column:
                    continue
                worker = dmsThread(item[column], item[perm_column],
                                   size[indexOfFile], names, column,
                                   perm_column)
                worker.start()
                worker.join()

    # Pass 3: one topDownThread per column, sharing two locks and the set of
    # all CC nodes fetched once up front.
    hyplock = Lock()
    stypelock = Lock()
    allCC = set(Neo4jDrive.findAllCCNodes())
    for table_index, names in enumerate(columnNames):
        for column in names:
            worker = topDownThread(column, hyplock, stypelock, allCC,
                                   size[table_index])
            worker.start()
            worker.join()
def _interleave_samples(row_lists, chunk):
    """Merge several row lists round-robin, ``chunk`` rows at a time.

    Each round takes the next ``chunk`` rows from every list in order and
    appends them as ``[row, list_index]`` pairs. Lists that run out simply
    contribute nothing to later rounds, so the index always refers to the
    list's position in ``row_lists``.

    Raises:
        ValueError: if ``chunk`` is not positive (the loop could not advance).
    """
    if chunk <= 0:
        raise ValueError("sample size must be positive, got %r" % (chunk,))
    longest = max([len(rows) for rows in row_lists] or [0])
    merged = []
    for start in range(0, longest, chunk):
        for list_index, rows in enumerate(row_lists):
            merged.extend(
                [row, list_index] for row in rows[start:start + chunk])
    return merged


def main():
    """Load both tables, shuffle their rows and pass interleaved samples to run().

    For every table a node of type 'table' is pushed to the graph, the rows
    are read (the first element of the readCSV result is dropped --
    presumably the header row, confirm against CSVRead), counted, and
    shuffled in place. Rows are then interleaved in rounds of the
    module-level ``sample`` rows per table, each tagged with its table index.

    The previous implementation removed exhausted tables from the list while
    enumerating it. That skipped the following table in that round and shifted
    the indices used as table tags, so rows could be dropped or tagged with
    the wrong table. The interleaving no longer mutates the list.
    """
    tables = ["StatesandCapitals.csv", "RiversandSourceState.csv"]
    csvitems = []
    size = []
    for nameOfFile in tables:
        Neo4jDrive.insertNode(nameOfFile)
        node = Neo4jDrive.findNodeByName(nameOfFile)
        node.properties['type'] = 'table'
        node.push()

        rows = CSVRead.readCSV(nameOfFile, firstRow=False, choice=[0, 1])[1:]
        size.append(len(rows))  # recorded before shuffling; length is unchanged
        random.shuffle(rows)
        csvitems.append(rows)

    data = _interleave_samples(csvitems, sample)
    run(data, tables, size)
def main():
    """Build the table/column graph and run an itemThread for each value.

    Works on the module-level ``nameOfFile``: the table node is inserted,
    every header from ``CSVRead.readCSV`` becomes a "Column" relationship,
    and each key of a column's support mapping is processed by an
    ``itemThread``.
    """
    Neo4jDrive.insertNode(nameOfFile)

    columnNames = CSVRead.readCSV(
        nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for column_name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", column_name)

    # Count the "Column" relationships stored in the graph; that count, not
    # the header list, decides how many columns are processed.
    linked_columns = Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")
    total_columns = sum(1 for _ in linked_columns)

    for column in range(total_columns):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)
        for item in support.keys():
            thread = itemThread(
                item, columnNames, column, support, totalNumberOfValues)
            thread.start()
            # NOTE(review): indentation was lost in this file; join() is
            # assumed to be inside the loop right after start() -- confirm.
            thread.join()
def _ensure_item_node(column_name, item, frequency):
    """Create the graph node for ``item`` under ``column_name`` if missing.

    A newly created node gets its 'fvalue' property set to ``frequency``.
    NOTE(review): the original indentation was lost; setting 'fvalue' is
    assumed to happen only for newly created nodes -- confirm.
    """
    if Neo4jDrive.findNodeByName(item) is not None:
        return
    Neo4jDrive.insertNodeAndRelationship(column_name, 'dataItems', item)
    node = Neo4jDrive.findNodeByName(item)
    node.properties['fvalue'] = frequency
    node.push()


def _link_candidate(column_name, item, candidate, frequency, total):
    """Link ``item`` to one bottom-up candidate and annotate the 'cc' edge.

    ``candidate`` is one entry of ``sparqlQuerypy.findBottomUp(item)``; its
    elements 0 and 2 are used as node names for the "cc" and "dd" links.
    """
    rel_data = None
    try:
        rel_data = Neo4jDrive.insertNodeAndRelationship(
            item, "cc", candidate[0])
        Neo4jDrive.insertNodeAndRelationship(candidate[0], "dd", candidate[2])
        type_node = Neo4jDrive.findNodeByName(candidate[2])
        if type_node.properties['incoming'] is None:
            type_node.properties['incoming'] = 1
        else:
            type_node.properties['incoming'] += 1
        type_node.properties['type'] = 'type'
        type_node.push()
    except Exception:
        # Best effort, as before: report the failing link and carry on.
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        print("%s cc %s" % (column_name, candidate[0]))

    if rel_data is None:
        # The "cc" insert itself failed. Previously this path either raised
        # NameError or re-used the relationship of the previous candidate.
        return

    relationship = rel_data[0]
    relationship.properties['rel_class'] = 'cc'
    relationship.properties['support'] = frequency / (total * 1.0)
    relationship.push()


def main():
    """Insert the table, its columns and every value with bottom-up types.

    Works on the module-level ``nameOfFile``. For each column (counted from
    the "Column" relationships stored in the graph) every key of the support
    mapping is ensured to exist as a node and linked to each candidate
    returned by ``sparqlQuerypy.findBottomUp``.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True,
                                  choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    column_count = sum(
        1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"))
    for column in range(column_count):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)
        for item in support.keys():
            _ensure_item_node(columnNames[column], item, support[item])
            for candidate in sparqlQuerypy.findBottomUp(item):
                _link_candidate(columnNames[column], item, candidate,
                                support[item], totalNumberOfValues)
def main():
    """Insert the table, its columns and every value with bottom-up types.

    Works on the module-level ``nameOfFile``:

    1. Insert the table node and one "Column" relationship per header.
    2. For each column (counted from the "Column" relationships in the
       graph), fetch the support mapping and its item total from CSVRead.
    3. For each key of the mapping, create its node if missing, then link it
       to every candidate from ``sparqlQuerypy.findBottomUp`` via a "cc"
       edge whose 'support' property is the value's frequency divided by the
       item total.

    Failures while linking one candidate are reported and skipped. A failed
    "cc" insert no longer falls through to the annotation step, where it used
    to raise NameError or re-index the previous candidate's relationship.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    column_links = Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")
    for column in range(sum(1 for _ in column_links)):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)

        for item in support.keys():
            node = Neo4jDrive.findNodeByName(item)
            if node is None:
                # NOTE(review): indentation was lost in this file; 'fvalue'
                # is assumed to be set only on newly created nodes -- confirm.
                Neo4jDrive.insertNodeAndRelationship(columnNames[column], "dataItems", item)
                node = Neo4jDrive.findNodeByName(item)
                node.properties["fvalue"] = support[item]
                node.push()

            for r in sparqlQuerypy.findBottomUp(item):
                rel_data = None  # reset so a failure cannot reuse the last edge
                try:
                    rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                    Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                    type_node = Neo4jDrive.findNodeByName(r[2])
                    if type_node.properties["incoming"] is None:
                        type_node.properties["incoming"] = 1
                    else:
                        type_node.properties["incoming"] += 1
                    type_node.properties["type"] = "type"
                    type_node.push()
                except Exception:
                    # Best effort: report and continue. Narrowed from a bare
                    # except so KeyboardInterrupt/SystemExit are not swallowed.
                    print("%s cc %s" % (columnNames[column], r[0]))

                if rel_data is None:
                    continue  # the "cc" edge was never created

                cc_edge = rel_data[0]
                cc_edge.properties["rel_class"] = "cc"
                cc_edge.properties["support"] = support[item] / (totalNumberOfValues * 1.0)
                cc_edge.push()
def main():
    """Push tables and columns to the graph, then feed row samples to runThread.

    Each table gets a node of type 'table' and one 'Column' node/relationship
    per stripped header. Its rows (readCSV result minus the first element)
    are stored per table name and shuffled in place. Afterwards successive
    windows of the module-level ``sample`` rows are handed to one
    ``runThread`` per table, until the window end exceeds 5.
    """
    tables = ["StatesandCapitals.csv", "RiversandSourceState.csv"]
    columnNames = []
    colNam = {}
    csvitems = {}
    size = {}

    for nameOfFile in tables:
        # Table node.
        Neo4jDrive.insertNode(nameOfFile)
        table_node = Neo4jDrive.findNodeByName(nameOfFile)
        table_node.properties['type'] = 'table'
        table_node.push()

        # Column nodes and their relationships to the table.
        raw_header = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1])
        header = [c.strip() for c in raw_header]
        columnNames.append(header)
        colNam[nameOfFile] = list(header)
        for name in header:
            relationship = Neo4jDrive.insertNodeAndRelationship(
                nameOfFile, "Column", name)[0]
            column_node = Neo4jDrive.findNodeByName(name)
            column_node.properties['type'] = 'Column'
            column_node.push()
            relationship.properties['type'] = "Column"
            relationship.push()

        # Row data, keyed by table name, shuffled for random sampling.
        rows = CSVRead.readCSV(nameOfFile, firstRow=False, choice=[0, 1])[1:]
        csvitems[nameOfFile] = rows
        size[nameOfFile] = [len(rows)]  # kept as a one-element list, unused below
        random.shuffle(rows)

    relationships = {}
    iterations = 1
    while True:
        # NOTE(review): indentation was lost in this file; the iteration
        # counter is assumed to advance once per round over all tables and
        # join() to follow start() directly -- confirm.
        start = sample * (iterations - 1)
        end = sample * iterations
        for table in tables:
            worker = runThread(table, csvitems[table][start:end],
                               colNam[table], end, relationships)
            worker.start()
            worker.join()
        iterations += 1
        # Stop rule carried over unchanged: finish once the window end passes 5.
        if end > 5:
            break