def main():
    """Load one CSV table into Neo4j: a node for the table, a node per
    column, then one itemThread per distinct value in every column.

    Relies on names supplied elsewhere in this file: the module-level
    global ``nameOfFile`` plus ``Neo4jDrive``, ``CSVRead`` and
    ``itemThread``.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True,
                                  choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)
    # Count the "Column" relationships hanging off the table node; a
    # generator avoids materialising the throwaway list the original built.
    columnCount = sum(
        1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"))
    for column in range(columnCount):
        support = CSVRead.getSupport(nameOfFile, column)  # value -> frequency
        totalNumberOfValues = CSVRead.numberOfItems(support)
        for item in support.keys():
            # NOTE(review): start() immediately followed by join() runs the
            # workers strictly one at a time; kept as-is to preserve the
            # original's sequential ordering.
            worker = itemThread(item, columnNames, column, support,
                                totalNumberOfValues)
            worker.start()
            worker.join()
def main():
    """Interleave rows from several CSV tables into one labelled stream.

    Each table becomes a 'table' node in Neo4j; its data rows are shuffled
    and then drawn round-robin in chunks of ``sample`` (module-level
    global), tagging every row with the index of the table it came from,
    until all tables are exhausted.  The merged stream goes to ``run``.
    """
    csvitems = []
    data = []
    tables = ["StatesandCapitals.csv", "RiversandSourceState.csv"]
    size = []
    for nameOfFile in tables:
        Neo4jDrive.insertNode(nameOfFile)
        node = Neo4jDrive.findNodeByName(nameOfFile)
        node.properties['type'] = 'table'
        node.push()
        csvitems += [CSVRead.readCSV(nameOfFile, firstRow=False, choice=[0, 1])[1:]]
        size += [len(csvitems[-1])]
        random.shuffle(csvitems[-1])  # randomise batch order per table
    i = k = 0
    # BUG FIX: the original removed exhausted tables from ``csvitems`` while
    # iterating over it, which (a) skipped the next table in the same pass
    # and (b) shifted the enumerate() index so later rows were tagged with
    # the wrong table.  Track exhausted tables by original index instead.
    remaining = list(range(len(csvitems)))
    while remaining:
        for tableIndex in list(remaining):  # copy: indices may drop mid-pass
            item = csvitems[tableIndex]
            end = min(k + sample, len(item))
            chunk = item[k:end]
            data[i:i + len(chunk)] = [[it, tableIndex] for it in chunk]
            i += len(chunk)
            if k + sample > len(item):  # this table is exhausted
                remaining.remove(tableIndex)
        k += sample
    run(data, tables, size)
def main():
    """Insert the table node and its column nodes into Neo4j, then spawn
    one itemThread per distinct value found in each column.

    Uses the module-level global ``nameOfFile`` as the table to load.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True,
                                  choice=[0, 1, 2, 3, 4])
    for columnName in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", columnName)
    # One pass per "Column" relationship attached to the table node.
    columnRelationships = Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")
    numberOfColumns = sum([1 for _ in columnRelationships])
    for columnIndex in range(numberOfColumns):
        support = CSVRead.getSupport(nameOfFile, columnIndex)
        totalNumberOfValues = CSVRead.numberOfItems(support)
        for value in support.keys():
            worker = itemThread(value, columnNames, columnIndex, support,
                                totalNumberOfValues)
            worker.start()
            worker.join()
def main():
    # Builds the graph for one table (module-level global ``nameOfFile``):
    # a node per column, a node per distinct cell value, and for each value
    # the "cc"/"dd" links returned by a bottom-up SPARQL lookup.
    Neo4jDrive.insertNode(nameOfFile)
    columnNames=CSVRead.readCSV(nameOfFile,firstRow=True, choice=[0,1,2,3,4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile,"Column",name)
    #support=CSVRead.getSupport(nameOfFile,0)
    #totalNumberOfValues=CSVRead.numberOfItems(support)
    # One iteration per "Column" relationship hanging off the table node.
    for column in range(sum([1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile,"Column")])):
        support=CSVRead.getSupport(nameOfFile,column)  # value -> frequency for this column
        totalNumberOfValues=CSVRead.numberOfItems(support)
        #print i.end_node
        #cNode=Neo4jDrive.findNodeByName(columnNames[column])
        for item in support.keys():
            node=Neo4jDrive.findNodeByName(item)
            if node== None:
                # First sighting of this value: hang it off its column node.
                Neo4jDrive.insertNodeAndRelationship(columnNames[column],'dataItems',item)
                node=Neo4jDrive.findNodeByName(item)
            node.properties['fvalue']=support[item]  # raw frequency of the value
            node.push()
            # presumably rlist yields triples whose r[0]/r[2] are node names —
            # TODO confirm sparqlQuerypy.findBottomUp's result shape
            rlist=sparqlQuerypy.findBottomUp(item)
            for r in rlist:
                try:
                    rel_data=Neo4jDrive.insertNodeAndRelationship(item,"cc",r[0])
                    rel_data1=Neo4jDrive.insertNodeAndRelationship(r[0],"dd",r[2])
                    # NOTE(review): duplicated "node=node=" is a typo (harmless double bind)
                    node=node=Neo4jDrive.findNodeByName(r[2])
                    if node.properties['incoming']==None:
                        node.properties['incoming']=1
                    else:
                        node.properties['incoming']+=1
                    node.properties['type']='type'
                    node.push()
                except :
                    # NOTE(review): bare except swallows every error (including
                    # typos/KeyboardInterrupt); consider narrowing and logging.
                    print columnNames[column],'cc',r[0]
                # NOTE(review): if the try above raised before rel_data was
                # assigned, the next line reuses a stale value from a previous
                # iteration — or raises NameError on the very first one. Confirm.
                rel_data=rel_data[0]
                rel_data.properties['rel_class'] = 'cc'
                rel_data.properties['support']=support[item]/(totalNumberOfValues*1.0)
                rel_data.push()
def main():
    # Same pipeline as the sibling version above, auto-formatted: push the
    # table node, its column nodes, every distinct cell value, and the
    # "cc"/"dd" edges discovered by a bottom-up SPARQL lookup per value.
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)
    # support=CSVRead.getSupport(nameOfFile,0)
    # totalNumberOfValues=CSVRead.numberOfItems(support)
    # One iteration per "Column" relationship attached to the table node.
    for column in range(sum([1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")])):
        support = CSVRead.getSupport(nameOfFile, column)  # value -> frequency
        totalNumberOfValues = CSVRead.numberOfItems(support)
        # print i.end_node
        # cNode=Neo4jDrive.findNodeByName(columnNames[column])
        for item in support.keys():
            node = Neo4jDrive.findNodeByName(item)
            if node == None:
                # Value not in the graph yet: attach it to its column node.
                Neo4jDrive.insertNodeAndRelationship(columnNames[column], "dataItems", item)
                node = Neo4jDrive.findNodeByName(item)
            node.properties["fvalue"] = support[item]  # raw frequency of the value
            node.push()
            # presumably each r is a triple with node names at r[0]/r[2] —
            # TODO confirm against sparqlQuerypy.findBottomUp
            rlist = sparqlQuerypy.findBottomUp(item)
            for r in rlist:
                try:
                    rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                    rel_data1 = Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                    # NOTE(review): duplicated "node = node =" is a typo (harmless)
                    node = node = Neo4jDrive.findNodeByName(r[2])
                    if node.properties["incoming"] == None:
                        node.properties["incoming"] = 1
                    else:
                        node.properties["incoming"] += 1
                    node.properties["type"] = "type"
                    node.push()
                except:
                    # NOTE(review): bare except hides all errors; narrow + log.
                    print columnNames[column], "cc", r[0]
                # NOTE(review): if the try raised before assigning rel_data,
                # this reuses a stale value (or NameErrors on the first pass).
                rel_data = rel_data[0]
                rel_data.properties["rel_class"] = "cc"
                rel_data.properties["support"] = support[item] / (totalNumberOfValues * 1.0)
                rel_data.push()
def main():
    """Push both source tables and their columns into Neo4j, then feed the
    shuffled data rows to runThread in fixed-size batches of ``sample``
    (module-level global) until the hard-coded cut-off is reached."""
    tables = ["StatesandCapitals.csv", "RiversandSourceState.csv"]
    columnNames = []
    colNam = {}
    csvitems = {}
    size = {}
    for tableIndex, tableName in enumerate(tables):
        # Table node.
        Neo4jDrive.insertNode(tableName)
        tableNode = Neo4jDrive.findNodeByName(tableName)
        tableNode.properties['type'] = 'table'
        tableNode.push()
        # Column nodes and their relationships to the table.
        columnNames += [CSVRead.readCSV(tableName, firstRow=True, choice=[0, 1])]
        columnNames[tableIndex] = [c.strip() for c in columnNames[tableIndex]]
        colNam[tableName] = [c.strip() for c in columnNames[tableIndex]]
        for j, columnName in enumerate(columnNames[tableIndex]):
            columnRel = Neo4jDrive.insertNodeAndRelationship(tableName, "Column", columnName)[0]
            columnNode = Neo4jDrive.findNodeByName(columnName)
            columnNode.properties['type'] = 'Column'
            columnNode.push()
            columnRel.properties['type'] = "Column"
            columnRel.push()
        # Data rows, shuffled so batches are drawn in random order.
        csvitems[tableName] = CSVRead.readCSV(tableName, firstRow=False, choice=[0, 1])[1:]
        size[tableName] = [len(csvitems[tableName])]
        random.shuffle(csvitems[tableName])
    relationships = {}
    iterations = 1
    while True:
        # One batch of ``sample`` rows per table per iteration.
        for table in tables:
            start = sample * (iterations - 1)
            end = sample * iterations
            worker = runThread(table, csvitems[table][start:end], colNam[table], end, relationships)
            worker.start()
            worker.join()
        iterations += 1
        if end > 5:
            break