def run(self):
    """Link this worker's column to every type found for its data item.

    Reads ``support``, ``totalNumberOfValues``, ``column``, ``columnNames``
    and ``item`` from the instance.  For each row ``r`` returned by
    ``sparqlQuerypy.findBottomUp(item)``:

    * a "cc" relationship is inserted from the column name to ``r[2]``;
    * the node named ``r[2]`` gets its ``incoming`` counter started at 1 or
      incremented, is tagged ``type='type'`` and pushed;
    * the relationship is stamped with ``rel_class='cc'`` and
      ``support[item] / totalNumberOfValues`` and pushed.

    Returns None; every result is written through ``Neo4jDrive``.
    """
    item = self.item
    rlist = sparqlQuerypy.findBottomUp(item)
    if not rlist:
        # No rows (or no result at all): nothing to write.
        return

    column_name = self.columnNames[self.column]
    # Loop-invariant, so computed once.  The ``* 1.0`` forces float division
    # under Python 2 when both operands are integers.
    support_ratio = self.support[item] / (self.totalNumberOfValues * 1.0)

    for r in rlist:
        type_name = r[2]
        rel_data = Neo4jDrive.insertNodeAndRelationship(
            column_name, "cc", type_name)

        node = Neo4jDrive.findNodeByName(type_name)
        # Identity test: ``== None`` can be hijacked by a custom ``__eq__``.
        if node.properties['incoming'] is None:
            node.properties['incoming'] = 1
        else:
            node.properties['incoming'] += 1
        node.properties['type'] = 'type'
        node.push()

        # The first element of the insert result is used as the relationship.
        relationship = rel_data[0]
        relationship.properties['rel_class'] = 'cc'
        relationship.properties['support'] = support_ratio
        relationship.push()
def run(self):
    """Store this worker's data item and its class/type links in the graph.

    Reads ``support``, ``totalNumberOfValues``, ``column``, ``columnNames``
    and ``item`` from the instance.

    1. Makes sure a node named ``item`` exists (inserting it under the column
       with a 'dataItems' relationship when it does not) and stores
       ``support[item]`` on it as ``fvalue``.
    2. For each row ``r`` of ``sparqlQuerypy.findBottomUp(item)``: links
       ``item -cc-> r[0]`` and ``r[0] -dd-> r[2]``, bumps the ``incoming``
       counter of the node named ``r[2]``, then stamps the "cc" relationship
       with ``rel_class`` and the item's support ratio.

    Failures while writing one row are reported on stdout and do not stop the
    remaining rows.  Returns None.
    """
    support = self.support
    item = self.item
    column_name = self.columnNames[self.column]

    node = Neo4jDrive.findNodeByName(item)
    if node is None:
        Neo4jDrive.insertNodeAndRelationship(column_name, 'dataItems', item)
        node = Neo4jDrive.findNodeByName(item)
    # NOTE(review): source indentation was lost; fvalue is assumed to be
    # refreshed on every visit, not only when the node is first created.
    node.properties['fvalue'] = support[item]
    node.push()

    rlist = sparqlQuerypy.findBottomUp(item)
    if not rlist:
        return

    # Loop-invariant; ``* 1.0`` forces float division under Python 2.
    support_ratio = support[item] / (self.totalNumberOfValues * 1.0)

    for r in rlist:
        # Reset per row so a failed insert can never leave the previous row's
        # relationship (or an unbound name) behind for the code below.
        rel_data = None
        try:
            rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
            Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
            type_node = Neo4jDrive.findNodeByName(r[2])
            if type_node.properties['incoming'] is None:
                type_node.properties['incoming'] = 1
            else:
                type_node.properties['incoming'] += 1
            type_node.properties['type'] = 'type'
            type_node.push()
        except Exception:
            # Best effort, as before, but no longer swallowing
            # KeyboardInterrupt / SystemExit the way a bare ``except:`` did.
            print("%s cc %s" % (column_name, r[0]))

        if rel_data is None:
            # The "cc" link itself could not be created: nothing to annotate.
            continue

        relationship = rel_data[0]
        relationship.properties['rel_class'] = 'cc'
        relationship.properties['support'] = support_ratio
        relationship.push()
def ccScores(self):
    """Count "cc" classes per column and push the resulting scores to Neo4j.

    Reads ``data``, ``columnNames``, ``totalSize`` and ``relationships`` from
    the instance; ``self.relationships`` is refilled in place with one dict
    per column.

    Steps:
      1. For every column and every row of ``data``, query
         ``sparqlQuerypy.findBottomUp`` with the stripped cell value and
         count, per column, how many result rows name each class (``r[0]``).
         Each class entry holds ``name`` ('cc'), ``incoming`` (the count) and
         ``cc`` (``incoming / totalSize``).
      2. Insert a "cc" relationship from each column to each of its classes
         and store that frequency on it as ``fk``.
      3. For every class seen, set the node's ``ccs`` to the mean ``fk`` over
         the links returned by ``Neo4jDrive.findIncomingCCLinks`` and tag the
         node ``type='cc'``.

    Returns None.
    """
    data = self.data
    columnNames = self.columnNames
    totalSize = self.totalSize
    relationships = self.relationships

    # NOTE(review): the original kept a per-value flag map commented as
    # "remembering if the increment already happened", but never read it, so
    # a class listed several times for one value is counted every time.  That
    # counting is kept as-is here; confirm whether one count per value was
    # intended before changing the scores.
    for i, column in enumerate(columnNames):
        column_classes = relationships[column] = {}
        for element in data:
            item = element[i]
            rlist = sparqlQuerypy.findBottomUp(item.strip())
            # Same text the original multi-argument print produced.
            print("number of nodes for %s  is  %s" % (item.strip(), len(rlist)))
            for r in rlist:
                entry = column_classes.get(r[0])
                if entry is None:
                    entry = column_classes[r[0]] = {'name': 'cc', 'incoming': 0}
                entry['incoming'] += 1
                # ``* 1.0`` forces float division under Python 2.
                entry['cc'] = entry['incoming'] * 1.0 / totalSize

    # Every class seen in any column, collected for the final scoring pass.
    classSet = set()
    for column in columnNames:
        for class_name, stats in relationships[column].items():
            classSet.add(class_name)
            relationship = Neo4jDrive.insertNodeAndRelationship(
                column, 'cc', class_name)[0]
            relationship.properties['rel_class'] = 'cc'
            relationship.properties['fk'] = stats['cc']
            relationship.push()

    # Runs only after all relationships exist: CCS = sum(fk) / count(fk).
    for class_name in classSet:
        print(class_name)
        fk_values = [link[0].properties['fk']
                     for link in Neo4jDrive.findIncomingCCLinks(class_name)]
        node = Neo4jDrive.findNodeByName(class_name)
        if fk_values:
            node.properties['ccs'] = sum(fk_values) * 1.0 / len(fk_values)
        else:
            # No incoming links reported: avoid dividing by zero.
            node.properties['ccs'] = 0.0
        node.properties['type'] = 'cc'
        node.push()
def run(self):
    """Update "cc" relationship counters and node scores for this worker's item.

    Reads ``support``, ``totalNumberOfValues``, ``column``, ``columnNames``
    and ``item`` from the instance, and writes a line to the module-level
    ``log``.  For each row ``r`` of ``sparqlQuerypy.findBottomUp(item.strip())``
    it inserts a "cc" relationship from the column to ``r[2]``, maintains
    ``incoming`` / ``ccs`` on that relationship, then recomputes the node's
    ``ccs`` as the mean ``ccs`` of the links returned by
    ``Neo4jDrive.findIncomingCCLinks(r[2])``.

    Returns None.

    NOTE(review): the source line had lost its indentation; the nesting below
    is a reconstruction and should be checked against the author's intent.
    """
    support=self.support  # NOTE(review): not used anywhere below
    totalNumberOfValues=self.totalNumberOfValues*1.0  # float, so the divisions below are not integer divisions
    column=self.column
    columnNames=self.columnNames
    item=self.item
    rlist=sparqlQuerypy.findBottomUp(item.strip())
    print 'number of nodes for', item.strip(), " is ", len(rlist)
    log.write('number of nodes for'+str( item.strip())+ " is "+ str(len(rlist))+'\n')
    # Becomes 1 after the first increment of an already-counted relationship,
    # so at most one such increment happens per call.
    flag=0
    for r in rlist:
        rel_data=Neo4jDrive.insertNodeAndRelationship(columnNames[column],"cc",r[2])
        rel_data=rel_data[0]  # first element of the insert result is used as the relationship
        node=Neo4jDrive.findNodeByName(r[2])
        # Debug output for one specific DBpedia class.
        if r[2]=='http://dbpedia.org/ontology/PopulatedPlace':
            print columnNames[column], 'Happening'
            print 'potato',rel_data
        # TODO (from the original author): find out why this branch is not taken.
        if rel_data.properties['incoming']==None:
            # First time this relationship is counted.
            rel_data.properties['incoming']=1
            rel_data.properties['ccs']=1/totalNumberOfValues
            rel_data.push()
        else:
            if flag==0:
                rel_data.properties['incoming']+=1
                rel_data.push()
                # NOTE(review): this reads the *node's* 'incoming', while the
                # counter incremented just above lives on the relationship;
                # nothing in this function sets 'incoming' on the node.  It is
                # also assigned after the push, so it only reaches the graph
                # with the final push at the end of the loop body.  Confirm.
                rel_data.properties['ccs']=node.properties['incoming']/totalNumberOfValues
                flag=1
        node.properties['type']='cc'
        # Node score: mean of the 'ccs' values on its incoming cc links.
        node.properties['ccs']=0
        numberOfLinks=0
        for link in Neo4jDrive.findIncomingCCLinks(r[2]):
            node.properties['ccs']+=link[0].properties['ccs']
            numberOfLinks+=1
        if numberOfLinks>0:  # guard against dividing by zero
            node.properties['ccs']/=numberOfLinks
        node.push()
        rel_data.properties['rel_class'] = 'cc'
        rel_data.push()
def _store_item_links(column_name, item, item_support, total_values):
    """Write one data item, its classes and their types to the graph.

    Makes sure a node named ``item`` exists under ``column_name`` (via a
    'dataItems' relationship), stores ``item_support`` on it as ``fvalue``,
    then for each row ``r`` of ``sparqlQuerypy.findBottomUp(item)`` links
    ``item -cc-> r[0]`` and ``r[0] -dd-> r[2]``, bumps the ``incoming``
    counter of the node named ``r[2]`` and stamps the "cc" relationship with
    ``rel_class`` and ``item_support / total_values``.
    """
    node = Neo4jDrive.findNodeByName(item)
    if node is None:
        Neo4jDrive.insertNodeAndRelationship(column_name, 'dataItems', item)
        node = Neo4jDrive.findNodeByName(item)
    # NOTE(review): source indentation was lost; fvalue is assumed to be
    # refreshed on every visit, not only when the node is first created.
    node.properties['fvalue'] = item_support
    node.push()

    for r in sparqlQuerypy.findBottomUp(item):
        # Reset per row so a failed insert cannot leave a stale or unbound
        # relationship for the annotation step below.
        rel_data = None
        try:
            rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
            Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
            type_node = Neo4jDrive.findNodeByName(r[2])
            if type_node.properties['incoming'] is None:
                type_node.properties['incoming'] = 1
            else:
                type_node.properties['incoming'] += 1
            type_node.properties['type'] = 'type'
            type_node.push()
        except Exception:
            # Best effort per row; unlike a bare ``except:`` this lets
            # KeyboardInterrupt / SystemExit through.
            print("%s cc %s" % (column_name, r[0]))

        if rel_data is None:
            continue

        relationship = rel_data[0]
        relationship.properties['rel_class'] = 'cc'
        # ``* 1.0`` forces float division under Python 2.
        relationship.properties['support'] = item_support / (total_values * 1.0)
        relationship.push()


def main():
    """Load the CSV named by ``nameOfFile`` into the graph, column by column.

    Inserts a node for the file, a "Column" relationship for each name
    returned by ``CSVRead.readCSV``, and then, for every column index, stores
    each distinct value of ``CSVRead.getSupport`` together with its class and
    type links.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    # The number of columns is taken from the graph, not from columnNames.
    column_count = sum(
        1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"))
    for column in range(column_count):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)
        for item in support.keys():
            _store_item_links(
                columnNames[column], item, support[item], totalNumberOfValues)
def main():
    """Populate the graph from the CSV file named by ``nameOfFile``.

    Creates the file node and one "Column" relationship per name returned by
    ``CSVRead.readCSV``.  Then, for each column index and each key of that
    column's ``CSVRead.getSupport`` result:

    * ensures a node for the value exists (linked to the column through
      "dataItems") and stores its support count as ``fvalue``;
    * for every row ``r`` of ``sparqlQuerypy.findBottomUp(value)`` links
      ``value -cc-> r[0]`` and ``r[0] -dd-> r[2]``, bumps the ``incoming``
      counter on the node named ``r[2]``, and stamps the "cc" relationship
      with ``rel_class`` and the value's support ratio.

    A failure while writing one row is printed and the remaining rows are
    still processed.
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    # Column count comes from the "Column" relationships stored in the graph.
    columnCount = sum(1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"))
    for column in range(columnCount):
        support = CSVRead.getSupport(nameOfFile, column)
        # ``* 1.0`` forces float division under Python 2.
        totalNumberOfValues = CSVRead.numberOfItems(support) * 1.0

        for item in support.keys():
            node = Neo4jDrive.findNodeByName(item)
            if node is None:
                Neo4jDrive.insertNodeAndRelationship(columnNames[column], "dataItems", item)
                node = Neo4jDrive.findNodeByName(item)
            # NOTE(review): source indentation was lost; fvalue is assumed to
            # be refreshed on every visit, not only on first creation.
            node.properties["fvalue"] = support[item]
            node.push()

            for r in sparqlQuerypy.findBottomUp(item):
                # Fresh per row: a failed insert must not leave the previous
                # row's relationship (or an unbound name) in play.
                rel_data = None
                try:
                    rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                    Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                    typeNode = Neo4jDrive.findNodeByName(r[2])
                    if typeNode.properties["incoming"] is None:
                        typeNode.properties["incoming"] = 1
                    else:
                        typeNode.properties["incoming"] += 1
                    typeNode.properties["type"] = "type"
                    typeNode.push()
                except Exception:
                    # Still best effort, but KeyboardInterrupt / SystemExit
                    # are no longer swallowed as with a bare ``except:``.
                    print("%s cc %s" % (columnNames[column], r[0]))

                if rel_data is None:
                    # The "cc" link was never created; nothing to annotate.
                    continue

                relationship = rel_data[0]
                relationship.properties["rel_class"] = "cc"
                relationship.properties["support"] = support[item] / totalNumberOfValues
                relationship.push()
def run(self):
    """Record "cc" links from this worker's column to its item's types.

    Uses ``self.support``, ``self.totalNumberOfValues``, ``self.column``,
    ``self.columnNames`` and ``self.item``.  Each row ``r`` returned by
    ``sparqlQuerypy.findBottomUp(item)`` yields:

    * a "cc" relationship from the column name to ``r[2]``, stamped with
      ``rel_class="cc"`` and ``support[item] / totalNumberOfValues``;
    * an update of the node named ``r[2]``: ``incoming`` starts at 1 or is
      incremented, and ``type`` is set to "type".

    Returns None.
    """
    item = self.item
    rows = sparqlQuerypy.findBottomUp(item)
    if not rows:
        # Empty or missing result: no graph writes to perform.
        return

    columnName = self.columnNames[self.column]
    # Same for every row, so hoisted out of the loop; ``* 1.0`` keeps the
    # division a float division under Python 2.
    relativeSupport = self.support[item] / (self.totalNumberOfValues * 1.0)

    for r in rows:
        rel_data = Neo4jDrive.insertNodeAndRelationship(columnName, "cc", r[2])

        node = Neo4jDrive.findNodeByName(r[2])
        seen = node.properties["incoming"]
        # ``is None`` rather than ``== None``: identity, not overloadable equality.
        node.properties["incoming"] = 1 if seen is None else seen + 1
        node.properties["type"] = "type"
        node.push()

        # First element of the insert result is used as the relationship.
        link = rel_data[0]
        link.properties["rel_class"] = "cc"
        link.properties["support"] = relativeSupport
        link.push()