def pair2tra(self, ei, ef1, ef2, traFile, mapFile1, mapFile2):
    """Convert an edge file into a numbered transaction file.

    The distinct non-null values of each node field are numbered
    separately, every edge is rewritten as a ``num1,num2`` pair, and the
    pairs are folded into one row per ``num1`` holding its ``num2`` values.

    Args:
        ei: edge file name.
        ef1: name of the first node field in ``ei``.
        ef2: name of the second node field in ``ei``.
        traFile: output transaction file (written without a header line).
        mapFile1: output file receiving the ``ef1`` numbering (``num1``).
        mapFile2: output file receiving the ``ef2`` numbering (``num2``).
    """
    # Numbering of the first node field.
    f1 = None
    f1 <<= nm.mcut(f="{}:node1".format(ef1), i=ei)
    f1 <<= nm.mdelnull(f="node1")
    f1 <<= nm.muniq(k="node1")
    f1 <<= nm.mnumber(s="node1", a="num1", o=mapFile1)

    # Numbering of the second node field.
    f2 = None
    f2 <<= nm.mcut(f="{}:node2".format(ef2), i=ei)
    f2 <<= nm.mdelnull(f="node2")
    f2 <<= nm.muniq(k="node2")
    f2 <<= nm.mnumber(s="node2", a="num2", o=mapFile2)

    # Replace both end points by their numbers and fold per num1.
    f3 = None
    f3 <<= nm.mcut(f="{}:node1,{}:node2".format(ef1, ef2), i=ei)
    f3 <<= nm.mjoin(k="node1", m=f1, f="num1")
    f3 <<= nm.mjoin(k="node2", m=f2, f="num2")
    f3 <<= nm.mcut(f="num1,num2")
    f3 <<= nm.msortf(f="num1,num2%n")
    f3 <<= nm.mtra(k="num1", s="num2%n", f="num2")
    f3 <<= nm.msortf(f="num1%n")
    f3 <<= nm.mcut(f="num2", nfno=True, o=traFile)
    f3.run()
def msortf_float2(iFile, loop):
    """Time a numeric ``msortf`` on the ``float2`` field.

    Args:
        iFile: input CSV file name.
        loop: number of repetitions.

    Returns:
        List of ``loop`` elapsed times in seconds, one per run.  The sorted
        output is written to the file ``oFile`` each time.
    """
    sec = []
    for _ in range(loop):
        # perf_counter is monotonic, so an interval cannot be distorted by
        # system clock adjustments the way time.time() differences can.
        st = time.perf_counter()
        nm.msortf(f="float2%n", i=iFile, o="oFile").run()
        sec.append(time.perf_counter() - st)
    return sec
def __convertToNumeric(self):
    """Replace node names by numeric ids and write the working graph files.

    Produces, in this order:
      * ``self.mFile``  - every node name with its numeric ``id``
      * ``self.eFile``  - edges with both end points replaced by ids
      * ``self.eFileT`` - header-less edge list, one "e1 e2" string per row
      * ``self.nFile``  - node file keyed by id
    The intermediate work files are removed at the end.
    """
    wf1 = self.__tempW.file()
    wf2 = self.__tempW.file()
    wf3 = self.__tempW.file()
    self.mFile = self.__temp.file()

    # Node names appearing on either end of an edge.
    nm.mcut(f="e1:node", i=self.__wfE, o=wf1).run()
    nm.mcut(f="e2:node", i=self.__wfE, o=wf2).run()

    # Add the names from the node file when one was given.
    nameFiles = [wf1, wf2]
    if self.__wfN is not None:
        nm.mcut(f="n:node", i=self.__wfN, o=wf3).run()
        nameFiles.append(wf3)

    numbering = None
    numbering <<= nm.mcat(i=",".join(nameFiles), f="node")
    numbering <<= nm.muniq(k="node")
    numbering <<= nm.mnumber(q=True, a="id", o=self.mFile)
    numbering.run()

    # Convert and save the edge file.
    self.eFile = self.__temp.file()
    edges = nm.mjoin(i=self.__wfE, m=self.mFile, f="id:id1", k="e1", K="node")
    edges <<= nm.mjoin(m=self.mFile, f="id:id2", k="e2", K="node")
    edges <<= nm.muniq(k="id1,id2")
    edges <<= nm.msortf(f="id1%n,id2%n")
    edges <<= nm.mcut(f="id1:e1,id2:e2,no:row_index", o=self.eFile)
    edges.run()

    # Save the edge file in the form used internally by take.
    self.eFileT = self.__temp.file()
    edgeText = nm.mcal(i=self.eFile, c="cat(\" \",$s{e1},$s{e2})", a="edge")
    edgeText <<= nm.mcut(nfno=True, f="edge", o=self.eFileT)
    edgeText.run()

    # Convert and save the node file.
    self.nFile = self.__temp.file()
    if self.__wfN is None:
        # No node file: derive the nodes from the converted edges.
        wf4 = self.__tempW.file()
        wf5 = self.__tempW.file()
        nm.mcut(i=self.eFile, f="e1:n", o=wf4).run()
        nm.mcut(i=self.eFile, f="e2:n", o=wf5).run()
        nodes = nm.mcat(i="%s,%s" % (wf4, wf5))
        nodes <<= nm.muniq(k="n")
        nodes <<= nm.msortf(f="n%n")
        nodes <<= nm.mnumber(q=True, a="row_index", o=self.nFile)
        nodes.run()
    else:
        nodes = nm.mjoin(i=self.__wfN, m=self.mFile, f="id", k="n", K="node")
        nodes <<= nm.muniq(k="id")
        nodes <<= nm.msortf(f="id%n")
        nodes <<= nm.mcut(f="id:n,no:row_index", o=self.nFile)
        nodes.run()

    # Remove the work files.
    self.__tempW.rm()
def msum_key3_presort(iFile, loop):
    """Time ``msum`` by ``key3`` on input that has been sorted beforehand.

    The input is sorted once by ``key3`` into the file ``sorted`` (not
    timed); only the ``msum`` runs on that file are measured.

    Args:
        iFile: input CSV file name.
        loop: number of timed repetitions.

    Returns:
        List of ``loop`` elapsed times in seconds, one per ``msum`` run.
    """
    nm.msortf(f="key3", i=iFile, o="sorted").run()
    sec = []
    for _ in range(loop):
        # perf_counter is monotonic, so an interval cannot be distorted by
        # system clock adjustments the way time.time() differences can.
        st = time.perf_counter()
        nm.msum(k="key3", f="int1,int2,float1,float2", i="sorted",
                o="oFile").run()
        sec.append(time.perf_counter() - st)
    return sec
def g2pair(self, ni, nf, ei, ef1, ef2, ew, numFile, mapFile, weightFile):
    """Convert graph files into numbered node pairs plus a weight file.

    Args:
        ni: node file (only read when ``nf`` is given).
        nf: node field name in ``ni``.
        ei: edge file.
        ef1: first node field name in ``ei``.
        ef2: second node field name in ``ei``.
        ew: edge weight field name; when falsy a constant weight 1 is used.
        numFile: output file of space separated number pairs.
        mapFile: output mapping file (node, flag, num).
        weightFile: output file holding the weight column.

    Returns:
        The value of ``mrecount`` on ``mapFile``.
    """
    # Edge end points carry flag 0, nodes from the node file flag 1.
    sources = [
        nm.mcut(f="%s:node" % (fld), i=ei).msetstr(a="flag", v=0)
        for fld in (ef1, ef2)
    ]
    if nf:
        sources.append(nm.mcut(f="%s:node" % (nf), i=ni).msetstr(a="flag", v=1))

    mapping = nm.mbest(i=sources, k="node", s="flag", fr=0, size=1)
    # isolated nodes are set to the end of position in mapping file.
    # S= must start from 0 (but inside R vertex number will be added one)
    mapping <<= nm.mnumber(s="flag,node", a="num", S=0, o=mapFile)
    mapping.run()

    pairs = None
    pairs <<= nm.mcut(f=[ef1, ef2], i=ei)
    pairs <<= nm.mjoin(k=ef1, K="node", m=mapFile, f="num:num1")
    pairs <<= nm.mjoin(k=ef2, K="node", m=mapFile, f="num:num2")
    pairs <<= nm.mcut(f="num1,num2")
    pairs <<= nm.mfsort(f="num1,num2")
    pairs <<= nm.msortf(f="num1%n,num2%n", nfno=True)
    pairs <<= nm.cmd("tr ',' ' ' ")
    pairs <<= nm.mwrite(o=numFile)
    pairs.run()

    nodeSize = mrecount(i=mapFile)

    if not ew:
        # No weight field: emit a constant weight column.
        ew = "weight"
        nm.msetstr(v=1, a=ew, i=ei).mcut(f=ew, o=weightFile).run()
    else:
        nm.mcut(f=ew, i=ei, o=weightFile).run()

    return nodeSize
def edge2mtx(self, ei, itra, map1, map2):
    """Write the edges of ``ei`` as numbered transactions.

    ``self.ef1`` and ``self.ef2`` values are numbered from 1 (numberings go
    to ``map1`` and ``map2``); the edges are then folded per ``num1`` and the
    ``num2`` lists are written, space separated and header-less, to ``itra``.
    """

    def numbering(fld, numName, outFile):
        # Pipeline numbering the distinct non-null values of one field.
        p = nm.mcut(f=fld, i=ei)
        p <<= nm.muniq(k=fld)
        p <<= nm.mdelnull(f=fld)
        p <<= nm.mnumber(q=True, a=numName, S=1, o=outFile)
        return p

    p1 = numbering(self.ef1, "num1", map1)
    p2 = numbering(self.ef2, "num2", map2)

    tra = None
    tra <<= nm.mcut(f=[self.ef1, self.ef2], i=ei)
    tra <<= nm.mjoin(k=self.ef1, m=p1, f="num1")
    tra <<= nm.mjoin(k=self.ef2, m=p2, f="num2")
    tra <<= nm.mcut(f="num1,num2")
    tra <<= nm.mtra(k="num1", f="num2")
    tra <<= nm.msortf(f="num1%n")
    tra <<= nm.mcut(f="num2", nfno=True)
    tra <<= nm.cmd("tr ',' ' '")
    tra <<= nm.mwrite(o=itra)
    tra.run()
def calTime(iFile, oFile):
    """Build the method x data-size table of "mean(sd)" timing strings.

    Reads ``iFile`` (fields used: method, dataSize, mean, sd) and writes the
    cross table to ``oFile`` with the columns small / middle / large.
    """
    table = None
    # Remember the input row order so it can be restored after the pivot.
    table <<= nm.mnumber(q=True, a="id", i=iFile)
    table <<= nm.mcal(c='$s{mean}+"("+$s{sd}+")"', a="time")
    table <<= nm.m2cross(k="method", s="dataSize", f="time")
    table <<= nm.msortf(f="id%n")
    table <<= nm.mcut(
        f="method,10000:small,1000000:middle,100000000:large")
    table <<= nm.mfldname(q=True, o=oFile)
    table.run()
def calRelative(iFile, oFile):
    """Build the method x data-size table of times relative to ``mcut``.

    Each mean in ``iFile`` is divided by the ``mcut`` mean of the same
    ``dataSize`` and the scores are pivoted into small / middle / large
    columns written to ``oFile``.
    """
    # Baseline rows: method == "mcut".
    # NOTE(review): the baseline is read from the literal "methods.csv",
    # not from iFile -- confirm that this is intended.
    baseline = None
    baseline <<= nm.mselstr(f="method", v="mcut", i="methods.csv")

    table = None
    table <<= nm.mnumber(q=True, a="id", i=iFile)
    table <<= nm.mjoin(k="dataSize", m=baseline, f="mean:base")
    table <<= nm.mcal(c='round(${mean}/${base},0.1)', a="score")
    table <<= nm.m2cross(k="method", s="dataSize", f="score")
    table <<= nm.msortf(f="id%n")
    table <<= nm.mcut(
        f="method,10000:small,1000000:middle,100000000:large")
    table <<= nm.mfldname(q=True, o=oFile)
    table.run()
def readCSV(iParams):
    """Read a sequence CSV and return its data per class.

    Args:
        iParams: dict with the keys ``iFile``, ``sid``, ``time`` and
            ``item``; optionally ``cFile``, ``csid`` and ``cNames`` to
            split the sequences by the class found in a class file.

    Returns:
        dict mapping each class name to the result of ``_readCSV_sub`` on
        that class's file.  Without a class file the only key is "single".
    """
    iFile = iParams["iFile"]
    sidF = iParams["sid"]
    eidF = iParams["time"]
    itemF = iParams["item"]

    temp = Mtemp()
    xxdatPath = temp.file()
    mkDir(xxdatPath)

    byClass = "cFile" in iParams
    if byClass:
        # Settings of the class file.
        cFile = iParams["cFile"]
        csidF = iParams["csid"]
        classF = iParams["cNames"]

    # Stages shared by both modes: pick, drop nulls, de-duplicate.
    pipe = None
    pipe <<= nm.mcut(f="%s:sid,%s:eid,%s:item" % (sidF, eidF, itemF), i=iFile)
    pipe <<= nm.mdelnull(f="sid,eid,item")
    pipe <<= nm.muniq(k="sid,eid,item")

    if byClass:
        # One output file per class value.
        pipe <<= nm.mjoin(k="sid", K=csidF, m=cFile, f="%s:class" % (classF))
        pipe <<= nm.msep(s="sid,eid%n,item", d="%s/${class}" % (xxdatPath), p=True)
        pipe.run()
        classNames = [
            os.path.basename(path) for path in glob.glob("%s/*" % (xxdatPath))
        ]
    else:
        pipe <<= nm.msortf(f="sid,eid%n,item", o="%s/single" % (xxdatPath))
        pipe.run()
        classNames = ["single"]

    return {
        name: _readCSV_sub("%s/%s" % (xxdatPath, name)) for name in classNames
    }
def g2pair(self, ni, nf, ei, ef1, ef2, ew, numFile, mapFile, weightFile):
    """Convert graph files into numbered node pairs plus a weight file.

    Args:
        ni: node file (only read when ``nf`` is given).
        nf: node field name in ``ni``.
        ei: edge file.
        ef1: first node field name in ``ei``.
        ef2: second node field name in ``ei``.
        ew: edge weight field name; when falsy a constant weight 1 is used.
        numFile: output file of space separated number pairs.
        mapFile: output mapping file (node, flag, num).
        weightFile: output file holding the weight column.

    Returns:
        The value of ``mrecount`` on ``mapFile``.
    """
    # Edge end points carry flag 0.
    wf1 = nm.mcut(f="%s:node" % (ef1), i=ei).msetstr(v=0, a="flag")
    wf2 = nm.mcut(f="%s:node" % (ef2), i=ei).msetstr(v=0, a="flag")

    inputs = [wf1, wf2]
    if nf:
        # Nodes listed in the node file carry flag 1.
        inputs.append(nm.mcut(f=nf + ":node", i=ni).msetstr(v=1, a="flag"))

    mapping = None
    mapping <<= nm.mcut(i=inputs, f="node,flag")
    if nf:
        mapping <<= nm.mbest(k="node", s="flag", fr=0, size=1)
    else:
        mapping <<= nm.muniq(k="node")
    mapping <<= nm.mnumber(s="flag,node", a="num", S=0, o=mapFile)
    mapping.run()

    pairs = None
    pairs <<= nm.mcut(f=[ef1, ef2], i=ei)
    pairs <<= nm.mjoin(k=ef1, K="node", m=mapFile, f="num:num1")
    pairs <<= nm.mjoin(k=ef2, K="node", m=mapFile, f="num:num2")
    pairs <<= nm.mcut(f="num1,num2")
    pairs <<= nm.mfsort(f="num1,num2")
    pairs <<= nm.msortf(f="num1%n,num2%n", nfno=True)
    pairs <<= nm.cmd("tr ',' ' ' ")
    pairs <<= nm.mwrite(o=numFile)
    pairs.run()

    if not ew:
        # No weight field: emit a constant weight column.
        ew = "weight"
        nm.msetstr(v=1, a=ew, i=ei).mcut(f=ew, o=weightFile).run()
    else:
        nm.mcut(f=ew, i=ei, o=weightFile).run()

    nodeSize = mrecount(i=mapFile)
    return nodeSize
def convRsl(self, ifile, ofile, map1, map2, logDir=None):
    """Write a numbered result back out with the original node names.

    Each row of the header-less ``ifile`` is numbered from 1 (``num1``) and
    its items are expanded into one ``num2`` per row; both numbers are
    replaced by the names found in ``map1`` / ``map2`` and the resulting
    ``self.ef1``, ``self.ef2`` pairs are written.

    Args:
        ifile: header-less input file of number lists.
        ofile: output file name.
        map1: mapping of ``num1`` to ``self.ef1``.
        map2: mapping of ``num2`` to ``self.ef2``.
        logDir: when given, the output is written to ``logDir/ofile``
            instead of ``ofile``.
    """
    if logDir:
        # The former "{}/#{ofile}" template (a Ruby interpolation leftover)
        # made str.format raise KeyError('ofile').
        outPath = "{}/{}".format(logDir, ofile)
    else:
        outPath = ofile

    f = None
    f <<= nm.mcut(nfni=True, f="0:tra", i=ifile)
    f <<= nm.msed(f="tra", c=' $', v="")  # strip the trailing blank
    f <<= nm.mnumber(q=True, S=1, a="num1")
    f <<= nm.mtra(r=True, f="tra:num2")
    f <<= nm.mjoin(k="num2", m=map2, f=self.ef2)
    f <<= nm.mjoin(k="num1", m=map1, f=self.ef1)
    f <<= nm.msortf(f="num1%n,num2%n")
    f <<= nm.mcut(f=[self.ef1, self.ef2])
    f <<= nm.mfldname(q=True, o=outPath)
    f.run()
def g2pair(self, ni, nf, ei, ef1, ef2, ipair, mapFile):
    """Convert graph files into a header-less file of numbered node pairs.

    Args:
        ni: node file; when truthy its ``nf`` field is added to the nodes.
        nf: node field name in ``ni``.
        ei: edge file.
        ef1: first node field name in ``ei``.
        ef2: second node field name in ``ei``.
        ipair: output file of ``num1,num2`` pairs.
        mapFile: output mapping file (node, num).
    """
    nodeSources = [nm.mcut(f=fld + ":node", i=ei) for fld in (ef1, ef2)]
    # NOTE(review): the guard tests ni while nf is what gets concatenated;
    # confirm callers always pass both together.
    if ni:
        nodeSources.append(nm.mcut(f=nf + ":node", i=ni))

    # Numbering of the distinct node names.
    fmap = None
    fmap <<= nm.mcut(i=nodeSources, f="node")
    fmap <<= nm.muniq(k="node")
    fmap <<= nm.mnumber(q=True, a="num", o=mapFile)

    # Edges with both end points replaced by their numbers.
    pairs = nm.mcut(f=[ef1, ef2], i=ei)
    pairs <<= nm.mjoin(k=ef1, K="node", m=fmap, f="num:num1")
    pairs <<= nm.mjoin(k=ef2, K="node", m=fmap, f="num:num2")
    pairs <<= nm.mcut(f="num1,num2")
    pairs <<= nm.mfsort(f="num1,num2")
    pairs <<= nm.msortf(f="num1%n,num2%n", nfno=True, o=ipair)
    pairs.run()
def repTaxo(self, taxonomy):
    """Replace every item of the transaction file by its taxonomy class.

    ``self.items`` is updated first; then the item field of ``self.file``
    is replaced by the taxonomy value joined from ``taxonomy.file`` and
    ``self.file`` is pointed at the rewritten file.
    """
    self.items.repTaxo(taxonomy)

    # Replace the item classes using the taxonomy.
    tFile = taxonomy.file
    itemFN = taxonomy.itemFN
    taxoFN = taxonomy.taxoFN

    tf = mtemp.Mtemp()
    xx1 = tf.file()

    replaced = None
    replaced <<= nm.mjoin(k=self.itemFN, K=itemFN, f=taxoFN, m=tFile,
                          i=self.file)
    # The taxonomy value takes over the item field name.
    replaced <<= nm.mcut(
        f=",".join([self.idFN, self.timeFN, taxoFN + ":" + self.itemFN]))
    replaced <<= nm.msortf(
        f=",".join([self.idFN, self.timeFN, self.itemFN]), o=xx1)
    replaced.run()

    self.file = self.temp.file()
    shutil.move(xx1, self.file)
def g2pair(self, ni, nf, ei, ef1, ef2, numFile, mapFile):
    """Convert graph files into a file of numbered node pairs.

    Args:
        ni: node file (only read when ``nf`` is given).
        nf: node field name in ``ni``.
        ei: edge file.
        ef1: first node field name in ``ei``.
        ef2: second node field name in ``ei``.
        numFile: output file of space separated number pairs.
        mapFile: output mapping file (node, flag, num).

    Returns:
        The value of ``mrecount`` on ``mapFile``.
    """
    # Edge end points carry flag 0, nodes from the node file flag 1.
    sources = [
        nm.mcut(f="%s:node" % (fld), i=ei).msetstr(a="flag", v=0)
        for fld in (ef1, ef2)
    ]
    if nf:
        sources.append(
            nm.mcut(f="%s:node" % (nf), i=ni).msetstr(a="flag", v=1))

    mapping = nm.mbest(i=sources, k="node", s="flag", fr=0, size=1)
    # isolated nodes are set to the end of position in mapping file.
    # S= must start from 0 (but inside R vertex number will be added one)
    mapping <<= nm.mnumber(s="flag,node", a="num", S=0, o=mapFile)
    mapping.run()

    pairs = None
    pairs <<= nm.mcut(f=[ef1, ef2], i=ei)
    pairs <<= nm.mjoin(k=ef1, K="node", m=mapFile, f="num:num1")
    pairs <<= nm.mjoin(k=ef2, K="node", m=mapFile, f="num:num2")
    pairs <<= nm.mcut(f="num1,num2")
    pairs <<= nm.msortf(f="num1%n,num2%n", nfno=True)
    pairs <<= nm.cmd("tr ',' ' ' ")
    pairs <<= nm.mwrite(o=numFile)
    pairs.run()

    return mrecount(i=mapFile)
def __init__(self, db, outtf=True):
    """Prepare the per-class weight files and the numbered transaction file.

    Args:
        db: input transaction database; its ``items``, ``clsNameRecSize``,
            ``clsFN``, ``cFile``, ``file``, ``idFN``, ``timeFN`` and
            ``itemFN`` attributes are read here.
        outtf: stored as ``self.outtf``.
    """
    self.size = None
    self.msgoff = True
    self.temp = nu.Mtemp()
    self.db = db  # input database
    self.file = self.temp.file()
    self.outtf = outtf
    items = self.db.items

    # Weight files, one per class.  For a given class the rows of that
    # class are the positives and all other rows the negatives: negatives
    # get the weight -1 and positives the value returned by calOmega.  Each
    # weight file holds a single weight column, one row per row of the
    # class file.
    self.weightFile = {}
    self.posWeight = {}
    self.sigma = {}
    for cName, posSize in db.clsNameRecSize.items():
        self.weightFile[cName] = self.temp.file()
        self.posWeight[cName] = self.calOmega(posSize)
        cpara = "%s:%s" % (cName, self.posWeight[cName])
        classColumn = nm.mcut(nfno=True, f=self.db.clsFN, i=self.db.cFile)
        classColumn.mchgstr(nfn=True, f=0, O=-1, o=self.weightFile[cName],
                            c=cpara).run()

    # Convert the items from symbols to numbers.
    numbered = None
    numbered <<= nm.mjoin(k=self.db.itemFN, K=items.itemFN, m=items.file,
                          f=items.idFN, i=self.db.file)
    numbered <<= nm.mcut(
        f=",".join([self.db.idFN, self.db.timeFN, items.idFN]))
    numbered <<= nm.msortf(f=self.db.idFN + "," + self.db.timeFN + "%n")
    numbered <<= nm.mtra(k=self.db.idFN, f=items.idFN)
    numbered <<= nm.mcut(f=items.idFN, nfno=True, o=self.file)
    numbered.run()
def mnetpie(ei,
            ni,
            ef,
            nf,
            o,
            nodeSizeFld=None,
            nodeTipsFld=None,
            nodeColorFld=None,
            edgeWidthFld=None,
            edgeColorFld=None,
            pieDataFld=None,
            pieTipsFld=None,
            picFld=None,
            undirect=False,
            offline=False):
    """Render a graph as a d3.js force-layout HTML page.

    Nodes are drawn as plain circles, as pie charts (``pieDataFld`` plus
    ``pieTipsFld``) or as images (``picFld``).

    Args:
        ei: edge file.
        ni: node file.
        ef: the two edge end-point field names, as a list or as a comma
            separated string.
        nf: node name field in ``ni``.
        o: output HTML file name; ``None`` writes to standard output.
        nodeSizeFld: node size field (default size "50").
        nodeTipsFld: node tooltip field (default empty).
        nodeColorFld: node colour field (default "skyblue").
        edgeWidthFld: edge width field (default "1").
        edgeColorFld: edge colour field (default "black").
        pieDataFld: field holding the pie slice values.
        pieTipsFld: field holding the pie slice labels.
        picFld: field holding the node image reference.
        undirect: when true no arrow heads are drawn.
        offline: when true the d3 source is embedded instead of linked.

    Raises:
        Exception: on an invalid combination of field options.
    """
    # ---- option validation -------------------------------------------
    if isinstance(ef, str):
        ef = ef.split(',')
    if len(ef) != 2:
        raise Exception("ef= takes just two field names")

    # pieDataFld and pieTipsFld must be given together or not at all.
    if (pieDataFld is None) != (pieTipsFld is None):
        raise Exception(
            "pieDataFld= pieTipsFld= are necessary at the same time")

    if picFld is not None and pieDataFld is not None:
        raise Exception(
            "picFld= cannot be specified with pieDataFld= pieTipsFld=")

    if nodeColorFld is not None:
        if (picFld is not None or pieDataFld is not None
                or pieTipsFld is not None):
            raise Exception(
                "nodeColorFld= cannot be specified with pieDataFld= pieTipsFld= picFld="
            )

    # 1: pie chart nodes, 2: picture nodes, 0: plain circles.
    if pieDataFld is not None and pieTipsFld is not None:
        caseNo = 1
    elif picFld is not None:
        caseNo = 2
    else:
        caseNo = 0

    tempW = mtemp.Mtemp()

    # ---- node data ----------------------------------------------------
    xxnode = tempW.file()
    nodefld = []   # fields taken from the node file
    nodedmy1 = []  # names of the fields filled with a default ...
    nodedmy2 = []  # ... and the default values themselves
    nodefld.append("%s:node" % (nf))
    if nodeSizeFld is not None:
        nodefld.append("%s:nodesize" % (nodeSizeFld))
    else:
        nodedmy1.append("nodesize")
        nodedmy2.append("50")

    if nodeTipsFld is not None:
        # was "nodeTipFld", an undefined name
        nodefld.append("%s:nodeT" % (nodeTipsFld))
    else:
        nodedmy1.append("nodeT")
        nodedmy2.append("")

    if nodeColorFld is not None:
        nodefld.append("%s:nodeClr" % (nodeColorFld))
    else:
        nodedmy1.append("nodeClr")
        nodedmy2.append("skyblue")

    if caseNo == 1:
        nodefld.append("%s:pieD" % (pieDataFld))
        nodefld.append("%s:pieT" % (pieTipsFld))
    elif caseNo == 2:
        nodefld.append("%s:pic" % (picFld))
    else:
        nodedmy1.append("pic")
        nodedmy2.append("")

    f1 = None
    f1 <<= nm.mcut(i=ni, f=nodefld)
    if nodedmy1:
        f1 <<= nm.msetstr(a=nodedmy1, v=nodedmy2)

    if caseNo == 1:
        f1 <<= nm.mshare(k="node", f="pieD:pieDS")
        f1 <<= nm.mnumber(k="node", a="nodeid", B=True)
        f2 = nm.muniq(k="pieT", i=f1)
        f2 <<= nm.mnumber(q=True, a="pieTno")
        f2 <<= nm.mjoin(k="pieT", f="pieTno", i=f1).iredirect("m")
        f2 <<= nm.msortf(f="nodeid%n,pieTno%n", o=xxnode)
    else:
        # NOTE(review): a= carries a "%n" suffix here while the edge joins
        # below look up a field called "nodeid" -- confirm against mnumber.
        f2 = nm.mnumber(a="nodeid%n", q=True, i=f1, o=xxnode)
    f2.run()

    # ---- edge data ----------------------------------------------------
    xxedge = tempW.file()
    edgefld = []
    edgedmy1 = []
    edgedmy2 = []
    edgefld.append("%s:edgeS" % (ef[0]))
    edgefld.append("%s:edgeE" % (ef[1]))
    if edgeWidthFld is not None:
        edgefld.append("%s:edgesize" % (edgeWidthFld))
    else:
        edgedmy1.append("edgesize")
        edgedmy2.append("1")

    if edgeColorFld is not None:
        edgefld.append("%s:edgecolor" % (edgeColorFld))
    else:
        edgedmy1.append("edgecolor")
        edgedmy2.append("black")

    f3 = None
    f3 <<= nm.mcut(i=ei, f=edgefld)
    if edgedmy1:
        f3 <<= nm.msetstr(a=edgedmy1, v=edgedmy2)
    f3 <<= nm.mnumber(a="preNo", q=True)
    f3 <<= nm.mbest(k="edgeS,edgeE", s="preNo%nr")
    f3 <<= nm.mnumber(s="preNo%n", a="edgeID")
    f3 <<= nm.mjoin(k="edgeS", K="node", f="nodeid:edgeSid", m=xxnode)
    f3 <<= nm.mjoin(k="edgeE", K="node", f="nodeid:edgeEid", m=xxnode)

    # Flag edges that exist in both directions (just in case).
    f4 = None
    f4 <<= nm.mfsort(i=f3, f="edgeS,edgeE")
    f4 <<= nm.mcount(k="edgeS,edgeE", a="edgecnt")
    f4 <<= nm.mselnum(c="[2,]", f="edgecnt")
    f4 <<= nm.msetstr(a="biflg", v=1)
    f4 <<= nm.mjoin(k="edgeID", f="biflg", n=True, i=f3).iredirect("m")
    f4 <<= nm.msortf(f="edgeID%n", o=xxedge)
    f4.run()

    # ---- graph data as a JavaScript object literal --------------------
    gdata = "{\"nodes\":["
    if caseNo == 1:
        # One object per node, holding the list of its pie slices.
        nodedatastk = []
        nodedatas = ""
        for val, top, bot in nm.readcsv(xxnode).getline(k="nodeid",
                                                        otype='dict',
                                                        q=True):
            name = val["node"]
            r = val["nodesize"]
            title = val["nodeT"]
            if top:
                nodedatas = "{\"name\":\"%s\",\"title\":\"%s\",\"r\":%s,\"node\":[" % (
                    name, title, r)

            pieTno = val["pieTno"]
            pieT = val["pieT"]
            pieDS = val["pieDS"]
            nodedatas += "{\"group\":%s,\"color\":%s,\"value\":%s,\"title\":\"%s\"}" % (
                pieTno, pieDS, pieDS, pieT)

            if bot:
                nodedatas += "]}"
                nodedatastk.append(nodedatas)
                nodedatas = ""
            else:
                nodedatas += ","

        gdata += ",".join(nodedatastk)
    else:
        nodedatastk = []
        for val in nm.readcsv(xxnode).getline(otype='dict'):
            name = val["node"]
            r = val["nodesize"]
            title = val["nodeT"]
            pic = val["pic"]
            nclr = val["nodeClr"]
            nodedatas = "{\"name\":\"%s\",\"title\":\"%s\",\"pic\":\"%s\",\"color\":\"%s\",\"r\":%s}" % (
                name, title, pic, nclr, r)
            nodedatastk.append(nodedatas)

        gdata += ",".join(nodedatastk)

    gdata += "],\"links\": ["

    edgedatastk = []
    for val in nm.readcsv(xxedge).getline(otype='dict'):
        es = val["edgeSid"]
        et = val["edgeEid"]
        esize = val["edgesize"]
        ecolor = val["edgecolor"]
        edgedatas = "{\"source\":%s,\"target\":%s,\"length\":500,\"ewidth\":%s,\"color\":\"%s\"}" % (
            es, et, esize, ecolor)
        edgedatastk.append(edgedatas)

    gdata += ','.join(edgedatastk)
    gdata += "]}"

    # ---- JavaScript fragments -------------------------------------------
    # The fragments below contain statements without a terminating
    # semicolon, so each statement must stay on its own line.
    direct = ".attr('marker-end','url(#arrowhead)')"
    if undirect:
        direct = ""

    nodeTemplate = '''
node
  .append("circle")
  .attr("r",function(d){return d.r/4;})
  .attr("fill", function(d){return d.color;})
  .append("title")
  .text(function(d){return d.title;})
'''

    nodemakeTemplate = '''
for(var i=0 ; i< graph.nodes.length;i++){
  graph.nodes[i].id = i
}
'''

    if pieDataFld is not None:
        nodeTemplate = '''
node.selectAll("path")
  .data( function(d, i){ return pie(d.node); })
  .enter()
  .append("svg:path")
  .attr("d", arc)
  .attr("fill", function(d, i) { return color(d.data.group); })
  .append("title")
  .text(function(d){{return d.data.title;}})
node.append("circle")
  .attr("r",function(d){{return d.r/4;}})
  .attr({ 'fill': 'white' })
  .append("title")
  .text(function(d){{return d.title;}});
'''

        nodemakeTemplate = '''
for(var i=0 ; i< graph.nodes.length;i++){
  var r = graph.nodes[i].r
  for(var j=0 ; j< graph.nodes[i].node.length;j++){
    graph.nodes[i].node[j]['r'] = r
  }
  graph.nodes[i].id = i
}
'''
    elif picFld is not None:
        nodeTemplate = '''
node
  .append("image")
  .attr("height",function(d){return d.r;})
  .attr("width",function(d){return d.r;})
  .attr("x",function(d){return -1 * d.r/2; })
  .attr("y",function(d){return -1 * d.r/2; })
  .attr("xlink:href",function(d){return d.pic; })
  .append("title")
  .text(function(d){return d.title;})
'''

    d3js_str = "<script type='text/javascript' src='http://d3js.org/d3.v3.min.js'></script>"
    if offline:
        # The embedded script must be closed with </script>.
        d3js_str = "<script>%s</script>" % (vjs.ViewJs.d3jsMin())

    # ---- page template (braces doubled for str.format) ------------------
    outTemplate = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
{d3js_str}
<style></style>
</head>
<body>
<script type="text/javascript">
var graph = {gdata} ;
var width = 4000,
    height = 3000;
var color = d3.scale.category10();
{nodemakeTemplate};
for(var i=0 ; i< graph.links.length;i++){{
  graph.links[i].id = i
}}
var pie = d3.layout.pie()
  .sort(null)
  .value(function(d) {{ return d.value; }});
var arc = d3.svg.arc()
  .outerRadius( function(d){{ return d.data.r ; }})
  .innerRadius( function(d){{ return d.data.r/2 ; }} );
var svg = d3.select("body").append("svg")
  .attr("width", width)
  .attr("height", height);
d3.select("svg").append('defs').append('marker')
  .attr({{'id':'arrowhead', 'viewBox':'-0 -5 10 10', 'refX':30, 'refY':0, 'orient':'auto-start-reverse', 'markerWidth':5, 'markerHeight':5, 'xoverflow':'visible'}})
  .append('path')
  .attr('d', 'M 0,-5 L 10 ,0 L 0,5')
  .attr('fill', '#999')
  .style('stroke','none');
var g = svg.append("g");
var node = g.selectAll(".node");
var link = g.selectAll(".link");
nodes = graph.nodes
links = graph.links
var force = d3.layout.force()
  .linkDistance(200)
  .linkStrength(3.5)
  .charge(-3500)
  .gravity(0.1)
  .friction(0.95)
  .size([width, height])
  .on("tick", function() {{
    link
      .attr("x1", function(d) {{ return d.source.x; }})
      .attr("y1", function(d) {{ return d.source.y; }})
      .attr("x2", function(d) {{ return d.target.x; }})
      .attr("y2", function(d) {{ return d.target.y; }});
    node
      .attr("x", function(d) {{ return d.x; }})
      .attr("y", function(d) {{ return d.y; }})
      .attr("transform", function(d) {{ return "translate(" + d.x + "," + d.y + ")"}});
  }});
node = node.data(nodes, function( d ) {{ return d.id; }} );
link = link.data(links, function( d ) {{ return d.id; }} );
link
  .enter()
  .append("line")
  .attr("class", "link")
  .style("stroke", function( d ) {{ return d.color; }} )
  .style("stroke-width", function( d ) {{ return d.ewidth; }})
  {direct}
node
  .enter()
  .append("g")
  .attr("class", "node")
  .style({{}})
  .call(force.drag)
  .on("contextmenu", function(nd) {{
    d3.event.preventDefault();
    force.stop()
    nodes.splice( nd.index, 1 );
    links = links.filter(function(nl) {{
      return nl.source.index != nd.index && nl.target.index != nd.index;
    }});
    node = node.data(nodes, function( d ) {{ return d.id; }} );
    node.exit().remove();
    link = link.data( links, function( d ) {{ return d.id; }} );
    link.exit().remove();
    force.nodes(nodes)
      .links(links)
      .start();
  }});
{nodeTemplate}
node
  .append("text")
  .attr("text-anchor", "middle")
  .style("stroke", "black")
  .text(function(d) {{ return d.name; }});
force.nodes(nodes)
  .links(links)
  .start();
</script>
</body>
</html>
'''.format(d3js_str=d3js_str,
           gdata=gdata,
           nodemakeTemplate=nodemakeTemplate,
           direct=direct,
           nodeTemplate=nodeTemplate)

    # ---- output -------------------------------------------------------
    if o is None:
        sys.stdout.write(outTemplate)
    else:
        # The context manager closes the file even when the write fails.
        with open(o, "w") as html:
            html.write(outTemplate)
def run(self):
    """Run LCM on the edge transactions and write the patterns found.

    The edge file is converted to numbered transactions with ``pair2tra``,
    ``extTake.lcm`` is run on them (type "CIf", support 1, optionally
    bounded by ``self.minSize2`` / ``self.maxSize2``) and the result is
    written to ``self.oFile`` with the original node names, either one row
    per pattern item (``self.byedge``) or one row per pattern.
    """
    tempW = mtemp.Mtemp()
    xxtra = tempW.file()
    xxmap1 = tempW.file()
    xxmap2 = tempW.file()
    lcmout = tempW.file()
    xxt0 = tempW.file()
    xxp0 = tempW.file()
    xx3t = tempW.file()
    xx4t = tempW.file()

    self.pair2tra(self.ei, self.ef1, self.ef2, xxtra, xxmap1, xxmap2)

    runPara = {"type": "CIf", "sup": 1, "o": lcmout, "i": xxtra}
    if self.minSize2:
        runPara["l"] = self.minSize2
    if self.maxSize2:
        runPara["u"] = self.maxSize2

    extTake.lcm(runPara)
    extTake.lcmtrans(lcmout, "p", xxt0)

    # Patterns with their item numbers replaced by the node2 names.
    patterns = None
    patterns <<= nm.mdelnull(f="pattern", i=xxt0)
    patterns <<= nm.mvreplace(vf="pattern", m=xxmap2, K="num2", f="node2")
    patterns <<= nm.mcut(f="pid,pattern,size:size2")
    patterns <<= nm.mvsort(vf="pattern")
    patterns <<= nm.msortf(f="pid")

    sizeRange = "[{},{}]".format(self.minSize1, self.maxSize1)

    if not self.byedge:
        extTake.lcmtrans(lcmout, "t", xx4t)

        byPattern = None
        byPattern <<= nm.mjoin(k="__tid", m=xxmap1, i=xx4t, f="node1",
                               K="num1")
        byPattern <<= nm.mtra(k="pid", f="node1")
        byPattern <<= nm.mvcount(vf="node1:size1")
        byPattern <<= nm.mjoin(k="pid", m=patterns, f="pattern,size2")
        byPattern <<= nm.mselnum(f="size1", c=sizeRange)
        byPattern <<= nm.mvsort(vf="node1,pattern")
        byPattern <<= nm.msortf(f="node1,pattern")
        byPattern <<= nm.mcut(f="node1:{},pattern:{},size1,size2".format(
            self.ef1, self.ef2), o=self.oFile)
        byPattern.run()
        return

    # One row per item of every pattern.
    f_e0 = nm.mtra(f="pattern", i=patterns, r=True)

    extTake.lcmtrans(lcmout, "t", xx3t)

    # Occurrences with the transaction number replaced by node1.
    f_e1 = None
    f_e1 <<= nm.mjoin(k="__tid", m=xxmap1, f="node1", K="num1", i=xx3t)
    f_e1 <<= nm.msortf(f="pid")

    # Number of node1 values per pattern, restricted to the size range.
    f_e2 = None
    f_e2 <<= nm.mcount(k="pid", a="size1", i=f_e1)
    f_e2 <<= nm.mselnum(f="size1", c=sizeRange)

    f_e3 = None
    f_e3 <<= nm.mjoin(k="pid", m=f_e2, f="size1", i=f_e1)
    f_e3 <<= nm.mnjoin(k="pid", m=f_e0, f="pattern,size2")
    f_e3 <<= nm.mcut(f="pid:id,node1:{},pattern:{},size1,size2".format(
        self.ef1, self.ef2), o=self.oFile)
    f_e3.run()
def __init__(self, iFile, idFN, timeFN, itemFN, padding, clsFN=None):
    """Load a sequence transaction file and derive its basic statistics.

    Args:
        iFile: input file name.
        idFN: transaction id field name.
        timeFN: time field name.
        itemFN: item field name.
        padding: when truthy, missing time points of each transaction are
            padded with the special item "!".
        clsFN: optional class field name; when given the class file and the
            per-class counts are built as well.
    """
    self.file = None  # transaction file name
    self.idFN = None  # transaction id field name (str)
    self.timeFN = None  # time field name (str)
    self.itemFN = None  # item field name (str)
    self.clsFN = None  # class field name (str)
    self.size = None  # number of transactions
    self.items = None  # Items object
    self.taxonomy = None  # taxonomy object
    self.clsNameRecSize = None  # number of records per class
    self.clsSize = None  # number of classes
    self.cFile = None  # class file

    self.temp = mtemp.Mtemp()
    self.iFile = iFile  # input file
    self.iPath = os.path.abspath(self.iFile)  # full path
    self.idFN = idFN
    self.timeFN = timeFN
    self.itemFN = itemFN
    self.file = self.temp.file()  # output file name
    self.padding = padding  # time series padding

    # Select, sort and de-duplicate; with padding, additionally insert "!"
    # (a character early in ASCII order) for the missing time points.
    fields = self.idFN + "," + self.timeFN + "," + self.itemFN
    order = self.idFN + "," + self.timeFN + "%n," + self.itemFN

    load = None
    load <<= nm.mcut(f=fields, i=self.iFile)
    load <<= nm.msortf(f=order)
    if self.padding:
        load <<= nm.muniq(k=fields)
        load <<= nm.mpadding(k=self.idFN, f=self.timeFN + "%n", v="!",
                             o=self.file)
    else:
        load <<= nm.muniq(k=fields, o=self.file)
    load.run()

    # Number of transactions = number of distinct ids.
    idCount = nm.mcut(f=self.idFN, i=self.file)
    idCount = idCount.muniq(k=self.idFN)
    idCount = idCount.mcount(a="__cnt")
    counted = idCount.mcut(f='__cnt').run()
    self.size = int(counted[0][0])

    # Build the item object from the transaction data (the same call is
    # used with and without padding).
    self.items = items.Items(self.file, self.itemFN)

    if not clsFN:
        return

    self.clsFN = clsFN
    self.cFile = self.temp.file()
    fpara_c = "%s,%s" % (self.idFN, self.clsFN)
    nm.mcut(f=fpara_c, i=self.iFile).muniq(k=fpara_c, o=self.cFile).run()

    # Per-class record counts, converted from strings to numbers.
    self.clsSize = 0
    self.clsNames = []
    self.clsNameRecSize = {}
    for row in nm.mcut(f=self.clsFN, i=self.cFile).mcount(k=self.clsFN,
                                                          a='count'):
        self.clsNames.append(row[0])
        self.clsNameRecSize[row[0]] = int(row[1])
        self.clsSize += 1
def run(self, **kw_args):
    """Run sketchsort on the input and write the similar row pairs.

    Args:
        **kw_args: ``msg="on"`` raises the ``KG_ScpVerboseLevel``
            environment variable from "2" to "4".

    Raises:
        Exception: when ``extMining.sketchsort`` returns a truthy status.
    """
    verbose = kw_args.get("msg") == "on"
    os.environ['KG_ScpVerboseLevel'] = "4" if verbose else "2"

    # Name of the line number field.
    ln = "{}line".format(self.pt)

    xxmap = self.workf.file()
    sdata = self.workf.file()

    # Convert the data for sketchsort.
    numbered = nm.mnumber(S=0, a=ln, q=True, i=self.iFile)
    if not self.wfH:
        # No window field: add a constant one.
        self.wfH = ["{}wf".format(self.pt)]
        body = nm.msetstr(v=0, a=self.wfH, i=numbered)
        body <<= nm.mcut(f=self.wfH + self.tidH + self.elem)
    else:
        body = nm.mcut(f=self.wfH + self.tidH + self.elem, i=numbered)

    # Line number -> tid mapping, needed to translate the result back.
    fmap = nm.mcut(f=[ln] + self.tidH, i=numbered, o=xxmap)

    body <<= nm.mcut(f=self.wfH + self.elem, nfno=True)
    body <<= nm.cmd("tr ',' ' '")
    body <<= nm.mwrite(o=sdata)
    nm.runs([fmap, body])

    # Run sketchsort.
    outf = self.workf.file()
    para = {}
    if self.dist == "C":
        para["cosdist"] = self.th
    elif self.dist == "H":
        para["hamdist"] = self.th

    if not self.uc:
        para["centering"] = True

    para["auto"] = True
    para["windowsize"] = self.ws
    para["seed"] = self.seed
    para["missingratio"] = self.mr
    para["i"] = sdata
    para["o"] = outf

    if extMining.sketchsort(para):
        raise Exception("#ERROR# checking sketchsort messages")

    # tid fields of the second row of each pair get the suffix "2".
    tid2 = ",".join(["{}:{}2".format(val, val) for val in self.tidH])

    result = nm.mread(i=outf)
    result <<= nm.cmd("tr ' ' ',' ")
    result <<= nm.mcut(nfni=True, f="0:eline1,1:eline2,2:distance")
    result <<= nm.mfsort(f="eline*")
    # Look up the tid that corresponds to each line number.
    result <<= nm.mjoin(k="eline1", K="{}line".format(self.pt), f=self.tidH,
                        m=xxmap)
    result <<= nm.mjoin(k="eline2", K="{}line".format(self.pt), f=tid2,
                        m=xxmap)
    result <<= nm.msortf(f="eline1%n,eline2%n")
    result <<= nm.mcut(r=True, f="eline1,eline2")
    result <<= nm.msortf(f=self.tidH)
    result <<= nm.mfldname(q=True, o=self.oFile)
    result.run()

    nu.mmsg.endLog(self.__cmdline())
def enumerate(self, eArgs):
    """Enumerate frequent itemsets under the conditions given in eArgs.

    :type eArgs: dict
    :type eArgs['type']: str
    :type eArgs['minCnt']: int
    :type eArgs['minSup']: float
    :type eArgs['maxCnt']: int
    :type eArgs['maxSup']: float
    :type eArgs['minLen']: int
    :type eArgs['maxLen']: int
    :type eArgs['top']: int
    :type eArgs['skipTP']: bool (default: False)
    :param eArgs: enumeration parameters
    :param eArgs['type']: kind of itemset to extract
        ('F': frequent, 'C': closed, 'M': maximal)
    :param eArgs['minCnt']: minimum support (count)
    :param eArgs['minSup']: minimum support (ratio); required when
        ``minCnt`` is not given
    :param eArgs['maxCnt']: maximum support (count)
    :param eArgs['maxSup']: maximum support (ratio)
    :param eArgs['minLen']: minimum number of items in an itemset
    :param eArgs['maxLen']: maximum number of items in an itemset
    :param eArgs['top']: number of top-support patterns to enumerate
    :param eArgs['skipTP']: do not output the patterns (itemsets) matched
        by each transaction

    The patterns are written to ``self.pFile`` and, unless ``skipTP`` is
    set, the transaction/pattern pairs to ``self.tFile``.
    """
    import copy
    import re

    tf = mtemp.Mtemp()
    self.eArgs = eArgs
    self.type = eArgs["type"]

    # Minimum support, as a count and as a ratio.
    if eArgs.get("minCnt") is not None:
        self.minCnt = int(eArgs["minCnt"])
        self.minSup = float(self.minCnt) / float(self.db.traSize)
    else:
        self.minSup = float(eArgs["minSup"])
        self.minCnt = int(self.minSup * float(self.db.traSize) + 0.99)

    # Maximum support, as a count and as a ratio.
    self.maxCnt = None
    if eArgs.get("maxCnt") is not None:
        self.maxCnt = int(eArgs["maxCnt"])
        self.maxSup = float(self.maxCnt) / float(self.db.traSize)
    elif eArgs.get("maxSup") is not None:
        self.maxSup = float(eArgs["maxSup"])
        self.maxCnt = int(self.maxSup * float(self.db.traSize) + 0.99)

    # A trailing "_" in the lcm type suppresses its messages.
    params = {}
    if self.msgoff:
        params["type"] = "%sIf_" % (self.type)
    else:
        params["type"] = "%sIf" % (self.type)

    if self.maxCnt:
        params["U"] = str(self.maxCnt)
    if eArgs.get("minLen") is not None:
        params["l"] = str(eArgs['minLen'])
    if eArgs.get("maxLen") is not None:
        params["u"] = str(eArgs['maxLen'])

    # When an upper limit on the number of patterns is given, run lcm once
    # beforehand to obtain the matching minimum support.
    if eArgs.get("top") is not None:
        self.top = eArgs["top"]

    # self.top may never have been assigned when "top" was not supplied.
    top = getattr(self, "top", None)
    if top and top > 0:
        xxtop = tf.file()
        top_params = copy.deepcopy(params)
        top_params["i"] = self.file
        top_params["sup"] = "1"
        top_params["K"] = str(top)
        top_params["so"] = xxtop
        top_params["type"] = re.sub('_$', '', top_params["type"])
        extTake.lcm(top_params)
        with open(xxtop, "r") as rfile:
            self.minCnt = int(rfile.read().strip())
        if self.minCnt < 0:
            self.minCnt = 1

    self.skipTP = False
    if "skipTP" in eArgs:
        self.skipTP = eArgs["skipTP"]

    # lcm output file.  lcm does not create it when no frequent pattern is
    # found, so an empty file is created up front.
    lcmout = tf.file()
    with open(lcmout, "w"):
        pass

    # Run lcm.
    params["i"] = self.file
    params["sup"] = str(self.minCnt)
    params["o"] = lcmout
    extTake.lcm(params)

    # Enumerate the single-item sets, needed for the lift value.
    xxone = tf.file()
    tpstr = "FIf_" if self.msgoff else "FIf"
    extTake.lcm(type=tpstr, i=self.file, sup=1, o=xxone, l=1, u=1)

    # Compute the support of the patterns and write them as CSV.
    xxp0 = tf.file()
    self.pFile = self.temp.file()
    items = self.db.items
    trans0 = self.temp.file()
    extTake.lcmtrans(lcmout, "p", trans0)

    f = nm.mdelnull(i=trans0, f="pattern")
    f <<= nm.mvreplace(vf="pattern", m=items.file, K=items.idFN,
                       f=items.itemFN)
    f <<= nm.msetstr(v=self.db.traSize, a="total")
    f <<= nm.mcal(c='${count}/${total}', a="support")
    f <<= nm.mcut(f="pid,pattern,size,count,total,support")
    f <<= nm.mvsort(vf="pattern")
    f <<= nm.msortf(f="pid", o=xxp0)
    f.run()

    # xxp0 now holds: pid,pattern,size,count,total,support
    xxp1 = tf.file()
    if items.taxonomy is None:
        # No taxonomy: keep every pattern.
        shutil.move(xxp0, xxp1)
    else:
        # With a taxonomy: keep only the patterns that survive reduceTaxo.
        zdd = VSOP.constant(0)
        for fldVal in nm.mcut(i=xxp0, f='pattern'):
            zdd = zdd + VSOP.itemset(fldVal[0])

        zdd = self.reduceTaxo(zdd, self.db.items)
        xxz1 = tf.file()
        zdd.csvout(xxz1)

        f0 = None
        f0 <<= nm.mcut(nfni=True, f="1:pattern", i=xxz1)
        f0 <<= nm.mvsort(vf="pattern")
        f0 <<= nm.msortf(f="pattern")

        f = None
        f <<= nm.msortf(f="pattern", i=xxp0)
        f <<= nm.mcommon(k="pattern", m=f0)
        f <<= nm.msortf(f="pid", o=xxp1)
        f.run()

    # Lift value.
    transl = tf.file()
    extTake.lcmtrans(xxone, "p", transl)

    # Count of every single item.
    xxp2 = nm.mdelnull(i=transl, f="pattern")
    xxp2 <<= nm.mvreplace(vf="pattern", m=items.file, K=items.idFN,
                          f=items.itemFN)
    xxp2 <<= nm.msortf(f="pattern")

    # Per pattern: sum of ln(count) over its items.
    xxp3 = nm.mcut(f="pid,pattern", i=xxp1)
    xxp3 <<= nm.mtra(f="pattern", r=True)
    xxp3 <<= nm.mjoin(k="pattern", m=xxp2, f="count:c1")
    xxp3 <<= nm.mcal(c='ln(${c1})', a="c1ln")
    xxp3 <<= nm.msum(k="pid", f="c1ln")

    f3 = nm.mjoin(k="pid", f="c1ln", i=xxp1, m=xxp3)
    f3 <<= nm.mcal(
        c='round(exp(ln(${count})-${c1ln}+(${size}-1)*ln(${total})),0.0001)',
        a="lift")
    f3 <<= nm.mcut(f="pid,size,count,total,support,lift,pattern")
    f3 <<= nm.msortf(f="support%nr", o=self.pFile)
    f3.run()

    if not self.skipTP:
        # Write out the patterns that occur in each transaction.
        self.tFile = self.temp.file()
        xxw3i = tf.file()
        extTake.lcmtrans(lcmout, "t", xxw3i)

        # Transaction number -> transaction id.
        xxw1 = nm.mcut(f=self.db.idFN, i=self.db.file).muniq(
            k=self.db.idFN).mnumber(S=0, a="__tid", q=True).msortf(f="__tid")
        xxw2 = nm.mcut(f="pid", i=self.pFile)
        xxw3 = nm.mcommon(k="pid", i=xxw3i, m=xxw2).mjoin(
            k="__tid", m=xxw1,
            f=self.db.idFN).mcut(f=self.db.idFN + ",pid", o=self.tFile)
        xxw3.run()
def enumerate(self, eArgs):
    """Enumerate contrast sequence patterns class by class and merge them.

    Each class in ``self.db.clsNameRecSize`` is treated in turn as the
    positive side, with the remaining records as the negative side.  lcm_seq
    is run through ``extTake`` for every class; the per-class results are then
    merged into ``self.pFile`` (patterns) and, when ``self.outtf`` is set,
    into ``self.tFile`` (transaction id / class / pattern id).  ``self.size``
    receives the record count of ``self.pFile``.

    Keys read from ``eArgs``: ``minCnt`` or ``minSup`` (one of them must be
    present), and optionally ``maxCnt``, ``maxSup``, ``minGR``, ``minProb``,
    ``uniform``, ``minLen``, ``maxLen``, ``gap``, ``win`` and ``padding``.
    """
    tf = nu.Mtemp()

    # Overall minimum support (ratio) and minimum support count.
    if "minCnt" in eArgs:
        self.minCnt = int(eArgs["minCnt"])
        self.minSup = float(self.minCnt) / float(self.db.size)
    else:
        self.minSup = float(eArgs["minSup"])
        self.minCnt = int(self.minSup * float(self.db.size) + 0.99)

    # Overall maximum support (ratio) and maximum support count.
    self.maxCnt = None
    if "maxCnt" in eArgs:
        self.maxCnt = int(eArgs["maxCnt"])
        self.maxSup = float(self.maxCnt) / float(self.db.size)
    elif "maxSup" in eArgs:
        self.maxSup = float(eArgs["maxSup"])
        self.maxCnt = int(self.maxSup * float(self.db.size) + 0.99)

    # Result file names, one entry per class (class = pos, others = neg).
    pFiles = []
    tFiles = []
    for cName, posSize in self.db.clsNameRecSize.items():
        negSize = self.db.size - posSize

        # Minimum growth rate: given directly, or derived from minProb
        # (formula (4) of the manual, per the original comment).
        if "minGR" in eArgs:
            self.minGR = eArgs["minGR"]
        else:
            minProb = eArgs["minProb"] if "minProb" in eArgs else 0.5
            odds = minProb / (1 - minProb)
            if "uniform" in eArgs and eArgs["uniform"]:
                self.minGR = odds * (self.db.clsSize - 1)
            else:
                self.minGR = odds * (float(negSize) / float(posSize))

        # Per-class minimum support count.  float() keeps this consistent
        # with the conversion applied to minSup at the top of the method.
        if "minCnt" in eArgs:
            self.minPos = eArgs["minCnt"]
        else:
            self.minPos = int(float(eArgs["minSup"]) * float(posSize) + 0.99)

        # Per-class maximum support count (overrides the overall value).
        if "maxCnt" in eArgs:
            self.maxCnt = int(eArgs["maxCnt"])
        elif "maxSup" in eArgs:
            self.maxCnt = int(float(eArgs["maxSup"]) * float(posSize) + 0.99)

        self.sigma[cName] = self.calSigma(self.minPos, self.minGR, posSize, negSize)

        # lcm_seq output file.  lcm_seq creates no file when there is no
        # frequent pattern, so create an empty one beforehand.
        lcmout = tf.file()
        with open(lcmout, "w"):
            pass

        # lcm_seq parameters.
        params = {}
        params["type"] = "CIA_" if self.msgoff else "CIA"
        if self.maxCnt:  # original note: "window size upper limit"
            params["U"] = str(self.maxCnt)
        if "minLen" in eArgs:
            params["l"] = str(eArgs["minLen"])
        if "maxLen" in eArgs:
            params["u"] = str(eArgs["maxLen"])
        if "gap" in eArgs:
            params["g"] = str(eArgs["gap"])
        if "win" in eArgs:
            params["G"] = str(eArgs["win"])
        params["w"] = self.weightFile[cName]
        params["i"] = self.file
        params["sup"] = str(self.sigma[cName])
        params["o"] = lcmout

        # With padding, run the lcm_seq variant that does not output item 0.
        if "padding" in eArgs and eArgs["padding"]:
            extTake.lcmseq_zero(params)
        else:
            extTake.lcmseq(params)

        # Compute the per-class pattern statistics and write them as CSV.
        pFiles.append(self.temp.file())
        transle = self.temp.file()
        extTake.lcmtrans(lcmout, "e", transle)  # pattern,countP,countN,size,pid

        f = None
        f <<= nm.mdelnull(f="pattern", i=transle)
        f <<= nm.mcal(c='round(${countN},1)', a="neg")
        f <<= nm.mcal(c='round(${countP}/%s,1)' % (self.posWeight[cName]), a="pos")
        f <<= nm.mdelnull(f="pattern")
        f <<= nm.msetstr(v=cName, a="class")
        f <<= nm.msetstr(v=posSize, a="posTotal")
        f <<= nm.msetstr(v=self.minGR, a="minGR")
        f <<= nm.mcut(f="class,pid,pattern,size,pos,neg,posTotal,minGR", o=pFiles[-1])
        f.run()

        if self.outtf:
            # Write the patterns that occur in each transaction.
            tFiles.append(self.temp.file())
            xxw = tf.file()
            # Transaction id -> sequential number map.  The original built
            # this file twice (a sorted pipeline followed by this one writing
            # to the same path); only this last result was ever used.
            nm.mcut(f=self.db.idFN,i=self.db.file).muniq(k=self.db.idFN).mnumber(S=0,a="__tid",q=True,o=xxw).run()

            translt = self.temp.file()
            extTake.lcmtrans(lcmout, "t", translt)
            nm.mjoin(k="__tid",m=xxw,f=self.db.idFN,i=translt).msetstr(v=cName,a="class").mcut(f=self.db.idFN+",class,pid",o=tFiles[-1]).run()

    # Merge the per-class pattern and tid-pid files into the final outputs.
    self.pFile = self.temp.file()
    self.tFile = self.temp.file()

    # Concatenate the pattern files and assign a pid unique across classes.
    xxpCat = tf.file()
    f = nm.mcat(i=",".join(pFiles))
    f <<= nm.msortf(f="class,pid")
    f <<= nm.mnumber(s="class,pid", S=0, a="ppid", o=xxpCat)
    f.run()

    # Derive the statistics of every pattern.
    items = self.db.items
    f = nm.mcut(f="class,ppid:pid,pattern,size,pos,neg,posTotal,minGR", i=xxpCat)
    f <<= nm.msetstr(v=self.db.size, a="total")
    f <<= nm.mcal(c='${total}-${posTotal}', a="negTotal")  # total of the negative side
    f <<= nm.mcal(c='${pos}/${posTotal}', a="support")
    f <<= nm.mcal(c='if(${neg}==0,1.797693135e+308,(${pos}/${posTotal})/(${neg}/${negTotal}))', a="growthRate")
    if "uniform" in eArgs and eArgs["uniform"] == True:
        f <<= nm.mcal(c='(${pos}/${posTotal})/(${pos}/${posTotal}+(%s-1)*${neg}/${negTotal})' % (self.db.clsSize), a="postProb")
    else:
        f <<= nm.mcal(c='${pos}/(${pos}+${neg})', a="postProb")
    # Selection by minimum support and minimum growth rate.
    f <<= nm.msel(c='${pos}>=%s&&${growthRate}>=${minGR}' % (self.minPos))
    f <<= nm.mvreplace(vf="pattern", m=items.file, K=items.idFN, f=items.itemFN)
    f <<= nm.mcut(f="class,pid,pattern,size,pos,neg,posTotal,negTotal,total,support,growthRate,postProb")
    f <<= nm.mvsort(vf="pattern")
    f <<= nm.msortf(f="class%nr,postProb%nr,pos%nr", o=self.pFile)
    f.run()

    if self.outtf:
        # Master used to keep only transactions containing an enumerated pattern.
        xxp4 = nm.mcut(f="class,pid", i=self.pFile)
        f = nm.mcat(i=",".join(tFiles))
        f <<= nm.mjoin(k="class,pid", m=xxpCat, f="ppid")  # attach the cross-class pid (ppid)
        f <<= nm.mcommon(k="class,ppid", K="class,pid", m=xxp4)  # keep enumerated patterns only
        f <<= nm.mcut(f=self.db.idFN + ",class,ppid:pid")
        f <<= nm.msortf(f=self.db.idFN + ",class,pid", o=self.tFile)
        f.run()

    self.size = nu.mrecount(i=self.pFile)
def run(self):
    """Enumerate cliques with mace and write them as (id, node, size) rows.

    The graph is converted to numbered pairs by ``self.g2pair``, mace is run
    through ``extTake``, and the numbered result is mapped back to the
    original node names and written to ``self.oFile``.  When ``self.ni`` is
    set, nodes that belong to no clique are appended as cliques of size 1.
    When ``self.logFile`` is set, the arguments and the elapsed time are
    written there as key/value CSV.
    """
    from datetime import datetime
    started = datetime.now()

    wf = nu.Mtemp()
    xxinp = wf.file()
    xxmap = wf.file()
    self.g2pair(self.ni, self.nf, self.ei, self.ef1, self.ef2, xxinp, xxmap)

    # mace output (transaction format).
    xxmace = wf.file()
    maceType = "Ce" if self.all else "Me"
    if self.msgoff:
        maceType += "_"
    para = {"type": maceType, "i": xxinp, "o": xxmace}
    if self.minSize:
        para["l"] = self.minSize
    if self.maxSize:
        para["u"] = self.maxSize
    extTake.mace(para)

    # Number of cliques found; used as the first id for isolated nodes.
    cliqueCnt = nu.mrecount(i=xxmace, nfni=True)

    # Convert the transaction-format output into (id, num, size) pairs.
    fpair = None
    fpair <<= nm.mcut(i=xxmace, nfni=True, f="0:num")
    fpair <<= nm.mnumber(q=True, a="id")
    fpair <<= nm.mvcount(vf="num:size")
    fpair <<= nm.mtra(r=True, f="num")

    if not self.ni:
        xxpair = fpair
    else:
        # ni= was given: add the isolated nodes as single-node cliques.
        fpair_u = nm.mread(i=fpair)
        if self.all:
            fpair_u <<= nm.mselstr(f="size", v=1)
        fpair_u <<= nm.mcut(f="num")
        fpair_u <<= nm.muniq(k="num")

        # Select every node that is not included in any clique.
        xxiso = None
        xxiso <<= nm.mcut(f="num", i=xxmap)
        xxiso <<= nm.mcommon(k="num", m=fpair_u, r=True)
        xxiso <<= nm.mnumber(S=cliqueCnt, a="id", q=True)
        xxiso <<= nm.msetstr(v=1, a="size")
        # Original note: results go wrong without this mcut; to be fixed later.
        xxiso <<= nm.mcut(f="id,num,size")
        xxpair = nm.mcut(i=[fpair, xxiso], f="id,num,size")

    # Map the node numbers back to the original node names.
    xxpair <<= nm.mjoin(m=xxmap, k="num", f="node")
    xxpair <<= nm.mcut(f="id,node,size")
    xxpair <<= nm.msortf(f="id,node", o=self.oFile)
    xxpair.run()

    procTime = datetime.now() - started

    # Log file output.
    if self.logFile:
        rows = [["key", "value"]]
        rows.extend([key, str(val)] for key, val in self.args.items())
        rows.append(["time", str(procTime)])
        nm.writecsv(i=rows, o=self.logFile).run()
def run(self):
    """Build the top-rank similarity graph and write it to ``self.eo``.

    For every node in field ``self.ef1`` the edges are ranked by ``self.sim``
    in descending numeric order and only ranks up to ``self.rank`` are kept.
    ``self.dir`` then selects which of those edges are output:

    * ``"x"``  -- every selected edge,
    * ``"b"``  -- only edges selected in both directions,
    * other    -- only edges selected in one direction.

    With ``self.udout`` the node pair of each edge is normalised with
    ``mfsort`` and the similarity is averaged per pair.  When both
    ``self.ni`` and ``self.no`` are set the node file is copied unchanged.

    Example input (from the original comment)::

        n1,n2,sim
        a,b,0.40
        a,c,0.31
        b,c,0.20
    """
    ef1, ef2, sim = self.ef1, self.ef2, self.sim

    # Rank the edges of every ef1 node by similarity.
    xpal = None
    if self.directed:
        # For any edge a->b, the ranking is taken per "a".
        xpal <<= nm.mnumber(k=ef1, s=sim + "%nr", e="skip", S=1, a="##rank", i=self.ei)
    else:
        xxa = nm.mfsort(f=[ef1, ef2], i=self.ei)
        xxb = nm.mfsort(f=[ef2, ef1], i=self.ei)
        xpal <<= nm.muniq(k=[ef1, ef2], i=[xxa, xxb])
        xpal <<= nm.mnumber(k=ef1, s=sim + "%nr", e="skip", S=1, a="##rank")
    # Keep the top ranks only (shared by both branches).
    xpal <<= nm.mselnum(f="##rank", c="[," + str(self.rank) + "]")

    def _mutual(fields):
        # Build a->b->c from the top-rank graph; a == c means the edge was
        # selected in both directions.
        both = None
        both <<= nm.mnjoin(k=ef2, K=ef1, m=xpal, f=ef2 + ":##ef2," + sim + ":sim2", i=xpal)
        both <<= nm.msel(c="$s{%s}==$s{##ef2}" % (ef1))
        both <<= nm.mcut(f=fields)
        return both

    if self.dir == "x":
        # Both directions plus one direction: everything selected.
        xpal1 = None
        xpal1 <<= nm.mcut(f=[ef1, ef2, sim], i=xpal)
    elif self.dir == "b":
        # Both directions only.
        xpal1 = _mutual([ef1, ef2, sim])
    else:
        # One direction only: remove the mutual edges from the selection.
        xxc = _mutual([ef1, ef2])
        xpal1 = None
        xpal1 <<= nm.mcut(f=[ef1, ef2, sim], i=xpal)
        xpal1 <<= nm.mcommon(k=ef1 + "," + ef2, m=xxc, r=True)

    runpal = None
    kpara = "%s,%s" % (ef1, ef2)
    if self.udout:
        runpal <<= nm.mfsort(f=kpara, i=xpal1)
        runpal <<= nm.mavg(k=kpara, f=sim)
        runpal <<= nm.msortf(f=kpara, o=self.eo)
    else:
        runpal <<= nm.msortf(f=kpara, i=xpal1, o=self.eo)
    runpal.run()

    if self.ni and self.no:
        shutil.copyfile(self.ni, self.no)
def enumerate(self, eArgs):
    """Enumerate frequent sequence patterns with lcm_seq.

    The patterns are written to ``self.pFile`` with their count, total and
    support.  When ``self.outtf`` is set, the patterns occurring in each
    transaction are written to ``self.tFile``.

    Keys read from ``eArgs``: ``minCnt`` or ``minSup``, and optionally
    ``maxCnt``, ``maxSup``, ``top``, ``minLen``, ``maxLen``, ``gap``, ``win``
    and ``padding``.  For ``minCnt``, ``maxCnt``, ``maxSup`` and ``top`` a
    value of ``None`` is treated the same as an absent key.
    """
    def given(key):
        # An option counts as specified only when present and not None.
        return key in eArgs and eArgs[key] is not None

    tf = mtemp.Mtemp()

    # Minimum support (ratio) and minimum support count.
    if given("minCnt"):
        self.minCnt = int(eArgs["minCnt"])
        self.minSup = float(self.minCnt) / float(self.db.size)
    else:
        self.minSup = float(eArgs["minSup"])
        self.minCnt = int(self.minSup * float(self.db.size) + 0.99)

    # Maximum support (ratio) and maximum support count.
    self.maxCnt = None
    if given("maxCnt"):
        self.maxCnt = int(eArgs["maxCnt"])
        self.maxSup = float(self.maxCnt) / float(self.db.size)
    elif given("maxSup"):
        self.maxSup = float(eArgs["maxSup"])
        self.maxCnt = int(self.maxSup * float(self.db.size) + 0.99)

    if given("top"):
        self.top = eArgs["top"]

    # When an upper limit on the number of patterns is given, run lcm_seq
    # once beforehand to obtain the minimum support count to use.
    if self.top and self.top > 0:
        xxtop = tf.file()
        extTake.lcmseq(type="Cf", K=str(self.top), i=self.file, sup="1", so=xxtop)
        with open(xxtop, "r") as rfile:
            self.minCnt = int(rfile.read().strip())

    # lcm_seq output file.  lcm_seq creates no file when there is no frequent
    # pattern, so create an empty one beforehand.
    lcmout = tf.file()
    open(lcmout, "w").close()

    # lcm_seq parameters.
    params = {"type": "CIf_" if self.msgoff else "CIf"}
    if self.maxCnt:
        params["U"] = str(self.maxCnt)
    for argName, optName in (("minLen", "l"), ("maxLen", "u"), ("gap", "g"), ("win", "G")):
        if argName in eArgs:
            params[optName] = str(eArgs[argName])
    params["i"] = self.file
    params["sup"] = str(self.minCnt)
    params["o"] = lcmout

    # With padding, run the lcm_seq variant that does not output item 0.
    usePadding = 'padding' in eArgs and eArgs["padding"]
    if usePadding:
        extTake.lcmseq_zero(params)
    else:
        extTake.lcmseq(params)

    # Compute the support of each pattern and write the patterns as CSV.
    self.pFile = self.temp.file()
    items = self.db.items
    transl = self.temp.file()
    extTake.lcmtrans(lcmout, "p", transl)

    pat = nm.mdelnull(f="pattern", i=transl)
    pat <<= nm.mvreplace(vf="pattern", m=items.file, K=items.idFN, f=items.itemFN)
    pat <<= nm.msetstr(v=self.db.size, a="total")
    pat <<= nm.mcal(c='${count}/${total}', a="support")
    pat <<= nm.mcut(f="pid,pattern,size,count,total,support")
    pat <<= nm.msortf(f="support%nr", o=self.pFile)
    pat.run()

    if not self.outtf:
        return

    # Write the sequences that occur in each transaction.
    self.tFile = self.temp.file()
    xxw = tf.file()

    # Transaction id -> sequential number map.
    tidMap = None
    tidMap <<= nm.mcut(f=self.db.idFN, i=self.db.file)
    tidMap <<= nm.muniq(k=self.db.idFN)
    tidMap <<= nm.mnumber(S=0, a="__tid", q=True)
    tidMap <<= nm.msortf(f="__tid", o=xxw)
    tidMap.run()

    translt = self.temp.file()
    extTake.lcmtrans(lcmout, "t", translt)

    tidPat = None
    tidPat <<= nm.msortf(f="__tid", i=translt)
    tidPat <<= nm.mjoin(k="__tid", m=xxw, f=self.db.idFN)
    tidPat <<= nm.mcut(f=self.db.idFN + ",pid")
    tidPat <<= nm.msortf(f=self.db.idFN + ",pid", o=self.tFile)
    tidPat.run()
def enumerate(self, eArgs):
    """Enumerate contrast item patterns class by class and merge them.

    Each class in ``self.db.clsNameRecSize`` is treated in turn as the
    positive side, with the remaining transactions as the negative side.
    lcm is run through ``extTake`` for every class; the per-class results are
    merged into ``self.pFile`` (patterns) and, when ``self.outtf`` is set,
    into ``self.tFile`` (transaction id / class / pattern id).  When the item
    set has a taxonomy, patterns made redundant by it are removed.

    Keys read from ``eArgs``: ``type`` (required), ``minCnt`` or ``minSup``
    (one is required), and optionally ``maxCnt``, ``maxSup``, ``minGR``,
    ``minProb``, ``uniform``, ``minLen`` and ``maxLen``.  The support options
    accept either one value for all classes or a dict keyed by class name,
    e.g. ``0.05`` or ``{"c1": 0.05, "c2": 0.06}``.
    """
    def _class_value(value, cls):
        # Resolve an option that is either a scalar or a per-class dict.
        return value[cls] if isinstance(value, dict) else value

    pFiles = []
    tFiles = []
    tf = mtemp.Mtemp()
    for cName, posSize in self.db.clsNameRecSize.items():
        negSize = self.db.traSize - posSize

        # Minimum growth rate: given directly, or derived from minProb
        # (formula (4) of the manual, per the original comment).
        if "minGR" in eArgs:
            self.minGR = eArgs["minGR"]
        else:
            minProb = eArgs["minProb"] if ("minProb" in eArgs) else 0.5
            odds = minProb / (1 - minProb)
            if "uniform" in eArgs and eArgs["uniform"] == True:
                self.minGR = odds * (self.db.clsSize - 1)
            else:
                self.minGR = odds * (float(negSize) / float(posSize))

        # Minimum support count for this class.  The scalar minSup branch
        # originally called the undefined name "flost" and raised NameError.
        if "minCnt" in eArgs:
            self.minPos = _class_value(eArgs["minCnt"], cName)
        else:
            self.minPos = int(
                _class_value(eArgs["minSup"], cName) * float(posSize) + 0.99)

        # Maximum support count for this class.
        if "maxCnt" in eArgs:
            self.maxPos = _class_value(eArgs["maxCnt"], cName)
        elif "maxSup" in eArgs:
            self.maxPos = int(
                _class_value(eArgs["maxSup"], cName) * float(posSize) + 0.99)
        else:
            self.maxPos = None

        self.sigma[cName] = self.calSigma(self.minPos, self.minGR, posSize, negSize)

        # lcm output file.  lcm creates no file when there is no frequent
        # pattern, so create an empty one beforehand.
        lcmout = tf.file()
        with open(lcmout, "w"):
            pass

        # lcm parameters.
        runPara = {}
        if self.msgoff:
            runPara["type"] = eArgs["type"] + "IA_"
        else:
            runPara["type"] = eArgs["type"] + "IA"
        # Original note: the Ruby version tested @maxCnt here, which was
        # never set anywhere, so this limit presumably never took effect there.
        if self.maxPos:
            runPara["U"] = self.maxPos
        if "minLen" in eArgs:
            runPara["l"] = str(eArgs["minLen"])
        if "maxLen" in eArgs:
            runPara["u"] = str(eArgs["maxLen"])
        runPara["w"] = self.weightFile[cName]
        runPara["i"] = self.file
        runPara["sup"] = str(self.sigma[cName])
        runPara["o"] = lcmout

        extTake.lcm(runPara)

        # Compute the per-class pattern statistics and write them as CSV.
        pFiles.append(self.temp.file())
        transle = tf.file()
        extTake.lcmtrans(lcmout, "e", transle)

        f = nm.mdelnull(f="pattern", i=transle)
        f <<= nm.mcal(c='round(${countN},1)', a="neg")
        f <<= nm.mcal(c='round(${countP}/%s,1)' % (self.posWeight[cName]), a="pos")
        f <<= nm.mdelnull(f="pattern")  # original note: is this needed?
        f <<= nm.msetstr(v=cName, a="class")
        f <<= nm.msetstr(v=posSize, a="posTotal")
        f <<= nm.msetstr(v=self.minGR, a="minGR")
        f <<= nm.mcut(f="class,pid,pattern,size,pos,neg,posTotal,minGR", o=pFiles[-1])
        f.run()

        if self.outtf:
            # Write the patterns that occur in each transaction.
            tFiles.append(self.temp.file())

            # Transaction id -> sequential number map, kept as a pipeline.
            xxw = nm.mcut(f=self.db.idFN, i=self.db.file)
            xxw <<= nm.muniq(k=self.db.idFN)
            xxw <<= nm.mnumber(S=0, a="__tid", q=True)

            translt = self.temp.file()
            extTake.lcmtrans(lcmout, "t", translt)

            f = nm.mjoin(k="__tid", m=xxw, f=self.db.idFN, i=translt)
            f <<= nm.msetstr(v=cName, a="class")
            f <<= nm.mcut(f=self.db.idFN + ",class,pid", o=tFiles[-1])
            f.run()

    # Merge the per-class pattern and tid-pid files into the final outputs.
    self.pFile = self.temp.file()
    self.tFile = self.temp.file()

    # Concatenate the pattern files and assign a pid unique across classes.
    xxpCat = tf.file()
    f = nm.mcat(i=",".join(pFiles))
    f <<= nm.msortf(f="class,pid")
    f <<= nm.mnumber(s="class,pid", S=0, a="ppid", o=xxpCat)
    f.run()

    # Derive the statistics of every pattern.
    items = self.db.items
    f = nm.mcut(f="class,ppid:pid,pattern,size,pos,neg,posTotal,minGR", i=xxpCat)
    f <<= nm.msetstr(v=self.db.traSize, a="total")
    f <<= nm.mcal(c='${total}-${posTotal}', a="negTotal")  # total of the negative side
    f <<= nm.mcal(c='${pos}/${posTotal}', a="support")
    f <<= nm.mcal(
        c='if(${neg}==0,1.797693135e+308,(${pos}/${posTotal})/(${neg}/${negTotal}))',
        a="growthRate")
    if "uniform" in eArgs and eArgs["uniform"] == True:
        f <<= nm.mcal(
            c='(${pos}/${posTotal})/(${pos}/${posTotal}+(%s-1)*${neg}/${negTotal})' % (self.db.clsSize),
            a="postProb")
    else:
        f <<= nm.mcal(c='${pos}/(${pos}+${neg})', a="postProb")
    # Selection by minimum support and minimum growth rate.
    f <<= nm.msel(c='${pos}>=%s&&${growthRate}>=${minGR}' % (self.minPos))
    f <<= nm.mvreplace(vf="pattern", m=items.file, K=items.idFN, f=items.itemFN)
    f <<= nm.mcut(
        f="class,pid,pattern,size,pos,neg,posTotal,negTotal,total,support,growthRate,postProb"
    )
    f <<= nm.mvsort(vf="pattern")
    f <<= nm.msortf(f="class%nr,postProb%nr,pos%nr", o=self.pFile)
    f.run()

    # Remove redundant taxonomy entries that subsume contained items.
    if items.taxonomy:
        zdd = VSOP.constant(0)
        dt = nm.mcut(i=self.pFile, f="pattern")
        for fldVal in dt:
            zdd = zdd + VSOP.itemset(fldVal[0])
        zdd = self.reduceTaxo(zdd, self.db.items)

        xxp1 = tf.file()
        xxp2 = tf.file()
        xxp3 = tf.file()
        zdd.csvout(xxp1)
        nm.mcut(nfni=True, f="1:pattern", i=xxp1).mvsort(vf="pattern").msortf(f="pattern", o=xxp2).run()
        nm.msortf(f="pattern", i=self.pFile).mcommon(
            k="pattern", m=xxp2).msortf(f="class%nr,postProb%nr,pos%nr", o=xxp3).run()
        shutil.move(xxp3, self.pFile)

    if self.outtf:
        # Master used to keep only transactions containing an enumerated pattern.
        xxp4 = nm.mcut(f="class,pid", i=self.pFile)
        f = nm.mcat(i=",".join(tFiles))
        f <<= nm.mjoin(k="class,pid", m=xxpCat, f="ppid")  # attach the cross-class pid (ppid)
        f <<= nm.mcommon(k="class,ppid", K="class,pid", m=xxp4)  # keep enumerated patterns only
        f <<= nm.mcut(f=self.db.idFN + ",class,ppid:pid")
        f <<= nm.msortf(f=self.db.idFN + ",class,pid", o=self.tFile)
        f.run()
def __mkTree(iFile, oFile):
    """Expand parent/child pairs into root-to-leaf paths and return the depth.

    ``iFile`` holds the parent in ``keyNum`` and the child in ``num``.
    Starting from the root nodes, children are joined on repeatedly as new
    ``node<N>`` fields until a join adds no non-null value; the table at that
    point is sorted on all fields and written to ``oFile``.

    Returns the number of join rounds that were run.

    Example of ``iFile`` (from the original comment)::

        key,nam%0,keyNum,num,nv,nc
        #2_1,#1_1,4,1,6,1
        #2_1,#1_2,4,2,0.9999999996,1
    """
    temp = mtemp.Mtemp()
    xxbase0 = temp.file()
    xxbase1 = temp.file()
    xxiFile2 = temp.file()
    xxcheck = temp.file()

    # Build unique lists of keyNum and num; which list contains which value
    # tells root nodes and leaf nodes apart.
    pairs = nm.mcut(f="keyNum,num", i=iFile)
    parents = pairs.mcut(f="keyNum").muniq(k="keyNum")
    children = pairs.mcut(f="num").muniq(k="num")

    # Root nodes: parents that never appear as a child.
    roots = nm.mcommon(k="keyNum", K="num", m=children, i=parents, r=True)
    roots = roots.mcut(f="keyNum:node0", o=xxbase0)

    # Leaf nodes: children that never appear as a parent.
    leaves = nm.mcommon(k="num", K="keyNum", m=parents, i=children, r=True)
    leaves = leaves.mcut(f="num")

    # The structure below a leaf is not needed, so drop leaves from the input.
    inner = nm.mcommon(k="num", m=leaves, r=True, i=pairs, o=xxiFile2)
    nm.runs([inner, roots])

    # Join the parent/child relation onto the root file level by level.
    #   xxbase0 (root nodes)    xxbase1              next round
    #   node0%0                 node0%0,node1        node0,node1%0,node2
    #   3                       3,                   3,,
    #   4                       4,1                  4,1,
    #                           4,2                  4,2,
    # The loop ends when the joined field has no non-null value at all.
    src, dst = xxbase0, xxbase1
    level = 0
    while True:
        joined = nm.mnjoin(k="node%d" % (level), K="keyNum", m=xxiFile2, n=True,
                           f="num:node%d" % (level + 1), i=src, o=dst)
        nm.mdelnull(i=joined, f="node%d" % (level + 1), o=xxcheck).run()

        if mrecount(i=xxcheck) == 0:
            nm.msortf(f="*", i=dst, o=oFile).run()
            return level + 1

        # Swap the file names: this round's output is the next round's input.
        src, dst = dst, src
        level += 1
def msankey(i, o, v, f, title="", h=500, w=960, nl=False, T=None):
    """Render an edge CSV as a self-contained d3 Sankey diagram (HTML).

    Args:
        i: input CSV file holding the edges.
        o: output HTML file name; ``None`` writes the page to ``sys.stdout``.
        v: name of the field holding the link value (read as int).
        f: the two node fields (source, target), either as a comma separated
            string or as a list.  Only the first two names are used.
        title: page title and heading (``None`` is treated as ``""``).
        h: chart height (``None`` is treated as 500).
        w: chart width (``None`` is treated as 960).
        nl: when true, node labels are hidden (font size 0).
        T: when not ``None``, stored in the ``KG_TmpPath`` environment
            variable with a trailing ``/`` removed.

    Raises:
        TypeError: if ``f`` is neither ``str`` nor ``list``, or names fewer
            than two fields.
    """
    # f= must name two fields.
    if isinstance(f, str):
        ff = f.split(',')
    elif isinstance(f, list):
        ff = f
    else:
        # The original formatted the undefined name "k" here (NameError).
        raise TypeError("f= unsupport " + str(type(f)))

    if len(ff) < 2:
        raise TypeError("f= takes just two field names")

    if T is not None:
        import re
        os.environ["KG_TmpPath"] = re.sub(r'/$', "", T)

    if h is None:
        h = 500
    if w is None:
        w = 960
    if title is None:
        title = ""

    tempW = mtemp.Mtemp()
    nodef = tempW.file()
    edgef = tempW.file()

    ef1 = ff[0]
    ef2 = ff[1]
    ev = v
    iFile = i
    oFile = o

    # Node list: unique values of both node fields, numbered in sorted order.
    f0 = nm.mcut(i=iFile, f="%s:nodes" % (ef1))
    f1 = nm.mcut(i=iFile, f="%s:nodes" % (ef2))
    f2 = None
    f2 <<= nm.muniq(i=[f0, f1], k="nodes")
    f2 <<= nm.mnumber(s="nodes", a="num", o=nodef)
    f2.run()

    # Edge list expressed with the node numbers.
    f3 = None
    f3 <<= nm.mcut(f="%s:nodes1,%s:nodes2,%s" % (ef1, ef2, ev), i=iFile)
    f3 <<= nm.mjoin(k="nodes1", K="nodes", m=nodef, f="num:num1")
    f3 <<= nm.mjoin(k="nodes2", K="nodes", m=nodef, f="num:num2")
    f3 <<= nm.mcut(f="num1,num2,%s" % (ev))
    f3 <<= nm.msortf(f="num1%n,num2%n", o=edgef)
    f3.run()

    # JSON literals embedded into the page.
    nodeL = [{"name": flds['nodes']}
             for flds in nm.readcsv(nodef).getline(otype='dict')]
    nodes = json.JSONEncoder().encode(nodeL)

    linkL = [{"source": flds["num1"], "target": flds["num2"], "value": flds[ev]}
             for flds in nm.readcsv(edgef).getline(
                 otype='dict',
                 dtype={"num1": "int", "num2": "int", ev: "int"})]
    links = json.JSONEncoder().encode(linkL)

    nolabel = ""
    if nl:
        nolabel = "font-size: 0px;"

    # NOTE: literal braces of the CSS/JavaScript are doubled for str.format().
    outTemplate = '''
<!DOCTYPE html>
<html class="ocks-org do-not-copy">
<meta charset="utf-8">
<title>{title}</title>
<style>
body {{
  font: 10px sans-serif;
}}
svg {{
  padding: 10px 0 0 10px;
}}
.arc {{
  stroke: #fff;
}}
#tooltip {{
  position: absolute;
  width: 150px;
  height: auto;
  padding: 10px;
  background-color: white;
  -webkit-border-radius: 10px;
  -moz-border-radius: 10px;
  border-radius: 10px;
  -webkit-box-shadow: 4px 4px 10px rgba(0,0,0,0.4);
  -moz-box-shadow: 4px 4px 10px rgba(0,0,0,0.4);
  box-shadow: 4px 4px 10px rgba(0,0,0,0.4);
  pointer-events: none;
}}
#tooltip.hidden {{
  display: none;
}}
#tooltip p {{
  margin: 0;
  font-family: sans-serif;
  font-size: 10px;
  line-height: 14px;
}}
#chart {{
  height: 500px;
}}
.node rect {{
  cursor: move;
  fill-opacity: .9;
  shape-rendering: crispEdges;
}}
.node text {{
  pointer-events: none;
  text-shadow: 0 1px 0 #fff;
  {nolabel}
}}
.link {{
  fill: none;
  stroke: #000;
  stroke-opacity: .2;
}}
.link:hover {{
  stroke-opacity: .5;
}}
</style>
<body>
<h1>{title}</h1>
<p id="chart">
<script>
{d3js_str}
d3.sankey = function() {{
  var sankey = {{}},
      nodeWidth = 24,
      nodePadding = 8,
      size = [1, 1],
      nodes = [],
      links = [];

  sankey.nodeWidth = function(_) {{
    if (!arguments.length) return nodeWidth;
    nodeWidth = +_;
    return sankey;
  }};

  sankey.nodePadding = function(_) {{
    if (!arguments.length) return nodePadding;
    nodePadding = +_;
    return sankey;
  }};

  sankey.nodes = function(_) {{
    if (!arguments.length) return nodes;
    nodes = _;
    return sankey;
  }};

  sankey.links = function(_) {{
    if (!arguments.length) return links;
    links = _;
    return sankey;
  }};

  sankey.size = function(_) {{
    if (!arguments.length) return size;
    size = _;
    return sankey;
  }};

  sankey.layout = function(iterations){{
    computeNodeLinks();
    computeNodeValues();
    computeNodeBreadths();
    computeNodeDepths(iterations);
    computeLinkDepths();
    return sankey;
  }};

  sankey.relayout = function() {{
    computeLinkDepths();
    return sankey;
  }};

  sankey.link = function() {{
    var curvature = .5;

    function link(d) {{
      var x0 = d.source.x + d.source.dx,
          x1 = d.target.x,
          xi = d3.interpolateNumber(x0, x1),
          x2 = xi(curvature),
          x3 = xi(1 - curvature),
          y0 = d.source.y + d.sy + d.dy / 2,
          y1 = d.target.y + d.ty + d.dy / 2;
      return "M" + x0 + "," + y0
           + "C" + x2 + "," + y0
           + " " + x3 + "," + y1
           + " " + x1 + "," + y1;
    }}

    link.curvature = function(_) {{
      if (!arguments.length) return curvature;
      curvature = +_;
      return link;
    }};

    return link;
  }};

  // Populate the sourceLinks and targetLinks for each node.
  // Also, if the source and target are not objects, assume they are indices.
  function computeNodeLinks() {{
    nodes.forEach(function(node) {{
      node.sourceLinks = [];
      node.targetLinks = [];
    }});
    links.forEach(function(link) {{
      var source = link.source,
          target = link.target;
      if (typeof source === "number") source = link.source = nodes[link.source];
      if (typeof target === "number") target = link.target = nodes[link.target];
      source.sourceLinks.push(link);
      target.targetLinks.push(link);
    }});
  }}

  // Compute the value (size) of each node by summing the associated links.
  function computeNodeValues() {{
    nodes.forEach( function(node) {{
      node.value = Math.max(d3.sum(node.sourceLinks, value),d3.sum(node.targetLinks, value));
    }} );
  }}

  // Iteratively assign the breadth (x-position) for each node.
  // Nodes are assigned the maximum breadth of incoming neighbors plus one;
  // nodes with no incoming links are assigned breadth zero, while
  // nodes with no outgoing links are assigned the maximum breadth.
  function computeNodeBreadths() {{
    var remainingNodes = nodes,
        nextNodes,
        x = 0;

    while (remainingNodes.length) {{
      nextNodes = [];
      remainingNodes.forEach(function(node) {{
        node.x = x;
        node.dx = nodeWidth;
        node.sourceLinks.forEach(function(link) {{
          nextNodes.push(link.target);
        }});
      }});
      remainingNodes = nextNodes;
      ++x;
    }}

    //
    moveSinksRight(x);
    scaleNodeBreadths((width - nodeWidth) / (x - 1));
  }}

  function moveSourcesRight() {{
    nodes.forEach(function(node) {{
      if (!node.targetLinks.length) {{
        node.x = d3.min(node.sourceLinks, function(d) {{ return d.target.x; }} ) - 1;
      }}
    }});
  }}

  function moveSinksRight(x) {{
    nodes.forEach(function(node) {{
      if (!node.sourceLinks.length) {{
        node.x = x - 1;
      }}
    }});
  }}

  function scaleNodeBreadths(kx) {{
    nodes.forEach(function(node) {{
      node.x *= kx;
    }});
  }}

  function computeNodeDepths(iterations) {{
    var nodesByBreadth = d3.nest()
        .key(function(d) {{ return d.x; }})
        .sortKeys(d3.ascending)
        .entries(nodes)
        .map(function(d) {{ return d.values; }});

    //
    initializeNodeDepth();
    resolveCollisions();
    for (var alpha = 1; iterations > 0; --iterations){{
      relaxRightToLeft(alpha *= .99);
      resolveCollisions();
      relaxLeftToRight(alpha);
      resolveCollisions();
    }}

    function initializeNodeDepth() {{
      var ky = d3.min(nodesByBreadth, function(nodes) {{
        return (size[1] - (nodes.length - 1) * nodePadding) / d3.sum(nodes, value);
      }});

      nodesByBreadth.forEach(function(nodes) {{
        nodes.forEach(function(node, i) {{
          node.y = i;
          node.dy = node.value * ky;
        }});
      }});

      links.forEach(function(link) {{
        link.dy = link.value * ky;
      }});
    }}

    function relaxLeftToRight(alpha) {{
      nodesByBreadth.forEach(function(nodes, breadth) {{
        nodes.forEach(function(node) {{
          if (node.targetLinks.length) {{
            var y = d3.sum(node.targetLinks, weightedSource) / d3.sum(node.targetLinks, value);
            node.y += (y - center(node)) * alpha;
          }}
        }});
      }});

      function weightedSource(link) {{
        return center(link.source) * link.value;
      }}
    }}

    function relaxRightToLeft(alpha) {{
      nodesByBreadth.slice().reverse().forEach(function(nodes){{
        nodes.forEach(function(node) {{
          if (node.sourceLinks.length) {{
            var y = d3.sum(node.sourceLinks, weightedTarget) / d3.sum(node.sourceLinks, value);
            node.y += (y - center(node)) * alpha;
          }}
        }});
      }});

      function weightedTarget(link) {{
        return center(link.target) * link.value;
      }}
    }}

    function resolveCollisions() {{
      nodesByBreadth.forEach(function(nodes) {{
        var node,
            dy,
            y0 = 0,
            n = nodes.length,
            i;

        // Push any overlapping nodes down.
        nodes.sort(ascendingDepth);
        for (i = 0; i < n; ++i) {{
          node = nodes[i];
          dy = y0 - node.y;
          if (dy > 0) node.y += dy;
          y0 = node.y + node.dy + nodePadding;
        }}

        // If the bottommost node goes outside the bounds, push it back up.
        dy = y0 - nodePadding - size[1];
        if (dy > 0) {{
          y0 = node.y -= dy;

          // Push any overlapping nodes back up.
          for (i = n - 2; i >= 0; --i) {{
            node = nodes[i];
            dy = node.y + node.dy + nodePadding - y0;
            if (dy > 0) node.y -= dy;
            y0 = node.y;
          }}
        }}
      }});
    }}

    function ascendingDepth(a, b) {{
      return a.y - b.y;
    }}
  }}

  function computeLinkDepths() {{
    nodes.forEach(function(node) {{
      node.sourceLinks.sort(ascendingTargetDepth);
      node.targetLinks.sort(ascendingSourceDepth);
    }});
    nodes.forEach(function(node) {{
      var sy = 0, ty = 0;
      node.sourceLinks.forEach(function(link) {{
        link.sy = sy;
        sy += link.dy;
      }});
      node.targetLinks.forEach(function(link) {{
        link.ty = ty;
        ty += link.dy;
      }});
    }});

    function ascendingSourceDepth(a, b) {{
      return a.source.y - b.source.y;
    }}

    function ascendingTargetDepth(a, b) {{
      return a.target.y - b.target.y;
    }}
  }}

  function center(node){{
    return node.y + node.dy / 2;
  }}

  function value(link) {{
    return link.value;
  }}

  return sankey;
}};
</script>
<script>
var margin = {{top: 1, right: 1, bottom: 6, left: 1}},
    width = {width} - margin.left - margin.right,
    height = {height} - margin.top - margin.bottom;

var formatNumber = d3.format(",.0f"),
    format = function(d) {{ return formatNumber(d) + " TWh"; }},
    color = d3.scale.category20();

var svg = d3.select("#chart").append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

var sankey = d3.sankey()
    .nodeWidth(15)
    .nodePadding(10)
    .size([width, height]);

var path = sankey.link();

var nodes={nodes}
var links={links}

sankey
    .nodes(nodes)
    .links(links)
    .layout(32);

var link = svg.append("g").selectAll(".link")
    .data(links)
  .enter().append("path")
    .attr("class", "link")
    .attr("d", path)
    .style("stroke-width", function(d) {{ return Math.max(1, d.dy); }})
    .sort(function(a, b) {{ return b.dy - a.dy; }});

link.append("title")
    .text(function(d) {{ return d.source.name + " → " + d.target.name + "" + format(d.value); }});

var node = svg.append("g").selectAll(".node")
    .data(nodes)
  .enter().append("g")
    .attr("class", "node")
    .attr("transform", function(d) {{ return "translate(" + d.x + "," + d.y + ")"; }})
    .call(
      d3.behavior.drag()
        .origin(function(d) {{ return d; }})
        .on("dragstart", function() {{ this.parentNode.appendChild(this); }})
        .on("drag", dragmove)
    );

node.append("rect")
    .attr("height", function(d) {{ return d.dy; }})
    .attr("width", sankey.nodeWidth() )
    .style("fill", function(d) {{ return d.color = color(d.name.replace(/ .*/, "")); }})
    .style("stroke", function(d) {{ return d3.rgb(d.color).darker(2); }})
  .append("title")
    .text(function(d) {{ return d.name + "" + format(d.value); }});

node.append("text")
    .attr("x", -6)
    .attr("y", function(d) {{ return d.dy / 2; }})
    .attr("dy", ".35em")
    .attr("text-anchor", "end")
    .attr("transform", null)
    .text(function(d) {{ return d.name; }})
  .filter(function(d) {{ return d.x < width / 2; }})
    .attr("x", 6 + sankey.nodeWidth())
    .attr("text-anchor", "start");

function dragmove(d){{
  d3.select(this)
    .attr("transform", "translate(" + d.x + "," + (d.y = Math.max(0, Math.min(height - d.dy, d3.event.y))) + ")");
  sankey.relayout();
  link.attr("d", path);
}}
</script>
'''.format(title=title,
           nolabel=nolabel,
           d3js_str=vjs.ViewJs.d3jsMin(),
           nodes=nodes,
           links=links,
           width=w,
           height=h)

    if oFile is None:
        sys.stdout.write(outTemplate)
    else:
        # The page declares charset utf-8, so write it as UTF-8 regardless of
        # the locale; "with" closes the file even when the write fails.
        with open(oFile, "w", encoding="utf-8") as html:
            html.write(outTemplate)
def run(self):
    """Compute item-pair similarities with sspc and write the edge/node files.

    The transaction file is converted to sspc's numbered format, sspc is run
    through ``extTake``, and every resulting pair is written to
    ``self.oeFile`` with frequency, support, confidence, lift, jaccard and
    PMI.  When ``self.onFile`` is set, per-item frequencies are written
    there; when ``self.logFile`` is set, the arguments and the elapsed time
    are written as key/value CSV.

    Raises:
        ValueError: if ``self.sim`` is set to something other than ``"J"``,
            ``"P"`` or ``"C"``.
    """
    from datetime import datetime
    t = datetime.now()

    temp = nu.Mtemp()
    xxsspcin = temp.file()
    xxmap = temp.file()

    # Convert the transaction file and create the item map file.
    # Example of xxsspcin:        Example of xxmap:
    #   3 5 0 2                     ##item,##freq%0nr,##num
    #   4 1 2                       b,4,0
    #   0 2 3 1                     d,4,1
    if self.num:
        total = self.convN(self.iFile, self.idFN, self.itemFN, xxsspcin, xxmap)
    else:
        total = self.conv(self.iFile, self.idFN, self.itemFN, xxsspcin, xxmap)

    minSupp = int(total * self.minSupPrb) if self.minSupPrb else self.minSupCnt

    # Similarity code passed to sspc.  An unknown value used to leave the
    # code unbound and fail later with UnboundLocalError.
    simCodes = {"J": "R", "P": "P", "C": "i"}
    if self.sim:
        if self.sim not in simCodes:
            raise ValueError("sim= must be one of J, P or C: %s" % (self.sim,))
        sspcSim = simCodes[self.sim]
    else:
        # Without sim=, use R with th=0 (no similarity constraint).
        sspcSim = "R"
        self.th = 0

    # ---- enumeration ----
    xxsspcout = temp.file()
    tpstr = sspcSim + "ft_" if self.msgoff else sspcSim + "ft"
    extTake.sspc(type=tpstr, TT=minSupp, i=xxsspcin, th=self.th, o=xxsspcout)

    # ---- turn the sspc output into the edge file ----
    f = nm.mread(i=xxsspcout)
    f <<= nm.cmd("tr ' ()' ','")
    f <<= nm.mcut(f="1:i1,2:i2,0:frequency,4:sim", nfni=True)

    if self.num:
        f <<= nm.mfldname(f="i1:node1,i2:node2")
        if self.sim != "C":
            f <<= nm.mfsort(f="node1,node2")
        f <<= nm.mjoin(k="node1", K="##item", m=xxmap, f="##freq:frequency1")
        f <<= nm.mjoin(k="node2", K="##item", m=xxmap, f="##freq:frequency2")
    else:
        f <<= nm.mjoin(k="i1", K="##num", m=xxmap, f="##item:node1,##freq:frequency1")
        f <<= nm.mjoin(k="i2", K="##num", m=xxmap, f="##item:node2,##freq:frequency2")
        if self.sim != "C":
            # Reorder each node pair with mfsort and carry the matching
            # frequency along with whichever node ends up in each position.
            f <<= nm.mcut(f="i1,i2,frequency,sim,node1,node2,frequency1,frequency2,node1:node1x,node2:node2x")
            f <<= nm.mfsort(f="node1x,node2x")
            f <<= nm.mcal(c='if($s{node1}==$s{node1x},$s{frequency1},$s{frequency2})', a="freq1")
            f <<= nm.mcal(c='if($s{node2}==$s{node2x},$s{frequency2},$s{frequency1})', a="freq2")
            f <<= nm.mcut(f="i1,i2,frequency,sim,node1x:node1,node2x:node2,freq1:frequency1,freq2:frequency2")

    f <<= nm.msetstr(v=total, a="total")
    f <<= nm.mcal(c='${frequency}/${frequency1}', a="confidence")
    f <<= nm.mcal(c='${frequency}/${total}', a="support")
    f <<= nm.mcal(c='${frequency}/(${frequency1}+${frequency2}-${frequency})', a="jaccard")
    f <<= nm.mcal(c='(${frequency}*${total})/((${frequency1}*${frequency2}))', a="lift")
    f <<= nm.mcal(c='(ln(${frequency})+ln(${total})-ln(${frequency1})-ln(${frequency2}))/(ln(${total})-ln(${frequency}))', a="PMI")
    f <<= nm.mcut(f="node1,node2,frequency,frequency1,frequency2,total,support,confidence,lift,jaccard,PMI")
    f <<= nm.msortf(f="node1,node2", o=self.oeFile)
    f.run()

    # ---- node file ----
    if self.onFile:
        f4 = nm.mcut(f=self.itemFN + ":node", i=self.iFile)
        f4 <<= nm.mcount(k="node", a="frequency")
        if self.node_support:
            # Keep only nodes reaching the minimum support count.
            minstr = "[%s,]" % (minSupp)
            f4 <<= nm.mselnum(f="frequency", c=minstr)
        f4 <<= nm.msetstr(v=total, a="total")
        f4 <<= nm.mcal(c='${frequency}/${total}', a="support")
        f4 <<= nm.mcut(f="node,support,frequency,total", o=self.onFile)
        f4.run()

    procTime = datetime.now() - t

    # Log file output.
    if self.logFile:
        kv = [["key", "value"]]
        for k, v in self.args.items():
            kv.append([k, str(v)])
        kv.append(["time", str(procTime)])
        nm.writecsv(i=kv, o=self.logFile).run()
def run(self, **kw_args):
    """Build a METIS graph file from the edge/node CSVs and run gpmetis.

    Processing order:
      1. Normalise the edge file into a symmetric, de-duplicated edge list
         and derive (or clean) the node list.
      2. Number the nodes from 1 and, if a node file was given, verify that
         every ``__node1`` value of the edge list appears in it.
      3. Emit the METIS adjacency body (with edge weights when ``self.ew``
         is set, and node weights prepended when ``self.nFile`` and
         ``self.nw`` are set) and prefix it with the header line.
      4. Optionally write the number/name map to ``self.mFile`` and a copy
         of the graph file to ``self.dFile``.
      5. Unless ``self.noexe`` is set, run ``gpmetis`` and write the
         node/cluster assignment to ``self.oFile``.

    Args:
        **kw_args: only ``msg`` is read; ``msg="on"`` sets the environment
            variable ``KG_ScpVerboseLevel`` to "3".

    Side effects:
        Sets ``KG_VerboseLevel`` to "2" in ``os.environ``.

    Raises:
        Exception: if an edge refers to a node missing from the node file,
            or if gpmetis produced no ``.part.*`` output file.
    """
    # Local imports: this block cannot rely on the file's import header.
    import glob
    import shutil

    os.environ["KG_VerboseLevel"] = "2"
    if kw_args.get("msg") == "on":
        os.environ['KG_ScpVerboseLevel'] = "3"

    temp = Mtemp()
    xxedge = temp.file()
    xxnode = temp.file()
    xxnam2num = temp.file()
    xxnum2nam = temp.file()
    xxebase = temp.file()
    xxbody = temp.file()

    # Clean the edge data: unique pairs, then add the reversed direction so
    # every edge is present both ways.
    e1 = None
    if self.ew:
        e1 <<= nm.mcut(f="%s:__node1,%s:__node2,%s:__weight" % (self.ef1, self.ef2, self.ew), i=self.eFile)
    else:
        e1 <<= nm.mcut(f="%s:__node1,%s:__node2" % (self.ef1, self.ef2), i=self.eFile)
    e1 <<= nm.muniq(k="__node1,__node2")

    e2 = nm.mfldname(i=e1, f="__node2:__node1,__node1:__node2")

    fe = None
    fe <<= nm.muniq(k="__node1,__node2", i=[e1, e2], o=xxedge)
    fe.run()

    # Clean the node data (remove duplicate nodes).
    fn = None
    if self.nFile:
        if self.nw:
            fn <<= nm.mcut(f="%s:__node,%s" % (self.nf, self.nw), i=self.nFile)
        else:
            fn <<= nm.mcut(f="%s:__node" % (self.nf), i=self.nFile)
        fn <<= nm.muniq(k="__node", o=xxnode)
    else:
        # No node file: collect the nodes from both ends of every edge.
        xxen1 = nm.mcut(f="__node1:__node", i=xxedge)
        xxen2 = nm.mcut(f="__node2:__node", i=xxedge)
        fn <<= nm.muniq(k="__node", o=xxnode, i=[xxen1, xxen2])
    fn.run()

    # Build the node-name <=> node-number conversion tables.
    fmap = None
    fmap <<= nm.mcut(f="__node", i=xxnode)
    fmap <<= nm.mnumber(a="__num", S=1, q=True, o=xxnam2num)
    fmap <<= nm.msortf(f="__num", o=xxnum2nam)
    fmap.run()

    # When a node file is given, check its consistency with the edge file.
    if self.nFile:
        ncheck = nm.mcut(f="__node1:__node", i=xxedge)
        ncheck <<= nm.mcommon(k="__node", m=xxnam2num, r=True)
        nmatch = ncheck.run()
        if len(nmatch) > 0:
            raise Exception(
                "#ERROR# the node named '%s' in the edge file doesn't exist in the node file."
                % (nmatch[0][0]))

    # METIS graph file format
    # first line: n m [fmt] [ncon]
    #   n: number of nodes, m: number of edges, ncon: number of node weights
    #   1xx: node size present (not used, meaning always "0")
    #   x1x: node weights present
    #   xx1: edge weights present
    # following lines: s w_1 w_2 ... w_ncon v_1 e_1 v_2 e_2 ... v_k e_k
    #   s: node size (not available here)
    #   w_x: node weight
    #   v_x: number (line number) of an adjacent node
    #   e_x: edge weight
    # --------------------
    # Generate edge data using the integer-numbered nodes.
    fnnum = nm.mcut(f="__num:__node_n1", i=xxnam2num)

    fenum = None
    fenum <<= nm.mjoin(k="__node1", K="__node", f="__num:__node_n1", m=xxnam2num, i=xxedge)
    fenum <<= nm.mjoin(k="__node2", K="__node", f="__num:__node_n2", m=xxnam2num)
    fenum <<= nm.msortf(f="__node_n1")

    febase = None
    febase <<= nm.mnjoin(k="__node_n1", m=fenum, i=fnnum, n=True)
    febase <<= nm.msortf(f="__node_n1%n,__node_n2%n", o=xxebase)
    febase.run()

    fbody = None
    if not self.ew:
        fbody <<= nm.mcut(f="__node_n1,__node_n2", i=xxebase)
        fbody <<= nm.mtra(k="__node_n1", f="__node_n2", q=True)
        fbody <<= nm.mcut(f="__node_n2", nfno=True, o=xxbody)
    else:
        # ew= is specified: merge the weight data into the edge data.
        # Neighbours get even sequence numbers and weights odd ones, so a
        # numeric sort on __seq interleaves them as "v_1 e_1 v_2 e_2 ...".
        febody = None
        febody <<= nm.mcut(f="__node_n1,__node_n2:__v", i=xxebase)
        febody <<= nm.mnumber(S=0, I=2, a="__seq", q=True)
        fwbody = None
        fwbody <<= nm.mcut(f="__node_n1,__weight:__v", i=xxebase)
        fwbody <<= nm.mnumber(S=1, I=2, a="__seq", q=True)
        fbody <<= nm.msortf(f="__seq%n", i=[febody, fwbody])
        fbody <<= nm.mtra(k="__node_n1", f="__v", q=True)
        fbody <<= nm.mcut(f="__v", nfno=True, o=xxbody)
    fbody.run()
    # xxbody
    # 2 7 3 8 5 9
    # 1 7 3 10 5 11 7 12
    # 1 8 2 10 4 13 7 14

    # --------------------
    # Generate node data using the integer numbers.
    if self.nFile and self.nw:
        # xxnode
        # __node,v1,v2
        # a,1,1
        # b,1,1
        # c,1,1
        xxnbody = temp.file()
        xxnbody1 = temp.file()
        fnbody = None
        fnbody <<= nm.mjoin(k="__node", f="__num", i=xxnode, m=xxnam2num)
        fnbody <<= nm.msortf(f="__num%n")
        fnbody <<= nm.mcut(f=self.nw, nfno=True)
        fnbody <<= nm.cmd("tr ',' ' ' ")  # tricky!! commas -> spaces
        fnbody <<= nm.mwrite(o=xxnbody)
        fnbody.run()
        # xxnbody
        # 1 1
        # 1 1
        # 1 1

        # Paste the node weights in front of the edge body.
        fnbody1 = None
        fnbody1 <<= nm.mpaste(nfn=True, m=xxbody, i=xxnbody)
        fnbody1 <<= nm.cmd("tr ',' ' ' ")
        fnbody1 <<= nm.mwrite(o=xxnbody1)
        fnbody1.run()
        # Replace the body in-process instead of shelling out to `mv`.
        shutil.move(xxnbody1, xxbody)
        # xxbody
        # 1 1 2 7 3 8 5 9
        # 1 1 1 7 3 10 5 11 7 12
        # 1 1 1 8 2 10 4 13 7 14

    # xxedge holds every edge in both directions, so halve the record count.
    # Integer division keeps the value an int under Python 3.
    eSize = mrecount(i=xxedge) // 2
    nSize = mrecount(i=xxnode)

    nwFlag = 1 if self.nw else 0
    ewFlag = 1 if self.ew else 0
    fmt = "0%d%d" % (nwFlag, ewFlag)

    # Write the header line followed by the body straight into the graph
    # file (byte-wise copy, equivalent to `echo header; cat body`).
    xxgraph = temp.file()
    header = "%d %d %s %d\n" % (nSize, eSize, fmt, self.ncon)
    with open(xxgraph, "wb") as graph_fp:
        graph_fp.write(header.encode("ascii"))
        with open(xxbody, "rb") as body_fp:
            shutil.copyfileobj(body_fp, graph_fp)

    if self.mFile:
        nm.mfldname(f="__num:num,__node:node", i=xxnum2nam, o=self.mFile).run()

    if self.dFile:
        # shutil.copy copes with spaces in the path and with a directory
        # destination, which the unquoted `cp` shell string did not.
        shutil.copy(xxgraph, self.dFile)

    if not self.noexe:
        gpmetis_cmd = "gpmetis -seed=%d -ptype=%s -ncuts=%d -ufactor=%d %s %d" % (
            self.seed, self.ptype, self.ncuts, self.ufactor, xxgraph, self.kway)
        if not self.verbose:
            gpmetis_cmd += " > /dev/null"
        # The exit status is not inspected; success is judged by the
        # presence of an output file below.
        os.system(gpmetis_cmd)

        if not glob.glob(xxgraph + ".part.*"):
            raise Exception(
                "#ERROR# command `gpmetis' didn't output any results")

        # Map the node numbers back to the original node names.
        # {xxgraph}.part.{kway}
        # 1
        # 0
        # 1
        fo = None
        fo <<= nm.mcut(f="0:cluster", nfni=True, i=xxgraph + ".part." + str(self.kway))
        fo <<= nm.mnumber(S=1, a="__num", q=True)
        fo <<= nm.mjoin(k="__num", f="__node", m=xxnum2nam)
        fo <<= nm.msortf(f="__node,cluster")
        if self.nf:
            fo <<= nm.mcut(f="__node:%s,cluster" % (self.nf), o=self.oFile)
        else:
            fo <<= nm.mcut(f="__node:node,cluster", o=self.oFile)
        fo.run()

    nu.mmsg.endLog(self.__cmdline())
def run(self):
    """Iteratively polish the input graph and write the resulting graph.

    Starting from the numeric edge file of ``self.gi``, each round converts
    the current pair file to transaction form with ``extTake.grhfil`` and
    recomputes the pairs with ``extTake.sspc``. The loop stops when the
    round counter reaches ``self.iterMax`` or, after the first round, when
    ``self.same`` reports that the transaction file equals the previous
    one. The final pairs are mapped back to node names and written to the
    edge output; the node output is written when one was requested.

    Optional outputs:
        * ``self.outDir``: the named pair file of every round, as
          ``pair_<round>.csv``.
        * ``self.logFile``: the parameters, elapsed time and the per-round
          values returned by ``self.features``.

    Returns:
        ``self.go``: reloaded through ``readCSV`` when it already exists,
        otherwise a new ``ntg.graph`` built from the output files.
    """
    from datetime import datetime

    started = datetime.now()

    srcPairs = self.gi.eFileT
    xxmap = self.gi.mFile
    xxmaprev = self.__tempW.file()
    nm.msortf(f="id", i=xxmap, o=xxmaprev).run()

    xxpair = self.__tempW.file()  # sspc output (pair format)
    xxtra = self.__tempW.file()   # sspc input (tra format)
    xxprev = self.__tempW.file()  # xxtra of the previous round
    xxtmmp = self.__tempW.file()

    shutil.copyfile(srcPairs, xxpair)

    # Output field names; identical for every round, so computed once.
    para = "%s,%s" % (self.gi.edgeFN1, self.gi.edgeFN2)

    nSizes = []
    eSizes = []
    denses = []
    round_no = 0
    while True:
        # Compute the graph features of the current pairs (log only).
        if self.logFile:
            nSize, eSize, dens = self.features(xxpair)
            nSizes.append(nSize)
            eSizes.append(eSize)
            denses.append(dens)

        # Convert the node pairs into the sspc input format.
        if self.indirect:
            gtpstri = "ue_" if self.msgoff else "ue"
            extTake.grhfil(type=gtpstri, i=xxpair, o=xxtra)
        else:
            gtpstri0 = "ue0_" if self.msgoff else "ue0"
            extTake.grhfil(type=gtpstri0, i=xxpair, o=xxtra)

        if self.outDir:
            # Dump this round's pairs with the original node names.
            os.system("tr ' ' ',' < %s > %s " % (xxpair, xxtmmp))
            f = nm.mcut(f="0:num1,1:num2", nfni=True, i=xxtmmp)
            f <<= nm.mjoin(k="num1", K="id", m=xxmaprev, f="node:%s" % (self.gi.edgeFN1))
            f <<= nm.mjoin(k="num2", K="id", m=xxmaprev, f="node:%s" % (self.gi.edgeFN2)).mcut(f=para).mfsort(f=para)
            f <<= nm.msortf(f=para, o="%s/pair_%s.csv" % (self.outDir, round_no))
            f.run()

        # Termination check.
        if round_no >= self.iterMax:
            break
        if round_no != 0 and self.same(xxtra, xxprev):
            break

        shutil.copyfile(xxtra, xxprev)
        tpstr = self.measure + "_" if self.msgoff else self.measure
        extTake.sspc(type=tpstr, l=self.minSupp, i=xxtra, th=self.th, o=xxpair)
        round_no += 1

    # Write the graph the iteration ended with, converting the node numbers
    # back to the original node strings.
    os.system("tr ' ' ',' < %s > %s" % (xxpair, xxtmmp))
    f = nm.mcut(f="0:num1,1:num2", nfni=True, i=xxtmmp)
    f <<= nm.mjoin(k="num1", K="id", m=xxmaprev, f="node:%s" % (self.gi.edgeFN1)).mjoin(
        k="num2", K="id", m=xxmaprev, f="node:%s" % (self.gi.edgeFN2))
    f <<= nm.mcut(f=para).mfsort(f=para).msortf(f=para, o=self.__eo)
    f.run()

    if self.__no:
        if self.__nf:
            nm.mcut(f="node:%s" % (self.__nf), i=xxmap, o=self.__no).run()
        else:
            nm.mcut(f="node", i=xxmap, o=self.__no).run()

    if self.go:
        self.go.readCSV(edgeFile=self.__eo, title1=self.gi.edgeFN1, title2=self.gi.edgeFN2,
                        nodeFile=self.__no, title=self.__nf)
    else:
        self.go = ntg.graph(edgeFile=self.__eo, title1=self.gi.edgeFN1, title2=self.gi.edgeFN2,
                            nodeFile=self.__no, title=self.__nf)

    procTime = datetime.now() - started

    # Log file output. The "iter" row records the configured maximum, not
    # the number of rounds actually executed.
    if self.logFile:
        kv = [
            ["key", "value"],
            ["iter", str(self.iterMax)],
            ["outDir", str(self.outDir)],
            ["th", str(self.th)],
            ["indirect", str(self.indirect)],
            ["measure", str(self.measure)],
            ["minSupp", str(self.minSupp)],
            ["logFile", str(self.logFile)],
            ["time", str(procTime)],
        ]
        for idx, (nSize, eSize, dens) in enumerate(zip(nSizes, eSizes, denses)):
            kv.append(["nSize" + str(idx), str(nSize)])
            kv.append(["eSize" + str(idx), str(eSize)])
            kv.append(["dens" + str(idx), str(dens)])
        nm.writecsv(i=kv, o=self.logFile).run()

    return self.go