Ejemplo n.º 1
0
    def __init__(self, db, outtf=True):
        """Set up per-class weight files and the numbered transaction file.

        Args:
            db: input database object. This method reads its ``items``,
                ``clsNameRecSize``, ``clsFN``, ``cFile``, ``itemFN``,
                ``idFN`` and ``file`` attributes.
            outtf: flag stored as ``self.outtf``; it is not used here.
        """
        self.size = None
        self.pFile = None
        self.tFile = None
        self.msgoff = True
        self.outtf = outtf
        self.db = db  # input database
        self.temp = mtemp.Mtemp()
        self.file = self.temp.file()
        self.weightFile = {}
        self.posWeight = {}
        self.sigma = {}

        itemTbl = self.db.items

        # One weight file per class: the class column is cut from the class
        # file and rewritten as "<class name>:<weight>".
        for clsName, posCount in db.clsNameRecSize.items():
            self.weightFile[clsName] = self.temp.file()
            self.posWeight[clsName] = self.calOmega(posCount)
            weightSpec = "{}:{}".format(clsName, self.posWeight[clsName])

            weightFlow = nm.mcut(nfno=True, f=self.db.clsFN, i=self.db.cFile)
            weightFlow <<= nm.mchgstr(nfn=True,
                                      f=0,
                                      O=-1,
                                      o=self.weightFile[clsName],
                                      c=weightSpec)
            weightFlow.run()

        # Convert items from symbols to numbers.
        idPair = ",".join((self.db.idFN, itemTbl.idFN))
        numFlow = nm.mjoin(k=self.db.itemFN,
                           K=itemTbl.itemFN,
                           i=self.db.file,
                           m=itemTbl.file,
                           f=itemTbl.idFN)
        numFlow <<= nm.mcut(f=idPair)
        numFlow <<= nm.mtra(k=self.db.idFN, f=itemTbl.idFN)
        numFlow <<= nm.mcut(f=itemTbl.idFN, nfno=True, o=self.file)
        numFlow.run()
Ejemplo n.º 2
0
	def __init__(self,db):
		"""Initialise the enumerator and build the numbered transaction file.

		Args:
			db: input database object. This method reads its ``items``,
				``itemFN``, ``idFN`` and ``file`` attributes.
		"""
		self.db = db  # input database
		self.temp = mtemp.Mtemp()
		self.file = self.temp.file()

		# Enumeration settings; nothing is configured at construction time.
		self.eArgs = None
		self.type = None
		self.top = None
		self.minCnt = None
		self.maxCnt = None
		self.minSup = None
		self.maxSup = None
		self.minLen = None
		self.maxLen = None
		self.skipTP = False

		# Result file slots and message switch.
		self.pFile = None
		self.tFile = None
		self.msgoff = True

		itemTbl = self.db.items

		# Convert items from symbols to numbers.
		idPair = ",".join((self.db.idFN, itemTbl.idFN))
		numFlow = nm.mjoin(k=self.db.itemFN,
		                   K=itemTbl.itemFN,
		                   m=itemTbl.file,
		                   f=itemTbl.idFN,
		                   i=self.db.file)
		numFlow <<= nm.mcut(f=idPair)
		numFlow <<= nm.mtra(k=self.db.idFN, f=itemTbl.idFN)
		numFlow <<= nm.mcut(f=itemTbl.idFN, nfno=True, o=self.file)
		numFlow.run()
Ejemplo n.º 3
0
    def __init__(self,
                 edgeFile=None,
                 edgeList=None,
                 title1=None,
                 title2=None,
                 nodeFile=None,
                 nodeList=None,
                 title=None):
        """Create a graph, optionally loading edges and nodes right away.

        For both edges and nodes a file argument takes precedence over the
        corresponding list argument when both are supplied.

        Args:
            edgeFile: edge CSV, passed to the private edge CSV reader.
            edgeList: edge list, used only when ``edgeFile`` is None.
            title1: passed to the edge reader as ``title1``.
            title2: passed to the edge reader as ``title2``.
            nodeFile: node CSV, passed to the private node CSV reader.
            nodeList: node list, used only when ``nodeFile`` is None.
            title: passed to the node reader as ``title``.
        """
        # Member variables.
        self.eFileT = None  # edge file used inside Take (items separated by spaces)
        self.eFile = None  # edge file
        self.eFileN = None  # edge file name
        self.edgeFN1 = None  # edge file field name 1 (String)
        self.edgeFN2 = None  # edge file field name 2 (String)
        self.nFile = None  # node file
        self.nFileN = None  # node file name
        self.nodeFN = None  # node file field name (String)
        self.mFile = None  # node ID mapping file
        self.__wfE = None  # work file for edges
        self.__wfN = None  # work file for nodes
        self.__tempW = mtemp.Mtemp()
        self.__temp = mtemp.Mtemp()

        # Load edges if requested.
        if edgeFile is not None:
            self.__readEdgeCSV(edgeFile=edgeFile, title1=title1, title2=title2)
        elif edgeList is not None:
            self.__readEdgeList(edgeList=edgeList,
                                title1=title1,
                                title2=title2)

        # Load nodes if requested.
        if nodeFile is not None:
            self.__readNodeCSV(nodeFile=nodeFile, title=title)
        elif nodeList is not None:
            self.__readNodeList(nodeList=nodeList, title=title)

        # Encode to numeric form once an edge work file exists.
        if self.__wfE is None:
            return
        self.__convertToNumeric()
Ejemplo n.º 4
0
 def isSame(self, targetGraph):
     """Return True when both graphs' edge files have identical contents.

     The comparison is byte-wise and done in-process, so it does not
     depend on an external ``diff`` command, needs no temporary file, and
     is safe for paths containing spaces or shell metacharacters.

     Args:
         targetGraph: object whose ``eFile`` attribute is the path of the
             edge file to compare with ``self.eFile``.

     Returns:
         bool: True if the two edge files are byte-for-byte equal.

     Raises:
         OSError: if either edge file cannot be accessed.
     """
     import filecmp  # local import keeps this method self-contained

     # shallow=False forces a content comparison instead of trusting
     # matching stat signatures; files of different size return False
     # without being read.
     return filecmp.cmp(self.eFile, targetGraph.eFile, shallow=False)
Ejemplo n.º 5
0
def mnest2tree(ei, ef, k, ni=None, nf=None, ev=None, no=None, eo=None):
    """Rewrite keyed edge records into (key, endpoint) edges.

    Every input edge record (key, ef1, ef2) yields two edges, (key, ef1) and
    (key, ef2). The input's fields are joined back, the edges are reduced
    per pair, and the result is written to ``eo`` under the original edge
    field names. When ``ni`` is given, a node file is also written to ``no``.

    Args:
        ei: input edge file.
        ef: two edge field names separated by a comma.
        k: key field name.
        ni: optional input node file.
        nf: node field name in ``ni``.
        ev: optional value field; when set, edges are reduced with
            ``mavg`` on it, otherwise with ``muniq``.
        no: output node file.
        eo: output edge file.
    """
    # TODO: add parameter checks.
    names = ef.split(",")
    ef1, ef2 = names[0], names[1]

    srcFlow = nm.mcut(f="{}:#orgKey,{}:#orgEf1,{}:#orgEf2".format(k, ef1, ef2),
                      i=ei)

    work = mtemp.Mtemp()
    pairFile = work.file()

    # Emit two rows per input record: key->ef1 and key->ef2. Both keep the
    # original triple so that all input fields can be joined back below.
    # NOTE(review): this block uses `_nu` here and `nu` further down --
    # confirm both names are in scope in this module.
    outHeader = "#orgKey,#orgEf1,#orgEf2,#ef1,#ef2"
    with _nu.mcsvout(o=pairFile, f=outHeader) as writer:
        for rec in srcFlow:
            origin = [rec[0], rec[1], rec[2]]
            writer.write(origin + [rec[0], rec[1]])
            writer.write(origin + [rec[0], rec[2]])

    edgeFlow = None
    # Join all fields of the input back onto the expanded rows.
    edgeFlow <<= nm.mjoin(k="#orgKey,#orgEf1,#orgEf2",
                          K=[k, ef1, ef2],
                          m=ei,
                          i=pairFile)
    edgeFlow <<= (nm.mavg(k="#ef1,#ef2", f=ev)
                  if ev else nm.muniq(k="#ef1,#ef2"))
    edgeFlow <<= nm.mcut(r=True, f="#orgKey,#orgEf1,#orgEf2")
    edgeFlow <<= nm.mfldname(f="#ef1:{},#ef2:{}".format(ef1, ef2), o=eo)
    edgeFlow.run()

    if not ni:
        return

    header = nu.mheader(i=ni)
    otherFlds = [name for name in header if name != nf]
    filler = ',' * (len(otherFlds) - 1)

    # Values of the first edge field that are absent from the node file,
    # given the node file's remaining columns via msetstr.
    extraNodes = None
    extraNodes <<= nm.mcut(f="{}:{}".format(ef1, nf), i=eo)
    extraNodes <<= nm.muniq(k=nf)
    extraNodes <<= nm.mcommon(k=nf, m=ni, r=True)
    extraNodes <<= nm.msetstr(v=filler, a=otherFlds)

    nodeFlow = nm.mcut(f=k, r=True, i=[ni, extraNodes])
    nodeFlow <<= nm.msetstr(v="", a=k, o=no)
    nodeFlow.run()
Ejemplo n.º 6
0
    def __init__(self,
                 gi=None,
                 go=None,
                 sim="R",
                 th=None,
                 indirect=False,
                 sup=0,
                 iter=30,
                 O=None,
                 log=None):
        """Validate and store the polishing parameters.

        All arguments are validated before the temporary workspace is
        allocated, so an invalid call leaves nothing behind.

        Args:
            gi: input graph object (mandatory).
            go: output graph object (optional).
            sim: similarity measure, stored as ``self.measure``.
            th: threshold of the degree of similarity (mandatory; converted
                with ``float``).
            indirect: exclude the direct relationship from the adjacent node
                set in the similarity calculation.
            sup: minimum support (converted with ``int``).
            iter: upper bound on the number of iterations (>= 1).
            O: directory for intermediate outputs; created if missing.
            log: log file name.

        Raises:
            TypeError: if ``th`` is not given.
            ValueError: if ``th``, ``sup`` or ``iter`` cannot be converted.
            Exception: if ``gi`` is missing or ``iter`` is below 1.
        """
        # float(None) used to fail here with an opaque message; keep the
        # same exception type but say which argument is missing.
        if th is None:
            raise TypeError("th is mandatory.")
        threshold = float(th)
        minSupp = int(sup)
        iterMax = int(iter)

        if not gi:
            raise Exception("gi is mandatory.")
        if iterMax < 1:
            raise Exception("iter must be 1 or more.")

        self.gi = gi  # (graph object) input graph set
        self.go = go if go else None  # (graph object) output graph set
        self.th = threshold  # (float) threshold of degree of similarity
        self.indirect = indirect  # (boolean) exclude direct relationship from adjacent node set in similarity calculation
        self.measure = sim  # (string) similarity measure
        self.minSupp = minSupp  # (int) minimum support
        self.iterMax = iterMax  # (int) upper bound of iterations
        self.logFile = log  # (string) log file name
        self.outDir = O  # (string) directory name for outputs in the process
        self.msgoff = True

        # Temp files created by this instance will be deleted after
        # self.run() has executed.
        self.__tempW = mtemp.Mtemp()
        self.__eo = self.__tempW.file()  # (string) output edge file name
        self.__no = self.__tempW.file()  # (string) output node file name
        self.__nf = "n"  # (string) output node file column title

        if self.outDir:
            # exist_ok avoids the isdir()/makedirs() check-then-act race.
            os.makedirs(self.outDir, exist_ok=True)
Ejemplo n.º 7
0
 def __init__(self, db, outtf=True):
     """Build the numbered, time-ordered transaction file for ``db``.

     Args:
         db: input database object. This method reads its ``items``,
             ``itemFN``, ``idFN``, ``timeFN`` and ``file`` attributes.
         outtf: flag stored as ``self.outtf``; it is not used here.
     """
     self.size = None
     self.top = None
     self.msgoff = True
     self.outtf = outtf
     self.db = db  # input database
     self.temp = mtemp.Mtemp()
     self.file = self.temp.file()

     itemTbl = self.db.items
     keptFlds = ",".join((self.db.idFN, self.db.timeFN, itemTbl.idFN))
     timeOrder = self.db.timeFN + "%n"

     # Convert items from symbols to numbers.
     numFlow = nm.mjoin(k=self.db.itemFN,
                        K=itemTbl.itemFN,
                        m=itemTbl.file,
                        f=itemTbl.idFN,
                        i=self.db.file)
     numFlow <<= nm.mcut(f=keptFlds)
     numFlow <<= nm.mtra(k=self.db.idFN, s=timeOrder, f=itemTbl.idFN)
     numFlow <<= nm.mcut(f=itemTbl.idFN, nfno=True, o=self.file)
     numFlow.run()
Ejemplo n.º 8
0
	def __init__(self,iFile,itemFN,taxoFN):
		"""Load a taxonomy file and count its distinct items and taxonomies.

		Args:
			iFile: taxonomy CSV file.
			itemFN: item field name (String).
			taxoFN: taxonomy field name (String).
		"""
		self.temp = mtemp.Mtemp()

		self.iFile = iFile
		self.iPath = os.path.abspath(self.iFile)
		self.itemFN = itemFN  # item field name
		self.taxoFN = taxoFN  # taxonomy field name
		self.itemSize = None  # number of distinct items
		self.taxoSize = None  # number of distinct taxonomies

		# Taxonomy data file: the unique (item, taxonomy) pairs.
		self.file = self.temp.file()
		pairFlds = ",".join((self.itemFN, self.taxoFN))
		pairFlow = nm.mcut(f=pairFlds, i=self.iFile).muniq(k=pairFlds, o=self.file)
		pairFlow.run(msg="on")

		# Count the distinct item values.
		itemFlow = nm.mcut(f=self.itemFN, i=self.iFile)
		itemFlow <<= nm.mtrafld(f=self.itemFN, a="__fld", valOnly=True)
		itemFlow <<= nm.mtra(f="__fld", r=True)
		itemFlow <<= nm.muniq(k="__fld")
		itemFlow <<= nm.mcount(a="size")
		itemFlow <<= nm.mcut(f="size")
		itemRows = itemFlow.run()
		self.itemSize = int(itemRows[0][0])

		# Count the distinct taxonomy values.
		taxoFlow = nm.mcut(f=self.taxoFN + ":item", i=self.file).muniq(k="item")
		taxoFlow = taxoFlow.mcount(a="size").mcut(f="size")
		taxoRows = taxoFlow.run()
		self.taxoSize = int(taxoRows[0][0])
Ejemplo n.º 9
0
    def repTaxo(self, taxonomy):
        """Replace every item in the transaction file by its taxonomy.

        Args:
            taxonomy: object providing ``file``, ``itemFN`` and ``taxoFN``.
        """
        # Replace the item class with the taxonomy first.
        self.items.repTaxo(taxonomy)

        taxoFile = taxonomy.file
        taxoItemFN = taxonomy.itemFN
        taxoFN = taxonomy.taxoFN

        scratch = mtemp.Mtemp()
        sortedTmp = scratch.file()
        keyFlds = [self.idFN, self.timeFN]

        # Join the taxonomy onto each item, rename it to the item field and
        # sort by id, time and item.
        flow = None
        flow <<= nm.mjoin(k=self.itemFN,
                          K=taxoItemFN,
                          f=taxoFN,
                          m=taxoFile,
                          i=self.file)
        flow <<= nm.mcut(f=",".join(keyFlds + [taxoFN + ":" + self.itemFN]))
        flow <<= nm.msortf(f=",".join(keyFlds + [self.itemFN]), o=sortedTmp)
        flow.run()

        # Move the result into a fresh file owned by this object.
        self.file = self.temp.file()
        shutil.move(sortedTmp, self.file)
Ejemplo n.º 10
0
def msankey(i, o, v, f, title="", h=500, w=960, nl=False, T=None):
    """Render a CSV edge list as a standalone HTML Sankey diagram.

    Args:
        i: input CSV file holding the edges.
        o: output HTML path; when None the page is written to sys.stdout.
        v: name of the field holding the link value (read as int).
        f: the two node field names (source first, target second), either a
            comma separated string or a list. Names after the second one
            are ignored.
        title: page title; None is treated as "".
        h: chart height substituted into the page; None means 500.
        w: chart width substituted into the page; None means 960.
        nl: when true, node labels are hidden (font size 0).
        T: optional work directory; exported through the ``KG_TmpPath``
            environment variable with a trailing slash removed.

    Raises:
        TypeError: if ``f`` is neither a str nor a list, or names fewer
            than two fields.
    """
    # f= must name two fields.
    if isinstance(f, str):
        ff = f.split(',')
    elif isinstance(f, list):
        ff = f
    else:
        raise TypeError("f= unsupport " + str(type(f)))

    if len(ff) < 2:
        raise TypeError("f= takes just two field names")

    if T is not None:
        import re
        os.environ["KG_TmpPath"] = re.sub(r'/$', "", T)

    if h is None:
        h = 500

    if w is None:
        w = 960

    if title is None:
        title = ""

    tempW = mtemp.Mtemp()
    nodef = tempW.file()
    edgef = tempW.file()

    ef1 = ff[0]
    ef2 = ff[1]
    ev = v

    iFile = i
    oFile = o

    # Node table: the unique values of both edge fields, numbered in order.
    f0 = nm.mcut(i=iFile, f="%s:nodes" % (ef1))
    f1 = nm.mcut(i=iFile, f="%s:nodes" % (ef2))
    f2 = None
    f2 <<= nm.muniq(i=[f0, f1], k="nodes")
    f2 <<= nm.mnumber(s="nodes", a="num", o=nodef)
    f2.run()

    # Edge table: both endpoints replaced by their node numbers.
    f3 = None
    f3 <<= nm.mcut(f="%s:nodes1,%s:nodes2,%s" % (ef1, ef2, ev), i=iFile)
    f3 <<= nm.mjoin(k="nodes1", K="nodes", m=nodef, f="num:num1")
    f3 <<= nm.mjoin(k="nodes2", K="nodes", m=nodef, f="num:num2")
    f3 <<= nm.mcut(f="num1,num2,%s" % (ev))
    f3 <<= nm.msortf(f="num1%n,num2%n", o=edgef)
    f3.run()

    nodeL = []

    for flds in nm.readcsv(nodef).getline(otype='dict'):
        nodeL.append({"name": flds['nodes']})

    nodes = json.JSONEncoder().encode(nodeL)

    linkL = []
    for flds in nm.readcsv(edgef).getline(otype='dict',
                                          dtype={
                                              "num1": "int",
                                              "num2": "int",
                                              ev: "int"
                                          }):
        linkL.append({
            "source": flds["num1"],
            "target": flds["num2"],
            "value": flds[ev]
        })

    links = json.JSONEncoder().encode(linkL)

    nolabel = ""
    if nl:
        nolabel = "font-size: 0px;"

    outTemplate = '''
<!DOCTYPE html>
<html class="ocks-org do-not-copy">
<meta charset="utf-8">
<title>{title}</title>
<style>
body {{
    font: 10px sans-serif;
}}
svg {{
    padding: 10px 0 0 10px;
}}
.arc {{
    stroke: #fff;
}}
#tooltip {{
position: absolute;
width: 150px;
height: auto;
padding: 10px;
background-color: white;
-webkit-border-radius: 10px;
-moz-border-radius: 10px;
border-radius: 10px;
-webkit-box-shadow: 4px 4px 10px rgba(0,0,0,0.4);
-moz-box-shadow: 4px 4px 10px rgba(0,0,0,0.4);
box-shadow: 4px 4px 10px rgba(0,0,0,0.4);
pointer-events: none;
}}
#tooltip.hidden {{
display: none;
}}
#tooltip p {{
margin: 0;
font-family: sans-serif;
font-size: 10px;
line-height: 14px;
}}
#chart {{
height: 500px;
}}
.node rect {{
    cursor: move;
    fill-opacity: .9;
    shape-rendering: crispEdges;
}}
.node text {{
    pointer-events: none;
    text-shadow: 0 1px 0 #fff;
    {nolabel}
}}
.link {{
    fill: none;
    stroke: #000;
    stroke-opacity: .2;
}}
.link:hover {{
    stroke-opacity: .5;
}}
</style>
<body>
<h1>{title}</h1>
<p id="chart">
<script>
	{d3js_str}
	d3.sankey = function() {{
		var sankey = {{}},
		nodeWidth = 24,
		nodePadding = 8,
		size = [1, 1],
		nodes = [],
		links = [];

		sankey.nodeWidth = function(_) {{
			if (!arguments.length) return nodeWidth;
			nodeWidth = +_;
			return sankey;
		}};

		sankey.nodePadding = function(_) {{
			if (!arguments.length) return nodePadding;
			nodePadding = +_;
			return sankey;
		}};

		sankey.nodes = function(_) {{
			if (!arguments.length) return nodes;
			nodes = _;
			return sankey;
		}};
		sankey.links = function(_) {{
			if (!arguments.length) return links;
			links = _;
			return sankey;
		}};
		sankey.size = function(_) {{
			if (!arguments.length) return size;
			size = _;
			return sankey;
		}};
	
		sankey.layout = function(iterations){{
			computeNodeLinks();
			computeNodeValues();
			computeNodeBreadths();
			computeNodeDepths(iterations);
			computeLinkDepths();
			return sankey;
		}};

		sankey.relayout = function() {{
			computeLinkDepths();
			return sankey;
		}};

		sankey.link = function() {{
			var curvature = .5;
			function link(d) {{
				var x0 = d.source.x + d.source.dx,
				x1 = d.target.x,
				xi = d3.interpolateNumber(x0, x1),
				x2 = xi(curvature),
				x3 = xi(1 - curvature),
				y0 = d.source.y + d.sy + d.dy / 2,
				y1 = d.target.y + d.ty + d.dy / 2;
				return "M" + x0 + "," + y0
				+ "C" + x2 + "," + y0
				+ " " + x3 + "," + y1
				+ " " + x1 + "," + y1;
			}}
			link.curvature = function(_) {{
				if (!arguments.length) return curvature;
				curvature = +_;
				return link;
			}};
			return link;
		}};

		// Populate the sourceLinks and targetLinks for each node.
		// Also, if the source and target are not objects, assume they are indices.
		function computeNodeLinks() {{
			nodes.forEach(function(node) {{
				node.sourceLinks = [];
				node.targetLinks = [];
			}});

			links.forEach(function(link) {{
				var source = link.source,
				target = link.target;
				if (typeof source === "number") source = link.source = nodes[link.source];
				if (typeof target === "number") target = link.target = nodes[link.target];
				source.sourceLinks.push(link);
				target.targetLinks.push(link);
			}});
		}}

		// Compute the value (size) of each node by summing the associated links.
		function computeNodeValues() {{
			nodes.forEach( function(node) {{
				node.value = Math.max(d3.sum(node.sourceLinks, value),d3.sum(node.targetLinks, value));
			}} );
 		}}
 		
		// Iteratively assign the breadth (x-position) for each node.
		// Nodes are assigned the maximum breadth of incoming neighbors plus one;
		// nodes with no incoming links are assigned breadth zero, while
		// nodes with no outgoing links are assigned the maximum breadth.
		function computeNodeBreadths() {{
			var remainingNodes = nodes,
					nextNodes,
					x = 0;
			while (remainingNodes.length) {{
				nextNodes = [];
				remainingNodes.forEach(function(node) {{
					node.x = x;
					node.dx = nodeWidth;
					node.sourceLinks.forEach(function(link) {{
						nextNodes.push(link.target);
					}});
				}});
				remainingNodes = nextNodes;
				++x;
			}}
			//
			moveSinksRight(x);
			scaleNodeBreadths((width - nodeWidth) / (x - 1));
		}}
		
		function moveSourcesRight() {{
			nodes.forEach(function(node) {{
				if (!node.targetLinks.length) {{
					node.x = d3.min(node.sourceLinks, function(d) {{ return d.target.x; }} ) - 1;
				}}
			}});
		}}
	
		function moveSinksRight(x) {{
			nodes.forEach(function(node) {{
				if (!node.sourceLinks.length) {{
					node.x = x - 1;
				}}
			}});
		}}

		function scaleNodeBreadths(kx) {{
			nodes.forEach(function(node) {{
				node.x *= kx;
			}});
		}}



		function computeNodeDepths(iterations) {{
			var nodesByBreadth = d3.nest()
													.key(function(d) {{ return d.x; }})
													.sortKeys(d3.ascending)
													.entries(nodes)
													.map(function(d) {{ return d.values; }});
                                                                
			//
			initializeNodeDepth();
			resolveCollisions();

			for (var alpha = 1; iterations > 0; --iterations){{
				relaxRightToLeft(alpha *= .99);
				resolveCollisions();
				relaxLeftToRight(alpha);
				resolveCollisions();
			}}
                                                                
			function initializeNodeDepth() {{
				var ky = d3.min(nodesByBreadth, function(nodes) {{
					return (size[1] - (nodes.length - 1) * nodePadding) / d3.sum(nodes, value);
				}});
				nodesByBreadth.forEach(function(nodes) {{
					nodes.forEach(function(node, i) {{
						node.y = i;
						node.dy = node.value * ky;
					}});
				}});
				links.forEach(function(link) {{
					link.dy = link.value * ky;
				}});
			}}
		
			function relaxLeftToRight(alpha) {{
				nodesByBreadth.forEach(function(nodes, breadth) {{
					nodes.forEach(function(node) {{
						if (node.targetLinks.length) {{
							var y = d3.sum(node.targetLinks, weightedSource) / d3.sum(node.targetLinks, value);
							node.y += (y - center(node)) * alpha;
						}}
					}});
				}});
			
				function weightedSource(link) {{
					return center(link.source) * link.value;
				}}
			}}
		
			function relaxRightToLeft(alpha) {{
				nodesByBreadth.slice().reverse().forEach(function(nodes){{
					nodes.forEach(function(node) {{
						if (node.sourceLinks.length) {{
							var y = d3.sum(node.sourceLinks, weightedTarget) / d3.sum(node.sourceLinks, value);
							node.y += (y - center(node)) * alpha;
						}}
					}});
				}});

				function weightedTarget(link) {{
					return center(link.target) * link.value;
				}}
			}}
		
			function resolveCollisions() {{
				
				nodesByBreadth.forEach(function(nodes) {{
					var node, dy, y0 = 0,
						n = nodes.length, i;
					// Push any overlapping nodes down.
					nodes.sort(ascendingDepth);
					for (i = 0; i < n; ++i) {{
						node = nodes[i];
						dy = y0 - node.y;
						if (dy > 0) node.y += dy;
						y0 = node.y + node.dy + nodePadding;
					}}
					// If the bottommost node goes outside the bounds, push it back up.
					dy = y0 - nodePadding - size[1];
					if (dy > 0) {{
						y0 = node.y -= dy;
						// Push any overlapping nodes back up.
						for (i = n - 2; i >= 0; --i) {{
							node = nodes[i];
							dy = node.y + node.dy + nodePadding - y0;
							if (dy > 0) node.y -= dy;
							y0 = node.y;
						}}
					}}
				}});
			}}
			function ascendingDepth(a, b) {{ return a.y - b.y; }}
		}}

		function computeLinkDepths() {{

			nodes.forEach(function(node) {{
				node.sourceLinks.sort(ascendingTargetDepth);
				node.targetLinks.sort(ascendingSourceDepth);
			}});
	
			nodes.forEach(function(node) {{
				var sy = 0, ty = 0;
				node.sourceLinks.forEach(function(link) {{
					link.sy = sy;
					sy += link.dy;
				}});
				node.targetLinks.forEach(function(link) {{
					link.ty = ty;
					ty += link.dy;
				}});
			}});
	
			function ascendingSourceDepth(a, b) {{
				return a.source.y - b.source.y;
			}}
			function ascendingTargetDepth(a, b) {{
				return a.target.y - b.target.y;
			}}
		}}
		
		function center(node){{
			return node.y + node.dy / 2;
		}}

		function value(link) {{
			return link.value;
		}}

		return sankey;
	}};
</script>

<script>
	var margin = {{top: 1, right: 1, bottom: 6, left: 1}},
			width = {width} - margin.left - margin.right,
			height = {height} - margin.top - margin.bottom;

	var formatNumber = d3.format(",.0f"),
			format = function(d) {{ return formatNumber(d) + " TWh"; }},
			color = d3.scale.category20();

	var svg = d3.select("#chart").append("svg")
  	  .attr("width", width + margin.left + margin.right)
    	.attr("height", height + margin.top + margin.bottom)
    	.append("g")
    	.attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  var sankey = d3.sankey()
    .nodeWidth(15)
    .nodePadding(10)
    .size([width, height]);

	var path = sankey.link();

	var nodes={nodes}
	var links={links}

	sankey
		.nodes(nodes)
		.links(links)
		.layout(32);
        
	var link = svg.append("g").selectAll(".link")
					.data(links)
					.enter().append("path")
					.attr("class", "link")
					.attr("d", path)
					.style("stroke-width", function(d) {{ return Math.max(1, d.dy); }})
					.sort(function(a, b) {{ return b.dy - a.dy; }});
        
	link.append("title")
			.text(function(d) {{ return d.source.name + " → " + d.target.name + "" + format(d.value); }});
        
	var node = svg.append("g").selectAll(".node")
					.data(nodes)
					.enter().append("g")
					.attr("class", "node")
					.attr("transform", function(d) {{ return "translate(" + d.x + "," + d.y + ")"; }})
					.call(
						d3.behavior.drag()
							.origin(function(d) {{ return d; }})
							.on("dragstart", function() {{ this.parentNode.appendChild(this); }})
							.on("drag", dragmove)
					);

	node.append("rect")
			.attr("height", function(d) {{ return d.dy; }})
			.attr("width", sankey.nodeWidth() )
			.style("fill", function(d) {{ return d.color = color(d.name.replace(/ .*/, "")); }})
			.style("stroke", function(d) {{ return d3.rgb(d.color).darker(2); }})
			.append("title")
			.text(function(d) {{ return d.name + "" + format(d.value); }});
        
	node.append("text")
			.attr("x", -6)
			.attr("y", function(d) {{ return d.dy / 2; }})
			.attr("dy", ".35em")
			.attr("text-anchor", "end")
			.attr("transform", null)
			.text(function(d) {{ return d.name; }})
			.filter(function(d) {{ return d.x < width / 2; }})
			.attr("x", 6 + sankey.nodeWidth())
			.attr("text-anchor", "start");
        
	function dragmove(d){{
		d3.select(this)
			.attr("transform", "translate(" + d.x + "," + (d.y = Math.max(0, Math.min(height - d.dy, d3.event.y))) + ")");
		sankey.relayout();
		link.attr("d", path);
	}}
</script>
'''.format(title=title,
           nolabel=nolabel,
           d3js_str=vjs.ViewJs.d3jsMin(),
           nodes=nodes,
           links=links,
           width=w,
           height=h)

    if oFile is None:
        sys.stdout.write(outTemplate)
    else:
        # The file is opened only once the page is fully built, so a failure
        # above leaves no empty output behind, and the handle is always
        # closed. The page declares charset utf-8, so it is written as UTF-8
        # regardless of the locale.
        with open(oFile, "w", encoding="utf-8") as html:
            html.write(outTemplate)
Ejemplo n.º 11
0
T3,F,8
T4,B,7
T4,D,1
T4,F,2
T5,A,44
T5,B,5
T5,D,7
T5,E,8
T6,A,3
T6,B,2
T6,D,1
T6,E,9
T6,F,1\
"""

temo = mtemp.Mtemp()
inpname = temo.file()

scp = """\
readcsv {inpdata}
head -n 10
mcut f=tid,item,val
msum k=tid f=val
writecsv runcheck.csv\
""".format(inpdata=inpname)

if len(sys.argv) != 2:
    print("run sample")
    scpname = temo.file()

    with open(scpname, "w") as wfp:
Ejemplo n.º 12
0
    def run(self):
        """Enumerate patterns with lcm and write them to ``self.oFile``.

        The edge file is converted to transactions with ``pair2tra``, lcm is
        run on them, and the resulting patterns are mapped back to node
        names. With ``self.byedge`` set, the output has one row per
        (pid, node1) pair; otherwise node1 values are gathered per pattern.
        """
        work = mtemp.Mtemp()

        # Eight work files are reserved in a fixed order; the sixth one is
        # not used by the pipelines below.
        (traFile, map1File, map2File, lcmOut, patFile, _spare, tidFileE,
         tidFileN) = [work.file() for _ in range(8)]

        self.pair2tra(self.ei, self.ef1, self.ef2, traFile, map1File,
                      map2File)

        lcmArgs = {"type": "CIf", "sup": 1, "o": lcmOut, "i": traFile}
        if self.minSize2:
            lcmArgs["l"] = self.minSize2
        if self.maxSize2:
            lcmArgs["u"] = self.maxSize2

        extTake.lcm(lcmArgs)
        extTake.lcmtrans(lcmOut, "p", patFile)

        # Patterns with item numbers replaced by node2 names.
        patFlow = None
        patFlow <<= nm.mdelnull(f="pattern", i=patFile)
        patFlow <<= nm.mvreplace(vf="pattern",
                                 m=map2File,
                                 K="num2",
                                 f="node2")
        patFlow <<= nm.mcut(f="pid,pattern,size:size2")
        patFlow <<= nm.mvsort(vf="pattern")
        patFlow <<= nm.msortf(f="pid")

        if not self.byedge:
            extTake.lcmtrans(lcmOut, "t", tidFileN)

            grouped = None
            grouped <<= nm.mjoin(k="__tid",
                                 m=map1File,
                                 i=tidFileN,
                                 f="node1",
                                 K="num1")
            grouped <<= nm.mtra(k="pid", f="node1")
            grouped <<= nm.mvcount(vf="node1:size1")
            grouped <<= nm.mjoin(k="pid", m=patFlow, f="pattern,size2")
            grouped <<= nm.mselnum(f="size1",
                                   c="[%s,%s]" %
                                   (self.minSize1, self.maxSize1))
            grouped <<= nm.mvsort(vf="node1,pattern")
            grouped <<= nm.msortf(f="node1,pattern")
            grouped <<= nm.mcut(f="node1:%s,pattern:%s,size1,size2" %
                                (self.ef1, self.ef2),
                                o=self.oFile)
            grouped.run()
            return

        # One row per edge of each pattern.
        patRows = nm.mtra(f="pattern", i=patFlow, r=True)
        extTake.lcmtrans(lcmOut, "t", tidFileE)

        tidNodes = None
        tidNodes <<= nm.mjoin(k="__tid",
                              m=map1File,
                              f="node1",
                              K="num1",
                              i=tidFileE)
        tidNodes <<= nm.msortf(f="pid")

        # Patterns whose node1 count lies within [minSize1, maxSize1].
        sizeOk = None
        sizeOk <<= nm.mcount(k="pid", a="size1", i=tidNodes)
        sizeOk <<= nm.mselnum(f="size1",
                              c="[%s,%s]" % (self.minSize1, self.maxSize1))

        byEdge = None
        byEdge <<= nm.mjoin(k="pid", m=sizeOk, f="size1", i=tidNodes)
        byEdge <<= nm.mnjoin(k="pid", m=patRows, f="pattern,size2")
        byEdge <<= nm.mcut(f="pid:id,node1:%s,pattern:%s,size1,size2" %
                           (self.ef1, self.ef2),
                           o=self.oFile)
        byEdge.run()
Ejemplo n.º 13
0
    def run(self):
        """Select top-ranked edges per node and write them to ``self.eo``.

        Edges are ranked per ``ef1`` node by descending similarity and those
        within ``self.rank`` are kept. ``self.dir`` then selects which of
        them are output: "x" keeps all, "b" keeps only pairs selected in
        both directions, anything else keeps only pairs selected in one
        direction. When both ``self.ni`` and ``self.no`` are set, the node
        file is copied unchanged.
        """
        wf = mtemp.Mtemp()
        # Six work-file names are reserved here; none of them is consumed by
        # the pipelines below.
        _reserved = [wf.file() for _ in range(6)]

        # ============
        # n1,n2,sim
        # a,b,0.40
        # a,c,0.31
        # a,d,0.22
        # b,c,0.20
        # b,d,0.24
        # b,e,0.14
        # c,d,0.30
        # d,e,0.09
        simOrder = self.sim + "%nr"

        ranked = None
        if self.directed:
            # For every edge a->b, rank the edges of a.
            ranked <<= nm.mnumber(k=self.ef1,
                                  s=simOrder,
                                  e="skip",
                                  S=1,
                                  a="##rank",
                                  i=self.ei)
        else:
            forward = nm.mfsort(f=[self.ef1, self.ef2], i=self.ei)
            backward = nm.mfsort(f=[self.ef2, self.ef1], i=self.ei)
            ranked <<= nm.muniq(k=[self.ef1, self.ef2], i=[forward, backward])
            ranked <<= nm.mnumber(k=self.ef1,
                                  s=simOrder,
                                  e="skip",
                                  S=1,
                                  a="##rank")
        ranked <<= nm.mselnum(f="##rank", c="[,{}]".format(self.rank))

        def mutualPairs():
            # Build a->b->c from the ranked graph; a == c means the pair was
            # selected in both directions.
            flow = None
            flow <<= nm.mnjoin(k=self.ef2,
                               K=self.ef1,
                               m=ranked,
                               f=self.ef2 + ":##ef2," + self.sim + ":sim2",
                               i=ranked)
            flow <<= nm.msel(c="$s{" + str(self.ef1) + "}==$s{##ef2}")
            return flow

        if self.dir == "x":
            # Both directions plus one direction.
            chosen = None
            chosen <<= nm.mcut(f=[self.ef1, self.ef2, self.sim], i=ranked)
        elif self.dir == "b":
            # Both directions only.
            chosen = mutualPairs()
            chosen <<= nm.mcut(f=[self.ef1, self.ef2, self.sim])
        else:
            mutual = mutualPairs()
            mutual <<= nm.mcut(f=[self.ef1, self.ef2])
            chosen = None
            chosen <<= nm.mcut(f=[self.ef1, self.ef2, self.sim], i=ranked)
            chosen <<= nm.mcommon(k=self.ef1 + "," + self.ef2,
                                  m=mutual,
                                  r=True)

        pairKey = "{},{}".format(self.ef1, self.ef2)
        output = None
        if self.udout:
            output <<= nm.mfsort(f=pairKey, i=chosen)
            output <<= nm.mavg(k=pairKey, f=self.sim)
            output <<= nm.msortf(f=pairKey, o=self.eo)
        else:
            output <<= nm.msortf(f=pairKey, i=chosen, o=self.eo)

        output.run()

        if self.ni and self.no:
            shutil.copyfile(self.ni, self.no)
Ejemplo n.º 14
0
    def enumerate(self, eArgs):
        """Enumerate frequent sequence patterns with lcm_seq.

        Results go to ``self.pFile`` (patterns with support) and, when
        ``self.outtf`` is set, to ``self.tFile`` (transaction id / pattern id
        pairs).

        Args:
            eArgs: mapping of enumeration options. Keys read here:
                ``minCnt`` or ``minSup``, ``maxCnt`` or ``maxSup``, ``top``,
                ``minLen``, ``maxLen``, ``gap``, ``win`` and ``padding``.
        """
        work = mtemp.Mtemp()

        # Minimum support and minimum support count.
        if "minCnt" in eArgs and eArgs["minCnt"] is not None:
            self.minCnt = int(eArgs["minCnt"])
            self.minSup = float(self.minCnt) / float(self.db.size)
        else:
            self.minSup = float(eArgs["minSup"])
            self.minCnt = int(self.minSup * float(self.db.size) + 0.99)

        # Maximum support and maximum support count.
        self.maxCnt = None
        if "maxCnt" in eArgs and eArgs["maxCnt"] is not None:
            self.maxCnt = int(eArgs["maxCnt"])
            self.maxSup = float(self.maxCnt) / float(self.db.size)
        elif "maxSup" in eArgs and eArgs["maxSup"] is not None:
            self.maxSup = float(eArgs["maxSup"])
            self.maxCnt = int(self.maxSup * float(self.db.size) + 0.99)

        topArg = eArgs["top"] if "top" in eArgs else None
        if topArg is not None:
            self.top = topArg

        # If an upper bound on the number of enumerated patterns is given,
        # run lcm once to obtain the minimum support.
        if self.top and self.top > 0:
            topFile = work.file()
            extTake.lcmseq(type="Cf",
                           K=str(self.top),
                           i=self.file,
                           sup="1",
                           so=topFile)
            with open(topFile, "r") as topIn:
                self.minCnt = int(topIn.read().strip())

        # lcm_seq output file. lcm does not create it when there is no
        # frequent pattern, so create an empty one beforehand.
        lcmOut = work.file()
        open(lcmOut, "w").close()

        # lcm_seq parameters.
        params = {"type": "CIf_" if self.msgoff else "CIf"}
        if self.maxCnt:
            params["U"] = str(self.maxCnt)
        for argName, option in (("minLen", "l"), ("maxLen", "u"),
                                ("gap", "g"), ("win", "G")):
            if argName in eArgs:
                params[option] = str(eArgs[argName])
        params["i"] = self.file
        params["sup"] = str(self.minCnt)
        params["o"] = lcmOut

        # Run lcm_seq; with padding, use the variant that does not output
        # the 0 item.
        if 'padding' in eArgs and eArgs["padding"]:
            extTake.lcmseq_zero(params)
        else:
            extTake.lcmseq(params)

        # Compute the support of each pattern and write it out as CSV.
        self.pFile = self.temp.file()
        itemTbl = self.db.items

        patRaw = self.temp.file()
        extTake.lcmtrans(lcmOut, "p", patRaw)

        patFlow = nm.mdelnull(f="pattern", i=patRaw)
        patFlow <<= nm.mvreplace(vf="pattern",
                                 m=itemTbl.file,
                                 K=itemTbl.idFN,
                                 f=itemTbl.itemFN)
        patFlow <<= nm.msetstr(v=self.db.size, a="total")
        patFlow <<= nm.mcal(c='${count}/${total}', a="support")
        patFlow <<= nm.mcut(f="pid,pattern,size,count,total,support")
        patFlow <<= nm.msortf(f="support%nr", o=self.pFile)
        patFlow.run()

        if not self.outtf:
            return

        # Write out the sequences that appear in each transaction.
        self.tFile = self.temp.file()

        tidMap = work.file()
        mapFlow = None
        mapFlow <<= nm.mcut(f=self.db.idFN, i=self.db.file)
        mapFlow <<= nm.muniq(k=self.db.idFN)
        mapFlow <<= nm.mnumber(S=0, a="__tid", q=True)
        mapFlow <<= nm.msortf(f="__tid", o=tidMap)
        mapFlow.run()

        tidRaw = self.temp.file()
        extTake.lcmtrans(lcmOut, "t", tidRaw)

        idPid = self.db.idFN + ",pid"
        tidFlow = None
        tidFlow <<= nm.msortf(f="__tid", i=tidRaw)
        tidFlow <<= nm.mjoin(k="__tid", m=tidMap, f=self.db.idFN)
        tidFlow <<= nm.mcut(f=idPid)
        tidFlow <<= nm.msortf(f=idPid, o=self.tFile)
        tidFlow.run()
Ejemplo n.º 15
0
    def enumerate(self, eArgs):
        """Enumerate patterns class by class and merge them into one result.

        For every class in ``self.db.clsNameRecSize`` this derives the minimum
        growth rate and the support bounds from ``eArgs``, runs ``extTake.lcm``
        with that class's weight file and converts the output to CSV.  The
        per-class files are then concatenated, ``support``, ``growthRate`` and
        ``postProb`` are computed, and the rows that satisfy the minimum
        support / minimum growth rate condition are written to ``self.pFile``.
        When ``self.outtf`` is true, the transaction-to-pattern table is
        written to ``self.tFile``.

        Side effects: sets ``self.minGR``, ``self.minPos``, ``self.maxPos``,
        ``self.sigma[cName]``, ``self.pFile`` and ``self.tFile``.

        :param eArgs: enumeration parameters (dict).  Keys read here:
            ``type`` (required, prefix of the lcm type string),
            ``minGR``, ``minProb`` (default 0.5), ``uniform``,
            ``minCnt`` or ``minSup`` (one of them is required; either a
            scalar or a dict keyed by class name),
            ``maxCnt`` or ``maxSup`` (optional; scalar or dict),
            ``minLen``, ``maxLen``.
        :raises KeyError: if neither ``minCnt`` nor ``minSup`` is present, or
            if ``type`` is missing.
        """

        pFiles = []
        tFiles = []
        tf = mtemp.Mtemp()
        for cName, posSize in self.db.clsNameRecSize.items():
            negSize = self.db.traSize - posSize
            if "minGR" in eArgs:
                self.minGR = eArgs["minGR"]
            else:
                minProb = eArgs["minProb"] if ("minProb" in eArgs) else 0.5
                if "uniform" in eArgs and eArgs["uniform"] == True:
                    self.minGR = (minProb / (1 - minProb)) * (
                        self.db.clsSize - 1)  # equation (4) in the manual
                else:
                    self.minGR = (minProb / (1 - minProb)) * (
                        float(negSize) / float(posSize)
                    )  # equation (4) in the manual

            # Minimum support (ratio) and minimum support count.
            # Accepted forms:
            #   s=0.05
            #   s=c1:0.05,c2:0.06
            #   S=10
            #   S=c1:10,c2:15
            if "minCnt" in eArgs:
                if isinstance(eArgs["minCnt"], dict):
                    self.minPos = eArgs["minCnt"][cName]
                else:
                    self.minPos = eArgs["minCnt"]
            else:
                if isinstance(eArgs["minSup"], dict):
                    self.minPos = int(eArgs["minSup"][cName] * float(posSize) +
                                      0.99)
                else:
                    # Fixed: this branch used to call the undefined name
                    # ``flost`` and raised NameError for a scalar minSup.
                    self.minPos = int(eArgs["minSup"] * float(posSize) + 0.99)

            # Maximum support (ratio) and maximum support count.
            if "maxCnt" in eArgs:
                if isinstance(eArgs["maxCnt"], dict):
                    self.maxPos = eArgs["maxCnt"][cName]
                else:
                    self.maxPos = eArgs["maxCnt"]

            elif "maxSup" in eArgs:
                if isinstance(eArgs["maxSup"], dict):
                    self.maxPos = int(eArgs["maxSup"][cName] * float(posSize) +
                                      0.99)
                else:
                    self.maxPos = int(eArgs["maxSup"] * float(posSize) + 0.99)
            else:
                self.maxPos = None

            self.sigma[cName] = self.calSigma(self.minPos, self.minGR, posSize,
                                              negSize)

            # lcm parameter setup and execution.
            # lcm creates no output file when there is no frequent pattern,
            # so create an empty file beforehand for that case.
            lcmout = tf.file()  # lcm output file
            with open(lcmout, "w") as efile:
                pass

            runPara = {}

            # A trailing "_" is appended to the type string when messages
            # are switched off.
            if self.msgoff:
                runPara["type"] = eArgs["type"] + "IA_"
            else:
                runPara["type"] = eArgs["type"] + "IA"

            # (The Ruby version tested @maxCnt here, which is never set
            # anywhere, so that branch presumably never ran.)
            if self.maxPos:
                runPara["U"] = self.maxPos

            if "minLen" in eArgs:
                runPara["l"] = str(eArgs["minLen"])

            if "maxLen" in eArgs:
                runPara["u"] = str(eArgs["maxLen"])

            runPara["w"] = self.weightFile[cName]

            runPara["i"] = self.file

            runPara["sup"] = str(self.sigma[cName])

            runPara["o"] = lcmout

            # Run lcm.
            extTake.lcm(runPara)

            pFiles.append(self.temp.file())

            transle = tf.file()
            extTake.lcmtrans(lcmout, "e", transle)

            # Convert the lcm output into the per-class pattern CSV.
            f = nm.mdelnull(f="pattern", i=transle)
            f <<= nm.mcal(c='round(${countN},1)', a="neg")
            f <<= nm.mcal(c='round(${countP}/%s,1)' % (self.posWeight[cName]),
                          a="pos")
            f <<= nm.mdelnull(f="pattern")  # possibly redundant
            f <<= nm.msetstr(v=cName, a="class")
            f <<= nm.msetstr(v=posSize, a="posTotal")
            f <<= nm.msetstr(v=self.minGR, a="minGR")
            f <<= nm.mcut(f="class,pid,pattern,size,pos,neg,posTotal,minGR",
                          o=pFiles[-1])
            f.run()

            if self.outtf:
                # Write out the patterns that occur in each transaction.
                tFiles.append(self.temp.file())

                # Transaction id -> sequential number (__tid) master.
                xxw = nm.mcut(f=self.db.idFN, i=self.db.file)
                xxw <<= nm.muniq(k=self.db.idFN)
                xxw <<= nm.mnumber(S=0, a="__tid", q=True)

                translt = self.temp.file()
                extTake.lcmtrans(lcmout, "t", translt)

                f = nm.mjoin(k="__tid", m=xxw, f=self.db.idFN, i=translt)
                f <<= nm.msetstr(v=cName, a="class")
                f <<= nm.mcut(f=self.db.idFN + ",class,pid", o=tFiles[-1])
                f.run()

        # Merge the per-class pattern and tid-pid files into the final output.
        self.pFile = self.temp.file()
        self.tFile = self.temp.file()

        # Concatenate the pattern files and assign a pattern id (ppid) that
        # is unique across all classes.
        xxpCat = tf.file()
        f = nm.mcat(i=",".join(pFiles))
        f <<= nm.msortf(f="class,pid")
        f <<= nm.mnumber(s="class,pid", S=0, a="ppid", o=xxpCat)
        f.run()

        # Compute the statistics of the pattern file.
        items = self.db.items
        f = nm.mcut(f="class,ppid:pid,pattern,size,pos,neg,posTotal,minGR",
                    i=xxpCat)
        f <<= nm.msetstr(v=self.db.traSize, a="total")
        f <<= nm.mcal(c='${total}-${posTotal}', a="negTotal")  # negative total
        f <<= nm.mcal(c='${pos}/${posTotal}', a="support")  # support
        # growthRate falls back to a very large constant when neg is 0.
        f <<= nm.mcal(
            c=
            'if(${neg}==0,1.797693135e+308,(${pos}/${posTotal})/(${neg}/${negTotal}))',
            a="growthRate")

        if "uniform" in eArgs and eArgs["uniform"] == True:
            f <<= nm.mcal(
                c='(${pos}/${posTotal})/(${pos}/${posTotal}+(%s-1)*${neg}/${negTotal})'
                % (self.db.clsSize),
                a="postProb")
        else:
            f <<= nm.mcal(c='${pos}/(${pos}+${neg})', a="postProb")

        # Selection by minimum support and minimum growth rate.
        # NOTE(review): self.minPos holds the value computed for the last
        # class of the loop above; with per-class minCnt/minSup dicts only
        # that value is applied here - confirm this is intended.
        f <<= nm.msel(c='${pos}>=%s&&${growthRate}>=${minGR}' %
                      (self.minPos))
        f <<= nm.mvreplace(vf="pattern",
                           m=items.file,
                           K=items.idFN,
                           f=items.itemFN)
        f <<= nm.mcut(
            f="class,pid,pattern,size,pos,neg,posTotal,negTotal,total,support,growthRate,postProb"
        )
        f <<= nm.mvsort(vf="pattern")
        f <<= nm.msortf(f="class%nr,postProb%nr,pos%nr", o=self.pFile)
        f.run()

        # Remove redundant patterns with respect to the taxonomy.
        if items.taxonomy:
            # (marked "to be revisited later" by the original author)
            zdd = VSOP.constant(0)
            dt = nm.mcut(i=self.pFile, f="pattern")

            for fldVal in dt:
                zdd = zdd + VSOP.itemset(fldVal[0])

            zdd = self.reduceTaxo(zdd, self.db.items)

            xxp1 = tf.file()
            xxp2 = tf.file()
            xxp3 = tf.file()
            zdd.csvout(xxp1)

            nm.mcut(nfni=True, f="1:pattern",
                    i=xxp1).mvsort(vf="pattern").msortf(f="pattern",
                                                        o=xxp2).run()
            nm.msortf(f="pattern", i=self.pFile).mcommon(
                k="pattern", m=xxp2).msortf(f="class%nr,postProb%nr,pos%nr",
                                            o=xxp3).run()
            shutil.move(xxp3, self.pFile)

        if self.outtf:
            # Master used to keep only the transactions that contain an
            # enumerated pattern.
            xxp4 = nm.mcut(f="class,pid", i=self.pFile)
            f = nm.mcat(i=",".join(tFiles))
            f <<= nm.mjoin(k="class,pid", m=xxpCat,
                           f="ppid")  # join the cross-class pattern id (ppid)
            f <<= nm.mcommon(k="class,ppid", K="class,pid",
                             m=xxp4)  # keep the enumerated patterns only
            f <<= nm.mcut(f=self.db.idFN + ",class,ppid:pid")
            f <<= nm.msortf(f=self.db.idFN + ",class,pid", o=self.tFile)
            f.run()
Ejemplo n.º 16
0
	def enumerate(self,eArgs):
		"""
		Enumerate frequent itemsets under the conditions given in eArgs.

		The pattern table (with support and lift) is written to self.pFile.
		Unless skipTP is set, the transaction-to-pattern table is written to
		self.tFile.

		:type eArgs: dict
		:type eArgs['type']: str
		:type eArgs['minCnt']: int
		:type eArgs['minSup']: float
		:type eArgs['maxCnt']: int
		:type eArgs['maxSup']: float
		:type eArgs['minLen']: int
		:type eArgs['maxLen']: int
		:type eArgs['top']: int
		:type eArgs['skipTP']: bool [default: False]
		:param eArgs: enumeration parameters
		:param eArgs['type']: type of itemset to extract ['F': frequent, 'C': closed, 'M': maximal]
		:param eArgs['minCnt']: minimum support (count)
		:param eArgs['minSup']: minimum support (probability)
		:param eArgs['maxCnt']: maximum support (count)
		:param eArgs['maxSup']: maximum support (probability)
		:param eArgs['minLen']: minimum number of items in an itemset
		:param eArgs['maxLen']: maximum number of items in an itemset
		:param eArgs['top']: number of top-support patterns to enumerate
		:param eArgs['skipTP']: do not output the patterns (itemsets) matching each transaction
		:raises KeyError: if 'type' is missing, or if neither 'minCnt' nor 'minSup' is given
		"""
		import copy
		import re

		tf=mtemp.Mtemp()
		self.eArgs=eArgs
		self.type = eArgs["type"]

		# Minimum support and minimum support count (minCnt wins over minSup).
		if eArgs.get("minCnt") is not None:
			self.minCnt = int(eArgs["minCnt"])
			self.minSup = float(self.minCnt) / float(self.db.traSize)
		else:
			self.minSup = float(eArgs["minSup"])
			self.minCnt = int(self.minSup * float(self.db.traSize) + 0.99)

		# Maximum support and maximum support count (maxCnt wins over maxSup).
		self.maxCnt=None
		if eArgs.get("maxCnt") is not None:
			self.maxCnt = int(eArgs["maxCnt"])
			self.maxSup = float(self.maxCnt) / float(self.db.traSize)
		elif eArgs.get("maxSup") is not None:
			self.maxSup = float(eArgs["maxSup"])
			self.maxCnt = int(self.maxSup * float(self.db.traSize) + 0.99)

		# A trailing "_" is appended to the type string when messages are off.
		params = {}
		if self.msgoff:
			params["type"] ="%sIf_"%(self.type)
		else:
			params["type"] ="%sIf"%(self.type)

		if self.maxCnt :
			params["U"] = str(self.maxCnt)

		if eArgs.get("minLen") is not None:
			params["l"] = str(eArgs['minLen'])

		if eArgs.get("maxLen") is not None:
			params["u"] = str(eArgs['maxLen'])

		# If an upper limit on the number of patterns is given, run lcm once
		# to obtain the minimum support to use.
		if eArgs.get("top") is not None:
			self.top = eArgs["top"]

		if self.top and self.top>0 :

			xxtop = tf.file()
			top_params = copy.deepcopy(params)
			top_params["i"] = self.file
			top_params["sup"] = "1"
			top_params["K"] = str(self.top)
			top_params["so"] = xxtop
			# This preliminary run always uses the type without the "_" suffix.
			top_params["type"] = re.sub('_$', '', top_params["type"] )

			extTake.lcm(top_params)

			with open(xxtop, "r") as rfile:
				self.minCnt = int(rfile.read().strip())

			if self.minCnt<0 :
				self.minCnt=1

		self.skipTP=False
		if "skipTP" in eArgs:
			self.skipTP=eArgs["skipTP"]

		# lcm output file
		lcmout = tf.file()

		# lcm creates no output file when there is no frequent pattern,
		# so create an empty file beforehand for that case.
		with open(lcmout, "w") as efile:
			pass

		# Run lcm.
		params["i"] = self.file
		params["sup"] = str(self.minCnt)
		params["o"] = lcmout
		extTake.lcm(params)

		# Enumerate the single-item sets; their counts are used for lift.
		xxone= tf.file()
		tpstr = "FIf_" if self.msgoff else "FIf"

		extTake.lcm(type=tpstr,i=self.file,sup=1,o=xxone,l=1,u=1)

		# Compute the support of every pattern and write it out as CSV.
		xxp0 = tf.file()
		self.pFile = self.temp.file()
		items=self.db.items
		trans0 = self.temp.file()

		extTake.lcmtrans(lcmout,"p",trans0)

		f =   nm.mdelnull(i=trans0,f="pattern")
		f <<= nm.mvreplace(vf="pattern",m=items.file,K=items.idFN,f=items.itemFN)
		f <<= nm.msetstr(v=self.db.traSize,a="total")
		f <<= nm.mcal(c='${count}/${total}',a="support")
		f <<= nm.mcut(f="pid,pattern,size,count,total,support")
		f <<= nm.mvsort(vf="pattern")
		f <<= nm.msortf(f="pid",o=xxp0)
		f.run()

		# xxp0
		# pid,count,total,support,pattern
		# 0,13,13,1,A
		# 4,6,13,0.4615384615,A B
		xxp1=tf.file()

		# No taxonomy specified (added 2010/11/20).
		if items.taxonomy is None:
			shutil.move(xxp0,xxp1)
		# Taxonomy specified: drop the patterns that are redundant with
		# respect to the taxonomy.
		else:
			zdd=VSOP.constant(0)
			fobj = nm.mcut(i=xxp0,f='pattern')
			for fldVal in fobj:
				zdd=zdd+VSOP.itemset(fldVal[0])

			zdd=self.reduceTaxo(zdd,self.db.items)
			xxz1=tf.file()
			xxz2=tf.file()
			zdd.csvout(xxz1)

			f0=None
			f0 <<= nm.mcut(nfni=True,f="1:pattern",i=xxz1)
			f0 <<= nm.mvsort(vf="pattern")
			f0 <<= nm.msortf(f="pattern")

			f=None
			f <<= nm.msortf(f="pattern",i=xxp0)
			f <<= nm.mcommon(k="pattern",m=f0)
			f <<= nm.msortf(f="pid",o=xxp1)
			f.run()

		# Lift computation.
		transl = tf.file()
		extTake.lcmtrans(xxone,"p",transl)

		xxp2 =   nm.mdelnull(i=transl,f="pattern")
		xxp2 <<= nm.mvreplace(vf="pattern",m=items.file,K=items.idFN,f=items.itemFN)
		xxp2 <<= nm.msortf(f="pattern")

		# Per pattern id: sum of ln(count) over the single items it contains.
		xxp3 =   nm.mcut(f="pid,pattern",i=xxp1)
		xxp3 <<= nm.mtra(f="pattern",r=True)
		xxp3 <<= nm.mjoin(k="pattern",m=xxp2,f="count:c1")
		xxp3 <<= nm.mcal(c='ln(${c1})',a="c1ln")
		xxp3 <<= nm.msum(k="pid",f="c1ln")

		# p3
		# pid,pattern,c1,c1ln
		# 0,A,13,2.564949357
		# 1,E,7,1.945910149

		# (was suspected to misbehave => confirmed OK by the original author)
		f3 =   nm.mjoin(k="pid",f="c1ln",i=xxp1,m=xxp3)
		f3 <<= nm.mcal(c='round(exp(ln(${count})-${c1ln}+(${size}-1)*ln(${total})),0.0001)',a="lift")
		f3 <<= nm.mcut(f="pid,size,count,total,support,lift,pattern")
		f3 <<= nm.msortf(f="support%nr",o=self.pFile)
		f3.run()

		if not self.skipTP:
			# Write out the patterns that occur in each transaction.
			self.tFile = self.temp.file()
			xxw3i = tf.file()
			extTake.lcmtrans(lcmout,"t",xxw3i)

			xxw1 = nm.mcut(f=self.db.idFN,i=self.db.file).muniq(k=self.db.idFN).mnumber(S=0,a="__tid",q=True).msortf(f="__tid")
			xxw2 = nm.mcut(f="pid",i=self.pFile)
			xxw3 = nm.mcommon(k="pid",i=xxw3i,m=xxw2).mjoin(k="__tid",m=xxw1,f=self.db.idFN).mcut(f=self.db.idFN+",pid",o=self.tFile)
			xxw3.run()
Ejemplo n.º 17
0
def __mkTree(iFile, oFile):
    """Expand the parent/child rows of ``iFile`` into root-to-leaf paths.

    Starting from the root nodes, the child column of the next level is
    joined on repeatedly (node0, node1, ...) until a join adds no non-null
    value; the last joined table is then sorted and written to ``oFile``.

    :param iFile: input CSV with at least the ``keyNum`` and ``num`` fields.
    :param oFile: output CSV file name.
    :return: the number of join rounds performed (level index + 1).
    """
    work = mtemp.Mtemp()
    rootFile = work.file()
    spareFile = work.file()
    innerFile = work.file()
    checkFile = work.file()

    # Layout of iFile:
    #   key,nam%0,keyNum,num,nv,nc
    #   #2_1,#1_1,4,1,6,1
    #   #2_1,#1_2,4,2,0.9999999996,1

    # Build the unique lists of keyNum and of num, and tell root nodes from
    # leaf nodes by how the two lists contain each other.
    pairs = nm.mcut(f="keyNum,num", i=iFile)
    keyNums = pairs.mcut(f="keyNum").muniq(k="keyNum")
    nums = pairs.mcut(f="num").muniq(k="num")

    # Select the root nodes.
    roots = nm.mcommon(k="keyNum", K="num", m=nums, i=keyNums, r=True)
    roots = roots.mcut(f="keyNum:node0", o=rootFile)

    # Select the leaf nodes.
    leaves = nm.mcommon(k="num", K="keyNum", m=keyNums, i=nums, r=True)
    leaves = leaves.mcut(f="num")

    # The structure below a leaf is irrelevant, so leaves are removed from
    # the node rows of the input.
    inner = nm.mcommon(k="num", m=leaves, r=True, i=pairs, o=innerFile)

    nm.runs([inner, roots])

    # Join the parent/child rows onto the root file level by level.
    #   rootFile (round 0 input):
    #     node0%0
    #     3
    #     4
    #   after round 0:
    #     node0%0,node1
    #     3,
    #     4,1
    #     4,2
    #   after round 1:
    #     node0,node1%0,node2
    #     3,,
    #     4,1,
    #     4,2,
    #   The loop ends once the joined field (node2 here) has no non-null row.
    level = 0
    src, dst = rootFile, spareFile
    while True:
        childFld = "node%d" % (level + 1)
        joined = nm.mnjoin(k="node%d" % (level),
                           K="keyNum",
                           m=innerFile,
                           n=True,
                           f="num:" + childFld,
                           i=src,
                           o=dst)
        nm.mdelnull(i=joined, f=childFld, o=checkFile).run()

        if mrecount(i=checkFile) == 0:
            nm.msortf(f="*", i=dst, o=oFile).run()
            return level + 1

        # The two work files alternate as input and output.
        src, dst = dst, src
        level += 1
Ejemplo n.º 18
0
def mgv(ei,
        ef,
        ev=None,
        ec=None,
        el=None,
        ed=None,
        ni=None,
        nf=None,
        nv=None,
        nc=None,
        nl=None,
        nw=1,
        tp="flat",
        k=None,
        o=None,
        d=False,
        clusterLabel=False,
        noiso=False,
        normalize=False,
        normalizeEdge=False,
        normalizeNode=False):
    """Validate the arguments and write a dot graph description to ``o``.

    All arguments are checked first, so an invalid call raises before any
    file is touched.  The edge file (and the optional node file) is then
    handed to the module's ``__mkMap`` / ``__mkNode`` / ``__mkEdge`` /
    ``__dotNode`` / ``__dotEdge`` / ``__dotTree`` / ``__replace`` helpers.

    :param ei: edge file name (str).
    :param ef: the two edge field names, as ``"a,b"`` or a list.  More than
        two names only produces a warning on stderr; the first two are used.
    :param ev: edge field name passed to ``__mkEdge`` (str or None).
    :param ec: edge field name passed to ``__mkEdge`` (str or None).
    :param el: edge label field names (comma separated str, list or None);
        an empty string is treated as None.
    :param ed: edge field name passed to ``__mkEdge`` (str or None).
    :param ni: node file name (str or None).
    :param nf: node field name (str or None).
    :param nv: node field name passed to ``__mkNode`` (str or None).
    :param nc: node field name passed to ``__mkNode`` (str or None).
    :param nl: node label field names (comma separated str, list or None);
        an empty string is treated as None.
    :param nw: passed through unchanged to ``__dotNode``.
    :param tp: ``"flat"`` or ``"nest"``; None means ``"flat"``.
    :param k: key field name (str or None).  When falsy, a constant ``#key``
        field is added to the input files and used as the key.
    :param o: output file name (str, required).
    :param d: True for directed edges; None means False.
    :param clusterLabel: bool; None means False.
    :param noiso: bool; None means False.
    :param normalize: bool; True turns on both normalizeEdge and
        normalizeNode.
    :param normalizeEdge: bool; None means False.
    :param normalizeNode: bool; None means False.
    :raises TypeError: if an argument has an unsupported type, ``ef`` has
        fewer than two names, or ``tp`` is neither ``"flat"`` nor ``"nest"``.
    """

    def _check_opt_str(name, value):
        # The argument must be a str or None.
        if not (value is None or isinstance(value, str)):
            raise TypeError(name + "= unsupport " + str(type(value)))

    def _field_list(name, value):
        # Comma separated str -> list; "" -> None; list / None unchanged.
        if isinstance(value, str):
            value = value.split(',')
            if len(value) == 1 and value[0] == '':
                return None
            return value
        if not (value is None or isinstance(value, list)):
            raise TypeError(name + "= unsupport " + str(type(value)))
        return value

    def _flag(name, value):
        # None -> False; anything other than a bool is rejected.
        if value is None:
            return False
        if not isinstance(value, bool):
            raise TypeError(name + "= unsupport " + str(type(value)))
        return value

    # ---- argument check -------------------------------------------------
    if not isinstance(ei, str):
        raise TypeError("ei= unsupport " + str(type(ei)))

    if isinstance(ef, str):
        ef = ef.split(',')
    elif not isinstance(ef, list):
        raise TypeError("ef= unsupport " + str(type(ef)))

    if len(ef) < 2:
        raise TypeError("ef size == 2 ")
    elif len(ef) > 2:
        sys.stderr.write('warning : ef size == 2 ')

    _check_opt_str("k", k)
    _check_opt_str("ev", ev)
    _check_opt_str("ec", ec)
    el = _field_list("el", el)
    _check_opt_str("ed", ed)
    _check_opt_str("ni", ni)
    _check_opt_str("nf", nf)
    _check_opt_str("nv", nv)
    _check_opt_str("nc", nc)
    nl = _field_list("nl", nl)

    if tp is None:
        tp = "flat"
    elif not isinstance(tp, str):
        raise TypeError("tp= unsupport " + str(type(tp)))

    if not isinstance(o, str):
        raise TypeError("o= unsupport " + str(type(o)))

    d = _flag("d", d)
    clusterLabel = _flag("clusterLabel", clusterLabel)
    noiso = _flag("noiso", noiso)
    # Fixed: these three used to report themselves as "noiso=".
    normalize = _flag("normalize", normalize)
    normalizeEdge = _flag("normalizeEdge", normalizeEdge)
    normalizeNode = _flag("normalizeNode", normalizeNode)

    # Reject an unknown layout type before any work is done (this used to
    # be detected only after all intermediate files had been built).
    if tp not in ("flat", "nest"):
        raise TypeError("unsupport type " + tp)

    if normalize:
        normalizeEdge = True
        normalizeNode = True

    # ---- work files -----------------------------------------------------
    temp = mtemp.Mtemp()
    xxni = temp.file()
    xxei = temp.file()
    xxmap = temp.file()
    xxnode = temp.file()
    xxedge = temp.file()
    xxdotNode = temp.file()
    xxdotEdge = temp.file()
    xxtree = temp.file()

    mkDir(xxdotNode)
    mkDir(xxdotEdge)

    if d:
        directedStr = "edge []"
    else:
        directedStr = "edge [dir=none]"

    # Add a key field (used for clusters) when none was given.
    if not k:
        if ni:
            nm.msetstr(v="", a="#key", i=ni, o=xxni).run()
            ni = xxni

        nm.msetstr(v="", a="#key", i=ei, o=xxei).run()
        ei = xxei
        k = "#key"

    ef1 = ef[0]
    ef2 = ef[1]

    __mkMap(k, nf, ni, ef1, ef2, ei, xxmap)
    __mkNode(k, nf, nl, nv, nc, ni, ef1, ef2, ei, noiso, normalizeNode, xxmap,
             xxnode)

    __mkEdge(k, ef1, ef2, el, ec, ed, ev, ei, normalizeEdge, xxmap, xxedge)
    # Generate the node and edge data for dot as per-cluster files.
    __dotNode(xxnode, nw, tp, clusterLabel, xxdotNode)
    __dotEdge(xxedge, xxdotEdge)

    if tp == "flat":
        depth = __mkFlat(xxnode, xxtree)
    else:
        # "nest": tree-structure processing; only the clusters are stored
        # in the tree structure.
        depth = __mkTree(xxnode, xxtree)

    xxdotTree = temp.file()
    header = '''digraph G {{
  {directedStr}
'''.format(directedStr=directedStr)

    footer = "}\n"

    __dotTree(xxtree, depth, header, footer, xxdotTree)
    __replace(xxdotTree, xxdotNode, xxdotEdge, clusterLabel, o)
Ejemplo n.º 19
0
    def __init__(self, iFile, idFN, timeFN, itemFN, padding, clsFN=None):
        """Load a sequence file and prepare the derived working data.

        The id / time / item fields of ``iFile`` are cut out, sorted and
        de-duplicated into a temporary file (``self.file``); with ``padding``
        the result is additionally passed through ``mpadding`` with ``"!"``
        as the fill value.  The number of distinct transaction ids is stored
        in ``self.size`` and an ``Items`` object is built from the result.
        When ``clsFN`` is given, the id/class pairs are written to
        ``self.cFile`` and the per-class record counts are collected.

        :param iFile: input file name.
        :param idFN: transaction id field name.
        :param timeFN: time field name.
        :param itemFN: item field name.
        :param padding: truthy to pad the time series.
        :param clsFN: class field name, or None when there is no class.
        """
        # Attributes that are filled in further down (or stay None).
        self.size = None  # number of transactions
        self.items = None  # Items object
        self.taxonomy = None  # taxonomy object
        self.clsFN = None  # class field name
        self.clsNameRecSize = None  # record count per class
        self.clsSize = None  # number of classes
        self.cFile = None  # class file

        self.temp = mtemp.Mtemp()
        self.iFile = iFile  # input file
        self.iPath = os.path.abspath(self.iFile)  # full path
        self.idFN = idFN
        self.timeFN = timeFN
        self.itemFN = itemFN
        self.file = self.temp.file()  # transaction (output) file name
        self.padding = padding

        keyFlds = self.idFN + "," + self.timeFN + "," + self.itemFN
        sortFlds = self.idFN + "," + self.timeFN + "%n," + self.itemFN

        # Cut, sort and de-duplicate; with padding, "!" (an early character
        # in ASCII order) is inserted as the fill value.
        flow = None
        flow <<= nm.mcut(f=keyFlds, i=self.iFile)
        flow <<= nm.msortf(f=sortFlds)
        if self.padding:
            flow <<= nm.muniq(k=keyFlds)
            flow <<= nm.mpadding(k=self.idFN,
                                 f=self.timeFN + "%n",
                                 v="!",
                                 o=self.file)
        else:
            flow <<= nm.muniq(k=keyFlds, o=self.file)
        flow.run()

        # Number of transactions = number of distinct ids.
        counter = nm.mcut(f=self.idFN, i=self.file)
        counter = counter.muniq(k=self.idFN)
        counter = counter.mcount(a="__cnt")
        counter = counter.mcut(f='__cnt')
        cntRows = counter.run()
        self.size = int(cntRows[0][0])

        # Build the item object from the transaction data.  The call is the
        # same with and without padding.
        self.items = items.Items(self.file, self.itemFN)

        if not clsFN:
            return

        self.clsFN = clsFN
        self.cFile = self.temp.file()
        clsKey = "%s,%s" % (self.idFN, self.clsFN)
        nm.mcut(f=clsKey, i=self.iFile).muniq(k=clsKey, o=self.cFile).run()

        # Convert the per-class counts, which arrive as strings, to numbers.
        self.clsSize = 0
        self.clsNames = []
        self.clsNameRecSize = {}
        perClass = nm.mcut(f=self.clsFN, i=self.cFile).mcount(k=self.clsFN,
                                                             a='count')
        for rec in perClass:
            clsName = rec[0]
            self.clsNames.append(clsName)
            self.clsNameRecSize[clsName] = int(rec[1])
            self.clsSize += 1
Ejemplo n.º 20
0
def mnetpie(ei,
            ni,
            ef,
            nf,
            o,
            nodeSizeFld=None,
            nodeTipsFld=None,
            nodeColorFld=None,
            edgeWidthFld=None,
            edgeColorFld=None,
            pieDataFld=None,
            pieTipsFld=None,
            picFld=None,
            undirect=False,
            offline=False):

    #ei:edge file
    #ef:egfile
    if type(ef) is str:
        ef = ef.split(',')
    if len(ef) != 2:
        raise Exception("ef= takes just two field names")

    if not ((pieDataFld == None and pieTipsFld == None) or
            (pieDataFld != None and pieTipsFld != None)):
        raise Exception(
            "pieDataFld= pieTipsFld= are necessary at the same time")

    if picFld != None and pieDataFld != None:
        raise Exception(
            "picFld= cannot be specified with pieDataFld= pieTipsFld=")

    if nodeColorFld != None:
        if picFld != None or pieDataFld != None or pieTipsFld != None:
            raise Exception(
                "nodeColorFld= cannot be specified with pieDataFld= pieTipsFld= picFld="
            )

    if pieDataFld != None and pieTipsFld != None:
        caseNo = 1
    elif picFld != None:
        caseNo = 2
    else:
        caseNo = 0

    tempW = mtemp.Mtemp()

    xxnode = tempW.file()

    nodefld = []
    nodedmy1 = []
    nodedmy2 = []

    nodefld.append("%s:node" % (nf))
    if nodeSizeFld != None:
        nodefld.append("%s:nodesize" % (nodeSizeFld))
    else:
        nodedmy1.append("nodesize")
        nodedmy2.append("50")

    if nodeTipsFld != None:
        nodefld.append("%s:nodeT" % (nodeTipFld))
    else:
        nodedmy1.append("nodeT")
        nodedmy2.append("")

    if nodeColorFld != None:
        nodefld.append("%s:nodeClr" % (nodeColorFld))
    else:
        nodedmy1.append("nodeClr")
        nodedmy2.append("skyblue")

    if caseNo == 1:
        nodefld.append("%s:pieD" % (pieDataFld))
        nodefld.append("%s:pieT" % (pieTipsFld))
    elif caseNo == 2:
        nodefld.append("%s:pic" % (picFld))
    else:
        nodedmy1.append("pic")
        nodedmy2.append("")

    f1 = None
    f1 <<= nm.mcut(i=ni, f=nodefld)
    if len(nodedmy1) != 0:
        f1 <<= nm.msetstr(a=nodedmy1, v=nodedmy2)

    if caseNo == 1:
        f1 <<= nm.mshare(k="node", f="pieD:pieDS")
        f1 <<= nm.mnumber(k="node", a="nodeid", B=True)

        f2 = nm.muniq(k="pieT", i=f1)
        f2 <<= nm.mnumber(q=True, a="pieTno")
        f2 <<= nm.mjoin(k="pieT", f="pieTno", i=f1).iredirect("m")
        f2 <<= nm.msortf(f="nodeid%n,pieTno%n", o=xxnode)
    else:
        f2 = nm.mnumber(a="nodeid%n", q=True, i=f1, o=xxnode)

    f2.run()

    xxedge = tempW.file()
    # MAKE EDGE DATA
    edgefld = []
    edgedmy1 = []
    edgedmy2 = []
    edgefld.append("%s:edgeS" % (ef[0]))
    edgefld.append("%s:edgeE" % (ef[1]))

    if edgeWidthFld != None:
        edgefld.append("%s:edgesize" % (edgeWidthFld))
    else:
        edgedmy1.append("edgesize")
        edgedmy2.append("1")

    if edgeColorFld != None:
        edgefld.append("%s:edgecolor" % (edgeColorFld))
    else:
        edgedmy1.append("edgecolor")
        edgedmy2.append("black")

    f3 = None
    f3 <<= nm.mcut(i=ei, f=edgefld)
    if len(edgedmy1) != 0:
        f3 <<= nm.msetstr(a=edgedmy1, v=edgedmy2)

    f3 <<= nm.mnumber(a="preNo", q=True)
    f3 <<= nm.mbest(k="edgeS,edgeE", s="preNo%nr")
    f3 <<= nm.mnumber(s="preNo%n", a="edgeID")
    f3 <<= nm.mjoin(k="edgeS", K="node", f="nodeid:edgeSid", m=xxnode)
    f3 <<= nm.mjoin(k="edgeE", K="node", f="nodeid:edgeEid", m=xxnode)

    #双方向チェック一応
    f4 = None
    f4 <<= nm.mfsort(i=f3, f="edgeS,edgeE")
    f4 <<= nm.mcount(k="edgeS,edgeE", a="edgecnt")
    f4 <<= nm.mselnum(c="[2,]", f="edgecnt")
    f4 <<= nm.msetstr(a="biflg", v=1)
    f4 <<= nm.mjoin(k="edgeID", f="biflg", n=True, i=f3).iredirect("m")
    f4 <<= nm.msortf(f="edgeID%n", o=xxedge)
    f4.run()

    gdata = "{\"nodes\":["
    if caseNo == 1:
        nodedatastk = []
        nodedatas = ""
        for val, top, bot in nm.readcsv(xxnode).getline(k="nodeid",
                                                        otype='dict',
                                                        q=True):
            name = val["node"]
            r = val["nodesize"]
            title = val["nodeT"]
            if top:
                nodedatas = "{\"name\":\"%s\",\"title\":\"%s\",\"r\":%s,\"node\":[" % (
                    name, title, r)

            pieTno = val["pieTno"]
            pieT = val["pieT"]
            pieDS = val["pieDS"]
            nodedatas += "{\"group\":%s,\"color\":%s,\"value\":%s,\"title\":\"%s\"}" % (
                pieTno, pieDS, pieDS, pieT)

            if bot:
                nodedatas += "]}"
                nodedatastk.append(nodedatas)
                nodedatas = ""
            else:
                nodedatas += ","

        gdata += ",".join(nodedatastk)

    else:
        nodedatastk = []
        for val in nm.readcsv(xxnode).getline(otype='dict'):
            name = val["node"]
            r = val["nodesize"]
            title = val["nodeT"]
            pic = val["pic"]
            nclr = val["nodeClr"]
            nodedatas = "{\"name\":\"%s\",\"title\":\"%s\",\"pic\":\"%s\",\"color\":\"%s\",\"r\":%s}" % (
                name, title, pic, nclr, r)
            nodedatastk.append(nodedatas)

        gdata += ",".join(nodedatastk)

    gdata += "],\"links\": ["

    edgedatastk = []
    for val in nm.readcsv(xxedge).getline(otype='dict'):
        es = val["edgeSid"]
        et = val["edgeEid"]
        esize = val["edgesize"]
        ecolor = val["edgecolor"]
        edgedatas = "{\"source\":%s,\"target\":%s,\"length\":500,\"ewidth\":%s,\"color\":\"%s\"}" % (
            es, et, esize, ecolor)
        edgedatastk.append(edgedatas)

    gdata += ','.join(edgedatastk)

    gdata += "]}"

    direct = ".attr('marker-end','url(#arrowhead)')"
    if undirect:
        direct = ""

    nodeTemplate = '''
    node
			.append("circle")
			.attr("r",function(d){return d.r/4;})
			.attr("fill", function(d){return d.color;})
			.append("title")
			.text(function(d){return d.title;})
	'''
    nodemakeTemplate = '''
	for(var i=0 ; i< graph.nodes.length;i++){
		graph.nodes[i].id = i
	}
	'''

    if pieDataFld != None:
        nodeTemplate = ''' 
    node.selectAll("path")
        .data( function(d, i){
          return pie(d.node);
				})
        .enter()
        .append("svg:path")
        .attr("d", arc)
        .attr("fill", function(d, i) {
					return color(d.data.group);
				})
				.append("title")
				.text(function(d){{return d.data.title;}})

        node.append("circle")
				.attr("r",function(d){{return d.r/4;}})
				.attr({
					'fill': 'white'
				})
				.append("title")
				.text(function(d){{return d.title;}});
		'''
        nodemakeTemplate = '''
			for(var i=0 ; i< graph.nodes.length;i++){
			var r = graph.nodes[i].r
			for(var j=0 ; j< graph.nodes[i].node.length;j++){
				graph.nodes[i].node[j]['r'] = r
			}
			graph.nodes[i].id = i
		}
		'''
    elif picFld != None:
        nodeTemplate = '''
    node
			.append("image")
			.attr("height",function(d){return d.r;})
			.attr("width",function(d){return d.r;})
			.attr("x",function(d){return -1 * d.r/2; })
			.attr("y",function(d){return -1 * d.r/2; })
			.attr("xlink:href",function(d){return d.pic; })
			.append("title")
			.text(function(d){return d.title;})
		'''

    d3js_str = "<script type='text/javascript' src='http://d3js.org/d3.v3.min.js'></script>"

    if offline:
        d3js_str = "<script>%s<script>" % (vjs.ViewJs.d3jsMin())

    outTemplate = '''
<!DOCTYPE html>
<html lang="en">
<head>
	<meta charset="utf-8">
	{d3js_str}
  <style></style>
</head>
<body>
<script type="text/javascript">
	var graph = {gdata} ;

  var width = 4000,
      height = 3000;

	var color = d3.scale.category10();
    
	{nodemakeTemplate};

	for(var i=0 ; i< graph.links.length;i++){{
		graph.links[i].id = i
	}}

	var pie = d3.layout.pie()
        .sort(null)
        .value(function(d) {{ return d.value; }});

	var arc = d3.svg.arc()
       	.outerRadius( function(d){{ return d.data.r ; }})
        .innerRadius( function(d){{ return d.data.r/2 ; }} );
		
	var svg = d3.select("body").append("svg")
		.attr("width", width)
		.attr("height", height);

	d3.select("svg").append('defs').append('marker')
		.attr({{'id':'arrowhead',
						'viewBox':'-0 -5 10 10',
						'refX':30,
						'refY':0,
						'orient':'auto-start-reverse',
						'markerWidth':5,
						'markerHeight':5,
						'xoverflow':'visible'}})
		.append('path')
		.attr('d', 'M 0,-5 L 10 ,0 L 0,5')
		.attr('fill', '#999')
		.style('stroke','none');
            
	var g = svg.append("g");
	var node = g.selectAll(".node");
	var link = g.selectAll(".link");
	nodes = graph.nodes
  links = graph.links

	var force = 
		d3.layout.force()
			.linkDistance(200)
			.linkStrength(3.5)
      .charge(-3500)
			.gravity(0.1)
			.friction(0.95)
      .size([width, height])
			.on("tick", function() {{
				link
					.attr("x1", function(d) {{ return d.source.x; }})
					.attr("y1", function(d) {{ return d.source.y; }})
					.attr("x2", function(d) {{ return d.target.x; }})
					.attr("y2", function(d) {{ return d.target.y; }});

				node
					.attr("x", function(d) {{ return d.x; }})
					.attr("y", function(d) {{ return d.y; }})
					.attr("transform", function(d) {{ return "translate(" + d.x + "," + d.y + ")"}});	
	    }});


		node = node.data(nodes, function( d ) {{ return d.id; }} );
		link = link.data(links, function( d ) {{ return d.id; }} );


    link
      .enter()
      .append("line")
      .attr("class", "link")
			.style("stroke", function( d ) {{ return d.color; }} )
			.style("stroke-width", function( d ) {{ return d.ewidth; }})
			{direct}


    node
    	.enter()
			.append("g")
      .attr("class", "node")
			.style({{}})
			.call(force.drag)
			.on("contextmenu", function(nd) {{
					d3.event.preventDefault();
					force.stop()
				 	nodes.splice( nd.index, 1 );
					links = links.filter(function(nl) {{
						return nl.source.index != nd.index && nl.target.index != nd.index;					
					}});
					node = node.data(nodes, function( d ) {{ return d.id; }} );
					node.exit().remove();
					link = link.data( links, function( d ) {{ return d.id; }} );
					link.exit().remove();
			    force.nodes(nodes)
      	   .links(links)
        	 .start();
				}});  
	
		{nodeTemplate}


    node
      .append("text")
      .attr("text-anchor", "middle")
			.style("stroke", "black")
      .text(function(d) {{ return d.name; }});

    force.nodes(nodes)
         .links(links)
         .start();


</script>
</body>
</html>
	'''.format(d3js_str=d3js_str,
            gdata=gdata,
            nodemakeTemplate=nodemakeTemplate,
            direct=direct,
            nodeTemplate=nodeTemplate)

    html = sys.stdout
    if not o == None:
        html = open(o, "w")

    html.write(outTemplate)

    if not o == None:
        html.close()
Ejemplo n.º 21
0
    def run(self):
        """Run the full pipeline and write the node and edge output files.

        Stages, in execution order:

        1. ``nt.mtra2gc`` is run on ``self.iFile`` with ``sim="C"`` and
           ``th="0"``; its node and edge outputs go to temporary files.
        2. The edge output is piped through one ``nm.mselnum`` per configured
           filter (``self.filter[i]`` restricted to
           ``[self.lb[i], self.ub[i]]``) for ``self.filterSize`` filters.
        3. For each entry of ``self.sim``, ``nt.mfriends`` is run and its edge
           output is annotated with ``dir``, ``sim``, ``color``, ``simType``
           and ``simPriority`` columns, then stored as ``e_<i>`` in a
           temporary directory (nodes are stored as ``n_<i>``).
        4. When ``self.orFile`` is set, the ``nm.mcommon`` of the filtered
           edges against the generated edges (key ``node1,node2``) is written
           to it.
        5. The ``e_*`` files are written to ``self.oeFile`` (through
           ``nm.mbest`` when ``self.prune`` is truthy, plain ``nm.mcat``
           otherwise) and ``n_0`` is copied to ``self.onFile``.
        """
        workspace = mtemp.Mtemp()

        ### mtra2gc
        node_path = workspace.file()
        edge_path = workspace.file()
        raw_edge_path = workspace.file()

        gc_args = {"i": self.iFile}
        # Optional arguments are only forwarded when they are set.
        optional_args = (
            ("tid", self.idFN),
            ("item", self.itemFN),
            ("s", self.sp1),
            ("S", self.sp2),
        )
        for option, setting in optional_args:
            if setting:
                gc_args[option] = setting

        #####################
        # To list the confidence for both orientations, enumerate in both
        # directions using sim=C and th=0.
        # The output doubles in size, but that cost is recovered by running
        # mfriends with -directed.
        gc_args["sim"] = "C"
        gc_args["th"] = "0"

        gc_args["node_support"] = True
        if self.numtp:
            gc_args["num"] = True
        gc_args["no"] = node_path
        gc_args["eo"] = raw_edge_path

        nt.mtra2gc(**gc_args).run()

        # Apply every configured numeric range filter to the raw edges.
        flow = nm.readcsv(raw_edge_path)
        for idx in range(self.filterSize):
            flow <<= nm.mselnum(f=self.filter[idx],
                                c="[%s,%s]" % (self.lb[idx], self.ub[idx]))
        flow <<= nm.writecsv(edge_path)
        flow.run()

        ### mfriends
        friends_dir = workspace.file()
        friend_edge_path = workspace.file()
        # The next four temporary names are allocated but not used by the
        # active code below; the first three appear only in the disabled
        # file-based prune variant.
        spare_w = workspace.file()
        spare_f = workspace.file()
        spare_ff = workspace.file()
        spare_or = workspace.file()

        if not os.path.isdir(friends_dir):
            os.makedirs(friends_dir)

        # One [colour for "W", colour for "F"] pair per similarity position.
        palette = [
            ["FF000080", "FF888880"],
            ["0000FF80", "8888FF80"],
            ["00FF0080", "88FF8880"],
        ]

        for i, sim_name in enumerate(self.sim):
            nt.mfriends(
                ei=edge_path,
                ni=node_path,
                ef="node1,node2",
                nf="node",
                eo=friend_edge_path,
                no=friends_dir + "/n_" + str(i),
                sim=sim_name,
                dir=self.dir[i],
                rank=self.rank[i],
                directed=True,
            ).run()

            # Per (node1,node2) summary restricted to rows whose count is 2.
            paired = nm.mfsort(f="node1,node2", i=friend_edge_path)
            paired <<= nm.msummary(k="node1,node2",
                                   f=sim_name,
                                   c="count,mean")
            paired <<= nm.mselstr(f="count", v=2)
            # node1%0,node2%1,fld,count,mean
            # a,b,support,2,0.1818181818
            # a,d,support,2,0.1818181818

            flow = nm.mjoin(k="node1,node2",
                            K="node1,node2",
                            m=paired,
                            f="mean:s1",
                            n=True,
                            i=friend_edge_path)
            flow <<= nm.mjoin(k="node2,node1",
                              K="node1,node2",
                              m=paired,
                              f="mean:s2",
                              n=True)
            # 1) If no similarity could be joined (s1 and s2 are both null),
            #    the edge is one-way, so it is marked "F".
            # 2) Of a bidirectional pair a->b, b->a, only a->b (s1 is not
            #    null) is marked "W".
            # 3) Every other edge is marked "D" and removed.
            flow <<= nm.mcal(
                c='if(isnull($s{s1}),if(isnull($s{s2}),\"F\",\"D\"),\"W\")',
                a="dir")
            flow <<= nm.mselstr(f="dir", v="D", r=True)
            flow <<= nm.mcal(c='if($s{dir}==\"W\",$s{s1},$s{%s})' % (sim_name),
                             a="sim")
            flow <<= nm.mchgstr(f="dir:color",
                                c='W:%s,F:%s' % (palette[i][0], palette[i][1]),
                                A=True)
            flow <<= nm.msetstr(v=[sim_name, str(i)], a="simType,simPriority")
            flow <<= nm.mcut(f="simType,simPriority,node1,node2,sim,dir,color",
                             o=friends_dir + "/e_" + str(i))
            flow.run()
            # node1%1,node2%0,simType,sim,dir,color
            # b,a,jaccard,0.3333333333,F,8888FF
            # j,c,jaccard,0.3333333333,F,8888FF
            # b,d,jaccard,0.3333333333,F,8888FF
            # a,e,jaccard,0.5,W,0000FF
            # d,e,jaccard,0.5,W,0000FF

        edge_glob = friends_dir + "/e_*"

        # Output of the rule file.
        if self.orFile:
            generated = nm.mcat(i=edge_glob).muniq(k="node1,node2")
            nm.mcommon(k="node1,node2",
                       i=edge_path,
                       m=generated,
                       o=self.orFile).run()

        # Collapse multi-edges into a single edge (W takes priority, then
        # parameter position).
        if self.prune:
            # Disabled file-based variant left by the original author
            # (spare_w / spare_f / spare_ff were xxw / xxf / xxff):
            #   # split into bidirectional and one-way edges
            #   nm.mcat(i=friends_dir+"/e_*").mselstr(f="dir",v="W",o=spare_w,u=spare_f).run()
            #   # select the edges that are one-way only
            #   f =   nm.mcommon(k="node1,node2",K="node1,node2",r=True,m=spare_w,i=spare_f)
            #   f <<= nm.mcommon(k="node1,node2",K="node2,node1",r=True,m=spare_w,o=spare_ff)
            #   f.run()
            #   f = nm.mcat(i=spare_w+","+spare_ff).mbest(k="node1,node2",s="dir%r,simPriority%n",o=self.oeFile).run()
            #
            # NOTE(review): the original author wrote "this is no good" at
            # this point without saying which variant it refers to - confirm
            # before relying on the prune output.
            two_way = nm.mcat(i=edge_glob).mselstr(f="dir", v="W")
            one_way = two_way.direction("u")  # author note: to be reconsidered
            one_way <<= nm.mcommon(k="node1,node2",
                                   K="node1,node2",
                                   r=True,
                                   m=two_way)
            one_way <<= nm.mcommon(k="node1,node2",
                                   K="node2,node1",
                                   r=True,
                                   m=two_way)
            flow = nm.mbest(i=[two_way, one_way],
                            k="node1,node2",
                            s="dir%r,simPriority%n",
                            o=self.oeFile)

            flow.run()

        else:
            nm.mcat(i=edge_glob, o=self.oeFile).run()

        nm.mcat(i=friends_dir + "/n_0", o=self.onFile).run()