def test_add_svg_as_subelement(self):
    svg = SVG(id='svg')
    subsvg = SVG(id='subsvg')
    svg.add(subsvg)
    self.assertEqual(
        svg.tostring(),
        '<svg id="svg"><defs /><svg id="subsvg"><defs /></svg></svg>')
def get_4cat_canvas(path, width, height, header=None, footer="made with 4cat - 4cat.oilab.nl",
                    fontsize_normal=None, fontsize_small=None, fontsize_large=None):
    """
    Get a standard SVG canvas to draw 4CAT graphs to

    Adds a border, footer, header, and some basic text styling

    :param path:  The path where the SVG graph will be saved
    :param width:  Width of the canvas
    :param height:  Height of the canvas
    :param header:  Header, if necessary to draw
    :param footer:  Footer text, if necessary to draw. Defaults to shameless
                    4CAT advertisement.
    :param fontsize_normal:  Font size of normal text
    :param fontsize_small:  Font size of small text (e.g. footer)
    :param fontsize_large:  Font size of large text (e.g. header)
    :return SVG:  SVG canvas (via svgwrite) that can be drawn to
    """
    from svgwrite.container import SVG
    from svgwrite.drawing import Drawing
    from svgwrite.shapes import Rect
    from svgwrite.text import Text

    if fontsize_normal is None:
        fontsize_normal = width / 75

    if fontsize_small is None:
        fontsize_small = width / 100

    if fontsize_large is None:
        fontsize_large = width / 50

    # instantiate with border and white background
    canvas = Drawing(str(path), size=(width, height),
                     style="font-family:monospace;font-size:%ipx" % fontsize_normal)
    canvas.add(Rect(insert=(0, 0), size=(width, height), stroke="#000", stroke_width=2, fill="#FFF"))

    # header
    if header:
        header_shape = SVG(insert=(0, 0), size=("100%", fontsize_large * 2))
        header_shape.add(Rect(insert=(0, 0), size=("100%", "100%"), fill="#000"))
        header_shape.add(Text(insert=("50%", "50%"), text=header, dominant_baseline="middle",
                              text_anchor="middle", fill="#FFF", style="font-size:%ipx" % fontsize_large))
        canvas.add(header_shape)

    # footer (i.e. 4cat banner)
    if footer:
        footersize = (fontsize_small * len(footer) * 0.7, fontsize_small * 2)
        footer_shape = SVG(insert=(width - footersize[0], height - footersize[1]), size=footersize)
        footer_shape.add(Rect(insert=(0, 0), size=("100%", "100%"), fill="#000"))
        footer_shape.add(Text(insert=("50%", "50%"), text=footer, dominant_baseline="middle",
                              text_anchor="middle", fill="#FFF", style="font-size:%ipx" % fontsize_small))
        canvas.add(footer_shape)

    return canvas
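# Usage sketch (illustrative, not part of the original sources): the file name,
# dimensions and header below are hypothetical example values. get_4cat_canvas
# returns an svgwrite Drawing, so any svgwrite element can be added to it before
# Drawing.save() writes the SVG to the given path.
def example_get_4cat_canvas_usage():
    from svgwrite.shapes import Line

    canvas = get_4cat_canvas("example.svg", 1000, 600, header="Example graph")
    canvas.add(Line(start=(100, 100), end=(900, 500), stroke="#000", stroke_width=2))
    canvas.save(pretty=True)  # writes example.svg to disk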
def compare(self, text_block1, text_block2):
    self.svg = SVG(insert=(self.pos_x, self.pos_y),
                   font_family=self.font_family, font_size=self.font_size)

    _len_textblock1 = len(text_block1)
    _len_textblock2 = len(text_block2)

    if text_block1 == text_block2:
        _fill = rgb(0x00, 0x0, 0x0)
        self.build_svg_line(text_block2, _fill)

    elif _len_textblock1 > 0 and _len_textblock2 > 0:
        _matcher = SequenceMatcher(None, text_block1, text_block2)

        for tag, _s1, _e1, _s2, _e2 in _matcher.get_opcodes():
            if tag == "replace":
                _text = text_block2[_s2:_e2]
                _fill = rgb(0x00, 0x80, 0xff)
                self.build_svg_line(_text, _fill)

            elif tag == "delete":
                pass

            elif tag == "insert":
                _text = text_block2[_s2:_e2]
                _fill = rgb(0x00, 0x80, 0xff)
                self.build_svg_line(_text, _fill)

            elif tag == "equal":
                _text = text_block1[_s1:_e1]
                _fill = rgb(0x0, 0x0, 0x0)
                self.build_svg_line(_text, _fill)

    elif _len_textblock1 == 0 and _len_textblock2 > 0:
        _fill = rgb(0x00, 0x0, 0x0)
        self.build_svg_line(text_block2, _fill)

    elif _len_textblock1 > 0 and _len_textblock2 == 0:
        _fill = rgb(0x00, 0x0, 0x0)
        self.build_svg_line(text_block1, _fill)

    else:
        _fill = rgb(0x00, 0x0, 0x0)
        self.build_svg_line("", _fill)

    if self.svg_text is not None:
        self.svg_text["x"] = 0
        self.svg_text["y"] = self.height
        self.svg.add(self.svg_text)
        self.height += self.height_line * 0.3
        self.svg_text = None

    self.svg['height'] = self.height
    self.svg['width'] = self.width_max

    return self.svg, self.width_max, self.height
def __init__(self):
    self.dwg = None
    self.pos_x = 0
    self.pos_y = 0
    self.pos_y_max = 0
    self.pos_x_max = 0
    self.unit = 10
    self.font_size = 10
    self.font_family = "Lucida Console"
    self.filepath = None

    render_text.Render.set_font_family(self.font_family)
    render_text.Render.set_font_size(self.font_size)

    self.dwg = svgwrite.Drawing()
    self._move_right()

    _svg = SVG(insert=(self.pos_x, self.pos_y))
    for _class in base.gen_available_dx_node_types():
        _svg.add(self.add_line(_class))

    _svg["width"] = self.pos_x_max
    _svg["height"] = self.pos_y_max

    self.dwg["width"] = self.pos_x_max
    self.dwg["height"] = self.pos_y_max
    self.dwg.add(_svg)
def add_line(self, instance_xtype):
    '''
    Draw svg line representing DiffxElement.

    :param instance_xtype: XTypes.DiffxElement
    '''
    _text = instance_xtype.name()
    _w, _h = render_text.Render.get_text_size(_text)
    _h += _h * 0.25

    _svg = SVG(insert=(self.pos_x, self.pos_y), width=_w, height=_h)

    _text_svg = Text(_text)
    _text_svg['x'] = 0
    _text_svg['y'] = _h - _h * 0.25
    _text_svg['font-size'] = self.font_size
    _text_svg['font-family'] = self.font_family
    _text_svg['opacity'] = 1.0
    _text_svg['fill'] = rgb(0, 0, 0)

    _rect_svg = Rect()
    _rect_svg['x'] = 0
    _rect_svg['y'] = 0
    _rect_svg['fill'] = instance_xtype.fill
    _rect_svg['opacity'] = instance_xtype.opacity
    _rect_svg['height'] = _h
    _rect_svg['width'] = _w

    _svg.add(_text_svg)
    _svg.add(_rect_svg)
    _svg.viewbox(0, 0, _w, _h)

    self.pos_y = self.pos_y + _h
    self.pos_x_max = max(self.pos_x_max, _w + self.pos_x)
    self.pos_y_max = max(self.pos_y_max, self.pos_y)

    return _svg
def add_text_box(self, dx_nodes):
    '''
    Simple text box with fixed width.

    :param dx_nodes: XTypes.DiffxElement
    '''
    _text = self.get_element_text(dx_nodes.node)
    _lines = self._lines_callback(_text)
    _y = copy.deepcopy(self.pos_y)

    _svg = SVG(insert=(self.pos_x, self.pos_y))
    _t = Text('', insert=(0, 0), font_size=self.font_size, font_family=self.font_family)

    _h = 0
    _w = 0
    for _line, _width, _height in _lines:
        _h = _h + float(_height)
        _w = max(_w, float(_width))
        _text = TSpan(_line, fill="black", insert=(0, _h))
        _t.add(_text)

    self.pos_y = self.pos_y + _h
    self.pos_y_max = max(self.pos_y_max, self.pos_y)
    self.pos_x_max = max(self.pos_x_max, _w + self.pos_x)

    _svg['height'] = _h
    _svg['width'] = _w
    _svg.viewbox(0, 0, _w, _h)
    _svg.add(_t)

    return _svg
def process(self):
    # parse parameters
    input_words = self.parameters.get("words", "")
    if not input_words or not input_words.split(","):
        self.dataset.update_status("No input words provided, cannot look for similar words.", is_final=True)
        self.dataset.finish(0)
        return

    input_words = input_words.split(",")

    try:
        threshold = float(self.parameters.get("threshold", self.options["threshold"]["default"]))
    except ValueError:
        threshold = float(self.options["threshold"]["default"])

    threshold = max(-1.0, min(1.0, threshold))

    num_words = convert_to_int(self.parameters.get("num-words"), self.options["num-words"]["default"])
    overlay = self.parameters.get("overlay")
    reduction_method = self.parameters.get("method")
    all_words = self.parameters.get("all-words")

    # load model files and initialise
    self.dataset.update_status("Unpacking word embedding models")
    staging_area = self.unpack_archive_contents(self.source_file)
    common_vocab = None
    vector_size = None
    models = {}

    # find words that are common to all models
    self.dataset.update_status("Determining cross-model common vocabulary")
    for model_file in staging_area.glob("*.model"):
        if self.interrupted:
            shutil.rmtree(staging_area)
            raise ProcessorInterruptedException("Interrupted while processing word embedding models")

        model = KeyedVectors.load(str(model_file)).wv
        models[model_file.stem] = model
        if vector_size is None:
            vector_size = model.vector_size  # needed later for dimensionality reduction

        if common_vocab is None:
            common_vocab = set(model.vocab.keys())
        else:
            common_vocab &= set(model.vocab.keys())  # intersect

    # sort common vocabulary by combined frequency across all models
    # this should make filtering for common words a bit faster further down
    self.dataset.update_status("Sorting vocabulary")
    common_vocab = list(common_vocab)
    common_vocab.sort(key=lambda w: sum([model.vocab[w].count for model in models.values()]), reverse=True)

    # initial boundaries of 2D space (to be adjusted later based on t-sne
    # outcome)
    max_x = 0.0 - sys.float_info.max
    max_y = 0.0 - sys.float_info.max
    min_x = sys.float_info.max
    min_y = sys.float_info.max

    # for each model, find the words that we may want to plot - these are
    # the nearest neighbours for the given query words
    relevant_words = {}

    # the vectors need to be reduced all at once - but the vectors are
    # grouped by model. To solve this, keep one numpy array of vectors,
    # but also keep track of which indexes of this array belong to which
    # model, by storing the index of the first vector for a model
    vectors = numpy.empty((0, vector_size))
    vector_offsets = {}

    # now process each model
    for model_name, model in models.items():
        relevant_words[model_name] = set()

        self.dataset.update_status("Finding similar words in model '%s'" % model_name)
        for query in input_words:
            if query not in model.vocab:
                self.dataset.update_status(
                    "Query '%s' was not found in model %s; cannot find nearest neighbours." % (query, model_name),
                    is_final=True)
                self.dataset.finish(0)
                return

            if self.interrupted:
                shutil.rmtree(staging_area)
                raise ProcessorInterruptedException("Interrupted while finding similar words")

            # use a larger sample (topn) than required since some of the
            # nearest neighbours may not be in the common vocabulary and
            # will therefore need to be ignored
            context = set([word[0] for word in model.most_similar(query, topn=1000)
                           if word[0] in common_vocab and word[1] >= threshold][:num_words])

            relevant_words[model_name] |= {query} | context  # always include query word

    # now do another loop to determine which words to plot for each model
    # this is either the same as relevant_words, or a superset which
    # combines all relevant words for all models
    plottable_words = {}
    last_model = max(relevant_words.keys())
    all_relevant_words = set().union(*relevant_words.values())

    for model_name, words in relevant_words.items():
        plottable_words[model_name] = []  # a list, not a set, since order needs to be preserved
        vector_offsets[model_name] = len(vectors)

        # determine which words to plot for this model. either the nearest
        # neighbours for this model, or all nearest neighbours found across
        # all models
        words_to_include = all_relevant_words if all_words else relevant_words[model_name]

        for word in words_to_include:
            if word in plottable_words[model_name] or (not overlay and model_name != last_model
                                                       and word not in input_words):
                # only plot each word once per model, or if 'overlay'
                # is not set, only once overall (for the most recent
                # model)
                continue

            vector = models[model_name][word]
            plottable_words[model_name].append(word)
            vectors = numpy.append(vectors, [vector], axis=0)

    del models  # no longer needed

    # reduce the vectors of all words to be plotted for this model to
    # a two-dimensional coordinate with the previously initialised tsne
    # transformer. here the two-dimensional vectors are interpreted as
    # cartesian coordinates
    if reduction_method == "PCA":
        pca = PCA(n_components=2, random_state=0)
        vectors = pca.fit_transform(vectors)
    elif reduction_method == "t-SNE":
        # initialise t-sne transformer
        # parameters taken from Hamilton et al.
        # https://github.com/williamleif/histwords/blob/master/viz/common.py
        tsne = TSNE(n_components=2, random_state=0, learning_rate=150, init="pca")
        vectors = tsne.fit_transform(vectors)
    elif reduction_method == "TruncatedSVD":
        # standard sklearn parameters made explicit
        svd = TruncatedSVD(n_components=2, algorithm="randomized", n_iter=5, random_state=0)
        vectors = svd.fit_transform(vectors)
    else:
        shutil.rmtree(staging_area)
        self.dataset.update_status("Invalid dimensionality reduction technique selected", is_final=True)
        self.dataset.finish(0)
        return

    # also keep track of the boundaries of our 2D space, so we can plot
    # them properly later
    for position in vectors:
        max_x = max(max_x, position[0])
        max_y = max(max_y, position[1])
        min_x = min(min_x, position[0])
        min_y = min(min_y, position[1])

    # now we know for each model which words should be plotted and at what
    # position
    # with this knowledge, we can normalize the positions, and start
    # plotting them in a graph

    # a palette generated with https://medialab.github.io/iwanthue/
    colours = ["#d58eff", "#cf9000", "#3391ff", "#a15700", "#911ca7", "#00ddcb", "#cc25a9", "#d5c776",
               "#6738a8", "#ff9470", "#47c2ff", "#a4122c", "#00b0ca", "#9a0f76", "#ff70c8", "#713c88"]
    colour_index = 0

    # make sure all coordinates are positive
    max_x -= min_x
    max_y -= min_y

    # determine graph dimensions and proportions
    width = 1000  # arbitrary
    height = width * (max_y / max_x)  # retain proportions
    scale = width / max_x

    # margin around the plot to give room for labels and to look better
    margin = width * 0.1
    width += 2 * margin
    height += 2 * margin

    # normalize all known positions to fit within the graph
    vectors = [(margin + ((position[0] - min_x) * scale), margin + ((position[1] - min_y) * scale))
               for position in vectors]

    # now all positions are finalised, we can determine the "journey" of
    # each query - the sequence of positions in the graph it takes, so we
    # can draw lines from position to position later
    journeys = {}
    for query in input_words:
        journeys[query] = []
        for model_name, words in plottable_words.items():
            index = words.index(query)
            journeys[query].append(vectors[vector_offsets[model_name] + index])

    # font sizes proportional to width (which is static and thus predictable)
    fontsize_large = width / 50
    fontsize_normal = width / 75
    fontsize_small = width / 100

    # now we have the dimensions, the canvas can be instantiated
    model_type = self.source_dataset.parameters.get("model-type", "word2vec")
    canvas = get_4cat_canvas(self.dataset.get_results_path(), width, height,
                             header="%s nearest neighbours (fitting: %s) - '%s'" % (
                                 model_type, reduction_method, ",".join(input_words)),
                             fontsize_normal=fontsize_normal, fontsize_large=fontsize_large,
                             fontsize_small=fontsize_small)

    # use colour-coded backgrounds to distinguish the query words in the
    # graph, each model (= interval) with a separate colour
    for model_name in plottable_words:
        solid = Filter(id="solid-%s" % model_name)
        solid.feFlood(flood_color=colours[colour_index])
        solid.feComposite(in_="SourceGraphic")
        canvas.defs.add(solid)
        colour_index += 1

    # now plot each word for each model
    self.dataset.update_status("Plotting graph")
    words = SVG(insert=(0, 0), size=(width, height))
    queries = SVG(insert=(0, 0), size=(width, height))
    colour_index = 0

    for model_name, labels in plottable_words.items():
        positions = vectors[vector_offsets[model_name]:vector_offsets[model_name] + len(labels)]

        label_index = 0
        for position in positions:
            word = labels[label_index]
            is_query = word in input_words
            label_index += 1

            filter = ("url(#solid-%s)" % model_name) if is_query else "none"
            colour = "#FFF" if is_query else colours[colour_index]
            fontsize = fontsize_normal if is_query else fontsize_small

            if word in input_words:
                word += " (" + model_name + ")"

            label_container = SVG(insert=position, size=(1, 1), overflow="visible")
            label_container.add(Text(insert=("50%", "50%"), text=word, dominant_baseline="middle",
                                     text_anchor="middle",
                                     style="fill:%s;font-size:%ipx" % (colour, fontsize), filter=filter))

            # we make sure the queries are always rendered on top by
            # putting them in a separate SVG container
            if is_query:
                queries.add(label_container)
            else:
                words.add(label_container)

        colour_index = 0 if colour_index >= len(colours) else colour_index + 1

    # plot a line between positions for query words
    lines = SVG(insert=(0, 0), size=(width, height))
    for query, journey in journeys.items():
        previous_position = None
        for position in journey:
            if previous_position is None:
                previous_position = position
                continue

            lines.add(Line(start=previous_position, end=position, stroke="#CE1B28", stroke_width=2))
            previous_position = position

    canvas.add(lines)
    canvas.add(words)
    canvas.add(queries)

    canvas.save(pretty=True)
    shutil.rmtree(staging_area)
    self.dataset.finish(len(journeys))
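# Sketch of the 2-D reduction step above in isolation (illustrative only; the
# random vectors stand in for the stacked word vectors and the t-SNE parameters
# simply mirror the ones used in the processor).
def example_reduce_vectors_to_2d():
    import numpy
    from sklearn.manifold import TSNE

    vectors = numpy.random.rand(50, 100)  # 50 hypothetical words with 100-dimensional embeddings
    tsne = TSNE(n_components=2, random_state=0, learning_rate=150, init="pca")
    coordinates = tsne.fit_transform(vectors)  # one (x, y) position per word
    return coordinates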
def render(self, canvas, level, x=0, y=0, origin=None, height=None, side=1, init=True, level_index=0):
    """
    Render node set to canvas

    :param canvas:  SVG object
    :param list level:  List of nodes to render
    :param int x:  X coordinate of top left of level block
    :param int y:  Y coordinate of top left of level block
    :param tuple origin:  Coordinates to draw 'connecting' line to
    :param float height:  Block height budget
    :param int side:  What direction to move into: 1 for rightwards, -1 for leftwards
    :param bool init:  Whether to draw the top level of nodes. Only has an
                       effect if side == self.SIDE_LEFT
    :return:  Updated canvas
    """
    if not level:
        return canvas

    # this eliminates a small misalignment where the left side of the
    # graph starts slightly too far to the left
    if init and side == self.SIDE_LEFT:
        x += self.step

    # determine how many nodes we'll need to fit on top of each other
    # within this block
    required_space_level = sum([self.max_breadth(node) for node in level])

    # draw each node and the tree below it
    for node in level:
        # determine how high this block will be based on the available
        # height and the nodes we'll need to fit in it
        required_space_node = self.max_breadth(node)
        block_height = (required_space_node / required_space_level) * height

        # determine how much we want to enlarge the text
        occurrence_ratio = node.occurrences / self.max_occurrences[level_index]
        if occurrence_ratio >= 0.75:
            embiggen = 3
        elif occurrence_ratio > 0.5:
            embiggen = 2
        elif occurrence_ratio > 0.25:
            embiggen = 1.75
        elif occurrence_ratio > 0.15:
            embiggen = 1.5
        else:
            embiggen = 1

        # determine how large the text block will be (this is why we use a
        # monospace font)
        characters = len(node.name)
        text_width = characters * self.step
        text_width *= (embiggen * 1)

        text_offset_y = self.fontsize if self.align == "top" else ((block_height) / 2)

        # determine where in the block to draw the text and where on the
        # canvas the block appears
        block_position = (x, y)
        block_offset_x = -(text_width + self.step) if side == self.SIDE_LEFT else 0

        self.x_min = min(self.x_min, block_position[0] + block_offset_x)
        self.x_max = max(self.x_max, block_position[0] + block_offset_x + text_width)

        # the first node on the left side of the graph does not need to be
        # drawn if the right side is also being drawn because in that case
        # it's already going to be included through that part of the graph
        if not (init and side == self.SIDE_LEFT):
            container = SVG(x=block_position[0] + block_offset_x, y=block_position[1],
                            width=text_width, height=block_height, overflow="visible")
            container.add(Text(text=node.name, insert=(0, text_offset_y), alignment_baseline="middle",
                               style="font-size:" + str(embiggen) + "em"))
            canvas.add(container)
        else:
            # adjust position to make left side connect to right side
            x += text_width
            block_position = (block_position[0] + text_width, block_position[1])

        # draw the line connecting this node to the parent node
        if origin:
            destination = (x - self.step, y + text_offset_y)

            # for the left side of the graph, draw a curve leftwards
            # instead of rightwards
            if side == self.SIDE_RIGHT:
                bezier_origin = origin
                bezier_destination = destination
            else:
                bezier_origin = (destination[0] + self.step, destination[1])
                bezier_destination = (origin[0] - self.step, origin[1])

            # bezier curve control points
            control_x = bezier_destination[0] - ((bezier_destination[0] - bezier_origin[0]) / 2)
            control_left = (control_x, bezier_origin[1])
            control_right = (control_x, bezier_destination[1])

            # draw curve
            flow = Path(stroke="#000", fill_opacity=0, stroke_width=1.5)
            flow.push("M %f %f" % bezier_origin)
            flow.push("C %f %f %f %f %f %f" % tuple([*control_left, *control_right, *bezier_destination]))
            canvas.add(flow)

        # bezier curves for the next set of nodes will start at these
        # coordinates
        new_origin = (block_position[0] + ((text_width + self.step) * side),
                      block_position[1] + text_offset_y)

        # draw this node's children
        canvas = self.render(canvas, node.children, x=x + ((text_width + self.gap) * side), y=y,
                             origin=new_origin, height=int(block_height), side=side, init=False,
                             level_index=level_index + 1)

        y += block_height

    return canvas
def process(self): """ This takes a 4CAT results file as input, and outputs a plain text file containing all post bodies as one continuous string, sanitized. """ link_regex = re.compile(r"https?://[^\s]+") delete_regex = re.compile(r"[^a-zA-Z)(.,\n -]") # settings strip_urls = self.parameters.get("strip-urls", self.options["strip-urls"]["default"]) strip_symbols = self.parameters.get( "strip-symbols", self.options["strip-symbols"]["default"]) sides = self.parameters.get("sides", self.options["sides"]["default"]) self.align = self.parameters.get("align", self.options["align"]["default"]) window = convert_to_int( self.parameters.get("window", self.options["window"]["default"]), 5) + 1 query = self.parameters.get("query", self.options["query"]["default"]) self.limit = convert_to_int( self.parameters.get("limit", self.options["limit"]["default"]), 100) left_branches = [] right_branches = [] # do some validation if not query.strip() or re.sub(r"\s", "", query) != query: self.dataset.update_status( "Invalid query for word tree generation. Query cannot be empty or contain whitespace." ) self.dataset.finish(0) return window = min(window, self.options["window"]["max"] + 1) window = max(1, window) # find matching posts processed = 0 for post in self.iterate_csv_items(self.source_file): processed += 1 if processed % 500 == 0: self.dataset.update_status( "Processing and tokenising post %i" % processed) body = post["body"] if strip_urls: body = link_regex.sub("", body) if strip_symbols: body = delete_regex.sub("", body) body = word_tokenize(body) positions = [ i for i, x in enumerate(body) if x.lower() == query.lower() ] # get lists of tokens for both the left and right side of the tree # on the left side, all lists end with the query, on the right side, # they start with the query for position in positions: right_branches.append(body[position:position + window]) left_branches.append(body[max(0, position - window):position + 1]) # Some settings for rendering the tree later self.step = self.fontsize * 0.6 # approximately the width of a monospace char self.gap = (7 * self.step) # space for lines between nodes width = 1 # will be updated later # invert the left side of the tree (because that's the way we want the # branching to work for that side) # we'll visually invert the nodes in the tree again later left_branches = [list(reversed(branch)) for branch in left_branches] # first create vertical slices of tokens per level self.dataset.update_status("Generating token tree from posts") levels_right = [{} for i in range(0, window)] levels_left = [{} for i in range(0, window)] tokens_left = [] tokens_right = [] # for each "level" (each branching point representing a level), turn # tokens into nodes, record the max amount of occurences for any # token in that level, and keep track of what nodes are in which level. # The latter is needed because a token may occur multiple times, at # different points in the graph. Do this for both the left and right # side of the tree. 
for i in range(0, window): for branch in right_branches: if i >= len(branch): continue token = branch[i].lower() if token not in levels_right[i]: parent = levels_right[i - 1][branch[ i - 1].lower()] if i > 0 else None levels_right[i][token] = Node(token, parent=parent, occurrences=1, is_top_root=(parent is None)) tokens_right.append(levels_right[i][token]) else: levels_right[i][token].occurrences += 1 occurrences = levels_right[i][token].occurrences self.max_occurrences[i] = max( occurrences, self.max_occurrences[i] ) if i in self.max_occurrences else occurrences for branch in left_branches: if i >= len(branch): continue token = branch[i].lower() if token not in levels_left[i]: parent = levels_left[i - 1][branch[ i - 1].lower()] if i > 0 else None levels_left[i][token] = Node(token, parent=parent, occurrences=1, is_top_root=(parent is None)) tokens_left.append(levels_left[i][token]) else: levels_left[i][token].occurrences += 1 occurrences = levels_left[i][token].occurrences self.max_occurrences[i] = max( occurrences, self.max_occurrences[i] ) if i in self.max_occurrences else occurrences # nodes that have no siblings can be merged with their parents, else # the graph becomes unnecessarily large with lots of single-word nodes # connected to single-word nodes. additionally, we want the nodes with # the most branches to be sorted to the top, and then only retain the # most interesting (i.e. most-occurring) branches self.dataset.update_status("Merging and sorting tree nodes") for token in tokens_left: self.merge_upwards(token) self.sort_node(token) self.limit_subtree(token) for token in tokens_right: self.merge_upwards(token) self.sort_node(token) self.limit_subtree(token) # somewhat annoyingly, anytree does not simply delete nodes detached # from the tree in the previous steps, but makes them root nodes. We # don't need these root nodes (we only need the original root), so the # next step is to remove all root nodes that are not the main root. 
# We cannot modify a list in-place, so make a new list with the # relevant nodes level_sizes = {} filtered_tokens_right = [] for token in tokens_right: if token.is_root and not token.is_top_root: continue filtered_tokens_right.append(token) filtered_tokens_left = [] for token in tokens_left: if token.is_root and not token.is_top_root: continue filtered_tokens_left.append(token) # now we know which nodes are left, and can therefore determine how # large the canvas needs to be - this is based on the max number of # branches found on any level of the tree, in other words, the number # of "terminal nodes" height_left = self.whitespace * self.fontsize * max([ self.max_breadth(node) for node in filtered_tokens_left if node.is_top_root ]) height_right = self.whitespace * self.fontsize * max([ self.max_breadth(node) for node in filtered_tokens_right if node.is_top_root ]) height = max(height_left, height_right) canvas = Drawing(str(self.dataset.get_results_path()), size=(width, height), style="font-family:monospace;font-size:%ipx" % self.fontsize) # the nodes on the left side of the graph now have the wrong word order, # because we reversed them earlier to generate the correct tree # hierarchy - now reverse the node labels so they are proper language # again for token in tokens_left: self.invert_node_labels(token) wrapper = SVG(overflow="visible") self.dataset.update_status("Rendering tree to SVG file") if sides != "right": wrapper = self.render(wrapper, [ token for token in filtered_tokens_left if token.is_root and token.children ], height=height, side=self.SIDE_LEFT) if sides != "left": wrapper = self.render(wrapper, [ token for token in filtered_tokens_right if token.is_root and token.children ], height=height, side=self.SIDE_RIGHT) # things may have been rendered outside the canvas, in which case we # need to readjust the SVG properties wrapper.update({"x": 0 if self.x_min >= 0 else self.x_min * -1}) canvas.update({"width": (self.x_max - self.x_min)}) canvas.add(wrapper) canvas.save(pretty=True) self.dataset.update_status("Finished") self.dataset.finish(len(tokens_left) + len(tokens_right))
def process(self):
    graphs = {}
    intervals = []

    smooth = self.parameters.get("smooth")
    normalise_values = self.parameters.get("normalise")
    completeness = convert_to_int(self.parameters.get("complete"), 0)
    graph_label = self.parameters.get("label")
    top = convert_to_int(self.parameters.get("top"), 10)

    # first gather graph data: each distinct item gets its own graph and
    # for each graph we have a sequence of intervals, each interval with
    # its own value
    first_date = "9999-99-99"
    last_date = "0000-00-00"

    for row in self.iterate_items(self.source_file):
        if row["item"] not in graphs:
            graphs[row["item"]] = {}

        # make sure the months and days are zero-padded
        interval = row.get("date", "")
        interval = "-".join([str(bit).zfill(2 if len(bit) != 4 else 4) for bit in interval.split("-")])
        first_date = min(first_date, interval)
        last_date = max(last_date, interval)

        if interval not in intervals:
            intervals.append(interval)

        if interval not in graphs[row["item"]]:
            graphs[row["item"]][interval] = 0

        graphs[row["item"]][interval] += float(row.get("value", 0))

    # first make sure we actually have something to render
    intervals = sorted(intervals)
    if len(intervals) <= 1:
        self.dataset.update_status("Not enough data for a side-by-side over-time visualisation.")
        self.dataset.finish(0)
        return

    # only retain most-occurring series - sort by sum of all frequencies
    if len(graphs) > top:
        selected_graphs = {graph: graphs[graph] for graph in sorted(
            graphs, key=lambda x: sum([graphs[x][interval] for interval in graphs[x]]), reverse=True)[0:top]}
        graphs = selected_graphs

    # there may be items that do not have values for all intervals
    # this will distort the graph, so the next step is to make sure all
    # graphs consist of the same continuous interval list
    missing = {graph: 0 for graph in graphs}
    for graph in graphs:
        missing[graph], graphs[graph] = pad_interval(graphs[graph], first_interval=first_date,
                                                     last_interval=last_date)

    # now that's done, make sure the graph datapoints are in order
    intervals = sorted(list(graphs[list(graphs)[0]].keys()))

    # delete graphs that do not have the required amount of intervals
    # this is useful to get rid of outliers and items that only occur
    # very few times over the full interval
    if completeness > 0:
        intervals_required = len(intervals) * (completeness / 100)
        disqualified = []
        for graph in graphs:
            if len(intervals) - missing[graph] < intervals_required:
                disqualified.append(graph)

        graphs = {graph: graphs[graph] for graph in graphs if graph not in disqualified}

    # determine max value per item, so we can normalize them later
    limits = {}
    max_limit = 0
    for graph in graphs:
        for interval in graphs[graph]:
            limits[graph] = max(limits.get(graph, 0), abs(graphs[graph][interval]))
            max_limit = max(max_limit, abs(graphs[graph][interval]))

    # order graphs by highest (or lowest) value
    limits = {limit: limits[limit] for limit in sorted(limits, key=lambda l: limits[l])}
    graphs = {graph: graphs[graph] for graph in limits}

    if not graphs:
        # maybe nothing is actually there to be graphed
        self.dataset.update_status("No items match the selection criteria - nothing to visualise.")
        self.dataset.finish(0)
        return None

    # how many vertical grid lines (and labels) are to be included at most
    # 12 is a sensible default because it allows one label per month for a full
    # year's data
    max_gridlines = 12

    # If True, label is put at the lower left bottom of the graph rather than
    # outside it. Automatically set to True if one of the labels is long, as
    # else the label would fall off the screen
    label_in_graph = max([len(item) for item in graphs]) > 30

    # determine how wide each interval should be
    # the graph has a minimum width - but the graph's width will be
    # extended if at this minimum width each item does not have the
    # minimum per-item width
    min_full_width = 600
    min_item_width = 50
    item_width = max(min_item_width, min_full_width / len(intervals))

    # determine how much space each graph should get
    # same trade-off as for the interval width
    min_full_height = 300
    min_item_height = 100
    item_height = max(min_item_height, min_full_height / len(graphs))

    # margin - this should be enough for the text labels to fit in
    margin_base = 50
    margin_right = margin_base * 4
    margin_top = margin_base * 3

    # this determines the "flatness" of the isometric projection and can be
    # tweaked for different looks - basically corresponds to how far the
    # camera is above the horizon
    plane_angle = 120

    # don't change these
    plane_obverse = radians((180 - plane_angle) / 2)
    plane_angle = radians(plane_angle)

    # okay, now determine the full graphic size with these dimensions projected
    # semi-isometrically. We can also use these values later for drawing grid
    # lines, et cetera. The axis widths and heights here are the dimensions of
    # the bounding box wrapping the isometrically projected axes.
    x_axis_length = (item_width * (len(intervals) - 1))
    y_axis_length = (item_height * len(graphs))

    x_axis_width = (sin(plane_angle / 2) * x_axis_length)
    y_axis_width = (sin(plane_angle / 2) * y_axis_length)
    canvas_width = x_axis_width + y_axis_width

    # leave room for graph header
    if graph_label:
        margin_top += (2 * (canvas_width / 50))

    x_axis_height = (cos(plane_angle / 2) * x_axis_length)
    y_axis_height = (cos(plane_angle / 2) * y_axis_length)
    canvas_height = x_axis_height + y_axis_height

    # now we have the dimensions, the canvas can be instantiated
    canvas = get_4cat_canvas(self.dataset.get_results_path(),
                             width=(canvas_width + margin_base + margin_right),
                             height=(canvas_height + margin_base + margin_top),
                             header=graph_label)

    # draw gridlines - vertical
    gridline_x = y_axis_width + margin_base
    gridline_y = margin_top + canvas_height

    step_x_horizontal = sin(plane_angle / 2) * item_width
    step_y_horizontal = cos(plane_angle / 2) * item_width
    step_x_vertical = sin(plane_angle / 2) * item_height
    step_y_vertical = cos(plane_angle / 2) * item_height

    # labels for x axis
    # month and week both follow the same pattern
    # it's not always possible to distinguish between them but we will try
    # by looking for months greater than 12 in which case we are dealing
    # with weeks
    # we need to know this because for months there is an extra row in the
    # label with the full month
    is_week = False
    for i in range(0, len(intervals)):
        if re.match(r"^[0-9]{4}-[0-9]{2}", intervals[i]) and int(intervals[i].split("-")[1]) > 12:
            is_week = True
            break

    skip = max(1, int(len(intervals) / max_gridlines))
    for i in range(0, len(intervals)):
        if i % skip == 0:
            canvas.add(Line(start=(gridline_x, gridline_y),
                            end=(gridline_x - y_axis_width, gridline_y - y_axis_height),
                            stroke="grey", stroke_width=0.25))

            # to properly position the rotated and skewed text a container
            # element is needed
            label1 = str(intervals[i])[0:4]
            center = (gridline_x, gridline_y)
            container = SVG(x=center[0] - 25, y=center[1], width="50", height="1.5em",
                            overflow="visible", style="font-size:0.8em;")
            container.add(Text(insert=("25%", "100%"), text=label1,
                               transform="rotate(%f) skewX(%f)" % (
                                   -degrees(plane_obverse), degrees(plane_obverse)),
                               text_anchor="middle", baseline_shift="-0.5em", style="font-weight:bold;"))

            if re.match(r"^[0-9]{4}-[0-9]{2}", intervals[i]) and not is_week:
                label2 = month_abbr[int(str(intervals[i])[5:7])]
                if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}", intervals[i]):
                    label2 += " %i" % int(intervals[i][8:10])

                container.add(Text(insert=("25%", "150%"), text=label2,
                                   transform="rotate(%f) skewX(%f)" % (
                                       -degrees(plane_obverse), degrees(plane_obverse)),
                                   text_anchor="middle", baseline_shift="-0.5em"))

            canvas.add(container)

        gridline_x += step_x_horizontal
        gridline_y -= step_y_horizontal

    # draw graphs as filled beziers
    top = step_y_vertical * 1.5
    graph_start_x = y_axis_width + margin_base
    graph_start_y = margin_top + canvas_height

    # draw graphs in reverse order, so the bottom one is most in the
    # foreground (in case of overlap)
    for graph in reversed(list(graphs)):
        self.dataset.update_status("Rendering graph for '%s'" % graph)

        # path starting at lower left corner of graph
        area_graph = Path(fill=self.colours[self.colour_index])
        area_graph.push("M %f %f" % (graph_start_x, graph_start_y))
        previous_value = None

        graph_x = graph_start_x
        graph_y = graph_start_y
        for interval in graphs[graph]:
            # normalise value
            value = graphs[graph][interval]
            try:
                limit = limits[graph] if normalise_values else max_limit
                value = top * copysign(abs(value) / limit, value)
            except ZeroDivisionError:
                value = 0

            if previous_value is None:
                # vertical line upwards to starting value of graph
                area_graph.push("L %f %f" % (graph_start_x, graph_start_y - value))
            elif not smooth:
                area_graph.push("L %f %f" % (graph_x, graph_y - value))
            else:
                # quadratic bezier from previous value to current value
                control_left = (graph_x - (step_x_horizontal / 2),
                                graph_y + step_y_horizontal - previous_value - (step_y_horizontal / 2))
                control_right = (graph_x - (step_x_horizontal / 2),
                                 graph_y - value + (step_y_horizontal / 2))
                area_graph.push("C %f %f %f %f %f %f" % (
                    *control_left, *control_right, graph_x, graph_y - value))

            previous_value = value
            graph_x += step_x_horizontal
            graph_y -= step_y_horizontal

        # line to the bottom of the graph at the current Y position
        area_graph.push("L %f %f" % (graph_x - step_x_horizontal, graph_y + step_y_horizontal))

        # then close the Path
        area_graph.push("Z")

        canvas.add(area_graph)

        # add text labels - skewing is a bit complicated and we need a
        # "center" to translate the origins properly.
        if label_in_graph:
            insert = (graph_start_x + 5, graph_start_y - 10)
        else:
            insert = (graph_x - (step_x_horizontal) + 5, graph_y + step_y_horizontal - 10)

        # we need to take the skewing into account for the translation
        offset_y = tan(plane_obverse) * insert[0]
        canvas.add(Text(insert=(0, 0), text=graph,
                        transform="skewY(%f) translate(%f %f)" % (
                            -degrees(plane_obverse), insert[0], insert[1] + offset_y)))

        # cycle colours, back to the beginning if all have been used
        self.colour_index += 1
        if self.colour_index >= len(self.colours):
            self.colour_index = 0

        graph_start_x -= step_x_vertical
        graph_start_y -= step_y_vertical

    # draw gridlines - horizontal
    gridline_x = margin_base
    gridline_y = margin_top + canvas_height - y_axis_height
    for graph in graphs:
        gridline_x += step_x_vertical
        gridline_y += step_y_vertical
        canvas.add(Line(start=(gridline_x, gridline_y),
                        end=(gridline_x + x_axis_width, gridline_y - x_axis_height),
                        stroke="black", stroke_width=1))

    # x axis
    canvas.add(Line(start=(margin_base + y_axis_width, margin_top + canvas_height),
                    end=(margin_base + canvas_width, margin_top + canvas_height - x_axis_height),
                    stroke="black", stroke_width=2))

    # and finally save the SVG
    canvas.save(pretty=True)
    self.dataset.finish(len(graphs))
def process(self): """ Render an SVG histogram/bar chart using a previous frequency analysis as input. """ self.dataset.update_status("Reading source file") header = self.parameters.get("header", self.options["header"]["default"]) max_posts = 0 # collect post numbers per month intervals = {} for post in self.iterate_csv_items(self.source_file): intervals[post["date"]] = int(post["frequency"]) max_posts = max(max_posts, int(post["frequency"])) if len(intervals) <= 1: self.dataset.update_status("Not enough data available for a histogram; need more than one time series.") self.dataset.finish(0) return self.dataset.update_status("Cleaning up data") (missing, intervals) = pad_interval(intervals) # create histogram self.dataset.update_status("Drawing histogram") # you may change the following four variables to adjust the graph dimensions width = 1024 height = 786 y_margin = 75 x_margin = 50 x_margin_left = x_margin * 2 tick_width = 5 fontsize_normal = int(height / 40) fontsize_small = int(height / 75) # better don't touch the following line_width = round(width / 512) y_margin_top = 150 if header else 50 y_height = height - (y_margin + y_margin_top) x_width = width - (x_margin + x_margin_left) canvas = Drawing(filename=str(self.dataset.get_results_path()), size=(width, height), style="font-family:monospace;font-size:%ipx" % fontsize_normal) # normalize the Y axis to a multiple of a power of 10 magnitude = pow(10, len(str(max_posts)) - 1) # ew max_neat = math.ceil(max_posts / magnitude) * magnitude self.dataset.update_status("Max (normalized): %i (%i) (magnitude: %i)" % (max_posts, max_neat, magnitude)) # draw border canvas.add(Rect( insert=(0, 0), size=(width, height), stroke="#000", stroke_width=line_width, fill="#FFF" )) # draw header on a black background if needed if header: if len(header) > 40: header = header[:37] + "..." 
header_rect_height = (y_margin_top / 1.5) header_fontsize = (width / len(header)) header_container = SVG(insert=(0, 0), size=(width, header_rect_height)) header_container.add(Rect( insert=(0, 0), size=(width, header_rect_height), fill="#000" )) header_container.add(Text( insert=("50%", "50%"), text=header, dominant_baseline="middle", text_anchor="middle", fill="#FFF", style="font-size:%i" % header_fontsize )) canvas.add(header_container) # horizontal grid lines for i in range(0, 10): offset = (y_height / 10) * i canvas.add(Line( start=(x_margin_left, y_margin_top + offset), end=(width - x_margin, y_margin_top + offset), stroke="#EEE", stroke_width=line_width )) # draw bars item_width = (width - (x_margin + x_margin_left)) / len(intervals) item_height = (height - y_margin - y_margin_top) bar_width = item_width * 0.9 x = x_margin_left + (item_width / 2) - (bar_width / 2) if bar_width >= 8: arc_adjust = max(8, int(item_width / 5)) / 2 else: arc_adjust = 0 for interval in intervals: posts = int(intervals[interval]) bar_height = ((posts / max_neat) * item_height) self.dataset.update_status("%s: %i posts" % (interval, posts)) bar_top = height - y_margin - bar_height bar_bottom = height - y_margin if bar_height == 0: x += item_width continue bar = Path(fill="#000") bar.push("M %f %f" % (x, bar_bottom)) bar.push("L %f %f" % (x, bar_top + (arc_adjust if bar_height > arc_adjust else 0))) if bar_height > arc_adjust > 0: control = (x, bar_top) bar.push("C %f %f %f %f %f %f" % (*control, *control, x + arc_adjust, bar_top)) bar.push("L %f %f" % (x + bar_width - arc_adjust, height - y_margin - bar_height)) if bar_height > arc_adjust > 0: control = (x + bar_width, bar_top) bar.push("C %f %f %f %f %f %f" % (*control, *control, x + bar_width, bar_top + arc_adjust)) bar.push("L %f %f" % (x + bar_width, height - y_margin)) bar.push("Z") canvas.add(bar) x += item_width # draw X and Y axis canvas.add(Line( start=(x_margin_left, height - y_margin), end=(width - x_margin, height - y_margin), stroke="#000", stroke_width=2 )) canvas.add(Line( start=(x_margin_left, y_margin_top), end=(x_margin_left, height - y_margin), stroke="#000", stroke_width=2 )) # draw ticks on Y axis for i in range(0, 10): offset = (y_height / 10) * i canvas.add(Line( start=(x_margin_left - tick_width, y_margin_top + offset), end=(x_margin_left, y_margin_top + offset), stroke="#000", stroke_width=line_width )) # draw ticks on X axis for i in range(0, len(intervals)): offset = (x_width / len(intervals)) * (i + 0.5) canvas.add(Line( start=(x_margin_left + offset, height - y_margin), end=(x_margin_left + offset, height - y_margin + tick_width), stroke="#000", stroke_width=line_width )) # prettify # y labels origin = (x_margin_left / 2) step = y_height / 10 for i in range(0, 11): label = str(int((max_neat / 10) * i)) labelsize = (len(label) * fontsize_normal * 1.25, fontsize_normal) label_x = origin - (tick_width * 2) label_y = height - y_margin - (i * step) - (labelsize[1] / 2) label_container = SVG( insert=(label_x, label_y), size=(x_margin_left / 2, x_margin_left / 5) ) label_container.add(Text( insert=("100%", "50%"), text=label, dominant_baseline="middle", text_anchor="end" )) canvas.add(label_container) # x labels label_width = max(fontsize_small * 6, item_width) label_x = x_margin_left label_y = height - y_margin + (tick_width * 2) next = 0 for interval in intervals: if len(interval) == 7: label = month_abbr[int(interval[5:7])] + "\n" + interval[0:4] elif len(interval) == 10: label = str(int(interval[8:10])) + 
month_abbr[int(interval[5:7])] + "\n" + interval[0:4] else: label = interval.replace("-", "\n") if label_x > next: shift = 0 for line in label.split("\n"): label_container = SVG( insert=(label_x + (item_width / 2) - (label_width / 2), label_y + (tick_width * 2)), size=(label_width, y_margin), overflow="visible") label_container.add(Text( insert=("50%", "0%"), text=line, dominant_baseline="middle", text_anchor="middle", baseline_shift=-shift )) shift += fontsize_small * 2 canvas.add(label_container) next = label_x + (label_width * 0.9) label_x += item_width # 4cat logo label = "made with 4cat - 4cat.oilab.nl" footersize = (fontsize_small * len(label) * 0.7, fontsize_small * 2) footer = SVG(insert=(width - footersize[0], height - footersize[1]), size=footersize) footer.add(Rect(insert=(0, 0), size=("100%", "100%"), fill="#000")) footer.add(Text( insert=("50%", "50%"), text=label, dominant_baseline="middle", text_anchor="middle", fill="#FFF", style="font-size:%i" % fontsize_small )) canvas.add(footer) canvas.save(pretty=True) self.dataset.update_status("Finished") self.dataset.finish(len(intervals))
def test_constructor(self):
    svg = SVG(insert=(10, 20), size=(100, 200))
    self.assertTrue(isinstance(svg, Symbol))
    self.assertEqual(
        svg.tostring(),
        '<svg height="200" width="100" x="10" y="20"><defs /></svg>')
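# Sketch of the pattern the snippets above rely on (illustrative only): an SVG
# fragment from svgwrite.container can be positioned inside a Drawing and acts
# as a nested coordinate system for whatever is added to it. The file name is a
# hypothetical example value.
def example_nested_svg_fragment():
    import svgwrite
    from svgwrite.container import SVG

    dwg = svgwrite.Drawing("nested.svg", size=(300, 300))
    fragment = SVG(insert=(10, 20), size=(100, 200))
    fragment.add(dwg.rect(insert=(0, 0), size=("100%", "100%"), fill="#EEE"))  # fills the fragment, not the page
    dwg.add(fragment)
    dwg.save()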
def main(): print("PY_PLUGIN - PLOT_TX_TRACE") #---------------------------- # INPUT args_map = parse_args() l = sys.stdin.readline() print(">>>>>>>>>>>>>>>>>>>>>>>>>>") # print(l.strip()) tx_id_str = args_map["tx_id_str"] tx_trace_map = json.loads(l) assert isinstance(tx_trace_map, dict) #---------------------------- plot_y = 50 dwg = svgwrite.Drawing(filename=f"{modd_str}/test.svg", debug=True) plot_svg = SVG((50, plot_y)) dwg.add(plot_svg) plot_width_mm_int = 500 x_ops_base = 10 x_stack_base = 30 x_memory_base = x_stack_base + 18 x_gas_cost_base = x_memory_base + 10 #---------------------------- # LEGEND legend_svg = SVG((50, 10)) dwg.add(legend_svg) legend_svg.add( dwg.text(f"tx ID - {tx_id_str}", (0 * mm, 4 * mm), font_size=8)) legend_svg.add( dwg.text(f"stack", (x_stack_base * mm, plot_y - 12), font_size=8)) legend_svg.add( dwg.text(f"memory", ((x_memory_base - 3) * mm, plot_y - 12), font_size=8)) legend_svg.add( dwg.text(f"gas cost", (x_gas_cost_base * mm, plot_y - 12), font_size=8)) #---------------------------- i = 0 memory_ops_lst = [] call_ops_lst = [] logs_ops_lst = [] for op_map in tx_trace_map["opcodes_lst"]: # print("--------------------") # print(op_map) op_str = op_map["op_str"].strip() gas_cost_int = int(op_map["gas_cost_uint"]) # print(f"{op_str}-{gas_cost_int}") stack_lst = op_map["stack_lst"] memory_lst = op_map["memory_lst"] x1 = x_gas_cost_base x2 = x1 + gas_cost_int y = i * 8 # 2.4 op_svg = SVG((0, y)) plot_svg.add(op_svg) #---------------------------- # OP_GAS_COST__LINE op_svg.add( dwg.line(start=(x1 * mm, 1.25 * mm), end=(x2 * mm, 1.25 * mm), stroke="green", stroke_width=3)) #---------------------------- # DEBUGGING - alignment line. used to align other elements in line. op_svg.add( dwg.line(start=((x_stack_base - 1) * mm, 1.25 * mm), end=(x1 * mm, 1.25 * mm), stroke="black", stroke_width=0.5)) #---------------------------- # OP__TEXT - text local coordinate system is at lower left corner, # not upper-left like everything else. 
# so positioning it a bit lower from 0,0 (in this case 0,2) op_txt = dwg.text(op_str, (x_ops_base * mm, 2 * mm), font_size=8) op_svg.add(op_txt) # MSTORE if op_str == "MSTORE": y__local = 0.8 op_svg.add( dwg.rect(insert=((x_ops_base - 1.5) * mm, y__local * mm), size=(1 * mm, 1 * mm), fill='blue', stroke='black', stroke_width=0.5)) y__global = y + y__local memory_ops_lst.append(y__global) # CALLDATASIZE/CALLVALUE/CALLER ops_call_lst = ["CALLDATASIZE", "CALLVALUE", "CALLER"] if op_str in ops_call_lst: x_call_rect__global = x_ops_base - 2 y__local = 0.6 op_svg.add( dwg.rect(insert=((x_call_rect__global) * mm, y__local * mm), size=(1.2 * mm, 1.2 * mm), fill='red', stroke='black', stroke_width=0.5)) y__global = y + y__local call_ops_lst.append(y__global) # LOG if op_str.startswith("LOG"): x_log_rect__global = x_ops_base - 2 y__local = 0.6 op_svg.add( dwg.rect(insert=(x_log_rect__global * mm, y__local * mm), size=(1 * mm, 1 * mm), fill='cyan', stroke='black', stroke_width=0.5)) y__global = y + y__local logs_ops_lst.append(y__global) #---------------------------- # STACK op_stack_g = op_svg.add(dwg.g(id='op_stack', stroke='blue')) j = 0 for s in stack_lst: x = x_stack_base + j * 2 op_stack_g.add( dwg.rect(insert=(x * mm, 0.5 * mm), size=(1.5 * mm, 1.5 * mm), fill='yellow', stroke='black', stroke_width=0.5)) j += 1 #---------------------------- # MEMORY op_memory_g = op_svg.add(dwg.g(id='op_memory', stroke='blue')) j = 0 for s in memory_lst: x = x_memory_base + j * 2 op_memory_g.add( dwg.rect(insert=(x * mm, 0.5 * mm), size=(1.5 * mm, 1.5 * mm), fill='orange', stroke='black', stroke_width=0.5)) j += 1 #---------------------------- i += 1 #-------------------------------------------------- # ARCHS def draw_archs(): x_call_rect__global_px = 30 i = 0 for y__global in call_ops_lst[:-1]: y__global__next = call_ops_lst[i + 1] y__global__start_str = f"{x_call_rect__global_px} {int(y__global+4.4)}" y__global__end_str = f"{x_call_rect__global_px} {int(y__global__next+4.4)}" y__global__control_point_str = f"0 {int(y__global+(y__global__next-y__global)/2)}" path_str = f"M {y__global__start_str} Q {y__global__control_point_str} {y__global__end_str}" plot_svg.add( dwg.path(d=path_str, fill="none", stroke="red", stroke_width=0.5)) i += 1 #-------------------------------------------------- draw_archs() print("done drawing...") # FILE_SAVE if args_map["stdout_bool"]: svg_str = dwg.tostring() out_map = {"svg_str": svg_str} print(f"GF_OUT:{json.dumps(out_map)}") else: dwg.save()