Exemple #1
0
 def draw(self, svg_parent, tree_layout, parent, child):
     """Draw the edge between ``parent`` and ``child`` into ``svg_parent``.

     If the child sits more than one level below the parent, the edge is
     drawn as two ``Line`` elements: one from the parent's bottom centre to
     the child's x position on the level directly below the parent, and one
     straight down from there to the child. Otherwise drawing is delegated
     to ``EdgeStyle.draw``.
     """
     from svgwrite.shapes import Line

     skips_levels = child.depth > parent.depth + 1
     if not skips_levels:
         EdgeStyle.draw(self, svg_parent, tree_layout, parent, child)
         return

     edge_top = parent.y + parent.height
     if parent.height > 0:
         edge_top += 0.2  # extra space for descenders

     child_box_y = tree_layout.y_distance(parent.depth, child.depth)
     child_top = em(child_box_y + child.y)
     child_center_x = perc(child.x + child.width / 2)

     # Level(s) are skipped: compute the y position that an empty node on
     # the level right below the parent would have.
     next_level = parent.depth + 1
     dodge = tree_layout.label_y_dodge(level=next_level, height=0)[0]
     bend_y = em(dodge + tree_layout.y_distance(parent.depth, next_level))

     # TODO: do as Path?
     segments = (
         ((u"50%", em(edge_top)), (child_center_x, bend_y)),
         ((child_center_x, bend_y), (child_center_x, child_top)),
     )
     for seg_start, seg_end in segments:
         svg_parent.add(
             Line(start=seg_start, end=seg_end, **self.svg_opts()))
    def _create_rect_arrow(scene: Drawing, start: tuple, point1: tuple,
                           point2: tuple, end: tuple, color: tuple):
        """Draw a three-segment path through four points.

        The path runs start -> point1 -> point2 -> end; each segment is
        added to the scene as its own ``Line`` element.

        Args:
            scene: The drawing the segments are added to.
            start: The first point.
            point1: The second point.
            point2: The third point.
            end: The last point.
            color: The stroke colour, unpacked into ``rgb()``.

        Returns:
            None. The scene is modified in place.
        """
        corners = (start, point1, point2, end)
        for seg_from, seg_to in zip(corners, corners[1:]):
            scene.add(
                Line(seg_from,
                     seg_to,
                     shape_rendering='inherit',
                     stroke=rgb(*color),
                     stroke_width=1))
Exemple #3
0
 def get_runner_end(self, line, is_out):
     """Return the marker drawn at the end of a runner line.

     An out is a group with class ``out`` holding two crossing lines
     centred on the line end; otherwise a circle of radius ``CIRCLE_R``
     at the line end is returned.
     """
     x, y = self.get_line_end(line)
     if not is_out:
         return Circle((x, y), CIRCLE_R)

     x_minus, x_plus = x - X_SIZE, x + X_SIZE
     y_minus, y_plus = y - X_SIZE, y + X_SIZE

     cross = Group()
     cross['class'] = 'out'
     cross.add(Line((x_minus, y_minus), (x_plus, y_plus)))
     cross.add(Line((x_minus, y_plus), (x_plus, y_minus)))
     return cross
Exemple #4
0
 def get_team_box(self, id, ht):
     """Return a ``team-box`` group: an outline plus two vertical dividers.

     The rectangle starts at the origin and is ``ATBAT_W`` wide and ``ht``
     high. The dividers run the full height at ``NAME_W`` and at
     ``NAME_W + SCORE_W`` to the right of the origin.
     """
     box = Group()
     box['id'] = id
     box['class'] = 'team-box'
     box.add(Rect((ORIGIN_X, ORIGIN_Y), (ATBAT_W, ht)))

     divider_xs = (ORIGIN_X + NAME_W, ORIGIN_X + NAME_W + SCORE_W)
     for divider_x in divider_xs:
         box.add(Line((divider_x, ORIGIN_Y), (divider_x, ORIGIN_Y + ht)))
     return box
Exemple #5
0
 def _put_cross(self, cell, x, y, fill, stone_size):
     """Add an X-shaped mark centred on board position (x, y) to ``cell``.

     Each arm reaches a quarter of ``stone_size`` from the centre.
     Coordinates are scaled by ``self.unit`` and both strokes are three
     times ``self._line_width`` wide, coloured with ``fill``.
     """
     offset = stone_size / 4
     x_lo = (x - offset) * self.unit
     x_hi = (x + offset) * self.unit
     y_lo = (y - offset) * self.unit
     y_hi = (y + offset) * self.unit
     stroke_width = self._line_width * 3

     diagonals = (
         ((x_lo, y_lo), (x_hi, y_hi)),
         ((x_lo, y_hi), (x_hi, y_lo)),
     )
     for diag_start, diag_end in diagonals:
         cell.add(
             Line(start=diag_start,
                  end=diag_end,
                  fill=fill,
                  stroke=fill,
                  stroke_width=stroke_width))
Exemple #6
0
    def draw_inning_separators(self):
        """Draw a horizontal separator below every inning except the last.

        Each separator is drawn across the left box. The matching line
        across the right box is also drawn, except below the second-to-last
        inning when ``self.is_no_final_bottom()`` is true.
        """
        innings = self.game.innings
        left_x = ORIGIN_X
        right_x = ORIGIN_X + ATBAT_W + SEPARATION

        y = ORIGIN_Y
        for index, inning in enumerate(innings[:-1]):
            y += self.get_inning_height(inning)
            self.dwg.add(
                Line((left_x, y), (ORIGIN_X + ATBAT_W, y),
                     class_='team-box'))

            # is_no_final_bottom() is only consulted for the final
            # separator, matching the short-circuit of the original check.
            if index != len(innings) - 2 or not self.is_no_final_bottom():
                self.dwg.add(
                    Line((right_x, y),
                         (ORIGIN_X + 2 * ATBAT_W + SEPARATION, y),
                         class_='team-box'))
    def draw_arrow_w_text_middle(self, scene: Drawing, start: tuple,
                                 point1: tuple, point2: tuple, end: tuple,
                                 height: int, arrowsize: int, is_curved: bool,
                                 text: str, font_size: int, font_family: str,
                                 over: bool, color: tuple):
        """Draw an arrow from ``start`` to ``end`` with a centred text label.

        The shaft is drawn by ``_create_curve_arrow`` when ``is_curved`` is
        true and by ``_create_rect_arrow`` otherwise. The head consists of
        two strokes meeting at ``end``. The label is centred horizontally
        between ``start`` and ``point2`` and placed one ``font_size`` above
        (``over`` true) or below ``height``.
        """
        # Pick the shaft-drawing helper matching the requested shape.
        draw_shaft = (self._create_curve_arrow
                      if is_curved else self._create_rect_arrow)
        draw_shaft(scene, start, point1, point2, end, color)

        # Arrow head: two barbs that both end at the tip.
        tip = (end[0], end[1])
        barbs = (
            (end[0] - arrowsize, end[1] - arrowsize),
            (end[0] + arrowsize, end[1] - arrowsize),
        )
        for barb in barbs:
            scene.add(
                Line(barb,
                     tip,
                     shape_rendering='inherit',
                     stroke=rgb(*color),
                     stroke_width=1))

        direction = -1 if over else 1

        # Label position: horizontally halfway between start and point2.
        labelx = min(start[0], point2[0]) + abs(start[0] - point2[0]) // 2
        labely = height + direction * font_size  # TODO: Should be font height!

        label = Text(text,
                     insert=(labelx, labely),
                     fill=rgb(*color),
                     font_family=font_family,
                     font_size=font_size,
                     text_rendering='inherit',
                     alignment_baseline='central',
                     text_anchor='middle')
        # TODO: alignment_baseline should be hanging or baseline!
        scene.add(label)
Exemple #8
0
def create_doors(doors, walls):
    """Build one red ``Line`` per door from the matching wall points.

    Each entry of ``doors`` has an ``"<door id>:<index>"`` string as its
    first element. Wall points whose ``name`` has the same
    ``"<id>:<index>"`` form and a known door id are collected per door;
    every door found must end up with exactly two points, which become the
    start and end of its line.

    Returns:
        A list of ``Line`` elements, one per door found among the walls.
    """
    door_ids = set()
    for door_point in doors:
        door_id, _ = door_point[0].split(':')
        door_ids.add(door_id)

    door_coords = defaultdict(list)
    for point in walls:
        name = point.name
        if name is None or ':' not in name:
            continue

        owner_id, _ = name.split(':')
        if owner_id in door_ids:
            door_coords[owner_id].append(point)

    line_attrs = {'stroke': 'red', 'stroke-width': 3}

    lines = []
    for door_id, door_pair in door_coords.items():
        assert len(door_pair) == 2
        print('Create door {}'.format(door_id))
        first, second = door_pair[0], door_pair[1]
        lines.append(
            Line((first.x, first.y), (second.x, second.y), **line_attrs))

    return lines
Exemple #9
0
def get_parallax_lines():
    """Build three layers of random horizontal lines for a parallax effect.

    Rows are placed at random multiples of ``anim['verticalInterval']``
    until ``anim['height']`` is reached. Each row gets 1-9 lines of length
    5-30 at distinct random x positions below ``anim['width'] * 2``. Every
    line is assigned to one of three layers based on its index in the row.

    Returns:
        A list of three ``Group`` containers styled with ``anim['style']``.
        Each holds the layer's lines, a second group with a subset of those
        lines translated right by the layer's longest extent, and the
        element returned by ``animate`` for that layer.
    """
    groups = [Group() for _ in range(3)]
    duplicated_groups = [Group() for _ in range(3)]
    longest = [0 for _ in range(3)]

    current_y = anim['verticalInterval']
    while current_y < anim['height']:
        line_quantity = randint(1, 9)
        lengths = [randint(5, 30) for _ in range(line_quantity)]
        position = sample(range(anim['width'] * 2), line_quantity)
        for i, (length, pos) in enumerate(zip(lengths, position)):
            # Compare by value: ``is`` on int literals only worked through
            # CPython's small-int caching and raises a SyntaxWarning on
            # Python 3.8+. (``i == 0`` is already covered by ``i % 3 == 0``.)
            if i % 3 == 0:
                n = 2
            elif i % 2 == 0:
                n = 1
            else:
                n = 0

            if pos + length > longest[n]:
                longest[n] = pos + length
            line = Line((pos, current_y), (pos + length, current_y))
            # NOTE(review): an x position is compared against the height
            # here; possibly anim['width'] was intended - confirm.
            if pos < anim['height']:
                duplicated_groups[n].add(line)
            groups[n].add(line)
        current_y += randint(1, 5) * anim['verticalInterval']

    # Each successive layer is animated with a duration 0.5 longer.
    dur = 2
    containers = [Group(**anim['style']) for _ in range(3)]
    for container, group, duplicate, extent in zip(
            containers, groups, duplicated_groups, longest):
        container.add(group)
        duplicate['transform'] = 'translate({}, 0)'.format(extent)
        container.add(duplicate)
        container.add(animate(extent, dur))
        dur += 0.5

    return containers
Exemple #10
0
    def _draw_wirelesslink(self, url, labels):
        """
        Return an SVG group containing a line and hyperlinked text labels
        representing a WirelessLink. Advances ``self.cursor`` past the
        drawn section.

        :param url: Hyperlink URL
        :param labels: Iterable of text labels
        """
        connector = Group(class_='connector')

        # Vertical line spanning the padding and all label rows
        height = PADDING * 2 + LINE_HEIGHT * len(labels) + PADDING * 2
        line_x = OFFSET + self.center
        line_top = self.cursor
        connector.add(
            Line(start=(line_x, line_top),
                 end=(line_x, line_top + height),
                 class_='wireless-link'))

        self.cursor += PADDING * 2

        # All labels share one hyperlink
        link = Hyperlink(href=f'{self.base_url}{url}', target='_blank')

        for index, label in enumerate(labels):
            self.cursor += LINE_HEIGHT
            position = (self.center + PADDING * 2,
                        self.cursor - LINE_HEIGHT / 2)
            # Only the first label gets the 'bold' class
            css_class = [] if index else 'bold'
            link.add(Text(label, insert=position, class_=css_class))

        connector.add(link)
        self.cursor += PADDING * 2

        return connector
Exemple #11
0
 def get_mid_pa_runner_line(self, runner, i, num_events):
     """Return the line for the ``i``-th of ``num_events`` runner moves.

     The top half of the at-bat row is split into ``num_events`` equal
     vertical steps. The line covers step ``i``, going from the runner's
     start base to its end base horizontally.
     """
     bases_origin_x = ORIGIN_X + NAME_W + SCORE_W
     step_height = ATBAT_HT / 2 / num_events
     step_top = self.y - ATBAT_HT + i * step_height

     start_point = (bases_origin_x + BASE_L * runner.start, step_top)
     end_point = (bases_origin_x + BASE_L * runner.end,
                  step_top + step_height)
     return Line(start_point, end_point)
Exemple #12
0
 def get_baserunner_line(self, runner):
     """Return the line showing a runner's advance during the at-bat.

     By default the line starts at the runner's start base at the top of
     the at-bat row. If ``get_mid_pa_runner_to_use`` returns a runner for
     that base, the line starts where that runner ended, halfway down the
     row. It always ends at the runner's end base at ``self.y``.
     """
     bases_origin_x = ORIGIN_X + NAME_W + SCORE_W
     from_x = bases_origin_x + BASE_L * runner.start
     to_x = bases_origin_x + BASE_L * runner.end
     from_y = self.y - ATBAT_HT

     earlier_move = self.get_mid_pa_runner_to_use(runner.start)
     if earlier_move:
         from_x, from_y = (bases_origin_x + BASE_L * earlier_move.end,
                           self.y - ATBAT_HT / 2)

     return Line((from_x, from_y), (to_x, self.y))
Exemple #13
0
 def draw_hash(self, inning):
     """Draw a hash mark at the current ``self.y`` and record its position.

     When the home team is batting the mark is passed through ``flip``,
     recorded in ``self.home_hash_ys`` and classed ``away-pitcher-hash``.
     Otherwise it is recorded in ``self.away_hash_ys`` and classed
     ``home-pitcher-hash``.
     """
     hash_x = ORIGIN_X + ATBAT_W + HASH_SEP
     mark = Line((hash_x, self.y), (hash_x + HASH_LEN, self.y))

     if not self.is_home_team_batting(inning):
         self.away_hash_ys.append(self.y)
         mark['class'] = 'home-pitcher-hash'
     else:
         flip(mark)
         self.home_hash_ys.append(self.y)
         mark['class'] = 'away-pitcher-hash'

     self.dwg.add(mark)
    def background_lines(self, max_data, log_max_value, log_min_value):
        """Draw a light grey horizontal guide line at every power of ten.

        Starting at 1 and multiplying by 10, a line is added to
        ``self.plot`` for each value below ``max_data``. Its height comes
        from ``self.scale_y`` applied to the value's base-10 logarithm.
        """
        decade = 1
        while decade < max_data:
            scaled = self.scale_y(log10(decade), log_max_value,
                                  log_min_value)
            y = self.margin_top + self.plottable_y - scaled

            guide = Line(start=(self.margin_left, y),
                         end=(self.margin_left + self.plottable_x, y),
                         stroke_width=1,
                         stroke="lightgrey")
            self.plot.add(guide)
            decade *= 10
Exemple #15
0
    def _draw_cable(self, color, url, labels):
        """
        Return an SVG group containing two stacked line elements (a
        "shadow" and the coloured cable) and hyperlinked text labels
        representing a Cable. Advances ``self.cursor`` past the drawn
        section.

        :param color: Cable (line) color, inserted after ``#`` in the stroke style
        :param url: Hyperlink URL
        :param labels: Iterable of text labels
        """
        connector = Group(class_='connector')

        height = PADDING * 2 + LINE_HEIGHT * len(labels) + PADDING * 2
        line_x = OFFSET + self.center
        line_top = self.cursor
        top_point = (line_x, line_top)
        bottom_point = (line_x, line_top + height)

        # The "shadow" line goes in first so the cable is drawn on top of
        # it, giving the cable a border
        connector.add(
            Line(start=top_point, end=bottom_point, class_='cable-shadow'))
        connector.add(
            Line(start=top_point, end=bottom_point,
                 style=f'stroke: #{color}'))

        self.cursor += PADDING * 2

        # All labels share one hyperlink
        link = Hyperlink(href=f'{self.base_url}{url}', target='_blank')

        for index, label in enumerate(labels):
            self.cursor += LINE_HEIGHT
            position = (self.center + PADDING * 2,
                        self.cursor - LINE_HEIGHT / 2)
            # Only the first label gets the 'bold' class
            css_class = [] if index else 'bold'
            link.add(Text(label, insert=position, class_=css_class))

        connector.add(link)
        self.cursor += PADDING * 2

        return connector
Exemple #16
0
def corner_ruler(scale, length, major_interval, minor_per_major):
    """Return a group with an L-shaped pair of rulers meeting at the origin.

    The group holds a horizontal axis line with a reversed ruler, a
    vertical axis line with a non-reversed ruler, a small white circle at
    the origin and a bold ``1:<scale>`` label centred over the horizontal
    ruler. The ruler length in millimetres is ``length / scale * 1000``.
    """
    ruler_length_mm = (length / scale) * 1000
    shared_ruler_args = dict(scale=scale,
                             length=length,
                             major_interval=major_interval,
                             minor_per_major=minor_per_major)

    corner = Group(stroke="black", stroke_width=0.4)

    # Horizontal axis and its ticks
    corner.add(Line((-ruler_length_mm * mm, 0 * mm), (3 * mm, 0 * mm)))
    corner.add(ruler(reverse=True, vertical=False, **shared_ruler_args))

    # Vertical axis and its ticks
    corner.add(Line((0 * mm, -3 * mm), (0 * mm, ruler_length_mm * mm)))
    corner.add(ruler(reverse=False, vertical=True, **shared_ruler_args))

    # Marker on the corner itself
    corner.add(Circle((0, 0), r=0.5 * mm, fill="white"))

    # Scale label
    scale_label = Text(
        f"1:{scale}",
        x=[(-ruler_length_mm / 2) * mm],
        y=[-1 * mm],
        text_anchor="middle",
        font_size=2 * mm,
        font_weight="bold",
    )
    corner.add(scale_label)

    return corner
 def draw_line(scene: Drawing, start: tuple, ctrl1: tuple, ctrl2: tuple,
               end: tuple, is_curved: bool, edge_color: tuple):
     """Add an edge from ``start`` to ``end`` to the scene.

     A curved edge is an unfilled cubic Bezier path using ``ctrl1`` and
     ``ctrl2`` as control points; a straight edge is a plain line and
     ignores the control points. The stroke is ``rgb(*edge_color)``, 1 wide.
     """
     stroke = rgb(*edge_color)
     if is_curved:
         edge = Path(d=['M', start, 'C', ctrl1, ctrl2, end],
                     stroke=stroke,
                     stroke_width=1,
                     fill='none')
     else:
         edge = Line(start,
                     end,
                     shape_rendering='inherit',
                     stroke=stroke,
                     stroke_width=1)
     scene.add(edge)
Exemple #18
0
    def _draw_attachment(self):
        """
        Return an SVG group containing a single vertical line element with
        class ``attachment``, and advance ``self.cursor`` by four paddings.
        """
        connector = Group(class_='connector')

        # Vertical line, offset from the current centre and cursor
        line_x = OFFSET + self.center
        line_top = OFFSET + self.cursor
        height = PADDING * 2 + LINE_HEIGHT + PADDING * 2
        connector.add(
            Line(start=(line_x, line_top),
                 end=(line_x, line_top + height),
                 class_='attachment'))

        self.cursor += PADDING * 4

        return connector
Exemple #19
0
def get_lines():
    """Return a group of short random dashes laid out in horizontal rows.

    ``img['verticalLines']`` rows are produced, starting at y = 6 and
    spaced ``img['verticalInterval']`` apart (rounded to one decimal).
    Each row starts at a random x in [-10, 150] and places dashes 2-6 long
    separated by random gaps of 50-150 until ``img['width']`` is reached.
    """
    group = Group(**img['style'])

    row_y = 6
    cursor_x = randint(-10, 150)

    for _ in range(img['verticalLines']):
        while cursor_x < img['width']:
            dash_end = cursor_x + randint(2, 6)
            group.add(Line((cursor_x, row_y), (dash_end, row_y)))
            cursor_x = dash_end + randint(50, 150)

        # Restart at a fresh random x on the next row
        cursor_x = randint(-10, 150)
        row_y = round(row_y + img['verticalInterval'], 1)

    return group
    def add_zero_based_regression(self, slope):
        """Draw a regression line from the plot origin and label its slope.

        The line ends at ``x_max`` unless that would exceed ``y_max``, in
        which case it is cut off where it reaches ``y_max``. The slope,
        rounded to four decimals, is written near the top right of the plot.
        """
        self.max_min()

        end_x = self.x_max
        end_y = slope * end_x
        if end_y > self.y_max:
            # Clip the line at the top edge of the data range
            end_y = self.y_max
            end_x = end_y / slope

        origin = (self.margin_left, self.margin_top + self.plottable_y)
        end_point = (self.x_to_printx(end_x), self.y_to_printy(end_y))
        self.plot.add(
            Line(start=origin,
                 end=end_point,
                 stroke_width=1,
                 stroke=self.graph_colour))

        label_position = (self.plottable_x + self.margin_left - 200,
                          self.margin_top + 15)
        self.plot.add(
            Text(f"Slope = {round(slope, 4)}",
                 insert=label_position,
                 fill=self.graph_colour,
                 font_size="15"))
Exemple #21
0
 def get_batter_runner_line(self, runner):
     """Return a horizontal line at ``self.y`` for the batter-runner.

     The line starts right after the name column and extends across the
     score column plus ``BASE_L`` for each base up to ``runner.end``.
     """
     line_start_x = ORIGIN_X + NAME_W
     line_end_x = line_start_x + SCORE_W + BASE_L * runner.end
     start_point = (line_start_x, self.y)
     end_point = (line_end_x, self.y)
     return Line(start_point, end_point)
Exemple #22
0
 def test_numbers(self):
     """A line built from integer coordinates serialises to plain numbers."""
     expected = '<line x1="0" x2="10" y1="0" y2="20" />'
     segment = Line(start=(0, 0), end=(10, 20))
     self.assertEqual(segment.tostring(), expected)
Exemple #23
0
def line(start: tuple, end: tuple) -> Line:
    """Return a black, 2px wide, round-capped line from ``start`` to ``end``."""
    appearance = {
        'stroke': 'black',
        'style': 'stroke-linecap:round;stroke-width:2px;',
    }
    return Line(start, end, **appearance)
Exemple #24
0
    def process(self):
        """
        Render per-item values over time as stacked, semi-isometrically
        projected area graphs and save the result as an SVG.

        Rows are read via ``self.iterate_items(self.source_file)``; each
        row's ``item``, ``date`` and ``value`` fields are used. The
        ``smooth``, ``normalise``, ``complete``, ``label`` and ``top``
        parameters control curve smoothing, per-graph normalisation, the
        minimum percentage of intervals an item must cover, the header
        text and the maximum number of items drawn.

        The dataset is finished with 0 when there are fewer than two
        intervals or no graphs remain after filtering; otherwise it is
        finished with the number of graphs drawn.
        """
        graphs = {}
        intervals = []

        smooth = self.parameters.get("smooth")
        normalise_values = self.parameters.get("normalise")
        completeness = convert_to_int(self.parameters.get("complete"), 0)
        graph_label = self.parameters.get("label")
        top = convert_to_int(self.parameters.get("top"), 10)

        # first gather graph data: each distinct item gets its own graph and
        # for each graph we have a sequence of intervals, each interval with
        # its own value
        # sentinels: any real zero-padded date string sorts between these two
        first_date = "9999-99-99"
        last_date = "0000-00-00"

        for row in self.iterate_items(self.source_file):
            if row["item"] not in graphs:
                graphs[row["item"]] = {}

            # make sure the months and days are zero-padded
            interval = row.get("date", "")
            interval = "-".join([
                str(bit).zfill(2 if len(bit) != 4 else 4)
                for bit in interval.split("-")
            ])
            first_date = min(first_date, interval)
            last_date = max(last_date, interval)

            if interval not in intervals:
                intervals.append(interval)

            if interval not in graphs[row["item"]]:
                graphs[row["item"]][interval] = 0

            graphs[row["item"]][interval] += float(row.get("value", 0))

        # first make sure we actually have something to render
        intervals = sorted(intervals)
        if len(intervals) <= 1:
            self.dataset.update_status(
                "Not enough data for a side-by-side over-time visualisation.")
            self.dataset.finish(0)
            return

        # only retain most-occurring series - sort by sum of all frequencies
        if len(graphs) > top:
            selected_graphs = {
                graph: graphs[graph]
                for graph in sorted(
                    graphs,
                    key=lambda x: sum(
                        [graphs[x][interval] for interval in graphs[x]]),
                    reverse=True)[0:top]
            }
            graphs = selected_graphs

        # there may be items that do not have values for all intervals
        # this will distort the graph, so the next step is to make sure all
        # graphs consist of the same continuous interval list
        missing = {graph: 0 for graph in graphs}
        for graph in graphs:
            missing[graph], graphs[graph] = pad_interval(
                graphs[graph],
                first_interval=first_date,
                last_interval=last_date)

        # now that's done, make sure the graph datapoints are in order
        intervals = sorted(list(graphs[list(graphs)[0]].keys()))

        # delete graphs that do not have the required amount of intervals
        # this is useful to get rid of outliers and items that only occur
        # very few times over the full interval
        if completeness > 0:
            intervals_required = len(intervals) * (completeness / 100)
            disqualified = []
            for graph in graphs:
                if len(intervals) - missing[graph] < intervals_required:
                    disqualified.append(graph)

            graphs = {
                graph: graphs[graph]
                for graph in graphs if graph not in disqualified
            }

        # determine max value per item, so we can normalize them later
        limits = {}
        max_limit = 0
        for graph in graphs:
            for interval in graphs[graph]:
                limits[graph] = max(limits.get(graph, 0),
                                    abs(graphs[graph][interval]))
                max_limit = max(max_limit, abs(graphs[graph][interval]))

        # order graphs by their largest absolute value, ascending
        limits = {
            limit: limits[limit]
            for limit in sorted(limits, key=lambda l: limits[l])
        }
        graphs = {graph: graphs[graph] for graph in limits}

        if not graphs:
            # maybe nothing is actually there to be graphed
            self.dataset.update_status(
                "No items match the selection criteria - nothing to visualise."
            )
            self.dataset.finish(0)
            return None

        # how many vertical grid lines (and labels) are to be included at most
        # 12 is a sensible default because it allows one label per month for a full
        # year's data
        max_gridlines = 12

        # If True, label is put at the lower left bottom of the graph rather than
        # outside it. Automatically set to True if one of the labels is long, as
        # else the label would fall off the screen
        label_in_graph = max([len(item) for item in graphs]) > 30

        # determine how wide each interval should be
        # the graph has a minimum width - but the graph's width will be
        # extended if at this minimum width each item does not have the
        # minimum per-item width
        min_full_width = 600
        min_item_width = 50
        item_width = max(min_item_width, min_full_width / len(intervals))

        # determine how much space each graph should get
        # same trade-off as for the interval width
        min_full_height = 300
        min_item_height = 100
        item_height = max(min_item_height, min_full_height / len(graphs))

        # margin - this should be enough for the text labels to fit in
        margin_base = 50
        margin_right = margin_base * 4
        margin_top = margin_base * 3

        # this determines the "flatness" of the isometric projection and can be
        # tweaked for different looks - basically corresponds to how far the
        # camera is above the horizon
        plane_angle = 120

        # don't change these
        plane_obverse = radians((180 - plane_angle) / 2)
        plane_angle = radians(plane_angle)

        # okay, now determine the full graphic size with these dimensions projected
        # semi-isometrically. We can also use these values later for drawing
        # grid lines, et cetera. The axis widths and heights here are the
        # dimensions of the bounding box wrapping the isometrically projected axes.
        x_axis_length = (item_width * (len(intervals) - 1))
        y_axis_length = (item_height * len(graphs))

        x_axis_width = (sin(plane_angle / 2) * x_axis_length)
        y_axis_width = (sin(plane_angle / 2) * y_axis_length)
        canvas_width = x_axis_width + y_axis_width

        # leave room for graph header
        if graph_label:
            margin_top += (2 * (canvas_width / 50))

        x_axis_height = (cos(plane_angle / 2) * x_axis_length)
        y_axis_height = (cos(plane_angle / 2) * y_axis_length)
        canvas_height = x_axis_height + y_axis_height

        # now we have the dimensions, the canvas can be instantiated
        canvas = get_4cat_canvas(
            self.dataset.get_results_path(),
            width=(canvas_width + margin_base + margin_right),
            height=(canvas_height + margin_base + margin_top),
            header=graph_label)

        # draw gridlines - vertical
        gridline_x = y_axis_width + margin_base
        gridline_y = margin_top + canvas_height

        # per-interval and per-graph offsets along the projected axes
        step_x_horizontal = sin(plane_angle / 2) * item_width
        step_y_horizontal = cos(plane_angle / 2) * item_width
        step_x_vertical = sin(plane_angle / 2) * item_height
        step_y_vertical = cos(plane_angle / 2) * item_height

        # labels for x axis
        # month and week both follow the same pattern
        # it's not always possible to distinguish between them but we will try
        # by looking for months greater than 12 in which case we are dealing
        # with weeks
        # we need to know this because for months there is an extra row in the
        # label with the full month
        is_week = False
        for i in range(0, len(intervals)):
            if re.match(r"^[0-9]{4}-[0-9]{2}",
                        intervals[i]) and int(intervals[i].split("-")[1]) > 12:
                is_week = True
                break

        # only every `skip`-th interval gets a gridline and label
        skip = max(1, int(len(intervals) / max_gridlines))
        for i in range(0, len(intervals)):
            if i % skip == 0:
                canvas.add(
                    Line(start=(gridline_x, gridline_y),
                         end=(gridline_x - y_axis_width,
                              gridline_y - y_axis_height),
                         stroke="grey",
                         stroke_width=0.25))

                # to properly position the rotated and skewed text a container
                # element is needed
                label1 = str(intervals[i])[0:4]
                center = (gridline_x, gridline_y)
                container = SVG(x=center[0] - 25,
                                y=center[1],
                                width="50",
                                height="1.5em",
                                overflow="visible",
                                style="font-size:0.8em;")
                container.add(
                    Text(insert=("25%", "100%"),
                         text=label1,
                         transform="rotate(%f) skewX(%f)" %
                         (-degrees(plane_obverse), degrees(plane_obverse)),
                         text_anchor="middle",
                         baseline_shift="-0.5em",
                         style="font-weight:bold;"))

                # second label row: month abbreviation, plus day if present
                if re.match(r"^[0-9]{4}-[0-9]{2}",
                            intervals[i]) and not is_week:
                    label2 = month_abbr[int(str(intervals[i])[5:7])]
                    if re.match(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}", intervals[i]):
                        label2 += " %i" % int(intervals[i][8:10])

                    container.add(
                        Text(insert=("25%", "150%"),
                             text=label2,
                             transform="rotate(%f) skewX(%f)" %
                             (-degrees(plane_obverse), degrees(plane_obverse)),
                             text_anchor="middle",
                             baseline_shift="-0.5em"))

                canvas.add(container)

            gridline_x += step_x_horizontal
            gridline_y -= step_y_horizontal

        # draw graphs as filled beziers
        # NOTE: `top` is reused from here on as the height a value equal to
        # the applicable limit is scaled to; the "top" parameter read above
        # is no longer needed at this point
        top = step_y_vertical * 1.5
        graph_start_x = y_axis_width + margin_base
        graph_start_y = margin_top + canvas_height

        # draw graphs in reverse order, so the bottom one is most in the
        # foreground (in case of overlap)
        for graph in reversed(list(graphs)):
            self.dataset.update_status("Rendering graph for '%s'" % graph)

            # path starting at lower left corner of graph
            area_graph = Path(fill=self.colours[self.colour_index])
            area_graph.push("M %f %f" % (graph_start_x, graph_start_y))
            previous_value = None

            graph_x = graph_start_x
            graph_y = graph_start_y
            for interval in graphs[graph]:
                # normalise value
                value = graphs[graph][interval]
                try:
                    limit = limits[graph] if normalise_values else max_limit
                    value = top * copysign(abs(value) / limit, value)
                except ZeroDivisionError:
                    value = 0

                if previous_value is None:
                    # vertical line upwards to starting value of graph
                    area_graph.push("L %f %f" %
                                    (graph_start_x, graph_start_y - value))
                elif not smooth:
                    area_graph.push("L %f %f" % (graph_x, graph_y - value))
                else:
                    # cubic bezier ("C" path command, two control points)
                    # from previous value to current value
                    control_left = (graph_x - (step_x_horizontal / 2),
                                    graph_y + step_y_horizontal -
                                    previous_value - (step_y_horizontal / 2))
                    control_right = (graph_x - (step_x_horizontal / 2),
                                     graph_y - value + (step_y_horizontal / 2))
                    area_graph.push("C %f %f %f %f %f %f" %
                                    (*control_left, *control_right, graph_x,
                                     graph_y - value))

                previous_value = value
                graph_x += step_x_horizontal
                graph_y -= step_y_horizontal

            # line to the bottom of the graph at the current Y position
            # (graph_x/graph_y were advanced once past the last point, so
            # step back by one interval)
            area_graph.push(
                "L %f %f" %
                (graph_x - step_x_horizontal, graph_y + step_y_horizontal))
            area_graph.push("Z")  # then close the Path
            canvas.add(area_graph)

            # add text labels - skewing is a bit complicated and we need a
            # "center" to translate the origins properly.
            if label_in_graph:
                insert = (graph_start_x + 5, graph_start_y - 10)
            else:
                insert = (graph_x - (step_x_horizontal) + 5,
                          graph_y + step_y_horizontal - 10)

            # we need to take the skewing into account for the translation
            offset_y = tan(plane_obverse) * insert[0]
            canvas.add(
                Text(insert=(0, 0),
                     text=graph,
                     transform="skewY(%f) translate(%f %f)" %
                     (-degrees(plane_obverse), insert[0],
                      insert[1] + offset_y)))

            # cycle colours, back to the beginning if all have been used
            self.colour_index += 1
            if self.colour_index >= len(self.colours):
                self.colour_index = 0

            graph_start_x -= step_x_vertical
            graph_start_y -= step_y_vertical

        # draw gridlines - horizontal
        gridline_x = margin_base
        gridline_y = margin_top + canvas_height - y_axis_height
        for graph in graphs:
            gridline_x += step_x_vertical
            gridline_y += step_y_vertical
            canvas.add(
                Line(start=(gridline_x, gridline_y),
                     end=(gridline_x + x_axis_width,
                          gridline_y - x_axis_height),
                     stroke="black",
                     stroke_width=1))

        # x axis
        canvas.add(
            Line(start=(margin_base + y_axis_width,
                        margin_top + canvas_height),
                 end=(margin_base + canvas_width,
                      margin_top + canvas_height - x_axis_height),
                 stroke="black",
                 stroke_width=2))

        # and finally save the SVG
        canvas.save(pretty=True)
        self.dataset.finish(len(graphs))
Exemple #25
0
	def process(self):
		"""
		Render an SVG histogram/bar chart using a previous frequency analysis
		as input.

		Each row of the source file supplies a `date` (used as the interval
		label) and a `frequency` (the post count for that interval). The chart
		is saved to the dataset's results path. The dataset is finished with
		the number of intervals drawn, or with 0 when the source file holds at
		most one interval.
		"""
		self.dataset.update_status("Reading source file")
		header = self.parameters.get("header", self.options["header"]["default"])
		max_posts = 0

		# collect post numbers per interval
		intervals = {}
		for post in self.iterate_csv_items(self.source_file):
			frequency = int(post["frequency"])
			intervals[post["date"]] = frequency
			max_posts = max(max_posts, frequency)

		if len(intervals) <= 1:
			self.dataset.update_status("Not enough data available for a histogram; need more than one time series.")
			self.dataset.finish(0)
			return

		self.dataset.update_status("Cleaning up data")
		# NOTE(review): pad_interval presumably fills gaps between intervals
		# with zero counts - confirm against its definition
		(missing, intervals) = pad_interval(intervals)

		# create histogram
		self.dataset.update_status("Drawing histogram")

		# you may change the following variables to adjust the graph dimensions
		width = 1024
		height = 786
		y_margin = 75
		x_margin = 50
		x_margin_left = x_margin * 2
		tick_width = 5

		fontsize_normal = int(height / 40)
		fontsize_small = int(height / 75)

		# better don't touch the following
		line_width = round(width / 512)
		y_margin_top = 150 if header else 50
		y_height = height - (y_margin + y_margin_top)
		x_width = width - (x_margin + x_margin_left)
		canvas = Drawing(filename=str(self.dataset.get_results_path()), size=(width, height),
						 style="font-family:monospace;font-size:%ipx" % fontsize_normal)

		# normalize the Y axis to a multiple of a power of 10
		magnitude = pow(10, len(str(max_posts)) - 1)  # ew
		max_neat = math.ceil(max_posts / magnitude) * magnitude
		if max_neat == 0:
			# every interval has a frequency of 0; keep the axis non-degenerate
			# so the bar height calculation below does not divide by zero
			max_neat = magnitude
		self.dataset.update_status("Max (normalized): %i (%i) (magnitude: %i)" % (max_posts, max_neat, magnitude))

		# draw border
		canvas.add(Rect(
			insert=(0, 0),
			size=(width, height),
			stroke="#000",
			stroke_width=line_width,
			fill="#FFF"
		))

		# draw header on a black background if needed
		if header:
			if len(header) > 40:
				header = header[:37] + "..."

			header_rect_height = (y_margin_top / 1.5)
			# shrink the font as the header gets longer so it fits the width
			header_fontsize = (width / len(header))

			header_container = SVG(insert=(0, 0), size=(width, header_rect_height))
			header_container.add(Rect(
				insert=(0, 0),
				size=(width, header_rect_height),
				fill="#000"
			))
			header_container.add(Text(
				insert=("50%", "50%"),
				text=header,
				dominant_baseline="middle",
				text_anchor="middle",
				fill="#FFF",
				style="font-size:%i" % header_fontsize
			))
			canvas.add(header_container)

		# horizontal grid lines
		for i in range(0, 10):
			offset = (y_height / 10) * i
			canvas.add(Line(
				start=(x_margin_left, y_margin_top + offset),
				end=(width - x_margin, y_margin_top + offset),
				stroke="#EEE",
				stroke_width=line_width
			))

		# draw bars
		item_width = x_width / len(intervals)
		item_height = y_height
		bar_width = item_width * 0.9
		x = x_margin_left + (item_width / 2) - (bar_width / 2)

		# radius of the rounded top corners; very narrow bars stay rectangular
		if bar_width >= 8:
			arc_adjust = max(8, int(item_width / 5)) / 2
		else:
			arc_adjust = 0

		for interval in intervals:
			posts = int(intervals[interval])
			bar_height = ((posts / max_neat) * item_height)
			self.dataset.update_status("%s: %i posts" % (interval, posts))
			bar_top = height - y_margin - bar_height
			bar_bottom = height - y_margin

			if bar_height == 0:
				x += item_width
				continue

			# only round the corners when the bar is tall enough to fit the
			# arc; otherwise draw a plain rectangle (inset of 0 on both sides)
			rounded = bar_height > arc_adjust > 0
			inset = arc_adjust if rounded else 0

			bar = Path(fill="#000")
			bar.push("M %f %f" % (x, bar_bottom))
			bar.push("L %f %f" % (x, bar_top + inset))
			if rounded:
				control = (x, bar_top)
				bar.push("C %f %f %f %f %f %f" % (*control, *control, x + inset, bar_top))
			bar.push("L %f %f" % (x + bar_width - inset, bar_top))
			if rounded:
				control = (x + bar_width, bar_top)
				bar.push("C %f %f %f %f %f %f" % (*control, *control, x + bar_width, bar_top + inset))
			bar.push("L %f %f" % (x + bar_width, bar_bottom))
			bar.push("Z")
			canvas.add(bar)

			x += item_width

		# draw X and Y axis
		canvas.add(Line(
			start=(x_margin_left, height - y_margin),
			end=(width - x_margin, height - y_margin),
			stroke="#000",
			stroke_width=2
		))
		canvas.add(Line(
			start=(x_margin_left, y_margin_top),
			end=(x_margin_left, height - y_margin),
			stroke="#000",
			stroke_width=2
		))

		# draw ticks on Y axis
		for i in range(0, 10):
			offset = (y_height / 10) * i
			canvas.add(Line(
				start=(x_margin_left - tick_width, y_margin_top + offset),
				end=(x_margin_left, y_margin_top + offset),
				stroke="#000",
				stroke_width=line_width
			))

		# draw ticks on X axis, one in the middle of each interval's slot
		for i in range(0, len(intervals)):
			offset = item_width * (i + 0.5)
			canvas.add(Line(
				start=(x_margin_left + offset, height - y_margin),
				end=(x_margin_left + offset, height - y_margin + tick_width),
				stroke="#000",
				stroke_width=line_width
			))

		# prettify

		# y labels
		origin = (x_margin_left / 2)
		step = y_height / 10
		for i in range(0, 11):
			label = str(int((max_neat / 10) * i))
			label_x = origin - (tick_width * 2)
			label_y = height - y_margin - (i * step) - (fontsize_normal / 2)
			label_container = SVG(
				insert=(label_x, label_y),
				size=(x_margin_left / 2, x_margin_left / 5)
			)
			label_container.add(Text(
				insert=("100%", "50%"),
				text=label,
				dominant_baseline="middle",
				text_anchor="end"
			))
			canvas.add(label_container)

		# x labels
		label_width = max(fontsize_small * 6, item_width)
		label_x = x_margin_left
		label_y = height - y_margin + (tick_width * 2)
		# x position up to which the previously drawn label extends; labels
		# that would start before it are skipped to avoid overlap
		next_free_x = 0
		for interval in intervals:
			if len(interval) == 7:
				# YYYY-MM
				label = month_abbr[int(interval[5:7])] + "\n" + interval[0:4]
			elif len(interval) == 10:
				# YYYY-MM-DD
				label = str(int(interval[8:10])) + month_abbr[int(interval[5:7])] + "\n" + interval[0:4]
			else:
				label = interval.replace("-", "\n")

			if label_x > next_free_x:
				shift = 0
				# each line of the label gets its own container, shifted down
				for line in label.split("\n"):
					label_container = SVG(
						insert=(label_x + (item_width / 2) - (label_width / 2), label_y + (tick_width * 2)),
						size=(label_width, y_margin), overflow="visible")
					label_container.add(Text(
						insert=("50%", "0%"),
						text=line,
						dominant_baseline="middle",
						text_anchor="middle",
						baseline_shift=-shift
					))
					shift += fontsize_small * 2
					canvas.add(label_container)
				next_free_x = label_x + (label_width * 0.9)
			label_x += item_width

		# 4cat logo
		label = "made with 4cat - 4cat.oilab.nl"
		footersize = (fontsize_small * len(label) * 0.7, fontsize_small * 2)
		footer = SVG(insert=(width - footersize[0], height - footersize[1]), size=footersize)
		footer.add(Rect(insert=(0, 0), size=("100%", "100%"), fill="#000"))
		footer.add(Text(
			insert=("50%", "50%"),
			text=label,
			dominant_baseline="middle",
			text_anchor="middle",
			fill="#FFF",
			style="font-size:%i" % fontsize_small
		))
		canvas.add(footer)

		canvas.save(pretty=True)

		self.dataset.update_status("Finished")
		self.dataset.finish(len(intervals))
    def make_svg(self, data):
        """Draw every form in ``self.forms`` as SVG and store the markup in ``data``.

        For each form, every secondary structure listed in ``data["layers"]``
        is drawn as a circle (type ``"H"``), a triangle (type ``"E"``) or a
        cross (any other type), positioned from its ``shift_x``/``shift_z``
        values. Consecutive structures in the form id (split on ``"_"``) are
        joined by black lines; the first and last get a short blue/red stub.
        The resulting SVG string is written to the ``"svg"`` key of the entry
        in ``data["forms"]`` whose ``"id"`` equals the form id.

        :param data: dict with at least the keys ``"layers"`` and ``"forms"``.
        """
        layers = data["layers"]
        _PROPORTION = 10
        _SHIFT_PROP = _PROPORTION / 1.5
        radius = 2.3 * _PROPORTION
        stub = radius * 2     # offset of the terminal stubs from the center
        margin = radius * 3   # blank border around the drawing
        number_re = re.compile(r"(\d+)")

        for f in self.forms:
            ids = f.id.split("_")
            last = len(ids) - 1
            order = {x: cx for cx, x in enumerate(ids)}
            structures = [None] * len(ids)
            linkers = []
            centers = {}
            maxW, maxH = 0, 0

            for layer in layers:
                for s in layer:
                    name = f.id + "__" + s["id"]
                    is_ref = "ref" in s
                    px = s["shift_x"] * _SHIFT_PROP
                    pz = s["shift_z"] * _SHIFT_PROP
                    if s["type"] == "H":
                        shape = Circle(center=(px, pz), r=radius, id=name,
                                       fill="gainsboro" if is_ref else "cornflowerblue",
                                       stroke="black", stroke_width="2")
                    elif s["type"] == "E":
                        rotate = f.get_ss_by_id(s["id"]).struc.goes_down()
                        shape = Triangle(center=(px, pz), rc=radius, id=name,
                                         fill="salmon" if is_ref else "indianred",
                                         stroke="black", stroke_width="2",
                                         rotate=rotate)
                    else:
                        shape = Cross(center=(px, pz), r=radius,
                                      fill="lightgreen" if is_ref else "darkgreen",
                                      stroke="black", stroke_width="2",
                                      id=name)
                    structures[order[s["id"]]] = shape
                    centers[s["id"]] = [px, pz]
                    if px > maxW:
                        maxW = px
                    if pz > maxH:
                        maxH = pz

            for cx, x in enumerate(ids):
                if cx == 0 or cx == last:
                    # terminal stub: points away from the middle of the
                    # layer stack (vertically) and of its own layer
                    # (horizontally); the id's first character is read as a
                    # layer letter ("A" = first layer)
                    init = [0, 0]
                    if (ord(x[0]) - 64) <= len(layers) / 2:
                        init[1] = centers[x][1] - stub
                    else:
                        init[1] = centers[x][1] + stub
                    if int(number_re.search(x).group(1)) <= len(layers[ord(x[0]) - 65]) / 2:
                        init[0] = centers[x][0] - stub
                    else:
                        init[0] = centers[x][0] + stub
                    if cx == 0:
                        linkers.append(Line(init, centers[x], stroke="darkblue", stroke_width="4"))
                    else:
                        linkers.append(Line(centers[x], init, stroke="darkred", stroke_width="4"))
                if cx < last:
                    # connect to the next structure in the form
                    linkers.append(Line(centers[x], centers[ids[cx + 1]],
                                        stroke="black", stroke_width="4"))

            # Intercalate: alternate linkers between the layer drawn below
            # the shapes and the layer drawn on top of them
            if f.sslist[0].struc.goes_up():
                dowlink = linkers[0:][::2]
                toplink = linkers[1:][::2]
            else:
                dowlink = linkers[1:][::2]
                toplink = linkers[0:][::2]

            g = Group()
            for x in dowlink:
                g.add(x)
            for x in structures:
                g.add(x)
            for x in toplink:
                g.add(x)
            g.translate(margin, margin)
            d = Drawing(size=(maxW + (margin * 2), maxH + (margin * 2)),
                        id=f.id + "__image")
            d.add(g)
            for x in data["forms"]:
                if x["id"] == f.id:
                    x["svg"] = d.tostring()
    def plot_data(self):
        """Plot every categorised sample in ``self.annotated_scores`` as a circle.

        Samples are placed horizontally by score, scaled between
        ``self.min_value`` and ``self.max_value`` across the plottable width.
        Each category stacks its circles downwards in its own column of
        vertical offsets. Samples without a ``"category"``, or with a category
        other than Fibroblasts/Cardiomyocytes/Experimental, are skipped.

        Sets ``self.max_value`` and, when it is falsy, ``self.min_value``.
        When the legend is hidden, a top rule and the two end labels are drawn.
        """
        if not self.show_legend:
            self.plot.add(
                Line(start=(self.margin_left, self.margin_top),
                     end=(self.width - self.margin_right, self.margin_top),
                     stroke_width=1,
                     stroke="black"))

        scores = [x['score'] for x in self.annotated_scores.values()]
        self.max_value = max(scores)
        # NOTE(review): a preset min_value of 0 is falsy and is therefore
        # recomputed here - confirm whether that is intended
        if not self.min_value:
            self.min_value = min(scores)

        if not self.show_legend:
            self.plot.add(
                Text("Fibroblasts",
                     insert=(self.margin_left, self.margin_top - 5),
                     fill="black",
                     font_size="15"))
            self.plot.add(
                Text("Cardiomyocytes",
                     insert=(self.width - self.margin_right - 100,
                             self.margin_top - 5),
                     fill="black",
                     font_size="15"))

        delta = self.max_value - self.min_value
        plottable = self.width - (self.margin_left + self.margin_right)

        # next free vertical offset per category
        spacing = {
            "Fibroblasts": INIT_GAP,
            "Cardiomyocytes": INIT_GAP,
            "Experimental": INIT_GAP,
            "other": INIT_GAP
        }
        category_colours = {
            "Fibroblasts": "green",
            "Cardiomyocytes": "blue",
            "Experimental": "red",
        }

        for sample, sample_details in self.annotated_scores.items():
            if "category" not in sample_details:
                continue
            sample_type = sample_details["category"]
            colour = category_colours.get(sample_type)
            if colour is None:
                # uncategorised/"other" samples are not plotted
                continue

            # all scores identical -> no range to scale over; pin the sample
            # to the left edge instead of dividing by zero
            fraction = ((sample_details['score'] - self.min_value) / delta
                        if delta else 0.0)
            position = self.margin_left + (fraction * plottable)

            circle_obj = Circle(
                center=(position, self.margin_top + spacing[sample_type] + 5),
                r=3,
                stroke_width=0.1,
                stroke_linecap='round',
                stroke_opacity=1,
                fill=colour,
                fill_opacity=0.6)  # set to 0.2 if you want to show clear.
            circle_obj.set_desc('{} - {} - {}'.format(
                sample, sample_type, sample_details["score"] if "description"
                not in sample_details else sample_details["description"]))
            self.plot.add(circle_obj)
            # "other" would be packed more tightly if it were ever plotted
            spacing[sample_type] += 3 if sample_type == "other" else 6
Exemple #28
0
 def test_coordinates(self):
     """A Line built from unit-suffixed coordinates serialises them verbatim."""
     expected = '<line x1="10cm" x2="20cm" y1="11cm" y2="30cm" />'
     rendered = Line(start=('10cm', '11cm'), end=('20cm', '30cm')).tostring()
     self.assertEqual(rendered, expected)
Exemple #29
0
def ruler(scale=1000,
          length=100,
          major_interval=10,
          minor_per_major=10,
          reverse=False,
          vertical=False):
    """Build an SVG group containing the tick marks and labels of a ruler.

    Major ticks are drawn every ``major_interval`` units, starting at
    ``major_interval`` (no tick is drawn at 0) and continuing until the first
    value that is at least ``length``. Each major interval is subdivided by
    ``minor_per_major - 1`` minor ticks; the one exactly halfway is drawn
    slightly longer. Offsets are converted with ``value / scale * 1000`` and
    emitted in millimetres.

    Args:
        scale: Divisor applied to ruler values before conversion to mm.
        length: Ruler value at (or beyond) which the last major tick is drawn.
        major_interval: Distance between major ticks; must be positive.
        minor_per_major: Number of subdivisions per major interval.
        reverse: If true, offsets run in the negative direction.
        vertical: If true, ticks run along the y axis and point towards
            negative x; otherwise along the x axis, pointing to positive y.

    Returns:
        The group holding all tick lines and labels.

    Raises:
        ValueError: If ``major_interval`` is not positive, since the ruler
            would then never reach ``length``.
    """
    if major_interval <= 0:
        raise ValueError("major_interval must be positive")

    major_len_mm = 3
    mid_len_mm = 2
    minor_len_mm = 1
    font_size_mm = 2

    sign = -1 if reverse else 1

    def tick(offset_mm, tick_len_mm):
        # One tick mark of the given length at the given offset on the ruler.
        if vertical:
            return Line((0 * mm, offset_mm * mm),
                        (-tick_len_mm * mm, offset_mm * mm))
        return Line((offset_mm * mm, 0 * mm),
                    (offset_mm * mm, tick_len_mm * mm))

    g = Group()

    mj = 0
    while mj < length:
        # Minor tick marks between this major position and the next
        for i in range(1, minor_per_major):
            offset_mm = sign * (
                (mj + i * major_interval / minor_per_major) / scale) * 1000
            is_half_mark = i == minor_per_major / 2
            g.add(tick(offset_mm, mid_len_mm if is_half_mark else minor_len_mm))
        mj += major_interval

        # the final label carries the unit
        if mj >= length:
            label = f"{mj}m"
        else:
            label = f"{mj}"

        # Major line
        offset_mm = sign * (mj / scale) * 1000
        g.add(tick(offset_mm, major_len_mm))
        if vertical:
            # NOTE(review): with reverse=True offsets are negative, so this
            # condition is never met and no labels are drawn - confirm intent
            if offset_mm > (major_len_mm + font_size_mm * 1.5):
                # only draw label if it wont collide w horizontal ruler
                g.add(
                    Text(
                        label,
                        x=[(-major_len_mm - 1) * mm],
                        y=[(offset_mm + font_size_mm / 2.5) * mm],
                        text_anchor="end",
                        font_size=font_size_mm * mm,
                    ))
        else:
            g.add(
                Text(
                    label,
                    x=[offset_mm * mm],
                    y=[(major_len_mm + font_size_mm) * mm],
                    text_anchor="middle",
                    font_size=font_size_mm * mm,
                ))

    return g
Exemple #30
0
 def get_line(self, value, color, **options):
     """Return the markup of a dashed line from (x0, y0) to the slice coordinate of ``value``.

     ``options`` is accepted but not used.
     """
     tip = self.calc_slice_coord(value, self.r)
     origin = (self.x0, self.y0)
     return Line(origin, tip, stroke=color, stroke_dasharray="4 4").tostring()
Exemple #31
0
    def process(self):
        """
        Plot the nearest neighbours of the query words, per word embedding
        model, in a single two-dimensional SVG graph.

        The source archive is unpacked and every ``*.model`` file in it is
        loaded. For each model the nearest neighbours of each query word are
        collected, limited to words present in all models and with a
        similarity of at least the threshold. The vectors of all words to
        plot are reduced to two dimensions with the selected method (PCA,
        t-SNE or TruncatedSVD), scaled to the canvas and drawn as labels;
        query words are highlighted and connected by a line across models.

        The dataset is finished with the number of query words plotted, or
        with 0 if the input is unusable. The unpacked staging area is removed
        before returning on every path after unpacking.
        """
        # parse parameters; ignore surrounding whitespace and empty items
        raw_words = self.parameters.get("words", "") or ""
        input_words = [
            word.strip() for word in raw_words.split(",") if word.strip()
        ]
        if not input_words:
            self.dataset.update_status(
                "No input words provided, cannot look for similar words.",
                is_final=True)
            self.dataset.finish(0)
            return

        try:
            threshold = float(
                self.parameters.get("threshold",
                                    self.options["threshold"]["default"]))
        except (ValueError, TypeError):
            threshold = float(self.options["threshold"]["default"])

        threshold = max(-1.0, min(1.0, threshold))
        num_words = convert_to_int(self.parameters.get("num-words"),
                                   self.options["num-words"]["default"])
        overlay = self.parameters.get("overlay")
        reduction_method = self.parameters.get("method")
        all_words = self.parameters.get("all-words")

        # load model files and initialise
        self.dataset.update_status("Unpacking word embedding models")
        staging_area = self.unpack_archive_contents(self.source_file)
        common_vocab = None
        vector_size = None
        models = {}

        # find words that are common to all models
        self.dataset.update_status("Determining cross-model common vocabulary")
        for model_file in staging_area.glob("*.model"):
            if self.interrupted:
                shutil.rmtree(staging_area)
                raise ProcessorInterruptedException(
                    "Interrupted while processing word embedding models")

            model = KeyedVectors.load(str(model_file)).wv
            models[model_file.stem] = model
            if vector_size is None:
                vector_size = model.vector_size  # needed later for dimensionality reduction

            if common_vocab is None:
                common_vocab = set(model.vocab.keys())
            else:
                common_vocab &= set(model.vocab.keys())  # intersect

        if not models:
            # nothing to compare; bail out before the numpy set-up below
            # chokes on an undefined vector size
            shutil.rmtree(staging_area)
            self.dataset.update_status(
                "No word embedding models found in source file.",
                is_final=True)
            self.dataset.finish(0)
            return

        # common_vocab stays a set: it is only used for membership tests
        # further down, which are constant-time this way

        # initial boundaries of 2D space (to be adjusted later based on the
        # outcome of the dimensionality reduction)
        max_x = 0.0 - sys.float_info.max
        max_y = 0.0 - sys.float_info.max
        min_x = sys.float_info.max
        min_y = sys.float_info.max

        # for each model, find the words that we may want to plot - these are
        # the nearest neighbours for the given query words
        relevant_words = {}

        # the vectors need to be reduced all at once - but the vectors are
        # grouped by model. To solve this, keep one numpy array of vectors,
        # but also keep track of which indexes of this array belong to which
        # model, by storing the index of the first vector for a model
        vectors = numpy.empty((0, vector_size))
        vector_offsets = {}

        # now process each model
        for model_name, model in models.items():
            # a set per model: the query words plus their nearest neighbours
            relevant_words[model_name] = set()
            self.dataset.update_status("Finding similar words in model '%s'" %
                                       model_name)

            for query in input_words:
                if query not in model.vocab:
                    shutil.rmtree(staging_area)
                    self.dataset.update_status(
                        "Query '%s' was not found in model %s; cannot find nearest neighbours."
                        % (query, model_name),
                        is_final=True)
                    self.dataset.finish(0)
                    return

                if self.interrupted:
                    shutil.rmtree(staging_area)
                    raise ProcessorInterruptedException(
                        "Interrupted while finding similar words")

                # use a larger sample (topn) than required since some of the
                # nearest neighbours may not be in the common vocabulary and
                # will therefore need to be ignored
                neighbours = [
                    word[0] for word in model.most_similar(query, topn=1000)
                    if word[0] in common_vocab and word[1] >= threshold
                ]
                context = set(neighbours[:num_words])

                # always include query word
                relevant_words[model_name] |= {query} | context

        # now do another loop to determine which words to plot for each model
        # this is either the same as relevant_words, or a superset which
        # combines all relevant words for all models
        plottable_words = {}
        last_model = max(relevant_words.keys())
        all_relevant_words = set().union(*relevant_words.values())

        for model_name in relevant_words:
            plottable_words[model_name] = []
            vector_offsets[model_name] = len(vectors)

            # determine which words to plot for this model. either the nearest
            # neighbours for this model, or all nearest neighbours found across
            # all models
            words_to_include = all_relevant_words if all_words else relevant_words[
                model_name]

            for word in words_to_include:
                if word in plottable_words[model_name] or (
                        not overlay and model_name != last_model
                        and word not in input_words):
                    # only plot each word once per model, or if 'overlay'
                    # is not set, only once overall (for the most recent
                    # model)
                    continue

                vector = models[model_name][word]
                plottable_words[model_name].append(word)
                vectors = numpy.append(vectors, [vector], axis=0)

        del models  # no longer needed

        # reduce the vectors of all words to be plotted to two-dimensional
        # coordinates with the selected technique. here the two-dimensional
        # vectors are interpreted as cartesian coordinates
        if reduction_method == "PCA":
            pca = PCA(n_components=2, random_state=0)
            vectors = pca.fit_transform(vectors)
        elif reduction_method == "t-SNE":
            # initialise t-sne transformer
            # parameters taken from Hamilton et al.
            # https://github.com/williamleif/histwords/blob/master/viz/common.py
            tsne = TSNE(n_components=2,
                        random_state=0,
                        learning_rate=150,
                        init="pca")
            vectors = tsne.fit_transform(vectors)
        elif reduction_method == "TruncatedSVD":
            # standard sklearn parameters made explicit
            svd = TruncatedSVD(n_components=2,
                               algorithm="randomized",
                               n_iter=5,
                               random_state=0)
            vectors = svd.fit_transform(vectors)
        else:
            shutil.rmtree(staging_area)
            self.dataset.update_status(
                "Invalid dimensionality reduction technique selected",
                is_final=True)
            self.dataset.finish(0)
            return

        # also keep track of the boundaries of our 2D space, so we can plot
        # them properly later
        for position in vectors:
            max_x = max(max_x, position[0])
            max_y = max(max_y, position[1])
            min_x = min(min_x, position[0])
            min_y = min(min_y, position[1])

        # now we know for each model which words should be plotted and at what
        # position
        # with this knowledge, we can normalize the positions, and start
        # plotting them in a graph

        # a palette generated with https://medialab.github.io/iwanthue/
        colours = [
            "#d58eff", "#cf9000", "#3391ff", "#a15700", "#911ca7", "#00ddcb",
            "#cc25a9", "#d5c776", "#6738a8", "#ff9470", "#47c2ff", "#a4122c",
            "#00b0ca", "#9a0f76", "#ff70c8", "#713c88"
        ]

        # make sure all coordinates are positive
        max_x -= min_x
        max_y -= min_y

        # determine graph dimensions and proportions
        width = 1000  # arbitrary
        height = width * (max_y / max_x)  # retain proportions
        scale = width / max_x

        # margin around the plot to give room for labels and to look better
        margin = width * 0.1
        width += 2 * margin
        height += 2 * margin

        # normalize all known positions to fit within the graph
        vectors = [(margin + ((position[0] - min_x) * scale),
                    margin + ((position[1] - min_y) * scale))
                   for position in vectors]

        # now all positions are finalised, we can determine the "journey" of
        # each query - the sequence of positions in the graph it takes, so we
        # can draw lines from position to position later
        journeys = {}
        for query in input_words:
            journeys[query] = []
            for model_name, model_words in plottable_words.items():
                index = model_words.index(query)
                journeys[query].append(vectors[vector_offsets[model_name] +
                                               index])

        # font sizes proportional to width (which is static and thus predictable)
        fontsize_large = width / 50
        fontsize_normal = width / 75
        fontsize_small = width / 100

        # now we have the dimensions, the canvas can be instantiated
        model_type = self.source_dataset.parameters.get(
            "model-type", "word2vec")
        canvas = get_4cat_canvas(
            self.dataset.get_results_path(),
            width,
            height,
            header="%s nearest neighbours (fitting: %s) - '%s'" %
            (model_type, reduction_method, ",".join(input_words)),
            fontsize_normal=fontsize_normal,
            fontsize_large=fontsize_large,
            fontsize_small=fontsize_small)

        # use colour-coded backgrounds to distinguish the query words in the
        # graph, each model (= interval) with a separate colour. the palette
        # is cycled if there are more models than colours
        for model_index, model_name in enumerate(plottable_words):
            solid = Filter(id="solid-%s" % model_name)
            solid.feFlood(flood_color=colours[model_index % len(colours)])
            solid.feComposite(in_="SourceGraphic")
            canvas.defs.add(solid)

        # now plot each word for each model
        self.dataset.update_status("Plotting graph")
        word_layer = SVG(insert=(0, 0), size=(width, height))
        query_layer = SVG(insert=(0, 0), size=(width, height))

        for model_index, (model_name, labels) in enumerate(
                plottable_words.items()):
            # same colour as this model's background filter above
            model_colour = colours[model_index % len(colours)]
            offset = vector_offsets[model_name]
            positions = vectors[offset:offset + len(labels)]

            for position, word in zip(positions, labels):
                is_query = word in input_words

                label_filter = ("url(#solid-%s)" %
                                model_name) if is_query else "none"
                colour = "#FFF" if is_query else model_colour
                fontsize = fontsize_normal if is_query else fontsize_small

                if is_query:
                    word += " (" + model_name + ")"

                label_container = SVG(insert=position,
                                      size=(1, 1),
                                      overflow="visible")
                label_container.add(
                    Text(insert=("50%", "50%"),
                         text=word,
                         dominant_baseline="middle",
                         text_anchor="middle",
                         style="fill:%s;font-size:%ipx" % (colour, fontsize),
                         filter=label_filter))

                # we make sure the queries are always rendered on top by
                # putting them in a separate SVG container
                if is_query:
                    query_layer.add(label_container)
                else:
                    word_layer.add(label_container)

        # plot a line between positions for query words
        lines = SVG(insert=(0, 0), size=(width, height))
        for journey in journeys.values():
            previous_position = None
            for position in journey:
                if previous_position is None:
                    previous_position = position
                    continue

                lines.add(
                    Line(start=previous_position,
                         end=position,
                         stroke="#CE1B28",
                         stroke_width=2))
                previous_position = position

        canvas.add(lines)
        canvas.add(word_layer)
        canvas.add(query_layer)

        canvas.save(pretty=True)
        shutil.rmtree(staging_area)
        self.dataset.finish(len(journeys))
Exemple #32
0
 def test_coordinates(self):
     """Check that start/end coordinates with units end up in the x1/y1/x2/y2 attributes."""
     start_point = ('10cm', '11cm')
     end_point = ('20cm', '30cm')
     markup = Line(start=start_point, end=end_point).tostring()
     self.assertEqual(markup, '<line x1="10cm" x2="20cm" y1="11cm" y2="30cm" />')