Esempio n. 1
0
    def inv_eval(vtrace):
        """Turn a list of area-segment trace elements into a (table, chart) candidate.

        Every element of ``vtrace`` is read through its ``x1``/``x2``,
        ``yt1``/``yt2``, ``yb1``/``yb2`` and ``color`` attributes.

        Returns:
            A list holding a single ``(SymTable, chart)`` pair, or ``[]`` when
            the colored elements do not fill a complete x-by-color table.
        """
        every_element_colored = all(vt.color != None for vt in vtrace)

        if every_element_colored:
            # wide layout: one row per x value, one column per color
            color_names = list(set(vt.color for vt in vtrace))
            rows_by_x = {}
            for vt in vtrace:
                column = str(vt.color)
                for x, top, bottom in ((vt.x1, vt.yt1, vt.yb1),
                                       (vt.x2, vt.yt2, vt.yb2)):
                    row = rows_by_x.setdefault(x, {"c_x": x})
                    # store the band height when a bottom is known, else the top
                    row[column] = (top - bottom) if bottom is not None else top

            expected_width = len(color_names) + 1
            table_content = []
            for row in rows_by_x.values():
                if len(row) != expected_width:
                    # some color has no value at this x: give up on this layout
                    return []
                table_content.append(row)

            # NOTE(review): this branch builds an MpScatterPlot while the
            # uncolored branch builds an MpAreaChart -- confirm this is intended.
            chart = MpScatterPlot("c_x", ["{}".format(c) for c in color_names])
            return [(SymTable(values=table_content), chart)]

        # simple visual trace: a single series, with an optional bottom column
        all_start_from_zero = all(
            vt.yb1 == 0 and vt.yb2 == 0 for vt in vtrace)
        table_content = []
        for vt in vtrace:
            for x, top, bottom in ((vt.x1, vt.yt1, vt.yb1),
                                   (vt.x2, vt.yt2, vt.yb2)):
                row = {"c_x": x, "c_top": top}
                if not all_start_from_zero:
                    row["c_bot"] = bottom
                table_content.append(row)

        chart = MpAreaChart(
            c_x="c_x",
            c_tops=["c_top"],
            c_bots=None if all_start_from_zero else ["c_bot"])
        return [(SymTable(values=table_content), chart)]
Esempio n. 2
0
    def inv_eval(vtrace):
        """Build a long-format table plus a LineChart from line-segment trace elements.

        Each element contributes its two end points (``x1``/``y1`` and
        ``x2``/``y2``) together with its ``size``, ``color`` and ``column``
        attributes; identical points are kept only once, in first-seen order.
        One constraint string is appended per trace element.

        Returns:
            A list holding a single ``(SymTable, LineChart)`` pair.
        """
        # Constraint text, spelled out piece by piece so the embedded
        # indentation and trailing spaces stay exactly as they were.
        # NOTE(review): the text contains the literal names "vt.x1", "vt.x2",
        # ... rather than the element's values -- confirm this is intended.
        no_point_between = (
            "\n"
            + " " * 16 + "(not (exists (r Tuple) (and (> r.c_x vt.x1) \n"
            + " " * 44 + "(< r.c_x vt.x2) \n"
            + " " * 44 + "(= r.c_color vt.color) \n"
            + " " * 44 + "(= r.c_column vt.column))))"
        )

        constraints = []
        # serialized points used as dict keys to drop duplicates, order kept
        unique_points = {}
        for vt in vtrace:
            for x, y in ((vt.x1, vt.y1), (vt.x2, vt.y2)):
                # fault tolerance: an end point with a null coordinate is ignored
                if x != None and y != None:
                    serialized = json.dumps(
                        {
                            "c_x": x,
                            "c_y": y,
                            "c_size": vt.size,
                            "c_color": vt.color,
                            "c_column": vt.column
                        },
                        sort_keys=True)
                    unique_points.setdefault(serialized, None)

            # there should not be any points between these two
            constraints.append(no_point_between)

        data_values = [json.loads(serialized) for serialized in unique_points]
        unused_fields = remove_unused_fields(data_values)

        channel_defaults = [("x", "_"), ("y", "_"), ("size", "nominal"),
                            ("color", "nominal"), ("column", "nominal")]
        encodings = []
        for channel, default_ty in channel_defaults:
            field_name = "c_{}".format(channel)
            if field_name in unused_fields:
                continue
            if channel in ("x", "y"):
                # positional channels take their type from the data itself
                dtype = table_utils.infer_dtype(
                    [row[field_name] for row in data_values])
                enc_ty = "nominal" if dtype == "string" else "quantitative"
            else:
                enc_ty = default_ty
            encodings.append(Encoding(channel, field_name, enc_ty))

        line_chart = LineChart(encodings=encodings)
        table = SymTable(values=data_values, constraints=constraints)
        return [(table, line_chart)]
Esempio n. 3
0
def sample_symbolic_table(symtable, size, strategy="diversity"):
    """Given a symbolic table, sample a smaller symbolic table contained by it.

    Args:
        symtable: the input symbolic table; rows are read from
            ``symtable.values``.
        size: the number of rows wanted for the output table; capped at the
            number of rows available.
        strategy: ``"uniform"`` draws row indices without replacement with
            ``np.random.choice``; ``"diversity"`` (the default) picks rows one
            at a time, each time letting ``pick_diverse_candidate_index``
            choose among up to 20 randomly drawn rows not chosen yet.

    Returns:
        A new ``SymTable`` built from the sampled rows.

    Raises:
        ValueError: if ``strategy`` is not ``"uniform"`` or ``"diversity"``.
    """
    # Reject unknown strategies up front; otherwise no index set would ever be
    # assigned and the function would fail later with an UnboundLocalError.
    if strategy not in ("uniform", "diversity"):
        raise ValueError(
            "unknown sampling strategy {!r}; expected 'uniform' or "
            "'diversity'".format(strategy))

    row_count = len(symtable.values)
    if size > row_count:
        size = row_count

    if strategy == "uniform":
        chosen_indices = np.random.choice(list(range(row_count)),
                                          size,
                                          replace=False)
    else:
        indices = set(range(row_count))
        chosen_indices = set()
        for _ in range(size):
            pool = indices - chosen_indices
            candidate_size = min(20, len(pool))
            candidates = np.random.choice(list(pool),
                                          size=candidate_size,
                                          replace=False)
            index = pick_diverse_candidate_index(candidates, chosen_indices,
                                                 symtable.values)
            chosen_indices.add(index)

    sample_values = [symtable.values[i] for i in chosen_indices]
    return SymTable(sample_values)
Esempio n. 4
0
    def inv_eval(vtrace, orientation):
        """Arrange bar trace elements as a wide table for a grouped bar chart.

        Rows are keyed by the category position (``vt.x`` when ``orientation``
        is ``"vertical"``, ``vt.y`` otherwise) and get one column per distinct
        ``vt.color`` holding the bar length (``y2 - y1`` or ``x2 - x1``).

        Args:
            vtrace: list of bar trace elements.
            orientation: ``"vertical"``; any other value is treated as the
                horizontal layout.

        Returns:
            A list holding a single ``(SymTable, MpGroupBarChart)`` pair, or
            ``[]`` when an element has no end coordinate or no color, or when
            some category lacks a value for one of the colors.
        """
        # map x to multiple y
        y_cols = list(set([vt.color for vt in vtrace]))
        table_dict = {}
        for vt in vtrace:
            if orientation == "vertical":
                category, start, end = vt.x, vt.y1, vt.y2
            else:
                category, start, end = vt.y, vt.x1, vt.x2
            if end is None or vt.color is None:
                return []
            row = table_dict.setdefault(category, {"c_x": category})
            row[vt.color] = end - start

        expected_width = len(y_cols) + 1
        table_content = []
        for row in table_dict.values():
            if len(row) != expected_width:
                # cannot be represented in mp format
                return []
            table_content.append(row)

        return [(SymTable(values=table_content),
                 MpGroupBarChart("c_x", y_cols, orient=orientation))]
Esempio n. 5
0
    def inv_eval(vtrace, orientation):
        """Build a bottom/height table plus an MpBarChart from bar trace elements.

        For ``"vertical"`` the category is ``vt.x`` and the extent comes from
        ``vt.y1``/``vt.y2``; for ``"horizontal"`` the category is ``vt.y`` and
        the extent comes from ``vt.x1``/``vt.x2``.  Any other orientation
        yields an empty table.

        Returns:
            A list holding a single ``(SymTable, MpBarChart)`` pair.
        """
        if orientation == "vertical":
            spans = [(vt.x, vt.y1, vt.y2, vt.color) for vt in vtrace]
        elif orientation == "horizontal":
            spans = [(vt.y, vt.x1, vt.x2, vt.color) for vt in vtrace]
        else:
            spans = []

        data_values = []
        for category, start, end, color in spans:
            if end is None:
                # no end coordinate: the start value is the bar length itself
                bottom, height = None, start
            else:
                bottom, height = start, end - start
            data_values.append({
                "c_x": category,
                "c_bot": bottom,
                "c_height": height,
                "c_color": color
            })

        # remove fields that contain none values
        unused_fields = remove_unused_fields(data_values)

        bottom_field = None if "c_bot" in unused_fields else "c_bot"
        color_field = None if "c_color" in unused_fields else "c_color"
        bar_chart = MpBarChart(c_x="c_x",
                               c_bot=bottom_field,
                               c_height="c_height",
                               c_color=color_field,
                               orient=orientation)
        return [(SymTable(values=data_values), bar_chart)]
Esempio n. 6
0
    def inv_eval(vtrace):
        """Build a table plus an MpLineChart from line-segment trace elements.

        Both end points of every element (``x1``/``y1`` and ``x2``/``y2``,
        with the element's ``size``, ``color`` and ``column``) are collected,
        keeping each distinct point once in first-seen order.

        When the color field is in use, the points are pivoted into a wide
        table with one row per x and one column per color; otherwise the
        points are used as-is with ``c_y`` (and ``c_size`` when in use) as the
        y columns.

        Returns:
            A list holding a single ``(SymTable, MpLineChart)`` pair, or ``[]``
            when the wide table would have a missing cell.

        Raises:
            AssertionError: if both the color and the size fields are in use.
        """
        # serialized points used as dict keys: drops duplicates, keeps order
        frozen_data = {}
        for vt in vtrace:
            # each end of a segment will only be added once
            for x, y in ((vt.x1, vt.y1), (vt.x2, vt.y2)):
                point = json.dumps(
                    {
                        "c_x": x,
                        "c_y": y,
                        "c_size": vt.size,
                        "c_color": vt.color,
                        "c_column": vt.column
                    },
                    sort_keys=True)
                frozen_data.setdefault(point, None)

        data_values = [json.loads(point) for point in frozen_data]

        unused_fields = remove_unused_fields(data_values)
        uses_color = "c_color" not in unused_fields
        uses_size = "c_size" not in unused_fields

        if uses_color and uses_size:
            # raised explicitly so the guard also holds when asserts are
            # stripped (python -O)
            raise AssertionError(
                "color and size cannot both be used in a line chart trace")

        if uses_color:
            y_cols = list(set([r["c_color"] for r in data_values]))
            col_num = 1 + len(y_cols)
            # map x to multiple y
            table_dict = {}
            for r in data_values:
                row = table_dict.setdefault(r["c_x"], {"c_x": r["c_x"]})
                row[r["c_color"]] = r["c_y"]

            table_content = []
            for row in table_dict.values():
                if len(row) != col_num:
                    # every x needs a value for every color
                    return []
                table_content.append(row)
        else:
            y_cols = ["c_y"]
            if uses_size:
                # append the column name itself, not a nested list
                y_cols.append("c_size")
            table_content = data_values

        return [(SymTable(values=table_content,
                          constraints=[]), MpLineChart("c_x", y_cols))]
Esempio n. 7
0
    def inv_eval(vtrace):
        """Build a table, summary-statistic constraints and a BoxPlot from box trace elements.

        Every element contributes two rows (its ``min`` and its ``max`` as
        ``c_y``) and five constraint strings, one each for min, max, Q1, Q3
        and median, formatted with the element's ``color`` and ``column``.

        Returns:
            A list holding a single ``(SymTable, BoxPlot)`` pair.
        """
        # (constraint template, attribute supplying the expected value)
        constraint_specs = [
            ("min([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}",
             "min"),
            ("max([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}",
             "max"),
            ("Q1([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}",
             "Q1"),
            ("Q3([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}",
             "Q3"),
            ("median([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}",
             "median"),
        ]

        data_values = []
        constraints = []
        for vt in vtrace:
            # min max will appear in the table
            for extreme in (vt.min, vt.max):
                data_values.append({
                    "c_x": vt.x,
                    "c_y": extreme,
                    "c_color": vt.color,
                    "c_column": vt.column
                })

            # the output table should satisfy these constraints
            for template, stat_attr in constraint_specs:
                constraints.append(
                    template.format(vt.color, vt.column,
                                    getattr(vt, stat_attr)))

        # remove fields that contain none values
        unused_fields = remove_unused_fields(data_values)

        channel_defaults = [("x", "_"), ("y", "_"), ("color", "nominal"),
                            ("column", "nominal")]
        encodings = []
        for channel, default_ty in channel_defaults:
            field_name = "c_{}".format(channel)
            if field_name in unused_fields:
                continue

            if channel in ("x", "y"):
                # the type needs to be determined by datatype
                dtype = table_utils.infer_dtype(
                    [row[field_name] for row in data_values])
                enc_ty = "nominal" if dtype == "string" else "quantitative"
            else:
                enc_ty = default_ty

            encodings.append(Encoding(channel, field_name, enc_ty))

        chart = BoxPlot(encodings=encodings)
        return [(SymTable(data_values, constraints), chart)]
Esempio n. 8
0
    def inv_eval(vtrace):
        """Build a table plus an MpScatterPlot from point trace elements.

        With more than one distinct ``vt.color`` the points are pivoted into a
        wide table (one row per ``vt.x``, one column per color holding
        ``vt.y``).  Otherwise each point becomes a ``c_x``/``c_y`` row, with a
        ``c_size`` column when any element has a size.

        Returns:
            A list holding a single ``(SymTable, MpScatterPlot)`` pair, or
            ``[]`` when any element has a shape, when size is combined with
            several colors, or when the wide table would have a missing cell.
        """
        y_cols = list(set([vt.color for vt in vtrace]))
        size_used = any(vt.size is not None for vt in vtrace)
        shape_used = any(vt.shape is not None for vt in vtrace)

        if shape_used or (len(y_cols) > 1 and size_used):
            # does not support shape or size + color
            return []

        if len(y_cols) > 1:
            # map x to multiple y
            table_dict = {}
            for vt in vtrace:
                row = table_dict.setdefault(vt.x, {"c_x": vt.x})
                row[str(vt.color)] = vt.y

            expected_width = len(y_cols) + 1
            table_content = []
            for row in table_dict.values():
                if len(row) != expected_width:
                    # every x needs a value for every color
                    return []
                table_content.append(row)
            chart = MpScatterPlot("c_x", [str(y) for y in y_cols])
            return [(SymTable(values=table_content), chart)]

        c_size = "c_size" if size_used else None
        table_content = []
        for vt in vtrace:
            row = {"c_x": vt.x, "c_y": vt.y}
            if size_used:
                row["c_size"] = vt.size
            table_content.append(row)

        # Built once, outside the loop, so an empty trace still has a chart to
        # return instead of hitting an unbound local.
        chart = MpScatterPlot("c_x", ["c_y"], c_size)
        return [(SymTable(values=table_content), chart)]
Esempio n. 9
0
    def inv_eval(vtrace):
        """Build a long-format table plus an AreaChart from area-segment trace elements.

        Both ends of every element (``x1``/``yt1``/``yb1`` and
        ``x2``/``yt2``/``yb2``, with the element's ``color`` and ``column``)
        are collected, keeping each distinct point once in first-seen order.

        Returns:
            A list holding a single ``(SymTable, AreaChart)`` pair.
        """
        # serialized points used as dict keys: drops duplicates, keeps order
        unique_points = {}
        for vt in vtrace:
            # each end of a segment will only be added once
            for x, top, bottom in ((vt.x1, vt.yt1, vt.yb1),
                                   (vt.x2, vt.yt2, vt.yb2)):
                serialized = json.dumps(
                    {
                        "c_x": x,
                        "c_y": top,
                        "c_y2": bottom,
                        "c_color": vt.color,
                        "c_column": vt.column
                    },
                    sort_keys=True)
                unique_points.setdefault(serialized, None)

        data_values = [json.loads(serialized) for serialized in unique_points]

        # remove fields that contain none values
        unused_fields = remove_unused_fields(data_values)

        channel_defaults = [("x", "_"), ("y", "quantitative"),
                            ("y2", "quantitative"), ("color", "nominal"),
                            ("column", "nominal")]
        encodings = []
        for channel, default_ty in channel_defaults:
            field_name = "c_{}".format(channel)
            if field_name in unused_fields:
                continue
            if channel == "x":
                # only the x channel takes its type from the data
                dtype = table_utils.infer_dtype(
                    [row[field_name] for row in data_values])
                enc_ty = "nominal" if dtype == "string" else "quantitative"
            else:
                enc_ty = default_ty
            encodings.append(Encoding(channel, field_name, enc_ty))

        chart = AreaChart(encodings=encodings)
        return [(SymTable(values=data_values), chart)]
Esempio n. 10
0
    def inv_eval(vtrace, vty):
        """Dispatch a visual trace to the chart classes matching its mark type.

        Without any ``vt.column`` value the candidates of the matching chart
        classes are returned directly.  Otherwise the trace is partitioned by
        ``vt.column``, candidates are computed per partition, and the tables
        of same-typed charts are concatenated (each row tagged with a
        ``"c_column"`` key) and wrapped in an ``MpSubplot``.

        Args:
            vtrace: list of trace elements; each must expose ``column``.
            vty: mark type name: ``"BarV"``, ``"BarH"``, ``"Point"``,
                ``"Line"`` or ``"Area"``.

        Returns:
            A list of ``(SymTable, chart)`` pairs; ``[]`` when ``vty`` is not
            one of the supported names or when any column partition has no
            candidate.
        """
        def synth_per_case(_vtrace, _vty):
            # candidates for one (sub)trace, by mark type
            if _vty in ("BarV", "BarH"):
                orientation = "vertical" if _vty == "BarV" else "horizontal"
                l2 = MpGroupBarChart.inv_eval(_vtrace, orientation=orientation)
                l1 = MpBarChart.inv_eval(_vtrace, orientation=orientation)
                return l1 + l2
            if _vty == "Point":
                return MpScatterPlot.inv_eval(_vtrace)
            if _vty == "Line":
                return MpLineChart.inv_eval(_vtrace)
            if _vty == "Area":
                return MpAreaChart.inv_eval(_vtrace)
            # Unknown mark type: report "no candidate" rather than falling off
            # the end and handing None to code that expects a list.
            return []

        use_column = any(vt.column is not None for vt in vtrace)
        if not use_column:
            return synth_per_case(vtrace, vty)

        partition = {}
        for vt in vtrace:
            partition.setdefault(vt.column, []).append(vt)

        chart_by_type = {}
        for col, col_trace in partition.items():
            layer_cand = synth_per_case(col_trace, vty)
            if not layer_cand:
                return []
            for table, chart in layer_cand:
                # the chart of the first partition seen represents its type
                if type(chart) not in chart_by_type:
                    chart_by_type[type(chart)] = {"table": [], "chart": chart}
                col_table = table.values
                for r in col_table:
                    # rows are tagged in place with the partition they came from
                    r["c_column"] = col
                chart_by_type[type(chart)]["table"] += col_table

        return [(SymTable(values=chart_by_type[chart_ty]["table"]),
                 MpSubplot(chart_by_type[chart_ty]["chart"], "c_column"))
                for chart_ty in chart_by_type]
Esempio n. 11
0
    def inv_eval(vtrace, orientation):
        """Build a long-format table plus a BarChart from bar trace elements.

        For ``"vertical"`` each row holds ``vt.x`` with ``vt.y1``/``vt.y2``;
        for ``"horizontal"`` it holds ``vt.x1``/``vt.x2`` with ``vt.y``.  The
        element's ``column`` and ``color`` are stored in both cases.

        Args:
            vtrace: list of bar trace elements.
            orientation: ``"horizontal"`` or ``"vertical"`` (asserted).

        Returns:
            A list holding a single ``(SymTable, BarChart)`` pair.
        """
        assert orientation in ["horizontal", "vertical"]

        data_values = []

        if orientation == "vertical":
            data_values.extend({
                "c_x": vt.x,
                "c_y": vt.y1,
                "c_y2": vt.y2,
                "c_column": vt.column,
                "c_color": vt.color
            } for vt in vtrace)
            channel_types = [("x", "nominal"), ("y", "quantitative"),
                             ("y2", "quantitative"), ("color", "nominal"),
                             ("column", "nominal")]
        elif orientation == "horizontal":
            data_values.extend({
                "c_x": vt.x1,
                "c_x2": vt.x2,
                "c_y": vt.y,
                "c_column": vt.column,
                "c_color": vt.color
            } for vt in vtrace)
            channel_types = [("x", "quantitative"), ("x2", "quantitative"),
                             ("y", "nominal"), ("color", "nominal"),
                             ("column", "nominal")]

        # remove fields that contain none values
        unused_fields = remove_unused_fields(data_values)

        encodings = [
            Encoding(channel, "c_{}".format(channel), enc_ty)
            for channel, enc_ty in channel_types
            if "c_{}".format(channel) not in unused_fields
        ]

        bar_chart = BarChart(encodings=encodings, orientation=orientation)
        return [(SymTable(values=data_values), bar_chart)]
Esempio n. 12
0
    def inv_eval(vtrace):
        """Build a long-format table plus a ScatterPlot from point trace elements.

        The mark type is ``"rect"`` when the first element's ``point_shape``
        is ``"rect"`` and ``"point"`` otherwise.  Every element becomes one
        row holding its ``x``, ``y``, ``size``, ``color``, ``shape`` and
        ``column``.

        Returns:
            A list holding a single ``(SymTable, ScatterPlot)`` pair.
        """
        mark_ty = "point"
        if vtrace[0].point_shape == "rect":
            mark_ty = "rect"

        data_values = [{
            "c_x": vt.x,
            "c_y": vt.y,
            "c_size": vt.size,
            "c_color": vt.color,
            "c_shape": vt.shape,
            "c_column": vt.column
        } for vt in vtrace]

        # remove fields that contain none values
        unused_fields = remove_unused_fields(data_values)

        channel_defaults = [("x", "_"), ("y", "_"), ("size", "_"),
                            ("color", "nominal"), ("shape", "nominal"),
                            ("column", "nominal")]
        encodings = []
        for channel, default_ty in channel_defaults:
            field_name = "c_{}".format(channel)
            if field_name in unused_fields:
                continue

            type_from_data = channel in ("x", "y", "size") or (
                channel == "color" and mark_ty == "rect")
            if type_from_data:
                # the type needs to be determined by datatype
                dtype = table_utils.infer_dtype(
                    [row[field_name] for row in data_values])
                enc_ty = "nominal" if dtype == "string" else "quantitative"
            else:
                enc_ty = default_ty

            encodings.append(Encoding(channel, field_name, enc_ty))

        chart = ScatterPlot(mark_ty=mark_ty, encodings=encodings)
        return [(SymTable(values=data_values), chart)]