Exemple #1
0
    def create_datatable_vis(self, sorted_combo):
        """Build the fallback "datatable" visualization for a set of attributes.

        One datatable column is created per attribute in ``sorted_combo``.
        Every FILTER task that has no datatype ambiguity is applied once per
        column, so the accumulated ``by_task`` score is divided by the number
        of columns afterwards to bring it back to a per-visualization level.

        Args:
            sorted_combo: Sized collection of attribute names. Each name must
                be a key of ``self.nl4dv_instance.extracted_attributes``.

        Returns:
            dict: Visualization object with the keys ``score``, ``scoreObj``,
            ``attributes``, ``visType``, ``queryPhrase``, ``tasks``,
            ``inferenceType`` and ``vlSpec``.
        """
        # Create a new base Vega-Lite Spec
        vl_genie_instance = VLGenie()

        # Set the vis type to a datatable; the relevant transforms are made on it below.
        vl_genie_instance.set_vis_type("datatable")

        # Only FILTER tasks are applied to the datatable fallback, and only when
        # there is NO datatype ambiguity.
        # Datatype ambiguity example: "Content Rating > 5" is NOT possible because
        # Content Rating is a Nominal attribute.
        # The selection does not depend on the column, so compute it once.
        applicable_filters = [
            task_instance
            for task_instance in self.nl4dv_instance.extracted_tasks.get(
                "filter", [])
            if not (task_instance["isValueAmbiguous"]
                    and task_instance["meta"]["value_ambiguity_type"]
                    == "datatype")
        ]

        for attr in sorted_combo:
            # Create a column for this attribute
            vl_genie_instance.create_and_add_column_to_datatable(attr)

            # Append the attribute score
            vl_genie_instance.score_obj[
                "by_attributes"] += self.nl4dv_instance.extracted_attributes[
                    attr]["matchScore"]

            # Apply the unambiguous FILTER tasks (once per column, as before).
            for task_instance in applicable_filters:
                vl_genie_instance.set_tasks_to_datatable(None, task_instance)
                vl_genie_instance.score_obj[
                    "by_task"] += task_instance["matchScore"]

        # The `by_task` score was counted once per column, so normalize it to a
        # VIS level. Skip this for an empty combo: nothing was accumulated and
        # dividing by zero would raise ZeroDivisionError.
        num_columns = len(sorted_combo)
        if num_columns:
            vl_genie_instance.score_obj["by_task"] /= num_columns

        # Set the data
        vl_genie_instance.set_data(self.nl4dv_instance.data_url,
                                   self.nl4dv_instance.data_url_type)

        # Create the Visualization object to return
        vis_object = {
            "score":
            sum(vl_genie_instance.score_obj.values()),
            "scoreObj":
            vl_genie_instance.score_obj,
            "attributes":
            sorted_combo,
            "visType":
            "datatable",
            "queryPhrase":
            None,
            "tasks":
            list(self.nl4dv_instance.extracted_tasks.keys()),
            # 'explicit' only when the query named a vis type itself.
            "inferenceType":
            'implicit'
            if self.nl4dv_instance.extracted_vis_type is None else 'explicit',
            "vlSpec":
            vl_genie_instance.vl_spec
        }

        return vis_object
Exemple #2
0
    def create_datatable_vis(self, sorted_combo):
        """Assemble the fallback "datatable" visualization for the given attributes.

        Args:
            sorted_combo: Iterable of attribute names; each one must be a key
                of ``self.nl4dv_instance.extracted_attributes``.

        Returns:
            dict: Visualization object with the keys ``score``, ``scoreObj``,
            ``attributes``, ``visType``, ``queryPhrase``, ``tasks``,
            ``inferenceType`` and ``vlSpec``.
        """
        nl4dv = self.nl4dv_instance

        # Start from a fresh base spec and mark it as a datatable.
        genie = VLGenie()
        genie.set_vis_type("datatable")

        # One column per attribute; each attribute's match score feeds "by_attributes".
        for column_attr in sorted_combo:
            genie.create_and_add_column_to_datatable(column_attr)
            attr_score = nl4dv.extracted_attributes[column_attr]["matchScore"]
            genie.score_obj["by_attributes"] += attr_score

        # Attach the dataset.
        genie.set_data(nl4dv.data_url)

        # The inference is 'explicit' only when the query itself named a vis type.
        if nl4dv.extracted_vis_type is None:
            inference_type = 'implicit'
        else:
            inference_type = 'explicit'

        # The visualization object handed back to the caller.
        return {
            "score": sum(genie.score_obj.values()),
            "scoreObj": genie.score_obj,
            "attributes": sorted_combo,
            "visType": "datatable",
            "queryPhrase": None,
            "tasks": list(nl4dv.extracted_tasks.keys()),
            "inferenceType": inference_type,
            "vlSpec": genie.vl_spec,
        }
Exemple #3
0
    def get_vis(self, design, attr_type_combo, attr_list):
        """Build a VLGenie spec for one design template and attribute combination.

        The attributes are mapped onto the design's encoding channels, the
        extracted tasks are applied (adjusting aggregates, vis type and the
        ``by_task`` score), an explicitly requested vis type is honoured or the
        design is rejected, and finally tooltips, tick formats and the data
        URL are attached.

        Args:
            design: Design template dict. The code reads the keys
                ``"priority"``, ``"mandatory"``, ``"vis_type"`` and
                ``"not_suggested_by_default"``, plus one dict per encoding
                channel carrying ``"agg"``, ``"attr"``, ``"is_defined"`` and
                ``"attr_ref"``. NOTE: this dict is mutated in place
                (``attr``, ``is_defined`` and ``agg`` entries are overwritten).
            attr_type_combo: Datatype signature of the attribute combination;
                it is compared against values such as ``"QN"``, ``"QO"``,
                ``"Q"``, ``"N"``, ``"O"``, ``"T"``.
            attr_list: Attribute names, in the order given by
                ``design["priority"]``.

        Returns:
            The populated ``VLGenie`` instance, or ``None`` when the design is
            incompatible with the tasks or the requested vis type, or when it
            is tagged ``not_suggested_by_default`` and no vis type was
            explicitly requested.
        """

        # CREATE a new Vega-Lite Spec
        vl_genie_instance = VLGenie()

        # MAP the attributes to the DESIGN spec.
        for index, attr in enumerate(attr_list):
            dim = design["priority"][
                index]  # Dimension: x, y, color, size, tooltip, ...
            agg = design[dim]["agg"]  # Aggregate: sum, mean, ...
            datatype = self.nl4dv_instance.data_genie_instance.data_attribute_map[
                attr]["dataType"]

            # Update the design with the attribute. It could be referenced later.
            # NOTE(review): this mutates the caller's `design` dict — presumably callers pass a copy; confirm.
            design[dim]["attr"] = attr
            design[dim]["is_defined"] = True

            # Set the default VIS mark type. Note: Can be overridden later.
            vl_genie_instance.set_vis_type(design["vis_type"])

            # Set the encoding. Note: Can be overridden later.
            vl_genie_instance.set_encoding(dim, attr, datatype, agg)

            # Add this attribute's match score to the "by_attributes" score.
            vl_genie_instance.score_obj[
                "by_attributes"] += self.nl4dv_instance.extracted_attributes[
                    attr]["matchScore"]

        # If an attribute is dual-encoded (e.g. on the x axis as well as a count on the y axis),
        # a mandatory channel that got no attribute above borrows the attribute of the channel named by its "attr_ref".
        for encoding in design["mandatory"]:
            if not design[encoding]["is_defined"]:
                attr_reference = design[encoding]["attr_ref"]
                attr = design[attr_reference]["attr"]
                datatype = self.nl4dv_instance.data_genie_instance.data_attribute_map[
                    attr]["dataType"]
                agg = design[encoding]["agg"]
                vl_genie_instance.set_encoding(encoding, attr, datatype, agg)

        # APPLY the extracted tasks.
        # FILTER tasks are applied regardless of attr_list; every other task is skipped
        # unless all of its attributes are part of this combination.
        for task in self.nl4dv_instance.extracted_tasks:
            for task_instance in self.nl4dv_instance.extracted_tasks[task]:

                if task == "filter":
                    # If there is NO Datatype Ambiguity, then apply the Filter Task. Else let it be the way it is.
                    # Datatype ambiguity example: "Content Rating > 5" is NOT possible because Content Rating is a Nominal attribute.
                    if not (task_instance["isValueAmbiguous"]
                            and task_instance["meta"]["value_ambiguity_type"]
                            == "datatype"):
                        vl_genie_instance.set_task(None, task_instance)
                        vl_genie_instance.score_obj[
                            "by_task"] += task_instance["matchScore"]

                else:
                    # If a NON-FILTER task has an attribute that is NOT in the combo (means it was ambiguous), skip this task instance.
                    # E.g. a task on IMDB Rating should not be applied to a VIS design that only shows Rotten Tomatoes Rating.
                    if any([
                            attr not in attr_list
                            for attr in task_instance["attributes"]
                    ]):
                        continue

                    if task == "derived_value":
                        # If there is NO Datatype Ambiguity, then apply the Derived Value Task. Else let it be the way it is.
                        # Datatype ambiguity example: "SUM(Genre)" is NOT possible because Genre is a Nominal attribute.
                        if not (task_instance["isValueAmbiguous"] and
                                task_instance["meta"]["value_ambiguity_type"]
                                == "datatype"):
                            # Histogram / boxplot designs are rejected outright for a derived value task.
                            if design["vis_type"] in ["histogram", "boxplot"]:
                                return None

                            # Iterate over all mandatory encodings and if the corresponding attribute matches that in the task, then UPDATE the "aggregate".
                            # The score is incremented once per matching channel.
                            for dimension in design["mandatory"]:
                                attr = design[dimension]["attr"]
                                if attr in task_instance["attributes"]:
                                    vl_genie_instance.score_obj[
                                        "by_task"] += task_instance[
                                            "matchScore"]

                                    datatype = self.nl4dv_instance.data_genie_instance.data_attribute_map[
                                        attr]["dataType"]
                                    # Translate the task's operator into the aggregate name via constants.operator_symbol_mapping.
                                    new_agg = constants.operator_symbol_mapping[
                                        task_instance["operator"]]
                                    vl_genie_instance.set_encoding(
                                        dimension, attr, datatype, new_agg)

                    elif task == "distribution":
                        # Only the score changes: increment by_task.
                        vl_genie_instance.score_obj[
                            "by_task"] += task_instance["matchScore"]

                    elif task == "correlation":
                        # For correlations, there should be NO aggregation between the attributes
                        for dimension in design['mandatory']:
                            if design[dimension]["attr"] in task_instance[
                                    "attributes"]:
                                # If there exists some aggregate already, then this is a CONFLICT and we should DEDUCT points
                                if design[dimension]['agg'] is not None:
                                    vl_genie_instance.score_obj["by_task"] -= 1

                                # Drop the aggregate both in the design template and in the spec.
                                design[dimension]['agg'] = None
                                vl_genie_instance.set_encoding_aggregate(
                                    dimension, None)

                        # A correlation task implies a scatterplot.
                        vl_genie_instance.set_vis_type("scatterplot")

                        # Increment score by_task
                        vl_genie_instance.score_obj[
                            "by_task"] += task_instance["matchScore"]

                    elif task == "find_extremum":
                        # If there is NO Datatype Ambiguity, then apply the Find Extremum Task. Else let it be the way it is.
                        if not (task_instance["isValueAmbiguous"] and
                                task_instance["meta"]["value_ambiguity_type"]
                                == "datatype"):

                            # Iterate over all mandatory encodings and if the corresponding attribute matches that in the task,
                            # then bump the score and apply the task to that channel.
                            for dimension in design["mandatory"]:
                                attr = design[dimension]["attr"]
                                if attr in task_instance["attributes"]:
                                    vl_genie_instance.score_obj[
                                        "by_task"] += task_instance[
                                            "matchScore"]
                                    vl_genie_instance.set_task(
                                        dimension, task_instance)

                    elif task == "trend":
                        # Trend tasks currently change neither the spec nor the score.
                        pass

        # If explicit VIS is specified, then override it
        # TODO:- There a few vis (mark) types that are NOT sensible, e.g. asking a scatterplot for a piechart design or a linechart for a boxplot base design. Filter these designs out!
        if self.nl4dv_instance.extracted_vis_type:

            # A design with PIECHART / DONUTCHART as a base should NOT be attempted to be transformed for a different mark type. Note: It has thetas, colors as opposed to x, y.
            if self.nl4dv_instance.extracted_vis_type not in [
                    "piechart", "donutchart"
            ] and design["vis_type"] in ["piechart", "donutchart"]:
                return None

            # PIE CHART + DONUT CHART
            # Only supported for the 2-attribute {QN, QO} combinations
            if self.nl4dv_instance.extracted_vis_type in [
                    "piechart", "donutchart"
            ]:
                if attr_type_combo not in ["QN", "QO"]:
                    print(
                        "Pie Chart not compatible / not supported for your query."
                    )
                    return None

            # HISTOGRAM
            # Only supported for single-attribute combinations
            elif self.nl4dv_instance.extracted_vis_type == "histogram":
                if attr_type_combo not in ["Q", "N", "O", "T"]:
                    print(
                        "Histogram not compatible / not supported for your query."
                    )
                    return None

            # STRIP PLOT
            elif self.nl4dv_instance.extracted_vis_type == "stripplot":
                # Stripplot is indicative of a DISTRIBUTION Task. All aggregations should be removed.
                for dimension in design['mandatory']:
                    # If there exists some aggregate already, then this is a CONFLICT and we should DEDUCT points
                    if design[dimension]['agg'] is not None:
                        vl_genie_instance.score_obj["by_vis"] -= 1

                    design[dimension]['agg'] = None
                    vl_genie_instance.set_encoding_aggregate(dimension, None)

            # BAR CHART (no extra constraints)
            elif self.nl4dv_instance.extracted_vis_type == "barchart":
                pass

            # LINE CHART (no extra constraints)
            elif self.nl4dv_instance.extracted_vis_type == "linechart":
                pass

            # AREA CHART
            # A barchart base design is not converted into an area chart.
            elif self.nl4dv_instance.extracted_vis_type == "areachart":
                if design["vis_type"] == "barchart":
                    return None

            # SCATTERPLOT
            elif self.nl4dv_instance.extracted_vis_type == "scatterplot":
                # For scatterplots, treat it as a Correlation task. There should be NO aggregation between the attributes.
                for dimension in design['mandatory']:
                    # If there exists some aggregate already, then this is a CONFLICT and we should DEDUCT points
                    # (note: the deduction goes to "by_task" here, whereas the stripplot branch deducts from "by_vis").
                    if design[dimension]['agg'] is not None:
                        vl_genie_instance.score_obj["by_task"] -= 1

                    design[dimension]['agg'] = None
                    vl_genie_instance.set_encoding_aggregate(dimension, None)

                    # NOTE(review): this is repeated for every dimension, and the vis type is set again
                    # from extracted_vis_type right after this if/elif chain.
                    vl_genie_instance.set_vis_type("scatterplot")

            # BOX PLOT
            elif self.nl4dv_instance.extracted_vis_type == "boxplot":
                if "Q" not in attr_type_combo:
                    print(
                        "Box Plot requires at least one continuous axis. Not compatible / supported for your query."
                    )
                    return None

            # Set the explicitly requested VIS mark type in the vl_genie_instance
            vl_genie_instance.set_vis_type(
                self.nl4dv_instance.extracted_vis_type)

            # Reward the design because the user/developer explicitly requested this vis type.
            vl_genie_instance.score_obj[
                "by_vis"] += self.nl4dv_instance.match_scores['vis'][
                    'explicit']

        else:
            # There are a few designs tagged as "not_suggested_by_default",
            # e.g., in absence of a task, there's no need to show both DERIVED_VALUE (barchart + mean) and DISTRIBUTION (stripplot) implicit tasked visualizations
            if design["not_suggested_by_default"]:
                return None

        # Encode the label attribute as a TOOLTIP to show the dataset label on hover.
        # Note (from the original author): this is ONLY meant to be added when there is NO aggregation, i.e., all data points are visible;
        # that condition is not checked here — presumably add_label_attribute_as_tooltip handles it; confirm.
        if self.nl4dv_instance.label_attribute is not None:
            vl_genie_instance.add_label_attribute_as_tooltip(
                self.nl4dv_instance.label_attribute)

        # AESTHETICS
        # ------------------
        # Format ticks (e.g. 10M, 1k, ... ) for Quantitative axes
        vl_genie_instance.add_tick_format()
        # ------------------

        # Enable Tooltips
        # ------------------
        vl_genie_instance.add_tooltip()
        # ------------------

        # Finally, set the data on the spec.
        # ------------------
        vl_genie_instance.set_data(self.nl4dv_instance.data_url)
        # ------------------

        return vl_genie_instance