def create_datatable_vis(self, sorted_combo):
    """Build the fallback "datatable" visualization, applying FILTER tasks.

    One text column is created per attribute in ``sorted_combo``. Each
    attribute's ``matchScore`` is added to the ``by_attributes`` score, and
    every FILTER task instance without a datatype ambiguity is applied once
    per column and scored under ``by_task``. The ``by_task`` score is then
    divided by the number of columns so that it is counted once per VIS.

    NOTE(review): another ``create_datatable_vis`` definition appears later
    in this file; if both live in the same scope, the later one wins.

    Args:
        sorted_combo: Sized iterable of attribute names; each must be a key
            of ``self.nl4dv_instance.extracted_attributes``.

    Returns:
        dict: Visualization object with the keys ``score`` (sum of all
        values in the score object), ``scoreObj``, ``attributes``,
        ``visType`` (always ``"datatable"``), ``queryPhrase`` (always
        ``None``), ``tasks`` (keys of the extracted tasks),
        ``inferenceType`` (``'implicit'`` when no vis type was extracted,
        else ``'explicit'``) and ``vlSpec``.
    """
    nl4dv = self.nl4dv_instance

    # Create a new base Vega-Lite Spec
    vl_genie_instance = VLGenie()

    # Set the explicit_vis_type to a datatable and then make relevant transforms there.
    vl_genie_instance.set_vis_type("datatable")

    # Only FILTER tasks are applied to the DataTable fallback visualization.
    # Instances with a Datatype Ambiguity are skipped, e.g. "Content Rating > 5"
    # is NOT possible because Content Rating is a Nominal attribute.
    # This selection does not depend on the column, so compute it once.
    applicable_filters = [
        task_instance
        for task_instance in nl4dv.extracted_tasks.get("filter", [])
        if not (task_instance["isValueAmbiguous"]
                and task_instance["meta"]["value_ambiguity_type"] == "datatype")
    ]

    for attr in sorted_combo:
        # Create a column with mark type = text
        vl_genie_instance.create_and_add_column_to_datatable(attr)

        # Append the scores
        vl_genie_instance.score_obj["by_attributes"] += nl4dv.extracted_attributes[attr]["matchScore"]

        # Apply the filters for this column; the score is accumulated once per column here.
        for task_instance in applicable_filters:
            vl_genie_instance.set_tasks_to_datatable(None, task_instance)
            vl_genie_instance.score_obj["by_task"] += task_instance["matchScore"]

    # Since the `by_task` score was counted once per column, normalize it to a VIS level.
    # With no columns nothing was accumulated, so skip the division instead of
    # raising ZeroDivisionError.
    column_count = len(sorted_combo)
    if column_count:
        vl_genie_instance.score_obj["by_task"] /= column_count

    # Set the data
    vl_genie_instance.set_data(nl4dv.data_url, nl4dv.data_url_type)

    # Create the Visualization object to return
    vis_object = {
        "score": sum(vl_genie_instance.score_obj.values()),
        "scoreObj": vl_genie_instance.score_obj,
        "attributes": sorted_combo,
        "visType": "datatable",
        "queryPhrase": None,
        "tasks": list(nl4dv.extracted_tasks.keys()),
        "inferenceType": 'implicit' if nl4dv.extracted_vis_type is None else 'explicit',
        "vlSpec": vl_genie_instance.vl_spec
    }

    return vis_object
def create_datatable_vis(self, sorted_combo):
    """Assemble the "datatable" visualization object for the given attributes.

    A text column is added to a fresh spec for every attribute in
    ``sorted_combo`` and the attribute's ``matchScore`` is accumulated into
    the spec's ``by_attributes`` score. No tasks are applied here.

    NOTE(review): an earlier definition with the same name exists in this
    file; if both share a scope, this later one is the one that is bound.

    Args:
        sorted_combo: Iterable of attribute names; each must be a key of
            ``self.nl4dv_instance.extracted_attributes``.

    Returns:
        dict: Visualization object with the keys ``score``, ``scoreObj``,
        ``attributes``, ``visType``, ``queryPhrase``, ``tasks``,
        ``inferenceType`` and ``vlSpec``.
    """
    nl4dv = self.nl4dv_instance

    # Fresh base Vega-Lite spec, typed as a datatable up front
    genie = VLGenie()
    genie.set_vis_type("datatable")

    for column_attr in sorted_combo:
        # One text-mark column per attribute
        genie.create_and_add_column_to_datatable(column_attr)

        # Accumulate the attribute's match score
        attr_score = nl4dv.extracted_attributes[column_attr]["matchScore"]
        genie.score_obj["by_attributes"] += attr_score

    # Attach the dataset
    genie.set_data(nl4dv.data_url)

    # The vis type is "explicit" only when the query itself named one
    if nl4dv.extracted_vis_type is None:
        inference_type = 'implicit'
    else:
        inference_type = 'explicit'

    total_score = sum(genie.score_obj.values())
    task_names = list(nl4dv.extracted_tasks.keys())

    return {
        "score": total_score,
        "scoreObj": genie.score_obj,
        "attributes": sorted_combo,
        "visType": "datatable",
        "queryPhrase": None,
        "tasks": task_names,
        "inferenceType": inference_type,
        "vlSpec": genie.vl_spec,
    }
def get_vis(self, design, attr_type_combo, attr_list):
    """Instantiate a Vega-Lite spec for one design and one attribute combination.

    The attributes are mapped onto the design's encoding channels, the
    extracted tasks are applied (adjusting aggregates, mark type and the
    ``by_task`` score), and an explicitly requested vis type, if any,
    overrides the design's default mark type.

    NOTE(review): the block nesting below was reconstructed from the comments
    and the data flow of a whitespace-collapsed source; confirm it against the
    intended layout.

    Args:
        design: Mapping describing the design. This function reads its
            ``"priority"``, ``"mandatory"``, ``"vis_type"`` and
            ``"not_suggested_by_default"`` entries as well as per-channel
            entries with ``"agg"``, ``"attr"``, ``"is_defined"`` and
            ``"attr_ref"``. It is MUTATED in place (``"attr"``,
            ``"is_defined"`` and ``"agg"`` of channels are written).
        attr_type_combo: Datatype combination of the attributes; compared
            against values such as ``"QN"`` or ``"Q"`` and tested for
            containing ``"Q"``.
        attr_list: Attribute names, positionally matched with
            ``design["priority"]``.

    Returns:
        The populated ``VLGenie`` instance, or ``None`` when the design is
        rejected (incompatible with a task or with the explicitly requested
        vis type, or not suggested by default).
    """
    # CREATE a new Vega-Lite Spec
    vl_genie_instance = VLGenie()

    # MAP the attributes to the DESIGN spec.
    for index, attr in enumerate(attr_list):
        dim = design["priority"][index]  # Dimension: x, y, color, size, tooltip, ...
        agg = design[dim]["agg"]  # Aggregate: sum, mean, ...
        datatype = self.nl4dv_instance.data_genie_instance.data_attribute_map[attr]["dataType"]

        # Update the design with the attribute. It could be referenced later.
        design[dim]["attr"] = attr
        design[dim]["is_defined"] = True

        # Set the default VIS mark type. Note: Can be overridden later.
        vl_genie_instance.set_vis_type(design["vis_type"])

        # Set the encoding. Note: Can be overridden later.
        vl_genie_instance.set_encoding(dim, attr, datatype, agg)

        # Set Score
        vl_genie_instance.score_obj["by_attributes"] += self.nl4dv_instance.extracted_attributes[attr]["matchScore"]

    # If an attribute is dual-encoded e.g. x axis as well as count of y axis, the attribute is supposed to be encoded to both channels.
    for encoding in design["mandatory"]:
        if not design[encoding]["is_defined"]:
            # Borrow the attribute from the channel this encoding refers to.
            attr_reference = design[encoding]["attr_ref"]
            attr = design[attr_reference]["attr"]
            datatype = self.nl4dv_instance.data_genie_instance.data_attribute_map[attr]["dataType"]
            agg = design[encoding]["agg"]
            vl_genie_instance.set_encoding(encoding, attr, datatype, agg)

    # ENSURE if COMBOS has the attributes to which the TASK is applied. If NOT, don't do anything.
    for task in self.nl4dv_instance.extracted_tasks:
        for task_instance in self.nl4dv_instance.extracted_tasks[task]:
            if task == "filter":
                # If there is NO Datatype Ambiguity, then apply the Filter Task. Else let it be the way it is.
                # Datatype ambiguity example: "Content Rating > 5" is NOT possible because Content Rating is a Nominal attribute.
                if not (task_instance["isValueAmbiguous"] and task_instance["meta"]["value_ambiguity_type"] == "datatype"):
                    vl_genie_instance.set_task(None, task_instance)
                    vl_genie_instance.score_obj["by_task"] += task_instance["matchScore"]
            else:
                # If a NON-FILTER task has an attribute that is NOT in the combos (means it was ambiguous), then there is no need to apply this task.
                # E.g. We don't want IMDB Rating > 5 to be applied to a VIS design with Rotten Tomatoes Rating
                if any([attr not in attr_list for attr in task_instance["attributes"]]):
                    continue

                if task == "derived_value":
                    # If there is NO Datatype Ambiguity, then apply the Derived Value Task. Else let it be the way it is.
                    # Datatype ambiguity example: "SUM(Genre)" is NOT possible because Genre is a Nominal attribute.
                    if not (task_instance["isValueAmbiguous"] and task_instance["meta"]["value_ambiguity_type"] == "datatype"):
                        # Histogram and boxplot designs are rejected outright for a derived value task.
                        if design["vis_type"] in ["histogram", "boxplot"]:
                            return None

                        # Iterate over all encodings and if the corresponding attribute matches that in the task, then UPDATE the "aggregate".
                        for dimension in design["mandatory"]:
                            attr = design[dimension]["attr"]
                            if attr in task_instance["attributes"]:
                                vl_genie_instance.score_obj["by_task"] += task_instance["matchScore"]
                                datatype = self.nl4dv_instance.data_genie_instance.data_attribute_map[attr]["dataType"]
                                new_agg = constants.operator_symbol_mapping[task_instance["operator"]]
                                vl_genie_instance.set_encoding(dimension, attr, datatype, new_agg)

                elif task == "distribution":
                    # Increment score by_task
                    vl_genie_instance.score_obj["by_task"] += task_instance["matchScore"]

                elif task == "correlation":
                    # For correlations, there should be NO aggregation between the attributes
                    for dimension in design['mandatory']:
                        if design[dimension]["attr"] in task_instance["attributes"]:
                            # If there exists some aggregate already, then this is a CONFLICT and we should DEDUCT points
                            if design[dimension]['agg'] is not None:
                                vl_genie_instance.score_obj["by_task"] -= 1

                            design[dimension]['agg'] = None
                            vl_genie_instance.set_encoding_aggregate(dimension, None)

                    # Correlation < scatterplot (mark type = point)
                    vl_genie_instance.set_vis_type("scatterplot")

                    # Increment score by_task
                    vl_genie_instance.score_obj["by_task"] += task_instance["matchScore"]

                elif task == "find_extremum":
                    # If there is NO Datatype Ambiguity, then apply the Find Extremum Task. Else let it be the way it is.
                    if not (task_instance["isValueAmbiguous"] and task_instance["meta"]["value_ambiguity_type"] == "datatype"):
                        # Iterate over all encodings and if the corresponding attribute matches that in the task, then apply the task to that encoding.
                        for dimension in design["mandatory"]:
                            attr = design[dimension]["attr"]
                            if attr in task_instance["attributes"]:
                                vl_genie_instance.score_obj["by_task"] += task_instance["matchScore"]
                                vl_genie_instance.set_task(dimension, task_instance)

                elif task == "trend":
                    # Nothing to adjust for a trend task.
                    pass

    # If explicit VIS is specified, then override it
    # TODO:- There a few vis (mark) types that are NOT sensible, e.g. asking a scatterplot for a piechart design or a linechart for a boxplot base design. Filter these designs out!
    if self.nl4dv_instance.extracted_vis_type:
        # A design with PIECHART / DONUTCHART as a base should NOT be attempted to be transformed for a different mark type. Note: It has thetas, colors as opposed to x, y.
        if self.nl4dv_instance.extracted_vis_type not in ["piechart", "donutchart"] and design["vis_type"] in ["piechart", "donutchart"]:
            return None

        # PIE CHART + DONUT CHART
        # Can happen between 2 attributes {QN, QO} combinations
        if self.nl4dv_instance.extracted_vis_type in ["piechart", "donutchart"]:
            if attr_type_combo not in ["QN", "QO"]:
                print("Pie Chart not compatible / not supported for your query.")
                return None

        # HISTOGRAM
        elif self.nl4dv_instance.extracted_vis_type == "histogram":
            if attr_type_combo not in ["Q", "N", "O", "T"]:
                print("Histogram not compatible / not supported for your query.")
                return None

        # STRIP PLOT
        elif self.nl4dv_instance.extracted_vis_type == "stripplot":
            # Stripplot is indicative of a DISTRIBUTION Task. All aggregations should be removed.
            for dimension in design['mandatory']:
                # If there exists some aggregate already, then this is a CONFLICT and we should DEDUCT points
                if design[dimension]['agg'] is not None:
                    vl_genie_instance.score_obj["by_vis"] -= 1

                design[dimension]['agg'] = None
                vl_genie_instance.set_encoding_aggregate(dimension, None)

        # BAR CHART
        elif self.nl4dv_instance.extracted_vis_type == "barchart":
            pass

        # LINE CHART
        elif self.nl4dv_instance.extracted_vis_type == "linechart":
            pass

        # AREA CHART
        elif self.nl4dv_instance.extracted_vis_type == "areachart":
            # A barchart base design is not converted into an area chart.
            if design["vis_type"] == "barchart":
                return None

        # SCATTERPLOT
        elif self.nl4dv_instance.extracted_vis_type == "scatterplot":
            # For scatterplots, treat it as a Correlation task. There should be NO aggregation between the attributes,
            # and mark type should be "point"
            for dimension in design['mandatory']:
                # If there exists some aggregate already, then this is a CONFLICT and we should DEDUCT points
                # (deducted from `by_task`, in line with treating this as a Correlation task).
                if design[dimension]['agg'] is not None:
                    vl_genie_instance.score_obj["by_task"] -= 1

                design[dimension]['agg'] = None
                vl_genie_instance.set_encoding_aggregate(dimension, None)

            # Correlation < scatterplot (mark type = point)
            vl_genie_instance.set_vis_type("scatterplot")

        # BOX PLOT
        elif self.nl4dv_instance.extracted_vis_type == "boxplot":
            if "Q" not in attr_type_combo:
                print("Box Plot requires at least one continuous axis. Not compatible / supported for your query.")
                return None

        # Set the VIS mark type in the vl_genie_instance
        vl_genie_instance.set_vis_type(self.nl4dv_instance.extracted_vis_type)

        # Score for the explicit vis type: just here because the user/developer explicitly requested this
        vl_genie_instance.score_obj["by_vis"] += self.nl4dv_instance.match_scores['vis']['explicit']

    else:
        # There are a few designs tagged as "not_suggested_by_default",
        # e.g., in absence of a task, there's no need to show both DERIVED_VALUE (barchart + mean) and DISTRIBUTION (stripplot) implicit tasked visualizations
        if design["not_suggested_by_default"]:
            return None

    # Encode the label attribute as a TOOLTIP to show the dataset label on hover.
    # Note: This will ONLY be added when there is NO aggregation, i.e., all data points are visible.
    if self.nl4dv_instance.label_attribute is not None:
        vl_genie_instance.add_label_attribute_as_tooltip(self.nl4dv_instance.label_attribute)

    # AESTHETICS
    # ------------------
    # Format ticks (e.g. 10M, 1k, ... ) for Quantitative axes
    vl_genie_instance.add_tick_format()
    # ------------------

    # Enable Tooltips
    # ------------------
    vl_genie_instance.add_tooltip()
    # ------------------

    # Finally, let's set the data and Rock 'n' Roll!
    # ------------------
    vl_genie_instance.set_data(self.nl4dv_instance.data_url)
    # ------------------

    return vl_genie_instance