Example #1
0
def generalize(ldf):
    """
    Generates all possible visualizations when one attribute or filter from the current vis is removed.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Generalize action.
        The "collection" entry is an empty list when the intent has no attribute
        clause or more than four of them.
    """
    # For benchmarking: remember the start time only when benchmarking is on.
    # None doubles as the "disabled" marker so the final report never reads an unbound name.
    tic = time.perf_counter() if ldf.toggle_benchmarking else None

    output = []
    excluded_columns = []
    # Attribute clauses are the ones without a filter value; the synthetic "Record" clause is ignored.
    attributes = [
        clause for clause in ldf.intent
        if clause.value == "" and clause.attribute != "Record"
    ]
    filters = utils.get_filter_specs(ldf.intent)

    fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters]
    # str() keeps the join below from raising when a clause attribute is a list of columns.
    attr_str = [str(clause.attribute) for clause in attributes]
    intended_attrs = '<p class="highlight-intent">' + ', '.join(attr_str + fltr_str) + '</p>'

    recommendation = {
        "action": "Generalize",
        "description": f"Remove an attribute or filter from {intended_attrs}.",
    }
    # If we do not have enough column attributes, or have too many, return no views.
    if len(attributes) < 1 or len(attributes) > 4:
        recommendation["collection"] = []
        return recommendation

    # For each column specification, create a copy of the ldf's intent with that column
    # removed and append the resulting view to the output.
    if len(attributes) > 1:
        for clause in attributes:
            columns = clause.attribute
            if isinstance(columns, list):
                candidates = columns
            elif isinstance(columns, str):
                candidates = [columns]
            else:
                # Other attribute types were never turned into a view; skip them explicitly.
                candidates = []
            for column in candidates:
                if column in excluded_columns:
                    continue
                temp_view = Vis(ldf.copy_intent(), score=1)
                temp_view.remove_column_from_spec(column, remove_first=True)
                excluded_columns.append(column)
                # Append only freshly built views: appending unconditionally would re-add a
                # stale view or fail when no view has been built yet.
                output.append(temp_view)

    # For each filter specification, create a copy of the ldf's current vis with that
    # filter removed and append the resulting view to the output.
    for clause in filters:
        temp_view = Vis(
            ldf.current_vis[0]._inferred_intent.copy(),
            source=ldf,
            title="Overall",
            score=0,
        )
        temp_view.remove_filter_from_spec(clause.value)
        output.append(temp_view)

    vc = lux.vis.VisList.VisList(output, source=ldf)
    # Interestingness scoring is skipped on purpose: Generalize yields very few vis, and the
    # preset scores keep the order "removed attribute" first, then "removed filter".
    vc.remove_duplicates()
    vc.sort(remove_invalid=True)
    recommendation["collection"] = vc

    # For benchmarking
    if tic is not None:
        toc = time.perf_counter()
        print(f"Performed generalize action in {toc - tic:0.4f} seconds")
    return recommendation
Example #2
0
    def determine_encoding(ldf: LuxDataFrame, vis: Vis):
        """
        Populates Vis with the appropriate mark type and channel information based on ShowMe logic
        Currently support up to 3 dimensions or measures

        Parameters
        ----------
        ldf : lux.core.frame
                LuxDataFrame with underspecified intent
        vis : lux.vis.Vis
                Vis whose ``_mark``, ``_min_max`` and ``_inferred_intent`` are updated in place

        Returns
        -------
        None

        Notes
        -----
        Implementing automatic encoding from Tableau's VizQL
        Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
        Show Me: Automatic presentation for visual analysis.
        IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
        https://doi.org/10.1109/TVCG.2007.70594
        """
        # Count number of measures and dimensions
        ndim = vis._ndim
        nmsr = vis._nmsr
        # preserve to add back to _inferred_intent later
        filters = utils.get_filter_specs(vis._inferred_intent)

        # Helper function (TODO: Move this into utils)
        def line_or_bar_or_geo(ldf, dimension: Clause, measure: Clause):
            """Return ``(mark, channel mapping)`` for one dimension plotted against one measure."""
            dim_type = dimension.data_type
            # If no aggregation function is specified, then default as average
            if measure.aggregation == "":
                measure.set_aggregation("mean")
            # Ordered dimensions become line charts ("ordinal" was previously misspelled,
            # so only temporal dimensions could ever match).
            if dim_type in ("temporal", "ordinal"):
                return "line", {"x": dimension, "y": measure}
            # unordered categorical
            # if cardinality larger than 5 then sort bars
            if ldf.cardinality[dimension.attribute] > 5:
                dimension.sort = "ascending"
            if utils.like_geo(dimension.get_attr()):
                return "geographical", {"x": dimension, "y": measure}
            return "bar", {"x": measure, "y": dimension}

        # ShowMe logic + additional heuristics
        # Synthetic measure used whenever a chart needs a count on one axis.
        count_col = Clause(
            attribute="Record",
            aggregation="count",
            data_model="measure",
            data_type="quantitative",
        )
        auto_channel = {}
        if ndim == 0 and nmsr == 1:
            # Histogram with Count
            measure = vis.get_attr_by_data_model("measure", exclude_record=True)[0]
            # Add the count clause when the intent has no Record clause yet
            # (the former `< 0` length check could never be true).
            if len(vis.get_attr_by_attr_name("Record")) == 0:
                vis._inferred_intent.append(count_col)
            # If no bin specified, then default as 10
            if measure.bin_size == 0:
                measure.bin_size = 10
            auto_channel = {"x": measure, "y": count_col}
            vis._mark = "histogram"
        elif ndim == 1 and (nmsr == 0 or nmsr == 1):
            # Line or Bar Chart
            if nmsr == 0:
                vis._inferred_intent.append(count_col)
            dimension = vis.get_attr_by_data_model("dimension")[0]
            measure = vis.get_attr_by_data_model("measure")[0]
            vis._mark, auto_channel = line_or_bar_or_geo(ldf, dimension, measure)
        elif ndim == 2 and (nmsr == 0 or nmsr == 1):
            # Line or Bar chart broken down by the dimension
            dimensions = vis.get_attr_by_data_model("dimension")
            d1 = dimensions[0]
            d2 = dimensions[1]
            # The dimension with the lower cardinality is moved to the color channel.
            if ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]:
                vis.remove_column_from_spec(d1.attribute)
                dimension = d2
                color_attr = d1
            else:
                # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one
                if d1.attribute == d2.attribute:
                    vis._inferred_intent.pop(0)
                else:
                    vis.remove_column_from_spec(d2.attribute)
                dimension = d1
                color_attr = d2
            # Colored Bar/Line chart with Count as default measure.
            # For pre-aggregated frames no mark or channel is assigned in this branch.
            if not ldf.pre_aggregated:
                if nmsr == 0:
                    vis._inferred_intent.append(count_col)
                measure = vis.get_attr_by_data_model("measure")[0]
                vis._mark, auto_channel = line_or_bar_or_geo(ldf, dimension, measure)
                auto_channel["color"] = color_attr
        elif ndim == 0 and nmsr == 2:
            # Scatterplot
            vis._mark = "scatter"
            vis._inferred_intent[0].set_aggregation(None)
            vis._inferred_intent[1].set_aggregation(None)
            auto_channel = {
                "x": vis._inferred_intent[0],
                "y": vis._inferred_intent[1],
            }
        elif ndim == 1 and nmsr == 2:
            # Scatterplot broken down by the dimension
            measure = vis.get_attr_by_data_model("measure")
            m1 = measure[0]
            m2 = measure[1]

            # Scatter points are unaggregated. Clear the aggregation on the measures
            # themselves rather than on the first two intent entries, which may include
            # the dimension depending on the order of the intent.
            m1.set_aggregation(None)
            m2.set_aggregation(None)

            color_attr = vis.get_attr_by_data_model("dimension")[0]
            # NOTE(review): every other call passes an attribute name, this one passes the
            # Clause object. Left unchanged; confirm against remove_column_from_spec whether
            # `color_attr.attribute` is what is intended here.
            vis.remove_column_from_spec(color_attr)
            vis._mark = "scatter"
            auto_channel = {"x": m1, "y": m2, "color": color_attr}
        elif ndim == 0 and nmsr == 3:
            # Scatterplot with color
            vis._mark = "scatter"
            auto_channel = {
                "x": vis._inferred_intent[0],
                "y": vis._inferred_intent[1],
                "color": vis._inferred_intent[2],
            }
        # Keep only the min/max entries of the attributes that ended up on a channel.
        relevant_attributes = [clause.attribute for clause in auto_channel.values()]
        vis._min_max = {
            attr: ldf._min_max[attr]
            for attr in relevant_attributes
            if attr != "Record" and attr in ldf._min_max
        }
        if auto_channel:
            vis = Compiler.enforce_specified_channel(vis, auto_channel)
            # add back the preserved filters
            vis._inferred_intent.extend(filters)
Example #3
0
def test_remove(global_var):
    """Removing "Horsepower" from a two-attribute Vis leaves "Acceleration" first in the inferred intent."""
    car_data = pytest.car_df
    clauses = [lux.Clause("Horsepower"), lux.Clause("Acceleration")]
    chart = Vis(clauses, car_data)
    # remove_first=False is passed explicitly, exactly as the scenario under test requires.
    chart.remove_column_from_spec("Horsepower", remove_first=False)
    leading_clause = chart._inferred_intent[0]
    assert leading_clause.attribute == "Acceleration"
Example #4
0
def test_remove():
    """Removing "Horsepower" from a two-attribute Vis leaves "Acceleration" first in the inferred intent."""
    from lux.vis.Vis import Vis

    # The car dataset is read from a path relative to the working directory.
    car_data = pd.read_csv("lux/data/car.csv")
    clauses = [lux.Clause("Horsepower"), lux.Clause("Acceleration")]
    chart = Vis(clauses, car_data)
    chart.remove_column_from_spec("Horsepower", remove_first=False)
    leading_clause = chart._inferred_intent[0]
    assert leading_clause.attribute == "Acceleration"
Example #5
0
def generalize(ldf):
    """
    Generates all possible visualizations when one attribute or filter from the current vis is removed.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Generalize action.
            The "collection" entry is an empty list when the intent has no attribute
            clause or more than four of them.
    """
    output = []
    excluded_columns = []
    # Attribute clauses are the ones without a filter value; the synthetic "Record" clause is ignored.
    attributes = [
        clause for clause in ldf._intent if clause.value == "" and clause.attribute != "Record"
    ]
    filters = utils.get_filter_specs(ldf._intent)

    fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters]
    attr_str = [str(clause.attribute) for clause in attributes]
    intended_attrs = f'<p class="highlight-intent">{", ".join(attr_str + fltr_str)}</p>'

    recommendation = {
        "action": "Generalize",
        "description": f"Remove an attribute or filter from {intended_attrs}.",
        "long_description": f"Remove one aspect of the Current Vis. We can either remove an attribute or filter from {intended_attrs}.",
    }
    # to observe a more general trend
    # if we do not have enough column attributes or too many, return no vis.
    if len(attributes) < 1 or len(attributes) > 4:
        recommendation["collection"] = []
        return recommendation

    # for each column specification, create a copy of the ldf's intent with that column removed
    # and append the resulting vis to the output
    if len(attributes) > 1:
        for clause in attributes:
            columns = clause.attribute
            # A clause may name a single column or a list of columns; treat both uniformly.
            candidates = columns if isinstance(columns, list) else [columns]
            for column in candidates:
                if column in excluded_columns:
                    continue
                temp_vis = Vis(ldf.copy_intent(), score=1)
                temp_vis.remove_column_from_spec(column, remove_first=True)
                excluded_columns.append(column)
                # Append only freshly built vis: appending unconditionally would re-add a
                # stale vis or fail when no vis has been built yet.
                output.append(temp_vis)

    # for each filter specification, create a copy of the ldf's current vis with that filter removed
    # and append the resulting vis to the output
    for clause in filters:
        temp_vis = Vis(
            ldf.current_vis[0]._inferred_intent.copy(),
            source=ldf,
            title="Overall",
            score=0,
        )
        temp_vis.remove_filter_from_spec(clause.value)
        output.append(temp_vis)

    vlist = lux.vis.VisList.VisList(output, source=ldf)
    # Interestingness scoring is skipped on purpose: Generalize yields very few vis, and the
    # preset scores keep the order "removed attribute" first, then "removed filter".
    vlist.remove_duplicates()
    vlist.sort(remove_invalid=True)
    # Drop any vis whose score is -1.
    vlist._collection = [vis for vis in vlist._collection if vis.score != -1]
    recommendation["collection"] = vlist
    return recommendation