Example #1
0
def test_vis_custom_aggregation_as_str():
    """Check that a string aggregation given via lux.Clause is reported by the Vis measure clause."""
    df = pd.read_csv("lux/data/college.csv")

    vis = Vis(["HighestDegree", lux.Clause("AverageCost", aggregation="max")], df)
    measure = vis.get_attr_by_data_model("measure")[0]
    # Both the aggregation and its name are expected to equal the string that was passed in.
    assert measure.aggregation == "max"
    assert measure._aggregation_name == "max"
Example #2
0
def test_vis_custom_aggregation_as_numpy_func():
    """Check that a numpy callable given as aggregation is reported by the Vis measure clause."""
    df = pd.read_csv("lux/data/college.csv")
    from lux.vis.Vis import Vis
    import numpy as np

    cost_clause = lux.Clause("AverageCost", aggregation=np.ptp)
    vis = Vis(["HighestDegree", cost_clause], df)

    # The callable itself is expected back as the aggregation ...
    reported_aggregation = vis.get_attr_by_data_model("measure")[0].aggregation
    assert reported_aggregation == np.ptp
    # ... and the aggregation name is expected to be "ptp".
    reported_name = vis.get_attr_by_data_model("measure")[0]._aggregation_name
    assert reported_name == "ptp"
Example #3
0
def test_vis_custom_aggregation_as_str(global_var):
    """Check that a string aggregation given via lux.Clause is reported by the Vis measure clause."""
    df = pytest.college_df

    vis = Vis(["HighestDegree", lux.Clause("AverageCost", aggregation="max")], df)
    measure = vis.get_attr_by_data_model("measure")[0]
    # Both the aggregation and its name are expected to equal the string that was passed in.
    assert measure.aggregation == "max"
    assert measure._aggregation_name == "max"
Example #4
0
def deviation_from_overall(vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_attribute: str) -> float:
    """
    Difference in bar chart/histogram shape from overall chart
    Note: this function assumes that the filtered vis.data is operating on the same range as the unfiltered vis.data.

    Parameters
    ----------
    vis : Vis
    ldf : LuxDataFrame
    filter_specs : list
            List of filters from the Vis
    msr_attribute : str
            The attribute name of the measure value of the chart

    Returns
    -------
    float
            Score describing how different the vis is from the overall vis.
            0 is returned when the filtered measure column sums to zero.

    Raises
    ------
    AssertionError
            If the filtered and unfiltered vis data have different lengths.
    """
    v_filter_size = get_filtered_size(filter_specs, ldf)
    v_size = len(vis.data)
    v_filter = vis.data[msr_attribute]
    total = v_filter.sum()
    # Bail out before normalizing so a zero total is never used as a divisor.
    if total == 0:
        return 0
    v_filter = v_filter / total  # normalize by total to get ratio

    # Generate an "Overall" Vis (TODO: This is computed multiple times for every vis, alternative is to
    # directly access df.current_vis but we do not have guarantee that will always be unfiltered vis
    # (in the non-Filter action scenario))
    import copy

    unfiltered_vis = copy.copy(vis)
    # Remove filters, keep only attribute intent
    unfiltered_vis._inferred_intent = utils.get_attrs_specs(vis._inferred_intent)
    ldf.executor.execute([unfiltered_vis], ldf)

    v = unfiltered_vis.data[msr_attribute]
    v = v / v.sum()
    assert len(v) == len(v_filter), "Data for filtered and unfiltered vis have unequal length."
    sig = v_filter_size / v_size  # significance factor

    rank_sig = 1  # category measure value ranking significance factor
    # if the vis is a barchart, count how many categories' rank, based on measure value,
    # changes after the filter is applied
    if vis.mark == "bar":
        dim_list = vis.get_attr_by_data_model("dimension")

        # use Pandas rank function to calculate rank positions for each category
        v_rank = unfiltered_vis.data.rank()
        v_filter_rank = vis.data.rank()
        # go through and count the number of ranking changes between the filtered and unfiltered data
        num_categories = ldf.cardinality[dim_list[0].attribute]
        # NOTE(review): the loop stops at num_categories - 2, so the last category is never
        # compared — confirm this is intended before changing it.
        for r in range(0, num_categories - 1):
            if v_rank[msr_attribute][r] != v_filter_rank[msr_attribute][r]:
                rank_sig += 1
        # normalize ranking significance factor
        rank_sig = rank_sig / num_categories

    from scipy.spatial.distance import euclidean

    # Euclidean distance as L2 function, weighted by both significance factors
    return sig * rank_sig * euclidean(v, v_filter)
Example #5
0
def test_vis_custom_aggregation_as_numpy_func(global_var):
    """Check that a numpy callable given as aggregation is reported by the Vis measure clause."""
    df = pytest.college_df
    from lux.vis.Vis import Vis
    import numpy as np

    cost_clause = lux.Clause("AverageCost", aggregation=np.ptp)
    vis = Vis(["HighestDegree", cost_clause], df)

    # The callable itself is expected back as the aggregation ...
    reported_aggregation = vis.get_attr_by_data_model("measure")[0].aggregation
    assert reported_aggregation == np.ptp
    # ... and the aggregation name is expected to be "ptp".
    reported_name = vis.get_attr_by_data_model("measure")[0]._aggregation_name
    assert reported_name == "ptp"
Example #6
0
    def determine_encoding(ldf: LuxDataFrame, vis: Vis):
        """
        Populates Vis with the appropriate mark type and channel information based on ShowMe logic
        Currently support up to 3 dimensions or measures

        Parameters
        ----------
        ldf : lux.luxDataFrame.LuxDataFrame
            LuxDataFrame with underspecified intent
        vis : lux.vis.Vis
            Vis whose mark, min_max and _inferred_intent are updated in place

        Returns
        -------
        None

        Notes
        -----
        Implementing automatic encoding from Tableau's VizQL
        Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
        Show Me: Automatic presentation for visual analysis.
        IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
        https://doi.org/10.1109/TVCG.2007.70594
        """
        # Count number of measures and dimensions; clauses with a value are filters
        ndim = 0
        nmsr = 0
        filters = []
        for clause in vis._inferred_intent:
            if clause.value == "":
                if clause.data_model == "dimension":
                    ndim += 1
                elif clause.data_model == "measure" and clause.attribute != "Record":
                    nmsr += 1
            else:  # preserve to add back to _inferred_intent later
                filters.append(clause)

        # Helper function (TODO: Move this into utils)
        def line_or_bar(ldf, dimension: Clause, measure: Clause):
            dim_type = dimension.data_type
            # If no aggregation function is specified, then default as average
            if measure.aggregation == "":
                measure.set_aggregation("mean")
            # "oridinal" was the original (misspelled) literal; it is still accepted so that
            # any caller relying on it keeps working, while the correct "ordinal" now matches too.
            if dim_type in ("temporal", "ordinal", "oridinal"):
                return "line", {"x": dimension, "y": measure}
            # unordered categorical: if cardinality larger than 5 then sort bars
            if ldf.cardinality[dimension.attribute] > 5:
                dimension.sort = "ascending"
            return "bar", {"x": measure, "y": dimension}

        # ShowMe logic + additional heuristics
        count_col = Clause(
            attribute="Record",
            aggregation="count",
            data_model="measure",
            data_type="quantitative",
        )
        auto_channel = {}
        if ndim == 0 and nmsr == 1:
            # Histogram with Count
            measure = vis.get_attr_by_data_model("measure", exclude_record=True)[0]
            # NOTE(review): len(...) can never be negative, so this append never runs.
            # Left unchanged because switching to `== 0` would alter _inferred_intent — confirm intent.
            if len(vis.get_attr_by_attr_name("Record")) < 0:
                vis._inferred_intent.append(count_col)
            # If no bin specified, then default as 10
            if measure.bin_size == 0:
                measure.bin_size = 10
            auto_channel = {"x": measure, "y": count_col}
            vis.mark = "histogram"
        elif ndim == 1 and (nmsr == 0 or nmsr == 1):
            # Line or Bar Chart
            if nmsr == 0:
                vis._inferred_intent.append(count_col)
            dimension = vis.get_attr_by_data_model("dimension")[0]
            measure = vis.get_attr_by_data_model("measure")[0]
            vis.mark, auto_channel = line_or_bar(ldf, dimension, measure)
        elif ndim == 2 and (nmsr == 0 or nmsr == 1):
            # Line or Bar chart broken down by the dimension
            dimensions = vis.get_attr_by_data_model("dimension")
            d1 = dimensions[0]
            d2 = dimensions[1]
            # The lower-cardinality dimension goes on the color channel
            if ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]:
                vis.remove_column_from_spec(d1.attribute)
                dimension = d2
                color_attr = d1
            else:
                if d1.attribute == d2.attribute:
                    # if same attribute then remove_column_from_spec will remove both dims,
                    # we only want to remove one
                    vis._inferred_intent.pop(0)
                else:
                    vis.remove_column_from_spec(d2.attribute)
                dimension = d1
                color_attr = d2
            # Colored Bar/Line chart with Count as default measure
            if nmsr == 0:
                vis._inferred_intent.append(count_col)
            measure = vis.get_attr_by_data_model("measure")[0]
            vis.mark, auto_channel = line_or_bar(ldf, dimension, measure)
            auto_channel["color"] = color_attr
        elif ndim == 0 and nmsr == 2:
            # Scatterplot
            # NOTE(review): relies on the two measures being the first two clauses of _inferred_intent.
            vis.mark = "scatter"
            vis._inferred_intent[0].set_aggregation(None)
            vis._inferred_intent[1].set_aggregation(None)
            auto_channel = {
                "x": vis._inferred_intent[0],
                "y": vis._inferred_intent[1],
            }
        elif ndim == 1 and nmsr == 2:
            # Scatterplot broken down by the dimension
            measure = vis.get_attr_by_data_model("measure")
            m1 = measure[0]
            m2 = measure[1]

            # Drop aggregation on the two plotted measures themselves. Indexing
            # _inferred_intent[0]/[1] here could hit the dimension clause instead of a measure.
            m1.set_aggregation(None)
            m2.set_aggregation(None)

            color_attr = vis.get_attr_by_data_model("dimension")[0]
            # NOTE(review): other branches pass `<clause>.attribute` to remove_column_from_spec;
            # this one passes the Clause itself — confirm which form is intended.
            vis.remove_column_from_spec(color_attr)
            vis.mark = "scatter"
            auto_channel = {"x": m1, "y": m2, "color": color_attr}
        elif ndim == 0 and nmsr == 3:
            # Scatterplot with color
            # NOTE(review): relies on the three measures being the first three clauses of _inferred_intent.
            vis.mark = "scatter"
            auto_channel = {
                "x": vis._inferred_intent[0],
                "y": vis._inferred_intent[1],
                "color": vis._inferred_intent[2],
            }

        # Keep only the min/max entries for attributes that actually ended up on a channel
        relevant_attributes = [clause.attribute for clause in auto_channel.values()]
        vis.min_max = {
            attr: ldf.min_max[attr]
            for attr in relevant_attributes
            if attr != "Record" and attr in ldf.min_max
        }
        if auto_channel:
            vis = Compiler.enforce_specified_channel(vis, auto_channel)
            vis._inferred_intent.extend(filters)  # add back the preserved filters
Example #7
0
def interestingness(vis: Vis, ldf: LuxDataFrame) -> float:
    """
    Compute the interestingness score of the vis.
    The interestingness metric is dependent on the vis type.

    Parameters
    ----------
    vis : Vis
    ldf : LuxDataFrame

    Returns
    -------
    float
        Interestingness Score. -1 is returned when the vis is too small or matches
        no supported combination of dimensions, measures and filters.

    Raises
    ------
    Exception
        If vis.data is None or empty.
    """
    if vis.data is None or len(vis.data) == 0:
        raise Exception("Vis.data needs to be populated before interestingness can be computed. Run Executor.execute(vis,ldf).")

    filter_specs = utils.get_filter_specs(vis._inferred_intent)
    vis_attrs_specs = utils.get_attrs_specs(vis._inferred_intent)

    # "Record" clauses are excluded from the dimension/measure counts
    attr_specs = [clause for clause in vis_attrs_specs if clause.attribute != "Record"]
    n_dim = sum(1 for clause in attr_specs if clause.data_model == "dimension")
    n_msr = sum(1 for clause in attr_specs if clause.data_model == "measure")
    n_filter = len(filter_specs)
    dimension_lst = vis.get_attr_by_data_model("dimension")
    measure_lst = vis.get_attr_by_data_model("measure")
    v_size = len(vis.data)

    # Line/Bar Chart
    if n_dim == 1 and (n_msr == 0 or n_msr == 1):
        if v_size < 2:
            return -1
        if n_filter == 0:
            return unevenness(vis, ldf, measure_lst, dimension_lst)
        if n_filter == 1:
            return deviation_from_overall(vis, ldf, filter_specs, measure_lst[0].attribute)
        # More than one filter: no metric applies. Return the default score explicitly
        # instead of falling off the end of the function with None.
        return -1
    # Histogram
    if n_dim == 0 and n_msr == 1:
        if v_size < 2:
            return -1
        if n_filter == 0:
            return skewness(vis.data["Number of Records"])
        if n_filter == 1:
            return deviation_from_overall(vis, ldf, filter_specs, "Number of Records")
        # Same as above: avoid an implicit None for more than one filter.
        return -1
    # Scatter Plot
    if n_dim == 0 and n_msr == 2:
        if v_size < 2:
            return -1
        if n_filter == 1:
            v_filter_size = get_filtered_size(filter_specs, vis.data)
            sig = v_filter_size / v_size
        else:
            sig = 1
        return sig * monotonicity(vis, attr_specs)
    # Scatterplot colored by Dimension
    if n_dim == 1 and n_msr == 2:
        if v_size < 5:
            return -1
        color_attr = vis.get_attr_by_channel("color")[0].attribute
        # Fewer color categories score higher; 40 or more are rejected
        C = ldf.cardinality[color_attr]
        return 1 / C if C < 40 else -1
    # Scatterplot colored by measure
    if n_msr == 3:
        return 0.1
    # colored line and barchart cases
    if (vis.mark == "line" or vis.mark == "bar") and n_dim == 2:
        return 0.2
    # Default
    return -1
Example #8
0
def interestingness(vis: Vis, ldf: LuxDataFrame) -> float:
    """
    Compute the interestingness score of the vis.
    The interestingness metric is dependent on the vis type.

    Parameters
    ----------
    vis : Vis
    ldf : LuxDataFrame

    Returns
    -------
    float
            Interestingness Score. -1 is returned when vis.data is missing or empty,
            when the vis is too small, or when no supported chart configuration matches.
    """
    if vis.data is None or len(vis.data) == 0:
        return -1

    filter_specs = utils.get_filter_specs(vis._inferred_intent)
    vis_attrs_specs = utils.get_attrs_specs(vis._inferred_intent)

    # "Record" clauses are excluded from the dimension/measure counts
    attr_specs = [clause for clause in vis_attrs_specs if clause.attribute != "Record"]
    n_dim = sum(1 for clause in attr_specs if clause.data_model == "dimension")
    n_msr = sum(1 for clause in attr_specs if clause.data_model == "measure")
    n_filter = len(filter_specs)
    dimension_lst = vis.get_attr_by_data_model("dimension")
    measure_lst = vis.get_attr_by_data_model("measure")
    v_size = len(vis.data)

    # Line/Bar Chart
    if n_dim == 1 and (n_msr == 0 or n_msr == 1):
        if v_size < 2:
            return -1
        if n_filter == 0:
            return unevenness(vis, ldf, measure_lst, dimension_lst)
        if n_filter == 1:
            return deviation_from_overall(vis, ldf, filter_specs, measure_lst[0].attribute)
        # More than one filter: no metric applies. Return the default score explicitly
        # instead of falling off the end of the function with None.
        return -1
    # Histogram
    if n_dim == 0 and n_msr == 1:
        if v_size < 2:
            return -1
        if "Number of Records" in vis.data:
            if n_filter == 0:
                return skewness(vis.data["Number of Records"])
            if n_filter == 1:
                return deviation_from_overall(vis, ldf, filter_specs, "Number of Records")
        return -1
    # Scatter Plot
    if n_dim == 0 and n_msr == 2:
        if v_size < 10:
            return -1
        if vis.mark == "heatmap":
            return weighted_correlation(
                vis.data["xBinStart"], vis.data["yBinStart"], vis.data["count"]
            )
        if n_filter == 1:
            v_filter_size = get_filtered_size(filter_specs, vis.data)
            sig = v_filter_size / v_size
        else:
            sig = 1
        return sig * monotonicity(vis, attr_specs)
    # Scatterplot colored by Dimension
    if n_dim == 1 and n_msr == 2:
        if v_size < 10:
            return -1
        color_attr = vis.get_attr_by_channel("color")[0].attribute
        # Fewer color categories score higher; 40 or more are rejected
        C = ldf.cardinality[color_attr]
        return 1 / C if C < 40 else -1
    # Scatterplot colored by measure
    if n_msr == 3:
        return 0.1
    # colored line chart
    if vis.mark == "line" and n_dim == 2:
        return 0.15
    # for colored bar chart, scoring based on Chi-square test for independence score.
    # gives higher scores to colored bar charts with fewer total categories as these charts
    # are easier to read and thus more useful for users
    if vis.mark == "bar" and n_dim == 2:
        from scipy.stats import chi2_contingency

        measure_column = vis.get_attr_by_data_model("measure")[0].attribute
        dimension_columns = vis.get_attr_by_data_model("dimension")

        groupby_column = dimension_columns[0].attribute
        color_column = dimension_columns[1].attribute

        groupby_cardinality = ldf.cardinality[groupby_column]
        groupby_unique_vals = ldf.unique_values[groupby_column]
        # One row of measure values per group-by category
        contingency_table = [
            vis.data[vis.data[groupby_column] == groupby_unique_vals[c]][measure_column]
            for c in range(groupby_cardinality)
        ]
        score = 0.12
        try:
            color_cardinality = ldf.cardinality[color_column]
            # scale down score based on number of categories
            chi2_score = chi2_contingency(contingency_table)[0] * 0.9 ** (
                color_cardinality + groupby_cardinality
            )
            score = min(0.10, chi2_score)
        except ValueError:
            # ValueError results if an entire column of the contingency table is 0, can happen
            # if an applied filter results in a category having no counts.
            # Deliberately keep the preset score in that case.
            pass
        return score
    # Default
    return -1
Example #9
0
def interestingness(vis: Vis, ldf: LuxDataFrame) -> float:
    """
    Compute the interestingness score of the vis.
    The interestingness metric is dependent on the vis type.

    Parameters
    ----------
    vis : Vis
    ldf : LuxDataFrame

    Returns
    -------
    float
            Interestingness Score. -1 is returned when vis.data is missing or empty,
            when the vis is too small, when no supported chart configuration matches,
            or when scoring fails and lux.config.interestingness_fallback is set.

    Raises
    ------
    Exception
            Any error raised while scoring is re-raised when
            lux.config.interestingness_fallback is falsy.
    """
    if vis.data is None or len(vis.data) == 0:
        return -1
    try:
        filter_specs = utils.get_filter_specs(vis._inferred_intent)
        vis_attrs_specs = utils.get_attrs_specs(vis._inferred_intent)
        n_dim = vis._ndim
        n_msr = vis._nmsr
        n_filter = len(filter_specs)
        attr_specs = [clause for clause in vis_attrs_specs if clause.attribute != "Record"]
        dimension_lst = vis.get_attr_by_data_model("dimension")
        measure_lst = vis.get_attr_by_data_model("measure")
        v_size = len(vis.data)

        # When the current vis is a single filtered line chart, score by similarity to it
        if (
            n_dim == 1
            and (n_msr == 0 or n_msr == 1)
            and ldf.current_vis is not None
            and vis.get_attr_by_channel("y")[0].data_type == "quantitative"
            and len(ldf.current_vis) == 1
            and ldf.current_vis[0].mark == "line"
            and len(get_filter_specs(ldf.intent)) > 0
        ):
            query_vc = VisList(ldf.current_vis, ldf)
            query_vis = query_vc[0]
            preprocess(query_vis)
            preprocess(vis)
            return 1 - euclidean_dist(query_vis, vis)

        # Line/Bar Chart
        if n_dim == 1 and (n_msr == 0 or n_msr == 1):
            if v_size < 2:
                return -1
            if n_filter == 0:
                return unevenness(vis, ldf, measure_lst, dimension_lst)
            if n_filter == 1:
                return deviation_from_overall(vis, ldf, filter_specs, measure_lst[0].attribute)
            # More than one filter: no metric applies. Return the default score explicitly
            # instead of falling off the end of the function with None.
            return -1
        # Histogram
        if n_dim == 0 and n_msr == 1:
            if v_size < 2:
                return -1
            if "Number of Records" in vis.data:
                if n_filter == 0:
                    return skewness(vis.data["Number of Records"])
                if n_filter == 1:
                    return deviation_from_overall(vis, ldf, filter_specs, "Number of Records")
            return -1
        # Scatter Plot
        if n_dim == 0 and n_msr == 2:
            if v_size < 10:
                return -1
            if vis.mark == "heatmap":
                return weighted_correlation(
                    vis.data["xBinStart"], vis.data["yBinStart"], vis.data["count"]
                )
            if n_filter == 1:
                v_filter_size = get_filtered_size(filter_specs, vis.data)
                sig = v_filter_size / v_size
            else:
                sig = 1
            return sig * monotonicity(vis, attr_specs)
        # Scatterplot colored by Dimension
        if n_dim == 1 and n_msr == 2:
            if v_size < 10:
                return -1
            color_attr = vis.get_attr_by_channel("color")[0].attribute
            # Fewer color categories score higher; 40 or more are rejected
            C = ldf.cardinality[color_attr]
            return 1 / C if C < 40 else -1
        # Scatterplot colored by measure
        if n_msr == 3:
            return 0.1
        # colored line chart
        if vis.mark == "line" and n_dim == 2:
            return 0.15
        # for colored bar chart, scoring based on Chi-square test for independence score.
        # gives higher scores to colored bar charts with fewer total categories as these
        # charts are easier to read and thus more useful for users
        if vis.mark == "bar" and n_dim == 2:
            from scipy.stats import chi2_contingency

            measure_column = vis.get_attr_by_data_model("measure")[0].attribute
            dimension_columns = vis.get_attr_by_data_model("dimension")

            groupby_column = dimension_columns[0].attribute
            color_column = dimension_columns[1].attribute

            # Use the string name rather than the builtin `sum` as the aggregation function
            contingency_tbl = pd.crosstab(
                vis.data[groupby_column],
                vis.data[color_column],
                values=vis.data[measure_column],
                aggfunc="sum",
            )

            try:
                color_cardinality = ldf.cardinality[color_column]
                groupby_cardinality = ldf.cardinality[groupby_column]
                # scale down score based on number of categories
                chi2_score = chi2_contingency(contingency_tbl)[0] * 0.9 ** (
                    color_cardinality + groupby_cardinality
                )
                score = min(0.10, chi2_score)
            except (ValueError, KeyError):
                # ValueError results if an entire column of the contingency table is 0, can
                # happen if an applied filter results in a category having no counts
                score = -1
            return score
        # Default
        return -1
    except Exception:
        # Catch Exception rather than using a bare except so KeyboardInterrupt and
        # SystemExit are never swallowed by the fallback.
        if lux.config.interestingness_fallback:
            # Suppress interestingness related issues
            warnings.warn(f"An error occurred when computing interestingness for: {vis}")
            return -1
        raise
Example #10
0
    def determine_encoding(ldf: LuxDataFrame, vis: Vis):
        """
        Populates Vis with the appropriate mark type and channel information based on ShowMe logic
        Currently support up to 3 dimensions or measures

        Parameters
        ----------
        ldf : lux.core.frame
                LuxDataFrame with underspecified intent
        vis : lux.vis.Vis
                Vis whose _mark, _min_max and _inferred_intent are updated in place

        Returns
        -------
        None

        Notes
        -----
        Implementing automatic encoding from Tableau's VizQL
        Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
        Show Me: Automatic presentation for visual analysis.
        IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
        https://doi.org/10.1109/TVCG.2007.70594
        """
        # Count number of measures and dimensions
        ndim = vis._ndim
        nmsr = vis._nmsr
        # preserve to add back to _inferred_intent later
        filters = utils.get_filter_specs(vis._inferred_intent)

        # Helper function (TODO: Move this into utils)
        def line_or_bar_or_geo(ldf, dimension: Clause, measure: Clause):
            dim_type = dimension.data_type
            # If no aggregation function is specified, then default as average
            if measure.aggregation == "":
                measure.set_aggregation("mean")
            # "oridinal" was the original (misspelled) literal; it is still accepted so that
            # any caller relying on it keeps working, while the correct "ordinal" now matches too.
            if dim_type in ("temporal", "ordinal", "oridinal"):
                if isinstance(dimension.attribute, pd.Timestamp):
                    # If timestamp, use the _repr_ (e.g., TimeStamp('2020-04-05 00.000')--> '2020-04-05')
                    attr = str(dimension.attribute._date_repr)
                else:
                    attr = dimension.attribute
                # A single distinct value is drawn as a bar instead of a line
                if ldf.cardinality[attr] == 1:
                    return "bar", {"x": measure, "y": dimension}
                return "line", {"x": dimension, "y": measure}
            # unordered categorical: if cardinality larger than 5 then sort bars
            if ldf.cardinality[dimension.attribute] > 5:
                dimension.sort = "ascending"
            if utils.like_geo(dimension.get_attr()):
                return "geographical", {"x": dimension, "y": measure}
            return "bar", {"x": measure, "y": dimension}

        # ShowMe logic + additional heuristics
        count_col = Clause(
            attribute="Record",
            aggregation="count",
            data_model="measure",
            data_type="quantitative",
        )
        auto_channel = {}
        if ndim == 0 and nmsr == 1:
            # Histogram with Count
            measure = vis.get_attr_by_data_model("measure", exclude_record=True)[0]
            # NOTE(review): len(...) can never be negative, so this append never runs.
            # Left unchanged because switching to `== 0` would alter _inferred_intent — confirm intent.
            if len(vis.get_attr_by_attr_name("Record")) < 0:
                vis._inferred_intent.append(count_col)
            # If no bin specified, then default as 10
            if measure.bin_size == 0:
                measure.bin_size = 10
            auto_channel = {"x": measure, "y": count_col}
            vis._mark = "histogram"
        elif ndim == 1 and (nmsr == 0 or nmsr == 1):
            # Line or Bar Chart
            if nmsr == 0:
                vis._inferred_intent.append(count_col)
            dimension = vis.get_attr_by_data_model("dimension")[0]
            measure = vis.get_attr_by_data_model("measure")[0]
            vis._mark, auto_channel = line_or_bar_or_geo(ldf, dimension, measure)
        elif ndim == 2 and (nmsr == 0 or nmsr == 1):
            # Line or Bar chart broken down by the dimension
            dimensions = vis.get_attr_by_data_model("dimension")
            d1 = dimensions[0]
            d2 = dimensions[1]
            # The lower-cardinality dimension goes on the color channel
            if ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]:
                vis.remove_column_from_spec(d1.attribute)
                dimension = d2
                color_attr = d1
            else:
                # if same attribute then remove_column_from_spec will remove both dims,
                # we only want to remove one
                if d1.attribute == d2.attribute:
                    vis._inferred_intent.pop(0)
                else:
                    vis.remove_column_from_spec(d2.attribute)
                dimension = d1
                color_attr = d2
            # Colored Bar/Line chart with Count as default measure.
            # For pre-aggregated frames no mark or channels are assigned in this branch.
            if not ldf.pre_aggregated:
                if nmsr == 0:
                    vis._inferred_intent.append(count_col)
                measure = vis.get_attr_by_data_model("measure")[0]
                vis._mark, auto_channel = line_or_bar_or_geo(ldf, dimension, measure)
                auto_channel["color"] = color_attr
        elif ndim == 0 and nmsr == 2:
            # Scatterplot
            # NOTE(review): relies on the two measures being the first two clauses of _inferred_intent.
            vis._mark = "scatter"
            vis._inferred_intent[0].set_aggregation(None)
            vis._inferred_intent[1].set_aggregation(None)
            auto_channel = {
                "x": vis._inferred_intent[0],
                "y": vis._inferred_intent[1],
            }
        elif ndim == 1 and nmsr == 2:
            # Scatterplot broken down by the dimension
            measure = vis.get_attr_by_data_model("measure")
            m1 = measure[0]
            m2 = measure[1]

            # Drop aggregation on the two plotted measures themselves. Indexing
            # _inferred_intent[0]/[1] here could hit the dimension clause instead of a measure.
            m1.set_aggregation(None)
            m2.set_aggregation(None)

            color_attr = vis.get_attr_by_data_model("dimension")[0]
            # NOTE(review): other branches pass `<clause>.attribute` to remove_column_from_spec;
            # this one passes the Clause itself — confirm which form is intended.
            vis.remove_column_from_spec(color_attr)
            vis._mark = "scatter"
            auto_channel = {"x": m1, "y": m2, "color": color_attr}
        elif ndim == 0 and nmsr == 3:
            # Scatterplot with color
            # NOTE(review): relies on the three measures being the first three clauses of _inferred_intent.
            vis._mark = "scatter"
            auto_channel = {
                "x": vis._inferred_intent[0],
                "y": vis._inferred_intent[1],
                "color": vis._inferred_intent[2],
            }

        # Keep only the min/max entries for attributes that actually ended up on a channel
        relevant_attributes = [clause.attribute for clause in auto_channel.values()]
        relevant_min_max = {
            attr: ldf._min_max[attr]
            for attr in relevant_attributes
            if attr != "Record" and attr in ldf._min_max
        }
        # Replace scatterplot with heatmap once the frame has more than HBIN_START rows
        HBIN_START = 5000
        if vis.mark == "scatter" and lux.config.heatmap and len(ldf) > HBIN_START:
            vis._postbin = True
            ldf._message.add_unique(
                "Large scatterplots detected: Lux is automatically binning scatterplots to heatmaps.",
                priority=98,
            )
            vis._mark = "heatmap"
        vis._min_max = relevant_min_max
        if auto_channel:
            vis = Compiler.enforce_specified_channel(vis, auto_channel)
            vis._inferred_intent.extend(filters)  # add back the preserved filters