Beispiel #1
0
 def random_categorical(ldf):
     intent = [lux.Clause("?", data_type="nominal")]
     vlist = VisList(intent, ldf)
     for vis in vlist:
         vis.score = 10
     vlist.sort()
     vlist = vlist.showK()
     return {
         "action": "bars",
         "description": "Random list of Bar charts",
         "collection": vlist,
     }
Beispiel #2
0
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generates bivariate visualizations that represent all pairwise relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
            LuxDataFrame with underspecified intent.

    ignore_transpose: bool
            Boolean flag to ignore pairs of attributes whose transpose are already computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Correlation action.
    """

    import numpy as np

    filter_specs = utils.get_filter_specs(ldf._intent)
    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    vlist = VisList(intent, ldf)
    recommendation = {
        "action":
        "Correlation",
        "description":
        "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }
    ignore_rec_flag = False
    # Doesn't make sense to compute correlation if less than 4 data values
    if len(ldf) < 5:
        ignore_rec_flag = True
    # Then use the data populated in the vis list to compute score
    for vis in vlist:
        measures = vis.get_attr_by_data_model("measure")
        if len(measures) < 2:
            raise ValueError(
                f"Can not compute correlation between {[x.attribute for x in ldf.columns]} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute

        if ignore_transpose:
            check_transpose = check_transpose_not_computed(vlist, msr1, msr2)
        else:
            check_transpose = True
        if check_transpose:
            vis.score = interestingness(vis, ldf)
        else:
            vis.score = -1
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist.sort()
    vlist = vlist.showK()
    recommendation["collection"] = vlist
    return recommendation
Beispiel #3
0
def univariate(ldf, data_type_constraint="quantitative"):
    '''
	Generates bar chart distributions of different attributes in the dataframe.

	Parameters
	----------
	ldf : lux.core.frame
		LuxDataFrame with underspecified intent.

	data_type_constraint: str
		Controls the type of distribution chart that will be rendered.

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Distribution action.
	'''
    import numpy as np

    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if (data_type_constraint == "quantitative"):
        intent = [
            lux.Clause("?",
                       data_type="quantitative",
                       exclude="Number of Records")
        ]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Distribution",
            "description":
            "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes."
        }
        if (
                len(ldf) < 5
        ):  # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
            ignore_rec_flag = True
    elif (data_type_constraint == "nominal"):
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Occurrence",
            "description":
            "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes."
        }
    elif (data_type_constraint == "temporal"):
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Temporal",
            "description":
            "Show trends over <p class='highlight-descriptor'>time-related</p> attributes."
        }
        if (
                len(ldf) < 3
        ):  # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
            ignore_rec_flag = True
    if (ignore_rec_flag):
        recommendation["collection"] = []
        return recommendation
    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    # vlist = vlist.topK(15) # Basic visualizations should not be capped
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
Beispiel #4
0
def univariate(ldf, *args):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    data_type_constraint: str
            Controls the type of distribution chart that will be rendered.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Distribution action.
    """
    import numpy as np

    if len(args) == 0:
        data_type_constraint = "quantitative"
    else:
        data_type_constraint = args[0][0]

    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        possible_attributes = [
            c for c in ldf.columns if ldf.data_type[c] == "quantitative"
            and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        intent = [lux.Clause(possible_attributes)]
        intent.extend(filter_specs)
        examples = ""
        if len(possible_attributes) >= 1:
            examples = f" (e.g., {possible_attributes[0]})"
        recommendation = {
            "action":
            "Distribution",
            "description":
            "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes.",
            "long_description":
            f"Distribution displays univariate histogram distributions of all quantitative attributes{examples}. Visualizations are ranked from most to least skewed.",
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        if ldf.length < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        possible_attributes = [
            c for c in ldf.columns if ldf.data_type[c] == "nominal"
            and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        examples = ""
        if len(possible_attributes) >= 1:
            examples = f" (e.g., {possible_attributes[0]})"
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Occurrence",
            "description":
            "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
            "long_description":
            f"Occurence displays bar charts of counts for all categorical attributes{examples}. Visualizations are ranked from most to least uneven across the bars. ",
        }
    elif data_type_constraint == "geographical":
        possible_attributes = [
            c for c in ldf.columns if ldf.data_type[c] == "geographical"
            and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        examples = ""
        if len(possible_attributes) >= 1:
            examples = f" (e.g., {possible_attributes[0]})"
        intent = [
            lux.Clause("?", data_type="geographical"),
            lux.Clause("?", data_model="measure")
        ]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Geographical",
            "description":
            "Show choropleth maps of <p class='highlight-descriptor'>geographic</p> attributes",
            "long_description":
            f"Occurence displays choropleths of averages for some geographic attribute{examples}. Visualizations are ranked by diversity of the geographic attribute.",
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Temporal",
            "description":
            "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
            "long_description":
            "Temporal displays line charts for all attributes related to datetimes in the dataframe.",
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if ldf.length < 3:
            ignore_rec_flag = True
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
Beispiel #5
0
def univariate(ldf, *args):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    data_type_constraint: str
            Controls the type of distribution chart that will be rendered.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Distribution action.
    """
    import numpy as np

    if len(args) == 0:
        data_type_constraint = "quantitative"
    else:
        data_type_constraint = args[0][0]

    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        possible_attributes = [
            c for c in ldf.columns if ldf.data_type[c] == "quantitative"
            and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        intent = [lux.Clause(possible_attributes)]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Distribution",
            "description":
            "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes.",
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        if len(ldf) < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Occurrence",
            "description":
            "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Temporal",
            "description":
            "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if len(ldf) < 3:
            ignore_rec_flag = True
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation