Ejemplo n.º 1
0
    def enforceSpecifiedChannel(view: View, autoChannel: Dict[str, str]):
        """
        Make channels specified by the user in the View override the showMe autoChannels.

        Channels already specified in ``view`` are kept as they are. The
        recommended encodings that are left over are then assigned, in order,
        to the channels that are still empty.

        Parameters
        ----------
        view : lux.view.View
            Input View; its specs may already carry a user-specified channel.
        autoChannel : Dict[str,str]
            Recommended placement, keyed by channel name.
            NOTE(review): the annotation says ``str`` values, but this function
            reads ``.attribute`` and ``.channel`` from each value and assigns
            ``.channel``, so the values are expected to be spec objects.
            The mapping itself is not modified; the spec objects it holds may
            have their ``channel`` reassigned.

        Returns
        -------
        view : lux.view.View
            The same ``view`` object, with ``specLst`` replaced by one entry per
            channel of ``autoChannel``, in the key order of ``autoChannel``.

        Raises
        ------
        ValueError
            If more than one attribute is specified for the same channel.
        """
        # Work on a shallow copy so that removing matched entries below does
        # not silently empty the dictionary owned by the caller.
        remaining = dict(autoChannel)
        # channel -> list of specs the view already has on that channel
        specifiedDict = {
            channel: view.getAttrByChannel(channel) for channel in remaining
        }
        # channel -> spec finally placed there ("" marks a channel not filled yet)
        resultDict = {channel: "" for channel in remaining}

        for channel, specified in specifiedDict.items():
            if len(specified) > 1:
                raise ValueError(
                    "There should not be more than one attribute specified in the same channel."
                )
            if len(specified) == 1:
                userSpec = specified[0]
                # Drop the recommendation that refers to this spec. Matching is
                # done on the value (attribute and channel), because the key it
                # is stored under may be a different channel. The channel is
                # compared as well to tell apart duplicate columns that share
                # an attribute name.
                for key, recommended in list(remaining.items()):
                    if (recommended.attribute == userSpec.attribute
                            and recommended.channel == channel):
                        del remaining[key]
                        break
                userSpec.channel = channel
                resultDict[channel] = userSpec

        # Pair the channels that are still empty with the recommendations that
        # were not consumed above and fill them in order.
        leftoverChannels = [
            channel for channel, spec in resultDict.items() if spec == ""
        ]
        for channel, encoding in zip(leftoverChannels, remaining.values()):
            encoding.channel = channel
            resultDict[channel] = encoding
        view.specLst = list(resultDict.values())
        return view
Ejemplo n.º 2
0
    def executeAggregate(view: View):
        '''
        Aggregate data points on an axis for bar or line charts.

        The axis whose spec has a non-empty ``aggregation`` is the measure and
        the other axis is the group-by attribute (when both axes are aggregated,
        x is the measure and y the group-by attribute). ``view.data`` is replaced
        by one row per group-by value, sorted by that value, with a fresh
        0..n-1 index. Values listed in ``view.data.uniqueValues`` for the
        group-by attribute that have no rows are added with 0 as their
        aggregated value. If neither axis is aggregated, the view is left
        untouched.

        Parameters
        ----------
        view: lux.View
            lux.View object that represents a visualization. Its ``data``
            attribute is overwritten with the aggregated result.

        Returns
        -------
        None
        '''
        xAttr = view.getAttrByChannel("x")[0]
        yAttr = view.getAttrByChannel("y")[0]
        groupbyAttr = ""
        measureAttr = ""
        if (yAttr.aggregation != ""):
            groupbyAttr = xAttr
            measureAttr = yAttr
            aggFunc = yAttr.aggregation
        if (xAttr.aggregation != ""):
            groupbyAttr = yAttr
            measureAttr = xAttr
            aggFunc = xAttr.aggregation
        if (measureAttr == ""):
            # No axis is aggregated, so there is nothing to group. groupbyAttr
            # is still the "" placeholder here and must not be dereferenced.
            return

        groupbyName = groupbyAttr.attribute
        # Read the full range of group-by values before view.data is replaced.
        allAttrVals = view.data.uniqueValues[groupbyName]
        if (measureAttr.attribute == "Record"):
            # Count rows per group: the former row index is turned into a
            # column, counted, and renamed to "Record".
            view.data = view.data.reset_index()
            view.data = view.data.groupby(groupbyName).count().reset_index()
            view.data = view.data.rename(columns={"index": "Record"})
            view.data = view.data[[groupbyName, "Record"]]
        else:
            view.data = view.data.groupby(groupbyName).agg(aggFunc).reset_index()

        resultVals = list(view.data[groupbyName])
        if (len(resultVals) != len(allAttrVals)):
            # For filtered aggregation that have missing groupby-attribute values,
            # set these aggregated value as 0, since no datapoints.
            # One zero per aggregated column, so results with more than one
            # measure column are filled correctly as well.
            zeroFill = [0] * (len(view.data.columns) - 1)
            for vals in allAttrVals:
                if (vals not in resultVals):
                    view.data.loc[len(view.data)] = [vals] + zeroFill
        assert len(list(view.data[groupbyName])) == len(
            allAttrVals
        ), f"Aggregated data missing values compared to original range of values of `{groupbyAttr.attribute}`."
        view.data = view.data.sort_values(by=groupbyName, ascending=True)
        # drop=True discards the old row labels without ever creating (and then
        # having to remove) a helper "index" column.
        view.data = view.data.reset_index(drop=True)
Ejemplo n.º 3
0
    def executeAggregate(view:View, ldf:LuxDataFrame):
        """
        Aggregate the view's data with a SQL GROUP BY query.

        The axis whose spec has a non-empty ``aggregation`` is the measure and
        the other axis is the group-by attribute (when both axes are aggregated,
        x is the measure). The query is run against ``ldf.table_name`` through
        ``ldf.SQLconnection``, restricted by the clause returned from
        ``SQLExecutor.executeFilter(view)``, and the result is stored in
        ``view.data`` after conversion with ``utils.pandasToLux``.

        A measure named "Record" yields a per-group COUNT. Otherwise only the
        aggregations "mean", "sum" and "max" issue a query; any other
        aggregation leaves ``view.data`` unchanged. Nothing happens when
        neither axis is aggregated.

        Parameters
        ----------
        view : View
            View to aggregate; its ``data`` attribute receives the result.
        ldf : LuxDataFrame
            Supplies ``table_name`` and ``SQLconnection`` for the query.

        Returns
        -------
        None
        """
        import pandas as pd

        xSpec = view.getAttrByChannel("x")[0]
        ySpec = view.getAttrByChannel("y")[0]
        groupbyAttr = ""
        measureAttr = ""
        # y is examined first, so an aggregated x axis wins when both are aggregated.
        for measureCandidate, groupCandidate in ((ySpec, xSpec), (xSpec, ySpec)):
            if measureCandidate.aggregation != "":
                groupbyAttr = groupCandidate
                measureAttr = measureCandidate
                aggFunc = measureCandidate.aggregation

        if measureAttr == "":
            return

        # Both query kinds are restricted by the same filter clause.
        whereClause, _ = SQLExecutor.executeFilter(view)
        groupCol = groupbyAttr.attribute

        if measureAttr.attribute == "Record":
            # barchart case, need count data for each group
            countQuery = "SELECT {}, COUNT({}) FROM {} {} GROUP BY {}".format(
                groupCol, groupCol, ldf.table_name, whereClause, groupCol)
            counted = pd.read_sql(countQuery, ldf.SQLconnection)
            counted = counted.rename(columns={"count":"Record"})
            view.data = utils.pandasToLux(counted)
        else:
            # aggregation name -> query template for the matching SQL aggregate
            queryTemplates = {
                "mean": "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}",
                "sum": "SELECT {}, SUM({}) as {} FROM {} {} GROUP BY {}",
                "max": "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}",
            }
            template = queryTemplates.get(aggFunc)
            if template is not None:
                measureCol = measureAttr.attribute
                aggQuery = template.format(
                    groupCol, measureCol, measureCol, ldf.table_name, whereClause, groupCol)
                view.data = utils.pandasToLux(pd.read_sql(aggQuery, ldf.SQLconnection))
Ejemplo n.º 4
0
def interestingness(view:View ,ldf:LuxDataFrame) -> float:
	"""
	Compute the interestingness score of the view.

	The metric depends on the view type, which is derived from the number of
	dimension and measure attributes in the view (the "Record" attribute is
	not counted) and from the number of filters:

	- 1 dimension and 0 or 1 measures (bar chart): ``unevenness`` without a
	  filter, ``deviationFromOverall`` with exactly one filter.
	- 0 dimensions and 1 measure (histogram): ``skewness`` of the
	  "Count of Records" column without a filter, ``deviationFromOverall``
	  with exactly one filter.
	- 0 dimensions and 2 measures (scatter plot): ``monotonicity``, scaled by
	  ``getFilteredSize(filterSpecs, view.data) / len(view.data)`` when there
	  is exactly one filter.
	- 1 dimension and 2 measures (scatter plot colored by a dimension):
	  ``1/C`` where ``C`` is ``ldf.cardinality`` of the color attribute,
	  as long as ``C < 40``.
	- 3 measures (scatter plot colored by a measure): 0.1.

	Every other combination, including a bar chart or histogram with more
	than one filter, scores -1.

	Parameters
	----------
	view : View
		View to score; ``view.data`` must already be populated.
	ldf : LuxDataFrame
		Dataframe the view was generated from.

	Returns
	-------
	float
		Interestingness score, or -1 when no metric applies to the view.

	Raises
	------
	Exception
		If ``view.data`` is None.
	"""

	if view.data is None:
		raise Exception("View.data needs to be populated before interestingness can be computed. Run Executor.execute(view,ldf).")

	filterSpecs = utils.getFilterSpecs(view.specLst)
	viewAttrsSpecs = utils.getAttrsSpecs(view.specLst)

	# "Record" is left out of the dimension/measure counts that select the chart type.
	attr_specs = [spec for spec in viewAttrsSpecs if spec.attribute != "Record"]
	n_dim = sum(1 for spec in attr_specs if spec.dataModel == 'dimension')
	n_msr = sum(1 for spec in attr_specs if spec.dataModel == 'measure')
	n_filter = len(filterSpecs)
	dimensionLst = view.getAttrByDataModel("dimension")
	measureLst = view.getAttrByDataModel("measure")

	# Bar Chart
	if (n_dim == 1 and n_msr in (0, 1)):
		if (n_filter == 0):
			return unevenness(view, ldf, measureLst, dimensionLst)
		elif (n_filter == 1):
			return deviationFromOverall(view, ldf, filterSpecs, measureLst[0].attribute)
	# Histogram
	elif (n_dim == 0 and n_msr == 1):
		if (n_filter == 0):
			v = view.data["Count of Records"]
			return skewness(v)
		elif (n_filter == 1):
			return deviationFromOverall(view, ldf, filterSpecs, "Count of Records")
	# Scatter Plot
	elif (n_dim == 0 and n_msr == 2):
		if (n_filter == 1):
			# Weight the score by the fraction reported by getFilteredSize.
			v_filter_size = getFilteredSize(filterSpecs, view.data)
			v_size = len(view.data)
			sig = v_filter_size/v_size
		else:
			sig = 1
		return sig * monotonicity(view, attr_specs)
	# Scatterplot colored by Dimension
	elif (n_dim == 1 and n_msr == 2):
		colorAttr = view.getAttrByChannel("color")[0].attribute
		C = ldf.cardinality[colorAttr]
		if (C < 40):
			return 1/C
		return -1
	# Scatterplot colored by measure
	elif (n_msr == 3):
		return 0.1
	# Default. Placed after the chain (not in an else) so that a bar chart or
	# histogram with more than one filter also gets -1 instead of None.
	return -1