Example #1
0
    def removeAllInvalid(viewCollection: ViewCollection) -> ViewCollection:
        """
        Filter an expanded view collection down to its valid views.

        A view is kept only if every spec references a distinct attribute
        and it contains fewer than two temporal attributes; all other
        views are dropped.

        Parameters
        ----------
        viewCollection : ViewCollection
            Expanded collection of candidate views.

        Returns
        -------
        ViewCollection
            Collection containing only the valid views.
        """
        validViews = []
        for candidate in viewCollection:
            seenAttributes = set()
            temporalCount = 0
            for spec in candidate.specLst:
                seenAttributes.add(spec.attribute)
                if spec.dataType == "temporal":
                    temporalCount += 1
            # duplicate attributes collapse in the set, so equal lengths
            # means every spec's attribute is distinct
            allDistinct = len(seenAttributes) == len(candidate.specLst)
            if allDistinct and temporalCount < 2:
                validViews.append(candidate)
        return ViewCollection(validViews)
Example #2
0
def distribution(ldf,dataTypeConstraint="quantitative"):
	'''
	Generates bar chart distributions of different attributes in the dataset.

	Parameters
	----------
	ldf : lux.luxDataFrame.LuxDataFrame
		LuxDataFrame with underspecified context.

	dataTypeConstraint: str
		The variable that controls the type of distribution chart that will be rendered.
		Supported values are "quantitative" and "nominal".

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Distribution action.

	Raises
	------
	ValueError
		If dataTypeConstraint is neither "quantitative" nor "nominal".
	'''
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		tic = time.perf_counter()

	if (dataTypeConstraint=="quantitative"):
		query = [lux.Spec("?",dataType="quantitative")]
		recommendation = {"action":"Distribution",
							"description":"Show univariate count distributions of different attributes in the dataset."}
	elif (dataTypeConstraint=="nominal"):
		query = [lux.Spec("?",dataType="nominal")]
		recommendation = {"action":"Category",
						   "description":"Show bar chart distributions of different attributes in the dataset."}
	else:
		# previously fell through with `query`/`recommendation` unbound,
		# producing an opaque NameError below; fail fast instead
		raise ValueError(f"Unsupported dataTypeConstraint: {dataTypeConstraint!r}")
	# carry over any filters already present in the context
	query.extend(ldf.filterSpecs)
	vc = ViewCollection(query)
	vc = vc.load(ldf)
	# score each populated view, then keep only the top 15
	for view in vc:
		view.score = interestingness(view,ldf)
	vc = vc.topK(15)
	recommendation["collection"] = vc
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		toc = time.perf_counter()
		print(f"Performed distribution action in {toc - tic:0.4f} seconds")
	return recommendation
Example #3
0
def similar_pattern(ldf, queryContext, topK=-1):
    '''
    Generates visualizations with similar patterns to a query visualization.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    queryContext: list[lux.Spec]
        context for specifying the visual query for the similarity search.

    topK: int
        number of visual recommendations to return; -1 (default) returns all.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Similarity action,
        or None when the query does not contain exactly one row (filter) value.
    '''
    row_specs = list(filter(lambda x: x.value != "", queryContext))
    if (len(row_specs) == 1):
        search_space_vc = ViewCollection(ldf.current_view.collection.copy())
        search_space_vc = search_space_vc.load(ldf)

        query_vc = ViewCollection(queryContext)
        query_vc = query_vc.load(ldf)
        query_view = query_vc[0]
        # BUG FIX: was `preprocess(queryView)` — undefined name, raised
        # NameError; the local variable is `query_view`.
        preprocess(query_view)
        #for loop to create assign euclidean distance
        recommendation = {
            "action":
            "Similarity",
            "description":
            "Show other charts that are visually similar to the Current View."
        }
        for view in search_space_vc:
            preprocess(view)
            view.score = euclidean_dist(query_view, view)
        # smaller distance means more similar, so invert the score ordering
        search_space_vc.normalize_score(invert_order=True)
        if (topK != -1):
            search_space_vc = search_space_vc.topK(topK)
        recommendation["collection"] = search_space_vc
        return recommendation
    else:
        print("Query needs to have 1 row value")
Example #4
0
    def getExported(
        self
    ) -> typing.Union[typing.Dict[str, ViewCollection], ViewCollection]:
        """
        Get selected views as exported View Collection

        Notes
        -----
        Converts the widget's _exportedVisIdxs dictionary into a programmable
        ViewCollection. Example _exportedVisIdxs :
            {'Correlation': [0, 2], 'Category': [1]}
        meaning the 0th and 2nd vis from the `Correlation` tab and the 1st vis
        from the `Category` tab are selected.

        Returns
        -------
        typing.Union[typing.Dict[str,ViewCollection], ViewCollection]
            No exported vis -> empty list [].
            All exported vis from one tab -> ViewCollection(v1, v2...).
            Exported vis from several tabs -> {"Enhance": ViewCollection(...),
            "Filter": ViewCollection(...), ...} keyed by action name.
        """
        selected = self.widget._exportedVisIdxs
        if (selected == {}):
            import warnings
            warnings.warn("No visualization selected to export")
            return []

        def viewsFor(action):
            # pick the chosen views out of the tab's recommendation list
            tabViews = self.recommendation[action]
            return ViewCollection([tabViews[idx] for idx in selected[action]])

        actions = list(selected.keys())
        if len(actions) == 1:
            return viewsFor(actions[0])
        return {action: viewsFor(action) for action in actions}
Example #5
0
    def enumerateCollection(specLst: List[Spec],
                            ldf: LuxDataFrame) -> ViewCollection:
        """
        Build a View collection from wildcard-expanded specifications.

        The specifications are first expanded through populateWildcardOptions;
        every combination of one attribute per attribute slot (optionally
        paired with each filter) then becomes one View.

        Parameters
        ----------
        specLst : List[Spec]
            Specifications to expand and combine.
        ldf : lux.luxDataFrame.LuxDataFrame
            LuxDataFrame with underspecified context.

        Returns
        -------
        ViewCollection
            view collection with compiled lux.View objects.
        """
        import copy
        expanded = Compiler.populateWildcardOptions(specLst, ldf)
        attributes = expanded['attributes']
        filters = expanded['filters']
        # filter-only query: record the filters on the data frame and stop
        if len(attributes) == 0 and len(filters) > 0:
            ldf.filterSpecs = filters
            return []

        collection = []

        def expand(remaining, chosen):
            # remaining: per-slot lists of attribute options still to place
            isLastSlot = len(remaining) == 1
            for option in remaining[0]:
                picked = copy.deepcopy(chosen + [option])
                if not isLastSlot:
                    expand(remaining[1:], picked)
                elif len(filters) > 0:
                    # with filters present, emit one view per filter value
                    for filterSpec in filters:
                        fullSpecLst = copy.deepcopy(picked + [filterSpec])
                        collection.append(
                            View(fullSpecLst,
                                 title=
                                 f"{filterSpec.attribute} {filterSpec.filterOp} {filterSpec.value}"
                                 ))
                else:
                    collection.append(View(picked))

        expand(attributes, [])
        return ViewCollection(collection)
Example #6
0
def test_viewCollection():
    df = pd.read_csv("lux/data/olympic.csv")
    from lux.view.ViewCollection import ViewCollection

    vc = ViewCollection(["Height", "SportType=Ball", "?"]).load(df)
    yearViews = [v for v in vc if v.getAttrByAttrName("Year") != []]
    assert yearViews[0].getAttrByChannel("x")[0].attribute == "Year"
    # one candidate is dropped for repeating the filter attribute and one
    # for repeating "Height" itself
    assert len(vc) == len(df.columns) - 2

    vc = ViewCollection(["Height", "?"]).load(df)
    # one candidate is dropped for repeating "Height"
    assert len(vc) == len(df.columns) - 1
Example #7
0
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    '''
	Generates bivariate visualizations that represent all pairwise relationships in the data.

	Parameters
	----------
	ldf : LuxDataFrame
		LuxDataFrame with underspecified context.

	ignore_transpose: bool
		Boolean flag to ignore pairs of attributes whose transpose are already computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Correlation action.

	Raises
	------
	ValueError
		If a populated view contains fewer than two measure attributes.
	'''
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()

    # two measure wildcards -> all pairwise measure combinations
    query = [
        lux.Spec("?", data_model="measure"),
        lux.Spec("?", data_model="measure")
    ]
    query.extend(ldf.filter_specs)
    vc = ViewCollection(query)
    recommendation = {
        "action": "Correlation",
        "description":
        "Show relationships between two quantitative attributes."
    }
    vc = vc.load(ldf)
    # Then use the data populated in the view collection to compute score
    for view in vc:
        measures = view.get_attr_by_data_model("measure")
        if len(measures) < 2:
            # BUG FIX: the old message read `x.attribute for x in ldf.columns`,
            # but DataFrame columns are labels, not Spec objects, so raising
            # the error itself crashed with AttributeError.
            raise ValueError(
                f"Can not compute correlation between {list(ldf.columns)} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute

        if (ignore_transpose):
            # skip {Y,X} when {X,Y} has already been scored
            check_transpose = check_transpose_not_computed(vc, msr1, msr2)
        else:
            check_transpose = True
        if (check_transpose):
            view.score = interestingness(view, ldf)
        else:
            view.score = -1  # transposed duplicate: rank it last
    vc = vc.topK(15)
    recommendation["collection"] = vc

    # for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed correlation action in {toc - tic:0.4f} seconds")
    return recommendation