Example #1
0
	def onCalculateMinMax3Sigma(self):
		"""Set the range of each selected 1d expression to mean +/- 3 standard deviations.

		Takes the first expression of every pair selected in ``self.table1d``,
		computes the means and then the variances (given those means), showing
		a progress dialog for each step.  The resulting ``(low, high)`` ranges
		are stored through ``self.table1d.setRanges`` and
		``self.fill_range_map()`` is called afterwards.
		"""
		pairs = self.table1d.getSelected()
		expressions = [pair[0] for pair in pairs]

		# The executor used to be created twice in a row; the first instance
		# (built with buffer_size) was immediately overwritten, so this is the
		# only construction that ever took effect.
		if self.dataset.is_local():
			executor = vaex.execution.Executor()
		else:
			executor = vaex.remote.ServerExecutor()

		# `async` is a reserved keyword since Python 3.7, so `async=True` no
		# longer parses; `delay=True` is the spelling used by the other
		# subspace(...) calls in this file.
		subspace = self.dataset.subspace(*expressions, executor=executor, delay=True)

		mean_task = subspace.mean()
		with dialogs.ProgressExecution(self, "Calculating mean", executor=executor) as progress:
			progress.add_task(mean_task).end()
			progress.execute()
		logger.debug("get means")
		means = mean_task.get()
		logger.debug("got means")

		# The variance task needs the means computed in the previous pass.
		variance_task = subspace.var(means=means)
		with dialogs.ProgressExecution(self, "Calculating variance", executor=executor) as progress:
			progress.add_task(variance_task).end()
			progress.execute()
		variances = variance_task.get()

		stds = variances ** 0.5
		sigmas = 3
		ranges = list(zip(means - sigmas * stds, means + sigmas * stds))
		self.table1d.setRanges(pairs, ranges)
		self.fill_range_map()
Example #2
0
def subtract_mean(plot_window):
    """Subtract the mean from every expression of the first layer.

    Does nothing when the plot window has no layers.  Otherwise the means of
    the first layer's expressions are computed behind a progress dialog, the
    viewport range of every axis is shifted by the corresponding mean, each
    expression is replaced by ``"(<expression>) - <mean>"``, all layers are
    updated and a history change is queued.
    """
    if not plot_window.layers:
        return
    layer = plot_window.layers[0]

    executor = vaex.execution.Executor()
    subspace = layer.dataset.subspace(
        *layer.state.expressions, executor=executor, delay=True
    )
    mean_task = subspace.mean()
    with dialogs.ProgressExecution(plot_window, "Calculating mean", executor=executor):
        executor.execute()
    means = mean_task.get()

    shifted_expressions = []
    for expression, mean in zip(layer.state.expressions, means):
        shifted_expressions.append("(%s) - %s" % (expression, mean))

    # First move every viewport range, only then swap in the new expressions.
    for axis in range(len(shifted_expressions)):
        low, high = layer.plot_window.state.ranges_viewport[axis]
        low -= means[axis]
        high -= means[axis]
        layer.plot_window.set_range(low, high, axis)
    for axis, shifted in enumerate(shifted_expressions):
        layer.set_expression(shifted, axis)

    plot_window.update_all_layers()
    plot_window.queue_history_change("task: remove mean")
Example #3
0
def sigma3(plot_window):
    """Set the ranges of the plot window to mean +/- 3 standard deviations.

    Does nothing when the plot window has no layers.  Otherwise the means and
    then the variances of the first layer's expressions are computed (each
    behind its own progress dialog), using a local executor or a server
    executor depending on ``layer.dataset.is_local()``.  The resulting limits
    are applied with ``plot_window.set_ranges`` and a history change is
    queued.
    """
    if not plot_window.layers:
        return
    layer = plot_window.layers[0]

    use_local = layer.dataset.is_local()
    executor = vaex.execution.Executor() if use_local else vaex.remote.ServerExecutor()
    subspace = layer.dataset.subspace(
        *layer.state.expressions, executor=executor, delay=True
    )

    mean_task = subspace.mean()
    with dialogs.ProgressExecution(
        plot_window, "Calculating mean", executor=executor
    ) as progress:
        progress.add_task(mean_task)
        progress.execute()
    logger.debug("get means")
    means = mean_task.get()
    logger.debug("got means")

    # The variance pass is seeded with the means from the first pass.
    variance_task = subspace.var(means=means)
    with dialogs.ProgressExecution(
        plot_window, "Calculating variance", executor=executor
    ) as progress:
        progress.add_task(variance_task)
        progress.execute()
    variances = variance_task.get()

    n_sigma = 3
    spread = n_sigma * variances ** 0.5
    limits = list(zip(means - spread, means + spread))
    axes = range(len(limits))
    plot_window.set_ranges(axes, limits, add_to_history=True, reason="3 sigma region")

    logger.debug("means=%r", means)
    logger.debug("vars=%r", variances)
    logger.debug("limits=%r", limits)
    plot_window.queue_history_change("task: 3 sigma region")
Example #4
0
	def onCalculateMinMax(self):
		"""Compute the min/max of each selected 1d expression and store them as ranges.

		Takes the first expression of every pair selected in ``self.table1d``,
		resets ``self.range_map``, computes the min/max behind a progress
		dialog and passes the result to ``self.table1d.setRanges`` followed by
		``self.fill_range_map()``.  Errors raised while calculating (the log
		message also mentions cancellation) are logged and not propagated.
		"""
		pairs = self.table1d.getSelected()
		# Lazy logging arguments: no formatting cost when debug is disabled and
		# no surprise if `pairs` happens to be a tuple.
		logger.debug("estimate min/max for %r", pairs)
		if not pairs:
			# Nothing selected; `pairs[0]` below would raise IndexError.
			return

		if self.dataset.is_local():
			executor = vaex.execution.Executor(buffer_size=buffer_size)
		else:
			executor = vaex.remote.ServerExecutor()

		expressions = [pair[0] for pair in pairs]
		assert len(pairs[0]) == 1
		self.range_map = {}
		try:
			with dialogs.ProgressExecution(self, "Calculating min/max", executor=executor) as progress:
				# `async` is a reserved keyword since Python 3.7, so `async=True`
				# no longer parses; `delay=True` is the spelling used by the
				# other subspace(...) calls in this file.
				subspace = self.dataset.subspace(*expressions, executor=executor, delay=True)
				minmax = subspace.minmax()
				progress.add_task(minmax).end()
				progress.execute()
			ranges = minmax.get()
			self.table1d.setRanges(pairs, ranges)
			self.fill_range_map()
		except Exception:
			# Not a bare `except:` so KeyboardInterrupt/SystemExit still propagate.
			logger.exception("Error in min/max or cancelled")
Example #5
0
	def calculate_correlation(self, table):
		"""Compute the correlation for every pair selected in ``table``.

		Runs three passes on the same executor, each behind its own progress
		dialog: means, variances (given the means) and finally one correlation
		task per subspace (given its mean and variance).  The results are
		handed to ``table.set_correlations`` as a ``{pair: correlation}`` dict.

		The previous body carried an ``else`` branch of ``if 1:`` and two older
		implementations after an unconditional ``return``; none of that code
		could ever run, so it has been removed.
		"""
		logger.debug("calculate correlation for %r", table)
		pairs = table.getSelected()

		if self.dataset.is_local():
			executor = vaex.execution.Executor(buffer_size=buffer_size)
		else:
			executor = vaex.remote.ServerExecutor()

		# `async` is a reserved keyword since Python 3.7, so `async=True` no
		# longer parses; `delay=True` is the spelling used by the other
		# subspace(...) calls in this file.
		subspaces = self.dataset.subspaces(pairs, executor=executor, delay=True)

		means_promise = subspaces.mean()
		with dialogs.ProgressExecution(self, "Calculating means", executor=executor) as progress:
			progress.add_task(means_promise)
			progress.execute()
		means = means_promise.get()

		variances_promise = subspaces.var(means=means)
		with dialogs.ProgressExecution(self, "Calculating variances", executor=executor) as progress:
			progress.add_task(variances_promise)
			progress.execute()
		variances = variances_promise.get()

		# One correlation task per subspace, all executed in a single pass.
		tasks = []
		with dialogs.ProgressExecution(self, "Calculating correlation", executor=executor) as progress:
			for subspace, mean, variance in zip(subspaces.subspaces, means, variances):
				task = subspace.correlation(means=mean, vars=variance)
				progress.add_task(task).end()
				tasks.append(task)
			progress.execute()
		correlations = [task.get() for task in tasks]

		table.set_correlations(dict(zip(pairs, correlations)))
Example #6
0
	def rankSubspaces(self, table):
		"""Rank the subspaces selected in ``table`` by mutual information.

		Refreshes the range map first and requires every expression of every
		selected pair to be present in ``self.range_map``; otherwise an error
		dialog is shown and nothing is computed.  One mutual information task
		per pair is then executed behind a progress dialog, using the ranges
		from ``self.range_map`` as limits and ``self.grid_size`` as grid size.
		If ``progress.execute()`` returns a falsy value the method returns
		without touching the table; otherwise the results are passed to
		``table.setQualities``.

		The code that used to follow the unconditional ``return`` (an older
		implementation based on ``vaex.kld``) could never run and has been
		removed.
		"""
		self.fill_range_map()

		pairs = table.getSelected()
		missing = [
			expression
			for pair in pairs
			for expression in pair
			if expression not in self.range_map
		]
		if missing:
			for expression in missing:
				logger.debug("missing min/max for %r", expression)
			dialog_error(self, "Missing min/max", "Please calculate the minimum and maximum for the dimensions")
			return

		if self.dataset.is_local():
			executor = vaex.execution.Executor(buffer_size=buffer_size)
		else:
			executor = vaex.remote.ServerExecutor()

		tasks = []
		with dialogs.ProgressExecution(self, "Calculating mutual information", executor=executor) as progress:
			for pair in pairs:
				limits = [self.range_map[expr] for expr in pair]
				# `async` is a reserved keyword since Python 3.7, so
				# `async=True` no longer parses; `delay=True` is the spelling
				# used by the other subspace(...) calls in this file.
				subspace = self.dataset(*pair, executor=executor, delay=True)
				task = subspace.mutual_information(limits=limits, size=self.grid_size)
				progress.add_task(task).end()
				tasks.append(task)
			if not progress.execute():
				return
		# The message used to read "get means", copied from the mean/variance code.
		logger.debug("get mutual information")
		mutual_information = [task.get() for task in tasks]

		table.setQualities(pairs, mutual_information)