def onCalculateMinMax3Sigma(self):
    """Set the range of each selected 1d expression to mean +/- 3 sigma.

    Reads the selection from ``self.table1d``, computes the mean and then the
    variance of every selected expression (each pass is run inside its own
    progress dialog), and writes ``(mean - 3*std, mean + 3*std)`` back through
    ``self.table1d.setRanges`` before refreshing ``self.range_map`` via
    ``self.fill_range_map()``.

    NOTE(review): the arithmetic below assumes ``mean()``/``var()`` deliver
    array-like results supporting ``**`` and elementwise ``-``/``+``
    (presumably numpy arrays) -- confirm.
    """
    pairs = self.table1d.getSelected()
    # Each selected entry of the 1d table holds its expression at index 0.
    expressions = [pair[0] for pair in pairs]

    # Pick the executor that matches where the dataset lives.
    if self.dataset.is_local():
        executor = vaex.execution.Executor()
    else:
        executor = vaex.remote.ServerExecutor()

    # `delay=True` (the older `async` spelling is a reserved word from
    # Python 3.7 on): tasks are only scheduled here and run on execute().
    subspace = self.dataset.subspace(*expressions, executor=executor, delay=True)

    mean_task = subspace.mean()
    with dialogs.ProgressExecution(self, "Calculating mean", executor=executor) as progress:
        progress.add_task(mean_task).end()
        progress.execute()
    logger.debug("get means")
    means = mean_task.get()
    logger.debug("got means")

    # The variance pass needs the means from the first pass.
    variance_task = subspace.var(means=means)
    with dialogs.ProgressExecution(self, "Calculating variance", executor=executor) as progress:
        progress.add_task(variance_task).end()
        progress.execute()
    variances = variance_task.get()

    stds = variances ** 0.5
    sigmas = 3
    ranges = list(zip(means - sigmas * stds, means + sigmas * stds))
    self.table1d.setRanges(pairs, ranges)
    self.fill_range_map()
def subtract_mean(plot_window):
    """Re-centre the first layer of *plot_window* on its mean.

    Computes the mean of every expression of the first layer, shifts each
    viewport range by that mean, rewrites each expression as
    ``(<expression>) - <mean>``, then refreshes all layers and records the
    change in the history. Does nothing when the window has no layers.
    """
    if not plot_window.layers:
        return

    first_layer = plot_window.layers[0]
    expressions = first_layer.state.expressions

    runner = vaex.execution.Executor()
    space = first_layer.dataset.subspace(*expressions, executor=runner, delay=True)
    pending_means = space.mean()
    with dialogs.ProgressExecution(plot_window, "Calculating mean", executor=runner):
        runner.execute()
    mean_values = pending_means.get()

    shifted = []
    for expression, mean in zip(expressions, mean_values):
        shifted.append("(%s) - %s" % (expression, mean))

    # First move every viewport range, only then swap in the new expressions.
    window = first_layer.plot_window
    for axis, mean in zip(range(len(shifted)), mean_values):
        low, high = window.state.ranges_viewport[axis]
        first_layer.plot_window.set_range(low - mean, high - mean, axis)

    for axis, expression in enumerate(shifted):
        first_layer.set_expression(expression, axis)

    plot_window.update_all_layers()
    plot_window.queue_history_change("task: remove mean")
def sigma3(plot_window):
    """Zoom *plot_window* to the mean +/- 3 sigma region of its first layer.

    Runs a mean pass and then a variance pass over the first layer's
    expressions (each inside its own progress dialog), applies the resulting
    limits with ``set_ranges`` and queues a history entry. Does nothing when
    the window has no layers.
    """
    if not plot_window.layers:
        return

    first_layer = plot_window.layers[0]
    dataset = first_layer.dataset

    # Pick the executor that matches where the dataset lives.
    runner = vaex.execution.Executor() if dataset.is_local() else vaex.remote.ServerExecutor()

    space = dataset.subspace(*first_layer.state.expressions, executor=runner, delay=True)

    mean_task = space.mean()
    with dialogs.ProgressExecution(plot_window, "Calculating mean", executor=runner) as progress:
        progress.add_task(mean_task)
        progress.execute()
    logger.debug("get means")
    mean_values = mean_task.get()
    logger.debug("got means")

    # The variance pass needs the means from the first pass.
    variance_task = space.var(means=mean_values)
    with dialogs.ProgressExecution(plot_window, "Calculating variance", executor=runner) as progress:
        progress.add_task(variance_task)
        progress.execute()
    variance_values = variance_task.get()

    sigmas = 3
    spread = sigmas * variance_values ** 0.5
    lower = mean_values - spread
    upper = mean_values + spread
    limits = list(zip(lower, upper))

    axes = range(len(limits))
    plot_window.set_ranges(axes, limits, add_to_history=True, reason="3 sigma region")

    logger.debug("means=%r", mean_values)
    logger.debug("vars=%r", variance_values)
    logger.debug("limits=%r", limits)
    plot_window.queue_history_change("task: 3 sigma region")
def onCalculateMinMax(self):
    """Compute min/max for the selected 1d expressions and store them as ranges.

    Reads the selection from ``self.table1d``, resets ``self.range_map``, runs
    a min/max pass inside a progress dialog and writes the result back with
    ``self.table1d.setRanges`` followed by ``self.fill_range_map()``.

    Any exception raised while computing (the log message indicates this
    includes the user cancelling) is logged and swallowed, so the method
    never raises from the calculation itself.
    """
    pairs = self.table1d.getSelected()
    # Lazy logger arguments: also correct should `pairs` ever be a tuple.
    logger.debug("estimate min/max for %r", pairs)

    # Pick the executor that matches where the dataset lives.
    if self.dataset.is_local():
        executor = vaex.execution.Executor(buffer_size=buffer_size)
    else:
        executor = vaex.remote.ServerExecutor()

    # Each selected entry of the 1d table holds its expression at index 0.
    expressions = [pair[0] for pair in pairs]
    assert len(pairs[0]) == 1

    self.range_map = {}
    try:
        with dialogs.ProgressExecution(self, "Calculating min/max", executor=executor) as progress:
            # `delay=True` (the older `async` spelling is a reserved word
            # from Python 3.7 on): the task only runs on execute().
            subspace = self.dataset.subspace(*expressions, executor=executor, delay=True)
            minmax = subspace.minmax()
            progress.add_task(minmax).end()
            progress.execute()
        ranges = minmax.get()
        self.table1d.setRanges(pairs, ranges)
        self.fill_range_map()
    except Exception:
        # Best effort: keep the GUI alive, but let KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        logger.exception("Error in min/max or cancelled")
def calculate_correlation(self, table):
    """Compute the correlation of every selected pair and show it in *table*.

    The calculation runs in three passes over ``self.dataset``, each inside
    its own progress dialog: means, variances (which need the means), and
    finally one correlation task per selected pair. The results are handed to
    ``table.set_correlations`` as a mapping ``pair -> correlation``.

    Args:
        table: object providing ``getSelected()`` (the selected pairs) and
            ``set_correlations(mapping)``.
            NOTE(review): pairs are used as dict keys, so they are presumably
            tuples of expression strings -- confirm against the caller.
    """
    logger.debug("calculate correlation for %r", table)
    pairs = table.getSelected()

    # Pick the executor that matches where the dataset lives.
    if self.dataset.is_local():
        executor = vaex.execution.Executor(buffer_size=buffer_size)
    else:
        executor = vaex.remote.ServerExecutor()

    # `delay=True` (the older `async` spelling is a reserved word from
    # Python 3.7 on): tasks are only scheduled here and run on execute().
    subspaces = self.dataset.subspaces(pairs, executor=executor, delay=True)

    means_promise = subspaces.mean()
    with dialogs.ProgressExecution(self, "Calculating means", executor=executor) as progress:
        progress.add_task(means_promise)
        progress.execute()
    means = means_promise.get()

    variances_promise = subspaces.var(means=means)
    with dialogs.ProgressExecution(self, "Calculating variances", executor=executor) as progress:
        progress.add_task(variances_promise)
        progress.execute()
    variances = variances_promise.get()

    # One correlation task per pair, all executed in a single pass.
    tasks = []
    with dialogs.ProgressExecution(self, "Calculating correlation", executor=executor) as progress:
        for subspace, mean, variance in zip(subspaces.subspaces, means, variances):
            task = subspace.correlation(means=mean, vars=variance)
            progress.add_task(task).end()
            tasks.append(task)
        progress.execute()

    correlations = [task.get() for task in tasks]
    table.set_correlations(dict(zip(pairs, correlations)))
def rankSubspaces(self, table):
    """Rank the selected subspaces of *table* by mutual information.

    Refreshes ``self.range_map`` and checks that it has an entry for every
    expression in the selection. If any is missing, an error dialog is shown
    and nothing is calculated. Otherwise one mutual-information task per
    selected pair is run inside a progress dialog and the results are passed
    to ``table.setQualities``. If ``progress.execute()`` returns a falsy
    value, the table is left untouched.

    Args:
        table: object providing ``getSelected()`` (the selected pairs) and
            ``setQualities(pairs, qualities)``.
    """
    self.fill_range_map()
    pairs = table.getSelected()

    missing = [
        expression
        for pair in pairs
        for expression in pair
        if expression not in self.range_map
    ]
    if missing:
        for expression in missing:
            logger.warning("missing min/max for %r", expression)
        dialog_error(self, "Missing min/max", "Please calculate the minimum and maximum for the dimensions")
        return

    # Pick the executor that matches where the dataset lives.
    if self.dataset.is_local():
        executor = vaex.execution.Executor(buffer_size=buffer_size)
    else:
        executor = vaex.remote.ServerExecutor()

    tasks = []
    with dialogs.ProgressExecution(self, "Calculating mutual information", executor=executor) as progress:
        for pair in pairs:
            limits = [self.range_map[expr] for expr in pair]
            # `delay=True` (the older `async` spelling is a reserved word
            # from Python 3.7 on): the task only runs on execute().
            subspace = self.dataset(*pair, executor=executor, delay=True)
            task = subspace.mutual_information(limits=limits, size=self.grid_size)
            progress.add_task(task).end()
            tasks.append(task)
        if not progress.execute():
            return

    logger.debug("get mutual information")
    mutual_information = [task.get() for task in tasks]
    table.setQualities(pairs, mutual_information)