def Commit(self):
    """Fetch data for the selected experiments and send it downstream.

    Connects first (``self.Connect()``) when ``self.dbc`` is not set, then
    gathers ids from the selected rows of ``self.experimentsWidget``,
    retrieves one table for them through ``self.dbc.get_single_data`` and
    emits it on the "Example table" channel with ``taxid`` and
    ``genesinrows`` data hints attached.
    """
    if not self.dbc:
        self.Connect()

    # Column 5 of each selected item holds a comma-separated string of ids.
    ids = []
    for item in self.experimentsWidget.selectedItems():
        ids.extend(str(item.text(5)).split(","))

    pb = OWGUI.ProgressBar(self, iterations=1000)
    try:
        table = self.dbc.get_single_data(
            ids=ids,
            callback=pb.advance,
            exclude_constant_labels=self.excludeconstant)
    finally:
        # Always close the progress bar, even when the download fails.
        pb.finish()

    # Imported locally, as in the original code.
    from Orange.orng.orngDataCaching import data_hints

    # NOTE(review): "352472" is presumably the Dictyostelium discoideum
    # taxonomy id and 10.0 presumably a hint weight -- confirm against
    # data_hints.set_hint.
    data_hints.set_hint(table, "taxid", "352472", 10.0)
    data_hints.set_hint(table, "genesinrows", False, 10.0)

    self.send("Example table", table)
def chipdata(self, data):
    """Accept the input data sets, validate them and compute distances.

    ``data`` is iterated as ``(name, tables)`` pairs. Every table gets the
    pair's name stored in its ``dirname`` and ``strain`` attributes and is
    collected into ``self.data``. At least two tables are required, and all
    of them must have the same attribute names and the same length;
    otherwise ``self.data`` is reset to ``[]`` and an error is shown in
    ``self.infob``. On success ``self.computeMatrix()`` is called.
    """
    self.data = []
    if not data:
        self.infoa.setText('No data on input.')
        return

    self.infob.setText("")
    numFiles = sum(len(entry[1]) for entry in data)
    lenSD = len(data)
    self.infoa.setText("%d set%s, total of %d data file%s." %
                       (lenSD, "" if lenSD == 1 else "s",
                        numFiles, "" if numFiles == 1 else "s"))

    for name, etList in data:
        for et in etList:
            et.dirname = name
            et.strain = name
            self.data.append(et)

    if len(self.data) < 2:
        self.data = []
        self.infob.setText(
            'Error: not enough data, aborting distance computation.')
        return

    # Compare sorted *names* rather than sorting the attribute lists in
    # place: this leaves the input domains untouched and does not depend on
    # how attribute objects happen to order themselves.
    first = self.data[0]
    refNames = sorted(attr.name for attr in first.domain.attributes)
    numEx = len(first)
    for et in self.data[1:]:
        names = sorted(attr.name for attr in et.domain.attributes)
        if names != refNames:
            self.data = []
            self.infob.setText(
                "Error: data files contain different attributes, aborting distance computation."
            )
            return
        if len(et) != numEx:
            self.data = []
            self.infob.setText(
                "Error: data files contain unequal number of examples, aborting distance computation."
            )
            return

    # compute distances
    pb = OWGUI.ProgressBar(self, iterations=len(self.data))
    try:
        self.computeMatrix()
    finally:
        # Close the progress bar even if the computation fails.
        pb.finish()
def update_distances(self, base_indices=()):
    """Recompute the experiment distances.

    For every group in ``self.groups`` the distance between the group's
    base column and every data column is collected; the result is stored
    in ``self.distances`` as one list per group (``None`` for a group
    whose base index is ``None``).

    ``base_indices`` holds one base column index (or ``None``) per group.
    When left at the default ``()`` the indices are taken from each
    group's index list at ``self.selected_base_group_index()``.
    """
    distance = self.selected_distance()
    if base_indices == ():
        base_group_index = self.selected_base_group_index()
        base_indices = [ind[base_group_index] for _, ind in self.groups]
    assert len(base_indices) == len(self.groups)

    n_columns = len(self.data.domain.attributes)
    base_distances = []
    pb = OWGUI.ProgressBar(self, len(self.groups) * n_columns)
    try:
        # The returned values are not used here; the call is kept from the
        # original code in case it has side effects on the cache.
        self.get_cached_distances(distance)

        for _, base_index in zip(self.groups, base_indices):
            if base_index is None:
                base_distances.append(None)
                continue

            # Base column of the group
            base_vec = exp.linearize(self.data, [base_index])
            distances = []
            # Compute the distances between base column
            # and all the rest data columns.
            for i in range(n_columns):
                if i == base_index:
                    dist = 0.0
                else:
                    # Look the cache up once instead of twice per hit.
                    dist = self.get_cached_distance(distance, i, base_index)
                    if dist is None:
                        vec_i = exp.linearize(self.data, [i])
                        dist = distance(base_vec, vec_i)
                        self.store_distance(distance, i, base_index, dist)
                distances.append(dist)
                pb.advance()
            base_distances.append(distances)
    finally:
        # Close the progress bar even if a distance computation fails.
        pb.finish()

    self.distances = base_distances
def compute(self, res=None, dm=None):
    """Load the selected gene set collections and, if needed, run GSEA.

    ``res`` and ``dm`` are stored on ``self.res`` and ``self.dm``. The
    enrichment is computed (and ``self.res`` overwritten with the result)
    only when ``res`` is ``None`` and ``self.data`` is set.
    """
    collectionNames = [self.geneSel[a] for a in self.gridSel]
    organism = self.organismTaxids[self.organismIndex]
    if self.gsgo:
        collectionNames.append((("GO",), organism))
    if self.gskegg:
        collectionNames.append((("KEGG",), organism))
    self.geneSets = obiGeneSets.collections(*collectionNames)

    self.resultsOut(None)
    qApp.processEvents()
    self.res = res
    self.dm = dm

    clearListView(self.listView)
    self.addComment("Computing...")
    qApp.processEvents()

    self.phenVar = self.phenCands[self.selectedPhenVar][0]
    self.geneVar = self.geneCands[self.selectedGeneVar]

    if self.res is None and self.data:
        self.setSelMode(False)

        pb = OWGUI.ProgressBar(self, iterations=self.perms + 2)
        try:
            if hasattr(self, "btnApply"):
                self.btnApply.setFocus()

            kwargs = {}
            dkwargs = {"phenVar": self.phenVar, "geneVar": self.geneVar}

            # NOTE(review): class selection, atLeast and the permutation
            # type are set only when the data does not already carry
            # correlations -- nesting reconstructed, please confirm.
            if not obiGsea.already_have_correlations(self.data):
                selectedClasses = self.psel.getSelection()
                if len(selectedClasses[0]) == 0 or \
                        len(selectedClasses[1]) == 0:
                    # The ``finally`` below closes the progress bar on
                    # this early exit as well.
                    self.addComment("Phenotype group empty. Stopped.")
                    return

                dkwargs["classValues"] = selectedClasses
                dkwargs["atLeast"] = self.atLeast

                permtype = self.permutationTypes[self.ptype][1]
                kwargs["permutation"] = \
                    "class" if permtype == "p" else "genes"

            # Each limit applies only when its checkbox flag is set;
            # otherwise a non-restrictive default is used.
            kwargs["minSize"] = \
                self.minSubsetSize if self.minSubsetSizeC else 1
            kwargs["maxSize"] = \
                self.maxSubsetSize if self.maxSubsetSizeC else 1000000
            kwargs["minPart"] = \
                self.minSubsetPart / 100.0 if self.minSubsetPartC else 0.0

            # create gene matcher
            matchers = [obiGene.GMKEGG(organism)]
            if organism == "352472":
                matchers.append(obiGene.GMDicty())
            genematcher = obiGene.matcher([matchers])

            gso = obiGsea.GSEA(self.data, matcher=genematcher, **dkwargs)

            for gs in self.geneSets:
                gso.addGenesets([gs])
                qApp.processEvents()

            self.res = gso.compute(n=self.perms, callback=pb.advance,
                                   **kwargs)
        finally:
            pb.finish()
def update_scores(self):
    """ Compute the scores and update the histogram.

    Clears the plot, computes ``self.scores`` (and ``self.null_dist`` when
    ``self.compute_null`` is set) for the current target selection, then
    refreshes the histogram, its boundaries, the optional null curve and
    the group markers. Returns early without computing anything when there
    is no data, no selected label, or (for two sample tests) when one of
    the two sample groups is empty.
    """
    self.clear_plot()
    self.error(0)
    label, values = self.current_target_selection
    if not self.data or label is None:
        return

    score_type, score_func, hist_key, two_sample_test = \
        self.score_methods[self.method_index]

    if two_sample_test:
        target = self.targets
        score_target = set(target)
        ind1, ind2 = score_func(
            self.data, self.genes_in_columns).test_indices(score_target)
        if not len(ind1) or not len(ind2):
            self.error(
                0, "Target labels must exclude/include at least one value.")
            return
    else:
        # ANOVA should use all labels.
        target = dict(self.data_labels)[label]
        if self.genes_in_columns:
            target = [(label, t) for t in target]
        score_target = target
        # TODO: Check that each label has more than one measurement,
        # raise warning otherwise.

    pb = OWGUI.ProgressBar(
        self,
        (4 + self.permutations_count) if self.compute_null else 3)
    try:
        self.scores = dict(
            self.compute_scores(self.data, score_func,
                                self.genes_in_columns, score_target,
                                advance=pb.advance))
        pb.advance()

        if self.compute_null:
            self.null_dist = self.compute_null_distribution(
                self.data, score_func, self.genes_in_columns,
                score_target, self.permutations_count,
                advance=pb.advance)
        else:
            self.null_dist = []
        pb.advance()

        htype = self.histType[hist_key]
        self.histogram.type = htype
        if self.scores:
            self.histogram.setValues(self.scores.values())
            low, high = self.thresholds.get(
                score_type, (float("-inf"), float("inf")))
            minx, maxx = self.histogram.minx, self.histogram.maxx
            low, high = max(low, minx), min(high, maxx)

            if htype == "hiTail":
                low = high
            if htype == "lowTail":
                high = low

            self.histogram.setBoundary(low, high)

            if self.compute_null and self.null_dist:
                nullY, nullX = numpy.histogram(
                    self.null_dist, bins=self.histogram.xData)
                # Force true division: integer counts divided by an
                # integer permutation count would floor to 0 on Python 2.
                nullY = nullY / float(self.permutations_count)
                self.histogram.nullCurve = self.histogram.addCurve(
                    "nullCurve", Qt.black, Qt.black, 6,
                    symbol=QwtSymbol.NoSymbol,
                    style=QwtPlotCurve.Steps,
                    xData=nullX, yData=nullY)

                # Grow the axes so that both the score histogram and the
                # null curve fit, with a 5% margin on every side.
                minx = min(min(nullX), minx)
                maxx = max(max(nullX), maxx)
                miny = min(min(nullY), self.histogram.miny)
                maxy = max(max(nullY), self.histogram.maxy)
                spanx, spany = maxx - minx, maxy - miny
                self.histogram.setAxisScale(
                    QwtPlot.xBottom,
                    minx - 0.05 * spanx, maxx + 0.05 * spanx)
                self.histogram.setAxisScale(
                    QwtPlot.yLeft,
                    miny - 0.05 * spany, maxy + 0.05 * spany)

            # (upper spin visible, lower spin visible) per histogram type.
            state = dict(hiTail=(False, True),
                         lowTail=(True, False),
                         twoTail=(True, True))
            spins = (self.upperBoundarySpin, self.lowerBoundarySpin)
            for spin, visible in zip(spins, state[self.histogram.type]):
                spin.setVisible(visible)

            # If this is a two sample test add markers to the left and
            # right plot indicating which target group is over-expressed
            # in that part
            if self.method_index in [0, 2, 6]:
                # fold change (method 0) is centered on 1.0, the other
                # scores on 0.0
                center = 1 if self.method_index == 0 else 0
                x1 = (self.histogram.minx + center) / 2.0
                x2 = (self.histogram.maxx + center) / 2.0
                y1 = y2 = self.histogram.maxy

                if self.genes_in_columns:
                    label = target[0][0]
                    target_values = [t[1] for t in target]
                    values = dict(self.data_labels)[label]
                else:
                    target_values = target
                    values = self.data_labels[0][1]

                left = ", ".join(
                    v for v in values if v not in target_values)
                right = ", ".join(
                    v for v in values if v in target_values)

                self.histogram.addMarker(left, x1, y1)
                self.histogram.addMarker(right, x2, y2)
            self.warning(0)
        else:
            self.warning(0, "No scores obtained.")

        self.histogram.replot()
        pb.advance()
    finally:
        # Close the progress bar even if scoring or plotting fails.
        pb.finish()

    self.update_data_info_label()