Example #1
    def Load(self, pb=None):

        if self.__state == OWGOEnrichmentAnalysis.Ready:
            go_files, tax_files = serverfiles.listfiles(
                "GO"), serverfiles.listfiles("Taxonomy")
            calls = []
            pb, finish = (gui.ProgressBar(self, 0),
                          True) if pb is None else (pb, False)
            count = 0
            if not tax_files:
                calls.append(("Taxonomy", "ncbi_taxnomy.tar.gz"))
                count += 1
            org = self.annotationCodes[min(self.annotationIndex,
                                           len(self.annotationCodes) - 1)]
            if org != self.loadedAnnotationCode:
                count += 1
                if self.annotationFiles[org] not in go_files:
                    calls.append(("GO", self.annotationFiles[org]))
                    count += 1

            if "gene_ontology_edit.obo.tar.gz" not in go_files:
                calls.append(("GO", "gene_ontology_edit.obo.tar.gz"))
                count += 1
            if not self.ontology:
                count += 1
            pb.iter += count * 100

            for args in calls:
                serverfiles.localpath_download(*args, callback=pb.advance)

            i = len(calls)
            if not self.ontology:
                self.ontology = go.Ontology(
                    progress_callback=lambda value: pb.advance())
                i += 1

            if org != self.loadedAnnotationCode:
                self.annotations = None
                gc.collect()  # Force a garbage collection pass
                code = self.annotationFiles[org].split(".")[-3]
                self.annotations = go.Annotations(
                    code,
                    genematcher=gene.GMDirect(),
                    progress_callback=lambda value: pb.advance())
                i += 1
                self.loadedAnnotationCode = org
                count = defaultdict(int)
                geneSets = defaultdict(set)

                for anno in self.annotations.annotations:
                    count[anno.evidence] += 1
                    geneSets[anno.evidence].add(anno.geneName)
                for etype in go.evidenceTypesOrdered:
                    ecb = self.evidenceCheckBoxDict[etype]
                    ecb.setEnabled(bool(count[etype]))
                    ecb.setText(etype + ": %i annots(%i genes)" %
                                (count[etype], len(geneSets[etype])))
            if finish:
                pb.finish()
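
Most snippets on this page follow the same small protocol: construct gui.ProgressBar with the widget and the expected number of steps, call advance() once per completed step, and call finish() so the bar is removed. A minimal sketch of that pattern, assuming Orange3 is installed; process_items and handle are illustrative names, not part of any widget shown here:

from Orange.widgets import gui


def process_items(widget, items, handle):
    """Run handle() on each item while advancing a progress bar on the widget."""
    pb = gui.ProgressBar(widget, len(items))  # one tick per item
    try:
        for item in items:
            handle(item)   # per-item work supplied by the caller
            pb.advance()   # move the bar forward one step
    finally:
        pb.finish()        # always remove the bar, even if handle() raises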
    def __init__(self):
        super().__init__()
        self.progress = gui.ProgressBar(self, 100)
        form = QFormLayout()
        form.setFieldGrowthPolicy(form.AllNonFixedFieldsGrow)
        form.setVerticalSpacing(25)
        form.setLabelAlignment(Qt.AlignLeft)
        gui.widgetBox(self.controlArea, True, orientation=form)
        form.addRow(
            "ENRON mail directory:",
            gui.lineEdit(None,
                         self,
                         "directory",
                         controlWidth=200,
                         orientation=Qt.Horizontal,
                         tooltip="Tooltip",
                         placeholderText="Directory"))

        form.addRow(
            "glob pattern:",
            gui.lineEdit(None,
                         self,
                         "_glob",
                         controlWidth=100,
                         orientation=Qt.Horizontal,
                         tooltip="Tooltip",
                         placeholderText=""))

        form.addRow(gui.button(None, self, 'load', self.load))
Example #3
 def commit(self):
     self.progressbar = gui.ProgressBar(self, self.pref_n_iterations)
     Algo = DECOMPOSITION_ALGO[self.pref_algorithm][1]
     init_type = INITIALIZATION_ALGO[
         self.pref_initialization].lower().replace(' ', '_')
     # Update rank on object-types
     maxrank = defaultdict(int)
     for rel in self.graph.relations:
         rows, cols = rel.data.shape
         row_type, col_type = rel.row_type, rel.col_type
         if rows > maxrank[row_type]:
             maxrank[row_type] = row_type.rank = max(
                 5, int(rows * (self.pref_rank / 100)))
         if cols > maxrank[col_type]:
             maxrank[col_type] = col_type.rank = max(
                 5, int(cols * (self.pref_rank / 100)))
     # Run the algo ...
     try:
         self.fuser = Algo(
             init_type=init_type,
             max_iter=self.pref_n_iterations,
             random_state=0,
             callback=lambda *args: self.progressbar.advance()).fuse(
                 self.graph)
     finally:
         self.progressbar.finish()
     self.fuser.name = self.pref_algo_name
     self.send(Output.FUSER, FittedFusionGraph(self.fuser))
    def progressBar(self, iterations=0):
        """
        Context manager for progress bar.

        Using it ensures that the progress bar is removed at the end without
        needing a `finally` block.

        Usage:

            with self.progressBar(20) as progress:
                ...
                progress.advance()

        or

            with self.progressBar() as progress:
                ...
                progress.advance(0.15)

        or

            with self.progressBar():
                ...
                self.progressBarSet(50)

        :param iterations: the number of iterations (optional)
        :type iterations: int
        """
        progress_bar = gui.ProgressBar(self, iterations)
        try:
            yield progress_bar
        finally:
            progress_bar.finish()
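
As written, progressBar() is a generator; for the `with` statements shown in its docstring to work, it has to be wrapped with contextlib.contextmanager, which is not visible in the snippet above. A standalone sketch of the same helper with the assumed decorator made explicit (progress_bar is an illustrative name):

import contextlib

from Orange.widgets import gui


@contextlib.contextmanager
def progress_bar(widget, iterations=0):
    """Yield a gui.ProgressBar and make sure it is removed afterwards."""
    progress = gui.ProgressBar(widget, iterations)
    try:
        yield progress
    finally:
        progress.finish()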
    def commit(self):
        self.Error.clear()
        # Get the currently selected TreeItem (may be None if nothing is selected).
        selected_item = self.experimentsWidget.currentItem()

        if selected_item:
            self.selected_item = selected_item.gen_data_id
            # init progress bar
            self.progress_bar = gui.ProgressBar(self, iterations=1)
            # status message
            self.setStatusMessage('downloading experiment data')

            worker = Worker(
                self.res.download_etc_data,
                selected_item.gen_data_id,
                table_name=selected_item.data_name,
                progress_callback=True,
            )

            worker.signals.progress.connect(self.progress_advance)
            worker.signals.result.connect(self.send_to_output)
            worker.signals.error.connect(self.handle_error)

            # move download process to worker thread
            self.threadpool.start(worker)
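
Several snippets here hand work to a QThreadPool through a Worker object with progress/result/error signals, and advance the progress bar from the progress signal. That helper is not shown; the sketch below is one plausible shape for it under AnyQt, with WorkerSignals and Worker as illustrative names rather than the add-on's actual implementation:

import sys

from AnyQt.QtCore import QObject, QRunnable, Signal, Slot


class WorkerSignals(QObject):
    progress = Signal()      # emitted once per unit of work
    result = Signal(object)  # emitted with the wrapped function's return value
    error = Signal(tuple)    # emitted with sys.exc_info() on failure


class Worker(QRunnable):
    def __init__(self, fn, *args, progress_callback=False, **kwargs):
        super().__init__()
        self.fn, self.args, self.kwargs = fn, args, kwargs
        self.signals = WorkerSignals()
        if progress_callback:
            # Let the wrapped function report progress by emitting the signal.
            self.kwargs["progress_callback"] = self.signals.progress.emit

    @Slot()
    def run(self):
        try:
            result = self.fn(*self.args, **self.kwargs)
        except Exception:
            self.signals.error.emit(sys.exc_info())
        else:
            self.signals.result.emit(result)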
Example #6
 def _updateEnrichment(self):
     if self.clusterDataset is not None and \
             self.__state == OWGOEnrichmentAnalysis.Ready:
         pb = gui.ProgressBar(self, 100)
         self.Load(pb=pb)
         graph = self.Enrichment(pb=pb)
         self.FilterUnknownGenes()
         self.SetGraph(graph)
Example #7
    def relayout(self):
        if self.graph is None or self.graph.number_of_nodes() <= 1:
            return
        self.progressbar = gui.ProgressBar(self, FR_ITERATIONS)

        distmatrix = self.items_matrix
        # Clear any previous size warning, then re-raise it if it still applies.
        self.Warning.distance_matrix_size.clear()
        if distmatrix is not None and distmatrix.shape[
                0] != self.graph.number_of_nodes():
            self.Warning.distance_matrix_size()
            distmatrix = None

        self.relayout_button.setDisabled(True)
        self.view.relayout(randomize=False, weight=distmatrix)
Example #8
 def run(self):
     graph = self.parent_widget.graph
     y_full = self.parent_widget.data.Y
     if not self.attrs:
         self.attrs = self.score_heuristic()
     if not self.progress:
         self.progress = gui.ProgressBar(
             self,
             len(self.attrs) * (len(self.attrs) - 1) / 2)
     for i in range(self.i, len(self.attrs)):
         ind1 = graph.attribute_name_index[self.attrs[i]]
         for j in range(self.j, i):
             if not self.running:
                 self.i, self.j = i, j
                 if not self.projectionTable.selectedIndexes():
                     self.projectionTable.selectRow(0)
                 self.button.setEnabled(True)
                 return
             ind2 = graph.attribute_name_index[self.attrs[j]]
             X = graph.scaled_data[[ind1, ind2], :]
             valid = graph.get_valid_list([ind1, ind2])
             X = X[:, valid].T
             if X.shape[0] < self.k:
                 self.progress.advance()
                 continue
             y = y_full[valid]
             n_neighbors = min(self.k, len(X) - 1)
             knn = NearestNeighbors(n_neighbors=n_neighbors).fit(X)
             ind = knn.kneighbors(return_distance=False)
             if self.parent_widget.data.domain.has_discrete_class:
                 score = np.sum(y[ind] == y.reshape(-1, 1)) / (
                     len(y_full) * n_neighbors)
             else:
                 score = r2_score(y, np.mean(
                     y[ind], axis=1)) * (len(y) / len(y_full))
             pos = bisect_left(self.scores, score)
             self.projectionTableModel.insertRow(
                 len(self.scores) - pos, [
                     QStandardItem("{:.4f}".format(score)),
                     QStandardItem(self.attrs[j]),
                     QStandardItem(self.attrs[i])
                 ])
             self.scores.insert(pos, score)
             self.progress.advance()
         self.j = 0
     self.progress.finish()
     if not self.projectionTable.selectedIndexes():
         self.projectionTable.selectRow(0)
     self.button.setText("Finished")
     self.button.setEnabled(False)
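
For a discrete class, the score computed above is the average fraction of the k nearest neighbours that share an instance's class in the candidate 2-D projection, scaled by the share of valid rows. A self-contained sketch of that measure without the valid-row weighting, using plain scikit-learn and toy data (knn_class_agreement is an illustrative name):

import numpy as np
from sklearn.neighbors import NearestNeighbors


def knn_class_agreement(X, y, k=10):
    """Average fraction of the k nearest neighbours sharing the query's class."""
    n_neighbors = min(k, len(X) - 1)
    knn = NearestNeighbors(n_neighbors=n_neighbors).fit(X)
    ind = knn.kneighbors(return_distance=False)  # neighbour indices, self excluded
    return np.sum(y[ind] == y.reshape(-1, 1)) / (len(y) * n_neighbors)


# Toy usage: two well-separated classes give a score close to 1.
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(5, 1, (50, 2))])
y = np.array([0] * 50 + [1] * 50)
print(knn_class_agreement(X, y))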
    def load_experiments(self):
        if self.res:
            # init progress bar
            self.progress_bar = gui.ProgressBar(self, iterations=2)
            # status message
            self.setStatusMessage('downloading experiments')

            worker = Worker(self.res.fetch_etc_objects, progress_callback=True)
            worker.signals.progress.connect(self.progress_advance)
            worker.signals.result.connect(self.load_experiments_result)
            worker.signals.error.connect(self.handle_error)

            # move download process to worker thread
            self.threadpool.start(worker)
    def run_download_tasks(self):
        self.cancelButton.setEnabled(True)
        # init progress bar

        self.progress_bar = gui.ProgressBar(self,
                                            iterations=len(self.workers) * 100)

        # status message
        self.setStatusMessage('downloading')

        # move workers to threadpool
        for worker in self.workers:
            self.threadpool.start(worker)
        self.filesView.setDisabled(True)
        # reset list of workers
        self.workers = list()
    def relayout(self):
        if self.graph is None or self.graph.number_of_nodes() <= 1:
            return
        self.progressbar = gui.ProgressBar(self, FR_ITERATIONS)

        distmatrix = self.items_matrix
        # Clear any previous size warning (id 17), then re-raise it if needed.
        self.warning(17)
        if distmatrix is not None and distmatrix.shape[
                0] != self.graph.number_of_nodes():
            self.warning(
                17,
                "Distance matrix size doesn't match the number of network nodes. Not using it."
            )
            distmatrix = None

        self.relayout_button.setDisabled(True)
        self.view.relayout(randomize=False, weight=distmatrix)
    def initialize_files_view(self):
        # self.retryButton.hide()

        # clear view
        self.filesView.clear()
        # init progress bar
        self.progress_bar = gui.ProgressBar(self, iterations=3)
        # status message
        self.setStatusMessage('initializing')

        worker = Worker(evaluate_files_state, progress_callback=True)
        worker.signals.progress.connect(self.__progress_advance)
        worker.signals.result.connect(self.set_files_list)
        worker.signals.error.connect(self.handle_worker_exception)

        # move download process to worker thread
        self.threadpool.start(worker)
        self.setEnabled(False)
    def update_distances(self, base_indices=()):
        """Recompute the experiment distances.
        """
        distance = self.selected_distance()
        if base_indices == ():
            base_group_index = self.selected_base_group_index()
            base_indices = [ind[base_group_index] \
                            for _, ind in self.groups]

        assert (len(base_indices) == len(self.groups))

        base_distances = []
        attributes = self.data.domain.attributes
        pb = gui.ProgressBar(self, len(self.groups) * len(attributes))

        for (group, indices), base_index in zip(self.groups, base_indices):
            # Base column of the group
            if base_index is not None:
                base_vec = take_columns(self.data, [base_index])
                distances = []
                # Compute the distances between base column
                # and all the rest data columns.
                for i in range(len(attributes)):
                    if i == base_index:
                        distances.append(0.0)
                    elif self.get_cached_distance(distance, i,
                                                  base_index) is not None:
                        distances.append(
                            self.get_cached_distance(distance, i, base_index))
                    else:
                        vec_i = take_columns(self.data, [i])
                        dist = distance(base_vec, vec_i)
                        self.store_distance(distance, i, base_index, dist)
                        distances.append(dist)
                    pb.advance()

                base_distances.append(distances)
            else:
                base_distances.append(None)

        pb.finish()
        self.distances = base_distances
 def retrieve(self, url):
     if not url: return
     progress = gui.ProgressBar(self, 10)
     for i in range(3):
         progress.advance()
     try:
         table = Table.from_url(url)
     except Exception as e:
         import traceback
         log.error(traceback.format_exc())
         log.error("Couldn't load spreadsheet %s: %s", url, e)
         self.error(
             "Couldn't load spreadsheet. Ensure correct read permissions; rectangular, top-left aligned sheet data ..."
         )
         return
     else:
         for i in range(7):
             progress.advance()
     finally:
         progress.finish()
     return table
Example #15
    def relayout(self, restart):
        if self.edges is None:
            return
        if restart or self.positions is None:
            self.set_random_positions()
        self.progressbar = gui.ProgressBar(self, 100)
        self.set_buttons(running=True)
        self._stop_optimization = False

        Simplifications = self.graph.Simplifications
        self.graph.set_simplifications(
            Simplifications.NoDensity
            + Simplifications.NoLabels * (len(self.graph.labels) > 20)
            + Simplifications.NoEdgeLabels * (len(self.graph.edge_labels) > 20)
            + Simplifications.NoEdges * (self.number_of_edges > 30000))

        large_graph = self.number_of_nodes + self.number_of_edges > 30000

        class LayoutOptimizer(QObject):
            update = Signal(np.ndarray, float)
            done = Signal(np.ndarray)
            stopped = Signal()

            def __init__(self, widget):
                super().__init__()
                self.widget = widget

            def send_update(self, positions, progress):
                if not large_graph:
                    self.update.emit(np.array(positions), progress)
                return not self.widget._stop_optimization

            def run(self):
                widget = self.widget
                edges = widget.edges
                nnodes = widget.number_of_nodes
                init_temp = 0.05 if restart else 0.2
                k = widget.layout_density / 10 / np.sqrt(nnodes)
                sample_ratio = None if nnodes < 1000 else 1000 / nnodes
                fruchterman_reingold(
                    widget.positions, edges, widget.observe_weights,
                    FR_ALLOWED_TIME, k, init_temp, sample_ratio,
                    callback_step=4, callback=self.send_update)
                self.done.emit(widget.positions)
                self.stopped.emit()

        def update(positions, progress):
            self.progressbar.advance(progress)
            self.positions = positions
            self.graph.update_coordinates()

        def done(positions):
            self.positions = positions
            self.set_buttons(running=False)
            self.graph.set_simplifications(
                self.graph.Simplifications.NoSimplifications)
            self.graph.update_coordinates()
            self.progressbar.finish()

        def thread_finished():
            self._optimizer = None
            self._animation_thread = None

        self._optimizer = LayoutOptimizer(self)
        self._animation_thread = QThread()
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        self._optimizer.stopped.connect(self._animation_thread.quit)
        self._optimizer.moveToThread(self._animation_thread)
        self._animation_thread.started.connect(self._optimizer.run)
        self._animation_thread.finished.connect(thread_finished)
        self._animation_thread.start()
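
The pattern above keeps the GUI responsive by moving the optimizer QObject onto a worker QThread; its update/done signals are delivered back to the GUI thread through queued connections, which is where the progress bar is advanced. A stripped-down sketch of the same wiring (Task, start_task and the per-step work are illustrative placeholders):

from AnyQt.QtCore import QObject, QThread, Signal


class Task(QObject):
    update = Signal(float)   # progress increment, in percent
    done = Signal()

    def run(self):
        for _ in range(100):
            # One optimization step would go here.
            self.update.emit(1.0)
        self.done.emit()


def start_task(widget):
    # Keep references on the widget so the objects are not garbage collected.
    widget._task = task = Task()
    widget._thread = thread = QThread()
    task.update.connect(widget.progressbar.advance)  # runs in the GUI thread
    task.done.connect(thread.quit)
    task.moveToThread(thread)
    thread.started.connect(task.run)
    thread.start()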
Example #16
    def send_data(self):
        """Compute result of widget processing and send to output"""

        # Check that there's a table in input...
        if self.inputTable is None:
            self.infoBox.setText("Widget needs input.", "warning")
            self.send("Term-topic Textable table", None)
            self.send("Document-topic Textable table", None)
            self.send("Term-topic Orange table", None)
            self.send("Document-topic Orange table", None)
            self.listEntries = list()
            return

        # Initialize progress bar.
        progressBar = gui.ProgressBar(
            self,
            iterations=1  # TODO
        )

        # Convert input table to gensim dictionary.
        dictionary, corpus = pivot_crosstab_to_gensim(self.inputTable)

        # Apply topic modelling...

        # Case 1: LDA...
        if self.method == "Latent Dirichlet allocation":

            model = models.LdaModel(
                corpus,
                id2word=dictionary,
                num_topics=self.numTopics,
            )

            # Create segment-topic PivotCrosstab table.
            values = dict()
            terms = list()
            for topic in range(self.numTopics):
                topic_terms = model.get_topic_terms(
                    topic,
                    len(self.inputTable.col_ids),
                )
                for term, score in topic_terms:
                    values[(dictionary[term], topic)] = score
                terms.append(
                    list(dictionary[t]
                         for t, s in topic_terms[:MAX_NUM_DISPLAYED_TERMS]))
            segmentTopicTable = PivotCrosstab(
                row_ids=self.inputTable.col_ids[:],
                col_ids=list(range(self.numTopics)),
                values=values,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

            # Fill listbox...
            newListEntries = list()
            for topicNum in range(self.numTopics):
                displayedTerms = ", ".join(terms[topicNum])
                if len(self.inputTable.col_ids) > MAX_NUM_DISPLAYED_TERMS:
                    displayedTerms += ", ..."
                listEntry = "%i. %s" % (
                    topicNum + 1,
                    displayedTerms,
                )
                newListEntries.append(listEntry)
            self.listEntries = newListEntries

            # Create context-topic PivotCrosstab table...
            corpus_lda = model[corpus]
            values = dict()
            for row_idx, row in enumerate(self.inputTable.row_ids):
                lda_doc = corpus_lda[row_idx]
                for topic, score in lda_doc:
                    values[(row, topic)] = score
            contextTopicTable = PivotCrosstab(
                row_ids=self.inputTable.row_ids[:],
                col_ids=list(range(self.numTopics)),
                values=values,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__context__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
                missing=0,
            )

        # Case 2: LSI...
        elif self.method == "Latent semantic indexing":

            model = models.LsiModel(
                corpus,
                id2word=dictionary,
                num_topics=self.numTopics,
            )

            # Create segment-topic PivotCrosstab table.
            segmentTopicTable = PivotCrosstab.from_numpy(
                row_ids=self.inputTable.col_ids[:],
                col_ids=list(range(self.numTopics)),
                np_array=model.projection.u,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

            # Fill listbox...
            colIds = np.array(self.inputTable.col_ids)
            newListEntries = list()
            # Subtask: compute total inertia, i.e. the sum of eigenvalues of the
            # doc-term matrix multiplied by its transpose...
            rect_matrix = self.inputTable.to_numpy()
            matrix_dims = self.inputTable.to_numpy().shape
            if matrix_dims[0] > matrix_dims[1]:
                square_matrix = np.dot(np.transpose(rect_matrix), rect_matrix)
            else:
                square_matrix = np.dot(rect_matrix, np.transpose(rect_matrix))
            total_inertia = sum(np.linalg.eigvals(square_matrix))
            for topicNum in range(self.numTopics):
                # Proportion of inertia is SQUARE of singular value divided by
                # total inertia, because n-th singular value = square root of
                # n-th eigenvalue (cf. compute total inertia above)...
                propInertia = model.projection.s[topicNum]**2 / total_inertia
                scores = model.projection.u[:, topicNum]
                sortedTerms = colIds[scores.argsort()[::-1]]
                if len(colIds) > MAX_NUM_DISPLAYED_TERMS:
                    displayedTerms = ", ".join(
                        sortedTerms[:MAX_NUM_DISPLAYED_TERMS // 2])
                    displayedTerms += ", ..., "
                    displayedTerms += ", ".join(
                        sortedTerms[-MAX_NUM_DISPLAYED_TERMS // 2:])
                else:
                    displayedTerms = ", ".join(sortedTerms)
                listEntry = "%i. (%.2f%%) %s" % (
                    topicNum + 1,
                    propInertia * 100,
                    displayedTerms,
                )
                newListEntries.append(listEntry)
            self.listEntries = newListEntries

            # Create context-topic PivotCrosstab table...
            contextTopicMatrix = corpus2dense(
                model[corpus], len(model.projection.s)).T / model.projection.s
            values = dict()
            for row_idx, row in enumerate(contextTopicMatrix):
                for topic, val in enumerate(row):
                    values[(self.inputTable.row_ids[row_idx], topic)] = val
            contextTopicTable = PivotCrosstab(
                row_ids=self.inputTable.row_ids[:],
                col_ids=list(range(self.numTopics)),
                values=values,
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__context__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
                missing=0,
            )

        # Case 3: Correspondence analysis...
        elif self.method == "Correspondence analysis":

            ca = correspondence(self.inputTable.to_numpy())

            # Create segment-topic PivotCrosstab table.
            segmentTopicTable = PivotCrosstab.from_numpy(
                row_ids=self.inputTable.col_ids[:],
                col_ids=list(range(self.numTopics)),
                np_array=ca.col_factors[:, range(self.numTopics)],
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

            # Fill listbox...
            colIds = np.array(self.inputTable.col_ids)
            newListEntries = list()
            total_inertia = sum(ca.inertia_of_axis())
            for topicNum in range(self.numTopics):
                propInertia = ca.inertia_of_axis()[topicNum] / total_inertia
                scores = np.array(ca.col_factors[:, topicNum])
                sortedTerms = colIds[scores.argsort()[::-1]]
                if len(colIds) > MAX_NUM_DISPLAYED_TERMS:
                    displayedTerms = ", ".join(
                        sortedTerms[:MAX_NUM_DISPLAYED_TERMS // 2])
                    displayedTerms += ", ..., "
                    displayedTerms += ", ".join(
                        sortedTerms[-MAX_NUM_DISPLAYED_TERMS // 2:])
                else:
                    displayedTerms = ", ".join(sortedTerms)
                listEntry = "%i. (%.2f%%) %s" % (
                    topicNum + 1,
                    propInertia * 100,
                    displayedTerms,
                )
                newListEntries.append(listEntry)
            self.listEntries = newListEntries

            # Create context-topic PivotCrosstab table.
            contextTopicTable = PivotCrosstab.from_numpy(
                row_ids=self.inputTable.row_ids[:],
                col_ids=list(range(self.numTopics)),
                np_array=ca.row_factors[:, range(self.numTopics)],
                header_row_id='__topic__',
                header_row_type='continuous',
                header_col_id='__unit__',
                header_col_type='string',
                col_type=dict((col_id, 'continuous')
                              for col_id in range(self.numTopics)),
            )

        # Set status to OK and report...
        self.infoBox.setText("Tables correctly sent to output.")

        # Clear progress bar.
        progressBar.finish()

        # Send tokens...
        self.send("Term-topic Textable table", segmentTopicTable)
        self.send("Document-topic Textable table", contextTopicTable)
        self.send(
            "Term-topic Orange table",
            segmentTopicTable.to_orange_table(),
        )
        self.send(
            "Document-topic Orange table",
            contextTopicTable.to_orange_table(),
        )

        self.sendButton.resetSettingsChangedFlag()
Example #17
    def getFileList(self):
        #print("getFileList")

        initialRootParentPath, _ = os.path.split(self.rootFolderPath)  # initial parent path is the parent of the selected root folder
        fileListExt = list() # list of files matching default extension
        depthList = list()

        progressBar = gui.ProgressBar(self, iterations=1)

        for curr_path, dirnames, filenames in os.walk(self.rootFolderPath):
            # curr_path is a STRING, the path to the directory.
            # dirnames is a LIST of the names of subdirectories.
            # filenames is a LIST of the names of the files in curr_path.
            # Symlinks are not handled.

            # Current relative path, with the initial parent path prefix removed.
            curr_rel_path = curr_path[len(initialRootParentPath)+1:]
            # Split the current relative path on the os separator.
            curr_rel_path_list = os.path.normpath(curr_rel_path).split(os.sep)

            for filename in filenames:
                file = dict()
                # file = {"absoluteFilePath","foldername","filename","depth1","depth2","depth3","depth4","depth5","depth lvl"}
                # prev_non_excl_check = False
                # curr_non_excl_check = prev_non_excl_check #importing previous state of the "non-exclusion check" (opposite of exclusion check)

                annotations = curr_rel_path_list[:]  # annotations are the subfolders traversed so far
                # print(annotations)

                curr_depth = len(annotations)

                depthList.append(curr_depth)

                file['absoluteFilePath'] = os.path.join(curr_path,filename)
                file['fileName'] = filename
                file['depthLvl'] = curr_depth

                file['folderName'] = annotations[0]

                for i in range(1, curr_depth):
                    file['depth' + str(i)] = annotations[i]
                for i in range(curr_depth, 5):
                    file['depth' + str(i)] = "0"

                # apply default file extension filter
                for extension in self.inclusionList:
                    if filename.endswith(extension):
                        fileListExt.append(file)

        # apply inclusion filter
        if self.applyInclusion:
            fileListIncl = [file for file in fileListExt
                            # match in inclusion list
                            if self.match(file['fileName'], self.inclusionsUserAsList)]
        else:
            fileListIncl = fileListExt

        # apply exclusion filter
        if self.applyExclusion:
            fileListExcl = [file for file in fileListIncl
                            # no match in exclusion list
                            if not self.match(file['fileName'], self.exclusionsUserAsList)]
        else:
            fileListExcl = fileListIncl

        # output file list
        self.fileList = fileListExcl

        if self.fileList:
            self.maxDepth = max(depthList)
            self.openFileList()
        else:
            self.maxDepth = 0
        progressBar.advance()
        progressBar.finish()
Example #18
    def sendData(self):
        """Compute result of widget processing and send to output"""

        # Skip if title list is empty:
        if self.titleLabels == list():
            return

        # Check that something has been selected...
        if len(self.selectedTitles) == 0:
            self.infoBox.setText("Please select one or more titles.",
                                 "warning")
            self.send("XML-TEI data", None, self)
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        # Initialize progress bar.
        progressBar = gui.ProgressBar(self,
                                      iterations=len(self.selectedTitles))

        # Attempt to connect to ECP and retrieve plays...
        xml_contents = list()
        annotations = list()
        try:
            for title in self.selectedTitles:
                doc_url = self.document_base_url +  \
                    self.filteredTitleSeg[title].annotations["url"]
                print(doc_url)
                url = re.sub(r"/([^/]+)\.shtml", r"/\1/\1.xml", doc_url)
                print(url)
                response = urllib.request.urlopen(url)
                xml_contents.append(response.read().decode('utf-8'))
                source_annotations = \
                self.filteredTitleSeg[title].annotations.copy()
                #source_annotations["url"] = source_annotations["href"]
                #del source_annotations["href"]
                annotations.append(source_annotations)
                progressBar.advance()  # 1 tick on the progress bar...

        # If an error occurs (e.g. http error, or memory error)...
        except:
            #Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from ECP website.",
                                 "error")
            # Reset output channel.
            self.send("XML-TEI data", None, self)
            return

        # Store downloaded XML in input objects...
        for xml_content_idx in range(len(xml_contents)):
            newInput = Input(xml_contents[xml_content_idx], self.captionTitle)
            self.createdInputs.append(newInput)

        # If there"s only one play, the widget"s output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget"s output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # Annotate segments...
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update(annotations[idx])
            self.segmentation[idx] = segment

        # Store imported URLs as setting.
        self.importedURLs = [
            self.filteredTitleSeg[self.selectedTitles[0]].annotations["url"]
        ]

        # Set status to OK and report data size...
        message = "%i segment@p sent to output " % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        # Clear progress bar.
        progressBar.finish()

        # Send token...
        self.send("XML-TEI data", self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()
Example #19
    def getFileList(self):

        # initialRootParentPath is the parent folder of the selected root folder.
        initialRootParentPath, _ = os.path.split(self.rootFolderPath)
        fileList = list()

        # fileListExt is a list of files matching default extension
        fileListExt = list()
        depthList = list()

        progressBarZero = gui.ProgressBar(self, iterations=1)

        # Using os.walk to walk through directories :
        # Variables descriptions :
        # currPath is a STRING, the path to the directory.
        # dirNames is a LIST of the names of subdirectories.
        # fileNames is a LIST of the names of the files in currPath
        # symlink are not considered in this analysis

        for currPath, dirNames, fileNames in os.walk(self.rootFolderPath):

            currRelPath = currPath[
                len(initialRootParentPath) +
                1:]  # defines current relative path by similar initial parent path part
            currRelPathList = os.path.normpath(currRelPath).split(
                os.sep)  # splits current relative path by os separator

            for fileName in fileNames:

                # file is a dictionary of the file's information; it will get the following keys:
                # file = {
                # "absoluteFilePath",
                # "fileName",
                # "depth_0",
                # "depth_X"
                # depthLvl",
                # "fileContent"
                # }

                # 'fileContent', 'encoding' and 'encodingConfidence' keys are set when openFileList() is called

                file = dict()

                # Initial annotations correspond to the subfolders traversed at each depth level (used for the depth_X annotations)
                annotations = currRelPathList[:]

                currDepth = len(annotations) - 1
                depthList.append(currDepth)

                file['absoluteFilePath'] = os.path.join(currPath, fileName)
                file['fileName'] = fileName
                file['depthLvl'] = currDepth

                file['depth_0'] = annotations[0]

                # Create one annotation per depth level, corresponding to folder names
                for i in range(1, currDepth + 1):
                    file['depth_' + str(i)] = annotations[i]

                # Apply default file extension filter
                for extension in self.inclusionList:
                    if fileName.endswith(extension):

                        # FileListExt = file list created with default inclusion criteria (text extensions from inclusionList)
                        fileListExt.append(file)

                fileList.append(file)

        # apply inclusion filter
        if self.applyInclusion:
            fileListIncl = [
                file for file in fileList
                # match in inclusion list
                if self.match(file['fileName'], self.inclusionsUserAsList)
            ]
        else:
            fileListIncl = fileListExt

        # apply exclusion filter
        if self.applyExclusion:
            fileListExcl = [
                file for file in fileListIncl
                # no match in exclusion list
                if not self.match(file['fileName'], self.exclusionsUserAsList)
            ]
        else:
            fileListExcl = fileListIncl

        # output file list
        self.fileList = fileListExcl

        if self.fileList:
            self.maxDepth = max(depthList)
            self.fileList = self.sampleFileList()
            self.openFileList()
        else:
            self.maxDepth = 0

        progressBarZero.finish()
Example #20
    def openFileList(self):

        tempFileList = list()

        progressBarOpen = gui.ProgressBar(self, iterations=len(self.fileList))

        for file in self.fileList:
            fileContent = ""
            try:
                filePath = file['absoluteFilePath']
            except TypeError:
                pass

            encodings = getPredefinedEncodings()
            try:
                with open(filePath, 'rb') as openedFile:
                    fileContent = openedFile.read()
                    charsetDict = chardet.detect(fileContent)
                    detectedEncoding = charsetDict['encoding']
                    detectedConfidence = charsetDict['confidence']

                    # Chunking functionality should be added here

                    try:
                        encodings.remove(detectedEncoding)
                        encodings.insert(0, detectedEncoding)

                    except ValueError:
                        pass

                    for encoding in encodings:
                        try:
                            self.fileContent = fileContent.decode(encoding)
                            break  # stop at the first encoding that decodes cleanly
                        except (UnicodeDecodeError, LookupError):
                            continue

                file['encoding'] = detectedEncoding
                file['fileContent'] = self.fileContent
                file['encodingConfidence'] = detectedConfidence
                progressBarOpen.advance()
                tempFileList.append(file)

            except IOError:
                if len(self.fileList) > 1:
                    message = u"Couldn't open file '%s'." % filePath
                else:
                    message = u"Couldn't open file."
                self.infoBox.setText(message, 'error')
                self.send('Text data', None, self)
                return

        self.fileList = tempFileList

        self.folder = {
            'rootPath': self.rootFolderPath,
            'maxDepth': self.maxDepth,
            'inclusionsUser': self.inclusionsUser,
            'exclusionsUser': self.exclusionsUser,
            'samplingRate': self.samplingRate,
            'fileList': self.fileList
        }
        progressBarOpen.finish()
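
The decoding loop above tries chardet's guess first and then every predefined encoding, stopping at the first one that succeeds. A self-contained sketch of that strategy (decode_best_effort and its fallback list are hypothetical, not part of the widget):

import chardet


def decode_best_effort(raw_bytes, fallback_encodings=("utf-8", "latin-1")):
    """Decode bytes, trying chardet's guess first and falling back if it fails."""
    detected = chardet.detect(raw_bytes)["encoding"]
    candidates = [detected] + [e for e in fallback_encodings if e != detected]
    for encoding in candidates:
        if encoding is None:
            continue  # chardet may return None for empty or undecodable input
        try:
            return raw_bytes.decode(encoding), encoding
        except (UnicodeDecodeError, LookupError):
            continue
    # Last resort: decode with replacement characters rather than fail.
    return raw_bytes.decode("utf-8", errors="replace"), "utf-8"


print(decode_best_effort("héllo".encode("latin-1")))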
 def __init__(self):
     super().__init__()
     self.label = gui.widgetLabel(self.controlArea)
     self.progress = gui.ProgressBar(self, 100)
     self.resetWidget()
Example #22
    def _recompute_som(self):
        if self.cont_x is None:
            return

        class Optimizer(QObject):
            update = Signal(float, np.ndarray, np.ndarray)
            done = Signal(SOM)
            stopped = Signal()

            def __init__(self, data, widget):
                super().__init__()
                self.som = SOM(
                    widget.size_x,
                    widget.size_y,
                    hexagonal=widget.hexagonal,
                    pca_init=widget.initialization == 0,
                    random_seed=0 if widget.initialization == 2 else None)
                self.data = data
                self.widget = widget

            def callback(self, progress):
                self.update.emit(progress, self.som.weights.copy(),
                                 self.som.ssum_weights.copy())
                return not self.widget.stop_optimization

            def run(self):
                try:
                    self.som.fit(self.data,
                                 N_ITERATIONS,
                                 callback=self.callback)
                    # Report an exception, but still remove the thread
                finally:
                    self.done.emit(self.som)
                    self.stopped.emit()

        def update(_progress, weights, ssum_weights):
            progressbar.advance()
            self._assign_instances(weights, ssum_weights)
            self._redraw()

        def done(som):
            self.enable_controls(True)
            progressbar.finish()
            self._assign_instances(som.weights, som.ssum_weights)
            self._redraw()
            # This is the first time we know what was selected (assuming that
            # initialization is not set to random)
            if self.__pending_selection is not None:
                self.on_selection_change(self.__pending_selection)
                self.__pending_selection = None
            self.update_output()

        def thread_finished():
            self._optimizer = None
            self._optimizer_thread = None

        progressbar = gui.ProgressBar(self, N_ITERATIONS)

        self._optimizer = Optimizer(self.cont_x, self)
        self._optimizer_thread = QThread()
        self._optimizer_thread.setStackSize(5 * 2**20)
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        self._optimizer.stopped.connect(self._optimizer_thread.quit)
        self._optimizer.moveToThread(self._optimizer_thread)
        self._optimizer_thread.started.connect(self._optimizer.run)
        self._optimizer_thread.finished.connect(thread_finished)
        self.stop_optimization = False
        self._optimizer_thread.start()
Example #23
    def Commit(self):

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(ID_INDEX))
            ids.append(unique_id)

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = {name: name for key, name in HEADER}
        reverse_header_dict["ID"] = "id"

        allowed_labels = None

        def namefn(a):
            name = SORTING_MODEL_LIST[self.exnamei]
            name = reverse_header_dict.get(name, "id")
            return dict(a)[name]

        if len(ids):
            table = self.dbc.get_data(
                ids=ids,
                result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels,
                namefn=namefn)

            if self.joinreplicates:
                table = dicty.join_replicates(
                    table,
                    ignorenames=self.dbc.IGNORE_REPLICATE,
                    namefn="name",
                    avg=dicty.median,
                    fnshow=lambda x: " | ".join(map(str, x)))

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(
                        atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except ValueError:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([
                    optfloat(atts.get(reverse_header_dict[name], ""), name)
                    for name in sortOrder
                ])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(attributes, table.domain.class_vars,
                                        table.domain.metas)

            table = Orange.data.Table.from_table(domain, table)

            if self.transpose:
                experiments = [at for at in table.domain.variables]
                attr = [
                    compat.ContinuousVariable.make(ex['DDB'].value)
                    for ex in table
                ]
                metavars = sorted(table.domain.variables[0].attributes.keys())
                metavars = [
                    compat.StringVariable.make(name) for name in metavars
                ]
                domain = compat.create_domain(attr, None, metavars)
                metavars = compat.get_metas(domain)
                metas = [[exp.attributes[var.name] for var in metavars]
                         for exp in experiments]
                table = compat.create_table(domain, table.X.transpose(), None,
                                            metas)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()
Example #24
    def Enrichment(self, pb=None):
        assert self.clusterDataset is not None

        pb = gui.ProgressBar(self, 100) if pb is None else pb
        if not self.annotations.ontology:
            self.annotations.ontology = self.ontology

        if isinstance(self.annotations.genematcher, gene.GMDirect):
            self.SetGeneMatcher()
        self.error(1)
        self.warning([0, 1])

        if self.useAttrNames:
            clusterGenes = [
                v.name for v in self.clusterDataset.domain.attributes
            ]
            self.information(0)
        elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs):
            geneAttr = self.candidateGeneAttrs[self.geneAttrIndex]
            clusterGenes = [
                str(ex[geneAttr]) for ex in self.clusterDataset
                if not numpy.isnan(ex[geneAttr])
            ]
            if any("," in gene for gene in clusterGenes):
                self.information(
                    0,
                    "Separators detected in cluster gene names. Assuming multiple genes per example."
                )
                clusterGenes = reduce(operator.iadd,
                                      (genes.split(",")
                                       for genes in clusterGenes), [])
            else:
                self.information(0)
        else:
            self.error(1, "Failed to extract gene names from input dataset!")
            return {}

        genesSetCount = len(set(clusterGenes))

        self.clusterGenes = clusterGenes = self.annotations.get_gene_names_translator(
            clusterGenes).values()

        self.infoLabel.setText(
            "%i unique genes on input\n%i (%.1f%%) genes with known annotations"
            % (genesSetCount, len(clusterGenes), 100.0 * len(clusterGenes) /
               genesSetCount if genesSetCount else 0.0))

        referenceGenes = None
        if not self.useReferenceDataset or self.referenceDataset is None:
            self.information(2)
            self.information(1)
            referenceGenes = self.annotations.gene_names

        elif self.referenceDataset is not None:
            if self.useAttrNames:
                referenceGenes = [
                    v.name for v in self.referenceDataset.domain.attributes
                ]
                self.information(1)
            elif geneAttr in (self.referenceDataset.domain.variables +
                              self.referenceDataset.domain.metas):
                referenceGenes = [
                    str(ex[geneAttr]) for ex in self.referenceDataset
                    if not numpy.isnan(ex[geneAttr])
                ]
                if any("," in gene for gene in clusterGenes):
                    self.information(
                        1,
                        "Separators detected in reference gene names. Assuming multiple genes per example."
                    )
                    referenceGenes = reduce(operator.iadd,
                                            (genes.split(",")
                                             for genes in referenceGenes), [])
                else:
                    self.information(1)
            else:
                self.information(1)
                referenceGenes = None

            if referenceGenes is None:
                referenceGenes = list(self.annotations.gene_names)
                self.referenceRadioBox.buttons[1].setText("Reference set")
                self.referenceRadioBox.buttons[1].setDisabled(True)
                self.information(
                    2,
                    "Unable to extract gene names from reference dataset. Using entire genome for reference"
                )
                self.useReferenceDataset = 0
            else:
                refc = len(referenceGenes)
                referenceGenes = self.annotations.get_gene_names_translator(
                    referenceGenes).values()
                self.referenceRadioBox.buttons[1].setText(
                    "Reference set (%i genes, %i matched)" %
                    (refc, len(referenceGenes)))
                self.referenceRadioBox.buttons[1].setDisabled(False)
                self.information(2)
        else:
            self.useReferenceDataset = 0

        if not referenceGenes:
            self.error(1, "No valid reference set")
            return {}

        self.referenceGenes = referenceGenes
        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
                evidences.append(etype)
        aspect = ["P", "C", "F"][self.aspectIndex]

        if clusterGenes:
            self.terms = terms = self.annotations.get_enriched_terms(
                clusterGenes,
                referenceGenes,
                evidences,
                aspect=aspect,
                prob=self.probFunctions[self.probFunc],
                use_fdr=False,
                progress_callback=lambda value: pb.advance())
            ids = []
            pvals = []
            for i, d in self.terms.items():
                ids.append(i)
                pvals.append(d[1])
            for i, fdr in zip(ids, stats.FDR(
                    pvals)):  # save FDR as the last part of the tuple
                terms[i] = tuple(list(terms[i]) + [fdr])

        else:
            self.terms = terms = {}
        if not self.terms:
            self.warning(0, "No enriched terms found.")
        else:
            self.warning(0)

        pb.finish()
        self.treeStructDict = {}
        ids = self.terms.keys()

        self.treeStructRootKey = None

        parents = {}
        for id in ids:
            parents[id] = set([term for _, term in self.ontology[id].related])

        children = {}
        for term in self.terms:
            children[term] = set([id for id in ids if term in parents[id]])

        for term in self.terms:
            self.treeStructDict[term] = TreeNode(self.terms[term],
                                                 children[term])
            if not self.ontology[term].related and not getattr(
                    self.ontology[term], "is_obsolete", False):
                self.treeStructRootKey = term
        return terms
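
The loop above appends an FDR-adjusted value, computed by stats.FDR over the term p-values, to each enriched term. The sketch below is a generic Benjamini-Hochberg implementation for illustration only; it is not claimed to be identical to stats.FDR from the bioinformatics add-on:

import numpy as np


def fdr_bh(pvals):
    """Benjamini-Hochberg adjusted p-values."""
    pvals = np.asarray(pvals, dtype=float)
    n = len(pvals)
    order = np.argsort(pvals)
    ranked = pvals[order] * n / np.arange(1, n + 1)
    # Enforce monotonicity from the largest p-value downwards.
    ranked = np.minimum.accumulate(ranked[::-1])[::-1]
    adjusted = np.empty(n)
    adjusted[order] = np.minimum(ranked, 1.0)
    return adjusted


print(fdr_bh([0.01, 0.02, 0.03, 0.2, 0.5]))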
Example #25
    def Commit(self):
        if not self.dbc:
            self.Connect()

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(10))
            annots = self.mappings[unique_id]
            ids.append((annots["data_id"], annots["id"]))

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = dict((name, key) for key, name in HEADER)

        hview = self.experimentsWidget.header()
        shownHeaders = [label for i, label in \
                        list(enumerate(self.headerLabels))[1:] \
                        if not hview.isSectionHidden(i)
                        ]

        allowed_labels = [reverse_header_dict.get(label, label) \
                          for label in shownHeaders]

        if self.joinreplicates and "id" not in allowed_labels:
            # need 'id' labels in join_replicates for attribute names
            allowed_labels.append("id")

        if len(ids):
            table = self.dbc.get_data(
                ids=ids,
                result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                #                          bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels)

            if self.joinreplicates:
                table = dicty.join_replicates(table,
                                              ignorenames=[
                                                  "replicate", "data_id",
                                                  "mappings_id", "data_name",
                                                  "id", "unique_id"
                                              ],
                                              namefn=None,
                                              avg=dicty.median)

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(
                        atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except ValueError:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name) \
                              for name in sortOrder])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(attributes, table.domain.class_var,
                                        table.domain.metas)
            table = table.from_table(domain, table)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()
    def relayout(self):
        if self.edges is None:
            return
        if self.randomizePositions or self.positions is None:
            self.set_random_positions()
        self.progressbar = gui.ProgressBar(self, FR_ITERATIONS)
        self.set_buttons(running=True)
        self._stop_optimization = False

        Simplifications = self.graph.Simplifications
        self.graph.set_simplifications(
            Simplifications.NoDensity
            + Simplifications.NoLabels * (len(self.graph.labels) > 20)
            + Simplifications.NoEdgeLabels * (len(self.graph.edge_labels) > 20)
            + Simplifications.NoEdges * (self.number_of_edges > 1000))

        large_graph = self.number_of_nodes + self.number_of_edges > 20000
        iterations = 5 if large_graph else FR_ITERATIONS

        class LayoutOptimizer(QObject):
            update = Signal(np.ndarray, float)
            done = Signal(np.ndarray)
            stopped = Signal()

            def __init__(self, widget):
                super().__init__()
                self.widget = widget

            def send_update(self, positions, progress):
                if not large_graph:
                    self.update.emit(np.array(positions), progress)
                return not self.widget._stop_optimization

            def run(self):
                widget = self.widget
                edges = widget.edges
                positions = np.array(fruchterman_reingold(
                    edges.data, edges.row, edges.col,
                    1 / np.sqrt(widget.number_of_nodes),  # k
                    widget.positions,
                    np.array([], dtype=np.int32),  # fixed
                    iterations,
                    0.1,  # sample ratio
                    self.send_update, 0.25))
                self.done.emit(positions)
                self.stopped.emit()

        def update(positions, progress):
            self.progressbar.advance(progress)
            self.positions = positions
            self.graph.update_coordinates()

        def done(positions):
            self.positions = positions
            self.set_buttons(running=False)
            self.graph.set_simplifications(
                self.graph.Simplifications.NoSimplifications)
            self.graph.update_coordinates()
            self.progressbar.finish()

        def thread_finished():
            self._optimizer = None
            self._animation_thread = None

        self._optimizer = LayoutOptimizer(self)
        self._animation_thread = QThread()
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        self._optimizer.stopped.connect(self._animation_thread.quit)
        self._optimizer.moveToThread(self._animation_thread)
        self._animation_thread.started.connect(self._optimizer.run)
        self._animation_thread.finished.connect(thread_finished)
        self._animation_thread.start()
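
The relayout example drives the bar from a background thread: the optimizer never touches the GUI directly, it only emits Qt signals, and the connected slots run on the GUI thread, where they advance and finish the bar. A minimal sketch of that division of labour, assuming AnyQt is available and with compute_step() as a hypothetical unit of work:

    from AnyQt.QtCore import QObject, QThread, Signal
    from Orange.widgets import gui

    class Worker(QObject):
        step = Signal(float)   # emitted once per unit of work
        done = Signal()

        def run(self):
            for _ in range(100):
                compute_step()      # hypothetical unit of work
                self.step.emit(1)   # no direct GUI calls from the worker
            self.done.emit()

    def start(self):
        # The ProgressBar lives on the GUI thread; only signals cross threads.
        self.progressbar = gui.ProgressBar(self, 100)
        self._worker = Worker()
        self._thread = QThread()
        self._worker.moveToThread(self._thread)
        self._worker.step.connect(self.progressbar.advance)
        self._worker.done.connect(self.progressbar.finish)
        self._worker.done.connect(self._thread.quit)
        self._thread.started.connect(self._worker.run)
        self._thread.start()
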
    def sendData(self):

        # If the TreeTagger link was not found
        if self.NoLink:
            self.infoBox.setText(u"Sorry, TreeTagger's link not found.",
                                 "error")
            self.send("Text data", None)
        # Important: if input data is None, propagate this value to output...
        elif not self.inputData:
            self.infoBox.setText(u"Widget needs input", "warning")
            self.send("Text data", None)
        # Show that something is happening...
        else:
            self.infoBox.setText(u"TreeTagger is running...", "warning")

            # Initialize variables
            total_tagged_text = list()
            new_segmentations = list()
            i = 0

            # Initialize progress bar.
            self.progressBar = gui.ProgressBar(self, iterations=5)

            # Copy the input segmentation, adding an annotation...
            copy_of_input_seg = Segmentation()
            copy_of_input_seg.label = self.inputData.label
            for seg_idx, segment in enumerate(self.inputData):
                attr = " ".join(
                    ["%s='%s'" % item for item in segment.annotations.items()])
                segment.annotations["tt_xb"] = attr
                copy_of_input_seg.append(segment)

            # Advance the progress bar by one step
            self.progressBar.advance()

            concatenated_text = copy_of_input_seg.to_string(
                formatting="<xb_tt %(tt_xb)s>%(__content__)s</xb_tt>",
                display_all=True,
            )

            # Advance the progress bar by one step
            self.progressBar.advance()

            tagged_text = self.tag(concatenated_text)
            tagged_input = Input(tagged_text)
            tagged_segmentation = Segmenter.import_xml(tagged_input, "xb_tt")

            # Advance the progress bar by one step
            self.progressBar.advance()

            # If the XML checkbox is checked
            if self.activer_xml:
                xml_segmentation, _ = Segmenter.recode(
                    tagged_segmentation,
                    substitutions=[
                        (re.compile(r"<unknown>"), "[unknown]"),
                        (re.compile(r"(.+)\t(.+)\t(.+?)(?=[\r\n])"),
                         "<w lemma='&3' type='&2'>&1</w>"),
                        (re.compile(r'"""'), '"&quot;"'),
                    ],
                )
                final_segmentation = xml_segmentation
            # If the XML checkbox is unchecked
            else:
                xml_segmentation, _ = Segmenter.recode(
                    tagged_segmentation,
                    substitutions=[
                        (re.compile(r"<unknown>"), "[unknown]"),
                        (re.compile(r"(.+)\t(.+)\t(.+?)(?=[\r\n])"),
                         "<w lemma='&3' type='&2'>&1</w>"),
                        (re.compile(r'"""'), '"&quot;"'),
                    ],
                )
                final_segmentation = Segmenter.import_xml(
                    xml_segmentation, "w")

            self.infoBox.dataSent("")

            # Save the TreeTagger link...
            if self.system == "nt":
                link_path = "treetagger_link.txt"
            else:
                link_path = os.path.normpath(
                    "/Users/" + self.user + "/treetagger_link.txt")

            with open(link_path, "w") as link_file:
                link_file.write(self.treetagger_link)

            # Clear progress bar.
            self.progressBar.finish()

            # Send the segmentation
            self.send("Text data", final_segmentation, self)
            self.compteur += 1
            self.sendButton.resetSettingsChangedFlag()
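
Note that the TreeTagger example declares iterations=5 but advances the bar only three times, so it stops around 60% before finish() clears it. A minimal sketch of the staged pattern with matching counts, assuming three processing stages and a hypothetical annotate() helper:

    self.progressBar = gui.ProgressBar(self, iterations=3)
    annotated = annotate(self.inputData)       # hypothetical stage 1
    self.progressBar.advance()
    tagged = self.tag(annotated)               # stage 2, as in the example
    self.progressBar.advance()
    segmentation = Segmenter.import_xml(Input(tagged), "xb_tt")  # stage 3
    self.progressBar.advance()
    self.progressBar.finish()
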
Exemple #28
0
    def sendData(self):

        """Load folders, create and send segmentation"""

        # Check that there's something on input...
        if (
            (self.displayAdvancedSettings and not self.folders) or
            not (self.rootFolderPath or self.displayAdvancedSettings)
        ):
            self.infoBox.setText(u'Please select input folder.', 'warning')
            self.send('Text data', None, self)
            return

        # Check that autoNumberKey is not empty (if necessary)...
        if self.displayAdvancedSettings and self.autoNumber:
            if self.autoNumberKey:
                autoNumberKey = self.autoNumberKey
            else:
                self.infoBox.setText(
                    u'Please enter an annotation key for auto-numbering.',
                    'warning'
                )
                self.send('Text data', None, self)
                return
        else:
            autoNumberKey = None

        # Clear created Inputs...
        self.clearCreatedInputs()

        fileContents = list()
        annotations = list()
        counter = 1

        if self.displayAdvancedSettings:
            myFolders = self.folders
        else:
            myFolders = [[self.rootFolderPath]]

        progressBar = gui.ProgressBar(
            self,
            iterations=len(myFolders)
        )

        # Walk through each folder and open each file successively...

        fileContents = self.fileContents

        # Annotations...
        myFolders = self.folders
        for myFolder in myFolders:
            myFiles = myFolder['fileList']

            for myFile in myFiles:
                # print(myFile)
                annotation = dict()

                if self.importFileNameKey:
                    annotation[self.importFileNameKey] = myFile['fileName']

                if self.importFolderNameKey:
                    annotation[self.importFolderNameKey] = myFile['folderName']

                if self.FolderDepth1Key:
                    annotation[self.FolderDepth1Key] = myFile['depth1']

                if self.FolderDepth2Key:
                    annotation[self.FolderDepth2Key] = myFile['depth2']

                if self.FolderDepthLvl:
                    annotation[self.FolderDepthLvl] = myFile['depthLvl']

                annotations.append(annotation)
            # progressBar.advance()

        # Create an LTTL.Input for each file...

        if len(fileContents) == 1:
            label = self.captionTitle
        else:
            label = None
        for index in range(len(fileContents)):
            myInput = Input(fileContents[index], label)
            segment = myInput[0]
            segment.annotations.update(annotations[index])
            myInput[0] = segment
            self.createdInputs.append(myInput)

        # If there's only one file, the widget's output is the created Input.
        if len(fileContents) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                segmentations=self.createdInputs,
                label=self.captionTitle,
                copy_annotations=True,
                import_labels_as=None,
                sort=False,
                auto_number_as=None,
                merge_duplicates=False,
                progress_callback=None,
            )
        message = u'%i segment@p sent to output ' % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += u'(%i character@p).' % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)
        progressBar.finish()

        self.send('Text data', self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()
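
In the folder-import example above, the per-folder advance() call is commented out, so the bar only appears and is then cleared by finish(). A minimal sketch of the intended per-item pattern, with process_folder() as a hypothetical stand-in for the real file-reading work:

    progressBar = gui.ProgressBar(self, iterations=len(myFolders))
    for myFolder in myFolders:
        process_folder(myFolder)   # hypothetical per-folder work
        progressBar.advance()      # one step per completed folder
    progressBar.finish()
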
Exemple #29
0
    def changeDisplay(self, callSharpen=False):
        if self.check_use_cache:
            image = self.getCachedImage()
            if image is not None:
                self.hmi.setImage_(image)
                return

        # The iteration count is arbitrary here, since it will soon be updated.
        self.progress = gui.ProgressBar(self, 100)
        # Advance right away, so that the user can see something is happening.
        self.progress.advance(1)
        self.countHeappush = 0

        if not self.regionSharpened:
            d = Orange.statistics.distribution.get_distribution(
                self.dataset,
                self.dataset.domain.attributes[self.X_attributes_select])
            self.X_min = d[0, 0]
            self.X_max = d[0, -1]
            d = Orange.statistics.distribution.get_distribution(
                self.dataset,
                self.dataset.domain.attributes[self.Y_attributes_select])
            self.Y_min = d[0, 0]
            self.Y_max = d[0, -1]

            self.plot.clear()
            self.plot.getAxis('bottom').setLabel(
                self.dataset.domain.attributes[self.X_attributes_select].name)
            self.plot.getAxis('left').setLabel(
                self.dataset.domain.attributes[self.Y_attributes_select].name)

            disc = feature.discretization.EqualWidth(
                n=self.n_discretization_intervals)
            self.disc_dataset = discretization.DiscretizeTable(self.dataset,
                                                               method=disc)
            self.contingencies = self.computeContingencies(self.disc_dataset)

            # calculate new image width, with interval width as integer
            self.interval_width = int(self.image_width /
                                      self.contingencies.shape[2])
            self.interval_height = int(self.image_height /
                                       self.contingencies.shape[1])
            self.image_width = self.interval_width * self.contingencies.shape[2]
            self.image_height = (self.interval_height *
                                 self.contingencies.shape[1])
        # compute main rect
        rect = np.empty((1, 1), dtype=QtCore.QRectF)
        rect[0, 0] = QtCore.QRectF(0, 0, self.image_width, self.image_height)

        if not self.regionSharpened:
            # only update the image if the region has not been sharpened yet; otherwise an error occurs
            self.contingencies_valmax = self.updateImage(
                self.contingencies, rect[0, 0], None)

        if callSharpen:
            vr = self.plot.viewRect()

            if vr.x() < self.X_min:
                vr.setLeft(self.X_min)
            if vr.x() + vr.width() > self.X_max:
                vr.setRight(self.X_max)
            if vr.y() < self.Y_min:
                vr.setTop(self.Y_min)
            if vr.y() + vr.height() > self.Y_max:
                vr.setBottom(self.Y_max)

            if self.hmi is not None:
                # pr is used below; the sharpening path assumes the heat map
                # image item (self.hmi) has already been created.
                pr = self.hmi.mapRectFromView_()

            self.h = []
            self.updated_fields = []
            self.count = 0
            self.disc_dataset.domain.X_min = self.X_min
            self.disc_dataset.domain.X_max = self.X_max
            self.disc_dataset.domain.Y_min = self.Y_min
            self.disc_dataset.domain.Y_max = self.Y_max

            if (rect[0, 0].width() > pr.width()
                    and rect[0, 0].height() > pr.height()):
                self.sharpeningRegion = True
                self.sharpenHeatMapRegion(
                    rect[0, 0],
                    pr,
                    self.contingencies,
                    self.dataset,
                    self.disc_dataset.domain,
                    int(self.n_discretization_intervals / 2),
                    valmax_array=self.contingencies_valmax)
                self.sharpeningRegion = False
                self.regionSharpened = True
            else:
                contingencies = self.contingencies
                self.sharpenHeatMap(rect[0, 0],
                                    contingencies,
                                    self.dataset,
                                    self.disc_dataset.domain,
                                    int(self.n_discretization_intervals / 2),
                                    valmax_array=self.contingencies_valmax)
                self.addToCache()
        self.progress.finish()
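
The heat-map example creates the bar with a placeholder iteration count and advances it immediately so the user sees activity before the heavy recomputation starts. If that recomputation can raise, a try/finally guard keeps the bar from lingering; a minimal sketch of that defensive variant, assuming the same widget context and a hypothetical recompute() method:

    def changeDisplay(self):
        self.progress = gui.ProgressBar(self, 100)
        self.progress.advance(1)    # show activity immediately
        try:
            self.recompute()        # hypothetical stand-in for the heavy work
        finally:
            self.progress.finish()  # always clear the bar, even on error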