Ejemplo n.º 1
0
def run_mds(matrix: DistMatrix, max_iter: int, step_size: int, init_type: int,
            embedding: np.ndarray, state: TaskState):
    """Run MDS in chunks of at most ``step_size`` iterations.

    After every chunk the current embedding is published through ``state``
    as a partial result and the progress value is updated.

    Parameters
    ----------
    matrix : DistMatrix
        Precomputed dissimilarities handed to the MDS projector.
    max_iter : int
        Upper bound on the total number of iterations over all chunks.
    step_size : int
        Maximal number of iterations performed per chunk.
    init_type : int
        Initialization type forwarded to ``MDS``.
    embedding : np.ndarray
        Initial embedding; used as ``init_data`` for the first chunk.
    state : TaskState
        Used to report status, progress and partial results, and to poll
        for interruption.

    Returns
    -------
    Result
        Object whose ``embedding`` holds the most recently computed embedding.
    """
    res = Result(embedding=embedding)

    iterations_done = 0
    init = embedding
    state.set_status("Running...")
    # ``np.float`` was only an alias of the builtin ``float``; the alias was
    # deprecated and later removed from NumPy, so use the builtin directly.
    oldstress = np.finfo(float).max

    while True:
        # Never run more iterations than are left in the overall budget.
        step_iter = min(max_iter - iterations_done, step_size)
        mds = MDS(
            dissimilarity="precomputed", n_components=2,
            n_init=1, max_iter=step_iter,
            init_type=init_type, init_data=init
        )

        mdsfit = mds(matrix)
        iterations_done += step_iter

        embedding, stress = mdsfit.embedding_, mdsfit.stress_
        # Scale the stress by the sum of the row-wise Euclidean norms of the
        # embedding (skipped when that sum is zero).
        emb_norm = np.sqrt(np.sum(embedding ** 2, axis=1)).sum()
        if emb_norm > 0:
            stress /= emb_norm

        res.embedding = embedding
        state.set_partial_result(res)
        state.set_progress_value(100 * iterations_done / max_iter)
        # Stop when the budget is used up, the stress is exactly zero, or the
        # improvement over the previous chunk is below the projector's "eps".
        if iterations_done >= max_iter or stress == 0 or \
                (oldstress - stress) < mds.params["eps"]:
            return res
        # Continue the next chunk from the embedding just computed.
        init = embedding
        oldstress = stress
        if state.is_interruption_requested():
            return res
Ejemplo n.º 2
0
def _prepare_dir_and_save_images(paths_queue, dir_name, target_size,
                                 previously_saved, state: TaskState):
    """
    Save every queued image and report progress after each one.

    The target directory is cleaned first, unless the process is being
    resumed (``previously_saved`` is non-zero).

    Parameters
    ----------
    paths_queue
        Queue of ``(from_path, to_path)`` pairs; consumed from the left.
    dir_name
        Directory handed to ``_clean_dir`` on a fresh (non-resumed) run.
    target_size
        Forwarded unchanged to ``_save_an_image``.
    previously_saved : int
        Number of saved images in the previous process. If the process is
        resumed it is non-zero.
    state : TaskState
        Used to report progress and partial results and to poll for
        interruption.

    Returns
    -------
    Result
        Result whose ``paths`` contains the pairs that were not processed
        (empty when everything was saved).
    """
    res = Result(paths=paths_queue)
    if previously_saved == 0:
        _clean_dir(dir_name)

    # Images saved in an earlier run count towards the overall total.
    total = len(paths_queue) + previously_saved
    image_loader = ImageLoader()
    while res.paths:
        source, destination = res.paths.popleft()
        _save_an_image(image_loader, source, destination, target_size)

        remaining_fraction = len(res.paths) / total
        state.set_progress_value((1 - remaining_fraction) * 100)
        state.set_partial_result(res)
        if state.is_interruption_requested():
            break

    return res
Ejemplo n.º 3
0
def run_mds(matrix: DistMatrix, max_iter: int, step_size: int, init_type: int,
            embedding: np.ndarray, state: TaskState):
    """Run MDS in chunks of at most ``step_size`` iterations.

    After every chunk the current embedding is published through ``state``
    as a partial result and the progress value is updated.

    Parameters
    ----------
    matrix : DistMatrix
        Precomputed dissimilarities handed to the MDS projector.
    max_iter : int
        Upper bound on the total number of iterations over all chunks.
    step_size : int
        Maximal number of iterations performed per chunk.
    init_type : int
        Initialization type forwarded to ``MDS``.
    embedding : np.ndarray
        Initial embedding; used as ``init_data`` for the first chunk.
    state : TaskState
        Used to report status, progress and partial results, and to poll
        for interruption.

    Returns
    -------
    Result
        Object whose ``embedding`` holds the most recently computed embedding.
    """
    res = Result(embedding=embedding)

    iterations_done = 0
    init = embedding
    state.set_status("Running...")
    # ``np.float`` was only an alias of the builtin ``float``; the alias was
    # deprecated and later removed from NumPy, so use the builtin directly.
    oldstress = np.finfo(float).max

    while True:
        # Never run more iterations than are left in the overall budget.
        step_iter = min(max_iter - iterations_done, step_size)
        mds = MDS(
            dissimilarity="precomputed", n_components=2,
            n_init=1, max_iter=step_iter,
            init_type=init_type, init_data=init
        )

        mdsfit = mds(matrix)
        iterations_done += step_iter

        embedding, stress = mdsfit.embedding_, mdsfit.stress_
        # Scale the stress by the sum of the row-wise Euclidean norms of the
        # embedding (skipped when that sum is zero).
        emb_norm = np.sqrt(np.sum(embedding ** 2, axis=1)).sum()
        if emb_norm > 0:
            stress /= emb_norm

        res.embedding = embedding
        state.set_partial_result(res)
        state.set_progress_value(100 * iterations_done / max_iter)
        # Stop when the budget is used up, the stress is exactly zero, or the
        # improvement over the previous chunk is below the projector's "eps".
        if iterations_done >= max_iter or stress == 0 or \
                (oldstress - stress) < mds.params["eps"]:
            return res
        # Continue the next chunk from the embedding just computed.
        init = embedding
        oldstress = stress
        if state.is_interruption_requested():
            return res
Ejemplo n.º 4
0
def worker(data: Table, learner, state: TaskState):
    """Fit a sequence of models, dropping one attribute per round.

    The first model is fitted on all attributes. In every following round
    the attributes of the most recent model are taken, a model is fitted for
    each combination with one attribute fewer, and the one with the highest
    ``log2p`` is appended to the trace. The loop ends when the trace holds as
    many entries as ``data.domain`` has attributes.

    Parameters
    ----------
    data : Table
        Input data; irregularities are expected to be checked by the widget.
    learner
        Callable that fits a model on a table; the fitted model must provide
        ``ll_ratio_log2p()`` and ``domain``.
    state : TaskState
        Used to report progress after each round.

    Returns
    -------
    list of Result
        One ``Result(log2p, model)`` per round, in the order they were fitted.
    """
    # No need to check for irregularities, this is done in widget
    time_var, event_var = get_survival_endpoints(data.domain)
    endpoint_names = [time_var.name, event_var.name]

    def fit_cox_models(attrs_combinations):
        # Fit one model per attribute combination (endpoints always included).
        fitted = []
        for attrs in attrs_combinations:
            model = learner(data[:, attrs + endpoint_names])
            fitted.append(Result(model.ll_ratio_log2p(), model))
        return fitted

    attributes = list(data.domain.attributes)
    progress_steps = iter(np.linspace(0, 100, len(attributes)))
    _trace = fit_cox_models([attributes])
    while len(_trace) != len(data.domain.attributes):
        attributes = list(_trace[-1].model.domain.attributes)

        if len(attributes) > 1:
            # every subset that leaves out exactly one attribute
            subsets = itertools.combinations(attributes, len(attributes) - 1)
            combinations = [list(subset) for subset in subsets]
        else:
            combinations = [attributes]

        best = max(fit_cox_models(combinations),
                   key=lambda result: result.log2p)
        _trace.append(best)
        state.set_progress_value(next(progress_steps))
    return _trace
Ejemplo n.º 5
0
def run_vizrank(compute_score: Callable, iterate_states: Callable,
                saved_state: Optional[Iterable], scores: List, progress: int,
                state_count: int, task: TaskState):
    """Score the states produced by ``iterate_states`` and queue the results.

    Parameters
    ----------
    compute_score : Callable
        Called with a state; a return value of ``None`` means "no score".
    iterate_states : Callable
        Called with ``saved_state``; must return an iterator over states.
    saved_state : Optional[Iterable]
        Passed unchanged to ``iterate_states``.
    scores : List
        Already known scores, kept sorted; a copy is used, so the caller's
        list is not modified.
    progress : int
        Number of states counted as processed so far; incremented per state.
    state_count : int
        Total number of states, used to compute the progress percentage.
    task : TaskState
        Used to report status, progress and partial results, and to poll
        for interruption.

    Returns
    -------
    Result
        ``queue`` holds a ``QueuedScore`` for every scored state and
        ``scores`` a copy of the sorted score list.

    Notes
    -----
    The first ``next(states)`` call is outside the ``try`` block, so an empty
    iterator raises ``StopIteration`` to the caller.
    """
    task.set_status("Getting combinations...")
    task.set_progress_value(0.1)
    states = iterate_states(saved_state)

    task.set_status("Getting scores...")
    res = Result(queue=Queue(), scores=None)
    scores = scores.copy()
    can_set_partial_result = True

    def do_work(st, next_st):
        # Score one state and record it at its sorted position; a failure
        # for a single state must not abort the whole run.
        try:
            score = compute_score(st)
            if score is not None:
                pos = bisect_left(scores, score)
                res.queue.put_nowait(
                    QueuedScore(position=pos,
                                score=score,
                                state=st,
                                next_state=next_st))
                scores.insert(pos, score)
        except Exception:  # ignore current state in case of any problem
            pass
        # Publish a copy so the consumer never sees the list being mutated.
        res.scores = scores.copy()

    def reset_flag():
        # Timer callback: allow the next partial result to be emitted.
        nonlocal can_set_partial_result
        can_set_partial_result = True

    state = None
    next_state = next(states)
    try:
        while True:
            if task.is_interruption_requested():
                return res
            task.set_progress_value(int(progress * 100 / max(1, state_count)))
            progress += 1
            state = copy.copy(next_state)
            # Raises StopIteration once ``state`` is the last one; it is then
            # processed in the ``except`` branch below.
            next_state = copy.copy(next(states))
            do_work(state, next_state)
            # for simple scores (e.g. correlations widget) and many feature
            # combinations, the 'partial_result_ready' signal (emitted by
            # invoking 'task.set_partial_result') was emitted too frequently
            # for a longer period of time and therefore causing the widget
            # being unresponsive
            if can_set_partial_result:
                task.set_partial_result(res)
                can_set_partial_result = False
                Timer(0.01, reset_flag).start()
    except StopIteration:
        # Last state: there is no successor, so ``next_state`` is None.
        do_work(state, None)
        task.set_partial_result(res)
    return res
Ejemplo n.º 6
0
def run(gene_sets: GeneSets, selected_gene_sets: List[Tuple[str, ...]], genes,
        state: TaskState) -> Results:
    """Build the table rows for all selected gene sets that match ``genes``.

    Parameters
    ----------
    gene_sets : GeneSets
        Collection of gene sets; iterated in sorted order.
    selected_gene_sets : List[Tuple[str, ...]]
        Hierarchies of the gene sets to include; others are skipped.
    genes
        Genes to match; intersected with each gene set's ``genes``.
    state : TaskState
        Used to report status and progress and to poll for interruption.

    Returns
    -------
    Results
        ``items`` is set to a list of rows
        ``[count_column, genes_column, category_column, term_column]``.
        When ``genes`` is empty or the task is interrupted, the ``Results``
        object is returned without ``items`` being assigned.
    """
    results = Results()
    items = []
    step, steps = 0, len(gene_sets)

    if not genes:
        return results

    state.set_status('Calculating...')

    # Report progress about every 10 % of the gene sets. The interval must be
    # an integer: with a float modulus (steps / 10) the test ``step % x == 0``
    # is hit irregularly or hardly at all when steps is not a multiple of 10.
    progress_interval = max(1, steps // 10)

    for gene_set in sorted(gene_sets):

        step += 1
        if step % progress_interval == 0:
            state.set_progress_value(100 * step / steps)

        if gene_set.hierarchy not in selected_gene_sets:
            continue

        if state.is_interruption_requested():
            return results

        matched_set = gene_set.genes & genes
        if len(matched_set) > 0:
            category_column = QStandardItem()
            term_column = QStandardItem()
            count_column = QStandardItem()
            genes_column = QStandardItem()

            category_column.setData(", ".join(gene_set.hierarchy),
                                    Qt.DisplayRole)
            term_column.setData(gene_set.name, Qt.DisplayRole)
            term_column.setData(gene_set.name, Qt.ToolTipRole)

            # there were some cases when the link string was not empty but
            # also not valid (e.g. "_"), hence the scheme check
            if gene_set.link and urlparse(gene_set.link).scheme:
                term_column.setData(gene_set.link, LinkRole)
                term_column.setForeground(QColor(Qt.blue))

            count_column.setData(matched_set, Qt.UserRole)
            count_column.setData(len(matched_set), Qt.DisplayRole)

            genes_column.setData(len(gene_set.genes), Qt.DisplayRole)
            genes_column.setData(
                set(gene_set.genes),
                Qt.UserRole)  # store genes to get them on output on selection

            items.append(
                [count_column, genes_column, category_column, term_column])

    results.items = items
    return results
    def compute_scores(
        data: Table,
        genes: Table,
        p_threshold: float,
        p_value_fun: str,
        scoring: str,
        start: float,
        end: float,
        result: Result,
        state: TaskState,
    ):
        """Fill ``result.scores`` in three stages, skipping cached ones.

        With empty ``data`` or ``genes`` all score fields are reset to
        ``None``. Otherwise z-values, then annotations and p-values, are
        computed only if they are not already present, and the filtered
        table is recomputed every time. ``result`` is published as the
        partial result ``("scores", result)`` after each computed stage and
        once more at the end. If interruption is requested between stages
        the function returns early without the final publication.

        ``start`` and ``end`` are the bounds of the progress range; they
        scale the stage weights 15 / 75 / 10.
        """
        if data and genes:
            state.set_status("Computing scores...")
            stage_weights = np.array([15, 75, 10]) * (end - start) / 100

            # Stage 1: z-values (reused when already present).
            z_vals_missing = not result.scores.z_vals
            if z_vals_missing:
                z_vals = AnnotateSamplesMeta.mann_whitney_test(data)
                result.scores.z_vals = z_vals
                state.set_partial_result(("scores", result))
            # NOTE(review): the reported value is the stage weight itself,
            # not ``start`` plus the accumulated weights - confirm intent.
            state.set_progress_value(stage_weights[0])
            if state.is_interruption_requested():
                return

            # Stage 2: annotations and p-values (reused when both present).
            if not result.scores.annotations or not result.scores.p_vals:
                assigned = AnnotateSamplesMeta.assign_annotations(
                    result.scores.z_vals,
                    genes,
                    data,
                    p_value_fun=p_value_fun,
                    scoring=scoring)
                annotations, p_values = assigned
                result.scores.annotations = annotations
                result.scores.p_vals = p_values
                state.set_partial_result(("scores", result))
            state.set_progress_value(stage_weights[1])
            if state.is_interruption_requested():
                return

            # Stage 3: always refiltered with the given threshold.
            result.scores.table = AnnotateSamplesMeta.filter_annotations(
                result.scores.annotations,
                result.scores.p_vals,
                p_threshold=p_threshold)
        else:
            # Nothing to score: clear every cached field.
            result.scores.z_vals = None
            result.scores.annotations = None
            result.scores.p_vals = None
            result.scores.table = None

        state.set_partial_result(("scores", result))
def worker(table: Table, covariates: List, time_var: str, event_var: str,
           state: TaskState):
    """Process covariates in a process pool and collect the results.

    The table is converted to a data frame (metas excluded) and split into
    batches that each contain the time and event columns plus one or more
    covariates: one covariate per batch for up to 50 covariates, otherwise
    one interleaved slice of the covariates per CPU. Each batch is handed to
    ``batch_to_process`` together with a shared queue.

    Progress advances one step per item received from the queue; waiting
    stops when the progress steps are exhausted or nothing arrives within
    three seconds.

    Returns
    -------
    tuple
        ``(covariate_names, results)`` where ``covariate_names`` is the first
        column of the stacked worker output and ``results`` the remaining
        columns as floats, with the FDR-corrected values of the last column
        appended as an extra column.
    """
    with multiprocessing.Manager() as _manager:
        _queue = _manager.Queue()
        _cpu_count = cpu_count()

        df = table_to_frame(table, include_metas=False).astype(
            {event_var: np.float64})

        endpoints = [time_var, event_var]
        if len(covariates) > 50:
            # many covariates: one interleaved slice per CPU
            column_groups = [covariates[offset::_cpu_count]
                             for offset in range(_cpu_count)]
        else:
            # few covariates: every covariate gets its own batch
            column_groups = [[cov] for cov in covariates]
        batches = (df[endpoints + group] for group in column_groups)
        progress_steps = iter(np.linspace(0, 100, len(covariates)))

        with multiprocessing.Pool(processes=_cpu_count) as pool:
            process_batch = partial(
                batch_to_process,
                _queue,
                time_var,
                event_var,
            )
            async_result = pool.map_async(process_batch, batches)

            for progress in progress_steps:
                state.set_progress_value(progress)
                try:
                    _queue.get(timeout=3)
                except queue.Empty:
                    break

            stacked_result = np.vstack(async_result.get())
            covariate_names = stacked_result[:, 0]
            stats = stacked_result[:, 1:].astype(float)
            _, pvals_corrected = fdrcorrection(stats[:, -1], is_sorted=False)
            corrected_column = pvals_corrected.reshape(
                pvals_corrected.shape[0], -1)
            return covariate_names, np.hstack((stats, corrected_column))
Ejemplo n.º 9
0
def count_words(data: Corpus, state: TaskState) -> Tuple[Counter, bool]:
    """
    Count words for the word cloud widget; meant to be run in a separate
    thread.

    Bag-of-words counts from ``_bow_words`` are used when they are
    available; otherwise every token in ``data.ngrams`` is counted.

    Parameters
    ----------
    data
        Corpus with the data
    state
        State used to report status.

    Returns
    -------
    The counts as a counter, and a boolean telling whether the counts were
    retrieved on a bag of words basis.
    """
    state.set_status("Calculating...")
    # NOTE(review): progress is reported as 0 / 0.5 / 1 here - confirm which
    # scale the state object expects.
    state.set_progress_value(0)
    bow_counts = _bow_words(data)
    state.set_progress_value(0.5)
    if bow_counts:
        words = bow_counts
    else:
        words = (w for doc in data.ngrams for w in doc)
    corpus_counter = Counter(words)
    state.set_progress_value(1)
    return corpus_counter, bool(bow_counts)
Ejemplo n.º 10
0
def run(data: Table, embedding: Optional[np.ndarray], state: TaskState):
    """Compute a demo 2-D embedding while reporting progress.

    The first coordinate is the row mean of ``data.X`` and the second is
    random. The computation is deliberately repeated to simulate a long
    running task. After every outer step the latest embedding is stored in
    the result and published as a partial result.

    Returns
    -------
    Result
        ``embedding`` holds the last published embedding; on interruption it
        is the one from the previous step (or the initial ``embedding``).
    """
    res = Result(embedding=embedding)

    # simulate wasteful calculation (increase 'steps')
    steps = 10
    state.set_status("Calculating...")
    for step in range(1, steps + 1):
        for _ in range(steps):
            x_data = np.array(np.mean(data.X, axis=1))
            if x_data.ndim == 2:
                x_data = x_data.ravel()
            y_data = np.random.rand(len(x_data))
            embedding = np.vstack((x_data, y_data)).T

        if step % (steps / 10) == 0:
            state.set_progress_value(100 * step / steps)

        # An interrupted run must not publish the embedding of this step.
        if state.is_interruption_requested():
            break

        res.embedding = embedding
        state.set_partial_result(res)
    return res
Ejemplo n.º 11
0
def run(data: Table, embedding: Optional[np.ndarray], state: TaskState):
    """Compute a demo 2-D embedding while reporting progress.

    The first coordinate is the row mean of ``data.X`` and the second is
    random. The computation is deliberately repeated to simulate a long
    running task. After every outer step the latest embedding is stored in
    the result and published as a partial result.

    Returns
    -------
    Result
        ``embedding`` holds the last published embedding; on interruption it
        is the one from the previous step (or the initial ``embedding``).
    """
    res = Result(embedding=embedding)

    # simulate wasteful calculation (increase 'steps')
    steps = 10
    state.set_status("Calculating...")
    for step in range(1, steps + 1):
        for _ in range(steps):
            x_data = np.array(np.mean(data.X, axis=1))
            if x_data.ndim == 2:
                x_data = x_data.ravel()
            y_data = np.random.rand(len(x_data))
            embedding = np.vstack((x_data, y_data)).T

        if step % (steps / 10) == 0:
            state.set_progress_value(100 * step / steps)

        # An interrupted run must not publish the embedding of this step.
        if state.is_interruption_requested():
            break

        res.embedding = embedding
        state.set_partial_result(res)
    return res
Ejemplo n.º 12
0
def run_freeviz(data: Table, projector: FreeViz, state: TaskState):
    """Call the projector repeatedly until its anchors stop changing.

    After each call the projection is published as a partial result and the
    new anchors become the projector's ``initial`` value for the next call.
    The loop ends when the anchors are close to those of the previous round
    (``np.allclose`` with ``rtol=1e-5``, ``atol=1e-4``) or when interruption
    is requested.

    Returns
    -------
    Result
        ``projector`` is the given projector and ``projection`` a copy of its
        latest output.
    """
    res = Result(projector=projector, projection=None)
    iteration = 0
    previous_anchors = res.projector.components_.T
    state.set_status("Calculating...")
    while True:
        # Needs a copy because projection should not be modified inplace.
        # If it is modified inplace, the widget and the thread hold a
        # reference to the same object. When the thread is interrupted it
        # is still modifying the object, but the widget receives it
        # (the modified object) with a delay.
        res.projection = res.projector(data).copy()
        current_anchors = res.projector.components_.T
        res.projector.initial = current_anchors

        state.set_partial_result(res)
        converged = np.allclose(previous_anchors, current_anchors,
                                rtol=1e-5, atol=1e-4)
        if converged:
            break
        previous_anchors = current_anchors

        iteration += 1
        state.set_progress_value(100 * iteration / MAX_ITERATIONS)
        if state.is_interruption_requested():
            break
    return res
Ejemplo n.º 13
0
def run_freeviz(data: Table, projector: FreeViz, state: TaskState):
    """Call the projector repeatedly until its anchors stop changing.

    After each call the projection is published as a partial result and the
    new anchors become the projector's ``initial`` value for the next call.
    The loop ends when the anchors are close to those of the previous round
    (``np.allclose`` with ``rtol=1e-5``, ``atol=1e-4``) or when interruption
    is requested.

    Returns
    -------
    Result
        ``projector`` is the given projector and ``projection`` a copy of its
        latest output.
    """
    res = Result(projector=projector, projection=None)
    iteration = 0
    previous_anchors = res.projector.components_.T
    state.set_status("Calculating...")
    while True:
        # Needs a copy because projection should not be modified inplace.
        # If it is modified inplace, the widget and the thread hold a
        # reference to the same object. When the thread is interrupted it
        # is still modifying the object, but the widget receives it
        # (the modified object) with a delay.
        res.projection = res.projector(data).copy()
        current_anchors = res.projector.components_.T
        res.projector.initial = current_anchors

        state.set_partial_result(res)
        converged = np.allclose(previous_anchors, current_anchors,
                                rtol=1e-5, atol=1e-4)
        if converged:
            break
        previous_anchors = current_anchors

        iteration += 1
        state.set_progress_value(100 * iteration / MAX_ITERATIONS)
        if state.is_interruption_requested():
            break
    return res
    def runner(self, state: TaskState) -> Table:
        """Download (if needed), assemble, gene-match and normalize the data.

        When ``self.data_table`` is empty, expressions, sample metadata and
        optionally QC data are fetched for the selected collection, combined
        into a ``Table``, its attributes are matched to genes, and the
        result is cached in ``self.data_table``. The cached or fresh table
        is then normalized with ``self.normalize``.

        Parameters
        ----------
        state : TaskState
            Used to report status and progress and to poll for interruption.

        Returns
        -------
        Table
            The normalized table.

        Raises
        ------
        Exception
            Raised from the progress callback when interruption is requested.
        """
        exp_type = self.data_output_options.expression_type[self.exp_type].type
        exp_source = self.data_output_options.expression_sources[
            self.exp_source]
        proc_slug = self.data_output_options.process[self.proc_slug].slug
        collection_id = self.selected_collection_id

        table = self.data_table
        # NOTE(review): the first value of this sequence is 0, so the single
        # ``next`` call below reports 0 rather than 50 - confirm intent.
        progress_steps_download = iter(np.linspace(0, 50, 2))

        def callback(i: float, status=""):
            # Progress hook for the download; doubles as the interruption
            # point by raising out of the download.
            state.set_progress_value(i * 100)
            if status:
                state.set_status(status)
            if state.is_interruption_requested():
                raise Exception

        if not table:
            collection = self.res.get_collection_by_id(collection_id)
            coll_table = resdk.tables.RNATables(
                collection,
                expression_source=exp_source,
                expression_process_slug=proc_slug,
                progress_callable=wrap_callback(callback, end=0.5),
            )
            species = coll_table._data[0].output['species']
            sample = coll_table._samples[0]

            state.set_status('Downloading ...')
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            # Close the loop even when a download fails or the callback
            # raises on interruption; otherwise the loop would leak.
            try:
                df_exp = coll_table.exp if exp_type != 'rc' else coll_table.rc
                df_exp = df_exp.rename(index=coll_table.readable_index)
                df_metas = coll_table.meta
                df_metas = df_metas.rename(index=coll_table.readable_index)
                df_qc = None
                if self.append_qc_data:
                    # TODO: check if there is a way to detect if collection
                    #       table contains QC data
                    try:
                        df_qc = coll_table.qc
                        df_qc = df_qc.rename(index=coll_table.readable_index)
                    except ValueError:
                        # QC data is optional; continue without it.
                        pass
            finally:
                loop.close()

            state.set_status('To data table ...')

            # Field names (the part after the dot) that occur in more than
            # one meta column; they need the section label to be unambiguous.
            duplicates = {
                item
                for item, count in Counter([
                    label.split('.')[1]
                    for label in df_metas.columns.to_list() if '.' in label
                ]).items() if count > 1
            }

            # what happens if there are more nested sections?
            section_name_to_label = {
                section['name']: section['label']
                for section in sample.descriptor_schema.schema
            }

            column_labels = {}
            for field_schema, fields, path in iterate_schema(
                    sample.descriptor, sample.descriptor_schema.schema,
                    path=''):
                path = path[1:]  # this is ugly, but can't go around it
                if path not in df_metas.columns:
                    continue
                label = field_schema['label']
                section_name, field_name = path.split('.')
                column_labels[path] = (
                    label if field_name not in duplicates else
                    f'{section_name_to_label[section_name]} - {label}')

            df_exp = df_exp.reset_index(drop=True)
            df_metas = df_metas.astype('object')
            df_metas = df_metas.fillna(np.nan)
            df_metas = df_metas.replace('nan', np.nan)
            df_metas = df_metas.rename(columns=column_labels)
            if df_qc is not None:
                df_metas = pd.merge(df_metas,
                                    df_qc,
                                    left_index=True,
                                    right_index=True)

            # All metadata columns end up as metas of the table; the
            # expression columns become its attributes.
            xym, domain_metas = vars_from_df(df_metas)
            x, _, m = xym
            x_metas = np.hstack((x, m))
            attrs = [ContinuousVariable(col) for col in df_exp.columns]
            metas = domain_metas.attributes + domain_metas.metas
            domain = Domain(attrs, metas=metas)
            table = Table(domain, df_exp.to_numpy(), metas=x_metas)
            state.set_progress_value(next(progress_steps_download))

            state.set_status('Matching genes ...')
            progress_steps_gm = iter(
                np.linspace(50, 99, len(coll_table.gene_ids)))

            def gm_callback():
                state.set_progress_value(next(progress_steps_gm))

            tax_id = species_name_to_taxid(species)
            gm = GeneMatcher(tax_id, progress_callback=gm_callback)
            table = gm.match_table_attributes(table, rename=True)
            table.attributes[TableAnnotation.tax_id] = tax_id
            table.attributes[TableAnnotation.gene_as_attr_name] = True
            table.attributes[TableAnnotation.gene_id_attribute] = 'Entrez ID'
            # Cache the un-normalized table so later runs skip the download.
            self.data_table = table

        state.set_status('Normalizing ...')
        table = self.normalize(table)
        state.set_progress_value(100)

        return table