def run_mds(matrix: DistMatrix, max_iter: int, step_size: int, init_type: int,
            embedding: np.ndarray, state: TaskState):
    """
    Run MDS in chunks of at most `step_size` iterations, reporting after each.

    After every chunk the current embedding is published with
    `state.set_partial_result` and the progress value is updated. The loop
    ends when `max_iter` iterations have been done, the normalised stress is
    zero, the stress improvement falls below `mds.params["eps"]`, or an
    interruption is requested.

    Parameters
    ----------
    matrix
        Distance matrix passed to the MDS projector.
    max_iter
        Total number of iterations allowed.
    step_size
        Maximal number of iterations performed between two reports.
    init_type
        Initialization type forwarded to `MDS`.
    embedding
        Starting embedding; used as `init_data` of the first chunk.
    state
        State used to report status, progress and partial results.

    Returns
    -------
    Result holding the embedding of the last completed chunk.
    """
    res = Result(embedding=embedding)
    iterations_done = 0
    init = embedding
    state.set_status("Running...")
    # `np.float` was only an alias of the builtin `float`; the alias has been
    # removed from NumPy, so the builtin is used directly.
    oldstress = np.finfo(float).max

    while True:
        step_iter = min(max_iter - iterations_done, step_size)
        mds = MDS(
            dissimilarity="precomputed", n_components=2,
            n_init=1, max_iter=step_iter,
            init_type=init_type, init_data=init
        )

        mdsfit = mds(matrix)
        iterations_done += step_iter

        embedding, stress = mdsfit.embedding_, mdsfit.stress_
        # normalise the stress by the sum of the row norms of the embedding
        emb_norm = np.sqrt(np.sum(embedding ** 2, axis=1)).sum()
        if emb_norm > 0:
            stress /= emb_norm

        res.embedding = embedding
        state.set_partial_result(res)
        state.set_progress_value(100 * iterations_done / max_iter)
        if iterations_done >= max_iter or stress == 0 or \
                (oldstress - stress) < mds.params["eps"]:
            return res
        # continue the next chunk from where this one stopped
        init = embedding
        oldstress = stress
        if state.is_interruption_requested():
            return res
def _run(
    data: Table,
    group_by_attrs: List[Variable],
    aggregations: Dict[Variable, Set[str]],
    result: Result,
    state: TaskState,
) -> Result:
    """
    Group the rows of `data` and aggregate the groups.

    Parameters
    ----------
    data
        Table whose rows are grouped.
    group_by_attrs
        Variables passed to `data.groupby`.
    aggregations
        For each variable, the names of the aggregations to compute.
    result
        Result object; its `group_by` is computed only when it is None and
        its `result_table` receives the aggregated table.
    state
        State used to report status, progress and partial results.

    Returns
    -------
    The same `result` object with `result_table` filled in.
    """
    def report(fraction):
        state.set_progress_value(fraction * 100)
        # raising is how the aggregation gets aborted on interruption
        if state.is_interruption_requested():
            raise Exception

    state.set_status("Aggregating")

    # group the rows only when `result` carries no grouping yet
    if result.group_by is None:
        result.group_by = data.groupby(group_by_attrs)
    state.set_partial_result(result)

    # pair every requested aggregation name with its function, ordered by
    # the position of the name in AGGREGATIONS_ORD
    named_functions = {}
    for var, aggs in aggregations.items():
        ordered_names = sorted(aggs, key=AGGREGATIONS_ORD.index)
        named_functions[var] = [
            (agg, AGGREGATIONS[agg].function) for agg in ordered_names
        ]

    result.result_table = result.group_by.aggregate(
        named_functions, wrap_callback(report, 0.2, 1)
    )
    return result
def run(corpus: Corpus, statistics: Tuple[int, str], state: TaskState) -> None:
    """
    Compute the requested statistics and report each one as a partial result.

    Parameters
    ----------
    corpus
        The corpus on which the statistics are computed.
    statistics
        Pairs to be computed: (statistics id, string pattern)
    state
        State used to report progress and partial results.
    """
    # one progress value per corpus element and statistic; the statistic
    # functions are expected to call `advance` once per element
    tick_count = len(corpus) * len(statistics)
    progress_values = iter(np.linspace(0, 100, tick_count))

    def advance():
        state.set_progress_value(next(progress_values))

    for stat_id, pattern in statistics:
        compute = STATISTICS_FUNCTIONS[stat_id]
        outcome = compute(corpus, pattern, advance)
        if outcome is None:
            reported = None
        else:
            # append the compute value built from the function and pattern
            reported = outcome + (ComputeValue(compute, pattern),)
        state.set_partial_result((stat_id, pattern, reported))
def _prepare_dir_and_save_images(paths_queue, dir_name, target_size,
                                 previously_saved, state: TaskState):
    """
    Prepare the target directory and save the queued images one by one.

    Parameters
    ----------
    paths_queue
        Queue of (source path, destination path) pairs still to be saved.
    dir_name
        Directory that is cleaned when no image was saved before.
    target_size
        Size forwarded to the function that saves a single image.
    previously_saved : int
        Number of saved images in the previous process. If the process is
        resumed it is non-zero.
    state
        State used to report progress and partial results.

    Returns
    -------
    Result whose `paths` holds the pairs that have not been saved yet.
    """
    res = Result(paths=paths_queue)
    # a fresh (not resumed) run starts from a clean directory
    if previously_saved == 0:
        _clean_dir(dir_name)

    total = len(paths_queue) + previously_saved
    image_loader = ImageLoader()
    while res.paths:
        source, destination = res.paths.popleft()
        _save_an_image(image_loader, source, destination, target_size)
        remaining_share = len(res.paths) / total
        state.set_progress_value((1 - remaining_share) * 100)
        state.set_partial_result(res)
        if state.is_interruption_requested():
            break
    return res
def run_embedding(
    images: Table,
    file_paths_attr: Variable,
    embedder_name: str,
    state: TaskState,
) -> Result:
    """
    Run the embedding process

    When the selected embedder raises `EmbeddingConnectionError`, the string
    "squeezenet" is reported as a partial result and the embedding is
    repeated with the "squeezenet" model.

    Parameters
    ----------
    images
        Data table with images to embed.
    file_paths_attr
        The column of the table with images.
    embedder_name
        The name of selected embedder.
    state
        State object used for controlling and progress.

    Returns
    -------
    The object that holds embedded images, skipped images, and number
    of skipped images.
    """
    embedder = ImageEmbedder(model=embedder_name)

    # only rows with a known file path contribute to the progress ticks
    file_paths = images[:, file_paths_attr].metas.flatten()
    file_paths_mask = file_paths == file_paths_attr.Unknown
    file_paths_valid = file_paths[~file_paths_mask]

    # init progress bar and function
    ticks = iter(np.linspace(0.0, 100.0, file_paths_valid.size))

    def advance(success=True):
        if state.is_interruption_requested():
            embedder.set_canceled()
        if success:
            # the default keeps an exhausted tick iterator from raising
            # StopIteration inside the embedder's callback
            state.set_progress_value(next(ticks, 100.0))

    try:
        emb, skip, n_skip = embedder(
            images, col=file_paths_attr, callback=advance
        )
    except EmbeddingConnectionError:
        # recompute ticks to go from current state to 100; with no tick left
        # (empty or exhausted iterator) start at 100 instead of letting
        # StopIteration escape from the handler and hide the fallback
        ticks = iter(
            np.linspace(next(ticks, 100.0), 100.0, file_paths_valid.size)
        )

        state.set_partial_result("squeezenet")
        embedder = ImageEmbedder(model="squeezenet")
        emb, skip, n_skip = embedder(
            images, col=file_paths_attr, callback=advance
        )

    return Result(embedding=emb, skip_images=skip, num_skipped=n_skip)
def run_vizrank(compute_score: Callable, iterate_states: Callable,
                saved_state: Optional[Iterable], scores: List,
                progress: int, state_count: int, task: TaskState):
    """
    Score the states produced by `iterate_states` and queue the results.

    Parameters
    ----------
    compute_score
        Called with a state; returns its score or None.
    iterate_states
        Called with `saved_state`; returns an iterator over states.
    saved_state
        State from which the iteration is resumed, if any.
    scores
        Sorted scores collected so far; a copy is extended, the argument
        itself is not modified.
    progress
        Number of states already processed.
    state_count
        Total number of states, used to compute the progress value.
    task
        State used to report status, progress and partial results.

    Returns
    -------
    Result with a queue of `QueuedScore` items and the sorted scores.
    """
    task.set_status("Getting combinations...")
    task.set_progress_value(0.1)
    states = iterate_states(saved_state)

    task.set_status("Getting scores...")
    res = Result(queue=Queue(), scores=None)
    scores = scores.copy()
    can_set_partial_result = True

    def do_work(st, next_st):
        try:
            score = compute_score(st)
            if score is not None:
                # keep `scores` sorted; the insert position is queued too
                pos = bisect_left(scores, score)
                res.queue.put_nowait(
                    QueuedScore(position=pos, score=score,
                                state=st, next_state=next_st)
                )
                scores.insert(pos, score)
        except Exception:  # ignore current state in case of any problem
            pass
        res.scores = scores.copy()

    def reset_flag():
        nonlocal can_set_partial_result
        can_set_partial_result = True

    state = None
    try:
        next_state = next(states)
    except StopIteration:
        # no state to evaluate at all: return the scores unchanged instead
        # of leaking StopIteration to the caller
        res.scores = scores.copy()
        return res

    try:
        while True:
            if task.is_interruption_requested():
                return res
            task.set_progress_value(int(progress * 100 / max(1, state_count)))
            progress += 1
            state = copy.copy(next_state)
            next_state = copy.copy(next(states))
            do_work(state, next_state)
            # for simple scores (e.g. correlations widget) and many feature
            # combinations, the 'partial_result_ready' signal (emitted by
            # invoking 'task.set_partial_result') was emitted too frequently
            # for a longer period of time and therefore causing the widget
            # being unresponsive
            if can_set_partial_result:
                task.set_partial_result(res)
                can_set_partial_result = False
                Timer(0.01, reset_flag).start()
    except StopIteration:
        # the iterator is exhausted: `state` is the last one and has no
        # successor
        do_work(state, None)
        task.set_partial_result(res)
    return res
def compute_secondary_clusters(embedding: Table, result: Result,
                               state: TaskState):
    """
    Compute the secondary cluster table and report it as a partial result.

    The table is set to None when `result.clusters.groups` or `embedding`
    is empty. The partial result ("secondary_clusters", result) is reported
    in both cases.

    Parameters
    ----------
    embedding
        Table with the embedded points.
    result
        Result object; `result.clusters.secondary_table` is updated.
    state
        State used to report status and partial results.
    """
    secondary = None
    if result.clusters.groups and embedding:
        state.set_status("Finding secondary clusters...")
        # the third item of every group entry is what gets passed on
        hulls = {}
        for key, group in result.clusters.groups.items():
            hulls[key] = group[2]
        primary = result.clusters.table
        domain = primary.domain["Clusters"] if primary else primary
        secondary = cluster_additional_points(embedding, hulls, domain)
    result.clusters.secondary_table = secondary
    state.set_partial_result(("secondary_clusters", result))
def _run(
    corpus: Corpus,
    words: List[str],
    scoring_methods: List[str],
    aggregation: str,
    additional_params: dict,
    state: TaskState,
) -> None:
    """
    Score documents with every selected scoring method.

    Each method's aggregated scores are reported as a partial result
    (method name, aggregation name, scores).

    Parameters
    ----------
    corpus
        Corpus of documents
    words
        List of words used for scoring
    scoring_methods
        Methods to score documents with
    aggregation
        Aggregation applied for each document on word scores
    additional_params
        Additional parameters for scores (e.g. embedding needs text language)
    state
        TaskState for reporting the task status and giving partial results

    Raises
    ------
    Exception
        When no word is left after preprocessing, or when an interruption
        is requested during a progress callback.
    """
    def callback(i: float) -> None:
        state.set_progress_value(i * 100)
        if state.is_interruption_requested():
            raise Exception

    # preprocessing and every scoring method get an equal progress share
    share = 1 / (len(scoring_methods) + 1)

    words = _preprocess_words(corpus, words, wrap_callback(callback, end=share))
    if len(words) == 0:
        raise Exception(
            "Empty word list after preprocessing. Please provide a valid set of words."
        )

    for position, method_name in enumerate(scoring_methods, start=1):
        score_function = SCORING_METHODS[method_name][1]
        # forward only those extra parameters the method actually accepts
        accepted = signature(score_function).parameters
        extra = {
            name: value
            for name, value in additional_params.items()
            if name in accepted
        }
        progress_cb = wrap_callback(
            callback, start=position * share, end=(position + 1) * share
        )
        word_scores = score_function(corpus, words, progress_cb, **extra)
        aggregated = AGGREGATIONS[aggregation](word_scores, axis=1)
        state.set_partial_result((method_name, aggregation, aggregated))
def compute_clusters(embedding: Table, result: Result, state: TaskState):
    """
    Compute clusters from the score table and report them.

    The cluster table and groups are set to None when
    `result.scores.table` or `embedding` is empty. The partial result
    ("clusters", result) is reported in both cases.

    Parameters
    ----------
    embedding
        Table with the embedded points.
    result
        Result object; `result.clusters.table`, `.groups` and, when
        clusters are computed, `.epsilon` are updated.
    state
        State used to report status and partial results.
    """
    if result.scores.table and embedding:
        state.set_status("Finding clusters...")
        # pass `eps` along only when an epsilon is already known
        options = {}
        if result.clusters.epsilon is not None:
            options["eps"] = result.clusters.epsilon
        annotated = annotate_projection(
            result.scores.table, embedding, **options
        )
        result.clusters.table = annotated[0]
        result.clusters.groups = annotated[1]
        result.clusters.epsilon = annotated[2]
    else:
        result.clusters.table = None
        result.clusters.groups = None
    state.set_partial_result(("clusters", result))
def run(data: Table, embedding: Optional[np.ndarray], state: TaskState):
    """
    Compute a two-column embedding in ten reported steps.

    The first column is the row mean of `data.X`, the second one is random.
    After each step the embedding is reported as a partial result, unless
    an interruption is requested, in which case the function returns at once.

    Parameters
    ----------
    data
        Table whose `X` is averaged row-wise.
    embedding
        Initial embedding stored in the result.
    state
        State used to report status, progress and partial results.

    Returns
    -------
    Result holding the last reported embedding.
    """
    res = Result(embedding=embedding)

    # simulate wasteful calculation (increase 'steps')
    steps = 10
    state.set_status("Calculating...")
    for step in range(1, steps + 1):
        for _ in range(steps):
            row_means = np.array(np.mean(data.X, axis=1))
            if row_means.ndim == 2:
                row_means = row_means.ravel()
            random_column = np.random.rand(len(row_means))
            embedding = np.vstack((row_means, random_column)).T

        if step % (steps / 10) == 0:
            state.set_progress_value(100 * step / steps)

        if state.is_interruption_requested():
            return res

        res.embedding = embedding
        state.set_partial_result(res)
    return res
def run_freeviz(data: Table, projector: FreeViz, state: TaskState):
    """
    Apply the projector repeatedly until its anchors stop changing.

    After every application the projection is reported as a partial result.
    The loop ends when the anchors are close to those of the previous round
    or when an interruption is requested.

    Parameters
    ----------
    data
        Table passed to the projector.
    projector
        FreeViz projector; its `initial` is set to the latest anchors.
    state
        State used to report status, progress and partial results.

    Returns
    -------
    Result holding the projector and the latest projection.
    """
    res = Result(projector=projector, projection=None)
    rounds_done, rounds_total = 0, MAX_ITERATIONS
    previous_anchors = res.projector.components_.T
    state.set_status("Calculating...")
    while True:
        # Needs a copy because projection should not be modified inplace.
        # If it is modified inplace, the widget and the thread hold a
        # reference to the same object. When the thread is interrupted it
        # is still modifying the object, but the widget receives it
        # (the modified object) with a delay.
        res.projection = res.projector(data).copy()
        current_anchors = res.projector.components_.T
        res.projector.initial = current_anchors

        state.set_partial_result(res)
        converged = np.allclose(
            previous_anchors, current_anchors, rtol=1e-5, atol=1e-4
        )
        if converged:
            break
        previous_anchors = current_anchors

        rounds_done += 1
        state.set_progress_value(100 * rounds_done / rounds_total)
        if state.is_interruption_requested():
            break
    return res
def compute_scores(
    data: Table,
    genes: Table,
    p_threshold: float,
    p_value_fun: str,
    scoring: str,
    start: float,
    end: float,
    result: Result,
    state: TaskState,
):
    """
    Compute z-values, annotations, p-values and the filtered score table.

    Stages whose outcome is already present in `result.scores` are skipped.
    All score fields are cleared when `data` or `genes` is empty. The
    function returns early, without the final report, when an interruption
    is requested between stages.

    Parameters
    ----------
    data
        Table passed to the Mann-Whitney test and annotation assignment.
    genes
        Table passed to the annotation assignment.
    p_threshold
        Threshold forwarded to the annotation filtering.
    p_value_fun
        Name of the p-value function forwarded to the annotation assignment.
    scoring
        Name of the scoring forwarded to the annotation assignment.
    start, end
        Progress range assigned to this computation.
    result
        Result object; its `scores` fields are updated.
    state
        State used to report status, progress and partial results.
    """
    if not data or not genes:
        result.scores.z_vals = None
        result.scores.annotations = None
        result.scores.p_vals = None
        result.scores.table = None
        state.set_partial_result(("scores", result))
        return

    state.set_status("Computing scores...")
    # NOTE(review): the values reported below are the individual stage
    # weights scaled by (end - start); they are neither cumulative nor
    # offset by `start` - confirm that this is intended.
    stage_weights = np.array([15, 75, 10]) * (end - start) / 100

    if not result.scores.z_vals:
        result.scores.z_vals = AnnotateSamplesMeta.mann_whitney_test(data)
        state.set_partial_result(("scores", result))
    state.set_progress_value(stage_weights[0])
    if state.is_interruption_requested():
        return

    if not result.scores.annotations or not result.scores.p_vals:
        annotations, p_values = AnnotateSamplesMeta.assign_annotations(
            result.scores.z_vals, genes, data,
            p_value_fun=p_value_fun, scoring=scoring
        )
        result.scores.annotations = annotations
        result.scores.p_vals = p_values
        state.set_partial_result(("scores", result))
    state.set_progress_value(stage_weights[1])
    if state.is_interruption_requested():
        return

    result.scores.table = AnnotateSamplesMeta.filter_annotations(
        result.scores.annotations, result.scores.p_vals,
        p_threshold=p_threshold
    )
    state.set_partial_result(("scores", result))
def worker(self, state: TaskState):
    """
    Report a new frame about ten times per second until interrupted.

    Parameters
    ----------
    state
        State used to report each frame as a partial result and to check
        for a requested interruption.
    """
    # Checking the interruption flag lets the loop (and its thread) finish
    # once the task is cancelled instead of running forever.
    while not state.is_interruption_requested():
        state.set_partial_result(self.update_frame())
        time.sleep(1 / 10)