def aggregate_by_assigned_entity(annotated_chromatograms, delta_rt=0.25):
    aggregated = defaultdict(list)
    finished = []
    log_handle.log("Aggregating Common Entities: %d chromatograms" % (
        len(annotated_chromatograms), ))
    for chroma in annotated_chromatograms:
        if chroma.composition is not None:
            if chroma.entity is not None:
                aggregated[chroma.entity].append(chroma)
            else:
                aggregated[chroma.composition].append(chroma)
        else:
            finished.append(chroma)
    for entity, group in aggregated.items():
        out = []
        group = sorted(group, key=lambda x: x.start_time)
        chroma = group[0]
        for obs in group[1:]:
            if chroma.chromatogram.overlaps_in_time(obs) or (
                    chroma.end_time - obs.start_time) < delta_rt:
                chroma = chroma.merge(obs)
            else:
                out.append(chroma)
                chroma = obs
        out.append(chroma)
        finished.extend(out)
    log_handle.log("After merging: %d chromatograms" % (len(finished), ))
    return finished
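# Illustrative sketch (not from the library): the grouping loop above merges
# time-sorted chromatograms when they overlap or lie close together in retention
# time. The same idea on bare (start_time, end_time) tuples, which are an
# assumption made only for this demonstration:
def merge_close_intervals(intervals, delta_rt=0.25):
    intervals = sorted(intervals, key=lambda iv: iv[0])
    merged = [intervals[0]]
    for start, end in intervals[1:]:
        last_start, last_end = merged[-1]
        # merge on overlap, or when the gap to the previous interval is below delta_rt
        if start <= last_end or (start - last_end) < delta_rt:
            merged[-1] = (last_start, max(last_end, end))
        else:
            merged.append((start, end))
    return merged


# The second interval starts 0.1 minutes after the first ends, so the two are merged.
assert merge_close_intervals([(1.0, 2.0), (2.1, 3.0), (5.0, 6.0)]) == [(1.0, 3.0), (5.0, 6.0)]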
def add_solution(self, item):
    case_mass = item.precursor_information.neutral_mass
    if abs(case_mass - self.chromatogram.neutral_mass) > 100:
        log_handle.log(
            "Warning, mis-assigned spectrum match to chromatogram %r, %r" % (self, item))
    self.tandem_solutions.append(item)
def add_task(self, task):
    log_handle.log("Received Task %r (%s, %r)" % (task, task.name, task.id))
    context = self.make_task_context(task.name)
    task.update_control_context(context)
    self.task_manager.add_task(task)
    path = self.get_task_path(task.name)
    # use a context manager so the file handle is closed after serialization
    with open(path, 'wb') as handle:
        dill.dump(task.args[:-1], handle)
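# Illustrative sketch (hypothetical file name and arguments): persisting a task's
# positional arguments with dill and restoring them later, mirroring the dill.dump
# call above. The method above drops the final argument before saving; that detail
# is project-specific and not reproduced here.
import dill

task_args = ("sample.mzML", {"score_threshold": 0.05})
with open("task_args.dill", "wb") as handle:
    dill.dump(task_args, handle)

with open("task_args.dill", "rb") as handle:
    assert dill.load(handle) == task_args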
def run(self):
    self.loader = MSFileLoader(self.mzml_path)
    if self.start_scan is not None:
        self.loader.start_from_scan(self.start_scan)
    count = 0
    if self.max_scans is None:
        max_scans = float('inf')
    else:
        max_scans = self.max_scans
    end_scan = self.end_scan
    while count < max_scans:
        try:
            batch, ids = self._make_scan_batch()
            if len(batch) > 0:
                self.queue.put(batch)
            count += len(ids)
            if end_scan in ids or len(ids) == 0:
                break
        except StopIteration:
            break
        except Exception as e:
            log_handle.error("An error occurred while fetching scans", e)
            break
    if self.no_more_event is not None:
        self.no_more_event.set()
        log_handle.log("All Scan IDs have been dealt. %d scan bunches." % (count, ))
    else:
        self.queue.put(DONE)
def run(self):
    self.loader = MSFileLoader(self.ms_file_path, huge_tree=huge_tree, decode_binary=False)
    if self.start_scan is not None:
        try:
            self.loader.start_from_scan(
                self.start_scan,
                require_ms1=self.loader.has_ms1_scans(),
                grouped=True)
        except IndexError as e:
            log_handle.error("An error occurred while locating start scan", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
        except AttributeError as e:
            log_handle.error(
                "The reader does not support random access, start time will be ignored", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
    else:
        self.loader.make_iterator(grouped=True)
    count = 0
    last = 0
    if self.max_scans is None:
        max_scans = float('inf')
    else:
        max_scans = self.max_scans
    end_scan = self.end_scan
    while count < max_scans:
        try:
            batch, ids = self._make_scan_batch()
            if len(batch) > 0:
                self.queue.put(batch)
            count += len(ids)
            if (count - last) > 1000:
                last = count
                self.queue.join()
            if (end_scan in ids and end_scan is not None) or len(ids) == 0:
                log_handle.log("End Scan Found")
                break
        except StopIteration:
            break
        except Exception as e:
            log_handle.error("An error occurred while fetching scans", e)
            break
    if self.no_more_event is not None:
        self.no_more_event.set()
        log_handle.log("All Scan IDs have been dealt. %d scan bunches." % (count, ))
    else:
        self.queue.put(DONE)
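# Illustrative sketch (standard library only, hypothetical names): the producer
# pattern shared by both run() methods above, which push scan batches onto a queue
# and then signal completion either by setting an event or by enqueueing a DONE
# sentinel for the consumer to observe.
import queue

DONE = object()

def produce(work_queue, batches, no_more_event=None):
    for batch in batches:
        work_queue.put(batch)
    if no_more_event is not None:
        no_more_event.set()     # consumers watch the event for completion
    else:
        work_queue.put(DONE)    # or read until they encounter the sentinel

work_queue = queue.Queue()
produce(work_queue, [["scan_1", "scan_2"], ["scan_3"]])
while True:
    item = work_queue.get()
    if item is DONE:
        break
    print(item)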
def drain_queue():
    current_work = []
    try:
        while len(current_work) < 300:
            current_work.append(self.queue.get_nowait())
    except QueueEmptyException:
        pass
    if len(current_work) > 5:
        log_handle.log("Drained Write Queue of %d items" % (len(current_work), ))
    return current_work
def complete(self):
    self.save()
    self.serializer.complete()
    try:
        self.serializer.format()
    except OSError as e:
        # errno 32 is EPIPE (broken pipe), e.g. when the output is not a regular file
        if e.errno == 32:
            log_handle.log("Could not reformat the file in-place")
    except Exception:
        import traceback
        traceback.print_exc()
def explore_grid(self):
    if self.network_reduction is None:
        self.network_reduction = self.model.find_threshold_and_lambda(
            rho=DEFAULT_RHO, threshold_step=0.1, fit_tau=True)
    log_handle.log("... Exploring Grid Landscape")
    stack = []
    tau_magnitude = []
    thresholds = []
    for level in self.network_reduction:
        thresholds.append(level.threshold)

    # Pull the distribution slightly to the right
    bias_shift = 1 - (1 / self.threshold_bias)
    # Reduces the influence of the threshold
    bias_scale = self.threshold_bias

    for level in self.network_reduction:
        stack.append(np.array(level.taus).mean(axis=0))
        tau_magnitude.append(
            np.abs(level.optimal_tau).sum() * (
                (level.threshold / bias_scale) + bias_shift))
    tau_magnitude = np.array(tau_magnitude)
    if len(tau_magnitude) == 0:
        # No solutions, so these will be empty
        return GridSearchSolution(stack, tau_magnitude, thresholds, np.array([]), thresholds)
    elif len(tau_magnitude) <= 2:
        # wrap in an array so the boolean filtering below also works for one or two points
        apex = np.array([np.argmax(tau_magnitude)])
    elif len(tau_magnitude) > 2:
        apex = peak_indices(tau_magnitude)
        if len(apex) == 0:
            apex = np.array([np.argmax(tau_magnitude)])
    thresholds = np.array(thresholds)
    apex_threshold = tau_magnitude[apex].max() * self.apex_threshold
    if apex_threshold != 0:
        apex = apex[(tau_magnitude[apex] > apex_threshold)]
    else:
        # The tau threshold may be 0, in which case any point will do, but this
        # solution carries no generalization.
        apex = apex[(tau_magnitude[apex] >= apex_threshold)]
    target_thresholds = [t for t in thresholds[apex]]
    solution = GridSearchSolution(stack, tau_magnitude, thresholds, apex, target_thresholds)
    log_handle.log("... %d Candidate Solutions" % (len(target_thresholds), ))
    return solution
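# Illustrative sketch: how the threshold bias in explore_grid rescales each level's
# tau magnitude. With bias b, the weight applied at threshold t is t / b + (1 - 1 / b),
# so a larger bias raises the weight of low thresholds toward 1 while flattening the
# dependence on t. The thresholds below are made-up values for demonstration only.
import numpy as np

def threshold_weight(threshold, threshold_bias):
    bias_shift = 1 - (1 / threshold_bias)   # pulls the distribution to the right
    bias_scale = threshold_bias             # reduces the influence of the threshold
    return (threshold / bias_scale) + bias_shift

thresholds = np.array([0.5, 1.0, 2.0, 4.0])
for bias in (1.0, 2.0, 4.0):
    print(bias, np.round(threshold_weight(thresholds, bias), 3))
# bias 1.0 -> the weight is just the threshold itself
# bias 4.0 -> weights are compressed toward 1, damping the threshold's influence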
def force_build_indices(self):
    log_handle.log("Building Sample Index")
    self.sample_manager.rebuild()
    log_handle.log("Building Analysis Index")
    self.analysis_manager.rebuild()
    log_handle.log("Building Hypothesis Index")
    self.hypothesis_manager.rebuild()
def smooth_network(network, observed_compositions, threshold_step=0.5, apex_threshold=0.95,
                   belongingness_matrix=None, rho=DEFAULT_RHO, lambda_max=1,
                   include_missing=False, lmbda=None, model_state=None,
                   observation_aggregator=VariableObservationAggregation,
                   belongingness_normalization=NORMALIZATION):
    convert = GlycanCompositionSolutionRecord.from_chromatogram
    observed_compositions = [
        convert(o) for o in observed_compositions if _has_glycan_composition(o)]
    model = GlycomeModel(
        observed_compositions, network,
        belongingness_matrix=belongingness_matrix,
        observation_aggregator=observation_aggregator,
        belongingness_normalization=belongingness_normalization)
    log_handle.log("... Begin Model Fitting")
    if model_state is None:
        reduction = model.find_threshold_and_lambda(
            rho=rho, threshold_step=threshold_step, lambda_max=lambda_max)
        if len(reduction) == 0:
            log_handle.log("... No Network Reduction Found")
            return None, None, None
        search = ThresholdSelectionGridSearch(model, reduction, apex_threshold)
        params = search.average_solution(lmbda=lmbda)
    else:
        search = ThresholdSelectionGridSearch(model, None, apex_threshold)
        model_state.reindex(model)
        params = model_state
        if lmbda is not None:
            params.lmbda = lmbda
    log_handle.log("... Projecting Solution Onto Network")
    network = search.annotate_network(params, include_missing=include_missing)
    return network, search, params
def _worker_loop(self):
    has_work = True
    i = 0

    def drain_queue():
        current_work = []
        try:
            while len(current_work) < 300:
                current_work.append(self.queue.get_nowait())
        except QueueEmptyException:
            pass
        if len(current_work) > 5:
            log_handle.log("Drained Write Queue of %d items" % (len(current_work), ))
        return current_work

    while has_work:
        try:
            next_bunch = self.queue.get(True, 1)
            if next_bunch == DONE:
                has_work = False
                continue
            if self.log_inserts and (i % 100 == 0):
                log_handle.log("Saving %r" % (next_bunch[0].id, ))
            self._save_bunch(*next_bunch)
            self.commit_counter += 1 + len(next_bunch[1])
            i += 1
            if self.queue.qsize() > 0:
                current_work = drain_queue()
                for next_bunch in current_work:
                    if next_bunch == DONE:
                        has_work = False
                    else:
                        if self.log_inserts and (i % 100 == 0):
                            log_handle.log("Saving %r" % (next_bunch[0].id, ))
                        self._save_bunch(*next_bunch)
                        self.commit_counter += 1 + len(next_bunch[1])
                        i += 1
            if self.commit_counter - self.last_commit_count > self.commit_interval:
                self.last_commit_count = self.commit_counter
                log_handle.log("Syncing Scan Cache To Disk (%d items waiting)" % (
                    self.queue.qsize(), ))
                self.serializer.commit()
                if self.serializer.is_sqlite():
                    self.serializer.session.execute(
                        "PRAGMA wal_checkpoint(SQLITE_CHECKPOINT_RESTART);")
                self.serializer.session.expunge_all()
        except QueueEmptyException:
            continue
        except Exception as e:
            log_handle.error("An error occurred while writing scans to disk", e)
    self.serializer.commit()
    self.serializer.session.expunge_all()
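# Illustrative sketch (standard library only, hypothetical names): the consumer side
# of the writer loop above — block briefly for the next item, stop on the DONE
# sentinel, and flush to storage once enough work has accumulated, with a final
# flush after the loop exits.
import queue

DONE = object()

def consume(work_queue, flush, commit_interval=10):
    pending = 0
    while True:
        try:
            item = work_queue.get(True, 1)
        except queue.Empty:
            continue
        if item is DONE:
            break
        pending += 1
        if pending >= commit_interval:
            flush()
            pending = 0
    flush()  # final flush once no more work is coming

work_queue = queue.Queue()
for i in range(25):
    work_queue.put(i)
work_queue.put(DONE)
consume(work_queue, flush=lambda: print("flushed"))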
def smooth_network(network, observed_compositions, threshold_step=0.5, apex_threshold=0.95,
                   belongingness_matrix=None, rho=DEFAULT_RHO, lambda_max=1,
                   include_missing=False, lmbda=None, model_state=None,
                   observation_aggregator=VariableObservationAggregation,
                   belongingness_normalization=NORMALIZATION, annotate_network=True):
    convert = GlycanCompositionSolutionRecord.from_chromatogram
    observed_compositions = [
        convert(o) for o in observed_compositions if _has_glycan_composition(o)]
    model = GlycomeModel(
        observed_compositions, network,
        belongingness_matrix=belongingness_matrix,
        observation_aggregator=observation_aggregator,
        belongingness_normalization=belongingness_normalization)
    log_handle.log("... Begin Model Fitting")
    if model_state is None:
        reduction = model.find_threshold_and_lambda(
            rho=rho, threshold_step=threshold_step, lambda_max=lambda_max)
        if len(reduction) == 0:
            log_handle.log("... No Network Reduction Found")
            return None, None, None
        search = ThresholdSelectionGridSearch(model, reduction, apex_threshold)
        params = search.average_solution(lmbda=lmbda)
        if params is None:
            log_handle.log("... No Acceptable Solution. Could not fit model.")
            return None, None, None
    else:
        search = ThresholdSelectionGridSearch(model, None, apex_threshold)
        model_state.reindex(model)
        params = model_state
        if lmbda is not None:
            params.lmbda = lmbda
    if annotate_network:
        log_handle.log("... Projecting Solution Onto Network")
        annotated_network = search.annotate_network(
            params, include_missing=include_missing)
    else:
        annotated_network = None
    return annotated_network, search, params
def is_project_resolved(self, ratio=0.5):
    n = 0
    k = 0
    for record in self.hypotheses():
        log_handle.log("Testing %r" % (record, ))
        if record.is_resolvable():
            k += 1
        n += 1
    for record in self.samples():
        log_handle.log("Testing %r" % (record, ))
        if record.is_resolvable():
            k += 1
        n += 1
    for record in self.analyses():
        log_handle.log("Testing %r" % (record, ))
        if record.is_resolvable():
            k += 1
        n += 1
    if n == 0:
        return True
    return k / float(n) > ratio
def merge_common_entities(self, annotated_chromatograms, delta_rt=0.25, require_unmodified=True,
                          threshold_fn=lambda x: x.q_value < 0.05):
    aggregated = defaultdict(list)
    finished = []
    self.log("Aggregating Common Entities: %d chromatograms" % (
        len(annotated_chromatograms), ))
    for chroma in annotated_chromatograms:
        if chroma.composition is not None:
            if chroma.entity is not None:
                # Convert to string to avoid redundant sequences from getting
                # binned differently due to random ordering of ids.
                aggregated[str(chroma.entity)].append(chroma)
            else:
                aggregated[str(chroma.composition)].append(chroma)
        else:
            finished.append(chroma)
    for entity, group in aggregated.items():
        out = []
        group = sorted(group, key=lambda x: x.start_time)
        chroma = group[0]
        for obs in group[1:]:
            if chroma.chromatogram.overlaps_in_time(obs) or (
                    chroma.end_time - obs.start_time) < delta_rt:
                chroma = chroma.merge(obs)
            else:
                out.append(chroma)
                chroma = obs
        out.append(chroma)
        finished.extend(out)
    self.log("After merging: %d chromatograms" % (len(finished), ))
    if require_unmodified:
        out = []
        for chromatogram in finished:
            # the structure's best match has not been identified in an unmodified state
            if Unmodified not in chromatogram.mass_shifts:
                solutions = chromatogram.most_representative_solutions(
                    threshold_fn, reject_shifted=True)
                # if there is a reasonable solution in an unmodified state
                if solutions:
                    # select the best solution
                    solutions = sorted(solutions, key=lambda x: x.score, reverse=True)
                    # remove the invalidated mass shifts
                    current_shifts = chromatogram.chromatogram.mass_shifts
                    partitions = []
                    for shift in current_shifts:
                        partition, _ = chromatogram.chromatogram.bisect_mass_shift(shift)
                        partitions.append(partition.deduct_node_type(shift))
                    accumulated_chromatogram = partitions[0]
                    for partition in partitions[1:]:
                        accumulated_chromatogram = accumulated_chromatogram.merge(partition)
                    chromatogram.chromatogram = accumulated_chromatogram
                    # update the tandem annotations
                    chromatogram.assign_entity(
                        solutions[0],
                        entity_chromatogram_type=chromatogram.chromatogram.__class__)
                    chromatogram.representative_solutions = solutions
                    out.append(chromatogram)
                else:
                    log_handle.log("... Could not find an alternative option for %r" % (
                        chromatogram, ))
                    out.append(chromatogram)
            else:
                out.append(chromatogram)
        finished = []
        aggregated = defaultdict(list)
        for chroma in out:
            if chroma.composition is not None:
                if chroma.entity is not None:
                    aggregated[chroma.entity].append(chroma)
                else:
                    aggregated[chroma.composition].append(chroma)
            else:
                finished.append(chroma)
        for entity, group in aggregated.items():
            out = []
            group = sorted(group, key=lambda x: x.start_time)
            chroma = group[0]
            for obs in group[1:]:
                if chroma.chromatogram.overlaps_in_time(obs) or (
                        chroma.end_time - obs.start_time) < delta_rt:
                    chroma = chroma.merge(obs)
                else:
                    out.append(chroma)
                    chroma = obs
            out.append(chroma)
            finished.extend(out)
    return finished
def find_threshold_and_lambda(self, rho, lambda_max=1., lambda_step=0.02,
                              threshold_start=0., threshold_step=0.2,
                              fit_tau=True, drop_missing=True,
                              renormalize_belongingness=NORMALIZATION):
    r'''Iterate over score thresholds and smoothing factors (lambda),
    sampling points from the parameter grid and computing the PRESS
    residual at each point.

    This produces a :class:`NetworkReduction` data structure recording
    the results for later local maximum detection.

    Parameters
    ----------
    rho: float
        The scale of the variance of the observed score
    lambda_max: float
        The maximum value of lambda to consider on the grid
    lambda_step: float
        The size of the change in lambda at each iteration
    threshold_start: float
        The minimum observed score threshold to start the grid search at
    threshold_step: float
        The size of the change in the observed score threshold at each iteration
    fit_tau: bool
        Whether or not to estimate :math:`\tau` for each iteration when computing
        the PRESS
    drop_missing: bool
        Whether or not to remove nodes from the graph which are not observed above
        the threshold, restructuring the graph, which in turn changes the Laplacian.
    renormalize_belongingness: str
        A string constant which names the belongingness normalization technique to use.

    Returns
    -------
    :class:`NetworkReduction`:
        The recorded grid of sampled points and snapshots of the model at each point
    '''
    solutions = NetworkReduction()
    limit = max(self.S0)
    start = max(min(self.S0) - 1e-3, threshold_start)
    current_network = self.network.clone()
    thresholds = np.arange(start, limit, threshold_step)
    last_solution = None
    last_raw_observations = None
    last_aggregate = None
    for i_threshold, threshold in enumerate(thresholds):
        if i_threshold % 10 == 0:
            log_handle.log("... Threshold = %r (%0.2f%%)" % (
                threshold, (100.0 * i_threshold / len(thresholds))))
        # Aggregate the raw observations into averaged, variance reduced records
        # and annotate the network with these new scores
        raw_observations = [c for c in self._observed_compositions if c.score > threshold]
        # cache on the explicit raw observations used because the step size may be smaller than
        # the next highest difference, and aggregating observations can be expensive. There is
        # no solution to the general problem as it calls for inverting a potentially large matrix
        # to only be used in this loop.
        if raw_observations == last_raw_observations:
            observations, summarized_state, obs_ix = last_aggregate
        else:
            agg = self.observation_aggregator(self.network)
            agg.collect(raw_observations)
            observations, summarized_state = agg.build_records()
            obs_ix = agg.observed_indices()
            last_aggregate = (observations, summarized_state, obs_ix)
            last_raw_observations = raw_observations
        variance_matrix = summarized_state.variance_matrix
        inverse_variance_matrix = summarized_state.inverse_variance_matrix
        variance_matrix = np.diag(variance_matrix[obs_ix, obs_ix])
        inverse_variance_matrix = np.diag(inverse_variance_matrix[obs_ix, obs_ix])
        # clear the scores from the network
        current_network = current_network.clone()
        for i, node in enumerate(current_network):
            node.score = 0
        # assign aggregated scores to the network
        network = assign_network(current_network, observations)
        # Filter the network, marking nodes for removal and recording observed
        # nodes for future use.
        obs = []
        missed = []
        for i, node in enumerate(network):
            if node.score < threshold:
                missed.append(node)
                node.marked = True
            else:
                obs.append(node.score)
        if len(obs) == 0:
            break
        obs = np.array(obs)
        press = []
        if drop_missing:
            # drop nodes whose score does not exceed the threshold
            for node in missed:
                network.remove_node(node, limit=5)
        if last_solution is not None:
            # If after pruning the network, no new nodes have been removed,
            # the optimal solution won't have changed from previous iteration
            # so just reuse the solution
            if last_solution.network == network:
                current_solution = last_solution.copy()
                current_solution.threshold = threshold
                solutions[threshold] = current_solution
                last_solution = current_solution
                current_network = network
                continue
        wpl = weighted_laplacian_matrix(network)
        ident = np.eye(wpl.shape[0])
        lum = LaplacianSmoothingModel(
            network, self.normalized_belongingness_matrix, threshold,
            neighborhood_walker=self.neighborhood_walker,
            belongingness_normalization=renormalize_belongingness,
            variance_matrix=variance_matrix,
            inverse_variance_matrix=inverse_variance_matrix)
        updates = []
        taus = []
        lambda_values = np.arange(0.01, lambda_max, lambda_step)
        for lambd in lambda_values:
            if fit_tau:
                tau = lum.estimate_tau_from_S0(rho, lambd)
            else:
                tau = np.zeros(self.A0.shape[1])
            T = lum.optimize_observed_scores(lambd, lum.A0.dot(tau))
            A = ident + lambd * wpl
            H = np.linalg.inv(A)
            diag_H = np.diag(H)
            if len(diag_H) != len(T):
                diag_H = diag_H[lum.obs_ix]
                assert len(diag_H) == len(T)
            press_value = sum(
                ((obs - T) / (1 - (diag_H - np.finfo(float).eps))) ** 2) / len(obs)
            press.append(press_value)
            updates.append(T)
            taus.append(tau)
        current_solution = NetworkTrimmingSearchSolution(
            threshold, lambda_values, np.array(press), network, np.array(obs),
            updates, taus, lum)
        solutions[threshold] = current_solution
        last_solution = current_solution
        current_network = network
    return solutions
def validate_indices(self, ratio=0.5):
    with self._data_lock:
        if not self.is_project_resolved(ratio):
            log_handle.log("Rebuilding Project Indices")
            self.force_build_indices()
def find_threshold_and_lambda(self, rho, lambda_max=1., lambda_step=0.02,
                              threshold_start=0., threshold_step=0.2,
                              fit_tau=True, drop_missing=True,
                              renormalize_belongingness=NORMALIZATION):
    r'''Iterate over score thresholds and smoothing factors (lambda),
    sampling points from the parameter grid and computing the PRESS
    residual at each point.

    This produces a :class:`NetworkReduction` data structure recording
    the results for later local maximum detection.

    Parameters
    ----------
    rho: float
        The scale of the variance of the observed score
    lambda_max: float
        The maximum value of lambda to consider on the grid
    lambda_step: float
        The size of the change in lambda at each iteration
    threshold_start: float
        The minimum observed score threshold to start the grid search at
    threshold_step: float
        The size of the change in the observed score threshold at each iteration
    fit_tau: bool
        Whether or not to estimate :math:`\tau` for each iteration when computing
        the PRESS
    drop_missing: bool
        Whether or not to remove nodes from the graph which are not observed above
        the threshold, restructuring the graph, which in turn changes the Laplacian.
    renormalize_belongingness: str
        A string constant which names the belongingness normalization technique to use.

    Returns
    -------
    :class:`NetworkReduction`:
        The recorded grid of sampled points and snapshots of the model at each point
    '''
    solutions = NetworkReduction()
    limit = max(self.S0)
    start = max(min(self.S0) - 1e-3, threshold_start)
    current_network = self.network.clone()
    thresholds = np.arange(start, limit, threshold_step)
    last_solution = None
    last_raw_observations = None
    last_aggregate = None
    for i_threshold, threshold in enumerate(thresholds):
        if i_threshold % 10 == 0:
            log_handle.log("... Threshold = %r (%0.2f%%)" % (
                threshold, (100.0 * i_threshold / len(thresholds))))
        # Aggregate the raw observations into averaged, variance reduced records
        # and annotate the network with these new scores
        raw_observations = [
            c for c in self._observed_compositions if c.score > threshold]
        # cache on the explicit raw observations used because the step size may be smaller than
        # the next highest difference, and aggregating observations can be expensive. There is
        # no solution to the general problem as it calls for inverting a potentially large matrix
        # to only be used in this loop.
        if raw_observations == last_raw_observations:
            observations, summarized_state, obs_ix = last_aggregate  # pylint: disable=unpacking-non-sequence
        else:
            agg = self.observation_aggregator(self.network)
            agg.collect(raw_observations)
            observations, summarized_state = agg.build_records()
            obs_ix = agg.observed_indices()
            last_aggregate = (observations, summarized_state, obs_ix)
            last_raw_observations = raw_observations
        # Extract pre-calculated variance matrices
        variance_matrix = summarized_state.variance_matrix
        inverse_variance_matrix = summarized_state.inverse_variance_matrix
        variance_matrix = np.diag(variance_matrix[obs_ix, obs_ix])
        inverse_variance_matrix = np.diag(inverse_variance_matrix[obs_ix, obs_ix])
        # clear the scores from the network
        current_network = current_network.clone()
        for node in current_network:
            node.score = 0
            node.internal_score = 0
        # assign aggregated scores to the network
        network = assign_network(current_network, observations)
        # Filter the network, marking nodes for removal and recording observed
        # nodes for future use.
        obs = []
        missed = []
        for i, node in enumerate(network):
            if node.score < threshold:
                missed.append(node)
                node.marked = True
            else:
                obs.append(node.score)
        if len(obs) == 0:
            break
        obs = np.array(obs)
        press = []
        if drop_missing:
            # drop nodes whose score does not exceed the threshold
            for node in missed:
                network.remove_node(node, limit=5)
        if last_solution is not None:
            # If after pruning the network, no new nodes have been removed,
            # the optimal solution won't have changed from previous iteration
            # so just reuse the solution
            if last_solution.network == network:
                current_solution = last_solution.copy()
                current_solution.threshold = threshold
                solutions[threshold] = current_solution
                last_solution = current_solution
                current_network = network
                continue
        wpl = weighted_laplacian_matrix(network)
        ident = np.eye(wpl.shape[0])
        # The network passed into LaplacianSmoothingModel will have its indices changed,
        # and will not match the ordering of the belongingness matrix, so make sure the
        # observed indices are aligned.
        lum = LaplacianSmoothingModel(
            network, self.normalized_belongingness_matrix[obs_ix, :], threshold,
            neighborhood_walker=self.neighborhood_walker,
            belongingness_normalization=renormalize_belongingness,
            variance_matrix=variance_matrix,
            inverse_variance_matrix=inverse_variance_matrix)
        updates = []
        taus = []
        lambda_values = np.arange(0.01, lambda_max, lambda_step)
        for lambd in lambda_values:
            if fit_tau:
                tau = lum.estimate_tau_from_S0(rho, lambd)
            else:
                tau = np.zeros(self.A0.shape[1])
            T = lum.optimize_observed_scores(lambd, lum.A0.dot(tau))
            A = ident + lambd * wpl
            H = np.linalg.inv(A)
            diag_H = np.diag(H)
            if len(diag_H) != len(T):
                diag_H = diag_H[lum.obs_ix]
                assert len(diag_H) == len(T)
            press_value = sum(
                ((obs - T) / (1 - (diag_H - np.finfo(float).eps))) ** 2) / len(obs)
            press.append(press_value)
            updates.append(T)
            taus.append(tau)
        current_solution = NetworkTrimmingSearchSolution(
            threshold, lambda_values, np.array(press), network, np.array(obs),
            updates, taus, lum)
        solutions[threshold] = current_solution
        last_solution = current_solution
        current_network = network
    return solutions
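# Illustrative sketch: the PRESS statistic evaluated inside the lambda loop above.
# For a linear smoother y_hat = H y with H = (I + lambda * L)^-1, the leave-one-out
# residual at each node is (y_i - y_hat_i) / (1 - H_ii), so PRESS can be computed
# from a single fit without refitting per held-out point. The three-node path-graph
# Laplacian and scores below are made up purely for demonstration.
import numpy as np

L = np.array([[ 1., -1.,  0.],
              [-1.,  2., -1.],
              [ 0., -1.,  1.]])            # Laplacian of a 3-node path graph
y = np.array([1.0, 3.0, 2.0])              # observed node scores
lam = 0.5

H = np.linalg.inv(np.eye(3) + lam * L)     # smoothing ("hat") matrix
y_hat = H.dot(y)
press = np.mean(((y - y_hat) / (1 - np.diag(H))) ** 2)
print(round(float(press), 4))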
def log(self, message):
    log_handle.log(message)
def _log(self, message):
    log_handle.log(message)
def complete(self):
    self.save()
    log_handle.log("Completing Serializer")
    self.serializer.complete()