def run(self):
    self.loader = MSFileLoader(self.mzml_path)
    if self.start_scan is not None:
        self.loader.start_from_scan(self.start_scan)
    count = 0
    if self.max_scans is None:
        max_scans = float('inf')
    else:
        max_scans = self.max_scans
    end_scan = self.end_scan
    while count < max_scans:
        try:
            batch, ids = self._make_scan_batch()
            if len(batch) > 0:
                self.queue.put(batch)
            count += len(ids)
            if end_scan in ids or len(ids) == 0:
                break
        except StopIteration:
            break
        except Exception as e:
            log_handle.error("An error occurred while fetching scans", e)
            break

    if self.no_more_event is not None:
        self.no_more_event.set()
        log_handle.log("All Scan IDs have been dealt. %d scan bunches." % (count, ))
    else:
        self.queue.put(DONE)
def run(self):
    self.loader = MSFileLoader(self.ms_file_path, huge_tree=huge_tree,
                               decode_binary=False)
    if self.start_scan is not None:
        try:
            self.loader.start_from_scan(
                self.start_scan,
                require_ms1=self.loader.has_ms1_scans(),
                grouped=True)
        except IndexError as e:
            log_handle.error("An error occurred while locating start scan", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
        except AttributeError as e:
            log_handle.error(
                "The reader does not support random access, start time will be ignored", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
    else:
        self.loader.make_iterator(grouped=True)
    count = 0
    last = 0
    if self.max_scans is None:
        max_scans = float('inf')
    else:
        max_scans = self.max_scans
    end_scan = self.end_scan
    while count < max_scans:
        try:
            batch, ids = self._make_scan_batch()
            if len(batch) > 0:
                self.queue.put(batch)
            count += len(ids)
            if (count - last) > 1000:
                last = count
                self.queue.join()
            if (end_scan is not None and end_scan in ids) or len(ids) == 0:
                log_handle.log("End Scan Found")
                break
        except StopIteration:
            break
        except Exception as e:
            log_handle.error("An error occurred while fetching scans", e)
            break

    if self.no_more_event is not None:
        self.no_more_event.set()
        log_handle.log("All Scan IDs have been dealt. %d scan bunches." % (count, ))
    else:
        self.queue.put(DONE)
def _preindex_file(self):
    reader = MSFileLoader(self.ms_file, use_index=False)
    try:
        reader.prebuild_byte_offset_file(self.ms_file)
    except AttributeError:
        # the type does not support this type of indexing
        pass
    except IOError:
        # the file could not be written
        pass
    except Exception as e:
        # something else went wrong
        self.error("An error occurred while pre-indexing.", e)
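# Illustrative sketch, not part of the original code: once _preindex_file has
# written the byte-offset index, subsequent opens of the same file (for
# example in helper processes) get cheap random access by scan id, which the
# interval-tree builders below rely on. The path and scan id are hypothetical
# placeholders.
def _preindex_usage_sketch():
    reader = MSFileLoader("example.mzML")  # placeholder path
    scan = reader.get_scan_by_id("controllerType=0 controllerNumber=1 scan=25")
    return scan.index, scan.ms_level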
def _make_interval_tree(self, start_scan, end_scan):
    reader = MSFileLoader(self.ms_file)
    if start_scan is not None:
        start_ix = reader.get_scan_by_id(start_scan).index
    else:
        start_ix = 0
    if end_scan is not None:
        end_ix = reader.get_scan_by_id(end_scan).index
    else:
        end_ix = len(reader)
    reader.reset()
    index, interval_tree = build_scan_index(
        reader, self.number_of_helpers + 1, (start_ix, end_ix))
    self._scan_interval_tree = interval_tree
def _make_interval_tree(self, start_scan, end_scan):
    reader = MSFileLoader(self.ms_file)
    start_ix = reader.get_scan_by_id(start_scan).index
    end_ix = reader.get_scan_by_id(end_scan).index
    reader.reset()
    index, interval_tree = build_scan_index(
        reader, self.number_of_helpers + 1, (start_ix, end_ix))
    self._scan_interval_tree = interval_tree
def run(self):
    self.loader = MSFileLoader(self.ms_file_path)
    if self.start_scan is not None:
        try:
            self.loader.start_from_scan(
                self.start_scan,
                require_ms1=self.loader.has_ms1_scans(),
                grouped=True)
        except IndexError as e:
            self.log_handler("An error occurred while locating start scan", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
        except AttributeError as e:
            self.log_handler(
                "The reader does not support random access, start time will be ignored", e)
            self.loader.reset()
            self.loader.make_iterator(grouped=True)
    else:
        self.loader.make_iterator(grouped=True)
    count = 0
    last = 0
    if self.max_scans is None:
        max_scans = float('inf')
    else:
        max_scans = self.max_scans
    end_scan = self.end_scan
    while count < max_scans:
        try:
            batch, ids = self._make_scan_batch()
            if len(batch) > 0:
                self.queue.put(batch)
            count += len(ids)
            if (count - last) > 1000:
                last = count
                self.queue.join()
            if (end_scan is not None and end_scan in ids) or len(ids) == 0:
                self.log_handler("End Scan Found")
                break
        except StopIteration:
            break
        except Exception as e:
            self.log_handler("An error occurred while fetching scans", e)
            break

    if self.no_more_event is not None:
        self.no_more_event.set()
        self.log_handler("All Scan IDs have been dealt. %d scan bunches." % (count,))
    else:
        self.queue.put(DONE)
def run(self):
    loader = MSFileLoader(self.ms_file_path)
    queued_loader = ScanBunchLoader(loader)
    has_input = True
    transformer = self.make_scan_transformer(loader)
    self.transformer = transformer
    self._silence_loggers()

    i = 0
    last = 0
    while has_input:
        try:
            scan_id, product_scan_ids, process_msn = self.get_work(True, 10)
            self.input_queue.task_done()
        except QueueEmpty:
            if self.no_more_event is not None and self.no_more_event.is_set():
                has_input = False
            continue
        i += 1 + len(product_scan_ids)
        if scan_id == DONE:
            has_input = False
            break
        try:
            queued_loader.put(scan_id, product_scan_ids)
            scan, product_scans = queued_loader.get()
        except Exception as e:
            self.log_message(
                "Something went wrong when loading bunch (%s): %r.\nRecovery is not possible." % (
                    (scan_id, product_scan_ids), e))
            # the bunch could not be loaded, so there is nothing to process
            continue
        self.handle_scan_bunch(scan, product_scans, scan_id,
                               product_scan_ids, process_msn)
        if (i - last) > 1000:
            last = i
            self.output_queue.join()

    self.log_message("Done (%d scans)" % i)
    if self.no_more_event is None:
        self.output_queue.put((DONE, DONE, DONE))
    self._work_complete.set()
def _make_interval_tree(self, start_scan, end_scan):
    reader = MSFileLoader(self.ms_file)
    if start_scan is not None:
        start_ix = reader.get_scan_by_id(start_scan).index
    else:
        start_ix = 0
    if end_scan is not None:
        end_ix = reader.get_scan_by_id(end_scan).index
    else:
        end_ix = len(reader)
    reader.reset()
    index, interval_tree = build_scan_index(
        reader, self.number_of_helpers + 1, (start_ix, end_ix))
    self._scan_interval_tree = interval_tree
def _make_interval_tree(self, start_scan, end_scan):
    reader = MSFileLoader(self.ms_file, decode_binary=False)
    if start_scan is not None:
        start_ix = reader.get_scan_by_id(start_scan).index
    else:
        start_ix = 0
    if end_scan is not None:
        end_ix = reader.get_scan_by_id(end_scan).index
    else:
        end_ix = len(reader)
    reader.reset()
    _index, interval_tree = build_scan_index(
        reader, self.number_of_helpers + 1, (start_ix, end_ix))
    self._scan_interval_tree = interval_tree
    self.log("RT Tree: %r" % (self._scan_interval_tree.rt_tree, ))
class ScanIDYieldingProcess(Process):

    def __init__(self, ms_file_path, queue, start_scan=None, max_scans=None,
                 end_scan=None, no_more_event=None, ignore_tandem_scans=False,
                 batch_size=1, log_handler=None):
        if log_handler is None:
            log_handler = show_message
        Process.__init__(self)
        self.daemon = True
        self.ms_file_path = ms_file_path
        self.queue = queue
        self.loader = None

        self.start_scan = start_scan
        self.max_scans = max_scans
        self.end_scan = end_scan
        self.ignore_tandem_scans = ignore_tandem_scans
        self.batch_size = batch_size

        self.log_handler = log_handler
        self.no_more_event = no_more_event

    def _make_scan_batch(self):
        batch = []
        scan_ids = []
        for _ in range(self.batch_size):
            try:
                bunch = next(self.loader)
                scan, products = bunch
                if scan is not None:
                    scan_id = scan.id
                else:
                    scan_id = None
                product_scan_ids = [p.id for p in products]
            except StopIteration:
                break
            except Exception as e:
                self.log_handler("An error occurred in _make_scan_batch", e)
                break
            if not self.ignore_tandem_scans:
                batch.append((scan_id, product_scan_ids, True))
            else:
                batch.append((scan_id, product_scan_ids, False))
            scan_ids.append(scan_id)
        return batch, scan_ids

    def run(self):
        self.loader = MSFileLoader(self.ms_file_path, decode_binary=False)

        if self.start_scan is not None:
            try:
                self.loader.start_from_scan(
                    self.start_scan,
                    require_ms1=self.loader.has_ms1_scans(),
                    grouped=True)
            except IndexError as e:
                self.log_handler("An error occurred while locating start scan", e)
                self.loader.reset()
                self.loader.make_iterator(grouped=True)
            except AttributeError as e:
                self.log_handler(
                    "The reader does not support random access, start time will be ignored", e)
                self.loader.reset()
                self.loader.make_iterator(grouped=True)
        else:
            self.loader.make_iterator(grouped=True)

        count = 0
        last = 0
        if self.max_scans is None:
            max_scans = float('inf')
        else:
            max_scans = self.max_scans
        end_scan = self.end_scan
        while count < max_scans:
            try:
                batch, ids = self._make_scan_batch()
                if len(batch) > 0:
                    self.queue.put(batch)
                count += len(ids)
                if (count - last) > 1000:
                    last = count
                    self.queue.join()
                if (end_scan is not None and end_scan in ids) or len(ids) == 0:
                    self.log_handler("End Scan Found")
                    break
            except StopIteration:
                break
            except Exception as e:
                self.log_handler("An error occurred while fetching scans", e)
                break

        if self.no_more_event is not None:
            self.no_more_event.set()
            self.log_handler("All Scan IDs have been dealt. %d scan bunches." % (count,))
        else:
            self.queue.put(DONE)
class ScanIDYieldingProcess(Process):

    def __init__(self, ms_file_path, queue, start_scan=None, max_scans=None,
                 end_scan=None, no_more_event=None, ignore_tandem_scans=False,
                 batch_size=1, log_handler=None):
        if log_handler is None:
            log_handler = show_message
        Process.__init__(self)
        self.daemon = True
        self.ms_file_path = ms_file_path
        self.queue = queue
        self.loader = None

        self.start_scan = start_scan
        self.max_scans = max_scans
        self.end_scan = end_scan
        self.ignore_tandem_scans = ignore_tandem_scans
        self.batch_size = batch_size

        self.log_handler = log_handler
        self.no_more_event = no_more_event

    def _make_scan_batch(self):
        batch = []
        scan_ids = []
        for _ in range(self.batch_size):
            try:
                bunch = next(self.loader)
                scan, products = bunch
                if scan is not None:
                    scan_id = scan.id
                else:
                    scan_id = None
                product_scan_ids = [p.id for p in products]
            except StopIteration:
                break
            except Exception as e:
                self.log_handler("An error occurred in _make_scan_batch", e)
                break
            if not self.ignore_tandem_scans:
                batch.append((scan_id, product_scan_ids, True))
            else:
                batch.append((scan_id, product_scan_ids, False))
            scan_ids.append(scan_id)
        return batch, scan_ids

    def run(self):
        self.loader = MSFileLoader(self.ms_file_path)

        if self.start_scan is not None:
            try:
                self.loader.start_from_scan(
                    self.start_scan,
                    require_ms1=self.loader.has_ms1_scans(),
                    grouped=True)
            except IndexError as e:
                self.log_handler("An error occurred while locating start scan", e)
                self.loader.reset()
                self.loader.make_iterator(grouped=True)
            except AttributeError as e:
                self.log_handler(
                    "The reader does not support random access, start time will be ignored", e)
                self.loader.reset()
                self.loader.make_iterator(grouped=True)
        else:
            self.loader.make_iterator(grouped=True)

        count = 0
        last = 0
        if self.max_scans is None:
            max_scans = float('inf')
        else:
            max_scans = self.max_scans
        end_scan = self.end_scan
        while count < max_scans:
            try:
                batch, ids = self._make_scan_batch()
                if len(batch) > 0:
                    self.queue.put(batch)
                count += len(ids)
                if (count - last) > 1000:
                    last = count
                    self.queue.join()
                if (end_scan is not None and end_scan in ids) or len(ids) == 0:
                    self.log_handler("End Scan Found")
                    break
            except StopIteration:
                break
            except Exception as e:
                self.log_handler("An error occurred while fetching scans", e)
                break

        if self.no_more_event is not None:
            self.no_more_event.set()
            self.log_handler("All Scan IDs have been dealt. %d scan bunches." % (count,))
        else:
            self.queue.put(DONE)
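# Illustrative usage sketch, not part of the original code: wiring
# ScanIDYieldingProcess to a consumer loop. A multiprocessing.JoinableQueue
# is assumed because run() periodically calls queue.join(), which requires
# the consumer to call task_done() for every batch it takes. The file path
# is a hypothetical placeholder; DONE is the module-level sentinel used
# throughout these snippets (only emitted when no no_more_event is given).
def scan_id_producer_sketch():
    from multiprocessing import JoinableQueue, Event
    from queue import Empty

    scan_queue = JoinableQueue(10)
    no_more_scans = Event()
    producer = ScanIDYieldingProcess(
        "example.mzML", scan_queue,  # placeholder input file
        no_more_event=no_more_scans, batch_size=100)
    producer.start()

    while True:
        try:
            batch = scan_queue.get(True, 10)
        except Empty:
            if no_more_scans.is_set():
                break
            continue
        scan_queue.task_done()  # required, since the producer join()s the queue
        for scan_id, product_scan_ids, process_msn in batch:
            pass  # dispatch (scan_id, product_scan_ids, process_msn) to workers
    producer.join()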
def run(self):
    loader = MSFileLoader(self.mzml_path)
    queued_loader = ScanBunchLoader(loader)
    has_input = True
    transformer = self.make_scan_transformer(loader)

    nologs = ["deconvolution_scan_processor"]
    if not self.deconvolute:
        nologs.append("deconvolution")
    for logname in nologs:
        logger_to_silence = logging.getLogger(logname)
        logger_to_silence.propagate = False
        logger_to_silence.setLevel("CRITICAL")
        logger_to_silence.addHandler(logging.NullHandler())
        # logger_to_silence.addHandler(logging.StreamHandler())

    i = 0
    while has_input:
        try:
            scan_id, product_scan_ids, process_msn = self.get_work(True, 10)
        except QueueEmpty:
            if self.no_more_event is not None and self.no_more_event.is_set():
                has_input = False
            continue
        i += 1 + len(product_scan_ids)
        if scan_id == DONE:
            has_input = False
            break
        try:
            queued_loader.put(scan_id, product_scan_ids)
            scan, product_scans = queued_loader.get()
        except Exception as e:
            self.log_message(
                "Something went wrong when loading bunch (%s): %r.\nRecovery is not possible." % (
                    (scan_id, product_scan_ids), e))
            # the bunch could not be loaded, so there is nothing to process
            continue
        if len(scan.arrays[0]) == 0:
            self.skip_scan(scan)
            continue
        try:
            scan, priorities, product_scans = transformer.process_scan_group(
                scan, product_scans)
            if self.deconvolute:
                transformer.deconvolute_precursor_scan(scan, priorities)
            self.send_scan(scan)
        except NoIsotopicClustersError as e:
            self.log_message(
                "No isotopic clusters were extracted from scan %s (%r)" % (
                    e.scan_id, len(scan.peak_set)))
        except Exception as e:
            self.skip_scan(scan)
            self.log_error(e, scan_id, scan, (product_scan_ids))

        for product_scan in product_scans:
            if len(product_scan.arrays[0]) == 0 or (not process_msn):
                self.skip_scan(product_scan)
                continue
            try:
                transformer.pick_product_scan_peaks(product_scan)
                if self.deconvolute:
                    transformer.deconvolute_product_scan(product_scan)
                self.send_scan(product_scan)
            except NoIsotopicClustersError as e:
                self.log_message(
                    "No isotopic clusters were extracted from scan %s (%r)" % (
                        e.scan_id, len(product_scan.peak_set)))
            except Exception as e:
                self.skip_scan(product_scan)
                self.log_error(e, product_scan.id, product_scan,
                               (product_scan_ids))

    self.log_message("Done (%d scans)" % i)
    if self.no_more_event is None:
        self.output_queue.put((DONE, DONE, DONE))
    self._work_complete.set()
class ScanIDYieldingProcess(Process):

    def __init__(self, mzml_path, queue, start_scan=None, max_scans=None,
                 end_scan=None, no_more_event=None, ignore_tandem_scans=False,
                 batch_size=1):
        Process.__init__(self)
        self.daemon = True
        self.mzml_path = mzml_path
        self.queue = queue
        self.loader = None

        self.start_scan = start_scan
        self.max_scans = max_scans
        self.end_scan = end_scan
        self.ignore_tandem_scans = ignore_tandem_scans
        self.batch_size = batch_size

        self.no_more_event = no_more_event

    def _make_scan_batch(self):
        batch = []
        scan_ids = []
        for _ in range(self.batch_size):
            try:
                scan, products = next(self.loader)
            except Exception:
                break
            scan_id = scan.id
            if not self.ignore_tandem_scans:
                batch.append((scan_id, [p.id for p in products], True))
            else:
                batch.append((scan_id, [p.id for p in products], False))
            scan_ids.append(scan_id)
        return batch, scan_ids

    def run(self):
        self.loader = MSFileLoader(self.mzml_path)

        if self.start_scan is not None:
            self.loader.start_from_scan(self.start_scan)

        count = 0
        if self.max_scans is None:
            max_scans = float('inf')
        else:
            max_scans = self.max_scans

        end_scan = self.end_scan
        while count < max_scans:
            try:
                batch, ids = self._make_scan_batch()
                if len(batch) > 0:
                    self.queue.put(batch)
                count += len(ids)
                if end_scan in ids or len(ids) == 0:
                    break
            except StopIteration:
                break
            except Exception as e:
                log_handle.error("An error occurred while fetching scans", e)
                break

        if self.no_more_event is not None:
            self.no_more_event.set()
            log_handle.log("All Scan IDs have been dealt. %d scan bunches." % (count, ))
        else:
            self.queue.put(DONE)
def _open_ms_file(self) -> Union[RandomAccessScanSource, ScanIterator]:
    self.loader = MSFileLoader(self.ms_file_path, decode_binary=False)
    return self.loader
def _open_ms_file(self, **kwargs):
    return MSFileLoader(self.ms_file, **kwargs)
def run(self):
    loader = MSFileLoader(self.mzml_path, huge_tree=huge_tree,
                          decode_binary=False)
    queued_loader = ScanBunchLoader(loader)
    has_input = True
    transformer = self.make_scan_transformer(loader)
    self.transformer = transformer

    nologs = ["deconvolution_scan_processor"]
    if not self.deconvolute:
        nologs.append("deconvolution")

    debug_mode = os.getenv("GLYCRESOFTDEBUG")
    if debug_mode:
        handler = logging.FileHandler(
            "piped-deconvolution-debug-%s.log" % (os.getpid()), 'w')
        fmt = logging.Formatter(
            "%(asctime)s - %(name)s:%(filename)s:%(lineno)-4d - %(levelname)s - %(message)s",
            "%H:%M:%S")
        handler.setFormatter(fmt)
    for logname in nologs:
        logger_to_silence = logging.getLogger(logname)
        if debug_mode:
            logger_to_silence.setLevel("DEBUG")
            logger_to_silence.addHandler(handler)
        else:
            logger_to_silence.propagate = False
            logger_to_silence.setLevel("CRITICAL")
            logger_to_silence.addHandler(logging.NullHandler())

    i = 0
    last = 0
    while has_input:
        try:
            scan_id, product_scan_ids, process_msn = self.get_work(True, 10)
            self.input_queue.task_done()
        except QueueEmpty:
            if self.no_more_event is not None and self.no_more_event.is_set():
                has_input = False
            continue
        i += 1 + len(product_scan_ids)
        if scan_id == DONE:
            has_input = False
            break
        try:
            queued_loader.put(scan_id, product_scan_ids)
            scan, product_scans = queued_loader.get()
        except Exception as e:
            self.log_message(
                "Something went wrong when loading bunch (%s): %r.\nRecovery is not possible." % (
                    (scan_id, product_scan_ids), e))
            # the bunch could not be loaded, so there is nothing to process
            continue
        self.handle_scan_bunch(scan, product_scans, scan_id,
                               product_scan_ids, process_msn)
        if (i - last) > 1000:
            last = i
            self.output_queue.join()

    self.log_message("Done (%d scans)" % i)
    if self.no_more_event is None:
        self.output_queue.put((DONE, DONE, DONE))
    self._work_complete.set()
def preprocess(mzml_file, database_connection, averagine=None, start_time=None,
               end_time=None, maximum_charge=None, name=None,
               msn_averagine=None, score_threshold=35., msn_score_threshold=5.,
               missed_peaks=1, msn_missed_peaks=1, n_processes=5,
               storage_path=None, extract_only_tandem_envelopes=False,
               ms1_background_reduction=5., msn_background_reduction=0,
               channel=None):
    minimum_charge = 1 if maximum_charge > 0 else -1
    charge_range = (minimum_charge, maximum_charge)

    logger.info("Begin Scan Interpolation")
    loader = MSFileLoader(mzml_file)
    start_scan_id = loader._locate_ms1_scan(
        loader.get_scan_by_time(start_time)).id
    end_scan_id = loader._locate_ms1_scan(
        loader.get_scan_by_time(end_time)).id

    loader.reset()
    is_profile = next(loader).precursor.is_profile
    if is_profile:
        logger.info("Spectra are profile")
    else:
        logger.info("Spectra are centroided")

    logger.info("Resolving Sample Name")
    if name is None:
        name = os.path.splitext(os.path.basename(mzml_file))[0]
    name = validate_sample_run_name(None, database_connection, name)

    logger.info("Validating arguments")
    try:
        averagine = validate_averagine(averagine)
    except Exception:
        channel.abort("Could not validate MS1 Averagine %s" % averagine)
    try:
        msn_averagine = validate_averagine(msn_averagine)
    except Exception:
        channel.abort("Could not validate MSn Averagine %s" % msn_averagine)

    if is_profile:
        ms1_peak_picking_args = {
            "transforms": [
                ms_peak_picker.scan_filter.FTICRBaselineRemoval(
                    scale=ms1_background_reduction, window_length=2.),
                ms_peak_picker.scan_filter.SavitskyGolayFilter()
            ]
        }
    else:
        ms1_peak_picking_args = {
            "transforms": [
                ms_peak_picker.scan_filter.FTICRBaselineRemoval(
                    scale=ms1_background_reduction, window_length=2.),
            ]
        }

    if msn_background_reduction > 0:
        msn_peak_picking_args = {
            "transforms": [
                ms_peak_picker.scan_filter.FTICRBaselineRemoval(
                    scale=msn_background_reduction, window_length=2.),
            ]
        }
    else:
        msn_peak_picking_args = None

    ms1_deconvolution_args = {
        "scorer": ms_deisotope.scoring.PenalizedMSDeconVFitter(score_threshold, 2.),
        "averagine": averagine,
        "max_missed_peaks": missed_peaks,
        "truncate_after": SampleConsumer.MS1_ISOTOPIC_PATTERN_WIDTH,
        "ignore_below": SampleConsumer.MS1_IGNORE_BELOW
    }

    msn_deconvolution_args = {
        "scorer": ms_deisotope.scoring.MSDeconVFitter(msn_score_threshold),
        "averagine": msn_averagine,
        "max_missed_peaks": msn_missed_peaks,
        "truncate_after": SampleConsumer.MSN_ISOTOPIC_PATTERN_WIDTH,
        "ignore_below": SampleConsumer.MSN_IGNORE_BELOW
    }

    consumer = SampleConsumer(
        mzml_file,
        averagine=averagine,
        charge_range=charge_range,
        ms1_peak_picking_args=ms1_peak_picking_args,
        ms1_deconvolution_args=ms1_deconvolution_args,
        msn_peak_picking_args=msn_peak_picking_args,
        msn_deconvolution_args=msn_deconvolution_args,
        storage_path=storage_path,
        sample_name=name,
        start_scan_id=start_scan_id,
        end_scan_id=end_scan_id,
        n_processes=n_processes,
        extract_only_tandem_envelopes=extract_only_tandem_envelopes,
        cache_handler_type=ThreadedMzMLScanCacheHandler)

    try:
        consumer.start()
        sample_run_data = consumer.sample_run
        logger.info("Updating New Sample Run")
        reader = ProcessedMzMLDeserializer(storage_path, use_index=False)
        reader.read_index_file()
        if reader.extended_index.msn_ids:
            sample_type = "MS/MS Sample"
        else:
            sample_type = "MS Sample"
        sample_run = sample.SampleRunRecord(
            name=sample_run_data.name,
            uuid=sample_run_data.uuid,
            completed=True,
            path=storage_path,
            sample_type=sample_type,
            user_id=channel.user.id)
        channel.send(Message(sample_run.to_json(), "new-sample-run"))
    except Exception:
        channel.send(Message.traceback())
        channel.abort("An error occurred during preprocessing.")