Example #1
    def run(self):
        self.loader = MSFileLoader(self.mzml_path)

        if self.start_scan is not None:
            self.loader.start_from_scan(self.start_scan)

        count = 0
        if self.max_scans is None:
            max_scans = float('inf')
        else:
            max_scans = self.max_scans

        end_scan = self.end_scan
        while count < max_scans:
            try:
                batch, ids = self._make_scan_batch()
                if len(batch) > 0:
                    self.queue.put(batch)
                count += len(ids)
                if (end_scan is not None and end_scan in ids) or len(ids) == 0:
                    break
            except StopIteration:
                break
            except Exception as e:
                log_handle.error("An error occurred while fetching scans", e)
                break

        if self.no_more_event is not None:
            self.no_more_event.set()
            log_handle.log("All Scan IDs have been dealt. %d scan bunches." %
                           (count, ))
        else:
            self.queue.put(DONE)
Example #2
    def _worker_loop(self):
        has_work = True
        i = 0

        def drain_queue():
            current_work = []
            try:
                while len(current_work) < 300:
                    current_work.append(self.queue.get_nowait())
            except QueueEmptyException:
                pass
            if len(current_work) > 5:
                log_handle.log("Drained Write Queue of %d items" %
                               (len(current_work), ))
            return current_work

        while has_work:
            try:
                next_bunch = self.queue.get(True, 1)
                if next_bunch == DONE:
                    has_work = False
                    continue
                if self.log_inserts and (i % 100 == 0):
                    log_handle.log("Saving %r" % (next_bunch[0].id, ))
                self._save_bunch(*next_bunch)
                self.commit_counter += 1 + len(next_bunch[1])
                i += 1

                if self.queue.qsize() > 0:
                    current_work = drain_queue()
                    for next_bunch in current_work:
                        if next_bunch == DONE:
                            has_work = False
                        else:
                            if self.log_inserts and (i % 100 == 0):
                                log_handle.log("Saving %r" %
                                               (next_bunch[0].id, ))
                            self._save_bunch(*next_bunch)
                            self.commit_counter += 1 + len(next_bunch[1])
                            i += 1

                if self.commit_counter - self.last_commit_count > self.commit_interval:
                    self.last_commit_count = self.commit_counter
                    log_handle.log(
                        "Syncing Scan Cache To Disk (%d items waiting)" %
                        (self.queue.qsize(), ))
                    self.serializer.commit()
                    if self.serializer.is_sqlite():
                        # Restart the write-ahead log so it does not grow
                        # without bound during a long write job.
                        self.serializer.session.execute(
                            "PRAGMA wal_checkpoint(RESTART);"
                        )
                    # Drop flushed objects from the session identity map to
                    # keep memory usage flat.
                    self.serializer.session.expunge_all()
            except QueueEmptyException:
                continue
            except Exception as e:
                log_handle.error(
                    "An error occurred while writing scans to disk", e)
        self.serializer.commit()
        self.serializer.session.expunge_all()
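
The two methods above form a producer/consumer pair: run() feeds scan batches into a shared queue and _worker_loop() drains and persists them, with DONE acting as a shutdown sentinel. A minimal, self-contained sketch of that wiring (DONE, the queue, and the thread names here are illustrative stand-ins, not the project's API):

import threading
from queue import Queue, Empty as QueueEmptyException

DONE = object()  # sentinel telling the consumer no more work is coming

def producer(queue, batches):
    for batch in batches:
        queue.put(batch)
    queue.put(DONE)

def consumer(queue):
    while True:
        try:
            item = queue.get(True, 1)  # block briefly, then re-check
        except QueueEmptyException:
            continue
        if item is DONE:
            break
        print("writing batch of %d scans" % len(item))

queue = Queue()
batches = [["scan-%d" % i] for i in range(3)]
worker = threading.Thread(target=consumer, args=(queue,))
worker.start()
producer(queue, batches)
worker.join()
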
Example #3
 def create(self):
     for index in self.table.indexes:
         try:
             conn = self.session.connection()
             index.create(conn)
             self.session.commit()
         except (OperationalError, ProgrammingError) as e:
             self.session.rollback()
             log_handle.error("An error occurred during index.create for %r" % index, exception=e)
Example #4
 def save_bunch(self, precursor, products):
     try:
         self.serializer.save(ScanBunch(precursor, products), commit=False)
         self.commit_counter += 1 + len(products)
         if self.commit_counter - self.last_commit_count > self.commit_interval:
             self.last_commit_count = self.commit_counter
             self.commit()
     except Exception as e:
         log_handle.error("An error occurred while saving scans", e)
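
save_bunch() illustrates a batched-commit strategy: every save bumps a counter, and the expensive commit only fires once the counter has moved past commit_interval. A toy version of the same pattern (the Writer class and the numbers are assumptions for the demo, not project code):

class Writer:
    def __init__(self, commit_interval=1000):
        self.commit_interval = commit_interval
        self.commit_counter = 0
        self.last_commit_count = 0
        self.pending = []

    def save(self, rows):
        self.pending.extend(rows)
        self.commit_counter += len(rows)
        # Commit only when enough rows have accumulated since the last flush.
        if self.commit_counter - self.last_commit_count > self.commit_interval:
            self.last_commit_count = self.commit_counter
            self.commit()

    def commit(self):
        print("flushing %d rows" % len(self.pending))
        del self.pending[:]

w = Writer(commit_interval=5)
for i in range(12):
    w.save(["row-%d" % i])
w.commit()  # flush the tail
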
Example #5
 def drop(self):
     for index in self.table.indexes:
         # log_handle.log("Dropping Index %r" % index)
         try:
             conn = self.session.connection()
             index.drop(conn)
             self.session.commit()
         except (OperationalError, ProgrammingError) as e:
             self.session.rollback()
             log_handle.error("An error occurred during index.drop for %r" % index, exception=e)
Example #6
 def create(self):
     for index in self.table.indexes:
         try:
             conn = self.session.connection()
             index.create(conn)
             self.session.commit()
         except (OperationalError, ProgrammingError) as e:
             self.session.rollback()
             log_handle.error(
                 "An error occurred during index.create for %r" % index,
                 exception=e)
Example #7
 def drop(self):
     for index in self.table.indexes:
         # log_handle.log("Dropping Index %r" % index)
         try:
             conn = self.session.connection()
             index.drop(conn)
             self.session.commit()
         except (OperationalError, ProgrammingError) as e:
             self.session.rollback()
             log_handle.error("An error occurred during index.drop for %r" %
                              index,
                              exception=e)
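
The create()/drop() pairs above exist so callers can remove secondary indexes before a bulk load and rebuild them afterwards, which is usually much faster than maintaining the indexes row by row. A self-contained sketch of that pattern (SQLAlchemy 2.x assumed; the table layout and in-memory engine are illustrative, not project code):

from sqlalchemy import Column, Integer, MetaData, Table, create_engine

engine = create_engine("sqlite://")
metadata = MetaData()
scans = Table("scans", metadata,
              Column("id", Integer, primary_key=True),
              Column("ms_level", Integer, index=True))
metadata.create_all(engine)

with engine.connect() as conn:
    for index in scans.indexes:
        index.drop(conn)    # inserts are cheaper while the index is gone
    conn.execute(scans.insert(), [{"ms_level": 1}, {"ms_level": 2}])
    for index in scans.indexes:
        index.create(conn)  # rebuild once the bulk load is done
    conn.commit()
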
Example #8
    def _worker_loop(self):
        has_work = True
        i = 0

        def drain_queue():
            current_work = []
            try:
                while len(current_work) < 300:
                    current_work.append(self.queue.get_nowait())
            except QueueEmptyException:
                pass
            if len(current_work) > 5:
                log_handle.log("Drained Write Queue of %d items" %
                               (len(current_work), ))
            return current_work

        while has_work:
            try:
                next_bunch = self.queue.get(True, 1)
                i += 1
                if next_bunch == DONE:
                    has_work = False
                    continue
                # log_handle.log("Writing %s %f (%d, %d)" % (
                #     next_bunch[0].id,
                #     next_bunch[0].scan_time,
                #     len(next_bunch[0].deconvoluted_peak_set) + sum(
                #         [len(p.deconvoluted_peak_set) for p in next_bunch[1]]),
                #     self.queue.qsize()))
                self._save_bunch(*next_bunch)
                if self.queue.qsize() > 0:
                    current_work = drain_queue()
                    for next_bunch in current_work:
                        i += 1
                        if next_bunch == DONE:
                            has_work = False
                        else:
                            # log_handle.log("Writing %s %f (%d, %d)" % (
                            #     next_bunch[0].id,
                            #     next_bunch[0].scan_time,
                            #     len(next_bunch[0].deconvoluted_peak_set) + sum(
                            #         [len(p.deconvoluted_peak_set) for p in next_bunch[1]]),
                            #     self.queue.qsize()))
                            self._save_bunch(*next_bunch)
                            i += 1
            except QueueEmptyException:
                continue
            except Exception as e:
                log_handle.error(
                    "An error occurred while writing scans to disk", e)
Example #9
    def _worker_loop(self):
        has_work = True
        i = 0

        def drain_queue():
            current_work = []
            try:
                while len(current_work) < 300:
                    current_work.append(self.queue.get_nowait())
            except QueueEmptyException:
                pass
            if len(current_work) > 5:
                log_handle.log("Drained Write Queue of %d items" %
                               (len(current_work), ))
            return current_work

        while has_work:
            try:
                next_bunch = self.queue.get(True, 1)
                i += 1
                if next_bunch == DONE:
                    has_work = False
                    continue
                self._save_bunch(*next_bunch)
                if self.queue.qsize() > 0:
                    current_work = drain_queue()
                    for next_bunch in current_work:
                        i += 1
                        if next_bunch == DONE:
                            has_work = False
                        else:
                            self._save_bunch(*next_bunch)
                            i += 1
            except QueueEmptyException:
                continue
            except Exception as e:
                log_handle.error(
                    "An error occurred while writing scans to disk", e)
Example #10
 def _make_scan_batch(self):
     batch = []
     scan_ids = []
     for _i in range(self.batch_size):
         try:
             bunch = next(self.loader)
             scan, products = bunch
             if scan is not None:
                 scan_id = scan.id
             else:
                 scan_id = None
             product_scan_ids = [p.id for p in products]
         except StopIteration:
             break
         except Exception as e:
             log_handle.error("An error occurred in _make_scan_batch", e)
             break
         if not self.ignore_tandem_scans:
             batch.append((scan_id, product_scan_ids, True))
         else:
             batch.append((scan_id, product_scan_ids, False))
         scan_ids.append(scan_id)
     return batch, scan_ids
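
Each batch entry produced above is a three-tuple: the precursor scan id, the ids of its product (MSn) scans, and a flag for whether those products should be processed. Purely illustrative values:

example_batch = [
    ("scan=1", ["scan=2", "scan=3"], True),  # MS1 scan with two MSn products
    ("scan=4", [], True),                    # MS1 scan with no products
]
for precursor_id, product_ids, process_products in example_batch:
    print(precursor_id, len(product_ids), process_products)
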
Example #11
    def run(self):
        self.loader = MSFileLoader(self.ms_file_path,
                                   huge_tree=huge_tree,
                                   decode_binary=False)

        if self.start_scan is not None:
            try:
                self.loader.start_from_scan(
                    self.start_scan,
                    require_ms1=self.loader.has_ms1_scans(),
                    grouped=True)
            except IndexError as e:
                log_handle.error("An error occurred while locating start scan",
                                 e)
                self.loader.reset()
                self.loader.make_iterator(grouped=True)
            except AttributeError as e:
                log_handle.error(
                    "The reader does not support random access, start time will be ignored",
                    e)
                self.loader.reset()
                self.loader.make_iterator(grouped=True)
        else:
            self.loader.make_iterator(grouped=True)

        count = 0
        last = 0
        if self.max_scans is None:
            max_scans = float('inf')
        else:
            max_scans = self.max_scans

        end_scan = self.end_scan
        while count < max_scans:
            try:
                batch, ids = self._make_scan_batch()
                if len(batch) > 0:
                    self.queue.put(batch)
                count += len(ids)
                if (count - last) > 1000:
                    last = count
                    self.queue.join()
                if (end_scan is not None and end_scan in ids) or len(ids) == 0:
                    log_handle.log("End Scan Found")
                    break
            except StopIteration:
                break
            except Exception as e:
                log_handle.error("An error occurred while fetching scans", e)
                break

        if self.no_more_event is not None:
            self.no_more_event.set()
            log_handle.log("All Scan IDs have been dealt. %d scan bunches." %
                           (count, ))
        else:
            self.queue.put(DONE)
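
This version of run() adds back-pressure: roughly every 1000 scans the producer calls self.queue.join(), which blocks until the consumer has called task_done() for every item queued so far, keeping the queue from growing without bound. A minimal demonstration of that mechanism (names are stand-ins):

import threading
from queue import Queue

queue = Queue()

def consumer():
    while True:
        item = queue.get()
        try:
            if item is None:
                break
        finally:
            queue.task_done()  # join() only unblocks once this is called

worker = threading.Thread(target=consumer)
worker.start()
for i in range(5):
    queue.put(i)
queue.join()     # wait until the consumer has drained everything queued
queue.put(None)  # then send the shutdown marker
worker.join()
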
Example #12
    def _worker_loop(self):
        has_work = True
        i = 0

        def drain_queue():
            current_work = []
            try:
                while len(current_work) < 300:
                    current_work.append(self.queue.get_nowait())
            except QueueEmptyException:
                pass
            if len(current_work) > 5:
                log_handle.log("Drained Write Queue of %d items" % (len(current_work),))
            return current_work

        while has_work:
            try:
                next_bunch = self.queue.get(True, 1)
                i += 1
                if next_bunch == DONE:
                    has_work = False
                    continue
                self._save_bunch(*next_bunch)
                if self.queue.qsize() > 0:
                    current_work = drain_queue()
                    for next_bunch in current_work:
                        i += 1
                        if next_bunch == DONE:
                            has_work = False
                        else:
                            self._save_bunch(*next_bunch)
                            i += 1
            except QueueEmptyException:
                continue
            except Exception as e:
                log_handle.error("An error occurred while writing scans to disk", e)
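
All of the _worker_loop variants share the same drain_queue idea: after a blocking get, opportunistically pull whatever else is already waiting with get_nowait so that many items are handled per wakeup. Extracted as a standalone helper (illustrative, not the project's module):

from queue import Queue, Empty as QueueEmptyException

def drain_queue(queue, limit=300):
    items = []
    try:
        while len(items) < limit:
            items.append(queue.get_nowait())  # never blocks
    except QueueEmptyException:
        pass
    return items

q = Queue()
for i in range(10):
    q.put(i)
print(drain_queue(q))  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
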
Example #13
    def configure_storage(cls, path=None, name=None, source=None):
        if path is not None:
            if name is None:
                sample_name = os.path.basename(path)
            else:
                sample_name = name
        else:
            path = "processed.mzML"
            # Fall back to the supplied name, or the default path, so that
            # sample_name is always bound before it is used below.
            sample_name = name if name is not None else path
        if source is not None:
            reader = MSFileLoader(source.scan_source)
            n_spectra = len(reader.index)
            deconvoluting = source.deconvoluting
            inst = cls(path, sample_name, n_spectra=n_spectra, deconvoluted=deconvoluting)
            try:
                description = reader.file_description()
            except AttributeError:
                description = FileInformation()
            source_file_metadata = MetadataSourceFile.from_path(source.scan_source)
            inst.serializer.add_file_information(description)
            try:
                inst.serializer.remove_file_contents("profile spectrum")
            except KeyError:
                pass
            inst.serializer.add_file_contents("centroid spectrum")
            if source_file_metadata not in description.source_files:
                inst.serializer.add_source_file(source_file_metadata)
            try:
                instrument_configs = reader.instrument_configuration()
                for config in instrument_configs:
                    inst.serializer.add_instrument_configuration(config)
            except Exception as e:
                log_handle.error(
                    "An error occurred while writing instrument configuration", e)
            for trans in source.ms1_peak_picking_args.get("transforms", []):
                inst.register_parameter("parameter: ms1-%s" % trans.__class__.__name__, repr(trans))
            if deconvoluting:
                if source.ms1_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: ms1-averagine", repr(source.ms1_deconvolution_args.get("averagine")))
                if source.ms1_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: ms1-scorer", repr(source.ms1_deconvolution_args.get("scorer")))
                if source.ms1_averaging > 0:
                    inst.register_parameter("parameter: ms1-averaging", repr(source.ms1_averaging))
                if source.ignore_tandem_scans:
                    inst.register_parameter("parameter: ignore-tandem-scans", "")
                if source.extract_only_tandem_envelopes:
                    inst.register_parameter("parameter: extract-only-tandem-envelopes", "")

            if source.msn_peak_picking_args is not None:
                for trans in source.msn_peak_picking_args.get("transforms", []):
                    inst.register_parameter("parameter: msn-%s" % trans.__class__.__name__, repr(trans))
            if deconvoluting:
                if source.msn_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: msn-averagine", repr(source.msn_deconvolution_args.get("averagine")))
                if source.msn_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: msn-scorer", repr(source.msn_deconvolution_args.get("scorer")))
            data_processing = inst.serializer.build_processing_method()
            inst.serializer.add_data_processing(data_processing)
        else:
            n_spectra = 2e5
            inst = cls(path, sample_name, n_spectra=n_spectra)
        # Force marshalling of controlled vocabularies early.
        inst.serializer.writer.param("32-bit float")
        return inst
Example #14
    def configure_storage(cls, path=None, name=None, source=None):
        if path is not None:
            if name is None:
                sample_name = os.path.basename(path)
            else:
                sample_name = name
        else:
            path = "processed.mzML"
            # Fall back to the supplied name, or the default path, so that
            # sample_name is always bound before it is used below.
            sample_name = name if name is not None else path
        if source is not None:
            reader = MSFileLoader(source.scan_source)
            n_spectra = len(reader.index)
            deconvoluting = source.deconvoluting
            inst = cls(path,
                       sample_name,
                       n_spectra=n_spectra,
                       deconvoluted=deconvoluting)
            try:
                description = reader.file_description()
            except AttributeError:
                description = FileInformation()
            source_file_metadata = MetadataSourceFile.from_path(
                source.scan_source)
            inst.serializer.add_file_information(description)
            try:
                inst.serializer.remove_file_contents("profile spectrum")
            except KeyError:
                pass
            inst.serializer.add_file_contents("centroid spectrum")
            if source_file_metadata not in description.source_files:
                inst.serializer.add_source_file(source_file_metadata)
            try:
                instrument_configs = reader.instrument_configuration()
                for config in instrument_configs:
                    inst.serializer.add_instrument_configuration(config)
            except Exception as e:
                log_handle.error(
                    "An error occurred while writing instrument configuration",
                    e)
            for trans in source.ms1_peak_picking_args.get("transforms", []):
                inst.register_parameter(
                    "parameter: ms1-%s" % trans.__class__.__name__,
                    repr(trans))
            if deconvoluting:
                if source.ms1_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: ms1-averagine",
                        repr(source.ms1_deconvolution_args.get("averagine")))
                if source.ms1_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: ms1-scorer",
                        repr(source.ms1_deconvolution_args.get("scorer")))
                if source.ms1_averaging > 0:
                    inst.register_parameter("parameter: ms1-averaging",
                                            repr(source.ms1_averaging))
                if source.ignore_tandem_scans:
                    inst.register_parameter("parameter: ignore-tandem-scans",
                                            "")
                if source.extract_only_tandem_envelopes:
                    inst.register_parameter(
                        "parameter: extract-only-tandem-envelopes", "")

            if source.msn_peak_picking_args is not None:
                for trans in source.msn_peak_picking_args.get("transforms", []):
                    inst.register_parameter(
                        "parameter: msn-%s" % trans.__class__.__name__,
                        repr(trans))
            if deconvoluting:
                if source.msn_deconvolution_args.get("averagine"):
                    inst.register_parameter(
                        "parameter: msn-averagine",
                        repr(source.msn_deconvolution_args.get("averagine")))
                if source.msn_deconvolution_args.get("scorer"):
                    inst.register_parameter(
                        "parameter: msn-scorer",
                        repr(source.msn_deconvolution_args.get("scorer")))
            data_processing = inst.serializer.build_processing_method()
            inst.serializer.add_data_processing(data_processing)
        else:
            n_spectra = 2e5
            inst = cls(path, sample_name, n_spectra=n_spectra)
        # Force marshalling of controlled vocabularies early.
        inst.serializer.writer.param("32-bit float")
        return inst
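
configure_storage() records processing parameters defensively: each setting is only registered when it was actually supplied. The same guard pattern in isolation (register_parameter here is a hypothetical stand-in that just collects key/value pairs):

def register_deconvolution_parameters(args, register_parameter, prefix="ms1"):
    for key in ("averagine", "scorer"):
        value = args.get(key)
        if value:  # skip settings that were not supplied
            register_parameter("parameter: %s-%s" % (prefix, key), repr(value))

params = {}
register_deconvolution_parameters(
    {"averagine": "peptide", "scorer": None},
    lambda k, v: params.__setitem__(k, v))
print(params)  # {'parameter: ms1-averagine': "'peptide'"}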