예제 #1
0
    def run(self,
            database: DigestionDatabase) -> Optional[Set[DigestionSettings]]:
        """Fill the dialog from ``database`` and show it modally.

        The enzyme combo box is refilled from ``enzymescollection`` and the
        settings table is rebuilt with one row per entry of
        ``database.available_digestions`` (enzyme, missed cleavages, enzyme
        description).

        Returns the value of ``self._generateDigestionSettings()`` when the
        dialog is accepted, ``None`` otherwise.
        """
        combo = self.enzymeComboBox
        table = self.digestionSettingsTableWidget

        # Rebuild the list of selectable enzymes from scratch
        combo.clear()
        for enzyme_name in enzymescollection.available_enzymes():
            combo.addItem(enzyme_name)

        # Rebuild the settings table; sorting stays off while rows are being
        # inserted and is switched back on once the table is complete
        table.setRowCount(0)
        table.setSortingEnabled(False)

        for row, digestion in enumerate(database.available_digestions):
            table.insertRow(row)
            cells = (
                QTableWidgetItem(digestion.enzyme),
                QTableWidgetItem(str(digestion.missed_cleavages)),
                QTableWidgetItem(
                    enzymescollection.enzyme(digestion.enzyme).description),
            )
            for column, cell in enumerate(cells):
                table.setItem(row, column, cell)

        table.setSortingEnabled(True)

        accepted = self.exec() == QDialog.Accepted
        return self._generateDigestionSettings() if accepted else None
예제 #2
0
 def enzymeComboBoxCurrentTextChanged(self, text: str) -> None:
     """Show name and description of the enzyme called ``text`` in the label.

     The description label is emptied when ``enzymescollection`` rejects
     ``text`` with ``InvalidEnzymeError``.
     """
     try:
         selected = enzymescollection.enzyme(text)
     except enzymescollection.InvalidEnzymeError:
         self.enzymeDescriptionLabel.setText('')
         return

     caption = f'<i>{selected.name}: {selected.description}</i>'
     self.enzymeDescriptionLabel.setText(caption)
예제 #3
0
    def is_coherent_with_enzymes_collection(self) -> bool:
        """Tell whether the stored digestions still match the enzymes collection.

        Every row of the ``digestions`` table must name an enzyme that
        ``enzymescollection`` still provides, and the rule stored with it must
        be equal to the rule that enzyme has in the collection.
        """
        known_enzymes = enzymescollection.available_enzymes()
        stored_digestions = self._connection.execute(
            'SELECT enzyme, rule FROM digestions')

        def is_stale(record) -> bool:
            # A record is stale when its enzyme is gone or its rule changed
            name = record['enzyme']
            if name not in known_enzymes:
                return True
            return record['rule'] != enzymescollection.enzyme(name).rule

        return not any(is_stale(record) for record in stored_digestions)
예제 #4
0
    def addPushButtonClicked(self):
        """Append the digestion settings currently selected to the table.

        The settings are built from the enzyme combo box and the missed
        cleavages spin box. When they are already part of
        ``self._generateDigestionSettings()`` an error message is shown and the
        table is left untouched; otherwise a row is appended, selected and
        scrolled into view.
        """
        enzyme_name = self.enzymeComboBox.currentText()
        missed_cleavages = self.missedCleavagesSpinBox.value()
        candidate = DigestionSettings(enzyme_name, missed_cleavages)

        if candidate in self._generateDigestionSettings():
            commondialog.errorMessage(
                self, 'This digestion settings is already listed.')
            return

        table = self.digestionSettingsTableWidget
        new_row = table.rowCount()
        table.insertRow(new_row)

        # Columns: enzyme name, missed cleavages, enzyme description
        enzyme_cell = QTableWidgetItem(enzyme_name)
        cells = (
            enzyme_cell,
            QTableWidgetItem(str(missed_cleavages)),
            QTableWidgetItem(
                enzymescollection.enzyme(enzyme_name).description),
        )
        for column, cell in enumerate(cells):
            table.setItem(new_row, column, cell)

        table.selectRow(new_row)
        table.scrollToItem(enzyme_cell)
예제 #5
0
    def _digest(self,
                digestion,
                callback=None,
                proteins_per_batch=10000) -> None:
        """Digest the sequences that are not digested yet for ``digestion``.

        A sequence is considered already digested when its id appears in the
        association table of ``digestion``. For every other row of
        ``sequences``, the peptides produced by the enzyme are stored in the
        peptides table and linked to the sequence in the association table.
        Both digestion indices are dropped before the digestion and recreated
        afterwards.

        Args:
            digestion: settings to digest with; its ``enzyme`` and
                ``missed_cleavages`` attributes are read here.
            callback: stored as ``self._progress_handler_function``.
            proteins_per_batch: number of sequence rows fetched by each
                ``fetchmany`` call.

        Returns early, without touching the indices, when no sequence is left
        to digest.
        """
        digestion_tables = self._digestion_tables(digestion)
        enzyme = enzymescollection.enzyme(digestion.enzyme)
        self._progress_handler_function = callback
        self._maximum_task_iteration = 0
        self._current_task_iteration = 0
        self._current_task = 'Determining number of sequences to digest...'

        # Counting the number of sequences to digest: the ones whose id is
        # not referenced by the association table yet
        cursor = self._connection.execute(
            f'''SELECT COUNT(*) FROM sequences WHERE sequences.id NOT IN
                                              (SELECT DISTINCT {digestion_tables.peptides_association}.sequence_id 
                                               FROM {digestion_tables.peptides_association})'''
        )

        self._maximum_task_iteration = cursor.fetchone()[0]

        # Nothing to digest, exiting
        if not self._maximum_task_iteration:
            return

        self._current_task_iteration = 0
        self._current_task = (
            f'Digesting database with {digestion.enzyme}, {digestion.missed_cleavages} '
            f'missed cleavage{"s" if digestion.missed_cleavages > 1 else ""}...'
        )

        # Dropping the indices to speed up digestion
        self._connection.execute(
            f'DROP INDEX IF EXISTS {digestion_tables.peptides_table_index}')
        self._connection.execute(
            f'DROP INDEX IF EXISTS {digestion_tables.peptides_association_index}'
        )

        # Reading the sequences to digest (same filter as the count above)
        read_cursor = self._connection.execute(
            f'''SELECT id, sequence FROM sequences WHERE sequences.id NOT IN
                                                   (SELECT DISTINCT {digestion_tables.peptides_association}.sequence_id 
                                                    FROM {digestion_tables.peptides_association})'''
        )

        # Sequences are processed batch by batch, proteins_per_batch rows at
        # a time, until fetchmany returns nothing
        rows = read_cursor.fetchmany(proteins_per_batch)

        while rows:
            for aa_sequence in (AminoAcidSequence(row['sequence'], row['id'])
                                for row in rows):
                # Materialized as a tuple because the peptides are walked
                # several times below
                peptides = tuple(
                    enzyme.cleave(aa_sequence, digestion.missed_cleavages))

                # Storing the peptides; a conflicting row (e.g. a sequence
                # that is already stored) is silently skipped
                self._connection.executemany(
                    f'''INSERT INTO {digestion_tables.peptides_table}
                                                 (sequence, missed_cleavages) 
                                                 VALUES(?, ?) ON CONFLICT DO NOTHING''',
                    ((peptide.sequence, peptide.missed_cleavages)
                     for peptide in peptides))

                # Mapping needed to preserve the digestion order of the
                # peptides when updating the association table
                sequences_to_ids = {}

                # Ids are queried by chunks of at most 900 sequences, so a
                # single statement never binds more than 900 parameters
                # (presumably to stay below SQLite's bound-parameter limit)
                for i in range(0, len(peptides), 900):
                    queried_peptide_sequences = tuple(
                        peptide.sequence for peptide in peptides[i:i + 900])
                    parameters_substitution = ','.join(
                        ('?', ) * len(queried_peptide_sequences))

                    cursor = self._connection.execute(
                        f'''SELECT id, sequence FROM {digestion_tables.peptides_table}
                                                          WHERE sequence IN ({parameters_substitution})''',
                        queried_peptide_sequences)

                    # Mapping peptide sequence to its id
                    sequences_to_ids.update(
                        {row['sequence']: row['id']
                         for row in cursor})

                # Generating the ids in digestion order (lazily, consumed by
                # the insertion below)
                sorted_peptides_id = (sequences_to_ids[peptide.sequence]
                                      for peptide in peptides)

                # Linking every peptide to the sequence it comes from
                self._connection.executemany(
                    f'''INSERT INTO {digestion_tables.peptides_association}
                                                 (peptide_id, sequence_id) VALUES(?, ?)''',
                    ((peptide_id, aa_sequence.id)
                     for peptide_id in sorted_peptides_id))

                # One more sequence digested
                self._current_task_iteration += 1
            rows = read_cursor.fetchmany(proteins_per_batch)

        # Creating indices to speed up search; both progress counters are
        # reset to 0 for this step
        self._maximum_task_iteration = 0
        self._current_task_iteration = 0
        self._current_task = (
            f'Creating index for digestion {digestion.enzyme}, {digestion.missed_cleavages} '
            f'missed cleavage{"s" if digestion.missed_cleavages > 1 else ""}...'
        )
        self._connection.execute(
            f'''CREATE INDEX {digestion_tables.peptides_table_index} ON  
                                     {digestion_tables.peptides_table}(sequence)'''
        )
        self._connection.execute(
            f'''CREATE INDEX {digestion_tables.peptides_association_index} ON  
                                     {digestion_tables.peptides_association}(peptide_id, sequence_id)'''
        )
예제 #6
0
    def update_digestion(self,
                         digestion_settings: Iterable[DigestionSettings],
                         remove=False,
                         callback=None,
                         proteins_per_batch=10000) -> None:
        """Synchronise the stored digestions with ``digestion_settings``.

        Digestions requested in ``digestion_settings`` that the database does
        not hold yet are registered, their tables are created and the
        sequences are digested. Digestions held by the database but absent
        from ``digestion_settings`` are deleted only when ``remove`` is true;
        the database is vacuumed if at least one digestion was removed.

        Args:
            digestion_settings: the digestions the database should provide.
            remove: also delete the stored digestions that are not requested.
            callback: stored as ``self._progress_handler_function`` and
                forwarded to ``self._digest``.
            proteins_per_batch: forwarded to ``self._digest``.

        Raises:
            IncoherencyWithEnzymesCollectionError: when
                ``self.is_coherent_with_enzymes_collection()`` returns a false
                value.

        An ``sqlite3.OperationalError`` raised while removing a digestion,
        creating its tables or digesting is not propagated: the transaction
        is rolled back, the task is ended and the method returns.
        """
        available_digestions = set(self.available_digestions)
        updated_digestions = set(digestion_settings)
        self._progress_handler_function = callback
        cleanup_needed = False

        # The check is a method and has to be called: the bound method object
        # itself is always truthy, which would silently disable the check
        if not self.is_coherent_with_enzymes_collection():
            raise IncoherencyWithEnzymesCollectionError

        # Removing unneeded digestions
        if remove:
            self._current_task_iteration = 0
            self._maximum_task_iteration = 0

            with self._connection:
                for digestion in available_digestions - updated_digestions:
                    cleanup_needed = True
                    self._current_task = (
                        f'Removing digestion {digestion.enzyme}, {digestion.missed_cleavages} '
                        f'missed cleavage{"s" if digestion.missed_cleavages > 1 else ""}...'
                    )
                    self._progress_handler()
                    digestion_tables = self._digestion_tables(digestion)

                    try:
                        # self._digestion_tables returns table names surrounded by ", we need to remove them in this
                        # case
                        self._connection.execute(
                            'DELETE FROM digestions WHERE peptides_table = ?',
                            (digestion_tables.peptides_table[1:-1], ))
                        # The association table holds the foreign key to the
                        # peptides table, so it is dropped first: this order
                        # also works when foreign keys are enforced
                        self._connection.execute(
                            f'DROP TABLE {digestion_tables.peptides_association}'
                        )
                        self._connection.execute(
                            f'DROP TABLE {digestion_tables.peptides_table}')
                    except sqlite3.OperationalError:
                        self._connection.rollback()
                        self._end_of_task()
                        return

        # Adding digestions
        with self._connection:
            # Processed by enzyme, then by number of missed cleavages
            added_digestions = sorted(
                updated_digestions - available_digestions,
                key=lambda digestion: (digestion.enzyme,
                                       digestion.missed_cleavages))

            for digestion in added_digestions:
                # Generates a uuid as the table name
                digestion_table_name = uuid.uuid4().hex

                # Add this digestion table into the list of digestion
                enzyme = enzymescollection.enzyme(digestion.enzyme)
                self._connection.execute(
                    '''INSERT INTO digestions(enzyme, rule, missed_cleavages, peptides_table)
                                            VALUES(?, ?, ?, ?)''',
                    (enzyme.name, enzyme.rule, digestion.missed_cleavages,
                     digestion_table_name))

                # Get the table names (including many-to-many table name)
                digestion_tables = self._digestion_tables(digestion)

                # Creates all the tables needed to store the digestion result
                try:
                    self._connection.execute(
                        f'''CREATE TABLE {digestion_tables.peptides_table}(
                                                 id INTEGER,
                                                 sequence TEXT NOT NULL UNIQUE,
                                                 missed_cleavages INTEGER NOT NULL,
                                                 PRIMARY KEY(id))''')

                    self._connection.execute(
                        f'''CREATE TABLE {digestion_tables.peptides_association}(
                                                 peptide_id INTEGER,
                                                 sequence_id INTEGER,
                                                 FOREIGN KEY(peptide_id) REFERENCES {digestion_tables.peptides_table}(id),
                                                 FOREIGN KEY(sequence_id) REFERENCES sequences(id))'''
                    )

                    self._digest(digestion,
                                 callback=callback,
                                 proteins_per_batch=proteins_per_batch)

                except sqlite3.OperationalError:
                    self._connection.rollback()
                    self._end_of_task()
                    return

        # Only worth doing when at least one digestion has been removed
        if cleanup_needed:
            self._current_task = 'Cleaning up database...'
            self._connection.execute('VACUUM')

        self._end_of_task()