def test_create_read_update_delete_cycle(self):
    """
    Runs a full CRUD cycle for one protein and its tryptic peptides
    through the ORM session (create/commit, read back, delete).
    """
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        'Q257X2', 'LEP_CAPHI', 'Leptin',
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True)

    ## Create
    # Start db session
    session = self.session_factory()
    session.add(leptin)
    try:
        session.commit()
    except Exception:
        # Bugfix: the original bare `except:` swallowed every error
        # (including KeyboardInterrupt) and left the session in a broken
        # state. Catch Exception only and roll back so the session stays
        # usable when the record already exists.
        session.rollback()
        print("leptin already exists")
    # Check if id is now an integer (autoincrement from db)
    self.assertTrue(isinstance(leptin.id, int))

    # Digest protein and save peptides with association to protein
    trypsin = Trypsin(3, 0, 60)
    leptin.peptides = trypsin.digest(leptin)
    PEPTIDE_COUNT = leptin.peptides.count()
    session.commit()
    for peptide in leptin.peptides:
        # Check if id is now an integer (autoincrement from db)
        self.assertTrue(isinstance(peptide.id, int))
    # Save autoincremented id
    LEPTIN_ID = leptin.id
    # Close session and set Leptin to None, so all connections to db are lost
    session.close()
    leptin = None

    ## Read
    session = self.session_factory()
    # Get Leptin by accession
    leptin = session.query(Protein).filter(
        Protein.accession == "Q257X2").one()
    leptin_peptides = leptin.peptides.all()
    self.assertEqual(LEPTIN_ID, leptin.id)
    self.assertEqual(PEPTIDE_COUNT, len(leptin_peptides))
    session.close()

    ## Update
    # Not implemented yet

    ## Delete
    # Start new session
    session = self.session_factory()
    # Bind leptin to the new session
    session.add(leptin)
    # Remove association between leptin and peptides
    for peptide in leptin.peptides.all():
        session.delete(peptide)
    session.delete(leptin)
    session.commit()
    self.assertEqual(0, session.query(Protein).count())
    self.assertEqual(0, session.query(Peptide).count())
    session.close()
def test_create_read_update_delete_cycle(self):
    """Exercises create, read, update and delete for a single protein row."""
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        'Q257X2', 'LEP_CAPHI', 'Leptin',
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True)

    ## create
    db_session = self.session_factory()
    db_session.add(leptin)
    db_session.commit()
    # The database assigns an autoincremented integer id on commit.
    self.assertTrue(isinstance(leptin.id, int))
    # Remember the generated id for the later read/update checks.
    LEPTIN_ID = leptin.id
    # Drop the session and the reference so nothing stays attached to the db.
    db_session.close()
    leptin = None

    ## read
    db_session = self.session_factory()
    # Fetch Leptin again, this time by accession.
    leptin = db_session.query(Protein).filter(
        Protein.accession == "Q257X2").one()
    self.assertEqual(LEPTIN_ID, leptin.id)
    db_session.close()

    ## update
    db_session = self.session_factory()
    # Change the accession and persist it.
    leptin.accession = "CHANGED"
    db_session.add(leptin)
    db_session.commit()
    # Close the session so no cached state can satisfy the next query.
    db_session.close()
    # Re-query the accession through a completely fresh session.
    db_session = self.session_factory()
    leptin_accession = db_session.query(
        Protein.accession).filter(Protein.id == LEPTIN_ID).scalar()
    self.assertEqual("CHANGED", leptin_accession)
    db_session.close()

    ## delete
    db_session = self.session_factory()
    db_session.delete(leptin)
    db_session.commit()
    self.assertEqual(0, db_session.query(Protein).count())
    db_session.close()
def __next__(self):
    """
    Parses the next UniProt/EMBL flat-file entry from the underlying file
    iterator.

    Returns
    -------
    Tuple of the parsed Protein and the protein merges created from its
    secondary accessions.

    Raises
    ------
    StopIteration
        When the underlying file iterator is exhausted.
    """
    entry_name = ""
    name = ""
    is_reviewed = False
    accessions = []
    taxonomy_id = None
    sequence = ""
    proteome_id = None
    while True:
        line = next(self.__file_iter)
        if line.startswith("ID"):
            entry_name, is_reviewed = self.__process_id(line[5:])
        elif line.startswith("AC"):
            accessions += self.__process_ac(line[5:])
        elif line.startswith("OX"):
            taxonomy_id = self.__process_ox(line[5:])
        elif line.startswith("DR"):
            if line[5:].startswith("Proteomes;"):
                proteome_id = self.__process_dr_proteoms(line[5:])
        # sequence starts with two whitespaces
        elif line.startswith("  "):
            sequence += self.__process_sq_no_header(line)
        elif line.startswith("DE"):
            # Bugfix: the original condition read
            # `name == "" and ...RecName... or ...AltName... or ...Sub...`.
            # Because `and` binds tighter than `or`, AltName/Sub lines
            # bypassed the `name == ""` guard and overwrote an already
            # parsed name. Grouping the prefixes in a startswith-tuple
            # keeps only the first matching DE line.
            if name == "" and line[5:].startswith(
                    ("RecName", "AltName", "Sub")):
                name = self.__process_de_name(line[5:])
        elif line.startswith("//"):
            # End of entry: first accession is the primary one.
            accession = accessions.pop(0)
            return Protein(
                accession, entry_name, name, sequence, taxonomy_id,
                proteome_id,
                is_reviewed), self.__create_protein_merges(
                    accessions, accession)
def proteins(sequence: str):
    """
    Returns all proteins containing the given peptide sequence, split into
    reviewed (SwissProt) and unreviewed (TrEMBL) groups, as streamed JSON.
    """
    peptide = Peptide(sequence.upper(), 0)
    database_connection = get_database_connection()
    with database_connection.cursor() as database_cursor:
        matching_proteins = Protein.select(
            database_cursor,
            WhereCondition([
                f"accession = ANY(SELECT protein_accession FROM {ProteinPeptideAssociation.TABLE_NAME} as ppa WHERE ppa.partition = %s AND ppa.peptide_mass = %s AND ppa.peptide_sequence = %s)"
            ], [peptide.partition, peptide.mass, peptide.sequence]), True)

        # Partition the result by review status.
        reviewed_proteins = [
            protein for protein in matching_proteins if protein.is_reviewed
        ]
        unreviewed_proteins = [
            protein for protein in matching_proteins
            if not protein.is_reviewed
        ]

        def json_stream() -> Iterator[bytes]:
            # Stream both groups without building the whole document in memory.
            yield b"{\"reviewed_proteins\": ["
            for idx, protein in enumerate(reviewed_proteins):
                if idx > 0:
                    yield b","
                yield from protein.to_json()
            yield b"],\"unreviewed_proteins\": ["
            for idx, protein in enumerate(unreviewed_proteins):
                if idx > 0:
                    yield b","
                yield from protein.to_json()
            yield b"]}"

        return Response(json_stream(), content_type="application/json")
def test_accession_regex(self):
    """Checks that a valid UniProt accession survives Protein construction."""
    # Leptin (UniProt accession: Q257X2)
    leptin_sequence = (
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC'
    )
    leptin = Protein('Q257X2', 'LEP_CAPHI', 'Leptin', leptin_sequence, 9925,
                     'UP000291000', True)
    self.assertEqual("Q257X2", leptin.accession)
def show(accession: str):
    """
    Returns the protein with the given accession as JSON, or a 404 error
    payload when no such protein exists.
    """
    accession = accession.upper()
    database_connection = get_database_connection()
    with database_connection.cursor() as database_cursor:
        protein = Protein.select(
            database_cursor,
            WhereCondition(["accession = %s"], [accession]),
            False
        )
        # Guard clause: missing protein -> 404 with an error document.
        if not protein:
            return jsonify({
                "errors": {
                    "accession": ["not found"]
                }
            }), 404
        return Response(
            protein.to_json(),
            content_type="application/json"
        )
def test_digest(self):
    """Digests Leptin with trypsin and compares against the expected peptides."""
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        "Q257X2", "LEP_CAPHI", "Leptin",
        "MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC",
        9925, "UP000291000", True)
    peptides = Trypsin(3, 0, 60).digest(leptin)
    # Same number of peptides, and every sequence must be an expected one.
    self.assertEqual(len(DESIRED_RESULTS), len(peptides))
    for peptide in peptides:
        self.assertIn(peptide.sequence, DESIRED_RESULTS)
def digest():
    """
    Digests the sequence of the given protein.

    Expects a JSON body containing an `accession` plus the digestion
    parameters validated by `ApiDigestionController.check_digestion_parameters`.
    Returns the matching peptides as a streamed JSON response, or a 422
    response with the collected validation errors.
    """
    data = request.get_json()
    errors = ApiDigestionController.check_digestion_parameters(data)
    # Idiom fix: `"accession" not in data` instead of `not "accession" in data`.
    if "accession" not in data:
        errors["accession"].append("cannot be empty")

    peptides = []
    if len(errors) == 0:
        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            protein = Protein.select(
                database_cursor,
                WhereCondition(
                    ["accession = %s"],
                    [data["accession"]]
                ),
                False
            )
            if protein:
                # Keep only peptides within the requested missed-cleavage
                # and length bounds.
                peptides = list(filter(
                    lambda peptide: peptide.number_of_missed_cleavages <= data["maximum_number_of_missed_cleavages"] \
                        and data["minimum_peptide_length"] <= peptide.length <= data["maximum_peptide_length"],
                    protein.peptides(database_cursor)
                ))
                peptides.sort(key=lambda peptide: peptide.mass)
            else:
                errors["accession"].append("not found")

    if len(errors) == 0:
        def json_stream():
            yield b"{\"peptides\": ["
            for peptide_idx, peptide in enumerate(peptides):
                if peptide_idx > 0:
                    yield b","
                yield from peptide.to_json()
            # Bugfix: encoding name was "utf-8 " (trailing blank), which only
            # worked by accident of codec-name normalization.
            yield f"], \"count\": {len(peptides)}}}".encode("utf-8")
        return Response(
            json_stream(),
            content_type="application/json"
        )
    else:
        return jsonify({
            "errors": errors
        }), 422
def __next__(self):
    """
    Parses the next UniProt text entry from the underlying file iterator
    and returns the resulting Protein.

    Returns
    -------
    Protein built from the parsed entry (primary accession, secondary
    accessions, names, sequence, taxonomy, proteome, review flag and the
    entry's last-update timestamp).

    Raises
    ------
    StopIteration
        When the underlying file iterator is exhausted.
    """
    entry_name = ""
    name = ""
    is_reviewed = False
    accessions = []
    taxonomy_id = None
    sequence = ""
    proteome_id = None
    last_update = "01-JAN-1970"
    while True:
        line = next(self.__file_iter)
        line = line.rstrip()
        if len(line) >= 2:
            if line.startswith("ID"):
                entry_name, is_reviewed = self.__process_id(line[5:])
            elif line.startswith("AC"):
                accessions += self.__process_ac(line[5:])
            elif line.startswith("OX"):
                taxonomy_id = self.__process_ox(line[5:])
            elif line.startswith("DR"):
                if line[5:].startswith("Proteomes;"):
                    proteome_id = self.__process_dr_proteoms(line[5:])
            # sequence starts with two whitespaces
            elif line.startswith("  "):
                sequence += self.__process_sq_no_header(line)
            elif line.startswith("DE"):
                # Bugfix: `name == "" and A or B or C` let AltName/Sub
                # lines bypass the `name == ""` guard (operator
                # precedence) and overwrite an already parsed name.
                # The startswith-tuple keeps only the first matching
                # DE line.
                if name == "" and line[5:].startswith(
                        ("RecName", "AltName", "Sub")):
                    name = self.__process_de_name(line[5:])
            elif line.startswith("DT"):
                # DT lines carry dates; the last one seen wins.
                last_update = line[5:16]
            elif line.startswith("//"):
                # End of entry: first accession is the primary one.
                primary_accession = accessions.pop(0)
                return Protein(
                    primary_accession, accessions, entry_name, name,
                    sequence, taxonomy_id, proteome_id, is_reviewed,
                    self.__dt_date_to_utc_timestamp(last_update))
def run(self):
    """
    Starts the process which digests proteins and inserts the peptides into the database.

    Pulls proteins from the shared queue until the finish event is set and
    the queue is drained (or the termination event fires). Each protein is
    created or updated in its own transaction; transient errors (unique
    violations, deadlocks) are retried with an increasing error factor,
    and proteins that still fail are written to the unprocessible log.
    """
    self.activate_signal_handling()
    self.__general_log.send("digest worker {} is online".format(self.__id))
    database_connection = None
    # Let the process run until finish_event is true and protein_queue is empty or termination_event is true.
    while (not self.__finish_event.is_set()
           or not self.__protein_queue.empty()
           ) and not self.termination_event.is_set():
        try:
            # Open/reopen database connection
            if not database_connection or (
                    database_connection
                    and database_connection.closed != 0):
                database_connection = psycopg2.connect(self.__database_url)
            # Try to get a protein from the queue, timeout is 5 seconds
            new_protein = self.__protein_queue.get(True, 5)
            # Variables for loop control
            unsolvable_error_factor = 0
            try_transaction_again = True
            while try_transaction_again:
                number_of_new_peptides = 0
                error = None
                try:
                    count_protein = False
                    number_of_new_peptides = 0
                    # `with database_connection` wraps the work in a
                    # transaction (commit on success, rollback on error).
                    with database_connection:
                        with database_connection.cursor(
                        ) as database_cursor:
                            skip_protein_creation = False
                            # Check if the Protein exists by its accession or secondary accessions
                            accessions = [
                                new_protein.accession
                            ] + new_protein.secondary_accessions
                            existing_proteins = Protein.select(
                                database_cursor,
                                WhereCondition(["accession = ANY(%s)"],
                                               [accessions]),
                                fetchall=True)
                            if len(existing_proteins) > 0:
                                # If proteins were found and the first protein is the same protein as the current one from the queue ...
                                if existing_proteins[
                                        0].accession == new_protein.accession:
                                    updateable_protein = existing_proteins.pop(
                                        0)
                                    # ... delete the other proteins, because they are merged with this one.
                                    for existing_protein in existing_proteins:
                                        Protein.delete(
                                            database_cursor,
                                            existing_protein)
                                    skip_protein_creation = True
                                    number_of_new_peptides = updateable_protein.update(
                                        database_cursor, new_protein,
                                        self.__enzyme)
                                else:
                                    # If the first protein from the found proteins has not the same accession as the new one from the queue
                                    # each of the found proteins are merged with the new protein. So delete them.
                                    for existing_protein in existing_proteins:
                                        Protein.delete(
                                            database_cursor,
                                            existing_protein)
                            if not skip_protein_creation:
                                number_of_new_peptides = Protein.create(
                                    database_cursor, new_protein,
                                    self.__enzyme)
                                count_protein = True
                    # Commit was successful: stop while-loop and add statistics
                    try_transaction_again = False
                    self.__statistics.acquire()
                    if count_protein:
                        self.__statistics[0] += 1
                    # Peptide counter is increased for creates AND updates.
                    self.__statistics[1] += number_of_new_peptides
                    self.__statistics.release()
                    # Rollback is done implicitly by `with database_connection`
                # Each error increases the unsolvable error factor differently. If the factor reaches UNSOLVEABLE_ERROR_FACTOR_LIMIT the protein is logged as unprocessible
                ## Catch violation of unique constraints. Usually a peptide which is already inserted by another transaction.
                except psycopg2.errors.UniqueViolation as unique_violation_error:
                    error = unique_violation_error
                    if unsolvable_error_factor < self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT:
                        unsolvable_error_factor += 0.2
                ## Catch deadlocks between transactions. This occurs usually when 2 transactions try to insert the same peptides
                except psycopg2.errors.DeadlockDetected as deadlock_detected_error:
                    error = deadlock_detected_error
                    # Try again after 5 (first try) and 10 (second try) + a random number between 0 and 5 (both including) seconds; maybe some blocking transactions can pass so this transaction will successfully finish on the next try.
                    if unsolvable_error_factor < self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT:
                        unsolvable_error_factor += 1
                        time.sleep(5 * unsolvable_error_factor +
                                   random.randint(0, 5))
                ## Catch other errors: immediately pushes the factor past the limit, no retry.
                except psycopg2.Error as base_error:
                    unsolvable_error_factor += self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT
                    error = base_error
                finally:
                    # Log the last error if the unsolvable_error_factor exceeds the limit
                    if unsolvable_error_factor >= self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT:
                        self.__general_log.send(
                            "Exception on protein {}, see:\n{}".format(
                                new_protein.accession, error))
                        self.__unprocessible_protein_log.send(
                            new_protein.to_embl_entry())
                        self.__statistics.acquire()
                        self.__statistics[2] += 1
                        self.__statistics.release()
                        try_transaction_again = False
        # Catch errors which occur during database connect
        except psycopg2.Error as error:
            self.__general_log.send(
                "Error when opening the database connection, see:\n{}".
                format(error))
        # Catch queue.Empty which is thrown when protein_queue.get() timed out
        except EmptyQueueError:
            pass
    # Close database connection
    if database_connection and database_connection.closed == 0:
        database_connection.close()
    self.__general_log.send("digest worker {} is stopping".format(
        self.__id))
    self.__general_log.close()
    self.__unprocessible_protein_log.close()
def test_lifecycle(self):
    """
    Runs the create/read/update/delete lifecycle for a protein against a
    real database connection, verifying peptide creation, the
    protein/peptide associations, the update of accession, secondary
    accessions and sequence, and the final cleanup.
    """
    trypsin = Trypsin(2, 6, 50)
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        'Q257X2', ['TESTACC'], 'LEP_CAPHI', 'Leptin',
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True, 1145311200)
    leptin_peptides = trypsin.digest(leptin)
    # Leptin with a new accession, old accession moved to secondary accessions, and new sequence where the first leucine is replaced by an isoleucine which creates a new peptide.
    updated_leptin = Protein(
        'Q257X2V2', ['Q257X2', 'TESTACC'], 'LEP_CAPHI', 'Leptin',
        'MRCGPIYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True, 1627596000)
    updated_leptin_peptides = {
        peptide.sequence: peptide
        for peptide in trypsin.digest(updated_leptin)
    }
    inserted_leptin_peptide_count = 0

    ## Create
    # Start db session
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            inserted_leptin_peptide_count = Protein.create(
                database_cursor, leptin, trypsin)
            self.database_connection.commit()
            database_cursor.execute(
                f"SELECT true FROM {Protein.TABLE_NAME} WHERE accession = %s;",
                (leptin.accession, ))
            # Check if the protein row exists now
            self.assertTrue(database_cursor.fetchone()[0])
            # Check if all peptides were inserted
            self.assertEqual(inserted_leptin_peptide_count,
                             len(leptin_peptides))
            # Database should contain exactly the amount of leptin peptides
            database_cursor.execute(
                f"SELECT count(*) FROM {Peptide.TABLE_NAME};")
            self.assertEqual(len(leptin_peptides),
                             database_cursor.fetchone()[0])
            # Database should contain also exactly one association per leptin peptide
            database_cursor.execute(
                f"SELECT count(*) FROM {ProteinPeptideAssociation.TABLE_NAME};"
            )
            self.assertEqual(len(leptin_peptides),
                             database_cursor.fetchone()[0])
            for peptide in leptin_peptides:
                database_cursor.execute(
                    f"SELECT true FROM {Peptide.TABLE_NAME} WHERE sequence = %s;",
                    (peptide.sequence, ))
                self.assertTrue(database_cursor.fetchone()[0])

    ## Read
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            # Get Leptin by accession
            database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"], [leptin.accession]))
            database_leptin_petides = database_leptin.peptides(
                database_cursor)
            self.assertEqual(database_leptin.accession, leptin.accession)
            self.assertEqual(len(database_leptin_petides),
                             len(leptin_peptides))

    ## Update
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"], [leptin.accession]))
            database_leptin.update(database_cursor, updated_leptin,
                                   trypsin)
            self.database_connection.commit()
            database_cursor.execute(
                f"SELECT COUNT(*) FROM {Protein.TABLE_NAME};")
            protein_count = database_cursor.fetchone()[0]
            # There should still be only one protein (updated leptin)
            self.assertEqual(protein_count, 1)
            updated_database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"],
                               [updated_leptin.accession]))
            # Check the updated attributes
            self.assertEqual(updated_database_leptin.accession,
                             updated_leptin.accession)
            self.assertEqual(updated_database_leptin.secondary_accessions,
                             updated_leptin.secondary_accessions)
            self.assertEqual(updated_database_leptin.sequence,
                             updated_leptin.sequence)
            self.assertNotEqual(updated_database_leptin, None)
            # Fetch peptides
            updated_database_leptin_peptides = {
                peptide.sequence: peptide
                for peptide in updated_database_leptin.peptides(
                    database_cursor)
            }
            self.assertEqual(len(updated_database_leptin_peptides),
                             len(updated_leptin_peptides))
            # Cross check if only the updated leptin peptides are returned
            for sequence in updated_database_leptin_peptides.keys():
                self.assertIn(sequence, updated_leptin_peptides)

    ## Delete
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"],
                               [updated_leptin.accession]))
            Protein.delete(database_cursor, database_leptin)
            self.database_connection.commit()
            # Protein and all its associations must be gone.
            database_cursor.execute(
                f"SELECT COUNT(*) FROM {Protein.TABLE_NAME};")
            protein_count = database_cursor.fetchone()[0]
            self.assertEqual(0, protein_count)
            database_cursor.execute(
                f"SELECT COUNT(*) FROM {ProteinPeptideAssociation.TABLE_NAME};"
            )
            peptide_count = database_cursor.fetchone()[0]
            self.assertEqual(0, peptide_count)
def digest():
    """
    Digest a given peptide/sequence, search the resulting peptides in the
    database and return matching and not matching peptides in separate arrays.

    Expects a JSON body with a `sequence`, the digestion parameters checked
    by `ApiDigestionController.check_digestion_parameters`, and an optional
    boolean `do_database_search`. Returns a streamed JSON response or a 422
    with the collected validation errors.
    """
    data = request.get_json()
    errors = ApiDigestionController.check_digestion_parameters(data)
    # Idiom fix: `"sequence" not in data` instead of `not "sequence" in data`.
    if "sequence" not in data:
        errors["sequence"].append("cannot be empty")

    digestion_peptides = []
    database_peptides = []
    if len(errors) == 0:
        EnzymeClass = get_digestion_enzyme_by_name("trypsin")
        enzyme = EnzymeClass(data["maximum_number_of_missed_cleavages"],
                             data["minimum_peptide_length"],
                             data["maximum_peptide_length"])
        # Wrap the raw sequence in a throwaway protein for digestion.
        digestion_peptides = enzyme.digest(
            Protein("TMP", [], "TMP", "TMP", data["sequence"], [], [],
                    False, 0))

        if "do_database_search" in data and isinstance(
                data["do_database_search"],
                bool) and data["do_database_search"]:
            database_connection = get_database_connection()
            with database_connection.cursor() as database_cursor:
                database_peptides = Peptide.select(
                    database_cursor,
                    WhereCondition(
                        ["(partition, mass, sequence) IN %s"],
                        (tuple(
                            (peptide.partition, peptide.mass,
                             peptide.sequence)
                            for peptide in digestion_peptides), )),
                    fetchall=True)
            database_peptides.sort(key=lambda peptide: peptide.mass)
            # Performance fix: membership test against a set instead of a
            # linear scan of the list for every digestion peptide
            # (peptides are hashable; they are stored in sets elsewhere).
            found_peptides = set(database_peptides)
            digestion_peptides = [
                peptide for peptide in digestion_peptides
                if peptide not in found_peptides
            ]

        digestion_peptides.sort(key=lambda peptide: peptide.mass)

    if len(errors) == 0:
        def json_stream() -> Iterator[bytes]:
            yield b"{\"database\": ["
            for peptide_idx, peptide in enumerate(database_peptides):
                if peptide_idx > 0:
                    yield b","
                yield from peptide.to_json()
            yield b"],\"digestion\": ["
            for peptide_idx, peptide in enumerate(digestion_peptides):
                if peptide_idx > 0:
                    yield b","
                yield from peptide.to_json()
            yield f"],\"count\": {len(database_peptides) + len(digestion_peptides)}}}".encode(
                "utf-8")
        return Response(json_stream(), content_type="application/json")
    else:
        return jsonify({"errors": errors}), 422
def verify_database_integrity(self, proteins_from_file: List[Protein],
                              enzym: DigestEnzyme):
    """
    Verifies the database by:
    1. Check if all protein from file exists and their attributes are matching
    2. Digest the given proteins and check if:
        2.1 The peptides are found in the database (by primary key)
        2.2 The values which are generated on the fly and not send from the database, e.g. amino acid counts, matches the one in the database.
    3. Check if all proteins and their peptides have association and if the association count matches the actual protein peptides relationships
    4. Check if all peptides have a related metadata record

    Parameters
    ----------
    proteins_from_file : List[Protein]
        Proteins read from the protein file
    enzym : DigestEnzyme
        Enzym for digesting. Should match the one which is used for the database creation.
    """
    # Digest every file protein; the set deduplicates shared peptides.
    peptides_from_file_proteins = set()
    for file_protein in proteins_from_file:
        for new_peptide in enzym.digest(file_protein):
            peptides_from_file_proteins.add(new_peptide)

    with self.database_connection.cursor() as database_cursor:
        # Check if protein count in database is equal to the file set
        database_cursor.execute(
            f"SELECT count(*) FROM {Protein.TABLE_NAME};")
        self.assertEqual(len(proteins_from_file),
                         database_cursor.fetchone()[0])

        # Check if all proteins are correct proteins
        for file_protein in proteins_from_file:
            db_protein = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"],
                               [file_protein.accession]),
            )
            self.assertIsNotNone(db_protein)
            self.assertEqual(db_protein.accession, file_protein.accession)
            self.assertEqual(db_protein.secondary_accessions,
                             file_protein.secondary_accessions)
            self.assertEqual(db_protein.entry_name,
                             file_protein.entry_name)
            self.assertEqual(db_protein.name, file_protein.name)
            self.assertEqual(db_protein.sequence, file_protein.sequence)
            self.assertEqual(db_protein.taxonomy_id,
                             file_protein.taxonomy_id)
            self.assertEqual(db_protein.proteome_id,
                             file_protein.proteome_id)
            self.assertEqual(db_protein.is_reviewed,
                             file_protein.is_reviewed)

        # Check if set count is equals db count
        # Because peptides are not removed from the database it is possible to have more peptides
        # in the database after protein updates than in the file.
        database_cursor.execute(
            f"SELECT count(*) FROM {Peptide.TABLE_NAME};")
        self.assertLessEqual(len(peptides_from_file_proteins),
                             database_cursor.fetchone()[0])

        for file_peptide in peptides_from_file_proteins:
            # Look the peptide up by its full primary key.
            db_peptide = Peptide.select(
                database_cursor,
                WhereCondition([
                    "partition = %s", "AND", "mass = %s", "AND",
                    "sequence = %s"
                ], [
                    file_peptide.partition, file_peptide.mass,
                    file_peptide.sequence
                ]))
            self.assertIsNotNone(db_peptide)
            self.assertEqual(db_peptide.sequence, file_peptide.sequence)
            self.assertEqual(db_peptide.mass, file_peptide.mass)
            self.assertEqual(db_peptide.partition,
                             file_peptide.partition)
            self.assertEqual(db_peptide.number_of_missed_cleavages,
                             file_peptide.number_of_missed_cleavages)

            # Because the amino acid counts are counted on the fly to save I/O and bandwidth, lets check the values in the database
            database_cursor.execute(
                ("SELECT "
                 "a_count, "
                 "b_count, "
                 "c_count, "
                 "d_count, "
                 "e_count, "
                 "f_count, "
                 "g_count, "
                 "h_count, "
                 "i_count, "
                 "j_count, "
                 "k_count, "
                 "l_count, "
                 "m_count, "
                 "n_count, "
                 "o_count, "
                 "p_count, "
                 "q_count, "
                 "r_count, "
                 "s_count, "
                 "t_count, "
                 "u_count, "
                 "v_count, "
                 "w_count, "
                 "y_count, "
                 "z_count, "
                 "n_terminus, "
                 "c_terminus "
                 f"FROM {Peptide.TABLE_NAME} "
                 "WHERE partition = %s AND mass = %s AND sequence = %s"),
                (file_peptide.partition, file_peptide.mass,
                 file_peptide.sequence))
            db_peptide_record = database_cursor.fetchone()
            self.assertIsNotNone(db_peptide_record)
            # file_peptide attributes in the array below have the same order as in the query
            for value_idx, file_peptide_value in enumerate([
                    file_peptide.a_count, file_peptide.b_count,
                    file_peptide.c_count, file_peptide.d_count,
                    file_peptide.e_count, file_peptide.f_count,
                    file_peptide.g_count, file_peptide.h_count,
                    file_peptide.i_count, file_peptide.j_count,
                    file_peptide.k_count, file_peptide.l_count,
                    file_peptide.m_count, file_peptide.n_count,
                    file_peptide.o_count, file_peptide.p_count,
                    file_peptide.q_count, file_peptide.r_count,
                    file_peptide.s_count, file_peptide.t_count,
                    file_peptide.u_count, file_peptide.v_count,
                    file_peptide.w_count, file_peptide.y_count,
                    file_peptide.z_count,
                    file_peptide.get_n_terminus_ascii_dec(),
                    file_peptide.get_c_terminus_ascii_dec()
            ]):
                self.assertEqual(file_peptide_value,
                                 db_peptide_record[value_idx])

        # Check protein/peptide-associations from both directions
        desired_number_of_associations = 0
        for file_protein in proteins_from_file:
            for file_peptide in enzym.digest(file_protein):
                # Increase association counter
                desired_number_of_associations += 1
                database_cursor.execute((
                    "SELECT true "
                    f"FROM {ProteinPeptideAssociation.TABLE_NAME} "
                    "WHERE protein_accession = %s AND partition = %s AND peptide_mass = %s AND peptide_sequence = %s;"
                ), (
                    file_protein.accession,
                    file_peptide.partition,
                    file_peptide.mass,
                    file_peptide.sequence,
                ))
                is_association_found = database_cursor.fetchone()[0]
                self.assertIsNotNone(is_association_found)
                self.assertTrue(is_association_found)

        # Check association counter. Must be equals even after updates.
        database_cursor.execute(
            f"SELECT count(*) FROM {ProteinPeptideAssociation.TABLE_NAME};"
        )
        self.assertEqual(desired_number_of_associations,
                         database_cursor.fetchone()[0])

        # Check if peptide metadata count equals peptide count
        database_cursor.execute(
            f"SELECT count(*) FROM {PeptideMetadata.TABLE_NAME};")
        metadata_count = database_cursor.fetchone()[0]
        database_cursor.execute(
            f"SELECT count(*) FROM {Peptide.TABLE_NAME};")
        peptide_count = database_cursor.fetchone()[0]
        self.assertEqual(metadata_count, peptide_count)

        # Check if the current peptides have updated metadata
        for file_peptide in peptides_from_file_proteins:
            file_peptide.fetch_metadata_from_proteins(database_cursor)
            db_metadata = PeptideMetadata.select(database_cursor,
                                                 file_peptide)
            self.assertIsNotNone(
                db_metadata,
                f"metadata for peptide '{file_peptide.sequence}' is missing"
            )
            if db_metadata:
                self.assertEqual(db_metadata.is_swiss_prot,
                                 file_peptide.metadata.is_swiss_prot)
                self.assertEqual(db_metadata.is_trembl,
                                 file_peptide.metadata.is_trembl)
                # Compare id lists order-independently.
                self.assertEqual(
                    sorted(db_metadata.taxonomy_ids),
                    sorted(file_peptide.metadata.taxonomy_ids))
                self.assertEqual(
                    sorted(db_metadata.unique_taxonomy_ids),
                    sorted(file_peptide.metadata.unique_taxonomy_ids))
                self.assertEqual(
                    sorted(db_metadata.proteome_ids),
                    sorted(file_peptide.metadata.proteome_ids))

        # Check if maintenance mode is false and update timestamp is greater zero
        database_status = MaintenanceInformation.select(
            database_cursor, MaintenanceInformation.DATABASE_STATUS_KEY)
        self.assertNotEqual(database_status, None)
        self.assertGreater(database_status.values['last_update'], 0)
        self.assertEqual(database_status.values['status'],
                         DatabaseStatus.READY.value)
        self.assertFalse(database_status.values['maintenance_mode'])