def test_create_read_update_delete_cycle(self):
    """
    Runs a full CRUD cycle for one protein and its tryptic peptides
    through the ORM session (create/commit, read back, delete).
    """
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        'Q257X2', 'LEP_CAPHI', 'Leptin',
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True)

    ## Create
    # Start db session
    session = self.session_factory()
    session.add(leptin)
    try:
        session.commit()
    except Exception:
        # Bugfix: the original bare `except:` swallowed every error
        # (including KeyboardInterrupt) and left the session in a broken
        # state. Catch Exception only and roll back so the session stays
        # usable when the record already exists.
        session.rollback()
        print("leptin already exists")
    # Check if id is now an integer (autoincrement from db)
    self.assertTrue(isinstance(leptin.id, int))

    # Digest protein and save peptides with association to protein
    trypsin = Trypsin(3, 0, 60)
    leptin.peptides = trypsin.digest(leptin)
    PEPTIDE_COUNT = leptin.peptides.count()
    session.commit()
    for peptide in leptin.peptides:
        # Check if id is now an integer (autoincrement from db)
        self.assertTrue(isinstance(peptide.id, int))
    # Save autoincremented id
    LEPTIN_ID = leptin.id
    # Close session and set Leptin to None, so all connections to db are lost
    session.close()
    leptin = None

    ## Read
    session = self.session_factory()
    # Get Leptin by accession
    leptin = session.query(Protein).filter(
        Protein.accession == "Q257X2").one()
    leptin_peptides = leptin.peptides.all()
    self.assertEqual(LEPTIN_ID, leptin.id)
    self.assertEqual(PEPTIDE_COUNT, len(leptin_peptides))
    session.close()

    ## Update
    # Not implemented yet

    ## Delete
    # Start new session
    session = self.session_factory()
    # Bind leptin to the new session
    session.add(leptin)
    # Remove association between leptin and peptides
    for peptide in leptin.peptides.all():
        session.delete(peptide)
    session.delete(leptin)
    session.commit()
    self.assertEqual(0, session.query(Protein).count())
    self.assertEqual(0, session.query(Peptide).count())
    session.close()
def test_create_read_update_delete_cycle(self):
    """Exercises create, read, update and delete for a single protein row."""
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        'Q257X2', 'LEP_CAPHI', 'Leptin',
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True)

    ## create
    db_session = self.session_factory()
    db_session.add(leptin)
    db_session.commit()
    # The database assigns an autoincremented integer id on commit.
    self.assertTrue(isinstance(leptin.id, int))
    # Remember the generated id for the later read/update checks.
    LEPTIN_ID = leptin.id
    # Drop the session and the reference so nothing stays attached to the db.
    db_session.close()
    leptin = None

    ## read
    db_session = self.session_factory()
    # Fetch Leptin again, this time by accession.
    leptin = db_session.query(Protein).filter(
        Protein.accession == "Q257X2").one()
    self.assertEqual(LEPTIN_ID, leptin.id)
    db_session.close()

    ## update
    db_session = self.session_factory()
    # Change the accession and persist it.
    leptin.accession = "CHANGED"
    db_session.add(leptin)
    db_session.commit()
    # Close the session so no cached state can satisfy the next query.
    db_session.close()
    # Re-query the accession through a completely fresh session.
    db_session = self.session_factory()
    leptin_accession = db_session.query(
        Protein.accession).filter(Protein.id == LEPTIN_ID).scalar()
    self.assertEqual("CHANGED", leptin_accession)
    db_session.close()

    ## delete
    db_session = self.session_factory()
    db_session.delete(leptin)
    db_session.commit()
    self.assertEqual(0, db_session.query(Protein).count())
    db_session.close()
def __next__(self):
    """
    Parses the next UniProt/EMBL flat-file entry from the underlying file
    iterator.

    Returns
    -------
    Tuple of the parsed Protein and the protein merges created from its
    secondary accessions.

    Raises
    ------
    StopIteration
        When the underlying file iterator is exhausted.
    """
    entry_name = ""
    name = ""
    is_reviewed = False
    accessions = []
    taxonomy_id = None
    sequence = ""
    proteome_id = None
    while True:
        line = next(self.__file_iter)
        if line.startswith("ID"):
            entry_name, is_reviewed = self.__process_id(line[5:])
        elif line.startswith("AC"):
            accessions += self.__process_ac(line[5:])
        elif line.startswith("OX"):
            taxonomy_id = self.__process_ox(line[5:])
        elif line.startswith("DR"):
            if line[5:].startswith("Proteomes;"):
                proteome_id = self.__process_dr_proteoms(line[5:])
        # sequence starts with two whitespaces
        elif line.startswith("  "):
            sequence += self.__process_sq_no_header(line)
        elif line.startswith("DE"):
            # Bugfix: the original condition read
            # `name == "" and ...RecName... or ...AltName... or ...Sub...`.
            # Because `and` binds tighter than `or`, AltName/Sub lines
            # bypassed the `name == ""` guard and overwrote an already
            # parsed name. Grouping the prefixes in a startswith-tuple
            # keeps only the first matching DE line.
            if name == "" and line[5:].startswith(
                    ("RecName", "AltName", "Sub")):
                name = self.__process_de_name(line[5:])
        elif line.startswith("//"):
            # End of entry: first accession is the primary one.
            accession = accessions.pop(0)
            return Protein(
                accession, entry_name, name, sequence, taxonomy_id,
                proteome_id,
                is_reviewed), self.__create_protein_merges(
                    accessions, accession)
def proteins(sequence: str):
    """
    Returns all proteins containing the given peptide sequence, split into
    reviewed (SwissProt) and unreviewed (TrEMBL) groups, as streamed JSON.
    """
    peptide = Peptide(sequence.upper(), 0)
    database_connection = get_database_connection()
    with database_connection.cursor() as database_cursor:
        matching_proteins = Protein.select(
            database_cursor,
            WhereCondition([
                f"accession = ANY(SELECT protein_accession FROM {ProteinPeptideAssociation.TABLE_NAME} as ppa WHERE ppa.partition = %s AND ppa.peptide_mass = %s AND ppa.peptide_sequence = %s)"
            ], [peptide.partition, peptide.mass, peptide.sequence]), True)

        # Partition the result by review status.
        reviewed_proteins = [
            protein for protein in matching_proteins if protein.is_reviewed
        ]
        unreviewed_proteins = [
            protein for protein in matching_proteins
            if not protein.is_reviewed
        ]

        def json_stream() -> Iterator[bytes]:
            # Stream both groups without building the whole document in memory.
            yield b"{\"reviewed_proteins\": ["
            for idx, protein in enumerate(reviewed_proteins):
                if idx > 0:
                    yield b","
                yield from protein.to_json()
            yield b"],\"unreviewed_proteins\": ["
            for idx, protein in enumerate(unreviewed_proteins):
                if idx > 0:
                    yield b","
                yield from protein.to_json()
            yield b"]}"

        return Response(json_stream(), content_type="application/json")
def test_accession_regex(self):
    """Checks that a valid UniProt accession survives Protein construction."""
    # Leptin (UniProt accession: Q257X2)
    leptin_sequence = (
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC'
    )
    leptin = Protein('Q257X2', 'LEP_CAPHI', 'Leptin', leptin_sequence, 9925,
                     'UP000291000', True)
    self.assertEqual("Q257X2", leptin.accession)
def show(accession: str):
    """
    Returns the protein with the given accession as JSON, or a 404 error
    payload when no such protein exists.
    """
    accession = accession.upper()
    database_connection = get_database_connection()
    with database_connection.cursor() as database_cursor:
        protein = Protein.select(
            database_cursor,
            WhereCondition(["accession = %s"], [accession]),
            False
        )
        # Guard clause: missing protein -> 404 with an error document.
        if not protein:
            return jsonify({
                "errors": {
                    "accession": ["not found"]
                }
            }), 404
        return Response(
            protein.to_json(),
            content_type="application/json"
        )
def test_digest(self):
    """Digests Leptin with trypsin and compares against the expected peptides."""
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        "Q257X2", "LEP_CAPHI", "Leptin",
        "MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC",
        9925, "UP000291000", True)
    peptides = Trypsin(3, 0, 60).digest(leptin)
    # Same number of peptides, and every sequence must be an expected one.
    self.assertEqual(len(DESIRED_RESULTS), len(peptides))
    for peptide in peptides:
        self.assertIn(peptide.sequence, DESIRED_RESULTS)
def digest():
    """
    Digests the sequence of the given protein.

    Expects a JSON body containing an `accession` plus the digestion
    parameters validated by `ApiDigestionController.check_digestion_parameters`.
    Returns the matching peptides as a streamed JSON response, or a 422
    response with the collected validation errors.
    """
    data = request.get_json()
    errors = ApiDigestionController.check_digestion_parameters(data)
    # Idiom fix: `"accession" not in data` instead of `not "accession" in data`.
    if "accession" not in data:
        errors["accession"].append("cannot be empty")

    peptides = []
    if len(errors) == 0:
        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            protein = Protein.select(
                database_cursor,
                WhereCondition(
                    ["accession = %s"],
                    [data["accession"]]
                ),
                False
            )
            if protein:
                # Keep only peptides within the requested missed-cleavage
                # and length bounds.
                peptides = list(filter(
                    lambda peptide: peptide.number_of_missed_cleavages <= data["maximum_number_of_missed_cleavages"] \
                        and data["minimum_peptide_length"] <= peptide.length <= data["maximum_peptide_length"],
                    protein.peptides(database_cursor)
                ))
                peptides.sort(key=lambda peptide: peptide.mass)
            else:
                errors["accession"].append("not found")

    if len(errors) == 0:
        def json_stream():
            yield b"{\"peptides\": ["
            for peptide_idx, peptide in enumerate(peptides):
                if peptide_idx > 0:
                    yield b","
                yield from peptide.to_json()
            # Bugfix: encoding name was "utf-8 " (trailing blank), which only
            # worked by accident of codec-name normalization.
            yield f"], \"count\": {len(peptides)}}}".encode("utf-8")
        return Response(
            json_stream(),
            content_type="application/json"
        )
    else:
        return jsonify({
            "errors": errors
        }), 422
def __next__(self):
    """
    Parses the next UniProt text entry from the underlying file iterator
    and returns the resulting Protein.

    Returns
    -------
    Protein built from the parsed entry (primary accession, secondary
    accessions, names, sequence, taxonomy, proteome, review flag and the
    entry's last-update timestamp).

    Raises
    ------
    StopIteration
        When the underlying file iterator is exhausted.
    """
    entry_name = ""
    name = ""
    is_reviewed = False
    accessions = []
    taxonomy_id = None
    sequence = ""
    proteome_id = None
    last_update = "01-JAN-1970"
    while True:
        line = next(self.__file_iter)
        line = line.rstrip()
        if len(line) >= 2:
            if line.startswith("ID"):
                entry_name, is_reviewed = self.__process_id(line[5:])
            elif line.startswith("AC"):
                accessions += self.__process_ac(line[5:])
            elif line.startswith("OX"):
                taxonomy_id = self.__process_ox(line[5:])
            elif line.startswith("DR"):
                if line[5:].startswith("Proteomes;"):
                    proteome_id = self.__process_dr_proteoms(line[5:])
            # sequence starts with two whitespaces
            elif line.startswith("  "):
                sequence += self.__process_sq_no_header(line)
            elif line.startswith("DE"):
                # Bugfix: `name == "" and A or B or C` let AltName/Sub
                # lines bypass the `name == ""` guard (operator
                # precedence) and overwrite an already parsed name.
                # The startswith-tuple keeps only the first matching
                # DE line.
                if name == "" and line[5:].startswith(
                        ("RecName", "AltName", "Sub")):
                    name = self.__process_de_name(line[5:])
            elif line.startswith("DT"):
                # DT lines carry dates; the last one seen wins.
                last_update = line[5:16]
            elif line.startswith("//"):
                # End of entry: first accession is the primary one.
                primary_accession = accessions.pop(0)
                return Protein(
                    primary_accession, accessions, entry_name, name,
                    sequence, taxonomy_id, proteome_id, is_reviewed,
                    self.__dt_date_to_utc_timestamp(last_update))
def run(self):
    """
    Starts the process which digests proteins and inserts the peptides into the database.

    Pulls proteins from the shared queue until the finish event is set and
    the queue is drained (or the termination event fires). Each protein is
    created or updated in its own transaction; transient errors (unique
    violations, deadlocks) are retried with an increasing error factor,
    and proteins that still fail are written to the unprocessible log.
    """
    self.activate_signal_handling()
    self.__general_log.send("digest worker {} is online".format(self.__id))
    database_connection = None
    # Let the process run until finish_event is true and protein_queue is empty or termination_event is true.
    while (not self.__finish_event.is_set()
           or not self.__protein_queue.empty()
           ) and not self.termination_event.is_set():
        try:
            # Open/reopen database connection
            if not database_connection or (
                    database_connection
                    and database_connection.closed != 0):
                database_connection = psycopg2.connect(self.__database_url)
            # Try to get a protein from the queue, timeout is 5 seconds
            new_protein = self.__protein_queue.get(True, 5)
            # Variables for loop control
            unsolvable_error_factor = 0
            try_transaction_again = True
            while try_transaction_again:
                number_of_new_peptides = 0
                error = None
                try:
                    count_protein = False
                    number_of_new_peptides = 0
                    # `with database_connection` wraps the work in a
                    # transaction (commit on success, rollback on error).
                    with database_connection:
                        with database_connection.cursor(
                        ) as database_cursor:
                            skip_protein_creation = False
                            # Check if the Protein exists by its accession or secondary accessions
                            accessions = [
                                new_protein.accession
                            ] + new_protein.secondary_accessions
                            existing_proteins = Protein.select(
                                database_cursor,
                                WhereCondition(["accession = ANY(%s)"],
                                               [accessions]),
                                fetchall=True)
                            if len(existing_proteins) > 0:
                                # If proteins were found and the first protein is the same protein as the current one from the queue ...
                                if existing_proteins[
                                        0].accession == new_protein.accession:
                                    updateable_protein = existing_proteins.pop(
                                        0)
                                    # ... delete the other proteins, because they are merged with this one.
                                    for existing_protein in existing_proteins:
                                        Protein.delete(
                                            database_cursor,
                                            existing_protein)
                                    skip_protein_creation = True
                                    number_of_new_peptides = updateable_protein.update(
                                        database_cursor, new_protein,
                                        self.__enzyme)
                                else:
                                    # If the first protein from the found proteins has not the same accession as the new one from the queue
                                    # each of the found proteins are merged with the new protein. So delete them.
                                    for existing_protein in existing_proteins:
                                        Protein.delete(
                                            database_cursor,
                                            existing_protein)
                            if not skip_protein_creation:
                                number_of_new_peptides = Protein.create(
                                    database_cursor, new_protein,
                                    self.__enzyme)
                                count_protein = True
                    # Commit was successful: stop while-loop and add statistics
                    try_transaction_again = False
                    self.__statistics.acquire()
                    if count_protein:
                        self.__statistics[0] += 1
                    # Peptide counter is increased for creates AND updates.
                    self.__statistics[1] += number_of_new_peptides
                    self.__statistics.release()
                    # Rollback is done implicitly by `with database_connection`
                # Each error increases the unsolvable error factor differently. If the factor reaches UNSOLVEABLE_ERROR_FACTOR_LIMIT the protein is logged as unprocessible
                ## Catch violation of unique constraints. Usually a peptide which is already inserted by another transaction.
                except psycopg2.errors.UniqueViolation as unique_violation_error:
                    error = unique_violation_error
                    if unsolvable_error_factor < self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT:
                        unsolvable_error_factor += 0.2
                ## Catch deadlocks between transactions. This occurs usually when 2 transactions try to insert the same peptides
                except psycopg2.errors.DeadlockDetected as deadlock_detected_error:
                    error = deadlock_detected_error
                    # Try again after 5 (first try) and 10 (second try) + a random number between 0 and 5 (both including) seconds; maybe some blocking transactions can pass so this transaction will successfully finish on the next try.
                    if unsolvable_error_factor < self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT:
                        unsolvable_error_factor += 1
                        time.sleep(5 * unsolvable_error_factor +
                                   random.randint(0, 5))
                ## Catch other errors: immediately pushes the factor past the limit, no retry.
                except psycopg2.Error as base_error:
                    unsolvable_error_factor += self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT
                    error = base_error
                finally:
                    # Log the last error if the unsolvable_error_factor exceeds the limit
                    if unsolvable_error_factor >= self.__class__.UNSOLVEABLE_ERROR_FACTOR_LIMIT:
                        self.__general_log.send(
                            "Exception on protein {}, see:\n{}".format(
                                new_protein.accession, error))
                        self.__unprocessible_protein_log.send(
                            new_protein.to_embl_entry())
                        self.__statistics.acquire()
                        self.__statistics[2] += 1
                        self.__statistics.release()
                        try_transaction_again = False
        # Catch errors which occur during database connect
        except psycopg2.Error as error:
            self.__general_log.send(
                "Error when opening the database connection, see:\n{}".
                format(error))
        # Catch queue.Empty which is thrown when protein_queue.get() timed out
        except EmptyQueueError:
            pass
    # Close database connection
    if database_connection and database_connection.closed == 0:
        database_connection.close()
    self.__general_log.send("digest worker {} is stopping".format(
        self.__id))
    self.__general_log.close()
    self.__unprocessible_protein_log.close()
def test_lifecycle(self):
    """
    Runs the create/read/update/delete lifecycle for a protein against a
    real database connection, verifying peptide creation, the
    protein/peptide associations, the update of accession, secondary
    accessions and sequence, and the final cleanup.
    """
    trypsin = Trypsin(2, 6, 50)
    # Using Leptin (UniProt accession: Q257X2)
    leptin = Protein(
        'Q257X2', ['TESTACC'], 'LEP_CAPHI', 'Leptin',
        'MRCGPLYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True, 1145311200)
    leptin_peptides = trypsin.digest(leptin)
    # Leptin with a new accession, old accession moved to secondary accessions, and new sequence where the first leucine is replaced by an isoleucine which creates a new peptide.
    updated_leptin = Protein(
        'Q257X2V2', ['Q257X2', 'TESTACC'], 'LEP_CAPHI', 'Leptin',
        'MRCGPIYRFLWLWPYLSYVEAVPIRKVQDDTKTLIKTIVTRINDISHTQSVSSKQRVTGLDFIPGLHPLLSLSKMDQTLAIYQQILASLPSRNVIQISNDLENLRDLLHLLAASKSCPLPQVRALESLESLGVVLEASLYSTEVVALSRLQGSLQDMLRQLDLSPGC',
        9925, 'UP000291000', True, 1627596000)
    updated_leptin_peptides = {
        peptide.sequence: peptide
        for peptide in trypsin.digest(updated_leptin)
    }
    inserted_leptin_peptide_count = 0

    ## Create
    # Start db session
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            inserted_leptin_peptide_count = Protein.create(
                database_cursor, leptin, trypsin)
            self.database_connection.commit()
            database_cursor.execute(
                f"SELECT true FROM {Protein.TABLE_NAME} WHERE accession = %s;",
                (leptin.accession, ))
            # Check if the protein row exists now
            self.assertTrue(database_cursor.fetchone()[0])
            # Check if all peptides were inserted
            self.assertEqual(inserted_leptin_peptide_count,
                             len(leptin_peptides))
            # Database should contain exactly the amount of leptin peptides
            database_cursor.execute(
                f"SELECT count(*) FROM {Peptide.TABLE_NAME};")
            self.assertEqual(len(leptin_peptides),
                             database_cursor.fetchone()[0])
            # Database should contain also exactly one association per leptin peptide
            database_cursor.execute(
                f"SELECT count(*) FROM {ProteinPeptideAssociation.TABLE_NAME};"
            )
            self.assertEqual(len(leptin_peptides),
                             database_cursor.fetchone()[0])
            for peptide in leptin_peptides:
                database_cursor.execute(
                    f"SELECT true FROM {Peptide.TABLE_NAME} WHERE sequence = %s;",
                    (peptide.sequence, ))
                self.assertTrue(database_cursor.fetchone()[0])

    ## Read
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            # Get Leptin by accession
            database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"], [leptin.accession]))
            database_leptin_petides = database_leptin.peptides(
                database_cursor)
            self.assertEqual(database_leptin.accession, leptin.accession)
            self.assertEqual(len(database_leptin_petides),
                             len(leptin_peptides))

    ## Update
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"], [leptin.accession]))
            database_leptin.update(database_cursor, updated_leptin,
                                   trypsin)
            self.database_connection.commit()
            database_cursor.execute(
                f"SELECT COUNT(*) FROM {Protein.TABLE_NAME};")
            protein_count = database_cursor.fetchone()[0]
            # There should still be only one protein (updated leptin)
            self.assertEqual(protein_count, 1)
            updated_database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"],
                               [updated_leptin.accession]))
            # Check the updated attributes
            self.assertEqual(updated_database_leptin.accession,
                             updated_leptin.accession)
            self.assertEqual(updated_database_leptin.secondary_accessions,
                             updated_leptin.secondary_accessions)
            self.assertEqual(updated_database_leptin.sequence,
                             updated_leptin.sequence)
            self.assertNotEqual(updated_database_leptin, None)
            # Fetch peptides
            updated_database_leptin_peptides = {
                peptide.sequence: peptide
                for peptide in updated_database_leptin.peptides(
                    database_cursor)
            }
            self.assertEqual(len(updated_database_leptin_peptides),
                             len(updated_leptin_peptides))
            # Cross check if only the updated leptin peptides are returned
            for sequence in updated_database_leptin_peptides.keys():
                self.assertIn(sequence, updated_leptin_peptides)

    ## Delete
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            database_leptin = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"],
                               [updated_leptin.accession]))
            Protein.delete(database_cursor, database_leptin)
            self.database_connection.commit()
            # Protein and all its associations must be gone.
            database_cursor.execute(
                f"SELECT COUNT(*) FROM {Protein.TABLE_NAME};")
            protein_count = database_cursor.fetchone()[0]
            self.assertEqual(0, protein_count)
            database_cursor.execute(
                f"SELECT COUNT(*) FROM {ProteinPeptideAssociation.TABLE_NAME};"
            )
            peptide_count = database_cursor.fetchone()[0]
            self.assertEqual(0, peptide_count)
def digest():
    """
    Digest a given peptide/sequence, search the resulting peptides in the
    database and return matching and not matching peptides in separate arrays.

    Expects a JSON body with a `sequence`, the digestion parameters checked
    by `ApiDigestionController.check_digestion_parameters`, and an optional
    boolean `do_database_search`. Returns a streamed JSON response or a 422
    with the collected validation errors.
    """
    data = request.get_json()
    errors = ApiDigestionController.check_digestion_parameters(data)
    # Idiom fix: `"sequence" not in data` instead of `not "sequence" in data`.
    if "sequence" not in data:
        errors["sequence"].append("cannot be empty")

    digestion_peptides = []
    database_peptides = []
    if len(errors) == 0:
        EnzymeClass = get_digestion_enzyme_by_name("trypsin")
        enzyme = EnzymeClass(data["maximum_number_of_missed_cleavages"],
                             data["minimum_peptide_length"],
                             data["maximum_peptide_length"])
        # Wrap the raw sequence in a throwaway protein for digestion.
        digestion_peptides = enzyme.digest(
            Protein("TMP", [], "TMP", "TMP", data["sequence"], [], [],
                    False, 0))

        if "do_database_search" in data and isinstance(
                data["do_database_search"],
                bool) and data["do_database_search"]:
            database_connection = get_database_connection()
            with database_connection.cursor() as database_cursor:
                database_peptides = Peptide.select(
                    database_cursor,
                    WhereCondition(
                        ["(partition, mass, sequence) IN %s"],
                        (tuple(
                            (peptide.partition, peptide.mass,
                             peptide.sequence)
                            for peptide in digestion_peptides), )),
                    fetchall=True)
            database_peptides.sort(key=lambda peptide: peptide.mass)
            # Performance fix: membership test against a set instead of a
            # linear scan of the list for every digestion peptide
            # (peptides are hashable; they are stored in sets elsewhere).
            found_peptides = set(database_peptides)
            digestion_peptides = [
                peptide for peptide in digestion_peptides
                if peptide not in found_peptides
            ]

        digestion_peptides.sort(key=lambda peptide: peptide.mass)

    if len(errors) == 0:
        def json_stream() -> Iterator[bytes]:
            yield b"{\"database\": ["
            for peptide_idx, peptide in enumerate(database_peptides):
                if peptide_idx > 0:
                    yield b","
                yield from peptide.to_json()
            yield b"],\"digestion\": ["
            for peptide_idx, peptide in enumerate(digestion_peptides):
                if peptide_idx > 0:
                    yield b","
                yield from peptide.to_json()
            yield f"],\"count\": {len(database_peptides) + len(digestion_peptides)}}}".encode(
                "utf-8")
        return Response(json_stream(), content_type="application/json")
    else:
        return jsonify({"errors": errors}), 422
def verify_database_integrity(self, proteins_from_file: List[Protein],
                              enzym: DigestEnzyme):
    """
    Verifies the database by:
    1. Check if all protein from file exists and their attributes are matching
    2. Digest the given proteins and check if:
        2.1 The peptides are found in the database (by primary key)
        2.2 The values which are generated on the fly and not send from the database, e.g. amino acid counts, matches the one in the database.
    3. Check if all proteins and their peptides have association and if the association count matches the actual protein peptides relationships
    4. Check if all peptides have a related metadata record

    Parameters
    ----------
    proteins_from_file : List[Protein]
        Proteins read from the protein file
    enzym : DigestEnzyme
        Enzym for digesting. Should match the one which is used for the database creation.
    """
    # Digest every file protein; the set deduplicates shared peptides.
    peptides_from_file_proteins = set()
    for file_protein in proteins_from_file:
        for new_peptide in enzym.digest(file_protein):
            peptides_from_file_proteins.add(new_peptide)

    with self.database_connection.cursor() as database_cursor:
        # Check if protein count in database is equal to the file set
        database_cursor.execute(
            f"SELECT count(*) FROM {Protein.TABLE_NAME};")
        self.assertEqual(len(proteins_from_file),
                         database_cursor.fetchone()[0])

        # Check if all proteins are correct proteins
        for file_protein in proteins_from_file:
            db_protein = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"],
                               [file_protein.accession]),
            )
            self.assertIsNotNone(db_protein)
            self.assertEqual(db_protein.accession, file_protein.accession)
            self.assertEqual(db_protein.secondary_accessions,
                             file_protein.secondary_accessions)
            self.assertEqual(db_protein.entry_name,
                             file_protein.entry_name)
            self.assertEqual(db_protein.name, file_protein.name)
            self.assertEqual(db_protein.sequence, file_protein.sequence)
            self.assertEqual(db_protein.taxonomy_id,
                             file_protein.taxonomy_id)
            self.assertEqual(db_protein.proteome_id,
                             file_protein.proteome_id)
            self.assertEqual(db_protein.is_reviewed,
                             file_protein.is_reviewed)

        # Check if set count is equals db count
        # Because peptides are not removed from the database it is possible to have more peptides
        # in the database after protein updates than in the file.
        database_cursor.execute(
            f"SELECT count(*) FROM {Peptide.TABLE_NAME};")
        self.assertLessEqual(len(peptides_from_file_proteins),
                             database_cursor.fetchone()[0])

        for file_peptide in peptides_from_file_proteins:
            # Look the peptide up by its full primary key.
            db_peptide = Peptide.select(
                database_cursor,
                WhereCondition([
                    "partition = %s", "AND", "mass = %s", "AND",
                    "sequence = %s"
                ], [
                    file_peptide.partition, file_peptide.mass,
                    file_peptide.sequence
                ]))
            self.assertIsNotNone(db_peptide)
            self.assertEqual(db_peptide.sequence, file_peptide.sequence)
            self.assertEqual(db_peptide.mass, file_peptide.mass)
            self.assertEqual(db_peptide.partition,
                             file_peptide.partition)
            self.assertEqual(db_peptide.number_of_missed_cleavages,
                             file_peptide.number_of_missed_cleavages)

            # Because the amino acid counts are counted on the fly to save I/O and bandwidth, lets check the values in the database
            database_cursor.execute(
                ("SELECT "
                 "a_count, "
                 "b_count, "
                 "c_count, "
                 "d_count, "
                 "e_count, "
                 "f_count, "
                 "g_count, "
                 "h_count, "
                 "i_count, "
                 "j_count, "
                 "k_count, "
                 "l_count, "
                 "m_count, "
                 "n_count, "
                 "o_count, "
                 "p_count, "
                 "q_count, "
                 "r_count, "
                 "s_count, "
                 "t_count, "
                 "u_count, "
                 "v_count, "
                 "w_count, "
                 "y_count, "
                 "z_count, "
                 "n_terminus, "
                 "c_terminus "
                 f"FROM {Peptide.TABLE_NAME} "
                 "WHERE partition = %s AND mass = %s AND sequence = %s"),
                (file_peptide.partition, file_peptide.mass,
                 file_peptide.sequence))
            db_peptide_record = database_cursor.fetchone()
            self.assertIsNotNone(db_peptide_record)
            # file_peptide attributes in the array below have the same order as in the query
            for value_idx, file_peptide_value in enumerate([
                    file_peptide.a_count, file_peptide.b_count,
                    file_peptide.c_count, file_peptide.d_count,
                    file_peptide.e_count, file_peptide.f_count,
                    file_peptide.g_count, file_peptide.h_count,
                    file_peptide.i_count, file_peptide.j_count,
                    file_peptide.k_count, file_peptide.l_count,
                    file_peptide.m_count, file_peptide.n_count,
                    file_peptide.o_count, file_peptide.p_count,
                    file_peptide.q_count, file_peptide.r_count,
                    file_peptide.s_count, file_peptide.t_count,
                    file_peptide.u_count, file_peptide.v_count,
                    file_peptide.w_count, file_peptide.y_count,
                    file_peptide.z_count,
                    file_peptide.get_n_terminus_ascii_dec(),
                    file_peptide.get_c_terminus_ascii_dec()
            ]):
                self.assertEqual(file_peptide_value,
                                 db_peptide_record[value_idx])

        # Check protein/peptide-associations from both directions
        desired_number_of_associations = 0
        for file_protein in proteins_from_file:
            for file_peptide in enzym.digest(file_protein):
                # Increase association counter
                desired_number_of_associations += 1
                database_cursor.execute((
                    "SELECT true "
                    f"FROM {ProteinPeptideAssociation.TABLE_NAME} "
                    "WHERE protein_accession = %s AND partition = %s AND peptide_mass = %s AND peptide_sequence = %s;"
                ), (
                    file_protein.accession,
                    file_peptide.partition,
                    file_peptide.mass,
                    file_peptide.sequence,
                ))
                is_association_found = database_cursor.fetchone()[0]
                self.assertIsNotNone(is_association_found)
                self.assertTrue(is_association_found)

        # Check association counter. Must be equals even after updates.
        database_cursor.execute(
            f"SELECT count(*) FROM {ProteinPeptideAssociation.TABLE_NAME};"
        )
        self.assertEqual(desired_number_of_associations,
                         database_cursor.fetchone()[0])

        # Check if peptide metadata count equals peptide count
        database_cursor.execute(
            f"SELECT count(*) FROM {PeptideMetadata.TABLE_NAME};")
        metadata_count = database_cursor.fetchone()[0]
        database_cursor.execute(
            f"SELECT count(*) FROM {Peptide.TABLE_NAME};")
        peptide_count = database_cursor.fetchone()[0]
        self.assertEqual(metadata_count, peptide_count)

        # Check if the current peptides have updated metadata
        for file_peptide in peptides_from_file_proteins:
            file_peptide.fetch_metadata_from_proteins(database_cursor)
            db_metadata = PeptideMetadata.select(database_cursor,
                                                 file_peptide)
            self.assertIsNotNone(
                db_metadata,
                f"metadata for peptide '{file_peptide.sequence}' is missing"
            )
            if db_metadata:
                self.assertEqual(db_metadata.is_swiss_prot,
                                 file_peptide.metadata.is_swiss_prot)
                self.assertEqual(db_metadata.is_trembl,
                                 file_peptide.metadata.is_trembl)
                # Compare id lists order-independently.
                self.assertEqual(
                    sorted(db_metadata.taxonomy_ids),
                    sorted(file_peptide.metadata.taxonomy_ids))
                self.assertEqual(
                    sorted(db_metadata.unique_taxonomy_ids),
                    sorted(file_peptide.metadata.unique_taxonomy_ids))
                self.assertEqual(
                    sorted(db_metadata.proteome_ids),
                    sorted(file_peptide.metadata.proteome_ids))

        # Check if maintenance mode is false and update timestamp is greater zero
        database_status = MaintenanceInformation.select(
            database_cursor, MaintenanceInformation.DATABASE_STATUS_KEY)
        self.assertNotEqual(database_status, None)
        self.assertGreater(database_status.values['last_update'], 0)
        self.assertEqual(database_status.values['status'],
                         DatabaseStatus.READY.value)
        self.assertFalse(database_status.values['maintenance_mode'])