def __init__(self, name: str, one_letter_code: str, three_letter_code: str,
             chemical_formula: str, mono_mass: float, average_mass: float):
    self.name = name
    self.one_letter_code = one_letter_code
    self.three_letter_code = three_letter_code
    self.chemical_formula = chemical_formula
    self.mono_mass = mass_to_int(mono_mass)
    self.average_mass = mass_to_int(average_mass)
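# The constructor above stores masses as integers via mass_to_int(). A minimal sketch of
# what such a conversion could look like, assuming masses are scaled by 10**9 as the
# annotated test deltas below suggest (e.g. 57.021464 Da -> 57021464000). This is an
# illustrative assumption, not necessarily the project's actual implementation.
MASS_SCALE = 10**9  # assumed scaling factor

def mass_to_int_sketch(mass: float) -> int:
    # Convert a floating point mass in Dalton to a scaled integer, avoiding float
    # precision issues when masses are compared or stored in the database.
    return int(round(mass * MASS_SCALE))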
def test_with_modifications(self):
    # lower hit
    # Add peptides to database
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            for key in PEPTIDES_FOR_MODIFIED_SEARCH.keys():
                Peptide.bulk_insert(database_cursor, [
                    Peptide(sequence, 0)
                    for sequence in PEPTIDES_FOR_MODIFIED_SEARCH[key]
                ])

    csv_file_path = pathlib.Path("./test_files/modifications.csv")
    modification_collection = ModificationCollection.read_from_csv_file(csv_file_path)

    precursor = mass_to_int(thomson_to_dalton(MASS_TO_CHARGE_RATIO, CHARGE))

    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            modification_combination_list = ModificationCombinationList(
                modification_collection, precursor,
                PRECURSOR_TOLERANCE, PRECURSOR_TOLERANCE,
                VARIABLE_MODIFICATION_MAXIMUM)

            where_condition = modification_combination_list.to_where_condition()
            peptides = Peptide.select(database_cursor, where_condition, fetchall=True)

            # Check if only matching peptides were found
            self.assertEqual(len(peptides), len(PEPTIDES_FOR_MODIFIED_SEARCH['matching']))
            for peptide in peptides:
                self.assertIn(peptide.sequence, PEPTIDES_FOR_MODIFIED_SEARCH['matching'])
def start_from_comand_line(cls, args):
    """
    Starts a precursor range calculation with the arguments from the CLI.

    Parameters
    ----------
    args
        Arguments from the CLI parser
    """
    calculation = cls(
        mass_to_int(args.precursor),
        args.lower_precursor_tolerance,
        args.upper_precursor_tolerance,
        ModificationCollection.read_from_csv_file(pathlib.Path(args.modifications))
        if args.modifications is not None else ModificationCollection([]),
        args.max_variable_modifications,
        args.partitions
    )
    print(calculation)
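# A minimal sketch of how this entry point could be driven without the CLI parser,
# assuming the surrounding class is called PrecursorRangeCalculation (hypothetical name)
# and the method is exposed as a classmethod. The attribute names mirror the args
# accessed above; the concrete values (tolerances, partitions) are illustrative only.
from argparse import Namespace

args = Namespace(
    precursor=859.49506802369,                        # precursor mass in Dalton
    lower_precursor_tolerance=5,                      # ppm
    upper_precursor_tolerance=5,                      # ppm
    modifications="./test_files/modifications.csv",   # or None for no modifications
    max_variable_modifications=3,
    partitions=100,                                   # illustrative value
)
PrecursorRangeCalculation.start_from_comand_line(args)  # hypothetical class name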
def test_without_modifications(self):
    # Add peptides to database
    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            for key in PEPTIDES_FOR_UNMODIFIED_SEARCH.keys():
                Peptide.bulk_insert(database_cursor, [
                    Peptide(sequence, 0)
                    for sequence in PEPTIDES_FOR_UNMODIFIED_SEARCH[key]
                ])

    modification_collection = ModificationCollection([])

    precursor = mass_to_int(thomson_to_dalton(MASS_TO_CHARGE_RATIO, CHARGE))

    with self.database_connection:
        with self.database_connection.cursor() as database_cursor:
            modification_combination_list = ModificationCombinationList(
                modification_collection, precursor,
                PRECURSOR_TOLERANCE, PRECURSOR_TOLERANCE,
                VARIABLE_MODIFICATION_MAXIMUM)

            where_condition = modification_combination_list.to_where_condition()

            select_conditions_string = database_cursor.mogrify(
                where_condition.get_condition_str(),
                where_condition.values).decode('utf-8')
            matches = re.findall(self.__class__.MASS_TOLERANCE_REGEX, select_conditions_string)
            # Without modifications there is only one between-condition.
            self.assertEqual(len(matches), 1)

            peptides = Peptide.select(database_cursor, where_condition, fetchall=True)

            # Check if only matching peptides were found
            self.assertEqual(len(peptides), len(PEPTIDES_FOR_UNMODIFIED_SEARCH['matching']))
            for peptide in peptides:
                self.assertIn(peptide.sequence, PEPTIDES_FOR_UNMODIFIED_SEARCH['matching'])
def test_without_modifications(self):
    # Add peptides to database
    session = self.session_factory()
    for key in PEPTIDES_FOR_UNMODIFIED_SEARCH.keys():
        for sequence in PEPTIDES_FOR_UNMODIFIED_SEARCH[key]:
            peptide = Peptide(sequence, 0)
            session.add(peptide)
    session.commit()
    session.close()

    modification_collection = ModificationCollection([])

    precursor = mass_to_int(thomson_to_dalton(MASS_TO_CHARGE_RATIO, CHARGE))

    # Create fresh session
    session = self.session_factory()
    builder = ModifiedPeptideWhereClauseBuilder(
        modification_collection, precursor,
        PRECURSOR_TOLERANCE, PRECURSOR_TOLERANCE,
        VARIABLE_MODIFICATION_MAXIMUM)

    where_clause = builder.build(Peptide)
    where_clause_string = str(where_clause.compile(compile_kwargs={"literal_binds": True}))
    matches = re.findall(self.__class__.WEIGHT_TOLERANCE_REGEX, where_clause_string)
    # Without modifications there is only one between-condition.
    self.assertEqual(len(matches), 1)

    peptides = session.query(Peptide).filter(where_clause).all()

    # Check if only matching peptides were found
    self.assertEqual(len(peptides), len(PEPTIDES_FOR_UNMODIFIED_SEARCH['matching']))
    for peptide in peptides:
        self.assertIn(peptide.sequence, PEPTIDES_FOR_UNMODIFIED_SEARCH['matching'])
def test_with_modifications(self):
    # lower hit
    # Add peptides to database
    session = self.session_factory()
    for key in PEPTIDES_FOR_MODIFIED_SEARCH.keys():
        for sequence in PEPTIDES_FOR_MODIFIED_SEARCH[key]:
            peptide = Peptide(sequence, 0)
            session.add(peptide)
    session.commit()
    session.close()

    csv_file_path = pathlib.Path("./test_files/modifications.csv")
    modification_collection = ModificationCollection.read_from_csv_file(csv_file_path)

    precursor = mass_to_int(thomson_to_dalton(MASS_TO_CHARGE_RATIO, CHARGE))

    # Create fresh session
    session = self.session_factory()
    builder = ModifiedPeptideWhereClauseBuilder(
        modification_collection, precursor,
        PRECURSOR_TOLERANCE, PRECURSOR_TOLERANCE,
        VARIABLE_MODIFICATION_MAXIMUM)
    where_clause = builder.build(Peptide)

    peptides = session.query(Peptide).filter(where_clause).all()

    # Check if only matching peptides were found
    self.assertEqual(len(peptides), len(PEPTIDES_FOR_MODIFIED_SEARCH['matching']))
    for peptide in peptides:
        self.assertIn(peptide.sequence, PEPTIDES_FOR_MODIFIED_SEARCH['matching'])
def _search(request, file_extension: str):
    errors = defaultdict(list)
    data = None
    if request.headers.get("Content-Type", "") == "application/json":
        data = request.get_json()
    elif request.headers.get("Content-Type", "") == "application/x-www-form-urlencoded":
        # For use with a classical form-tag. The JSON-formatted search parameters should be provided in the form parameter "search_params"
        data = json.loads(request.form.get("search_params", "{}"))

    include_count = False
    if 'include_count' in data and isinstance(data['include_count'], bool):
        include_count = data['include_count']

    order_by = None
    if 'order_by' in data:
        if isinstance(data['order_by'], str) and data['order_by'] in ApiAbstractPeptideController.SUPPORTED_ORDER_COLUMNS:
            order_by = data['order_by']
        else:
            errors["order_by"].append(
                f"must be a string with one of following values: {', '.join(ApiAbstractPeptideController.SUPPORTED_ORDER_COLUMNS)}"
            )

    if 'order_direction' in data:
        if not isinstance(data['order_direction'], str) or not data['order_direction'] in ApiAbstractPeptideController.SUPPORTED_ORDER_DIRECTIONS:
            errors["order_direction"].append(
                f"'order_direction' must be a string with one of following values: {', '.join(ApiAbstractPeptideController.SUPPORTED_ORDER_DIRECTIONS)}"
            )

    include_metadata = False
    if "include_metadata" in data:
        if isinstance(data["include_metadata"], bool):
            include_metadata = data["include_metadata"]
        else:
            errors["include_metadata"].append("must be a boolean")

    output_style = None
    if file_extension is not None:
        try:
            output_style = OutputFormat.from_name(file_extension)
        except KeyError:
            pass
    else:
        try:
            output_style = OutputFormat.from_value(request.headers.get("accept", default=""))
        except KeyError:
            output_style = OutputFormat.json

    # Validate int attributes
    for attribute in ["lower_precursor_tolerance_ppm", "upper_precursor_tolerance_ppm", "variable_modification_maximum"]:
        if attribute in data:
            if isinstance(data[attribute], int):
                if data[attribute] < 0:
                    errors[attribute].append("not greater or equals 0")
            else:
                errors[attribute].append("not an integer")
        else:
            errors[attribute].append("cannot be empty")

    modifications = []
    if "modifications" in data:
        if isinstance(data["modifications"], list):
            for idx, modification_attributes in enumerate(data["modifications"]):
                if isinstance(modification_attributes, dict):
                    accession_and_name = "onlinemod:{}".format(idx)
                    try:
                        modification_attributes['accession'] = accession_and_name
                        modification_attributes['name'] = accession_and_name
                        modification_attributes['delta'] = mass_to_int(modification_attributes['delta'])
                        modifications.append(Modification.from_dict(modification_attributes))
                    except Exception as e:
                        errors[f"modifications[{idx}]"].append("is invalid")
                else:
                    errors[f"modifications[{idx}]"].append("not a dictionary")
        else:
            errors["modifications"].append("modifications has to be of type list")

    try:
        modification_collection = ModificationCollection(modifications)
    except Exception as e:
        errors["modifications"].append(f"{e}")

    database_connection = get_database_connection()
    if not len(errors):
        if "precursor" in data:
            if isinstance(data["precursor"], float) or isinstance(data["precursor"], int):
                modification_combination_list = ModificationCombinationList(
                    modification_collection,
                    mass_to_int(data["precursor"]),
                    data["lower_precursor_tolerance_ppm"],
                    data["upper_precursor_tolerance_ppm"],
                    data["variable_modification_maximum"]
                )

                metadata_condition = MetadataCondition()

                # List of metadata conditions
                if "taxonomy_id" in data:
                    if isinstance(data["taxonomy_id"], int):
                        with database_connection.cursor() as database_cursor:
                            taxonomy = Taxonomy.select(database_cursor, ("id = %s", (data["taxonomy_id"], )))
                            if taxonomy is not None:
                                metadata_condition.taxonomy_ids = [
                                    sub.id for sub in taxonomy.sub_species(database_cursor)
                                ]
                            else:
                                errors["taxonomy_id"].append("not found")
                    else:
                        errors["taxonomy_id"].append("must be an integer")

                if "proteome_id" in data:
                    if isinstance(data["proteome_id"], str):
                        metadata_condition.proteome_id = data["proteome_id"]
                    else:
                        errors["proteome_id"].append("must be a string")

                if "is_reviewed" in data:
                    if isinstance(data["is_reviewed"], bool):
                        if data["is_reviewed"]:
                            metadata_condition.is_swiss_prot = True
                        else:
                            metadata_condition.is_trembl = True
                    else:
                        errors["is_reviewed"].append("must be a boolean")

                # Sort by `order_by`
                order_by_instruction = None
                if order_by and not output_style == OutputFormat.text:
                    order_by_instruction = f"{order_by} {data['order_direction']}"

                # Note about offset and limit: It is much faster to fetch the data from the server, discard rows below the offset
                # and stop fetching when the limit is reached, instead of applying LIMIT and OFFSET directly to the query.
                # Even on high offsets, which discard a lot of rows, this approach is faster.
                # Curl shows the differences: curl -o foo.json --header "Content-Type: application/json" --request POST --data '{"include_count":true,"offset":0,"limit":50,"modifications":[{"amino_acid":"C","position":"anywhere","is_static":true,"delta":57.021464}],"lower_precursor_tolerance_ppm":5,"upper_precursor_tolerance_ppm":5,"variable_modification_maximum":0,"order":true,"precursor":859.49506802369}' http://localhost:3000/api/peptides/search
                # Applying OFFSET and LIMIT to the query: 49 - 52 seconds
                # Discarding rows below the offset and stopping the fetch early: a few hundred milliseconds (not printed by curl).
                # (See the pagination sketch after this function.)
                offset = 0
                limit = math.inf
                if "limit" in data:
                    if isinstance(data["limit"], int):
                        limit = data["limit"]
                    else:
                        errors["limit"].append("must be an integer")
                if "offset" in data:
                    if isinstance(data["offset"], int):
                        offset = data["offset"]
                    else:
                        errors["offset"].append("must be an integer")
            else:
                errors["precursor"] = ["must be an integer or float"]
        else:
            errors["precursor"] = ["cannot be missing"]

    if len(errors):
        return jsonify({"errors": errors}), 422

    include_metadata = include_metadata or metadata_condition.has_conditions()

    peptide_conversion = lambda _, __: (b"", )  # lambda to convert a peptide to the output type
    delimiter = b""  # delimiter between each converted peptide
    pre_peptide_content = b""  # content before the peptides
    post_peptide_content = lambda _, __: b""  # content after the peptides

    if output_style == OutputFormat.json:
        peptide_conversion = lambda _, peptide: peptide.to_json()
        delimiter = b","
        pre_peptide_content = b"{\"peptides\":["
        post_peptide_content = lambda _, __: b"]}"
        if include_count:
            post_peptide_content = lambda database_cursor, where_condition: \
                f"],\"count\":{Peptide.count(database_cursor, where_condition)}}}".encode("utf-8")
    elif output_style == OutputFormat.stream:
        peptide_conversion = lambda _, peptide: peptide.to_json()
        delimiter = b"\n"
    elif output_style == OutputFormat.fasta:
        peptide_conversion = lambda peptide_idx, peptide: peptide.to_fasta_entry(f"P{peptide_idx}".encode())
        delimiter = b"\n"
    elif output_style == OutputFormat.csv:
        peptide_conversion = lambda _, peptide: peptide.to_csv_row()
        delimiter = b"\n"
        pre_peptide_content = (
            ",".join(Peptide.CSV_HEADER).encode("utf-8")
            if not include_metadata else
            ",".join(Peptide.CSV_HEADER + Peptide.METADATA_CSV_HEADER).encode("utf-8")
        ) + b"\n"
    elif output_style == OutputFormat.text:
        peptide_conversion = lambda _, peptide: peptide.to_plain_text()
        delimiter = b"\n"

    return Response(
        ApiAbstractPeptideController.stream(
            peptide_conversion, delimiter, pre_peptide_content, post_peptide_content,
            modification_combination_list.to_where_condition(), order_by_instruction,
            offset, limit, include_metadata, metadata_condition
        ),
        content_type=f"{output_style}; charset=utf-8"
    )
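# A minimal sketch of the fetch-and-discard pagination strategy described in the comment
# inside _search() above: rather than adding OFFSET/LIMIT to the SQL, rows are read from
# a server-side cursor, rows below the offset are skipped, and fetching stops once the
# limit is reached. This is an illustration only, not the actual
# ApiAbstractPeptideController.stream() implementation; the function name is hypothetical.
def stream_rows(database_cursor, offset: int, limit: float):
    matched = 0
    for row_idx, row in enumerate(database_cursor):
        if row_idx < offset:
            continue  # discard rows below the offset
        if matched >= limit:
            break     # stop fetching early once the limit is reached
        matched += 1
        yield row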
def test_validation(self):
    static_carbamidomethylation_of_c = Modification(
        'unimod:4', 'carbamidomethylation of cysteine',
        AminoAcid.get_by_one_letter_code('C'), mass_to_int(57.021464), True,
        ModificationPosition.ANYWHERE)
    variable_oxidation_of_m = Modification(
        'unimod:35', 'oxidation of methionine',
        AminoAcid.get_by_one_letter_code('M'), mass_to_int(15.994915), False,
        ModificationPosition.ANYWHERE)
    static_custom_modification_of_n_terminal_d = Modification(
        'custom:1', 'custom of aspartic acid',
        AminoAcid.get_by_one_letter_code('D'), mass_to_int(10.01541), True,
        ModificationPosition.N_TERMINUS)
    variable_custom_modification_of_n_terminal_d = Modification(
        'custom:2', 'custom of aspartic acid',
        AminoAcid.get_by_one_letter_code('D'), mass_to_int(10.01541), False,
        ModificationPosition.N_TERMINUS)
    static_custom_modification_of_c_terminal_r = Modification(
        'custom:3', 'custom of arginine',
        AminoAcid.get_by_one_letter_code('R'), mass_to_int(6.153215), True,
        ModificationPosition.C_TERMINUS)
    variable_custom_modification_of_c_terminal_r = Modification(
        'custom:4', 'custom of arginine',
        AminoAcid.get_by_one_letter_code('R'), mass_to_int(6.153215), False,
        ModificationPosition.C_TERMINUS)

    peptide = Peptide(LEPTIN_PEPTIDE_SEQUENCE, 2)

    # Static carbamidomethylation of C
    expected_peptide_mass = peptide.weight + peptide.c_count * static_carbamidomethylation_of_c.delta

    modification_collection = ModificationCollection([static_carbamidomethylation_of_c])
    precursor_range = PrecursorRange(expected_peptide_mass, 0, 0)
    validator = PeptideMassValidator(modification_collection, 0, precursor_range)
    self.assertTrue(validator.validate(peptide))

    # This should also match with allowed variable modifications (where actually none is applied)
    # Static carbamidomethylation of C
    # Variable oxidation of M (not considered in expected_weight)
    modification_collection = ModificationCollection(
        [static_carbamidomethylation_of_c, variable_oxidation_of_m])
    validator = PeptideMassValidator(modification_collection, 3, precursor_range)
    self.assertTrue(validator.validate(peptide))

    # Static carbamidomethylation of C
    # 1 variable oxidation of M
    expected_peptide_mass = peptide.weight \
        + peptide.c_count * static_carbamidomethylation_of_c.delta \
        + 1 * variable_oxidation_of_m.delta
    modification_collection = ModificationCollection(
        [static_carbamidomethylation_of_c, variable_oxidation_of_m])
    precursor_range = PrecursorRange(expected_peptide_mass, 0, 0)
    validator = PeptideMassValidator(modification_collection, 3, precursor_range)
    self.assertTrue(validator.validate(peptide))

    # This should not match if no variable modifications are allowed
    # Static carbamidomethylation of C
    # Variable oxidation of M (considered in expected_weight but no variable modification allowed in validation)
    validator.set_maximum_number_of_variable_modifications(0)
    self.assertFalse(validator.validate(peptide))

    # Let's replace two Js with Ms and test 3 applied variable oxidations of M
    # Static carbamidomethylation of C
    # 3 variable oxidations of M
    peptide = Peptide(LEPTIN_PEPTIDE_SEQUENCE.replace('J', 'M', 2), 2)
    expected_peptide_mass = peptide.weight \
        + peptide.c_count * static_carbamidomethylation_of_c.delta \
        + 3 * variable_oxidation_of_m.delta
    modification_collection = ModificationCollection(
        [static_carbamidomethylation_of_c, variable_oxidation_of_m])
    precursor_range = PrecursorRange(expected_peptide_mass, 0, 0)
    validator = PeptideMassValidator(modification_collection, 3, precursor_range)
    self.assertTrue(validator.validate(peptide))

    # This should fail with only 2 allowed variable modifications
    validator.set_maximum_number_of_variable_modifications(2)
    self.assertFalse(validator.validate(peptide))

    # Test variable n-terminal modification
    # Variable n-terminal modification of D
    # Static carbamidomethylation of C
    # 2 variable oxidations of M
    expected_peptide_mass = peptide.weight \
        + variable_custom_modification_of_n_terminal_d.delta \
        + peptide.c_count * static_carbamidomethylation_of_c.delta \
        + 2 * variable_oxidation_of_m.delta
    modification_collection = ModificationCollection([
        static_carbamidomethylation_of_c, variable_oxidation_of_m,
        variable_custom_modification_of_n_terminal_d
    ])
    precursor_range = PrecursorRange(expected_peptide_mass, 0, 0)
    validator = PeptideMassValidator(modification_collection, 3, precursor_range)
    self.assertTrue(validator.validate(peptide))

    # This should fail with only 2 allowed variable modifications
    validator.set_maximum_number_of_variable_modifications(2)
    self.assertFalse(validator.validate(peptide))

    # Test static n-terminal modification
    # Static n-terminal modification of D
    # Static carbamidomethylation of C
    # 2 variable oxidations of M
    expected_peptide_mass = peptide.weight \
        + static_custom_modification_of_n_terminal_d.delta \
        + peptide.c_count * static_carbamidomethylation_of_c.delta \
        + 2 * variable_oxidation_of_m.delta
    modification_collection = ModificationCollection([
        static_carbamidomethylation_of_c, variable_oxidation_of_m,
        static_custom_modification_of_n_terminal_d
    ])
    precursor_range = PrecursorRange(expected_peptide_mass, 0, 0)
    validator = PeptideMassValidator(modification_collection, 3, precursor_range)
    self.assertTrue(validator.validate(peptide))

    # Test variable c-terminal modification
    # Variable c-terminal modification of R
    # Static carbamidomethylation of C
    # 2 variable oxidations of M
    expected_peptide_mass = peptide.weight \
        + variable_custom_modification_of_c_terminal_r.delta \
        + peptide.c_count * static_carbamidomethylation_of_c.delta \
        + 2 * variable_oxidation_of_m.delta
    modification_collection = ModificationCollection([
        static_carbamidomethylation_of_c, variable_oxidation_of_m,
        variable_custom_modification_of_c_terminal_r
    ])
    precursor_range = PrecursorRange(expected_peptide_mass, 0, 0)
    validator = PeptideMassValidator(modification_collection, 3, precursor_range)
    self.assertTrue(validator.validate(peptide))

    # This should fail with only 2 allowed variable modifications
    validator.set_maximum_number_of_variable_modifications(2)
    self.assertFalse(validator.validate(peptide))

    # Test static c-terminal modification
    # Static c-terminal modification of R
    # Static carbamidomethylation of C
    # 2 variable oxidations of M
    expected_peptide_mass = peptide.weight \
        + static_custom_modification_of_c_terminal_r.delta \
        + peptide.c_count * static_carbamidomethylation_of_c.delta \
        + 2 * variable_oxidation_of_m.delta
    modification_collection = ModificationCollection([
        static_carbamidomethylation_of_c, variable_oxidation_of_m,
        static_custom_modification_of_c_terminal_r
    ])
    precursor_range = PrecursorRange(expected_peptide_mass, 0, 0)
    validator = PeptideMassValidator(modification_collection, 3, precursor_range)
    self.assertTrue(validator.validate(peptide))
def read_from_csv_file(cls, csv_file_path: pathlib.Path) -> List[Modification]:
    """
    Reads modifications from CSV file.

    Parameters
    ----------
    csv_file_path : pathlib.Path
        Path of the CSV-file

    Returns
    -------
    List of modifications
    """
    modifications = []
    with csv_file_path.open("r") as csv_file:
        csv_reader = csv.reader(csv_file)
        # Omit header
        next(csv_reader)
        for row in csv_reader:
            modifications.append(
                Modification(
                    row[0], row[1],
                    AminoAcid.get_by_one_letter_code(row[2]),
                    mass_to_int(float(row[3])),
                    cls.string_to_is_static(row[4].lower()),
                    ModificationPosition.from_string(row[5])
                )
            )
    return modifications
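# A minimal sketch of the CSV layout this reader assumes, inferred from the column order
# used above: accession, name, one-letter amino acid code, delta in Dalton, static/variable
# flag (parsed by string_to_is_static), and position. The header row is skipped, so its
# exact names do not matter; the header and rows below are illustrative and the accepted
# values for the static flag column are an assumption, not taken from the shipped
# test_files/modifications.csv.
import pathlib

EXAMPLE_CSV = """accession,name,amino_acid,delta,is_static,position
unimod:4,carbamidomethylation of cysteine,C,57.021464,static,anywhere
unimod:35,oxidation of methionine,M,15.994915,variable,anywhere
"""

example_path = pathlib.Path("/tmp/example_modifications.csv")
example_path.write_text(EXAMPLE_CSV)
modifications = ModificationCollection.read_from_csv_file(example_path)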
class PeptideMassValidatorTestCase(unittest.TestCase):
    """
    Tests peptide mass validator with modified peptides.
    """

    PRECURSOR = mass_to_int(1751.868942379)
    """Precursor"""

    PRECURSOR_RANGE = PrecursorRange(PRECURSOR, 5, 5)
    """Precursor tolerance"""

    PEPTIDE_SEQUENCES = [
        ("VYMGWJKGVYTTYR", "VYM[v:any:15994915000]GWJKGVYTTYR"),
        ("AVMQCVTVQSKPYNK", "AVMQC[s:any:57021464000]VTVQSKPYNK"),
        ("MTEYPDVJWGTRIR", "M[v:any:15994915000]TEYPDVJWGTRIR"),
        ("EPEHJDVJMPRMAAK", "EPEHJDVJMPRM[v:any:15994915000]AAK"),
        ("WYMAZLVWJIZER", "WYM[v:any:15994915000]AZLVWJIZER"),
        ("CGCJVHJJMFFJAR", "C[s:any:57021464000]GC[s:any:57021464000]JVHJJM[v:any:15994915000]FFJAR"),
        ("CVPPPQSATDJQNVAR", "C[s:any:57021464000]VPPPQSATDJQNVAR"),
        ("LJPYRVPFTPMCDK", "LJPYRVPFTPM[v:any:15994915000]C[s:any:57021464000]DK"),
        ("VFFTWESJTVHCVK", "VFFTWESJTVHC[s:any:57021464000]VK"),
        ("JSYNCDIEJRASRR", "JSYNC[s:any:57021464000]DIEJRASRR"),
        ("QYECRRVOWYR", "QYEC[s:any:57021464000]RRVOWYR"),
        ("AYYWJNRGFYJMR", "AYYWJNRGFYJMR"),
        ("JQATYMTSGGTSPPITK", "JQATYMTSGGTSPPITK"),
        ("ODGANVQZRTBPMAJ", "ODGANVQZRTBPMAJ"),
        ("QJMVFGKQQCQLEK", "QJM[v:any:15994915000]VFGKQQC[s:any:57021464000]QLEK"),
        ("PAPVHCDYPPYPVJK", "PAPVHC[s:any:57021464000]DYPPYPVJK"),
        ("VBLVVTBMDHVHMVK", "VBLVVTBMDHVHM[v:any:15994915000]VK"),
        ("MIHCPAFYRIMAVK", "MIHC[s:any:57021464000]PAFYRIM[v:any:15994915000]AVK"),
        ("QMYFARHJHDGLHK", "QMYFARHJHDGLHK"),
        ("QJDJRMBTTFDBKR", "QJDJRMBTTFDBKR"),
        ("QRTEFCREIGEVTK", "QRTEFC[s:any:57021464000]REIGEVTK"),
        ("HKDDRTVQLFAMYL", "HKDDRTVQLFAM[v:any:15994915000]YL"),
        ("VAEPMFDRVMRMVR", "VAEPMFDRVM[v:any:15994915000]RMVR"),
        ("WTEQAPSJYJMGGRK", "WTEQAPSJYJM[v:any:15994915000]GGRK"),
        ("ATVFPQLKEQJVTUK", "ATVFPQLKEQJVTUK"),
        ("FQSVVBFVEBIHYR", "FQSVVBFVEBIHYR"),
        ("ZVVCCJVJSABIYGR", "ZVVC[s:any:57021464000]C[s:any:57021464000]JVJSABIYGR"),
        ("EGRSAAAETCVVFSLR", "EGRSAAAETC[s:any:57021464000]VVFSLR"),
        ("HQQMDAVTKSPGTQPK", "HQQMDAVTKSPGTQPK"),
        ("VDFAFVQRPKCEEK", "VDFAFVQRPKC[s:any:57021464000]EEK"),
        ("DQMCPFRJCKJIR", "DQM[v:any:15994915000]C[s:any:57021464000]PFRJC[s:any:57021464000]KJIR"),
        ("MCMHTJRIVEFKR", "M[v:any:15994915000]C[s:any:57021464000]M[v:any:15994915000]HTJRIVEFKR"),
        ("JJSPGDYTPHVTHGMK", "JJSPGDYTPHVTHGMK"),
        ("FVKMYGRFJCYPR", "FVKM[v:any:15994915000]YGRFJC[s:any:57021464000]YPR"),
        ("CIMPYRGRTIQWR", "C[s:any:57021464000]IM[v:any:15994915000]PYRGRTIQWR"),
        ("FYVDVHMFTJQQPK", "FYVDVHMFTJQQPK"),
        ("ODBRVBFMYAAIGK", "ODBRVBFM[v:any:15994915000]YAAIGK"),
        ("FFVMCRPNDRVVGR", "FFVMC[s:any:57021464000]RPNDRVVGR"),
        ("UPMVTQPAGPPIIPKR", "UPMVTQPAGPPIIPKR"),
        ("FTBDTIVVTNZFPQK", "FTBDTIVVTNZFPQK"),
        ("JMQWKCFVPJVCR", "JM[v:any:15994915000]QWKC[s:any:57021464000]FVPJVC[s:any:57021464000]R"),
        ("YKQFTFFMGJAEVR", "YKQFTFFM[v:any:15994915000]GJAEVR"),
        ("HTVVJTFJJSKUVSTG", "HTVVJTFJJSKUVSTG"),
        ("WAKQJSNRCTFWR", "WAKQJSNRC[s:any:57021464000]TFWR"),
        ("JSJGWZBJCWAYJK", "JSJGWZBJC[s:any:57021464000]WAYJK"),
        ("YYSNIHNQAIVRQF", "YYSNIHNQAIVRQF"),
        ("REVJRDIMFPMGEK", "REVJRDIM[v:any:15994915000]FPM[v:any:15994915000]GEK"),
        ("MJNCMTWAGKQKLR", "MJNC[s:any:57021464000]M[v:any:15994915000]TWAGKQKLR"),
        ("FGILPJSVRWQUTGK", "FGILPJSVRWQUTGK"),
        ("YYPGKPEPMKRENK", "YYPGKPEPM[v:any:15994915000]KRENK"),
        ("JTMVDENNWAJKYR", "JTMVDENNWAJKYR"),
        ("QQYMICAJAPMVRR", "QQYMIC[s:any:57021464000]AJAPM[v:any:15994915000]VRR"),
        ("CIGFEQKIKBZQMK", "C[s:any:57021464000]IGFEQKIKBZQMK"),
        ("FQAIBSPMVKTMBVR", "FQAIBSPMVKTM[v:any:15994915000]BVR"),
        ("LIDFIZAZLAVTIUR", "LIDFIZAZLAVTIUR"),
        ("YAMKPASAMJKMJGPAG", "YAMKPASAM[v:any:15994915000]JKMJGPAG"),
        ("PQNGPZQPJZTCKQK", "PQNGPZQPJZTC[s:any:57021464000]KQK"),
        ("MCIQMQJKYPPRR", "M[v:any:15994915000]C[s:any:57021464000]IQM[v:any:15994915000]QJKYPPRR"),
        ("CWNHPAKJVWWQK", "C[s:any:57021464000]WNHPAKJVWWQK"),
        ("WZGQTAVGZAQQOGR", "WZGQTAVGZAQQOGR"),
        ("MQCDJGHSORKQR", "MQC[s:any:57021464000]DJGHSORKQR"),
        ("TSFFHFVINNKDQR", "TSFFHFVINNKDQR"),
        ("WLQQJAGTEQPYYR", "WLQQJAGTEQPYYR"),
        ("TCMTDDRIRPVJJY", "TC[s:any:57021464000]MTDDRIRPVJJY"),
        ("MKGBMIPSJAZVYQR", "MKGBM[v:any:15994915000]IPSJAZVYQR"),
        ("KGAHJQQQDADAAPFR", "KGAHJQQQDADAAPFR"),
        ("EVWVGYTDGRJVCAK", "EVWVGYTDGRJVC[s:any:57021464000]AK"),
        ("OAYIJRPUFRSNK", "OAYIJRPUFRSNK"),
        ("BCDAVMAVBAJPIVHK", "BC[s:any:57021464000]DAVMAVBAJPIVHK"),
        ("QJPMCKFPYEPAKK", "QJPM[v:any:15994915000]C[s:any:57021464000]KFPYEPAKK"),
        ("UHRFRQVFJFPVR", "UHRFRQVFJFPVR"),
        ("QMPJOAASZWBPQK", "QM[v:any:15994915000]PJOAASZWBPQK"),
        ("CMGJEJLDVKKMDGK", "C[s:any:57021464000]M[v:any:15994915000]GJEJLDVKKMDGK"),
        ("VDCDJTQJJEQKYK", "VDC[s:any:57021464000]DJTQJJEQKYK"),
        ("NVVNFDVPVMVJMEF", "NVVNFDVPVMVJMEF"),
        ("MJVQVCFIJNDTQR", "M[v:any:15994915000]JVQVC[s:any:57021464000]FIJNDTQR"),
        ("DVJACKGQSRTQGJGY", "DVJAC[s:any:57021464000]KGQSRTQGJGY"),
        ("JQCPJQCRYVKCK", "JQC[s:any:57021464000]PJQC[s:any:57021464000]RYVKC[s:any:57021464000]K"),
        ("MNPASVJEMTJFJMR", "MNPASVJEMTJFJMR"),
        ("IPDTQRYKMAJCEK", "IPDTQRYKMAJC[s:any:57021464000]EK"),
        ("HAOSJPJVUGKMPAK", "HAOSJPJVUGKM[v:any:15994915000]PAK"),
        ("VSTPVRJFMVACGCR", "VSTPVRJFMVAC[s:any:57021464000]GC[s:any:57021464000]R"),
        ("CVGGAKALDYHYJSAK", "C[s:any:57021464000]VGGAKALDYHYJSAK"),
        ("RVVEPFAYCJEDVR", "RVVEPFAYC[s:any:57021464000]JEDVR"),
        ("CJMPMMVPMKVQKK", "C[s:any:57021464000]JMPM[v:any:15994915000]M[v:any:15994915000]VPMKVQKK"),
        ("ZQRYJFVYMTFZK", "ZQRYJFVYMTFZK"),
        ("JGTWPASJHDSLYHR", "JGTWPASJHDSLYHR"),
        ("HTBDTEZJCROIK", "HTBDTEZJC[s:any:57021464000]ROIK"),
        ("ALWJEYSRCJEANK", "ALWJEYSRC[s:any:57021464000]JEANK"),
        ("WGDSCDIGAJJPPVPR", "WGDSC[s:any:57021464000]DIGAJJPPVPR"),
        ("MJREDFJIEIWCK", "MJREDFJIEIWC[s:any:57021464000]K"),
        ("VGHQMAMGPPJVDQJK", "VGHQM[v:any:15994915000]AM[v:any:15994915000]GPPJVDQJK"),
        ("WGAYRRJYWYYR", "WGAYRRJYWYYR"),
        ("KGQRVYZMNBQTJR", "KGQRVYZM[v:any:15994915000]NBQTJR"),
        ("SWMQEKSPVFWAIK", "SWM[v:any:15994915000]QEKSPVFWAIK"),
        ("FKQAGTVMYMYJJR", "FKQAGTVM[v:any:15994915000]YM[v:any:15994915000]YJJR"),
        ("BJPVSFPQBHGTWVR", "BJPVSFPQBHGTWVR"),
        ("QDGLJJPFWNMYQK", "QDGLJJPFWNMYQK"),
        ("DCITAMHPAKPMPKR", "DC[s:any:57021464000]ITAMHPAKPMPKR"),
        ("BMJZMJVDYJPRMK", "BMJZMJVDYJPRMK")
    ]
    """Plain and PTM annotated peptide sequences"""

    MODIFICATION_COLLECTION: ClassVar[ModificationCollection] = ModificationCollection.read_from_csv_file(
        pathlib.Path("./test_files/modifications.csv"))
    """Modification collection"""

    NUMBER_OF_VARIABLE_MODIFICATIONS: ClassVar[int] = 3
    """Number of variable modifications"""

    def test_validation(self):
        """
        Checks if mass validation works.
        """
        peptide_mass_validator = PeptideMassValidator(
            self.__class__.MODIFICATION_COLLECTION,
            self.__class__.NUMBER_OF_VARIABLE_MODIFICATIONS,
            self.__class__.PRECURSOR_RANGE)

        for plain_sequence, annotated_sequence in self.__class__.PEPTIDE_SEQUENCES:
            peptide = Peptide(plain_sequence, Trypsin.count_missed_cleavages(plain_sequence))
            self.assertTrue(
                peptide_mass_validator.validate(peptide, True),
                f"expected: {annotated_sequence}; is: {peptide.sequence_with_modification_markers}"
            )
def __init__(self, name: str, mono_mass: float, average_mass: float):
    self.name = name
    self.mono_mass = mass_to_int(mono_mass)
    self.average_mass = mass_to_int(average_mass)