Exemple #1
0
 def __init__(self, name: str, one_letter_code: str, three_letter_code: str,
              chemical_formula: str, mono_mass: float, average_mass: float):
     """
     Stores the descriptive attributes and the converted masses.

     Parameters
     ----------
     name : str
         Full name
     one_letter_code : str
         One letter code
     three_letter_code : str
         Three letter code
     chemical_formula : str
         Chemical formula
     mono_mass : float
         Mono mass, stored after conversion with `mass_to_int`
     average_mass : float
         Average mass, stored after conversion with `mass_to_int`
     """
     # Descriptive attributes are kept exactly as passed in.
     self.name, self.one_letter_code = name, one_letter_code
     self.three_letter_code, self.chemical_formula = (three_letter_code,
                                                      chemical_formula)
     # Both masses go through `mass_to_int` before they are stored.
     converted_mono_mass = mass_to_int(mono_mass)
     self.mono_mass = converted_mono_mass
     converted_average_mass = mass_to_int(average_mass)
     self.average_mass = converted_average_mass
Exemple #2
0
    def test_with_modifications(self):  # lower hit
        """
        Inserts all fixture peptides, searches them with the modifications
        read from the CSV file and expects exactly the 'matching' sequences.
        """
        # Store every fixture group in the database, one bulk insert per group
        with self.database_connection, \
                self.database_connection.cursor() as cursor:
            for sequences in PEPTIDES_FOR_MODIFIED_SEARCH.values():
                batch = [Peptide(sequence, 0) for sequence in sequences]
                Peptide.bulk_insert(cursor, batch)

        modifications_csv = pathlib.Path("./test_files/modifications.csv")
        modifications = ModificationCollection.read_from_csv_file(
            modifications_csv)

        precursor_mass = mass_to_int(
            thomson_to_dalton(MASS_TO_CHARGE_RATIO, CHARGE))

        with self.database_connection, \
                self.database_connection.cursor() as cursor:
            combinations = ModificationCombinationList(
                modifications, precursor_mass, PRECURSOR_TOLERANCE,
                PRECURSOR_TOLERANCE, VARIABLE_MODIFICATION_MAXIMUM)
            condition = combinations.to_where_condition()
            found_peptides = Peptide.select(cursor, condition, fetchall=True)

            # Exactly the matching peptides must be returned, nothing else
            self.assertEqual(len(found_peptides),
                             len(PEPTIDES_FOR_MODIFIED_SEARCH['matching']))
            for found in found_peptides:
                self.assertIn(found.sequence,
                              PEPTIDES_FOR_MODIFIED_SEARCH['matching'])
    def start_from_comand_line(cls, args):
        """
        Builds a precursor range calculation from CLI arguments and prints it.

        Parameters
        ----------
        args
            Arguments from the CLI parser
        """
        precursor = mass_to_int(args.precursor)
        lower_tolerance = args.lower_precursor_tolerance
        upper_tolerance = args.upper_precursor_tolerance

        # Without a modifications file an empty collection is used.
        if args.modifications is None:
            modifications = ModificationCollection([])
        else:
            modifications = ModificationCollection.read_from_csv_file(
                pathlib.Path(args.modifications))

        calculation = cls(precursor, lower_tolerance, upper_tolerance,
                          modifications, args.max_variable_modifications,
                          args.partitions)
        print(calculation)
Exemple #4
0
    def test_without_modifications(self):
        """
        Inserts all fixture peptides, searches them without any modification
        and expects one mass tolerance condition and exactly the 'matching'
        sequences.
        """
        # Store every fixture group in the database, one bulk insert per group
        with self.database_connection, \
                self.database_connection.cursor() as cursor:
            for sequences in PEPTIDES_FOR_UNMODIFIED_SEARCH.values():
                batch = [Peptide(sequence, 0) for sequence in sequences]
                Peptide.bulk_insert(cursor, batch)

        no_modifications = ModificationCollection([])
        precursor_mass = mass_to_int(
            thomson_to_dalton(MASS_TO_CHARGE_RATIO, CHARGE))

        with self.database_connection, \
                self.database_connection.cursor() as cursor:
            combinations = ModificationCombinationList(
                no_modifications, precursor_mass, PRECURSOR_TOLERANCE,
                PRECURSOR_TOLERANCE, VARIABLE_MODIFICATION_MAXIMUM)
            condition = combinations.to_where_condition()

            # Render the condition with its values bound, so the regex can be
            # applied to the final SQL text.
            rendered_condition = cursor.mogrify(
                condition.get_condition_str(),
                condition.values).decode('utf-8')
            tolerance_matches = re.findall(
                self.__class__.MASS_TOLERANCE_REGEX, rendered_condition)
            # Without modifications there is only one between-condition.
            self.assertEqual(len(tolerance_matches), 1)

            found_peptides = Peptide.select(cursor, condition, fetchall=True)

            # Exactly the matching peptides must be returned, nothing else
            self.assertEqual(len(found_peptides),
                             len(PEPTIDES_FOR_UNMODIFIED_SEARCH['matching']))
            for found in found_peptides:
                self.assertIn(found.sequence,
                              PEPTIDES_FOR_UNMODIFIED_SEARCH['matching'])
Exemple #5
0
    def test_without_modifications(self):
        """
        Inserts all fixture peptides, searches them without any modification
        and expects one weight tolerance condition and exactly the 'matching'
        sequences.

        Both sessions are closed in a `finally` block, so a failing insert or
        assertion does not leave a session open.
        """
        # Add peptides to database
        session = self.session_factory()
        try:
            for sequences in PEPTIDES_FOR_UNMODIFIED_SEARCH.values():
                for sequence in sequences:
                    session.add(Peptide(sequence, 0))
            session.commit()
        finally:
            session.close()

        modification_collection = ModificationCollection([])
        precursor = mass_to_int(thomson_to_dalton(MASS_TO_CHARGE_RATIO,
                                                  CHARGE))

        builder = ModifiedPeptideWhereClauseBuilder(
            modification_collection, precursor, PRECURSOR_TOLERANCE,
            PRECURSOR_TOLERANCE, VARIABLE_MODIFICATION_MAXIMUM)
        where_clause = builder.build(Peptide)

        # Compile with literal binds so the regex sees the final SQL text
        where_clause_string = str(
            where_clause.compile(compile_kwargs={"literal_binds": True}))
        matches = re.findall(self.__class__.WEIGHT_TOLERANCE_REGEX,
                             where_clause_string)
        # Without modifications there is only one between-condition.
        self.assertEqual(len(matches), 1)

        # Create fresh session
        session = self.session_factory()
        try:
            peptides = session.query(Peptide).filter(where_clause).all()

            # Check if only matching peptides were found. The assertions run
            # before the session is closed, while the peptides are attached.
            self.assertEqual(len(peptides),
                             len(PEPTIDES_FOR_UNMODIFIED_SEARCH['matching']))
            for peptide in peptides:
                self.assertIn(peptide.sequence,
                              PEPTIDES_FOR_UNMODIFIED_SEARCH['matching'])
        finally:
            session.close()
Exemple #6
0
    def test_with_modifications(self):  # lower hit
        """
        Inserts all fixture peptides, searches them with the modifications
        read from the CSV file and expects exactly the 'matching' sequences.

        The where clause is built once. Both sessions are closed in a
        `finally` block, so a failing insert or assertion does not leave a
        session open.
        """
        # Add peptides to database
        session = self.session_factory()
        try:
            for sequences in PEPTIDES_FOR_MODIFIED_SEARCH.values():
                for sequence in sequences:
                    session.add(Peptide(sequence, 0))
            session.commit()
        finally:
            session.close()

        csv_file_path = pathlib.Path("./test_files/modifications.csv")
        modification_collection = ModificationCollection.read_from_csv_file(
            csv_file_path)

        precursor = mass_to_int(thomson_to_dalton(MASS_TO_CHARGE_RATIO,
                                                  CHARGE))

        builder = ModifiedPeptideWhereClauseBuilder(
            modification_collection, precursor, PRECURSOR_TOLERANCE,
            PRECURSOR_TOLERANCE, VARIABLE_MODIFICATION_MAXIMUM)
        where_clause = builder.build(Peptide)

        # Create fresh session
        session = self.session_factory()
        try:
            peptides = session.query(Peptide).filter(where_clause).all()

            # Check if only matching peptides were found. The assertions run
            # before the session is closed, while the peptides are attached.
            self.assertEqual(len(peptides),
                             len(PEPTIDES_FOR_MODIFIED_SEARCH['matching']))
            for peptide in peptides:
                self.assertIn(peptide.sequence,
                              PEPTIDES_FOR_MODIFIED_SEARCH['matching'])
        finally:
            session.close()
Exemple #7
0
    def _search(request, file_extension: str):
        """
        Validates the search parameters of a request and streams the matching
        peptides in the requested output format.

        Parameters
        ----------
        request
            Incoming request. Its `headers`, `form` and `get_json()` are read
            (presumably a Flask request -- confirm against the caller).
        file_extension : str
            Name of the output format, or None to derive the format from the
            `accept` header (JSON if the header value is unknown).

        Returns
        -------
        A tuple of a JSON body `{"errors": ...}` and status 422 if the request
        body or any parameter is invalid, otherwise a streaming `Response`.
        """
        errors = defaultdict(list)

        # Compare only the media type, so a parameter such as
        # `; charset=utf-8` does not make a valid request unparsable.
        media_type = request.headers.get("Content-Type",
                                         "").split(";", 1)[0].strip().lower()
        data = None
        if media_type == "application/json":
            data = request.get_json()
        elif media_type == "application/x-www-form-urlencoded":
            # For use with classical form-tag. The JSON-formatted search parameters should be provided in the form parameter "search_params"
            try:
                data = json.loads(request.form.get("search_params", "{}"))
            except ValueError:
                # json.JSONDecodeError is a ValueError; handled below.
                data = None

        # Every check below indexes `data` by key, so anything other than a
        # JSON object (including an unsupported content type) is rejected here.
        if not isinstance(data, dict):
            return jsonify({
                "errors": {
                    "request_body": [
                        "must be a JSON object, sent as application/json or in the form parameter 'search_params'"
                    ]
                }
            }), 422

        include_count = False
        if 'include_count' in data and isinstance(data['include_count'], bool):
            include_count = data['include_count']

        order_by = None
        if 'order_by' in data:
            if isinstance(data['order_by'], str) and data[
                    'order_by'] in ApiAbstractPeptideController.SUPPORTED_ORDER_COLUMNS:
                order_by = data['order_by']
            else:
                errors["order_by"].append(
                    f"must be a string with one of following values: {', '.join(ApiAbstractPeptideController.SUPPORTED_ORDER_COLUMNS)}"
                )

        # The direction is optional; it is only appended to the order
        # instruction when the client supplied a valid one.
        order_direction = None
        if 'order_direction' in data:
            if isinstance(data['order_direction'], str) and data[
                    'order_direction'] in ApiAbstractPeptideController.SUPPORTED_ORDER_DIRECTIONS:
                order_direction = data['order_direction']
            else:
                errors["order_direction"].append(
                    f"'order_direction' must be a string with one of following values: {', '.join(ApiAbstractPeptideController.SUPPORTED_ORDER_DIRECTIONS)}"
                )

        include_metadata = False
        if "include_metadata" in data:
            if isinstance(data["include_metadata"], bool):
                include_metadata = data["include_metadata"]
            else:
                errors["include_metadata"].append("must be a boolean")

        output_style = None
        if file_extension is not None:
            try:
                output_style = OutputFormat.from_name(file_extension)
            except KeyError:
                # Report the unknown extension instead of answering with an
                # undefined output format.
                errors["file_extension"].append(
                    "is not a supported output format")
        else:
            try:
                output_style = OutputFormat.from_value(
                    request.headers.get("accept", default=""))
            except KeyError:
                output_style = OutputFormat.json

        # validate int attributes
        for attribute in [
                "lower_precursor_tolerance_ppm",
                "upper_precursor_tolerance_ppm",
                "variable_modification_maximum"
        ]:
            if attribute in data:
                if isinstance(data[attribute], int):
                    if data[attribute] < 0:
                        errors[attribute].append("not greater or equals 0")
                else:
                    errors[attribute].append("not an integer")
            else:
                errors[attribute].append("cannot be empty")

        modifications = []
        if "modifications" in data:
            if isinstance(data["modifications"], list):
                for idx, modification_attributes in enumerate(
                        data["modifications"]):
                    if isinstance(modification_attributes, dict):
                        # Modifications sent by the client get a generated
                        # accession and name based on their list position.
                        accession_and_name = "onlinemod:{}".format(idx)
                        try:
                            modification_attributes[
                                'accession'] = accession_and_name
                            modification_attributes[
                                'name'] = accession_and_name
                            modification_attributes['delta'] = mass_to_int(
                                modification_attributes['delta'])
                            modifications.append(
                                Modification.from_dict(
                                    modification_attributes))
                        except Exception:
                            errors[f"modifications[{idx}]"].append(
                                "is invalid")
                    else:
                        errors[f"modifications[{idx}]"].append(
                            "not a dictionary")
            else:
                errors["modifications"].append(
                    "modifications has to be of type list")

        try:
            modification_collection = ModificationCollection(modifications)
        except Exception as e:
            errors["modifications"].append(f"{e}")

        database_connection = get_database_connection()
        if not errors:
            if "precursor" in data:
                if isinstance(data["precursor"], float) or isinstance(
                        data["precursor"], int):

                    modification_combination_list = ModificationCombinationList(
                        modification_collection,
                        mass_to_int(data["precursor"]),
                        data["lower_precursor_tolerance_ppm"],
                        data["upper_precursor_tolerance_ppm"],
                        data["variable_modification_maximum"])

                    metadata_condition = MetadataCondition()

                    # List of metadata conditions
                    if "taxonomy_id" in data:
                        if isinstance(data["taxonomy_id"], int):
                            with database_connection.cursor(
                            ) as database_cursor:
                                taxonomy = Taxonomy.select(
                                    database_cursor,
                                    ("id = %s", (data["taxonomy_id"], )))
                                if taxonomy is not None:
                                    metadata_condition.taxonomy_ids = [
                                        sub.id for sub in taxonomy.sub_species(
                                            database_cursor)
                                    ]
                                else:
                                    errors["taxonomy_id"].append("not found")

                        else:
                            errors["taxonomy_id"].append("must be an integer")

                    if "proteome_id" in data:
                        if isinstance(data["proteome_id"], str):
                            metadata_condition.proteome_id = data[
                                "proteome_id"]
                        else:
                            errors["proteome_id"].append("must be a string")

                    if "is_reviewed" in data:
                        if isinstance(data["is_reviewed"], bool):
                            if data["is_reviewed"]:
                                metadata_condition.is_swiss_prot = True
                            else:
                                metadata_condition.is_trembl = True
                        else:
                            errors["is_reviewed"].append("must be a boolean")

                    # Sort by `order_by`. Both parts were validated against
                    # the supported columns/directions above. Without a
                    # direction only the column is used.
                    order_by_instruction = None
                    if order_by and not output_style == OutputFormat.text:
                        order_by_instruction = (
                            f"{order_by} {order_direction}"
                            if order_direction else order_by)

                    # Note about offset and limit: It is much faster to fetch data from server and discard rows below the offset and stop the fetching when the limit is reached, instead of applying LIMIT and OFFSET directly to the query.
                    # Even on high offsets, which discards a lot of rows, this approach is faster.
                    # Curl shows the differences: curl -o foo.json --header "Content-Type: application/json" --request POST --data '{"include_count":true,"offset":0,"limit":50,"modifications":[{"amino_acid":"C","position":"anywhere","is_static":true,"delta":57.021464}],"lower_precursor_tolerance_ppm":5,"upper_precursor_tolerance_ppm":5,"variable_modification_maximum":0,"order":true,"precursor":859.49506802369}' http://localhost:3000/api/peptides/search
                    # Applying OFFSET and LIMIT to query: 49 - 52 seconds
                    # Discarding rows which are below the offset and stop the fetching early: a few hundred milliseconds (not printed by curl).
                    offset = 0
                    limit = math.inf
                    if "limit" in data:
                        if isinstance(data["limit"], int):
                            limit = data["limit"]
                        else:
                            errors["limit"].append("must be an integer")
                    if "offset" in data:
                        if isinstance(data["offset"], int):
                            offset = data["offset"]
                        else:
                            errors["offset"].append("must be an integer")

                else:
                    errors["precursor"] = ["must be an integer or float"]
            else:
                errors["precursor"] = ["cannot be missing"]

        if errors:
            return jsonify({"errors": errors}), 422

        # Metadata is also included when a metadata condition is set.
        include_metadata = include_metadata or metadata_condition.has_conditions(
        )

        peptide_conversion = lambda _, __: (
            b"", )  # lambda to convert peptide to output type
        delimiter = b""  # delimiter between each converted peptide
        pre_peptide_content = b""  # content before peptide
        post_peptide_content = lambda _, __: b""  # content after peptides

        if output_style == OutputFormat.json:
            peptide_conversion = lambda _, peptide: peptide.to_json()
            delimiter = b","
            pre_peptide_content = b"{\"peptides\":["
            post_peptide_content = lambda _, __: b"]}"
            if include_count:
                post_peptide_content = lambda database_cursor, where_condition: f"],\"count\":{Peptide.count(database_cursor, where_condition)}}}".encode(
                    "utf-8")
        elif output_style == OutputFormat.stream:
            peptide_conversion = lambda _, peptide: peptide.to_json()
            delimiter = b"\n"
        elif output_style == OutputFormat.fasta:
            peptide_conversion = lambda peptide_idx, peptide: peptide.to_fasta_entry(
                f"P{peptide_idx}".encode())
            delimiter = b"\n"
        elif output_style == OutputFormat.csv:
            peptide_conversion = lambda _, peptide: peptide.to_csv_row()
            delimiter = b"\n"
            pre_peptide_content = (
                ",".join(Peptide.CSV_HEADER).encode("utf-8") if not include_metadata else \
                ",".join(Peptide.CSV_HEADER + Peptide.METADATA_CSV_HEADER).encode("utf-8")
            ) + b"\n"
        elif output_style == OutputFormat.text:
            peptide_conversion = lambda _, peptide: peptide.to_plain_text()
            delimiter = b"\n"

        return Response(ApiAbstractPeptideController.stream(
            peptide_conversion, delimiter, pre_peptide_content,
            post_peptide_content,
            modification_combination_list.to_where_condition(),
            order_by_instruction, offset, limit, include_metadata,
            metadata_condition),
                        content_type=f"{output_style}; charset=utf-8")
Exemple #8
0
    def test_validation(self):
        """
        Validates one peptide against precursor ranges with zero tolerance,
        each built from the expected mass of a specific combination of static
        and variable modifications.
        """
        static_carbamidomethyl_c = Modification(
            'unimod:4', 'carbamidomethylation of cysteine',
            AminoAcid.get_by_one_letter_code('C'), mass_to_int(57.021464),
            True, ModificationPosition.ANYWHERE)
        variable_oxidation_m = Modification(
            'unimod:35', 'oxidation of methionine',
            AminoAcid.get_by_one_letter_code('M'), mass_to_int(15.994915),
            False, ModificationPosition.ANYWHERE)
        static_n_terminal_d = Modification(
            'custom:1', 'custom of aspartic acid',
            AminoAcid.get_by_one_letter_code('D'), mass_to_int(10.01541),
            True, ModificationPosition.N_TERMINUS)
        variable_n_terminal_d = Modification(
            'custom:2', 'custom of aspartic acid',
            AminoAcid.get_by_one_letter_code('D'), mass_to_int(10.01541),
            False, ModificationPosition.N_TERMINUS)
        static_c_terminal_r = Modification(
            'custom:3', 'custom of arginine',
            AminoAcid.get_by_one_letter_code('R'), mass_to_int(6.153215),
            True, ModificationPosition.C_TERMINUS)
        variable_c_terminal_r = Modification(
            'custom:4', 'custom of arginine',
            AminoAcid.get_by_one_letter_code('R'), mass_to_int(6.153215),
            False, ModificationPosition.C_TERMINUS)

        peptide = Peptide(LEPTIN_PEPTIDE_SEQUENCE, 2)

        # Scenario: static carbamidomethylation of C only
        expected_mass = (peptide.weight +
                         peptide.c_count * static_carbamidomethyl_c.delta)
        modifications = ModificationCollection([static_carbamidomethyl_c])
        exact_range = PrecursorRange(expected_mass, 0, 0)
        mass_validator = PeptideMassValidator(modifications, 0, exact_range)
        self.assertTrue(mass_validator.validate(peptide))

        # Scenario: same precursor range, but a variable oxidation of M is
        # allowed. It still has to match, because no variable modification
        # needs to be applied (it is not part of the expected mass).
        modifications = ModificationCollection(
            [static_carbamidomethyl_c, variable_oxidation_m])
        mass_validator = PeptideMassValidator(modifications, 3, exact_range)
        self.assertTrue(mass_validator.validate(peptide))

        # Scenario: static carbamidomethylation of C
        # plus 1 variable oxidation of M
        expected_mass = (peptide.weight +
                         peptide.c_count * static_carbamidomethyl_c.delta +
                         1 * variable_oxidation_m.delta)
        modifications = ModificationCollection(
            [static_carbamidomethyl_c, variable_oxidation_m])
        exact_range = PrecursorRange(expected_mass, 0, 0)
        mass_validator = PeptideMassValidator(modifications, 3, exact_range)
        self.assertTrue(mass_validator.validate(peptide))

        # The expected mass contains one variable oxidation, so validation
        # must fail once no variable modifications are allowed.
        mass_validator.set_maximum_number_of_variable_modifications(0)
        self.assertFalse(mass_validator.validate(peptide))

        # Scenario: replace two Js with Ms, then test
        # static carbamidomethylation of C plus 3 variable oxidations of M
        peptide = Peptide(LEPTIN_PEPTIDE_SEQUENCE.replace('J', 'M', 2), 2)
        expected_mass = (peptide.weight +
                         peptide.c_count * static_carbamidomethyl_c.delta +
                         3 * variable_oxidation_m.delta)
        modifications = ModificationCollection(
            [static_carbamidomethyl_c, variable_oxidation_m])
        exact_range = PrecursorRange(expected_mass, 0, 0)
        mass_validator = PeptideMassValidator(modifications, 3, exact_range)
        self.assertTrue(mass_validator.validate(peptide))

        # This should fail with only 2 allowed variable modifications
        mass_validator.set_maximum_number_of_variable_modifications(2)
        self.assertFalse(mass_validator.validate(peptide))

        # Scenario: variable n-terminal modification of D,
        # static carbamidomethylation of C and 2 variable oxidations of M
        expected_mass = (peptide.weight +
                         variable_n_terminal_d.delta +
                         peptide.c_count * static_carbamidomethyl_c.delta +
                         2 * variable_oxidation_m.delta)
        modifications = ModificationCollection([
            static_carbamidomethyl_c, variable_oxidation_m,
            variable_n_terminal_d
        ])
        exact_range = PrecursorRange(expected_mass, 0, 0)
        mass_validator = PeptideMassValidator(modifications, 3, exact_range)
        self.assertTrue(mass_validator.validate(peptide))

        # This should fail with only 2 allowed variable modifications
        mass_validator.set_maximum_number_of_variable_modifications(2)
        self.assertFalse(mass_validator.validate(peptide))

        # Scenario: static n-terminal modification of D,
        # static carbamidomethylation of C and 2 variable oxidations of M
        expected_mass = (peptide.weight +
                         static_n_terminal_d.delta +
                         peptide.c_count * static_carbamidomethyl_c.delta +
                         2 * variable_oxidation_m.delta)
        modifications = ModificationCollection([
            static_carbamidomethyl_c, variable_oxidation_m,
            static_n_terminal_d
        ])
        exact_range = PrecursorRange(expected_mass, 0, 0)
        mass_validator = PeptideMassValidator(modifications, 3, exact_range)
        self.assertTrue(mass_validator.validate(peptide))

        # Scenario: variable c-terminal modification of R,
        # static carbamidomethylation of C and 2 variable oxidations of M
        expected_mass = (peptide.weight +
                         variable_c_terminal_r.delta +
                         peptide.c_count * static_carbamidomethyl_c.delta +
                         2 * variable_oxidation_m.delta)
        modifications = ModificationCollection([
            static_carbamidomethyl_c, variable_oxidation_m,
            variable_c_terminal_r
        ])
        exact_range = PrecursorRange(expected_mass, 0, 0)
        mass_validator = PeptideMassValidator(modifications, 3, exact_range)
        self.assertTrue(mass_validator.validate(peptide))

        # This should fail with only 2 allowed variable modifications
        mass_validator.set_maximum_number_of_variable_modifications(2)
        self.assertFalse(mass_validator.validate(peptide))

        # Scenario: static c-terminal modification of R,
        # static carbamidomethylation of C and 2 variable oxidations of M
        expected_mass = (peptide.weight +
                         static_c_terminal_r.delta +
                         peptide.c_count * static_carbamidomethyl_c.delta +
                         2 * variable_oxidation_m.delta)
        modifications = ModificationCollection([
            static_carbamidomethyl_c, variable_oxidation_m,
            static_c_terminal_r
        ])
        exact_range = PrecursorRange(expected_mass, 0, 0)
        mass_validator = PeptideMassValidator(modifications, 3, exact_range)
        self.assertTrue(mass_validator.validate(peptide))
    def read_from_csv_file(cls, csv_file_path: pathlib.Path) -> List[Modification]:
        """
        Reads modifications from CSV file.

        The first row is treated as header and skipped. Each remaining row is
        read by column index: 0 and 1 are passed on unchanged, 2 is a one
        letter amino acid code, 3 is the mass delta (parsed as float and
        converted with `mass_to_int`), 4 is the static flag (lowercased and
        passed to `cls.string_to_is_static`) and 5 is the position string.
        Empty rows, e.g. a trailing blank line, are ignored. An empty file
        yields an empty list.

        Parameters
        ----------
        csv_file_path : pathlib.Path
            Path of the CSV-file

        Returns
        -------
        List of modifications
        """
        # `newline=""` lets the csv module handle line endings itself, as its
        # documentation requires for files passed to `csv.reader`.
        with csv_file_path.open("r", newline="") as csv_file:
            csv_reader = csv.reader(csv_file)
            # Omit header; the default prevents a StopIteration on an empty file
            next(csv_reader, None)
            return [
                Modification(row[0], row[1],
                             AminoAcid.get_by_one_letter_code(row[2]),
                             mass_to_int(float(row[3])),
                             cls.string_to_is_static(row[4].lower()),
                             ModificationPosition.from_string(row[5]))
                for row in csv_reader
                if row  # csv.reader yields [] for blank lines
            ]
Exemple #10
0
class PeptideMassValidatorTestCase(unittest.TestCase):
    """
    Tests peptide mass validator with modified peptides.

    Every plain sequence in PEPTIDE_SEQUENCES is expected to validate against
    PRECURSOR_RANGE when the modifications of MODIFICATION_COLLECTION and at
    most NUMBER_OF_VARIABLE_MODIFICATIONS variable modifications are allowed.
    """

    PRECURSOR = mass_to_int(1751.868942379)
    """Precursor mass, converted with mass_to_int
    """

    # NOTE(review): the two 5s are presumably the lower and upper tolerance;
    # confirm the unit against PrecursorRange.
    PRECURSOR_RANGE = PrecursorRange(PRECURSOR, 5, 5)
    """Precursor range around PRECURSOR
    """

    PEPTIDE_SEQUENCES = [
        ("VYMGWJKGVYTTYR", "VYM[v:any:15994915000]GWJKGVYTTYR"),
        ("AVMQCVTVQSKPYNK", "AVMQC[s:any:57021464000]VTVQSKPYNK"),
        ("MTEYPDVJWGTRIR", "M[v:any:15994915000]TEYPDVJWGTRIR"),
        ("EPEHJDVJMPRMAAK", "EPEHJDVJMPRM[v:any:15994915000]AAK"),
        ("WYMAZLVWJIZER", "WYM[v:any:15994915000]AZLVWJIZER"),
        ("CGCJVHJJMFFJAR",
         "C[s:any:57021464000]GC[s:any:57021464000]JVHJJM[v:any:15994915000]FFJAR"),
        ("CVPPPQSATDJQNVAR", "C[s:any:57021464000]VPPPQSATDJQNVAR"),
        ("LJPYRVPFTPMCDK",
         "LJPYRVPFTPM[v:any:15994915000]C[s:any:57021464000]DK"),
        ("VFFTWESJTVHCVK", "VFFTWESJTVHC[s:any:57021464000]VK"),
        ("JSYNCDIEJRASRR", "JSYNC[s:any:57021464000]DIEJRASRR"),
        ("QYECRRVOWYR", "QYEC[s:any:57021464000]RRVOWYR"),
        ("AYYWJNRGFYJMR", "AYYWJNRGFYJMR"),
        ("JQATYMTSGGTSPPITK", "JQATYMTSGGTSPPITK"),
        ("ODGANVQZRTBPMAJ", "ODGANVQZRTBPMAJ"),
        ("QJMVFGKQQCQLEK",
         "QJM[v:any:15994915000]VFGKQQC[s:any:57021464000]QLEK"),
        ("PAPVHCDYPPYPVJK", "PAPVHC[s:any:57021464000]DYPPYPVJK"),
        ("VBLVVTBMDHVHMVK", "VBLVVTBMDHVHM[v:any:15994915000]VK"),
        ("MIHCPAFYRIMAVK",
         "MIHC[s:any:57021464000]PAFYRIM[v:any:15994915000]AVK"),
        ("QMYFARHJHDGLHK", "QMYFARHJHDGLHK"),
        ("QJDJRMBTTFDBKR", "QJDJRMBTTFDBKR"),
        ("QRTEFCREIGEVTK", "QRTEFC[s:any:57021464000]REIGEVTK"),
        ("HKDDRTVQLFAMYL", "HKDDRTVQLFAM[v:any:15994915000]YL"),
        ("VAEPMFDRVMRMVR", "VAEPMFDRVM[v:any:15994915000]RMVR"),
        ("WTEQAPSJYJMGGRK", "WTEQAPSJYJM[v:any:15994915000]GGRK"),
        ("ATVFPQLKEQJVTUK", "ATVFPQLKEQJVTUK"),
        ("FQSVVBFVEBIHYR", "FQSVVBFVEBIHYR"),
        ("ZVVCCJVJSABIYGR",
         "ZVVC[s:any:57021464000]C[s:any:57021464000]JVJSABIYGR"),
        ("EGRSAAAETCVVFSLR", "EGRSAAAETC[s:any:57021464000]VVFSLR"),
        ("HQQMDAVTKSPGTQPK", "HQQMDAVTKSPGTQPK"),
        ("VDFAFVQRPKCEEK", "VDFAFVQRPKC[s:any:57021464000]EEK"),
        ("DQMCPFRJCKJIR",
         "DQM[v:any:15994915000]C[s:any:57021464000]PFRJC[s:any:57021464000]KJIR"),
        ("MCMHTJRIVEFKR",
         "M[v:any:15994915000]C[s:any:57021464000]M[v:any:15994915000]HTJRIVEFKR"),
        ("JJSPGDYTPHVTHGMK", "JJSPGDYTPHVTHGMK"),
        ("FVKMYGRFJCYPR",
         "FVKM[v:any:15994915000]YGRFJC[s:any:57021464000]YPR"),
        ("CIMPYRGRTIQWR",
         "C[s:any:57021464000]IM[v:any:15994915000]PYRGRTIQWR"),
        ("FYVDVHMFTJQQPK", "FYVDVHMFTJQQPK"),
        ("ODBRVBFMYAAIGK", "ODBRVBFM[v:any:15994915000]YAAIGK"),
        ("FFVMCRPNDRVVGR", "FFVMC[s:any:57021464000]RPNDRVVGR"),
        ("UPMVTQPAGPPIIPKR", "UPMVTQPAGPPIIPKR"),
        ("FTBDTIVVTNZFPQK", "FTBDTIVVTNZFPQK"),
        ("JMQWKCFVPJVCR",
         "JM[v:any:15994915000]QWKC[s:any:57021464000]FVPJVC[s:any:57021464000]R"),
        ("YKQFTFFMGJAEVR", "YKQFTFFM[v:any:15994915000]GJAEVR"),
        ("HTVVJTFJJSKUVSTG", "HTVVJTFJJSKUVSTG"),
        ("WAKQJSNRCTFWR", "WAKQJSNRC[s:any:57021464000]TFWR"),
        ("JSJGWZBJCWAYJK", "JSJGWZBJC[s:any:57021464000]WAYJK"),
        ("YYSNIHNQAIVRQF", "YYSNIHNQAIVRQF"),
        ("REVJRDIMFPMGEK",
         "REVJRDIM[v:any:15994915000]FPM[v:any:15994915000]GEK"),
        ("MJNCMTWAGKQKLR",
         "MJNC[s:any:57021464000]M[v:any:15994915000]TWAGKQKLR"),
        ("FGILPJSVRWQUTGK", "FGILPJSVRWQUTGK"),
        ("YYPGKPEPMKRENK", "YYPGKPEPM[v:any:15994915000]KRENK"),
        ("JTMVDENNWAJKYR", "JTMVDENNWAJKYR"),
        ("QQYMICAJAPMVRR",
         "QQYMIC[s:any:57021464000]AJAPM[v:any:15994915000]VRR"),
        ("CIGFEQKIKBZQMK", "C[s:any:57021464000]IGFEQKIKBZQMK"),
        ("FQAIBSPMVKTMBVR", "FQAIBSPMVKTM[v:any:15994915000]BVR"),
        ("LIDFIZAZLAVTIUR", "LIDFIZAZLAVTIUR"),
        ("YAMKPASAMJKMJGPAG", "YAMKPASAM[v:any:15994915000]JKMJGPAG"),
        ("PQNGPZQPJZTCKQK", "PQNGPZQPJZTC[s:any:57021464000]KQK"),
        ("MCIQMQJKYPPRR",
         "M[v:any:15994915000]C[s:any:57021464000]IQM[v:any:15994915000]QJKYPPRR"),
        ("CWNHPAKJVWWQK", "C[s:any:57021464000]WNHPAKJVWWQK"),
        ("WZGQTAVGZAQQOGR", "WZGQTAVGZAQQOGR"),
        ("MQCDJGHSORKQR", "MQC[s:any:57021464000]DJGHSORKQR"),
        ("TSFFHFVINNKDQR", "TSFFHFVINNKDQR"),
        ("WLQQJAGTEQPYYR", "WLQQJAGTEQPYYR"),
        ("TCMTDDRIRPVJJY", "TC[s:any:57021464000]MTDDRIRPVJJY"),
        ("MKGBMIPSJAZVYQR", "MKGBM[v:any:15994915000]IPSJAZVYQR"),
        ("KGAHJQQQDADAAPFR", "KGAHJQQQDADAAPFR"),
        ("EVWVGYTDGRJVCAK", "EVWVGYTDGRJVC[s:any:57021464000]AK"),
        ("OAYIJRPUFRSNK", "OAYIJRPUFRSNK"),
        ("BCDAVMAVBAJPIVHK", "BC[s:any:57021464000]DAVMAVBAJPIVHK"),
        ("QJPMCKFPYEPAKK",
         "QJPM[v:any:15994915000]C[s:any:57021464000]KFPYEPAKK"),
        ("UHRFRQVFJFPVR", "UHRFRQVFJFPVR"),
        ("QMPJOAASZWBPQK", "QM[v:any:15994915000]PJOAASZWBPQK"),
        ("CMGJEJLDVKKMDGK",
         "C[s:any:57021464000]M[v:any:15994915000]GJEJLDVKKMDGK"),
        ("VDCDJTQJJEQKYK", "VDC[s:any:57021464000]DJTQJJEQKYK"),
        ("NVVNFDVPVMVJMEF", "NVVNFDVPVMVJMEF"),
        ("MJVQVCFIJNDTQR",
         "M[v:any:15994915000]JVQVC[s:any:57021464000]FIJNDTQR"),
        ("DVJACKGQSRTQGJGY", "DVJAC[s:any:57021464000]KGQSRTQGJGY"),
        ("JQCPJQCRYVKCK",
         "JQC[s:any:57021464000]PJQC[s:any:57021464000]RYVKC[s:any:57021464000]K"),
        ("MNPASVJEMTJFJMR", "MNPASVJEMTJFJMR"),
        ("IPDTQRYKMAJCEK", "IPDTQRYKMAJC[s:any:57021464000]EK"),
        ("HAOSJPJVUGKMPAK", "HAOSJPJVUGKM[v:any:15994915000]PAK"),
        ("VSTPVRJFMVACGCR",
         "VSTPVRJFMVAC[s:any:57021464000]GC[s:any:57021464000]R"),
        ("CVGGAKALDYHYJSAK", "C[s:any:57021464000]VGGAKALDYHYJSAK"),
        ("RVVEPFAYCJEDVR", "RVVEPFAYC[s:any:57021464000]JEDVR"),
        ("CJMPMMVPMKVQKK",
         "C[s:any:57021464000]JMPM[v:any:15994915000]M[v:any:15994915000]VPMKVQKK"),
        ("ZQRYJFVYMTFZK", "ZQRYJFVYMTFZK"),
        ("JGTWPASJHDSLYHR", "JGTWPASJHDSLYHR"),
        ("HTBDTEZJCROIK", "HTBDTEZJC[s:any:57021464000]ROIK"),
        ("ALWJEYSRCJEANK", "ALWJEYSRC[s:any:57021464000]JEANK"),
        ("WGDSCDIGAJJPPVPR", "WGDSC[s:any:57021464000]DIGAJJPPVPR"),
        ("MJREDFJIEIWCK", "MJREDFJIEIWC[s:any:57021464000]K"),
        ("VGHQMAMGPPJVDQJK",
         "VGHQM[v:any:15994915000]AM[v:any:15994915000]GPPJVDQJK"),
        ("WGAYRRJYWYYR", "WGAYRRJYWYYR"),
        ("KGQRVYZMNBQTJR", "KGQRVYZM[v:any:15994915000]NBQTJR"),
        ("SWMQEKSPVFWAIK", "SWM[v:any:15994915000]QEKSPVFWAIK"),
        ("FKQAGTVMYMYJJR",
         "FKQAGTVM[v:any:15994915000]YM[v:any:15994915000]YJJR"),
        ("BJPVSFPQBHGTWVR", "BJPVSFPQBHGTWVR"),
        ("QDGLJJPFWNMYQK", "QDGLJJPFWNMYQK"),
        ("DCITAMHPAKPMPKR", "DC[s:any:57021464000]ITAMHPAKPMPKR"),
        ("BMJZMJVDYJPRMK", "BMJZMJVDYJPRMK"),
    ]
    """Plain and PTM annotated peptide sequences

    Only the plain sequence is validated; the annotated sequence is shown in
    the failure message as the expected annotation.
    """

    MODIFICATION_COLLECTION: ClassVar[
        ModificationCollection] = ModificationCollection.read_from_csv_file(
            pathlib.Path("./test_files/modifications.csv"))
    """Modification collection, read from the test CSV file when the class is defined
    """

    NUMBER_OF_VARIABLE_MODIFICATIONS: ClassVar[int] = 3
    """Number of variable modifications
    """

    def test_validation(self):
        """
        Checks if mass validation works.

        Each peptide runs in its own sub-test so that one failing peptide
        does not hide the results of the remaining ones.
        """
        test_case_class = self.__class__
        peptide_mass_validator = PeptideMassValidator(
            test_case_class.MODIFICATION_COLLECTION,
            test_case_class.NUMBER_OF_VARIABLE_MODIFICATIONS,
            test_case_class.PRECURSOR_RANGE)
        for plain_sequence, annotated_sequence in test_case_class.PEPTIDE_SEQUENCES:
            with self.subTest(sequence=plain_sequence):
                peptide = Peptide(
                    plain_sequence,
                    Trypsin.count_missed_cleavages(plain_sequence))
                # NOTE(review): the second argument presumably makes the
                # validator annotate the peptide with modification markers,
                # which the failure message prints - confirm against
                # PeptideMassValidator.validate.
                self.assertTrue(
                    peptide_mass_validator.validate(peptide, True),
                    f"expected: {annotated_sequence}; is: {peptide.sequence_with_modification_markers}"
                )
Exemple #11
0
 def __init__(self, name: str, mono_mass: float, average_mass: float):
     """
     Stores the name and both masses, each mass converted with mass_to_int.

     Parameters
     ----------
     name : str
         Name, stored unchanged
     mono_mass : float
         Value stored as mono_mass after conversion with mass_to_int
     average_mass : float
         Value stored as average_mass after conversion with mass_to_int
     """
     self.name = name
     converted_mono_mass = mass_to_int(mono_mass)
     converted_average_mass = mass_to_int(average_mass)
     self.mono_mass, self.average_mass = (converted_mono_mass,
                                          converted_average_mass)