Example #1
0
    def proteins(sequence: str):
        """
        Stream all proteins associated with the given peptide sequence as JSON,
        split into `reviewed_proteins` and `unreviewed_proteins` arrays.
        """
        peptide = Peptide(sequence.upper(), 0)
        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            matching_proteins = Protein.select(
                database_cursor,
                WhereCondition([
                    f"accession = ANY(SELECT protein_accession FROM {ProteinPeptideAssociation.TABLE_NAME} as ppa WHERE ppa.partition = %s AND ppa.peptide_mass = %s AND ppa.peptide_sequence = %s)"
                ], [peptide.partition, peptide.mass, peptide.sequence]), True)

            # Partition the result set by review status.
            reviewed_proteins = []
            unreviewed_proteins = []
            for matched_protein in matching_proteins:
                bucket = reviewed_proteins if matched_protein.is_reviewed else unreviewed_proteins
                bucket.append(matched_protein)

            def json_stream() -> Iterator[bytes]:
                def emit_protein_array(protein_list):
                    # Yield the proteins as comma separated JSON objects.
                    for position, current_protein in enumerate(protein_list):
                        if position:
                            yield b","
                        yield from current_protein.to_json()

                yield b"{\"reviewed_proteins\": ["
                yield from emit_protein_array(reviewed_proteins)
                yield b"],\"unreviewed_proteins\": ["
                yield from emit_protein_array(unreviewed_proteins)
                yield b"]}"

            return Response(json_stream(), content_type="application/json")
Example #2
0
    def show(accession: str):
        """Return the protein with the given accession as JSON, or 404 if unknown."""
        normalized_accession = accession.upper()

        database_connection = get_database_connection()

        with database_connection.cursor() as database_cursor:
            matched_protein = Protein.select(
                database_cursor,
                WhereCondition(["accession = %s"], [normalized_accession]),
                False
            )

            # Guard clause: unknown accession yields a 404 with an error body.
            if not matched_protein:
                return jsonify({
                    "errors": {
                        "accession": ["not found"]
                    }
                }), 404
            return Response(
                matched_protein.to_json(),
                content_type="application/json"
            )
Example #3
0
 def show(sequence: str):
     """
     Return the peptide matching the given sequence as JSON.

     Query parameter `is_reviewed` (optional, "0" or "1") additionally
     requires the peptide's metadata to have is_trembl (0) or
     is_swiss_prot (1) set. Responds with 404 if no matching peptide is
     found and 422 if `is_reviewed` is not an integer.
     """
     is_reviewed = request.args.get("is_reviewed", None)
     if is_reviewed is not None:
         try:
             # Query parameters arrive as strings; "0" -> False, "1" -> True.
             # Previously a non-integer value raised an uncaught ValueError
             # (HTTP 500); now it is reported as a validation error.
             is_reviewed = bool(int(is_reviewed))
         except ValueError:
             return jsonify(
                 {"errors": {"is_reviewed": ["must be an integer"]}}), 422
     sequence = sequence.upper()
     database_connection = get_database_connection()
     with database_connection.cursor() as database_cursor:
         # Temporary peptide, only used to derive partition and mass for the query
         peptide = Peptide(sequence, 0, None)
         peptide = Peptide.select(
             database_cursor,
             WhereCondition([
                 "partition = %s", "AND", "mass = %s", "AND",
                 "sequence = %s"
             ], [peptide.partition, peptide.mass, peptide.sequence]),
             include_metadata=True)
         if peptide is None:
             return jsonify({"errors": {"sequence": ["not found"]}}), 404
         # Return peptide if is_reviewed is not requested (None),
         # or is_reviewed is requested and True and metadata is_swiss_prot is also True
         # or is_reviewed is requested and False and metadata is_trembl is True
         if is_reviewed is None \
             or is_reviewed and peptide.metadata.is_swiss_prot \
             or not is_reviewed and peptide.metadata.is_trembl:
             return Response(peptide.to_json(),
                             content_type="application/json")
         return jsonify({}), 404
Example #4
0
    def peptides(accession: str):
        """Stream all peptides of the protein with the given accession, sorted by mass."""
        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            matching_peptides = Peptide.select(
                database_cursor,
                WhereCondition(
                    [f"(peps.partition, peps.mass, peps.sequence) IN (SELECT partition, peptide_mass, peptide_sequence FROM {ProteinPeptideAssociation.TABLE_NAME} as ppa WHERE ppa.protein_accession = %s)"],
                    [accession]
                ),
                fetchall=True,
                include_metadata=True
            )
            matching_peptides.sort(key=lambda matched_peptide: matched_peptide.mass)

            def json_stream() -> Iterator[bytes]:
                # Stream the peptide array instead of building it in memory.
                yield b"{\"peptides\": ["
                for position, matched_peptide in enumerate(matching_peptides):
                    if position:
                        yield b","
                    yield from matched_peptide.to_json()
                yield b"]}"

            return Response(
                json_stream(),
                content_type="application/json"
            )
Example #5
0
    def show(id):
        """Return the taxonomy with the given id as JSON, following merge records."""
        database_connection = get_database_connection()
        response = None
        with database_connection.cursor() as database_cursor:
            taxonomy = Taxonomy.select(database_cursor, ("id = %s", [id]))

            if not taxonomy:
                # The id may have been merged into another taxonomy entry.
                merge_record = TaxonomyMerge.select(
                    database_cursor, ("source_id = %s", [id]))
                if merge_record:
                    taxonomy = Taxonomy.select(
                        database_cursor,
                        ("id = %s", [merge_record.target_id]))

            if taxonomy:
                response = {
                    "id": taxonomy.id,
                    "name": taxonomy.name,
                    "parent": taxonomy.parent_id,
                    "rank": taxonomy.rank,
                }

        if response:
            return jsonify(response)
        return jsonify(["not found"]), 422
Example #6
0
    def sub_species(id):
        """List all sub species of the taxonomy with the given id, following merge records."""
        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            taxonomy = Taxonomy.select(database_cursor, ("id = %s", [id]))

            if taxonomy is None:
                # The id may have been merged into another taxonomy entry.
                merge_record = TaxonomyMerge.select(
                    database_cursor, ("source_id = %s", [id]))
                if merge_record:
                    taxonomy = Taxonomy.select(
                        database_cursor,
                        ("id = %s", [merge_record.target_id]))

            if taxonomy is not None:
                sub_taxonomies = taxonomy.sub_species(database_cursor)
                return jsonify({
                    "sub_species": [{
                        "id": sub_taxonomy.id,
                        "name": sub_taxonomy.name,
                        "parent": sub_taxonomy.parent_id,
                        "rank": sub_taxonomy.rank,
                    } for sub_taxonomy in sub_taxonomies]
                })
        return jsonify(["not found"]), 422
Example #7
0
    def by_ids():
        """
        Return id and name of the taxonomies whose ids are given in the
        JSON body (`{"ids": [int, ...]}`).

        Responds with 422 and an error object on invalid input.
        """
        data = request.get_json()
        errors = defaultdict(list)

        # Bug fix: the original appended BOTH "cannot be missing" and the
        # type error when the key was absent; an if/elif chain reports
        # exactly one problem per field.
        if "ids" not in data:
            errors["ids"].append("cannot be missing")
        elif not isinstance(data["ids"], list):
            errors["ids"].append("must be an array")
        else:
            for idx, id in enumerate(data["ids"]):
                if not isinstance(id, int):
                    errors[f"ids[{idx}]"].append("must be an integer")

        if len(errors):
            # Return 422 like every other validation failure in this API
            # (previously this returned HTTP 200 together with the errors).
            return jsonify({"errors": errors}), 422

        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            return jsonify({
                "taxonomies": [{
                    "id": taxonomy.id,
                    "name": taxonomy.name
                } for taxonomy in Taxonomy.select(database_cursor, (
                    "id = ANY(%s)", [data["ids"]]), True)]
            })
Example #8
0
    def maintenance():
        """Return the stored maintenance comment and digestion parameters as JSON."""
        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            comment = MaintenanceInformation.select(
                database_cursor, MaintenanceInformation.COMMENT_KEY)
            digestion_parameter = MaintenanceInformation.select(
                database_cursor, MaintenanceInformation.DIGESTION_PARAMTERS_KEY)

            return jsonify({
                "comment": comment.values["comment"] if comment is not None else None,
                # Guard against a missing record, same as `comment` above
                # (previously raised AttributeError when the record was absent).
                "digestion_parameters": digestion_parameter.values
                if digestion_parameter is not None else None
            })
Example #9
0
    def digest():
        """
        Digests the sequence of the given protein.

        Expects a JSON body with an `accession` plus the digestion parameters
        checked by `ApiDigestionController.check_digestion_parameters`.
        Streams the matching peptides (sorted by mass) as JSON, or responds
        with 422 and the collected validation errors.
        """
        data = request.get_json()
        errors = ApiDigestionController.check_digestion_parameters(data)

        if "accession" not in data:
            errors["accession"].append("cannot be empty")

        peptides = []
        if len(errors) == 0:
            database_connection = get_database_connection()
            with database_connection.cursor() as database_cursor:
                protein = Protein.select(
                    database_cursor,
                    WhereCondition(
                        ["accession = %s"],
                        [data["accession"]]
                    ),
                    False
                )
                if protein:
                    # Keep only peptides within the requested missed cleavage
                    # and length limits.
                    peptides = [
                        peptide for peptide in protein.peptides(database_cursor)
                        if peptide.number_of_missed_cleavages <= data["maximum_number_of_missed_cleavages"]
                        and data["minimum_peptide_length"] <= peptide.length <= data["maximum_peptide_length"]
                    ]
                    peptides.sort(key=lambda peptide: peptide.mass)
                else:
                    errors["accession"].append("not found")

        if len(errors) == 0:
            def json_stream():
                yield b"{\"peptides\": ["
                for peptide_idx, peptide in enumerate(peptides):
                    if peptide_idx > 0:
                        yield b","
                    yield from peptide.to_json()
                # Typo fix: was `encode("utf-8 ")` with a stray trailing space;
                # codec-name normalization happened to tolerate it.
                yield f"], \"count\": {len(peptides)}}}".encode("utf-8")
            return Response(
                json_stream(),
                content_type="application/json"
            )
        else:
            return jsonify({
                "errors": errors
            }), 422
Example #10
0
    def status():
        """
        Return cluster status as JSON: number of worker nodes, rebalance job
        counts (total/finished/running) and the stored database status values.
        """
        # Column 8 of get_rebalance_progress() holds the job state; the
        # original code filtered with the literals 1 (running) / 2 (finished).
        REBALANCE_STATE_COLUMN = 8
        RUNNING_STATE = 1
        FINISHED_STATE = 2
        database_connection = get_database_connection()
        with database_connection.cursor() as database_cursor:
            # Removed stray C-style trailing semicolons from the execute calls.
            database_cursor.execute("SELECT count(*) from pg_dist_node")
            number_of_nodes = database_cursor.fetchone()[0]

            database_cursor.execute("SELECT * FROM get_rebalance_progress()")
            rebalance_job_rows = database_cursor.fetchall()
            finished_rebalance_job_rows = [
                row for row in rebalance_job_rows
                if row[REBALANCE_STATE_COLUMN] == FINISHED_STATE
            ]
            running_rebalance_job_rows = [
                row for row in rebalance_job_rows
                if row[REBALANCE_STATE_COLUMN] == RUNNING_STATE
            ]
            # NOTE(review): assumes the database status record always exists;
            # `.values` would raise AttributeError otherwise — confirm.
            database_status = MaintenanceInformation.select(
                database_cursor, MaintenanceInformation.DATABASE_STATUS_KEY)
            status: Dict[str, Any] = {
                "number_of_nodes": number_of_nodes,
                "number_of_rebalance_jobs": len(rebalance_job_rows),
                "number_of_finished_rebalance_jobs": len(finished_rebalance_job_rows),
                "number_of_running_rebalance_jobs": len(running_rebalance_job_rows),
            }
            status.update(database_status.values)
            return jsonify(status)
Example #11
0
    def search():
        """
        Search taxonomies by name (string query, `*` as wildcard) or by id
        (integer query, following merge records). JSON body: `{"query": str | int}`.
        """
        data = request.get_json()
        errors = defaultdict(list)

        # Bug fix: the original nested the type check inside the "missing key"
        # branch, so it read data["query"] (KeyError) exactly when the key was
        # absent — and `not isinstance(str) or not isinstance(int)` was always
        # true, so the type validation could never pass.
        if "query" not in data:
            errors["query"].append("must be present")
        elif not isinstance(data["query"], (str, int)):
            errors["query"].append("must be a string or an integer")

        if not len(errors):
            if isinstance(data["query"], str):
                # Translate the user-facing wildcard `*` into SQL's `%`.
                query = data["query"].replace("*", "%")
                condition = ("name LIKE %s", [query])
            else:
                condition = ("id = %s", [data["query"]])

            database_connection = get_database_connection()
            with database_connection.cursor() as database_cursor:
                taxonomies = Taxonomy.select(database_cursor,
                                             condition,
                                             fetchall=True)

                # An unknown id may have been merged into another taxonomy.
                if isinstance(data["query"], int) and not len(taxonomies):
                    taxonomy_merge = TaxonomyMerge.select(
                        database_cursor, ("source_id = %s", [data["query"]]))
                    if taxonomy_merge:
                        taxonomies = Taxonomy.select(
                            database_cursor,
                            ("id = %s", [taxonomy_merge.target_id]),
                            fetchall=True)

                response = [{
                    "id": taxonomy.id,
                    "name": taxonomy.name
                } for taxonomy in taxonomies]

            return jsonify(response)
        else:
            return jsonify({"errors": errors}), 422
Example #12
0
    def digest():
        """
        Digest a given peptide/sequence, search the resulting peptides in the database and return matching and not matching peptides in separate array.

        Expects a JSON body with `sequence`, the digestion parameters checked
        by `ApiDigestionController.check_digestion_parameters` and optionally
        `do_database_search` (bool). Streams a JSON object with `database`,
        `digestion` and `count` keys, or responds with 422 and the errors.
        """
        data = request.get_json()
        errors = ApiDigestionController.check_digestion_parameters(data)

        if not "sequence" in data:
            errors["sequence"].append("cannot be empty")

        digestion_peptides = []
        database_peptides = []
        if len(errors) == 0:
            # In-silico digestion of the given sequence with trypsin; the
            # sequence is wrapped in a throwaway protein ("TMP" placeholders).
            EnzymeClass = get_digestion_enzyme_by_name("trypsin")
            enzyme = EnzymeClass(data["maximum_number_of_missed_cleavages"],
                                 data["minimum_peptide_length"],
                                 data["maximum_peptide_length"])
            digestion_peptides = enzyme.digest(
                Protein("TMP", [], "TMP", "TMP", data["sequence"], [], [],
                        False, 0))

            if "do_database_search" in data and isinstance(
                    data["do_database_search"],
                    bool) and data["do_database_search"]:
                # NOTE(review): an empty digestion result would produce an
                # empty tuple for the SQL `IN %s` here — confirm that
                # WhereCondition/the DB driver handles that case.
                database_connection = get_database_connection()
                with database_connection.cursor() as database_cursor:
                    database_peptides = Peptide.select(
                        database_cursor,
                        WhereCondition(
                            ["(partition, mass, sequence) IN %s"], (tuple(
                                (peptide.partition, peptide.mass,
                                 peptide.sequence)
                                for peptide in digestion_peptides), )),
                        fetchall=True)
                database_peptides.sort(key=lambda peptide: peptide.mass)
                # Peptides found in the database are removed from the
                # digestion-only list so the two arrays are disjoint.
                digestion_peptides = [
                    peptide for peptide in digestion_peptides
                    if peptide not in database_peptides
                ]

            digestion_peptides.sort(key=lambda peptide: peptide.mass)

        if len(errors) == 0:

            def json_stream() -> Iterator[bytes]:
                # Stream the JSON response instead of building it in memory.
                yield b"{\"database\": ["
                for peptide_idx, peptide in enumerate(database_peptides):
                    if peptide_idx > 0:
                        yield b","
                    yield from peptide.to_json()
                yield b"],\"digestion\": ["
                for peptide_idx, peptide in enumerate(digestion_peptides):
                    if peptide_idx > 0:
                        yield b","
                    yield from peptide.to_json()
                yield f"],\"count\": {len(database_peptides) +  len(digestion_peptides)}}}".encode(
                    "utf-8")

            return Response(json_stream(), content_type="application/json")

        else:
            return jsonify({"errors": errors}), 422
Example #13
0
    def _search(request, file_extension: str):
        """
        Shared peptide search implementation.

        Parses the search parameters from the request body (JSON or
        form-encoded), validates them, builds modification/metadata match
        conditions and streams the matching peptides in the requested output
        format (json/stream/fasta/csv/text). Responds with 422 and the
        collected errors on invalid input.
        """
        errors = defaultdict(list)
        data = None
        if request.headers.get("Content-Type", "") == "application/json":
            data = request.get_json()
        elif request.headers.get("Content-Type",
                                 "") == "application/x-www-form-urlencoded":
            # For use with classical form-tag. The JSON-formatted search parameters should be provided in the form parameter "search_params"
            data = json.loads(request.form.get("search_params", "{}"))

        # NOTE(review): `data` stays None for any other Content-Type and the
        # membership tests below would raise TypeError — confirm the routing
        # layer guarantees one of the two content types above.
        include_count = False
        if 'include_count' in data and isinstance(data['include_count'], bool):
            include_count = data['include_count']

        order_by = None
        if 'order_by' in data:
            if isinstance(data['order_by'], str) and data[
                    'order_by'] in ApiAbstractPeptideController.SUPPORTED_ORDER_COLUMNS:
                order_by = data['order_by']
            else:
                errors["order_by"].append(
                    f"must be a string with one of following values: {', '.join(ApiAbstractPeptideController.SUPPORTED_ORDER_COLUMNS)}"
                )

        if 'order_direction' in data:
            if not isinstance(data['order_direction'], str) or not data[
                    'order_direction'] in ApiAbstractPeptideController.SUPPORTED_ORDER_DIRECTIONS:
                errors["order_direction"].append(
                    f"'order_direction' must be a string with one of following values: {', '.join(ApiAbstractPeptideController.SUPPORTED_ORDER_DIRECTIONS)}"
                )

        include_metadata = False
        if "include_metadata" in data:
            if isinstance(data["include_metadata"], bool):
                include_metadata = data["include_metadata"]
            else:
                errors["include_metadata"].append("must be a boolean")

        # Output format: explicit file extension wins; otherwise derived from
        # the Accept header, defaulting to JSON when unknown.
        output_style = None
        if file_extension is not None:
            try:
                output_style = OutputFormat.from_name(file_extension)
            except KeyError:
                pass
        else:
            try:
                output_style = OutputFormat.from_value(
                    request.headers.get("accept", default=""))
            except KeyError:
                output_style = OutputFormat.json

        # validate int attributes
        for attribute in [
                "lower_precursor_tolerance_ppm",
                "upper_precursor_tolerance_ppm",
                "variable_modification_maximum"
        ]:
            if attribute in data:
                if isinstance(data[attribute], int):
                    if data[attribute] < 0:
                        errors[attribute].append("not greater or equals 0")
                else:
                    errors[attribute].append("not an integer")
            else:
                errors[attribute].append("cannot be empty")

        # Parse the requested modifications; each gets a synthetic
        # "onlinemod:<idx>" accession/name and its delta converted to the
        # internal integer mass representation.
        modifications = []
        if "modifications" in data:
            if isinstance(data["modifications"], list):
                for idx, modification_attributes in enumerate(
                        data["modifications"]):
                    if isinstance(modification_attributes, dict):
                        accession_and_name = "onlinemod:{}".format(idx)
                        try:
                            modification_attributes[
                                'accession'] = accession_and_name
                            modification_attributes[
                                'name'] = accession_and_name
                            modification_attributes['delta'] = mass_to_int(
                                modification_attributes['delta'])
                            modifications.append(
                                Modification.from_dict(
                                    modification_attributes))
                        except Exception as e:
                            errors[f"modifications[{idx}]"].append(
                                "is invalid")
                    else:
                        errors[f"modifications[{idx}]"].append(
                            "not a dictionary")
            else:
                errors["modifications"].append(
                    "modifications has to be of type list")

        try:
            modification_collection = ModificationCollection(modifications)
        except Exception as e:
            errors["modifications"].append(f"{e}")

        database_connection = get_database_connection()
        if not len(errors):
            if "precursor" in data:
                if isinstance(data["precursor"], float) or isinstance(
                        data["precursor"], int):

                    modification_combination_list = ModificationCombinationList(
                        modification_collection,
                        mass_to_int(data["precursor"]),
                        data["lower_precursor_tolerance_ppm"],
                        data["upper_precursor_tolerance_ppm"],
                        data["variable_modification_maximum"])

                    metadata_condition = MetadataCondition()

                    # List of metadata conditions
                    if "taxonomy_id" in data:
                        if isinstance(data["taxonomy_id"], int):
                            with database_connection.cursor(
                            ) as database_cursor:
                                taxonomy = Taxonomy.select(
                                    database_cursor,
                                    ("id = %s", (data["taxonomy_id"], )))
                                if taxonomy is not None:
                                    # Match the taxonomy itself and all of its
                                    # sub species.
                                    metadata_condition.taxonomy_ids = [
                                        sub.id for sub in taxonomy.sub_species(
                                            database_cursor)
                                    ]
                                else:
                                    errors["taxonomy_id"].append("not found")

                        else:
                            errors["taxonomy_id"].append("must be an integer")

                    if "proteome_id" in data:
                        if isinstance(data["proteome_id"], str):
                            metadata_condition.proteome_id = data[
                                "proteome_id"]
                        else:
                            errors["proteome_id"].append("must be a string")

                    if "is_reviewed" in data:
                        if isinstance(data["is_reviewed"], bool):
                            if data["is_reviewed"]:
                                metadata_condition.is_swiss_prot = True
                            else:
                                metadata_condition.is_trembl = True
                        else:
                            errors["is_reviewed"].append("must be a boolean")

                    # Sort by `order_by`
                    order_by_instruction = None
                    if order_by and not output_style == OutputFormat.text:
                        order_by_instruction = f"{order_by} {data['order_direction']}"

                    # Note about offset and limit: It is much faster to fetch data from server and discard rows below the offset and stop the fetching when the limit is reached, instead of applying LIMIT and OFFSET directly to the query.
                    # Even on high offsets, which discards a lot of rows, this approach is faster.
                    # Curl shows the diffences: curl -o foo.json --header "Content-Type: application/json" --request POST --data '{"include_count":true,"offset":0,"limit":50,"modifications":[{"amino_acid":"C","position":"anywhere","is_static":true,"delta":57.021464}],"lower_precursor_tolerance_ppm":5,"upper_precursor_tolerance_ppm":5,"variable_modification_maximum":0,"order":true,"precursor":859.49506802369}' http://localhost:3000/api/peptides/search
                    # Applying OFFSET and LIMIT to query: 49 - 52 seconds
                    # Discarding rows which are below the offset and stop the fetching early: a few hundred miliseconds (not printed by curl).
                    offset = 0
                    limit = math.inf
                    if "limit" in data:
                        if isinstance(data["limit"], int):
                            limit = data["limit"]
                        else:
                            errors["limit"].append("must be an integer")
                    if "offset" in data:
                        if isinstance(data["offset"], int):
                            offset = data["offset"]
                        else:
                            errors["offset"].append("must be an integer")

                else:
                    errors["precursor"] = ["must be an integer or float"]
            else:
                errors["precursor"] = ["cannot be missing"]

        if len(errors):
            return jsonify({"errors": errors}), 422

        # From here on the precursor branch above has run without errors, so
        # metadata_condition, modification_combination_list,
        # order_by_instruction, offset and limit are all bound.
        include_metadata = include_metadata or metadata_condition.has_conditions(
        )

        # Per-format streaming configuration; defaults produce empty output.
        peptide_conversion = lambda _, __: (
            b"", )  # lambda to convert peptide to output type
        delimiter = b""  # delimiter between each converted peptide
        pre_peptide_content = b""  # content before peptide
        post_peptide_content = lambda _, __: b""  # content after peptides

        if output_style == OutputFormat.json:
            peptide_conversion = lambda _, peptide: peptide.to_json()
            delimiter = b","
            pre_peptide_content = b"{\"peptides\":["
            post_peptide_content = lambda _, __: b"]}"
            if include_count:
                post_peptide_content = lambda database_cursor, where_condition: f"],\"count\":{Peptide.count(database_cursor, where_condition)}}}".encode(
                    "utf-8")
        elif output_style == OutputFormat.stream:
            peptide_conversion = lambda _, peptide: peptide.to_json()
            delimiter = b"\n"
        elif output_style == OutputFormat.fasta:
            peptide_conversion = lambda peptide_idx, peptide: peptide.to_fasta_entry(
                f"P{peptide_idx}".encode())
            delimiter = b"\n"
        elif output_style == OutputFormat.csv:
            peptide_conversion = lambda _, peptide: peptide.to_csv_row()
            delimiter = b"\n"
            pre_peptide_content = (
                ",".join(Peptide.CSV_HEADER).encode("utf-8") if not include_metadata else \
                ",".join(Peptide.CSV_HEADER + Peptide.METADATA_CSV_HEADER).encode("utf-8")
            ) + b"\n"
        elif output_style == OutputFormat.text:
            peptide_conversion = lambda _, peptide: peptide.to_plain_text()
            delimiter = b"\n"

        return Response(ApiAbstractPeptideController.stream(
            peptide_conversion, delimiter, pre_peptide_content,
            post_peptide_content,
            modification_combination_list.to_where_condition(),
            order_by_instruction, offset, limit, include_metadata,
            metadata_condition),
                        content_type=f"{output_style}; charset=utf-8")