Ejemplo n.º 1
0
    def _parse_alignment_line(
            line: str) -> Tuple[str, str, int, str, int, int, Optional[int]]:
        """Split one alignment row into its typed columns.

        The stripped line is split on MULTISPACE_REGEX into at most six
        pieces, read in order as: type, id, ali_start, sequence, ali_end
        and length. Leading "(" and trailing ")" characters are removed
        from the length token before it is parsed as an integer.

        Returns:
            ``(type, id, ali_start, sequence, ali_end, length, seq_begin)``.
            ``seq_begin`` is the end offset of ``ALI_REGEX.match(line)`` on
            the unstripped line, or None when the pattern does not match.

        Raises:
            LineParseError: if no length value could be extracted.
            NOTE(review): other parse failures presumably propagate from
            get_and_parse / raise_it -- confirm in the parsers module.
        """
        names = ("type", "id", "ali_start", "sequence", "ali_end", "length")
        pieces = MULTISPACE_REGEX.split(line.strip(), maxsplit=5)
        # zip() stops at the shorter input, so a short line simply leaves
        # the trailing column names out of the mapping.
        fields = dict(zip(names, pieces))

        raw_length = fmap(
            lambda token: token.lstrip("(").rstrip(")"),
            fields.get("length"),
        )
        if raw_length is None:
            raise LineParseError(
                f"Missing 'length' from alignment line: '{line}'.")

        # Matched against the original line (not the stripped copy) so the
        # offset stays valid for indexing into `line`.
        ali_match = ALI_REGEX.match(line)
        seq_begin: Optional[int] = (
            None if ali_match is None else ali_match.end()
        )

        # Parsed in column order so the first bad column is the one reported.
        kind = get_and_parse("type", "type", is_one_of(["T", "Q"]))(fields)
        ident = get_and_parse("id", "id", parse_str)(fields)
        ali_start = get_and_parse("ali_start", "ali_start", parse_int)(fields)
        sequence = get_and_parse("sequence", "sequence", parse_str)(fields)
        ali_end = get_and_parse("ali_end", "ali_end", parse_int)(fields)
        parse_length = raise_it(parse_field(parse_int, "length", "field"))
        length = parse_length(raw_length)

        return (kind, ident, ali_start, sequence, ali_end, length, seq_begin)
Ejemplo n.º 2
0
import re

from typing import Optional
from typing import TextIO
from typing import Iterator

from predectorutils.analyses.base import Analysis
from predectorutils.parsers import (FieldParseError, LineParseError, raise_it,
                                    parse_field, parse_regex, parse_str,
                                    parse_float, is_one_of)
from predectorutils.analyses.base import float_or_none

# Field parsers for the three columns listed in EffectorP1.columns.
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in predectorutils.parsers.
e1_name = raise_it(parse_field(parse_str, "name"))
# Only these two labels are accepted for the prediction column.
e1_prediction = raise_it(parse_field(
    is_one_of(["Effector", "Non-effector"]),
    "prediction",
))
e1_prob = raise_it(parse_field(parse_float, "prob"))


class EffectorP1(Analysis):
    """Record holding one row of "effectorp1" analysis output.

    Attributes:
        name: value of the "name" column.
        prediction: value of the "prediction" column.
        prob: value of the "prob" column.
    """

    # Column names and, position for position, the type of each column.
    columns = [
        "name",
        "prediction",
        "prob",
    ]
    types = [
        str,
        str,
        float,
    ]
    # Identifiers for this analysis and for the program it belongs to.
    analysis = "effectorp1"
    software = "EffectorP"

    def __init__(self, name: str, prediction: str, prob: float) -> None:
        """Store the three column values unchanged on the instance."""
        self.name = name
        self.prediction = prediction
        self.prob = prob
Ejemplo n.º 3
0
    LineParseError,
    raise_it,
    parse_field,
    parse_str,
    parse_float,
    parse_int,
    parse_or_none,
    is_one_of
)

# Only the DeepSig class is exported by `from <module> import *`.
__all__ = ["DeepSig"]


# Field parsers for the four columns listed in DeepSig.columns.
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in predectorutils.parsers.
ds_name = raise_it(parse_field(parse_str, "name"))
# The prediction column is restricted to these three labels.
ds_prediction = raise_it(parse_field(
    is_one_of(["SignalPeptide", "Transmembrane", "Other"]),
    "prediction"
))
ds_prob = raise_it(parse_field(parse_float, "prob"))
# NOTE(review): "-" presumably marks a missing value and parses to None,
# matching the int_or_none type declared for cs_pos -- confirm.
ds_cs_pos = raise_it(parse_field(parse_or_none(parse_int, "-"), "cs_pos"))


class DeepSig(Analysis, GFFAble):

    """One row of "deepsig" analysis output.

    Declares four columns: name, prediction, prob and cs_pos.
    """

    # Column names and, position for position, the type of each column;
    # cs_pos uses int_or_none, so it may be absent.
    columns = ["name", "prediction", "prob", "cs_pos"]
    types = [str, str, float, int_or_none]
    # Identifiers for this analysis and for the program it belongs to.
    analysis = "deepsig"
    software = "DeepSig"
Ejemplo n.º 4
0
#!/usr/bin/env python3

from typing import TextIO
from typing import Iterator

from predectorutils.analyses.base import Analysis
from predectorutils.parsers import (FieldParseError, LineParseError, raise_it,
                                    parse_field, parse_str, parse_float,
                                    is_one_of)

# Only the ApoplastP class is exported by `from <module> import *`.
__all__ = ["ApoplastP"]

# Field parsers for the three columns listed in ApoplastP.columns.
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in predectorutils.parsers.
apo_name = raise_it(parse_field(parse_str, "name"))
# Only these two labels are accepted for the prediction column.
apo_prediction = raise_it(parse_field(
    is_one_of(["Apoplastic", "Non-apoplastic"]),
    "prediction",
))
apo_prob = raise_it(parse_field(parse_float, "prob"))


class ApoplastP(Analysis):
    """Record holding one row of "apoplastp" analysis output.

    Attributes:
        name: value of the "name" column.
        prediction: value of the "prediction" column.
        prob: value of the "prob" column.
    """

    # Column names and, position for position, the type of each column.
    columns = [
        "name",
        "prediction",
        "prob",
    ]
    types = [
        str,
        str,
        float,
    ]
    # Identifiers for this analysis and for the program it belongs to.
    analysis = "apoplastp"
    software = "ApoplastP"

    def __init__(self, name: str, prediction: str, prob: float) -> None:
        """Store the three column values unchanged on the instance."""
        self.name = name
        self.prediction = prediction
        self.prob = prob
Ejemplo n.º 5
0
    "Target",
    "Gap",
    "Derives_from",
    "Note",
    "Dbxref",
    "Ontology_term",
    "Is_circular",
]

# Field parsers for the record columns seqid, source, type, start, end,
# score, strand and phase (the GFF3 column names).
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in the parsers module.
rec_seqid = raise_it(parse_field(parse_str, "seqid"))
rec_source = raise_it(parse_field(parse_str, "source"))
rec_type = raise_it(parse_field(parse_str, "type"))
rec_start = raise_it(parse_field(parse_int, "start"))
rec_end = raise_it(parse_field(parse_int, "end"))
# NOTE(review): "." presumably stands for "no score" and parses to None.
rec_score = raise_it(parse_field(parse_or_none(parse_float, "."), "score"))
# Strand and phase are validated against a fixed set of literal strings;
# they are not converted to another type here.
rec_strand = raise_it(parse_field(is_one_of(["-", "+", ".", "?"]), "strand"))
rec_phase = raise_it(parse_field(is_one_of(["0", "1", "2", "."]), "phase"))


def parse_attr_list(string: str) -> List[str]:
    """Split a comma-separated attribute value into trimmed items.

    Commas and spaces are first removed from both ends of the whole
    string; the remainder is split on "," and each piece is stripped of
    surrounding whitespace. Empty or separator-only input yields ``[""]``,
    and empty pieces between consecutive commas are kept.
    """
    trimmed = string.strip(", ")
    return [piece.strip() for piece in trimmed.split(",")]


# Parser for the "is_circular" value, labelled as belonging to
# "attributes". Three spellings are listed for each boolean value.
# NOTE(review): the first list is presumably the True spellings and the
# second the False spellings -- confirm in parse_bool_options.
attr_is_circular = raise_it(parse_field(
    parse_bool_options(
        ["true", "TRUE", "True"],
        ["false", "FALSE", "False"],
    ),
    "is_circular",
    "attributes",
))

# Parser for the "target.id" value. The field name and the "attributes"
# label are separate arguments, as in attr_is_circular; without the comma
# Python fuses the adjacent literals into "target.idattributes".
attr_target_id = raise_it(parse_field(parse_str, "target.id", "attributes"))
attr_target_start = raise_it(
Ejemplo n.º 6
0
#!/usr/bin/env python3

from typing import TextIO
from typing import Iterator

from predectorutils.analyses.base import Analysis
from predectorutils.parsers import (FieldParseError, LineParseError,
                                    parse_field, raise_it, parse_str,
                                    parse_float, is_one_of)

# Field parsers for the dl_* columns: a name, a categorical prediction and
# one float column per category.
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in predectorutils.parsers.
dl_name = raise_it(parse_field(parse_str, "name"))
# The prediction column must be one of these labels.
dl_prediction = raise_it(parse_field(
    is_one_of([
        "Membrane",
        "Nucleus",
        "Cytoplasm",
        "Extracellular",
        "Mitochondrion",
        "Cell_membrane",
        "Endoplasmic_reticulum",
        "Plastid",
        "Golgi_apparatus",
        "Lysosome/Vacuole",
        "Peroxisome",
    ]),
    "prediction",
))
dl_membrane = raise_it(parse_field(parse_float, "membrane"))
dl_nucleus = raise_it(parse_field(parse_float, "nucleus"))
dl_cytoplasm = raise_it(parse_field(parse_float, "cytoplasm"))
dl_extracellular = raise_it(parse_field(parse_float, "extracellular"))
dl_mitochondrion = raise_it(parse_field(parse_float, "mitochondrion"))
dl_cell_membrane = raise_it(parse_field(parse_float, "cell_membrane"))
dl_endoplasmic_reticulum = raise_it(parse_field(
    parse_float,
    "endoplasmic_reticulum",
))
dl_plastid = raise_it(parse_field(parse_float, "plastid"))
dl_golgi_apparatus = raise_it(parse_field(parse_float, "golgi_apparatus"))
# The field name uses "_" where the prediction label has "/".
dl_lysosome = raise_it(parse_field(parse_float, "lysosome_vacuole"))
dl_peroxisome = raise_it(parse_field(parse_float, "peroxisome"))
Ejemplo n.º 7
0
#!/usr/bin/env python3

from typing import TextIO
from typing import Iterator

from predectorutils.analyses.base import Analysis
from predectorutils.parsers import (FieldParseError, LineParseError,
                                    parse_field, raise_it, parse_str,
                                    parse_float, is_one_of)

# Field parsers for the three columns listed in Deepredeff.columns.
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in predectorutils.parsers.
dre_name = raise_it(parse_field(parse_str, "name"))
dre_s_score = raise_it(parse_field(parse_float, "s_score"))
# The accepted labels are lower-case here.
dre_prediction = raise_it(parse_field(
    is_one_of(["effector", "non-effector"]),
    "prediction",
))


class Deepredeff(Analysis):
    """One row of deepredeff output: a name, an s_score and a prediction."""
    # Column names, in order.
    columns = [
        "name",
        "s_score",
        "prediction",
    ]

    # Type of each column, position for position with `columns`.
    types = [
        str,
        float,
        str,
    ]

    # Name of the program this record type belongs to.
    software = "deepredeff"
0
        return


# Field parsers for the s4_* columns, one per column name.
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in the parsers module.
s4_name = raise_it(parse_field(parse_str, "name"))
# Each *max score is a float paired with an integer *_pos column.
s4_cmax = raise_it(parse_field(parse_float, "cmax"))
s4_cmax_pos = raise_it(parse_field(parse_int, "cmax_pos"))
s4_ymax = raise_it(parse_field(parse_float, "ymax"))
s4_ymax_pos = raise_it(parse_field(parse_int, "ymax_pos"))
s4_smax = raise_it(parse_field(parse_float, "smax"))
s4_smax_pos = raise_it(parse_field(parse_int, "smax_pos"))
s4_smean = raise_it(parse_field(parse_float, "smean"))
s4_d = raise_it(parse_field(parse_float, "d"))
# NOTE(review): presumably "Y" parses to True and "N" to False -- confirm
# the argument order of parse_bool.
s4_decision = raise_it(parse_field(parse_bool("Y", "N"), "decision"))
s4_dmax_cut = raise_it(parse_field(parse_float, "dmax_cut"))
# Only these two network names are accepted.
s4_networks_used = raise_it(parse_field(
    is_one_of(["SignalP-noTM", "SignalP-TM"]),
    "networks_used",
))


class SignalP4(Analysis, GFFAble):
    """ The graphical output from SignalP (neural network) comprises
    three different scores, C, S and Y. Two additional scores are reported
    in the SignalP output, namely the S-mean and the D-score, but these
    are only reported as numerical values.

    For each organism class in SignalP; Eukaryote, Gram-negative and
    Gram-positive, two different neural networks are used, one for
    predicting the actual signal peptide and one for predicting the
    position of the signal peptidase I (SPase I) cleavage site.
    The S-score for the signal peptide prediction is reported for every
    single amino acid position in the submitted sequence, with high
    scores indicating that the corresponding amino acid is part of a
Ejemplo n.º 9
0
from typing import Iterator

from predectorutils.gff import (
    GFFRecord,
    GFFAttributes,
    Strand,
)
from predectorutils.analyses.base import Analysis, GFFAble
from predectorutils.analyses.base import float_or_none, str_or_none
from predectorutils.parsers import (FieldParseError, LineParseError,
                                    parse_field, raise_it, parse_str,
                                    parse_regex, parse_float, is_one_of)

# Field parsers shared by the tp_* columns.
# NOTE(review): each is presumably a callable applied to the raw column
# text that raises on invalid input -- confirm in predectorutils.parsers.
tp_name = raise_it(parse_field(parse_str, "name"))
tp_prediction = raise_it(parse_field(
    is_one_of(["OTHER", "noTP", "SP", "mTP", "cTP", "luTP"]),
    "prediction",
))
tp_other = raise_it(parse_field(parse_float, "OTHER"))
tp_sp = raise_it(parse_field(parse_float, "SP"))
tp_mtp = raise_it(parse_field(parse_float, "mTP"))

# pl_* variant: the same prediction labels without "noTP", plus float
# columns for the cTP and luTP scores.
pl_prediction = raise_it(parse_field(
    is_one_of(["OTHER", "SP", "mTP", "cTP", "luTP"]),
    "prediction",
))
pl_ctp = raise_it(parse_field(parse_float, "cTP"))
pl_lutp = raise_it(parse_field(parse_float, "luTP"))

# Matches text shaped like "CS pos: 26-27. AXA-XX. Pr: 0.8". The second
# number of the position range is captured as `cs`, and the number after
# "Pr: " as `cs_prob`.
CS_POS_REGEX = re.compile(
    r"CS\s+pos:\s+\d+-(?P<cs>\d+)\.?\s+"
    r"[A-Za-z]+-[A-Za-z]+\.?\s+"
    r"Pr: (?P<cs_prob>[-+]?\d*\.?\d+)"
)
cs_actual_pos = raise_it(parse_field(parse_regex(CS_POS_REGEX), "cs_pos"))