コード例 #1
0
ファイル: mmseqs.py プロジェクト: ccdmb/predector-utils
    def from_line(cls, line: str) -> "MMSeqs":
        """ Parse one tab-separated line as an MMSeqs object.

        The line is stripped and split on tabs into exactly 18 columns,
        each of which is handed to its own field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 18 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        # maxsplit=17 caps the result at 18 fields; any surplus tabs stay
        # inside the final field, so only too-few columns can fail here.
        fields = line.strip().split("\t", maxsplit=17)
        n_fields = len(fields)
        if n_fields != 18:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 18 but got {n_fields}")

        (query, target, qstart, qend, qlen, tstart, tend, tlen, evalue,
         gapopen, pident, alnlen, raw, bits, cigar, mismatch, qcov,
         tcov) = fields

        # The two start coordinates are decremented by one; the end
        # coordinates are passed through unchanged.
        return cls(
            mm_query(query),
            mm_target(target),
            mm_qstart(qstart) - 1,
            mm_qend(qend),
            mm_qlen(qlen),
            mm_tstart(tstart) - 1,
            mm_tend(tend),
            mm_tlen(tlen),
            mm_evalue(evalue),
            mm_gapopen(gapopen),
            mm_pident(pident),
            mm_alnlen(alnlen),
            mm_raw(raw),
            mm_bits(bits),
            mm_cigar(cigar),
            mm_mismatch(mismatch),
            mm_qcov(qcov),
            mm_tcov(tcov),
        )
コード例 #2
0
    def from_line(cls, line: str) -> "PfamScan":
        """ Parse one whitespace-separated line as a PfamScan object.

        The line must have 15 columns, optionally followed by a 16th
        column holding the predicted active sites. Everything after the
        15th column is kept together as that optional column.

        Raises a LineParseError if the line is the empty string, has
        fewer than 15 columns, or has an active-site column that
        parse_predicted_active_site rejects.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        # 15 splits yield at most 16 fields. The previous maxsplit=16
        # allowed 17, so an active-site column containing whitespace was
        # broken apart and the line rejected, even though
        # parse_predicted_active_site removes spaces itself.
        sline = MULTISPACE_REGEX.split(line.strip(), maxsplit=15)
        if len(sline) not in (15, 16):
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 15 or 16 but got {len(sline)}")

        if len(sline) == 15:
            active_sites: Optional[str] = None
        else:
            active_sites = parse_predicted_active_site(sline[15])

        # The three start coordinates are decremented by one; the end
        # coordinates are passed through unchanged.
        return cls(
            ps_name(sline[0]),
            ps_ali_start(sline[1]) - 1,
            ps_ali_end(sline[2]),
            ps_env_start(sline[3]) - 1,
            ps_env_end(sline[4]),
            ps_hmm(sline[5]),
            ps_hmm_name(sline[6]),
            ps_hmm_type(sline[7]),
            ps_hmm_start(sline[8]) - 1,
            ps_hmm_end(sline[9]),
            ps_hmm_len(sline[10]),
            ps_bitscore(sline[11]),
            ps_evalue(sline[12]),
            ps_is_significant(sline[13]),
            ps_clan(sline[14]),
            active_sites,
        )
コード例 #3
0
    def from_line(cls, line: str) -> "SignalP4":
        """ Parse a short-format signalp4 line as an object.

        The line is stripped and split on runs of whitespace into
        exactly 12 columns, each handed to its own field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 12 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        # Strip first: splitting the raw line turns a trailing newline or
        # space into an extra empty field, which made valid rows fail the
        # column count below.
        sline = MULTISPACE_REGEX.split(line.strip())

        if len(sline) != 12:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 12 but got {len(sline)}")

        return cls(
            s4_name(sline[0]),
            s4_cmax(sline[1]),
            s4_cmax_pos(sline[2]),
            s4_ymax(sline[3]),
            s4_ymax_pos(sline[4]),
            s4_smax(sline[5]),
            s4_smax_pos(sline[6]),
            s4_smean(sline[7]),
            s4_d(sline[8]),
            s4_decision(sline[9]),
            s4_dmax_cut(sline[10]),
            s4_networks_used(sline[11]),
        )
コード例 #4
0
    def from_line(cls, line: str) -> "TargetPNonPlant":
        """ Parse one tab-separated line as a TargetPNonPlant object.

        The line must have 5 columns, optionally followed by a 6th
        column that is stored as-is in `cs_pos`. A prediction of "noTP"
        is reported as "OTHER".

        Raises a LineParseError if the line is the empty string or does
        not contain 5 or 6 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns not in (5, 6):
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 5 or 6 but got {n_columns}")

        # The sixth column is optional; without it there is no cs_pos.
        cs_pos: Optional[str] = columns[5] if n_columns == 6 else None

        parsed_prediction = tp_prediction(columns[1])
        prediction = (
            "OTHER" if parsed_prediction == "noTP" else parsed_prediction
        )

        return cls(
            tp_name(columns[0]),
            prediction,
            tp_other(columns[2]),
            tp_sp(columns[3]),
            tp_mtp(columns[4]),
            cs_pos=cs_pos,
        )
コード例 #5
0
    def from_line(cls, line: str) -> "SignalP3NN":
        """ Parse a short-format NN line as an object.

        The line is stripped and split on runs of whitespace into
        exactly 14 columns, each handed to its own field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 14 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        # Strip first: splitting the raw line turns a trailing newline or
        # space into an extra empty field, which made valid rows fail the
        # column count below.
        sline = MULTISPACE_REGEX.split(line.strip())

        if len(sline) != 14:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 14 but got {len(sline)}")
        return cls(
            s3nn_name(sline[0]),
            s3nn_cmax(sline[1]),
            s3nn_cmax_pos(sline[2]),
            s3nn_cmax_decision(sline[3]),
            s3nn_ymax(sline[4]),
            s3nn_ymax_pos(sline[5]),
            s3nn_ymax_decision(sline[6]),
            s3nn_smax(sline[7]),
            s3nn_smax_pos(sline[8]),
            s3nn_smax_decision(sline[9]),
            s3nn_smean(sline[10]),
            s3nn_smean_decision(sline[11]),
            s3nn_d(sline[12]),
            s3nn_d_decision(sline[13]),
        )
コード例 #6
0
ファイル: hmmer.py プロジェクト: ccdmb/predector-utils
    def from_line(cls, line: str) -> "DomTbl":
        """ Parse one whitespace-separated line as a DomTbl object.

        The line must have 22 columns, optionally followed by a free-text
        description that may itself contain spaces. A missing, empty, or
        "-" description is stored as None.

        Raises a LineParseError if the line is the empty string or does
        not contain 22 or 23 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        # maxsplit=22 caps the result at 23 fields, so everything after
        # the 22nd column stays together as the description.
        fields = MULTISPACE_REGEX.split(line.strip(), maxsplit=22)
        n_fields = len(fields)
        if n_fields not in (22, 23):
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 22 or 23 but got {n_fields}")

        description: Optional[str] = None
        if n_fields == 23 and fields[22] not in ("-", ""):
            description = fields[22]

        # Columns are not consumed in file order, and several are unused.
        # The two "from" coordinates are decremented by one.
        return cls(
            hm_name(fields[3]),
            hm_hmm(fields[0]),
            hm_hmm_len(fields[2]),
            hm_query_len(fields[5]),
            hm_full_evalue(fields[6]),
            hm_full_score(fields[7]),
            hm_full_bias(fields[8]),
            hm_nmatches(fields[10]),
            hm_domain_c_evalue(fields[11]),
            hm_domain_i_evalue(fields[12]),
            hm_domain_score(fields[13]),
            hm_domain_bias(fields[14]),
            hm_hmm_from(fields[15]) - 1,
            hm_hmm_to(fields[16]),
            hm_query_from(fields[17]) - 1,
            hm_query_to(fields[18]),
            hm_acc(fields[21]),
            description,
        )
コード例 #7
0
ファイル: deeploc.py プロジェクト: ccdmb/predector-utils
    def from_line(cls, line: str) -> "DeepLoc":
        """ Parse one tab-separated line as a DeepLoc object.

        The line is stripped and split on tabs into exactly 13 columns,
        each handed to its own field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 13 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns != 13:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 13 but got {n_columns}")

        (name, prediction, membrane, nucleus, cytoplasm, extracellular,
         mitochondrion, cell_membrane, endoplasmic_reticulum, plastid,
         golgi_apparatus, lysosome, peroxisome) = columns

        return cls(
            dl_name(name),
            dl_prediction(prediction),
            dl_membrane(membrane),
            dl_nucleus(nucleus),
            dl_cytoplasm(cytoplasm),
            dl_extracellular(extracellular),
            dl_mitochondrion(mitochondrion),
            dl_cell_membrane(cell_membrane),
            dl_endoplasmic_reticulum(endoplasmic_reticulum),
            dl_plastid(plastid),
            dl_golgi_apparatus(golgi_apparatus),
            dl_lysosome(lysosome),
            dl_peroxisome(peroxisome),
        )
コード例 #8
0
ファイル: regex.py プロジェクト: ccdmb/predector-utils
    def from_line(cls, line: str) -> "RegexAnalysis":
        """ Parse a table line as an object.

        The line is stripped and split on tabs into exactly 6 columns;
        each column is additionally stripped of surrounding whitespace
        before being handed to its field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 6 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = list(map(str.strip, line.strip().split("\t")))
        n_columns = len(columns)

        if n_columns != 6:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 6 but got {n_columns}")

        name, kind, pattern, match, start, end = columns

        return cls(
            re_name(name),
            re_kind(kind),
            re_pattern(pattern),
            re_match(match),
            re_start(start),
            re_end(end),
        )
コード例 #9
0
ファイル: deepredeff.py プロジェクト: ccdmb/predector-utils
    def from_line(cls, line: str) -> "Deepredeff":
        """ Parse one tab-separated line as a Deepredeff object.

        The line is stripped and split on tabs into exactly 3 columns,
        each handed to its own field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 3 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        # maxsplit=3 allows up to four fields, so a line with too many
        # columns is rejected here just like one with too few.
        columns = line.strip().split("\t", maxsplit=3)
        n_columns = len(columns)
        if n_columns != 3:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 3 but got {n_columns}")

        name, s_score, prediction = columns

        return cls(
            dre_name(name),
            dre_s_score(s_score),
            dre_prediction(prediction),
        )
コード例 #10
0
    def _parse_alignment_line(
            line: str) -> Tuple[str, str, int, str, int, int, Optional[int]]:
        """ Parse a single alignment row into its typed components.

        The stripped line is split on whitespace into the columns type,
        id, ali_start, sequence, ali_end and length. The length column
        has leading "(" and trailing ")" characters removed before being
        parsed as an integer.

        Returns a tuple of (type, id, ali_start, sequence, ali_end,
        length, seq_begin), where seq_begin is the end offset of the
        ALI_REGEX match against the unstripped line, or None when the
        pattern does not match.

        Raises a LineParseError if the length column is missing; the
        field parsers are applied in tuple order.
        """

        tokens = MULTISPACE_REGEX.split(line.strip(), maxsplit=5)

        # zip stops at the shorter input, so columns that are absent from
        # the line are simply missing from the mapping.
        names = ("type", "id", "ali_start", "sequence", "ali_end", "length")
        record = dict(zip(names, tokens))

        raw_length = fmap(
            lambda text: text.lstrip("(").rstrip(")"),
            record.get("length"),
        )

        if raw_length is None:
            raise LineParseError(
                f"Missing 'length' from alignment line: '{line}'.")

        ali_match = ALI_REGEX.match(line)
        seq_begin: Optional[int] = (
            None if ali_match is None else ali_match.end()
        )

        # Parse in the same order as the returned tuple so that the first
        # invalid column is the one reported.
        kind = get_and_parse("type", "type", is_one_of(["T", "Q"]))(record)
        ident = get_and_parse("id", "id", parse_str)(record)
        ali_start = get_and_parse("ali_start", "ali_start", parse_int)(record)
        sequence = get_and_parse("sequence", "sequence", parse_str)(record)
        ali_end = get_and_parse("ali_end", "ali_end", parse_int)(record)
        length = raise_it(parse_field(parse_int, "length",
                                      "field"))(raw_length)

        return (kind, ident, ali_start, sequence, ali_end, length, seq_begin)
コード例 #11
0
    def from_line(cls, line: str) -> "EffectorP1":
        """ Parse an EffectorP1 line as an object.

        The line is stripped and split on tabs into exactly 3 columns:
        name, prediction and probability.

        Raises a LineParseError if the line is the empty string or does
        not contain 3 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns != 3:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 3 but got {n_columns}.")

        name, prediction, prob = columns

        return cls(
            e1_name(name),
            e1_prediction(prediction),
            e1_prob(prob),
        )
コード例 #12
0
    def from_line(cls, line: str) -> "EffectorP3":
        """ Parse an EffectorP3 line as an object.

        The line is stripped and split on tabs into exactly 5 columns.
        The prediction is read from the last column; the three middle
        columns hold the cytoplasmic, apoplastic and non-effector values.

        Raises a LineParseError if the line is the empty string or does
        not contain 5 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns != 5:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 5 but got {n_columns}.")

        name, cytoplasmic, apoplastic, noneffector, prediction = columns

        # The constructor takes the prediction directly after the name,
        # although it is the last column of the line.
        return cls(
            e3_name(name),
            e3_prediction(prediction),
            e3_parse_field(cytoplasmic, "cytoplasmic_prob"),
            e3_parse_field(apoplastic, "apoplastic_prob"),
            e3_parse_field(noneffector, "noneffector_prob"),
        )
コード例 #13
0
    def from_line(cls, line: str) -> "TMHMM":
        """ Parse a tmhmm line as an object.

        The line is stripped and split on tabs into exactly 6 columns,
        each handed to its own field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 6 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns != 6:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 6 but got {n_columns}")

        name, length, exp_aa, first_60, pred_hel, topology = columns

        return cls(
            tm_name(name),
            tm_length(length),
            tm_exp_aa(exp_aa),
            tm_first_60(first_60),
            tm_pred_hel(pred_hel),
            tm_topology(topology),
        )
コード例 #14
0
ファイル: deepsig.py プロジェクト: ccdmb/predector-utils
    def from_line(cls, line: str) -> "DeepSig":
        """ Parse a deepsig line as an object.

        The line is stripped and split on tabs into exactly 4 columns:
        name, prediction, probability and cs_pos.

        Raises a LineParseError if the line is the empty string or does
        not contain 4 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns != 4:
            raise LineParseError(
                "The line had the wrong number of columns. "
                f"Expected 4 but got {n_columns}"
            )

        name, prediction, prob, cs_pos = columns

        return cls(
            ds_name(name),
            ds_prediction(prediction),
            ds_prob(prob),
            ds_cs_pos(cs_pos),
        )
コード例 #15
0
    def from_line(cls, line: str) -> "SignalP3HMM":
        """ Parse a short-format HMM line as an object.

        The line is stripped and split on runs of whitespace into
        exactly 7 columns, each handed to its own field parser.

        Raises a LineParseError if the line is the empty string or does
        not contain 7 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        # Strip first: splitting the raw line turns a trailing newline or
        # space into an extra empty field, which made valid rows fail the
        # column count below.
        sline = MULTISPACE_REGEX.split(line.strip())

        if len(sline) != 7:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 7 but got {len(sline)}")

        # NOTE(review): the original author noted that in the "!" column
        # Q is non-secreted and A is something else, possibly a long
        # signal peptide. Confirm against the SignalP3 documentation.
        return cls(
            s3hmm_name(sline[0]),
            s3hmm_is_secreted(sline[1]),
            s3hmm_cmax(sline[2]),
            s3hmm_cmax_pos(sline[3]),
            s3hmm_cmax_decision(sline[4]),
            s3hmm_sprob(sline[5]),
            s3hmm_sprob_decision(sline[6]),
        )
コード例 #16
0
    def from_line(cls, line: str) -> "SignalP6":
        """ Parse one tab-separated line as a SignalP6 object.

        The line must have 4 columns, optionally followed by a 5th
        column holding the cs_pos value. Without the 5th column cs_pos
        is None.

        Raises a LineParseError if the line is the empty string or does
        not contain 4 or 5 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns not in (4, 5):
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 4 or 5 but got {n_columns}")

        cs_pos: Optional[str] = (
            s6_cs_pos(columns[4]) if n_columns == 5 else None
        )

        # The signal probability is read from column 3 and the "other"
        # probability from column 2, i.e. in the opposite order to the
        # constructor arguments.
        return cls(
            s6_name(columns[0]),
            s6_prediction(columns[1]),
            s6_prob_signal(columns[3]),
            s6_prob_other(columns[2]),
            cs_pos,
        )
コード例 #17
0
    def from_line(cls, line: str) -> "TargetPPlant":
        """ Parse one tab-separated line as a TargetPPlant object.

        The line must have 7 columns, optionally followed by an 8th
        column that is stored as-is as cs_pos. Without the 8th column
        cs_pos is None.

        Raises a LineParseError if the line is the empty string or does
        not contain 7 or 8 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = line.strip().split("\t")
        n_columns = len(columns)

        if n_columns not in (7, 8):
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 7 or 8 but got {n_columns}")

        cs_pos: Optional[str] = columns[7] if n_columns == 8 else None

        name, prediction, other, sp, mtp, ctp, lutp = columns[:7]

        return cls(
            tp_name(name),
            pl_prediction(prediction),
            tp_other(other),
            tp_sp(sp),
            tp_mtp(mtp),
            pl_ctp(ctp),
            pl_lutp(lutp),
            cs_pos,
        )
コード例 #18
0
def parse_predicted_active_site(
    field: str,
    field_name: str = "active_site",
) -> str:
    """ Normalise a 'predicted_active_site[...]' column to a plain string.

    The field is stripped and must start with "predicted_active_site".
    The text after that prefix is split on "[", each piece has
    surrounding "[", "]", ",", ";" and space characters trimmed, and all
    remaining spaces are removed. Non-empty pieces are joined with ";".

    For example "predicted_active_site[1, 2][3,4]" becomes "1,2;3,4".

    Raises a LineParseError, naming `field_name`, if the stripped field
    does not start with the expected prefix.
    """

    prefix = "predicted_active_site"
    cleaned = field.strip()

    if not cleaned.startswith(prefix):
        raise LineParseError(
            f"Invalid value: '{cleaned}' in the column: '{field_name}'. "
            "Must have the form 'predicted_active_site[1,2,3]'.")

    remainder = cleaned[len(prefix):]

    groups = []
    for chunk in remainder.split('['):
        trimmed = chunk.strip("[],; ")
        # Pieces that are empty once trimmed carry no positions.
        if trimmed:
            groups.append(trimmed.replace(' ', ''))

    return ';'.join(groups)
コード例 #19
0
    def from_line(cls, line: str) -> "LOCALIZER":
        """ Parse one tab-separated line as a LOCALIZER object.

        The line is stripped and split on tabs into exactly 4 columns
        (name, chloroplast, mitochondria, nucleus); each column is also
        stripped of surrounding whitespace. The chloroplast and
        mitochondria columns are expanded by parse_tp_field and the
        nucleus column by parse_nuc_field.

        Raises a LineParseError if the line is the empty string or does
        not contain 4 columns.
        """

        if line == "":
            raise LineParseError("The line was empty.")

        columns = list(map(str.strip, line.strip().split("\t")))
        n_columns = len(columns)

        if n_columns != 4:
            raise LineParseError("The line had the wrong number of columns. "
                                 f"Expected 4 but got {n_columns}")

        name_col, chloroplast_col, mitochondria_col, nucleus_col = columns

        cp, cp_prob, cp_start, cp_end = parse_tp_field(
            chloroplast_col, "chloroplast")
        mt, mt_prob, mt_start, mt_end = parse_tp_field(
            mitochondria_col, "mitochondria")
        nuc, nuc_sigs = parse_nuc_field(nucleus_col)

        def shift_start(position):
            # NOTE(review): subtracts one and adds a fixed offset of 20 to
            # the start only; the reason for the 20 is not visible here.
            return position - 1 + 20

        return cls(
            raise_it(parse_field(parse_str, "name"))(name_col),
            cp,
            cp_prob,
            fmap(shift_start, cp_start),
            cp_end,
            mt,
            mt_prob,
            fmap(shift_start, mt_start),
            mt_end,
            nuc,
            nuc_sigs,
        )
コード例 #20
0
 def _is_not_none(val: Optional[T], field_name: str) -> T:
     if val is None:
         raise LineParseError(
             f"Did not encounter {field_name} in alignment.")
     return val
コード例 #21
0
 def _is_not_empty(val: List[T], field_name: str) -> List[T]:
     if len(val) == 0:
         raise LineParseError(
             f"Did not encounter {field_name} in alignment.")
     return val