def from_line(cls, line: str) -> "MMSeqs":
    """Build an MMSeqs record from one tab-separated result line.

    The line is stripped and split on tabs into exactly 18 columns, each
    of which is handed to its ``mm_*`` field parser. The parsed query and
    target start values (columns 2 and 5) have 1 subtracted from them.

    Raises:
        LineParseError: if the line is empty or does not have 18 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t", maxsplit=17)
    n_columns = len(columns)

    # maxsplit=17 caps the result at 18 pieces, so this check can only
    # fail when the line has too few columns.
    if n_columns != 18:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 18 but got {n_columns}")

    (query, target, qstart, qend, qlen, tstart, tend, tlen, evalue,
     gapopen, pident, alnlen, raw, bits, cigar, mismatch, qcov,
     tcov) = columns

    return cls(
        mm_query(query),
        mm_target(target),
        mm_qstart(qstart) - 1,
        mm_qend(qend),
        mm_qlen(qlen),
        mm_tstart(tstart) - 1,
        mm_tend(tend),
        mm_tlen(tlen),
        mm_evalue(evalue),
        mm_gapopen(gapopen),
        mm_pident(pident),
        mm_alnlen(alnlen),
        mm_raw(raw),
        mm_bits(bits),
        mm_cigar(cigar),
        mm_mismatch(mismatch),
        mm_qcov(qcov),
        mm_tcov(tcov),
    )
def from_line(cls, line: str) -> "PfamScan":
    """Build a PfamScan record from one whitespace-delimited line.

    The stripped line is split with ``MULTISPACE_REGEX``. Fifteen columns
    are mandatory; an optional sixteenth is passed through
    ``parse_predicted_active_site``. The alignment, envelope and HMM start
    values (columns 1, 3 and 8) have 1 subtracted from them.

    Raises:
        LineParseError: if the line is empty or does not have 15 or 16
            columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = MULTISPACE_REGEX.split(line.strip(), maxsplit=16)
    n_columns = len(columns)

    # maxsplit=16 permits up to 17 pieces; a 17-piece line is rejected here.
    if n_columns not in (15, 16):
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 15 or 16 but got {n_columns}")

    # The active-site column is optional.
    active_sites: Optional[str] = (
        parse_predicted_active_site(columns[15])
        if n_columns == 16
        else None
    )

    (name, ali_start, ali_end, env_start, env_end, hmm, hmm_name,
     hmm_type, hmm_start, hmm_end, hmm_len, bitscore, evalue,
     is_significant, clan) = columns[:15]

    return cls(
        ps_name(name),
        ps_ali_start(ali_start) - 1,
        ps_ali_end(ali_end),
        ps_env_start(env_start) - 1,
        ps_env_end(env_end),
        ps_hmm(hmm),
        ps_hmm_name(hmm_name),
        ps_hmm_type(hmm_type),
        ps_hmm_start(hmm_start) - 1,
        ps_hmm_end(hmm_end),
        ps_hmm_len(hmm_len),
        ps_bitscore(bitscore),
        ps_evalue(evalue),
        ps_is_significant(is_significant),
        ps_clan(clan),
        active_sites,
    )
def from_line(cls, line: str) -> "SignalP4":
    """Parse a short-format signalp4 line as an object.

    The line is stripped of surrounding whitespace and split with
    ``MULTISPACE_REGEX``; exactly 12 columns are required, each of which
    is handed to its ``s4_*`` field parser.

    Raises:
        LineParseError: if the line is empty or does not have 12 columns.
    """
    if line == "":
        raise LineParseError("The line was empty.")

    # Strip before splitting: a regex split keeps an empty string when the
    # separator matches at either end of the input, so (assuming
    # MULTISPACE_REGEX matches whitespace runs) a trailing newline or
    # leading indentation would otherwise add a bogus column.
    sline = MULTISPACE_REGEX.split(line.strip())

    if len(sline) != 12:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 12 but got {len(sline)}")

    return cls(
        s4_name(sline[0]),
        s4_cmax(sline[1]),
        s4_cmax_pos(sline[2]),
        s4_ymax(sline[3]),
        s4_ymax_pos(sline[4]),
        s4_smax(sline[5]),
        s4_smax_pos(sline[6]),
        s4_smean(sline[7]),
        s4_d(sline[8]),
        s4_decision(sline[9]),
        s4_dmax_cut(sline[10]),
        s4_networks_used(sline[11]),
    )
def from_line(cls, line: str) -> "TargetPNonPlant":
    """Build a TargetPNonPlant record from one tab-separated line.

    Five columns are mandatory; an optional sixth is kept verbatim as the
    cleavage-site position (``cs_pos``). A parsed prediction equal to
    ``"noTP"`` is replaced with ``"OTHER"``.

    Raises:
        LineParseError: if the line is empty or does not have 5 or 6
            columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns not in (5, 6):
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 5 or 6 but got {n_columns}")

    # The sixth column is only present on some lines.
    cs_pos: Optional[str] = columns[5] if n_columns == 6 else None

    prediction = tp_prediction(columns[1])
    if prediction == "noTP":
        # Both labels are stored under the single name "OTHER".
        prediction = "OTHER"

    name, _, other, sp, mtp = columns[:5]
    return cls(
        tp_name(name),
        prediction,
        tp_other(other),
        tp_sp(sp),
        tp_mtp(mtp),
        cs_pos=cs_pos,
    )
def from_line(cls, line: str) -> "SignalP3NN":
    """Parse a short-format NN line as an object.

    The line is stripped of surrounding whitespace and split with
    ``MULTISPACE_REGEX``; exactly 14 columns are required, each of which
    is handed to its ``s3nn_*`` field parser.

    Raises:
        LineParseError: if the line is empty or does not have 14 columns.
    """
    if line == "":
        raise LineParseError("The line was empty.")

    # Strip before splitting: a regex split keeps an empty string when the
    # separator matches at either end of the input, so (assuming
    # MULTISPACE_REGEX matches whitespace runs) a trailing newline or
    # leading indentation would otherwise add a bogus column.
    sline = MULTISPACE_REGEX.split(line.strip())

    if len(sline) != 14:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 14 but got {len(sline)}")

    return cls(
        s3nn_name(sline[0]),
        s3nn_cmax(sline[1]),
        s3nn_cmax_pos(sline[2]),
        s3nn_cmax_decision(sline[3]),
        s3nn_ymax(sline[4]),
        s3nn_ymax_pos(sline[5]),
        s3nn_ymax_decision(sline[6]),
        s3nn_smax(sline[7]),
        s3nn_smax_pos(sline[8]),
        s3nn_smax_decision(sline[9]),
        s3nn_smean(sline[10]),
        s3nn_smean_decision(sline[11]),
        s3nn_d(sline[12]),
        s3nn_d_decision(sline[13]),
    )
def from_line(cls, line: str) -> "DomTbl":
    """Build a DomTbl record from one whitespace-delimited line.

    The stripped line is split with ``MULTISPACE_REGEX`` into 22 mandatory
    columns plus an optional trailing description. A description of
    ``"-"`` or ``""`` is stored as ``None``. The parsed ``hmm_from`` and
    ``query_from`` values (columns 15 and 17) have 1 subtracted from them.

    Raises:
        LineParseError: if the line is empty or does not have 22 or 23
            columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    # maxsplit=22 caps the result at 23 pieces, so whitespace inside the
    # trailing description stays within that final piece.
    columns = MULTISPACE_REGEX.split(line.strip(), maxsplit=22)
    n_columns = len(columns)

    if n_columns not in (22, 23):
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 22 or 23 but got {n_columns}")

    # A missing, "-" or empty description is recorded as None.
    description: Optional[str] = None
    if n_columns == 23 and columns[22] not in ("-", ""):
        description = columns[22]

    # Note the constructor order differs from the column order.
    return cls(
        hm_name(columns[3]),
        hm_hmm(columns[0]),
        hm_hmm_len(columns[2]),
        hm_query_len(columns[5]),
        hm_full_evalue(columns[6]),
        hm_full_score(columns[7]),
        hm_full_bias(columns[8]),
        hm_nmatches(columns[10]),
        hm_domain_c_evalue(columns[11]),
        hm_domain_i_evalue(columns[12]),
        hm_domain_score(columns[13]),
        hm_domain_bias(columns[14]),
        hm_hmm_from(columns[15]) - 1,
        hm_hmm_to(columns[16]),
        hm_query_from(columns[17]) - 1,
        hm_query_to(columns[18]),
        hm_acc(columns[21]),
        description,
    )
def from_line(cls, line: str) -> "DeepLoc":
    """Build a DeepLoc record from one tab-separated line.

    The line is stripped and split on tabs into exactly 13 columns, each
    of which is handed to its ``dl_*`` field parser in column order.

    Raises:
        LineParseError: if the line is empty or does not have 13 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns != 13:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 13 but got {n_columns}")

    (name, prediction, membrane, nucleus, cytoplasm, extracellular,
     mitochondrion, cell_membrane, endoplasmic_reticulum, plastid,
     golgi_apparatus, lysosome, peroxisome) = columns

    return cls(
        dl_name(name),
        dl_prediction(prediction),
        dl_membrane(membrane),
        dl_nucleus(nucleus),
        dl_cytoplasm(cytoplasm),
        dl_extracellular(extracellular),
        dl_mitochondrion(mitochondrion),
        dl_cell_membrane(cell_membrane),
        dl_endoplasmic_reticulum(endoplasmic_reticulum),
        dl_plastid(plastid),
        dl_golgi_apparatus(golgi_apparatus),
        dl_lysosome(lysosome),
        dl_peroxisome(peroxisome),
    )
def from_line(cls, line: str) -> "RegexAnalysis":
    """Build a RegexAnalysis record from one tab-separated table line.

    The line is stripped and split on tabs, and every cell is stripped
    again individually. Exactly 6 columns are required, each of which is
    handed to its ``re_*`` field parser in column order.

    Raises:
        LineParseError: if the line is empty or does not have 6 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    # Strip each cell as well as the whole line.
    cells = list(map(str.strip, line.strip().split("\t")))
    n_cells = len(cells)

    if n_cells != 6:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 6 but got {n_cells}")

    name, kind, pattern, match, start, end = cells
    return cls(
        re_name(name),
        re_kind(kind),
        re_pattern(pattern),
        re_match(match),
        re_start(start),
        re_end(end),
    )
def from_line(cls, line: str) -> "Deepredeff":
    """Build a Deepredeff record from one tab-separated line.

    The line is stripped and split on tabs; exactly 3 columns (name,
    score, prediction) are required.

    Raises:
        LineParseError: if the line is empty or does not have 3 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    # maxsplit=3 yields at most 4 pieces; anything other than exactly 3
    # (too few, or a fourth trailing piece) is rejected below.
    columns = line.strip().split("\t", maxsplit=3)
    n_columns = len(columns)

    if n_columns != 3:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 3 but got {n_columns}")

    name, s_score, prediction = columns
    return cls(
        dre_name(name),
        dre_s_score(s_score),
        dre_prediction(prediction),
    )
def _parse_alignment_line(
    line: str
) -> Tuple[str, str, int, str, int, int, Optional[int]]:
    """Split one alignment line into its typed fields.

    The stripped line is split with ``MULTISPACE_REGEX`` into at most six
    pieces, named in order: type, id, ali_start, sequence, ali_end and
    length. Leading ``(`` and trailing ``)`` characters are removed from
    the length piece before it is parsed as an int.

    Returns:
        ``(type, id, ali_start, sequence, ali_end, length, seq_begin)``
        where ``seq_begin`` is the end offset of the ``ALI_REGEX`` match
        against the unstripped line, or ``None`` when it does not match.

    Raises:
        LineParseError: if the line has no length piece. The individual
            field parsers may raise as well (not visible from here).
    """
    field_names = ("type", "id", "ali_start", "sequence", "ali_end", "length")
    pieces = MULTISPACE_REGEX.split(line.strip(), maxsplit=5)

    # zip() stops at the shorter input, so fields missing from a short
    # line are simply absent from the mapping.
    record = dict(zip(field_names, pieces))

    length = fmap(
        lambda x: x.lstrip("(").rstrip(")"),
        record.get("length", None),
    )
    if length is None:
        raise LineParseError(
            f"Missing 'length' from alignment line: '{line}'.")

    # Matched against the raw line, not the stripped copy.
    prefix_match = ALI_REGEX.match(line)
    seq_begin: Optional[int] = (
        None if prefix_match is None else prefix_match.end()
    )

    kind = get_and_parse("type", "type", is_one_of(["T", "Q"]))(record)
    ident = get_and_parse("id", "id", parse_str)(record)
    ali_start = get_and_parse("ali_start", "ali_start", parse_int)(record)
    sequence = get_and_parse("sequence", "sequence", parse_str)(record)
    ali_end = get_and_parse("ali_end", "ali_end", parse_int)(record)
    parsed_length = raise_it(parse_field(parse_int, "length", "field"))(length)

    return (
        kind,
        ident,
        ali_start,
        sequence,
        ali_end,
        parsed_length,
        seq_begin,
    )
def from_line(cls, line: str) -> "EffectorP1":
    """Build an EffectorP1 record from one tab-separated line.

    The line is stripped and split on tabs; exactly 3 columns (name,
    prediction, probability) are required.

    Raises:
        LineParseError: if the line is empty or does not have 3 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns != 3:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 3 but got {n_columns}.")

    name, prediction, prob = columns
    return cls(
        e1_name(name),
        e1_prediction(prediction),
        e1_prob(prob),
    )
def from_line(cls, line: str) -> "EffectorP3":
    """Build an EffectorP3 record from one tab-separated line.

    The line is stripped and split on tabs into exactly 5 columns. The
    prediction is the last column, but it is passed to the constructor
    second, ahead of the three probability columns.

    Raises:
        LineParseError: if the line is empty or does not have 5 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns != 5:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 5 but got {n_columns}.")

    name, cytoplasmic, apoplastic, noneffector, prediction = columns

    # Constructor order differs from column order: prediction comes second.
    return cls(
        e3_name(name),
        e3_prediction(prediction),
        e3_parse_field(cytoplasmic, "cytoplasmic_prob"),
        e3_parse_field(apoplastic, "apoplastic_prob"),
        e3_parse_field(noneffector, "noneffector_prob"),
    )
def from_line(cls, line: str) -> "TMHMM":
    """Build a TMHMM record from one tab-separated line.

    The line is stripped and split on tabs into exactly 6 columns, each of
    which is handed to its ``tm_*`` field parser in column order.

    Raises:
        LineParseError: if the line is empty or does not have 6 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns != 6:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 6 but got {n_columns}")

    name, length, exp_aa, first_60, pred_hel, topology = columns
    return cls(
        tm_name(name),
        tm_length(length),
        tm_exp_aa(exp_aa),
        tm_first_60(first_60),
        tm_pred_hel(pred_hel),
        tm_topology(topology),
    )
def from_line(cls, line: str) -> "DeepSig":
    """Build a DeepSig record from one tab-separated line.

    The line is stripped and split on tabs; exactly 4 columns (name,
    prediction, probability, cleavage-site position) are required.

    Raises:
        LineParseError: if the line is empty or does not have 4 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns != 4:
        raise LineParseError(
            "The line had the wrong number of columns. "
            f"Expected 4 but got {n_columns}"
        )

    name, prediction, prob, cs_pos = columns
    return cls(
        ds_name(name),
        ds_prediction(prediction),
        ds_prob(prob),
        ds_cs_pos(cs_pos),
    )
def from_line(cls, line: str) -> "SignalP3HMM":
    """Parse a short-format HMM line as an object.

    The line is stripped of surrounding whitespace and split with
    ``MULTISPACE_REGEX``; exactly 7 columns are required, each of which is
    handed to its ``s3hmm_*`` field parser.

    Raises:
        LineParseError: if the line is empty or does not have 7 columns.
    """
    if line == "":
        raise LineParseError("The line was empty.")

    # Strip before splitting: a regex split keeps an empty string when the
    # separator matches at either end of the input, so (assuming
    # MULTISPACE_REGEX matches whitespace runs) a trailing newline or
    # leading indentation would otherwise add a bogus column.
    sline = MULTISPACE_REGEX.split(line.strip())

    if len(sline) != 7:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 7 but got {len(sline)}")

    # In column "!" (index 1): Q is non-secreted; A is something else,
    # possibly a long signal peptide (original author's guess -- confirm).
    return cls(
        s3hmm_name(sline[0]),
        s3hmm_is_secreted(sline[1]),
        s3hmm_cmax(sline[2]),
        s3hmm_cmax_pos(sline[3]),
        s3hmm_cmax_decision(sline[4]),
        s3hmm_sprob(sline[5]),
        s3hmm_sprob_decision(sline[6]),
    )
def from_line(cls, line: str) -> "SignalP6":
    """Build a SignalP6 record from one tab-separated short-format line.

    Four columns are mandatory; an optional fifth is parsed with
    ``s6_cs_pos`` as the cleavage-site position. The constructor receives
    the signal probability (column 3) before the "other" probability
    (column 2).

    Raises:
        LineParseError: if the line is empty or does not have 4 or 5
            columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns not in (4, 5):
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 4 or 5 but got {n_columns}")

    # The cleavage-site column is only present on some lines.
    cs_pos: Optional[str] = s6_cs_pos(columns[4]) if n_columns == 5 else None

    name, prediction, prob_other, prob_signal = columns[:4]

    # Constructor order: signal probability first, then "other".
    return cls(
        s6_name(name),
        s6_prediction(prediction),
        s6_prob_signal(prob_signal),
        s6_prob_other(prob_other),
        cs_pos,
    )
def from_line(cls, line: str) -> "TargetPPlant":
    """Build a TargetPPlant record from one tab-separated line.

    Seven columns are mandatory; an optional eighth is kept verbatim as
    the cleavage-site position.

    Raises:
        LineParseError: if the line is empty or does not have 7 or 8
            columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    columns = line.strip().split("\t")
    n_columns = len(columns)

    if n_columns not in (7, 8):
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 7 or 8 but got {n_columns}")

    # The eighth column is only present on some lines.
    cs_pos: Optional[str] = columns[7] if n_columns == 8 else None

    name, prediction, other, sp, mtp, ctp, lutp = columns[:7]
    return cls(
        tp_name(name),
        pl_prediction(prediction),
        tp_other(other),
        tp_sp(sp),
        tp_mtp(mtp),
        pl_ctp(ctp),
        pl_lutp(lutp),
        cs_pos,
    )
def parse_predicted_active_site(
    field: str,
    field_name: str = "active_site",
) -> str:
    """Flatten a ``predicted_active_site[...]`` column into a plain string.

    After the ``predicted_active_site`` prefix is removed, the remainder
    is split on ``[``. Each piece has surrounding ``[``, ``]``, ``,``,
    ``;`` and space characters trimmed and inner spaces removed; the
    non-empty pieces are joined with ``;``. For example
    ``"predicted_active_site[1,2][3,4]"`` becomes ``"1,2;3,4"``.

    Args:
        field: The raw column text; surrounding whitespace is ignored.
        field_name: Column name used in the error message.

    Raises:
        LineParseError: if the stripped field does not start with
            ``predicted_active_site``.
    """
    prefix = "predicted_active_site"
    field = field.strip()

    if not field.startswith(prefix):
        raise LineParseError(
            f"Invalid value: '{field}' in the column: '{field_name}'. "
            "Must have the form 'predicted_active_site[1,2,3]'.")

    groups = []
    for chunk in field[len(prefix):].split("["):
        trimmed = chunk.strip("[],; ")
        # Pieces that were only brackets/separators are dropped.
        if trimmed:
            groups.append(trimmed.replace(" ", ""))

    return ";".join(groups)
def from_line(cls, line: str) -> "LOCALIZER":
    """Build a LOCALIZER record from one tab-separated line.

    The line is stripped and split on tabs, and each cell is stripped
    again. Exactly 4 columns are required: a name, a chloroplast field and
    a mitochondria field (both parsed by ``parse_tp_field``), and a
    nucleus field (parsed by ``parse_nuc_field``).

    Raises:
        LineParseError: if the line is empty or does not have 4 columns.
    """
    if not line:
        raise LineParseError("The line was empty.")

    cells = list(map(str.strip, line.strip().split("\t")))
    n_cells = len(cells)

    if n_cells != 4:
        raise LineParseError("The line had the wrong number of columns. "
                             f"Expected 4 but got {n_cells}")

    name_cell, cp_cell, mt_cell, nuc_cell = cells

    def _shift_start(pos):
        # NOTE(review): presumably -1 converts a 1-based start to 0-based
        # and +20 compensates for a 20-residue offset in the reported
        # coordinates -- confirm against the LOCALIZER output format.
        return pos - 1 + 20

    cp, cp_prob, cp_start, cp_end = parse_tp_field(cp_cell, "chloroplast")
    mt, mt_prob, mt_start, mt_end = parse_tp_field(mt_cell, "mitochondria")
    nuc, nuc_sigs = parse_nuc_field(nuc_cell)

    return cls(
        raise_it(parse_field(parse_str, "name"))(name_cell),
        cp,
        cp_prob,
        fmap(_shift_start, cp_start),
        cp_end,
        mt,
        mt_prob,
        fmap(_shift_start, mt_start),
        mt_end,
        nuc,
        nuc_sigs,
    )
def _is_not_none(val: Optional[T], field_name: str) -> T: if val is None: raise LineParseError( f"Did not encounter {field_name} in alignment.") return val
def _is_not_empty(val: List[T], field_name: str) -> List[T]: if len(val) == 0: raise LineParseError( f"Did not encounter {field_name} in alignment.") return val