Esempio n. 1
0
def pad(input, output, length, nterm):
    """Pad protein sequence to a specified length by adding amino acids in the
    pattern of "GGSG".

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.

    Records whose sequence comes back from the padding helper unchanged keep
    their original title; padded records get a "|<terminus>-PADDED-<n>"
    suffix, where <n> is the number of residues that were added.
    """
    # NOTE(review): `output` is handed straight to print(file=...), so by the
    # time this body runs it must be a writable text stream -- presumably the
    # CLI layer has already opened the path; confirm against the caller.
    terminus = "N" if nterm else "C"
    # Each record is unpacked as a 3-tuple; the third field is not needed here.
    for name, seq, _qual in readfq(input):
        padded = pad_ggsg(seq, length, terminus)
        pad_len = len(padded) - len(seq)
        # Only annotate the title when padding actually changed the length.
        if pad_len > 0:
            output_title = f"{name}|{terminus}-PADDED-{pad_len}"
        else:
            output_title = name
        print(f">{output_title}\n{padded}", file=output)
Esempio n. 2
0
 def test_nonsense_terminus(self):
     """A terminus code of lowercase "c" must make pad_ggsg raise ValueError."""
     # Computed outside the context manager so that only the pad_ggsg call
     # itself can satisfy the expected exception.
     target_len = len(short_protein_seq) + 5
     with raises(ValueError):
         # note lowercase 'c' -- deliberate; the call is expected to reject it
         pad_ggsg(short_protein_seq, target_len, "c")
Esempio n. 3
0
 def test_exact_len_seq(self):
     """Requesting exactly the current length must return the input as-is."""
     target_len = len(short_protein_seq)
     result = pad_ggsg(short_protein_seq, target_len, "C")
     assert result == short_protein_seq
Esempio n. 4
0
 def test_long_seq(self):
     """A sequence already longer than the target must come back unchanged."""
     # Target is three residues shorter than the sequence itself.
     target_len = len(short_protein_seq) - 3
     result = pad_ggsg(short_protein_seq, target_len, "C")
     assert result == short_protein_seq
Esempio n. 5
0
 def test_c_term_pad(self):
     """Seven residues of C-terminal padding are appended after the sequence."""
     target_len = len(short_protein_seq) + 7
     expected = short_protein_seq + "GGSGGGS"
     result = pad_ggsg(short_protein_seq, target_len, "C")
     assert result == expected
Esempio n. 6
0
 def test_n_term_pad(self):
     """Five residues of N-terminal padding are placed before the sequence."""
     target_len = len(short_protein_seq) + 5
     expected = "GGSGG" + short_protein_seq
     result = pad_ggsg(short_protein_seq, target_len, "N")
     assert result == expected