def pad(input, output, length, nterm):
    """Pad protein sequences to a specified length by adding amino acids
    in the repeating pattern "GGSG".

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.
    """
    # A truthy ``nterm`` pads the N-terminus; otherwise the C-terminus.
    terminus = "C"
    if nterm:
        terminus = "N"

    # NOTE(review): ``output`` is handed straight to ``print(file=...)``, so it
    # must already be a writable text stream here -- presumably opened by the
    # CLI layer; confirm against the caller.
    for name, seq, _qual in readfq(input):
        padded_seq = pad_ggsg(seq, length, terminus)
        n_added = len(padded_seq) - len(seq)

        # Tag the record title only when residues were actually added, so
        # unchanged sequences keep their original name.
        title = f"{name}|{terminus}-PADDED-{n_added}" if n_added > 0 else name
        print(f">{title}\n{padded_seq}", file=output)
def test_nonsense_terminus(self):
    """Expect ValueError when the terminus is given as lowercase 'c'."""
    target_len = len(short_protein_seq) + 5
    with raises(ValueError):
        # note lowercase 'c'
        pad_ggsg(short_protein_seq, target_len, "c")
def test_exact_len_seq(self):
    """A sequence already at the requested length is returned unchanged."""
    target_len = len(short_protein_seq)
    result = pad_ggsg(short_protein_seq, target_len, "C")
    assert result == short_protein_seq
def test_long_seq(self):
    """A sequence longer than the requested length is returned unchanged."""
    # Request a length three residues shorter than the input.
    target_len = len(short_protein_seq) - 3
    result = pad_ggsg(short_protein_seq, target_len, "C")
    assert result == short_protein_seq
def test_c_term_pad(self):
    """Seven residues of C-terminal padding are appended after the sequence."""
    target_len = len(short_protein_seq) + 7
    expected = short_protein_seq + "GGSGGGS"
    result = pad_ggsg(short_protein_seq, target_len, "C")
    assert result == expected
def test_n_term_pad(self):
    """Five residues of N-terminal padding are prepended before the sequence."""
    target_len = len(short_protein_seq) + 5
    expected = "GGSGG" + short_protein_seq
    result = pad_ggsg(short_protein_seq, target_len, "N")
    assert result == expected