def test_load_fasta_align():
    """
    This tests load_fasta_align using toy_data's fastas.

    The cycled fasta is expected to load as a mapping with 210 entries
    keyed by the strings "0" .. "209".  Entry ``i`` must equal the
    21-character alphabet (a gap followed by the amino acid letters)
    rotated right by ``i`` positions.
    """
    aminobet = "-ACDEFGHIKLMNPQRSTVWY"
    n_seqs = 210

    def ring_pop(amino_str):
        """Rotate amino_str right by one (last character moves to the front)."""
        return amino_str[-1] + amino_str[:-1]

    # Walk up two directories from this file, then into toy_data/toy_fastas.
    seq_loc = join(dirname(__file__), pardir)
    mod_loc = join(seq_loc, pardir)
    toy_loc = join(mod_loc, "toy_data")
    fas_loc = join(toy_loc, "toy_fastas")

    # test cycled
    cyc_loc = join(fas_loc, toy_fastas["cyc"])
    cyc_aln = load_fasta_align(cyc_loc)
    assert len(cyc_aln) == n_seqs

    # `range` rather than the Python-2-only `xrange`, so the test runs
    # unchanged on both Python 2 and Python 3.
    assert sum(int(seq_id) for seq_id in cyc_aln) == sum(range(n_seqs))

    for i in range(n_seqs):
        assert cyc_aln[str(i)] == aminobet
        # Advance the expected sequence by one rotation for the next id.
        aminobet = ring_pop(aminobet)
def load_toy_data(subset="two_seq"):
    """
    Load one of the named toy data sets.

    Valid subsets are:
    cyc_seq : A 21x210 fasta of all of the standard aminoalphabet and a
              gap.
    two_seq : The first two sequences from cyc_seq.
    pmm_seq : A MUSCLE alignment of phosphomannomutase retrieved from the
              PIR on Apr 3, 2012.

    The subset name is looked up in ``data_names``.  When the name ends in
    'seq', the looked-up entry is handed to ``load_fasta_align`` and that
    result is returned.  For any other name found in ``data_names``
    nothing is loaded here.
    """
    # Look up first so an unknown subset fails on the lookup itself.
    fasta_source = data_names[subset]
    suffix = subset[-3:]
    if suffix == 'seq':
        return load_fasta_align(fasta_source)