def run(description):
    '''Command-line entry point for "fastaq to_fasta".

    Builds the argument parser (using description as the help text
    header), parses the process command line, and forwards the chosen
    input file, output file and options to tasks.to_fasta.
    '''
    cli = argparse.ArgumentParser(
        usage='fastaq to_fasta [options] <infile> <outfile>',
        description=description,
    )

    # Positional arguments: where to read from and where to write to.
    cli.add_argument(
        'infile',
        help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip',
    )
    cli.add_argument('outfile', help='Name of output file')

    # Optional arguments controlling how the FASTA output is written.
    cli.add_argument(
        '-l', '--line_length',
        default=60,
        type=int,
        help='Number of bases on each sequence line of output file. Set to zero for no linebreaks in sequences [%(default)s]',
    )
    cli.add_argument(
        '-s', '--strip_after_whitespace',
        action='store_true',
        help='Remove everything after first whitespace in every sequence name',
    )
    cli.add_argument(
        '-u', '--check_unique',
        action='store_true',
        help='Die if any of the output sequence names are not unique',
    )

    args = cli.parse_args()

    # The CLI flag is --strip_after_whitespace, but the keyword expected by
    # tasks.to_fasta is strip_after_first_whitespace.
    conversion_settings = {
        'line_length': args.line_length,
        'strip_after_first_whitespace': args.strip_after_whitespace,
        'check_unique': args.check_unique,
    }
    tasks.to_fasta(args.infile, args.outfile, **conversion_settings)
def test_to_fasta_strip_after_whitespace_unique(self):
    '''to_fasta with name stripping and the uniqueness check both switched on'''
    expected_fa = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.out.fa')
    source_fa = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.in.fa')
    produced_fa = 'tmp.strip_after_whitespace.fa'

    tasks.to_fasta(
        source_fa,
        produced_fa,
        strip_after_first_whitespace=True,
        check_unique=True,
    )

    # shallow=False: compare the file contents, not just the stat signatures.
    outputs_match = filecmp.cmp(produced_fa, expected_fa, shallow=False)
    self.assertTrue(outputs_match)

    # Remove the temporary output once the comparison has passed.
    os.unlink(produced_fa)
def run(description):
    '''Command-line entry point for "fastaq to_fasta".

    Builds the argument parser (description is shown in the help text),
    parses the process command line and calls tasks.to_fasta with the
    input file, output file and selected options.

    The -u/--check_unique option is exposed here as well, so that this
    definition offers the same options as the other to_fasta runner and
    forwards the check_unique keyword that tasks.to_fasta is called with
    elsewhere. It defaults to off, so existing command lines are unaffected.
    '''
    parser = argparse.ArgumentParser(
        description=description,
        usage='fastaq to_fasta [options] <infile> <outfile>',
    )
    parser.add_argument(
        'infile',
        help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip',
    )
    parser.add_argument('outfile', help='Name of output file')
    parser.add_argument(
        '-l', '--line_length',
        type=int,
        help='Number of bases on each sequence line of output file. Set to zero for no linebreaks in sequences [%(default)s]',
        default=60,
    )
    parser.add_argument(
        '-s', '--strip_after_whitespace',
        action='store_true',
        help='Remove everything after first whitespace in every sequence name',
    )
    parser.add_argument(
        '-u', '--check_unique',
        action='store_true',
        help='Die if any of the output sequence names are not unique',
    )
    options = parser.parse_args()

    # Note the naming difference: the CLI flag is --strip_after_whitespace,
    # while the keyword passed to tasks.to_fasta is strip_after_first_whitespace.
    tasks.to_fasta(
        options.infile,
        options.outfile,
        line_length=options.line_length,
        strip_after_first_whitespace=options.strip_after_whitespace,
        check_unique=options.check_unique,
    )
def test_to_fasta(self):
    '''Test to_fasta on each supported input format, plus the line_length
    and strip_after_first_whitespace options'''
    tmpfile = 'tmp.to_fasta'
    infiles = [
        'sequences_test_good_file.fq',
        'sequences_test_gffv3.gff',
        'sequences_test_gffv3.no_FASTA_line.gff',
        'sequences_test.embl',
        'sequences_test.gbk',
        'sequences_test_phylip.interleaved',
        'sequences_test_phylip.interleaved2',
        'sequences_test_phylip.sequential',
    ]

    # Each input file has its expected output next to it, named by
    # appending '.to_fasta' to the input path.
    #
    # shallow=False is passed to every comparison: with the default
    # shallow=True, filecmp.cmp may declare two files equal from their
    # os.stat signatures alone, without reading the contents.
    for infile in (os.path.join(data_dir, x) for x in infiles):
        tasks.to_fasta(infile, tmpfile)
        self.assertTrue(filecmp.cmp(infile + '.to_fasta', tmpfile, shallow=False))

    # Non-default line length.
    tasks.to_fasta(os.path.join(data_dir, 'sequences_test.fa'), tmpfile, line_length=3)
    self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.line_length3.fa'), tmpfile, shallow=False))

    # Sequence names truncated at the first whitespace.
    tasks.to_fasta(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa'), tmpfile, strip_after_first_whitespace=True)
    self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa.to_fasta'), tmpfile, shallow=False))

    os.unlink(tmpfile)