Exemple #1
0
def run(description):
    """Command-line entry point for ``fastaq to_fasta``.

    Builds the argument parser, parses the process arguments and passes
    the parsed values on to ``tasks.to_fasta``.

    Args:
        description: Text used as the parser's description in ``--help``.
    """
    cli = argparse.ArgumentParser(
        usage='fastaq to_fasta [options] <infile> <outfile>',
        description=description)

    # Positional arguments: where to read from and where to write to.
    cli.add_argument(
        'infile',
        help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip')
    cli.add_argument('outfile', help='Name of output file')

    # Optional switches controlling how the output is written.
    cli.add_argument(
        '-l', '--line_length',
        default=60,
        type=int,
        help='Number of bases on each sequence line of output file. Set to zero for no linebreaks in sequences [%(default)s]')
    cli.add_argument(
        '-s', '--strip_after_whitespace',
        action='store_true',
        help='Remove everything after first whitespace in every sequence name')
    cli.add_argument(
        '-u', '--check_unique',
        action='store_true',
        help='Die if any of the output sequence names are not unique')

    args = cli.parse_args()

    # Note the CLI flag is --strip_after_whitespace while the keyword
    # expected by tasks.to_fasta is strip_after_first_whitespace.
    conversion_kwargs = {
        'line_length': args.line_length,
        'strip_after_first_whitespace': args.strip_after_whitespace,
        'check_unique': args.check_unique,
    }
    tasks.to_fasta(args.infile, args.outfile, **conversion_kwargs)
Exemple #2
0
 def test_to_fasta_strip_after_whitespace_unique(self):
     '''Test strip_after_whitespace with unique names.

     Converts the input fixture with ``strip_after_first_whitespace=True``
     and ``check_unique=True`` and checks that the output is identical
     to the expected fixture file.
     '''
     tmpfile = 'tmp.strip_after_whitespace.fa'
     infile = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.in.fa')
     expected = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.out.fa')
     try:
         tasks.to_fasta(infile, tmpfile, strip_after_first_whitespace=True, check_unique=True)
         self.assertTrue(filecmp.cmp(tmpfile, expected, shallow=False))
     finally:
         # Remove the temporary output even when the conversion or the
         # comparison fails, so a failed run leaves nothing behind. The
         # existence check avoids masking the original error when the
         # output file was never created.
         if os.path.exists(tmpfile):
             os.unlink(tmpfile)
 def test_to_fasta_strip_after_whitespace_unique(self):
     '''Test strip_after_whitespace with unique names.

     Converts the input fixture with ``strip_after_first_whitespace=True``
     and ``check_unique=True`` and checks that the output is identical
     to the expected fixture file.
     '''
     tmpfile = 'tmp.strip_after_whitespace.fa'
     infile = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.in.fa')
     expected = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.out.fa')
     try:
         tasks.to_fasta(infile, tmpfile, strip_after_first_whitespace=True, check_unique=True)
         self.assertTrue(filecmp.cmp(tmpfile, expected, shallow=False))
     finally:
         # Remove the temporary output even when the conversion or the
         # comparison fails, so a failed run leaves nothing behind. The
         # existence check avoids masking the original error when the
         # output file was never created.
         if os.path.exists(tmpfile):
             os.unlink(tmpfile)
Exemple #4
0
def run(description):
    """Command-line entry point for ``fastaq to_fasta``.

    Builds the argument parser, parses the process arguments and passes
    the parsed values on to ``tasks.to_fasta``.

    Args:
        description: Text used as the parser's description in ``--help``.
    """
    cli = argparse.ArgumentParser(
        usage='fastaq to_fasta [options] <infile> <outfile>',
        description=description,
    )

    # Positional arguments: where to read from and where to write to.
    cli.add_argument(
        'infile',
        help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip',
    )
    cli.add_argument(
        'outfile',
        help='Name of output file',
    )

    # Optional switches controlling how the output is written.
    cli.add_argument(
        '-l', '--line_length',
        default=60,
        type=int,
        help='Number of bases on each sequence line of output file. Set to zero for no linebreaks in sequences [%(default)s]',
    )
    cli.add_argument(
        '-s', '--strip_after_whitespace',
        action='store_true',
        help='Remove everything after first whitespace in every sequence name',
    )

    args = cli.parse_args()

    # Note the CLI flag is --strip_after_whitespace while the keyword
    # expected by tasks.to_fasta is strip_after_first_whitespace.
    conversion_kwargs = {
        'line_length': args.line_length,
        'strip_after_first_whitespace': args.strip_after_whitespace,
    }
    tasks.to_fasta(args.infile, args.outfile, **conversion_kwargs)
Exemple #5
0
    def test_to_fasta(self):
        '''Test to_fasta.

        Converts each listed input fixture with default options and
        compares the result against the matching ``<input>.to_fasta``
        file, then checks the ``line_length`` and
        ``strip_after_first_whitespace`` options against their own
        expected files.
        '''
        tmpfile = 'tmp.to_fasta'
        infiles = [
            'sequences_test_good_file.fq',
            'sequences_test_gffv3.gff',
            'sequences_test_gffv3.no_FASTA_line.gff',
            'sequences_test.embl',
            'sequences_test.gbk',
            'sequences_test_phylip.interleaved',
            'sequences_test_phylip.interleaved2',
            'sequences_test_phylip.sequential'
        ]
        infiles = [os.path.join(data_dir, x) for x in infiles]

        try:
            for infile in infiles:
                tasks.to_fasta(infile, tmpfile)
                # shallow=False forces a content comparison instead of
                # accepting files whose os.stat() signatures match.
                self.assertTrue(filecmp.cmp(infile + '.to_fasta', tmpfile, shallow=False))

            tasks.to_fasta(os.path.join(data_dir, 'sequences_test.fa'), tmpfile, line_length=3)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.line_length3.fa'), tmpfile, shallow=False))
            tasks.to_fasta(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa'), tmpfile, strip_after_first_whitespace=True)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa.to_fasta'), tmpfile, shallow=False))
        finally:
            # Remove the temporary output even when a conversion or a
            # comparison fails. The existence check avoids masking the
            # original error when the output file was never created.
            if os.path.exists(tmpfile):
                os.unlink(tmpfile)
    def test_to_fasta(self):
        '''Test to_fasta.

        Converts each listed input fixture with default options and
        compares the result against the matching ``<input>.to_fasta``
        file, then checks the ``line_length`` and
        ``strip_after_first_whitespace`` options against their own
        expected files.
        '''
        tmpfile = 'tmp.to_fasta'
        infiles = [
            'sequences_test_good_file.fq',
            'sequences_test_gffv3.gff',
            'sequences_test_gffv3.no_FASTA_line.gff',
            'sequences_test.embl',
            'sequences_test.gbk',
            'sequences_test_phylip.interleaved',
            'sequences_test_phylip.interleaved2',
            'sequences_test_phylip.sequential'
        ]
        infiles = [os.path.join(data_dir, x) for x in infiles]

        try:
            for infile in infiles:
                tasks.to_fasta(infile, tmpfile)
                # shallow=False forces a content comparison instead of
                # accepting files whose os.stat() signatures match.
                self.assertTrue(filecmp.cmp(infile + '.to_fasta', tmpfile, shallow=False))

            tasks.to_fasta(os.path.join(data_dir, 'sequences_test.fa'), tmpfile, line_length=3)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.line_length3.fa'), tmpfile, shallow=False))
            tasks.to_fasta(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa'), tmpfile, strip_after_first_whitespace=True)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa.to_fasta'), tmpfile, shallow=False))
        finally:
            # Remove the temporary output even when a conversion or a
            # comparison fails. The existence check avoids masking the
            # original error when the output file was never created.
            if os.path.exists(tmpfile):
                os.unlink(tmpfile)