Python to_fasta Exemples, pyfastaq.tasks.to_fasta Python Exemples

Exemple #1

0

Afficher le fichier

def run(description):
    '''Parse the "fastaq to_fasta" command line and run the conversion.

    description is handed to argparse.ArgumentParser as the program
    description. parse_args() is called without arguments, so the options
    come from the process command line. The parsed values are forwarded
    unchanged to tasks.to_fasta.
    '''
    usage_text = 'fastaq to_fasta [options] <infile> <outfile>'
    arg_parser = argparse.ArgumentParser(description=description, usage=usage_text)

    # Positional arguments: where to read from and where to write to.
    arg_parser.add_argument(
        'infile',
        help='Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip',
    )
    arg_parser.add_argument('outfile', help='Name of output file')

    # Optional switches controlling how the output is written.
    arg_parser.add_argument(
        '-l', '--line_length',
        type=int,
        default=60,
        help='Number of bases on each sequence line of output file. Set to zero for no linebreaks in sequences [%(default)s]',
    )
    arg_parser.add_argument(
        '-s', '--strip_after_whitespace',
        action='store_true',
        help='Remove everything after first whitespace in every sequence name',
    )
    arg_parser.add_argument(
        '-u', '--check_unique',
        action='store_true',
        help='Die if any of the output sequence names are not unique',
    )

    args = arg_parser.parse_args()

    # The command-line flag is --strip_after_whitespace, but the keyword
    # expected by tasks.to_fasta is strip_after_first_whitespace.
    conversion_kwargs = {
        'line_length': args.line_length,
        'strip_after_first_whitespace': args.strip_after_whitespace,
        'check_unique': args.check_unique,
    }
    tasks.to_fasta(args.infile, args.outfile, **conversion_kwargs)

Exemple #2

0

Afficher le fichier

Fichier : tasks_test.py Projet : martinghunt/Fastaq

 def test_to_fasta_strip_after_whitespace_unique(self):
     '''Test strip_after_whitespace with unique names.

     Converts the input fixture with strip_after_first_whitespace=True and
     check_unique=True, then compares the result byte-for-byte against the
     expected fixture.
     '''
     tmpfile = 'tmp.strip_after_whitespace.fa'
     infile = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.in.fa')
     expected = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.out.fa')
     try:
         tasks.to_fasta(infile, tmpfile, strip_after_first_whitespace=True, check_unique=True)
         self.assertTrue(filecmp.cmp(tmpfile, expected, shallow=False))
     finally:
         # Clean up even when the conversion or the assertion fails, so a
         # failing run does not leave the scratch file in the working
         # directory. The existence check avoids masking the original error
         # when the file was never created.
         if os.path.exists(tmpfile):
             os.unlink(tmpfile)

Exemple #3

0

Afficher le fichier

Fichier : tasks_test.py Projet : sanger-pathogens/Fastaq

 def test_to_fasta_strip_after_whitespace_unique(self):
     '''Test strip_after_whitespace with unique names.

     Runs tasks.to_fasta with strip_after_first_whitespace=True and
     check_unique=True on the input fixture and requires the output to
     match the expected fixture exactly (content comparison, not just
     stat signatures).
     '''
     tmpfile = 'tmp.strip_after_whitespace.fa'
     infile = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.in.fa')
     expected = os.path.join(data_dir, 'sequences_test.to_fasta.strip_after_whitespace_unique.out.fa')
     try:
         tasks.to_fasta(infile, tmpfile, strip_after_first_whitespace=True, check_unique=True)
         self.assertTrue(filecmp.cmp(tmpfile, expected, shallow=False))
     finally:
         # Always remove the scratch output; without this a failed
         # assertion would leave it behind. Guarded so that a conversion
         # failure before the file exists is not hidden by a second error.
         if os.path.exists(tmpfile):
             os.unlink(tmpfile)

Exemple #4

0

Afficher le fichier

Fichier : to_fasta.py Projet : nds/Fastaq

def run(description):
    '''Command-line entry point for "fastaq to_fasta".

    description becomes the argparse program description. The command line
    is parsed with parse_args() (no explicit argument list) and the
    resulting values are passed straight to tasks.to_fasta.
    '''
    cli = argparse.ArgumentParser(
        description=description,
        usage='fastaq to_fasta [options] <infile> <outfile>',
    )

    # (names-or-flags, add_argument keyword arguments), registered in order.
    argument_table = (
        (
            ('infile',),
            {'help': 'Name of input file. Can be any of FASTA, FASTQ, GFF3, EMBL, GBK, Phylip'},
        ),
        (
            ('outfile',),
            {'help': 'Name of output file'},
        ),
        (
            ('-l', '--line_length'),
            {
                'type': int,
                'help': 'Number of bases on each sequence line of output file. Set to zero for no linebreaks in sequences [%(default)s]',
                'default': 60,
            },
        ),
        (
            ('-s', '--strip_after_whitespace'),
            {
                'action': 'store_true',
                'help': 'Remove everything after first whitespace in every sequence name',
            },
        ),
    )
    for names, settings in argument_table:
        cli.add_argument(*names, **settings)

    parsed = cli.parse_args()

    # Note the keyword name differs from the flag name:
    # --strip_after_whitespace feeds strip_after_first_whitespace.
    tasks.to_fasta(
        parsed.infile,
        parsed.outfile,
        line_length=parsed.line_length,
        strip_after_first_whitespace=parsed.strip_after_whitespace,
    )

Exemple #5

0

Afficher le fichier

Fichier : tasks_test.py Projet : martinghunt/Fastaq

    def test_to_fasta(self):
        '''Test to_fasta.

        Converts each listed fixture with default options and compares the
        result with the fixture's "<name>.to_fasta" counterpart, then checks
        the line_length=3 and strip_after_first_whitespace=True options
        against their own expected files.
        '''
        tmpfile = 'tmp.to_fasta'
        infiles = [
            'sequences_test_good_file.fq',
            'sequences_test_gffv3.gff',
            'sequences_test_gffv3.no_FASTA_line.gff',
            'sequences_test.embl',
            'sequences_test.gbk',
            'sequences_test_phylip.interleaved',
            'sequences_test_phylip.interleaved2',
            'sequences_test_phylip.sequential'
        ]
        infiles = [os.path.join(data_dir, x) for x in infiles]

        try:
            for infile in infiles:
                tasks.to_fasta(infile, tmpfile)
                # shallow=False forces a content comparison; the default
                # may declare files equal from their os.stat signatures
                # alone.
                self.assertTrue(filecmp.cmp(infile + '.to_fasta', tmpfile, shallow=False))

            tasks.to_fasta(os.path.join(data_dir, 'sequences_test.fa'), tmpfile, line_length=3)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.line_length3.fa'), tmpfile, shallow=False))
            tasks.to_fasta(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa'), tmpfile, strip_after_first_whitespace=True)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa.to_fasta'), tmpfile, shallow=False))
        finally:
            # Remove the scratch file even when a check fails part-way
            # through. Guarded so a conversion error before the file exists
            # is not replaced by a FileNotFoundError.
            if os.path.exists(tmpfile):
                os.unlink(tmpfile)

Exemple #6

0

Afficher le fichier

Fichier : tasks_test.py Projet : sanger-pathogens/Fastaq

    def test_to_fasta(self):
        '''Test to_fasta.

        Each fixture in the list is converted with default options and must
        match "<fixture>.to_fasta". Two further conversions exercise
        line_length=3 and strip_after_first_whitespace=True.
        '''
        tmpfile = 'tmp.to_fasta'
        infiles = [
            'sequences_test_good_file.fq',
            'sequences_test_gffv3.gff',
            'sequences_test_gffv3.no_FASTA_line.gff',
            'sequences_test.embl',
            'sequences_test.gbk',
            'sequences_test_phylip.interleaved',
            'sequences_test_phylip.interleaved2',
            'sequences_test_phylip.sequential'
        ]
        infiles = [os.path.join(data_dir, x) for x in infiles]

        try:
            for infile in infiles:
                tasks.to_fasta(infile, tmpfile)
                # Compare file contents (shallow=False) rather than relying
                # on the default os.stat signature shortcut.
                self.assertTrue(filecmp.cmp(infile + '.to_fasta', tmpfile, shallow=False))

            tasks.to_fasta(os.path.join(data_dir, 'sequences_test.fa'), tmpfile, line_length=3)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.line_length3.fa'), tmpfile, shallow=False))
            tasks.to_fasta(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa'), tmpfile, strip_after_first_whitespace=True)
            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa.to_fasta'), tmpfile, shallow=False))
        finally:
            # The scratch file is deleted on both success and failure; the
            # existence check keeps an early conversion error from being
            # masked by a failed unlink.
            if os.path.exists(tmpfile):
                os.unlink(tmpfile)