예제 #1
0
def run(description):
    '''Command line entry point for "fastaq chunker".

    Parses the command line (read from sys.argv by argparse) and hands the
    parsed values to tasks.split_by_fixed_size_onefile when --onefile is
    given, otherwise to tasks.split_by_fixed_size.

    description -- accepted but never read in this function; the parser
    carries its own fixed description text.
    '''
    parser = argparse.ArgumentParser(
        description=(
            'Splits a multi sequence file into separate files. '
            'Splits sequences into chunks of a fixed size. '
            'Aims for chunk_size chunks in each file, but allows a little extra, '
            'so chunk can be up to (chunk_size + tolerance), '
            'to prevent tiny chunks made from the ends of sequences'
        ),
        usage='fastaq chunker [options] <infile> <out> <chunk size> <tolerance>',
    )

    # Registration order is kept as-is: it fixes the order of the positional
    # arguments and the order of entries in the help listing.
    argument_specs = (
        ('infile', {'help': 'Name of input file to be split'}),
        ('out', {'help': 'Prefix of output file. If --onefile used, then name of single output file'}),
        ('chunk_size', {'type': int, 'help': 'Size of each chunk'}),
        ('tolerance', {'type': int, 'help': 'Tolerance allowed in chunk size'}),
        ('--onefile', {'action': 'store_true', 'help': 'Output all the sequences in one file'}),
        ('--skip_all_Ns', {'action': 'store_true', 'help': 'Do not output any sequence that consists of all Ns'}),
    )
    for arg_name, arg_settings in argument_specs:
        parser.add_argument(arg_name, **arg_settings)

    options = parser.parse_args()

    # Both task functions are called with exactly the same arguments, so only
    # the choice of function depends on --onefile.
    if options.onefile:
        splitter = tasks.split_by_fixed_size_onefile
    else:
        splitter = tasks.split_by_fixed_size

    splitter(
        options.infile,
        options.out,
        options.chunk_size,
        options.tolerance,
        skip_if_all_Ns=options.skip_all_Ns
    )
예제 #2
0
    def test_split_by_fixed_size_exclude_Ns(self):
        '''Test fasta/q file split by fixed size with skip_if_all_Ns switched on'''
        source = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa')
        prefix = 'tmp.sequences_test_split'
        tasks.split_by_fixed_size(source, prefix, 4, 1, skip_if_all_Ns=True)

        # Every reference file shares this stem; only the trailing part differs.
        expected_stem = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.')

        # Output files numbered 1 to 4 are compared, each one removed once it matches.
        for suffix in map(str, range(1, 5)):
            produced = prefix + '.' + suffix
            self.assertTrue(filecmp.cmp(produced, expected_stem + suffix))
            os.unlink(produced)

        coords_out = prefix + '.coords'
        coords_expected = expected_stem + 'coords'
        self.assertTrue(filecmp.cmp(coords_expected, coords_out))
        os.unlink(coords_out)
예제 #3
0
    def test_split_by_fixed_size_exclude_Ns(self):
        '''Test split_by_fixed_size called with skip_if_all_Ns=True'''
        reference_name = 'sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.{}'
        out_name = 'tmp.sequences_test_split.{}'

        tasks.split_by_fixed_size(
            os.path.join(data_dir, 'sequences_test_split_fixed_size.fa'),
            'tmp.sequences_test_split',
            4,
            1,
            skip_if_all_Ns=True,
        )

        # Check the numbered output files 1..4 one by one; a file is deleted
        # only after it has compared equal to its reference copy.
        file_number = 1
        while file_number < 5:
            got = out_name.format(file_number)
            want = os.path.join(data_dir, reference_name.format(file_number))
            self.assertTrue(filecmp.cmp(got, want))
            os.unlink(got)
            file_number += 1

        # The coordinates file written next to the numbered files is checked the same way.
        got_coords = out_name.format('coords')
        want_coords = os.path.join(data_dir, reference_name.format('coords'))
        self.assertTrue(filecmp.cmp(want_coords, got_coords))
        os.unlink(got_coords)
예제 #4
0
    def test_split_by_fixed_size(self):
        '''Test splitting a fasta/q file into fixed size chunks'''
        source = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa')
        prefix = 'tmp.sequences_test_split'
        tasks.split_by_fixed_size(source, prefix, 4, 1)

        # Every reference file shares this stem; only the trailing part differs.
        expected_stem = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa.split.')

        # Output files numbered 1 to 6 are compared, each one removed once it matches.
        for suffix in map(str, range(1, 7)):
            produced = prefix + '.' + suffix
            self.assertTrue(filecmp.cmp(produced, expected_stem + suffix))
            os.unlink(produced)

        coords_out = prefix + '.coords'
        coords_expected = expected_stem + 'coords'
        self.assertTrue(filecmp.cmp(coords_expected, coords_out))
        os.unlink(coords_out)
예제 #5
0
    def test_split_by_fixed_size(self):
        '''Test split_by_fixed_size with chunk size 4 and tolerance 1'''
        reference_name = 'sequences_test_split_fixed_size.fa.split.{}'
        out_name = 'tmp.sequences_test_split.{}'

        tasks.split_by_fixed_size(
            os.path.join(data_dir, 'sequences_test_split_fixed_size.fa'),
            'tmp.sequences_test_split',
            4,
            1,
        )

        # Check the numbered output files 1..6 one by one; a file is deleted
        # only after it has compared equal to its reference copy.
        file_number = 1
        while file_number < 7:
            got = out_name.format(file_number)
            want = os.path.join(data_dir, reference_name.format(file_number))
            self.assertTrue(filecmp.cmp(got, want))
            os.unlink(got)
            file_number += 1

        # The coordinates file written next to the numbered files is checked the same way.
        got_coords = out_name.format('coords')
        want_coords = os.path.join(data_dir, reference_name.format('coords'))
        self.assertTrue(filecmp.cmp(want_coords, got_coords))
        os.unlink(got_coords)
예제 #6
0
def run(description):
    '''Parse the "fastaq chunker" command line and run the matching split task.

    Arguments are taken from sys.argv via argparse. With --onefile the work
    is delegated to tasks.split_by_fixed_size_onefile, otherwise to
    tasks.split_by_fixed_size; both receive the same arguments.

    The description parameter is not used here: the parser is built with a
    hard-coded description string.
    '''
    help_text = (
        'Splits a multi sequence file into separate files. '
        'Splits sequences into chunks of a fixed size. '
        'Aims for chunk_size chunks in each file, but allows a little extra, '
        'so chunk can be up to (chunk_size + tolerance), '
        'to prevent tiny chunks made from the ends of sequences'
    )
    usage_text = 'fastaq chunker [options] <infile> <out> <chunk size> <tolerance>'
    parser = argparse.ArgumentParser(description=help_text, usage=usage_text)

    # Positional arguments, in the order they must appear on the command line.
    parser.add_argument('infile', help='Name of input file to be split')
    parser.add_argument('out', help='Prefix of output file. If --onefile used, then name of single output file')
    for int_arg, int_help in (
        ('chunk_size', 'Size of each chunk'),
        ('tolerance', 'Tolerance allowed in chunk size'),
    ):
        parser.add_argument(int_arg, type=int, help=int_help)

    # Optional boolean switches.
    for flag, flag_help in (
        ('--onefile', 'Output all the sequences in one file'),
        ('--skip_all_Ns', 'Do not output any sequence that consists of all Ns'),
    ):
        parser.add_argument(flag, action='store_true', help=flag_help)

    options = parser.parse_args()

    split_args = (options.infile, options.out, options.chunk_size, options.tolerance)
    split_kwargs = {'skip_if_all_Ns': options.skip_all_Ns}

    if not options.onefile:
        tasks.split_by_fixed_size(*split_args, **split_kwargs)
        return

    tasks.split_by_fixed_size_onefile(*split_args, **split_kwargs)