Example #1
0
def main(args):
    grace.require_shrimp_1()

    n_cpus = grace.how_many_cpus()

    solid, args = grace.get_flag(args, '--solid')
    verbose, args = grace.get_flag(args, '--verbose')

    threshold, args = grace.get_option_value(args, '--threshold', str, '68%')

    stride, args = grace.get_option_value(args, '--stride', int, 1)
    max_shrimps, args = grace.get_option_value(args, '--cpus', int, n_cpus)
    batch_size, args = grace.get_option_value(args, '--batch-size', int,
                                              5000000)

    input_reference_filenames = []
    reads_filenames = []

    shrimp_options = ['-h', threshold]
    if threshold.endswith('%'):
        threshold = -float(threshold[:-1]) / 100.0
    else:
        threshold = int(threshold)

    output_dir = []  #As list so can write to from function. Gah.

    def front_command(args):
        grace.expect_no_further_options(args)

        if len(args) < 1:
            return

        output_dir.append(args[0])
        input_reference_filenames.extend(
            [os.path.abspath(filename) for filename in args[1:]])

    def reads_command(args):
        grace.expect_no_further_options(args)
        reads_filenames.extend([[os.path.abspath(filename)]
                                for filename in args])

    def pairs_command(args):
        grace.expect_no_further_options(args)
        assert len(args) == 2, 'Expected exactly two files in "pairs"'
        reads_filenames.append(
            [os.path.abspath(filename) for filename in args])

    def shrimp_options_command(args):
        shrimp_options.extend(args)

    grace.execute(
        args, {
            'reads': reads_command,
            '--reads': reads_command,
            'pairs': pairs_command,
            'shrimp-options': shrimp_options_command,
            '--shrimp-options': shrimp_options_command,
        }, front_command)

    if not output_dir:
        print >> sys.stderr, USAGE % n_cpus
        return 1

    output_dir = output_dir[0]

    assert input_reference_filenames, 'No reference files given'
    assert reads_filenames, 'No read files given'

    for filename in itertools.chain(input_reference_filenames,
                                    *reads_filenames):
        assert os.path.exists(filename), '%s does not exist' % filename

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    if solid:
        shrimp = 'rmapper-cs'
    else:
        shrimp = 'rmapper-ls'

    reference_filename = os.path.join(output_dir, 'reference.fa')
    reference_file = open(reference_filename, 'wb')
    total_reference_sequences = 0
    total_reference_bases = 0
    for input_reference_filename in input_reference_filenames:
        for name, sequence in io.read_sequences(input_reference_filename):
            #Don't retain any comment
            name = name.split()[0]
            io.write_fasta(reference_file, name, sequence)

            total_reference_sequences += 1
            total_reference_bases += len(sequence)

    reference_file.close()

    print '%s base%s in %s reference sequence%s' % (
        grace.pretty_number(total_reference_bases),
        's' if total_reference_bases != 1 else '',
        grace.pretty_number(total_reference_sequences),
        's' if total_reference_sequences != 1 else '')

    assert total_reference_bases, 'Reference sequence file is empty'

    config = {
        'references': input_reference_filenames,
        'reads': reads_filenames,
        'stride': stride,
        'solid': solid,
        'threshold': threshold,
    }
    config_file = open(os.path.join(output_dir, 'config.txt'), 'wb')
    pprint.pprint(config, config_file)
    config_file.close()

    output_filename = os.path.join(output_dir, 'shrimp_hits.txt.gz')
    output_file = gzip.open(output_filename, 'wb')

    unmapped_filename = os.path.join(output_dir, 'unmapped.fa.gz')
    unmapped_file = gzip.open(unmapped_filename, 'wb')

    dirty_filenames = set()
    dirty_filenames.add(output_filename)
    dirty_filenames.add(unmapped_filename)

    #warn_low_threshold = True

    try:  #Cleanup temporary files

        N = [0]

        def do_shrimp(read_set):
            my_number = N[0]
            N[0] += 1

            tempname = os.path.join(output_dir,
                                    'temp%d-%d.fa' % (os.getpid(), my_number))
            tempname_out = os.path.join(
                output_dir, 'temp%d-%d.txt' % (os.getpid(), my_number))

            dirty_filenames.add(tempname)
            dirty_filenames.add(tempname_out)

            f = open(tempname, 'wb')
            for read_name, read_seq in read_set:
                print >> f, '>' + read_name
                print >> f, read_seq
            f.close()

            command = shrimp + ' ' + ' '.join(shrimp_options) + ' ' + \
                      tempname + ' ' + reference_filename + ' >' + tempname_out
            if not verbose:
                command += ' 2>/dev/null'
            #f = os.popen(command, 'r')
            child_pid = os.spawnl(os.P_NOWAIT, '/bin/sh', '/bin/sh', '-c',
                                  command)

            #print 'SHRiMP %d running' % my_number

            def finalize():
                exit_status = os.waitpid(child_pid, 0)[1]
                assert exit_status == 0, 'Shrimp indicated an error'

                hits = {}  # read_name -> [ hit line ]

                f = open(tempname_out, 'rb')
                for line in f:
                    if line.startswith('>'):
                        read_name = line.split(None, 1)[0][1:]
                        if read_name not in hits:
                            hits[read_name] = []
                        hits[read_name].append(line)
                f.close()

                for read_name, read_seq in read_set:
                    if read_name in hits:
                        for hit in hits[read_name]:
                            output_file.write(hit)
                    else:
                        print >> unmapped_file, '>' + read_name
                        print >> unmapped_file, read_seq

                output_file.flush()
                unmapped_file.flush()

                os.unlink(tempname)
                dirty_filenames.remove(tempname)
                os.unlink(tempname_out)
                dirty_filenames.remove(tempname_out)
                #print 'SHRiMP %d finished' % my_number

            return finalize

        shrimps = []

        reader = iter_reads(config)
        read_count = 0

        while True:
            read_set = []
            read_set_bases = 0

            #Read name should not include comment cruft
            # - SHRIMP passes this through
            # - might stuff up identification of pairs

            for read_name, read_seq in reader:
                read_name = read_name.split()[0]
                read_set.append((read_name, read_seq))
                read_set_bases += len(read_seq)

                #if warn_low_threshold and len(read_seq)*7 < threshold: #Require 70% exact match
                #    sys.stderr.write('\n*** WARNING: Short reads, consider reducing --threshold ***\n\n')
                #    warn_low_threshold = False

                read_count += 1
                if read_set_bases >= batch_size: break

            if not read_set: break

            if len(shrimps) >= max_shrimps:
                shrimps.pop(0)()
            shrimps.append(do_shrimp(read_set))

            grace.status('SHRiMPing %s' % grace.pretty_number(read_count))

        while shrimps:
            grace.status('Waiting for SHRiMPs to finish %d ' % len(shrimps))
            shrimps.pop(0)()

        grace.status('')

        output_file.close()
        dirty_filenames.remove(output_filename)
        unmapped_file.close()
        dirty_filenames.remove(unmapped_filename)

        return 0

    finally:
        for filename in dirty_filenames:
            if os.path.exists(filename):
                os.unlink(filename)
Example #2
0
    def run(self):
        grace.require_shrimp_2()
        grace.require_samtools()
        assert self.references, 'No reference sequences given'
        assert self.reads or self.pairs or self.interleaved, 'No reads given'
        for pair in self.pairs:
            assert len(pair) == 2, 'Two files required in each pair: section'

        read_sets = [ ]
        for item in self.reads:
            read_sets.append( ([item], False) )
        for item in self.pairs:
            read_sets.append( (item, True) )
        for item in self.interleaved:
            read_sets.append( ([item], True) )
        
        default_options = { '-E' : None, '-T' : None, '-N' : str(grace.how_many_cpus()), '-n':'2', '-w':'200%',
                            '-p': 'opp-in', '-I': '0,500', '-X':None }
        
        if self.sam_unaligned:
            default_options['--sam-unaligned'] = None
        
        if self.half_paired:
            default_options['--half-paired'] = None
        else:
            default_options['--no-half-paired'] = None


        cutoff = '55%' #Default changed in SHRiMP 2.0.2
        if '-h' in self.shrimp_options:
            cutoff = self.shrimp_options[ self.shrimp_options.index('-h')+1 ]
        
        #Create working directory
        
        workspace = self.get_workspace() #working_directory.Working(self.output_dir, must_exist=False)
        workspace.setup_reference(self.references)        
        reference = workspace.get_reference()
        reference_filename = reference.reference_fasta_filename()
        
        #workspace = io.Workspace(self.output_dir)
        #
        #workspace.update_param( 
        #    shrimp_cutoff = cutoff
        #)
        #
        ##Make copy of reference sequences
        #
        #reference_filename = io.abspath(self.output_dir,'reference.fa')
        #reference_file = open(reference_filename,'wb')
        #
        #reference_genbank_filename = io.abspath(self.output_dir,'reference.gbk')
        #reference_genbank_file = open(reference_genbank_filename,'wb')
        #any_genbank = [ False ]
        #
        #def genbank_callback(name, record):
        #    """ Make a copy of any genbank files passed in. """
        #    from Bio import SeqIO
        #    
        #    SeqIO.write([record], reference_genbank_file, 'genbank')
        #    
        #    f = open(os.path.join(
        #        self.output_dir,
        #        grace.filesystem_friendly_name(name) + '.gbk'
        #    ), 'wb')
        #    SeqIO.write([record], f, 'genbank')
        #    f.close()
        #    
        #    any_genbank[0] = True
        #
        #for filename in self.references:
        #    for name, sequence in io.read_sequences(filename, genbank_callback=genbank_callback):
        #        #Don't retain any comment
        #        name = name.split()[0]
        #        io.write_fasta(reference_file, name, sequence.upper())
        #        
        #        f = open(os.path.join(
        #            self.output_dir,
        #            grace.filesystem_friendly_name(name) + '.fa'
        #        ), 'wb')
        #        io.write_fasta(f, name, sequence.upper())
        #        f.close()
        #        
        #
        #reference_file.close()
        #reference_genbank_file.close()
        #if not any_genbank[0]:
        #    os.unlink(reference_genbank_filename)
        #
        ## Create an index of the reference sequences
        #io.execute([
        #    'samtools', 'faidx', reference_filename
        #])
        
        #Run shrimp
        
        bam_filename = io.abspath(self.output_dir, 'alignments.bam')
        bam_prefix = io.abspath(self.output_dir, 'alignments')
        bam_sorted_prefix = io.abspath(self.output_dir, 'alignments_sorted')
        
        temp_filename = io.abspath(self.output_dir, 'temp.bam')
        
        log_filename = io.abspath(self.output_dir, 'shrimp_log.txt')
        log_file = open(log_filename, 'wb')
        
        sam_eater = sam.Bam_writer(temp_filename)
        
        #if self.cs:
        #    program = 'gmapper-cs'
        #else:
        #    program = 'gmapper-ls'
        
        sam_header_sent = [False]
        n_seen = [0]
        
        def eat(process):
            for line in process.stdout:
                if line.startswith('@'):
                    if sam_header_sent[0]: continue
                else:
                    n_seen[0] += 1
                    if n_seen[0] % 100000 == 0:
                        grace.status('%s alignments produced' % grace.pretty_number(n_seen[0]))
                sam_eater.write_raw(line)
                
            assert process.wait() == 0, 'shrimp failed'
            sam_header_sent[0] = True
        
        def remove_pair_options(options):
            for flag in ['-p','-I']:
                while flag in options:
                    pos = options.index(flag)
                    options = options[:pos] + options[pos+2:]
            for flag in ['--half-paired']:
                while flag in options:
                    pos = options.index(flag)
                    options = options[:pos] + options[pos+1:]
            return options
        
        if '--qv-offset' not in self.shrimp_options:
            guesses = [ ]
            for filenames, is_paired in read_sets:
                for filename in filenames:
                    guesses.append(io.guess_quality_offset(filename))
            assert len(set(guesses)) == 1, 'Conflicting quality offset guesses, please specify --qv-offset manually.'
            default_options['--qv-offset'] = str(guesses[0])
                
        for filenames, is_paired in read_sets:
            options = self.shrimp_options[:]
               
            has_qualities = all(
                len( io.read_sequences(filename, qualities=True).next() ) == 3  #A little ugly
                for filename in filenames
            )
            if has_qualities:
                options.append( '--fastq' )
            #    temp_read_filename = io.abspath(working_dir, 'temp.fa')
            #else:
            #    temp_read_filename = io.abspath(working_dir, 'temp.fq')
            
            #try:
            
            #if len(filenames) == 1: # gmapper can cope with gzipped     and filenames[0].endswith('.fa') or filenames[0].endswith('.fq'):
            #    actual_read_filename = filenames[0]
            #else:
            #    actual_read_filename = temp_read_filename
            #    grace.status('Copying reads')
            #    f = open(temp_read_filename, 'wb')
            #    if has_qualities:
            #        for reads in itertools.izip(*[ io.read_sequences(filename, qualities=True) for filename in filenames ]):
            #            for name, seq, qual in reads:
            #                io.write_fastq(f, name, seq, qual)
            #    else:
            #        for reads in itertools.izip(*[ io.read_sequences(filename) for filename in filenames ]):
            #            for name, seq in reads:
            #                io.write_fasta(f, name, seq)
            #    f.close()
            #    grace.status('')
            
            if len(filenames) == 1:
                reads_parameters = [ filenames[0] ]
            else:
                reads_parameters = [ '-1', filenames[0], '-2', filenames[1] ]
            
            for flag in default_options:
                if flag not in options:
                    options.append(flag)
                    if default_options[flag] is not None:
                        options.append(default_options[flag])
            
            if not is_paired:
               options = remove_pair_options(options)
            
            grace.status('')
            
            full_param = reference.shrimp_command(self.cs, options + reads_parameters)
            
            print >> sys.stderr, 'Running', ' '.join(full_param)
            
            p = io.run(full_param,
                    stdout=subprocess.PIPE,
                    stderr=log_file)
            eat(p)
                        
            #finally:
            #    if os.path.exists(temp_read_filename):
            #        os.unlink(temp_read_filename)
        
        log_file.close()
        
        sam_eater.close()
        
        grace.status('Sort')
        
        io.execute([
            'samtools', 'sort', '-n', temp_filename, bam_prefix
        ])
        
        os.unlink(temp_filename)
        
        grace.status('')
Example #3
0
def main(args):
    grace.require_shrimp_1()

    n_cpus = grace.how_many_cpus()
        
    solid, args = grace.get_flag(args, '--solid')
    verbose, args = grace.get_flag(args, '--verbose')

    threshold, args = grace.get_option_value(args, '--threshold', str, '68%')
    
    stride, args = grace.get_option_value(args, '--stride', int, 1)
    max_shrimps, args = grace.get_option_value(args, '--cpus', int, n_cpus)
    batch_size, args = grace.get_option_value(args, '--batch-size', int, 5000000)
        
    input_reference_filenames = [ ]
    reads_filenames = [ ]
    
    shrimp_options = [ '-h', threshold ]
    if threshold.endswith('%'):
        threshold = -float(threshold[:-1])/100.0
    else:
        threshold = int(threshold)
    
    output_dir = [ ]  #As list so can write to from function. Gah.
    
    def front_command(args):
        grace.expect_no_further_options(args)
        
        if len(args) < 1:
            return
        
        output_dir.append(args[0])        
        input_reference_filenames.extend(
            [ os.path.abspath(filename) for filename in args[1:] ])
    def reads_command(args):
        grace.expect_no_further_options(args)
        reads_filenames.extend([ [ os.path.abspath(filename) ] for filename in args])
    def pairs_command(args):
        grace.expect_no_further_options(args)
        assert len(args) == 2, 'Expected exactly two files in "pairs"'
        reads_filenames.append([ os.path.abspath(filename) for filename in args ])
    def shrimp_options_command(args):
        shrimp_options.extend(args)
    
    grace.execute(args, {
        'reads': reads_command,
        '--reads': reads_command,
        'pairs': pairs_command,
        'shrimp-options': shrimp_options_command,
        '--shrimp-options': shrimp_options_command,
    }, front_command)
    
    
    if not output_dir:
        print >> sys.stderr, USAGE % n_cpus
        return 1
    
    output_dir = output_dir[0]
    
    assert input_reference_filenames, 'No reference files given'
    assert reads_filenames, 'No read files given'
    
    for filename in itertools.chain(input_reference_filenames, *reads_filenames):
        assert os.path.exists(filename), '%s does not exist' % filename

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    
    if solid:
        shrimp = 'rmapper-cs'
    else:
        shrimp = 'rmapper-ls'
    
    
    reference_filename = os.path.join(output_dir,'reference.fa')
    reference_file = open(reference_filename,'wb')
    total_reference_sequences = 0
    total_reference_bases = 0
    for input_reference_filename in input_reference_filenames:
        for name, sequence in io.read_sequences(input_reference_filename):
            #Don't retain any comment
            name = name.split()[0]
            io.write_fasta(reference_file, name, sequence)
            
            total_reference_sequences += 1
            total_reference_bases += len(sequence)
            
    reference_file.close()
    
    print '%s base%s in %s reference sequence%s' % (
        grace.pretty_number(total_reference_bases), 's' if total_reference_bases != 1 else '',
        grace.pretty_number(total_reference_sequences), 's' if total_reference_sequences != 1 else '')
    
    assert total_reference_bases, 'Reference sequence file is empty' 
    
    config = {
        'references' : input_reference_filenames,
        'reads' : reads_filenames,
        'stride' : stride,
        'solid': solid,
        'threshold': threshold,
    }
    config_file = open(os.path.join(output_dir, 'config.txt'), 'wb')
    pprint.pprint(config, config_file)
    config_file.close()
    
    output_filename = os.path.join(output_dir, 'shrimp_hits.txt.gz')
    output_file = gzip.open(output_filename, 'wb')
    
    unmapped_filename = os.path.join(output_dir, 'unmapped.fa.gz')
    unmapped_file = gzip.open(unmapped_filename, 'wb')
    
    dirty_filenames = set()
    dirty_filenames.add(output_filename)
    dirty_filenames.add(unmapped_filename)
    
    #warn_low_threshold = True
    
    try: #Cleanup temporary files
        
        N = [0]
        def do_shrimp(read_set):
            my_number = N[0]
            N[0] += 1
            
            tempname = os.path.join(output_dir,'temp%d-%d.fa' % (os.getpid(),my_number))
            tempname_out = os.path.join(output_dir,'temp%d-%d.txt' % (os.getpid(),my_number))
            
            dirty_filenames.add(tempname)
            dirty_filenames.add(tempname_out)
            
            f = open(tempname,'wb')
            for read_name, read_seq in read_set:
                print >> f, '>' + read_name
                print >> f, read_seq
            f.close()
        
            command = shrimp + ' ' + ' '.join(shrimp_options) + ' ' + \
                      tempname + ' ' + reference_filename + ' >' + tempname_out
            if not verbose:
                command += ' 2>/dev/null'
            #f = os.popen(command, 'r')
            child_pid = os.spawnl(os.P_NOWAIT,'/bin/sh','/bin/sh','-c',command)
            #print 'SHRiMP %d running' % my_number
            
            def finalize():
                exit_status = os.waitpid(child_pid, 0)[1]
                assert exit_status == 0, 'Shrimp indicated an error'
                
                hits = { } # read_name -> [ hit line ]
                
                f = open(tempname_out,'rb')
                for line in f:
                    if line.startswith('>'):
                        read_name = line.split(None,1)[0][1:]
                        if read_name not in hits:
                            hits[read_name] = [ ]
                        hits[read_name].append(line)
                f.close()
                                
                for read_name, read_seq in read_set:
                    if read_name in hits:
                        for hit in hits[read_name]:
                            output_file.write(hit)
                    else:
                        print >> unmapped_file, '>' + read_name
                        print >> unmapped_file, read_seq

                output_file.flush()
                unmapped_file.flush()
        
                os.unlink(tempname)
                dirty_filenames.remove(tempname)
                os.unlink(tempname_out)
                dirty_filenames.remove(tempname_out)
                #print 'SHRiMP %d finished' % my_number
            return finalize
        
        
        shrimps = [ ]
        
        reader = iter_reads(config)
        read_count = 0
        
        while True:
            read_set = [ ]
            read_set_bases = 0

            #Read name should not include comment cruft
            # - SHRIMP passes this through
            # - might stuff up identification of pairs
            
            for read_name, read_seq in reader:
                read_name = read_name.split()[0]                
                read_set.append((read_name, read_seq))
                read_set_bases += len(read_seq)
                
                #if warn_low_threshold and len(read_seq)*7 < threshold: #Require 70% exact match
                #    sys.stderr.write('\n*** WARNING: Short reads, consider reducing --threshold ***\n\n')                    
                #    warn_low_threshold = False
            
                read_count += 1
                if read_set_bases >= batch_size: break
                
            if not read_set: break
        
            if len(shrimps) >= max_shrimps:
                shrimps.pop(0)()
            shrimps.append( do_shrimp(read_set) )
            
            grace.status('SHRiMPing %s' % grace.pretty_number(read_count))
        
        while shrimps:
            grace.status('Waiting for SHRiMPs to finish %d ' % len(shrimps) )
            shrimps.pop(0)()
        
        grace.status('')
        
        output_file.close()
        dirty_filenames.remove(output_filename)
        unmapped_file.close()
        dirty_filenames.remove(unmapped_filename)
        
        return 0

    finally:
        for filename in dirty_filenames:
            if os.path.exists(filename):
                os.unlink(filename)
Example #4
0
    def run(self):
        grace.require_shrimp_2()
        grace.require_samtools()
        assert self.references, 'No reference sequences given'
        assert self.reads or self.pairs or self.interleaved, 'No reads given'
        for pair in self.pairs:
            assert len(pair) == 2, 'Two files required in each pair: section'

        read_sets = []
        for item in self.reads:
            read_sets.append(([item], False))
        for item in self.pairs:
            read_sets.append((item, True))
        for item in self.interleaved:
            read_sets.append(([item], True))

        default_options = {
            '-E': None,
            '-T': None,
            '-N': str(grace.how_many_cpus()),
            '-n': '2',
            '-w': '200%',
            '-p': 'opp-in',
            '-I': '0,500',
            '-X': None
        }

        if self.sam_unaligned:
            default_options['--sam-unaligned'] = None

        if self.half_paired:
            default_options['--half-paired'] = None
        else:
            default_options['--no-half-paired'] = None

        cutoff = '55%'  #Default changed in SHRiMP 2.0.2
        if '-h' in self.shrimp_options:
            cutoff = self.shrimp_options[self.shrimp_options.index('-h') + 1]

        #Create working directory

        workspace = self.get_workspace(
        )  #working_directory.Working(self.output_dir, must_exist=False)
        workspace.setup_reference(self.references)
        reference = workspace.get_reference()
        reference_filename = reference.reference_fasta_filename()

        #workspace = io.Workspace(self.output_dir)
        #
        #workspace.update_param(
        #    shrimp_cutoff = cutoff
        #)
        #
        ##Make copy of reference sequences
        #
        #reference_filename = io.abspath(self.output_dir,'reference.fa')
        #reference_file = open(reference_filename,'wb')
        #
        #reference_genbank_filename = io.abspath(self.output_dir,'reference.gbk')
        #reference_genbank_file = open(reference_genbank_filename,'wb')
        #any_genbank = [ False ]
        #
        #def genbank_callback(name, record):
        #    """ Make a copy of any genbank files passed in. """
        #    from Bio import SeqIO
        #
        #    SeqIO.write([record], reference_genbank_file, 'genbank')
        #
        #    f = open(os.path.join(
        #        self.output_dir,
        #        grace.filesystem_friendly_name(name) + '.gbk'
        #    ), 'wb')
        #    SeqIO.write([record], f, 'genbank')
        #    f.close()
        #
        #    any_genbank[0] = True
        #
        #for filename in self.references:
        #    for name, sequence in io.read_sequences(filename, genbank_callback=genbank_callback):
        #        #Don't retain any comment
        #        name = name.split()[0]
        #        io.write_fasta(reference_file, name, sequence.upper())
        #
        #        f = open(os.path.join(
        #            self.output_dir,
        #            grace.filesystem_friendly_name(name) + '.fa'
        #        ), 'wb')
        #        io.write_fasta(f, name, sequence.upper())
        #        f.close()
        #
        #
        #reference_file.close()
        #reference_genbank_file.close()
        #if not any_genbank[0]:
        #    os.unlink(reference_genbank_filename)
        #
        ## Create an index of the reference sequences
        #io.execute([
        #    'samtools', 'faidx', reference_filename
        #])

        #Run shrimp

        bam_filename = io.abspath(self.output_dir, 'alignments.bam')
        bam_prefix = io.abspath(self.output_dir, 'alignments')
        bam_sorted_prefix = io.abspath(self.output_dir, 'alignments_sorted')

        temp_filename = io.abspath(self.output_dir, 'temp.bam')

        log_filename = io.abspath(self.output_dir, 'shrimp_log.txt')
        log_file = open(log_filename, 'wb')

        sam_eater = sam.Bam_writer(temp_filename)

        #if self.cs:
        #    program = 'gmapper-cs'
        #else:
        #    program = 'gmapper-ls'

        sam_header_sent = [False]
        n_seen = [0]

        def eat(process):
            for line in process.stdout:
                if line.startswith('@'):
                    if sam_header_sent[0]: continue
                else:
                    n_seen[0] += 1
                    if n_seen[0] % 100000 == 0:
                        grace.status('%s alignments produced' %
                                     grace.pretty_number(n_seen[0]))
                sam_eater.write_raw(line)

            assert process.wait() == 0, 'shrimp failed'
            sam_header_sent[0] = True

        def remove_pair_options(options):
            for flag in ['-p', '-I']:
                while flag in options:
                    pos = options.index(flag)
                    options = options[:pos] + options[pos + 2:]
            for flag in ['--half-paired']:
                while flag in options:
                    pos = options.index(flag)
                    options = options[:pos] + options[pos + 1:]
            return options

        if '--qv-offset' not in self.shrimp_options:
            guesses = []
            for filenames, is_paired in read_sets:
                for filename in filenames:
                    guesses.append(io.guess_quality_offset(filename))
            assert len(
                set(guesses)
            ) == 1, 'Conflicting quality offset guesses, please specify --qv-offset manually.'
            default_options['--qv-offset'] = str(guesses[0])

        for filenames, is_paired in read_sets:
            options = self.shrimp_options[:]

            has_qualities = all(
                len(io.read_sequences(filename, qualities=True).next()) ==
                3  #A little ugly
                for filename in filenames)
            if has_qualities:
                options.append('--fastq')
            #    temp_read_filename = io.abspath(working_dir, 'temp.fa')
            #else:
            #    temp_read_filename = io.abspath(working_dir, 'temp.fq')

            #try:

            #if len(filenames) == 1: # gmapper can cope with gzipped     and filenames[0].endswith('.fa') or filenames[0].endswith('.fq'):
            #    actual_read_filename = filenames[0]
            #else:
            #    actual_read_filename = temp_read_filename
            #    grace.status('Copying reads')
            #    f = open(temp_read_filename, 'wb')
            #    if has_qualities:
            #        for reads in itertools.izip(*[ io.read_sequences(filename, qualities=True) for filename in filenames ]):
            #            for name, seq, qual in reads:
            #                io.write_fastq(f, name, seq, qual)
            #    else:
            #        for reads in itertools.izip(*[ io.read_sequences(filename) for filename in filenames ]):
            #            for name, seq in reads:
            #                io.write_fasta(f, name, seq)
            #    f.close()
            #    grace.status('')

            if len(filenames) == 1:
                reads_parameters = [filenames[0]]
            else:
                reads_parameters = ['-1', filenames[0], '-2', filenames[1]]

            for flag in default_options:
                if flag not in options:
                    options.append(flag)
                    if default_options[flag] is not None:
                        options.append(default_options[flag])

            if not is_paired:
                options = remove_pair_options(options)

            grace.status('')

            full_param = reference.shrimp_command(self.cs,
                                                  options + reads_parameters)

            print >> sys.stderr, 'Running', ' '.join(full_param)

            p = io.run(full_param, stdout=subprocess.PIPE, stderr=log_file)
            eat(p)

            #finally:
            #    if os.path.exists(temp_read_filename):
            #        os.unlink(temp_read_filename)

        log_file.close()

        sam_eater.close()

        grace.status('Sort')

        io.execute(['samtools', 'sort', '-n', temp_filename, bam_prefix])

        os.unlink(temp_filename)

        grace.status('')