def check_merged_reads(merge_d, source_d):
    '''compare read counts of merged per-index files against their source fastqs

    for every (index sequence, index) pair in idx_lookup, read number 1 and 2,
    and lane 1 through 8, the source files matching
    <source_d>/*_<index sequence>_L00<lane>_R<read>*.fastq.gz are globbed and
    their read counts summed.  that sum is compared with the read count of
    <merge_d>/s_<lane>_<read>_sequence_index<index>.txt.gz

    returns a dict keyed by merged file path:
      True / False - merged file exists; counts agree / disagree
      None         - merged file does not exist
    combinations for which no source file is found are left out of the dict
    '''
    outcome = {}
    for barcode, index_id in idx_lookup.items():
        for read_no in (1, 2):
            for lane in range(1, 9):
                pattern = '%s/*_%s_L00%s_R%s*.fastq.gz' % (source_d, barcode, lane, read_no)
                source_files = glob(pattern)
                if not source_files:
                    # nothing sequenced for this index/lane/read; no entry recorded
                    continue

                expected = sum(preprocess_radtag_lane.get_read_count(path)
                               for path in source_files)
                merged_path = '%s/s_%s_%s_sequence_index%s.txt.gz' % (merge_d, lane, read_no, index_id)

                if not os.path.exists(merged_path):
                    outcome[merged_path] = None
                    continue
                observed = preprocess_radtag_lane.get_read_count(merged_path)
                outcome[merged_path] = (expected == observed)
    return outcome
#!/usr/bin/env python

import Seq, os,sys
from radtag_denovo import preprocess_radtag_lane
from Util import smartopen

def join_pair(r1,r2,num_n=10,qual_n='#'):
    '''join two reads into a single 3-item record: [name, sequence, quality]

    r1, r2 are indexed as [0]=name, [1]=sequence, [2]=quality.
    name     : taken from r1
    sequence : r1 sequence + num_n "N" characters + str() of Seq.Sequence(r2 sequence).rc()
               (presumably the reverse complement - see the Seq module)
    quality  : r1 quality + num_n copies of qual_n + r2 quality in reversed order
    '''
    spacer_seq = 'N' * num_n
    spacer_qual = qual_n * num_n

    joined_seq = r1[1] + spacer_seq + str(Seq.Sequence(r2[1]).rc())
    # r2's sequence is flipped by rc(), so its quality string is reversed to stay aligned
    joined_qual = r1[2] + spacer_qual + ''.join(reversed(r2[2]))

    return [r1[0], joined_seq, joined_qual]

if __name__ == "__main__":
    f1,f2 = sys.argv[1:]
    fh1 = smartopen(f1)
    fh2 = smartopen(f2)

    rc = preprocess_radtag_lane.get_read_count(f1)
    
    for i in xrange(rc):
        if i % 1000 == 0:
            print >> sys.stderr, '\r%s / %s' % (i,rc),
        r1 = preprocess_radtag_lane.next_read_from_fh(fh1,4)
        r2 = preprocess_radtag_lane.next_read_from_fh(fh2,4)
        print preprocess_radtag_lane.as_fq4_lines(*join_pair(r1,r2))
    print >> sys.stderr, '\ndone'

    
    
    # command line: <fastq> [start:end]
    # with only the fastq given, the bound string defaults to "0:" (empty end)
    if len(sys.argv) == 2:
        fq = sys.argv[1]
        boundstr = "0:"
    else:
        fq, boundstr = sys.argv[1:]

    # NOTE(review): start is converted to int here, but a non-empty end is left
    # as a string in the lines visible in this section - confirm it is converted
    # before use further down
    start,end = boundstr.split(':')
    start = int(start)

    # get_fastq_properties is not defined in this section; its three return values
    # are used below as: second argument to next_read_from_fh (lnum), offset
    # subtracted from each quality character's ord() (baseQ), and length of the
    # per-position accumulator (readlen)
    lnum,baseQ,readlen = get_fastq_properties(fq)

    # an empty end bound means "through readlen"
    if end == '':
        end = readlen

    readcount = preprocess_radtag_lane.get_read_count(fq)

    # running tallies: number of reads seen and summed quality score per position
    # (qsc_by_read is not filled in within the lines visible here)
    qsc_n = 0
    qsc_tot = numpy.zeros(readlen)
    qsc_by_read = []

    fh = smartopen(fq)

    # progress is printed every tickon reads
    # NOTE(review): with integer operands under Python 2, readcount/1000 is 0 when
    # readcount < 1000, and "i % tickon" then raises ZeroDivisionError
    tickon = readcount/1000
    for i in range(readcount):
        if i % tickon == 0:
            print >> sys.stderr, '\r%0.1f' % ((i/float(readcount)) * 100),
        t,r,q = preprocess_radtag_lane.next_read_from_fh(fh,lnum)
        # convert each quality character to a numeric score relative to baseQ
        qsc = [ord(c)-baseQ for c in q]
        qsc_n += 1
        # added into the numpy accumulator; presumably len(q) == readlen - TODO confirm
        qsc_tot += qsc
                raise OSError

        else:
            # the matching "if" is outside this section; per the message below this
            # branch handles a .done file that exists while its bam is missing
            if opts.check_donefiles:
                if os.path.exists(donefile):
                    print >> sys.stderr, '.done file for bam %s found, but bam is missing; remove %s ...' % (rg_ref_bam,donefile),
                    # NOTE(review): donefile is interpolated unquoted into a shell command
                    ret = os.system('rm -f %s' %  donefile)
                    # a non-zero exit status from rm is treated as fatal
                    if ret == 0:
                        print >> sys.stderr, 'DONE'
                    else:
                        raise OSError, 'FAILED'
            
        # a tuple readfile is unpacked as two read files (r1, r2)
        if isinstance(readfile,tuple):
            r1,r2 = readfile

            # both files must hold the same number of reads
            readct1 = preprocess_radtag_lane.get_read_count(r1)
            readct2 = preprocess_radtag_lane.get_read_count(r2)
            if readct1 != readct2:
                raise ValueError, 'read counts do not match, abort'

            # number of parts = reads across both files / opts.reads_per_part
            # (integer division under Python 2 when both are ints), minimum 1
            num_parts = (readct1*2)/opts.reads_per_part
            if num_parts < 1: num_parts = 1
            print >> sys.stderr, 'map in %s part(s)' % num_parts
            
            # build one stampy command per part (1-based); each part writes its own
            # .sam file, and "<sampart>.done" is passed as the trailing argument to
            # run_safe.py.  t is supplied for both the -g and -h options.
            # each command is recorded in cmds and in cmd_by_sam keyed by its sam part;
            # the sam parts are listed per bam in samparts_by_bam
            samparts_by_bam[rg_ref_bam] = []
            for i in xrange(1,num_parts+1):
                sampart = samfbase+'_%05dof%05d.sam' % (i,num_parts)
                samparts_by_bam[rg_ref_bam].append(sampart)
                cmdstr = 'run_safe.py \"module load %s; stampy.py -g %s -h %s  --gatkcigarworkaround --overwrite --readgroup=%s --processpart %s/%s -o %s %s -M %s %s\" %s.done' % (stampy_module,t,t,readgroup_arg,i,num_parts,sampart,stampy_argstr,r1,r2,sampart)
                cmds.append(cmdstr)
                cmd_by_sam[sampart] = cmdstr
#!/usr/bin/env python

'''calculate the fraction (0-1, not multiplied by 100) of reads in a lane
properly resolved by barcode
'''

import os,sys
from radtag_denovo import preprocess_radtag_lane
from glob import glob

# two positional arguments: the lane fastq and the analysis folder to scan
fastq,analysis_folder = sys.argv[1:]

# denominator: total number of reads in the lane fastq
tot_reads = preprocess_radtag_lane.get_read_count(fastq)

# read count of every file in the analysis folder matching the pattern, keyed by path
fqs = glob('%s/*1_sequence.33.fq4*' % analysis_folder)
indiv_barcoded = dict(
    (fq, preprocess_radtag_lane.get_read_count(fq)) for fq in fqs
)

# reads found in the matched files as a fraction of the lane total
barcoded_total = float(sum(indiv_barcoded.values()))
print(barcoded_total/tot_reads)