Code Example #1
def demultiplex(split):
  inp_fn = inp_dir + '%s.fq' % (split)
  for name in list(exp_design['Name']) + ['other']:
    util.ensure_dir_exists(out_dir + name)
    util.exists_empty_fn(out_dir + name + '/%s.fa' % (split))

  lc = util.line_count(inp_fn)
  num_bad_q, num_tot = 0, 0
  timer = util.Timer(total = lc)
  with open(inp_fn) as f:
    for i, line in enumerate(f):
      if i % 4 == 0:
        header = line.strip()
      if i % 4 == 1:
        read = line.strip()
      if i % 4 == 3:
        num_tot += 1
        qs = line.strip()
        quals = [ord(s)-33 for s in qs]
        if np.mean(quals) < 30:
          num_bad_q += 1
          continue

        demultiplex_id, trimmed_read = match(read, header)
        
        out_fn = out_dir + '%s/%s.fa' % (demultiplex_id, split)
        # Use a distinct handle name to avoid shadowing the input file object f
        with open(out_fn, 'a') as out_f:
          out_f.write('>' + header[1:] + '\n' + trimmed_read + '\n')
      
      timer.update()

  print 'Rejected %s fraction of reads' % (float(num_bad_q) / num_tot)

  return
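These examples lean on a small util module that is never shown. Below is a minimal sketch of what its helpers plausibly do, inferred only from the call sites; the real module may differ:

import os

def ensure_dir_exists(path):
  # Create the directory (and any parents) if it does not exist yet.
  if path and not os.path.exists(path):
    os.makedirs(path)

def exists_empty_fn(fn):
  # Make sure the parent directory exists, then create/truncate the file.
  ensure_dir_exists(os.path.dirname(fn))
  with open(fn, 'w'):
    pass

def line_count(fn):
  # Stream the file once and count its lines.
  with open(fn) as f:
    return sum(1 for _ in f)

class Timer(object):
  # Barebones progress counter toward an expected total of updates.
  def __init__(self, total=None):
    self.total = total
    self.n = 0
  def update(self):
    self.n += 1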
Code Example #2
def prepare_outfns(out_dir):
    let = "ATGC"

    for umi_short in [
            l1 + l2 + l3 + l4 + l5 + l6 for l1 in let for l2 in let
            for l3 in let for l4 in let for l5 in let for l6 in let
    ]:
        out_fn = out_dir + '%s.txt' % (umi_short)
        util.exists_empty_fn(out_fn)
    return
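For scale: the loop above touches one file per 6-base prefix, i.e. 4**6 = 4096 files per split. A quick interactive check of that count:

>>> import itertools
>>> sum(1 for _ in itertools.product('ATGC', repeat=6))
4096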
Code Example #3
def combine_outputs(out_dir):
    # Concatenates all split outputs together
    # into the main output directory
    out_splits = [
        out_dir + 'split' + str(s) + '/'
        for s in range(_parallel_config.SPLITS)
    ]

    fns = set()
    for s in out_splits:
        for fn in os.listdir(s):
            # fnmatch matches shell-style glob patterns (despite the REGEX_FILTER name)
            if fnmatch.fnmatch(s + fn, _parallel_config.REGEX_FILTER):
                fns.add(fn)

    for fn in fns:
        util.exists_empty_fn(out_dir + fn)
        print '\tCombining', fn, '...'
        locs = [s + fn for s in out_splits]
        subprocess.call('cat ' + ' '.join(locs) + ' > ' + out_dir + fn,
                        shell=True)
    return
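Shelling out to cat works, but building the command by string concatenation is fragile: paths with spaces break it, and a very long split list can exceed the OS argument-length limit. A pure-Python equivalent of the concatenation step, offered as one possible substitute (combine_one is a hypothetical helper, not part of the original code):

import shutil

def combine_one(out_fn, locs):
    # Stream each split's file into the combined output, in split order.
    with open(out_fn, 'wb') as out_f:
        for loc in locs:
            with open(loc, 'rb') as in_f:
                shutil.copyfileobj(in_f, out_f)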
Code Example #4
def matchmaker(nm, split):

    ##CUSTOM CODE FOR DICTIONARY CREATION
    from Bio import pairwise2
    from Bio.pairwise2 import format_alignment

    from Bio.Seq import Seq
    from Bio.Alphabet import generic_dna

    def rc(inp):
        d = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"}
        return "".join([d[e] for e in inp.strip()[::-1]])

    #UNSPLICED DATA PROCESSING
    READ1_TEMPLATE = "NNNtaccagctgccctcgTCGaCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGNNNNNNNNNNNNNNNNNNNNNNNNtgattacacatatagacacgcGAGCAGCCATCTTTTATAGAATGGGtagaacccgtcctaaggactcagattgagcatcgtttgcttctcgagtactacctgg"
    READ2_TEMPLATE = "NNNaaccgctgtgttctgcACGCGTNNNNNNNNNNNNNNNNNNACCGGTgcaggtaatgggccttactatcagtctcagtccttgtacagctcgtccatgccgagagtgatcccggcggcggtcacgaactccagcaggaccatgtgatcgcgcttctcgttggggtctttgctca"

    # Pass the generic_dna alphabet object (imported above), not a string
    r1_seq = Seq(READ1_TEMPLATE.upper(), generic_dna)
    r2_seq = Seq(READ2_TEMPLATE.upper(), generic_dna)

    def quality(line):
        q_1 = line.strip()
        qs = [ord(s) - 33 for s in q_1]
        return np.mean(qs)

    qc_rejection_count = 0
    read1_rejection_count = 0
    constant_region_rejection_count = 0
    accepted_count = 0
    nolib_rejection_count = 0

    print nm, split

    #fq_unspliced_1 = open("/cluster/bh0085/prj/exons/data/{0}_1_sequence.fastq".format(nm))
    #fq_unspliced_2 = open("/cluster/bh0085/prj/exons/data/{0}_2_sequence.fastq".format(nm))

    stdout_fn = _config.SRC_DIR + 'b3_status_%s_%s.out' % (nm, split)
    util.exists_empty_fn(stdout_fn)

    out_dir = out_place + nm + '/' + split + '/'
    util.ensure_dir_exists(out_dir)

    inp_fn1 = inp_dir + '%s_1_sequence_%s.fastq' % (nm, split)
    inp_fn2 = inp_dir + '%s_2_sequence_%s.fastq' % (nm, split)

    lsh_dict = build_targets_better_lsh()
    umis_alignments_buffer = init_umis_alignments_buffer()
    short_outputs = []

    prepare_outfns(out_dir)

    print inp_fn1
    tot_reads = util.line_count(inp_fn1)

    timer = util.Timer(total=tot_reads)

    i = -1

    print "OPENING FILES"
    with open(inp_fn1) as f1:
        with open(inp_fn2) as f2:
            while True:
                i += 1

                try:
                    r2_l = next(f2)
                    r1_l = next(f1)
                except StopIteration:
                    break

                if i % 4 == 1:
                    read1 = r1_l
                    read2 = r2_l

                if i % 4 == 3:
                    if quality(r2_l) < 28 or quality(r1_l) < 28:
                        qc_rejection_count += 1
                        continue

                    r1_library_constant = "TACCAGCTGCCCTCGTCGAC"
                    r1_library_start = len(r1_library_constant)
                    # 35 nt intron, 'AG' splice acceptor, 24 nt exon
                    r1_library_intron_format = "N" * 35 + "AG"
                    r1_library_exon_format = "N" * 24
                    r1_library_format = r1_library_intron_format + r1_library_exon_format
                    r1_library_ag_pos = 35

                    r2_library_constant = "ggggtgttctgctggtagtggtc".upper()
                    r2_library_start = len(r2_library_constant)
                    r2_umi_format = "N" * 15

                    try:
                        a1_offset = read1.upper().index(r1_library_constant)
                    except ValueError:
                        a1_offset = None
                    try:
                        a2_offset = read2.upper().index(r2_library_constant)
                    except ValueError:
                        a2_offset = None

                    if a1_offset is None or a2_offset is None:
                        constant_region_rejection_count += 1
                        continue

                    read1_const = read1[a1_offset:a1_offset + r1_library_start]
                    read1_content = read1[a1_offset + r1_library_start:][:len(r1_library_format)]
                    read1_extended_content = read1[a1_offset + r1_library_start:]
                    # Slice read2 (not read1) relative to the offset found in read2
                    read2_const = read2[a2_offset:a2_offset + r2_library_start]
                    read2_content = read2[a2_offset + r2_library_start:][:len(r2_umi_format)]
                    read2_extended_content = read2[a2_offset + r2_library_start:]

                    # Bases at the expected splice-acceptor 'AG' position
                    r1_ag = read1_content[len(r1_library_intron_format) - 2:len(r1_library_intron_format)]

                    #check to see that the splice acceptor is in the right position
                    #and that the read1 constant sequence aligned

                    #if a1_tag_score <20:
                    #    read1_rejection_count+=1
                    #    continue

                    tag = "TACCANCTGCCCTCGTCGAC"
                    umi = read2_content[:len(r2_umi_format)]
                    lib = read1_content[:len(r1_library_format)]
                    lib_extended = read1_extended_content[:len(r1_library_format) + 20]

                    if umi.count("N") != 0 or lib.count("N") != 0: continue

                    #no longer check for perfect matches. Just align
                    exp = target_names.get(lib, None)

                    cand_idxs = find_best_designed_target(lib, lsh_dict)
                    if len(cand_idxs) == 0:
                        print "rejecting for no good match"
                        nolib_rejection_count += 1
                        continue

                    best_idx = cand_idxs[0]
                    #extends a target alignment region to include an extra 20 bases to anchor the alignment for long r1 deletions
                    target_alignment_region = names_targets[
                        best_idx] + "tgattacacatatagacacg".upper()
                    align = pairwise2.align.localms(target_alignment_region,
                                                    read1_extended_content, 2,
                                                    -1, -5, -.1)[0]

                    output_complete = """>1\n{0}\n{1}\n{2}\n{3}\n""".format(
                        umi, best_idx, align[2],
                        "\n".join(format_alignment(*align).splitlines()[:3]))
                    output_short = (umi, best_idx)

                    umis_alignments_buffer[umi].append(output_complete)
                    short_outputs.append(output_short)
                    accepted_count += 1

                    if i % int(tot_reads / 1000) < 4 and i > 1:
                        print i
                        print "FLUSHING!"
                        # Flush alignment buffer
                        flush_tuples(umis_alignments_buffer, out_dir)
                        print len(umis_alignments_buffer.keys())

                        # Stats for the curious
                        with open(stdout_fn, 'a') as outf:
                            outf.write('Time: %s\n' %
                                       (datetime.datetime.now()))
                            outf.write('Progress: %s\n' %
                                       (i / int(tot_reads / 100)))
                            outf.write('Quality filtered pct: %s\n' %
                                       (float(qc_rejection_count) / max(1, i / 4)))
                            outf.write(
                                "accepted {0}, rejected {1} bad read1, {2} bad lib\n"
                                .format(accepted_count, read1_rejection_count,
                                        nolib_rejection_count))

                    timer.update()
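init_umis_alignments_buffer and flush_tuples are not shown anywhere in these examples. Given that prepare_outfns (Code Example #2) creates one file per 6-base UMI prefix and the buffer is keyed by the full UMI, a plausible sketch is a defaultdict of lists that is bucketed by prefix on flush; the real implementations may behave differently (whether flushing clears the buffer is an assumption here):

from collections import defaultdict

def init_umis_alignments_buffer():
    # One list of pending records per UMI.
    return defaultdict(list)

def flush_tuples(umis_alignments_buffer, out_dir):
    # Append each UMI's buffered records to the file named by its first
    # six bases, then drop the buffered data.
    for umi, records in umis_alignments_buffer.items():
        with open(out_dir + '%s.txt' % umi[:6], 'a') as f:
            f.write(''.join(records))
    umis_alignments_buffer.clear()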
Code Example #5
def matchmaker(nm, split):

    read_constant_rejection_count = 0
    qc_rejection_count = 0
    accepted_count = 0
    grna_failure_count = 0
    read1_rejection_count = 0
    ##CUSTOM CODE FOR DICTIONARY CREATION
    from Bio import pairwise2
    from Bio.pairwise2 import format_alignment

    from Bio.Seq import Seq
    from Bio.Alphabet import generic_dna

    def rc(inp):
        d = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"}
        return "".join([d[e] for e in inp.strip()[::-1]])

    #UNSPLICED DATA PROCESSING
    READ1_TEMPLATE = "NNNtaccagctgccctcgTCGaCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGNNNNNNNNNNNNNNNNNNNNNNNNtgattacacatatagacacgcGAGCAGCCATCTTTTATAGAATGGGtagaacccgtcctaaggactcagattgagcatcgtttgcttctcgagtactacctgg"
    READ2_TEMPLATE = "NNNaaccgctgtgttctgcACGCGTNNNNNNNNNNNNNNNNNNACCGGTgcaggtaatgggccttactatcagtctcagtccttgtacagctcgtccatgccgagagtgatcccggcggcggtcacgaactccagcaggaccatgtgatcgcgcttctcgttggggtctttgctca"

    # Pass the generic_dna alphabet object (imported above), not a string
    r1_seq = Seq(READ1_TEMPLATE.upper(), generic_dna)
    r2_seq = Seq(READ2_TEMPLATE.upper(), generic_dna)

    def quality(line):
        q_1 = line.strip()
        qs = [ord(s) - 33 for s in q_1]
        return np.mean(qs)

    print nm, split

    umis_alignments_buffer = init_umis_alignments_buffer()
    stdout_fn = _config.SRC_DIR + 'b7_status_%s_%s.out' % (nm, split)
    util.exists_empty_fn(stdout_fn)
    out_dir = out_place + nm + '/' + split + '/'
    util.ensure_dir_exists(out_dir)

    inp_fn1 = inp_dir + '%s_1_sequence_%s.fastq' % (nm, split)
    inp_fn2 = inp_dir + '%s_2_sequence_%s.fastq' % (nm, split)

    short_outputs = []

    prepare_outfns(out_dir)
    tot_reads = util.line_count(inp_fn1)
    timer = util.Timer(total=tot_reads)

    #raise Exception()
    i = -1
    with open(inp_fn1) as f1:
        with open(inp_fn2) as f2:
            while True:
                i += 1
                try:
                    r2_l = next(f2)
                    r1_l = next(f1)
                except StopIteration:
                    break
                if i % 4 == 1:
                    read1 = r1_l
                    read2 = r2_l
                if i % 4 == 3:
                    if quality(r2_l) < 28 or quality(r1_l) < 28:
                        qc_rejection_count += 1
                        continue

                    print read1
                    print read2
                    print len(read2)
                    r1_grna19_format = "N" * 19
                    r1_grna20_format = "N" * 20
                    r2_umi_format = "N" * 15

                    r1_prefix_constant = "GACGAAACACCG".upper()
                    r1_grna_start = len(r1_prefix_constant)

                    r2_prefix_constant = "tcaaacaggacggcagcgtgcagctcgcc".upper()
                    r2_umi_start = len(r2_prefix_constant)
                    r2_post_umi_format = "gaccactaccagcagaacacccc".upper()

                    print "working"
                    try:
                        print r1_prefix_constant
                        a1_offset = read1.upper().index(
                            r1_prefix_constant.upper())
                    except Exception, e:
                        read1_rejection_count += 1
                        a1_offset = None
                        print "A1 EXCEPTION"
                        continue
                    try:
                        a2_offset = read2.upper().index(
                            r2_prefix_constant.upper())
                    except Exception, e:
                        a2_offset = None
                        read_constant_rejection_count += 1
                        print "A2 REJECTION"
                        continue

                    read1_grna19 = read1[a1_offset + r1_grna_start:][:len(r1_grna19_format)]
                    read1_grna20 = read1[a1_offset + r1_grna_start:][:len(r1_grna20_format)]
                    read2_umi_content = read2[a2_offset + r2_umi_start:][:len(r2_umi_format)]

                    print a2_offset
                    print r2_umi_start
                    print len(r2_umi_format)
                    print len(read2_umi_content)

                    #raise Exception()

                    grna_col = "Designed gRNA (NGG orientation, 19 and 20)"
                    design_row = exp_design.loc[exp_design[grna_col] == read1_grna20]
                    if len(design_row) == 0:
                        design_row = exp_design.loc[exp_design[grna_col] == read1_grna19]
                    if len(design_row) == 0:
                        grna_failure_count += 1
                        continue

                    design_row = design_row.iloc[0]

                    output_complete = """>1\n{0}\n{1}""".format(
                        read2_umi_content, design_row["Identifier number"])
                    output_short = (read2_umi_content,
                                    design_row["Identifier number"])

                    print output_short

                    umis_alignments_buffer[read2_umi_content].append(
                        output_complete)
                    short_outputs.append(output_short)
                    accepted_count += 1

                    if i % int(tot_reads / 10) < 4 and i > 1:

                        print "FLUSHING!"
                        print accepted_count
                        # Flush alignment buffer
                        flush_tuples(umis_alignments_buffer, out_dir)
                        print len(umis_alignments_buffer.keys())

                        # Stats for the curious
                        with open(stdout_fn, 'a') as outf:
                            outf.write('Time: %s\n' %
                                       (datetime.datetime.now()))
                            outf.write('Progress: %s\n' %
                                       (i / int(tot_reads / 100)))
                            outf.write('Quality filtered pct: %s\n' %
                                       (float(qc_rejection_count) / max(1, i / 4)))
                            outf.write(
                                "accepted {0}, rejected {1} bad read1\n{2} rc rejection\n"
                                .format(accepted_count, read1_rejection_count,
                                        read_constant_rejection_count))

                    timer.update()
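One performance note: every accepted read pair above triggers a full boolean scan of exp_design to find its gRNA row. If the design table is static, a one-time dictionary from gRNA sequence to identifier makes the per-read lookup O(1). A sketch of that substitution, using the column names from the code above (build_grna_lookup and lookup_grna are hypothetical helpers):

def build_grna_lookup(exp_design):
    # One-time map from designed gRNA sequence to its identifier.
    grna_col = "Designed gRNA (NGG orientation, 19 and 20)"
    return dict(zip(exp_design[grna_col], exp_design["Identifier number"]))

def lookup_grna(grna_to_id, read1_grna20, read1_grna19):
    # Try the 20-mer first, then fall back to the 19-mer; None means no match.
    return grna_to_id.get(read1_grna20, grna_to_id.get(read1_grna19))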
Code Example #6
def matchmaker(nm, split):
    print(nm, split)
    stdout_fn = _config.SRC_DIR + 'nh_c_%s_%s.out' % (nm, split)
    util.exists_empty_fn(stdout_fn)
    out_dir = out_place + nm + '/' + split + '/'
    util.ensure_dir_exists(out_dir)

    read1_fn = inp_dir + '%s_R1_%s.fq' % (nm, split)
    read2_fn = inp_dir + '%s_R2_%s.fq' % (nm, split)

    lsh_dict = build_targets_better_lsh()
    alignment_buffer = init_alignment_buffer()

    prepare_outfns(out_dir)

    num_bad_matches = 0
    quality_pass = 0

    tot_lines = util.line_count(read1_fn)
    timer = util.Timer(total=tot_lines)
    with open(read1_fn) as f1, open(read2_fn) as f2:
        for i, (line1, line2) in enumerate(zip(f1, f2)):
            if i % 4 == 0:
                h1 = line1.strip()
                h2 = line2.strip()
            if i % 4 == 1:
                # RC of l1 contains target
                line1 = line1.strip()
                target_read = compbio.reverse_complement(line1[:61])
                ulmi, ulmi_idx = find_ulmi(line1)

                # l2 contains gRNA
                grna_read = line2.strip()

            if i % 4 == 3:

                q1, q2 = line1.strip(), line2.strip()
                read_q = q1[:61][::-1]
                ulmi_q = q1[ulmi_idx:ulmi_idx + len(ulmi)][::-1]
                grna_q = q2[18:22 + 20]

                qs = [ord(s) - 33 for s in read_q + ulmi_q + grna_q]
                if np.mean(qs) >= 28:
                    quality_pass += 1

                    align_header = '>1_%s_%s' % (ulmi, ulmi_q)

                    # Try to find designed target from LSH
                    cand_idxs = find_best_designed_target(
                        target_read, lsh_dict)
                    if len(cand_idxs) > 0:

                        bad_match = compare_target_to_grna(
                            cand_idxs, grna_read)
                        if bad_match == 'ok':
                            # Run alignment and store in buffer
                            best_idx, align = alignment(target_read, cand_idxs)
                            if align is None:
                                continue
                            store_alignment(alignment_buffer, best_idx,
                                            align_header, align, read_q)
                        else:
                            num_bad_matches += 1
                    else:
                        num_bad_matches += 1

            if i % max(1, tot_lines // 200) == 1 and i > 1:
                # Flush alignment buffer
                flush_alignments(alignment_buffer, out_dir)
                alignment_buffer = init_alignment_buffer()

                # Stats for the curious
                with open(stdout_fn, 'a') as outf:
                    outf.write('Time: %s\n' % (datetime.datetime.now()))
                    outf.write('Progress: %s\n' % (i / int(tot_lines / 100)))
                    outf.write('Num. mismatched gRNA/target pairs: %s\n' %
                               (num_bad_matches))
                    outf.write('Frac. mismatched gRNA/target pairs: %s\n' %
                               (num_bad_matches / max(1, quality_pass)))

            timer.update()

    # Final flush
    flush_alignments(alignment_buffer, out_dir)

    return
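build_targets_better_lsh and find_best_designed_target are called here and in several other examples but never shown. A common shape for this kind of candidate lookup is a k-mer index over the designed targets: hash every k-mer of every target, then let a read's k-mers vote for candidates. A hypothetical sketch under that assumption (K and the names_targets dict of name-to-sequence are assumptions, not the original implementation):

from collections import defaultdict

K = 12  # assumed k-mer width

def build_targets_better_lsh():
    # Map every k-mer of every designed target to the targets containing it.
    lsh_dict = defaultdict(set)
    for nm, seq in names_targets.items():
        for j in range(len(seq) - K + 1):
            lsh_dict[seq[j:j + K]].add(nm)
    return lsh_dict

def find_best_designed_target(read, lsh_dict):
    # Count shared k-mers per candidate and return candidates best-first.
    votes = defaultdict(int)
    for j in range(len(read) - K + 1):
        for nm in lsh_dict.get(read[j:j + K], ()):
            votes[nm] += 1
    return sorted(votes, key=votes.get, reverse=True)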
Code Example #7
def prepare_outfns(out_dir):
    for exp in names_targets:
        out_fn = out_dir + '%s.txt' % (exp)
        util.exists_empty_fn(out_fn)
    return
Code Example #8
def matchmaker(nm, split):
  print(split)
  stdout_fn = _config.SRC_DIR + f'nh_c_{nm}_{split}.out'
  util.exists_empty_fn(stdout_fn)
  out_dir = f'{out_place}{nm}/{split}/'
  util.ensure_dir_exists(out_dir)

  # Parse condition-specific settings
  exp_row = exp_design[exp_design['Name'] == nm].iloc[0]
  parent_fn = exp_row['Parent file']
  lib_nm = exp_row['Library']
  target_nm = exp_row['Target']

  # Library design
  global lib_design
  lib_design = pd.read_csv(_config.DATA_DIR + f'lib_{lib_nm}_design.csv')

  global prefixes
  global peptide_nms
  global prefix_to_peptide
  global suffixes
  global suffix_to_peptide
  prefixes = [s[:prefix_len] for s in lib_design['Sequence']]
  peptide_nms = list(lib_design['Name'])
  prefix_to_peptide = {prefix: nm for prefix, nm in zip(prefixes, peptide_nms)}
  suffixes = [compbio.reverse_complement(s[-suffix_len:]) for s in lib_design['Sequence']]
  suffix_to_peptide = {suffix: nm for suffix, nm in zip(suffixes, peptide_nms)}

  # Target 
  target_row = target_design[target_design['Target'] == target_nm].iloc[0]
  target = target_row['Sequence']
  target_strand = target_row['gRNA orientation']

  zf_split = str(split).zfill(3)
  read1_fn = inp_dir + f'{parent_fn}_R1_{zf_split}.fq'
  read2_fn = inp_dir + f'{parent_fn}_R2_{zf_split}.fq'

  count_stats = defaultdict(lambda: 0)
  count_stats['Success'] = 0

  alignment_buffer = init_alignment_buffer()
  prepare_outfns(out_dir, peptide_nms)

  tot_lines = util.line_count(read1_fn)
  timer = util.Timer(total = tot_lines)
  with open(read1_fn) as f1, open(read2_fn) as f2:
    for i, (line1, line2) in enumerate(zip(f1, f2)):
      if i % 4 == 0:
        h1 = line1.strip()
        h2 = line2.strip()
      if i % 4 == 1:
        read1 = line1.strip()
        read2 = line2.strip()
      if i % 4 == 3:
        q1, q2 = line1.strip(), line2.strip()
        count_stats['Read count'] += 1

        qs = [ord(s)-33 for s in q1 + q2]
        if np.mean(qs) < 25:
          count_stats['1a. Quality fail'] += 1
          continue

        res, msg = find_peptide1_nm(read2)
        if res is None:
          count_stats[f'2{msg}'] += 1
          continue
        p1_nm = res

        res, msg = find_peptide2_nm(read1)
        if res is None:
          count_stats[f'2{msg}'] += 1
          continue
        p2_nm = res

        peptide_nm = f'{p1_nm}-{p2_nm}'

        read1 = read1[6:]
        q1 = q1[6:]
        if target_strand == '-':
          read1 = compbio.reverse_complement(read1)
          q1 = q1[::-1]

        # Run alignment and store in buffer
        align_header = '>1'
        align = alignment(read1, target)
        store_alignment(alignment_buffer, peptide_nm, align_header, align, q1)
        count_stats['Success'] += 1

      # flush_interval = 2000
      flush_interval = 200
      if i % int(tot_lines / flush_interval) == 1 and i > 1:
        # Flush alignment buffer
        flush_alignments(alignment_buffer, out_dir)
        alignment_buffer = init_alignment_buffer()

        # Stats for the curious
        with open(stdout_fn, 'a') as outf:
          outf.write(f'Time: {datetime.datetime.now()}\n')
          outf.write(f'Progress: {i / int(tot_lines / 100)}\n')
          outf.write(f'Line: {i}\n')
          for key in sorted(list(count_stats.keys())):
            outf.write(f'{key}, {count_stats[key]}\n')
        # break

      timer.update()
  
  # Final flush
  flush_alignments(alignment_buffer, out_dir)

  stats_df = pd.DataFrame(count_stats, index = [0])
  sorted_cols = sorted([s for s in stats_df.columns])
  stats_df = stats_df[sorted_cols]
  stats_df.to_csv(out_dir + f'stats_{nm}_{split}.csv')

  return
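find_peptide1_nm and find_peptide2_nm are not shown. Given the prefix_to_peptide and suffix_to_peptide maps built above, a plausible sketch returns the peptide name on an exact hit and a reason string otherwise. Which map applies to which read, the 6-base offset, and the exact message strings are all assumptions here:

def find_peptide1_nm(read2):
  # Assumption: read2 begins with the reverse-complemented suffix of peptide 1.
  key = read2[:suffix_len]
  if key in suffix_to_peptide:
    return suffix_to_peptide[key], ''
  return None, 'b. Peptide 1 not matched'

def find_peptide2_nm(read1):
  # Assumption: peptide 2's designed prefix follows 6 leading bases of read1.
  key = read1[6:6 + prefix_len]
  if key in prefix_to_peptide:
    return prefix_to_peptide[key], ''
  return None, 'c. Peptide 2 not matched'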
Code Example #9
def prepare_outfns(out_dir, peptide_nms):
  for p1 in list(peptide_nms):
    for p2 in list(peptide_nms):
      out_fn = out_dir + f'{p1}-{p2}.txt'
      util.exists_empty_fn(out_fn)
  return
Code Example #10
def matchmaker(nm, split):
  print nm, split
  stdout_fn = _config.SRC_DIR + 'nh_c_%s_%s.out' % (nm, split)
  util.exists_empty_fn(stdout_fn)
  out_dir = out_place + nm + '/' + split + '/'
  util.ensure_dir_exists(out_dir)

  inp_fn = inp_dir + '%s_r2_%s.fq' % (nm, split)

  lsh_dict = build_targets_better_lsh()
  alignment_buffer = init_alignment_buffer()

  prepare_outfns(out_dir)

  qf = 0

  tot_reads = util.line_count(inp_fn)
  timer = util.Timer(total = tot_reads)
  with open(inp_fn) as f:
    for i, line in enumerate(f):
      if i % 4 == 0:
        pass
      if i % 4 == 1:
        l2 = line.strip()
      if i % 4 == 3:
        # Quality filter
        q2 = line.strip()
        qs = [ord(s)-33 for s in q2]
        if np.mean(qs) < 28:
          qf += 1
          continue

        l2 = compbio.reverse_complement(l2)
        align_header = '>1'

        # Try to find designed target from LSH
        cand_idxs = find_best_designed_target(l2, lsh_dict)
        if len(cand_idxs) == 0:
          continue

        # Run alignment
        best_idx, align = alignment(l2, cand_idxs)

        # Store alignment into buffer
        store_alignment(alignment_buffer, best_idx, align_header, align)

      if i % int(tot_reads / 100) == 1 and i > 1:
        # Flush alignment buffer
        flush_alignments(alignment_buffer, out_dir)
        alignment_buffer = init_alignment_buffer()

        # Stats for the curious
        with open(stdout_fn, 'a') as outf:
          outf.write('Time: %s\n' % (datetime.datetime.now()))
          outf.write('Progress: %s\n' % (i / int(tot_reads / 100)) )
          outf.write('Quality filtered pct: %s\n' % (float(qf) / (i / 4)))
      timer.update()
  
  # Final flush
  flush_alignments(alignment_buffer, out_dir)

  return
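Finally, the init_alignment_buffer / store_alignment / flush_alignments trio is shared by Code Examples #6, #8, and this one. A minimal sketch consistent with this example's call sites (the #6/#8 variant of store_alignment also takes a quality string, and the type of align is assumed here to be printable text):

from collections import defaultdict

def init_alignment_buffer():
  # Pending alignment text, grouped by designed-target identifier.
  return defaultdict(list)

def store_alignment(alignment_buffer, best_idx, align_header, align):
  # Queue one alignment record under its target.
  alignment_buffer[best_idx].append(align_header + '\n' + align + '\n')

def flush_alignments(alignment_buffer, out_dir):
  # Append each target's pending records to its per-target file; callers
  # re-initialize the buffer after flushing, so no clearing is done here.
  for best_idx, records in alignment_buffer.items():
    with open(out_dir + '%s.txt' % best_idx, 'a') as f:
      f.write(''.join(records))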