Python readSummaryToProfile Examples, selftarget.profile.readSummaryToProfile Python Examples

Example #1

0

Show file

File: fetch_mh_mismatch_frequencies.py Project: zhaijj/SelfTarget

def fetchMhMismatchFrequencies(dirname,
                               outdir='mh_mismatch_indel_frequencies'):
    """Write per-indel read counts for microhomology-mismatch oligo pairs.

    Each row of ``<getHighDataDir()>/mh_mismatch_indels.txt`` names a mutated
    oligo ('Oligo ID') and the oligo it maps to ('Mapped Oligo Id').  For both
    oligos the indel summary under ``dirname/mapped_reads`` is read, and the
    read counts of the five indels listed in the row are written, together
    with the original row, to ``<outdir>/<getDirLabel(dirname)>.txt``.

    Args:
        dirname: Sample directory containing a ``mapped_reads`` subdirectory.
        outdir: Directory for the output table; created if it does not exist.

    Raises:
        Exception: If ``isOldLib(dirname)`` is true.
    """
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    if isOldLib(dirname):
        raise Exception('Old Lib not supported')

    mh_exp_indels_file = getHighDataDir() + '/mh_mismatch_indels.txt'
    out_filename = outdir + '/' + getDirLabel(dirname) + '.txt'

    # Columns of the input table holding the indels whose reads are counted.
    indel_cols = [
        'Orig Indel', 'Left Mut-MH Indel', 'Right Mut-MH Indel',
        'Merge Mut 1 Indel', 'Merge Mut 2 Indel'
    ]
    indel_labels = [
        'Orig', 'Left Mut', 'Right Mut', 'Merged Mut1', 'Merged Mut2'
    ]
    # One header column per (profile, indel) pair: all 'Mut' columns first,
    # then all 'Orig' columns, matching the order the counts are written in.
    hdr_str = '\t'.join('%s Indel Reads in %s' % (indel_label, profile_label)
                        for profile_label in ['Mut', 'Orig']
                        for indel_label in indel_labels)

    def loadProfile(oligo_id):
        # Read the indel summary of one oligo; returns (profile, stats).
        subdir, filename = getFileForOligoIdx(getOligoIdxFromId(oligo_id),
                                              ext='_mappedindelsummary.txt')
        profile = {}
        stats = readSummaryToProfile(
            dirname + '/mapped_reads/' + subdir + '/' + filename,
            profile,
            oligoid=oligo_id)
        return profile, stats

    def readCounts(profile, indels):
        # Tab-joined read counts; empty or unseen indels count as zero.
        return '\t'.join('%d' % (profile[indel] if
                                 (indel != '' and indel in profile) else 0)
                         for indel in indels)

    # Context managers guarantee both files are closed even if a row fails.
    with io.open(out_filename, 'w') as fout, \
            io.open(mh_exp_indels_file) as f:
        rdr = csv.DictReader(f, delimiter='\t')
        fout.write(u'%s\t%s\tMut Non-Null Reads\tOrig Non-Null Reads\n' %
                   ('\t'.join(rdr.fieldnames), hdr_str))
        for row in rdr:

            # Load indel profiles for both the original and mutated
            # microhomology forms (file names use ids without underscores).
            mut_oligo_id = row['Oligo ID'].replace('_', '')
            orig_oligo_id = row['Mapped Oligo Id'].replace('_', '')
            p_mut, stats_mut = loadProfile(mut_oligo_id)
            p_orig, stats_orig = loadProfile(orig_oligo_id)

            indels = [row[col] for col in indel_cols]
            str_args = ('\t'.join([row[col] for col in rdr.fieldnames]),
                        readCounts(p_mut, indels), readCounts(p_orig, indels),
                        stats_mut[0] - stats_mut[2],
                        stats_orig[0] - stats_orig[2])
            fout.write(u'%s\t%s\t%s\t%d\t%d\n' % str_args)

Example #2

0

Show file

File: model.py Project: shanring/SelfTarget

 def get_data(self) -> WGEData:
     """Download the reads and indel-summary files and load the indel profile.

     ``self.filename`` is fetched as the reads file; the summary URL is
     derived from it by replacing ``_predicted_rep_reads.txt`` with
     ``_predicted_mapped_indel_summary.txt``.  Both responses are written to
     temporary files that are NOT deleted here, because their paths are
     handed to the returned ``WGEData``.

     Returns:
         WGEData built from the reads file path, the summary file path, the
         profile dict filled by ``readSummaryToProfile`` and the guide info.
     """
     summary_url = self.filename.replace("_predicted_rep_reads.txt",
                                         "_predicted_mapped_indel_summary.txt")

     # NamedTemporaryFile(delete=False) closes its own descriptor on exit of
     # the with-block; the previous tempfile.mkstemp()[1] discarded the open
     # OS-level file descriptor and leaked it on every call.
     r = requests.get(self.filename, allow_redirects=True)
     with tempfile.NamedTemporaryFile('w', delete=False) as f:
         f.write(r.text)
     reads_file = f.name

     r = requests.get(summary_url, allow_redirects=True)
     with tempfile.NamedTemporaryFile('w', delete=False) as f:
         f.write(r.text)
     profile_file = f.name

     crispr_line_info = get_guide_info_from_oligo_id(profile_file, self.oligo_id)
     profile = {}
     readSummaryToProfile(profile_file, profile, oligoid=self.oligo_id, remove_wt=False)
     return WGEData(reads_file, profile_file, profile, crispr_line_info)

Example #3

0

Show file

def read_profile(obj):
    """Download the reads and indel-summary files for ``obj`` and load its profile.

    Args:
        obj: Mapping with a ``'filename'`` entry (URL of the
            ``*_predicted_rep_reads.txt`` file) and an ``'oligo_id'`` entry.

    Returns:
        Tuple ``(reads_file, profile_file, profile)``: the paths of the two
        temporary files holding the downloaded text, and the dict filled by
        ``readSummaryToProfile``.  The temporary files are not deleted here;
        the caller owns them.
    """
    reads_url = obj['filename']
    summary_url = reads_url.replace("_predicted_rep_reads.txt",
                                    "_predicted_mapped_indel_summary.txt")

    # NamedTemporaryFile(delete=False) closes its own descriptor when the
    # with-block exits; the previous tempfile.mkstemp()[1] threw away the open
    # OS-level file descriptor and leaked it on every call.
    r = requests.get(reads_url, allow_redirects=True)
    with tempfile.NamedTemporaryFile('w', delete=False) as f:
        f.write(r.text)
    reads_file = f.name

    r = requests.get(summary_url, allow_redirects=True)
    with tempfile.NamedTemporaryFile('w', delete=False) as f:
        f.write(r.text)
    profile_file = f.name

    profile = {}
    readSummaryToProfile(profile_file,
                         profile,
                         oligoid=obj['oligo_id'],
                         remove_wt=False)
    return reads_file, profile_file, profile

Example #4

0

Show file

File: compute_old_new_kl.py Project: zhaijj/SelfTarget

def loadProfilePair(old_id, new_id):
    """Load the profiles of an old/new oligo pair across all sample dirs.

    One dict per oligo is passed to every ``readSummaryToProfile`` call for
    that oligo (presumably so reads accumulate across samples -- confirm
    against ``readSummaryToProfile``).  The first minus the third value
    returned by each call is summed per oligo.

    Returns:
        ``(p_old, p_new, mut_reads_old, mut_reads_new)``.
    """
    old_suffix = getFileSuffix(old_id)
    new_suffix = getFileSuffix(new_id)
    pooled_old, pooled_new = {}, {}

    # New-construct samples are read first, as in the module-level dir lists.
    total_mut_new = 0
    for sample_dir in new_dirs:
        summary_path = sample_dir + 'mapped_reads/' + new_suffix
        num_reads, _, num_null = readSummaryToProfile(summary_path,
                                                      pooled_new,
                                                      oligoid=new_id)
        total_mut_new += num_reads - num_null

    total_mut_old = 0
    for sample_dir in old_dirs:
        summary_path = sample_dir + 'mapped_reads/' + old_suffix
        num_reads, _, num_null = readSummaryToProfile(summary_path,
                                                      pooled_old,
                                                      oligoid=old_id)
        total_mut_old += num_reads - num_null

    return pooled_old, pooled_new, total_mut_old, total_mut_new

Example #5

0

Show file

def loadProfilesSeparately(old_id, new_id):
    """Load old/new oligo profiles into two slots each, split by sample path.

    Every sample directory whose full path contains ``'800'`` feeds slot 0;
    all others feed slot 1.  For each slot the first minus the third value
    returned by ``readSummaryToProfile`` is summed.

    Returns:
        ``(p_olds, p_news, old_sep_mr, new_sep_mr)`` -- two-element lists of
        profile dicts and of the per-slot read totals.
    """
    old_suffix = getSummaryFileSuffix(old_id)
    new_suffix = getSummaryFileSuffix(new_id)

    def collect(sample_dirs, suffix, oligo_id):
        # Read every sample of one oligo into its slot's profile and total.
        slot_profiles, slot_totals = [{}, {}], [0, 0]
        full_dirs = [getHighDataDir() + '/' + name for name in sample_dirs]
        for sample_dir in full_dirs:
            slot = 1
            if '800' in sample_dir:
                slot = 0
            num_reads, _, num_null = readSummaryToProfile(
                sample_dir + '/mapped_reads/' + suffix,
                slot_profiles[slot],
                oligoid=oligo_id)
            slot_totals[slot] += num_reads - num_null
        return slot_profiles, slot_totals

    # New samples are processed before old ones, as in the original flow.
    p_news, new_sep_mr = collect(new_dirs, new_suffix, new_id)
    p_olds, old_sep_mr = collect(old_dirs, old_suffix, old_id)
    return p_olds, p_news, old_sep_mr, new_sep_mr

Example #6

0

Show file

File: compute_old_new_kl.py Project: zhaijj/SelfTarget

def loadSeparateProfilePairs(old_id, new_id):
    """Load one independent profile per sample directory for an oligo pair.

    Unlike a pooled load, every directory in ``new_dirs`` / ``old_dirs`` gets
    its own fresh dict, so the returned lists are parallel to those
    directory lists.

    Returns:
        ``(old_ps, new_ps)`` -- lists of profile dicts for the old and the
        new oligo respectively.
    """
    old_file, new_file = getFileSuffix(old_id), getFileSuffix(new_id)

    # The read-count statistics returned by readSummaryToProfile were
    # previously summed into locals that were never used or returned; only
    # the filled profile dicts matter here.
    new_ps = []
    for new_dir in new_dirs:
        p_new = {}
        readSummaryToProfile(new_dir + 'mapped_reads/' + new_file,
                             p_new,
                             oligoid=new_id)
        new_ps.append(p_new)

    old_ps = []
    for old_dir in old_dirs:
        p_old = {}
        readSummaryToProfile(old_dir + 'mapped_reads/' + old_file,
                             p_old,
                             oligoid=old_id)
        old_ps.append(p_old)

    return old_ps, new_ps

Example #7

0

Show file

File: compile_gen_indel_reads.py Project: zhaijj/SelfTarget

def compileGenIndelReads(gen_indel_dir='generated_indels',
                         out_dir='reads_for_gen_indels_all_samples',
                         sample_dirs=()):
    """Compile, per oligo, the reads of each generated indel in every sample.

    For every file in ``gen_indel_dir`` (named ``<oligo_id>_...``) the indel
    summaries of that oligo are read from each sample directory, and a table
    ``<out_dir>/<oligo_subdir>/<oligo_id>_gen_indel_reads.txt`` is written
    with one column per sample and one row per generated indel.

    Args:
        gen_indel_dir: Directory holding the generated-indel files.
        out_dir: Root output directory; created (with parents) if missing.
        sample_dirs: Sample directory names, relative to ``getHighDataDir()``.
            Defaults to an empty, immutable sequence.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # The sample header is identical for every output file.
    sample_labels = '\t'.join([getDirLabel(x) for x in sample_dirs])

    for gen_file in os.listdir(gen_indel_dir):

        oligo_id = gen_file.split('_')[0]
        oligo_idx = getOligoIdxFromId(oligo_id)
        oligo_subdir, sum_filename = getFileForOligoIdx(
            oligo_idx, ext='_mappedindelsummary.txt')

        out_subdir = out_dir + '/' + oligo_subdir
        if not os.path.isdir(out_subdir):
            os.makedirs(out_subdir)

        # Read all profiles for this oligo, one per sample.
        profiles, mut_read_totals = [], []
        for dirname in sample_dirs:
            profile = {}
            filename = (getHighDataDir() + '/' + dirname + '/mapped_reads/' +
                        oligo_subdir + '/' + sum_filename)
            stats = readSummaryToProfile(filename, profile, oligoid=oligo_id)
            profiles.append(profile)
            mut_read_totals.append('%d' % (stats[0] - stats[2]))

        # Compile reads for each indel across all samples.  Context managers
        # close both files even when a malformed row raises.
        out_filename = out_subdir + '/%s_gen_indel_reads.txt' % oligo_id
        with io.open(gen_indel_dir + '/' + gen_file) as f, \
                io.open(out_filename, 'w') as fout:
            fout.write(f.readline())  # First line (git commit) copied as-is
            fout.write(u'Indel\tDetails\t%s\n' % sample_labels)
            fout.write(u'All Mutated\t[]\t%s\n' % '\t'.join(mut_read_totals))
            for toks in csv.reader(f, delimiter='\t'):
                indel, indel_details = toks[0], toks[2]
                read_str = '\t'.join('%d' % profile.get(indel, 0)
                                     for profile in profiles)
                fout.write(u'%s\t%s\t%s\n' % (indel, indel_details, read_str))

Example #8

0

Show file

        # Optional third command-line argument overrides the more_indels flag.
        # NOTE(review): eval() on a raw command-line argument executes arbitrary
        # code; presumably only 'True'/'False' is expected here -- confirm.
        if len(sys.argv) > 3: more_indels = eval(sys.argv[3])
    
        # The result directory depends on the more_indels flag; the output file
        # is named after the last path component of subdir.
        if more_indels: out_dir = createResultDirectory(high_dir + '/more_indel_summaries',subdir)
        else: out_dir = createResultDirectory(high_dir + '/most_common_indel_summaries', subdir)
        fout = io.open(out_dir + '/' + subdir.split('/')[-1] + '.txt', 'w')
        oligo_lookup = loadExpOligoLookup(subdir)

        #For each Oligo, summarise details of its most common indel
        fout.write(u'Oligo Id\tMost Common Indel\tLeft\tRight\tCentral\tType\tSize\tMCI Reads\tTotal reads\tMicrohomology Sequence\n')
        sum_files = getIndelSummaryFiles(subdir)
        for filename in sum_files:
            # Drop the last 23 characters of the base name (presumably the
            # '_mappedindelsummary.txt' suffix, which is 23 characters long).
            file_prefix = filename.split('/')[-1][:-23]
            # Index the lookup entries for this file by their first element.
            oligo_details = {x[0]: x[1:] for x in oligo_lookup[file_prefix]}
            oligo_ids = getOligoIdsFromFile(filename)
            for id in oligo_ids:
    
                #Read in the profile (if it exists)
                p1 = {}
                stats1 = readSummaryToProfile(filename, p1, oligoid=getShortOligoId(id))

                # Skip oligos with an empty profile.
                # NOTE(review): under Python 3, p1.keys() is a view and never
                # compares equal to a list, so the second test is always False
                # there -- confirm the intended Python version.
                if len(p1) == 0 or p1.keys() == ['-']:
                    continue

                #Compute and summarise its MCI details
                writeMCISummary(fout, id, p1, stats1, oligo_details[id], more_indels)

        fout.close()

Example #9

0

Show file

File: compare_pairwise.py Project: zhaijj/SelfTarget

    
    # Only summary files present in both sample directories are compared.
    dir1_files = getIndelSummaryFiles(dirname1 + '/mapped_reads/' + subdir, withpath=False)
    dir2_files = getIndelSummaryFiles(dirname2 + '/mapped_reads/' + subdir, withpath=False)
    common_files =  set(dir1_files).intersection(set(dir2_files))
    for filename in common_files:
        
        filename1 = dirname1 + '/mapped_reads/' + subdir + '/' + filename
        filename2 = dirname2 + '/mapped_reads/' + subdir + '/' + filename
        
        # Within a shared file, only oligos listed in both copies are compared.
        oligo_ids1 = getOligoIdsFromFile( filename1 )
        oligo_ids2 = getOligoIdsFromFile( filename2 )
        common_oligos =  set(oligo_ids1).intersection(set(oligo_ids2))
        for oligo_id in common_oligos:

            # Load the profile of this oligo from each sample into a fresh dict.
            profile1, profile2 = {}, {}
            num_reads1, perc_acc1, nonull1 = readSummaryToProfile(filename1, profile1, oligoid=oligo_id)	
            num_reads2, perc_acc2, nonull2 = readSummaryToProfile(filename2, profile2, oligoid=oligo_id)
            # Profile sizes are recorded before any filtering below.
            ns1, ns2 = len(profile1), len(profile2)

            if remove_largeI:
                profile1 = filterLargeI(profile1)
                profile2 = filterLargeI(profile2)

            # Entropies taken before the pairwise comparisons (see note below).
            ent1a, ent2a = entropy(profile1,True), entropy(profile2,True)
            poverlap = percentOverlap( profile1, profile2, True )

            # Symmetric KL is computed twice, differing only in the boolean
            # third argument (its meaning is defined by symmetricKL).
            score1 = symmetricKL( profile1, profile2, False )
            score2 = symmetricKL( profile1, profile2, True )

            ent1b, ent2b = entropy(profile1,True), entropy(profile2,True)	# Recomputed because comparing the profiles appends missing states to both profiles

Example #10

0

Show file

def compareOverbeekProfiles(
        selected_overbeek_id=None,
        pred_results_dir='../indel_prediction/model_testing'):
    """Compare Overbeek indel profiles with the matching K562 oligo profiles.

    For each id ``Overbeek1`` .. ``Overbeek96`` present in ``loadMappings()``
    the Overbeek profile is read, the profiles of all mapped oligos are
    pooled over the sample directories listed below (overall, old-only and
    new-only), and the symmetric KL divergence between them is recorded.

    Args:
        selected_overbeek_id: If given, only this id is processed and its
            replicate profiles are plotted via ``plotProfiles``.  If None,
            all ids are processed and summary plots are produced instead
            (in-frame comparison, KL-vs-reads scatter, KL heatmap).
        pred_results_dir: Passed through to ``plotInFrame``.

    Returns:
        None; results are printed and plotted.
    """
    new_dirs = [
        'ST_June_2017/data/K562_800x_LV7A_DPI7/mapped_reads/Oligos_71',
        'ST_June_2017/data/K562_800x_LV7A_DPI10/mapped_reads/Oligos_71',
        'ST_June_2017/data/K562_800x_LV7B_DPI7/mapped_reads/Oligos_71',
        'ST_June_2017/data/K562_800x_LV7B_DPI10/mapped_reads/Oligos_71',
        'ST_June_2017/data/K562_1600x_LV7B_DPI5/mapped_reads/Oligos_71',
        'ST_Feb_2018/data/CAS9_12NA_1600X_DPI7/mapped_reads/Oligos_71'
    ]

    # Old samples
    old_dirs = [
        'ST_June_2017/data/K562_1600x_6OA_DPI5/mapped_reads/Oligos_71',
        'ST_June_2017/data/K562_1600x_6OA_DPI7/mapped_reads/Oligos_71',
        'ST_April_2017/data/K562_800x_6OA_DPI3_Old7/mapped_reads/Oligos_71',
        'ST_April_2017/data/K562_800x_6OA_DPI7_Old8/mapped_reads/Oligos_71',
        'ST_April_2017/data/K562_800x_6OA_DPI10_Old9/mapped_reads/Oligos_71',
        'ST_April_2017/data/K562_800x_6OB_DPI3_Old10/mapped_reads/Oligos_71',
        'ST_April_2017/data/K562_800x_6OB_DPI7_Old11/mapped_reads/Oligos_71',
        'ST_April_2017/data/K562_800x_6OB_DPI10_Old12/mapped_reads/Oligos_71'
    ]
    remove_long_indels = False
    remove_wt, wt_thresh = True, 3.0
    mappings = loadMappings()

    # Profiles and ids of the Overbeek targets that pass the read thresholds.
    all_overbeek_profiles, all_our_profiles, sel_overbeek_ids = [], [], []
    all_new_profiles, all_old_profiles = [], []
    oldnew_overbeek_ids, old_ids, new_ids = [], [], []

    overbeek_inframes, ours_inframes, oof_sel_overbeek_ids = [], [], []

    # Per-target statistics, parallel to overbeek_ids.
    kls, kls_old, kls_new = [], [], []
    log_reads, log_reads_new, log_reads_old, min_log_reads = [], [], [], []
    overbeek_ids, above30_percentages = [], []

    for idx in range(1, 97):

        overbeek_id = 'Overbeek%d' % idx
        if selected_overbeek_id is not None and selected_overbeek_id != overbeek_id:
            continue
        if overbeek_id not in mappings:
            continue

        overbeek_filename = getHighDataDir(
        ) + '/overbeek_fastq_files/' + overbeek_id + '_mappedindelsummary.txt'

        p1, p1_new, p1_old, o1, rep_reads1, rep_reads2 = {}, {}, {}, {}, {}, {}
        nreads2, nreads1, nreads_old, nreads_new = 0, 0, 0, 0
        nnull_old, nnull_new, nnull1, nnull2 = 0, 0, 0, 0

        # Read the Overbeek profile
        numread2, perc_accept2, num_null2 = readSummaryToProfile(
            overbeek_filename,
            o1,
            oligoid=overbeek_id,
            remove_long_indels=remove_long_indels,
            remove_wt=False)
        if selected_overbeek_id is not None:
            fetchRepresentativeCleanReads(
                getHighDataDir() + '/overbeek_fastq_files/' + overbeek_id +
                '_mappedindelprofiles.txt',
                rep_reads2,
                oligoid=overbeek_id)
            pam_loc2, pam_dir2 = getNullTargetPamDetails(
                getHighDataDir() + '/overbeek_control_fastq_files/' +
                overbeek_id + '_exptargets.txt',
                oligoid=overbeek_id)
        nreads2 += numread2
        nnull2 += num_null2

        if numread2 == 0:
            continue

        # Two replicate slots per scaffold: index 0 for '800x' DPI7 samples,
        # index 1 for the other DPI7 samples.
        p1_new_reps, p1_old_reps = [{}, {}], [{}, {}]
        rr_new_reps, rr_old_reps = [{}, {}], [{}, {}]
        # Read all the new and old profiles
        pam_loc1, pam_dir1 = None, None
        for oligo_id, is_old in mappings[overbeek_id]:

            # Read all reads for all our K562 profiles.  The numeric index
            # follows the 5-character id prefix; int() replaces the former
            # eval(), which would execute whatever text the id contained.
            oligo_idx = int(oligo_id[5:])
            _, oligo_fileprefix = getFileForOligoIdx(oligo_idx, ext='')
            oligo_filename = oligo_fileprefix + '_mappedindelsummary.txt'
            read_filename = oligo_fileprefix + '_mappedindelprofiles.txt'
            exptarget_filename = oligo_fileprefix + '_exptargets.txt'
            if is_old:
                oligo_dirs, p1_old_new, null_oligo_dir = old_dirs, p1_old, 'ST_April_2017/data/NULL_Old/mapped_reads/Oligos_71'
                p1_reps, rr_reps = p1_old_reps, rr_old_reps
            else:
                oligo_dirs, p1_old_new, null_oligo_dir = new_dirs, p1_new, 'ST_April_2017/data/NULL_New/mapped_reads/Oligos_71'
                p1_reps, rr_reps = p1_new_reps, rr_new_reps

            for oligo_dir in [getHighDataDir() + '/' + x for x in oligo_dirs]:
                # The same summary is read into the old-or-new pooled profile
                # and into the overall pooled profile p1.
                readSummaryToProfile(
                    oligo_dir + '/' + oligo_filename,
                    p1_old_new,
                    oligoid=oligo_id,
                    remove_long_indels=remove_long_indels,
                    remove_wt=remove_wt,
                    wt_thresh=wt_thresh)
                numread1, perc_accept1, num_null1 = readSummaryToProfile(
                    oligo_dir + '/' + oligo_filename,
                    p1,
                    oligoid=oligo_id,
                    remove_long_indels=remove_long_indels,
                    remove_wt=remove_wt,
                    wt_thresh=wt_thresh)
                if 'DPI7' in oligo_dir:
                    rep_idx = 0 if '800x' in oligo_dir else 1
                    readSummaryToProfile(
                        oligo_dir + '/' + oligo_filename,
                        p1_reps[rep_idx],
                        oligoid=oligo_id,
                        remove_long_indels=remove_long_indels,
                        remove_wt=remove_wt,
                        wt_thresh=wt_thresh)
                if selected_overbeek_id is not None:
                    fetchRepresentativeCleanReads(oligo_dir + '/' +
                                                  read_filename,
                                                  rep_reads1,
                                                  oligoid=oligo_id)
                    if 'DPI7' in oligo_dir:
                        fetchRepresentativeCleanReads(oligo_dir + '/' +
                                                      read_filename,
                                                      rr_reps[rep_idx],
                                                      oligoid=oligo_id)
                    if pam_loc1 is None:
                        pam_loc1, pam_dir1 = getNullTargetPamDetails(
                            getHighDataDir() + '/' + null_oligo_dir + '/' +
                            exptarget_filename,
                            oligoid=oligo_id)
                if is_old:
                    nreads_old += numread1
                    nnull_old += num_null1
                else:
                    nreads_new += numread1
                    nnull_new += num_null1
                nreads1 += numread1
                nnull1 += num_null1

        kls.append(symmetricKL(p1, o1, True))
        kls_old.append(symmetricKL(p1_old, o1, True))
        kls_new.append(symmetricKL(p1_new, o1, True))

        # +0.5 keeps the logarithm finite when there are no mutated reads.
        log_reads.append(np.log10(nreads1 - nnull1 + 0.5))
        log_reads_old.append(np.log10(nreads_old - nnull_old + 0.5))
        log_reads_new.append(np.log10(nreads_new - nnull_new + 0.5))
        min_log_reads.append(min(log_reads_old[-1], log_reads_new[-1]))
        above30_percentages.append(computePercAbove30(o1))
        overbeek_ids.append(overbeek_id)

        # Keep only targets with more than 10^2 mutated reads overall.
        if log_reads[-1] > 2.0:
            all_overbeek_profiles.append(o1)
            all_our_profiles.append(p1)
            sel_overbeek_ids.append(overbeek_id[8:])
            if above30_percentages[-1] < 50.0:
                oif, oof, _ = fetchIndelSizeCounts(o1)
                pif, pof, _ = fetchIndelSizeCounts(p1)
                overbeek_inframes.append(oif * 100.0 / (oif + oof))
                ours_inframes.append(pif * 100.0 / (pif + pof))
                oof_sel_overbeek_ids.append(overbeek_id)

        if min_log_reads[-1] > 2.0:
            all_new_profiles.append(p1_new)
            all_old_profiles.append(p1_old)
            oldnew_overbeek_ids.append(overbeek_id)
            old_ids.append([
                mapped_id for mapped_id, mapped_is_old in mappings[overbeek_id]
                if mapped_is_old
            ][0])
            new_ids.append([
                mapped_id for mapped_id, mapped_is_old in mappings[overbeek_id]
                if not mapped_is_old
            ][0])

        # overbeek_id is known to be in mappings (checked at the top of the
        # loop), so no KeyError handling is needed around this lookup.
        print(overbeek_id, [x for (x, y) in mappings[overbeek_id]],
              kls[-1], nreads2, nreads1)

        if selected_overbeek_id is not None:
            title = '%s (KL=%.1f)' % (overbeek_id, kls[-1])
            labels = [
                'Conventional scaffold Rep A', 'Conventional scaffold  Rep B',
                'Improved scaffold Rep A', 'Improved scaffold  Rep B',
                'Endogenous Profile'
            ]
            # The fourth profile is replicate B of the new scaffold; it was
            # previously p1_new_reps[0], which plotted replicate A twice and
            # disagreed with the rr_new_reps[1] reads shown alongside it.
            plotProfiles([
                p1_old_reps[0], p1_old_reps[1], p1_new_reps[0], p1_new_reps[1],
                o1
            ], [
                rr_old_reps[0], rr_old_reps[1], rr_new_reps[0], rr_new_reps[1],
                rep_reads2
            ], [pam_loc1, pam_loc1, pam_loc1, pam_loc1, pam_loc2], [
                x == 'REVERSE'
                for x in [pam_dir1, pam_dir1, pam_dir1, pam_dir1, pam_dir2]
            ],
                         labels,
                         title=title)

    if selected_overbeek_id is None:

        plotInFrame(overbeek_inframes, ours_inframes, oof_sel_overbeek_ids,
                    pred_results_dir)

        # Scatter of KL against log10 mutated reads, one series per bin of
        # the percentage returned by computePercAbove30.
        # NOTE(review): bins are open on the left, so a value of exactly 0.0
        # falls in no bin and is not plotted -- confirm this is intended.
        PL.figure(figsize=(5.5, 5))
        for thr_l, thr_h in [(0.0, 10.0), (10.0, 20.0), (20.0, 50.0),
                             (50.0, 90.0), (90.0, 100.0)]:
            in_bin = [
                (reads, kl, oid)
                for (kl, a30, oid, reads) in zip(kls, above30_percentages,
                                                 overbeek_ids, log_reads)
                if thr_l < a30 <= thr_h
            ]
            xdata = [reads for (reads, kl, oid) in in_bin]
            ydata = [kl for (reads, kl, oid) in in_bin]
            PL.plot(xdata,
                    ydata,
                    'o',
                    label='%d-%d%% Deletions > 30' % (thr_l, thr_h))
            # Label only the outliers: high KL despite many reads.
            for x, y, oid in in_bin:
                if y > 3 and x > 2:
                    PL.text(x, y, oid)
        PL.legend()
        PL.plot([0, 6], [0.77, 0.77], '--', color='grey')
        PL.text(0.1, 0.5, 'Median between our replicates', color='grey')
        PL.ylabel('Symmetric KL Divergence', fontsize=12)
        PL.xlabel('Log10 Mutated Reads', fontsize=12)
        PL.xlim((0, 5.5))
        PL.ylim((0, 8))
        PL.show(block=False)
        saveFig('scatter_KL')

        print('Median=', np.median(kls), 'Mean KL=', np.mean(kls))
        print(len(overbeek_ids))

        # Compute pairwise KL between Overbeek profiles (rows) and ours
        # (columns).
        N = len(sel_overbeek_ids)
        kl_mat = np.zeros((N, N))
        for i, overbeek_profile in enumerate(all_overbeek_profiles):
            for j, our_profile in enumerate(all_our_profiles):
                kl_mat[i, j] = symmetricKL(overbeek_profile, our_profile)
        PL.figure(figsize=(8, 6))
        PL.imshow(kl_mat,
                  cmap='hot_r',
                  vmin=0.0,
                  vmax=3.0,
                  interpolation='nearest')
        PL.xticks(range(N), sel_overbeek_ids, rotation='vertical', fontsize=6)
        PL.yticks(range(N),
                  sel_overbeek_ids,
                  rotation='horizontal',
                  fontsize=6)
        PL.xlabel('Synthetic Measurement', fontsize=12)
        PL.ylabel('Endogenous Measurement', fontsize=12)
        PL.title('KL', fontsize=12)
        PL.colorbar()
        PL.show(block=False)
        saveFig('heatmap_KL')

Example #11

0

Show file

File: fetch_mh_indel_frequencies.py Project: zhaijj/SelfTarget

        # One output file is produced per indel summary file found in subdir.
        indel_files = getIndelSummaryFiles(subdir, withpath=False)
        for indel_file in indel_files:

            oligo_ids = getOligoIdsFromFile(subdir + '/' + indel_file)
            # The expected-indels table sits in the current directory when
            # highdir is '.', otherwise under the ST_June_2017 data directory.
            mh_loc = '.' if highdir == '.' else highdir + '/ST_June_2017/data'
            mh_indels = loadMhExpIndels(mh_loc + '/' + mh_exp_indels_file,
                                        set(oligo_ids))

            # Drop the last 23 characters of the file name (presumably the
            # '_mappedindelsummary.txt' suffix, which is 23 characters long).
            fout = io.open(outdir + '/' + indel_file[:-23] + '_mhindels.txt',
                           'w')
            for oligo_id in oligo_ids:

                profile = {}
                acc, pacc, nullr = readSummaryToProfile(subdir + '/' +
                                                        indel_file,
                                                        profile,
                                                        oligoid=oligo_id)

                # Per-oligo header line: id, first returned statistic, and the
                # first minus the third returned statistic.
                fout.write(u'@@@%s:%d:%d\n' % (oligo_id, acc, acc - nullr))
                mhs, indels = mh_indels[oligo_id]

                # One row per (microhomology, indel) pair; each microhomology
                # string has three ':'-separated fields.
                for (mh, indel) in zip(mhs, indels):
                    left, right, mh_len = mh.split(':')
                    if indel == 'Unmappable': continue
                    # Indels absent from the profile are reported with 0 reads.
                    if indel in profile: nreads = profile[indel]
                    else: nreads = 0
                    fout.write(u'%s\t%s\t%s\t%s\t%d\n' %
                               (left, right, mh_len, indel, nreads))
            fout.close()