Beispiel #1
0
def prepareExample(example_dir):
    """Prepare generated-indel, read and feature data for one example.

    Sets the high data directory to ``example_dir``, runs the indel
    generation executable on the "new" and "old" target FASTA files,
    compiles reads for the generated indels, computes their features, and
    finally registers the resulting reads and features directories.

    Args:
        example_dir: directory passed straight to ``setHighDataDir``.
    """
    setHighDataDir(example_dir)

    # Generate all possible indels
    new_gen_dir = getHighDataDir() + '/generated_indels_new'
    old_gen_dir = getHighDataDir() + '/generated_indels_old'
    for gen_dir in (new_gen_dir, old_gen_dir):
        if not os.path.isdir(gen_dir):
            os.makedirs(gen_dir)

    fasta_and_out_dir = (
        ('/exp_target_pam_new.fasta', new_gen_dir),
        ('/exp_target_pam_old.fasta', old_gen_dir),
    )
    for fasta_name, gen_dir in fasta_and_out_dir:
        # Command line is "<exe> <fasta file> <output dir>/"; it is echoed
        # before being handed to the shell.
        cmd = ' '.join(
            [getIndelGenExe(),
             getHighDataDir() + fasta_name,
             gen_dir + '/'])
        print(cmd)
        os.system(cmd)

    # Compile number of reads per sample for each indel
    reads_dir = getHighDataDir() + '/reads_for_gen_indels'

    def _compile_reads(gen_dir, sample_dirs):
        # Both generated-indel sets write into the same reads directory.
        compileGenIndelReads(gen_indel_dir=gen_dir,
                             out_dir=reads_dir,
                             sample_dirs=sample_dirs)

    # new_dirs / old_dirs are not defined in this function; presumably
    # module-level lists of sample directories -- TODO confirm.
    _compile_reads(new_gen_dir, new_dirs)
    _compile_reads(old_gen_dir, old_dirs)
    setReadsDir(reads_dir)

    # Compute features for each indel
    features_dir = getHighDataDir() + '/features_for_gen_indels'
    for gen_dir in (new_gen_dir, old_gen_dir):
        computeFeaturesForGenIndels(gen_indel_dir=gen_dir,
                                    out_dir=features_dir)
    setFeaturesDir(features_dir)
Beispiel #2
0
                   label=label,
                   plot_label='pie_rpt_fracs',
                   data_label='FracWithI1Rpt',
                   stacked=True)


def runAnalysis():
    """Assemble the analysis spec for the i1 summaries and run it.

    The spec is a plain dict handed to ``analyseResultsPerPartition``; it
    names the results directory, the loader and per-result / all-results
    callbacks, the column names to read, the minimum read threshold and
    the partitions and samples to include.
    """
    summaries_dir = getHighDataDir() + '/i1/i1_summaries'
    per_result_funcs = [(mergeWithIndelData, 'i1IndelData')]
    all_results_funcs = [plotMergedPieDataWithAmbig, plotMergedI1Repeats]

    spec = dict(
        results_dir=summaries_dir,
        # Result file name is the directory name plus '.txt' ...
        dirname_to_result_fn=lambda x: '%s.txt' % x,
        # ... and the reverse drops the path and the last four characters.
        result_to_dirname_fn=lambda x: x.split('/')[-1][:-4],
        py_func_load=defaultLoadData,
        py_funcs_per_result=per_result_funcs,
        py_funcs_all_results=all_results_funcs,
        # Every output is accepted by this check.
        check_output_fn=lambda x: True,
        reads_colname='Total reads',
        min_reads=MIN_READS,
        id_colname='Oligo Id',
        partitions=['Non-Targeting'],
        samples=['K562 New'],
    )
    analyseResultsPerPartition(spec)


if __name__ == '__main__':
    import pdb

    # When run as a script, data is looked up relative to the parent
    # directory.
    setHighDataDir('..')
    runAnalysis()
    # NOTE(review): the script stops in the debugger once the analysis has
    # finished; presumably this keeps the session (and any open figures)
    # alive -- confirm before removing.
    pdb.set_trace()
Beispiel #3
0
                if left_c_seq == right_c_seq:
                    mh_seq = left_c_seq
            altered_seq = getSequence(oligo_det, details['L'] +1, details['R']-1)  #Note includes MH seq at both ends

        str_args = (id, mci, details['L'], details['R'],details['C'],itype,isize,mci_reads,total_reads,mh_seq)
        fout.write(u'%s\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%s\n' % str_args)

if __name__ == '__main__':

    # Expects <high_dir> <subdir> and an optional <more_indels>; any other
    # argument count only prints the usage line.
    if len(sys.argv) != 3 and len(sys.argv) != 4:
        print('Usage: compile_most_common_indels.py <high_dir> <subdir> <more_indels>')
    else:

        #Set up output file
        high_dir = sys.argv[1]
        # The high data dir is only overridden when something other than '.' is given.
        if high_dir != '.': setHighDataDir(high_dir)
        subdir = sys.argv[2]
        # more_indels defaults to True when the third argument is omitted.
        more_indels = True
        # NOTE(review): eval() executes whatever is passed on the command line;
        # presumably only 'True'/'False' is expected -- consider an explicit parse.
        if len(sys.argv) > 3: more_indels = eval(sys.argv[3])
    
        # The result directory name depends on more_indels.
        if more_indels: out_dir = createResultDirectory(high_dir + '/more_indel_summaries',subdir)
        else: out_dir = createResultDirectory(high_dir + '/most_common_indel_summaries', subdir)
        # Output file is named after the last path component of subdir.
        # NOTE(review): no close() for fout is visible in this part of the
        # script -- confirm it is closed after the loop.
        fout = io.open(out_dir + '/' + subdir.split('/')[-1] + '.txt', 'w')
        oligo_lookup = loadExpOligoLookup(subdir)

        #For each Oligo, summarise details of its most common indel
        # Header row: tab-separated column names, one line.
        fout.write(u'Oligo Id\tMost Common Indel\tLeft\tRight\tCentral\tType\tSize\tMCI Reads\tTotal reads\tMicrohomology Sequence\n')
        sum_files = getIndelSummaryFiles(subdir)
        for filename in sum_files:
            # Lookup key is the file name without its last 23 characters
            # (presumably a fixed summary-file suffix -- TODO confirm).
            file_prefix = filename.split('/')[-1][:-23]
            # Map the first element of each lookup entry to the remaining elements.
            oligo_details = {x[0]: x[1:] for x in oligo_lookup[file_prefix]}
Beispiel #4
0
from create_overbeek_templates import createOverbeekTemplates
from compute_overbeek_indel_profiles import computeOverbeekIndelProfiles


def printStatus(status):
    """Print ``status`` as a banner framed by '###' markers and blank lines."""
    banner_open, banner_close = '\n### ', ' ###\n '
    print(banner_open, status, banner_close)


# Work on a copy of the example data under /results.
shutil.copytree(src='/data/endogenous_processing_example',
                dst='/results/endogenous_processing_example')

#----------------------------------------------------------------------
# Configure environment
#----------------------------------------------------------------------
setRunLocal(True)
setHighDataDir('/results/endogenous_processing_example/')
setPythonCmd('python')
setIndelMapExe('/usr/local/bin/indelmap')

#----------------------------------------------------------------
# Processing of raw Van-Overbeek et al reads to produce descriptions of indels
#----------------------------------------------------------------
# Note: This is a demonstration on just 1 oligo, going from raw Overbeek
#       reads to indel descriptions. The sam files are assumed to have been
#       collected already (for further details of that part see
#       collect_overbeek_sams.py in the same dir).

printStatus('Create fastq files from Van Overbeek sam files')
sam_dir = getHighDataDir() + '/overbeek_sam_files'
fastq_dir = getHighDataDir() + '/overbeek_fastq_files'
# Only the fastq output directory is created here.
if not os.path.isdir(fastq_dir):
    os.makedirs(fastq_dir)
extractReads(sam_dir + '/Overbeek_6.sam', fastq_dir + '/Overbeek6.fastq',
Beispiel #5
0
    )
    files = [results_subdir + '/' + x for x in os.listdir(results_subdir)]
    for filename in files:
        collectMhOfLen(filename, mh_len, fout)
    fout.close()


if __name__ == '__main__':

    # Exactly three arguments are required; anything else prints the usage.
    if len(sys.argv) == 4:

        # NOTE(review): eval() executes whatever is passed on the command
        # line; the value is later formatted with %d, so presumably an
        # integer is expected -- consider int() instead.
        mh_len = eval(sys.argv[1])
        highdir, results_subdir = sys.argv[2], sys.argv[3]
        # Keep only the last two path components of the results directory.
        subdir = '/'.join(results_subdir.split('/')[-2:])
        setHighDataDir(highdir)

        if not os.path.isdir(results_subdir):
            raise Exception('No such directory:' + results_subdir)

        # Output goes to <highdir>/mh_freqs_by_len/<subdir>, created on demand.
        out_dir = '/'.join([highdir, 'mh_freqs_by_len', subdir])
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        out_file = out_dir + '/mh_indels_of_len_%d.txt' % mh_len
        collectMhFrequenciesOfLen(results_subdir, mh_len, out_file)
    else:
        print(
            'Usage: collect_mh_frequencies_by_len.py results_mh_len highdir subdir'
        )
Beispiel #6
0
            for j, p1 in enumerate(all_our_profiles):
                kl_mat[i, j] = symmetricKL(o1, p1)
        PL.figure(figsize=(8, 6))
        PL.imshow(kl_mat,
                  cmap='hot_r',
                  vmin=0.0,
                  vmax=3.0,
                  interpolation='nearest')
        PL.xticks(range(N), sel_overbeek_ids, rotation='vertical', fontsize=6)
        PL.yticks(range(N),
                  sel_overbeek_ids,
                  rotation='horizontal',
                  fontsize=6)
        PL.xlabel('Synthetic Measurement', fontsize=12)
        PL.ylabel('Endogenous Measurement', fontsize=12)
        PL.title('KL', fontsize=12)
        PL.colorbar()
        PL.show(block=False)
        saveFig('heatmap_KL')


if __name__ == '__main__':

    # The same directory serves as high data dir and as prediction results dir.
    comparisons_dir = '/data/endogenous_comparisons'
    setHighDataDir(comparisons_dir)
    setPlotDir('/results/plots')
    setFigType('png')

    # Run once for a single selected id, then once with no selection
    # (None presumably means "all" -- TODO confirm).
    for overbeek_id in ('Overbeek16', None):
        compareOverbeekProfiles(selected_overbeek_id=overbeek_id,
                                pred_results_dir=comparisons_dir)
Beispiel #7
0

def printStatus(status):
    """Print ``status`` as a banner framed by '###' markers and blank lines."""
    banner_open, banner_close = '\n### ', ' ###\n '
    print(banner_open, status, banner_close)


#----------------------------------------------------------------------
# Copy all example data to results directory since script runs in place
#----------------------------------------------------------------------
example_results_dir = '/results/indel_processing_example'
shutil.copytree(src='/data/indel_processing_example',
                dst=example_results_dir)

setRunLocal(True)
# copytree creates its destination, so this guard is normally a no-op.
if not os.path.isdir(example_results_dir):
    os.mkdir(example_results_dir)
setHighDataDir(example_results_dir + '/')

# Commands and executables used by the processing steps.
setPythonCmd('python')
setPearExe('/usr/local/bin/pear')
setIndelMapExe('/usr/local/bin/indelmap')
setIndelGenI1Exe('/usr/local/bin/indelgen_i1')
setIndelMhExe('/usr/local/bin/indelmh')

#----------------------------------------------------------------
# Processing of raw reads to produce descriptions of indels
#----------------------------------------------------------------

#Note:  This provides a demonstration on a cut-down data set of just 4 oligos.
#       In practice, the dataset was much too large to be run in one script like this.
#       Instead individual steps were performed by calling each script in turn from the
#       command line to set off parallel jobs on a compute cluster.
Beispiel #8
0
from plot_i1_summaries import runAnalysis as plotI1
sys.path.append('kl_comparisons')
from plot_kl_analysis import runAnalysis as plotKLCmp
sys.path.append('microhomology')
from plot_mh_analysis import runAnalysis as plotMH
sys.path.append('microhomology_mismatch')
from plot_mh_mismatch_frequencies import runAnalysis as plotMhMismatch
sys.path.append('scaffold_compare')
from plot_old_new import runAnalysis as plotKLOldNew
sys.path.append('indel_details')
from plot_pie_indel_summaries import runAnalysis as plotIndelDetails
sys.path.append('../indel_prediction/model_testing')
from plot_old_new_predictions import runAnalysis as plotPred

# Figure type, plot directory and data directory are set once, before any
# of the plotting routines run.
setFigType('png')
setPlotDir('/results/plots')
setHighDataDir('/data/summary_data')

# Run every imported analysis in turn, in the original order.
for run_plot in (plotI1, plotKLCmp, plotMH, plotMhMismatch, plotKLOldNew,
                 plotIndelDetails, plotPred):
    run_plot()