Beispiel #1
0
def runAllIndelMap(start_idx=0,
                   stop_idx=10000000,
                   overbeek_only=False,
                   queue='normal',
                   map_dir='/mapped_reads/',
                   max_cut_dist=4,
                   num_parts=1,
                   order_by_incomplete=False):
    """Submit one ``indelmap_subdir.py`` command per sample subdirectory.

    Every directory returned by ``getAllDataDirs()`` that contains
    ``map_dir`` is visited; for each of its subdirectories a command line is
    built and handed to ``runCmdCheckIdx``.

    Args:
        start_idx: Forwarded to ``runCmdCheckIdx`` together with the running
            command index (presumably the first index to run - confirm
            against ``runCmdCheckIdx``).
        stop_idx: Forwarded to ``runCmdCheckIdx`` (presumably the index at
            which to stop - confirm against ``runCmdCheckIdx``).
        overbeek_only: When true, only the ``Oligos_71`` subdirectory of each
            sample is processed.
        queue: Forwarded to ``runCmdCheckIdx`` as ``queue``.
        map_dir: Path suffix appended to each sample directory; samples
            without it are skipped. Also passed on the command line.
        max_cut_dist: Passed on the command line to ``indelmap_subdir.py``.
        num_parts: Forwarded to ``runCmdCheckIdx`` as ``numj``; also used to
            derive the files-per-part value passed on the command line.
        order_by_incomplete: When true, ``../quality_checks/status.log`` is
            read and samples whose status values are all non-zero are moved
            to the end of the processing order.

    Returns:
        None.
    """
    all_dir, out_dir = getAllDataDirs(), getLogDir()
    if overbeek_only: print('Computing for Overbeek guides only')

    completed_lookup = {}
    if order_by_incomplete:
        # NOTE(review): eval() runs whatever text is in the status log; it is
        # kept for compatibility with the existing log format, but the file
        # must be trusted. Consider ast.literal_eval if values are literals.
        with io.open('../quality_checks/status.log') as f:
            # A sample counts as completed only if its smallest status value
            # is non-zero. Blank rows are skipped.
            completed_lookup = {
                toks[0]: min([eval(x) for x in toks[1:]]) != 0
                for toks in csv.reader(f, delimiter='\t')
                if toks
            }

    # Single pass split that keeps the original relative order in both lists.
    completed, not_completed = [], []
    for data_dir in all_dir:
        if completed_lookup.get(getDirLabel(data_dir), False):
            completed.append(data_dir)
        else:
            not_completed.append(data_dir)

    max_files_per_dir = 20
    # Integer ceiling of max_files_per_dir / num_parts.
    file_per_part = int((max_files_per_dir + num_parts - 1) // num_parts)

    idx = 0
    for i, dirname in enumerate(not_completed + completed):

        # Visual separator printed just before the first completed sample.
        if i == len(not_completed):
            print('-------------------------------------------------')

        print(getShortDir(dirname), idx)
        if not os.path.isdir(dirname + map_dir): continue

        for subdir in getSubdirs(dirname, withpath=False):
            if overbeek_only and subdir != 'Oligos_71': continue
            args = (dirname, getNullDir(dirname), subdir, file_per_part,
                    map_dir, max_cut_dist, getIndelMapExe(), getPythonCmd())
            cmd = getPythonCmd(
            ) + ' indelmap_subdir.py %s %s %s %d - 0 %s %d %s %s' % args
            # runCmdCheckIdx returns the updated running command index.
            idx = runCmdCheckIdx(cmd,
                                 idx,
                                 start_idx,
                                 stop_idx,
                                 out_dir,
                                 'out_indelmap_%s' % getDirLabel(dirname),
                                 numj=num_parts,
                                 queue=queue)
Beispiel #2
0
def runPerSubdir(python_script,
                 out_label,
                 caller,
                 extra_args='',
                 include_null=False):
    """Call ``runSubdir`` for every data directory that has mapped reads.

    Args:
        python_script: Forwarded unchanged to ``runSubdir``.
        out_label: Forwarded unchanged to ``runSubdir``.
        caller: Forwarded unchanged to ``runSubdir``.
        extra_args: Forwarded to ``runSubdir`` as ``extra_args``.
        include_null: When false, directories for which ``isNullDir`` is true
            are skipped.

    Returns:
        None.
    """
    idx = 0
    for dirname in getAllDataDirs():
        if not include_null and isNullDir(dirname):
            continue

        if not os.path.isdir(dirname + '/mapped_reads'):
            # Both values belong in one print call; the message used to sit
            # outside the parentheses and was never printed.
            print(getShortDir(dirname), 'No mapped_reads directory')
            continue

        # runSubdir returns the updated running index for the next call.
        idx = runSubdir(idx,
                        getSubdirs(dirname),
                        getShortDir(dirname),
                        python_script,
                        out_label,
                        caller,
                        extra_args=extra_args)
Beispiel #3
0
from selftarget.oligo import getOligoIdsFromFile
from selftarget.data import getDirLabel, getIndelSummaryFiles, getSubdirs

def filterLargeI(profile):
    """Return a copy of ``profile`` without keys that start with ``'I1'``.

    A key is dropped when its first character is ``'I'`` and its second
    character is ``'1'``; every other entry is kept with its value.

    NOTE(review): the name suggests that large insertions are removed, but
    the test only looks at the second character being '1' - confirm intent.
    """
    kept = {}
    for indel, value in profile.items():
        if indel[0] == 'I' and indel[1] == '1':
            continue
        kept[indel] = value
    return kept

# Script entry: expects exactly three command-line arguments
# (<dirname1> <dirname2> <subdir>); otherwise only the usage line is printed.
if len(sys.argv) != 4:
    print('compare_pairwise.py <dirname1> <dirname2> <subdir>')
else:

    # Selects which output directory name is used below.
    remove_largeI = True

    dirname1, dirname2 = sys.argv[1], sys.argv[2]
    subdir = sys.argv[3]

    # The requested subdir must be listed by getSubdirs for both directories.
    if subdir not in getSubdirs(dirname1, withpath=False):
        raise Exception('No subdir %s in %s' % (subdir, dirname1) )
    if subdir not in getSubdirs(dirname2, withpath=False):
        raise Exception('No subdir %s in %s' % (subdir, dirname2) )    
        
    # Output goes to <out_dir>/<label1>_vs_<label2>/<subdir>.txt; both
    # directory levels are created if they do not exist yet.
    out_dir = 'profile_comparison_summaries' if not remove_largeI else 'profile_comparison_summaries_nolargeI'
    if not os.path.isdir(out_dir): os.mkdir(out_dir)
    out_dir += '/%s_vs_%s' % (getDirLabel(dirname1),getDirLabel(dirname2))
    if not os.path.isdir(out_dir): os.mkdir(out_dir)
    out_file = out_dir + '/%s.txt' % subdir
    
    # Write the tab-separated header row of the comparison table.
    fout = io.open(out_file,'w')
    fout.write(u'ID\tNum Reads 1\tNum Reads 2\tNum States 1\tNum States 2\tNum null reads 1\tNum null reads 2\tKL with Null\tKL without null\tPerc Accepted Reads 1\tPerc Accepted Reads 2\t1st Nonmatch Indel\tNum Top 3 Common\tNum Top 5 Common\tNum Top 10 Common\tProfile 1 Entropy (before mods)\tProfile 2 Entropy (before mods)\tProfile 1 Entropy (after mods)\tProfile 2 Entropy (after mods)\tPerc Overlap\tP1 Perc in Top 3\tP2 Perc in Top 3\tP1 Perc in Top 5\tP2 Perc in Top 5\tP1 Perc in Top 10\tP2 Perc in Top 10\n')
    
    # Results of getIndelSummaryFiles for the chosen subdir under each
    # directory's mapped_reads folder (presumably file names without paths,
    # given withpath=False - confirm against getIndelSummaryFiles).
    dir1_files = getIndelSummaryFiles(dirname1 + '/mapped_reads/' + subdir, withpath=False)
    dir2_files = getIndelSummaryFiles(dirname2 + '/mapped_reads/' + subdir, withpath=False)
Beispiel #4
0
from selftarget.util import getLogDir, runCmdCheckIdx, runSubdir

if __name__ == '__main__':

    # Directory-name fragments that exclude a sample from the comparison.
    skip_tags = ('K562_1600x_LV7B_DPI7', '2A_TREX', 'K562_800x_7A_DPI7_may')

    all_dir, out_dir = getAllDataDirs(), getLogDir()
    idx = 0
    for old_lib in [False]:  #[True,False]:

        # Keep directories whose isOldLib value matches old_lib, that have a
        # mapped_reads folder, contain 'DPI7' and none of the skip tags.
        lib_dirs = [
            x for x in all_dir
            if isOldLib(x) == old_lib
            if os.path.isdir(x + '/mapped_reads')
            if 'DPI7' in x
            if not any(tag in x for tag in skip_tags)
        ]

        # Run compare_pairwise.py on every unordered pair of directories,
        # restricted to the subdirectories both of them have.
        for dirname1, dirname2 in itertools.combinations(lib_dirs, 2):
            common_subdirs = set(getSubdirs(dirname1, withpath=False)) & set(
                getSubdirs(dirname2, withpath=False))

            idx = runSubdir(
                idx,
                common_subdirs,
                '%s\t%s' % (getShortDir(dirname1), getShortDir(dirname2)),
                'compare_pairwise.py',
                'out_compare_pairwise',
                __file__,
                extra_args='%s %s ' % (dirname1, dirname2))
    if len(sys.argv) >= 4: overbeek_only = eval(sys.argv[3])
    if len(sys.argv) >= 5: queue = sys.argv[4]

all_dir, out_dir = getAllDataDirs(), getLogDir()
if overbeek_only: print 'Computing for Overbeek guides only'

file_per_part = 1
max_files_per_dir = 20
num_parts = (max_files_per_dir + file_per_part - 1) / file_per_part

i, idx = 0, 0
for dirname in all_dir:

    i += 1

    print getShortDir(dirname), idx
    if not os.path.isdir(dirname + '/mapped_reads'): continue

    for subdir in getSubdirs(dirname, withpath=False):
        if overbeek_only and subdir != 'Oligos_71': continue
        cmd = './run_correct_indel.sh %s %s %s %d' % (
            dirname, getNullDir(dirname), subdir, file_per_part)
        idx = runCmdCheckIdx(cmd,
                             idx,
                             start_idx,
                             stop_idx,
                             out_dir,
                             'out_correct_indelmap_%s' % getDirLabel(dirname),
                             numj=num_parts,
                             queue=queue)