Esempio n. 1
0
def gen_qsubs():
  """Write one qsub shell script per (design row, major threshold) pair.

  Every script runs ``python {NAME}.py <short name> <threshold>``. The
  matching ``qsub`` submission lines are saved to ``_commands.sh`` in the
  qsubs directory, and that file is marked executable with ``chmod +x``.
  """
  print('Generating qsub scripts...')
  script_dir = _config.QSUBS_DIR + NAME + '/'
  util.ensure_dir_exists(script_dir)

  # The file-name tag only depends on NAME, so compute it once.
  tag = NAME.split('_')[0]
  submit_lines = []

  for _, design_row in design_df.iterrows():
    short_nm = design_row['Short name']
    for thresh in params['major_thresholds']:
      job_cmd = f'python {NAME}.py {short_nm} {thresh}'

      # One small bash wrapper per job
      job_fn = script_dir + f'q_{tag}_{short_nm}_{thresh}.sh'
      with open(job_fn, 'w') as job_file:
        job_file.write(f'#!/bin/bash\n{job_cmd}\n')

      # Exactly one submission line is recorded per written script
      submit_lines.append(
        f'qsub -V -P regevlab -l h_rt=10:00:00,h_vmem=4G -wd {_config.SRC_DIR} {job_fn} &'
      )

  # Save the submission commands next to the scripts
  master_fn = script_dir + '_commands.sh'
  with open(master_fn, 'w') as master_file:
    master_file.write('\n'.join(submit_lines))

  subprocess.check_output(f'chmod +x {master_fn}', shell = True)

  print(f'Wrote {len(submit_lines)} shell scripts to {script_dir}')
Esempio n. 2
0
def gen_qsubs():
    """Write one qsub shell script per (sample name, split) pair.

    Sample names are "190510Gif_D19-2120" followed by 26-28 and 35-37; each
    is combined with splits 0-14. The ``qsub`` submission lines are saved to
    ``_commands.txt`` in the qsubs directory.
    """
    print('Generating qsub scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '_510' + '/'
    util.ensure_dir_exists(script_dir)

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    # list(...) keeps the concatenation valid whether range returns a list
    # or a lazy range object.
    suffixes = list(range(26, 29)) + list(range(35, 38))
    sample_nms = ["190510Gif_D19-2120{0}".format(i) for i in suffixes]

    for sample_nm in sample_nms:
        for split_idx in range(15):
            job_cmd = '/cluster/shz24/anaconda3/envs/splice_env/bin/python %s.py %s %s' % (
                NAME, sample_nm, split_idx)

            # One small bash wrapper per job
            job_fn = script_dir + 'q_%s_%s_%s.sh' % (tag, sample_nm, split_idx)
            with open(job_fn, 'w') as job_file:
                job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

            # Exactly one submission line is recorded per written script
            submit_lines.append('qsub -m e -wd %s %s' %
                                (_config.SRC_DIR, job_fn))

    # Save the submission commands next to the scripts
    with open(script_dir + '_commands.txt', 'w') as listing:
        listing.write('\n'.join(submit_lines))

    print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
Esempio n. 3
0
def gen_qsubs():
    """Write one qsub shell script for every experiment in ``exp_pairs``.

    Each script runs ``python NAME.py <exp> redo``. The ``qsub`` submission
    lines are saved to ``_commands.txt`` in the qsubs directory.
    """
    print('Generating qsub scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(script_dir)

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    for pair in exp_pairs:
        for exp_nm in pair:
            job_cmd = 'python %s.py %s redo' % (NAME, exp_nm)

            # One small bash wrapper per experiment
            job_fn = script_dir + 'q_%s_%s.sh' % (tag, exp_nm)
            with open(job_fn, 'w') as job_file:
                job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

            # Exactly one submission line is recorded per written script
            submit_lines.append('qsub -m e -V -wd %s %s' %
                                (_config.SRC_DIR, job_fn))

    # Save the submission commands next to the scripts
    with open(script_dir + '_commands.txt', 'w') as listing:
        listing.write('\n'.join(submit_lines))

    print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
Esempio n. 4
0
def gen_qsubs():
    """Write one qsub shell script per experiment in a fixed list.

    Each script runs ``python NAME.py <exp>``. The ``qsub`` submission lines
    are saved to ``_commands.txt`` in the qsubs directory.
    """
    print('Generating qsub scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(script_dir)

    experiments = [
        'VO_K562', 'VO_HCT116', 'VO_HEK293', 'Lib1-mES', 'Lib1-HCT116',
        'Lib1-HEK293T', 'DisLib-U2OS', 'DisLib-mES', 'DisLib-HEK293T',
        'DisLib-U2OS-HEK-Mixture', 'PRL-Lib1-mES', 'PRL-DisLib-mES',
    ]

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    for exp_nm in experiments:
        job_cmd = 'python %s.py %s' % (NAME, exp_nm)

        # One small bash wrapper per experiment
        job_fn = script_dir + 'q_%s_%s.sh' % (tag, exp_nm)
        with open(job_fn, 'w') as job_file:
            job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

        # Exactly one submission line is recorded per written script
        submit_lines.append('qsub -m e -wd %s %s' % (_config.SRC_DIR, job_fn))

    # Save the submission commands next to the scripts
    with open(script_dir + '_commands.txt', 'w') as listing:
        listing.write('\n'.join(submit_lines))

    print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
Esempio n. 5
0
def main(inp_dir, out_dir):
    """Print the script name, make sure *out_dir* exists, then run gather().

    Returns None.
    """
    print(NAME)
    util.ensure_dir_exists(out_dir)
    gather(inp_dir, out_dir)
Esempio n. 6
0
def demultiplex(split):
  """Demultiplex one FASTQ split into per-name FASTA files.

  Reads ``inp_dir + '<split>.fq'`` four lines at a time (header, read,
  separator, quality). Records whose mean quality (ASCII code minus 33,
  i.e. Phred+33 encoding assumed) is below 30 are rejected. Every other
  record is passed to ``match(read, header)`` and appended to
  ``out_dir/<demultiplex_id>/<split>.fa``.

  Args:
    split: name of the split; used for both the input and output file names.

  Returns:
    None. Prints the fraction of rejected reads.
  """
  inp_fn = inp_dir + '%s.fq' % (split)
  # Prepare one output file per known name plus the 'other' bucket.
  # NOTE(review): util.exists_empty_fn presumably creates/empties the file so
  # the appends below start clean - confirm in mylib.util.
  for name in list(exp_design['Name']) + ['other']:
    util.ensure_dir_exists(out_dir + name)
    util.exists_empty_fn(out_dir + name + '/%s.fa' % (split))

  lc = util.line_count(inp_fn)
  num_bad_q, num_tot = 0, 0
  timer = util.Timer(total = lc)
  with open(inp_fn) as fq:
    for i, line in enumerate(fq):
      field = i % 4
      if field == 0:
        header = line.strip()
      elif field == 1:
        read = line.strip()
      elif field == 3:
        num_tot += 1
        quals = [ord(s) - 33 for s in line.strip()]
        if np.mean(quals) < 30:
          num_bad_q += 1
        else:
          demultiplex_id, trimmed_read = match(read, header)

          out_fn = out_dir + '%s/%s.fa' % (demultiplex_id, split)
          # Distinct handle name: do not shadow the FASTQ handle being read.
          with open(out_fn, 'a') as out_f:
            out_f.write('>' + header[1:] + '\n' + trimmed_read + '\n')

      # The timer total is the line count, so tick once per line, including
      # the quality lines of rejected reads.
      timer.update()

  # float() forces true division (integer division would print 0), and the
  # guard avoids a ZeroDivisionError on an empty input file.
  if num_tot > 0:
    frac_rejected = float(num_bad_q) / num_tot
  else:
    frac_rejected = 0.0
  print('Rejected %s fraction of reads' % (frac_rejected))

  return
Esempio n. 7
0
def gen_qsubs():
    """Write one qsub shell script for every name in ``ill_nms``.

    Each script runs ``python {NAME}.py <nm>``. The ``qsub`` submission lines
    are saved to ``_commands.sh`` in the qsubs directory, and that file is
    marked executable with ``chmod +x``.
    """
    print('Generating qsub scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(script_dir)

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    for ill_nm in ill_nms:
        job_cmd = f'python {NAME}.py {ill_nm}'

        # One small bash wrapper per name
        job_fn = script_dir + f'q_{tag}_{ill_nm}.sh'
        with open(job_fn, 'w') as job_file:
            job_file.write(f'#!/bin/bash\n{job_cmd}\n')

        # Exactly one submission line is recorded per written script
        submit_lines.append(
            f'qsub -V -P regevlab -l h_rt=10:00:00 -wd {_config.SRC_DIR} {job_fn} &'
        )

    # Save the submission commands next to the scripts
    master_fn = script_dir + '_commands.sh'
    with open(master_fn, 'w') as master_file:
        master_file.write('\n'.join(submit_lines))

    subprocess.check_output(f'chmod +x {master_fn}', shell=True)

    print(f'Wrote {len(submit_lines)} shell scripts to {script_dir}')
def gen_qsubs():
    """Write one qsub shell script for every name in ``_data.D['Name']``.

    Each script runs ``python NAME.py <nm>`` and is saved as
    ``q_pre-l2_<nm>.sh``. The ``qsub`` submission lines are saved to
    ``_commands.txt`` in the qsubs directory.
    """
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    # Only these names are scheduled here; the previously fetched `_data.L3`
    # list was never used and has been dropped.
    l2_names = _data.D['Name']

    script_id = 'pre-l2'
    num_scripts = 0
    for nm in l2_names:
        command = 'python %s.py %s' % (NAME, nm)

        # Write shell scripts
        sh_fn = qsubs_dir + 'q_%s_%s.sh' % (script_id, nm)
        with open(sh_fn, 'w') as f:
            f.write('#!/bin/bash\n%s\n' % (command))
        num_scripts += 1

        # Write qsub commands
        qsub_commands.append('qsub -m e -V -wd %s %s' %
                             (_config.SRC_DIR, sh_fn))

    # Save commands
    with open(qsubs_dir + '_commands.txt', 'w') as f:
        f.write('\n'.join(qsub_commands))

    # Single-argument print() prints the same text under Python 2 and 3.
    print('Wrote %s shell scripts to %s' % (num_scripts, qsubs_dir))
    return
Esempio n. 9
0
def gen_qsubs():
    """Write one qsub shell script for every condition in ``exp_design['Name']``.

    Each script runs ``python {NAME}.py <condition>``. The ``qsub``
    submission lines are saved to ``_commands.sh`` in the qsubs directory,
    and that file is marked executable.
    """
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    # The per-condition design-row / library lookup was removed: its result
    # was never used and it re-filtered the whole table on every iteration.
    script_id = NAME.split('_')[0]

    num_scripts = 0
    for condition in exp_design['Name']:
        command = f'python {NAME}.py {condition}'

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{script_id}_{condition}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        # Write qsub commands
        qsub_commands.append(
            f'qsub -j y -P regevlab -V -l h_rt=4:00:00,h_vmem=2G -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))

    # Argument list instead of a shell string: no quoting problems if the
    # path contains spaces or shell metacharacters.
    subprocess.check_output(['chmod', '+x', commands_fn])

    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
Esempio n. 10
0
def main(inp_dir, out_dir, srr_id='', start='none', end='none'):
    """Dispatch between qsub generation, a single SRR id, and an id range.

    * No srr_id and no range: call gen_qsubs() and return None.
    * srr_id given, no range: run control_adjustment() for it unless
      is_control() is truthy (then print 'is control'); return None.
    * Otherwise: treat start/end as integers and process SRR<start> through
      SRR<end> inclusive, skipping ids for which is_control() is not exactly
      False; return out_dir.
    """
    print(NAME)
    util.ensure_dir_exists(out_dir)

    no_range = (start == 'none' and end == 'none')

    if no_range and srr_id == '':
        gen_qsubs()
        return

    if no_range:
        # A single SRR id was supplied
        if is_control(srr_id):
            print('is control')
        else:
            control_adjustment(inp_dir, out_dir, srr_id)
        return

    # Range mode
    first, last = int(start), int(end)
    progress = util.Timer(total=last - first + 1)
    for idnum in range(first, last + 1):
        current_id = 'SRR%s' % (idnum)
        # Identity test kept on purpose: only a literal False triggers work.
        if is_control(current_id) is False:
            control_adjustment(inp_dir, out_dir, current_id)
        progress.update()

    return out_dir
Esempio n. 11
0
def gen_qsubs():
  """Write one qsub shell script per 'PacBio RS II' row of ``exp_design``.

  Each script runs ``python {NAME}.py <Run> <Library Name>`` and is named
  after the run id. The ``qsub`` submission lines are saved to
  ``_commands.sh`` in the qsubs directory, and that file is marked executable
  with ``chmod +x``.
  """
  print('Generating qsub scripts...')
  script_dir = _config.QSUBS_DIR + NAME + '/'
  util.ensure_dir_exists(script_dir)

  # Select the PacBio rows once and read both columns from that selection.
  pacbio_rows = exp_design[exp_design['Instrument'] == 'PacBio RS II']
  run_ids = pacbio_rows['Run']
  lib_nms = pacbio_rows['Library Name']

  # The file-name tag only depends on NAME, so compute it once.
  tag = NAME.split('_')[0]
  submit_lines = []

  for run_id, lib_nm in zip(run_ids, lib_nms):
    job_cmd = f'python {NAME}.py {run_id} {lib_nm}'

    # One small bash wrapper per run
    job_fn = script_dir + f'q_{tag}_{run_id}.sh'
    with open(job_fn, 'w') as job_file:
      job_file.write(f'#!/bin/bash\n{job_cmd}\n')

    # Exactly one submission line is recorded per written script
    submit_lines.append(f'qsub -V -P regevlab -l h_rt=4:00:00 -wd {_config.SRC_DIR} {job_fn} &')

  # Save the submission commands next to the scripts
  master_fn = script_dir + '_commands.sh'
  with open(master_fn, 'w') as master_file:
    master_file.write('\n'.join(submit_lines))

  subprocess.check_output(f'chmod +x {master_fn}', shell = True)

  print(f'Wrote {len(submit_lines)} shell scripts to {script_dir}')
def gen_qsubs():
    """Write one shell script per (type, split) pair: 36 exons, 32 introns.

    Each script runs ``python -u NAME.py <typ> <split>``. The ``qsub``
    submission lines are saved to ``_commands.txt`` in the qsubs directory.
    """
    print('Generating nohup scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '/'
    work_dir = _config.SRC_DIR
    util.ensure_dir_exists(script_dir)

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    nums = {'exons': 36, 'introns': 32}
    for typ, n_splits in nums.items():
        for split in range(n_splits):
            job_cmd = 'python -u %s.py %s %s' % (NAME, typ, split)

            # One small bash wrapper per job
            job_fn = script_dir + 'q_%s_%s_%s.sh' % (tag, typ, split)
            with open(job_fn, 'w') as job_file:
                job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

            # Exactly one submission line is recorded per written script
            submit_lines.append('qsub -m e -wd %s %s' % (work_dir, job_fn))

    # Save the submission commands next to the scripts
    with open(script_dir + '_commands.txt', 'w') as listing:
        listing.write('\n'.join(submit_lines))

    print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
def main(nm='', start='', end=''):
    """Re-master alignments of experiments start..end (inclusive) for *nm*.

    With all three arguments left empty, gen_qsubs() is called instead.
    Otherwise the output folders are prepared, the module-level
    ``expected_cutsite`` is set, and for each experiment index the matching
    ``<split>/<index>.txt`` file of every split folder (except 'aligns') is
    fed to remaster_aligns() before the collected data is saved.
    """
    global expected_cutsite

    print(NAME)
    print(nm)

    if (nm, start, end) == ('', '', ''):
        gen_qsubs()
        return

    first, last = int(start), int(end)
    nm_out_dir = out_place + nm + '/'
    util.ensure_dir_exists(nm_out_dir)

    print('Preparing alignment output directories...')
    prepare_align_outdirs(nm_out_dir, first, last)
    print('Done')

    expected_cutsite = len('TCCGTGCTGTAACGAAAGGATGGGTGCGACGCGTCAT') + 27

    nm_inp_dir = inp_place + nm + '/'

    progress = util.Timer(total=last - first + 1)
    for exp_idx in range(first, last + 1):
        collected = defaultdict(list)
        # Every entry of the input folder is a split, except 'aligns'
        split_nms = [s for s in os.listdir(nm_inp_dir) if s != 'aligns']
        for split_nm in split_nms:
            split_fn = nm_inp_dir + '%s/%s.txt' % (split_nm, exp_idx)
            remaster_aligns(split_fn, collected)
        save_alignments(collected, nm_out_dir, exp_idx)
        progress.update()
def individualize(inp_dir, out_dir):
    """Split every combined CSV in *inp_dir* into one CSV per experiment.

    The combined dataframes hold many experiments concatenated together, and
    pulling single experiments out of them repeatedly is slow; reading small
    per-experiment CSVs is faster. For each input file matching ``*csv`` a
    folder named after the file (with '.csv' removed) is created under
    *out_dir*, holding ``<experiment>.csv`` for each distinct value of the
    'Experiment' column.
    """
    csv_fns = [fn for fn in os.listdir(inp_dir) if fnmatch.fnmatch(fn, '*csv')]

    for csv_fn in csv_fns:
        dataset_nm = csv_fn.replace('.csv', '')
        dataset_dir = out_dir + dataset_nm + '/'
        util.ensure_dir_exists(dataset_dir)

        combined = pd.read_csv(inp_dir + csv_fn)
        experiment_ids = set(combined['Experiment'])
        print(dataset_nm)

        progress = util.Timer(total=len(experiment_ids))
        for exp_id in experiment_ids:
            rows = combined[combined['Experiment'] == exp_id]
            rows.to_csv(dataset_dir + '%s.csv' % (exp_id))
            progress.update()
def main(nm='', start='', end=''):
    """Re-master alignments for the targets ``all_names[start:end + 1]``.

    Output folders are prepared for the selected target names, the
    module-level ``expected_cutsite`` is set, and for each target the matching
    ``<split>/<target>.txt`` file of every split folder (except 'aligns') is
    fed to remaster_aligns() before the collected data is saved.
    """
    global expected_cutsite

    print(NAME)
    print(nm)

    first, last = int(start), int(end)
    nm_out_dir = out_place + nm + '/'
    util.ensure_dir_exists(nm_out_dir)

    print('Preparing alignment output directories...')
    targets = all_names[first:last + 1]
    prepare_align_outdirs(nm_out_dir, targets)
    print('Done')

    expected_cutsite = len('GATGGGTGCGACGCGTCAT') + 28

    nm_inp_dir = inp_place + nm + '/'

    progress = util.Timer(total=len(targets))
    for target in targets:
        collected = defaultdict(list)
        # Every entry of the input folder is a split, except 'aligns'
        split_nms = [s for s in os.listdir(nm_inp_dir) if s != 'aligns']
        for split_nm in split_nms:
            split_fn = nm_inp_dir + '%s/%s.txt' % (split_nm, target)
            remaster_aligns(split_fn, collected)
        save_alignments(collected, nm_out_dir, target)
        progress.update()
Esempio n. 16
0
def gen_qsubs():
  """Write ten qsub shell scripts, one per index 0-9.

  Each script runs ``python NAME.py <idx>``. The ``qsub`` submission lines
  are saved to ``_commands.txt`` in the qsubs directory.
  """
  print('Generating qsub scripts...')
  script_dir = _config.QSUBS_DIR + NAME + '/'
  util.ensure_dir_exists(script_dir)

  # The file-name tag only depends on NAME, so compute it once.
  tag = NAME.split('_')[0]
  submit_lines = []

  for job_idx in range(10):
    job_cmd = 'python %s.py %s' % (NAME, job_idx)

    # One small bash wrapper per index
    job_fn = script_dir + 'q_%s_%s.sh' % (tag, job_idx)
    with open(job_fn, 'w') as job_file:
      job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

    # Exactly one submission line is recorded per written script
    submit_lines.append('qsub -m e -wd %s %s' % (_config.SRC_DIR, job_fn))

  # Save the submission commands next to the scripts
  with open(script_dir + '_commands.txt', 'w') as listing:
    listing.write('\n'.join(submit_lines))

  print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
def main(argv):
  """Collect per-model fitness and genotype-matrix CSVs into one folder.

  Reads the experiment design ``<DATA_DIR><modelexp_nm>.csv`` and, for every
  row, copies ``_final_fitness.csv`` and ``_final_genotype_matrix.csv`` from
  that model's output folder into ``out_dir/<modelexp_nm>/``. A row whose
  copy fails is counted as a failure and skipped.

  Args:
    argv: sequence whose first element is the model-experiment name.

  Returns:
    None. Prints a summary with the number of failures.
  """
  print(NAME)

  modelexp_nm = argv[0]
  print(modelexp_nm)

  exp_design = pd.read_csv(_config.DATA_DIR + f'{modelexp_nm}.csv')

  new_out_dir = out_dir + f'{modelexp_nm}/'
  util.ensure_dir_exists(new_out_dir)

  print('Collating experiments...')

  model_out_dir = _config.OUT_PLACE + f'_fitness_from_reads_pt_multi/{modelexp_nm}/'
  num_fails = 0
  timer = util.Timer(total = len(exp_design))
  for idx, row in exp_design.iterrows():
    int_nm = row['Name']

    # (source, destination) pairs copied for this model
    copy_jobs = [
      (f'{model_out_dir}/model_{int_nm}/_final_fitness.csv',
       f'{new_out_dir}/fitness_{int_nm}.csv'),
      (f'{model_out_dir}/model_{int_nm}/_final_genotype_matrix.csv',
       f'{new_out_dir}/genotype_matrix_{int_nm}.csv'),
    ]

    try:
      for src_fn, dst_fn in copy_jobs:
        # Argument list, no shell: paths with spaces or metacharacters are
        # passed through untouched.
        subprocess.check_output(['cp', src_fn, dst_fn])
    except subprocess.CalledProcessError:
      # Only a failed copy counts as a failure; unrelated errors and
      # Ctrl-C are no longer swallowed.
      num_fails += 1

    timer.update()

  print(f'Collated {len(exp_design)} experiments with {num_fails} failures')

  return
Esempio n. 18
0
def gen_qsubs():
    """Write one qsub shell script per block of 62 consecutive ids.

    Blocks start at 3696622 and advance by 62 up to 3702820; each script runs
    ``python NAME.py none <start> <start + 61>``. The ``qsub`` submission
    lines are saved to ``_commands.txt`` in the qsubs directory.
    """
    print('Generating qsub scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(script_dir)

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    for block_start in range(3696622, 3702820 + 1, 62):
        block_end = block_start + 61
        job_cmd = 'python %s.py none %s %s' % (NAME, block_start, block_end)

        # One small bash wrapper per block, named after its first id
        job_fn = script_dir + 'q_%s_%s.sh' % (tag, block_start)
        with open(job_fn, 'w') as job_file:
            job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

        # Exactly one submission line is recorded per written script
        submit_lines.append('qsub -m e -wd %s %s' % (_config.SRC_DIR, job_fn))

    # Save the submission commands next to the scripts
    with open(script_dir + '_commands.txt', 'w') as listing:
        listing.write('\n'.join(submit_lines))

    print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
Esempio n. 19
0
def main(inp_dir, out_dir, nm='none', start='none', end='none'):
    """Dispatch between qsub generation, a single name, and an SRR id range.

    * No name and no range: call gen_qsubs() and return None.
    * Name given, no range: genotype that one name unless
      set_master_expected_cutsite() reports False; return None.
    * Otherwise: treat start/end as integers and genotype SRR<start> through
      SRR<end> inclusive, skipping ids for which
      set_master_expected_cutsite() reports False; return out_dir.
    """
    # Single-argument print() prints the same text under Python 2 and 3.
    print(NAME)
    util.ensure_dir_exists(out_dir)

    if nm == 'none' and start == 'none' and end == 'none':
        gen_qsubs()
        return

    if nm != 'none' and start == 'none' and end == 'none':
        # Run single
        print(nm)
        res, context = set_master_expected_cutsite(nm)
        if res is False:
            return
        genotype_data(inp_dir, out_dir, nm, context)
        return

    # Run many
    start, end = int(start), int(end)
    timer = util.Timer(total=end - start + 1)
    for idnum in range(start, end + 1):
        srr_id = 'SRR%s' % (idnum)
        res, context = set_master_expected_cutsite(srr_id)
        if res is not False:
            genotype_data(inp_dir, out_dir, srr_id, context)
        # Tick for skipped ids too: the timer total counts every id in the
        # range, so skipping the update made progress fall behind.
        timer.update()

    return out_dir
Esempio n. 20
0
def gen_qsubs():
  """Write qsub shell scripts for every non-Cas9 name in ``exp_design``.

  Names containing 'Cas9' are skipped. For each remaining name, six scripts
  cover index windows of 2000 starting at 0, 2000, ..., 10000; each runs
  ``python NAME.py <name> <start> <start + 1999>``. The ``qsub`` submission
  lines are saved to ``_commands.sh``, which is marked executable with
  ``chmod +x``.
  """
  print('Generating qsub scripts...')
  script_dir = _config.QSUBS_DIR + NAME + '/'
  util.ensure_dir_exists(script_dir)

  # The file-name tag only depends on NAME, so compute it once.
  tag = NAME.split('_')[0]
  submit_lines = []

  for barcode in exp_design['Name']:
    if 'Cas9' not in barcode:
      for window_start in range(0, 12000, 2000):
        window_end = window_start + 1999
        job_cmd = 'python %s.py %s %s %s' % (NAME, barcode, window_start, window_end)

        # One small bash wrapper per (name, window)
        job_fn = script_dir + 'q_%s_%s_%s.sh' % (tag, barcode, window_start)
        with open(job_fn, 'w') as job_file:
          job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

        # Exactly one submission line is recorded per written script
        submit_lines.append('qsub -V -l h_rt=2:00:00,h_vmem=1G -wd %s %s &' % (_config.SRC_DIR, job_fn))

  # Save the submission commands next to the scripts
  master_fn = script_dir + '_commands.sh'
  with open(master_fn, 'w') as master_file:
    master_file.write('\n'.join(submit_lines))

  subprocess.check_output('chmod +x %s' % (master_fn), shell = True)

  print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
Esempio n. 21
0
def gen_qsubs():
    """Write one qsub shell script per input file whose name contains '.fq'.

    Files are listed from ``inp_dir``; each script runs
    ``python NAME.py <file name>``. The ``qsub`` submission lines are saved to
    ``_commands.sh``, which is marked executable with ``chmod +x``.
    """
    print('Generating qsub scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(script_dir)

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    fastq_fns = [entry for entry in os.listdir(inp_dir) if '.fq' in entry]
    for fastq_fn in fastq_fns:
        job_cmd = 'python %s.py %s' % (NAME, fastq_fn)

        # One small bash wrapper per input file
        job_fn = script_dir + 'q_%s_%s.sh' % (tag, fastq_fn)
        with open(job_fn, 'w') as job_file:
            job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

        # Exactly one submission line is recorded per written script
        submit_lines.append('qsub -V -l h_rt=2:00:00,h_vmem=1G -wd %s %s &' %
                            (_config.SRC_DIR, job_fn))

    # Save the submission commands next to the scripts
    master_fn = script_dir + '_commands.sh'
    with open(master_fn, 'w') as master_file:
        master_file.write('\n'.join(submit_lines))
    subprocess.check_output('chmod +x %s' % (master_fn), shell=True)

    print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
def main(inp_dir, out_dir):
    """Print the script name, make sure *out_dir* exists, run individualize().

    Returns None.
    """
    print(NAME)
    util.ensure_dir_exists(out_dir)
    individualize(inp_dir, out_dir)
def gen_qsubs():
  """Write one qsub shell script per eligible treatment.

  Treatments come from the 'Treatment' column of ``treat_control_df``; names
  containing 'ABE' or 'Cas9' are skipped. Each script runs
  ``python NAME.py <treatment>``. The ``qsub`` submission lines are saved to
  ``_commands.sh``, which is marked executable with ``chmod +x``.
  """
  print('Generating qsub scripts...')
  script_dir = _config.QSUBS_DIR + NAME + '/'
  util.ensure_dir_exists(script_dir)

  # The file-name tag only depends on NAME, so compute it once.
  tag = NAME.split('_')[0]
  submit_lines = []

  for _, pair_row in treat_control_df.iterrows():
    treatment = pair_row['Treatment']
    if not ('ABE' in treatment or 'Cas9' in treatment):
      job_cmd = 'python %s.py %s' % (NAME, treatment)

      # One small bash wrapper per treatment
      job_fn = script_dir + 'q_%s_%s.sh' % (tag, treatment)
      with open(job_fn, 'w') as job_file:
        job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

      # Exactly one submission line is recorded per written script
      submit_lines.append('qsub -V -P regevlab -l h_rt=4:00:00,h_vmem=1G -wd %s %s &' % (_config.SRC_DIR, job_fn))

  # Save the submission commands next to the scripts
  master_fn = script_dir + '_commands.sh'
  with open(master_fn, 'w') as master_file:
    master_file.write('\n'.join(submit_lines))

  subprocess.check_output('chmod +x %s' % (master_fn), shell = True)

  print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
Esempio n. 24
0
def gen_qsubs():
    """Write qsub shell scripts for two post-Cas9 replicates.

    For each replicate name, twenty scripts cover splits 0, 100, ..., 1900;
    each runs ``python NAME.py <name> <split>``. The ``qsub`` submission
    lines are saved to ``_commands.txt`` in the qsubs directory.
    """
    print('Generating qsub scripts...')
    script_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(script_dir)

    replicate_nms = [
        '052218_U2OS_+_LibA_postCas9_rep1',
        '052218_U2OS_+_LibA_postCas9_rep2',
    ]

    # The file-name tag only depends on NAME, so compute it once.
    tag = NAME.split('_')[0]
    submit_lines = []

    for replicate in replicate_nms:
        for split_start in range(0, 2000, 100):
            job_cmd = 'python %s.py %s %s' % (NAME, replicate, split_start)

            # One small bash wrapper per (replicate, split)
            job_fn = script_dir + 'q_%s_%s_%s.sh' % (tag, replicate, split_start)
            with open(job_fn, 'w') as job_file:
                job_file.write('#!/bin/bash\n%s\n' % (job_cmd))

            # Exactly one submission line is recorded per written script
            submit_lines.append('qsub -m e -V -wd %s %s' %
                                (_config.SRC_DIR, job_fn))

    # Save the submission commands next to the scripts
    with open(script_dir + '_commands.txt', 'w') as listing:
        listing.write('\n'.join(submit_lines))

    print('Wrote %s shell scripts to %s' % (len(submit_lines), script_dir))
def prepare_align_outdirs(out_plc, nms):
    """Ensure a folder ``out_plc + str(name) + '/'`` exists for each name.

    If such a folder already has entries, ``rm -rf <folder>*`` is run through
    the shell to clear what that glob matches.
    """
    util.ensure_dir_exists(out_plc)
    progress = util.Timer(total=len(nms))
    for target in nms:
        target_dir = out_plc + str(target) + '/'
        util.ensure_dir_exists(target_dir)
        # A non-empty listing means output from an earlier run is present
        if os.listdir(target_dir):
            wipe_cmd = 'rm -rf %s*' % (target_dir)
            subprocess.check_output(wipe_cmd, shell=True)
        progress.update()
Esempio n. 26
0
def prepare_align_outdirs(out_plc, start, end):
  """Ensure a folder ``out_plc + 'SRR<n>/'`` exists for n in start..end.

  The range is inclusive. If such a folder already has entries,
  ``rm -rf <folder>*`` is run through the shell to clear what that glob
  matches.
  """
  util.ensure_dir_exists(out_plc)
  progress = util.Timer(total = end - start + 1)
  for srr_num in range(start, end + 1):
    srr_dir = out_plc + 'SRR' + str(srr_num) + '/'
    util.ensure_dir_exists(srr_dir)
    # A non-empty listing means output from an earlier run is present
    if os.listdir(srr_dir):
      wipe_cmd = 'rm -rf %s*' % (srr_dir)
      subprocess.check_output(wipe_cmd, shell = True)
    progress.update()
Esempio n. 27
0
def main(inp_dir, out_dir, run=True):
    """Template entry point: announce the script and return *out_dir*.

    The output folder is ensured to exist. When *run* is false a tab-indented
    'skipped' note is printed. No processing step is wired in yet, so both
    paths return *out_dir* unchanged.
    """
    print(NAME)
    util.ensure_dir_exists(out_dir)

    if not run:
        print('\tskipped')

    # Function calls go here once the script has work to do.

    return out_dir
Esempio n. 28
0
def main(inp_dir, out_dir, run = True):
  """Template entry point: announce the script and return *out_dir*.

  The output folder is ensured to exist. When *run* is false a tab-indented
  'skipped' note is printed. No processing step is wired in yet, so both
  paths return *out_dir* unchanged.
  """
  print(NAME)
  util.ensure_dir_exists(out_dir)

  if not run:
    print('\tskipped')

  # Function calls go here once the script has work to do.

  return out_dir
Esempio n. 29
0
def _rows_in_experiment_range(stats, lo, hi):
    """Return the rows of *stats* whose '_Experiment' value lies in [lo, hi]."""
    in_range = (stats['_Experiment'] >= lo) & (stats['_Experiment'] <= hi)
    return stats[in_range]


def main(data_nm=''):
    """Pool statistics from several experiments and fit the models.

    For each experiment, rate statistics (restricted to rows with
    'Entropy' > 0.01) and 1-bp insertion statistics are loaded. 'DisLib'
    experiments contribute only '_Experiment' ids 73-300 and 16-72, and for
    the 16-72 rate rows only those with 'Ins1bp Ratio' < 0.3. 'VO' and 'Lib1'
    experiments contribute all rows. The pooled tables are featurized and
    passed to generate_models().

    Args:
        data_nm: unused by this function.

    Returns:
        None.
    """
    # Single-argument print() prints the same text under Python 2 and 3.
    print(NAME)
    global out_dir
    util.ensure_dir_exists(out_dir)

    import fi2_ins_ratio
    import fk_1bpins

    exps = [
        'VO-spacers-HEK293-48h-controladj', 'VO-spacers-K562-48h-controladj',
        'DisLib-mES-controladj', 'DisLib-U2OS-controladj',
        'Lib1-mES-controladj'
    ]

    all_rate_stats = pd.DataFrame()
    all_bp_stats = pd.DataFrame()
    for exp in exps:
        rate_stats = fi2_ins_ratio.load_statistics(exp)
        rate_stats = rate_stats[rate_stats['Entropy'] > 0.01]
        bp_stats = fk_1bpins.load_statistics(exp)
        # The loop no longer rebinds `exps` (the list being iterated) to a
        # column of this table; that value was never used.

        if 'DisLib' in exp:
            rate_high = _rows_in_experiment_range(rate_stats, 73, 300)
            rate_low = _rows_in_experiment_range(rate_stats, 16, 72)
            rate_low = rate_low[rate_low['Ins1bp Ratio'] < 0.3]  # remove outliers
            rate_parts = [rate_high, rate_low]
            bp_parts = [
                _rows_in_experiment_range(bp_stats, 73, 300),
                _rows_in_experiment_range(bp_stats, 16, 72),
            ]
        elif 'VO' in exp or 'Lib1' in exp:
            rate_parts = [rate_stats]
            bp_parts = [bp_stats]
        else:
            rate_parts, bp_parts = [], []

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed;
        # the parts keep the original append order.
        all_rate_stats = pd.concat([all_rate_stats] + rate_parts,
                                   ignore_index=True)
        all_bp_stats = pd.concat([all_bp_stats] + bp_parts,
                                 ignore_index=True)

        print('%s %s' % (exp, len(all_rate_stats)))

    X, Y, Normalizer = featurize(all_rate_stats, 'Ins1bp/Del Ratio')
    generate_models(X, Y, all_bp_stats, Normalizer)

    return
def main(inp_dir, out_dir, srr_id=None):
    """Convert the alignment for *srr_id*, or generate qsubs when it is None.

    The output folder is ensured to exist first. Always returns *out_dir*.
    """
    print(NAME)
    util.ensure_dir_exists(out_dir)

    if srr_id is not None:
        convert_alignment(srr_id, out_dir)
    else:
        gen_qsubs()

    return out_dir
def main(data_nm=''):
    """Ensure the module-level out_dir exists and run prepare_dataset_try3().

    *data_nm* is not used here. Returns None.
    """
    global out_dir
    print(NAME)
    util.ensure_dir_exists(out_dir)

    # Only the third dataset-preparation variant is active.
    prepare_dataset_try3()
Esempio n. 32
0
def main(data_nm='', redo_flag=''):
    """Ensure the module-level out_dir exists and run prepare_statistics().

    Passing ``redo_flag='redo'`` sets the module-level ``redo`` switch to True
    beforehand. *data_nm* is not used here. Returns None.
    """
    global out_dir, redo
    print(NAME)
    util.ensure_dir_exists(out_dir)

    if redo_flag == 'redo':
        redo = True

    prepare_statistics()
Esempio n. 33
0
def main(inp_dir, out_dir, run = True):
  """Build the database from DEFAULT_INP_DIR into *out_dir* unless skipped.

  The output folder is ensured to exist. When *run* is false only a
  tab-indented 'skipped' note is printed. Always returns *out_dir*. The
  *inp_dir* argument is not used; make_db() reads DEFAULT_INP_DIR.
  """
  print(NAME)
  util.ensure_dir_exists(out_dir)

  if run:
    source_fn = DEFAULT_INP_DIR
    make_db(source_fn, out_dir)
  else:
    print('\tskipped')

  return out_dir
Esempio n. 34
0
import sys, os, fnmatch, datetime, subprocess, imp
sys.path.append('/cluster/mshen/')
import numpy as np
from collections import defaultdict
from mylib import util
import pandas as pd
import matplotlib
matplotlib.use('Pdf')
import matplotlib.pyplot as plt
import seaborn as sns

# Default params
# NOTE(review): `_config` is used below, but no import of it is visible among
# the imports above - confirm it is brought into scope elsewhere.
inp_dir = _config.DATA_DIR  # default input directory
# presumably the script's own file name without path/extension - confirm in mylib.util
NAME = util.get_fn(__file__)
out_dir = _config.OUT_PLACE + NAME + '/'  # per-script output folder, keyed by NAME
util.ensure_dir_exists(out_dir)  # runs at import time, before any function is called

##
# Functions
##

##
# qsub
##
def gen_qsubs():
  # Generate qsub shell scripts and commands for easy parallelization
  print 'Generating qsub scripts...'
  qsubs_dir = _config.QSUBS_DIR + NAME + '/'
  util.ensure_dir_exists(qsubs_dir)
  qsub_commands = []