Ejemplo n.º 1
0
def main():
    """Pick GSMs that fit on the remote host, rsync them over, record them.

    Workflow, as driven by the parsed command-line options and the config
    file named by ``options.config_file``:

    1. Build the sample list from the soft files / isamp and initialise the
       per-sample output directories under ``LOCAL_TOP_OUTDIR``.
    2. Work out how much remote space may be used, from the config keys
       ``REMOTE_HOST``, ``USERNAME``, ``REMOTE_TOP_OUTDIR``,
       ``REMOTE_CMD_DF``, ``REMOTE_MIN_FREE``, ``REMOTE_MAX_USAGE`` and
       ``FASTQ2RSEM_RATIO``.
    3. Select the GSMs to transfer, given those already listed in
       ``<LOCAL_TOP_OUTDIR>/transferred_GSMs.txt``.
    4. Write the transfer shell script, make it executable, run it, and on
       a zero return code append the transferred GSMs to the record file.

    Returns None. Returns early (after logging) when no GSM is selected.
    """
    options = parse_args_for_rp_transfer()
    config = misc.get_config(options.config_file)

    # r_: means relevant to remote host, l_: to local host
    l_top_outdir = config['LOCAL_TOP_OUTDIR']
    r_top_outdir = config['REMOTE_TOP_OUTDIR']

    G = PPR.gen_all_samples_from_soft_and_isamp
    samples = G(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, l_top_outdir)

    r_host, r_username = config['REMOTE_HOST'], config['USERNAME']
    fastq2rsem_ratio = config['FASTQ2RSEM_RATIO']
    r_cmd_df = config['REMOTE_CMD_DF']
    r_min_free = misc.ugly_usage(config['REMOTE_MIN_FREE'])
    r_max_usage = misc.ugly_usage(config['REMOTE_MAX_USAGE'])
    r_free_to_use = calc_remote_free_space_to_use(
        r_host, r_username, r_top_outdir, l_top_outdir,
        r_cmd_df, r_max_usage, r_min_free, fastq2rsem_ratio)

    # tf: transfer/transferred
    tf_record = os.path.join(l_top_outdir, 'transferred_GSMs.txt')
    tf_gsms = get_gsms_transferred(tf_record)
    # Materialise as a list: on Python 3 a bare map() is a one-shot iterator,
    # so any second membership test or iteration downstream would see it
    # empty. On Python 2 this is identical to the previous map() result.
    tf_gsms_bn = [os.path.basename(_) for _ in tf_gsms]

    logger.info('Selecting samples to transfer based their estimated remote usage')
    gsms_to_tf = select_gsms_to_transfer(
        samples, tf_gsms_bn, l_top_outdir, r_free_to_use, fastq2rsem_ratio)

    if not gsms_to_tf:
        logger.info('Cannot find a GSM that fits the current disk usage rule')
        return

    logger.info('GSMs to transfer:')
    for k, gsm in enumerate(gsms_to_tf):
        logger.info('\t{0:3d} {1:30s} {2}'.format(k+1, gsm, gsm.outdir))

    # Identify each GSM by its output directory relative to the local top
    # output directory; these ids go into both the script and the record.
    gsms_to_tf_ids = [os.path.relpath(_.outdir, l_top_outdir)
                      for _ in gsms_to_tf]
    tf_script = write_transfer_sh(
        gsms_to_tf_ids, options.rsync_template, l_top_outdir,
        r_username, r_host, r_top_outdir)

    # Owner-only read/write/execute so the generated script can be run.
    os.chmod(tf_script, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
    rcode = misc.execute_log_stdout_stderr(tf_script)

    if rcode == 0:
        # Unlike the processing in rsempipeline.py, where completion is
        # marked by .COMPLETE flags, a completed transfer is marked by
        # writing the transferred GSMs to the transfer record file.
        append_transfer_record(gsms_to_tf_ids, tf_record)
Ejemplo n.º 2
0
def main():
    """Select the GSMs that fit the local disk budget and run the pipeline.

    Parses the rp_run options, loads the config, builds and initialises the
    samples, narrows them to those allowed by the local free-space
    calculation, logs the selection, and hands control to
    ``R.pipeline_run``. Returns early (after logging) when nothing is
    selected. Raises IOError when ``gen_qsub_script`` is a target task but
    no qsub template was supplied.
    """
    # Because of ruffus, some state has to live in global variables
    # (options, config, samples, env, logger, logger_mutex); keep that set
    # as small as possible.
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    build_samples = PPR.gen_all_samples_from_soft_and_isamp
    samples = build_samples(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    local_outdir = config['LOCAL_TOP_OUTDIR']
    df_cmd = config['LOCAL_CMD_DF']
    free_min = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    usage_max = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        local_outdir, df_cmd, free_min, usage_max)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    # An empty selection means there is nothing to run.
    if not samples:
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for rank, gsm in enumerate(samples, 1):
        logger.info('\t{0:3d} {1:30s} {2}'.format(rank, gsm, gsm.outdir))

    # The qsub template is only consulted when gen_qsub_script is requested.
    wants_qsub_script = 'gen_qsub_script' in options.target_tasks
    if wants_qsub_script and not options.qsub_template:
        raise IOError(
            '-t/--qsub_template required when running gen_qsub_script')

    run_kwargs = dict(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
    R.pipeline_run(**run_kwargs)
Ejemplo n.º 3
0
def main():
    """Entry point: choose GSMs within the local disk budget, then run ruffus.

    Steps: parse options, read the config, generate the samples and their
    output directories, fetch SRA info, compute the usable local free space,
    keep only the samples selected for that budget, log them, validate the
    qsub template requirement, and call ``R.pipeline_run``.

    Returns None; returns early when no sample is selected.
    Raises IOError if ``gen_qsub_script`` is targeted without a qsub
    template.
    """
    # ruffus requires some module-level globals (options, config, samples,
    # env, logger, logger_mutex); declare only the ones assigned here.
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    make_samples = PPR.gen_all_samples_from_soft_and_isamp
    samples = make_samples(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    free_to_use = calc_local_free_space_to_use(
        config['LOCAL_TOP_OUTDIR'],
        config['LOCAL_CMD_DF'],
        misc.ugly_usage(config['LOCAL_MIN_FREE']),
        misc.ugly_usage(config['LOCAL_MAX_USAGE']),
    )

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:
        # Nothing selected: report it and stop before touching the pipeline.
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    row_fmt = '\t{0:3d} {1:30s} {2}'
    for position, gsm in enumerate(samples, 1):
        logger.info(row_fmt.format(position, gsm, gsm.outdir))

    # qsub_template is only looked at when gen_qsub_script is a target task.
    if ('gen_qsub_script' in options.target_tasks
            and not options.qsub_template):
        raise IOError('-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
Ejemplo n.º 4
0
 def test_init_sample_outdirs(self, mock_os):
     """makedirs is expected once per sample when exists() reports False.

     ``mock_os`` is injected from outside this method (presumably by a
     patch decorator on the ``os`` used by ``ppr`` -- not visible here).
     """
     samples = [mock.Mock() for _ in range(2)]
     # Make every existence check report "missing".
     mock_os.path.exists.return_value = False

     ppr.init_sample_outdirs(samples, 'some_top_outdir')

     makedirs_calls = mock_os.makedirs.call_count
     self.assertEqual(makedirs_calls, 2)