예제 #1
0
def main():
    """Select the samples to process and run the ruffus pipeline on them.

    Steps, in order:

    1. Parse the command-line options and load the config file named by
       ``options.config_file``.
    2. Build the sample list from ``options.soft_files`` and
       ``options.isamp``, initialise the sample output directories under
       ``config['LOCAL_TOP_OUTDIR']`` and fetch the SRA info.
    3. Compute the local space that may be used (from the ``LOCAL_*``
       config entries) and keep only the samples returned by
       ``PPR.select_gsms_to_process`` for that amount.
    4. Log the selection and hand over to ``R.pipeline_run``.

    Returns early, after logging a message, when no sample is selected.

    Raises:
        IOError: if ``gen_qsub_script`` is one of the target tasks but no
            qsub template was supplied.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)
    top_outdir = config['LOCAL_TOP_OUTDIR']

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, top_outdir)
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    free_to_use = calc_local_free_space_to_use(
        top_outdir,
        config['LOCAL_CMD_DF'],
        misc.ugly_usage(config['LOCAL_MIN_FREE']),
        misc.ugly_usage(config['LOCAL_MAX_USAGE']),
    )

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:  # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for idx, gsm in enumerate(samples, 1):
        # ``!s`` converts the sample to str before the width is applied.
        # On Python 3.4+ ``object.__format__`` raises TypeError for a
        # non-empty format spec, so ``{1:30s}`` fails for any object that
        # does not define its own ``__format__``.
        logger.info('\t{0:3d} {1!s:30s} {2}'.format(idx, gsm, gsm.outdir))

    # Generating a qsub script is impossible without a template.
    if ('gen_qsub_script' in options.target_tasks
            and not options.qsub_template):
        raise IOError(
            '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # NOTE(review): history_file is currently not passed:
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
예제 #2
0
def main():
    """Run the pipeline on the samples chosen for the available disk space.

    The function parses the command-line options, loads the config file
    given by ``options.config_file``, generates the samples from
    ``options.soft_files`` and ``options.isamp``, initialises their output
    directories under ``config['LOCAL_TOP_OUTDIR']`` and fetches the SRA
    info.  It then computes how much local space may be used, narrows the
    samples down with ``PPR.select_gsms_to_process``, logs the result and
    finally calls ``R.pipeline_run``.

    If no sample is selected, a message is logged and the function returns
    without running the pipeline.

    Raises:
        IOError: when ``gen_qsub_script`` is a target task and
            ``options.qsub_template`` is empty.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    make_samples = PPR.gen_all_samples_from_soft_and_isamp
    samples = make_samples(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    # Inputs for the free-space computation, all taken from the config.
    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:  # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for rank, gsm in enumerate(samples, 1):
        # Explicit ``!s`` conversion: applying the ``30s`` spec directly to
        # an object without its own ``__format__`` raises TypeError on
        # Python 3.4+; converting to str first keeps the padded layout.
        logger.info('\t{0:3d} {1!s:30s} {2}'.format(rank, gsm, gsm.outdir))

    needs_template = 'gen_qsub_script' in options.target_tasks
    if needs_template and not options.qsub_template:
        raise IOError(
            '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # NOTE(review): history_file is currently not passed:
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
예제 #3
0
 def test_select_gsms_to_process_ignore_disk_usage(self, mock_is_processed):
     """All samples are returned when the third argument is True.

     ``mock_is_processed`` is presumably injected by a patch decorator that
     is not visible here; it is set to report every sample as unprocessed.
     The third positional argument is presumably the "ignore disk usage"
     flag, going by the test name.
     """
     mock_is_processed.return_value = False
     candidates = [mock.Mock() for _ in range(2)]
     selected = ppr.select_gsms_to_process(candidates, 1024 ** 3, True)
     self.assertEqual(selected, candidates)
예제 #4
0
 def test_select_gsms_to_process_fit_disk_usage(self, mock_is_processed, mock_estimate_sra2fastq_usage):
     """Only the first sample is returned when two do not fit together.

     Both mocks are presumably injected by patch decorators not visible
     here.  Every sample is reported as unprocessed with an estimated usage
     of 513, and the size passed in is 1024, so one estimate fits but the
     sum of two (1026) does not.
     """
     mock_is_processed.return_value = False
     mock_estimate_sra2fastq_usage.return_value = 513
     first, second = mock.Mock(), mock.Mock()
     selected = ppr.select_gsms_to_process([first, second], 1024, False)
     self.assertEqual(selected, [first])
예제 #5
0
 def test_select_gsms_to_process_all_processed(self, mock_is_processed):
     """Nothing is returned when every sample is reported as processed.

     ``mock_is_processed`` is presumably injected by a patch decorator that
     is not visible here.
     """
     mock_is_processed.return_value = True
     candidates = [mock.Mock() for _ in range(2)]
     selected = ppr.select_gsms_to_process(candidates, 1024 ** 3, False)
     self.assertEqual(selected, [])