Example #1
0
 def test_fetch_sras_info_recreate(self, mock_fetch, mock_get_ftp_handler, mock_write, mock_os):
     # Scenario: the mocked os.path.exists reports True and recreation of the
     # SRA info is explicitly requested for two mocked samples.
     mock_os.path.exists.return_value = True
     mock_fetch.return_value = PARSED_SRA_INFO_YAML_SINGLE_SRA
     two_samples = [mock.Mock(), mock.Mock()]

     ppr.fetch_sras_info(samples=two_samples, flag_recreate_sras_info=True)

     # Expected call counts, checked in this order: one FTP handler, then
     # two fetches and two writes for the two samples.
     expected_call_counts = (
         (mock_get_ftp_handler, 1),
         (mock_fetch, 2),
         (mock_write, 2),
     )
     for patched, expected in expected_call_counts:
         self.assertEqual(patched.call_count, expected)
Example #2
0
def main():
    """Select the samples that fit the local disk budget and run the pipeline.

    Steps, in order:

    1. Parse the command-line options and load the config file.
    2. Generate the samples from the soft files / isamp option, initialise
       their output directories and fetch their SRA info.
    3. Compute the free space to use with ``calc_local_free_space_to_use``
       and keep only the samples returned by ``PPR.select_gsms_to_process``.
    4. Log the selected samples and call ``R.pipeline_run``.

    Returns early (``None``) when no sample is selected.

    Raises:
        IOError: if ``gen_qsub_script`` is among the target tasks but no
            qsub template was given.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples
    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(top_outdir, cmd_df, min_free,
                                               max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:  # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for rank, gsm in enumerate(samples, 1):
        # `!s` converts the sample to str before the width spec is applied.
        # Without it, Python 3.4+ raises TypeError when a non-empty format
        # spec reaches an object that does not define its own __format__.
        logger.info('\t{0:3d} {1!s:30s} {2}'.format(rank, gsm, gsm.outdir))

    if 'gen_qsub_script' in options.target_tasks and not options.qsub_template:
        raise IOError(
            '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
Example #3
0
def main():
    """Entry point: pick the samples that fit on local disk, then run ruffus.

    The function parses the command-line options, loads the config, generates
    the samples, initialises their output directories and fetches their SRA
    info. It then computes the free space to use with
    ``calc_local_free_space_to_use``, narrows the samples with
    ``PPR.select_gsms_to_process`` and, if any remain, logs them and calls
    ``R.pipeline_run``.

    Returns ``None`` early when the selection yields no sample.

    Raises:
        IOError: if ``gen_qsub_script`` is among the target tasks but
            ``options.qsub_template`` is empty.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples
    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:  # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for position, gsm in enumerate(samples, 1):
        # Convert the sample with `!s` before applying the `30s` width spec:
        # on Python 3.4+ an object without its own __format__ raises
        # TypeError when given a non-empty format spec.
        logger.info(
            '\t{0:3d} {1!s:30s} {2}'.format(position, gsm, gsm.outdir))

    needs_template = 'gen_qsub_script' in options.target_tasks
    if needs_template and not options.qsub_template:
        raise IOError('-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )