def test_fetch_sras_info_recreate(self, mock_fetch, mock_get_ftp_handler,
                                  mock_write, mock_os):
    """Check call counts of fetch_sras_info when recreation is forced.

    ``os.path.exists`` is stubbed to report True and the fetch stub returns
    PARSED_SRA_INFO_YAML_SINGLE_SRA.  With two samples and
    ``flag_recreate_sras_info=True`` the test expects one FTP-handler
    lookup, two fetches and two writes.

    NOTE(review): the mock_* arguments are presumably injected by patch
    decorators that sit outside this view -- confirm their order there.
    """
    mock_os.path.exists.return_value = True
    mock_fetch.return_value = PARSED_SRA_INFO_YAML_SINGLE_SRA

    two_samples = [mock.Mock(), mock.Mock()]
    ppr.fetch_sras_info(samples=two_samples, flag_recreate_sras_info=True)

    # (patched callable, expected number of calls), checked in this order
    expectations = (
        (mock_get_ftp_handler, 1),
        (mock_fetch, 2),
        (mock_write, 2),
    )
    for patched, expected_count in expectations:
        self.assertEqual(patched.call_count, expected_count)
def main():
    """Select the samples that fit on local disk and run the pipeline on them.

    Steps, in order:

    1. Parse the command line and load the config file it points to.
    2. Build the sample list from the SOFT files / isamp option, initialise
       the per-sample output directories and fetch the SRA info.
    3. Compute how much local space may be used and keep only the samples
       selected by ``PPR.select_gsms_to_process`` for that budget.
    4. Hand over to ``R.pipeline_run`` with the task options from the
       command line.

    Side effects:
        Rebinds the module-level globals ``options``, ``config`` and
        ``samples``.

    Returns:
        None.  Returns early (after logging) when no sample is selected.

    Raises:
        IOError: if ``gen_qsub_script`` is among the target tasks but no
            qsub template was given.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:             # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for rank, gsm in enumerate(samples, 1):
        # '!s' converts the sample to str before the width spec is applied.
        # A bare '{1:30s}' hands '30s' to the object's own __format__, which
        # raises TypeError on Python 3.4+ unless the class overrides it.
        logger.info('\t{0:3d} {1!s:30s} {2}'.format(rank, gsm, gsm.outdir))

    # The qsub script can only be generated from a template.
    if 'gen_qsub_script' in options.target_tasks and not options.qsub_template:
        raise IOError(
            '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
def main():
    """Select the samples that fit on local disk and run the pipeline on them.

    Steps, in order:

    1. Parse the command line and load the config file it points to.
    2. Build the sample list from the SOFT files / isamp option, initialise
       the per-sample output directories and fetch the SRA info.
    3. Compute how much local space may be used and keep only the samples
       selected by ``PPR.select_gsms_to_process`` for that budget.
    4. Hand over to ``R.pipeline_run`` with the task options from the
       command line.

    Side effects:
        Rebinds the module-level globals ``options``, ``config`` and
        ``samples``.

    Returns:
        None.  Returns early (after logging) when no sample is selected.

    Raises:
        IOError: if ``gen_qsub_script`` is among the target tasks but no
            qsub template was given.
    """
    # because of ruffus, have to use some global variables
    # global variables: options, config, samples, env, logger, logger_mutex
    # minimize the number of global variables as much as possible
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, config['LOCAL_TOP_OUTDIR'])
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:             # when samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for rank, gsm in enumerate(samples, 1):
        # '!s' converts the sample to str before the width spec is applied.
        # A bare '{1:30s}' hands '30s' to the object's own __format__, which
        # raises TypeError on Python 3.4+ unless the class overrides it.
        logger.info('\t{0:3d} {1!s:30s} {2}'.format(rank, gsm, gsm.outdir))

    # The qsub script can only be generated from a template.
    if 'gen_qsub_script' in options.target_tasks and not options.qsub_template:
        raise IOError('-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )