def main():
    """Transfer GSMs to the remote host, limited by remote free space.

    Steps, in order:

    1. Parse the command line and load the config file.
    2. Build the sample list and initialise the local output directories.
    3. Compute how much remote space may be used.
    4. Select the GSMs that are not yet listed in the transfer record and
       that fit into that space.
    5. Write the transfer script, make it executable for the owner only,
       and run it.
    6. If the script exits with 0, append the transferred GSM ids to the
       transfer record.

    Returns None in every case; returns early when no GSM can be selected.
    """
    options = parse_args_for_rp_transfer()
    config = misc.get_config(options.config_file)

    # r_: means relevant to remote host, l_: to local host
    l_top_outdir = config['LOCAL_TOP_OUTDIR']
    r_top_outdir = config['REMOTE_TOP_OUTDIR']

    G = PPR.gen_all_samples_from_soft_and_isamp
    samples = G(options.soft_files, options.isamp, config)
    PPR.init_sample_outdirs(samples, l_top_outdir)

    r_host, r_username = config['REMOTE_HOST'], config['USERNAME']
    fastq2rsem_ratio = config['FASTQ2RSEM_RATIO']
    r_cmd_df = config['REMOTE_CMD_DF']
    r_min_free = misc.ugly_usage(config['REMOTE_MIN_FREE'])
    r_max_usage = misc.ugly_usage(config['REMOTE_MAX_USAGE'])
    r_free_to_use = calc_remote_free_space_to_use(
        r_host, r_username, r_top_outdir, l_top_outdir,
        r_cmd_df, r_max_usage, r_min_free, fastq2rsem_ratio)

    # tf: transfer/transferred
    tf_record = os.path.join(l_top_outdir, 'transferred_GSMs.txt')
    tf_gsms = get_gsms_transferred(tf_record)
    # Build a real list rather than map(): on Python 3 map() is a one-shot
    # iterator, so repeated membership tests against it would silently stop
    # matching after the first one.
    tf_gsms_bn = [os.path.basename(_) for _ in tf_gsms]

    logger.info('Selecting samples to transfer based their estimated remote usage')
    gsms_to_tf = select_gsms_to_transfer(
        samples, tf_gsms_bn, l_top_outdir, r_free_to_use, fastq2rsem_ratio)

    if not gsms_to_tf:
        logger.info('Cannot find a GSM that fits the current disk usage rule')
        return

    logger.info('GSMs to transfer:')
    for k, gsm in enumerate(gsms_to_tf):
        logger.info('\t{0:3d} {1:30s} {2}'.format(k+1, gsm, gsm.outdir))

    # ids are the sample outdirs expressed relative to the local top outdir
    gsms_to_tf_ids = [os.path.relpath(_.outdir, l_top_outdir)
                      for _ in gsms_to_tf]
    tf_script = write_transfer_sh(
        gsms_to_tf_ids, options.rsync_template, l_top_outdir,
        r_username, r_host, r_top_outdir)
    # owner-only read/write/execute so the generated script can be run
    os.chmod(tf_script, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
    rcode = misc.execute_log_stdout_stderr(tf_script)

    if rcode == 0:
        # Unlike the processing in rsempipeline.py, where completion is
        # marked by .COMPLETE flags, a completed transfer is marked by
        # writing the transferred GSMs to the transfer record.
        append_transfer_record(gsms_to_tf_ids, tf_record)
def main():
    """Pick the GSMs that fit the local disk rule and run the pipeline on them.

    Parses the command line, loads the config, builds the sample list,
    narrows it to what fits into the usable local free space and then calls
    R.pipeline_run. Returns None; returns early when no sample is selected.

    Raises:
        IOError: 'gen_qsub_script' is among the target tasks but no qsub
            template was given.
    """
    # because of ruffus, some state has to live in global variables
    # (global variables: options, config, samples, env, logger, logger_mutex);
    # keep the number of globals as small as possible
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    make_samples = PPR.gen_all_samples_from_soft_and_isamp
    samples = make_samples(options.soft_files, options.isamp, config)

    top_outdir = config['LOCAL_TOP_OUTDIR']
    PPR.init_sample_outdirs(samples, top_outdir)
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    cmd_df = config['LOCAL_CMD_DF']
    min_free = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    max_usage = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    free_to_use = calc_local_free_space_to_use(
        top_outdir, cmd_df, min_free, max_usage)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, free_to_use)

    if not samples:
        # nothing was selected, i.e. samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    for position, gsm in enumerate(samples, 1):
        logger.info(
            '\t{0:3d} {1:30s} {2}'.format(position, gsm, gsm.outdir))

    # generating qsub scripts is impossible without a template
    if 'gen_qsub_script' in options.target_tasks and not options.qsub_template:
        raise IOError(
            '-t/--qsub_template required when running gen_qsub_script')

    R.pipeline_run(
        logger=logger,
        target_tasks=options.target_tasks,
        forcedtorun_tasks=options.forced_tasks,
        multiprocess=options.jobs,
        verbose=options.verbose,
        touch_files_only=options.touch_files_only,
        # history_file=os.path.join('log', '.{0}.sqlite'.format(
        #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    )
def main():
    """Run the pipeline on the samples that fit into the local disk budget.

    The sample list is built from the soft files and isamp given on the
    command line, reduced by PPR.select_gsms_to_process against the usable
    local free space, and then passed on implicitly (as a global) to the
    tasks started by R.pipeline_run. Returns None.

    Raises:
        IOError: the 'gen_qsub_script' task was requested without a qsub
            template.
    """
    # ruffus forces the use of module-level state here
    # (global variables: options, config, samples, env, logger, logger_mutex);
    # the set of globals is deliberately kept minimal
    global options, config, samples

    options = parse_args_for_rp_run()
    config = misc.get_config(options.config_file)

    samples = PPR.gen_all_samples_from_soft_and_isamp(
        options.soft_files, options.isamp, config)

    local_top = config['LOCAL_TOP_OUTDIR']
    PPR.init_sample_outdirs(samples, local_top)
    PPR.fetch_sras_info(samples, options.recreate_sras_info)

    df_cmd = config['LOCAL_CMD_DF']
    lower_bound = misc.ugly_usage(config['LOCAL_MIN_FREE'])
    upper_bound = misc.ugly_usage(config['LOCAL_MAX_USAGE'])
    usable = calc_local_free_space_to_use(
        local_top, df_cmd, lower_bound, upper_bound)

    logger.info('Selecting samples to process based their usage')
    samples = PPR.select_gsms_to_process(samples, usable)

    if not samples:
        # empty selection: samples == []
        logger.info('Cannot find a GSM that fits the disk usage rule')
        return

    logger.info('GSMs to process:')
    row_fmt = '\t{0:3d} {1:30s} {2}'
    for number, gsm in enumerate(samples, start=1):
        logger.info(row_fmt.format(number, gsm, gsm.outdir))

    wants_qsub_script = 'gen_qsub_script' in options.target_tasks
    if wants_qsub_script and not options.qsub_template:
        raise IOError('-t/--qsub_template required when running gen_qsub_script')

    # history_file is intentionally not passed; the disabled form was:
    # history_file=os.path.join('log', '.{0}.sqlite'.format(
    #     '_'.join([_.name for _ in sorted(samples, key=lambda x: x.name)])))
    run_kwargs = {
        'logger': logger,
        'target_tasks': options.target_tasks,
        'forcedtorun_tasks': options.forced_tasks,
        'multiprocess': options.jobs,
        'verbose': options.verbose,
        'touch_files_only': options.touch_files_only,
    }
    R.pipeline_run(**run_kwargs)
def test_get_config(self):
    """misc.get_config returns {'a': 'b'} when the file content is 'a: b'."""
    # stand-in for open() inside rsempipeline.utils.misc, so no real
    # config.yaml has to exist on disk
    fake_open = mock.mock_open(read_data='a: b')
    with mock.patch('rsempipeline.utils.misc.open', fake_open):
        loaded = misc.get_config('config.yaml')
        self.assertEqual(loaded, {'a': 'b'})