def test_is_process(self, mock_g, mock_s, mock_d, mock_get_sras_info):
    """
    ppr.is_processed must be falsy while any one of the three patched checks
    returns False, and truthy once all three return True.

    The mocks are presumably injected by mock.patch decorators that are not
    visible here, so what mock_d, mock_s and mock_g stand in for cannot be
    told from this method alone (TODO confirm against the decorators).
    mock_get_sras_info is accepted only because it is injected as well; the
    assertions never configure or inspect it.
    """
    gsm_dir = 'some_gsm_dir'

    # Fail exactly one check at a time, in the order d -> s -> g. Each check
    # is switched back to True before the next one is failed, so every check
    # that was already exercised keeps returning True, and checks not yet
    # reached keep their default mock return value.
    for check in (mock_d, mock_s, mock_g):
        check.return_value = False
        self.assertFalse(ppr.is_processed(gsm_dir))
        check.return_value = True

    # Every check returns True now.
    self.assertTrue(ppr.is_processed(gsm_dir))
def select_gsms_to_transfer(samples, transferred_gsms, l_top_outdir,
                            r_free_to_use, fastq2rsem_ratio):
    """
    Select the samples to transfer to the remote host.

    (Different from select_samples_to_process in utils_pre_pipeline.py, which
    selects the samples to process.)

    Go through the samples that have not been transferred yet, in the order
    given. A sample is selected when PPR.is_processed accepts its outdir and
    its estimated rsem usage is not greater than what is left of the remote
    free_to_use space; the space left is reduced by that estimate every time
    a sample is selected. A sample that does not fit is skipped, and the
    samples after it are still considered.

    :param samples: a list of Sample instances representing both transferred
        and non-transferred GSMs; ``name`` and ``outdir`` are read from each
    :param transferred_gsms: a list of string with GSM ids. e.g. [GSM1, GSM2];
        None is treated as "nothing transferred yet"
    :param l_top_outdir: local top outdir; each sample's outdir is shown
        relative to it in the log messages
    :param r_free_to_use: space that may be used on the remote host; it is
        compared directly against the value estimate_rsem_usage returns
    :param fastq2rsem_ratio: forwarded unchanged to estimate_rsem_usage
    :return: the list of selected Sample instances, in input order
    """
    # Build the lookup once so that every membership test is a hash lookup
    # rather than a scan of the whole id list.
    already_transferred = frozenset(
        () if transferred_gsms is None else transferred_gsms)

    # not yet transferred GSMs
    non_tf_gsms = [
        sample for sample in samples
        if sample.name not in already_transferred
    ]

    gsms_to_transfer = []
    for gsm in non_tf_gsms:
        gsm_id = os.path.relpath(gsm.outdir, l_top_outdir)
        if not PPR.is_processed(gsm.outdir):
            # nothing is logged here; PPR.is_processed is expected to report
            # why the sample is not ready -- TODO confirm
            continue

        rsem_usage = estimate_rsem_usage(gsm.outdir, fastq2rsem_ratio)
        if rsem_usage > r_free_to_use:
            logger.debug(
                '{0} ({1}) doesn\'t fit current remote free_to_use ({2})'.format(
                    gsm_id,
                    misc.pretty_usage(rsem_usage),
                    misc.pretty_usage(r_free_to_use)))
            continue

        logger.info('{0} ({1}) fit remote free_to_use ({2})'.format(
            gsm_id,
            misc.pretty_usage(rsem_usage),
            misc.pretty_usage(r_free_to_use)))
        # reserve the space so that the following samples only compete for
        # what is left
        r_free_to_use -= rsem_usage
        gsms_to_transfer.append(gsm)

    return gsms_to_transfer