def fetch_sras_info_per(sample_url, ftp_handler):
    """
    Fetch information of sra files for one sample.

    Only the path component of ``sample_url`` is used: its last component is
    taken as the SRX id and everything before it as the directory to change
    into on the FTP server.

    :param sample_url: URL pointing at the SRX directory of the sample
    :param ftp_handler: object exposing ``cwd``, ``nlst``, ``sendcmd`` and
        ``size`` (presumably a connected ``ftplib.FTP`` instance -- confirm
        against the caller)
    :returns: a list of single-key dicts, one per sra file, e.g.
        ``[{sra1: {'size': 123, 'readable_size': ...}}, {sra2: {...}}, ...]``,
        or ``None`` when listing or sizing fails (the error is logged).
        An exception raised by the initial ``cwd`` is NOT caught here and
        propagates to the caller.
    """
    urlparsed = urlparse.urlparse(sample_url)
    # e.g. before_srx_dir: /sra/sra-instant/reads/ByExp/sra/SRX/SRX573[/SRX123456]
    before_srx_dir = os.path.dirname(urlparsed.path)
    ftp_handler.cwd(before_srx_dir)
    # e.g. srx: SRX573027
    srx = os.path.basename(urlparsed.path)
    try:
        srrs = ftp_handler.nlst(srx)
        # Flatten the per-SRR listings into one list of sra paths, see
        # http://stackoverflow.com/questions/2961983/convert-multi-dimensional-list-to-a-1d-list-in-python
        sras = [sra for srr in srrs for sra in ftp_handler.nlst(srr)]

        # Switch the transfer type before asking for sizes, see
        # http://stackoverflow.com/questions/3231910/python-ftplib-cant-get-size-of-file-before-download
        ftp_handler.sendcmd('TYPE i')
        # sizes returned are in unit of byte
        sizes = [ftp_handler.size(sra) for sra in sras]

        sras_info = [{sra: {'size': size,
                            'readable_size': pretty_usage(size)}}
                     for (sra, size) in zip(sras, sizes)]
        # e.g. [{sra1: {'size': 123}}, {sra2: {'size': 456}}, ...]
        return sras_info
    except Exception as err:
        # ``as`` form is valid on Python 2.6+ and Python 3, unlike the
        # comma form. Best effort: log the failure and signal it with None.
        logger.exception(err)
        return None
def fetch_sras_info_per(sample_url, ftp_handler):
    """
    Fetch information of sra files for one sample.

    The path of ``sample_url`` is split into a parent directory (which the
    FTP handler changes into) and a trailing SRX id (which is then listed).

    :param sample_url: URL pointing at the SRX directory of the sample
    :param ftp_handler: object exposing ``cwd``, ``nlst``, ``sendcmd`` and
        ``size`` (presumably a connected ``ftplib.FTP`` instance -- confirm
        against the caller)
    :returns: a list with one single-key dict per sra file, mapping the sra
        path to ``{'size': <bytes>, 'readable_size': <pretty_usage(bytes)>}``;
        ``None`` if listing or sizing raised (the error is logged). A failure
        of the initial ``cwd`` is not caught and propagates.
    """
    urlparsed = urlparse.urlparse(sample_url)
    # e.g. before_srx_dir: /sra/sra-instant/reads/ByExp/sra/SRX/SRX573[/SRX123456]
    before_srx_dir = os.path.dirname(urlparsed.path)
    ftp_handler.cwd(before_srx_dir)
    # e.g. srx: SRX573027
    srx = os.path.basename(urlparsed.path)
    try:
        # Collect every sra path found under each SRR of this SRX.
        sras = []
        for srr in ftp_handler.nlst(srx):
            sras.extend(ftp_handler.nlst(srr))

        # Switch the transfer type before asking for sizes, see
        # http://stackoverflow.com/questions/3231910/python-ftplib-cant-get-size-of-file-before-download
        ftp_handler.sendcmd('TYPE i')
        # sizes returned are in unit of byte
        sizes = [ftp_handler.size(sra) for sra in sras]

        sras_info = [
            {
                sra: {
                    'size': size,
                    'readable_size': pretty_usage(size)
                }
            }
            for (sra, size) in zip(sras, sizes)
        ]
        # e.g. [{sra1: {'size': 123}}, {sra2: {'size': 456}}, ...]
        return sras_info
    except Exception as err:
        # ``except ... as`` works on Python 2.6+ and Python 3; the older
        # comma form is a syntax error on Python 3.
        logger.exception(err)
        return None
def select_gsms_to_transfer(samples, transferred_gsms, l_top_outdir,
                            r_free_to_use, fastq2rsem_ratio):
    """
    Select samples to transfer (not to be confused with
    select_samples_to_process in utils_pre_pipeline.py, which selects samples
    to process).

    Samples whose name is not in ``transferred_gsms`` are visited in order.
    A sample is skipped when ``PPR.is_processed`` rejects its outdir, or when
    its estimated rsem usage exceeds the remaining remote budget. Each
    accepted sample reduces the budget by its estimated usage.

    :param samples: a list of Sample instances representing both transferred
        and non-transferred GSMs
    :param transferred_gsms: a list of string with GSM ids. e.g. [GSM1, GSM2]
    :param l_top_outdir: local top outdir; only used to derive the id shown
        in log messages (sample outdir relative to it)
    :param r_free_to_use: space free to use on the remote host, in the same
        unit as the value returned by ``estimate_rsem_usage``
    :param fastq2rsem_ratio: passed through to ``estimate_rsem_usage``
    :returns: the list of samples selected for transfer
    """
    # GSMs that have not been transferred yet
    pending = [sample for sample in samples
               if sample.name not in transferred_gsms]

    remaining = r_free_to_use
    selected = []
    for sample in pending:
        gsm_id = os.path.relpath(sample.outdir, l_top_outdir)
        if not PPR.is_processed(sample.outdir):
            # per the original note, debug info is logged on the PPR side
            continue

        usage = estimate_rsem_usage(sample.outdir, fastq2rsem_ratio)
        log_args = (gsm_id,
                    misc.pretty_usage(usage),
                    misc.pretty_usage(remaining))
        if usage > remaining:
            logger.debug(
                "{0} ({1}) doesn't fit current remote free_to_use ({2})"
                .format(*log_args))
        else:
            logger.info(
                '{0} ({1}) fit remote free_to_use ({2})'.format(*log_args))
            remaining -= usage
            selected.append(sample)
    return selected
def test_pretty_usage(self):
    """misc.pretty_usage renders byte counts with the expected value/unit."""
    # (input in bytes, expected rendering), checked in this order
    cases = [
        # below and at the 1024 boundary
        (1000, '1000.0 bytes'),
        (1023, '1023.0 bytes'),
        (1024, '1.0 KB'),
        (1025, '1.0 KB'),
        # negative inputs mirror the positive ones
        (-1000, '-1000.0 bytes'),
        (-1023, '-1023.0 bytes'),
        (-1024, '-1.0 KB'),
        (-1025, '-1.0 KB'),
        # larger units, including rounding to one decimal place
        (1024**3, '1.0 GB'),
        (1.5 * 1024**3, '1.5 GB'),
        (1.59 * 1024**3, '1.6 GB'),
        (1.69 * 1024**4, '1.7 TB'),
        (1.69 * 1024**5, '1.7 PB'),
    ]
    for num_bytes, expected in cases:
        self.assertEqual(misc.pretty_usage(num_bytes), expected)
def test_pretty_usage(self):
    """Verify the strings produced by misc.pretty_usage for sample sizes."""
    KB = 1024
    GB = 1024 ** 3
    TB = 1024 ** 4
    PB = 1024 ** 5

    def expect(num_bytes, rendered):
        # One assertion per call, so the first mismatch fails the test.
        self.assertEqual(misc.pretty_usage(num_bytes), rendered)

    # values around the first unit boundary
    expect(1000, '1000.0 bytes')
    expect(KB - 1, '1023.0 bytes')
    expect(KB, '1.0 KB')
    expect(KB + 1, '1.0 KB')

    # the same values, negated
    expect(-1000, '-1000.0 bytes')
    expect(-(KB - 1), '-1023.0 bytes')
    expect(-KB, '-1.0 KB')
    expect(-(KB + 1), '-1.0 KB')

    # larger units; fractional inputs are rounded to one decimal place
    expect(GB, '1.0 GB')
    expect(1.5 * GB, '1.5 GB')
    expect(1.59 * GB, '1.6 GB')
    expect(1.69 * TB, '1.7 TB')
    expect(1.69 * PB, '1.7 PB')