def fetch_sras_info_per(sample_url, ftp_handler):
    """
    Fetch name and size information of the sra files for one sample.

    :param sample_url: URL of the sample's SRX directory; only its path
                       component is used, e.g.
                       /sra/sra-instant/reads/ByExp/sra/SRX/SRX573/SRX573027
    :param ftp_handler: connected FTP client providing cwd, nlst, sendcmd and
                        size (presumably an ftplib.FTP instance)
    :return: a list with one single-key dict per sra file, e.g.
             [{sra1: {'size': 123, 'readable_size': ...}}, ...], or None when
             listing or sizing fails (the error is logged, not re-raised).
             Errors raised by the initial cwd are NOT caught here.
    """
    urlparsed = urlparse.urlparse(sample_url)
    # e.g. before_srx_dir: /sra/sra-instant/reads/ByExp/sra/SRX/SRX573
    before_srx_dir = os.path.dirname(urlparsed.path)
    ftp_handler.cwd(before_srx_dir)
    # e.g. srx: SRX573027
    srx = os.path.basename(urlparsed.path)
    try:
        # first level below the SRX directory: one entry per SRR
        srrs = ftp_handler.nlst(srx)
        # second level: the sra files of every SRR, flattened into one list
        # http://stackoverflow.com/questions/2961983/convert-multi-dimensional-list-to-a-1d-list-in-python
        sras = [sra for srr in srrs for sra in ftp_handler.nlst(srr)]

        # switch the transfer type before asking for sizes, see
        # http://stackoverflow.com/questions/3231910/python-ftplib-cant-get-size-of-file-before-download
        ftp_handler.sendcmd('TYPE i')
        # sizes returned are in unit of byte
        sizes = [ftp_handler.size(sra) for sra in sras]

        # e.g. [{sra1: {'size': 123, ...}}, {sra2: {'size': 456, ...}}, ...]
        sras_info = [{sra: {'size': size, 'readable_size': pretty_usage(size)}}
                     for (sra, size) in zip(sras, sizes)]
        return sras_info
    except Exception as err:
        # best effort: report the failure and signal it to the caller with
        # None instead of raising
        logger.exception(err)
        return None
Exemple #2
0
def fetch_sras_info_per(sample_url, ftp_handler):
    """
    Fetch information (name, size) of the sra files for one sample.

    :param sample_url: URL pointing at the sample's SRX directory; only the
                       path part of the URL is used
    :param ftp_handler: connected FTP client providing cwd, nlst, sendcmd and
                        size (presumably an ftplib.FTP instance)
    :return: list of single-key dicts keyed by the sra path as listed by the
             server, e.g. [{sra1: {'size': 123, 'readable_size': ...}}, ...];
             None if listing or sizing raised (the exception is logged).
             A failing cwd is not caught and propagates to the caller.
    """
    url_path = urlparse.urlparse(sample_url).path
    # e.g. before_srx_dir: /sra/sra-instant/reads/ByExp/sra/SRX/SRX573
    before_srx_dir = os.path.dirname(url_path)
    ftp_handler.cwd(before_srx_dir)
    # e.g. srx: SRX573027
    srx = os.path.basename(url_path)
    try:
        # walk SRX -> SRR -> sra files and collect every sra path
        sras = []
        for srr in ftp_handler.nlst(srx):
            sras.extend(ftp_handler.nlst(srr))

        # the transfer type is switched before SIZE is issued, see
        # http://stackoverflow.com/questions/3231910/python-ftplib-cant-get-size-of-file-before-download
        ftp_handler.sendcmd('TYPE i')
        # sizes returned are in unit of byte
        sizes = [ftp_handler.size(sra) for sra in sras]

        sras_info = []
        for sra, size in zip(sras, sizes):
            sras_info.append({
                sra: {
                    'size': size,
                    'readable_size': pretty_usage(size)
                }
            })
        return sras_info
    except Exception as err:
        # "except ... as" is valid on Python 2.6+ and Python 3, unlike the
        # comma form; failures are logged and reported as None
        logger.exception(err)
        return None
Exemple #3
0
def select_gsms_to_transfer(samples, transferred_gsms,
                          l_top_outdir, r_free_to_use, fastq2rsem_ratio):
    """
    Pick the samples that should be transferred to the remote host next.

    This selects samples to *transfer*; it is different from
    select_samples_to_process in utils_pre_pipeline.py, which selects samples
    to process.

    Samples whose name is already in transferred_gsms are ignored. Each
    remaining sample is skipped if PPR.is_processed reports it as not
    processed; otherwise its usage is estimated with estimate_rsem_usage and
    the sample is selected when that estimate does not exceed the space still
    free on the remote host. Every selected sample reduces the free space
    available to the samples after it.

    :param samples: a list of Sample instances representing both transferred
                    and non-transferred GSMs; each needs .name and .outdir
    :param transferred_gsms: a list of string with GSM ids. e.g. [GSM1, GSM2]
    :param l_top_outdir: local top outdir; only used to build the relative id
                         shown in log messages
    :param r_free_to_use: space free to use on the remote host, in the same
                          unit as estimate_rsem_usage returns
    :param fastq2rsem_ratio: passed through to estimate_rsem_usage
    :return: the selected samples, in their original order
    """
    # phase 1: drop everything that has been transferred already
    pending = []
    for sample in samples:
        if sample.name not in transferred_gsms:
            pending.append(sample)

    # phase 2: greedily take processed samples while they still fit
    selected = []
    remaining = r_free_to_use
    for sample in pending:
        rel_id = os.path.relpath(sample.outdir, l_top_outdir)

        # debug info will be logged by PPR.is_processed
        if not PPR.is_processed(sample.outdir):
            continue

        needed = estimate_rsem_usage(sample.outdir, fastq2rsem_ratio)
        too_big = needed > remaining
        needed_str = misc.pretty_usage(needed)
        remaining_str = misc.pretty_usage(remaining)

        if too_big:
            logger.debug(
                "{0} ({1}) doesn't fit current remote free_to_use ({2})".format(
                    rel_id, needed_str, remaining_str))
        else:
            logger.info('{0} ({1}) fit remote free_to_use ({2})'.format(
                rel_id, needed_str, remaining_str))
            # reserve the space so later samples see the reduced budget
            remaining -= needed
            selected.append(sample)
    return selected
Exemple #4
0
 def test_pretty_usage(self):
     # (number of bytes, expected rendering) pairs, checked in this order.
     # The unit changes at multiples of 1024, negative values keep their
     # sign, and the number is shown with one decimal place.
     cases = [
         (1000, '1000.0 bytes'),
         (1023, '1023.0 bytes'),
         (1024, '1.0 KB'),
         (1025, '1.0 KB'),
         (-1000, '-1000.0 bytes'),
         (-1023, '-1023.0 bytes'),
         (-1024, '-1.0 KB'),
         (-1025, '-1.0 KB'),
         (1024**3, '1.0 GB'),
         (1.5 * 1024**3, '1.5 GB'),
         (1.59 * 1024**3, '1.6 GB'),
         (1.69 * 1024**4, '1.7 TB'),
         (1.69 * 1024**5, '1.7 PB'),
     ]
     for n_bytes, expected in cases:
         self.assertEqual(misc.pretty_usage(n_bytes), expected)
Exemple #5
0
 def test_pretty_usage(self):
     # misc.pretty_usage takes a size in bytes and returns a readable string
     def expect(n_bytes, rendered):
         self.assertEqual(misc.pretty_usage(n_bytes), rendered)

     # below 1024 the value stays in bytes; from 1024 on it becomes KB
     expect(1000, '1000.0 bytes')
     expect(1023, '1023.0 bytes')
     expect(1024, '1.0 KB')
     expect(1025, '1.0 KB')

     # negative sizes switch unit at the same magnitudes and keep the sign
     expect(-1000, '-1000.0 bytes')
     expect(-1023, '-1023.0 bytes')
     expect(-1024, '-1.0 KB')
     expect(-1025, '-1.0 KB')

     # larger units, shown with one decimal place
     expect(1024 ** 3, '1.0 GB')
     expect(1.5 * 1024 ** 3, '1.5 GB')
     expect(1.59 * 1024 ** 3, '1.6 GB')
     expect(1.69 * 1024 ** 4, '1.7 TB')
     expect(1.69 * 1024 ** 5, '1.7 PB')