Example #1
0
def estimate_current_remote_usage(remote, username, r_dir, l_dir, fastq2rsem_ratio):
    """
    Estimate the space that has already been or will be consumed by rsem
    output for the GSMs that are still being processed.

    mechanism: fetch the list of paths under r_dir, keep the entries whose
    basename ends with a GSM id (``GSM<digits>``), map each one to its
    counterpart under l_dir and sum the values returned by
    estimate_rsem_usage for them. A GSM is ignored when its rsem.COMPLETE
    is present in the remote listing, or when misc.is_empty_dir reports its
    directory as empty.

    NOTE(review): the per-GSM estimate is presumably derived from the sizes
    of the fastq.gz files found under l_dir -- confirm against
    estimate_rsem_usage.

    :param remote: forwarded to fetch_remote_file_list
    :param username: forwarded to fetch_remote_file_list
    :param r_dir: remote rsem output directory
    :param l_dir: local rsem output directory
    :param fastq2rsem_ratio: forwarded to estimate_rsem_usage
    :return: sum of the per-GSM estimates; 0 when no GSM qualifies

    """
    files = fetch_remote_file_list(remote, username, r_dir)
    # Build the lookup set once so the rsem.COMPLETE test is O(1) per GSM
    # instead of a scan over the whole listing.
    known_paths = set(files)
    usage = 0
    for dir_ in sorted(files):
        if not re.search(r'(GSM\d+$)', os.path.basename(dir_)):
            continue
        rsem_comp = os.path.join(dir_, 'rsem.COMPLETE')
        # only count the disk spaces used by those GSMs that are being
        # processed: finished ones and empty directories are skipped
        if rsem_comp in known_paths or misc.is_empty_dir(dir_, files):
            continue
        # Swap only the first occurrence of r_dir; a path that happens to
        # contain the same text again further down must keep it unchanged.
        gsm_dir = dir_.replace(r_dir, l_dir, 1)
        usage += estimate_rsem_usage(gsm_dir, fastq2rsem_ratio)
    return usage
Example #2
0
 def test_is_empty_dir(self):
     """Check misc.is_empty_dir against a listing with and without a child of '/p'."""
     # '/p/a.txt' sits under '/p', so '/p' is expected to be reported non-empty.
     listing_with_child = ['/p', '/p/a.txt']
     # The only file here sits under '/s', so '/p' is expected to be empty.
     listing_without_child = ['/p', '/s', '/s/a.txt']

     self.assertFalse(misc.is_empty_dir('/p', listing_with_child))
     self.assertTrue(misc.is_empty_dir('/p', listing_without_child))
Example #3
0
 def test_is_empty_dir(self):
     """Check misc.is_empty_dir against a listing with and without a child of '/p'."""
     target = '/p'
     # A listing containing '/p/a.txt' is expected to make '/p' non-empty.
     populated = [target, '/p/a.txt']
     # A listing whose only file is under '/s' is expected to leave '/p' empty.
     unrelated = [target, '/s', '/s/a.txt']

     self.assertFalse(misc.is_empty_dir(target, populated))
     self.assertTrue(misc.is_empty_dir(target, unrelated))