Exemple #1
0
def rsem(inputs, outputs):
    """
    Assemble the RSEM command for one sample and pass it to
    ``misc.execute_log_stdout_stderr``.

    :param inputs: paths of the sample's fastq.gz files, e.g.
    ('/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_1.fastq.gz',
     '/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_2.fastq.gz')
    Entries ending with ``.sra2fastq.COMPLETE`` are skipped.
    :param outputs: sequence whose last element is forwarded as the flag file.
    """
    fastq_files = [path for path in inputs
                   if not path.endswith('.sra2fastq.COMPLETE')]
    # the directory holding the first fastq file is the sample (GSM) dir
    outdir = os.path.dirname(fastq_files[0])

    # Parameter names mirror those used by gen_qsub_script, but the values
    # differ (e.g. full paths here), so they are computed separately.
    fastq_gz_input = gen_fastq_gz_input(fastq_files)
    match = re.search(PATH_RE, outdir)
    # GSE is read from the path as well, although it is not needed below
    _gse, species, gsm = match.group('GSE', 'species', 'GSM')
    reference_name = config['LOCAL_REFERENCE_NAMES'][species]
    sample_name = '{outdir}/{gsm}'.format(outdir=outdir, gsm=gsm)
    n_jobs = options.j_rsem

    flag_file = outputs[-1]
    # values substituted into the CMD_RSEM command template
    template_values = {
        'n_jobs': n_jobs,
        'fastq_gz_input': fastq_gz_input,
        'reference_name': reference_name,
        'sample_name': sample_name,
        'output_dir': outdir,
    }
    cmd = config['CMD_RSEM'].format(**template_values)
    misc.execute_log_stdout_stderr(
        cmd, flag_file=flag_file, debug=options.debug)
Exemple #2
0
def gen_qsub_script(inputs, outputs):
    """Generate the qsub script, usually named 0_submit.sh.

    The script is rendered from the Jinja2 template named by
    ``options.qsub_template`` and written to ``0_submit.sh`` in the
    directory of the first fastq file.

    :param inputs: paths of the sample's input files; entries ending with
        ``.sra2fastq.COMPLETE`` are skipped.
    :param outputs: not read directly here, but exposed to the template like
        every other local variable.

    NOTE: every local variable of this function is handed to the template
    via ``**locals()``, so local names are part of the template's contract
    and must not be renamed or removed.
    """
    inputs = [_ for _ in inputs if not _.endswith('.sra2fastq.COMPLETE')]
    # TODO: inputs[0] raises IndexError when no fastq file is left, e.g. when
    # the previous step didn't finish successfully; handle that explicitly.
    outdir = os.path.dirname(inputs[0])

    # only need the basename since the 0_submit.sh will be executed in the
    # GSM dir
    fastq_gz_input = gen_fastq_gz_input([os.path.basename(_) for _ in inputs])
    res = re.search(PATH_RE, outdir)
    gse = res.group('GSE')
    species = res.group('species')
    gsm = res.group('GSM')
    reference_name = config['REMOTE_REFERENCE_NAMES'][species]
    sample_name = '{gsm}'.format(gsm=gsm)

    qsub_script = os.path.join(outdir, '0_submit.sh')

    # TEMPLATES_DIR: the standard templates directory
    # os.getcwd(): the current working directory
    # use both for looking for the template
    jinja2_env = Environment(
        loader=FileSystemLoader([TEMPLATES_DIR, os.getcwd()]))
    template = jinja2_env.get_template(options.qsub_template)
    with open(qsub_script, 'wb') as opf:
        content = template.render(**locals())
        # The file is opened in binary mode, so text must be encoded
        # explicitly: writing it as-is raises TypeError on Python 3 and falls
        # back to an implicit ASCII encode on Python 2, which fails on
        # non-ASCII template content.
        if not isinstance(content, bytes):
            content = content.encode('utf-8')
        opf.write(content)
        logger.info('templated {0}'.format(qsub_script))
Exemple #3
0
def gen_qsub_script(inputs, outputs):
    """Generate the qsub script, usually named 0_submit.sh.

    The script is rendered from the Jinja2 template named by
    ``options.qsub_template`` and written to ``0_submit.sh`` in the
    directory of the first fastq file.

    :param inputs: paths of the sample's input files; entries ending with
        ``.sra2fastq.COMPLETE`` are skipped.
    :param outputs: not read directly here, but exposed to the template like
        every other local variable.

    NOTE: every local variable of this function is handed to the template
    via ``**locals()``, so local names are part of the template's contract
    and must not be renamed or removed.
    """
    inputs = [_ for _ in inputs if not _.endswith('.sra2fastq.COMPLETE')]
    # TODO: inputs[0] raises IndexError when no fastq file is left, e.g. when
    # the previous step didn't finish successfully; handle that explicitly.
    outdir = os.path.dirname(inputs[0])

    # only need the basename since the 0_submit.sh will be executed in the
    # GSM dir
    fastq_gz_input = gen_fastq_gz_input(
        [os.path.basename(_) for _ in inputs])
    res = re.search(PATH_RE, outdir)
    gse = res.group('GSE')
    species = res.group('species')
    gsm = res.group('GSM')
    reference_name = config['REMOTE_REFERENCE_NAMES'][species]
    sample_name = '{gsm}'.format(gsm=gsm)

    qsub_script = os.path.join(outdir, '0_submit.sh')

    # TEMPLATES_DIR: the standard templates directory
    # os.getcwd(): the current working directory
    # use both for looking for the template
    jinja2_env = Environment(
        loader=FileSystemLoader([TEMPLATES_DIR, os.getcwd()]))
    template = jinja2_env.get_template(options.qsub_template)
    with open(qsub_script, 'wb') as opf:
        content = template.render(**locals())
        # Binary-mode file: encode the rendered text explicitly. Writing it
        # unencoded raises TypeError on Python 3, and on Python 2 relies on
        # an implicit ASCII encode that breaks on non-ASCII template content.
        if not isinstance(content, bytes):
            content = content.encode('utf-8')
        opf.write(content)
        logger.info('templated {0}'.format(qsub_script))
Exemple #4
0
def rsem(inputs, outputs):
    """
    Format the configured RSEM command for a single sample and execute it
    through ``misc.execute_log_stdout_stderr``.

    :param inputs: fastq.gz file paths belonging to the sample, e.g.
    ('/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_1.fastq.gz',
     '/path/to/rsem_output/homo_sapiens/GSE50599/GSM1224499/SRR968078_2.fastq.gz')
    Paths ending with ``.sra2fastq.COMPLETE`` are dropped first.
    :param outputs: sequence; its last item is used as the flag file.
    """
    marker_suffix = '.sra2fastq.COMPLETE'
    fastq_paths = [p for p in inputs if not p.endswith(marker_suffix)]

    # sample.outdir, i.e. the GSM dir, is where the first fastq file lives
    outdir = os.path.dirname(fastq_paths[0])

    # Same parameter names as in gen_qsub_script, yet with somewhat
    # different values, hence they are built independently here.
    fastq_gz_input = gen_fastq_gz_input(fastq_paths)

    path_match = re.search(PATH_RE, outdir)
    # the GSE group is looked up too, though only species and GSM are used
    _gse, species, gsm = path_match.group('GSE', 'species', 'GSM')

    reference_name = config['LOCAL_REFERENCE_NAMES'][species]
    sample_name = '{outdir}/{gsm}'.format(outdir=outdir, gsm=gsm)
    n_jobs = options.j_rsem
    flag_file = outputs[-1]

    cmd_template = config['CMD_RSEM']
    cmd = cmd_template.format(
        n_jobs=n_jobs,
        fastq_gz_input=fastq_gz_input,
        reference_name=reference_name,
        sample_name=sample_name,
        output_dir=outdir,
    )
    misc.execute_log_stdout_stderr(
        cmd,
        flag_file=flag_file,
        debug=options.debug,
    )
Exemple #5
0
    def test_gen_fastq_gz_input(self):
        """Check the strings ``rsem.gen_fastq_gz_input`` builds from fastq.gz names."""
        # (input file list, expected result), checked in this order
        cases = [
            # one pair of mate files
            (['path/to/SRR000000_1.fastq.gz',
              'path/to/SRR000000_2.fastq.gz'],
             '--paired-end <(/bin/zcat path/to/SRR000000_1.fastq.gz) <(/bin/zcat path/to/SRR000000_2.fastq.gz)'),
            # two runs: _1 files are grouped together, likewise _2 files
            (['path/to/SRR000000_1.fastq.gz',
              'path/to/SRR111111_1.fastq.gz',
              'path/to/SRR000000_2.fastq.gz',
              'path/to/SRR111111_2.fastq.gz'],
             '--paired-end '
             '<(/bin/zcat path/to/SRR000000_1.fastq.gz path/to/SRR111111_1.fastq.gz) '
             '<(/bin/zcat path/to/SRR000000_2.fastq.gz path/to/SRR111111_2.fastq.gz)'),
            # a lone _1 file
            (['path/to/SRR000000_1.fastq.gz'],
             '<(/bin/zcat path/to/SRR000000_1.fastq.gz)'),
            # a lone _2 file
            (['path/to/SRR000000_2.fastq.gz'],
             '<(/bin/zcat path/to/SRR000000_2.fastq.gz)'),
        ]
        for fastq_files, expected in cases:
            self.assertEqual(rsem.gen_fastq_gz_input(fastq_files), expected)

        # a name that is not a recognised fastq.gz file yields None
        self.assertIsNone(rsem.gen_fastq_gz_input(['invalid_fastq_gz_name']))