Code Example #1
File: recipe.py  Project: LPM-HMS/GenomeKey2
import os
from os.path import join as opj

# bed, env, fastqc, s3cmd, util and the helpers out_dir, many2one, align,
# variant_call, make_s3_cmd_fxn_wrapper and shared_fs_cmd_fxn_wrapper are
# GenomeKey2/Cosmos names imported elsewhere in recipe.py.


def run_germline(execution, max_cores, max_attempts, target_bed, input_path=None, s3fs=None):
    """
    Executes the germline variant calling pipeline

    :type execution: Execution
    :param str target_bed: The target bed to call variants in
    :param str input_path: The path to the input_file tsv of fastq files
    """
    target_bed = os.path.abspath(os.path.expanduser(target_bed))
    if input_path:
        input_path = os.path.abspath(os.path.expanduser(input_path))

    # Copy the target.bed to the output_dir
    assert os.path.exists(target_bed), '%s does not exist' % target_bed
    cp_target_bed_task = execution.add_task(lambda drm='local', out_bed=out_dir('target.bed'): 'cp %s %s' % (target_bed, out_bed),
                                            out_dir='', stage_name='Copy_Target_Bed')

    # Split the target bed into one filtered bed per contig (chrom -> target_bed_path)
    # so downstream alignment and calling can scatter across contigs.
    target_bed_tasks = [execution.add_task(bed.filter_bed_by_contig, dict(contig=contig), [cp_target_bed_task], 'work/contigs/{contig}')
                        for contig in util.get_bed_contigs(target_bed)]

    fastq_tasks = list(util.gen_fastq_tasks(execution, input_path))
    # fastq_tasks = [execution.add_task(load_input, dict(in_file=fastq_path, **tags), stage_name='Load_Fastqs')
    # for fastq_path, tags in parse_inputs(input_path)]

    fastqc_tasks = many2one(fastqc.fastqc, fastq_tasks, ['sample_name', 'library'], out_dir='SM_{sample_name}/qc/LB_{library}')

    # fastq_tasks = split_large_fastq_files(execution, fastq_tasks) # not working yet
    aligned_tasks = align(execution, fastq_tasks, target_bed_tasks)
    call_task = variant_call(execution, aligned_tasks, target_bed_tasks)

    execution.run(max_cores=max_cores, max_attempts=max_attempts,
                  cmd_wrapper=make_s3_cmd_fxn_wrapper(s3fs) if s3fs else shared_fs_cmd_fxn_wrapper)

    if execution.successful:
        execution.log.info('Final vcf: %s' % opj(s3fs if s3fs else execution.output_dir,
                                                 call_task.output_files[0]))

    # Copy the sqlite db to s3
    dburl = env.config['gk']['database_url']
    if s3fs and dburl.startswith('sqlite'):
        # TODO: implement a 1-to-1 relationship between a sqlite database and an
        #   Execution.  Currently this pushes way too much information, but will soon
        #   be replaced.  Alternatives: Amazon RDS (or perhaps both), a sqlalchemy
        #   merge saved to sqlite, or a cosmos multiverse.
        s3cmd.cp(dburl.replace('sqlite:///', ''), opj(s3fs, 'sqlite.db.backup'))
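
For orientation, a hypothetical invocation of this recipe might look like the sketch below. The execution object is assumed to be an already-constructed Cosmos Execution, and every path and bucket name is illustrative, not taken from the project.

# A minimal usage sketch, not from the project: `execution` is assumed to be a
# Cosmos Execution built by the surrounding application; all paths are invented.
run_germline(execution,
             max_cores=32,
             max_attempts=2,
             target_bed='~/capture_kits/exome_targets.bed',
             input_path='~/runs/batch1/fastqs.tsv',
             s3fs='s3://my-bucket/genomekey-output')  # or s3fs=None for a shared filesystem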
Code Example #2
File: util.py  Project: LPM-HMS/GenomeKey2
def download_from_s3(in_file, out_file=out_dir('{in_file}')):
    # Copy an object from S3 into the task's output directory (out_dir and
    # s3cmd are GenomeKey2 helpers imported elsewhere in util.py).
    assert in_file.startswith('s3://'), '%s is not an s3:// path' % in_file
    return s3cmd.cp(in_file, out_file)
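
Used as a task function, this would be wired up the same way as the other add_task calls in recipe.py above; a hypothetical example, with the bucket and key invented for illustration:

# Hypothetical wiring, mirroring the add_task calls shown in Code Example #1;
# the s3:// URL is invented.
fastq_task = execution.add_task(download_from_s3,
                                dict(in_file='s3://my-bucket/sample_R1.fastq.gz'),
                                stage_name='Download_Fastq')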