Python sort_sample_config_fastq Examples, scilifelab.bcbio.sort_sample_config_fastq Python Examples

Example #1

0

Show file

File: run.py Project: percyfal/scilifelab

def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Setup analysis that merges multiple sample runs.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: function that groups files into samples and sample runs. The function takes flist as input.

    :returns: updated flist with config files for merged samples
    """
    new_flist = []
    sample_d = sample_group_fn(flist)
    for k, v in sample_d.iteritems():
        if len(v) > 1:
            f = v[v.keys()[0]]
            out_d = os.path.join(os.path.dirname(os.path.dirname(f)), MERGED_SAMPLE_OUTPUT_DIR)
            LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d))
            dry_makedir(out_d, dry_run=False)
            pp = kw.get("post_process") if kw.get("post_process", None) else f.replace("-bcbb-config.yaml", "-post_process.yaml")
            with open(pp) as fh:
                conf = yaml.load(fh)
            conf = update_pp_platform_args(conf, **{'jobname': "{}_total".format(k), 'workdir': out_d, 'output': "{}_total-bcbb.log".format(k) })
            pp_new = os.path.join(out_d, os.path.basename(pp))
            dry_unlink(pp_new, dry_run=kw.get('dry_run', True))
            dry_write(pp_new, yaml.safe_dump(conf, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw.get('dry_run', True))
            # Setup merged bcbb-config file
            bcbb_config = merge_sample_config(v.values(), sample=k, out_d=out_d, dry_run=kw.get('dry_run', True))
            bcbb_config_file = os.path.join(out_d, os.path.basename(v.values()[0]))
            bcbb_config = sort_sample_config_fastq(bcbb_config)
            if not os.path.exists(bcbb_config_file) or kw.get('new_config', False):
                dry_unlink(bcbb_config_file, dry_run=kw.get('dry_run', True))
                dry_write(bcbb_config_file, yaml.safe_dump(bcbb_config, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw.get('dry_run', True))
            ##new_flist.extend(v.values())
            new_flist.extend([bcbb_config_file])
    return new_flist

Example #2

0

Show file

def setup_merged_samples(flist, sample_group_fn=_group_samples, **kw):
    """Setup analysis that merges multiple sample runs.

    :param flist: list of file names, by default *-bcbb-config.yaml files
    :param sample_group_fn: function that groups files into samples and sample runs. The function takes flist as input.

    :returns: updated flist with config files for merged samples
    """
    new_flist = []
    sample_d = sample_group_fn(flist)
    for k, v in sample_d.iteritems():
        if len(v):
            f = v[v.keys()[0]]
            out_d = os.path.join(os.path.dirname(os.path.dirname(f)), MERGED_SAMPLE_OUTPUT_DIR)
            LOG.info("Sample {} has {} sample runs; setting up merge analysis in {}".format(k, len(v), out_d))
            dry_makedir(out_d, dry_run=False)
            pp = kw.get("post_process",f.replace("-bcbb-config.yaml", "-post_process.yaml"))
            with open(pp) as fh:
                conf = yaml.load(fh)
            conf = update_pp_platform_args(conf, **{'jobname': "{}_total".format(k), 'workdir': out_d, 'output': "{}_total-bcbb.log".format(k) })
            pp_new = os.path.join(out_d, os.path.basename(pp))
            dry_unlink(pp_new, dry_run=kw.get('dry_run', True))
            dry_write(pp_new, yaml.safe_dump(conf, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw.get('dry_run', True))
            # Setup merged bcbb-config file
            bcbb_config = merge_sample_config(v.values(), sample=k, out_d=out_d, dry_run=kw.get('dry_run', True))
            bcbb_config_file = os.path.join(out_d, os.path.basename(v.values()[0]))
            bcbb_config = sort_sample_config_fastq(bcbb_config, path=out_d)
            if not os.path.exists(bcbb_config_file) or kw.get('new_config', False):
                dry_unlink(bcbb_config_file, dry_run=kw.get('dry_run', True))
                dry_write(bcbb_config_file, yaml.safe_dump(bcbb_config, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw.get('dry_run', True))
            ##new_flist.extend(v.values())
            new_flist.extend([bcbb_config_file])
    return new_flist

Example #3

0

Show file

File: run.py Project: percyfal/scilifelab

def setup_sample(f, analysis, amplicon=False, genome_build="hg19", **kw):
    """Setup config files, making backups and writing new files

    :param path: root path in which to search for samples
    :param dry_run: dry run flag
    """
    if not os.path.exists(f):
        return
    with open(f) as fh:
        config = yaml.load(fh)
    ## Check for correctly formatted config
    if not config.get("details", None):
        LOG.warn("Couldn't find 'details' section in config file {}: aborting setup!".format(f))
        return

    ## Save file to backup if backup doesn't exist
    f_bak = f.replace("-bcbb-config.yaml", "-bcbb-config.yaml.bak")
    if not os.path.exists(f_bak):
        LOG.info("Making backup of {} in {}".format(f, f_bak))
        dry_backup(os.path.abspath(f), dry_run=kw['dry_run'])

    ## Save command file to backup if it doesn't exist
    cmdf = f.replace("-bcbb-config.yaml", "-bcbb-command.txt")
    if os.path.exists(cmdf):
        cmdf_bak = cmdf.replace("-bcbb-command.txt", "-bcbb-command.txt.bak")
        if not os.path.exists(cmdf_bak):
            LOG.info("Making backup of {} in {}".format(cmdf, cmdf_bak))
            dry_backup(os.path.abspath(cmdf), dry_run=kw['dry_run'])

    ## Save post_process file to backup if it doesn't exist
    ppf = f.replace("-bcbb-config.yaml", "-post_process.yaml")
    if os.path.exists(ppf):
        ppf_bak = ppf.replace("-post_process.yaml", "-post_process.yaml.bak")
        if not os.path.exists(ppf_bak):
            LOG.info("Making backup of {} in {}".format(ppf, ppf_bak))
            dry_backup(ppf, dry_run=kw['dry_run'])

    if analysis:
        config = update_sample_config(config, "analysis", analysis)
    if genome_build:
        config = update_sample_config(config, "genome_build", genome_build)
    config = sort_sample_config_fastq(config)

    ## Remove config file and rewrite
    dry_unlink(f, kw['dry_run'])
    dry_write(f, yaml.safe_dump(config, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw['dry_run'])

    ## Setup post process only if not provided at command line
    if not kw.get("post_process", None):
        ppfile = f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(ppfile) as fh:
            pp = yaml.load(fh)
        ## Need to set working directory to path of bcbb-config.yaml file
        if pp.get('distributed', {}).get('platform_args', None):
            platform_args = pp['distributed']['platform_args'].split()
            if "-D" in platform_args:
                platform_args[platform_args.index("-D")+1] = os.path.dirname(f)
            elif "--workdir" in platform_args:
                platform_args[platform_args.index("--workdir")+1] = os.path.dirname(f)
            pp['distributed']['platform_args'] = " ".join(platform_args)
        ## Change keys for all analyses
        for anl in pp.get('custom_algorithms',{}).keys():
            if kw.get('baits', None):
                pp['custom_algorithms'][anl]['hybrid_bait'] = kw['baits']
            if kw.get('targets', None):
                pp['custom_algorithms'][anl]['hybrid_target'] = kw['targets']
            if amplicon:
                pp['custom_algorithms'][anl]['mark_duplicates'] = False
        if amplicon:
            LOG.info("setting amplicon analysis")
            pp['algorithm']['mark_duplicates'] = False
        if kw.get('galaxy_config', None):
            pp['galaxy_config'] = kw['galaxy_config']
        if kw.get('distributed', None):
            LOG.info("setting distributed execution")
            pp['algorithm']['num_cores'] = 'messaging'
        else:
            LOG.info("setting parallell execution")
            pp['algorithm']['num_cores'] = kw['num_cores']
        if kw.get('snpEff', None):
            LOG.info("setting snpEff to {}".format(kw["snpEff"]))
            pp['program']['snpEff'] = kw['snpEff']
        dry_unlink(ppfile, dry_run=kw['dry_run'])
        dry_write(ppfile, yaml.safe_dump(pp, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw['dry_run'])

Example #4

0

Show file

def setup_sample(f, analysis, amplicon=False, genome_build="hg19", **kw):
    """Setup config files, making backups and writing new files

    :param path: root path in which to search for samples
    :param dry_run: dry run flag
    """
    if not os.path.exists(f):
        return
    with open(f) as fh:
        config = yaml.load(fh)
    ## Check for correctly formatted config
    if not config.get("details", None):
        LOG.warn("Couldn't find 'details' section in config file {}: aborting setup!".format(f))
        return

    ## Save file to backup if backup doesn't exist
    f_bak = f.replace("-bcbb-config.yaml", "-bcbb-config.yaml.bak")
    if not os.path.exists(f_bak):
        LOG.info("Making backup of {} in {}".format(f, f_bak))
        dry_backup(os.path.abspath(f), dry_run=kw['dry_run'])

    ## Save command file to backup if it doesn't exist
    cmdf = f.replace("-bcbb-config.yaml", "-bcbb-command.txt")
    if os.path.exists(cmdf):
        cmdf_bak = cmdf.replace("-bcbb-command.txt", "-bcbb-command.txt.bak")
        if not os.path.exists(cmdf_bak):
            LOG.info("Making backup of {} in {}".format(cmdf, cmdf_bak))
            dry_backup(os.path.abspath(cmdf), dry_run=kw['dry_run'])

    ## Save post_process file to backup if it doesn't exist
    ppf = f.replace("-bcbb-config.yaml", "-post_process.yaml")
    if os.path.exists(ppf):
        ppf_bak = ppf.replace("-post_process.yaml", "-post_process.yaml.bak")
        if not os.path.exists(ppf_bak):
            LOG.info("Making backup of {} in {}".format(ppf, ppf_bak))
            dry_backup(ppf, dry_run=kw['dry_run'])

    if analysis:
        config = update_sample_config(config, "analysis", analysis)
    if genome_build:
        config = update_sample_config(config, "genome_build", genome_build)
    config = sort_sample_config_fastq(config)

    ## Remove config file and rewrite
    dry_unlink(f, kw['dry_run'])
    dry_write(f, yaml.safe_dump(config, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw['dry_run'])

    ## Setup post process only if not provided at command line
    if not kw.get("post_process", None):
        ppfile = f.replace("-bcbb-config.yaml", "-post_process.yaml")
        with open(ppfile) as fh:
            pp = yaml.load(fh)
        ## Need to set working directory to path of bcbb-config.yaml file
        if pp.get('distributed', {}).get('platform_args', None):
            platform_args = pp['distributed']['platform_args'].split()
            if "-D" in platform_args:
                platform_args[platform_args.index("-D")+1] = os.path.dirname(f)
            elif "--workdir" in platform_args:
                platform_args[platform_args.index("--workdir")+1] = os.path.dirname(f)
            pp['distributed']['platform_args'] = " ".join(platform_args)
        ## Change keys for all analyses
        for anl in pp.get('custom_algorithms',{}).keys():
            if kw.get('baits', None):
                pp['custom_algorithms'][anl]['hybrid_bait'] = kw['baits']
            if kw.get('targets', None):
                pp['custom_algorithms'][anl]['hybrid_target'] = kw['targets']
            if amplicon:
                pp['custom_algorithms'][anl]['mark_duplicates'] = False
        if amplicon:
            LOG.info("setting amplicon analysis")
            pp['algorithm']['mark_duplicates'] = False
        if kw.get('galaxy_config', None):
            pp['galaxy_config'] = kw['galaxy_config']
        if kw.get('distributed', None):
            LOG.info("setting distributed execution")
            pp['algorithm']['num_cores'] = 'messaging'
        elif kw.get('num_cores', None):
            LOG.info("setting parallell execution")
            pp['algorithm']['num_cores'] = kw['num_cores']
        if kw.get('snpEff', None):
            LOG.info("setting snpEff to {}".format(kw["snpEff"]))
            pp['program']['snpEff'] = kw['snpEff']
        dry_unlink(ppfile, dry_run=kw['dry_run'])
        dry_write(ppfile, yaml.safe_dump(pp, default_flow_style=False, allow_unicode=True, width=1000), dry_run=kw['dry_run'])