Example #1
0
def _filter_out_genomes(data):
    """Run bowtie once per genome listed in ``data["filter_out_genomes"]``.

    ``data`` is a dict from which this function reads:

    * ``"name"``    -- sample name, used only in the log message.
    * ``"sam_ref"`` -- passed to bowtie as the index argument.
    * ``"fastq1"`` / ``"fastq2"`` -- the paired read files (``-1`` / ``-2``).
    * ``"filter_out_genomes"`` -- comma-separated genome names.

    For each genome, the reads bowtie reports via ``--un`` are written to
    ``<basename of fastq1>-stripped-<genome><ext>`` (a relative path, since
    only the basename of ``fastq1`` is kept). The alignment output itself is
    sent to ``/dev/null``.

    If the ``"filter_out_genomes"`` key is missing, an error is logged and
    nothing is run. If it is present but empty, nothing is run.

    Raises:
        KeyError: if ``"sam_ref"``, ``"name"``, ``"fastq1"`` or ``"fastq2"``
            is missing from ``data``.
        subprocess.CalledProcessError: if bowtie exits with non-zero status.
    """
    sam_ref = data["sam_ref"]

    log.info("Removing genome from sample %s" % str(data["name"]))

    # Only the directive lookup is guarded: a missing fastq key is a different
    # problem and must not be reported as "directive undefined".
    # TODO: the directive should be read from run_info.yaml rather than `data`.
    try:
        directive = data["filter_out_genomes"]
    except KeyError:
        log.error("Not removing genomes, directive filter_out_genomes undefined in run_info.yaml")
        return

    if not directive:
        return

    fastq1 = data["fastq1"]
    fastq2 = data["fastq2"]
    # The output name depends only on fastq1, so split it once up front.
    base, ext = os.path.splitext(os.path.basename(fastq1))

    for genome in directive.split(","):
        # Tolerate "a, b" style lists and stray commas.
        genome = genome.strip()
        if not genome:
            continue

        out_file = base + "-stripped-" + genome + ext
        # NOTE(review): the same sam_ref index is used for every genome; the
        # genome name only affects the output file name -- confirm intent.
        cl = [
            "bowtie",
            "--solexa1.3-quals",
            "--un",
            out_file,
            sam_ref,
            "-1",
            fastq1,
            "-2",
            fastq2,
            "/dev/null",
        ]
        log.info("Running %s" % cl)
        subprocess.check_call(cl)
Example #2
0
def main(config_file, fc_dir, project_dir, run_info_yaml=None, fc_alias=None, project_desc=None, lanes=None):
    """Set up delivery directories for a project on a flowcell and run the pipeline.

    Steps, in order:

    1. Load the configuration and point its ``log_dir`` at
       ``<project_dir>/log``.
    2. Read the flowcell run info and prune it with
       ``prune_run_info_by_description`` using ``project_desc`` and ``lanes``.
    3. Derive the flowcell name and date, then build the delivery
       directories under ``project_dir``.
    4. Save the pruned run info and call ``run_main``.

    Args:
        config_file: path handed to ``load_config``.
        fc_dir: flowcell directory; normalised with ``os.path.normpath``.
        project_dir: root directory for logs and delivered data.
        run_info_yaml: optional run info file passed to ``get_run_info``.
        fc_alias: optional name for the flowcell delivery directory;
            defaults to ``"<fc_date>_<fc_name>"``.
        project_desc: project description used to select lanes.
        lanes: lanes used to select entries from the run info.

    Exits with status 1 if neither ``project_desc`` nor ``lanes`` is given,
    or if pruning leaves no lanes.

    NOTE(review): reads ``options.data_prefix`` and ``options.only_run_info``
    from a name ``options`` that is not a parameter -- presumably a
    module-level command-line options object; confirm it is always set
    before ``main`` is called.
    """
    if project_desc is None and lanes is None:
        log.error("No project description or lanes provided: cannot deliver files without this information")
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    config = load_config(config_file)
    ## Set log file in project output directory
    config.update(log_dir=os.path.join(project_dir, "log"))
    log_handler = create_log_handler(config, log.name)

    fc_dir = os.path.normpath(fc_dir)
    # Name and date returned here are discarded; they are recomputed from the
    # pruned run info below.
    _, _, run_info = get_run_info(fc_dir, config, run_info_yaml)
    with log_handler.applicationbound():
        run_info = prune_run_info_by_description(run_info['details'], project_desc, lanes)
    if not run_info:
        log.error("No lanes found with matching description %s: please check your flowcell run information" % project_desc)
        sys.exit(1)

    dirs = dict(fc_dir=fc_dir, project_dir=project_dir)
    fc_name, fc_date = get_flowcell_id(run_info, dirs['fc_dir'])
    default_alias = "%s_%s" % (fc_date, fc_name)
    config.update(fc_name=fc_name, fc_date=fc_date)
    config.update(fc_alias=fc_alias or default_alias)

    # fc_delivery_dir uses the (possibly user-supplied) alias, while
    # data_delivery_dir always uses the "<date>_<name>" form.
    delivery_root = os.path.join(dirs['project_dir'], options.data_prefix)
    dirs.update(fc_delivery_dir=os.path.join(delivery_root, config['fc_alias']))
    dirs.update(data_delivery_dir=os.path.join(delivery_root, default_alias))

    with log_handler.applicationbound():
        config = _make_delivery_directory(dirs, config)
        _save_run_info(run_info, dirs['fc_delivery_dir'], run_exit=options.only_run_info)
        run_main(run_info, config, dirs)