def _filter_out_genomes(data):
    """Run bowtie once per genome listed in ``data["filter_out_genomes"]``.

    The directive is a comma-separated string.  For every non-empty entry a
    ``bowtie`` command is launched on the sample's paired fastq files with
    ``--un`` pointing at ``<fastq1 basename>-stripped-<genome><ext>`` (a bare
    file name, so it is resolved against the current working directory) and
    the alignment output sent to ``/dev/null``.

    Args:
        data: mapping that must provide ``sam_ref`` and ``name``; when the
            directive is set it must also provide ``fastq1`` and ``fastq2``.

    Returns:
        None.  If the ``filter_out_genomes`` key is absent an error is logged
        and nothing is run; if it is present but empty nothing is run.

    Raises:
        KeyError: if ``sam_ref``, ``name``, ``fastq1`` or ``fastq2`` is
            missing from ``data``.
        subprocess.CalledProcessError: if a bowtie invocation exits non-zero.
    """
    sam_ref = data["sam_ref"]
    log.info("Removing genome from sample %s" % str(data["name"]))

    # Only the directive lookup is guarded: a missing fastq key is a different
    # problem and must not be reported as "directive undefined".
    try:
        genomes = data["filter_out_genomes"]
    except KeyError:
        log.error("Not removing genomes, directive filter_out_genomes undefined in run_info.yaml")
        return
    if not genomes:
        return

    fastq1 = data["fastq1"]
    fastq2 = data["fastq2"]
    # The output name is derived from the first fastq file; hoisted because it
    # does not depend on the genome being processed.
    base, ext = os.path.splitext(os.path.basename(fastq1))

    for genome in genomes.split(","):
        genome = genome.strip()
        if not genome:
            # Tolerate stray commas / whitespace in the directive.
            continue
        out_file = base + "-stripped-" + genome + ext
        # NOTE(review): every run uses data["sam_ref"] as the bowtie index;
        # ``genome`` only affects the output file name.  Presumably the index
        # for ``genome`` should be looked up instead -- confirm with the
        # pipeline owners before changing.
        cl = [
            "bowtie",
            "--solexa1.3-quals",
            "--un", out_file,
            sam_ref,
            "-1", fastq1,
            "-2", fastq2,
            "/dev/null",
        ]
        log.info("Running %s" % cl)
        subprocess.check_call(cl)
def main(config_file, fc_dir, project_dir, run_info_yaml=None, fc_alias=None, project_desc=None, lanes=None):
    """Set up delivery directories for a project and hand over to ``run_main``.

    Steps, in order: load the configuration, point its ``log_dir`` at
    ``<project_dir>/log``, read the flowcell run info, prune it by
    ``project_desc``/``lanes``, derive the flowcell name/date and alias,
    compute the delivery directories, then (with the log handler bound)
    create the delivery directory, save the pruned run info and call
    ``run_main``.

    Args:
        config_file: path passed to ``load_config``.
        fc_dir: flowcell directory; normalised with ``os.path.normpath``.
        project_dir: project output directory; ``log`` and the delivery
            directories are placed below it.
        run_info_yaml: optional run info file forwarded to ``get_run_info``.
        fc_alias: optional flowcell alias; defaults to ``<fc_date>_<fc_name>``.
        project_desc: project description used to select lanes.
        lanes: lanes used to select run info entries.

    Exits the process with status 1 when neither ``project_desc`` nor
    ``lanes`` is given, or when pruning leaves no lanes.

    Relies on the module-level ``options`` object (``data_prefix`` and
    ``only_run_info`` attributes), which is not defined in this function.
    """
    if project_desc is None and lanes is None:
        log.error("No project description or lanes provided: cannot deliver files without this information")
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    config = load_config(config_file)
    # Set log file in project output directory
    config.update(log_dir=os.path.join(project_dir, "log"))
    log_handler = create_log_handler(config, log.name)

    fc_dir = os.path.normpath(fc_dir)
    # Flowcell name/date are recomputed from the pruned run info below, so
    # only the run info itself is kept from this call.
    _, _, run_info = get_run_info(fc_dir, config, run_info_yaml)
    # NOTE(review): the nesting of the ``with`` blocks was reconstructed from
    # a whitespace-collapsed source; confirm against version control.
    with log_handler.applicationbound():
        run_info = prune_run_info_by_description(run_info['details'], project_desc, lanes)
    if not run_info:
        log.error("No lanes found with matching description %s: please check your flowcell run information" % project_desc)
        sys.exit(1)

    dirs = dict(fc_dir=fc_dir, project_dir=project_dir)
    fc_name, fc_date = get_flowcell_id(run_info, dirs['fc_dir'])
    default_alias = "%s_%s" % (fc_date, fc_name)
    config.update(fc_name=fc_name, fc_date=fc_date)
    config.update(fc_alias=fc_alias if fc_alias else default_alias)

    # The alias directory may carry a user-supplied name, while the data
    # directory always uses the <fc_date>_<fc_name> form.
    delivery_root = os.path.join(dirs['project_dir'], options.data_prefix)
    dirs.update(fc_delivery_dir=os.path.join(delivery_root, config['fc_alias']))
    dirs.update(data_delivery_dir=os.path.join(delivery_root, default_alias))

    with log_handler.applicationbound():
        config = _make_delivery_directory(dirs, config)
        _save_run_info(run_info, dirs['fc_delivery_dir'], run_exit=options.only_run_info)
        run_main(run_info, config, dirs)