def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Demultiplexes the lane's fastq input by barcode and returns a list of
    (fastq1, fastq2, item, lane_name, lane_desc, dirs, config) tuples,
    one per prepared fastq pair.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    logger.info("Preparing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"], dirs["work"], lane_items[0], fc_name, dirs=dirs,
        config=shared.update_config_w_custom(config, lane_items[0]))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        config = shared.update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # NOTE: `dict.has_key` was removed in Python 3; use `in` instead
        # (matches the other process_lane definition in this file).
        if item["barcode_id"] in bc_files:
            for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files,
                                                              dirs, config):
                cur_lane_name = lane_name
                cur_lane_desc = item["description"]
                if item.get("name", "") and config["algorithm"].get(
                        "include_short_name", True):
                    cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
                if item["barcode_id"] is not None:
                    cur_lane_name += "_%s" % (item["barcode_id"])
                if lane_ext is not None:
                    cur_lane_name += "_s{0}".format(lane_ext)
                out.append((fastq1, fastq2, item, cur_lane_name,
                            cur_lane_desc, dirs, config))
    return out
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Splits the lane's fastq files by barcode and builds one
    (fastq1, fastq2, item, name, description, dirs, config) tuple
    per prepared fastq pair.
    """
    first = lane_items[0]
    lane_name = "%s_%s_%s" % (first['lane'], fc_date, fc_name)
    logger.info("Preparing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"], dirs["work"], first, fc_name, dirs=dirs,
        config=shared.update_config_w_custom(config, first))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    prepped = []
    for item in lane_items:
        config = shared.update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] not in bc_files:
            continue
        fastq_triples = _prep_fastq_files(item, bc_files, dirs, config)
        for fastq1, fastq2, lane_ext in fastq_triples:
            name = lane_name
            desc = item["description"]
            if item.get("name", "") and config["algorithm"].get(
                    "include_short_name", True):
                desc = "%s : %s" % (item["name"], desc)
            if item["barcode_id"] is not None:
                name += "_%s" % (item["barcode_id"])
            if lane_ext is not None:
                name += "_s{0}".format(lane_ext)
            prepped.append((fastq1, fastq2, item, name, desc, dirs, config))
    return prepped
def merge_sample(data):
    """Merge fastq and BAM files for multiple samples.

    Combines the sample's input files into a single sorted BAM (and
    optionally combined fastqs), returning the merged sample details
    wrapped as [[dict]].
    """
    logger.info("Combining fastq and BAM files %s" % str(data["name"]))
    config = shared.update_config_w_custom(data["config"], data["info"])
    genome_build, sam_ref = shared.ref_genome_info(data["info"], config,
                                                   data["dirs"])
    # Only produce combined fastqs when explicitly requested.
    fastq1 = fastq2 = None
    if config["algorithm"].get("upload_fastq", False):
        fastq1, fastq2 = combine_fastq_files(data["fastq_files"],
                                             data["dirs"]["work"], config)
    sort_bam = merge_bam_files(data["bam_files"], data["dirs"]["work"],
                               config)
    merged = {"name": data["name"],
              "metadata": data["info"].get("metadata", {}),
              "info": data["info"],
              "genome_build": genome_build,
              "sam_ref": sam_ref,
              "work_bam": sort_bam,
              "fastq1": fastq1,
              "fastq2": fastq2,
              "dirs": data["dirs"],
              "config": config,
              "config_file": data["config_file"]}
    return [[merged]]
def _item_needs_compute(lanes): """Determine if any item needs computing resources to spin up a cluster. """ for lane_items, _, _, _, config in lanes: # check if multiplexed if len(lane_items) > 1 or lane_items[0]["barcode_id"] is not None: return True # check if we need to process the input by splitting or conversion item = lane_items[0] config = shared.update_config_w_custom(config, item) split_size = config.get("distributed", {}).get("align_split_size", config["algorithm"].get("align_split_size", None)) if split_size is not None: return True if needs_fastq_conversion(item, config): return True return False
def merge_sample(data):
    """Merge fastq and BAM files for multiple samples.

    Returns the merged sample information as a double-nested list of
    one dictionary, matching the pipeline's expected structure.
    """
    logger.info("Combining fastq and BAM files %s" % str(data["name"]))
    cfg = shared.update_config_w_custom(data["config"], data["info"])
    build, ref_file = shared.ref_genome_info(data["info"], cfg, data["dirs"])
    if cfg["algorithm"].get("upload_fastq", False):
        fq1, fq2 = combine_fastq_files(data["fastq_files"],
                                       data["dirs"]["work"], cfg)
    else:
        fq1 = fq2 = None
    merged_bam = merge_bam_files(data["bam_files"], data["dirs"]["work"], cfg)
    result = dict(name=data["name"],
                  metadata=data["info"].get("metadata", {}),
                  info=data["info"],
                  genome_build=build,
                  sam_ref=ref_file,
                  work_bam=merged_bam,
                  fastq1=fq1,
                  fastq2=fq2,
                  dirs=data["dirs"],
                  config=cfg,
                  config_file=data["config_file"])
    return [[result]]
def _item_needs_compute(lanes): """Determine if any item needs computing resources to spin up a cluster. """ for lane_items, _, _, _, config in lanes: # check if multiplexed if len(lane_items) > 1 or lane_items[0]["barcode_id"] is not None: return True # check if we need to process the input by splitting or conversion item = lane_items[0] config = shared.update_config_w_custom(config, item) split_size = config.get("distributed", {}).get( "align_split_size", config["algorithm"].get("align_split_size", None)) if split_size is not None: return True if needs_fastq_conversion(item, config): return True return False