def demultiplex_samples(data):
    """
    Split a fastqtransformed FASTQ file into one FASTQ file per sample barcode.

    Only single-end input is handled: when ``data["files"]`` holds two files
    an error is logged and the process exits with status 1. When the first
    record line of the input does not contain ``SAMPLE_`` there is nothing to
    demultiplex and the input is handed back untouched as ``[[data]]``.
    Otherwise ``umis demultiplex_samples`` is run into
    ``<work_dir>/umis/<sample_name>/demultiplexed`` (``*.fq*`` files already
    present there are reused instead of re-running) and the output of
    ``split_demultiplexed_sampledata`` is returned wrapped in a list.
    """
    sample_dir = os.path.join(dd.get_work_dir(data), "umis",
                              dd.get_sample_name(data))
    out_dir = os.path.join(sample_dir, "demultiplexed")
    fastqs = data["files"]
    if len(fastqs) == 2:
        logger.error(
            "Sample demultiplexing doesn't handle paired-end reads, but "
            "we can add it. Open an issue here https://github.com/bcbio/bcbio-nextgen/issues if you need this and we'll add it."
        )
        sys.exit(1)
    fastq = fastqs[0]
    # Peek at the first line to see whether reads carry a sample barcode tag.
    with open_fastq(fastq) as reads:
        first_line = next(reads)
    if "SAMPLE_" not in first_line:
        return [[data]]
    barcode_file = get_sample_barcodes(dd.get_sample_barcodes(data), sample_dir)
    fq_pattern = os.path.join(out_dir, "*.fq*")
    existing = glob.glob(fq_pattern)
    if existing:
        # A previous run already produced the per-barcode files.
        return [split_demultiplexed_sampledata(data, existing)]
    umis_exe = config_utils.get_program("umis", data, default="umis")
    cmd_template = ("{umis} demultiplex_samples --nedit 1 --barcodes {bcfile} "
                    "--out_dir {tx_dir} {fq1}")
    with file_transaction(data, out_dir) as tx_dir:
        command = cmd_template.format(umis=umis_exe, bcfile=barcode_file,
                                      tx_dir=tx_dir, fq1=fastq)
        do.run(command, "Demultiplexing {fq1}.".format(fq1=fastq))
    # Glob again only after the transaction has closed.
    return [split_demultiplexed_sampledata(data, glob.glob(fq_pattern))]
def demultiplex_samples(data):
    """
    Demultiplex a fastqtransformed FASTQ file into separate sample barcode files.

    Only single-end input is handled: when ``data["files"]`` holds two files
    an error is logged and the process exits with status 1. When the first
    line of the FASTQ does not contain ``SAMPLE_`` the input is returned
    untouched as ``[[data]]``. A missing ``sample_barcodes`` setting is also
    logged as an error and exits with status 1. Otherwise
    ``umis demultiplex_samples`` is run into
    ``<work_dir>/umis/<sample_name>/demultiplexed`` (``*.fq*`` files already
    present there are reused) and the output of
    ``split_demultiplexed_sampledata`` is returned wrapped in a list.
    """
    files = data["files"]
    if len(files) == 2:
        logger.error("Sample demultiplexing doesn't handle paired-end reads, but "
                     "we can add it. Open an issue here https://github.com/chapmanb/bcbio-nextgen/issues if you need this and we'll add it.")
        sys.exit(1)
    fq1 = files[0]
    # check if samples need to be demultiplexed
    with open_fastq(fq1) as in_handle:
        # Use the next() builtin: the ``.next()`` method only exists on
        # Python 2 iterators, while next() works on both Python 2 and 3.
        read = next(in_handle)
        if "SAMPLE_" not in read:
            return [[data]]
    bcfile = dd.get_sample_barcodes(data)
    if not bcfile:
        logger.error("Sample demultiplexing needs a list of known indexes provided "
                     "with via the sample_barcodes option in the algorithm section.")
        sys.exit(1)
    work_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(work_dir, dd.get_sample_name(data))
    demulti_dir = os.path.join(sample_dir, "demultiplexed")
    fq_pattern = os.path.join(demulti_dir, "*.fq*")
    demultiplexed = glob.glob(fq_pattern)
    if demultiplexed:
        # Output from a previous run is already in place; reuse it.
        return [split_demultiplexed_sampledata(data, demultiplexed)]
    umis = config_utils.get_program("umis", data, default="umis")
    cmd = ("{umis} demultiplex_samples --nedit 1 --barcodes {bcfile} "
           "--out_dir {tx_dir} {fq1}")
    msg = "Demultiplexing {fq1}."
    with file_transaction(data, demulti_dir) as tx_dir:
        # Explicit keywords instead of **locals() so the template cannot
        # silently pick up an unrelated or renamed local.
        do.run(cmd.format(umis=umis, bcfile=bcfile, tx_dir=tx_dir, fq1=fq1),
               msg.format(fq1=fq1))
    # Glob again only after the transaction has closed.
    demultiplexed = glob.glob(fq_pattern)
    return [split_demultiplexed_sampledata(data, demultiplexed)]
def scrnaseq_concatenate_metadata(samples):
    """
    Write ``<combined counts>.metadata``, a CSV with one row per line of
    ``<combined counts>.colnames``, to help in the creation of the SC object.

    For every sample, the sample barcodes file is read as comma-separated
    lines: the first field is the barcode and the remaining fields are its
    annotation (``"NaN"`` when the line has a single field). Annotations are
    stored per ``(sample name, barcode)``. Each output row is that annotation
    followed by the sample's metadata values, with empty values written as
    ``"NaN"``. The header is ``sample`` plus the metadata keys of the last
    sample iterated.

    Nothing is written when the metadata file already exists or when there
    are no samples. Returns ``samples`` unchanged.
    """
    barcodes = {}
    counts = ""
    metadata = {}
    meta_cols = None
    for sample in dd.sample_data_iterator(samples):
        sample_name = dd.get_sample_name(sample)
        with open(dd.get_sample_barcodes(sample)) as inh:
            for line in inh:
                cols = line.strip().split(",")
                if len(cols) == 1:
                    # Assign sample name in case of missing in barcodes
                    cols.append("NaN")
                barcodes[(sample_name, cols[0])] = cols[1:]
        counts = dd.get_combined_counts(sample)
        meta_cols = list(sample["metadata"].keys())
        # Empty metadata values are written out as "NaN".
        metadata[sample_name] = [str(v) or "NaN"
                                 for v in sample["metadata"].values()]
    if meta_cols is None:
        # No samples: previously this created a stray ".metadata" file and
        # then failed with NameError on the unbound header columns.
        return samples
    metadata_fn = counts + ".metadata"
    if file_exists(metadata_fn):
        return samples
    # Open the input first so a missing .colnames file does not leave behind
    # a header-only metadata file that would block regeneration.
    with open(counts + ".colnames") as inh, open(metadata_fn, 'w') as outh:
        outh.write(",".join(["sample"] + meta_cols) + '\n')
        for line in inh:
            # NOTE(review): assumes entries look like "<name>-<barcode>:..."
            # with no other "-" in the sample name -- confirm against the
            # code that writes the colnames file.
            sample = line.split(":")[0]
            barcode = sample.split("-")[1]
            outh.write(",".join(barcodes[(sample, barcode)] + metadata[sample]) + '\n')
    return samples
def scrnaseq_concatenate_metadata(samples):
    """
    Write ``<combined counts>.metadata``, a CSV with one row per line of
    ``<combined counts>.colnames``, to help in the creation of the SC object.

    For every sample, the sample barcodes file is read as comma-separated
    lines: the first field is the barcode and the remaining fields are its
    annotation (``"NaN"`` when the line has a single field). Annotations are
    stored per ``(sample name, barcode)`` so that samples sharing a barcode
    do not overwrite each other's annotation. Each output row is that
    annotation followed by the sample's metadata values, with empty values
    written as ``"NaN"``. The header is ``sample`` plus the metadata keys of
    the last sample iterated.

    Nothing is written when the metadata file already exists or when there
    are no samples. Returns ``samples`` unchanged.
    """
    barcodes = {}
    counts = ""
    metadata = {}
    meta_cols = None
    for sample in dd.sample_data_iterator(samples):
        sample_name = dd.get_sample_name(sample)
        with open(dd.get_sample_barcodes(sample)) as inh:
            for line in inh:
                cols = line.strip().split(",")
                if len(cols) == 1:
                    # Assign sample name in case of missing in barcodes
                    cols.append("NaN")
                # Key on the sample as well as the barcode: keying on the
                # barcode alone let a later sample clobber an earlier one.
                barcodes[(sample_name, cols[0])] = cols[1:]
        counts = dd.get_combined_counts(sample)
        meta_cols = list(sample["metadata"].keys())
        # Empty metadata values are written out as "NaN".
        metadata[sample_name] = [str(v) or "NaN"
                                 for v in sample["metadata"].values()]
    if meta_cols is None:
        # No samples: previously this created a stray ".metadata" file and
        # then failed with NameError on the unbound header columns.
        return samples
    metadata_fn = counts + ".metadata"
    if file_exists(metadata_fn):
        return samples
    # Open the input first so a missing .colnames file does not leave behind
    # a header-only metadata file that would block regeneration.
    with open(counts + ".colnames") as inh, open(metadata_fn, 'w') as outh:
        outh.write(",".join(["sample"] + meta_cols) + '\n')
        for line in inh:
            # NOTE(review): assumes entries look like "<name>-<barcode>:..."
            # with no other "-" in the sample name -- confirm against the
            # code that writes the colnames file.
            sample = line.split(":")[0]
            barcode = sample.split("-")[1]
            outh.write(",".join(barcodes[(sample, barcode)] + metadata[sample]) + '\n')
    return samples