Example #1
0
def demultiplex_samples(data):
    """
    Split a fastqtransformed FASTQ file into per-sample-barcode FASTQ files.

    Only single-end input is handled: when ``data["files"]`` holds exactly
    two entries an error is logged and the process exits with status 1.

    When the first record read from the FASTQ handle does not contain
    ``SAMPLE_`` nothing needs to be demultiplexed and ``[[data]]`` is
    returned unchanged. Otherwise ``umis demultiplex_samples`` is run into
    ``<work_dir>/umis/<sample_name>/demultiplexed`` (skipped when ``*.fq*``
    files are already present there) and the result of
    ``split_demultiplexed_sampledata`` is returned wrapped in a list.
    """
    umis_root = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(umis_root, dd.get_sample_name(data))
    demulti_dir = os.path.join(sample_dir, "demultiplexed")
    fastq_pattern = os.path.join(demulti_dir, "*.fq*")

    files = data["files"]
    if len(files) == 2:
        logger.error(
            "Sample demultiplexing doesn't handle paired-end reads, but "
            "we can add it. Open an issue here https://github.com/bcbio/bcbio-nextgen/issues if you need this and we'll add it."
        )
        sys.exit(1)
    fq1 = files[0]

    # Peek at the first record: without a SAMPLE_ tag there is nothing to split.
    with open_fastq(fq1) as in_handle:
        first_record = next(in_handle)
        if "SAMPLE_" not in first_record:
            return [[data]]

    bcfile = get_sample_barcodes(dd.get_sample_barcodes(data), sample_dir)
    demultiplexed = glob.glob(fastq_pattern)
    if not demultiplexed:
        # No previous output found, so run the demultiplexing now.
        umis = config_utils.get_program("umis", data, default="umis")
        cmd = ("{umis} demultiplex_samples --nedit 1 --barcodes {bcfile} "
               "--out_dir {tx_dir} {fq1}")
        msg = "Demultiplexing {fq1}."
        with file_transaction(data, demulti_dir) as tx_dir:
            do.run(cmd.format(umis=umis, bcfile=bcfile, tx_dir=tx_dir, fq1=fq1),
                   msg.format(fq1=fq1))
        demultiplexed = glob.glob(fastq_pattern)
    return [split_demultiplexed_sampledata(data, demultiplexed)]
Example #2
0
def demultiplex_samples(data):
    """
    demultiplex a fastqtransformed FASTQ file into separate sample barcode files

    Only single-end input is handled: when ``data["files"]`` holds exactly
    two entries an error is logged and the process exits with status 1.
    The process also exits with status 1 when samples need demultiplexing
    but no ``sample_barcodes`` value is configured.

    When the first record read from the FASTQ handle does not contain
    ``SAMPLE_`` nothing needs to be demultiplexed and ``[[data]]`` is
    returned unchanged. Otherwise ``umis demultiplex_samples`` is run into
    ``<work_dir>/umis/<sample_name>/demultiplexed`` (skipped when ``*.fq*``
    files are already present there) and the result of
    ``split_demultiplexed_sampledata`` is returned wrapped in a list.
    """
    files = data["files"]
    if len(files) == 2:
        logger.error("Sample demultiplexing doesn't handle paired-end reads, but "
            "we can add it. Open an issue here https://github.com/chapmanb/bcbio-nextgen/issues if you need this and we'll add it.")
        sys.exit(1)
    fq1 = files[0]
    # check if samples need to be demultiplexed
    with open_fastq(fq1) as in_handle:
        # Use the next() builtin: the iterator method .next() exists only on
        # Python 2 and raises AttributeError on Python 3.
        read = next(in_handle)
        if "SAMPLE_" not in read:
            return [[data]]
    bcfile = dd.get_sample_barcodes(data)
    if not bcfile:
        logger.error("Sample demultiplexing needs a list of known indexes provided "
                     "with via the sample_barcodes option in the algorithm section.")
        sys.exit(1)
    work_dir = os.path.join(dd.get_work_dir(data), "umis")
    sample_dir = os.path.join(work_dir, dd.get_sample_name(data))
    demulti_dir = os.path.join(sample_dir, "demultiplexed")
    fastq_pattern = os.path.join(demulti_dir, "*.fq*")
    # Reuse the output of a previous run when it is already on disk.
    demultiplexed = glob.glob(fastq_pattern)
    if demultiplexed:
        return [split_demultiplexed_sampledata(data, demultiplexed)]
    umis = config_utils.get_program("umis", data, default="umis")
    cmd = ("{umis} demultiplex_samples --nedit 1 --barcodes {bcfile} "
           "--out_dir {tx_dir} {fq1}")
    msg = "Demultiplexing {fq1}."
    with file_transaction(data, demulti_dir) as tx_dir:
        # Explicit keywords instead of **locals() so the command does not
        # silently depend on local variable names.
        do.run(cmd.format(umis=umis, bcfile=bcfile, tx_dir=tx_dir, fq1=fq1),
               msg.format(fq1=fq1))
    demultiplexed = glob.glob(fastq_pattern)
    return [split_demultiplexed_sampledata(data, demultiplexed)]
Example #3
0
def scrnaseq_concatenate_metadata(samples):
    """
    Create a file with the same dimension as mtx.colnames
    with metadata and sample name to help in the
    creation of the SC object.

    For every sample the barcode file (comma separated, first column is the
    barcode) is read and its remaining columns are stored per
    (sample name, barcode) pair; lines with a single column get ``"NaN"``
    as their only value. The sample's ``metadata`` values are stringified,
    with empty strings replaced by ``"NaN"``.

    The output ``<combined_counts>.metadata`` is written only if it does not
    already exist. Its header is ``sample`` followed by the metadata keys of
    the last sample iterated, and it has one row per line of
    ``<combined_counts>.colnames``.

    Returns ``samples`` unchanged. When no sample is iterated nothing is
    written.
    """
    barcodes = {}
    counts = ""
    metadata = {}
    meta_cols = []
    for sample in dd.sample_data_iterator(samples):
        sample_name = dd.get_sample_name(sample)
        with open(dd.get_sample_barcodes(sample)) as inh:
            for line in inh:
                cols = line.strip().split(",")
                if len(cols) == 1:
                    # Assign sample name in case of missing in barcodes
                    cols.append("NaN")
                barcodes[(sample_name, cols[0])] = cols[1:]

        counts = dd.get_combined_counts(sample)
        meta_cols = list(sample["metadata"].keys())
        # Values are stringified first, so only empty strings become "NaN".
        metadata[sample_name] = [
            value or "NaN" for value in map(str, sample["metadata"].values())
        ]

    if not metadata:
        # Nothing was iterated: there is no counts prefix and no header to
        # write. Previously this created a stray ".metadata" file and then
        # failed with NameError on meta_cols.
        return samples

    metadata_fn = counts + ".metadata"
    if file_exists(metadata_fn):
        return samples
    with open(metadata_fn, 'w') as outh:
        outh.write(",".join(["sample"] + meta_cols) + '\n')
        with open(counts + ".colnames") as inh:
            for line in inh:
                # The text before ":" is used as the sample name, and the
                # part after its first "-" as the barcode.
                cell_sample = line.split(":")[0]
                barcode = cell_sample.split("-")[1]
                row = barcodes[(cell_sample, barcode)] + metadata[cell_sample]
                outh.write(",".join(row) + '\n')
    return samples
Example #4
0
def scrnaseq_concatenate_metadata(samples):
    """
    Create a file with the same dimension as mtx.colnames
    with metadata and sample name to help in the
    creation of the SC object.

    For every sample the barcode file (comma separated, first column is the
    barcode) is read and its remaining columns are stored per
    (sample name, barcode) pair; lines with a single column get ``"NaN"``
    as their only value. The sample's ``metadata`` values are stringified,
    with empty strings replaced by ``"NaN"``.

    The output ``<combined_counts>.metadata`` is written only if it does not
    already exist. Its header is ``sample`` followed by the metadata keys of
    the last sample iterated, and it has one row per line of
    ``<combined_counts>.colnames``.

    Returns ``samples`` unchanged. When no sample is iterated nothing is
    written.
    """
    barcodes = {}
    counts = ""
    metadata = {}
    meta_cols = []
    for sample in dd.sample_data_iterator(samples):
        sample_name = dd.get_sample_name(sample)
        with open(dd.get_sample_barcodes(sample)) as inh:
            for line in inh:
                cols = line.strip().split(",")
                if len(cols) == 1:
                    # Assign sample name in case of missing in barcodes
                    cols.append("NaN")
                # Key by (sample, barcode): keying by barcode alone lets a
                # later sample overwrite the annotations of an earlier one
                # that uses the same barcode.
                barcodes[(sample_name, cols[0])] = cols[1:]

        counts = dd.get_combined_counts(sample)
        meta_cols = list(sample["metadata"].keys())
        # Values are stringified first, so only empty strings become "NaN".
        metadata[sample_name] = [
            value or "NaN" for value in map(str, sample["metadata"].values())
        ]

    if not metadata:
        # Nothing was iterated: there is no counts prefix and no header to
        # write. Previously this created a stray ".metadata" file and then
        # failed with NameError on meta_cols.
        return samples

    metadata_fn = counts + ".metadata"
    if file_exists(metadata_fn):
        return samples
    with open(metadata_fn, 'w') as outh:
        outh.write(",".join(["sample"] + meta_cols) + '\n')
        with open(counts + ".colnames") as inh:
            for line in inh:
                # The text before ":" is used as the sample name, and the
                # part after its first "-" as the barcode.
                cell_sample = line.split(":")[0]
                barcode = cell_sample.split("-")[1]
                row = barcodes[(cell_sample, barcode)] + metadata[cell_sample]
                outh.write(",".join(row) + '\n')
    return samples