Beispiel #1
0
def align_reads_bowtie2(unaligned_bam_file,
                        index_base,
                        alignment_file,
                        temp_prefix,
                        num_threads=1):
    """Aligns an unaligned bam file with bowtie2 and sorts the output.

    Builds the pipeline
    `bowtie2 ... | samtools view -uS - | samtools sort ... -` and runs it
    through the shell.

    Parameters
    ----------
    unaligned_bam_file : str or path-like
        Unaligned bam file, passed to bowtie2 via `-b`.
    index_base : str or path-like
        Bowtie2 index base, passed via `-x`.
    alignment_file : str or path-like
        Output file of `samtools sort` (`-o`).
    temp_prefix : str or path-like
        Prefix handed to `samtools sort -T`.
    num_threads : int, optional
        Used for both `bowtie2 -p` and `samtools sort -@`.

    Returns
    -------
    tuple
        `alignment_file` as passed in, and the second value returned by
        `run_executable` (presumably the captured stderr -- confirm
        against `run_executable`).

    Notes
    -----
    Paths are interpolated into the shell command without quoting.
    """

    bowtie2_path = get_binary_path(binary_name='bowtie2')
    samtools_path = get_binary_path(binary_name='samtools')

    bowtie2_align_parameter = ' '.join([
        '-p', str(num_threads),
        '--preserve-tags -D 20 -R 3 -N 1 -L 15 -i S,1,0.50',
    ])

    cmd = [
        bowtie2_path, bowtie2_align_parameter,
        '-x', str(index_base),
        '-b', str(unaligned_bam_file),
        '|', samtools_path, 'view -uS -',
        '|', samtools_path, 'sort -T', str(temp_prefix),
        '-@', str(num_threads),
        '-o', str(alignment_file), '-'
    ]

    # The command contains pipes, so it must reach the shell as one string;
    # an argument list would hand '|' to bowtie2 as a literal argument.
    cmd = ' '.join(cmd)
    _, errs = run_executable(cmd_line=cmd, use_shell=True)

    return alignment_file, errs
Beispiel #2
0
def count_bus_file(bus_file,
                   output_directory,
                   output_prefix,
                   t2g_file,
                   ec_file,
                   transcripts_file):
    """Runs `bustools count --genecounts` on a bus file.

    Parameters
    ----------
    bus_file : str or path-like
        Input bus file (last positional argument of the command).
    output_directory : str or path-like
        Accepted for interface compatibility; not used in this function.
    output_prefix : str or path-like
        Passed to bustools via `-o`.
    t2g_file : str or path-like
        Passed to bustools via `-g`.
    ec_file : str or path-like
        Passed to bustools via `-e`.
    transcripts_file : str or path-like
        Passed to bustools via `-t`.

    Returns
    -------
    The `output_prefix` argument, unchanged.
    """

    bustools = get_binary_path(binary_name='bustools')
    options = [
        '-o', str(output_prefix),
        '--genecounts',
        '-g', str(t2g_file),
        '-e', str(ec_file),
        '-t', str(transcripts_file),
    ]
    command = [bustools, 'count', *options, str(bus_file)]

    # Both return values of the run are discarded here.
    _outs, _errs = run_executable(command)

    return output_prefix
Beispiel #3
0
def correct_cell_barcodes(cb_file,
                          output_directory,
                          bus_file,
                          corrected_bus_file):
    """Corrects cell barcodes with `bustools correct`.

    First writes a whitelist `barcodes_no_suffix.tsv` into
    `output_directory`, holding each line of `cb_file` truncated at its
    first '-'. That whitelist is then passed to `bustools correct -w`.

    Parameters
    ----------
    cb_file : str or path-like
        Cell barcode file, one barcode per line.
    output_directory : path-like
        Directory for the whitelist; must support the `/` operator
        (e.g. `pathlib.Path`).
    bus_file : str or path-like
        Input bus file.
    corrected_bus_file : str or path-like
        Output of `bustools correct` (`-o`).

    Returns
    -------
    The `corrected_bus_file` argument, unchanged.
    """

    cell_barcode_file = output_directory / 'barcodes_no_suffix.tsv'

    # Count barcodes while writing them, so the whitelist is not re-opened
    # (and left unclosed) just to count its lines.
    num_barcodes = 0
    with open_by_suffix(file_name=str(cb_file), mode='r') as f, \
            open_by_suffix(file_name=str(cell_barcode_file),
                           mode='w') as fo:
        for line in f:
            fo.write(line.rstrip().split('-')[0] + '\n')
            num_barcodes += 1
    logger.info(f'Number of whitelisted cell barcodes: {num_barcodes}')

    cmd = [
        get_binary_path(binary_name='bustools'),
        'correct',
        '-w',
        str(cell_barcode_file),
        '-o',
        str(corrected_bus_file),
        str(bus_file)
    ]

    outs, errs = run_executable(cmd)
    logger.info(errs)

    return corrected_bus_file
Beispiel #4
0
def align_reads_kallisto(read1_file,
                         read2_file,
                         kallisto_index,
                         output_directory,
                         technology,
                         num_threads=1):
    """Aligns reads by running `kallisto bus`.

    Parameters
    ----------
    read1_file : str or path-like
        First read file, passed positionally.
    read2_file : str or path-like
        Second read file, passed positionally.
    kallisto_index : str or path-like
        Passed to kallisto via `-i`.
    output_directory : str or path-like
        Passed to kallisto via `-o`.
    technology : str
        Passed to kallisto via `-x`.
    num_threads : int, optional
        Passed to kallisto via `-t`.

    Returns
    -------
    tuple
        The two values returned by `run_executable`.
    """

    # Every argument is converted to str so path objects are accepted and
    # the command can be joined for logging.
    cmd = [
        get_binary_path(binary_name='kallisto'),
        'bus',
        '-i',
        str(kallisto_index),
        '-o',
        str(output_directory),
        '-x',
        str(technology),
        '-t',
        str(num_threads),
        '-n',
        str(read1_file),
        str(read2_file)
    ]

    # Log before running so the command is recorded even if the run fails.
    logger.debug(' '.join(cmd))
    outs, errs = run_executable(cmd)

    return outs, errs
Beispiel #5
0
def build_kallisto_index(kallisto_index,
                         kmer,
                         fasta_file):
    """Creates a kallisto index from a fasta file.

    Thin wrapper around `kallisto index -i INDEX -k KMER FASTA`.

    Parameters
    ----------
    kallisto_index : str
        Path and name of the kallisto index to write (`-i`).
    kmer : int
        k-mer length, odd number (`-k`).
    fasta_file : str
        Path and name of the fasta file to index.

    Returns
    -------
    str
        The `kallisto_index` argument, unchanged.
    """

    executable = get_binary_path(binary_name='kallisto')
    arguments = [
        'index',
        '-i', str(kallisto_index),
        '-k', str(kmer),
        str(fasta_file),
    ]

    # Only the second value returned by the run is logged.
    _, messages = run_executable(cmd_line=[executable] + arguments)
    logger.info(messages)

    return kallisto_index
Beispiel #6
0
def build_bwa_index(fasta_file):
    """Builds a bwa index by running `bwa index FASTA`.

    Parameters
    ----------
    fasta_file : str or path-like
        Fasta file to index.

    Returns
    -------
    tuple
        `fasta_file` as passed in, and the second value returned by
        `run_executable` (presumably the captured stderr -- confirm
        against `run_executable`).
    """

    # A single command needs no shell: passing the argument list directly
    # keeps paths with spaces or shell metacharacters intact.
    cmd = [get_binary_path(binary_name='bwa'), 'index', str(fasta_file)]
    _, errs = run_executable(cmd_line=cmd)

    return fasta_file, errs
Beispiel #7
0
def build_bt2_index(fasta_file, index_base):
    """Builds a bowtie2 index by running `bowtie2-build FASTA INDEX_BASE`.

    Parameters
    ----------
    fasta_file : str or path-like
        Fasta file to index.
    index_base : str or path-like
        Index base handed to `bowtie2-build`.

    Returns
    -------
    tuple
        `index_base` as passed in, and the second value returned by
        `run_executable`.
    """

    builder = get_binary_path(binary_name='bowtie2-build')
    command = [builder] + [str(arg) for arg in (fasta_file, index_base)]

    result = run_executable(cmd_line=command)
    _, errs = result

    return index_base, errs
Beispiel #8
0
def align_reads_bwa(modified_read_file,
                    index_base,
                    alignment_file,
                    temp_prefix,
                    num_threads=1):
    """Aligns a modified fastq file with `bwa mem` and sorts the output.

    Builds the pipeline
    `bwa mem ... | samtools view -uS - | samtools sort ... -` and runs it
    through the shell.

    Parameters
    ----------
    modified_read_file : str or path-like
        Read file passed to `bwa mem`.
    index_base : str or path-like
        bwa index base.
    alignment_file : str or path-like
        Output file of `samtools sort` (`-o`).
    temp_prefix : str or path-like
        Prefix handed to `samtools sort -T`.
    num_threads : int, optional
        Used for both `bwa mem -t` and `samtools sort -@`.

    Returns
    -------
    tuple
        `alignment_file` as passed in, and the second value returned by
        `run_executable` (presumably the captured stderr -- confirm
        against `run_executable`).

    Notes
    -----
    Paths are interpolated into the shell command without quoting.
    """

    bwa_align_parameter = ['-t', str(num_threads), '-C']
    samtools_path = get_binary_path(binary_name='samtools')

    # temp_prefix is converted like every other path so that a path object
    # does not break the join below.
    cmd = [
        get_binary_path(binary_name='bwa'), 'mem',
        ' '.join(bwa_align_parameter),
        str(index_base),
        str(modified_read_file), '|', samtools_path, 'view -uS - |',
        samtools_path, 'sort -T', str(temp_prefix), '-@',
        str(num_threads), '-o',
        str(alignment_file), '-'
    ]

    # The pipeline needs a shell, so the command is passed as one string.
    cmd = ' '.join(cmd)
    _, errs = run_executable(cmd_line=cmd, use_shell=True)

    return alignment_file, errs
Beispiel #9
0
def sort_bus_file(bus_file,
                  sorted_bus_file,
                  num_threads):
    """Sorts a bus file by running `bustools sort`.

    Parameters
    ----------
    bus_file : str or path-like
        Input bus file.
    sorted_bus_file : str or path-like
        Output file, passed to bustools via `-o`.
    num_threads : int
        Passed to bustools via `-t`.

    Returns
    -------
    The `sorted_bus_file` argument, unchanged.
    """

    bustools = get_binary_path(binary_name='bustools')
    command = [bustools, 'sort']
    command += ['-t', str(num_threads)]
    command += ['-o', str(sorted_bus_file)]
    command.append(str(bus_file))

    # Only the second value returned by the run is logged.
    _, messages = run_executable(command)
    logger.info(messages)

    return sorted_bus_file