예제 #1
0
파일: bowtie.py 프로젝트: mtop/gefes
 def run(self):
     """Map the sample reads onto the contigs and compute coverage.

     Creates the bowtie2 / samtools index files for ``self.contigs_fasta``
     when they are missing, aligns the forward and reverse reads with
     bowtie2, converts the SAM output to a sorted and indexed BAM, calls
     ``self.remove_duplicates()``, sorts and indexes the result again,
     writes the genomeCoverageBed output to ``self.p.map_smds_coverage``
     and finally deletes the intermediate SAM/BAM files.
     """
     # Build whichever index file is still missing #
     if not os.path.exists(self.contigs_fasta + '.1.bt2'):
         self.contigs_fasta.index_bowtie()
     if not os.path.exists(self.contigs_fasta + '.fai'):
         self.contigs_fasta.index_samtools()
     # Assemble the bowtie2 command line piece by piece #
     bowtie_args = ['-p', nr_threads]
     bowtie_args += ['-x', self.assembly.results.contigs_fasta]
     bowtie_args += ['-1', self.sample.fwd_path]
     bowtie_args += ['-2', self.sample.rev_path]
     bowtie_args += ['-S', self.p.map_sam]
     # bowtie2 has to be told when the reads are FASTA instead of FASTQ #
     if self.sample.format == 'fasta':
         bowtie_args.append('-f')
     # Run the aligner #
     sh.bowtie2(*bowtie_args)
     # SAM -> BAM, then sort and index the BAM #
     fai_path = self.contigs_fasta + '.fai'
     sh.samtools('view', '-bt', fai_path, self.p.map_sam, '-o', self.p.map_bam)
     sh.samtools('sort', self.p.map_bam, self.p.map_s_bam.prefix_path)
     sh.samtools('index', self.p.map_s_bam)
     # Remove PCR duplicates #
     self.remove_duplicates()
     # Sort and index the duplicate-free BAM #
     sh.samtools('sort', self.p.map_smd_bam, self.p.map_smds_bam.prefix_path)
     sh.samtools('index', self.p.map_smds_bam)
     # Compute coverage, redirecting stdout to the coverage file #
     coverage_path = str(self.p.map_smds_coverage)
     sh.genomeCoverageBed('-ibam', self.p.map_smds_bam, _out=coverage_path)
     # Delete the intermediates, looked up one at a time in the original order #
     for leftover in ('map_sam', 'map_bam', 'map_smd_bam'):
         os.remove(getattr(self.p, leftover))
예제 #2
0
 def map(self, threads=nr_threads):
     """Map the reads of ``self.pool`` onto ``self.contigs`` with bowtie2.

     The SAM output is converted to a sorted, indexed BAM, PCR duplicates
     are removed through ``self.remove_duplicates()``, and the BEDTools
     program genomeCoverageBed is used to produce the coverage output.
     Intermediate SAM/BAM files are deleted at the end.

     Raises ``Exception`` when either index file of ``self.contigs`` is
     missing; ``index()`` has to be run first in that case.
     """
     # Both index files must already be in place #
     required_indexes = (
         ('.1.bt2', 'Bowtie2 index file not created, run index() first'),
         ('.fai', 'Samtools index file not created, run index() first'),
     )
     for suffix, complaint in required_indexes:
         if not os.path.exists(self.contigs + suffix):
             raise Exception(complaint)
     # Run the aligner #
     aligner_args = ['-p', threads,
                     '-x', self.contigs,
                     '-1', self.pool.fwd,
                     '-2', self.pool.rev,
                     '-S', self.p.map_sam]
     sh.bowtie2(*aligner_args)
     # SAM -> BAM, then sort and index the BAM #
     fai_path = self.contigs + '.fai'
     sh.samtools('view', '-bt', fai_path, self.p.map_sam, '-o', self.p.map_bam)
     sh.samtools('sort', self.p.map_bam, self.p.map_s_bam.prefix_path)
     sh.samtools('index', self.p.map_s_bam)
     # Remove PCR duplicates #
     self.remove_duplicates()
     # Sort and index the duplicate-free BAM #
     sh.samtools('sort', self.p.map_smd_bam, self.p.map_smds_bam.prefix_path)
     sh.samtools('index', self.p.map_smds_bam)
     # Compute coverage, redirecting stdout to the coverage file #
     sh.genomeCoverageBed('-ibam', self.p.map_smds_bam, _out=self.p.map_smds_coverage)
     # Delete the intermediates, looked up one at a time in the original order #
     for leftover in ('map_sam', 'map_bam', 'map_smd_bam'):
         os.remove(getattr(self.p, leftover))
예제 #3
0
def index_fasta(fasta, compress):
    """Index and optionally compress a fasta file.


        \b
    Examples:
        bionorm index_fasta Medicago_truncatula/jemalong_A17.gnm5.ann1.FAKE/medtr.jemalong_A17.gnm5.FAKE.genome_main.fna
    """
    # NOTE(review): the docstring layout (including the "\b" marker) looks
    # like CLI help text, so it is kept verbatim -- confirm against the
    # decorators, which are not visible here.
    #
    # fasta:    path of the FASTA file to index.
    # compress: when truthy, the file is first run through
    #           `bgzip -f --index` and the resulting ".gz" file is indexed.
    # Returns the Path that was handed to `samtools faidx`.
    # Exits the process with status 1 (after logging an error) when the
    # file has no extension, a compressed extension, or an extension that
    # is not in FASTA_TYPES.
    from sh import bgzip  # isort:skip
    from sh import samtools  # isort:skip

    target = Path(fasta)
    if not target.suffixes:
        error_message = f"Target {target} does not have a file extension."
        logger.error(error_message)
        sys.exit(1)
    extension = target.suffix.lstrip(".")
    if extension in COMPRESSED_TYPES:
        logger.error(f"Uncompress {target} before indexing.")
        sys.exit(1)
    if extension not in FASTA_TYPES:
        logger.error(
            f"File {target} does not have a recognized FASTA extension.")
        sys.exit(1)
    if compress:
        # The command output is not needed, so it is no longer bound to an
        # unused local.
        bgzip(["-f", "--index", str(target)])
        # From here on work with the compressed file, which sits next to
        # the original.
        target = target.with_name(f"{target.name}.gz")
    samtools(["faidx", str(target)])
    return target
예제 #4
0
def main():
    '''
    Command-line entry point.

    Parses the arguments with docopt, makes sure the output directory
    exists, dumps the input file with `samtools view` (adding the `S`
    flag when the name ends in ".sam") and passes the resulting text to
    get_seqs_by_ctg together with the output directory.  Returns 0.
    '''
    cli_args = docopt(__doc__)
    options = Schema({
        '<samfile>' : str,
        '--outdir' : str}).validate(cli_args)
    out_dir = options['--outdir']
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    sam_path = options['<samfile>']
    # Only plain-text SAM input gets the extra S=True keyword.
    extra_flags = {'S': True} if sam_path.endswith('.sam') else {}
    records = str(sh.samtools('view', sam_path, **extra_flags))
    get_seqs_by_ctg(out_dir, records)
    return 0
예제 #5
0
def _merge_condition(in_files, condition):
    """
    Merge all of the BAM files from a condition together, as recommended
    in the MACS manual, then sort and index the merged file.

    in_files: iterable of file names; every name that contains
        ``condition`` as a substring is selected.
    condition: substring identifying the condition; also used as the
        prefix of the merged file name.

    Returns the name of the sorted BAM file, or None when no file name
    matches. When the sorted file already exists it is returned without
    running samtools again.
    """
    condition_files = [filename for filename in in_files if
                       condition in filename]
    if not condition_files:
        return None
    # The list is only known to be non-empty, so take the directory of the
    # first match; indexing [1] raised IndexError when a single file matched.
    condition_filename = os.path.join(os.path.dirname(condition_files[0]),
                                      condition + "_merged.bam")
    sorted_prefix = remove_suffix(condition_filename) + ".sorted"
    sorted_filename = sorted_prefix + ".bam"
    if file_exists(sorted_filename):
        return sorted_filename

    sh.samtools("merge", condition_filename, condition_files)
    sh.samtools("sort", condition_filename, sorted_prefix)
    sh.samtools("index", sorted_filename)
    return sorted_filename
예제 #6
0
def _merge_condition(in_files, condition):
    """
    Merge all of the BAM files from a condition together, as recommended
    in the MACS manual, then sort and index the merged file.

    in_files: iterable of file names; every name that contains
        ``condition`` as a substring is selected.
    condition: substring identifying the condition; also used as the
        prefix of the merged file name.

    Returns the name of the sorted BAM file, or None when no file name
    matches. When the sorted file already exists it is returned without
    running samtools again.
    """
    condition_files = [
        filename for filename in in_files if condition in filename
    ]
    if not condition_files:
        return None
    # The list is only known to be non-empty, so take the directory of the
    # first match; indexing [1] raised IndexError when a single file matched.
    condition_filename = os.path.join(os.path.dirname(condition_files[0]),
                                      condition + "_merged.bam")
    sorted_prefix = remove_suffix(condition_filename) + ".sorted"
    sorted_filename = sorted_prefix + ".bam"
    if file_exists(sorted_filename):
        return sorted_filename

    sh.samtools("merge", condition_filename, condition_files)
    sh.samtools("sort", condition_filename, sorted_prefix)
    sh.samtools("index", sorted_filename)
    return sorted_filename
예제 #7
0
 def index_samtools(self):
     """Run ``samtools faidx`` on this fasta file.

     Returns a ``FilePath`` built from ``self.path`` with '.fai' appended.
     """
     sh.samtools('faidx', self.path)
     fai_location = self.path + '.fai'
     return FilePath(fai_location)
예제 #8
0
파일: __init__.py 프로젝트: ddofer/fasta
 def index(self):
     """Index the contigs fasta file for both bowtie2 and samtools.

     ``bowtie2_build`` is given the fasta path as both its input and its
     output base name; ``samtools faidx`` is then run on the same path.
     """
     fasta = self.contigs_fasta
     sh.bowtie2_build(fasta, fasta)
     sh.samtools('faidx', fasta)
예제 #9
0
def zero_coverage_positions(bam_file, ref_file):
    """Return the integer second field of every ``samtools mpileup`` line.

    ``samtools mpileup`` is run on ``bam_file`` with ``f=ref_file`` and its
    output is consumed line by line (``_iter=True``).  Each line is split on
    whitespace and the value picked by ``second`` (presumably the second
    field -- confirm against the helper) is converted to ``int``.

    NOTE(review): despite the name, no filtering on coverage is visible in
    this function -- confirm what callers expect.
    """
    mpileup_lines = sh.samtools('mpileup', bam_file, f=ref_file, _iter=True)
    # compose applies right-to-left: split the line, take `second`, then int.
    to_position = compose(int, second, unicode.split)
    return map(to_position, mpileup_lines)
 def index_samtools(self):
     """Run ``samtools faidx`` on this fasta file.

     Returns a ``FilePath`` built from ``self.path`` with '.fai' appended.
     """
     sh.samtools('faidx', self.path)
     fai_location = self.path + '.fai'
     return FilePath(fai_location)