Esempio n. 1
0
def run_quast(contigs, reads, sample, threads, quast_opts):
    """Run QUAST on an assembly, writing its report to ``<sample>/quast``.

    Args:
        contigs: path of the contigs file handed to quast.
        reads: read files of the sample; currently unused (see note below).
        sample: sample directory under which the ``quast`` folder is created.
        threads: value passed to ``--threads``.
        quast_opts: extra quast options, inserted verbatim into the command.
    """
    # NOTE: the current quast bioconda recipe has issues with bedtools, so the
    # read-based functionality is not working.  The read-aware command is
    # preserved here for future use:
    # cmd = f"quast --threads {threads} -o {sample}/quast --p1 {reads[0]} --pe2 {reads[1]} {quast_opts} {contigs}"
    report_dir = f"{sample}/quast"
    shell(f"quast --threads {threads} -o {report_dir} --glimmer {quast_opts} {contigs}")
Esempio n. 2
0
def mergeBenchmarks(samples, rules):
    """Merge per-sample benchmark tables into one table per rule.

    For every rule, the last line of each ``benchmark/<sample>_*_<rule>.tab``
    file is appended to ``benchmark/merged/<rule>.tab`` and the per-sample
    file is moved to ``benchmark/unmerged``.  Any ``benchmark/*tab`` file left
    afterwards is moved to ``benchmark/merged``.

    Args:
        samples: a single sample name or an iterable of sample names.
        rules: iterable of rule names.

    Note:
        The command strings below are passed with ``{f}``, ``{sample}`` and
        ``{rule}`` left unformatted; they rely on ``snakemake.shell`` filling
        them in from this function's local variables, so those three names
        must not be renamed.
    """
    # Merged and unmerged benchmarks are kept in separate directories.
    snakemake.shell(
        'mkdir benchmark/merged benchmark/unmerged -p 2> /dev/null')

    # A lone sample name would otherwise be iterated character by character.
    sample_names = [samples] if isinstance(samples, str) else samples

    for rule in rules:
        f = ''.join(('benchmark/merged/', rule, '.tab'))

        # The header is only written when the merged table is first created.
        header_missing = not os.path.isfile(f)
        if header_missing:
            snakemake.shell(
                'echo -e "s\th:m:s\tmax_rss\tmax_vms\tmax_uss\tmax_pss\tio_in\tio_out\tmean_load" >> {f}'
            )

        for sample in sample_names:
            # Append the sample's last benchmark line, then archive its file.
            snakemake.shell(
                'tail -n1 benchmark/{sample}_*_{rule}.tab >> {f}  2> /dev/null || true'
            )
            snakemake.shell(
                'mv benchmark/{sample}_*_{rule}.tab benchmark/unmerged 2> /dev/null|| true '
            )

    # Benchmarks of rules that do not depend on a sample go straight to merged.
    snakemake.shell('mv benchmark/*tab benchmark/merged 2> /dev/null || true')
Esempio n. 3
0
def main(args):
    """Polish a draft assembly with repeated minimap2 + racon rounds.

    Reads ``args.draft``, ``args.raw_reads``, ``args.output``,
    ``args.iterations``, ``args.threads`` and ``args.min_q``.

    An empty draft short-circuits to an empty ``args.output``.  Otherwise
    round N maps the raw reads against the result of round N-1 (the draft
    for the first round) and writes the polished contigs to the path obtained
    by replacing ``<iterations>x`` in ``args.output`` with ``<N>x``.

    Note:
        The shell command interpolates ``args``, ``previous_polished_draft``,
        ``alignment`` and ``polished_contigs`` from this function's locals,
        so those names must be kept.
    """
    if os.path.getsize(args.draft) == 0:
        # Create empty polished file
        shell("touch {}".format(args.output))
        return

    final_output = args.output.replace('{}x'.format(args.iterations),
                                       '{nrepeat}x')

    for repeat in range(1, args.iterations + 1):
        polished_contigs = final_output.format(nrepeat=repeat)
        if repeat == 1:
            previous_polished_draft = args.draft
        else:
            previous_polished_draft = final_output.format(nrepeat=repeat - 1)

        # Swap only the trailing extension for the alignment name.  The old
        # str.replace() substituted the extension text everywhere in the
        # path (e.g. inside a "fasta/" directory).  The historical
        # "<root>..aln.sam" spelling (double dot) is kept so existing
        # intermediate file names do not change.
        root, ext = os.path.splitext(polished_contigs)
        if ext:
            alignment = root + '..aln.sam'
        else:
            alignment = polished_contigs + '.aln.sam'

        shell('minimap2 -t {args.threads} -ax map-ont '
              '{previous_polished_draft} {args.raw_reads} > {alignment}; '
              'racon '
              '--include-unpolished '
              '--quality-threshold={args.min_q} '
              '-t {args.threads} '
              '{args.raw_reads} '
              '{alignment} '
              '{previous_polished_draft} > {polished_contigs}')
Esempio n. 4
0
def align_to_genome_fasta_pe(fasta1,
                             fasta2,
                             genome_path,
                             out_sam,
                             threads=1,
                             verbose=False,
                             additional_flags=''):
    """Align a pair of read files to a genome with ``bwa mem``.

    Args:
        fasta1: first read file.
        fasta2: second read file.
        genome_path: genome passed to ``bwa mem``.
        out_sam: file receiving bwa's standard output.
        threads: value passed to ``-t``.
        verbose: when false, bwa's standard error is sent to ``/dev/null``.
        additional_flags: extra text appended after the read files.

    Returns:
        ``True`` if ``out_sam`` exists as a file afterwards, else ``False``.
    """
    if verbose:
        template = "bwa mem -t {threads} {genome_path} {fastq1} {fastq2} {additional_flags} > {out_sam}; "
    else:
        template = "bwa mem -t {threads} {genome_path} {fastq1} {fastq2} {additional_flags} 2> /dev/null 1> {out_sam}; "

    command = template.format(
        threads=threads,
        genome_path=genome_path,
        fastq1=fasta1,
        fastq2=fasta2,
        additional_flags=additional_flags,
        out_sam=out_sam)

    logger.debug("Executing command: %s" % command)
    shell(command)

    return isfile(out_sam)
Esempio n. 5
0
def render_rmarkdown(input_file, output_file, root_dir, params=None):
    """
    Render an Rmarkdown document by calling ``rmarkdown::render`` via Rscript.

    The document is rendered with the ``bookdown::html_document2()`` output
    format, and ``params`` is forwarded to the ``params`` argument of
    ``rmarkdown::render`` so that parametrized reports can be built.  All
    three paths are made absolute before they are put into the command.

    Args:
        input_file: path to input (Rmd) file
        output_file: path to output (html) file
        root_dir: knitr working directory (passed as ``knit_root_dir``)
        params: optional dictionary; each value is converted with
            ``_literal_to_r_str`` and passed as ``key=value`` inside
            ``list(...)``.

    """
    if params is None:
        param_str = ""
    else:
        r_assignments = []
        for key, value in params.items():
            r_assignments.append("{}={}".format(key, _literal_to_r_str(value)))
        param_str = ", ".join(r_assignments)

    template = (
        "MKL_THREADING_LAYER=GNU "  # was necessary to circumvent incompatibilities of Intel mkl with libgomp.
        "Rscript -e \"rmarkdown::render('{input_file}', "
        "   output_file='{output_file}', "
        "   output_format=bookdown::html_document2(), "
        "   knit_root_dir='{root_dir}', "
        "   params = list({params}))\"")

    cmd = template.format(
        input_file=os.path.abspath(input_file),
        output_file=os.path.abspath(output_file),
        root_dir=os.path.abspath(root_dir),
        params=param_str)

    shell(cmd)
Esempio n. 6
0
	def install(cls):
		"""Download the CuReSim and CuReSimEval archives, unpack them and install both jars.

		Both zip files are unpacked (flattened, overwriting) in the directory
		of the first download; the two jars are then installed as ``CURESIM``
		and ``CURESIM_EVAL``.
		"""
		curesim_zip=cls.download_file("http://www.pegase-biosciences.com/wp-content/uploads/2013/04/CuReSim1.2.zip","curesim.zip")
		# The returned path of the second download is not needed, only the file.
		cls.download_file("http://www.pegase-biosciences.com/wp-content/uploads/2013/04/CuReSimEval1.1.zip","curesim_eval.zip")
		unpack_dir=os.path.dirname(curesim_zip)
		unpack_cmd='(cd "{dir}" && unzip -j -o curesim.zip && unzip -j -o curesim_eval.zip) > /dev/null'.format(dir=unpack_dir)
		snakemake.shell(unpack_cmd)
		cls.install_file("CuReSim.jar",CURESIM)
		cls.install_file("CuReSimEval.jar",CURESIM_EVAL)
Esempio n. 7
0
def run_einverted(fasta, gap=12, threshold=15, match=3, mismatch=-4, outfile='einverted.tmp.out', outseq='einverted.tmp.outseq'):
    """Run ``einverted`` on a FASTA file.

    Every argument is forwarded to the einverted option of the same name
    (``fasta`` goes to ``-sequence``); the tool is run with ``-auto Y`` and
    ``-warning N``.
    """
    template = ('einverted -sequence {fasta} -gap {gap} -threshold {threshold} -match {match} '
                '-mismatch {mismatch} -outfile {outfile} -outseq {outseq} -auto Y -warning N')
    options = dict(fasta=fasta, gap=gap, threshold=threshold, match=match,
                   mismatch=mismatch, outfile=outfile, outseq=outseq)
    shell(template.format(**options))
Esempio n. 8
0
def compute_syrah_to_s3(self, sra_id):
    """Compute a sourmash signature for an SRA run and store it on S3.

    Nothing is done when ``sigs/<sra_id>`` already exists in the
    ``soursigs-done`` bucket.  Otherwise the ``fastq-dump | syrah | sourmash
    compute`` pipeline writes the signature into a temporary file, which is
    uploaded under that key, and ``Ignore`` is raised afterwards.

    Raises:
        CalledProcessError: if the pipeline fails with any exit status other
            than 141.
        Ignore: always, after a successful upload (presumably the Celery
            exception -- confirm against the module imports).
    """
    from boto.s3.connection import S3Connection
    from boto.s3.key import Key
    from snakemake import shell

    bucket = S3Connection().get_bucket("soursigs-done")
    sig_key = os.path.join("sigs", sra_id)

    # Result already on S3: nothing to compute.
    if bucket.get_key(sig_key) is not None:
        return

    with NamedTemporaryFile("w+t") as sig_file:
        pipeline = (
            "fastq-dump -A {sra_id} -Z | syrah | "
            "sourmash compute -k 21 --dna - -o {output} --name {sra_id}"
        ).format(sra_id=sra_id, output=sig_file.name)
        try:
            shell(pipeline)
        except CalledProcessError as e:
            # Exit status 141 is SIGPIPE, which is informational here: an
            # upstream command could no longer write into a closed pipe.
            # More details:
            # http://www.pixelbeat.org/programming/sigpipe_handling.html
            if e.returncode != 141:
                # TODO: save error to bucket, on 'errors/{sra_id}'?
                raise e

        # save to S3
        upload = Key(bucket)
        upload.key = sig_key
        sig_file.seek(0)
        upload.set_contents_from_string(sig_file.read())

        raise Ignore()
Esempio n. 9
0
	def map_reads(self,number_of_threads=1):
		"""Map the reads with Yara and produce a BAM file.

		One FASTQ file is passed when ``self._fq2_fn`` is ``None``, both
		files otherwise.  With ``self._sort_by_name`` set, Yara's output is
		piped through ``samtools sort -n`` using ``self._bam_fn`` minus its
		last four characters as output prefix; otherwise Yara writes
		``self._bam_fn`` directly via ``-o``.

		Args:
			number_of_threads: value passed to Yara's ``-t`` option.
		"""
		# Identity test: comparing to None with ``==`` can be fooled by a custom __eq__.
		if self._fq2_fn is None:
			reads_string='"{}"'.format(self._fq1_fn)
		else:
			reads_string='"{}" "{}"'.format(self._fq1_fn,self._fq2_fn)

		if self._sort_by_name:
			command='"{yara_mapper}" -t {threads} "{genome_pref}" {reads_string} | "{samtools}" sort -n - "{bamprefix}"'.format(
					yara_mapper=YARA_MAPPER,
					genome_pref=self.index_prefix,
					reads_string=reads_string,
					bamprefix=self._bam_fn[:-4],
					threads=number_of_threads,
					samtools=smbl.prog.SAMTOOLS,
				)
		else:
			command='"{yara_mapper}" -o "{bam}" -t {threads} "{genome_pref}" {reads_string}'.format(
					yara_mapper=YARA_MAPPER,
					genome_pref=self.index_prefix,
					reads_string=reads_string,
					bam=self._bam_fn,
					threads=number_of_threads,
				)

		snakemake.shell(command)
Esempio n. 10
0
def test_samtools_sort_and_index(sample1_se_tiny_bam, sample1_se_tiny_bam_bai):
    """
    This test is primarily a trigger for the fixtures.

    A region query on the ``sample1_se_tiny_bam`` fixture is expected to fail
    with ``CalledProcessError``, while the same query on the ``bam`` entry of
    ``sample1_se_tiny_bam_bai`` must succeed (presumably because only the
    latter has an index next to it -- confirm against the fixtures).

    The ``{...}`` placeholders in the command strings are not formatted here;
    they rely on ``shell`` interpolating this function's arguments, so the
    argument names are load-bearing.
    """
    # Region query on the first fixture: must raise.
    with pytest.raises(sp.CalledProcessError):
        shell('samtools view {sample1_se_tiny_bam} 2L:1-100')
    # Same region query on the second fixture's BAM: must not raise.
    shell('samtools view {sample1_se_tiny_bam_bai[bam]} 2L:1-100')
Esempio n. 11
0
def index(in_bam):
    """Run ``samtools index`` on ``in_bam``.

    Returns:
        ``True`` if ``<in_bam>.bai`` exists as a file afterwards, else
        ``False``.
    """
    index_cmd = 'samtools index {in_bam}'.format(in_bam=in_bam)
    shell(index_cmd)

    return isfile(in_bam + '.bai')
Esempio n. 12
0
	def create_fq(self):
		"""Simulate reads with CuReSim and recode its FASTQ output.

		When ``self.number_of_read_tuples`` is 0 it is first derived from the
		requested coverage, the size in bytes of the FASTA file and the two
		read lengths.  CuReSim is then run inside ``self.get_dir()`` and the
		``output.fastq`` found there is handed to
		``self.recode_curesim_reads``.
		"""
		if self.number_of_read_tuples == 0:
			# The FASTA file size in bytes is used as the genome size.
			fasta_bytes=os.stat(self._fa_fn).st_size
			bases_per_tuple=self.read_length_1+self.read_length_2
			self.number_of_read_tuples=int(self.coverage*fasta_bytes/bases_per_tuple)

		# ``rng_seed`` is supplied to format() but the template has no
		# placeholder for it.
		command="""
				cd "{dir}"
				java -Xmx8g -jar \
				{curesim} \
				-f "{fa}" \
				-n {nb} \
				-m {rlen1} \
				-r 0 \
				-sd 0 \
				-y 0 \
				{other_params} \
				> /dev/null
			""".format(
				dir=self.get_dir(),
				curesim=smbl.prog.CURESIM,
				fa=self._fa_fn,
				nb=self.number_of_read_tuples,
				rlen1=self.read_length_1,
				other_params=self.other_params,
				rng_seed=self._rng_seed,
			)
		snakemake.shell(command)

		curesim_fastq=os.path.join(self.get_dir(),"output.fastq")
		self.recode_curesim_reads(curesim_fastq)
Esempio n. 13
0
def run_asm(infiles, assembler, asm_opts, sample, threads, memory):
    """Assemble reads with shovill, spades or (for any other value) skesa.

    Args:
        infiles: read files; the first two are used for shovill and spades,
            all of them (comma-joined) for skesa.
        assembler: ``'shovill'``, ``'spades'``, or anything else for skesa.
        asm_opts: extra assembler options, appended verbatim.
        sample: sample directory the output is written under.
        threads: number of threads/cores handed to the assembler.
        memory: memory limit handed to the assembler.
    """
    if assembler == 'shovill':
        r1, r2 = infiles[0], infiles[1]
        cmd = f"shovill --R1 {r1} --R2 {r2} --outdir {sample}/shovill --cpus {threads} --ram {memory} {asm_opts}"
    elif assembler == 'spades':
        r1, r2 = infiles[0], infiles[1]
        cmd = f"spades.py -1 {r1} -2 {r2} -o {sample}/spades --threads {threads} --memory {memory} {asm_opts}"
    else:
        fastq_list = ','.join(infiles)
        cmd = f"skesa --fastq {fastq_list} --contigs_out {sample}/skesa.fasta --cores {threads} --memory {memory} {asm_opts}"
    shell(cmd)
Esempio n. 14
0
def relative_symlink(input, output):
    """Create ``output`` as a relative symlink pointing at ``input``.

    The link target is expressed relative to the directory of ``output``
    and the link is created from within that directory with ``ln -s``.

    Args:
        input: path the link should point to (converted with ``str``).
        output: path of the link to create.
    """
    import os
    from snakemake import shell
    # dirname() is '' when ``output`` has no directory component.  A bare
    # ``cd`` would then switch to $HOME and the link would be created there,
    # so fall back to the current directory.
    odir = os.path.dirname(output) or '.'
    oname = os.path.basename(output)
    relative_path = './' + os.path.relpath(str(input), odir)
    shell("cd {odir}; ln -s {rp} {output}".format(odir=odir, rp=relative_path, output=oname))
Esempio n. 15
0
def cluster_reciprocal_identity(infile, outfile, threads=1, memory=800, alignment_coverage=0.99, perc_identity=0.99):
    """Cluster sequences with ``cd-hit-est``.

    Args:
        infile: input file (``-i``).
        outfile: output file (``-o``).
        threads: value for ``-T``.
        memory: value for ``-M``.
        alignment_coverage: used for both ``-aL`` and ``-aS``.
        perc_identity: value for ``-c``.
    """
    template = ('cd-hit-est -g 1 -aL {cov} -aS {cov} -d 0 -c {perc_identity} -T {threads} '
                '-M {memory} -i {infile} -o {outfile}')
    command = template.format(
        cov=alignment_coverage,
        perc_identity=perc_identity,
        threads=threads,
        memory=memory,
        infile=infile,
        outfile=outfile,
    )
    shell(command)
Esempio n. 16
0
def archiveLog(log):
    """Keep a timestamped, gzip-compressed copy of ``log``.

    The copy is named ``<log>.<YYYY-MM-DD.hHH-mMM-sSS>`` and compressed with
    ``gzip -9``.  ``FileNotFoundError`` and ``PermissionError`` are swallowed,
    so archiving is best effort.
    """
    stamp = datetime.datetime.now().strftime('%Y-%m-%d.h%H-m%M-s%S')
    archived = log + '.' + stamp
    try:
        shutil.copy2(log, archived)
        snakemake.shell('gzip -9 ' + archived)
    except (FileNotFoundError, PermissionError):
        # Deliberately ignored: the log is simply not archived.
        pass
Esempio n. 17
0
def index_genome(genome_path, silence=True):
    """Run ``bwa index`` on ``genome_path``.

    Args:
        genome_path: genome file to index.
        silence: when true, all output of bwa is redirected to ``/dev/null``.

    Returns:
        The result of ``genome_is_indexed(genome_path)``.
    """
    template = 'bwa index {genome_path}'
    if silence:
        template += ' &> /dev/null;'
    shell(template.format(genome_path=genome_path))

    return genome_is_indexed(genome_path)
Esempio n. 18
0
def _control_script(igvcommands, igv_fp, igv_prefs):
    """Run IGV under ``xvfb-run`` with a batch script built from ``igvcommands``.

    Args:
        igvcommands: iterable of IGV batch command strings (ASCII), written
            one per line into a temporary script passed to IGV with ``-b``.
        igv_fp: IGV executable to run.
        igv_prefs: preferences handed to ``_write_prefs``; the ``name`` of
            the object it returns is passed to IGV with ``-o``.
    """
    # The context manager guarantees the temporary script is closed (and
    # thereby removed) even if IGV fails, instead of relying on garbage
    # collection.
    with tempfile.NamedTemporaryFile() as igvscript:
        igvscript.writelines(
            bytes(command + '\n', 'ascii') for command in igvcommands)
        # IGV reads the script by name, so the content must be on disk.
        igvscript.flush()
        igvprefsfile = _write_prefs(igv_prefs)
        igvcommandstring = "xvfb-run -a -s '-screen 1 1920x1080x24' %s -o %s -b %s" % (
            igv_fp, igvprefsfile.name, igvscript.name)
        print(igvcommandstring)
        shell(igvcommandstring)
Esempio n. 19
0
	def install(cls):
		"""Download LAST (version ``last-548``), build it and install ``lastal`` and ``lastdb``."""
		last_version="last-548"
		archive=cls.download_file("http://last.cbrc.jp/{}.zip".format(last_version),"last.zip")
		# NOTE: the name ``dir1`` is load-bearing -- the unzip command below
		# leaves ``{dir1}`` unformatted and relies on snakemake.shell filling
		# it in from this method's local variables.
		dir1=os.path.dirname(archive)
		snakemake.shell('(cd "{dir1}" && unzip last.zip)')
		build_dir=os.path.join(dir1,last_version)
		cls.run_make(build_dir)
		for binary,target in (("lastal",LASTAL),("lastdb",LASTDB)):
			cls.install_file("{}/src/{}".format(build_dir,binary),target)
Esempio n. 20
0
	def map_reads(self,number_of_threads=1):
		"""Map ``self._fq1_fn`` against ``self._fa_fn`` with Storm and write ``self._bam_fn``.

		Storm's output is piped through ``samtools view -bS`` to obtain BAM.

		Args:
			number_of_threads: value passed to Storm's ``-N`` option.
		"""
		template='"{storm}" -M 4 -A -g "{genome}" -r "{reads}" -N "{threads}" | "{samtools}" view -bS - > "{bam}"'
		command=template.format(
				storm=STORM_NUCLEOTIDE,
				samtools=smbl.prog.SAMTOOLS,
				genome=self._fa_fn,
				reads=self._fq1_fn,
				bam=self._bam_fn,
				threads=number_of_threads,
			)
		snakemake.shell(command)
Esempio n. 21
0
def sort_coordinate(in_bam, out_bam, delete_in_bam=False):
    """Sort ``in_bam`` with ``samtools sort`` and write the result to ``out_bam``.

    Args:
        in_bam: input alignment file.
        out_bam: file receiving the sorted output.
        delete_in_bam: when true, ``in_bam`` is removed after sorting.

    Returns:
        ``True`` if ``out_bam`` exists as a file afterwards, else ``False``.
    """
    sort_cmd = 'samtools sort {in_bam} > {out_bam}'.format(in_bam=in_bam,
                                                          out_bam=out_bam)
    shell(sort_cmd)

    if delete_in_bam:
        shell('rm {in_bam}'.format(in_bam=in_bam))

    return isfile(out_bam)
Esempio n. 22
0
def index_genome(genome_path, silence=True):
    """Build a nucleotide BLAST database for ``genome_path`` with ``makeblastdb``.

    Args:
        genome_path: FASTA file passed to ``-in``.
        silence: when true, all output of makeblastdb is redirected to
            ``/dev/null``.

    Returns:
        The result of ``genome_is_indexed(genome_path)``.
    """
    template = 'makeblastdb -dbtype nucl -in {genome_path}'
    if silence:
        template += ' &> /dev/null;'
    shell(template.format(genome_path=genome_path))

    return genome_is_indexed(genome_path)
Esempio n. 23
0
	def create_fq(self):
		"""Simulate reads with ART Illumina and recode the resulting SAM file.

		When ``self.coverage`` is 0 it is first derived from the number of
		read tuples, the two read lengths and 0.8 times the size in bytes of
		the FASTA file.  ART is run first; its SAM output (``self._sam1_fn``)
		is then stripped of header lines and given a fresh header by
		``samtools view -h -T`` (written to ``self._sam2_fn``), which is
		finally passed to ``self.recode_sam_reads``.
		"""
		if self.coverage == 0:
			fasta_bytes=os.stat(self._fa_fn).st_size
			simulated_bases=1.0 * self.number_of_read_tuples * (self.read_length_1+self.read_length_2)
			self.coverage = simulated_bases / (0.8 * fasta_bytes)

		# Paired-end options are only added for two reads per tuple.
		paired_params=(
			"-p -m {dist} -s {dist_dev}".format(
					dist=self.distance,
					dist_dev=self.distance_deviation,
				)
			if self._reads_in_tuple==2 else ""
		)

		simulate_cmd ="""
				{art_il} -sam -na \
					-i "{fasta}" \
					-l {rlen} \
					-rs {rng_seed} \
					-f {coverage} \
					-o "{o_pref}" \
					{paired_params} \
					{other_params} \
					> /dev/null
			""".format(
				art_il=smbl.prog.ART_ILLUMINA,
				paired_params=paired_params,
				fasta=self._fa_fn,
				rlen=self.read_length_1,
				other_params=self.other_params,
				coverage=self.coverage,
				o_pref=self.art_prefix,
				rng_seed=self._rng_seed,
			)

		# correction of header (bug in ART)
		fix_header_cmd ="""
			cat "{sam_1}" | \
			grep -v ^@ | \
			"{samtools}" view -h -T "{fa}" - \
			> "{sam_2}"
		""".format(
				samtools=smbl.prog.SAMTOOLS,
				sam_1=self._sam1_fn,
				sam_2=self._sam2_fn,
				fa=self._fa_fn,
		)

		for command in (simulate_cmd,fix_header_cmd):
			snakemake.shell(command)

		self.recode_sam_reads(
			sam=self._sam2_fn,
			simulator_name="art-illumina",
		)
Esempio n. 24
0
def remove_secondary_alignments(in_bam, out_bam, delete_in_bam=False):
    """Write the alignments of ``in_bam`` that pass ``-F 0x900`` to ``out_bam``.

    Args:
        in_bam: input alignment file.
        out_bam: file receiving the filtered BAM output.
        delete_in_bam: when true, ``in_bam`` is removed after filtering.

    Returns:
        ``True`` if ``out_bam`` exists as a file afterwards, else ``False``.
    """
    filter_cmd = 'samtools view -b -h -F 0x900 {in_bam} > {out_bam}'.format(
        in_bam=in_bam, out_bam=out_bam)
    shell(filter_cmd)

    if delete_in_bam:
        shell('rm {in_bam}'.format(in_bam=in_bam))

    return isfile(out_bam)
Esempio n. 25
0
    def run(self,
            output_filename=None,
            output_filename_classified=None,
            output_filename_unclassified=None,
            only_classified_output=False):
        """Build and execute the kraken command line

        The chosen output file is stored in :attr:`kraken_output`.

        :param str output_filename: if not provided, a temporary file is used
            and stored in :attr:`kraken_output`.
        :param str output_filename_classified: not compressed
        :param str output_filename_unclassified: not compressed
        :param bool only_classified_output: if exactly True, adds
            ``--only-classified-output`` to the command

        """
        self.kraken_output = (TempFile().name
                              if output_filename is None else output_filename)

        params = {
            "database": self.database,
            "thread": self.threads,
            "file1": self.fastq[0],
            "kraken_output": self.kraken_output,
            "output_filename_unclassified": output_filename_unclassified,
            "output_filename_classified": output_filename_classified,
        }

        # The command is assembled from fragments and filled in at the end.
        pieces = ["kraken %(file1)s "]

        if self.paired:
            params["file2"] = self.fastq[1]
            pieces.append(" %(file2)s --paired")

        pieces.append(" -db %(database)s ")
        # for kraken <=1.0 --out-fmt did not exist
        pieces.append(
            " --threads %(thread)s --output %(kraken_output)s --out-fmt legacy")

        if output_filename_unclassified:
            pieces.append(
                " --unclassified-out %(output_filename_unclassified)s ")

        if only_classified_output is True:
            pieces.append(" --only-classified-output")

        if output_filename_classified:
            pieces.append(" --classified-out %(output_filename_classified)s ")

        command = "".join(pieces) % params
        # Somehow there is an error using easydev.execute with pigz
        from snakemake import shell
        shell(command)
Esempio n. 26
0
def compute(sra_id):
    """Compute a sourmash signature for an SRA run and upload it gzipped to S3.

    Nothing is done when ``sigs/<sra_id>.sig`` already exists in the
    ``wort-sra`` bucket.  Otherwise ``fastq-dump`` is piped into ``sourmash
    compute``, which writes into a temporary file; that file is
    gzip-compressed in memory and stored under the same key.

    Raises:
        botocore.exceptions.ClientError: if the existence check fails with
            anything other than a 404.
        CalledProcessError: if the pipeline fails with any exit status other
            than 141.
    """
    import boto3
    import botocore
    from snakemake import shell

    bucket_name = "wort-sra"
    conn = boto3.client("s3")
    s3 = boto3.resource("s3")

    key_path = os.path.join("sigs", sra_id + ".sig")
    try:
        s3.Object(bucket_name, key_path).load()
    except botocore.exceptions.ClientError as e:
        # A 404 only means the object does not exist yet; anything else is
        # a real failure.
        if e.response["Error"]["Code"] != "404":
            raise
    else:
        # The key already exists
        return

    with NamedTemporaryFile("w+b") as sig_file:
        pipeline = (
            "fastq-dump --disable-multithreading --fasta 0 --skip-technical --readids --read-filter pass --dumpbase --split-spot --clip -Z {sra_id} | "
            "sourmash compute -k 21,31,51 "
            "  --scaled 1000 "
            "  --track-abundance "
            "  --name {sra_id} "
            "  -o {output} "
            "  - ").format(sra_id=sra_id, output=sig_file.name)
        try:
            shell(pipeline)
        except CalledProcessError as e:
            # Exit status 141 is SIGPIPE, which is informational here: an
            # upstream command could no longer write into a closed pipe.
            # More details:
            # http://www.pixelbeat.org/programming/sigpipe_handling.html
            if e.returncode != 141:
                raise e

        sig_file.seek(0)

        # Compress the signature in memory before uploading.
        compressed_fp = BytesIO()
        with gzip.GzipFile(fileobj=compressed_fp, mode="wb") as gz:
            shutil.copyfileobj(sig_file, gz)

        conn.put_object(
            Body=compressed_fp.getvalue(),
            Bucket=bucket_name,
            Key=key_path,
            ContentType="application/json",
            ContentEncoding="gzip",
        )
Esempio n. 27
0
def get_contigs(sample, assembler):
    """Return the path of the final contigs file for ``sample``.

    For shovill and spades the contigs file is moved out of the assembler's
    output directory to ``<sample>/<assembler>.fasta`` and that directory is
    deleted.  For any other assembler ``<sample>/skesa.fasta`` is returned
    without touching the file system.
    """
    if assembler == 'shovill':
        workdir, contigs_name = f"{sample}/shovill", "contigs.fa"
    elif assembler == 'spades':
        workdir, contigs_name = f"{sample}/spades", "contigs.fasta"
    else:
        return f"{sample}/skesa.fasta"

    final_fasta = f"{workdir}.fasta"
    shell(f"mv {workdir}/{contigs_name} {final_fasta} && rm -rf {workdir}")
    return final_fasta
Esempio n. 28
0
def _inferseq_database(pairsfile, inferseq_database, min_perc_identity, max_internal_softclip_prop,
                       max_edge_distance, output_file, keep_intermediate):
    """Infer sequences for pairs by aligning their flanks to a database.

    The flanks written by ``write_flanks_to_align_to_database`` are aligned
    to ``inferseq_database`` with bowtie2, sequences are inferred from that
    alignment by ``infer_sequences_database``, and the rows with
    ``inferred_seq_length > 0`` are written as a tab-separated table.

    Args:
        pairsfile: tab-separated pairs table read with pandas.
        inferseq_database: FASTA database; indexed via ``index_database``.
        min_perc_identity, max_internal_softclip_prop, max_edge_distance:
            forwarded unchanged to ``infer_sequences_database``.
        output_file: destination TSV; a falsy value selects
            ``mustache.inferseq_database.tsv``.  Intermediate files are
            written to its directory.
        keep_intermediate: when false, the flank FASTA files and the
            alignment BAM are removed afterwards.
    """
    # Resolve the default before the first use of ``output_file``.  It used
    # to be applied only right before writing, i.e. after dirname(), the
    # empty-pairs handling and the log message had already used the value.
    if not output_file:
        output_file = 'mustache.inferseq_database.tsv'

    index_database(inferseq_database)

    database_dict = {rec.id: rec.seq for rec in SeqIO.parse(inferseq_database, 'fasta')}

    tmp_dir = dirname(output_file)

    pairs = pd.read_csv(pairsfile, sep='\t', keep_default_na=False, na_values=[
        '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A','N/A', '#NA', 'NULL', 'NaN', '-NaN', 'nan', '-nan'])

    handle_empty_pairsfile(pairs, output_file)

    logger.info("Aligning pairs to database...")
    assembly_flanks_fasta_prefix = write_flanks_to_align_to_database(pairs, tmp_dir)
    # Integer upper bound: assuming ``randint`` is random.randint, a float
    # such as 1e20 is rejected with TypeError on Python >= 3.12.
    assembly_outbam = join(tmp_dir, 'mustache.inferseq_database.' + str(randint(0, 10**20)) + '.bam')
    bowtie2tools.align_fasta_to_genome(
        assembly_flanks_fasta_prefix + '.fasta',
        inferseq_database, assembly_outbam, silence=True,
        additional_flags='--all --score-min G,1,5'
    )

    logger.info("Inferring sequences from pairs aligned to database...")
    sequences_inferred_database = infer_sequences_database(assembly_outbam, database_dict,
                                                           min_perc_identity, max_internal_softclip_prop, max_edge_distance)

    if not keep_intermediate:
        shell('rm {fasta_prefix}* {outbam}*'.format(fasta_prefix=assembly_flanks_fasta_prefix, outbam=assembly_outbam))

    method1 = make_dataframe(sequences_inferred_database, method='inferred_database')

    all_inferred_results = method1.sort_values(
        by=['pair_id', 'method']
    )

    # pair_id is normalised to the string form of its integer value.
    all_inferred_results.loc[:, 'pair_id'] = list(map(str, map(int, list(all_inferred_results['pair_id']))))
    all_inferred_results = all_inferred_results.query("inferred_seq_length > 0")

    logger.info("Writing results to file %s..." % output_file)

    # The sample id of the first pair labels every output row.
    if pairs.shape[0] > 0:
        sample_id = list(pairs['sample'])[0]
        all_inferred_results.insert(0, 'sample', sample_id)
    else:
        all_inferred_results.insert(0, 'sample', None)

    all_inferred_results.to_csv(output_file, sep='\t', index=False)
Esempio n. 29
0
def bootstrap_read_counts(peaks, bam_path, total_reads):
    """Compute read counts for ``peaks`` on a bootstrap resample of a BAM file.

    ``total_reads`` read positions are drawn with replacement from
    ``range(total_reads)``; the read at position ``i`` of ``bam_path`` is
    written once per time ``i`` was drawn.  The resampled reads are
    converted, sorted and indexed with samtools and handed to
    ``empirical_read_counts``.

    The intermediate files are the fixed names ``boot.tmp.*`` in the current
    working directory and are removed afterwards.

    Args:
        peaks: forwarded unchanged to ``empirical_read_counts``.
        bam_path: BAM file to resample.
        total_reads: number of draws, and exclusive upper bound of the
            drawn read positions.

    Returns:
        Whatever ``empirical_read_counts`` returns for the resampled BAM.
    """
    indices = sorted(
        np.random.choice(range(total_reads), total_reads, replace=True))
    n_draws = len(indices)
    # Cursor into the sorted draws; avoids re-slicing the list per write.
    next_draw = 0

    bam = pysam.AlignmentFile(bam_path, 'rb')
    try:
        outfile = pysam.AlignmentFile("boot.tmp.sam", "w", template=bam)
        try:
            for i, read in enumerate(bam):
                if next_draw >= n_draws:
                    # Every draw has been written; skip the remaining reads.
                    break
                while next_draw < n_draws and indices[next_draw] == i:
                    outfile.write(read)
                    next_draw += 1
        finally:
            outfile.close()
    finally:
        bam.close()

    shell('samtools view -b boot.tmp.sam -o boot.tmp.bam')
    shell('samtools sort -o boot.tmp.sorted.bam boot.tmp.bam')
    shell('samtools index boot.tmp.sorted.bam')

    boot_read_counts = empirical_read_counts(
        peaks, pysam.AlignmentFile('boot.tmp.sorted.bam', 'r'))
    shell('rm boot.tmp*')

    return boot_read_counts
Esempio n. 30
0
	def map_reads(self,number_of_threads=1):
		"""Map the reads with RazerS 3, writing ``self._bam_fn`` via ``-o``.

		One FASTQ file is passed when ``self._fq2_fn`` is ``None``, both
		files otherwise.  Standard output of the mapper is discarded.

		Args:
			number_of_threads: value passed to the ``-tc`` option.
		"""
		# Identity test: comparing to None with ``==`` can be fooled by a custom __eq__.
		if self._fq2_fn is None:
			reads_string='"{}"'.format(self._fq1_fn)
		else:
			reads_string='"{}" "{}"'.format(self._fq1_fn,self._fq2_fn)

		command='("{razer}" -o "{bam}" -tc {threads} "{genome}" {reads_string}) > /dev/null'.format(
				razer=RAZERS3,
				genome=self._fa_fn,
				reads_string=reads_string,
				bam=self._bam_fn,
				threads=number_of_threads,
			)
		snakemake.shell(command)
Esempio n. 31
0
	def map_reads(self,number_of_threads=1):
		"""Map the reads with Bowtie 2 and convert the output to ``self._bam_fn``.

		A single FASTQ file is passed positionally when ``self._fq2_fn`` is
		``None``; otherwise both files are passed with ``-1``/``-2``.  The
		mapper's output is piped through ``samtools view -bS``.

		Args:
			number_of_threads: value passed to the ``-p`` option.
		"""
		# Identity test: comparing to None with ``==`` can be fooled by a custom __eq__.
		if self._fq2_fn is None:
			reads_string='"{}"'.format(self._fq1_fn)
		else:
			reads_string='-1 "{}" -2 "{}"'.format(self._fq1_fn,self._fq2_fn)

		command='"{bt2}" -p {threads} -x "{idx}" {reads_string} | "{samtools}" view -bS - > "{bam}"'.format(
				bt2=BOWTIE2,
				samtools=smbl.prog.SAMTOOLS,
				idx=self._fa_fn,
				reads_string=reads_string,
				bam=self._bam_fn,
				threads=number_of_threads,
			)
		snakemake.shell(command)
Esempio n. 32
0
	def map_reads(self,number_of_threads=1):
		"""Map the reads with ``bwa bwasw`` and convert the output to ``self._bam_fn``.

		One FASTQ file is passed when ``self._fq2_fn`` is ``None``, both
		files otherwise.  The mapper's output is piped through
		``samtools view -bS``.

		Args:
			number_of_threads: value passed to the ``-t`` option.
		"""
		# Identity test: comparing to None with ``==`` can be fooled by a custom __eq__.
		if self._fq2_fn is None:
			reads_string='"{}"'.format(self._fq1_fn)
		else:
			reads_string='"{}" "{}"'.format(self._fq1_fn,self._fq2_fn)

		command='"{bwa}" bwasw -t {threads} "{idx}" {reads_string} | "{samtools}" view -bS - > "{bam}"'.format(
				bwa=BWA,
				samtools=smbl.prog.SAMTOOLS,
				idx=self._fa_fn,
				reads_string=reads_string,
				bam=self._bam_fn,
				threads=number_of_threads,
			)
		snakemake.shell(command)
Esempio n. 33
0
	def map_reads(self,number_of_threads=1):
		"""Map the reads with GSNAP (SAM output) and convert it to ``self._bam_fn``.

		One FASTQ file is passed when ``self._fq2_fn`` is ``None``, both
		files otherwise.  The mapper's output is piped through
		``samtools view -bS``.

		Args:
			number_of_threads: value passed to the ``-t`` option.
		"""
		# Identity test: comparing to None with ``==`` can be fooled by a custom __eq__.
		if self._fq2_fn is None:
			reads_string='"{}"'.format(self._fq1_fn)
		else:
			reads_string='"{}" "{}"'.format(self._fq1_fn,self._fq2_fn)

		command='"{gsnap}" -A sam -d {idx} -t {threads} {reads_string} | "{samtools}" view -bS - > "{bam}"'.format(
				gsnap=GSNAP,
				samtools=smbl.prog.SAMTOOLS,
				idx=self._fa_fn,
				reads_string=reads_string,
				bam=self._bam_fn,
				threads=number_of_threads,
			)
		snakemake.shell(command)
Esempio n. 34
0
def _workflow(workdir, snakefile, configfile, cores, memory, unlock, rerun_incomplete, keep_going):
    """Assemble a ``snakemake`` command line, print it and run it.

    Args:
        workdir: passed to the workflow as config value ``wd``.
        snakefile: value for ``-s``.
        configfile: value for ``--configfile``.
        cores: value for ``--cores``.
        memory: passed to the workflow as config value ``memory``.
        unlock: adds ``--unlock`` when truthy.
        rerun_incomplete: adds ``--rerun-incomplete`` when truthy.
        keep_going: adds ``--keep-going`` when truthy.
    """
    # Optional switches, in the order they are appended to the command.
    optional_flags = (
        (rerun_incomplete, '--rerun-incomplete '),
        (keep_going, '--keep-going '),
        (unlock, '--unlock '),
    )

    template = ('snakemake -s {snakefile} --config wd={workdir} memory={memory} '
                '--cores {cores} --configfile {configfile} ')
    template += ''.join(flag for enabled, flag in optional_flags if enabled)

    cmd = template.format(snakefile=snakefile, configfile=configfile, workdir=workdir, memory=memory, cores=cores)
    print('COMMAND:', cmd)
    shell(cmd)
Esempio n. 35
0
    def map_reads(self, number_of_threads=1):
        """Map the reads with GSNAP (SAM output) and convert it to ``self._bam_fn``.

        One FASTQ file is passed when ``self._fq2_fn`` is ``None``, both
        files otherwise.  The mapper's output is piped through
        ``samtools view -bS``.

        Args:
            number_of_threads: value passed to the ``-t`` option.
        """
        # Identity test: comparing to None with ``==`` can be fooled by a
        # custom __eq__.
        if self._fq2_fn is None:
            reads_string = '"{}"'.format(self._fq1_fn)
        else:
            reads_string = '"{}" "{}"'.format(self._fq1_fn, self._fq2_fn)

        command = (
            '"{gsnap}" -A sam -d {idx} -t {threads} {reads_string} | "{samtools}" view -bS - > "{bam}"'
            .format(
                gsnap=GSNAP,
                samtools=smbl.prog.SAMTOOLS,
                idx=self._fa_fn,
                reads_string=reads_string,
                bam=self._bam_fn,
                threads=number_of_threads,
            ))
        snakemake.shell(command)
Esempio n. 36
0
def _wholegenome(reference, query, read_length, read_depth, min_alignment_quality, max_direct_repeat_length,
                 large_insertion_cutoff, query_id, output_prefix):
    """Run the find / pair / inferseq steps on reads generated from a query genome.

    Reads are produced from ``query`` with ``make_reads``, aligned to
    ``reference`` with bwa, sorted and indexed, and the resulting BAM is fed
    through ``_find``, ``_pair`` and ``_inferseq_assembly``.  Results are
    written to ``<output_prefix>.find.tsv``, ``.pair.tsv`` and
    ``.inferseq.tsv``; the generated reads, the BAM and its index are removed
    at the end.
    """
    min_alignment_inner_length = max_direct_repeat_length + 1

    # Make sure both genomes are indexed for their respective aligner.
    if bwatools.genome_is_indexed(reference):
        click.echo("Reference already indexed...")
    else:
        click.echo("Indexing reference...")
        bwatools.index_genome(reference)

    if bowtie2tools.genome_is_indexed(query):
        click.echo("Query already indexed...")
    else:
        click.echo("Indexing query...")
        bowtie2tools.index_genome(query)

    click.echo("Making query reads...")
    query_reads_path = output_prefix + '.query.tmp.fq'
    make_reads(query, query_reads_path, read_length, read_depth)

    click.echo("Aligning query reads to reference...")
    out_sam = output_prefix + '.query.reference.tmp.sam'
    bwatools.align_to_genome_se(query_reads_path, reference, out_sam, threads=1, verbose=False)

    click.echo("Sorting and indexing alignment file...")
    out_bam = output_prefix + '.query.reference.tmp.bam'
    # delete_in_bam=True asks sort_coordinate to remove the SAM file.
    samtools.sort_coordinate(out_sam, out_bam, delete_in_bam=True)
    samtools.index(out_bam)

    find_file = output_prefix + '.find.tsv'
    find_options = dict(
        min_softclip_length=8,
        min_softclip_count=1,
        min_alignment_quality=min_alignment_quality,
        min_alignment_inner_length=min_alignment_inner_length,
        min_distance_to_mate=max_direct_repeat_length + 2,
        min_softclip_ratio=0.01,
        max_indel_ratio=0.0,
        large_insertion_cutoff=large_insertion_cutoff,
        min_count_consensus=1,
        sample_id=query_id,
        output_file=find_file,
    )
    _find(out_bam, **find_options)

    pair_file = output_prefix + '.pair.tsv'
    pair_options = dict(
        max_direct_repeat_length=max_direct_repeat_length,
        min_alignment_quality=min_alignment_quality,
        min_alignment_inner_length=min_alignment_inner_length,
        max_junction_spanning_prop=0.01,
        large_insertion_cutoff=large_insertion_cutoff,
        output_file=pair_file,
    )
    _pair(find_file, out_bam, reference, **pair_options)

    inferseq_file = output_prefix + '.inferseq.tsv'
    inferseq_options = dict(
        min_perc_identity=0.95,
        max_internal_softclip_prop=0.01,
        max_inferseq_size=500000,
        min_inferseq_size=30,
        keep_intermediate=False,
        output_file=inferseq_file,
    )
    _inferseq_assembly(pair_file, out_bam, query, reference, **inferseq_options)

    # Clean up the intermediate reads, the BAM and its index.
    leftovers = (query_reads_path, out_bam, out_bam + '.bai')
    shell('rm ' + ' '.join(leftovers))
Esempio n. 37
0
    def run(self, output_filename=None, output_filename_classified=None,
            output_filename_unclassified=None, only_classified_output=False):
        """Build and execute the kraken command line

        The chosen output file is stored in :attr:`kraken_output`.

        :param str output_filename: if not provided, a temporary file is used
            and stored in :attr:`kraken_output`.
        :param str output_filename_classified: not compressed
        :param str output_filename_unclassified: not compressed
        :param bool only_classified_output: if exactly True, adds
            ``--only-classified-output`` to the command

        """
        self.kraken_output = (TempFile().name
                              if output_filename is None else output_filename)

        params = {
            "database": self.database,
            "thread": self.threads,
            "file1": self.fastq[0],
            "kraken_output": self.kraken_output,
            "output_filename_unclassified": output_filename_unclassified,
            "output_filename_classified": output_filename_classified,
            }

        # The command is assembled from fragments and filled in at the end.
        pieces = ["kraken -db %(database)s %(file1)s "]

        if self.paired:
            params["file2"] = self.fastq[1]
            pieces.append(" %(file2)s --paired")

        pieces.append(" --threads %(thread)s --output %(kraken_output)s ")
        pieces.append(" --out-fmt legacy")

        if output_filename_unclassified:
            pieces.append(
                " --unclassified-out %(output_filename_unclassified)s ")

        if only_classified_output is True:
            pieces.append(" --only-classified-output")

        if output_filename_classified:
            pieces.append(" --classified-out %(output_filename_classified)s ")

        command = "".join(pieces) % params
        # Somehow there is an error using easydev.execute with pigz
        from snakemake import shell
        shell(command)
Esempio n. 38
0
def compute_syrah(sra_id):
    """Compute a sourmash signature for an SRA run and return it as text.

    ``fastq-dump`` is piped through ``syrah`` into ``sourmash compute``,
    which writes into a temporary file; the content of that file is returned.

    Raises:
        CalledProcessError: if the pipeline fails with any exit status other
            than 141.
    """
    from snakemake import shell
    with NamedTemporaryFile('w+t') as sig_file:
        pipeline = (
            'fastq-dump -A {sra_id} -Z | syrah | '
            'sourmash compute -k 21 --dna - -o {output} --name {sra_id}'
        ).format(sra_id=sra_id, output=sig_file.name)
        try:
            shell(pipeline)
        except CalledProcessError as e:
            # Exit status 141 is SIGPIPE, which is informational here: an
            # upstream command could no longer write into a closed pipe.
            # More details:
            # http://www.pixelbeat.org/programming/sigpipe_handling.html
            if e.returncode != 141:
                raise e

        sig_file.seek(0)
        return sig_file.read()
Esempio n. 39
0
    def _to_fastX(self, mode, output_filename, threads=2):
        """Convert :attr:`filename` with ``samtools <mode>``

        :param mode: fastq or fasta (used as the samtools sub-command)
        :param output_filename: file receiving the samtools output
        :param threads: value passed to the ``-@`` option

        """
        # for now, we use samtools
        # can use bamtools as well but as long and output 10% larger (sequences
        # are split on 80-characters length)
        from snakemake import shell
        arguments = (mode, threads, self.filename, output_filename)
        cmd = "samtools %s  -@ %s %s > %s" % arguments
        for message in ("Please be patient",
                        "This may be long depending on your input data file: ",
                        "typically, a minute per  500,000 reads"):
            logger.info(message)
        shell(cmd)
        logger.info("done")
Esempio n. 40
0
def _control_socket(igvcommands, igv_fp, igv_prefs):
    """Run IGV under xvfb-run, send it commands over a socket, and wait.

    :param igvcommands: iterable of command strings; they are joined with
        newlines and sent ASCII-encoded over the socket.
    :param igv_fp: path of the IGV executable (converted with ``str``).
    :param igv_prefs: handed to ``_write_prefs``; the ``name`` of the object
        it returns is given to IGV with ``-o`` (presumably a preferences
        file -- confirm against ``_write_prefs``).

    Nothing is returned; the call blocks until the launched process exits.

    NOTE(review): the connect loop below has no timeout, and the socket is
    not closed if a step between connecting and ``s.close()`` raises.
    """
    igvprefsfile = _write_prefs(igv_prefs)
    # Start up IGV.  Use a port between 10000 and the max available, based
    # on the PID of this process.  (TODO is using this pid safe?)
    port = 10000 + os.getpid() % (2**16 - 10000)
    # Per-process X authority file name, also derived from the PID.
    xauth = "/tmp/xauth-%d" % os.getpid()
    xvfb_cmdline = [
        "xvfb-run", "-a", "-l", "-f", xauth, "-s", "-screen 1 1920x1080x24"
    ]
    igv_cmdline = [str(igv_fp), "-p", str(port), "-o", igvprefsfile.name]
    igvproc = subprocess.Popen(xvfb_cmdline + igv_cmdline)

    # Connect to running IGV: retry every half second for as long as the
    # connection is refused (i.e. until something listens on the port).
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    while True:
        try:
            s.connect(('localhost', port))
            break
        except ConnectionRefusedError:
            time.sleep(0.5)

    # Figure out what X11 display the IGV process is using.  It should be
    # the second child of the initial xvfb-run process (the first child
    # being Xvfb, I think.)
    with open("/proc/%s/task/%s/children" % (igvproc.pid, igvproc.pid)) as f:
        # Whitespace-separated child PIDs; index 1 is the second one listed.
        child_pid = f.read().split()[1]
    with open("/proc/%s/environ" % child_pid) as f:
        # The environ file is NUL-separated NAME=value entries; split each on
        # the first '=' only, so values may themselves contain '='.
        env_vars = [
            env_var.split('=', 1) for env_var in f.read().split('\x00')
        ]
        # Keep only well-formed pairs (drops e.g. the empty trailing entry).
        env_vars = {
            env_var[0]: env_var[1]
            for env_var in env_vars if len(env_var) == 2
        }
    display = env_vars['DISPLAY']
    # Based on http://unix.stackexchange.com/questions/5999/ :
    # This should make the window as large as the virtual X display, but in
    # practice my screenshots aren't going over 1280 x 1296.
    shell(
        "DISPLAY=" + display + " XAUTHORITY=" + xauth +
        " xdotool search --onlyvisible --name IGV windowsize --sync 100% 100%")

    # Generate screenshot: send all commands in one write, close our end of
    # the connection, then block until the launched process terminates.
    s.sendall(bytes('\n'.join(igvcommands), 'ascii'))
    s.close()
    igvproc.wait()
Esempio n. 41
0
    def _to_fastX(self, mode, output_filename, threads=2):
        """Run ``samtools <mode>`` on ``self.filename`` and save its output.

        :param mode: fastq or fasta (used as the samtools sub-command)
        :param output_filename: destination of the redirected standard output
        :param threads: number passed to the samtools ``-@`` option

        """
        # for now, we use samtools; bamtools is an alternative but is as long
        # and produces output 10% larger (sequences are split on
        # 80-characters length)
        from snakemake import shell

        cmd = "samtools %s  -@ %s %s > %s" % (
            mode,
            threads,
            self.filename,
            output_filename,
        )

        # Tell the user up front that the conversion may take a while.
        for message in ("Please be patient",
                        "This may be long depending on your input data file: ",
                        "typically, a minute per  500,000 reads"):
            logger.info(message)

        shell(cmd)
        logger.info("done")
Esempio n. 42
0
def main(peak_files):
    """Merge the given peak files with bedtools and print each merged line.

    The files are concatenated with ``cat``, sorted and merged by bedtools;
    every output line is split on whitespace and the resulting list printed.

    :param peak_files: iterable of file paths, joined with single spaces on
        the ``cat`` command line.
    """
    joined_inputs = ' '.join(peak_files)
    pipeline = 'cat %s | bedtools sort -i stdin | bedtools merge -i stdin' % joined_inputs

    # iterable=True makes shell() yield the command's output line by line.
    for merged_line in shell(pipeline, iterable=True):
        print(merged_line.split())
Esempio n. 43
0
def align_fasta_to_genome(fasta,
                          genome_path,
                          outfile,
                          threads=1,
                          silence=True,
                          additional_flags=''):
    """Align a FASTA file against a BLAST database with ``blastn``.

    Results are written to *outfile* in BLAST XML (``-outfmt 5``).

    :param fasta: query FASTA file, passed to ``-query``.
    :param genome_path: BLAST database, passed to ``-db``.
    :param outfile: path given to ``-out``.
    :param threads: number of threads. Values other than 1 are forwarded as
        ``-num_threads`` unless *additional_flags* already sets that option.
    :param silence: accepted for interface compatibility; currently unused.
    :param additional_flags: extra text appended verbatim to the command.
    :return: ``True`` if *outfile* exists as a regular file afterwards,
        ``False`` otherwise.
    """
    # Previously ``threads`` was silently ignored. With the default of 1 the
    # generated command is unchanged; the flag is only added when the caller
    # asks for more and has not already supplied it via additional_flags.
    thread_flag = ''
    if threads and threads != 1 and '-num_threads' not in additional_flags:
        thread_flag = '-num_threads {} '.format(threads)

    shell(
        'blastn -query {fasta} -db {genome_path} -outfmt 5 -max_target_seqs 100000 -out {outfile} -parse_deflines '
        '{thread_flag}{additional_flags}'.format(fasta=fasta,
                                                 genome_path=genome_path,
                                                 outfile=outfile,
                                                 thread_flag=thread_flag,
                                                 additional_flags=additional_flags))

    return isfile(outfile)
Esempio n. 44
0
	def create_fq(self):
		"""Simulate reads with Mason, then recode the SAM file it produced.

		When ``self.coverage`` is 0 it is first derived from the requested
		number of read tuples, the two read lengths and the size in bytes of
		the FASTA file (scaled by 0.8). For paired reads the fragment size
		options and a second FASTQ output are added to the command line.
		"""
		if self.coverage == 0:
			# The on-disk size of the FASTA file stands in for the genome size.
			fasta_bytes=os.stat(self._fa_fn).st_size
			self.coverage = 1.0 * self.number_of_read_tuples * (self.read_length_1+self.read_length_2) / (0.8 * fasta_bytes)

		# Paired-end extras; empty for single reads.
		paired_params=(
			'--fragment-mean-size {dist} --fragment-size-std-dev {dist_dev} -or "{fq2}"'.format(
				dist=self.distance,
				dist_dev=self.distance_deviation,
				fq2=self.mason_prefix+"2.fq",
			)
			if self._reads_in_tuple==2
			else ""
		)

		command_template ="""
				{mason} \
					-n {number_of_read_tuples} \
					-ir "{fasta}" \
					--illumina-read-length {rlen} \
					--seed {rng_seed} \
					-o "{fq1}" \
					-oa "{sam}" \
					{paired_params} \
					{other_params} \
					> /dev/null
			"""
		placeholders=dict(
			mason=smbl.prog.MASON_SIMULATOR,
			paired_params=paired_params,
			fasta=self._fa_fn,
			rlen=self.read_length_1,
			other_params=self.other_params,
			number_of_read_tuples=self.number_of_read_tuples,
			fq1=self.mason_prefix+"1.fq",
			rng_seed=self._rng_seed,
			sam=self._sam_fn,
		)

		snakemake.shell(command_template.format(**placeholders))

		self.recode_sam_reads(sam=self._sam_fn,simulator_name="mason")
Esempio n. 45
0
    def run(self, output_filename_classified=None,
                output_filename_unclassified=None,
                only_classified_output=False):
        """Run Kraken, then write the PNG, JSON, CSV and Krona HTML outputs.

        All outputs are created inside ``self.output_directory``.

        :param output_filename_classified: forwarded to ``self.ka.run``
        :param output_filename_unclassified: forwarded to ``self.ka.run``
        :param only_classified_output: forwarded to ``self.ka.run``

        .. todo:: reuse the KrakenResults code to simplify this method.

        """
        prefix = self.output_directory + os.sep
        kraken_results = prefix + "kraken.out"

        # Run Kraken (KrakenAnalysis)
        self.ka.run(
            output_filename=kraken_results,
            output_filename_unclassified=output_filename_unclassified,
            output_filename_classified=output_filename_classified,
            only_classified_output=only_classified_output,
        )

        # Load the raw Kraken output and save the pie chart as a png image
        self.kr = KrakenResults(kraken_results)
        self.kr.plot(kind="pie")
        pylab.savefig(prefix + "kraken.png")

        # Tabular exports
        self.kr.kraken_to_json(prefix + "kraken.json", self.dbname)
        self.kr.kraken_to_csv(prefix + "kraken.csv", self.dbname)

        # Transform to Krona HTML
        from snakemake import shell
        kraken_html = prefix + "kraken.html"
        summary = prefix + "kraken.out.summary"
        if self.kr.kraken_to_krona(output_filename=summary) is True:
            shell("ktImportText %s -o %s" % (summary, kraken_html))
        else:
            # No Krona summary available: still create an (empty) HTML file.
            shell("touch {}".format(kraken_html))
Esempio n. 46
0
	def install(cls):
		"""Clone, build and install BFAST.

		The repository is cloned, ``autogen.sh`` is run inside ``cls.src_dir``
		(output discarded), the configure and make steps are executed, and the
		resulting ``bfast/bfast`` binary is installed as ``BFAST``.
		"""
		# GitHub no longer serves the unauthenticated git:// protocol, so the
		# repository is cloned over HTTPS instead.
		cls.git_clone("https://github.com/nh13/bfast","")
		snakemake.shell('(cd "{}" && sh autogen.sh) > /dev/null'.format(cls.src_dir))
		cls.run_configure("")
		cls.run_make("")
		cls.install_file("bfast/bfast",BFAST)
Esempio n. 47
0
	def shell(cls,command):
		"""Run *command* through snakemake, hiding stdout unless verbose.

		When ``cls.verbosity`` is falsy the command is wrapped in a subshell
		whose standard output goes to ``/dev/null``.
		"""
		to_run=command if cls.verbosity else "({}) > /dev/null".format(command)
		snakemake.shell(to_run)
Esempio n. 48
0
	def shell(self,command):
		"""Run *command* through snakemake, hiding stdout unless verbose.

		When ``self.verbosity`` is falsy the command is wrapped in a subshell
		whose standard output goes to ``/dev/null``.
		"""
		if not self.verbosity:
			snakemake.shell("({}) > /dev/null".format(command))
			return
		snakemake.shell(command)
Esempio n. 49
0
	def make_index(self):
		"""Build the Yara index for the FASTA file, discarding stdout.

		The index is written under ``self.index_prefix``.
		"""
		template='("{yara_indexer}" -o "{prefix}" "{fa}") > /dev/null'
		command=template.format(
			yara_indexer=YARA_INDEXER,
			prefix=self.index_prefix,
			fa=self._fa_fn,
		)
		snakemake.shell(command)
Esempio n. 50
0
	def clean(self):
		"""Remove all temporary files.

		Deletes the report directory and the HTML file with ``rm -fR``.
		"""
		targets=(self.report_dir,self._html_fn)
		snakemake.shell('rm -fR "{}" "{}"'.format(*targets))
Esempio n. 51
0
	def install(cls):
		"""Clone and compile wgsim, then install its binary and eval script.

		The repository is cloned into the ``wgsim`` directory, ``wgsim.c`` is
		compiled with gcc there, and both ``wgsim`` and ``wgsim_eval.pl`` are
		installed.
		"""
		# GitHub no longer serves the unauthenticated git:// protocol, so the
		# repository is cloned over HTTPS instead.
		gitdir=cls.git_clone("https://github.com/lh3/wgsim","wgsim")
		snakemake.shell('cd "{dir}" && gcc -g -O2 -Wall -o wgsim wgsim.c -lz -lm'.format(dir=gitdir))
		cls.install_file("wgsim/wgsim",WGSIM)
		cls.install_file("wgsim/wgsim_eval.pl",WGSIM_EVAL)
Esempio n. 52
0
	def clean(self):
		"""Clean working directory.

		Removes the directory returned by ``self.get_dir()`` with ``rm -fR``.
		"""
		working_dir=self.get_dir()
		snakemake.shell('rm -fR "{}"'.format(working_dir))
Esempio n. 53
0
	def make_index(self):
		"""Build the BWA index for the FASTA file (``bwa index``)."""
		# The FASTA path is quoted, like the program path, so that a path
		# containing spaces is passed to bwa as a single argument.
		snakemake.shell('"{bwa}" index "{fa}"'.format(
				bwa=BWA,
				fa=self._fa_fn,
			))
Esempio n. 54
0
	def create_fq(self):
		"""Simulate reads with wgsim and recode the FASTQ file(s) it wrote.

		When ``self.number_of_read_tuples`` is 0 it is first derived from the
		coverage, the size in bytes of the FASTA file and the two read
		lengths. wgsim is always given a second read length and a second
		output file; for single reads only the first file is recoded.
		"""
		if self.number_of_read_tuples == 0:
			# The on-disk size of the FASTA file stands in for the genome size.
			fasta_bytes=os.stat(self._fa_fn).st_size
			self.number_of_read_tuples=int(self.coverage*fasta_bytes/(self.read_length_1+self.read_length_2))

		# Insert-size options only apply to paired reads.
		paired_params=(
			"-d {dist} -s {dist_dev}".format(
				dist=self.distance,
				dist_dev=self.distance_deviation,
			)
			if self._reads_in_tuple==2
			else ""
		)

		# A zero second-read length is replaced by a placeholder value (42)
		# on the command line.
		fake_read_length_2=42 if self.read_length_2==0 else self.read_length_2

		command_template="""
				{wgsim} \
				-1 {rlen1} \
				-2 {rlen2} \
				-S {rng_seed} \
				-N {nb} \
				-e {error_rate} \
				-r {mutation_rate} \
				-R {indels} \
				-X {prob_indel_ext} \
				{haploid}\
				{paired_params} \
				{other_params} \
				"{fa}" \
				"{fq1}" \
				"{fq2}" \
				> /dev/null
			"""
		placeholders=dict(
			wgsim=smbl.prog.WGSIM,
			fa=self._fa_fn,
			fq1=self._tmp_fq1_fn,
			fq2=self._tmp_fq2_fn,
			nb=self.number_of_read_tuples,
			rlen1=self.read_length_1,
			rlen2=fake_read_length_2,
			other_params=self.other_params,
			paired_params=paired_params,
			rng_seed=self._rng_seed,
			haploid="-h" if self.haploid_mode else "",
			error_rate=self.error_rate,
			mutation_rate=self.mutation_rate,
			indels=self.indels,
			prob_indel_ext=self.prob_indel_ext,
		)
		snakemake.shell(command_template.format(**placeholders))

		# Recode the first FASTQ always; the second only for non-single reads.
		recode_args=dict(old_fq1=self._tmp_fq1_fn)
		if self._reads_in_tuple!=1:
			recode_args["old_fq2"]=self._tmp_fq2_fn
		self.recode_wgsim_reads(**recode_args)
Esempio n. 55
0
	def install(cls):
		"""Download the Picard 1.130 release archive and install picard.jar.

		The zip is unpacked (paths junked, output discarded) in the directory
		that holds the downloaded file.
		"""
		ver="1.130"
		url="https://github.com/broadinstitute/picard/releases/download/{ver}/picard-tools-{ver}.zip".format(ver=ver)
		archive_fn=cls.download_file(url,"picard.zip")
		archive_dir=os.path.dirname(archive_fn)
		snakemake.shell('(cd "{dir}" && unzip -j picard.zip) > /dev/null'.format(dir=archive_dir))
		cls.install_file("picard.jar",PICARD)
Esempio n. 56
0
	def make_index(self):
		"""Build the Bowtie 2 index, using the FASTA path as the index name."""
		fasta=self._fa_fn
		command='"{bt2b}" "{fa}" "{fa}"'.format(bt2b=BOWTIE2_BUILD,fa=fasta)
		snakemake.shell(command)
Esempio n. 57
0
		]

def all_programs():
	"""Return the installation files of every registered plugin.

	The result is a list with one entry per plugin, in registration order as
	returned by ``get_registered_plugins()``.
	"""
	installation_files=[]
	for plugin in smbl.prog.plugins.get_registered_plugins():
		installation_files.append(plugin.get_installation_files())
	return installation_files

def all_compatible_programs():
	"""Return the installation files of plugins supported on this platform.

	Same as listing every registered plugin, but plugins whose
	``is_platform_supported()`` is falsy are left out.
	"""
	installation_files=[]
	for plugin in smbl.prog.plugins.get_registered_plugins():
		if not plugin.is_platform_supported():
			continue
		installation_files.append(plugin.get_installation_files())
	return installation_files

# Module-level side effect: create the bin_dir, fa_dir and src_dir directories
# (mkdir -p, so existing directories are not an error) when this code runs.
snakemake.shell(
		"""
			mkdir -p "{}" "{}" "{}"
		""".format(bin_dir,fa_dir,src_dir)
	)

def is_linux():
	"""Return True when ``sys.platform`` starts with ``'linux'``."""
	platform_name=sys.platform
	return platform_name.startswith('linux')

def is_cygwin():
	"""Return True when ``sys.platform`` starts with ``'cygwin'``."""
	platform_name=sys.platform
	return platform_name.startswith('cygwin')

def is_windows():
	"""Return True when ``sys.platform`` starts with ``'win'``."""
	platform_name=sys.platform
	return platform_name.startswith('win')

def is_osx():
	"""Return True when ``sys.platform`` starts with ``'darwin'``."""
	platform_name=sys.platform
	return platform_name.startswith('darwin')
Esempio n. 58
0
import snakemake
import re


def shell(
			cmd,
			remove_spaces=True,
			async_run=False,
			iterable=False,
			read=False,
			**legacy_kwargs
		):
	"""Run *cmd* through ``snakemake.shell``, optionally squeezing whitespace.

	:param cmd: shell command to execute.
	:param remove_spaces: when true, every run of spaces, tabs, form feeds
		and vertical tabs is collapsed to one space and the command is
		stripped. Newlines are left untouched.
	:param async_run: forwarded to ``snakemake.shell``. This parameter used
		to be called ``async``, which is a reserved word since Python 3.7
		and made this module a syntax error; it keeps its position, and the
		old name is still honoured when supplied as ``**{"async": value}``.
	:param iterable: forwarded to ``snakemake.shell``.
	:param read: forwarded to ``snakemake.shell``.
	:return: whatever ``snakemake.shell`` returns.
	:raises TypeError: on keyword arguments other than the legacy ``async``.
	"""
	# Backward compatibility with callers that still use the old keyword.
	if "async" in legacy_kwargs:
		async_run=legacy_kwargs.pop("async")
	if legacy_kwargs:
		raise TypeError(
			"shell() got unexpected keyword argument(s): {}".format(
				", ".join(sorted(legacy_kwargs))
			)
		)

	if remove_spaces:
		cmd=re.sub(r'[ \t\f\v]+',' ',cmd).strip()

	# NOTE(review): assumes the installed snakemake names this flag
	# ``async_run`` -- confirm against the snakemake version in use.
	return snakemake.shell(
			cmd=cmd,
			async_run=async_run,
			iterable=iterable,
			read=read,
		)