def _wholegenome(reference, query, read_length, read_depth, min_alignment_quality, max_direct_repeat_length,
                 large_insertion_cutoff, query_id, output_prefix):
    """Run the find -> pair -> inferseq steps for a query genome against a reference.

    Reads are generated from ``query`` with ``make_reads``, aligned to
    ``reference`` with ``bwatools``, sorted and indexed with ``samtools``, and
    then passed through ``_find``, ``_pair`` and ``_inferseq_assembly``.

    Args:
        reference: Path to the reference genome; indexed with ``bwatools`` if
            it is not already indexed.
        query: Path to the query genome; indexed with ``bowtie2tools`` if it
            is not already indexed.
        read_length: Forwarded to ``make_reads``.
        read_depth: Forwarded to ``make_reads``.
        min_alignment_quality: Forwarded to ``_find`` and ``_pair``.
        max_direct_repeat_length: Forwarded to ``_pair``; also used to derive
            ``min_alignment_inner_length`` (+1) and ``min_distance_to_mate``
            (+2).
        large_insertion_cutoff: Forwarded to ``_find`` and ``_pair``.
        query_id: Passed to ``_find`` as ``sample_id``.
        output_prefix: Prefix for every file written by this function.

    Outputs ``<output_prefix>.find.tsv``, ``<output_prefix>.pair.tsv`` and
    ``<output_prefix>.inferseq.tsv`` are kept; the temporary reads, BAM and
    BAM index are removed at the end.
    """
    # Local import: only the cleanup command below needs it.
    import shlex

    min_alignment_inner_length = max_direct_repeat_length + 1

    if not bwatools.genome_is_indexed(reference):
        click.echo("Indexing reference...")
        bwatools.index_genome(reference)
    else:
        click.echo("Reference already indexed...")

    if not bowtie2tools.genome_is_indexed(query):
        click.echo("Indexing query...")
        bowtie2tools.index_genome(query)
    else:
        click.echo("Query already indexed...")

    click.echo("Making query reads...")
    query_reads_path = output_prefix + '.query.tmp.fq'
    make_reads(query, query_reads_path, read_length, read_depth)

    click.echo("Aligning query reads to reference...")
    out_sam = output_prefix + '.query.reference.tmp.sam'
    bwatools.align_to_genome_se(query_reads_path, reference, out_sam, threads=1, verbose=False)

    click.echo("Sorting and indexing alignment file...")
    out_bam = output_prefix + '.query.reference.tmp.bam'
    # NOTE(review): delete_in_bam=True presumably removes out_sam after
    # sorting, which would explain why it is not in the cleanup list -- confirm.
    samtools.sort_coordinate(out_sam, out_bam, delete_in_bam=True)
    samtools.index(out_bam)

    find_file = output_prefix + '.find.tsv'
    _find(
        out_bam,
        min_softclip_length=8,
        min_softclip_count=1,
        min_alignment_quality=min_alignment_quality,
        min_alignment_inner_length=min_alignment_inner_length,
        min_distance_to_mate=max_direct_repeat_length + 2,
        min_softclip_ratio=0.01,
        max_indel_ratio=0.0,
        large_insertion_cutoff=large_insertion_cutoff,
        min_count_consensus=1,
        sample_id=query_id,
        output_file=find_file,
    )

    pair_file = output_prefix + '.pair.tsv'
    _pair(
        find_file,
        out_bam,
        reference,
        max_direct_repeat_length=max_direct_repeat_length,
        min_alignment_quality=min_alignment_quality,
        min_alignment_inner_length=min_alignment_inner_length,
        max_junction_spanning_prop=0.01,
        large_insertion_cutoff=large_insertion_cutoff,
        output_file=pair_file,
    )

    inferseq_file = output_prefix + '.inferseq.tsv'
    _inferseq_assembly(
        pair_file,
        out_bam,
        query,
        reference,
        min_perc_identity=0.95,
        max_internal_softclip_prop=0.01,
        max_inferseq_size=500000,
        min_inferseq_size=30,
        keep_intermediate=False,
        output_file=inferseq_file,
    )

    # Quote each path so an output_prefix containing spaces or shell
    # metacharacters cannot break (or redirect) the cleanup command.
    # shlex.quote leaves ordinary paths untouched.
    temp_paths = (query_reads_path, out_bam, out_bam + '.bai')
    shell('rm %s %s %s' % tuple(shlex.quote(path) for path in temp_paths))
def _makedatabase(inferseqfiles, minimum_size, maximum_size, threads, memory, force, output_dir, prefix):
    """Combine inferseq files, cluster their sequences and index the result.

    Args:
        inferseqfiles: Sequence of inferseq file paths. If it holds a single
            entry for which ``is_path_list`` is true, that file is read as a
            list of paths, one per line.
        minimum_size: Forwarded to ``combine_inferseq_files``.
        maximum_size: Forwarded to ``combine_inferseq_files``.
        threads: Forwarded to ``DatabaseMaker``.
        memory: Forwarded to ``DatabaseMaker``.
        force: When true, an existing ``output_dir`` is deleted and recreated;
            otherwise the process exits if the directory already exists.
        output_dir: Directory that receives the database.
        prefix: Base name of the output FASTA (``<prefix>.fna``).

    When the combined table is non-empty, the clustered FASTA is written to
    ``<output_dir>/<prefix>.fna`` and indexed via ``index_genome``.
    """
    click.echo("Parsing inferseq files")
    if len(inferseqfiles) == 1 and is_path_list(inferseqfiles[0]):
        # Close the list file deterministically and ignore blank lines, which
        # would otherwise become empty-string paths.
        with open(inferseqfiles[0], 'r') as path_list:
            stripped_lines = (line.strip() for line in path_list)
            inferseqfiles = [path for path in stripped_lines if path]

    click.echo("Combining the inferseq files...")
    inferseq = combine_inferseq_files(inferseqfiles, minimum_size, maximum_size)

    database_maker = DatabaseMaker(inferseq, threads, memory, output_dir)

    try:
        makedirs(output_dir)
    except FileExistsError:
        if not force:
            click.echo('Output directory already exists. Use --force to overwrite directory.')
            # NOTE(review): exits with status 0 even though nothing was built;
            # left unchanged so existing callers see the same exit code.
            sys.exit()
        click.echo('Deleting old database directory...')
        rmtree(output_dir)
        makedirs(output_dir)

    if inferseq.shape[0] == 0:
        click.echo("No termini found in the input file...")
        # The return value was never used; the call is kept in case it has
        # side effects this block cannot see.
        database_maker.get_header_dataframe()
        # No FASTA is produced on this path, so there is nothing to index.
        return

    cluster_fna = database_maker.run_cluster_seqs()
    outfile = join(output_dir, prefix + '.fna')
    rename(cluster_fna, outfile)
    database_maker.remove_int_files()

    click.echo("Indexing database for use with bowtie2")
    index_genome(outfile)
def index_genome(inferseq_assembly):
    """Index *inferseq_assembly* through ``bowtie2tools`` unless it is already indexed.

    A progress message is echoed before indexing starts, and a confirmation
    message is echoed in every case once the function is done.
    """
    already_indexed = bowtie2tools.genome_is_indexed(inferseq_assembly)

    if not already_indexed:
        click.echo("Indexing inferseq assembly...")
        bowtie2tools.index_genome(inferseq_assembly)

    click.echo("Genome has been indexed...")
def index_database(inferseq_database):
    """Index *inferseq_database* through ``bowtie2tools`` unless it is already indexed.

    A progress message is echoed before indexing starts, and a confirmation
    message is echoed in every case once the function is done.
    """
    needs_index = not bowtie2tools.genome_is_indexed(inferseq_database)

    if needs_index:
        click.echo("Indexing inferseq database...")
        bowtie2tools.index_genome(inferseq_database)

    click.echo("Database has been indexed...")
def index_genome(inferseq_reference):
    """Index *inferseq_reference* through ``bowtie2tools`` unless it is already indexed.

    A progress message is echoed before indexing starts, and a confirmation
    message is echoed in every case once the function is done.
    """
    index_present = bowtie2tools.genome_is_indexed(inferseq_reference)

    if not index_present:
        click.echo("Indexing inferseq reference genome...")
        bowtie2tools.index_genome(inferseq_reference)

    click.echo("Genome has been indexed...")