Beispiel #1
0
def _wholegenome(reference, query, read_length, read_depth, min_alignment_quality, max_direct_repeat_length,
                 large_insertion_cutoff, query_id, output_prefix):
    """Run the find -> pair -> inferseq workflow on a query genome against a reference.

    Reads are generated from ``query`` with ``make_reads``, aligned to
    ``reference`` with ``bwatools``, sorted and indexed with ``samtools``, and
    the resulting BAM is then handed to ``_find``, ``_pair`` and
    ``_inferseq_assembly`` in turn.

    Args:
        reference: Path to the reference genome; indexed via ``bwatools`` if
            it is not indexed already.
        query: Path to the query genome; indexed via ``bowtie2tools`` if it
            is not indexed already.
        read_length: Forwarded to ``make_reads``.
        read_depth: Forwarded to ``make_reads``.
        min_alignment_quality: Forwarded to ``_find`` and ``_pair``.
        max_direct_repeat_length: Forwarded to ``_pair``; also used to derive
            ``min_alignment_inner_length`` (``+ 1``) and the
            ``min_distance_to_mate`` passed to ``_find`` (``+ 2``).
        large_insertion_cutoff: Forwarded to ``_find`` and ``_pair``.
        query_id: Passed to ``_find`` as ``sample_id``.
        output_prefix: Prefix for every path this function builds:
            ``<prefix>.find.tsv``, ``<prefix>.pair.tsv``,
            ``<prefix>.inferseq.tsv`` and the ``*.tmp.*`` intermediates.

    Raises:
        OSError: If one of the temporary files cannot be removed at the end.
    """
    # Function-scope import: the file's top-level import block is not visible
    # from here, so the dependency is brought into scope locally.
    import os

    min_alignment_inner_length = max_direct_repeat_length + 1

    if not bwatools.genome_is_indexed(reference):
        click.echo("Indexing reference...")
        bwatools.index_genome(reference)
    else:
        click.echo("Reference already indexed...")

    if not bowtie2tools.genome_is_indexed(query):
        click.echo("Indexing query...")
        bowtie2tools.index_genome(query)
    else:
        click.echo("Query already indexed...")

    click.echo("Making query reads...")
    query_reads_path = output_prefix + '.query.tmp.fq'
    make_reads(query, query_reads_path, read_length, read_depth)

    click.echo("Aligning query reads to reference...")
    out_sam = output_prefix + '.query.reference.tmp.sam'
    bwatools.align_to_genome_se(query_reads_path, reference, out_sam, threads=1, verbose=False)

    click.echo("Sorting and indexing alignment file...")
    out_bam = output_prefix + '.query.reference.tmp.bam'
    # NOTE(review): delete_in_bam=True presumably makes sort_coordinate remove
    # out_sam, which is why the SAM is not part of the cleanup below - confirm.
    samtools.sort_coordinate(out_sam, out_bam, delete_in_bam=True)
    samtools.index(out_bam)

    find_file = output_prefix + '.find.tsv'
    _find(out_bam, min_softclip_length=8, min_softclip_count=1, min_alignment_quality=min_alignment_quality,
          min_alignment_inner_length=min_alignment_inner_length, min_distance_to_mate=max_direct_repeat_length + 2,
          min_softclip_ratio=0.01, max_indel_ratio=0.0, large_insertion_cutoff=large_insertion_cutoff,
          min_count_consensus=1, sample_id=query_id, output_file=find_file)

    pair_file = output_prefix + '.pair.tsv'
    _pair(find_file, out_bam, reference, max_direct_repeat_length=max_direct_repeat_length,
          min_alignment_quality=min_alignment_quality, min_alignment_inner_length=min_alignment_inner_length,
          max_junction_spanning_prop=0.01, large_insertion_cutoff=large_insertion_cutoff, output_file=pair_file)

    inferseq_file = output_prefix + '.inferseq.tsv'
    _inferseq_assembly(pair_file, out_bam, query, reference, min_perc_identity=0.95,
                       max_internal_softclip_prop=0.01, max_inferseq_size=500000,
                       min_inferseq_size=30, keep_intermediate=False, output_file=inferseq_file)

    # Remove the intermediates directly rather than through a shell command:
    # the paths derive from the caller-supplied output_prefix, and an unquoted
    # 'rm %s' string would be split or misinterpreted on spaces and shell
    # metacharacters.
    for tmp_path in (query_reads_path, out_bam, out_bam + '.bai'):
        os.remove(tmp_path)
Beispiel #2
0
def _makedatabase(inferseqfiles, minimum_size, maximum_size, threads, memory, force, output_dir, prefix):
    """Combine inferseq files, cluster their sequences and index the result.

    Args:
        inferseqfiles: Sequence of inferseq file paths. If it holds exactly
            one entry and ``is_path_list`` accepts that entry, the entry is
            read as a text file with one inferseq path per line.
        minimum_size: Forwarded to ``combine_inferseq_files``.
        maximum_size: Forwarded to ``combine_inferseq_files``.
        threads: Forwarded to ``DatabaseMaker``.
        memory: Forwarded to ``DatabaseMaker``.
        force: When true, an existing ``output_dir`` is deleted and
            recreated; otherwise the function reports the conflict and exits.
        output_dir: Directory that receives the database.
        prefix: Base name of the output FASTA, written as
            ``<output_dir>/<prefix>.fna``.
    """

    click.echo("Parsing inferseq files")
    if len(inferseqfiles) == 1 and is_path_list(inferseqfiles[0]):
        # Read the list inside a context manager so the handle is closed
        # promptly, and skip blank lines so a trailing newline does not turn
        # into an empty-string path.
        with open(inferseqfiles[0], 'r') as path_list:
            inferseqfiles = [line.strip() for line in path_list if line.strip()]

    click.echo("Combining the inferseq files...")
    inferseq = combine_inferseq_files(inferseqfiles, minimum_size, maximum_size)

    database_maker = DatabaseMaker(inferseq, threads, memory, output_dir)

    try:
        makedirs(output_dir)

    except FileExistsError:

        if force:
            click.echo('Deleting old database directory...')
            rmtree(output_dir)
            makedirs(output_dir)
        else:
            click.echo('Output directory already exists. Use --force to overwrite directory.')
            # NOTE(review): sys.exit() with no argument reports success (status 0)
            # although this is an error path - confirm whether callers rely on it.
            sys.exit()

    if inferseq.shape[0] == 0:
        click.echo("No termini found in the input file...")

        # The returned frame is not used in this function; the call is kept
        # in case it has side effects - TODO confirm and drop if it has none.
        database_maker.get_header_dataframe()

    else:

        cluster_fna = database_maker.run_cluster_seqs()
        outfile = join(output_dir, prefix+'.fna')
        rename(cluster_fna, outfile)
        database_maker.remove_int_files()
        click.echo("Indexing database for use with bowtie2")
        index_genome(outfile)
def index_genome(inferseq_assembly):
    """Make sure the inferseq assembly is indexed, building the index if needed.

    The check and the build are both delegated to ``bowtie2tools``. A
    confirmation message is echoed in either case.
    """
    if bowtie2tools.genome_is_indexed(inferseq_assembly):
        # Nothing to build; only report the state.
        click.echo("Genome has been indexed...")
        return

    click.echo("Indexing inferseq assembly...")
    bowtie2tools.index_genome(inferseq_assembly)
    click.echo("Genome has been indexed...")
def index_database(inferseq_database):
    """Make sure the inferseq database is indexed, building the index if needed.

    ``bowtie2tools.genome_is_indexed`` decides whether
    ``bowtie2tools.index_genome`` has to run. The final status message is
    echoed whether or not an index was built.
    """
    index_missing = not bowtie2tools.genome_is_indexed(inferseq_database)

    if index_missing:
        click.echo("Indexing inferseq database...")
        bowtie2tools.index_genome(inferseq_database)

    click.echo("Database has been indexed...")
Beispiel #5
0
def index_genome(inferseq_reference):
    """Make sure the inferseq reference genome is indexed, building the index if needed.

    Uses ``bowtie2tools`` for both the check and the build, and always
    finishes by echoing that the genome has been indexed.
    """
    done_message = "Genome has been indexed..."

    already_indexed = bowtie2tools.genome_is_indexed(inferseq_reference)
    if already_indexed:
        click.echo(done_message)
        return

    click.echo("Indexing inferseq reference genome...")
    bowtie2tools.index_genome(inferseq_reference)
    click.echo(done_message)