Example #1
0
def create_data_dir(args, fasta_path, bam_path):
    """Stage indexed copies of the reference FASTA and BAM in ``<output_dir>/data``.

    The directory ``os.path.join(args.output_dir, "data")`` is created when
    missing.  ``fasta_path`` is copied to ``reference.fasta`` and ``bam_path``
    to ``reference.bam`` inside it; each copy is handed to the matching
    ``samtools`` indexer right after it is made.

    A target that already exists is left untouched, and in that case its
    indexer is not invoked either.

    Args:
        args: Object exposing an ``output_dir`` attribute.
        fasta_path: Path of the FASTA file to copy.
        bam_path: Path of the BAM file to copy.

    Returns:
        None.
    """
    print("++Creating data directory for bam2aln processing.")
    data_dir = os.path.join(args.output_dir, "data")
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    # (source file, name inside the data directory, indexer for the copy).
    # The FASTA entry comes first so files are staged in the same order as before.
    staged_files = (
        (fasta_path, "reference.fasta", samtools.faidx),
        (bam_path, "reference.bam", samtools.index),
    )
    for source_path, target_name, build_index in staged_files:
        target_path = os.path.join(data_dir, target_name)
        if os.path.exists(target_path):
            # Already staged by an earlier run; neither copy nor index again.
            continue
        shutil.copy2(source_path, target_path)
        build_index(target_path)
Example #2
0
def ssaha2_alignment(args, step_1="01_reference_conversion", step_2="02_reference_alignment"):
    """Align the reads to the reference with SSAHA2, reusing a finished run.

    The reference FASTA comes from ``prepare_reference(args, step_1)``.  All
    alignment output lives in ``os.path.join(args.output_dir, step_2)``.  A
    ``create_alignment.done`` file in that directory marks the step as
    complete and stores the resulting BAM path; when it is present the
    alignment is skipped and the stored path is loaded instead.

    Args:
        args: Object exposing ``output_dir`` and ``read_paths`` (an iterable
            of read file paths).
        step_1: Step name forwarded to ``prepare_reference``.
        step_2: Name of the sub-directory of ``args.output_dir`` used for the
            alignment output.

    Returns:
        A ``(fasta_path, bam_path)`` tuple.

    Raises:
        AssertionError: If the BAM path does not exist on disk or does not end
            in ``.bam``.
    """
    fasta_path = prepare_reference(args, step_1)

    step_2_dir = os.path.join(args.output_dir, step_2)
    step_2_done_file = os.path.join(step_2_dir, "create_alignment.done")

    if not os.path.exists(step_2_done_file):
        if not os.path.exists(step_2_dir):
            os.makedirs(step_2_dir)

        samtools.faidx(fasta_path)

        # The built reference shares the FASTA's path minus the ".fasta" text.
        built_reference_path = fasta_path.replace(".fasta", "")
        # NOTE(review): paths are interpolated into a shell command unquoted,
        # so paths containing spaces or shell metacharacters will break it.
        cmd = "ssaha2Build -rtype solexa -skip 1 -save {} {}".format(built_reference_path, fasta_path)
        print(cmd)
        status = os.system(cmd)
        if status != 0:
            # Report instead of silently continuing with a missing reference.
            print("++Warning: command exited with status {}: {}".format(status, cmd))

        cmd_fmt = "ssaha2 -disk 2 -save {} -kmer 13 -skip 1 -seeds 1 -score 12 -cmatch 9 -ckmer 1 -output sam_soft -outfile {} {}"

        # A real list (not a lazy iterator) because it is consumed twice:
        # by the loop below and again by prepare_alignment.
        sam_paths = [
            os.path.join(step_2_dir, os.path.basename(read_path.replace(".fastq", ".sam")))
            for read_path in args.read_paths
        ]
        for sam_path, read_path in zip(sam_paths, args.read_paths):
            cmd = cmd_fmt.format(built_reference_path, sam_path, read_path)
            print(cmd)
            # An existing SAM file is taken as the output of an earlier run.
            if not os.path.exists(sam_path):
                status = os.system(cmd)
                if status != 0:
                    print("++Warning: command exited with status {}: {}".format(status, cmd))

        # NOTE(review): the meaning of the trailing True is defined by
        # prepare_alignment, which is not visible here -- confirm.
        bam_path = prepare_alignment(fasta_path, sam_paths, step_2_dir, True)

        # Step: Mark step as completed.
        # The context manager guarantees the marker is flushed and closed.
        with open(step_2_done_file, "w") as done_file:
            p.dump(bam_path, done_file)

    else:
        print("++Reference alignment file has already been completed.")
        # NOTE(review): `p` is presumably pickle; only load marker files
        # written by this pipeline, never files from an untrusted source.
        with open(step_2_done_file, "r") as done_file:
            bam_path = p.load(done_file)
    assert os.path.exists(bam_path) and bam_path.endswith(".bam"), \
        "alignment did not yield an existing .bam file: {!r}".format(bam_path)

    print(" ".join((fasta_path, bam_path)))
    return fasta_path, bam_path