Esempio n. 1
0
def align_transcriptome(in_fasta, prefix, aligner, num_threads, t_alnm, ref_t, g_alnm, ref_g, post=True):
    """Align reads to a transcriptome and a genome, then post-process the alignments.

    Alignment is only run for the inputs whose alignment file is not supplied
    (``t_alnm`` / ``g_alnm`` equal to ``''``); the external ``minimap2`` or
    ``lastdb``/``lastal`` programs are invoked through the shell.

    Args:
        in_fasta: Path of the FASTA file holding the reads.
        prefix: Prefix used to build every output file name.
        aligner: ``"minimap2"`` or ``"LAST"``; any other value runs no aligner.
        num_threads: Thread count passed to the aligner (string or int).
        t_alnm: Existing transcriptome alignment file, or ``''`` to align.
        ref_t: Reference transcriptome passed to the aligner.
        g_alnm: Existing genome alignment file, or ``''`` to align.
        ref_g: Reference genome passed to the aligner.
        post: When false, return right after the alignment step.

    Returns:
        ``(t_alnm, g_alnm)`` when ``post`` is false; otherwise
        ``(t_alnm_ext, unaligned_length, g_alnm, t_alnm, strandness)``, where
        ``unaligned_length`` and ``strandness`` are the values returned by the
        transcriptome post-processing helper.

    Raises:
        ValueError: If post-processing is requested and an alignment file
            does not end in ``.maf`` or ``.sam``.
    """
    # str() lets callers pass the thread count as an int as well as a string.
    threads = str(num_threads)

    # NOTE: paths are interpolated unquoted into shell command lines.
    if t_alnm == '':
        if aligner == "minimap2":
            t_alnm = prefix + "_transcriptome_alnm.sam"
            # Alignment to reference transcriptome
            sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with minimap2 to reference transcriptome\n")
            call("minimap2 --cs -ax map-ont -t " + threads + " " + ref_t + " " + in_fasta + " > " + t_alnm,
                 shell=True)

        elif aligner == "LAST":
            t_alnm = prefix + "_transcriptome_alnm.maf"
            # Alignment to reference transcriptome
            sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with LAST to reference transcriptome\n")
            call("lastdb ref_transcriptome " + ref_t, shell=True)
            call("lastal -a 1 -P " + threads + " ref_transcriptome " + in_fasta + " > " + t_alnm, shell=True)

    if g_alnm == '':
        if aligner == "minimap2":
            g_alnm = prefix + "_genome_alnm.sam"
            # Alignment to reference genome
            # [EDIT] I may change the options for minimap2 when dealing with cDNA and dRNA reads.
            sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with minimap2 to reference genome\n")
            call("minimap2 --cs -ax splice -t " + threads + " " + ref_g + " " + in_fasta + " > " + g_alnm, shell=True)

        elif aligner == "LAST":
            g_alnm = prefix + "_genome_alnm.maf"
            # Alignment to reference genome
            sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with LAST to reference genome\n")
            call("lastdb ref_genome " + ref_g, shell=True)
            call("lastal -a 1 -P " + threads + " ref_genome " + in_fasta + " > " + g_alnm, shell=True)

    if not post:
        return t_alnm, g_alnm

    # post-process
    t_alnm_ext = os.path.splitext(t_alnm)[1][1:]
    g_alnm_ext = os.path.splitext(g_alnm)[1][1:]

    # Fail early with a clear message: an unsupported extension (including the
    # empty one left behind by an unknown aligner) would otherwise leave
    # unaligned_length/strandness unassigned and crash at the return statement.
    for label, path, ext in (("transcriptome", t_alnm, t_alnm_ext),
                             ("genome", g_alnm, g_alnm_ext)):
        if ext not in ("maf", "sam"):
            raise ValueError("Unsupported %s alignment file %r: expected a .maf or .sam file"
                             % (label, path))

    sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Processing transcriptome alignment file: " + t_alnm_ext + '\n')
    if t_alnm_ext == "maf":
        processed_maf_t = prefix + "_transcriptome_alnm_processed.maf"
        # Keep only the sequence ("s ") lines of the MAF file.
        call("grep '^s ' " + t_alnm + " > " + processed_maf_t, shell=True)
        unaligned_length, strandness = get_besthit_maf.besthit_and_unaligned(in_fasta, processed_maf_t, prefix + "_transcriptome")
    else:
        unaligned_length, strandness = get_primary_sam.primary_and_unaligned(t_alnm, prefix + "_transcriptome")

    sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Processing genome alignment file: " + g_alnm_ext + '\n')
    # The genome helpers are run for their side effects only; their return
    # values are discarded.
    if g_alnm_ext == "maf":
        processed_maf = prefix + "_processed.maf"
        call("grep '^s ' " + g_alnm + " > " + processed_maf, shell=True)
        get_besthit_maf.besthit_and_unaligned(in_fasta, processed_maf, prefix + "_genome")
    else:
        get_primary_sam.primary_and_unaligned(g_alnm, prefix + "_genome")

    return t_alnm_ext, unaligned_length, g_alnm, t_alnm, strandness
Esempio n. 2
0
def align_genome(in_fasta, prefix, aligner, num_threads, g_alnm, ref_g):
    """Obtain a genome alignment for the reads and extract unaligned-read data.

    When ``g_alnm`` is given it is post-processed directly; otherwise the
    reads are aligned with minimap2 (also the default for an empty
    ``aligner``) or LAST through the shell and the result is post-processed.

    Args:
        in_fasta: Path of the FASTA file holding the reads.
        prefix: Prefix used to build output file names.
        aligner: ``"minimap2"``, ``""`` (minimap2) or ``"LAST"``.
        num_threads: Thread count passed to the aligner (string or int).
        g_alnm: Existing alignment file (``.maf`` or ``.sam``), or ``''``.
        ref_g: Reference genome passed to the aligner.

    Returns:
        ``(file_extension, unaligned_length, strandness)`` where
        ``file_extension`` is ``"maf"`` or ``"sam"`` and the other two values
        are those returned by the post-processing helper.

    Raises:
        ValueError: If the supplied alignment file is neither ``.maf`` nor
            ``.sam``, or the aligner name is not recognised.
    """
    # str() lets callers pass the thread count as an int as well as a string.
    threads = str(num_threads)

    # if an alignment file is provided
    if g_alnm != '':
        file_extension = os.path.splitext(g_alnm)[1][1:]
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") + ": Processing alignment file: " +
            file_extension + "\n")
        if file_extension == "maf":
            processed_maf = prefix + "_processed.maf"

            # Keep only the sequence ("s ") lines of the MAF file.
            call("grep '^s ' " + g_alnm + " > " + processed_maf, shell=True)

            # get best hit and unaligned reads
            unaligned_length, strandness = get_besthit_maf.besthit_and_unaligned(
                in_fasta, processed_maf, prefix)

        elif file_extension == "sam":
            # get the primary alignments and define unaligned reads.
            unaligned_length, strandness = get_primary_sam.primary_and_unaligned(
                g_alnm, prefix)

        else:
            # Previously fell through and crashed on the unassigned return values.
            raise ValueError(
                "Unsupported alignment file %r: expected a .maf or .sam file"
                % (g_alnm,))

    # if alignment file is not provided
    elif aligner == "minimap2" or aligner == "":  # Align with minimap2 by default
        file_extension = "sam"
        out_sam = prefix + "_genome_alnm.sam"
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with minimap2\n")
        call("minimap2 --cs -ax map-ont -t " + threads + " " + ref_g +
             " " + in_fasta + " > " + out_sam,
             shell=True)
        # get primary alignments and unaligned reads
        unaligned_length, strandness = get_primary_sam.primary_and_unaligned(
            out_sam, prefix)

    elif aligner == "LAST":
        file_extension = "maf"
        out_maf = prefix + "_genome_alnm.maf"
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with LAST\n")
        # Build the LAST database from the reference genome parameter (the
        # original referenced an undefined name here).
        call("lastdb ref_genome " + ref_g, shell=True)
        call("lastal -a 1 -P " + threads + " ref_genome " + in_fasta +
             " | grep '^s ' > " + out_maf,
             shell=True)
        unaligned_length, strandness = get_besthit_maf.besthit_and_unaligned(
            in_fasta, out_maf, prefix)

    else:
        raise ValueError(
            "Unsupported aligner %r: expected minimap2 or LAST" % (aligner,))

    return file_extension, unaligned_length, strandness
Esempio n. 3
0
def align_genome(in_fasta,
                 prefix,
                 aligner,
                 num_threads,
                 g_alnm,
                 ref_g,
                 chimeric,
                 quantification=None,
                 q_mode=False):
    """Align reads to the genome if needed, then post-process the alignment.

    Args:
        in_fasta: Path of the FASTA file holding the reads.
        prefix: Prefix used to build output file names.
        aligner: ``"minimap2"`` or ``"LAST"``; only used when ``g_alnm`` is ``''``.
        num_threads: Thread count passed to the aligner (string or int).
        g_alnm: Existing alignment file (``.maf`` or ``.sam``), or ``''`` to
            run the aligner.
        ref_g: Reference genome passed to the aligner.
        chimeric: When true, SAM files are processed with
            ``get_primary_sam.primary_and_unaligned_chimeric``.
        quantification: Forwarded unchanged to the SAM post-processing helper.
        q_mode: Forwarded unchanged to the chimeric SAM helper.

    Returns:
        ``(file_extension, unaligned_length, strandness)`` where
        ``file_extension`` is ``"maf"`` or ``"sam"`` and the other two values
        are those returned by the post-processing helper.

    Raises:
        ValueError: If the alignment file to post-process is neither ``.maf``
            nor ``.sam`` (which also happens when no file was supplied and the
            aligner name is not recognised).
    """
    # str() lets callers pass the thread count as an int as well as a string.
    threads = str(num_threads)

    # if an alignment file is not provided
    if g_alnm == '':
        if aligner == "minimap2":  # Align with minimap2 by default
            g_alnm = prefix + "_genome_alnm.sam"
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with minimap2\n")
            sys.stdout.flush()
            call("minimap2 --cs -ax map-ont -t " + threads + " " + ref_g +
                 " " + in_fasta + " > " + g_alnm,
                 shell=True)

        elif aligner == "LAST":
            g_alnm = prefix + "_genome_alnm.maf"
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with LAST\n")
            sys.stdout.flush()
            call("lastdb ref_genome " + ref_g, shell=True)
            # Redirect lastal's output into the MAF file; without the " > "
            # the output path was passed to lastal as a second query file and
            # the alignment file was never written.
            call("lastal -a 1 -P " + threads + " ref_genome " + in_fasta +
                 " > " + g_alnm,
                 shell=True)

    # post-process
    file_extension = os.path.splitext(g_alnm)[1][1:]
    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") + ": Processing alignment file: " +
        file_extension + "\n")
    sys.stdout.flush()
    if file_extension == "maf":
        processed_maf = prefix + "_processed.maf"
        # Keep only the sequence ("s ") lines of the MAF file.
        call("grep '^s ' " + g_alnm + " > " + processed_maf, shell=True)

        # get best hit and unaligned reads
        unaligned_length, strandness = get_besthit_maf.besthit_and_unaligned(
            in_fasta, processed_maf, prefix)

    elif file_extension == "sam":
        # get the primary alignments and define unaligned reads.
        if chimeric:
            unaligned_length, strandness = get_primary_sam.primary_and_unaligned_chimeric(
                g_alnm, prefix, quantification, q_mode)
        else:
            unaligned_length, strandness = get_primary_sam.primary_and_unaligned(
                g_alnm, prefix, quantification)

    else:
        # Previously fell through and crashed on the unassigned return values.
        raise ValueError(
            "Unsupported alignment file %r: expected a .maf or .sam file"
            % (g_alnm,))

    return file_extension, unaligned_length, strandness
Esempio n. 4
0
def main(argv):
    """Run the genome read-characterization pipeline from command-line options.

    Steps, in order: parse ``argv`` with getopt, rewrite the input FASTA so
    that read names contain no whitespace and each sequence is on one line,
    obtain alignments (from ``-m`` or by running minimap2/LAST through the
    shell), write the unaligned-length ECDF file, build the match/error
    histograms and, unless ``--no_model_fit`` is given, fit the models.

    Args:
        argv: Command-line arguments without the program name.

    The process exits with status 1 on invalid options or arguments and with
    status 0 after ``-h``.
    """
    # Parse input and output files
    infile = ''
    prefix = 'training'
    ref = ''
    aligner = ''
    alnm_file = ''
    model_fit = True
    num_threads = '1'
    num_bins = 20
    try:
        opts, args = getopt.getopt(
            argv, "hi:r:a:o:m:b:t:",
            ["infile=", "ref=", "prefix=", "no_model_fit"])
    except getopt.GetoptError:
        usage()
        sys.exit(1)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit(0)
        elif opt in ("-i", "--infile"):
            infile = arg
        elif opt in ("-r", "--ref"):
            ref = arg
        elif opt == "-a":
            aligner = arg
        elif opt == "-m":
            alnm_file = arg
        elif opt in ("-o", "--prefix"):
            prefix = arg
        elif opt == "--no_model_fit":
            model_fit = False
        elif opt == "-b":
            num_bins = max(int(arg), 1)
        elif opt == "-t":
            num_threads = arg
        else:
            usage()
            sys.exit(1)

    if infile == '' or ref == '':
        print("Please specify the training reads and its reference genome!")
        usage()
        sys.exit(1)

    if aligner != '' and alnm_file != '':
        print(
            "Please specify either an alignment file (-m ) OR an aligner to use for alignment (-a )"
        )
        usage()
        sys.exit(1)

    # READ PRE-PROCESS AND ALIGNMENT ANALYSIS
    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") +
        ": Read pre-process and unaligned reads analysis\n")

    # Read pre-process
    in_fasta = prefix + "_processed.fasta"  # use the prefix of input fasta file for processed fasta file
    dic_reads = {}
    with open(infile, 'r') as f:
        for line in f:
            if line[0] == '>':
                # Replace whitespace in the header with '-' to get a single-token name.
                name = '-'.join(line.strip()[1:].split())
                dic_reads[name] = ""
            else:
                dic_reads[name] += line.strip()
    # "with" guarantees the processed FASTA is closed even if a write fails.
    with open(in_fasta, 'w') as processed_fasta:
        for k, v in dic_reads.items():
            processed_fasta.write('>' + k + '\n' + v + '\n')

    del dic_reads

    # if an alignment file is provided
    if alnm_file != '':
        pre, file_ext = os.path.splitext(alnm_file)
        file_extension = file_ext[1:]
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") + ": Processing alignment file: " +
            file_extension + "\n")
        if file_extension == "maf":
            processed_maf = prefix + "_processed.maf"

            # Keep only the sequence ("s ") lines of the MAF file.
            call("grep '^s ' " + alnm_file + " > " + processed_maf, shell=True)

            # get best hit and unaligned reads
            unaligned_length = get_besthit_maf.besthit_and_unaligned(
                in_fasta, processed_maf, prefix)

        elif file_extension == "sam":
            # get the primary alignments and define unaligned reads.
            unaligned_length = get_primary_sam.primary_and_unaligned(
                alnm_file, prefix)
        else:
            print(
                "Please specify an acceptable alignment format! (maf or sam)\n"
            )
            usage()
            sys.exit(1)

    # if alignment file is not provided
    else:
        if aligner == "minimap2" or aligner == "":  # Align with minimap2 by default
            file_extension = "sam"
            out_sam = prefix + ".sam"
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with minimap2\n")
            # Honour -t for minimap2 as well; it was previously only passed to LAST.
            call("minimap2 --cs -ax map-ont -t " + num_threads + " " + ref +
                 " " + in_fasta + " > " + out_sam,
                 shell=True)
            # get primary alignments and unaligned reads
            unaligned_length = get_primary_sam.primary_and_unaligned(
                out_sam, prefix)
        elif aligner == "LAST":
            file_extension = "maf"
            out_maf = prefix + ".maf"
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") + ": Alignment with LAST\n")
            call("lastdb ref_genome " + ref, shell=True)
            call("lastal -a 1 -P " + num_threads + " ref_genome " + in_fasta +
                 " | grep '^s ' > " + out_maf,
                 shell=True)
            unaligned_length = get_besthit_maf.besthit_and_unaligned(
                in_fasta, out_maf, prefix)
        else:
            print("Please specify an acceptable aligner (minimap2 or LAST)\n")
            usage()
            sys.exit(1)

    # Aligned reads analysis
    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") + ": Aligned reads analysis\n")
    num_aligned = align.head_align_tail(prefix, num_bins, file_extension)

    # Length distribution of unaligned reads
    num_unaligned = len(unaligned_length)
    with open(prefix + "_unaligned_length_ecdf", 'w') as out_unaligned_ecdf:
        if num_unaligned != 0:
            max_length = max(unaligned_length)
            # Density histogram over 50-wide bins; density * bin width summed
            # cumulatively gives the empirical CDF.
            hist_unaligned, edges_unaligned = numpy.histogram(
                unaligned_length,
                bins=numpy.arange(0, max_length + 50, 50),
                density=True)
            cdf = numpy.cumsum(hist_unaligned * 50)
            out_unaligned_ecdf.write("Aligned / Unaligned ratio:" + "\t" +
                                     str(num_aligned * 1.0 / num_unaligned) + '\n')
            out_unaligned_ecdf.write("bin\t0-" + str(max_length) + '\n')
            # range() works on both Python 2 and 3 (xrange is Python 2 only).
            for i in range(len(cdf)):
                out_unaligned_ecdf.write(
                    str(edges_unaligned[i]) + '-' + str(edges_unaligned[i + 1]) +
                    "\t" + str(cdf[i]) + '\n')
        else:
            out_unaligned_ecdf.write("Aligned / Unaligned ratio:\t100%\n")

    del unaligned_length

    # MATCH AND ERROR MODELS
    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") + ": match and error models\n")
    error_model.hist(prefix, file_extension)

    if model_fit:
        sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Model fitting\n")
        model_fitting.model_fitting(prefix, int(num_threads))

    sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Finished!\n")
Esempio n. 5
0
def align_transcriptome(in_fasta,
                        prefix,
                        aligner,
                        num_threads,
                        t_alnm,
                        ref_t,
                        g_alnm=None,
                        ref_g=None):
    """Obtain transcriptome (and optionally genome) alignments for the reads.

    The transcriptome alignment is taken from ``t_alnm`` or produced with
    minimap2/LAST through the shell, then post-processed to get the unaligned
    read data. A genome alignment is only handled when ``g_alnm`` or
    ``ref_g`` is truthy; it is prepared but not post-processed here.

    Args:
        in_fasta: Path of the FASTA file holding the reads.
        prefix: Prefix used to build output file names.
        aligner: ``"minimap2"`` or ``"LAST"``.
        num_threads: Thread count passed to the aligner (string or int).
        t_alnm: Existing transcriptome alignment (``.maf``/``.sam``), or ``""``.
        ref_t: Reference transcriptome passed to the aligner.
        g_alnm: Optional existing genome alignment file.
        ref_g: Optional reference genome; used when ``g_alnm`` is not given.

    Returns:
        ``(t_alnm_ext, unaligned_length, out_g, out_t, strandness)`` where
        ``out_t``/``out_g`` are the alignment file paths (``out_g`` is
        ``None`` when no genome alignment was supplied or produced).

    Raises:
        ValueError: If ``t_alnm`` is neither ``.maf`` nor ``.sam``, or if no
            ``t_alnm`` is given and ``aligner`` is not recognised.
    """
    # str() lets callers pass the thread count as an int as well as a string.
    threads = str(num_threads)

    out_g = None
    if t_alnm != "":
        out_t = t_alnm
        t_alnm_ext = os.path.splitext(t_alnm)[1][1:]
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") +
            ": Processing the transcriptome alignment file: " + t_alnm_ext +
            "\n")
        if t_alnm_ext == "maf":
            processed_maf_t = prefix + "_transcriptome_alnm_processed.maf"
            # Keep only the sequence ("s ") lines of the MAF file.
            call("grep '^s ' " + t_alnm + " > " + processed_maf_t, shell=True)

            unaligned_length, strandness = get_besthit_maf.besthit_and_unaligned(
                in_fasta, processed_maf_t, prefix)

        elif t_alnm_ext == "sam":
            unaligned_length, strandness = get_primary_sam.primary_and_unaligned(
                t_alnm, prefix)

        else:
            # Previously fell through and crashed on the unassigned return values.
            raise ValueError(
                "Unsupported transcriptome alignment file %r: expected a .maf or .sam file"
                % (t_alnm,))

    elif aligner == "minimap2":
        t_alnm_ext = "sam"
        out_t = prefix + "_transcriptome_alnm.sam"
        # Alignment to reference transcriptome
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") +
            ": Alignment with minimap2 to reference transcriptome\n")
        call("minimap2 --cs -ax map-ont -t " + threads + " " + ref_t +
             " " + in_fasta + " > " + out_t,
             shell=True)

        unaligned_length, strandness = get_primary_sam.primary_and_unaligned(
            out_t, prefix)

    elif aligner == "LAST":
        t_alnm_ext = "maf"
        out_t = prefix + "_transcriptome_alnm.maf"
        # Alignment to reference transcriptome
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") +
            ": Alignment with LAST to reference transcriptome\n")
        call("lastdb ref_transcriptome " + ref_t, shell=True)
        call("lastal -a 1 -P " + threads + " ref_transcriptome " +
             in_fasta + " | grep '^s ' > " + out_t,
             shell=True)

        unaligned_length, strandness = get_besthit_maf.besthit_and_unaligned(
            in_fasta, out_t, prefix)

    else:
        raise ValueError(
            "Unsupported aligner %r: expected minimap2 or LAST" % (aligner,))

    if g_alnm:
        out_g = g_alnm
        g_alnm_ext = os.path.splitext(g_alnm)[1][1:]
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") +
            ": Processing the genome alignment file: " + g_alnm_ext + "\n")
        if g_alnm_ext == "maf":
            processed_maf_g = prefix + "_genome_alnm_processed.maf"
            call("grep '^s ' " + g_alnm + " > " + processed_maf_g,
                 shell=True)
    elif ref_g:
        # An unrecognised aligner leaves out_g as None here, as before.
        if aligner == "minimap2":
            out_g = prefix + "_genome_alnm.sam"
            # Alignment to reference genome
            # [EDIT] I may change the options for minimap2 when dealing with cDNA and dRNA reads.
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") +
                ": Alignment with minimap2 to reference genome\n")
            call("minimap2 -ax splice -t " + threads + " " + ref_g +
                 " " + in_fasta + " > " + out_g,
                 shell=True)

        elif aligner == "LAST":
            out_g = prefix + "_genome_alnm.maf"
            # Alignment to reference genome
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") +
                ": Alignment with LAST to reference genome\n")
            call("lastdb ref_genome " + ref_g, shell=True)
            call("lastal -a 1 -P " + threads + " ref_genome " +
                 in_fasta + " | grep '^s ' > " + out_g,
                 shell=True)

    return t_alnm_ext, unaligned_length, out_g, out_t, strandness
Esempio n. 6
0
def main():
    """Run the transcriptome read-characterization pipeline from the command line.

    Steps, in order: parse options with argparse, quantify transcript
    abundance with minimap2, normalise the input FASTA, prepare the
    annotation with GenomeTools (``gt``), obtain genome and transcriptome
    alignments (supplied or via minimap2/LAST), write the unaligned-length
    ECDF file, build the match/error histograms, model intron retention and
    fit the models. External programs are invoked through the shell.

    The process exits with status 1 on invalid argument combinations, and
    also after a ``--quantify`` or ``--detect_IR`` run.
    """
    parser = argparse.ArgumentParser(
        description='Given the read profiles from characterization step, ' \
                    'simulate transcriptome ONT reads and output error profiles',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('-i',
                        '--read',
                        help='Input read for training.',
                        required=True)
    parser.add_argument('-rg',
                        '--ref_g',
                        help='Reference genome.',
                        required=True)
    parser.add_argument('-rt',
                        '--ref_t',
                        help='Reference Transcriptome.',
                        required=True)
    parser.add_argument('-annot',
                        '--annot',
                        help='Annotation file in ensemble GTF/GFF formats.',
                        required=True)
    parser.add_argument(
        '-a',
        '--aligner',
        help='The aligner to be used minimap2 or LAST (Default = minimap2)',
        default='minimap2')
    parser.add_argument(
        '-ga',
        '--g_alnm',
        help='Genome alignment file in sam or maf format (optional)',
        default='')
    parser.add_argument(
        '-ta',
        '--t_alnm',
        help='Transcriptome alignment file in sam or maf format (optional)',
        default='')
    parser.add_argument('-o',
                        '--output',
                        help='The output name and location for profiles',
                        default="training")
    parser.add_argument('--no_model_fit',
                        help='Disable model fitting step',
                        action='store_true')
    parser.add_argument('--no_intron_retention',
                        help='Disable Intron Retention analysis',
                        action='store_true')
    parser.add_argument(
        '--detect_IR',
        help='Detect Intron Retention events using input reads and exit',
        action='store_true')
    # type=int: without it a user-supplied value is a string, which breaks the
    # max(..., 1) clamping below.
    parser.add_argument('-b',
                        '--num_bins',
                        help='Number of bins to be used (Default = 20)',
                        type=int,
                        default=20)
    parser.add_argument(
        '-t',
        '--num_threads',
        help=
        'Number of threads to be used in alignments and model fitting (Default = 1)',
        type=int,
        default=1)
    parser.add_argument('--quantify',
                        help='Quantify expression profile of input reads',
                        action='store_true')

    args = parser.parse_args()

    infile = args.read
    ref_g = args.ref_g
    ref_t = args.ref_t
    annot = args.annot
    aligner = args.aligner
    g_alnm = args.g_alnm
    t_alnm = args.t_alnm
    outfile = args.output
    num_bins = max(args.num_bins, 1)
    num_threads = max(args.num_threads, 1)
    # String form for building shell command lines.
    threads = str(num_threads)

    model_fit = not args.no_model_fit
    intron_retention = not args.no_intron_retention
    detect_IR = args.detect_IR
    quantify = args.quantify

    print("Running the characterization step with following arguments: \n")
    print("infile", infile)
    print("ref_g", ref_g)
    print("ref_t", ref_t)
    print("annot", annot)
    print("aligner", aligner)
    print("g_alnm", g_alnm)
    print("t_alnm", t_alnm)
    print("outfile", outfile)
    print("model_fit", model_fit)
    print("num_bins", num_bins)
    print("num_threads", num_threads)
    print("detect_IR", detect_IR)
    print("quantify", quantify)

    #Quantifying the transcript abundance from input read
    sys.stdout.write('Quantifying transcripts abundance: \n')
    call("minimap2 -t " + threads + " -x map-ont -p0 " + ref_t + " " +
         infile + " > " + outfile + "_mapping.paf",
         shell=True)
    call("python nanopore_transcript_abundance.py -i " + outfile +
         "_mapping.paf > " + outfile + "_abundance.tsv",
         shell=True)
    sys.stdout.write('Finished! \n')

    if quantify:
        # NOTE(review): exits with status 1 although quantification finished;
        # kept unchanged in case callers rely on it - confirm and consider 0.
        sys.exit(1)

    if (g_alnm != '' and t_alnm == '') or (g_alnm == '' and t_alnm != ''):
        print(
            "Please specify either both alignment files (-ga and -ta) OR an aligner to use for alignment (-a)"
        )
        usage()
        sys.exit(1)
    if g_alnm != "" and t_alnm != "":
        g_alnm_ext = os.path.splitext(g_alnm)[1][1:]
        t_alnm_ext = os.path.splitext(t_alnm)[1][1:]
        if g_alnm_ext != t_alnm_ext:
            print(
                "Please provide both alignments in a same format: sam OR maf\n"
            )
            usage()
            sys.exit(1)

    # READ PRE-PROCESS AND UNALIGNED READS ANALYSIS
    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") +
        ": Read pre-process and unaligned reads analysis\n")

    # Read pre-process
    in_fasta = outfile + ".fasta"
    if in_fasta == infile:
        # Never overwrite the input file with its processed copy.
        in_fasta = outfile + "_processed.fasta"
    dic_reads = {}
    with open(infile, 'r') as f:
        for line in f:
            if line[0] == '>':
                # Replace whitespace in the header with '-' to get a single-token name.
                name = '-'.join(line.strip()[1:].split())
                dic_reads[name] = ""
            else:
                dic_reads[name] += line.strip()
    # "with" guarantees the processed FASTA is closed even if a write fails.
    with open(in_fasta, 'w') as out_fasta:
        for k, v in dic_reads.items():
            out_fasta.write('>' + k + '\n' + v + '\n')

    del dic_reads

    # Read the annotation GTF/GFF3 file
    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") +
        ": Parse the annotation file (GTF/GFF3)\n")
    # If gtf provided, convert to GFF3 (gt gtf_to_gff3)
    annot_file_extension = os.path.splitext(annot)[1][1:]
    if annot_file_extension.upper() == "GTF":
        gff3_file = outfile + ".gff3"
        # The space before the input path was missing, which glued the output
        # and input paths into a single argument.
        call("gt gtf_to_gff3 -tidy -o " + gff3_file + " " + annot,
             shell=True)
    else:
        gff3_file = annot

    # Next, add intron info into gff3. Read the file converted above (or the
    # annotation as given) rather than a path guessed from the annotation name.
    call("gt gff3 -tidy -retainids -checkids -addintrons -o " + outfile +
         "_addedintron.gff3 " + gff3_file,
         shell=True)

    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") +
        ": Read the length of reference transcripts \n")
    #Read the length of reference transcripts from the reference transcriptome
    dict_ref_len = {}
    with open(ref_t) as f:
        for line in f:
            if line.startswith(">"):
                ref_id = line.split()[0][1:]
                dict_ref_len[ref_id] = 0
            else:
                dict_ref_len[ref_id] += len(line.strip())

    #If both alignment files are provided:
    if g_alnm != "" and t_alnm != "":
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") +
            ": Processing the alignment files: " + t_alnm_ext + "\n")
        if t_alnm_ext == "maf":
            outmaf_g = outfile + "_genome_alnm.maf"
            outmaf_t = outfile + "_transcriptome_alnm.maf"
            # Avoid clobbering a supplied alignment that already uses the
            # default output name.
            if outmaf_g == g_alnm:
                outmaf_g = outfile + "_genome_alnm_processed.maf"
            if outmaf_t == t_alnm:
                outmaf_t = outfile + "_transcriptome_alnm_processed.maf"

            call("grep '^s ' " + g_alnm + " > " + outmaf_g, shell=True)
            call("grep '^s ' " + t_alnm + " > " + outmaf_t, shell=True)

            unaligned_length = list(
                get_besthit_maf.besthit_and_unaligned(in_fasta, outmaf_t,
                                                      outfile))

        elif t_alnm_ext == "sam":

            unaligned_length = list(
                get_primary_sam.primary_and_unaligned(g_alnm, t_alnm, outfile))

        else:
            # Previously fell through with unaligned_length unassigned.
            print(
                "Please specify an acceptable alignment format! (maf or sam)\n"
            )
            usage()
            sys.exit(1)

    else:
        if aligner == "minimap2":
            t_alnm_ext = "sam"
            outsam_g = outfile + "_genome_alnm.sam"
            outsam_t = outfile + "_transcriptome_alnm.sam"
            # Alignment to reference genome

            # [EDIT] I should change the options for minimap when dealing with cDNA and dRNA reads.
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") +
                ": Alignment with minimap2 to reference genome\n")
            # Pass the requested thread count; it was previously ignored here.
            call("minimap2 -ax splice -t " + threads + " " + ref_g + " " +
                 in_fasta + " > " + outsam_g,
                 shell=True)
            # Alignment to reference transcriptome
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") +
                ": Alignment with minimap2 to reference transcriptome\n")
            call("minimap2 --cs -ax map-ont -t " + threads + " " + ref_t +
                 " " + in_fasta + " > " + outsam_t,
                 shell=True)

            # [EDIT] I may add a script to remove minimap2/LAST post-alignment files after alignment.
            unaligned_length = list(
                get_primary_sam.primary_and_unaligned(outsam_g, outsam_t,
                                                      outfile))

        elif aligner == "LAST":
            t_alnm_ext = "maf"
            outmaf_g = outfile + "_genome_alnm.maf"
            outmaf_t = outfile + "_transcriptome_alnm.maf"
            # Alignment to reference genome
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") +
                ": Alignment with LAST to reference genome\n")
            call("lastdb ref_genome " + ref_g, shell=True)
            # "threads" is the string form; concatenating the int raised TypeError.
            call("lastal -a 1 -P " + threads + " ref_genome " + in_fasta +
                 " | grep '^s ' > " + outmaf_g,
                 shell=True)
            # Alignment to reference transcriptome
            sys.stdout.write(
                strftime("%Y-%m-%d %H:%M:%S") +
                ": Alignment with LAST to reference transcriptome\n")
            call("lastdb ref_transcriptome " + ref_t, shell=True)
            call("lastal -a 1 -P " + threads + " ref_transcriptome " +
                 in_fasta + " | grep '^s ' > " + outmaf_t,
                 shell=True)

            unaligned_length = list(
                get_besthit_maf.besthit_and_unaligned(in_fasta, outmaf_t,
                                                      outfile))

        else:
            print("Please specify an acceptable aligner (minimap2 or LAST)\n")
            usage()
            sys.exit(1)

    if detect_IR:
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") +
            ": Detecting Intron Retention events using input reads\n")
        model_ir.intron_retention(outfile, ref_t)
        sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Finished\n")
        # NOTE(review): exits with status 1 although detection finished;
        # kept unchanged in case callers rely on it - confirm and consider 0.
        sys.exit(1)

    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") +
        ": Reads length distribution analysis\n")
    # Aligned reads length distribution analysis
    count_aligned = align.head_align_tail(outfile, num_bins, t_alnm_ext,
                                          dict_ref_len)

    # Unaligned reads length distribution analysis
    count_unaligned = len(unaligned_length)
    with open(outfile + "_unaligned_length_ecdf", 'w') as out1:
        if count_unaligned != 0:
            max_length = max(unaligned_length)
            # Density histogram over 50-wide bins; density * bin width summed
            # cumulatively gives the empirical CDF.
            hist_unaligned, edges_unaligned = numpy.histogram(
                unaligned_length,
                bins=numpy.arange(0, max_length + 50, 50),
                density=True)
            cdf = numpy.cumsum(hist_unaligned * 50)
            out1.write("Aligned / Unaligned ratio:" + "\t" +
                       str(count_aligned * 1.0 / count_unaligned) + '\n')
            out1.write("bin\t0-" + str(max_length) + '\n')
            # range() works on both Python 2 and 3 (xrange is Python 2 only).
            for i in range(len(cdf)):
                out1.write(
                    str(edges_unaligned[i]) + '-' + str(edges_unaligned[i + 1]) +
                    "\t" + str(cdf[i]) + '\n')
        else:
            out1.write("Aligned / Unaligned ratio:\t100%\n")

    # MATCH AND ERROR MODELS
    sys.stdout.write(
        strftime("%Y-%m-%d %H:%M:%S") + ": match and error models\n")
    error_model.hist(outfile, t_alnm_ext)

    if intron_retention:
        sys.stdout.write(
            strftime("%Y-%m-%d %H:%M:%S") + ": Modeling Intron Retention\n")
        model_ir.intron_retention(outfile, ref_t)

    if model_fit:
        sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Model fitting\n")
        model_fitting.model_fitting(outfile, int(num_threads))

    # Clean up LAST database files and byte-code left in the working tree.
    # Raw strings keep the backslash for the shell without relying on an
    # invalid Python escape sequence.
    call(r"find . -name \*ref_genome.* -delete", shell=True)
    call(r"find . -name \*ref_transcriptome.* -delete", shell=True)
    call(r"find . -name \*.pyc -delete", shell=True)
    sys.stdout.write(strftime("%Y-%m-%d %H:%M:%S") + ": Finished!\n")