def main(argv=None):
    """Map the ends of reads from BAM files and print the reads table.

    For every BAM file in ``settings.bamfiles`` the read ends are written
    to two fastq files by ``RILseq.get_unmapped_reads``, each fastq file is
    mapped with ``RILseq.run_bwa`` (unless ``settings.skip_mapping`` is set,
    in which case previously generated BAM files in ``settings.dirout`` are
    read), and the result is passed to ``RILseq.write_reads_table``, which
    writes to standard output.

    Args:
        argv: Command-line arguments, forwarded unchanged to
            ``process_command_line``.

    Returns:
        0 on success.
    """
    settings = process_command_line(argv)
    # Read the transcripts if given
    if settings.transcripts:
        trans_dict = RILseq.read_transcripts(settings.transcripts)
    else:
        trans_dict = None

    # Select the destination handed to write_reads_table as write_single.
    outall = None
    owns_outall = False  # only close a file this function opened itself
    if settings.all_reads:
        try:
            outall = open(settings.all_reads, 'w')
        except IOError as err:
            # Best effort: keep running without this output, but say so.
            sys.stderr.write(
                'Warning: cannot open %s for writing (%s); '
                'the all-reads output is disabled\n' % (
                    settings.all_reads, err))
        else:
            owns_outall = True
    elif settings.add_all_reads:
        outall = sys.stdout

    try:
        # Get the ends of the reads from the bam files
        for bf in RILseq.flat_list(settings.bamfiles):
            bfin = pysam.Samfile(bf)
            ends_bams = []
            try:
                outhead = bf.rsplit('.', 1)[0]
                libname = outhead.rsplit('/', 1)[-1]
                fsq1name = "%s/%s_ends_1.fastq" % (settings.dirout, libname)
                fsq2name = "%s/%s_ends_2.fastq" % (settings.dirout, libname)
                if settings.skip_mapping:
                    # The fastq output is not needed: discard it.
                    fsq1 = open(os.devnull, 'w')
                    fsq2 = fsq1
                else:
                    fsq1 = open(fsq1name, 'w')
                    fsq2 = open(fsq2name, 'w')
                try:
                    single_mapped = RILseq.get_unmapped_reads(
                        bfin, fsq1, fsq2, settings.length, settings.maxG,
                        rev=settings.reverse_complement, all_reads=True,
                        dust_thr=settings.dust_thr)
                finally:
                    # The fastq files are read back from disk by run_bwa,
                    # so they must be flushed and closed first. Closing
                    # the shared devnull handle twice is harmless.
                    fsq1.close()
                    fsq2.close()

                reads_in = []
                # Map the fastq files to the genome
                for fqname in (fsq1name, fsq2name):
                    bamheadname = fqname.rsplit('.', 1)[0].rsplit('/', 1)[-1]
                    if settings.skip_mapping:
                        bamname = "%s/%s.bam" % (settings.dirout, bamheadname)
                    else:
                        bamname = RILseq.run_bwa(
                            settings.bwa_exec, fqname, None,
                            settings.dirout, bamheadname,
                            settings.max_mismatches,
                            settings.genome_fasta, settings.params_aln,
                            '', settings.samse_params,
                            settings.samtools_cmd,
                            processors=settings.processors)
                    bamin = pysam.Samfile(bamname)
                    # Kept open until the table is written, in case the
                    # parsed reads still refer to the handle.
                    ends_bams.append(bamin)
                    reads_in.append(RILseq.read_bam_file(
                        bamin, bamin.references,
                        settings.allowed_mismatches))
                RILseq.write_reads_table(
                    sys.stdout, reads_in[0], reads_in[1], bfin.references,
                    settings.distance, not settings.keep_circular,
                    trans_dict, write_single=outall,
                    single_mapped=single_mapped,
                    max_NM=settings.allowed_mismatches)
            finally:
                for handle in ends_bams:
                    handle.close()
                bfin.close()
    finally:
        if owns_outall:
            outall.close()
    return 0        # success
Example #2
0
def main(argv=None):
    """Map fastq libraries with bwa and write per-library outputs.

    Each entry of ``settings.fastq_1`` (paired by position with
    ``settings.fastq_2`` when a mate exists) is mapped with
    ``RILseq.run_bwa``. When ``settings.genes_gff`` is given, features are
    counted per library and a tab-delimited table holding the counts of all
    libraries is written once per input fastq. When ``settings.create_wig``
    is set, a coverage wig file is written for each library.

    Args:
        argv: Command-line arguments, forwarded unchanged to
            ``process_command_line``.

    Returns:
        0 on success.
    """
    settings = process_command_line(argv)
    if not os.path.exists(settings.dirout):
        os.makedirs(settings.dirout)

    if settings.genes_gff:
        try:
            with open(settings.genes_gff) as gff_in:
                pos_feat_list, all_features = RILseq.read_gtf(
                    gff_in, settings.feature, settings.identifier)
        except IOError:
            # Best effort: an unreadable annotation file disables counting.
            settings.genes_gff = None
        gcounts = {}
        lib_order = []

    fastq_1_list = list(RILseq.flat_list(settings.fastq_1))
    fastq_2_list = list(RILseq.flat_list(settings.fastq_2))
    for i, r1_name in enumerate(fastq_1_list):
        # Libraries without a second-mate file are mapped as single-end.
        r2_name = fastq_2_list[i] if i < len(fastq_2_list) else None
        libname = r1_name.rsplit('.', 1)[0].rsplit('/', 1)[-1]
        outhead = '%s_bwa' % libname
        bamname = RILseq.run_bwa(
            settings.bwa_exec, r1_name, r2_name,
            settings.dirout, outhead, settings.allowed_mismatches,
            settings.genome_fasta, settings.params_aln, settings.sampe_params,
            settings.samse_params, settings.samtools_cmd)
        samfile = pysam.Samfile(bamname)
        try:
            if settings.genes_gff:
                lib_order.append(libname)
                gcounts[libname] = RILseq.count_features(
                    pos_feat_list, samfile, settings.overlap,
                    rev=settings.reverse_complement)
            if settings.create_wig:
                coverage = RILseq.generate_wig(
                    samfile, rev=settings.reverse_complement, first_pos=False)
                # Open only this library's wig file. Reopening every wig
                # file with 'w' on each iteration would truncate the files
                # written for earlier libraries and leak the handles.
                wig_name = "%s/%s_coverage.wig" % (
                    settings.dirout, r1_name.split("_cutadapt")[0])
                with open(wig_name, 'w') as outwig:
                    RILseq.print_wiggle(
                        coverage, "%s_single_fragments_coverage" % libname,
                        "%s single fragments coverage" % libname, outwig)
        finally:
            samfile.close()

    # Print the table of counts
    if settings.genes_gff:
        gene_names = sorted(all_features)
        for r1_name in fastq_1_list:
            table_name = "%s/%s_counts.txt" % (
                settings.dirout, r1_name.split("_cutadapt")[0])
            with open(table_name, 'w') as table_out:
                outt = csv.writer(table_out, delimiter='\t')
                outt.writerow(['Gene name'] + lib_order)
                for g in gene_names:
                    outt.writerow(
                        [g] + [gcounts[libn][g] for libn in lib_order])
    return 0        # success
Example #3
0
def main(argv=None):
    """Map fastq libraries with bwa into one wig file and one counts table.

    Each entry of ``settings.fastq_1`` (paired by position with
    ``settings.fastq_2`` when a mate exists) is mapped with
    ``RILseq.run_bwa``. The coverage of every library is written to a single
    ``<outhead>_coverage.wig`` file in ``settings.dirout``. When
    ``settings.genes_gff`` is given, features are counted per library and
    written to a tab-delimited ``<outhead>_counts.txt`` table.

    Args:
        argv: Command-line arguments, forwarded unchanged to
            ``process_command_line``.

    Returns:
        0 on success.
    """
    settings = process_command_line(argv)
    if not os.path.exists(settings.dirout):
        os.makedirs(settings.dirout)
    wig_name = "%s/%s_coverage.wig" % (settings.dirout, settings.outhead)
    # The context manager guarantees the wig file is flushed and closed,
    # including when mapping or counting raises.
    with open(wig_name, 'w') as outwig:
        if settings.genes_gff:
            try:
                with open(settings.genes_gff) as gff_in:
                    pos_feat_list, all_features = RILseq.read_gtf(
                        gff_in, settings.feature, settings.identifier)
            except IOError:
                # Best effort: an unreadable annotation file disables
                # counting.
                settings.genes_gff = None
            gcounts = {}
            lib_order = []
        fastq_2_list = list(RILseq.flat_list(settings.fastq_2))
        for i, r1_name in enumerate(RILseq.flat_list(settings.fastq_1)):
            # Libraries without a second-mate file are mapped as single-end.
            r2_name = fastq_2_list[i] if i < len(fastq_2_list) else None
            libname = r1_name.rsplit('.', 1)[0].rsplit('/', 1)[-1]
            outhead = '%s_bwa' % libname
            bamname = RILseq.run_bwa(
                settings.bwa_exec, r1_name, r2_name,
                settings.dirout, outhead, settings.allowed_mismatches,
                settings.genome_fasta, settings.params_aln,
                settings.sampe_params, settings.samse_params,
                settings.samtools_cmd, processors=settings.processors)
            samfile = pysam.Samfile(bamname)
            try:
                if settings.genes_gff:
                    lib_order.append(libname)
                    gcounts[libname] = RILseq.count_features(
                        pos_feat_list, samfile, settings.overlap,
                        rev=settings.reverse_complement)
                coverage = RILseq.generate_wig(
                    samfile, rev=settings.reverse_complement, first_pos=False)
                RILseq.print_wiggle(
                    coverage, "%s_single_fragments_coverage" % libname,
                    "%s single fragments coverage" % libname, outwig)
            finally:
                samfile.close()
    # Print the table of counts
    if settings.genes_gff:
        table_name = "%s/%s_counts.txt" % (settings.dirout, settings.outhead)
        with open(table_name, 'w') as outtable:
            outt = csv.writer(outtable, delimiter='\t')
            outt.writerow(['Gene name'] + lib_order)
            for g in sorted(all_features):
                outt.writerow([g] + [gcounts[libn][g] for libn in lib_order])
    return 0        # success
Example #4
0
def main(argv=None):
    """Map the ends of reads from BAM files and print the reads table.

    For every BAM file in ``settings.bamfiles`` the read ends are written
    to two fastq files by ``RILseq.get_unmapped_reads``, each fastq file is
    mapped with ``RILseq.run_bwa`` (unless ``settings.skip_mapping`` is set,
    in which case previously generated BAM files in ``settings.dirout`` are
    read), and the result is passed to ``RILseq.write_reads_table``, which
    writes to standard output.

    Args:
        argv: Command-line arguments, forwarded unchanged to
            ``process_command_line``.

    Returns:
        0 on success.
    """
    settings = process_command_line(argv)
    # Read the transcripts if given
    if settings.transcripts:
        trans_dict = RILseq.read_transcripts(settings.transcripts)
    else:
        trans_dict = None

    # Select the destination handed to write_reads_table as write_single.
    outall = None
    owns_outall = False  # only close a file this function opened itself
    if settings.all_reads:
        try:
            outall = open(settings.all_reads, 'w')
        except IOError as err:
            # Best effort: keep running without this output, but say so.
            sys.stderr.write(
                'Warning: cannot open %s for writing (%s); '
                'the all-reads output is disabled\n' % (
                    settings.all_reads, err))
        else:
            owns_outall = True
    elif settings.add_all_reads:
        outall = sys.stdout

    try:
        # Get the ends of the reads from the bam files
        for bf in RILseq.flat_list(settings.bamfiles):
            bfin = pysam.Samfile(bf)
            ends_bams = []
            try:
                outhead = bf.rsplit('.', 1)[0]
                libname = outhead.rsplit('/', 1)[-1]
                fsq1name = "%s/%s_ends_1.fastq" % (settings.dirout, libname)
                fsq2name = "%s/%s_ends_2.fastq" % (settings.dirout, libname)
                if settings.skip_mapping:
                    # The fastq output is not needed: discard it.
                    fsq1 = open(os.devnull, 'w')
                    fsq2 = fsq1
                else:
                    fsq1 = open(fsq1name, 'w')
                    fsq2 = open(fsq2name, 'w')
                try:
                    single_mapped = RILseq.get_unmapped_reads(
                        bfin,
                        fsq1,
                        fsq2,
                        settings.length,
                        settings.maxG,
                        rev=settings.reverse_complement,
                        all_reads=True,
                        dust_thr=settings.dust_thr)
                finally:
                    # The fastq files are read back from disk by run_bwa,
                    # so they must be flushed and closed first. Closing
                    # the shared devnull handle twice is harmless.
                    fsq1.close()
                    fsq2.close()

                reads_in = []
                # Map the fastq files to the genome
                for fqname in (fsq1name, fsq2name):
                    bamheadname = fqname.rsplit('.', 1)[0].rsplit('/', 1)[-1]
                    if settings.skip_mapping:
                        bamname = "%s/%s.bam" % (settings.dirout, bamheadname)
                    else:
                        bamname = RILseq.run_bwa(
                            settings.bwa_exec,
                            fqname,
                            None,
                            settings.dirout,
                            bamheadname,
                            settings.max_mismatches,
                            settings.genome_fasta,
                            settings.params_aln,
                            '',
                            settings.samse_params,
                            settings.samtools_cmd,
                            processors=settings.processors)
                    bamin = pysam.Samfile(bamname)
                    # Kept open until the table is written, in case the
                    # parsed reads still refer to the handle.
                    ends_bams.append(bamin)
                    reads_in.append(
                        RILseq.read_bam_file(bamin, bamin.references,
                                             settings.allowed_mismatches))
                RILseq.write_reads_table(
                    sys.stdout,
                    reads_in[0],
                    reads_in[1],
                    bfin.references,
                    settings.distance,
                    not settings.keep_circular,
                    trans_dict,
                    write_single=outall,
                    single_mapped=single_mapped,
                    max_NM=settings.allowed_mismatches)
            finally:
                for handle in ends_bams:
                    handle.close()
                bfin.close()
    finally:
        if owns_outall:
            outall.close()
    return 0  # success