def main(argv=None):
    """Count reads per annotated feature for each reads file and print a table.

    Parses the command line, loads the annotation named by
    ``settings.genes_gff``, runs ``count_features`` on every file yielded by
    ``RILseq.flat_list(settings.reads_files)`` and writes a tab-delimited
    table to standard output: one row per feature, one column per reads file.

    When ``settings.quiet`` is set the header row is omitted and features
    whose name starts with ``'~'`` are skipped.

    Args:
        argv: Argument list handed to ``process_command_line``.

    Returns:
        0 on success, 1 if the annotation file could not be read (IOError).
    """
    settings = process_command_line(argv)
    try:
        # Close the annotation handle as soon as it has been parsed.
        with open(settings.genes_gff) as gff_in:
            pos_feat_list, all_features = RILseq.read_gtf(
                gff_in, settings.feature, settings.identifier)
    except IOError as err:
        # Same exit status as before, but tell the user why we gave up.
        sys.stderr.write('Cannot read annotation file: %s\n' % err)
        return 1

    # --singles overrides the first/second selection.
    if settings.singles:
        settings.only_first = True
        settings.only_second = False

    lib_order = []
    all_counts = {}
    for r1_name in RILseq.flat_list(settings.reads_files):
        sys.stderr.write('%s\n' % str(r1_name))
        lib_order.append(r1_name)
        with open(r1_name) as reads_in:
            all_counts[r1_name] = count_features(
                pos_feat_list, reads_in, settings.overlap, length=25,
                # "only second" means ignore the first, and vice versa.
                ignore_first=settings.only_second,
                ignore_second=settings.only_first,
                count_singles=settings.singles)

    outt = csv.writer(sys.stdout, delimiter='\t')
    if not settings.quiet:
        outt.writerow(['Gene name'] + lib_order)
    for g in sorted(all_features):
        if settings.quiet and g.startswith('~'):
            continue
        outt.writerow([g] + [all_counts[libn][g] for libn in lib_order])
    return 0        # success
Example #2
0
def main(argv=None):
    """Map each FASTQ library with bwa and write per-library output files.

    For every file in ``settings.fastq_1`` (paired by position with
    ``settings.fastq_2`` when a mate exists) the reads are mapped with
    ``RILseq.run_bwa``. If an annotation was loaded, reads are counted per
    feature; if ``settings.create_wig`` is set, a coverage wiggle file is
    written for the library. Finally, when an annotation was loaded, the
    table of counts (all libraries as columns) is written once per input
    FASTQ file.

    Output files are created in ``settings.dirout`` and are named after the
    FASTQ file name up to its first ``"_cutadapt"``.

    Args:
        argv: Argument list handed to ``process_command_line``.

    Returns:
        0 on success.
    """
    settings = process_command_line(argv)
    if not os.path.exists(settings.dirout):
        os.makedirs(settings.dirout)

    gcounts = {}
    lib_order = []
    if settings.genes_gff:
        try:
            with open(settings.genes_gff) as gff_in:
                pos_feat_list, all_features = RILseq.read_gtf(
                    gff_in, settings.feature, settings.identifier)
        except IOError:
            # Best effort: without a readable annotation, skip the counting
            # steps but still map the reads and write coverage.
            settings.genes_gff = None

    fastq_1_list = list(RILseq.flat_list(settings.fastq_1))
    fastq_2_list = list(RILseq.flat_list(settings.fastq_2))
    for i, r1_name in enumerate(fastq_1_list):
        # Libraries without a mate file are mapped as single-end.
        r2_name = fastq_2_list[i] if i < len(fastq_2_list) else None
        # Library name: file name without directory and last extension.
        libname = r1_name.rsplit('.', 1)[0].rsplit('/', 1)[-1]
        outhead = '%s_bwa' % libname
        bamname = RILseq.run_bwa(
            settings.bwa_exec, r1_name, r2_name,
            settings.dirout, outhead, settings.allowed_mismatches,
            settings.genome_fasta, settings.params_aln, settings.sampe_params,
            settings.samse_params, settings.samtools_cmd)
        samfile = pysam.Samfile(bamname)
        try:
            if settings.genes_gff:
                lib_order.append(libname)
                gcounts[libname] = RILseq.count_features(
                    pos_feat_list, samfile, settings.overlap,
                    rev=settings.reverse_complement)
            if settings.create_wig:
                coverage = RILseq.generate_wig(
                    samfile, rev=settings.reverse_complement, first_pos=False)
                # Open only this library's wig file. Re-opening every wig
                # file in 'w' mode on each iteration would truncate the
                # files already written for earlier libraries.
                wig_path = "%s/%s_coverage.wig" % (
                    settings.dirout, r1_name.split("_cutadapt")[0])
                with open(wig_path, 'w') as outwig:
                    RILseq.print_wiggle(
                        coverage, "%s_single_fragments_coverage" % libname,
                        "%s single fragments coverage" % libname, outwig)
        finally:
            samfile.close()

    # Print the table of counts
    if settings.genes_gff:
        header = ['Gene name'] + lib_order
        genes = sorted(all_features)
        for r1_name in fastq_1_list:
            table_path = "%s/%s_counts.txt" % (
                settings.dirout, r1_name.split("_cutadapt")[0])
            with open(table_path, 'w') as outtable:
                outt = csv.writer(outtable, delimiter='\t')
                outt.writerow(header)
                for g in genes:
                    outt.writerow(
                        [g] + [gcounts[libn][g] for libn in lib_order])
    return 0        # success
Example #3
0
def main(argv=None):
    """Map each FASTQ library with bwa into one wig file and one count table.

    For every file in ``settings.fastq_1`` (paired by position with
    ``settings.fastq_2`` when a mate exists) the reads are mapped with
    ``RILseq.run_bwa`` and the library's coverage is appended to a single
    wiggle file, ``<dirout>/<outhead>_coverage.wig``. If an annotation was
    loaded, reads are also counted per feature and the table of counts is
    written to ``<dirout>/<outhead>_counts.txt`` with one column per library.

    Args:
        argv: Argument list handed to ``process_command_line``.

    Returns:
        0 on success.
    """
    settings = process_command_line(argv)
    if not os.path.exists(settings.dirout):
        os.makedirs(settings.dirout)

    gcounts = {}
    lib_order = []
    wig_path = "%s/%s_coverage.wig" % (settings.dirout, settings.outhead)
    # One wig file is shared by all libraries; the context manager makes
    # sure it is flushed and closed even if mapping fails part-way.
    with open(wig_path, 'w') as outwig:
        if settings.genes_gff:
            try:
                with open(settings.genes_gff) as gff_in:
                    pos_feat_list, all_features = RILseq.read_gtf(
                        gff_in, settings.feature, settings.identifier)
            except IOError:
                # Best effort: without a readable annotation, skip the
                # counting steps but still map the reads and write coverage.
                settings.genes_gff = None

        fastq_2_list = list(RILseq.flat_list(settings.fastq_2))
        for i, r1_name in enumerate(RILseq.flat_list(settings.fastq_1)):
            # Libraries without a mate file are mapped as single-end.
            r2_name = fastq_2_list[i] if i < len(fastq_2_list) else None
            # Library name: file name without directory and last extension.
            libname = r1_name.rsplit('.', 1)[0].rsplit('/', 1)[-1]
            outhead = '%s_bwa' % libname
            bamname = RILseq.run_bwa(
                settings.bwa_exec, r1_name, r2_name,
                settings.dirout, outhead, settings.allowed_mismatches,
                settings.genome_fasta, settings.params_aln,
                settings.sampe_params, settings.samse_params,
                settings.samtools_cmd, processors=settings.processors)
            samfile = pysam.Samfile(bamname)
            try:
                if settings.genes_gff:
                    lib_order.append(libname)
                    gcounts[libname] = RILseq.count_features(
                        pos_feat_list, samfile, settings.overlap,
                        rev=settings.reverse_complement)
                coverage = RILseq.generate_wig(
                    samfile, rev=settings.reverse_complement, first_pos=False)
                RILseq.print_wiggle(
                    coverage, "%s_single_fragments_coverage" % libname,
                    "%s single fragments coverage" % libname, outwig)
            finally:
                samfile.close()

    # Print the table of counts
    if settings.genes_gff:
        table_path = "%s/%s_counts.txt" % (settings.dirout, settings.outhead)
        with open(table_path, 'w') as outtable:
            outt = csv.writer(outtable, delimiter='\t')
            outt.writerow(['Gene name'] + lib_order)
            for g in sorted(all_features):
                outt.writerow([g] + [gcounts[libn][g] for libn in lib_order])
    return 0        # success
Example #4
0
def main(argv=None):
    """Print a per-feature read-count table covering every reads file.

    The annotation named by ``settings.genes_gff`` is parsed with
    ``RILseq.read_gtf``; each file yielded by
    ``RILseq.flat_list(settings.reads_files)`` is then passed to
    ``count_features``. The result is written to standard output as
    tab-delimited text, one row per feature and one column per reads file.

    With ``settings.quiet`` the header row is suppressed and features whose
    name starts with ``'~'`` are left out.

    Args:
        argv: Argument list handed to ``process_command_line``.

    Returns:
        0 on success, 1 if the annotation file could not be read (IOError).
    """
    settings = process_command_line(argv)
    try:
        # The handle is only needed while parsing; release it right after.
        with open(settings.genes_gff) as annotation:
            pos_feat_list, all_features = RILseq.read_gtf(annotation,
                                                          settings.feature,
                                                          settings.identifier)
    except IOError as err:
        # Exit status is unchanged; the message explains the failure.
        sys.stderr.write('Cannot read annotation file: %s\n' % err)
        return 1

    # --singles overrides the first/second selection.
    if settings.singles:
        settings.only_first = True
        settings.only_second = False

    lib_order = []
    all_counts = {}
    for r1_name in RILseq.flat_list(settings.reads_files):
        sys.stderr.write('%s\n' % str(r1_name))
        lib_order.append(r1_name)
        with open(r1_name) as reads:
            # "only second" means ignore the first, and vice versa.
            all_counts[r1_name] = count_features(pos_feat_list,
                                                 reads,
                                                 settings.overlap,
                                                 length=25,
                                                 ignore_first=settings.only_second,
                                                 ignore_second=settings.only_first,
                                                 count_singles=settings.singles)

    outt = csv.writer(sys.stdout, delimiter='\t')
    if not settings.quiet:
        outt.writerow(['Gene name'] + lib_order)
    for g in sorted(all_features):
        if settings.quiet and g.startswith('~'):
            continue
        outt.writerow([g] + [all_counts[libn][g] for libn in lib_order])
    return 0  # success