コード例 #1
0
ファイル: sxn_gene_variation.py プロジェクト: KorfLab/Suecica
in_folder, o_file, gff_file = command_line()

# Parse the GFF file into gff_genes_dict, then give every retained gene an
# empty list in library_gene_counts for the per-library read counts.
gff_genes_dict = GFF.Parse(gff_file, create_nuc_dict=True)
library_gene_counts = {}
for (gene, [chromosome, spos, epos]) in gff_genes_dict:
    # Genes flagged by is_transposon() or is_common_rna() get no entry.
    if (gff_genes_dict.is_transposon(gene)
            or gff_genes_dict.is_common_rna(gene)):
        continue
    library_gene_counts[gene] = []
# Visit every file under in_folder (os.walk also descends into subfolders)
# and gather per-gene read counts plus a total read count for each one.
for root, subfolders, files in os.walk(in_folder):
    for f_name in files:
        print("Working on", str(f_name))
        f_in = os.path.join(root, f_name)
        # NOTE(review): SAM.Parse is a project helper not shown here; the
        # calls below presumably set up per-gene counting against the GFF
        # data, run the parse, and then fetch the resulting counts -- confirm.
        parse_sam = SAM.Parse(f_in)
        parse_sam.reads_per_gene(gff_genes_dict)
        parse_sam.start()
        temp_gene_counts = parse_sam.get_reads_per_gene()

        # Count up the total number of reads in the library
        gzipped = False
        total_reads = 0
        # Files ending in ".gz" are opened through gzip in binary mode, so
        # their lines arrive as bytes and are decoded inside the loop below.
        if f_in.endswith(".gz"):
            infile = gzip.open(f_in, 'rb')
            gzipped = True
        else:
            infile = open(f_in)
        # NOTE(review): infile is opened without a `with` block; whether it is
        # closed later cannot be seen from this excerpt -- confirm.
        for line in infile:
            if gzipped == True: line = line.decode('utf-8')
            # Break the line into its whitespace-separated fields.
            line = line.split()
コード例 #2
0
        '-o',
        default=
        "/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_10nc_gene_count.tsv",
        type=str,
        help='Output file for gene-mapped read counts',
        metavar='OutputFile')

    args = parser.parse_args()
    i_file = args.i
    gff_file = args.gff
    o_file = args.o

    return (i_file, gff_file, o_file)


i_file, gff_file, o_file = command_line()
# The second argument is a semicolon-separated list of chromosome names
# (presumably the chromosomes to load -- confirm in GFF.parse_genes). An
# earlier variant of this call used the names "AtChr1" through "AtChr5".
gff_obj = GFF.parse_genes(gff_file, "Chr1;Chr2;Chr3;Chr4;Chr5")
parse_sam = SAM.Parse(i_file)
parse_sam.reads_per_gene(gff_obj)
parse_sam.start()
gene_counts = parse_sam.get_reads_per_gene()

# Write a two-column, tab-separated table: a header row, then one row per
# gene ordered from the highest read count to the lowest.
with open(o_file, 'w') as outfile:
    outfile.write("Gene\tReads_mapped\n")
    ranked = sorted(gene_counts.items(),
                    key=lambda pair: pair[1],
                    reverse=True)
    for gene, count in ranked:
        outfile.write("\t".join((str(gene), str(count))) + "\n")