return(in_folder, o_file, gff_file) in_folder, o_file, gff_file = command_line() # Open GFF file and make dictionary containing gene names & their positions # Then add the per-library gene read counts to a dictionary gff_genes_dict = GFF.Parse_genes(gff_file, create_nuc_dict=True) library_gene_counts = {} for (gene, [chromosome, spos, epos]) in gff_genes_dict: library_gene_counts[gene] = [] for root, subfolders, files in os.walk(in_folder): for f_name in files: print("Working on", str(f_name)) f_in = os.path.join(root,f_name) temp_gene_counts = SAM.reads_per_gene(f_in, gff_genes_dict) # Count up the total number of reads in the library gzipped = False total_reads = 0 if f_in.endswith(".gz"): infile = gzip.open(f_in, 'rb') gzipped = True else: infile = open(f_in) for line in infile: if gzipped == True: line = str(line, encoding='utf8') line = line.split() if line[0][:1] != "@" and int(line[4]) >= 20: total_reads += 1 # Change read counts to RPKM value
#parser.add_argument('-o', default="Data/RNA-Seq/sue1-mRNA-Seq/set3/Thalyrata/sue_mRNA_set3_sw_aln_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="Data/RNA-Seq/Col_RNA-Seq/Thalyrata/SRR493036_aln_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_01nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_02nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_03nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_04nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_05nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_06nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_07nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_08nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_09nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_10nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') args = parser.parse_args() i_file = args.i gff_file = args.gff o_file = args.o return(i_file, gff_file, o_file) i_file, gff_file, o_file = command_line() #gff_obj = GFF.Parse_Genes(gff_file, "AtChr1;AtChr2;AtChr3;AtChr4;AtChr5") gff_obj = GFF.parse_genes(gff_file, "Chr1;Chr2;Chr3;Chr4;Chr5") gene_counts = SAM.reads_per_gene(i_file, gff_obj) with open(o_file, 'w') as outfile: outline = "Gene\tReads_mapped\n" outfile.write(outline) for gene, count in sorted(gene_counts.items(), key = lambda gene_counts: gene_counts[1], reverse=True): outline = str(gene) + "\t" + str(count) + "\n" outfile.write(outline)