#parser.add_argument('-o', default="Data/RNA-Seq/sue1-mRNA-Seq/set3/Thalyrata/sue_mRNA_set3_sw_aln_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="Data/RNA-Seq/Col_RNA-Seq/Thalyrata/SRR493036_aln_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_01nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_02nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_03nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_04nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_05nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_06nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_07nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_08nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') #parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_09nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_10nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') args = parser.parse_args() i_file = args.i gff_file = args.gff o_file = args.o return(i_file, gff_file, o_file) i_file, gff_file, o_file = command_line() #gff_obj = GFF.Parse_Genes(gff_file, "AtChr1;AtChr2;AtChr3;AtChr4;AtChr5") gff_obj = GFF.parse_genes(gff_file, "Chr1;Chr2;Chr3;Chr4;Chr5") gene_counts = SAM.reads_per_gene(i_file, gff_obj) with open(o_file, 'w') as outfile: outline = "Gene\tReads_mapped\n" outfile.write(outline) for gene, count in sorted(gene_counts.items(), key = lambda gene_counts: gene_counts[1], reverse=True): outline = str(gene) + "\t" + str(count) + "\n" outfile.write(outline)
dup_file = args.d gff_file = args.gff adj_value = args.adj use_random = args.r sun_genes_filename = args.o return(pileup_file, dup_file, gff_file, adj_value, use_random, sun_genes_filename) pileup_file, dup_file, gff_file, adj_value, use_random, sun_genes_filename = command_line() dup_list = [] with open(dup_file) as infile: for line in infile: line = line.strip() dup_list.append(line) gff_genes_dict = GFF.parse_genes(gff_file) if adj_value > 0: for gene_name, [chr, start_pos, end_pos] in gff_genes_dict.gene_dict.items(): start_pos = start_pos - adj_value end_pos = end_pos + adj_value gff_genes_dict.gene_dict[gene_name] = [chr, start_pos, end_pos] if use_random == "Y": # Construct a list of randomly selected genes to replace the list of duplicated genes #dup_list = sample(gff_genes_dict.gene_dict.keys(),len(dup_list)) dup_list = gff_genes_dict.gene_dict.keys() # Delete me soon dup_positions = ";".join([chr + ":" + str(s_pos) + "-" + str(e_pos) for gene_name, [chr,s_pos,e_pos] in sorted(gff_genes_dict.gene_dict.items(), key = lambda x: (x[1][0], x[1][1]) ) if gene_name in dup_list ] ) dup_pileup = Pileup.parse(pileup_file, False, dup_positions) suns_in_gene = Counter() suns_in_gene_prob = {}
'-o', default= "/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_10nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile') args = parser.parse_args() i_file = args.i gff_file = args.gff o_file = args.o return (i_file, gff_file, o_file) i_file, gff_file, o_file = command_line() #gff_obj = GFF.Parse_Genes(gff_file, "AtChr1;AtChr2;AtChr3;AtChr4;AtChr5") gff_obj = GFF.parse_genes(gff_file, "Chr1;Chr2;Chr3;Chr4;Chr5") parse_sam = SAM.Parse(i_file) parse_sam.reads_per_gene(gff_obj) parse_sam.start() gene_counts = parse_sam.get_reads_per_gene() with open(o_file, 'w') as outfile: outline = "Gene\tReads_mapped\n" outfile.write(outline) for gene, count in sorted(gene_counts.items(), key=lambda gene_counts: gene_counts[1], reverse=True): outline = str(gene) + "\t" + str(count) + "\n" outfile.write(outline)