Esempio n. 1
0
	#parser.add_argument('-o', default="Data/RNA-Seq/sue1-mRNA-Seq/set3/Thalyrata/sue_mRNA_set3_sw_aln_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="Data/RNA-Seq/Col_RNA-Seq/Thalyrata/SRR493036_aln_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_01nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_02nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_03nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_04nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_05nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_06nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_07nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_08nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	#parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_09nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	parser.add_argument('-o', default="/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_10nc_gene_count.tsv", type=str, help='Output file for gene-mapped read counts', metavar='OutputFile')
	
	args = parser.parse_args()
	i_file = args.i
	gff_file = args.gff
	o_file = args.o

	return(i_file, gff_file, o_file)

# Script body: read options, count reads per gene, and write a TSV report.
i_file, gff_file, o_file = command_line()

# The second argument is presumably the ";"-separated list of chromosome
# names to keep -- confirm against GFF.parse_genes.
gff_obj = GFF.parse_genes(gff_file, "Chr1;Chr2;Chr3;Chr4;Chr5")
gene_counts = SAM.reads_per_gene(i_file, gff_obj)

with open(o_file, 'w') as outfile:
	outfile.write("Gene\tReads_mapped\n")
	# One row per gene, highest count first.
	ranked = sorted(gene_counts.items(), key=lambda pair: pair[1], reverse=True)
	for gene, count in ranked:
		outfile.write("\t".join((str(gene), str(count))) + "\n")
Esempio n. 2
0
	dup_file = args.d
	gff_file = args.gff
	adj_value = args.adj
	use_random = args.r
	sun_genes_filename = args.o
	
	return(pileup_file, dup_file, gff_file, adj_value, use_random, sun_genes_filename)
	
# Script body: load the duplicated-gene list, optionally widen gene intervals,
# then parse the pileup restricted to the selected gene positions.
pileup_file, dup_file, gff_file, adj_value, use_random, sun_genes_filename = command_line()

# One gene name per line; surrounding whitespace is stripped.
with open(dup_file) as infile:
	dup_list = [line.strip() for line in infile]

gff_genes_dict = GFF.parse_genes(gff_file)

if adj_value > 0:
	# Widen every gene interval by adj_value on both sides. Only existing keys
	# are reassigned, so the dict does not change size during iteration.
	# `chrom` is used instead of `chr` to avoid shadowing the builtin chr().
	for gene_name, (chrom, start_pos, end_pos) in gff_genes_dict.gene_dict.items():
		gff_genes_dict.gene_dict[gene_name] = [chrom, start_pos - adj_value, end_pos + adj_value]

if use_random == "Y":
	# NOTE: this branch is meant to replace the duplicated genes with a random
	# gene set of the same size, but the sampling call is currently disabled
	# and EVERY gene is selected instead.
	# TODO: restore -> dup_list = sample(gff_genes_dict.gene_dict.keys(),len(dup_list))
	dup_list = gff_genes_dict.gene_dict.keys() # Delete me soon

# Build the lookup set once: testing membership against the list would be a
# linear scan for every gene. dup_list itself is left untouched.
dup_set = set(dup_list)

# "chrom:start-end" entries joined by ";", ordered by chromosome then start.
genes_by_position = sorted(
	gff_genes_dict.gene_dict.items(),
	key=lambda item: (item[1][0], item[1][1]),
)
dup_positions = ";".join(
	chrom + ":" + str(s_pos) + "-" + str(e_pos)
	for gene_name, (chrom, s_pos, e_pos) in genes_by_position
	if gene_name in dup_set
)
dup_pileup = Pileup.parse(pileup_file, False, dup_positions)

suns_in_gene = Counter()
suns_in_gene_prob = {}
Esempio n. 3
0
        '-o',
        default=
        "/home/mattchat/SuecicaDupSearch/Data/RNA-Seq/Col0_leaf_RNA-seq/libWhan_10nc_gene_count.tsv",
        type=str,
        help='Output file for gene-mapped read counts',
        metavar='OutputFile')

    args = parser.parse_args()
    i_file = args.i
    gff_file = args.gff
    o_file = args.o

    return (i_file, gff_file, o_file)


# Script body: read options, count reads per gene, and write a TSV report.
i_file, gff_file, o_file = command_line()

# The second argument is presumably the ";"-separated list of chromosome
# names to keep -- confirm against GFF.parse_genes.
gff_obj = GFF.parse_genes(gff_file, "Chr1;Chr2;Chr3;Chr4;Chr5")

# Presumably reads_per_gene() registers the counting task and start() runs
# the pass over the SAM file -- confirm against SAM.Parse.
parse_sam = SAM.Parse(i_file)
parse_sam.reads_per_gene(gff_obj)
parse_sam.start()
gene_counts = parse_sam.get_reads_per_gene()

with open(o_file, 'w') as outfile:
    outfile.write("Gene\tReads_mapped\n")
    # One row per gene, highest count first.
    by_count_desc = sorted(gene_counts.items(),
                           key=lambda entry: entry[1],
                           reverse=True)
    outfile.writelines(
        str(gene) + "\t" + str(count) + "\n"
        for gene, count in by_count_desc)