"snoRNA": "ncRNA", "snRNA": "ncRNA" } with open(gff_file) as gff_fd: for record in GFF.parse(gff_fd): annotations_dict[record.id] = record rcParams.update({'font.size': 7}) plt.figure(1, dpi=300, figsize=(8, 6)) index = 1 for sample, letter in zip(sample_set_names_list, letter_list_part1): collection = CollectionVCF(from_file=True, vcf_file=sample + "_good.vcf") plt.subplot(3, 2, index) collection.get_location(annotations_dict, use_synonym=True, synonym_dict=annotation_synonym_dict) location_pie(collection, annotation_colors=[], ref_genome=None, explode=True, annotation_black_list=annotation_black_list, allow_several_counts_of_record=False, counts_filename="location_counts.t", counts_dir="location_counts", legend_font=6, combine_mixed=True ) plt.title("%s. %s" % (letter, sample), fontweight='bold') index += 1 for format_ext in ["svg", "eps", "pdf", "png"]: plt.savefig("good_mutation_pie_mixed_combined_normed.%s" % format_ext, bbox_inches='tight') plt.close()
os.chdir(workdir) os.system("mkdir -p %s" % sample_set_name) os.chdir(sample_set_name) os.system("mkdir -p %s %s" % (clustering_dir, rainfall_dir)) #os.system("pwd") mutations = CollectionVCF( vcf_file="../SNP_annotated_raw_vcf/%s_SNP.vcf" % sample_set_name, from_file=True) """ mutations.rainfall_plot("%s_mutations" % (sample_set_name), ref_genome=reference, draw_gaps=True, masked_regions=bad_region_dict) """ mutations.get_location(annotations_dict, use_synonym=True, synonym_dict=annotation_synonym_dict) mutations.check_location(bad_regions) mutations.check_by_ref_and_alt(ref_alt_variants["desaminases"], "DA") annotation_black_list = [ "gene", "region", "ARS", "long_terminal_repeat", "noncoding_exon", "intron", "repeat_region", "telomere", "gene_cassette", "five_prime_UTR_intron" ] """ mutations.location_pie(annotation_black_list=annotation_black_list, figsize=(30, 30), pie_filename="%s_SNP_raw_variant_location_pie.svg" % sample_set_name, full_genome_pie_filename="%s_SNP_raw_variant_location_full_genome_pie.svg" % sample_set_name, counts_filename="%s_SNP_raw_variant_location_counts.t" % sample_set_name)
# NOTE(review): the original line breaks and indentation were lost. The
# nesting below (everything after the `for` header inside the loop) is
# reconstructed from the statement order -- confirm it.

# Index every record yielded by the GFF parser by its id.
annotations_dict = {}
with open(gff_file) as gff_fd:
    for record in GFF.parse(gff_fd):
        annotations_dict[record.id] = record

for sample in samples_list:
    print("Handling %s" % sample)
    os.chdir(workdir)
    os.chdir(sample)
    # Skip samples whose directory has no entry named `alignment_dir`.
    if alignment_dir not in os.listdir("."):
        continue
    os.chdir(alignment_dir)
    os.system("mkdir -p %s" % clustering_dir)

    mutations = CollectionVCF(vcf_file=sample + suffix, from_file=True)
    mutations.get_location(annotations_dict)
    mutations.check_by_ref_and_alt(ref_alt_variants["desaminases"], "DA")

    # Disabled debugging code, kept verbatim as a bare string expression
    # (no runtime effect).
    """ for record in mutations: print (record) print(record.flags) """
    #for record in mutations:
    #    print(record.description)

    mutations.location_pie(
        annotation_black_list=[
            "gene",
            "region",
            "ARS",
            "long_terminal_repeat"
        ],
        figsize=(40, 40),
        pie_filename="variant_location_pie.svg",
        counts_filename="variant_location_counts.t")
    print("Totaly %s mutations" % len(mutations))