Ejemplo n.º 1
0
                               "snoRNA": "ncRNA",
                               "snRNA": "ncRNA"
                               }
    # Index every record parsed from the GFF file by its record id.
    with open(gff_file) as gff_fd:
        for record in GFF.parse(gff_fd):
            annotations_dict[record.id] = record


    # One figure (number 1) holds a pie panel per sample set; the font size
    # is lowered through rcParams before anything is drawn.
    rcParams.update({'font.size': 7})
    plt.figure(1, dpi=300, figsize=(8, 6))
    # 1-based position of the next panel in the 3x2 subplot grid.
    # NOTE(review): a 3x2 grid has six slots - confirm the zip below never
    # yields more than six (sample, letter) pairs.
    index = 1
    # zip stops at the shorter of the two lists, so samples without a
    # matching panel letter are silently skipped.
    for sample, letter in zip(sample_set_names_list, letter_list_part1):
        # Variants are read from "<sample>_good.vcf" relative to the current
        # working directory.
        collection = CollectionVCF(from_file=True, vcf_file=sample + "_good.vcf")

        plt.subplot(3, 2, index)
        # presumably annotates each variant with the features it falls in,
        # mapping feature names through annotation_synonym_dict - TODO confirm
        # against CollectionVCF.get_location.
        collection.get_location(annotations_dict, use_synonym=True, synonym_dict=annotation_synonym_dict)
        # presumably draws the pie into the subplot selected above and writes
        # the counts file under counts_dir - TODO confirm against location_pie.
        # NOTE(review): every iteration passes the same counts_filename, so
        # later samples may overwrite earlier counts - verify.
        location_pie(collection, annotation_colors=[],
                     ref_genome=None, explode=True, annotation_black_list=annotation_black_list,
                     allow_several_counts_of_record=False,
                     counts_filename="location_counts.t",
                     counts_dir="location_counts",
                     legend_font=6,
                     combine_mixed=True
                     )
        # Panel title has the form "<letter>. <sample>".
        plt.title("%s. %s" % (letter, sample), fontweight='bold')

        index += 1
    # Save the same figure once per output format, then close it.
    for format_ext in ["svg", "eps", "pdf", "png"]:
        plt.savefig("good_mutation_pie_mixed_combined_normed.%s" % format_ext, bbox_inches='tight')
    plt.close()
Ejemplo n.º 2
0
        # Enter the per-sample-set directory under workdir, creating it (and
        # then the clustering and rainfall output directories inside it) if
        # it does not exist yet.
        # NOTE(review): the names are interpolated into a shell command
        # unquoted; a name containing spaces or shell metacharacters would
        # break or misbehave - confirm the inputs are plain identifiers.
        os.chdir(workdir)
        os.system("mkdir -p %s" % sample_set_name)
        os.chdir(sample_set_name)
        os.system("mkdir -p %s %s" % (clustering_dir, rainfall_dir))
        #os.system("pwd")
        # The VCF path is relative to the directory entered above, i.e. it
        # points into a sibling "SNP_annotated_raw_vcf" directory.
        mutations = CollectionVCF(
            vcf_file="../SNP_annotated_raw_vcf/%s_SNP.vcf" % sample_set_name,
            from_file=True)
        # The bare string literal below is a disabled rainfall_plot call; as
        # an expression statement it has no effect at runtime.
        """
        mutations.rainfall_plot("%s_mutations" % (sample_set_name), ref_genome=reference, draw_gaps=True,
                                masked_regions=bad_region_dict)
        """

        # presumably annotates each variant with overlapping features, using
        # annotation_synonym_dict to rename feature types - TODO confirm.
        mutations.get_location(annotations_dict,
                               use_synonym=True,
                               synonym_dict=annotation_synonym_dict)
        # presumably marks variants that fall inside bad_regions - TODO confirm.
        mutations.check_location(bad_regions)
        # presumably tags variants whose ref/alt pair is listed under
        # "desaminases" with the flag "DA" - TODO confirm.
        mutations.check_by_ref_and_alt(ref_alt_variants["desaminases"], "DA")

        # Feature type names to black-list.
        # NOTE(review): the consumer of this list is not part of this block -
        # confirm how it is applied.
        annotation_black_list = [
            "gene", "region", "ARS", "long_terminal_repeat", "noncoding_exon",
            "intron", "repeat_region", "telomere", "gene_cassette",
            "five_prime_UTR_intron"
        ]
        """
        mutations.location_pie(annotation_black_list=annotation_black_list,
                               figsize=(30, 30),
                               pie_filename="%s_SNP_raw_variant_location_pie.svg" % sample_set_name,
                               full_genome_pie_filename="%s_SNP_raw_variant_location_full_genome_pie.svg" % sample_set_name,
                               counts_filename="%s_SNP_raw_variant_location_counts.t" % sample_set_name)
Ejemplo n.º 3
0
    # Index every record parsed from the GFF file by its record id.
    annotations_dict = {}
    with open(gff_file) as gff_fd:
        for record in GFF.parse(gff_fd):
            annotations_dict[record.id] = record

    for sample in samples_list:
        print("Handling %s" % sample)

        # Return to workdir first so that the relative chdir calls below
        # resolve correctly on every iteration.
        os.chdir(workdir)
        os.chdir(sample)
        # Samples without an alignment directory are skipped without any
        # message beyond the "Handling" line above.
        if alignment_dir not in os.listdir("."):
            continue
        os.chdir(alignment_dir)
        # NOTE(review): clustering_dir is interpolated into a shell command
        # unquoted - confirm it never contains spaces or metacharacters.
        os.system("mkdir -p %s" % clustering_dir)
        # The VCF is looked up in the alignment directory as sample + suffix.
        mutations = CollectionVCF(vcf_file=sample + suffix, from_file=True)
        # presumably annotates each variant with overlapping features - TODO
        # confirm against CollectionVCF.get_location.
        mutations.get_location(annotations_dict)
        # presumably tags variants whose ref/alt pair is listed under
        # "desaminases" with the flag "DA" - TODO confirm.
        mutations.check_by_ref_and_alt(ref_alt_variants["desaminases"], "DA")
        # The bare string literal below is disabled debugging output; as an
        # expression statement it has no effect at runtime.
        """
        for record in mutations:
            print (record)
            print(record.flags)
        """
        #for record in mutations:
        #    print(record.description)
        # Output file names are relative, so they land in the alignment
        # directory entered above (assuming location_pie does not change
        # directory itself - TODO confirm).
        mutations.location_pie(annotation_black_list=[
            "gene", "region", "ARS", "long_terminal_repeat"
        ],
                               figsize=(40, 40),
                               pie_filename="variant_location_pie.svg",
                               counts_filename="variant_location_counts.t")
        # NOTE(review): "Totaly" is a typo in the emitted message; left
        # unchanged here because it is runtime output.
        print("Totaly %s mutations" % len(mutations))