help="Gff file with masked regions") parser.add_argument("-b", "--logbase", action="store", dest="logbase", default=2, type=int, help="Logbase of y axis") args = parser.parse_args() mutations = CollectionVCF(from_file=True, in_file=args.input, dont_parse_info_and_data=True) if args.ref_genome: reference_genome = ReferenceGenome(args.ref_genome) reference_genome.find_gaps() else: reference_genome = None if args.masked_regions: masked_regions = {} with open(args.masked_regions) as gff_fd: for record in GFF.parse(gff_fd): masked_regions[record.id] = record else: masked_regions = None mutations.rainfall_plot(args.output_prefix, single_fig=True, dpi=args.dpi,
parser.add_argument("-f", "--vcf_file", action="store", dest="vcf_file", help="Vcf file with SNVs") parser.add_argument("-a", "--annotations", action="store", dest="annotations", required=True, help="Gff file with annotations of reference genome") parser.add_argument("-m", "--masking", action="store", dest="masking", required=True, help="Gff file with masked regions") parser.add_argument("-d", "--threshold", action="store", dest="threshold", default=1000, type=int, help="Threshold for extractig clusters. Depends on extraction method.") parser.add_argument("-y", "--clustering_directory", action="store", dest="clust_dir", default="clustering", help="Directory where to output additional data about clustering") args = parser.parse_args() index_file = args.reference_index if args.reference_index else "%s.idx" % (".".join(args.reference.split(".")[:-1])) reference = ReferenceGenome(args.reference, index_file=index_file) sample = args.sample_name clustering_dir = args.clust_dir distance_threshold = args.threshold reference.find_gaps() min_cluster_size = 3 annotations_dict = {} annotation_synonym_dict = {"three_prime_UTR": "3'_UTR", "five_prime_UTR": "5'_UTR", "snoRNA": "ncRNA", "snRNA": "ncRNA" } annotation_black_list = ["gene", "region", "ARS", "long_terminal_repeat",
"--subplot_size", action="store", dest="subplot_size", default=4, type=int, help= "Size of subplot(inches) on distribution histogram with all scaffolds. Default: 4" ) args = parser.parse_args() count_dict = OrderedDict() reference = ReferenceGenome(args.reference, masked_regions=None, index_file="refgen.idx", filetype="fasta", mode=args.parsing_mode, black_list=[]) reference.find_gaps(min_gap_length=10) for sample_name, vcf_file in zip(args.sample_names, args.input): count_dict[sample_name] = CollectionVCF( from_file=True, in_file=vcf_file, parse_only_coordinates=True).count_variants_in_windows( args.window_size, args.window_size if args.window_step is None else args.window_step, reference.region_length, ignore_scaffolds_shorter_than_window=True, output_prefix="%s.%s" % (args.output_prefix, sample_name), skip_empty_windows=False)
def _has_min_power(record, threshold):
    """Tell whether ``record.info_dict['Power']`` is at least *threshold*.

    The comparison result is coerced to a plain ``bool`` so callers always
    receive ``True`` or ``False``, whatever type the stored value has.
    """
    return bool(record.info_dict['Power'] >= threshold)


def filter_by_power_05(record):
    """Return True if the record's 'Power' info value is >= 0.05, else False.

    :param record: object exposing an ``info_dict`` mapping with a 'Power' entry.
    :return: bool
    """
    return _has_min_power(record, 0.05)


def filter_by_power_10(record):
    """Return True if the record's 'Power' info value is >= 0.10, else False.

    :param record: object exposing an ``info_dict`` mapping with a 'Power' entry.
    :return: bool
    """
    return _has_min_power(record, 0.10)


if __name__ == "__main__":
    workdir = "analyse/"
    try:
        os.mkdir(workdir)
    except OSError:
        # Best effort: a filesystem failure here (e.g. the directory already
        # exists) is ignored. Only OSError is caught so that Ctrl-C and
        # programming errors are no longer silently swallowed.
        pass

    reference = ReferenceGenome("LAN210_v0.10m.fasta",
                                index_file="LAN210_v0.10m.idx")

    sample_set_names_list = ["PmCDA1_3d"]

    # Names of the output directories.
    clustering_dir = "clustering"
    rainfall_dir = "rainfall"

    distance_threshold = 1000
    reference.find_gaps()
    min_cluster_size = 3

    bad_regions_file = "LAN210_v0.10m_masked_all_not_in_good_genes.gff"
    # NOTE: the string statement below is disabled code; it has no effect.
    """ bad_regions = CollectionGFF(input_file=bad_regions_file, from_file=True) """
    gff_file = "merged_annotations_Nagalakshmi_tranf_to_LAN210_v0.10m.gff3"
"the lowest memory consuming - index_db. Default: index_db") parser.add_argument("-b", "--region_black_list", action="store", dest="black_list", type=lambda s: s.split(","), default=[], help="Comma-separated ist of region names in genome to be not mutated") parser.add_argument("-v", "--out_vcf", action="store", dest="out_vcf", required=True, help=".vcf with snp set") parser.add_argument("-n", "--number_of_mutations", action="store", dest="mut_number", type=int, default=10000, help="Number of mutations in set") parser.add_argument("-z", "--zygoty", action="store", dest="zygoty", default="h**o", help="Zygoty of mutations in set. At moment only 100%% heterozygous or 100%% " "homozygous sets can be generated. " "Allowed values: h**o, hetero. Default: h**o") parser.add_argument("-s", "--substitutions", action="store", dest="substitutions", type=parse_substititions, help="Set of substitution. MUST BE i following form: " "<ref_base_1>:<comma-separetad_alternatives>-<ref_base_2>:<comma-separetad_alternatives> " "Alternatives can be not set. If so all possible variants will be choosen as " "a set of alternatives." "If no reference bases was set - sites with all four bases will be considered as mutation sites. " "Example: G:T,C,A-T-A:G ") args = parser.parse_args() masked_regions_dict = SeqIO.to_dict(GFF.parse(args.masking_gff)) reference_genome = ReferenceGenome(args.reference_genome, masked_regions=masked_regions_dict, index_file=args.ref_gen_idx, filetype="fasta", mode=args.parsing_mode, black_list=args.black_list) reference_genome.generate_snp_set(args.mut_number, substitution_dict=args.substitutions, zygoty=args.zygoty, out_vcf=args.out_vcf)
# --- Command-line options (registered on the parser created earlier) ---

# One or more GFF files describing masked regions, given as "a.gff,b.gff".
parser.add_argument(
    "-m", "--masked_regions",
    dest="masked_regions",
    action="store",
    type=lambda s: s.split(","),
    help="Comma-separated list of Gff file with masked regions",
)

# Window geometry for the sliding-window count.
parser.add_argument(
    "-w", "--window_size",
    dest="window_size",
    action="store",
    type=int,
    default=100000,
    help="Size of the windows Default: 100000",
)
parser.add_argument(
    "-s", "--window_step",
    dest="window_step",
    action="store",
    type=int,
    default=None,
    help="Step of the sliding windows. Default: window size, i.e windows are staking",
)

# How the input sequence file is read.
parser.add_argument(
    "-p", "--parsing_mode",
    dest="parsing_mode",
    action="store",
    default="parse",
    help="Parsing mode for input sequence file. "
         "Possible variants: 'index_db', 'index', 'parse'(default)",
)

args = parser.parse_args()

# Masking is supplied only through the GFF list; no pre-built masked-region
# object and no scaffold black list are passed.
reference = ReferenceGenome(
    args.input,
    masked_regions=None,
    index_file="refgen.idx",
    filetype="fasta",
    mode=args.parsing_mode,
    black_list=[],
    masking_gff_list=args.masked_regions,
)

# NOTE(review): args.window_step is forwarded as-is and is None when -s is not
# given; presumably the callee falls back to the window size - confirm.
reference.count_gaped_and_masked_positions_in_windows(
    args.window_size,
    args.window_step,
    ignore_scaffolds_shorter_than_window=True,
    output_prefix=args.output_prefix,
    min_gap_len=1,
)