def correct_regions_from_gff(
        self,
        reference,
        variants_vcf,
        gff_file,
        output_prefix=None,
        feature_type_list=("CDS",),
        unification_key="Parent",
        vcf_with_masking=None,
        override_vcf_by_mask=None,
        use_ambiguous_nuccleotides=None):
    """Write variant-corrected sequences of GFF-defined regions to FASTA.

    Features are collected from ``gff_file`` with
    ``AnnotationsRoutines.get_feature_dict``, the reference is corrected
    with ``self.correct_reference`` restricted to the region interval list,
    and the corrected fragments of every region are joined into a single
    record written to ``<output_prefix>.fasta``.

    Files derived from ``output_prefix``:
      * ``<output_prefix>.coordinates_only.list`` - interval list handed to
        ``correct_reference`` (not written here; presumably produced by
        ``get_feature_dict`` - confirm against that helper);
      * ``<output_prefix>.raw.seq`` - corrected fragments, one per line;
      * ``<output_prefix>.fasta`` - final per-region sequences;
      * ``<output_prefix>.frameshifts.region.ids`` - regions containing
        fragments whose length change is not a multiple of three.

    :param reference: reference passed through to ``correct_reference``.
    :param variants_vcf: variants passed through to ``correct_reference``.
    :param gff_file: annotation file passed to ``get_feature_dict``.
    :param output_prefix: prefix of all file names listed above.
        NOTE(review): the default ``None`` is formatted into the names
        verbatim (``None.fasta`` etc.); callers should supply a prefix.
    :param feature_type_list: feature types forwarded to
        ``get_feature_dict``. Defaults to CDS only.
    :param unification_key: forwarded to ``get_feature_dict``.
    :param vcf_with_masking: forwarded to ``correct_reference``.
    :param override_vcf_by_mask: forwarded to ``correct_reference``.
    :param use_ambiguous_nuccleotides: forwarded to ``correct_reference``.
    :return: None; results are written to the files listed above.
    """
    # The default is an immutable tuple so no list object is shared between
    # calls; the helper still receives a list, as it did with the old default.
    # Explicit None and caller-supplied lists are forwarded untouched.
    if isinstance(feature_type_list, tuple):
        feature_type_list = list(feature_type_list)

    feature_dict = AnnotationsRoutines.get_feature_dict(
        gff_file,
        output_prefix=output_prefix,
        feature_type_list=feature_type_list,
        unification_key=unification_key)

    region_file = "%s.coordinates_only.list" % output_prefix
    raw_regions = "%s.raw.seq" % output_prefix
    final_regions = "%s.fasta" % output_prefix
    regions_with_frameshift_file = "%s.frameshifts.region.ids" % output_prefix

    self.correct_reference(
        reference,
        raw_regions,
        variants_vcf,
        raw_seq_per_line=True,
        vcf_with_masking=vcf_with_masking,
        override_vcf_by_mask=override_vcf_by_mask,
        use_ambiguous_nuccleotides=use_ambiguous_nuccleotides,
        interval_list=region_file)

    region_with_frameshift = SynDict()

    def new_regions_generator():
        """Yield one SeqRecord per region, recording frameshifted fragments."""
        with open(raw_regions, "r") as in_fd:
            for region_id in feature_dict:
                features = feature_dict[region_id]
                fragments = []
                for index, feature in enumerate(features):
                    # One line of the raw file is consumed per feature, in
                    # the iteration order of feature_dict.
                    fragment = in_fd.readline().strip()
                    # feature[1] and feature[2] are used as inclusive
                    # interval bounds of the original fragment.
                    expected_length = int(feature[2]) - int(feature[1]) + 1
                    if (expected_length - len(fragment)) % 3 != 0:
                        if region_id not in region_with_frameshift:
                            region_with_frameshift[region_id] = [index]
                        else:
                            region_with_frameshift[region_id].append(index)
                    fragments.append(fragment)

                sequence = Seq("".join(fragments))
                # Field 3 of the first feature decides the orientation:
                # anything other than "+" is reverse-complemented.
                if features[0][3] != "+":
                    sequence = sequence.reverse_complement()
                yield SeqRecord(seq=sequence, id=region_id, description="")

    SeqIO.write(new_regions_generator(), final_regions, format="fasta")
    # Must come after SeqIO.write: the frameshift dictionary is filled lazily
    # while the generator above is being consumed.
    region_with_frameshift.write(regions_with_frameshift_file,
                                 splited_values=True)
# Value is forwarded to get_feature_dict as its output_prefix.
# NOTE(review): the help text says "Output .gff file" although the option
# is stored as a prefix - confirm the intended wording.
parser.add_argument(
    "-o",
    "--output_prefix",
    action="store",
    dest="output_prefix",
    required=True,
    help="Output .gff file"
)

# The raw command-line string is split on commas into a list of feature types.
parser.add_argument(
    "-t",
    "--feature_types",
    action="store",
    dest="feature_types",
    type=lambda s: s.split(","),
    default=["CDS"],
    help="Comma-separated list of feature types to extract. "
         "Default: CDS only"
)

# Annotation entry forwarded to get_feature_dict as unification_key.
parser.add_argument(
    "-u",
    "--unification_key",
    action="store",
    dest="unification_key",
    default="Parent",
    help="Annotation entry to use for unification. Default: Parent"
)

args = parser.parse_args()

# Run the extraction with the options collected above.
AnnotationsRoutines.get_feature_dict(
    args.input_gff,
    output_prefix=args.output_prefix,
    feature_type_list=args.feature_types,
    unification_key=args.unification_key
)