def map_unmapped_features(unmapped_features, target_chroms, lifted_feature_list, feature_db,
                          feature_hierarchy, ref_parent_order, args):
    """Retry unmapped features against the whole genome when that was not already done.

    The retry is attempted only if there is at least one unmapped feature and
    the first entry of ``target_chroms`` differs from ``args.target``;
    otherwise ``unmapped_features`` is handed back untouched.

    Args:
        unmapped_features: Collection of features that are still unmapped.
        target_chroms: Target sequences used so far; only the first entry is
            inspected here.
        lifted_feature_list: Passed through to the whole-genome mapping step.
        feature_db: Passed through to the whole-genome mapping step.
        feature_hierarchy: Passed through to the whole-genome mapping step.
        ref_parent_order: Passed through to the whole-genome mapping step.
        args: Object exposing ``reference`` and ``target`` attributes; also
            forwarded to the whole-genome mapping step.

    Returns:
        ``unmapped_features`` itself when no retry is needed, otherwise
        whatever ``liftover_types.map_unmapped_genes_agaisnt_all`` returns.
    """
    # Guard clause: nothing left to map, or the first target sequence already
    # equals args.target, so no whole-genome retry is attempted.
    if len(unmapped_features) == 0 or target_chroms[0] == args.target:
        return unmapped_features

    print("mapping unaligned features to whole genome")
    whole_genome_refs = [args.reference]
    whole_genome_targets = [args.target]
    return liftover_types.map_unmapped_genes_agaisnt_all(
        unmapped_features,
        whole_genome_refs,
        whole_genome_targets,
        lifted_feature_list,
        feature_db,
        feature_hierarchy,
        ref_parent_order,
        args,
    )
def main():
    """Run the liftover pipeline end to end from command-line arguments.

    Steps, in order:
      1. Parse arguments and decide which reference/target sequences to use
         (from the chroms file if given, otherwise the whole FASTA files).
      2. Lift the original annotation.
      3. If the first pass did not already use the whole target FASTA, retry
         the unmapped features against it; optionally map unplaced genes.
      4. Write the IDs of features that are still unmapped to ``args.u``.
      5. Optionally map extra gene copies.
      6. Write the lifted annotation to the output GFF.
    """
    # parse args
    args = parse_args()
    gff = args.g
    target_fasta = args.t
    reference_fasta = args.r
    processes = int(args.p)
    output = args.o
    chroms_file = args.chroms
    db = args.db
    infer_transcripts = args.infer_transcripts
    infer_genes = args.infer_genes
    unplaced_seq = args.unplaced
    copies = args.copies
    cov_threshold = float(args.a)
    seq_threshold = float(args.s)
    seq_threshold_copies = float(args.sc)
    minimap2_path = args.m
    inter_files = args.dir
    max_alns = int(args.n)
    feature_types_file = args.f

    # read chroms
    if chroms_file is not None:
        ref_chroms, target_chroms = parse_chrm_files(chroms_file)
    else:
        ref_chroms = [reference_fasta]
        target_chroms = [target_fasta]
    parent_features_to_lift = get_parent_features_to_lift(feature_types_file)

    # lift genes
    # Both containers are created here and handed to the lift step.
    lifted_feature_list = {}
    unmapped_features = []
    (feature_db, parent_features, intermediate_features, children_features,
     parent_order) = liftover_types.lift_original_annotation(
        gff, target_fasta, reference_fasta, ref_chroms, target_chroms, processes, db,
        lifted_feature_list, unmapped_features, infer_transcripts, infer_genes,
        cov_threshold, seq_threshold, minimap2_path, inter_files, max_alns,
        parent_features_to_lift)

    # The output file is opened before the remapping steps (as before), but
    # inside a context manager so the handle is closed even if one of those
    # steps raises.
    with open(args.u, 'w') as unmapped_out:
        if unmapped_features and target_chroms[0] != target_fasta:
            print("mapping unaligned features to whole genome")
            ref_chroms = [reference_fasta]
            target_chroms = [target_fasta]
            unmapped_features = liftover_types.map_unmapped_genes_agaisnt_all(
                unmapped_features, target_fasta, reference_fasta, ref_chroms, target_chroms,
                processes, lifted_feature_list, feature_db, parent_features,
                intermediate_features, children_features, parent_order, minimap2_path,
                inter_files, max_alns)

        if unplaced_seq is not None and chroms_file is not None:
            print("mapping unplaced genes")
            # Only the reference side of the unplaced file is used; the target
            # side is always the whole target FASTA.
            ref_chroms, _ = parse_chrm_files(unplaced_seq)
            target_chroms = [target_fasta]
            liftover_types.map_unplaced_genes(
                unmapped_features, target_fasta, reference_fasta, ref_chroms, target_chroms,
                processes, lifted_feature_list, feature_db, parent_features,
                intermediate_features, children_features, parent_order, minimap2_path,
                inter_files, max_alns)

        # One feature ID per line.
        for gene in unmapped_features:
            unmapped_out.write(gene.id + "\n")

    if copies:
        print("mapping gene copies")
        ref_chroms = [reference_fasta]
        target_chroms = [target_fasta]
        # remap is True only when a chroms file was supplied.
        remap = chroms_file is not None
        liftover_types.map_extra_copies(
            target_fasta, reference_fasta, ref_chroms, target_chroms, processes,
            lifted_feature_list, parent_features, children_features, feature_db,
            intermediate_features, parent_order, seq_threshold_copies, minimap2_path,
            inter_files, remap, max_alns)

    write_new_gff.write_new_gff(lifted_feature_list, output, parent_features, cov_threshold,
                                seq_threshold)