def resolve_overlapping_homologues(all_aligned_segs, lifted_feature_list,
                                   features_to_remap, unmapped_features,
                                   threshold, feature_hierarchy, feature_db,
                                   ref_parent_order, seq_id_threshold,
                                   feature_locations, distance_factor,
                                   max_overlap):
    """Re-lift the features in ``features_to_remap`` until none are left.

    Every round:

    1. ``remove_features_and_get_alignments`` is called with the features
       still to remap, ``lifted_feature_list`` and ``all_aligned_segs``.
    2. ``lift_features.lift_all_features`` is run on the alignments it
       returned.
    3. ``get_successfully_remapped_features`` selects what to re-check and
       ``check_homologues`` returns the next ``features_to_remap`` together
       with a new ``feature_locations`` used by the following round.

    The number of rounds is capped at ten times the initial length of
    ``features_to_remap``. Whatever is still pending when the loop stops is
    handed to ``remove_unresolved_features``.

    Returns:
        ``lifted_feature_list`` -- the same object that was passed in.
    """
    # Upper bound on rounds, fixed from the initial amount of work.
    round_limit = 10 * len(features_to_remap)

    for _ in range(round_limit):
        if len(features_to_remap) == 0:
            break

        segs_this_round = remove_features_and_get_alignments(
            features_to_remap, lifted_feature_list, all_aligned_segs)

        lift_features.lift_all_features(
            segs_this_round, threshold, feature_db,
            feature_hierarchy.parents, feature_hierarchy,
            unmapped_features, lifted_feature_list, seq_id_threshold,
            feature_locations, distance_factor, ref_parent_order)

        remapped_ok = get_successfully_remapped_features(
            lifted_feature_list, features_to_remap)

        # Both the pending work list and the location data are replaced by
        # what check_homologues reports.
        features_to_remap, feature_locations = check_homologues(
            lifted_feature_list, remapped_ok, feature_hierarchy.parents,
            ref_parent_order, max_overlap)

    remove_unresolved_features(features_to_remap, feature_hierarchy.parents,
                               lifted_feature_list, unmapped_features)
    return lifted_feature_list
Esempio n. 2
0
def find_and_polish_broken_cds(args, lifted_feature_list, feature_hierarchy,
                               ref_chroms, target_chroms, unmapped_features,
                               feature_db, ref_parent_order,):
    """Polish lifted features and keep the polished version when it is better.

    Side effects visible in this function:

    * ``args.subcommand`` is set to ``"polish"``.
    * ``args.d`` is set to ``100000000`` each time a feature is re-aligned.
    * Entries of ``lifted_feature_list`` are replaced in place.

    For every key of ``lifted_feature_list`` for which
    ``polish.polish_annotations`` returns a truthy value, the feature is
    re-aligned with ``align_features.align_features_to_target`` and lifted
    into a separate ``polish_lifted_features`` mapping. After ``check_cds``
    has run on that mapping, the first element of each polished entry is
    compared with the first element of the corresponding original entry.

    The polished entry replaces the original when any of these holds:

    * the polished feature has no ``'valid_ORFs'`` attribute;
    * its ``valid_ORFs`` value (as ``int``) is greater than the original's;
    * the ``valid_ORFs`` values are equal and either ``sequence_ID`` or
      ``coverage`` is greater on the polished feature.

    The function returns ``None``.
    """
    args.subcommand = "polish"
    polish_lifted_features = {}
    ref_fa, target_fa = Fasta(args.reference), Fasta(args.target)

    for target_feature in lifted_feature_list:
        aligned_segments_new = {}
        needs_realignment = polish.polish_annotations(
            lifted_feature_list, ref_fa, target_fa, args, feature_hierarchy,
            target_feature)
        if not needs_realignment:
            continue

        aligned_segments = align_features.align_features_to_target(
            ref_chroms, target_chroms, args, feature_hierarchy,
            "chrm_by_chrm", unmapped_features)
        # Only the first value of the returned mapping is used, and it is
        # re-keyed under the feature currently being polished.
        first_alignment_group = list(aligned_segments.values())[0]
        aligned_segments_new[target_feature] = first_alignment_group
        for segment in aligned_segments_new[target_feature]:
            segment.query_name = target_feature

        args.d = 100000000
        lift_features.lift_all_features(
            aligned_segments_new, args.a, feature_db, feature_hierarchy,
            unmapped_features, polish_lifted_features, args.s, None, args,
            ref_parent_order)

    check_cds(polish_lifted_features, feature_hierarchy, args)

    for name in polish_lifted_features:
        before = lifted_feature_list[name][0]
        after = polish_lifted_features[name][0]

        if 'valid_ORFs' not in after.attributes:
            take_polished = True
        elif int(after.attributes['valid_ORFs'][0]) > int(before.attributes['valid_ORFs'][0]):
            take_polished = True
        elif after.attributes['valid_ORFs'][0] == before.attributes['valid_ORFs'][0]:
            # Tie on ORF count: fall back to sequence identity, then coverage.
            # NOTE(review): these values are compared as stored, without
            # numeric conversion -- if they are strings the comparison is
            # lexicographic; confirm that is intended.
            take_polished = (
                after.attributes['sequence_ID'][0] > before.attributes['sequence_ID'][0]
                or after.attributes['coverage'][0] > before.attributes['coverage'][0]
            )
        else:
            take_polished = False

        if take_polished:
            lifted_feature_list[name] = polish_lifted_features[name]
Esempio n. 3
0
def map_extra_copies(target_fasta, reference_fasta, ref_chroms, target_chroms,
                     processes, lifted_feature_list, parent_dict,
                     children_dict, feature_db, intermediate_dict,
                     parent_order, seq_threshold, minimap2_path, inter_files,
                     remap, max_alns):
    """Run the "copies" lift-over pass over the features in ``parent_dict``.

    Steps, in order:

    1. ``liftoff_utils.clear_scores`` on ``lifted_feature_list``.
    2. ``extract_features.get_gene_sequences`` for ``parent_dict``.
    3. ``align_features.align_features_to_target`` with liftover type
       ``"copies"``.
    4. ``lift_features.lift_all_features`` on the resulting alignments,
       passing ``0.0`` as the third argument and ``seq_threshold`` last.
    5. ``fix_overlapping_features.fix_incorrectly_overlapping_features``.

    A fresh local list is used for unmapped features; it is passed to the
    helpers but not returned. The function itself returns ``None``.
    """
    mode = "copies"
    not_mapped = []

    liftoff_utils.clear_scores(lifted_feature_list, parent_dict)
    extract_features.get_gene_sequences(
        parent_dict, ref_chroms, reference_fasta, processes, inter_files,
        mode)

    copy_alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, parent_dict,
        children_dict, mode, not_mapped, reference_fasta, minimap2_path,
        inter_files, remap, max_alns)

    print("lifting features")
    lift_features.lift_all_features(
        copy_alignments, {}, 0.0, feature_db, parent_dict, children_dict,
        intermediate_dict, not_mapped, lifted_feature_list, seq_threshold)

    # lifted_feature_list is deliberately supplied for both of the first two
    # positional arguments, exactly as before.
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict,
        copy_alignments, not_mapped, 0.0, intermediate_dict, children_dict,
        feature_db, parent_order, seq_threshold)
Esempio n. 4
0
def map_unplaced_genes(unmapped_features, target_fasta, reference_fasta,
                       ref_chroms, target_chroms, processes,
                       lifted_feature_list, feature_db, parent_dict,
                       intermediate_dict, children_dict, parent_order,
                       minimap2_path, inter_files, max_alns):
    """Run the "unplaced" lift-over pass for features located on ``ref_chroms``.

    Only the values of ``parent_dict`` whose ``seqid`` is contained in
    ``ref_chroms`` are processed; they are re-keyed by their ``id``
    attribute. Those features are extracted, aligned (liftover type
    ``"unplaced"``), lifted with ``0.0`` passed for both threshold slots, and
    finally run through
    ``fix_overlapping_features.fix_incorrectly_overlapping_features``.

    The function returns ``None``; ``lifted_feature_list`` and
    ``unmapped_features`` are handed to the helpers.
    """
    mode = "unplaced"
    liftoff_utils.clear_scores(lifted_feature_list, parent_dict)

    # Subset of parents that sit on one of the requested reference sequences.
    unplaced_dict = {
        parent_dict[key].id: parent_dict[key]
        for key in parent_dict
        if parent_dict[key].seqid in ref_chroms
    }

    extract_features.get_gene_sequences(
        unplaced_dict, ref_chroms, reference_fasta, processes, inter_files,
        mode)

    unplaced_alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, unplaced_dict,
        children_dict, mode, unmapped_features, reference_fasta,
        minimap2_path, inter_files, True, max_alns)

    print("lifting features")
    lift_features.lift_all_features(
        unplaced_alignments, {}, 0.0, feature_db, unplaced_dict,
        children_dict, intermediate_dict, unmapped_features,
        lifted_feature_list, 0.0)

    # Note: the overlap fix receives the full parent_dict, not the
    # unplaced subset.
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict,
        unplaced_alignments, unmapped_features, 0.0, intermediate_dict,
        children_dict, feature_db, parent_order, 0.0)
Esempio n. 5
0
def lift_original_annotation(gff, target_fasta, reference_fasta, ref_chroms,
                             target_chroms, processes, db, lifted_feature_list,
                             unmapped_features, infer_transcripts, infer_genes,
                             cov_threshold, seq_threshold, minimap2_path,
                             inter_files, max_alns, parents_to_lift):
    """Extract, align and lift the annotation with type ``"chrm_by_chrm"``.

    When the first element of ``target_chroms`` equals ``target_fasta``,
    both ``cov_threshold`` and ``seq_threshold`` are overridden with ``0``
    before any lifting happens.

    The features are obtained from
    ``extract_features.extract_features_to_lift``, aligned with
    ``align_features.align_features_to_target``, lifted with
    ``lift_features.lift_all_features`` and then passed through
    ``fix_overlapping_features.fix_incorrectly_overlapping_features``.

    Returns:
        A 5-tuple ``(feature_db, parent_dict, intermediate_dict,
        children_dict, original_parent_order)`` built from the values
        returned by ``extract_features_to_lift``.
    """
    mode = "chrm_by_chrm"

    if target_chroms[0] == target_fasta:
        cov_threshold = 0
        seq_threshold = 0

    extracted = extract_features.extract_features_to_lift(
        gff, db, ref_chroms, reference_fasta, processes, infer_transcripts,
        infer_genes, inter_files, mode, parents_to_lift)
    parents, children, intermediates, features, parent_order = extracted

    alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, parents,
        children, mode, unmapped_features, reference_fasta, minimap2_path,
        inter_files, True, max_alns)

    print("lifting features")
    lift_features.lift_all_features(
        alignments, {}, cov_threshold, features, parents, children,
        intermediates, unmapped_features, lifted_feature_list, seq_threshold)

    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parents, alignments,
        unmapped_features, cov_threshold, intermediates, children, features,
        parent_order, seq_threshold)

    # The return order differs from the order in which the values were
    # unpacked above.
    return features, parents, intermediates, children, parent_order
Esempio n. 6
0
def align_and_lift_features(ref_chroms, target_chroms, args, feature_hierarchy, liftover_type, unmapped_features,
                            feature_db,
                            features_to_lift, lifted_features_list, ref_parent_order, min_cov, min_seqid):
    """Align features to the target, lift them, then fix overlapping lifts.

    The alignments returned by ``align_features.align_features_to_target``
    are passed to ``lift_features.lift_all_features`` (with ``None`` as the
    feature-locations argument and ``args.d`` as the final argument) and
    afterwards to
    ``fix_overlapping_features.fix_incorrectly_overlapping_features``.

    The function returns ``None``; ``lifted_features_list`` and
    ``unmapped_features`` are handed to the helpers.
    """
    alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, args, feature_hierarchy, liftover_type,
        unmapped_features)

    print("lifting features")
    # No precomputed feature locations exist on this pass, so None is given.
    lift_features.lift_all_features(
        alignments, min_cov, feature_db, features_to_lift, feature_hierarchy,
        unmapped_features, lifted_features_list, min_seqid, None, args.d)

    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_features_list, lifted_features_list, alignments,
        unmapped_features, min_cov, feature_hierarchy, feature_db,
        ref_parent_order, min_seqid, args.d)
Esempio n. 7
0
def resolve_overlapping_homologues(all_aligned_segs, lifted_feature_list,
                                   features_to_remap, unmapped_features,
                                   threshold, parent_dict, intermediate_dict,
                                   children_dict, feature_db,
                                   original_parent_order, seq_id_threshold):
    """Re-lift the features in ``features_to_remap`` until none are left.

    Every round removes each pending feature from ``lifted_feature_list``,
    collects its alignments from ``all_aligned_segs``, records it through
    ``add_overlapping_feature``, re-runs ``lift_features.lift_all_features``
    and ``clean_overlapping_features``, and finally lets ``check_homologues``
    decide which of the features that were lifted again still need another
    round.

    The number of rounds is capped at ten times the initial length of
    ``features_to_remap``. Features still pending afterwards are looked up in
    ``parent_dict`` (via ``liftoff_utils.convert_id_to_original``), appended
    to ``unmapped_features`` and deleted from ``lifted_feature_list``.

    Returns:
        ``lifted_feature_list`` -- the same object that was passed in.
    """
    all_overlapping_features = {}
    # Upper bound on rounds, fixed from the initial amount of work.
    round_limit = 10 * len(features_to_remap)

    for _ in range(round_limit):
        if len(features_to_remap) == 0:
            break

        segs_this_round = {}
        for name in features_to_remap:
            del lifted_feature_list[name]
            segs_this_round[name] = all_aligned_segs[name]
            add_overlapping_feature(features_to_remap, name,
                                    all_overlapping_features)

        lift_features.lift_all_features(
            segs_this_round, all_overlapping_features, threshold, feature_db,
            parent_dict, children_dict, intermediate_dict, unmapped_features,
            lifted_feature_list, seq_id_threshold)
        clean_overlapping_features(
            lifted_feature_list, all_overlapping_features, parent_dict,
            features_to_remap, unmapped_features)

        # Only features that made it back into lifted_feature_list are
        # re-checked.
        relifted = {
            name: lifted_feature_list[name]
            for name in features_to_remap
            if name in lifted_feature_list
        }
        features_to_remap = check_homologues(
            lifted_feature_list, relifted, parent_dict, original_parent_order)

    # Anything still pending is given up on.
    for leftover in features_to_remap:
        original_id = liftoff_utils.convert_id_to_original(leftover)
        unmapped_features.append(parent_dict[original_id])
        del lifted_feature_list[leftover]
    return lifted_feature_list