Esempio n. 1
0
def _polished_is_better(original_attrs, polished_attrs):
    """Return True when the polished feature should replace the original.

    Decision order:
      1. A polished feature without a 'valid_ORFs' attribute always wins
         (kept from the original logic).
      2. Otherwise the feature with more valid ORFs wins.
      3. On an ORF tie, a higher 'sequence_ID' wins, then a higher 'coverage'.

    The attribute values are converted with int()/float() before comparing so
    that the ordering is numeric; comparing the raw values would be
    lexicographic if they are stored as strings.
    """
    if 'valid_ORFs' not in polished_attrs:
        return True
    polished_orfs = int(polished_attrs['valid_ORFs'][0])
    original_orfs = int(original_attrs['valid_ORFs'][0])
    if polished_orfs != original_orfs:
        return polished_orfs > original_orfs
    if float(polished_attrs['sequence_ID'][0]) > float(original_attrs['sequence_ID'][0]):
        return True
    return float(polished_attrs['coverage'][0]) > float(original_attrs['coverage'][0])


def find_and_polish_broken_cds(args, lifted_feature_list,feature_hierarchy, ref_chroms, target_chroms,
                               unmapped_features, feature_db, ref_parent_order,):
    """Re-align and re-lift features flagged by the polish step, keeping the better result.

    For every key of ``lifted_feature_list`` for which
    ``polish.polish_annotations`` returns a truthy value, the feature is
    re-aligned and re-lifted into a temporary dict. After ``check_cds`` has
    been run on the re-lifted features, each one replaces its entry in
    ``lifted_feature_list`` when it compares as better (see
    ``_polished_is_better``).

    Side effects: sets ``args.subcommand`` to "polish", sets ``args.d`` to
    100000000 once any feature is re-lifted, and mutates
    ``lifted_feature_list`` in place. Returns None.
    """
    args.subcommand = "polish"
    polish_lifted_features = {}
    ref_fa, target_fa = Fasta(args.reference), Fasta(args.target)
    for target_feature in lifted_feature_list:
        if not polish.polish_annotations(lifted_feature_list, ref_fa, target_fa, args, feature_hierarchy,
                                         target_feature):
            continue
        aligned_segments = align_features.align_features_to_target(ref_chroms, target_chroms, args,
                                                                   feature_hierarchy,
                                                                   "chrm_by_chrm", unmapped_features)
        if not aligned_segments:
            # Nothing aligned for this feature: keep the original lift-over
            # instead of failing on an empty collection.
            continue
        # Only the first group of alignments is used; it is re-keyed under the
        # target feature's name.
        segments = next(iter(aligned_segments.values()))
        for seg in segments:
            seg.query_name = target_feature
        args.d = 100000000
        lift_features.lift_all_features({target_feature: segments}, args.a, feature_db, feature_hierarchy,
                                        unmapped_features, polish_lifted_features, args.s, None, args,
                                        ref_parent_order)

    check_cds(polish_lifted_features, feature_hierarchy, args)
    for feature, polished_entries in polish_lifted_features.items():
        original_feature = lifted_feature_list[feature][0]
        polished_feature = polished_entries[0]
        if _polished_is_better(original_feature.attributes, polished_feature.attributes):
            lifted_feature_list[feature] = polished_entries
Esempio n. 2
0
def map_unplaced_genes(unmapped_features, target_fasta, reference_fasta,
                       ref_chroms, target_chroms, processes,
                       lifted_feature_list, feature_db, parent_dict,
                       intermediate_dict, children_dict, parent_order,
                       minimap2_path, inter_files, max_alns):
    """Align and lift the parent features whose seqid is listed in ``ref_chroms``.

    Scores are cleared first, then the selected parents go through sequence
    extraction, alignment, lift-over and overlap fixing using the "unplaced"
    lift-over type. All coverage / sequence-identity thresholds passed to the
    helpers are 0.0. Results are written into ``lifted_feature_list`` and
    ``unmapped_features`` by the called helpers; nothing is returned.
    """
    mode = "unplaced"
    liftoff_utils.clear_scores(lifted_feature_list, parent_dict)

    # Keep only parents located on one of the requested reference sequences,
    # keyed by their own id.
    unplaced_dict = {
        parent.id: parent
        for parent in parent_dict.values()
        if parent.seqid in ref_chroms
    }

    extract_features.get_gene_sequences(
        unplaced_dict, ref_chroms, reference_fasta, processes, inter_files,
        mode)
    alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, unplaced_dict,
        children_dict, mode, unmapped_features, reference_fasta,
        minimap2_path, inter_files, True, max_alns)

    print("lifting features")
    lift_features.lift_all_features(
        alignments, {}, 0.0, feature_db, unplaced_dict, children_dict,
        intermediate_dict, unmapped_features, lifted_feature_list, 0.0)

    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, alignments,
        unmapped_features, 0.0, intermediate_dict, children_dict, feature_db,
        parent_order, 0.0)
Esempio n. 3
0
def map_extra_copies(target_fasta, reference_fasta, ref_chroms, target_chroms,
                     processes, lifted_feature_list, parent_dict,
                     children_dict, feature_db, intermediate_dict,
                     parent_order, seq_threshold, minimap2_path, inter_files,
                     remap, max_alns):
    """Run the "copies" lift-over pass over every feature in ``parent_dict``.

    Scores are cleared, sequences are extracted, aligned and lifted, and
    overlaps are then fixed. The coverage threshold handed to the helpers is
    0.0, while ``seq_threshold`` is forwarded as the sequence-identity
    threshold. A fresh, local unmapped-feature list is used, so unmapped
    features from this pass are not visible to the caller. Nothing is
    returned; ``lifted_feature_list`` is updated by the helpers.
    """
    mode = "copies"
    local_unmapped = []
    liftoff_utils.clear_scores(lifted_feature_list, parent_dict)

    extract_features.get_gene_sequences(
        parent_dict, ref_chroms, reference_fasta, processes, inter_files,
        mode)
    alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, parent_dict,
        children_dict, mode, local_unmapped, reference_fasta, minimap2_path,
        inter_files, remap, max_alns)

    print("lifting features")
    lift_features.lift_all_features(
        alignments, {}, 0.0, feature_db, parent_dict, children_dict,
        intermediate_dict, local_unmapped, lifted_feature_list,
        seq_threshold)
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, alignments,
        local_unmapped, 0.0, intermediate_dict, children_dict, feature_db,
        parent_order, seq_threshold)
Esempio n. 4
0
def lift_original_annotation(gff, target_fasta, reference_fasta, ref_chroms,
                             target_chroms, processes, db, lifted_feature_list,
                             unmapped_features, infer_transcripts, infer_genes,
                             cov_threshold, seq_threshold, minimap2_path,
                             inter_files, max_alns, parents_to_lift):
    """Extract features from the annotation, align them and lift them over.

    Uses the "chrm_by_chrm" lift-over type. When the first entry of
    ``target_chroms`` equals ``target_fasta``, both thresholds are reset to 0
    before any lifting happens.

    Returns a tuple ``(feature_db, parent_dict, intermediate_dict,
    children_dict, original_parent_order)`` as produced by
    ``extract_features.extract_features_to_lift``.
    """
    mode = "chrm_by_chrm"
    if target_chroms[0] == target_fasta:
        cov_threshold = 0
        seq_threshold = 0

    extracted = extract_features.extract_features_to_lift(
        gff, db, ref_chroms, reference_fasta, processes, infer_transcripts,
        infer_genes, inter_files, mode, parents_to_lift)
    (parent_dict, children_dict, intermediate_dict, feature_db,
     original_parent_order) = extracted

    alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, processes, target_fasta, parent_dict,
        children_dict, mode, unmapped_features, reference_fasta,
        minimap2_path, inter_files, True, max_alns)

    print("lifting features")
    lift_features.lift_all_features(
        alignments, {}, cov_threshold, feature_db, parent_dict,
        children_dict, intermediate_dict, unmapped_features,
        lifted_feature_list, seq_threshold)
    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_feature_list, lifted_feature_list, parent_dict, alignments,
        unmapped_features, cov_threshold, intermediate_dict, children_dict,
        feature_db, original_parent_order, seq_threshold)

    return (feature_db, parent_dict, intermediate_dict, children_dict,
            original_parent_order)
Esempio n. 5
0
def align_and_lift_features(ref_chroms, target_chroms, args, feature_hierarchy, liftover_type, unmapped_features,
                            feature_db,
                            features_to_lift, lifted_features_list, ref_parent_order, min_cov, min_seqid):
    """Align features to the target, lift them, then fix incorrect overlaps.

    The three steps are delegated to ``align_features``, ``lift_features`` and
    ``fix_overlapping_features``. ``args.d`` is forwarded to both the lifting
    and the overlap-fixing step. Nothing is returned; the helpers receive
    ``lifted_features_list`` and ``unmapped_features`` and are expected to
    fill them.
    """
    alignments = align_features.align_features_to_target(
        ref_chroms, target_chroms, args, feature_hierarchy, liftover_type, unmapped_features)

    print("lifting features")
    lift_features.lift_all_features(
        alignments, min_cov, feature_db, features_to_lift, feature_hierarchy,
        unmapped_features, lifted_features_list, min_seqid,
        None,  # no feature locations are supplied on this pass
        args.d)

    fix_overlapping_features.fix_incorrectly_overlapping_features(
        lifted_features_list, lifted_features_list, alignments, unmapped_features,
        min_cov, feature_hierarchy, feature_db, ref_parent_order, min_seqid, args.d)