def write_new_gff(lifted_features, out_file, parent_dict, cov_threshold, seq_threshold):
    """Annotate the lifted parent features and write them with their children.

    Every parent gets its score reset to ".", an "extra_copy_number"
    attribute (0 for the first parent seen with a given id, then 1, 2, ...
    following the id-sorted order), and the flags "partial_mapping" /
    "low_identity" when its "coverage" / "sequence_ID" attribute is below the
    corresponding threshold. Parents are then written in (seqid, start) order
    through write_feature.

    Args:
        lifted_features: mapping indexed by a parent's "copy_id" attribute
            that yields the child features of that parent.
        out_file: output path, or the literal string 'stdout'.
        parent_dict: forwarded to liftoff_utils.get_parent_list and
            build_parent_dict.
        cov_threshold: parents with coverage below this value are flagged.
        seq_threshold: parents with sequence identity below this value are
            flagged.
    """
    writing_to_file = out_file != 'stdout'
    # NOTE(review): for 'stdout' the literal string "stdout" (not sys.stdout)
    # is handed to write_feature; presumably it special-cases that value.
    f = open(out_file, 'w') if writing_to_file else "stdout"
    try:
        parents = liftoff_utils.get_parent_list(lifted_features, parent_dict)
        parents.sort(key=lambda x: x.id)
        copy_num_dict = {}
        for parent in parents:
            parent.score = "."
            # First occurrence of an id is copy 0; later ones count upwards.
            copy_num = copy_num_dict.get(parent.id, -1) + 1
            copy_num_dict[parent.id] = copy_num
            parent.attributes["extra_copy_number"] = str(copy_num)
            if float(parent.attributes["coverage"][0]) < cov_threshold:
                parent.attributes["partial_mapping"] = "True"
            if float(parent.attributes["sequence_ID"][0]) < seq_threshold:
                parent.attributes["low_identity"] = "True"
        # Stable sort: parents sharing (seqid, start) keep their id order.
        final_parent_list = sorted(parents, key=lambda x: (x.seqid, x.start))
        for final_parent in final_parent_list:
            child_features = lifted_features[final_parent.attributes["copy_id"][0]]
            parent_child_dict = build_parent_dict(child_features, parent_dict)
            write_feature([final_parent], f, child_features, parent_child_dict)
    finally:
        # Close the handle we opened so buffered output is flushed even when
        # writing fails part-way through.
        if writing_to_file:
            f.close()
def check_homologues(all_lifted_features, lifted_features_to_check, parent_dict, ref_parent_order, inter):
    """Compare each feature to check against the features it overlaps.

    Args:
        all_lifted_features: passed to liftoff_utils.get_parent_list to obtain
            every lifted parent feature.
        lifted_features_to_check: passed to liftoff_utils.get_parent_list to
            obtain the parents that are examined.
        parent_dict: forwarded to compare_overlapping_feature.
        ref_parent_order: forwarded to compare_overlapping_feature.
        inter: interval structure used by find_overlaps, or None to build one
            from all lifted parents. A structure built here is local to this
            call; it is not returned to the caller.

    Returns:
        The dict that compare_overlapping_feature was given to fill in
        (empty if it never adds anything).
    """
    all_feature_list = liftoff_utils.get_parent_list(all_lifted_features)
    features_to_check_list = liftoff_utils.get_parent_list(
        lifted_features_to_check)
    target_parent_order = liftoff_utils.find_parent_order(all_feature_list)
    remap_features = {}
    # Identity test: `== None` would dispatch to a user-defined __eq__.
    if inter is None:
        inter = build_interval_list(all_feature_list)
    for feature in features_to_check_list:
        overlaps = find_overlaps(feature.start, feature.end, feature.seqid,
                                 feature.strand,
                                 feature.attributes["copy_id"][0], inter)
        for overlap in overlaps:
            # overlap[2][1] is the overlapping feature object handed on for
            # comparison (layout taken from how it is indexed here).
            feature_to_compare = overlap[2][1]
            compare_overlapping_feature(feature_to_compare, parent_dict,
                                        feature, remap_features,
                                        ref_parent_order, target_parent_order)
    return remap_features
def check_homologues(all_lifted_features, lifted_features_to_check, parent_dict, ref_parent_order, max_overlap):
    """Find overlaps for the features to check and compare each pair.

    An interval structure is built from every lifted parent. Each parent to
    check is queried against it with start and end shifted by -1, and every
    hit whose copy id differs from the query's own copy id is passed to
    compare_overlapping_feature.

    Returns:
        A tuple (remap_features, feature_locations): the set handed to
        compare_overlapping_feature, and the interval structure built here.
    """
    every_parent = liftoff_utils.get_parent_list(all_lifted_features)
    parents_to_check = liftoff_utils.get_parent_list(lifted_features_to_check)
    target_parent_order = liftoff_utils.find_parent_order(every_parent)
    feature_locations = build_interval_list(every_parent)
    remap_features = set()

    for candidate in parents_to_check:
        hits = liftoff_utils.find_overlaps(
            candidate.start - 1,
            candidate.end - 1,
            candidate.seqid,
            candidate.strand,
            candidate.attributes["copy_id"][0],
            feature_locations,
            parent_dict,
            all_lifted_features,
            max_overlap,
        )
        for hit in hits:
            # A feature overlapping itself is not a conflict.
            if hit[2][0] == candidate.attributes["copy_id"][0]:
                continue
            compare_overlapping_feature(hit[2][1], candidate, remap_features,
                                        ref_parent_order, target_parent_order)

    return remap_features, feature_locations
def check_homologues(all_lifted_features, lifted_features_to_check, parent_dict, original_parent_order):
    """Compare each feature to check with the features sorted after it.

    All lifted parents are sorted by (seqid, start). For every parent to
    check, the slice of that sorted list starting at the first feature on the
    same seqid (and running to the end of the list) is handed to
    compare_nearby_features.

    Returns:
        The dict that compare_nearby_features was given to fill in.
    """
    every_parent = liftoff_utils.get_parent_list(all_lifted_features,
                                                 parent_dict)
    parents_to_check = liftoff_utils.get_parent_list(lifted_features_to_check,
                                                     parent_dict)
    every_parent.sort(key=lambda feat: (feat.seqid, feat.start))
    new_parent_order = liftoff_utils.find_parent_order(every_parent)

    # Position of the first feature of each seqid in the sorted list.
    first_index_on_chrom = {}
    for position, feat in enumerate(every_parent):
        first_index_on_chrom.setdefault(feat.seqid, position)

    remap_features = {}
    for candidate in parents_to_check:
        chrom_start = first_index_on_chrom[candidate.seqid]
        compare_nearby_features(every_parent[chrom_start:], parent_dict,
                                candidate, remap_features,
                                original_parent_order, new_parent_order)
    return remap_features
def write_new_gff(lifted_features, parents_dict, args):
    """Write the finalized parent features and their children.

    Parents are sorted by id, passed through finalize_parent_features, sorted
    again by (seqid, start) and written one at a time via write_feature.

    Args:
        lifted_features: passed to liftoff_utils.get_parent_list, and indexed
            by a parent's "copy_id" attribute to obtain its child features.
        parents_dict: forwarded to build_parent_dict.
        args: object whose ``o`` attribute is the output path or the literal
            string 'stdout'; also forwarded to finalize_parent_features.
    """
    writing_to_file = args.o != 'stdout'
    # NOTE(review): for 'stdout' the literal string "stdout" (not sys.stdout)
    # is handed to write_feature; presumably it special-cases that value.
    f = open(args.o, 'w') if writing_to_file else "stdout"
    try:
        parents = liftoff_utils.get_parent_list(lifted_features)
        parents.sort(key=lambda x: x.id)
        final_parent_list = finalize_parent_features(parents, args)
        final_parent_list.sort(key=lambda x: (x.seqid, x.start))
        for final_parent in final_parent_list:
            child_features = lifted_features[final_parent.attributes["copy_id"][0]]
            parent_child_dict = build_parent_dict(child_features, parents_dict)
            write_feature([final_parent], f, child_features, parent_child_dict)
    finally:
        # Close the handle we opened so buffered output is flushed even when
        # writing fails part-way through.
        if writing_to_file:
            f.close()
def remove_remapped_features_from_overlaps(lifted_feature_list, all_overlapping_features):
    """Drop recorded overlaps that no longer hit any lifted parent feature.

    An interval structure is built from the current parents. Every entry
    recorded under a feature name is re-queried against it (start and end
    shifted by -1), and entries for which find_overlaps returns nothing are
    removed from ``all_overlapping_features`` in place.

    Args:
        lifted_feature_list: passed to liftoff_utils.get_parent_list.
        all_overlapping_features: mapping of feature name to a container of
            overlapping features (objects with start, end, seqid, strand).

    Returns:
        The interval structure built from the current parents.
    """
    parents = liftoff_utils.get_parent_list(lifted_feature_list)
    inter = build_interval_list(parents)
    for feature_name in all_overlapping_features:
        recorded = all_overlapping_features[feature_name]
        # Iterate over a snapshot: removing from the container that is being
        # iterated makes the loop skip the entry following each removal.
        for overlapping_feature in list(recorded):
            updated_overlaps = find_overlaps(overlapping_feature.start - 1,
                                             overlapping_feature.end - 1,
                                             overlapping_feature.seqid,
                                             overlapping_feature.strand,
                                             feature_name, inter)
            if len(updated_overlaps) == 0:
                recorded.remove(overlapping_feature)
    return inter
def write_new_gff(lifted_features, args, feature_db):
    """Write a header, then the finalized parent features and their children.

    The output format is read from ``feature_db.dialect['fmt']`` and passed
    to write_header and write_feature. Parents are sorted by id, passed
    through finalize_parent_features, sorted again by (seqid, start) and
    written one at a time.

    Args:
        lifted_features: passed to liftoff_utils.get_parent_list, and indexed
            by a parent's "copy_id" attribute to obtain its child features.
        args: object whose ``o`` attribute is the output path, or 'stdout' to
            write to sys.stdout; also forwarded to finalize_parent_features.
        feature_db: object exposing ``dialect['fmt']``.
    """
    writing_to_file = args.o != 'stdout'
    f = open(args.o, 'w') if writing_to_file else sys.stdout
    try:
        out_type = feature_db.dialect['fmt']
        write_header(f, out_type)
        parents = liftoff_utils.get_parent_list(lifted_features)
        parents.sort(key=lambda x: x.id)
        final_parent_list = finalize_parent_features(parents, args)
        final_parent_list.sort(key=lambda x: (x.seqid, x.start))
        for final_parent in final_parent_list:
            child_features = lifted_features[final_parent.attributes["copy_id"][0]]
            parent_child_dict = build_parent_dict(child_features, final_parent)
            write_feature([final_parent], f, child_features, parent_child_dict,
                          out_type)
    finally:
        # Close only a handle we opened ourselves; sys.stdout stays open.
        if writing_to_file:
            f.close()
def clean_overlapping_features(lifted_feature_list, all_overlapping_features, parent_dict, features_to_remap, unmapped_features):
    """Remove stale overlap records and re-queue the affected unmapped features.

    Lifted parents are grouped by seqid. A recorded overlap is stale when its
    seqid (element 2 of the record) has no lifted parent, or when
    find_overlapping_features returns False for it against the parents on
    that seqid. Stale records are removed from ``all_overlapping_features``
    in place; every unmapped feature whose id equals the record's feature
    name is then removed from ``unmapped_features`` and entered in
    ``features_to_remap`` with the value ``[(-1, -1, None, None)]``.

    Args:
        lifted_feature_list: passed to liftoff_utils.get_parent_list.
        all_overlapping_features: mapping of feature name to a container of
            overlap records; mutated in place.
        parent_dict: passed to liftoff_utils.get_parent_list.
        features_to_remap: dict updated in place with re-queued feature ids.
        unmapped_features: container of features; mutated in place.
    """
    parent_list = liftoff_utils.get_parent_list(lifted_feature_list,
                                                parent_dict)
    feature_by_chrom_dict = {}
    for feature in parent_list:
        feature_by_chrom_dict.setdefault(feature.seqid, []).append(feature)

    for feature_name in all_overlapping_features:
        recorded = all_overlapping_features[feature_name]
        # Iterate over snapshots below: removing from a container while
        # iterating it makes the loop skip the entry after each removal.
        for overlapping_features in list(recorded):
            chrom = overlapping_features[2]
            # `is not False` keeps the original strict test: only an explicit
            # False from the helper marks the record as stale.
            still_overlaps = (
                chrom in feature_by_chrom_dict
                and find_overlapping_features(
                    overlapping_features,
                    feature_by_chrom_dict[chrom]) is not False
            )
            if still_overlaps:
                continue
            recorded.remove(overlapping_features)
            # NOTE(review): re-queueing happens only when a stale record was
            # just removed; confirm this nesting matches the intended flow.
            for feature in list(unmapped_features):
                if feature.id == feature_name:
                    unmapped_features.remove(feature)
                    features_to_remap[feature.id] = [(-1, -1, None, None)]