def write_new_gff(lifted_features, out_file, parent_dict, cov_threshold,
                  seq_threshold):
    """Annotate every lifted parent feature and write it with its children.

    Parents are first ordered by ``id`` so that repeated ids receive
    consecutive ``extra_copy_number`` values (0 for the first occurrence),
    then re-ordered by ``(seqid, start)`` for output.

    Args:
        lifted_features: Mapping indexed by a parent's ``copy_id`` attribute
            that yields the features belonging to that copy.
        out_file: Output path, or the literal string ``'stdout'``.
        parent_dict: Passed through to ``liftoff_utils.get_parent_list`` and
            ``build_parent_dict``.
        cov_threshold: Parents whose ``coverage`` attribute is below this
            value are tagged ``partial_mapping = "True"``.
        seq_threshold: Parents whose ``sequence_ID`` attribute is below this
            value are tagged ``low_identity = "True"``.
    """
    to_stdout = out_file == 'stdout'
    # write_feature receives either an open file handle or the marker string
    # "stdout"; how it dispatches on that is decided by write_feature itself.
    f = "stdout" if to_stdout else open(out_file, 'w')
    try:
        parents = liftoff_utils.get_parent_list(lifted_features, parent_dict)
        parents.sort(key=lambda x: x.id)
        copy_num_dict = {}
        for parent in parents:
            parent.score = "."
            if parent.id in copy_num_dict:
                copy_num_dict[parent.id] += 1
            else:
                copy_num_dict[parent.id] = 0
            parent.attributes["extra_copy_number"] = str(
                copy_num_dict[parent.id])
            if float(parent.attributes["coverage"][0]) < cov_threshold:
                parent.attributes["partial_mapping"] = "True"
            if float(parent.attributes["sequence_ID"][0]) < seq_threshold:
                parent.attributes["low_identity"] = "True"
        # sorted() is stable, so parents sharing (seqid, start) keep the
        # id-based order established above.
        for final_parent in sorted(parents, key=lambda x: (x.seqid, x.start)):
            child_features = lifted_features[
                final_parent.attributes["copy_id"][0]]
            parent_child_dict = build_parent_dict(child_features, parent_dict)
            write_feature([final_parent], f, child_features,
                          parent_child_dict)
    finally:
        # Close the handle we opened so buffered output is flushed
        # deterministically, even if writing fails part-way through.
        if not to_stdout:
            f.close()
def check_homologues(all_lifted_features, lifted_features_to_check,
                     parent_dict, original_parent_order):
    """Compare each feature under review against the features that follow it.

    All lifted parents are sorted by ``(seqid, start)``. For every parent
    taken from ``lifted_features_to_check``, ``compare_nearby_features`` is
    called with the tail of that sorted list, beginning at the first parent
    on the same ``seqid``. The tail runs to the end of the whole list, not
    only to the end of that sequence.

    Args:
        all_lifted_features: Passed to ``liftoff_utils.get_parent_list`` to
            obtain every lifted parent.
        lifted_features_to_check: Passed to ``liftoff_utils.get_parent_list``
            to obtain the parents that need checking.
        parent_dict: Forwarded to ``get_parent_list`` and
            ``compare_nearby_features``.
        original_parent_order: Forwarded to ``compare_nearby_features``.

    Returns:
        The ``remap_features`` dict handed to every
        ``compare_nearby_features`` call (presumably filled in by it).
    """
    all_parents = liftoff_utils.get_parent_list(all_lifted_features,
                                                parent_dict)
    parents_to_check = liftoff_utils.get_parent_list(
        lifted_features_to_check, parent_dict)
    all_parents.sort(key=lambda parent: (parent.seqid, parent.start))
    new_parent_order = liftoff_utils.find_parent_order(all_parents)

    # Record the position of the first parent seen on each sequence.
    first_index_by_seqid = {}
    for position, parent in enumerate(all_parents):
        first_index_by_seqid.setdefault(parent.seqid, position)

    remap_features = {}
    for candidate in parents_to_check:
        tail_start = first_index_by_seqid[candidate.seqid]
        compare_nearby_features(all_parents[tail_start:], parent_dict,
                                candidate, remap_features,
                                original_parent_order, new_parent_order)
    return remap_features
def clean_overlapping_features(lifted_feature_list, all_overlapping_features,
                               parent_dict, features_to_remap,
                               unmapped_features):
    """Drop stale overlap records and re-queue features they had blocked.

    An overlap record is removed when its sequence (``record[2]``) has no
    lifted parents, or when ``find_overlapping_features`` returns ``False``
    for it. When a record is removed for ``feature_name``, every entry of
    ``unmapped_features`` whose ``id`` equals ``feature_name`` is removed
    from that list and registered in ``features_to_remap``.

    All three containers are modified in place; nothing is returned.

    Args:
        lifted_feature_list: Passed to ``liftoff_utils.get_parent_list``.
        all_overlapping_features: Mapping of feature name to a list of
            overlap records (indexable; index 2 is used as the sequence id).
        parent_dict: Passed to ``liftoff_utils.get_parent_list``.
        features_to_remap: Dict receiving ``feature.id -> [(-1, -1, None,
            None)]`` for each re-queued feature (presumably a "no
            constraint" placeholder; confirm against the consumer).
        unmapped_features: List of features that previously failed to map.
    """
    parent_list = liftoff_utils.get_parent_list(lifted_feature_list,
                                                parent_dict)
    feature_by_chrom_dict = {}
    for feature in parent_list:
        feature_by_chrom_dict.setdefault(feature.seqid, []).append(feature)

    for feature_name in all_overlapping_features:
        overlaps = all_overlapping_features[feature_name]
        # Iterate over a snapshot: removing from the list being iterated
        # would silently skip the record following each removal.
        for overlapping_features in list(overlaps):
            chrom = overlapping_features[2]
            if (chrom in feature_by_chrom_dict
                    and find_overlapping_features(
                        overlapping_features,
                        feature_by_chrom_dict[chrom]) is not False):
                continue
            overlaps.remove(overlapping_features)
            # NOTE(review): the original line had lost its indentation; this
            # loop is assumed to belong to the removal branch. Confirm.
            for feature in list(unmapped_features):
                if feature.id == feature_name:
                    unmapped_features.remove(feature)
                    features_to_remap[feature.id] = [(-1, -1, None, None)]
def clean_overlapping_features(lifted_feature_list, all_overlapping_features,
                               parent_dict):
    """Remove overlap records that no longer match any lifted parent.

    A record is removed from ``all_overlapping_features[feature_name]`` when
    its sequence (``record[2]``) has no lifted parents, or when
    ``find_overlapping_features`` returns ``False`` for it. A sequence with
    no lifted parents is treated as "no overlap" instead of raising
    ``KeyError``.

    The lists inside ``all_overlapping_features`` are modified in place;
    nothing is returned.

    Args:
        lifted_feature_list: Passed to ``liftoff_utils.get_parent_list``.
        all_overlapping_features: Mapping of feature name to a list of
            overlap records (indexable; index 2 is used as the sequence id).
        parent_dict: Passed to ``liftoff_utils.get_parent_list``.
    """
    parent_list = liftoff_utils.get_parent_list(lifted_feature_list,
                                                parent_dict)
    feature_by_chrom_dict = {}
    for feature in parent_list:
        feature_by_chrom_dict.setdefault(feature.seqid, []).append(feature)

    for feature_name in all_overlapping_features:
        overlaps = all_overlapping_features[feature_name]
        # Iterate over a snapshot: removing from the list being iterated
        # would silently skip the record following each removal.
        for overlapping_features in list(overlaps):
            chrom = overlapping_features[2]
            if (chrom not in feature_by_chrom_dict
                    or find_overlapping_features(
                        overlapping_features,
                        feature_by_chrom_dict[chrom]) is False):
                overlaps.remove(overlapping_features)
def write_new_gff(lifted_features, out_file, parent_dict):
    """Annotate every lifted parent feature and write it with its children.

    Each parent gets ``score = "."`` and a ``copy_number`` attribute that
    counts occurrences of its ``id`` (1 for the first occurrence). Parents
    whose ``coverage`` attribute is below 0.5 are tagged
    ``partial_mapping = "True"``. Parents are written in the order returned
    by ``liftoff_utils.get_parent_list``.

    Args:
        lifted_features: Mapping indexed by a parent's ``copy_id`` attribute
            that yields the features belonging to that copy.
        out_file: Output path, or the literal string ``'stdout'``.
        parent_dict: Passed through to ``liftoff_utils.get_parent_list`` and
            ``build_parent_dict``.
    """
    to_stdout = out_file == 'stdout'
    # write_feature receives either an open file handle or the marker string
    # "stdout"; how it dispatches on that is decided by write_feature itself.
    f = "stdout" if to_stdout else open(out_file, 'w')
    try:
        copy_num_dict = {}
        parents = liftoff_utils.get_parent_list(lifted_features, parent_dict)
        for parent in parents:
            child_features = lifted_features[parent.attributes["copy_id"][0]]
            parent_child_dict = build_parent_dict(child_features, parent_dict)
            parent.score = "."
            copy_num_dict[parent.id] = copy_num_dict.get(parent.id, 0) + 1
            parent.attributes["copy_number"] = str(copy_num_dict[parent.id])
            if float(parent.attributes["coverage"][0]) < 0.5:
                parent.attributes["partial_mapping"] = "True"
            write_feature([parent], f, child_features, parent_child_dict)
    finally:
        # Close the handle we opened so buffered output is flushed
        # deterministically, even if writing fails part-way through.
        if not to_stdout:
            f.close()