Exemple #1
0
def write_new_gff(lifted_features, out_file, parent_dict, cov_threshold, seq_threshold):
    """Annotate lifted parent features and write them, with their children.

    Each parent returned by ``liftoff_utils.get_parent_list`` gets its score
    reset to ``"."``, an ``extra_copy_number`` attribute (0 for the first
    parent seen with a given id, counting up for later ones), and optional
    ``partial_mapping`` / ``low_identity`` flags. Parents are then written
    in ``(seqid, start)`` order through ``write_feature``.

    Args:
        lifted_features: Mapping indexed by a parent's ``copy_id`` attribute;
            the looked-up value is handed to ``build_parent_dict`` and
            ``write_feature`` as that parent's child features.
        out_file: Path of the file to write, or the literal string
            ``'stdout'``.
        parent_dict: Forwarded to ``liftoff_utils.get_parent_list`` and
            ``build_parent_dict``.
        cov_threshold: Parents whose ``coverage`` attribute is below this
            value are flagged with ``partial_mapping = "True"``.
        seq_threshold: Parents whose ``sequence_ID`` attribute is below this
            value are flagged with ``low_identity = "True"``.
    """
    writes_to_file = out_file != 'stdout'
    # write_feature receives either an open handle or the sentinel string
    # "stdout" (presumably meaning "print to standard output" - confirm
    # against write_feature).
    f = open(out_file, 'w') if writes_to_file else "stdout"
    try:
        parents = liftoff_utils.get_parent_list(lifted_features, parent_dict)
        parents.sort(key=lambda x: x.id)
        copy_num_dict = {}
        for parent in parents:
            parent.score = "."
            # First parent with a given id is copy 0; repeats count upwards.
            copy_num = copy_num_dict.get(parent.id, -1) + 1
            copy_num_dict[parent.id] = copy_num
            parent.attributes["extra_copy_number"] = str(copy_num)
            if float(parent.attributes["coverage"][0]) < cov_threshold:
                parent.attributes["partial_mapping"] = "True"
            if float(parent.attributes["sequence_ID"][0]) < seq_threshold:
                parent.attributes["low_identity"] = "True"
        # Stable sort: parents sharing (seqid, start) keep their id order.
        final_parent_list = sorted(parents, key=lambda x: (x.seqid, x.start))
        for final_parent in final_parent_list:
            child_features = lifted_features[final_parent.attributes["copy_id"][0]]
            parent_child_dict = build_parent_dict(child_features, parent_dict)
            write_feature([final_parent], f, child_features, parent_child_dict)
    finally:
        # The original never closed the handle it opened, leaking the file
        # descriptor and risking unflushed output.
        if writes_to_file:
            f.close()
Exemple #2
0
def check_homologues(all_lifted_features, lifted_features_to_check, parent_dict, original_parent_order):
    """Compare each feature to check against the position-sorted parents.

    All lifted parents are sorted by ``(seqid, start)``. For every parent
    derived from ``lifted_features_to_check``, ``compare_nearby_features``
    is called with the slice of that sorted list that begins at the first
    parent on the same ``seqid`` and runs to the end of the list.

    Args:
        all_lifted_features: Passed to ``liftoff_utils.get_parent_list`` to
            obtain the full list of parents.
        lifted_features_to_check: Passed to ``liftoff_utils.get_parent_list``
            to obtain the parents that should be examined.
        parent_dict: Forwarded to ``liftoff_utils.get_parent_list`` and
            ``compare_nearby_features``.
        original_parent_order: Forwarded to ``compare_nearby_features``.

    Returns:
        The dict handed to every ``compare_nearby_features`` call as
        ``remap_features`` (presumably filled in by those calls - confirm
        against ``compare_nearby_features``).
    """
    sorted_parents = liftoff_utils.get_parent_list(all_lifted_features, parent_dict)
    parents_to_check = liftoff_utils.get_parent_list(lifted_features_to_check, parent_dict)
    sorted_parents.sort(key=lambda parent: (parent.seqid, parent.start))
    new_parent_order = liftoff_utils.find_parent_order(sorted_parents)

    # Index of the first parent on each seqid within the sorted list.
    first_index_by_seqid = {}
    for position, parent in enumerate(sorted_parents):
        first_index_by_seqid.setdefault(parent.seqid, position)

    remap_features = {}
    for candidate in parents_to_check:
        seqid_start = first_index_by_seqid[candidate.seqid]
        compare_nearby_features(
            sorted_parents[seqid_start:],
            parent_dict,
            candidate,
            remap_features,
            original_parent_order,
            new_parent_order,
        )
    return remap_features
Exemple #3
0
def clean_overlapping_features(lifted_feature_list, all_overlapping_features, parent_dict, features_to_remap, unmapped_features):
    """Drop overlap records that no longer overlap any lifted parent.

    A record is dropped when its sequence (element ``[2]``) has no lifted
    parent, or when ``find_overlapping_features`` returns ``False`` for it.
    Whenever a record is dropped, any feature in ``unmapped_features`` whose
    id equals the record's key is moved out of ``unmapped_features`` and
    registered in ``features_to_remap`` with a placeholder entry.

    All three containers are updated in place; nothing is returned.

    Args:
        lifted_feature_list: Passed to ``liftoff_utils.get_parent_list``.
        all_overlapping_features: Dict mapping a feature name to a list of
            overlap records; element ``[2]`` of a record is used as the
            sequence id.
        parent_dict: Forwarded to ``liftoff_utils.get_parent_list``.
        features_to_remap: Dict that receives ``[(-1, -1, None, None)]``
            for each feature moved out of ``unmapped_features``.
        unmapped_features: List of features; entries matching a dropped
            record's feature name are removed.
    """
    parent_list = liftoff_utils.get_parent_list(lifted_feature_list, parent_dict)
    feature_by_chrom_dict = {}
    for feature in parent_list:
        feature_by_chrom_dict.setdefault(feature.seqid, []).append(feature)

    for feature_name, overlap_records in all_overlapping_features.items():
        # Filter into a new list instead of calling .remove() on the list
        # being iterated: removing mid-iteration makes the loop skip the
        # element that follows every removed one.
        kept_records = []
        for overlapping_features in overlap_records:
            seqid = overlapping_features[2]
            if seqid not in feature_by_chrom_dict or find_overlapping_features(
                    overlapping_features, feature_by_chrom_dict[seqid]) is False:
                # Iterate over a snapshot for the same reason as above.
                for feature in list(unmapped_features):
                    if feature.id == feature_name:
                        unmapped_features.remove(feature)
                        features_to_remap[feature.id] = [(-1, -1, None, None)]
            else:
                kept_records.append(overlapping_features)
        # Slice-assign so callers holding a reference to this list see the
        # update, exactly as they did with the in-place removals.
        overlap_records[:] = kept_records
Exemple #4
0
def clean_overlapping_features(lifted_feature_list, all_overlapping_features,
                               parent_dict):
    """Drop overlap records that no longer overlap any lifted parent.

    A record is dropped when its sequence (element ``[2]``) has no lifted
    parent, or when ``find_overlapping_features`` returns ``False`` for it.
    ``all_overlapping_features`` is updated in place; nothing is returned.

    Args:
        lifted_feature_list: Passed to ``liftoff_utils.get_parent_list``.
        all_overlapping_features: Dict mapping a feature name to a list of
            overlap records; element ``[2]`` of a record is used as the
            sequence id.
        parent_dict: Forwarded to ``liftoff_utils.get_parent_list``.
    """
    parent_list = liftoff_utils.get_parent_list(lifted_feature_list,
                                                parent_dict)
    feature_by_chrom_dict = {}
    for feature in parent_list:
        feature_by_chrom_dict.setdefault(feature.seqid, []).append(feature)

    for overlap_records in all_overlapping_features.values():
        # Filter into a new list instead of calling .remove() on the list
        # being iterated: removing mid-iteration makes the loop skip the
        # element that follows every removed one.
        kept_records = []
        for overlapping_features in overlap_records:
            seqid = overlapping_features[2]
            # A sequence with no lifted parents cannot contain an overlap;
            # the original raised KeyError here instead of dropping the
            # record.
            if seqid not in feature_by_chrom_dict:
                continue
            if find_overlapping_features(
                    overlapping_features,
                    feature_by_chrom_dict[seqid]) is False:
                continue
            kept_records.append(overlapping_features)
        # Slice-assign so callers holding a reference to this list see the
        # update, exactly as they did with the in-place removals.
        overlap_records[:] = kept_records
Exemple #5
0
def write_new_gff(lifted_features, out_file, parent_dict, cov_threshold=0.5):
    """Annotate lifted parent features and write them, with their children.

    Each parent returned by ``liftoff_utils.get_parent_list`` gets its score
    reset to ``"."``, a ``copy_number`` attribute (1 for the first parent
    seen with a given id, counting up for later ones) and, when its
    coverage is low, a ``partial_mapping`` flag. It is then written through
    ``write_feature`` in the order the parents were returned.

    Args:
        lifted_features: Mapping indexed by a parent's ``copy_id`` attribute;
            the looked-up value is handed to ``build_parent_dict`` and
            ``write_feature`` as that parent's child features.
        out_file: Path of the file to write, or the literal string
            ``'stdout'``.
        parent_dict: Forwarded to ``liftoff_utils.get_parent_list`` and
            ``build_parent_dict``.
        cov_threshold: Parents whose ``coverage`` attribute is below this
            value are flagged with ``partial_mapping = "True"``. Defaults
            to 0.5, the value that used to be hard-coded.
    """
    writes_to_file = out_file != 'stdout'
    # write_feature receives either an open handle or the sentinel string
    # "stdout" (presumably meaning "print to standard output" - confirm
    # against write_feature).
    f = open(out_file, 'w') if writes_to_file else "stdout"
    try:
        copy_num_dict = {}
        parents = liftoff_utils.get_parent_list(lifted_features, parent_dict)
        for parent in parents:
            child_features = lifted_features[parent.attributes["copy_id"][0]]
            parent_child_dict = build_parent_dict(child_features, parent_dict)
            parent.score = "."
            # First parent with a given id is copy 1; repeats count upwards.
            copy_num = copy_num_dict.get(parent.id, 0) + 1
            copy_num_dict[parent.id] = copy_num
            parent.attributes["copy_number"] = str(copy_num)

            if float(parent.attributes["coverage"][0]) < cov_threshold:
                parent.attributes["partial_mapping"] = "True"
            write_feature([parent], f, child_features, parent_child_dict)
    finally:
        # The original never closed the handle it opened, leaking the file
        # descriptor and risking unflushed output.
        if writes_to_file:
            f.close()