Exemple #1
0
def write_new_gff(lifted_features, out_file, parent_dict, cov_threshold,
                  seq_threshold):
    """Annotate the lifted parent features and write them with their children.

    Each parent gets its score reset to ".", an "extra_copy_number"
    attribute (0 for the first parent seen with a given id, then 1, 2, ...),
    a "partial_mapping" flag when its "coverage" attribute is below
    ``cov_threshold`` and a "low_identity" flag when its "sequence_ID"
    attribute is below ``seq_threshold``. Parents are then emitted in
    (seqid, start) order through ``write_feature``.

    Args:
        lifted_features: Mapping from a parent's "copy_id" attribute value to
            the list of features lifted for that copy.
        out_file: Output path, or the string 'stdout'.
        parent_dict: Passed through to ``liftoff_utils.get_parent_list`` and
            ``build_parent_dict``.
        cov_threshold: Minimum coverage below which a parent is flagged.
        seq_threshold: Minimum sequence identity below which a parent is
            flagged.
    """
    writing_to_file = out_file != 'stdout'
    # When no file is requested the literal string "stdout" is handed to
    # write_feature unchanged (presumably it prints in that case -- confirm
    # against write_feature).
    f = open(out_file, 'w') if writing_to_file else "stdout"
    try:
        parents = liftoff_utils.get_parent_list(lifted_features, parent_dict)
        # Sorting by id first makes the copy numbering deterministic.
        parents.sort(key=lambda x: x.id)
        copy_num_dict = {}
        final_parent_list = []
        for parent in parents:
            parent.score = "."
            copy_num = copy_num_dict.get(parent.id, -1) + 1
            copy_num_dict[parent.id] = copy_num
            parent.attributes["extra_copy_number"] = str(copy_num)
            if float(parent.attributes["coverage"][0]) < cov_threshold:
                parent.attributes["partial_mapping"] = "True"
            if float(parent.attributes["sequence_ID"][0]) < seq_threshold:
                parent.attributes["low_identity"] = "True"
            final_parent_list.append(parent)
        final_parent_list.sort(key=lambda x: (x.seqid, x.start))
        for final_parent in final_parent_list:
            copy_id = final_parent.attributes["copy_id"][0]
            child_features = lifted_features[copy_id]
            parent_child_dict = build_parent_dict(child_features, parent_dict)
            write_feature([final_parent], f, child_features,
                          parent_child_dict)
    finally:
        # Close the handle we opened so buffered output is flushed even if
        # writing fails part-way through.
        if writing_to_file:
            f.close()
def check_homologues(all_lifted_features, lifted_features_to_check,
                     parent_dict, ref_parent_order, inter):
    """Compare each feature to check against the features it overlaps.

    Args:
        all_lifted_features: Lifted features used to build the full parent
            list and the target parent order.
        lifted_features_to_check: Lifted features whose parents are tested
            for overlaps.
        parent_dict: Passed through to ``compare_overlapping_feature``.
        ref_parent_order: Parent order in the reference, passed through to
            ``compare_overlapping_feature``.
        inter: Pre-built interval structure, or None to build one from all
            lifted parents via ``build_interval_list``.

    Returns:
        The ``remap_features`` dict handed to ``compare_overlapping_feature``
        for every overlap (presumably filled in by that helper -- confirm).
    """
    all_feature_list = liftoff_utils.get_parent_list(all_lifted_features)
    features_to_check_list = liftoff_utils.get_parent_list(
        lifted_features_to_check)
    target_parent_order = liftoff_utils.find_parent_order(all_feature_list)
    remap_features = {}
    # Identity test: an interval container may overload ==, so comparing it
    # to None with == is not reliable.
    if inter is None:
        inter = build_interval_list(all_feature_list)
    for feature in features_to_check_list:
        overlaps = find_overlaps(feature.start, feature.end, feature.seqid,
                                 feature.strand,
                                 feature.attributes["copy_id"][0], inter)
        for overlap in overlaps:
            # The overlapping feature object is read from overlap[2][1].
            feature_to_compare = overlap[2][1]
            compare_overlapping_feature(feature_to_compare, parent_dict,
                                        feature, remap_features,
                                        ref_parent_order, target_parent_order)
    return remap_features
def check_homologues(all_lifted_features, lifted_features_to_check,
                     parent_dict, ref_parent_order, max_overlap):
    """Compare each feature to check against other copies it overlaps.

    Args:
        all_lifted_features: Lifted features used to build the full parent
            list, the target parent order and the interval structure.
        lifted_features_to_check: Lifted features whose parents are tested
            for overlaps.
        parent_dict: Passed through to ``liftoff_utils.find_overlaps``.
        ref_parent_order: Parent order in the reference, passed through to
            ``compare_overlapping_feature``.
        max_overlap: Passed through to ``liftoff_utils.find_overlaps``.

    Returns:
        A ``(remap_features, feature_locations)`` tuple: the set handed to
        ``compare_overlapping_feature`` and the interval structure built
        from all lifted parents.
    """
    all_parents = liftoff_utils.get_parent_list(all_lifted_features)
    parents_to_check = liftoff_utils.get_parent_list(lifted_features_to_check)
    target_parent_order = liftoff_utils.find_parent_order(all_parents)
    remap_features = set()
    feature_locations = build_interval_list(all_parents)
    for candidate in parents_to_check:
        # Coordinates are shifted down by one before the interval lookup.
        hits = liftoff_utils.find_overlaps(
            candidate.start - 1, candidate.end - 1, candidate.seqid,
            candidate.strand, candidate.attributes["copy_id"][0],
            feature_locations, parent_dict, all_lifted_features, max_overlap)
        for hit in hits:
            # Skip hits that carry the candidate's own copy id.
            if hit[2][0] == candidate.attributes["copy_id"][0]:
                continue
            compare_overlapping_feature(hit[2][1], candidate, remap_features,
                                        ref_parent_order, target_parent_order)
    return remap_features, feature_locations
def check_homologues(all_lifted_features, lifted_features_to_check,
                     parent_dict, original_parent_order):
    """Compare each feature to check against the features on its sequence.

    All lifted parents are sorted by (seqid, start); for every feature to
    check, the slice of that sorted list starting at the first parent on the
    same seqid is handed to ``compare_nearby_features``.

    Args:
        all_lifted_features: Lifted features used to build the sorted parent
            list and the new parent order.
        lifted_features_to_check: Lifted features whose parents are compared.
        parent_dict: Passed through to ``liftoff_utils.get_parent_list`` and
            ``compare_nearby_features``.
        original_parent_order: Passed through to ``compare_nearby_features``.

    Returns:
        The ``remap_features`` dict handed to ``compare_nearby_features``.
    """
    all_parents = liftoff_utils.get_parent_list(all_lifted_features,
                                                parent_dict)
    parents_to_check = liftoff_utils.get_parent_list(
        lifted_features_to_check, parent_dict)
    all_parents.sort(key=lambda x: (x.seqid, x.start))
    new_parent_order = liftoff_utils.find_parent_order(all_parents)

    # Record the position of the first parent seen on each seqid.
    first_index_by_chrom = {}
    for position, parent in enumerate(all_parents):
        first_index_by_chrom.setdefault(parent.seqid, position)

    remap_features = {}
    for candidate in parents_to_check:
        chrom_start = first_index_by_chrom[candidate.seqid]
        compare_nearby_features(all_parents[chrom_start:], parent_dict,
                                candidate, remap_features,
                                original_parent_order, new_parent_order)
    return remap_features
Exemple #5
0
def write_new_gff(lifted_features, parents_dict, args):
    """Write the finalized parent features and their children.

    Parents are sorted by id, passed through ``finalize_parent_features``,
    re-sorted by (seqid, start) and emitted one at a time through
    ``write_feature``.

    Args:
        lifted_features: Mapping from a parent's "copy_id" attribute value to
            the list of features lifted for that copy.
        parents_dict: Passed through to ``build_parent_dict``.
        args: Options object; ``args.o`` is the output path or 'stdout'. It
            is also passed to ``finalize_parent_features``.
    """
    writing_to_file = args.o != 'stdout'
    # When no file is requested the literal string "stdout" is handed to
    # write_feature unchanged (presumably it prints in that case -- confirm
    # against write_feature).
    f = open(args.o, 'w') if writing_to_file else "stdout"
    try:
        parents = liftoff_utils.get_parent_list(lifted_features)
        parents.sort(key=lambda x: x.id)
        final_parent_list = finalize_parent_features(parents, args)
        final_parent_list.sort(key=lambda x: (x.seqid, x.start))
        for final_parent in final_parent_list:
            copy_id = final_parent.attributes["copy_id"][0]
            child_features = lifted_features[copy_id]
            parent_child_dict = build_parent_dict(child_features,
                                                  parents_dict)
            write_feature([final_parent], f, child_features,
                          parent_child_dict)
    finally:
        # Close the handle we opened so buffered output is flushed even if
        # writing fails part-way through.
        if writing_to_file:
            f.close()
def remove_remapped_features_from_overlaps(lifted_feature_list,
                                           all_overlapping_features):
    """Drop recorded overlaps that no longer hit any lifted parent.

    An interval structure is built from the parents of
    ``lifted_feature_list``; every overlapping feature recorded in
    ``all_overlapping_features`` is looked up again and removed from its
    collection when the lookup returns nothing.

    Args:
        lifted_feature_list: Lifted features whose parents define the
            intervals to test against.
        all_overlapping_features: Mapping from feature name to a collection
            of overlapping features; the collections are modified in place.

    Returns:
        The interval structure built from the lifted parents.
    """
    parents = liftoff_utils.get_parent_list(lifted_feature_list)
    inter = build_interval_list(parents)
    for feature_name in all_overlapping_features:
        recorded_overlaps = all_overlapping_features[feature_name]
        # Iterate over a snapshot: removing from the collection while looping
        # over it directly would skip the entry after each removal.
        for overlapping_feature in list(recorded_overlaps):
            # Coordinates are shifted down by one before the interval lookup.
            updated_overlaps = find_overlaps(overlapping_feature.start - 1,
                                             overlapping_feature.end - 1,
                                             overlapping_feature.seqid,
                                             overlapping_feature.strand,
                                             feature_name, inter)
            if len(updated_overlaps) == 0:
                recorded_overlaps.remove(overlapping_feature)
    return inter
Exemple #7
0
def write_new_gff(lifted_features, args, feature_db):
    """Write a header, then the finalized parent features and their children.

    Parents are sorted by id, passed through ``finalize_parent_features``,
    re-sorted by (seqid, start) and emitted one at a time through
    ``write_feature``.

    Args:
        lifted_features: Mapping from a parent's "copy_id" attribute value to
            the list of features lifted for that copy.
        args: Options object; ``args.o`` is the output path or 'stdout'. It
            is also passed to ``finalize_parent_features``.
        feature_db: Object whose ``dialect['fmt']`` value selects the output
            type handed to ``write_header`` and ``write_feature``.
    """
    writing_to_file = args.o != 'stdout'
    f = open(args.o, 'w') if writing_to_file else sys.stdout
    try:
        out_type = feature_db.dialect['fmt']
        write_header(f, out_type)
        parents = liftoff_utils.get_parent_list(lifted_features)
        parents.sort(key=lambda x: x.id)
        final_parent_list = finalize_parent_features(parents, args)
        final_parent_list.sort(key=lambda x: (x.seqid, x.start))
        for final_parent in final_parent_list:
            copy_id = final_parent.attributes["copy_id"][0]
            child_features = lifted_features[copy_id]
            parent_child_dict = build_parent_dict(child_features,
                                                  final_parent)
            write_feature([final_parent], f, child_features,
                          parent_child_dict, out_type)
    finally:
        # Only close a handle we opened ourselves; sys.stdout must stay open
        # for the rest of the program.
        if writing_to_file:
            f.close()
def clean_overlapping_features(lifted_feature_list, all_overlapping_features,
                               parent_dict, features_to_remap,
                               unmapped_features):
    """Drop stale overlap records and queue affected unmapped features.

    Lifted parents are grouped by seqid. An overlap record is removed when
    its seqid (``record[2]``) has no lifted parents, or when
    ``find_overlapping_features`` returns False for it. For each removed
    record, every entry of ``unmapped_features`` whose id equals the feature
    name is removed and registered in ``features_to_remap``.

    Args:
        lifted_feature_list: Lifted features whose parents are grouped by
            seqid.
        all_overlapping_features: Mapping from feature name to a collection
            of overlap records; the collections are modified in place.
        parent_dict: Passed through to ``liftoff_utils.get_parent_list``.
        features_to_remap: Dict updated in place; affected feature ids map to
            ``[(-1, -1, None, None)]``.
        unmapped_features: Collection of features, modified in place.
    """
    parent_list = liftoff_utils.get_parent_list(lifted_feature_list,
                                                parent_dict)
    feature_by_chrom_dict = {}
    for feature in parent_list:
        feature_by_chrom_dict.setdefault(feature.seqid, []).append(feature)

    for feature_name in all_overlapping_features:
        overlap_records = all_overlapping_features[feature_name]
        # Iterate over a snapshot: removing from the collection while looping
        # over it directly would skip the record after each removal.
        for overlapping_features in list(overlap_records):
            chrom = overlapping_features[2]
            # Only an explicit False from the helper counts as "no overlap".
            still_overlaps = (
                chrom in feature_by_chrom_dict
                and find_overlapping_features(
                    overlapping_features,
                    feature_by_chrom_dict[chrom]) is not False)
            if still_overlaps:
                continue
            overlap_records.remove(overlapping_features)
            # Snapshot again, since matching entries are removed in the loop.
            for feature in list(unmapped_features):
                if feature.id == feature_name:
                    unmapped_features.remove(feature)
                    features_to_remap[feature.id] = [(-1, -1, None, None)]