def convert_all_children_coords(shortest_path_nodes, children, parent):
    """Lift every child feature onto the target and summarise how well it aligned.

    ``shortest_path_nodes`` is sorted in place by ``query_block_start`` before
    any coordinate is converted.

    Args:
        shortest_path_nodes: list of alignment-block objects; each exposes
            ``query_block_start`` and the first one supplies ``reference_name``.
        children: iterable of features exposing ``id``, ``featuretype``,
            ``start``, ``end`` and ``attributes``.
        parent: the parent feature, forwarded untouched to the helper functions.

    Returns:
        A 3-tuple ``(mapped_children, aligned_fraction, matching_fraction)``:
        ``mapped_children`` maps each new feature's id to the new feature;
        ``aligned_fraction`` is ``(total_bases - deletions) / total_bases``;
        ``matching_fraction`` additionally subtracts mismatches and insertions.
        When there are no bases at all (no children, or only zero-length
        children) both fractions are ``0.0`` instead of dividing by zero.
    """
    shortest_path_nodes.sort(key=lambda x: x.query_block_start)
    mapped_children = {}
    total_bases, mismatches, insertions, deletions = 0, 0, 0, 0
    for child in children:
        child_length = child.end - child.start + 1
        total_bases += child_length
        nearest_start_coord, nearest_end_coord = find_nearest_aligned_start_and_end(
            child.start, child.end, shortest_path_nodes, parent)
        if nearest_start_coord != -1 and nearest_end_coord != -1:
            lifted_start, start_node = convert_coord(nearest_start_coord, shortest_path_nodes)
            lifted_end, end_node = convert_coord(nearest_end_coord, shortest_path_nodes)
            deletions += find_deletions(start_node, end_node, shortest_path_nodes)
            # Bases trimmed off either end to reach the nearest usable
            # coordinates are counted as deleted as well.
            deletions += (nearest_start_coord - child.start) + (child.end - nearest_end_coord)
            mismatches += find_mismatched_bases(child.start, child.end, shortest_path_nodes, parent)
            insertions += find_insertions(start_node, end_node, shortest_path_nodes)
            strand = get_strand(shortest_path_nodes[0], parent)
            # min/max keep start <= end whichever way the coordinates were
            # lifted; the +1 presumably converts 0-based lifted positions to
            # 1-based feature coordinates -- TODO confirm against convert_coord.
            new_child = new_feature.new_feature(
                child.id, child.featuretype, shortest_path_nodes[0].reference_name, 'Liftoff', strand,
                min(lifted_start, lifted_end) + 1, max(lifted_start, lifted_end) + 1, dict(child.attributes))
            mapped_children[new_child.id] = new_child
        else:
            # A -1 from the helper for either end: count the whole child as deleted.
            deletions += child_length
    if total_bases == 0:
        # Nothing to measure: avoid ZeroDivisionError and report no coverage.
        return mapped_children, 0.0, 0.0
    aligned_fraction = (total_bases - deletions) / total_bases
    matching_fraction = (total_bases - mismatches - (insertions + deletions)) / total_bases
    return mapped_children, aligned_fraction, matching_fraction
def add_children(parent_dict, child_dict, lowest_children, feature_db):
    """Attach the lowest-level child features to their entries in ``child_dict``.

    Runs one query joining ``relations`` to ``features`` for the ids in
    ``lowest_children``. Every non-intron row whose first column (used here as
    the parent id) is a key of ``parent_dict`` becomes a new feature appended
    to ``child_dict[parent]``. Ids from ``lowest_children`` that produced no
    child but are themselves keys of ``parent_dict`` are registered as their
    own single child.

    Args:
        parent_dict: mapping of parent id -> parent feature.
        child_dict: mapping of parent id -> list of child features; mutated.
        lowest_children: iterable of feature ids to look up.
        feature_db: object exposing a DB-API connection as ``.conn``
            (presumably a gffutils FeatureDB backed by SQLite -- confirm).
    """
    c = feature_db.conn.cursor()
    # Emit single-quoted SQL string literals with embedded quotes doubled.
    # Double-quoted tokens are identifiers in SQL: SQLite only falls back to
    # treating them as strings when no column has that name (and only if the
    # build allows it), so ids such as "child" or ids containing '"' misbehave.
    cond = ', '.join("'{0}'".format(str(w).replace("'", "''")) for w in lowest_children)
    query = "select * from relations join features on features.id = relations.child where relations.child IN ({})".format(
        cond)
    c.execute(query)
    results = c.fetchall()
    added_children_ids = []
    for result in results:
        feature_tup = tuple(result)
        # Column 0 comes from ``relations``; the feature columns start at
        # index 3 (assumes the three-column gffutils relations table -- confirm).
        parent = feature_tup[0]
        if parent in parent_dict:
            child = new_feature.new_feature(feature_tup[3], feature_tup[6], feature_tup[4], feature_tup[5],
                                            feature_tup[10], feature_tup[7], feature_tup[8],
                                            json.loads(feature_tup[12]))
            if child.featuretype != "intron":
                if "Parent" not in child.attributes:
                    add_parent_tag(child, feature_db)
                child_dict[parent].append(child)
                added_children_ids.append(child.id)
    # Requested ids that yielded no child: a feature with no children of its
    # own is listed as its own child when it is also a known parent.
    single_level_features = np.setdiff1d(lowest_children, added_children_ids)
    for feature in single_level_features:
        if feature in parent_dict:
            child_dict[feature] = [parent_dict[feature]]
def make_new_parent(feature_list, parent, feature_hierarchy):
    """Build a parent feature spanning all of its already-lifted children.

    Children are the values of ``feature_list`` whose first ``"Parent"``
    attribute equals ``parent``. The new feature copies id, type and
    attributes from the parent returned by ``get_ref_parent``, takes seqid and
    strand from the first child, and spans from the smallest child start to
    the largest child end. It is stored in ``feature_list`` under its id and
    returned.

    Args:
        feature_list: mapping of feature id -> feature; mutated.
        parent: id of the parent whose children should be collected.
        feature_hierarchy: passed through to ``get_ref_parent``.
    """
    siblings = []
    for candidate in feature_list.values():
        if "Parent" in candidate.attributes and candidate.attributes["Parent"][0] == parent:
            siblings.append(candidate)

    start_coords = [sibling.start for sibling in siblings]
    end_coords = [sibling.end for sibling in siblings]

    reference = get_ref_parent(parent, feature_hierarchy)
    lifted_parent = new_feature.new_feature(
        reference.id,
        reference.featuretype,
        siblings[0].seqid,
        'Liftoff',
        siblings[0].strand,
        min(start_coords),
        max(end_coords),
        dict(reference.attributes),
    )
    feature_list[lifted_parent.id] = lifted_parent
    return lifted_parent
def add_intermediates(intermediate_ids, intermediate_dict, feature_db):
    """Load the features with the given ids into ``intermediate_dict``.

    Each matching row of the ``features`` table becomes a new feature stored
    under its id. Features that have no ``"Parent"`` attribute are passed to
    ``add_parent_tag``.

    Args:
        intermediate_ids: iterable of feature ids to fetch.
        intermediate_dict: mapping of feature id -> feature; mutated.
        feature_db: object exposing a DB-API connection as ``.conn``
            (presumably a gffutils FeatureDB backed by SQLite -- confirm).
    """
    c = feature_db.conn.cursor()
    # Emit single-quoted SQL string literals with embedded quotes doubled.
    # Double-quoted tokens are identifiers in SQL: SQLite only falls back to
    # treating them as strings when no column has that name (and only if the
    # build allows it), so ids such as "id" or ids containing '"' misbehave.
    cond = ', '.join("'{0}'".format(str(w).replace("'", "''")) for w in intermediate_ids)
    query = "select * from features where id IN ({})".format(cond)
    for result in c.execute(query):
        feature_tup = tuple(result)
        # Column 9 is decoded as JSON and used as the attribute mapping.
        intermediate_feature = new_feature.new_feature(
            feature_tup[0], feature_tup[3], feature_tup[1], feature_tup[2], feature_tup[7],
            feature_tup[4], feature_tup[5], json.loads(feature_tup[9]))
        intermediate_dict[intermediate_feature.id] = intermediate_feature
        if "Parent" not in intermediate_feature.attributes:
            add_parent_tag(intermediate_feature, feature_db)
def add_parents(parent_dict, child_dict, highest_parents, parent_types_to_lift, feature_db):
    """Load top-level features and register those whose type should be lifted.

    Each row of the ``features`` table whose id is in ``highest_parents``
    becomes a new feature. When its ``featuretype`` is in
    ``parent_types_to_lift`` it is stored in ``parent_dict`` under its id and
    an empty child list is created for it in ``child_dict``.

    Args:
        parent_dict: mapping of parent id -> parent feature; mutated.
        child_dict: mapping of parent id -> list of children; mutated.
        highest_parents: iterable of feature ids to fetch.
        parent_types_to_lift: container of feature types to keep.
        feature_db: object exposing a DB-API connection as ``.conn``
            (presumably a gffutils FeatureDB backed by SQLite -- confirm).
    """
    c = feature_db.conn.cursor()
    # Emit single-quoted SQL string literals with embedded quotes doubled.
    # Double-quoted tokens are identifiers in SQL: SQLite only falls back to
    # treating them as strings when no column has that name (and only if the
    # build allows it), so ids such as "id" or ids containing '"' misbehave.
    cond = ', '.join("'{0}'".format(str(w).replace("'", "''")) for w in highest_parents)
    query = "SELECT * FROM features WHERE id IN ({})".format(cond)
    for result in c.execute(query):
        feature_tup = tuple(result)
        # Column 9 is decoded as JSON and used as the attribute mapping.
        parent = new_feature.new_feature(feature_tup[0], feature_tup[3], feature_tup[1], feature_tup[2],
                                         feature_tup[7], feature_tup[4], feature_tup[5],
                                         json.loads(feature_tup[9]))
        if parent.featuretype in parent_types_to_lift:
            parent_dict[parent.id] = parent
            child_dict[parent.id] = []