def _write_combined_features(in_handle, out_handle):
    """Prepare BED file with intersecting feature identifiers combined.

    Reads tab-separated lines from ``in_handle``. Consecutive lines that share
    the same first three columns are collapsed into one output line holding
    those three columns followed by the sorted, de-duplicated feature
    identifiers joined with ``;`` (or ``.`` when the group has none).

    Args:
        in_handle: iterable of tab-delimited text lines. The second-to-last
            column is read as the attribute string; ``.`` means the line
            carries no feature.
        out_handle: object with a ``write`` method that receives the
            combined lines.

    Raises:
        KeyError: if a parsed attribute set has neither ``ensembl_gene_id``
            nor ``ID``.
        IndexError: if a line has fewer than two tab-separated columns.
    """
    def _write_combined(pos, ids):
        # Build a fresh list instead of mutating the caller's ``ids``.
        names = sorted(set(ids)) if ids else ["."]
        out_handle.write("{0}\t{1}\n".format("\t".join(pos), ";".join(names)))

    last = None
    ids = []
    for line in in_handle:
        parts = line.split("\t")
        cur_id = parts[:3]
        cur_attr = parts[-2]
        if cur_id != last:
            # A new position starts: flush the identifiers of the previous one.
            if last is not None:
                _write_combined(last, ids)
            last = cur_id
            ids = []
        if cur_attr != ".":
            attrs = parse_attributes(cur_attr)
            # Look up "ID" only when "ensembl_gene_id" is missing. Passing
            # attrs["ID"] as a dict.get() default evaluates it eagerly and
            # raises KeyError for records that have no "ID" attribute.
            if "ensembl_gene_id" in attrs:
                ids.append(attrs["ensembl_gene_id"])
            else:
                ids.append(attrs["ID"])
    # Flush the final group, if any line was read at all.
    if last is not None:
        _write_combined(last, ids)
def get_gff_name(field):
    """Return a name for a feature from its attribute field.

    The field is parsed with ``parse_attributes`` and the candidate keys are
    tried in a fixed priority order; the value of the first key present is
    returned.

    Args:
        field: attribute string handed unchanged to ``parse_attributes``.

    Returns:
        The value stored under the first matching key, or ``None`` when none
        of the candidate keys is present.
    """
    parsed = parse_attributes(field)
    # Priority order: earlier keys win when several are present.
    candidates = ("ID", "gene_name", "transcript_id", "gene_id", "Parent")
    return next((parsed[name] for name in candidates if name in parsed), None)