def get_all_ids(attr_path, biotype=None, filter_set=None, id_type="Transcript"):
    """
    Return the set of IDs read from the attribute file.

    Every line produced by skip_header(attr_path) is split on whitespace.
    Transcript IDs are taken from column 3 and gene IDs from column 0
    (0-based); id_type selects which of the two is collected.

    attr_path  -- handed unchanged to skip_header (presumably the path of the
                  attribute table; confirm against skip_header).
    biotype    -- when not None, only lines whose column 4 equals this value
                  contribute an ID.
    filter_set -- optional collection of IDs to leave out of the result.
                  None (the default) excludes nothing.
    id_type    -- "Transcript" or "Gene".

    Returns a set of ID strings. Raises AssertionError for any other id_type.
    """
    assert id_type in ["Transcript", "Gene"], "id_type must be 'Transcript' or 'Gene'"
    # A None default avoids sharing one mutable set object between calls.
    excluded = frozenset() if filter_set is None else filter_set
    id_column = 3 if id_type == "Transcript" else 0
    ids = set()
    for line in skip_header(attr_path):
        fields = line.split()
        # Column 4 is only read when a biotype was requested, so shorter
        # lines are still accepted in the unfiltered case.
        if biotype is not None and fields[4] != biotype:
            continue
        identifier = fields[id_column]
        # Exclusion is decided on the extracted ID: a whole multi-column line
        # can never equal a single ID, so testing the raw line would make the
        # filter a no-op.
        if identifier not in excluded:
            ids.add(identifier)
    return ids
def get_gene_map(attr_path):
    """
    Build a dictionary from transcript ID to gene ID.

    Each line produced by skip_header(attr_path) is split on whitespace;
    column 3 becomes the key and column 0 the value (0-based). If a key occurs
    on several lines, the value from the last such line is kept.
    """
    transcript_to_gene = {}
    for line in skip_header(attr_path):
        fields = line.split()
        transcript_to_gene[fields[3]] = fields[0]
    return transcript_to_gene
def get_all_biotypes(attr_path):
    """
    Collect the distinct biotypes listed in the attribute file.

    The biotype is read from whitespace-separated column 4 (0-based) of every
    line produced by skip_header(attr_path). Returns a set of strings.
    """
    biotypes = set()
    for line in skip_header(attr_path):
        biotypes.add(line.split()[4])
    return biotypes
def get_gene_biotype_map(attr_path):
    """
    Build a dictionary from gene ID to biotype.

    Each line produced by skip_header(attr_path) is split on whitespace;
    column 0 becomes the key and column 2 the value (0-based). If a gene ID
    occurs on several lines, the value from the last such line is kept.
    """
    gene_to_biotype = {}
    for line in skip_header(attr_path):
        fields = line.split()
        gene_to_biotype[fields[0]] = fields[2]
    return gene_to_biotype