Example #1
0
def get_all_ids(attr_path, biotype=None, filter_set=None, id_type="Transcript"):
    """
    Returns the set of Ensembl IDs found in the attribute file.

    Each record produced by skip_header(attr_path) is split on whitespace.
    The transcript ID is read from field 3 and the gene ID from field 0;
    the biotype used for filtering is read from field 4.

    :param attr_path: value passed straight to skip_header(); presumably the
                      path of the Gencode attribute file -- confirm against
                      that helper.
    :param biotype: if not None, only records whose field 4 equals this value
                    are kept.
    :param filter_set: optional container of entries to exclude. A record is
                       dropped when either its ID or its raw line is a member.
                       None (the default) excludes nothing.
    :param id_type: "Transcript" or "Gene"; selects which ID field is returned.
    :return: set of ID strings.
    :raises ValueError: if id_type is not "Transcript" or "Gene".
    """
    if id_type not in ("Transcript", "Gene"):
        # An explicit raise survives ``python -O``, unlike an assert.
        raise ValueError(
            'id_type must be "Transcript" or "Gene", got {!r}'.format(id_type)
        )

    # Avoid a shared mutable default; an empty tuple excludes nothing.
    excluded = filter_set if filter_set is not None else ()
    id_col = 3 if id_type == "Transcript" else 0

    ids = set()
    for line in skip_header(attr_path):
        fields = line.split()
        if biotype is not None and fields[4] != biotype:
            continue
        # The raw-line membership test is kept for backward compatibility
        # with any caller that relied on the previous behaviour.
        if line in excluded:
            continue
        record_id = fields[id_col]
        # Filter on the ID itself: the earlier code only compared the whole
        # line, so a filter set of IDs never removed anything.
        if record_id in excluded:
            continue
        ids.add(record_id)
    return ids
Example #2
0
def get_gene_map(attr_path):
    """
    Builds a lookup from transcript ID to gene ID.

    Every record yielded by skip_header(attr_path) is split on whitespace;
    field 3 is used as the key (transcript ID) and field 0 as the value
    (gene ID). If a transcript ID occurs more than once, the last record wins.

    :param attr_path: value passed straight to skip_header().
    :return: dict mapping transcript ID -> gene ID.
    """
    tx_to_gene = {}
    for record in skip_header(attr_path):
        columns = record.split()
        transcript_id = columns[3]
        gene_id = columns[0]
        tx_to_gene[transcript_id] = gene_id
    return tx_to_gene
Example #3
0
def get_all_biotypes(attr_path):
    """
    Collects the distinct biotypes present in the attribute database.

    The biotype is taken from field 4 of each whitespace-split record
    yielded by skip_header(attr_path).

    :param attr_path: value passed straight to skip_header().
    :return: set of biotype strings.
    """
    biotypes = set()
    for record in skip_header(attr_path):
        columns = record.split()
        biotypes.add(columns[4])
    return biotypes
Example #4
0
def get_gene_biotype_map(attr_path):
    """
    Builds a lookup from gene ID to biotype.

    Every record yielded by skip_header(attr_path) is split on whitespace;
    field 0 is used as the key (gene ID) and field 2 as the value (biotype).
    If a gene ID occurs more than once, the last record wins.

    :param attr_path: value passed straight to skip_header().
    :return: dict mapping gene ID -> biotype.
    """
    gene_to_biotype = {}
    for record in skip_header(attr_path):
        columns = record.split()
        gene_id = columns[0]
        gene_biotype = columns[2]
        gene_to_biotype[gene_id] = gene_biotype
    return gene_to_biotype