def load_pheno_list():
    """
    Collect phenotype terms from every configured source.

    Terms are gathered, in order, from the PHENO_LIST file, the
    ';'-delimited PHENO_EQ_LIST file, the PHENO_MANUAL file and
    load_pheno_ontology().

    Returns a list of lower-cased terms. A term is dropped when its
    lower-cased form is in the blacklist or when it is at most one
    character long.
    """
    candidates = list(util.read_file_lines(PHENO_LIST))
    candidates += util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";")
    candidates += util.read_file_lines(PHENO_MANUAL)
    candidates += load_pheno_ontology()

    # NOTE: a disabled step used to strip parenthesised text from each term,
    # add the individual whitespace-separated words as extra terms and
    # de-duplicate the list. It is not executed.

    # Filter by blacklist
    excluded = read_blacklist()
    phenotypes = []
    for candidate in candidates:
        lowered = candidate.lower()
        if lowered in excluded or len(candidate) <= 1:
            continue
        phenotypes.append(lowered)
    return phenotypes
Beispiel #2
0
def load_pheno_list():
    """
    Build the phenotype term list from the phenotype files.

    Reads PHENO_LIST, the ';'-delimited PHENO_EQ_LIST and PHENO_MANUAL, in
    that order. The result holds the lower-cased form of every term that is
    longer than one character and whose lower-cased form is not in the
    blacklist.
    """
    terms = []
    terms += util.read_file_lines(PHENO_LIST)
    terms += util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";")
    terms += util.read_file_lines(PHENO_MANUAL)

    # The ontology source is disabled in this variant:
    # terms.extend(load_pheno_ontology())

    # Filter by blacklist
    excluded = read_blacklist()
    kept = []
    for term in terms:
        normalized = term.lower()
        if normalized not in excluded and len(term) > 1:
            kept.append(normalized)
    return kept
Beispiel #3
0
def load_pheno_list():
    """
    Loads a list of phenotypes from multiple files.

    Every entry read from PHENO_LIST is kept as-is and is additionally split
    on '.', ',' and ';'; the whitespace-trimmed, non-empty fragments are
    added as extra terms. Entries from the ';'-delimited PHENO_EQ_LIST and
    from PHENO_MANUAL are then appended unchanged.

    Returns a list of lower-cased terms. A term is dropped when its
    lower-cased form is in the blacklist or when it is at most one
    character long.
    """
    # Local import so the block does not rely on a module-level `import re`.
    import re

    result = list(util.read_file_lines(PHENO_LIST))

    # str.split() treats its argument as a literal separator, so passing a
    # character class to it never splits anything. re.split() is required to
    # break entries on any of the punctuation characters.
    separators = re.compile(r'[.,;]')
    fragments = []
    for entry in result:
        for piece in separators.split(entry):
            piece = piece.strip()
            # Skip empty pieces and pieces that merely repeat the entry
            # (which is already present in `result`).
            if piece and piece != entry:
                fragments.append(piece)
    result.extend(fragments)

    result.extend(util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";"))
    result.extend(util.read_file_lines(PHENO_MANUAL))

    # Filter by blacklist
    blacklist = read_blacklist()

    return [
        pheno.lower() for pheno in result
        if pheno.lower() not in blacklist and len(pheno) > 1
    ]
def load_gene_list():
    """
    Loads a list of genes from the genes dictionary, sans those present in the
    gene blacklist.

    Gene names read from GENE_LIST are lower-cased; empty names and names
    whose lower-cased form matches a (lower-cased) blacklist entry are
    dropped. The values produced by enumerate_allele_extensions() for the
    remaining genes are appended to the end of the list.

    Returns the filtered gene names followed by their allele extensions.
    """
    # A set gives constant-time membership tests during filtering.
    gene_blacklist = {item.lower() for item in read_blacklist()}

    genes = util.read_tsv_flat(GENE_LIST)

    genes_filtered = [
        gene.lower() for gene in genes
        if gene and gene.lower() not in gene_blacklist
    ]

    # Materialise the extensions before extending: the helper receives the
    # very list that is about to grow, so it must finish reading it first.
    extensions = list(enumerate_allele_extensions(genes_filtered))
    genes_filtered.extend(extensions)

    # The list is returned as built above. Previously it was recomputed from
    # the raw `genes` right before returning, which discarded both the
    # empty-name filter and the allele extensions.
    return genes_filtered