def load_pheno_list():
    """
    Loads a list of phenotypes from multiple files.

    Sources, in order: the lines of PHENO_LIST, the flattened fields of the
    semicolon-delimited PHENO_EQ_LIST, the lines of PHENO_MANUAL, and the
    entries returned by load_pheno_ontology().

    Returns:
        A list of lowercased phenotype names. Names whose lowercase form is
        in the blacklist, and names of length 1 or less, are dropped. Source
        order is preserved and duplicates are NOT removed.
    """
    result = []
    result.extend(util.read_file_lines(PHENO_LIST))
    result.extend(util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";"))
    result.extend(util.read_file_lines(PHENO_MANUAL))
    result.extend(load_pheno_ontology())

    # Filter by blacklist.
    # The blacklist is lowercased (as load_gene_list does) because every
    # candidate is lowercased before the membership test; a set keeps that
    # test O(1) per phenotype.
    blacklist = {entry.lower() for entry in read_blacklist()}

    filtered = []
    for pheno in result:
        lowered = pheno.lower()
        if len(pheno) > 1 and lowered not in blacklist:
            filtered.append(lowered)
    return filtered
def load_pheno_list():
    """
    Loads a list of phenotypes from multiple files.

    Sources, in order: the lines of PHENO_LIST, the flattened fields of the
    semicolon-delimited PHENO_EQ_LIST, and the lines of PHENO_MANUAL.

    Returns:
        A list of lowercased phenotype names. Names whose lowercase form is
        in the blacklist, and names of length 1 or less, are dropped. Source
        order is preserved and duplicates are NOT removed.
    """
    # NOTE: load_pheno_ontology() is not used as a source in this version.
    sources = (
        util.read_file_lines(PHENO_LIST),
        util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";"),
        util.read_file_lines(PHENO_MANUAL),
    )
    result = []
    for entries in sources:
        result.extend(entries)

    # Filter by blacklist.
    # Candidates are compared in lowercase, so the blacklist is lowercased
    # too (as load_gene_list does); a set makes each lookup O(1).
    blacklist = {entry.lower() for entry in read_blacklist()}

    return [
        pheno.lower()
        for pheno in result
        if len(pheno) > 1 and pheno.lower() not in blacklist
    ]
def load_pheno_list():
    """
    Loads a list of phenotypes from multiple files.

    Sources, in order: the lines of PHENO_LIST; the fragments obtained by
    splitting each PHENO_LIST entry on '.', ',' or ';'; the flattened fields
    of the semicolon-delimited PHENO_EQ_LIST; and the lines of PHENO_MANUAL.

    Returns:
        A list of lowercased phenotype names. Names whose lowercase form is
        in the blacklist, and names of length 1 or less, are dropped. Source
        order is preserved and duplicates are NOT removed (an entry with no
        delimiter appears once as itself and once as its own single
        fragment).
    """
    import re  # function-scope import so the block does not rely on a file-level one

    result = []
    result.extend(util.read_file_lines(PHENO_LIST))

    # Add the sub-phrases of every PHENO_LIST entry. str.split() takes a
    # literal separator, not a pattern, so splitting on a character class
    # has to go through re.split(). Fragments are stripped so that e.g.
    # "a, b" yields "b" rather than " b"; empty fragments are removed by the
    # length filter below.
    fragments = []
    for entry in result:
        fragments.extend(part.strip() for part in re.split(r'[.,;]', entry))
    result.extend(fragments)

    result.extend(util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";"))
    result.extend(util.read_file_lines(PHENO_MANUAL))

    # Filter by blacklist.
    # Candidates are compared in lowercase, so the blacklist is lowercased
    # too (as load_gene_list does); a set makes each lookup O(1).
    blacklist = {entry.lower() for entry in read_blacklist()}

    return [
        pheno.lower()
        for pheno in result
        if len(pheno) > 1 and pheno.lower() not in blacklist
    ]
def load_gene_list():
    """
    Loads a list of genes from the genes dictionary, sans those present in
    the gene blacklist.

    The flattened fields of GENE_LIST are lowercased, stripped of empty and
    blacklisted names, and then extended with whatever
    enumerate_allele_extensions() produces for the surviving genes. The
    combined list is filtered against the blacklist once more so that a
    blacklisted name cannot re-enter through an extension.

    Returns:
        A list of lowercased gene names (base genes first, followed by their
        allele extensions). Duplicates are NOT removed.
    """
    # Lowercased set: comparison is case-insensitive and each lookup is O(1).
    gene_blacklist = {item.lower() for item in read_blacklist()}

    genes = util.read_tsv_flat(GENE_LIST)
    genes_filtered = [
        gene.lower()
        for gene in genes
        if len(gene) >= 1 and gene.lower() not in gene_blacklist
    ]
    genes_filtered.extend(enumerate_allele_extensions(genes_filtered))

    # Re-filter the extended list. This must iterate genes_filtered, not the
    # raw genes: iterating genes here would throw away both the empty-name
    # filter and the allele extensions added above.
    # NOTE(review): assumes enumerate_allele_extensions yields strings -
    # confirm against its definition.
    return [
        gene.lower()
        for gene in genes_filtered
        if gene.lower() not in gene_blacklist
    ]