def load_pheno_list():
    """
    Loads a list of phenotypes from multiple sources.

    Entries are gathered, in this order, from PHENO_LIST, PHENO_EQ_LIST
    (read with ";" as the delimiter), PHENO_MANUAL and
    load_pheno_ontology().

    Returns:
        A list with the lower-cased form of every gathered entry that is
        longer than one character and whose lower-cased form is not in the
        blacklist. Source order is kept and duplicates are not removed.
    """
    phenotypes = []
    phenotypes.extend(util.read_file_lines(PHENO_LIST))
    phenotypes.extend(util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";"))
    phenotypes.extend(util.read_file_lines(PHENO_MANUAL))
    phenotypes.extend(load_pheno_ontology())

    # Filter by blacklist. Converting once to a frozenset keeps the
    # per-entry membership test cheap whatever container read_blacklist()
    # returns (assumes its items are hashable strings -- TODO confirm).
    blacklist = frozenset(read_blacklist())

    filtered = []
    for pheno in phenotypes:
        lowered = pheno.lower()  # computed once, used for both test and output
        # The length check is applied to the original entry, not `lowered`.
        if len(pheno) > 1 and lowered not in blacklist:
            filtered.append(lowered)
    return filtered
def load_pheno_list():
    """
    Builds the phenotype list from several input files.

    Entries come, in this order, from PHENO_LIST, PHENO_EQ_LIST (read with
    ";" as the delimiter) and PHENO_MANUAL.

    Returns:
        A list with the lower-cased form of every entry that is longer than
        one character and whose lower-cased form is not in the blacklist.
    """
    candidates = list(util.read_file_lines(PHENO_LIST))
    candidates += util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";")
    candidates += util.read_file_lines(PHENO_MANUAL)
    # NOTE: the output of load_pheno_ontology() is not merged in here.

    # Drop blacklisted and single-character entries.
    excluded = read_blacklist()
    kept = []
    for name in candidates:
        lowered = name.lower()
        if lowered in excluded or len(name) <= 1:
            continue
        kept.append(lowered)
    return kept
def load_pheno_list():
    """
    Loads a list of phenotypes from multiple files.

    Entries are read from PHENO_LIST; each of those entries is additionally
    broken into fragments at '.', ',' and ';' and the fragments are appended.
    Entries from PHENO_EQ_LIST (read with ";" as the delimiter) and
    PHENO_MANUAL follow.

    Returns:
        A list with the lower-cased form of every entry or fragment that is
        longer than one character and whose lower-cased form is not in the
        blacklist. Duplicates are not removed.
    """
    # Function-scope import so this block does not rely on a module-level
    # `import re` being present.
    import re

    result = []
    result.extend(util.read_file_lines(PHENO_LIST))

    # str.split() treats its argument as a literal separator, so the pattern
    # has to go through re.split() for the character class to take effect.
    delimiters = re.compile(r'[.,;]')
    fragments = []
    for entry in result:
        # Strip the whitespace left next to a delimiter ("a, b" -> "a", "b").
        # An entry without any delimiter yields itself once more, as it did
        # before this fix.
        fragments.extend(part.strip() for part in delimiters.split(entry))
    result.extend(fragments)

    result.extend(util.read_tsv_flat(PHENO_EQ_LIST, delimiter=";"))
    result.extend(util.read_file_lines(PHENO_MANUAL))

    # Filter by blacklist
    blacklist = read_blacklist()

    filtered = []
    for pheno in result:
        lowered = pheno.lower()  # computed once, used for both test and output
        if lowered not in blacklist and len(pheno) > 1:
            filtered.append(lowered)
    return filtered