def get_raw_references(datapath, phenotype_set):
    """Read a phenotab file into per-reference phenotype representations.

    "Raw" phenotypes are phenotypes expressed in the original ontology,
    i.e. exactly as they appear in the input file.

    The file is read as tab-delimited text with a header row. The columns
    used are "Reference", "Phenotype", "Frequency", "Source",
    "Disease_number" and "Disease_title".

    Args:
        datapath: path to the phenotab file (opened through open_file)
        phenotype_set: collection of acceptable phenotype identifiers

    Returns:
        A pair (references, badphenotypes):
        - references: dict mapping "Source:Disease_number" codes to
          Representation objects carrying a title and phenotype values
        - badphenotypes: set of phenotypes seen in the file that are
          not members of phenotype_set
    """
    references = {}
    badphenotypes = set()

    with open_file(datapath, "rt") as handle:
        for row in csv.DictReader(handle, delimiter="\t", quotechar="\""):
            # rows without a valid reference id are ignored entirely
            if not valid_reference_id(row["Reference"]):
                continue

            term = row["Phenotype"]
            if term not in phenotype_set:
                # remember the unknown phenotype, but do not record the row
                badphenotypes.add(term)
                continue

            # translate the frequency label through the tofreq lookup
            frequency = tofreq[row["Frequency"]]
            ref_code = ":".join((row["Source"], row["Disease_number"]))

            entry = references.get(ref_code)
            if entry is None:
                # first accepted row for this reference: create the record
                entry = Representation(name=ref_code)
                references[ref_code] = entry
                entry.title = row["Disease_title"]
            entry.set(term, frequency)

    return references, badphenotypes
def make_target_reference(reference, oomap, oo_median=None):
    """Translate one representation from its ontology into another.

    Args:
        reference: source representation; its name, title and data
            (phenotype -> value mapping) are read
        oomap: mapping from a source phenotype to an iterable of
            (target phenotype, score) pairs
        oo_median: optional scaling constant; when given, each value is
            multiplied by tanh(score / oo_median) before being stored

    Returns:
        A new Representation with the same name and title as the input,
        holding values for the target-ontology phenotypes. When several
        source phenotypes map onto the same target, the largest value
        is kept.
    """
    converted = Representation(name=reference.name)
    converted.title = reference.title

    rescale = oo_median is not None
    for source_term, source_value in reference.data.items():
        for target_term, score in oomap[source_term]:
            if rescale:
                # dampen the value according to the mapping score
                candidate = source_value * tanh(score / oo_median)
            else:
                candidate = source_value
            # never overwrite an existing entry with a smaller value
            if converted.has(target_term):
                candidate = max(candidate, converted.get(target_term))
            converted.set(target_term, candidate)

    return converted