Example #1
0
def get_raw_references(datapath, phenotype_set):
    """Read a phenotab file into per-disease phenotype representations.

    Phenotypes are kept "raw", i.e. expressed in the original ontology.

    Args:
        datapath       path to phenotab file (tab-separated, header row)
        phenotype_set  collection of phenotypes that are accepted

    Returns:
        two objects
        - dict mapping reference codes ("Source:Disease_number") to
          Representation objects holding the disease title and phenotypes
        - set of phenotypes seen in the file but absent from phenotype_set
    """

    rejected = set()
    by_code = {}
    with open_file(datapath, "rt") as handle:
        for record in csv.DictReader(handle, delimiter="\t", quotechar="\""):
            # rows without a valid reference id are ignored entirely
            if not valid_reference_id(record["Reference"]):
                continue
            term = record["Phenotype"]
            if term not in phenotype_set:
                # remember the unknown phenotype, but do not record the row
                rejected.add(term)
                continue
            # translate the frequency label through the tofreq lookup
            frequency = tofreq[record["Frequency"]]
            code = record["Source"] + ":" + record["Disease_number"]
            entry = by_code.get(code)
            if entry is None:
                # first row for this disease: create its representation
                entry = Representation(name=code)
                by_code[code] = entry
                entry.title = record["Disease_title"]
            entry.set(term, frequency)

    return by_code, rejected
Example #2
0
def make_target_reference(reference, oomap, oo_median=None):
    """Translate a single representation from one ontology into another.

    Args:
        reference  representation to convert; its name and title are copied
        oomap      lookup from a source phenotype to an iterable of
                   (target phenotype, score) pairs
        oo_median  optional scale; when not None, each value is multiplied
                   by tanh(score / oo_median) before being stored

    Returns:
        a new Representation; when several source phenotypes map onto the
        same target phenotype, the largest value is kept
    """

    target = Representation(name=reference.name)
    target.title = reference.title

    for source_term, source_value in reference.data.items():
        for target_term, score in oomap[source_term]:
            # use the value as-is unless a rescaling median was supplied
            candidate = (
                source_value
                if oo_median is None
                else source_value * tanh(score / oo_median)
            )
            # never lower a value that an earlier mapping already stored
            if target.has(target_term):
                candidate = max(candidate, target.get(target_term))
            target.set(target_term, candidate)

    return target