Esempio n. 1
0
def matrix(data,freq):
    """Accumulate a score for every HPO term pair that co-occurs in ``data``.

    Args:
        data: iterable with a ``len()``; each element is a mapping whose
            ``'hpo'`` entry is an iterable of HPO id strings.
        freq: mapping from HPO id to its frequency. Every id appearing in
            ``data`` must be present (a missing id raises ``KeyError``).

    Returns:
        A ``defaultdict(float)`` keyed by ``'<id>-<id>'``. Pair keys are built
        from the two ids in sorted order; the self pair ``'h-h'`` holds the
        normalised ``hpo_helper.IC`` value of ``h``.
    """
    # get total and ICmax
    total = len(data)
    # NOTE(review): presumably IC(1, total) is the largest possible
    # information content and serves as the normalising constant -- confirm
    # against hpo_helper.
    IC_max = hpo_helper.IC(1, total)
    result = defaultdict(float)
    method_cache = {}
    # Countdown of records still to process, printed as a progress indicator.
    n = total
    for d in data:
        print(n)
        n -= 1
        hpos = d['hpo']

        # Each HPO term co-occurs with itself: seed the diagonal entry once.
        for h in hpos:
            # Compute the normalised IC only the first time a term is seen;
            # dict.get(h, <expr>) would evaluate the IC call on every hit.
            if h not in method_cache:
                method_cache[h] = hpo_helper.IC(freq[h], total) / IC_max
            # setdefault keeps an already-stored diagonal value untouched.
            result.setdefault('-'.join([h, h]), method_cache[h])

        # Score every unordered pair of terms in this record.
        for h0, h1 in itertools.combinations(hpos, 2):
            key = '-'.join(sorted([h0, h1]))
            # Dividing by twice the smaller frequency is meant to keep the
            # pair score consistently below the subclass case when weights
            # are later derived as IC(h0)*IC(h1)/max_IC**2 (original author's
            # rationale). Both frequencies come from the ``freq`` argument.
            normaliser = 2 * min(freq[h0], freq[h1])
            result[key] += (method_cache[h0] + method_cache[h1]) / normaliser
    return result
Esempio n. 2
0
def asym_WAM(dbs, df, ic_df, freq, ancestors):
    """Build a weight matrix with ``beta`` in ``'asym'`` mode and apply it to ``df``.

    For every label ``h`` in ``ic_df.index`` a weight column is produced by
    applying ``beta`` row-wise to ``ic_df`` (with its index turned into a
    regular column) and passing ``h2=h``. The columns are collected in a frame
    indexed like ``ic_df``, and ``df`` is multiplied element-wise by it.

    Args:
        dbs: forwarded unchanged to ``beta``.
        df: DataFrame to be weighted.
        ic_df: DataFrame whose index supplies both the rows and the column
            labels of the weight matrix.
        freq: mapping that must contain ``'HP:0000001'``; also forwarded to
            ``beta``.
        ancestors: not used by this function.

    Returns:
        The result of ``df.multiply(weight_df)``.
    """
    print('build hpo_asym_WAM')
    weight_df = pd.DataFrame(index=ic_df.index)
    max_ic = hpo_helper.IC(1, freq['HP:0000001'])
    # Shared across all beta calls; presumably a memoisation buffer -- confirm
    # against beta's implementation.
    buff = {}
    # reset_index() does not depend on h, so build the row frame once instead
    # of once per column. NOTE(review): assumes beta does not mutate its row.
    rows = ic_df.reset_index()
    for h in ic_df.index:
        column = rows.apply(
            beta,
            axis=1,
            dbs=dbs,
            ic_df=ic_df,
            freq=freq,
            max_ic=max_ic,
            h2=h,
            buff=buff,
            mode='asym',
        )
        # .values drops the positional index of ``rows`` so the column is
        # assigned by position onto ic_df's index.
        weight_df[h] = column.values
    return df.multiply(weight_df)
Esempio n. 3
0
 def IC_wrapper(a):
     """Single-argument adapter around ``hpo_helper.IC``.

     The second argument is the free variable ``t``, resolved from the
     surrounding scope when the wrapper is called.
     """
     ic_value = hpo_helper.IC(a, t)
     return ic_value
Esempio n. 4
0
    basename = os.path.basename(input)
    num = basename.split('.')[0].split('_')[1]
    outfile = os.path.join(outfolder, 'hpofreq_' + num + '.json')
    return outfile


'''
main
'''
if __name__ == "__main__":
    # Script entry point: read the --input JSON, expand its HPO terms, compute
    # the HPO frequencies and write them to the path chosen by get_outfile().
    usage = "usage: %prog [options] arg1 arg2"
    parser = OptionParser(usage=usage)

    parser.add_option("--input", dest="input", help="input sim file?")
    (options, args) = parser.parse_args()
    # Without --input, options.input is None and the helpers below would fail
    # with an unrelated error; stop early with a usage message instead.
    if not options.input:
        parser.error("--input is required")
    # get dbs
    dbs = phenopolis_utils.get_mongo_collections()
    # get input data
    input_data = hpo_helper.get_json(options.input)
    # get total and ICmax
    # (kept as module-level names; code outside this view may read them)
    total = len(input_data)
    IC_max = hpo_helper.IC(1, total)
    # expand hpos
    hpo_helper.expand_hpo(input_data)
    # get hpo_freq
    hpo_freq = hpo_helper.get_hpo_freq(input_data)
    # get output file
    outfile = get_outfile(options.input)
    hpo_helper.write_json(hpo_freq, outfile)
    print('done')