def save_pkl(self, fn):
    """Bundle the three disease mappings into one dict and save it.

    The dict has the keys ``'decipher'``, ``'orpha'`` and ``'omim'``, each
    holding the attribute of the same name, and is handed to
    ``save_object`` together with ``fn``.

    Args:
        fn: Destination passed through unchanged to ``save_object``.
    """
    sections = ('decipher', 'orpha', 'omim')
    payload = {section: getattr(self, section) for section in sections}
    save_object(payload, fn)
def gen_patient_embeddings(source,
                           EXP_ID,
                           exp_id,
                           enriched='no',
                           n_same_time=None,
                           exp_variable=None):
    """Build one embedding per patient row and save the resulting mapping.

    Reads ``./_data/patients/<source>_patients_phenotype[_<n_same_time>].csv``,
    where the first column of each row is used as the patient key and the
    remaining columns are passed to ``compute_embedding_average``. The
    resulting dict is written with ``save_object`` to
    ``./_data/patients/<source>_patient_embeddings[_<n_same_time>].pkl``.
    The elapsed wall-clock time in seconds is printed at the end.

    Args:
        source: String used as the file-name prefix.
        EXP_ID: Forwarded to ``HpoVecs``.
        exp_id: Forwarded to ``HpoVecs``.
        enriched: Forwarded to ``HpoVecs`` as its first argument.
        n_same_time: Optional string; when truthy it is appended (after an
            underscore) to both file names.
        exp_variable: Forwarded to ``HpoVecs`` as ``exp_variable``.
    """
    t0 = time.time()

    # Both files share the same optional "_<n_same_time>" suffix.
    suffix = '_' + n_same_time if n_same_time else ''
    base = './_data/patients/' + source
    patients_phenotypes = base + '_patients_phenotype' + suffix + '.csv'
    patient_embeddings = base + '_patient_embeddings' + suffix + '.pkl'

    embeddings_by_patient = {}
    with open(patients_phenotypes) as csv_file:
        rows = csv.reader(csv_file)
        # Created after the CSV is opened, exactly as before, so a missing
        # input file fails before any of these objects are constructed.
        ontology = Hpo()
        term_vectors = HpoVecs(
            enriched, EXP_ID, exp_id, exp_variable=exp_variable
        ).vecs
        for row in rows:
            embeddings_by_patient[row[0]] = compute_embedding_average(
                row[1:], ontology, term_vectors
            )

    save_object(embeddings_by_patient, patient_embeddings)

    print(time.time() - t0)
コード例 #3
0
 def save_pkl(self, filename):
     """Collect the disease mappings and their ``*_desc`` companions and save them.

     The attributes ``decipher``, ``orpha`` and ``omim`` plus the matching
     ``decipher_desc``, ``orpha_desc`` and ``omim_desc`` are gathered into a
     single dict keyed by attribute name and handed to ``save_object``.

     Args:
         filename: Destination passed through unchanged to ``save_object``.
     """
     payload = {
         'decipher': self.decipher,
         'decipher_desc': self.decipher_desc,
         'orpha': self.orpha,
         'orpha_desc': self.orpha_desc,
         'omim': self.omim,
         'omim_desc': self.omim_desc,
     }
     save_object(payload, filename)
コード例 #4
0
    def learn_embeddings(self, walks):
        """Train a skip-gram Word2Vec model on the walks and write its vectors.

        Every node in every walk is converted to ``str``; the stringified
        walks are saved with ``save_object`` to
        ``./_data/walks/walks_hpo_orpha.pkl`` and then used to train the
        model. The learned vectors are written in word2vec format to
        ``self.output``.

        Args:
            walks: Iterable of walks, each an iterable of node identifiers.

        Returns:
            None.
        """
        # Word2Vec works on string tokens, so stringify every node first.
        sentences = [[str(node) for node in walk] for walk in walks]
        save_object(sentences, './_data/walks/walks_hpo_orpha.pkl')

        # NOTE(review): `size` and `iter` look like the pre-4.0 gensim keyword
        # names (later `vector_size` / `epochs`) -- confirm the pinned version.
        w2v = Word2Vec(
            sentences,
            size=self.dimensions,
            window=self.window_size,
            min_count=0,  # keep every node, however rarely it is visited
            sg=1,  # 1 selects skip-gram
            workers=self.workers,
            iter=self.iter,
        )
        w2v.wv.save_word2vec_format(self.output)
def gen_mapping_objects(source, n_same_time=None):
    """Flatten simulated patients into a phenotype CSV and a disease mapping.

    Loads ``./_emu/emu-<source>[_<n_same_time>].json``, whose top level maps
    each disease to an object with a ``'sims'`` list. Every entry of that
    list becomes one patient with a sequential identifier ``P0``, ``P1``, ...

    Two files are produced:

    * ``_data/patients/<source>_patients_phenotype[_<n_same_time>].csv`` --
      one row per patient: the identifier followed by its symptoms.
    * ``_data/patients/<source>_patients_disease[_<n_same_time>].pkl`` --
      the ``{identifier: disease}`` dict, written with ``save_object``.

    The elapsed wall-clock time in seconds is printed at the end.

    Args:
        source: String used as the file-name prefix.
        n_same_time: Optional string; when truthy it is appended (after an
            underscore) to all three file names.
    """
    start = time.time()

    # The two variants differ only in this optional file-name suffix.
    suffix = '_' + n_same_time if n_same_time else ''
    sims_path = './_emu/emu-' + source + suffix + '.json'
    phenotype_path = ('_data/patients/' + source + '_patients_phenotype' +
                      suffix + '.csv')
    disease_path = ('_data/patients/' + source + '_patients_disease' +
                    suffix + '.pkl')

    with open(sims_path) as json_file:
        patient_sims = json.load(json_file)

    patients_conditions = {}
    i = 0
    # The context manager guarantees the CSV is flushed and closed even if a
    # malformed record raises part-way through the loop.
    with open(phenotype_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for disease in patient_sims:
            for phenotype in patient_sims[disease]['sims']:
                identifier = 'P' + str(i)
                writer.writerow([identifier, *phenotype])
                patients_conditions[identifier] = disease
                i += 1

    save_object(patients_conditions, disease_path)

    print(time.time() - start)
コード例 #6
0
# Experiment configuration. In the visible code, `walk_length` and
# `iterations` are referenced only by the commented-out genEmbeddings call
# below; `conds`, `patients_per_cond`, `lamb` and `enriched_embeddings` are
# not used in this fragment (presumably consumed further down -- confirm).
walk_length = 50
iterations = 3
conds = 200
noise_ptgs = [.15, .3, .45, .6]
patients_per_cond = 2
lamb = 4
enriched_embeddings = 'no'

# Disabled embedding-generation step, kept for reference.
# print('Generating embeddings...')
# start_time_symptom_embeddings = time.time()
# genEmbeddings(input='_data/graph/hp-obo.edgelist', output='_data/emb/hp-obo_'+EXP_ID+'_'+str(exp_int)+'.emb', walk_length=walk_length, iter=iterations)
# amount_time_symptom_embeddings = time.time()-start_time_symptom_embeddings

rows = []
metadata_list = []
# Write the (still empty) metadata list to the results file named after
# EXP_ID before the experiment loop starts.
save_object(metadata_list, '_data/results/experiment_number' + EXP_ID + '.pkl')
# Names of the similarity measures; the same five names key `aucs` below.
sim_names = {
    'cos_sim', 'jaccard_best_avg', 'resnik_best_avg', 'lin_best_avg',
    'jc_best_avg'
}
# 2x2 grid of axes -- presumably one panel per entry of `noise_ptgs`
# (four values); confirm against the plotting code.
fig, ax = plt.subplots(2, 2)
i = 0

for noise_ptg in noise_ptgs:
    # Fresh empty list per similarity measure for this noise level
    # (presumably filled with AUC values later in the loop -- TODO confirm).
    aucs = {
        'cos_sim': [],
        'jaccard_best_avg': [],
        'resnik_best_avg': [],
        'lin_best_avg': [],
        'jc_best_avg': []
    }
 def save_pkl(self, fn):
     """Save ``self.hpos`` by handing it to ``save_object``.

     Args:
         fn: Destination passed through unchanged to ``save_object``.
     """
     hpos = self.hpos
     save_object(hpos, fn)