def LinearRegression_score(body):
    """Handle a ``score`` request for the stored LinearRegression object.

    ``body['paras']`` holds the keyword arguments forwarded to ``score``.
    When ``X`` or ``y`` is supplied as a string it is treated as a CSV file
    name and replaced, in place, by the result of ``file.read_csv``.

    Returns ``jsonify({'return': str(result)})`` on success, or
    ``jsonify({'Error': str(exc)})`` when loading, scoring or saving raises.
    """
    paras = body['paras']

    # String values stand for CSV file names; swap them for the loaded data.
    for key in ('X', 'y'):
        if key in paras and isinstance(paras[key], str):
            paras[key] = file.read_csv(paras[key])

    try:
        model = load_obj(os.path.join('.', 'LinearRegression_constructor.npy'))
        score = model.score(**paras)
        save_obj(model, 'LinearRegression')
    except Exception as exc:
        # Errors are reported to the caller in the payload rather than raised.
        return jsonify({'Error': str(exc)})
    else:
        return jsonify({'return': str(score)})
return np.sum(r / np.log2(np.arange(2, r.size + 2))) return 0. # calculate nDCG def ndcg_at_k(r, k): dcg_max = dcg_at_k(sorted(r, reverse=True), k) if not dcg_max: return 0. return dcg_at_k(r, k) / dcg_max # load ranked data # Creatinine data = ufile.read_csv( "/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q1.csv" )[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q2.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q3.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q4.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q5.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q6.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q7.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q8.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q9.csv")[1:] #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Creatinine_top10mix_sp/Creatinine_top10mix_sp_Q10.csv")[1:] # HbA1c # Glucose # no lab
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/NoLab_top10mix_Q9.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/NoLab_top10mix_Q10.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q1.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q2.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q3.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q4.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q5.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q6.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q7.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q8.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q9.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Glucose_top10mix_Q10.csv') data = ufile.read_csv( '/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q1.csv' ) #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q2.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q3.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q4.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q5.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q6.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q7.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master 
Thesis/rating/mix_csv/Creatinine_top10mix_Q8.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q9.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/rating/mix_csv/Creatinine_top10mix_Q10.csv') # load post-processed exp #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/Glucose_top10mix_post_Q1.csv") #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/Glucose_top10mix_post_Q2.csv") #data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/Glucose_top10mix_post_Q3.csv")
log = slogger('etacts-ext')

# Filesystem-backed cache configuration.
cache = Cache(
    config={
        'CACHE_TYPE': 'filesystem',
        'CACHE_DEFAULT_TIMEOUT': 86400,
        'CACHE_THRESHOLD': 500,
        'CACHE_DIR': 'app/resources/cache',
    })

# load the tag-to-semantic_category relationships
# tag - semantic type
cvocab = ufile.read_csv('app/resources/cvocab.csv')
if cvocab is None:
    # Nothing can be mapped without the vocabulary, so stop here.
    log.error('impossible to load the controlled vocabulary - interrupting')
    sys.exit()

type2tag, tag2type = {}, {}
for tag in cvocab:
    ptag = tag[0].strip()
    # Second column: strip the square brackets, then split on the "'," that
    # separates the quoted entries.
    tkn = tag[1].replace('[', '').replace(']', '')
    tkn = tkn.split('\',')
    if len(tkn) > 3:
        continue
    ltype = set()
    for t in tkn:
        # type-to-tag
        typ = t.replace('\'', '').strip()
        ltag = type2tag.setdefault(typ, set())
        # NOTE(review): the loop body appears to continue past this view.
from flask.ext.cache import Cache
from log import slogger
import file as ufile
import sys

log = slogger('etacts-ext')

# Filesystem-backed cache configuration.
cache = Cache(
    config={
        'CACHE_TYPE': 'filesystem',
        'CACHE_DEFAULT_TIMEOUT': 86400,
        'CACHE_THRESHOLD': 500,
        'CACHE_DIR': 'app/resources/cache',
    })

# load the tag-to-semantic_category relationships
# tag - semantic type
cvocab = ufile.read_csv('app/resources/cvocab.csv')
if cvocab is None:
    # Nothing can be mapped without the vocabulary, so stop here.
    log.error('impossible to load the controlled vocabulary - interrupting')
    sys.exit()

type2tag, tag2type = {}, {}
for tag in cvocab:
    ptag = tag[0].strip()
    # Second column: strip the square brackets, then split on the "'," that
    # separates the quoted entries.
    tkn = tag[1].replace('[', '').replace(']', '')
    tkn = tkn.split('\',')
    if len(tkn) > 3:
        continue
    ltype = set()
    for t in tkn:
        # type-to-tag: register this tag under each of its semantic types
        typ = t.replace('\'', '').strip()
        ltag = type2tag.setdefault(typ, set())
        ltag.add(ptag)
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Glucose_top10mix_sp_Q8.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Glucose_top10mix_sp_Q9.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Glucose_top10mix_sp_Q10.csv")[1:]
# load data: CE Glucose
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q1.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q2.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q3.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q4.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q5.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q6.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q7.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q8.csv")[1:]
#data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q9.csv")[1:]
# The first row of the CSV is skipped (presumably a header -- TODO confirm).
data = ufile.read_csv(
    "/Users/luyu/Documents/Master Thesis/rating/lab_10q_sp/Glucose_top10mix_sp/Correctly Extracted/Glucose_top10mix_cesp_Q10.csv"
)[1:]

# extract id and question
# Empty cells are compared by value (`!=`): `is not ''` tested object
# identity, which is implementation-dependent for strings.
# NOTE(review): `id` shadows the builtin; the name is kept because code
# outside this view may rely on it.
id = [d[0] for d in data if d[0] != '']
candidates = [d[1] for d in data if d[1] != '']

# extract lab-value statement for 3 lab tests
# An empty cell becomes the literal '[]'; the second entry of each list is
# then dropped.
glu_exp = [d[2] if d[2] != '' else '[]' for d in data]
del glu_exp[1]
a1c_exp = [d[3] if d[3] != '' else '[]' for d in data]
del a1c_exp[1]
cre_exp = [d[4] if d[4] != '' else '[]' for d in data]
del cre_exp[1]

# extract vector-space similarities
tfidf = [float(d[5]) for d in data if d[5] != '']
import re
import nltk
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import file as ufile
import pandas as pd

data = ufile.read_csv(
    "/Users/luyu/Downloads/ValX_demo/diabetes_criteria_test_20words_3lab20words.csv"
)

# extract lab-value statement for 3 lab tests
# One list per CSV column, in column order 0..6.
q_id = [row[0] for row in data]
q_inc = [row[1] for row in data]
q_ques = [row[2] for row in data]
q_fom = [row[3] for row in data]
glu_exp = [row[4] for row in data]
a1c_exp = [row[5] for row in data]
cre_exp = [row[6] for row in data]

stop = "gm|%,|hr|hrs|min|mins|minute|minutes|hour|hours|okay hour|day|days|week|weeks|month|months|yr|yrs|year|years".split(
    "|")

#for d in glu_exp:print(d)
glu_exp_n = []
cre_exp_n = []
for c in glu_exp:
    # Flatten the cell to one string and drop brackets and double quotes.
    ch = ''.join(c).replace('[', '').replace(']', '').replace('"', '')
    # NOTE(review): the loop body appears to continue past this view.
import re
import nltk
import string
from nltk.corpus import stopwords
import file as ufile
import pandas as pd

# define a group of impossible units
stop = "gm|hr|hrs|min|mins|minute|minutes|hour|hours|okay hour|day|days|week|weeks|month|months|yr|yrs|year|years".split("|")

data = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/exp/Glucose_top10mix_exp_Q3.csv")
#ini = ufile.read_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_top10mix_csv/Glucose_top10mix_Q1.csv")

glu_exp = [d[2] for d in data]
out = []

# remove expressions containing impossible units
# NOTE(review): the nesting below was reconstructed from a
# whitespace-collapsed source -- confirm it against the original file.
for d in data:
    #print(d[0], d[2], d[3], d[4])
    for s in stop:
        if s in d[2]:
            # Keep what follows the first occurrence of the unit (minus its
            # last two characters), re-wrap it in brackets, then remove any
            # "s,'," left in the result.
            d[2] = ('[' + d[2].split(s)[1][:-2] + ']').replace("s,',", "")
    out.append([d[0], d[1], d[2], d[3], d[4]])
    #print(d[0], d[2])

ufile.write_csv("/Users/luyu/Documents/Master Thesis/rating/mix_top10/Glucose_top10mix/Glucose_exp/post_exp/test.csv", out)
elif float(seg[2]) > 107.0: lab_range.append(3) return min_max_normalization(lab_range) # load data: Creatinine #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q1.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q2.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q3.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q4.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q5.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q6.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q7.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q8.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q9.csv') candidates_d = ufile.read_csv( '/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q10.csv') # load data: HbA1c #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q1.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q2.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q3.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q4.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q5.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q6.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q7.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q8.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q9.csv') #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master 
Thesis/hba1c/hba1c_Q10.csv') # load data: Glucose #candidates_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/glucose/glucose_Q1.csv')
import file as ufile
from random import sample
import pandas as pd
import re

# sample questions with 5-20 word length
data = ufile.read_csv(
    '/Users/luyu/Downloads/ValX_demo/diabetes_criteria_test.csv')

d_20w = []
for d in data:
    # Newlines are removed from the second column in place before counting
    # whitespace-separated words.
    d[1] = d[1].replace('\n', '')
    if not 5 <= len(d[1].split()) <= 20:
        continue
    d_20w.append(d)

# write data
#ufile.write_csv('/Users/luyu/Documents/Master Thesis/diabetes_criteria_test_5to20words.csv', d_20w)

# count valx-parsed questions with expressions
box = ufile.read_csv(
    '/Users/luyu/Documents/Master Thesis/diabetes_criteria_test_5to20words_parsed.csv'
)

# One list per column of interest (columns 0, 2, 4, 5 and 6).
q_id = [row[0] for row in box]
txt = [row[2] for row in box]
glu = [row[4] for row in box]
a1c = [row[5] for row in box]
cre = [row[6] for row in box]

with_exp = []
no_exp = []
#data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/glucose/glucose_Q10.csv') # load data: HbA1c #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q1.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q2.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q3.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q4.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q5.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q6.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q7.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q8.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q9.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q10.csv') # load data: Creatinine data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q1.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q2.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q3.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q4.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q5.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q6.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q7.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q8.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q9.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q10.csv') # load data: no lab #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q1.csv') #data = 
ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q2.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q3.csv') #data = ufile.read_csv('/Users/luyu/Documents/Master Thesis/noLab/noLab_Q4.csv')
cat_pos_tag.append( len([ff[1] for ff in nltk.pos_tag(f.split()) if 'VB' in ff[1]])) return min_max_normalization(cat_pos_tag) # get number of adjectives def num_of_adj(cat): cat_pos_tag = [] for f in cat: cat_pos_tag.append( len([ff[1] for ff in nltk.pos_tag(f.split()) if 'JJ' in ff[1]])) return min_max_normalization(cat_pos_tag) # load data creatinine_d = ufile.read_csv( '/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q1.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q2.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q3.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q4.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q5.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q6.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q7.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q8.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q9.csv') #creatinine_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/creatinine/creatinine_Q10.csv') #hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q1.csv') #hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q2.csv') #hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q3.csv') #hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q4.csv') #hba1c_d = ufile.read_csv('/Users/luyu/Documents/Master Thesis/hba1c/hba1c_Q5.csv')
elif 0.84 <= float(seg[2]) <= 1.21: lab_range.append(2) elif 1.21 < float(seg[2]) < 20.0: lab_range.append(3) elif 20.0 < float(seg[2]) < 74.3: lab_range.append(1) elif 74.3 <= float(seg[2]) <= 107.0: lab_range.append(2) elif float(seg[2]) > 107.0: lab_range.append(3) else: lab_range.append(0) return lab_range # load data pool = ufile.read_csv('/Users/luyu/Documents/Master Thesis/Question_Pool.csv') #del pool[0] # extract expression for 3 lab id = [d[0] for d in pool] txt = [d[1] for d in pool] glu_exp = [d[2] for d in pool] a1c_exp = [d[3] for d in pool] cre_exp = [d[4] for d in pool] # post-processing glucose results glu_exp_n = [] for c in glu_exp: ch = ''.join([cc for cc in c]) ch = ch.replace('[', '') ch = ch.replace(']', '')