def process_blank(readcsvname, writecsvname):
    """Drop blank cells, split cells on the full-width space, and save.

    Every row returned by ``data_process.read_csv(readcsvname)`` is turned
    into a new flat row: empty cells are skipped and every other cell is
    split on the three-byte UTF-8 encoding of U+3000 (ideographic space).
    The resulting rows are handed to ``data_process.write_in_csv``.

    The earlier version called ``item.pop(num)`` on the row it was
    iterating over.  That made the iterator skip the cell following every
    blank (silently losing data) while the blank itself was still written
    out.  Rows are now rebuilt without mutating the input.

    Args:
        readcsvname: path of the CSV file to read.
        writecsvname: path of the CSV file to write.
    """
    print('process_blank')
    csvdata = data_process.read_csv(readcsvname)
    datas = []
    for item in csvdata:
        replace = []
        for itemdata in item:
            if itemdata == '':
                # Blank cell: leave it out instead of popping from ``item``.
                continue
            # ``split`` always yields at least one piece, so ``extend``
            # covers both the "one piece" and the "several pieces" case.
            replace.extend(itemdata.split('\xe3\x80\x80'))
        datas.append(replace)
    data_process.write_in_csv(writecsvname, datas)
def pickData(readcsvname1, readcsvname2, function,
             prescsvname='Apriori_QFCS_Prescription.csv',
             funccsvname='Apriori_QFCS_Function.csv'):
    """Keep the rows whose function text mentions ``function``.

    Rows of ``readcsvname2`` (function descriptions) are scanned in order;
    when any cell of a row contains ``function`` the row is kept together
    with the row at the same position in ``readcsvname1`` (prescriptions).
    Both selections are written with ``data_process.write_in_csv``.

    Changes against the earlier version:
      * a row is kept once, even when several of its cells match (the
        inner loop had no ``break`` and appended the row once per match);
      * the output file names are optional parameters whose defaults are
        the previously hard-coded names, so switching to another function
        no longer requires editing this body.

    Args:
        readcsvname1: prescription CSV, row-aligned with ``readcsvname2``.
        readcsvname2: function CSV; cells are decoded as UTF-8 before the
            substring search.
        function: text to look for inside each function cell.
        prescsvname: output path for the selected prescription rows.
        funccsvname: output path for the selected function rows.

    Raises:
        IndexError: if a matching function row has no prescription row at
            the same position.
    """
    print('pickData')
    presdatalist = list(data_process.read_csv(readcsvname1))
    funcdata = data_process.read_csv(readcsvname2)
    preslist = []
    funclist = []
    for num, item in enumerate(funcdata):
        if num == 0 and item:
            # NOTE(review): the literal being replaced shows up as '' in the
            # reviewed source (a no-op); presumably it held an invisible
            # UTF-8 byte-order mark, spelled out explicitly here -- confirm.
            item[0] = item[0].replace('\xef\xbb\xbf', '')
        for itemdata in item:
            if itemdata.decode('utf8').find(function) > -1:
                funclist.append(item)
                preslist.append(presdatalist[num])
                break  # one match is enough; do not duplicate the row
    print('功效 %s 的样本集大小为:%d' % (function, len(funclist)))
    data_process.write_in_csv(prescsvname, preslist)
    data_process.write_in_csv(funccsvname, funclist)
def composition_process(readcsvname, writecsvname):
    """Split the first cell of every row on spaces and save the pieces.

    For each row returned by ``data_process.read_csv(readcsvname)`` the
    first cell is split on ``' '``; empty pieces are discarded and the
    remaining pieces become one output row for
    ``data_process.write_in_csv``.

    The earlier version removed empty pieces with ``item.pop(num)`` while
    iterating over the same list, which skips the element after each
    removal and therefore left blanks behind whenever two or more were
    adjacent (e.g. three consecutive spaces in the input).  Filtering into
    a new list removes all of them.  A row without any cell now produces
    an empty output row instead of raising ``IndexError``.

    Args:
        readcsvname: path of the CSV file to read.
        writecsvname: path of the CSV file to write.
    """
    print('composition_process')
    csvdata = data_process.read_csv(readcsvname)
    datas = []
    for item in csvdata:
        first_cell = item[0] if item else ''
        datas.append([part for part in first_cell.split(' ') if part != ''])
    data_process.write_in_csv(writecsvname, datas)
def pickFunction(readcsvname1, readcsvname2, writecsvname1, writecsvname2, function):
    """Collect positive and negative samples for one prescription function.

    Positive pass: a function row is kept (with the prescription row at
    the same position) when one of its cells, decoded as UTF-8, contains
    ``function`` and that prescription row has at least 3 cells.

    Negative pass: only every 50th function row is inspected.  It is kept
    when none of its cells contains ``function``, '清热' or '解毒', the
    prescription row has more than 3 cells, and fewer than 1.3 times the
    number of positives have been collected so far.

    Both selections are appended to the same two lists, which are finally
    written to ``writecsvname1`` (functions) and ``writecsvname2``
    (prescriptions).

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; the negative pass is assumed to sit entirely under the
    "every 50th row" test -- confirm against the original layout.
    """
    print('pickFunction')
    funcdata = data_process.read_csv(readcsvname1)
    presdata = data_process.read_csv(readcsvname2)
    prescription_rows = list(presdata)
    function_rows = list(funcdata)
    finalpreslist = []
    finalfunclist = []
    print("要选择的方剂功效为 %s:" % function)
    print("正在进行中.....")

    # Positive samples: first matching cell decides, row is taken once.
    for row_index, row in enumerate(function_rows):
        if any(cell.decode('utf8').find(function) > -1
               and len(prescription_rows[row_index]) >= 3
               for cell in row):
            finalfunclist.append(row)
            finalpreslist.append(prescription_rows[row_index])
    print("功效%s 在5W数据集的方剂中找到含有该功效方剂 %d 条。" % (function, len(finalfunclist)))
    print("收集负例中.....(设置负例为正例个数的1.3倍)")

    def _is_excluded(cell):
        # The function itself and its near-synonyms disqualify a row.
        text = cell.decode('utf8')
        return (text.find(function) > -1
                or text.find('清热') > -1
                or text.find('解毒') > -1)

    negatives = 0
    negative_quota = len(finalfunclist) * 1.3
    # Stepping by 50 visits exactly the rows whose index is a multiple of 50.
    for row_index in range(0, len(function_rows), 50):
        row = function_rows[row_index]
        rejected = any(_is_excluded(cell) for cell in row)
        if not rejected and negatives < negative_quota:
            if len(prescription_rows[row_index]) > 3:
                finalfunclist.append(row)
                finalpreslist.append(prescription_rows[row_index])
                negatives += 1
        if negatives > negative_quota:
            break
    print("功效%s 在5W数据集的方剂中正负样例共有 功效:%d 配伍:%d 条。" % (
        function, len(finalfunclist), len(finalpreslist)))
    data_process.write_in_csv(writecsvname1, finalfunclist)
    data_process.write_in_csv(writecsvname2, finalpreslist)
def pickData(readcsvname1, readcsvname2, readcsvname3, readcsvname4, function,
             prescsvname='combinePrescription.csv',
             funccsvname='combineFunction_QFCS.csv'):
    """Build a combined positive/negative sample set for ``function``.

    Positives come from ``readcsvname2`` (function rows) paired by position
    with ``readcsvname1`` (prescription rows): a row is positive when any
    of its cells, decoded as UTF-8, contains ``function``.

    Negatives come from ``readcsvname4`` paired by position with
    ``readcsvname3``: rows in which no cell contains ``function`` are taken
    in file order until 1.2 times the number of positives is reached.

    Changes against the earlier version:
      * positives are appended once per row (the inner loop lacked a
        ``break`` and appended once per matching cell);
      * a negative must lack ``function`` in *every* cell; previously the
        first cell without it was enough, so rows that did mention the
        function elsewhere were labelled negative;
      * the output file names are optional parameters defaulting to the
        previously hard-coded names.

    Args:
        readcsvname1: prescription CSV for the positive source.
        readcsvname2: function CSV for the positive source.
        readcsvname3: prescription CSV for the negative source.
        readcsvname4: function CSV for the negative source.
        function: text to look for inside each function cell.
        prescsvname: output path for the combined prescription rows.
        funccsvname: output path for the combined function rows.

    Raises:
        IndexError: if a selected function row has no prescription row at
            the same position.
    """
    print('pickData')
    presdatalist = list(data_process.read_csv(readcsvname1))
    funcdata = data_process.read_csv(readcsvname2)
    webPresdatalist = list(data_process.read_csv(readcsvname3))
    webFuncdata = data_process.read_csv(readcsvname4)

    def _mentions_function(row):
        return any(cell.decode('utf8').find(function) > -1 for cell in row)

    def _strip_leading_mark(row):
        # NOTE(review): the literal being replaced shows up as '' in the
        # reviewed source (a no-op); presumably it held an invisible UTF-8
        # byte-order mark, spelled out explicitly here -- confirm.
        if row:
            row[0] = row[0].replace('\xef\xbb\xbf', '')

    preslist = []
    funclist = []
    for num, item in enumerate(funcdata):
        if num == 0:
            _strip_leading_mark(item)
        if _mentions_function(item):
            funclist.append(item)
            preslist.append(presdatalist[num])
    positiveCount = len(funclist)
    print('功效 %s 的个数为:%d' % (function, positiveCount))

    negativeCount = positiveCount * 1.2
    count = 0
    for num, item in enumerate(webFuncdata):
        if count >= negativeCount:
            break  # quota reached; remaining rows would be ignored anyway
        if num == 0:
            _strip_leading_mark(item)
        # Rows without cells were never selected before; keep it that way.
        if item and not _mentions_function(item):
            funclist.append(item)
            preslist.append(webPresdatalist[num])
            count += 1
    print('功效 %s 的测试样本集大小为:%d' % (function, len(funclist)))
    data_process.write_in_csv(prescsvname, preslist)
    data_process.write_in_csv(funccsvname, funclist)
def processAll_tiaocan(allList,preName,xuexilv,d_a,zz):
    """Print every rule as a comma-joined line and save all rules to CSV.

    Each entry of ``allList`` is copied into a plain list (one CSV row) and
    also printed as its items joined by ','.  The rows are written with
    ``data_process.write_in_csv`` to a file under
    ``../myMedicalModel/tiaocan_lr0.01/`` whose name is built from
    ``xuexilv``, ``d_a``, ``zz`` and ``preName``.

    NOTE(review): ``xuexilv``, ``d_a`` and ``zz`` are only used to build the
    file name here; presumably they are tuning values -- confirm with the
    caller.
    """
    print('规律总结处理...')
    rows_to_write = [list(rule) for rule in allList]
    joined_lines = []
    for members in rows_to_write:
        line = ','.join(members)
        print(line)
        joined_lines.append(line)
    target = ''.join([
        '../myMedicalModel/tiaocan_lr0.01/',
        str(xuexilv), '-', str(d_a), '-', str(zz),
        '-ECMA_', preName, '_Apriori_16_avg_unit.csv',
    ])
    data_process.write_in_csv(target, rows_to_write)
    # [occurrences, line] for every distinct line.  The result is not used
    # within the visible part of this function.
    sortList = [[joined_lines.count(line), line] for line in list(set(joined_lines))]
def composition_process(readcsvname, writecsvname):
    """Tokenise the first cell of every row into composition items.

    For each row returned by ``data_process.read_csv(readcsvname)`` the
    first cell is decoded as UTF-8, then:
      * the list separators matched by ``pattern`` become a space,
      * an opening bracket (ASCII or full-width) becomes ``' ('``,
      * a closing bracket (ASCII or full-width) becomes ``') '``,
    and the text is split on spaces.  Empty pieces are discarded and the
    remaining pieces form one output row for
    ``data_process.write_in_csv``.

    The earlier version removed empty pieces with ``item.pop(num)`` while
    iterating over the same list.  That skips the element after each
    removal, so blanks survived whenever two or more were adjacent, which
    the substitutions above produce routinely (e.g. a separator directly
    followed by a bracket).  Filtering into a new list removes them all.
    A row without any cell now yields an empty output row instead of
    raising ``IndexError``.

    Args:
        readcsvname: path of the CSV file to read.
        writecsvname: path of the CSV file to write.
    """
    print('composition_process')
    csvdata = data_process.read_csv(readcsvname)
    # Same pattern text as before; written as plain unicode literals (with
    # the backslash doubled) so the values are unchanged.
    pattern = re.compile(u'(?:、|,|;|。|,)')
    pattern1 = re.compile(u'(?:\\(|()')
    pattern2 = re.compile(u'(?:\\)|))')
    datas = []
    for item in csvdata:
        text = (item[0] if item else '').decode('utf-8')
        text = pattern.sub(' ', text)
        text = pattern1.sub(' (', text)
        text = pattern2.sub(') ', text)
        datas.append([part for part in text.split(' ') if part != ''])
    data_process.write_in_csv(writecsvname, datas)
def processAll(allList,preName,cishu,top):
    """Print every rule as a comma-joined line and save all rules to CSV.

    Each entry of ``allList`` is copied into a plain list (one CSV row) and
    also printed as its items joined by ','.  The rows are written with
    ``data_process.write_in_csv`` to a file under
    ``../myMedicalModel/load_result-627-uit80-H-top8/`` whose name is built
    from ``preName``, ``top`` and ``cishu``.

    ``top`` is now passed through ``str()`` like ``cishu`` already was, so
    an integer ``top`` no longer raises ``TypeError`` during path building;
    string values give exactly the same path as before.

    Args:
        allList: iterable of rules, each an iterable of strings.
        preName: string inserted into the output file name.
        cishu: value appended to the file name (converted with ``str``).
        top: value inserted after ``-top`` (converted with ``str``).
    """
    print('规律总结处理...')
    oneList = []
    writeList = []
    for item in allList:
        row = list(item)
        line = ','.join(row)
        print(line)
        oneList.append(line)
        writeList.append(row)
    data_process.write_in_csv(
        '../myMedicalModel/load_result-627-uit80-H-top8/ECMA_' + preName
        + '_Apriori_0.001_128_0.0002-top' + str(top) + '-' + str(cishu) + '.csv',
        writeList)
    # [occurrences, line] for every distinct line.  The result is not used
    # within the visible part of this function; kept in case later code
    # outside this view relies on it.
    oneSet = list(set(oneList))
    sortList = []
    for item in oneSet:
        sortList.append([oneList.count(item), item])
zz.append(medicallist[weightlist[i][0] - 1]) allresults.append(medicallist[weightlist[i][0] - 1]) zz.append(weightlist[i][1]) importantMedical.append(zz) print '当功效为‘清热解毒’时,占主导作用的药物组合是:\n' count = 0 # print importantMedical for item in importantMedical: # print item print '药物%d:' % (count + 1), item[0], item[1] count += 1 print '一共有 %d 中药物' % len(importantMedical) # writecsvname='../resultsdata/result_'+readcsvname.split('/')[-1] # data_process.write_in_csv(writecsvname, importantMedical) medicaListSet = list(set(allresults)) # 统计每种药物出现的次数 numarray = [] n = [] for item in medicaListSet: n.append(item) n.append(allresults.count(item)) numarray.append(n) n = [] # 以次数排序 numarray = sorted(numarray, key=lambda x: x[1], reverse=True) writecsvname = '../resultsdata/result.csv' data_process.write_in_csv(writecsvname, numarray)
# # 手动去除allMedicalCount_1.csv里频次为1的药物;和调和药“甘草” # writecsvname = '../formulaData_1/QRJD_medical_count.csv' # data_process.write_in_csv(writecsvname , numarray) #统计处方中不同频次的药物占比 # readcsvname = 'allMedicalCount_combine_QFCS.csv' # tongji(readcsvname) #step 2 计算方剂向量特征 #(1)使用one-hot表示,每个方剂的维数等于所有方剂中药物的去重个数,若出现则为1 *********presFeature_onehot.csv #数据1:全取自防风数据集 csvname1='../formulaData_1/QRJD_pres.csv' csvname2 = '../formulaData_1/QRJD_medical_count.csv' pFeatrue= presFeature(csvname1,csvname2) writecsvname = '../formulaData_1/presFeature_onehot_QRJD_584t.csv' data_process.write_in_csv(writecsvname , pFeatrue) # # # (2)使用配伍单位数值表示,每个方剂的维数等于所有方剂中药物的去重个数*********presFeature_realValue.csv # csvname1 = 'prescription_6.csv' # csvname2 = 'allMedicalCount_1.csv' # pFeatrue = presFeature_1(csvname1, csvname2) # writecsvname = 'presFeature_realValue_combine_QFCS_223t.csv' # data_process.write_in_csv(writecsvname, pFeatrue) # # # (3)使用配伍单位数值表示,每个方剂的维数等于所有方剂中药物的去重个数,在方剂中做归一化处理*********presFeature_standardValue.csv # csvname1 = 'prescription_6.csv' # csvname2 = 'allMedicalCount_1.csv' # 数据2:与web爬取数据结合的正负例 # csvname1='combinePrescription.csv' # csvname2 = 'allMedicalCount_combine_QFCS.csv'
result = sess.run([logits, model.B,model.Q], feed_dict={model.input_pl: batch_input, labels: batch_tags}) for j in range(len(batch_tags)): if np.argmax(batch_tags[j]) == 0: if np.argmax(labelList[k_count]) == np.argmax(batch_tags[j]): evalCount += 1 rs += np.sum(np.argmax(labelList[k_count]) == np.argmax(batch_tags[j])) # print('labelList[k_count]',labelList[k_count]) if not np.argmax(labelList[k_count]): preClass = True else: preClass = False medicalList = [] if FLAGS.visualize == True and preNum==finalNum: #保存每个方剂中的WP1参数—获取药物相互作用注意力 start if i < 4: data_process.write_in_csv('../myMedicalModel/atentionVision/%s_inter.csv' % i, Q_value[k_count]) # 保存每个方剂中的WP1参数—获取药物相互作用注意力 end f.write('<div style="margin:15px;">\n') #result[1][0]保存的是方剂中每个药物对应的attention因子,具体result[1][0][k][j]取出 for k in range(len(result[1][0])): f.write('\t<p> —— 测试方剂 %s (类标:%s ; 预测类标:%s):—— </p>\n'%(i, tags[i],preClass)) f.write('<p style="margin:10px;font-family:SimHei">\n') ww = TOKENIZER_RE.findall(words[i*batch_size][0]) for j in range(word_pad_length): if (attMetricAll[k_count][j]/finalNum) < a: color= 0 else: color=attMetricAll[k_count][j]/finalNum alpha = "{:.2f}".format(color) if len(ww) <= j: w = " "
if num != 0: zz = [] item[0] = item[0].replace('[[', '') item[0] = item[0].replace(']]', '') item[0] = item[0].replace('', '') # print 'zz',item[0] zz.append(num) zz.append(float(item[0])) weightlist.append(zz) num += 1 weightlist = sorted(weightlist, key=lambda x: x[1], reverse=True) for i in range(0, 11): zz = [] zz.append(medicallist[weightlist[i][0] - 1]) zz.append(weightlist[i][1]) importantMedical.append(zz) print '当功效为‘祛风除湿’时,占主导作用的药物组合是:\n' count = 0 # print importantMedical for item in importantMedical: # print item print '药物%d:' % (count + 1), item[0], item[1] count += 1 print '一共有 %d 中药物' % len(importantMedical) writecsvname = 'result_' + readcsvname data_process.write_in_csv(writecsvname, importantMedical)
# for item in csvdata: # num = 0 # for itemdata in item: # if itemdata == '': # item.pop(num) # num += 1 # datas.append(item) # data_process.write_in_csv(writecsvname, datas) #删除没有内容的项 end # # (3)药名-数量单位 一一提取匹配;处理“各”的情况 补填;处理“等分”,“少许”等词清除 readcsvname='../formulaData_1/pres_5W_3.csv' writecsvname='../formulaData_1/pres_5W_4.csv' finalmedicallist=extractnumfromstr(readcsvname, writecsvname) data_process.write_in_csv(writecsvname, finalmedicallist) #8-1 目前考虑用one-hot表示,先不做这步 # #step 5 把单位kg,钱,两 统一为 g(克) # csvname='../formulaData_1/pres_5W_4.csv' # normalList= dataDetailProcess.unitTransformation(csvname) # writecsvname='../formulaData_1/pres_5W_5.csv' # data_process.write_in_csv(writecsvname, normalList) # # #step 6 清洗一些none单位的杂音 如‘等分,少许’ # csvname='../formulaData_1/pres_5W_5.csv' # noneList= dataDetailProcess.noneStandard(csvname) # writecsvname='../formulaData_1/pres_5W_6.csv' # data_process.write_in_csv(writecsvname, noneList)
# coding=utf-8
import data_process

# Turn an experiment log into plain numeric rows.
#
# For each input row the first cell is split on spaces and the text after
# the last ':' of its fourth token is parsed as a float; the second cell
# supplies the text after 'acc:' (double quotes removed), also parsed as a
# float.  Each output row is [fourth-token value, acc].  Progress is echoed
# to stdout exactly as before.
readcsvname = '../formulaData_Experiment/ExResult_onehot_QRJD.csv'
writecsvname = '../formulaData_1/L1_draw.csv'

readdata = data_process.read_csv(readcsvname)
datalist = []
for record in readdata:
    print('item %s' % (record[0],))
    data = []
    for position, token in enumerate(record[0].split(' ')):
        print('1 %s' % (token,))
        if position != 3:
            continue
        # Only the fourth token carries the value of interest.
        print('2 %s' % (token,))
        maxiter = token.split(':')[-1]
        print(maxiter)
        data.append(float(maxiter))
    acc = record[1].split('acc:')[-1].replace('"', '')
    print(acc)
    data.append(float(acc))
    datalist.append(data)

medicaldata = data_process.write_in_csv(writecsvname, datalist)
attMetric.append(thisMetic) f.write('</div>\n') if FLAGS.visualize == True and preNum < finalNum: f.write('\t<p>Test accuracy: %s</p>\n' % (rs / total)) f.write( '\t<p>该功效下%s个经典方剂(即测试集前%s个方剂) accuracy :%s</p>\n' % (evalNum, evalNum, evalCount / evalNum)) f.write( '\t<p>该功效下%s个经典方剂 avg-dice : %s</p>\n' % (evalNum, sum(allDice) / evalNum)) f.write('</body></html>') f.close() data_process.write_in_csv( '../myMedicalModel/modelvsECMSR_Apriori/metircsResults/attMetric' + preName + str(preNum) + ".csv", attMetric) if attMetricAll == []: attMetricAll = attMetric else: for x, itemx in enumerate(attMetricAll): for y, itemy in enumerate(itemx): attMetricAll[x][y] = attMetricAll[ x][y] + attMetric[x][y] ##########################################################最终结果显示 if FLAGS.visualize == True and preNum == finalNum: f = open( '../myMedicalModel/modelvsECMSR_Apriori/html/final_%s_visualizeTCM_%s_noLSTM_HWH_epoches%s_r1_num%s.html' % (preName, preName, FLAGS.num_epochs, preNum), 'w') f.write(
count = 0 # print item zz = [] for itemdata in item: # print count # print itemdata if (count % 2) == 0: zz.append(itemdata) count += 1 else: count += 1 # print zz finallist.append(zz) return finallist if __name__ == '__main__': print('准备Apriori算法数据....') #换其他功效时 修改这里即可 function = '祛风除湿' # readcsvname1='prescription_6.csv' # readcsvname2='function_1.csv' # # pickData(readcsvname1, readcsvname2,function) readcsvname = 'Apriori_QFCS_Prescription.csv' finallist = onlyWord(readcsvname) writename = 'Apriori_QFCS_data.csv' data_process.write_in_csv(writename, finallist)