Esempio n. 1
0
def pickData(readcsvname1, readcsvname2, function):
    print 'pickData'
    preslist = []
    funclist = []
    presdata = data_process.read_csv(readcsvname1)
    funcdata = data_process.read_csv(readcsvname2)
    presdatalist = []
    for item in presdata:
        presdatalist.append(item)
    num = 0
    for item in funcdata:
        # print '功效',item
        if num == 0:
            item[0] = item[0].replace('', '')
        for itemdata in item:
            if itemdata.decode('utf8').find(function) > -1:
                funclist.append(item)
                preslist.append(presdatalist[num])
        num += 1

    print '功效 %s 的样本集大小为:%d' % (function, len(funclist))

    #换功效时 需要修改最后的保存文件!!!
    writecsvname = 'Apriori_QFCS_Prescription.csv'
    data_process.write_in_csv(writecsvname, preslist)
    writecsvname = 'Apriori_QFCS_Function.csv'
    data_process.write_in_csv(writecsvname, funclist)
Esempio n. 2
0
def func2feature(csvname1, csvname2):
    print 'func2feature'
    funcdata = data_process.read_csv(csvname1)
    countdata = data_process.read_csv(csvname2)

    countlist = []
    for item in countdata:
        countlist.append(item[0])

    featurelist = []
    #这里可以修改需要判别的功效,放一个时会检索不到(‘.-’)
    locmark = countlist.index('祛风除湿'.decode('utf-8'))
    print 'locmark', locmark
    for item in funcdata:
        check = 0
        for itemdata in item:
            itemdata = itemdata.replace('疏风', '祛风')
            itemdata = itemdata.replace('散风', '祛风')
            itemdata = itemdata.replace('驱风', '祛风')
            try:
                loc = countlist.index(itemdata.decode('utf-8'))
                if loc == locmark:
                    check = 1
            except:
                pass
        if check == 1:
            featurelist.append(1)
        else:
            featurelist.append(0)
    print 'len(featurelist):', len(featurelist)
    print '有多少方剂属于该功效(祛风除湿):', featurelist.count(1)
    return featurelist
Esempio n. 3
0
def pickFunction(readcsvname1, readcsvname2, writecsvname1, writecsvname2,
                 function):
    print 'pickFunction'
    funcdata = data_process.read_csv(readcsvname1)
    presdata = data_process.read_csv(readcsvname2)
    funclist = []
    preslist = []
    for item in presdata:
        preslist.append(item)
    for item in funcdata:
        funclist.append(item)

    finalpreslist = []
    finalfunclist = []
    print "要选择的方剂功效为 %s:" % function
    print "正在进行中....."
    num = 0
    for item in funclist:
        for itemdata in item:
            itemdata = itemdata.decode('utf8')
            if itemdata.find(function) > -1 and len(preslist[num]) >= 3:
                finalfunclist.append(item)
                finalpreslist.append(preslist[num])
                break
        num += 1
    print "功效%s 在5W数据集的方剂中找到含有该功效方剂 %d 条。" % (function, len(finalfunclist))
    print "收集负例中.....(设置负例为正例个数的1.3倍)"
    num = 0
    count = 0
    neglength = len(finalfunclist) * 1.3
    for item in funclist:
        if (num % 50 == 0):
            check = True
            for itemdata in item:
                itemdata = itemdata.decode('utf8')
                if itemdata.find(function) > -1:
                    check = False
                    break
                #近义词也要考虑过滤
                if itemdata.find('清热') > -1:
                    check = False
                    break
                if itemdata.find('解毒') > -1:
                    check = False
                    break
            if check and count < neglength:
                if len(preslist[num]) > 3:
                    finalfunclist.append(item)
                    finalpreslist.append(preslist[num])
                    count += 1
            if count > neglength:
                break
        num += 1

    print "功效%s 在5W数据集的方剂中正负样例共有 功效:%d 配伍:%d 条。" % (
        function, len(finalfunclist), len(finalpreslist))
    data_process.write_in_csv(writecsvname1, finalfunclist)
    data_process.write_in_csv(writecsvname2, finalpreslist)
Esempio n. 4
0
def pickData(readcsvname1, readcsvname2, readcsvname3, readcsvname4, function):
    print 'pickData'
    preslist = []
    funclist = []
    presdata = data_process.read_csv(readcsvname1)
    funcdata = data_process.read_csv(readcsvname2)

    webPresdata = data_process.read_csv(readcsvname3)
    webFuncdata = data_process.read_csv(readcsvname4)

    presdatalist = []
    for item in presdata:
        presdatalist.append(item)

    webPresdatalist = []
    for item in webPresdata:
        webPresdatalist.append(item)
    num = 0
    for item in funcdata:
        # print '功效',item
        if num == 0:
            item[0] = item[0].replace('', '')
        for itemdata in item:
            if itemdata.decode('utf8').find(function) > -1:
                funclist.append(item)
                preslist.append(presdatalist[num])
        num += 1
    positiveCount = len(funclist)
    print '功效 %s 的个数为:%d' % (function, positiveCount)
    negativeCount = positiveCount * 1.2
    num = 0
    count = 0
    for item in webFuncdata:
        # print '功效', item
        if num == 0:
            item[0] = item[0].replace('', '')
        if count < negativeCount:
            for itemdata in item:
                # print 'itemdata.decode(utf8).find(function)',num,itemdata.decode('utf8').find(function)
                if itemdata.decode('utf8').find(function) == -1:
                    funclist.append(item)
                    # print 'test',webPresdatalist[num]
                    preslist.append(webPresdatalist[num])
                    count += 1
                break
        num += 1

    print '功效 %s 的测试样本集大小为:%d' % (function, len(funclist))

    #换功效时 需要修改最后的保存文件!!!
    writecsvname = 'combinePrescription.csv'
    data_process.write_in_csv(writecsvname, preslist)
    writecsvname = 'combineFunction_QFCS.csv'
    data_process.write_in_csv(writecsvname, funclist)
Esempio n. 5
0
def presFeature_2(csvname1,csvname2):
    print 'presFeature_1'
    prescriptiondata = data_process.read_csv(csvname1)
    medicaldata = data_process.read_csv(csvname2)

    medicaList=[]
    for item in medicaldata:
        medicaList.append(item[0].replace('', ''))

    pFeatrue= []
    presLabelFeatrue=[]
    j=1
    wrongnum=1
    for item in prescriptiondata:
        # print 'item:',item
        item[0] = item[0].replace('', '')
        mark = 0
        # featrue = [0] * 1563
        featrue = [0] * 223
        lenth=len(item)
        thisall=0
        for i in range(0,lenth):
            if ((i % 2)!=0):
                if item[i] != 'None':
                    thisall = thisall+dataFeatureValue.findnum(item[i])
                else:
                    thisall=thisall+28
        for itemdata in item:
                if(( mark % 2) == 0):
                    try:
                        location=medicaList.index(itemdata)
                        # print 'location',location
                        if item[mark+1]!='None':
                            itemvalue=dataFeatureValue.findnum(item[mark+1])
                            # finalValue=(itemvalue-float(mediaclvalueList[location][2])+1)/(float(mediaclvalueList[location][3])+1)
                            #特征既有配伍成分,又考虑了单位数量和归一化
                            featrue[location]=itemvalue/thisall
                        else:
                            featrue[location] = 28/thisall
                    except:
                        # print 'wrong',wrongnum,item[0],mark,itemdata
                        wrongnum += 1
                    mark += 1
                else:
                     mark+=1
        j+=1
        pFeatrue.append(featrue)

    print len(pFeatrue),j,wrongnum
    return pFeatrue
Esempio n. 6
0
def presFeature(csvname1,csvname2):
    print 'presFeature'

    prescriptiondata = data_process.read_csv(csvname1)
    medicaldata = data_process.read_csv(csvname2)

    medicaList=[]
    for item in medicaldata:
        medicaList.append(item[0].replace('', ''))
    # print 'mediacl', medicaList
    # for item in medicaList:
    #     print (item)

    pFeatrue= []
    presLabelFeatrue=[]
    wrongnum=1
    #allData_normal1.csv一共有药物1487种
    for item in prescriptiondata:
        # print 'item:',item
        item[0] = item[0].replace('', '')
        mark = 0
        #多少种药就是多少维 668种药,668维
        # featrue = [0] * 1563
        featrue = [0] * 584
        for itemdata in item:
                if mark==0:
                    mark+=1
                    continue
                if(( mark % 2) == 1):
                    try:
                        location=medicaList.index(itemdata)
                        # print 'location',location
                        # itemvalue=dataFeatureValue.findnum(item[mark+1])
                        # finalValue=(itemvalue-float(mediaclvalueList[location][2])+1)/(float(mediaclvalueList[location][3])+1)
                        #特征既有配伍成分,有考虑了单位数量
                        # featrue[location]=finalValue
                        #只关心配伍成分,不关心单位数量
                        featrue[location]=1
                    except:
                        # print 'wrong',wrongnum,item[0],mark,itemdata
                        wrongnum += 1
                    mark += 1
                else:
                     mark+=1
        pFeatrue.append(featrue)

    print len(pFeatrue),wrongnum
    return pFeatrue
Esempio n. 7
0
def tongji(readcsvname):
    print 'tongji'
    data = data_process.read_csv(readcsvname)
    medicallist=[]
    for item in data:
        medicallist.append(item)

    num=0
    count = 0
    countp=0
    all=len(medicallist)
    print all
    for item in medicallist:
        if num ==0:
            aa=item[1]
            count += 1
        else:
            # print 'zz', item[1]
            if item[1]==aa:
                count+=1
            else:
                # print count
                p=float(count)/all
                countp=countp+p
                print '频次:%s , 占比:%f'%(aa,p)
                count=0
                count += 1
            aa = item[1]
        num+=1

    p = float(count) / all
    countp = countp + p
    print '频次:%s , 占比:%f' % (aa, p)
    print 'countp',countp
def composition_process(readcsvname, writecsvname):
    """Split the first cell of every row on spaces and save the tokens.

    Args:
        readcsvname: CSV whose first column holds space-separated text.
        writecsvname: CSV that receives one row of non-empty tokens per
            input row.
    """
    print('composition_process')
    csvdata = data_process.read_csv(readcsvname)
    # Build a filtered list instead of popping while iterating: popping
    # skipped the element after each removal, so runs of empty tokens
    # (from consecutive spaces) were only partly removed.
    datas = [[token for token in item[0].split(' ') if token != '']
             for item in csvdata]
    data_process.write_in_csv(writecsvname, datas)
def seperateNone():
    """Scan 'csvtest_3.csv', sorting rows by whether they contain 'None'.

    Rows with a cell equal to 'None' are collected separately from the
    others (neither list is returned or saved).  While scanning, the second
    '*'-separated part of each row's first cell is compared with the
    previous row's; whenever it differs the current run length is printed
    and the counter restarts.
    """
    print('seperateNone')
    csvdata = data_process.read_csv('csvtest_3.csv')
    normalList = []
    noneLise = []
    # NOTE(review): the original comment says the goal is to count, per
    # class, the prescriptions containing None, but the counter below
    # counts every row of a class -- confirm the intent.
    previous = 1
    count = 1
    for row in csvdata:
        if any(cell == 'None' for cell in row):
            noneLise.append(row)
        else:
            normalList.append(row)
        category = row[0].split('*')[1]
        if category == previous:
            count += 1
        else:
            print('count :', count)
            count = 1
        # Remembered to detect the next change of class.
        previous = category
def process_blank(readcsvname, writecsvname):
    """Drop empty cells and split cells on the ideographic space.

    Every non-empty cell is split on the byte sequence '\\xe3\\x80\\x80'
    (UTF-8 for U+3000) and the pieces are written, in order, as the cells
    of the output row.

    Args:
        readcsvname: CSV file to clean.
        writecsvname: CSV file that receives the cleaned rows.
    """
    print('process_blank')
    csvdata = data_process.read_csv(readcsvname)
    datas = []
    for item in csvdata:
        cleaned = []
        for itemdata in item:
            # Skip empty cells.  Popping them from the row while iterating
            # over it skipped the following cell, and the empty string was
            # still appended to the output.
            if itemdata == '':
                continue
            cleaned.extend(itemdata.split('\xe3\x80\x80'))
        datas.append(cleaned)
    data_process.write_in_csv(writecsvname, datas)
def splitnumandstr(readcsvname):
    """Remove bracketed remarks from the cells of a CSV file.

    Returns a list with one list of cleaned, UTF-8 decoded cells per row.
    Bracket handling is delegated to ``kuohaoClear`` first; when that call
    raises, a regex fallback tracks an opening bracket in one cell and its
    closing bracket in a later cell of the same row.
    """
    print('splitnumandstr')
    csvdata = data_process.read_csv(readcsvname)

    # pattern1 matches an opening bracket, pattern2 a closing bracket
    # (ASCII or full-width form).
    pattern1 = re.compile(ur'(?:\(|()')
    pattern2 = re.compile(ur'(?:\)|))')

    data_after = []
    datalist = []
    for item in csvdata:
        # print ('%%%%%%%%%%%%%num',i)
        # check is False while the row is "inside" a bracket opened in an
        # earlier cell; such cells are not appended.
        check = True
        for itemdata in item:
            # NOTE(review): replacing '' with '' is a no-op as written --
            # presumably an invisible character was lost here; confirm.
            itemdata = itemdata.replace('', '')
            itemdata = itemdata.decode('utf-8')

            # Data cleaning: drop the content found inside brackets.
            try:
                # presumably kuohaoClear returns the start and end index of
                # the bracketed span -- verify against its definition
                num1, num2 = kuohaoClear(itemdata)
                itemdata = itemdata[0:num1] + itemdata[num2:]
            except:
                # print 'kuohao error',item[0]
                if pattern1.search(itemdata):
                    # keep only the text before the opening bracket
                    itemdata = itemdata[:pattern1.search(itemdata).start()]
                    data_after.append(itemdata)
                    check = False
                if pattern2.search(itemdata):
                    # keep only the text after the closing bracket
                    itemdata = itemdata[pattern2.search(itemdata).end():]
                    check = True
            if check:
                data_after.append(itemdata)
        datalist.append(data_after)
        data_after = []
    return datalist
Esempio n. 12
0
def function_count(csvname):
    print 'function_count 计算有多少种功效,每种功效出现的次数和比例*'
    csv_data = data_process.read_csv(csvname)
    flist = []

    for item in csv_data:
        checknum = 0
        for itemdata in item:
            if checknum != 0:
                itemdata = itemdata.replace('疏风', '祛风')
                itemdata = itemdata.replace('散风', '祛风')
                itemdata = itemdata.replace('驱风', '祛风')
                flist.append(itemdata)
            checknum += 1
    print '所有方剂中的功效有(没有去重):', len(flist)
    #去重 计算有多少不同的功效
    flistset = list(set(flist))

    # 统计每种药物出现的次数
    numarray = []
    n = []
    for item in flistset:
        n.append(item)
        n.append(flist.count(item))
        numarray.append(n)
        n = []
    # 以次数排序
    numarray = sorted(numarray, key=lambda x: x[1], reverse=True)
    print '所有方剂中的功效有(去重):', len(numarray)

    return numarray
Esempio n. 13
0
def countallmedical(readcsvname):
    print 'countallmedical'
    csvdata = data_process.read_csv(readcsvname)
    medicaList = []
    medical_value = []
    pattern = re.compile(ur'[\u4e00-\u9fa5]')
    j = 1
    for item in csvdata:
        print 'item:', item[0]
        mark = 0
        for itemdata in item:
            if mark == 0:
                mark += 1
                continue
            data_value = []
            if ((mark % 2) == 1):
                itemdata = itemdata.strip()
                itemdata = itemdata.replace('l', '')
                itemdata = itemdata.decode('utf8')
                # print 'itemdata zzz', itemdata
                if (pattern.search(itemdata)):
                    # print 'j', j, mark
                    # 存取出的药物
                    medicaList.append(itemdata)
                    # 存药物对应的数值
                    # data_value.append(itemdata)
                    # data_value.append(findnum(item[mark + 1]))
                mark += 1
                medical_value.append(data_value)
            else:
                mark += 1
                continue
        j += 1
    allcount = len(medicaList)
    print '所有处方中共有药物(medicaList): ', allcount
    medicaListSet = list(set(medicaList))

    medicalcount = len(medicaListSet)
    print 'medicaList去重后得到处方中不同药物数量: ', medicalcount

    # medicalminmax=maxValueandminValue(medicaListSet,  medical_value)
    # print 'medicalminmax去重后得到处方中不同药物数量: ', len(medicalminmax)

    #统计每种药物出现的次数
    numarray = []
    n = []
    for item in medicaListSet:
        n.append(item)
        n.append(medicaList.count(item))
        numarray.append(n)
        n = []

    print 'numarray1'
    #以次数排序
    numarray = sorted(numarray, key=lambda x: x[1], reverse=True)
    print 'numarray2'
    return numarray
Esempio n. 14
0
def main():
    """Compare published index weights with market-value based weights.

    Loads the index weight file and the daily trade file, then, for the
    fixed date 20190531, prints for each pair of neighbouring stock codes
    the difference between the ratio of their original weights and the
    ratio of their computed weights.  The process exits after the first
    pass in which no suspended stock is reported.
    """
    wgt_path = r'./data/HS300_idx_wt.csv'
    trade_data_path = r'./data/tmp/fixed_daily_data.csv'
    name2code = get_name2code(wgt_path)
    code2name = {code: name for name, code in name2code.items()}
    wgt_df = read_csv(wgt_path)
    trade_df = read_csv(trade_data_path)
    idx_df = read_csv(r'./data/HS300_idx_wt.csv')
    date_list = sorted(wgt_df['trade_date'].drop_duplicates().to_list())
    trade_columns = ['ts_code', 'trade_date', 'total_mv',
                     'circ_mv', 'total_share', 'close',
                     'float_share', 'free_share']
    for date in reversed(date_list):
        # The loop value is overridden: every pass looks at this one date.
        date = 20190531
        idx_df_day = idx_df[idx_df.trade_date == date]
        wgt_df_day = wgt_df[wgt_df.trade_date == date]
        trade_df_day = trade_df[trade_df.trade_date == date][trade_columns]
        wgt_code = idx_df_day['ts_code'].drop_duplicates().to_list()
        trade_code = trade_df_day['ts_code'].drop_duplicates().to_list()
        suspended_stock = select_suspend_stock(trade_code, wgt_code)
        if len(suspended_stock) != 0:
            continue
        print(date)
        original_weight = get_ts_code_original_weight(idx_df_day)
        mkt_val = get_market_value(trade_df_day)
        wgt = caculate_weight_by_market_value(wgt_code, mkt_val)
        for a, b in zip(trade_code, trade_code[1:]):
            try:
                b1 = original_weight[a] / original_weight[b]
                b2 = wgt[a] / wgt[b]
                print(b1-b2)
            except:
                # Pairs that cannot be compared are skipped.
                continue
        exit()
Esempio n. 15
0
def checkitem(readcsvname1, readcsvname2, readcsvname3):
    print '检查配伍-功效-主治对应与否中....'
    data1 = data_process.read_csv(readcsvname1)
    data2 = data_process.read_csv(readcsvname2)
    data3 = data_process.read_csv(readcsvname3)

    i = 0
    for item in data1:
        if i == 1620:
            print 'excel 第1622行 配伍(蓝实,决明子。。。) ', item
            for itemdata in item:
                print itemdata
        i += 1
    j = 0
    for item in data2:
        if j == 1620:
            print '第1622行 功效(疏风散热,清肝明目。) ', item[0]
        j += 1
    k = 0
    for item in data3:
        if k == 1620:
            print '第1622行 主治(肝胆风热上攻,两目??(目旁加流字右边),视物不明。) ', item[0]
        k += 1
def splitnumandstr(readcsvname):
    """Clean every cell of a CSV file and return the rows as lists.

    In each cell the character 各 is replaced by '#', then the first
    ASCII-bracketed span and the first full-width-bracketed span are
    removed together with their brackets.  The two bracket kinds are
    handled independently.  Progress is printed for every row and cell.

    Args:
        readcsvname: CSV file to clean.

    Returns:
        A list with one list of cleaned cells per input row.
    """

    def _drop_bracketed(text, opener, closer, label):
        """Remove the first ``opener ... closer`` span of ``text``, if any."""
        start = text.find(opener)
        if start == -1:
            return text
        end = text.find(closer, start + len(opener))
        if end == -1:
            return text
        print('*****************num1,num2******************', start, end)
        # len(closer), not 1: in a UTF-8 byte string a full-width bracket
        # is several bytes long, and "+ 1" left part of it in the result.
        text = text[:start] + text[end + len(closer):]
        print(label, text)
        return text

    print('splitnumandstr')
    csvdata = data_process.read_csv(readcsvname)
    datalist = []
    for i, item in enumerate(csvdata, 1):
        print('%%%%%%%%%%%%%num', i)
        data_after = []
        for itemdata in item:
            # NOTE(review): replacing '' with '' is a no-op as written --
            # presumably an invisible character was lost here; confirm.
            itemdata = itemdata.replace('', '')
            # The character 各 is handled separately.
            itemdata = itemdata.replace('各', '#')
            print('itemdata', itemdata)

            # Data cleaning: drop the content found inside brackets.
            # Previously a cell without ASCII brackets raised on the first
            # lookup and the full-width brackets were never processed.
            itemdata = _drop_bracketed(
                itemdata, '(', ')',
                '*****************清除英文括号内容******************')
            itemdata = _drop_bracketed(
                itemdata, '(', ')',
                '*****************清除中文括号内容******************')
            data_after.append(itemdata)
        datalist.append(data_after)
    return datalist
def createAllList():
    """Tag and merge the rows of 'composition_1_3.csv' .. 'composition_6_3.csv'.

    Each row gets a new first cell of the form ``*<file number>*<row
    number>`` (both counted from 1) and is appended to one combined list.

    Returns:
        The combined list of tagged rows.  (Previously the list was built
        and then discarded; callers that ignore the result are unaffected.)
    """
    print('createAllList')
    addlist = []
    for inum in range(1, 7):
        print('inum', inum)
        readcsvname = 'composition_' + str(inum) + '_3.csv'
        csvdata = data_process.read_csv(readcsvname)
        for i, item in enumerate(csvdata, 1):
            item.insert(0, '*' + str(inum) + '*' + str(i))
            addlist.append(item)
    return addlist
Esempio n. 18
0
def computeAverage(csvname):
    print  'computeAverage'
    prescriptiondata = data_process.read_csv(csvname)
    num=0
    itemvalue=0
    for item in prescriptiondata:
        mark = 0
        for itemdata in item:
            if ((mark % 2) == 0):
                mark += 1
            else:
                if itemdata != 'None':
                    value=dataFeatureValue.findnum(itemdata)
                    if value<1000:
                        itemvalue = itemvalue+value
                        num += 1
                else:
                    continue
                mark += 1
    ave=itemvalue/num
    print 'itemvalue,num,ave=itemvalue/num:',itemvalue,num,ave
def noneStandard(readcsvname):
    """Remove non-numeric dosage phrases from every cell of a CSV file.

    The phrases 等分, 少许, 3倍于上药, 减半 and 倍加 are deleted from each
    cell, in that order.

    Returns:
        A list with one list of cleaned cells per input row.
    """
    print('noneStandard')
    csvdata = data_process.read_csv(readcsvname)

    # Applied in this order.  The leading empty string reproduces the
    # original first replacement, which is a no-op as written.
    removable = ('', '等分', '少许', '3倍于上药', '减半', '倍加')
    noneList = []
    for row in csvdata:
        cleaned = []
        for cell in row:
            for phrase in removable:
                cell = cell.replace(phrase, '')
            cleaned.append(cell)
        noneList.append(cleaned)

    return noneList
Esempio n. 20
0
def func2feature(csvname1, function):
    print 'func2feature'
    funcdata = data_process.read_csv(csvname1)

    featurelist = []

    for item in funcdata:
        check = False
        for itemdata in item:
            itemdata = itemdata.decode('utf-8')
            itemdata = itemdata.replace('疏风', '祛风')
            itemdata = itemdata.replace('散风', '祛风')
            itemdata = itemdata.replace('驱风', '祛风')
            if itemdata.find(function) > -1:
                check = True
        if check:
            featurelist.append(1)
        else:
            featurelist.append(0)
    print 'len(featurelist):', len(featurelist)
    print '有多少方剂属于该功效(祛风除湿):', featurelist.count(1)
    return featurelist
def composition_process(readcsvname, writecsvname):
    print('composition_process')
    # readcsvname='composition_6.csv'
    csvdata = data_process.read_csv(readcsvname)
    datas = []
    pattern = re.compile(ur'(?:、|,|;|。|,)')
    pattern1 = re.compile(ur'(?:\(|()')
    pattern2 = re.compile(ur'(?:\)|))')
    for item in csvdata:
        item[0] = item[0].decode('utf-8')
        item[0] = re.sub(pattern, ' ', item[0])
        item[0] = re.sub(pattern1, ' (', item[0])
        item[0] = re.sub(pattern2, ') ', item[0])

        item = item[0].split(' ')
        num = 0
        for itemdata in item:
            if itemdata == '':
                item.pop(num)
            num += 1
        datas.append(item)
    data_process.write_in_csv(writecsvname, datas)
Esempio n. 22
0
def onlyWord(readcsvname):
    print 'onlyWord'
    preslist = []
    presdata = data_process.read_csv(readcsvname)
    for item in presdata:
        preslist.append(item)

    finallist = []
    for item in preslist:
        count = 0
        # print item
        zz = []
        for itemdata in item:
            # print count
            # print itemdata
            if (count % 2) == 0:
                zz.append(itemdata)
                count += 1
            else:
                count += 1
        # print zz
        finallist.append(zz)

    return finallist
Esempio n. 23
0
              lenList=[]
              evalCount = 0
              a=0.01
              print('start testing')
              allList = []
              words, tags = load_csv('../data/testData/HXHYvision.txt', target_columns=[0], columns_to_ignore=None, target_dict=label_dict)
              words_with_index = string_parser(words, fit=True)
              word_input = tflearn.data_utils.pad_sequences(words_with_index, maxlen=word_pad_length)

              total = len(word_input)
              evalNum = total-1
              rs = 0.
              #load evalData start
              evalData=[]
              evalCav='../data/evalData/%s_evaluate.csv'%preName
              evalList=data_process.read_csv(evalCav)
              for item in evalList:
                evalData.append(item)
              # load evalData end
              if FLAGS.visualize == True and preNum < finalNum:
                  k_count = 0
                  f = open('../myMedicalModel/atentionVision/html/%s_visualizeTCM_%s_noLSTM_HWH_epoches%s_r1_num%s.html' % (
                  preName, preName, FLAGS.num_epochs, preNum), 'w')
                  f.write(
                      '<html style="margin:0;padding:0;"><meta http-equiv="Content-Type" content="text/html; charset=GBK"><body style="margin:0;padding:0;">\n')
                  for i in range(int(total / batch_size)):
                      batch_input, batch_tags = (
                      word_input[i * batch_size:(i + 1) * batch_size], tags[i * batch_size:(i + 1) * batch_size])
                      result = sess.run([logits, model.B, model.Q], feed_dict={model.input_pl: batch_input, labels: batch_tags})
                      # arr保存预测概率
                      arr = result[0].tolist()
Esempio n. 24
0
def extractnumfromstr(readcsvname,writecsvname):
    """Split every prescription cell into a drug name and a dosage.

    The first cell of each row is copied unchanged.  Every other cell is
    normalised, decoded as UTF-8 and matched against the dosage patterns in
    the order pattern3, pattern1, pattern2, pattern4.  On a match the cell
    text with the matches removed (if non-empty) and the first matched
    dosage are appended; without a match the cell and the placeholder
    'None' are appended.  Finally, elements containing '~' or '-' are cut
    down to the text after that character.

    ``writecsvname`` is not used inside this function.

    Returns:
        A list with one flat list per input row.
    """
    print ('extractnumwithstr')
    csvdata = data_process.read_csv(readcsvname)

    # Patterns are written as ur'' literals so that Chinese text matches.
    # (?:...) is the non-capturing form of (...); it is used here for the
    # alternation of unit words that follows the number.
    pattern1 = re.compile(ur'\d+.\d+(?:g|kg|ml|l|千克|克|钱半|斤半|分半|升半|升|个|钱|片|根|条|份|张|枚|寸|具|朵|只|粒|茎|两半|斤|文|挺|对|头|L|ML|分|节|cm|握|株|两|铢)')
    pattern2 = re.compile(ur'\d+(?:g|kg|ml|l|千克|克|钱半|分半|斤半|升半|升|个|钱|片|根|条|份|张|枚|寸|具|朵|只|合|粒|茎|两半|斤|文|挺|对|头|L|ML|分|节|cm|握|株|两|铢)')
    pattern3 = re.compile(
        ur'\d+(?:g|kg|ml|l|千克|克|钱半|分半|斤半|升半|升|个|钱|片|根|条|份|张|枚|寸|具|朵|只|粒|茎|两半|斤|文|挺|合|对|头|L|ML|分|节|cm|握|株|两|铢)\d+(?:g|kg|ml|l|克|钱半|分半|斤半|升半|升|个|钱|片|根|条|份|张|枚|具|朵|只|粒|茎|两半|斤|文|挺|对|头|L|ML|分|节|cm|握|株|两|铢)')
    pattern4 = re.compile(
        ur'(?:一|二|三|四|五|六|七|八|九|十|两|半)(?:g|kg|ml|l|千克|克|钱半|斤半|分半|升半|升|个|钱|片|根|条|份|合|张|枚|寸|具|朵|只|粒|茎|两半|斤|文|挺|对|头|L|ML|分|节|cm|握|株|两|铢)')

    # Phrases that are deleted from a cell before matching.
    pattern_other=re.compile(ur'(?:等分|适量|少许)')
    medicallist = []
    for item in csvdata:
        print '****************************************************************** 处方: ', item[0]
        # Counts the cells seen in this row; 0 means "first cell".
        checkBH = 0
        medical = []
        for itemdata in item:
            check_ge=False
            weight = 'None'
            if checkBH:
                # NOTE(review): replacing '' with '' is a no-op as written
                # -- presumably an invisible character was lost; confirm.
                itemdata = itemdata.replace('', '')
                itemdata = itemdata.replace('.', '.')
                itemdata = itemdata.replace('o', '0')
                itemdata = itemdata.decode('utf8')
                # print 'itemdata', itemdata
                # Remember whether the cell contained 各, then drop it.
                if itemdata.find('各')>-1:
                    check_ge=True
                itemdata = itemdata.replace('各', '')
                itemdata = re.sub(pattern_other, '', itemdata)
                match1 = pattern3.search(itemdata)  # compound amount, e.g. 1两2钱
                match2 = pattern1.search(itemdata)  # decimal number
                match3 = pattern2.search(itemdata)  # integer
                match4 = pattern4.search(itemdata)  # Chinese numeral + unit

                # In every branch: the name is the cell with all matches of
                # that pattern removed, the weight is the first match.  When
                # the element just before the new weight is the placeholder
                # 'None', that placeholder is removed.
                if match1:
                    yaowu = re.sub(pattern3, '', itemdata)
                    weight = match1.group()
                    if yaowu:
                        medical.append(yaowu)
                    medical.append(weight)
                    if medical[-2] == 'None':
                        medical.pop(-2)
                elif match2:
                    yaowu = re.sub(pattern1, '', itemdata)
                    weight = match2.group()
                    if yaowu:
                        medical.append(yaowu)
                    medical.append(weight)
                    if medical[-2] == 'None':
                        medical.pop(-2)
                elif match3:
                    yaowu = re.sub(pattern2, '', itemdata)
                    weight = match3.group()
                    if yaowu:
                        medical.append(yaowu)
                    medical.append(weight)
                    if medical[-2] == 'None':
                        medical.pop(-2)
                elif match4:
                    yaowu = re.sub(pattern4, '', itemdata)
                    weight = match4.group()
                    if yaowu:
                        medical.append(yaowu)
                    medical.append(weight)
                    if medical[-2] == 'None':
                        medical.pop(-2)
                else:
                    # No dosage found: keep the cell with a placeholder.
                    if itemdata:
                        medical.append(itemdata)
                        medical.append('None')

                # A cell that contained 各 overwrites every 'None'
                # collected so far in this row with this cell's weight
                # (which is itself 'None' when nothing matched).
                if check_ge:
                    num=0
                    for i in medical:
                        if i=='None':
                            medical[num]=weight
                        num+=1
            else:
                # First cell of the row: copied unchanged.
                medical.append(itemdata)
            checkBH+=1
            # print 'medical',medical
        medicallist.append(medical)
        medical=[]
    finalmedicallist=[]
    for content in medicallist:
        num=0
        for icontent in content:
            # Keep only the text after '~' / '-'.  Both tests look at the
            # element's original text, so when both characters occur the
            # '-' result is the one that stays.
            if icontent.find('~')>-1:
                content[num]=icontent[icontent.find('~')+1:]
            if icontent.find('-') > -1:
                content[num] = icontent[icontent.find('-') + 1:]
            num+=1
        finalmedicallist.append(content)
    return finalmedicallist
def unitTransformation(readcsvname):
    """Convert dosage cells given in 两/钱/kg/斤/分 to grams.

    Every cell is decoded as UTF-8.  The first unit of the table below that
    occurs after position 0 decides the conversion: the text before the
    unit is read as a number, multiplied by the unit's factor and stored as
    ``'<value>g'``.  Cells without a unit, or whose leading text is not a
    number, are kept as decoded.

    Args:
        readcsvname: CSV file with the dosage cells.

    Returns:
        A list with one list of converted cells per input row.
    """
    print('unitTransformation()')
    # (unit, grams per unit), tested in this order.
    # NOTE(review): the factors are taken over unchanged; confirm that 50
    # for 两 and 3.125 for 钱 are meant to be used together.
    conversions = (
        (u'两', 50),
        (u'钱', 3.125),
        (u'kg', 1000),
        (u'斤', 500),
        (u'分', 0.3),
    )
    csvdata = data_process.read_csv(readcsvname)
    normalList = []
    for item in csvdata:
        midList = []
        for itemdata in item:
            itemdata = itemdata.decode('utf8')
            converted = itemdata
            for unit, factor in conversions:
                # "> 0": a unit at position 0 has no number before it.
                if itemdata.find(unit) > 0:
                    try:
                        # Split on the unit that was found.  The 分 branch
                        # used to split on 斤, so 分 amounts never
                        # converted.
                        amount = float(itemdata.split(unit)[0]) * factor
                        converted = str(amount) + 'g'
                    except ValueError:
                        # Not a plain number: keep the cell as it is.
                        pass
                    break
            midList.append(converted)
        normalList.append(midList)

    return normalList
Esempio n. 26
0
# coding=utf-8
import data_process

readcsvname = '../formulaData_Experiment/ExResult_onehot_QRJD.csv'
readdata = data_process.read_csv(readcsvname)
datalist = []
data = []
for item in readdata:
    print 'item', item[0]
    num = 0
    for i in item[0].split(' '):
        print '1', i
        if (num == 3):
            print '2', i
            print i.split(':')[-1]
            maxiter = i.split(':')[-1]
            data.append(float(maxiter))

        num += 1
    acc = item[1].split('acc:')[-1]
    acc = acc.replace('"', '')
    print acc
    data.append(float(acc))
    datalist.append(data)
    data = []

writecsvname = '../formulaData_1/L1_draw.csv'
medicaldata = data_process.write_in_csv(writecsvname, datalist)
Esempio n. 27
0
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.externals import joblib
import random

# 版本二 加了抽样,样本正反例数据平衡 start
train_x = []
train_y = []
rem = []
# presCsvname='presFeature_realValue.csv'
presCsvname = 'presFeature_onehot_668.csv'
funcCsvname = 'funcFeature.csv'
data = data_process.read_csv(presCsvname)
labeldata = data_process.read_csv(funcCsvname)
# csv内容存放在list才可再读
labellist = []
num = 0
for j in labeldata:
    labellist.append(j)
    if num == 0:
        j[0] = j[0].replace('', '')
    # print j
    if int(j[0]) == 1:
        rem.append(num)
        train_y.append(float(j[0]))
    num += 1
print 'len(rem)1', len(rem)
print 'len(train_y)1', len(train_y)
Esempio n. 28
0
            # words ='*********** 依次输出找到的频繁项集'
            # print words
            data_process.write_str_in_csv_a(writeCsv, words)

            for item in aprioriEvaList:
                medicalstr = ''
                for itemdata in item:
                    medicalstr = medicalstr + itemdata + ','
                words = '支持度 最高的 %d - 频繁项集 为 : %s' % (len(item), medicalstr)
                # print words
                data_process.write_str_in_csv_a(writeCsv, words)

            words = '########################## medical evaluating.... ##########################'
            printTowrite(words)
            evalueatecsv = '../%sFile/%s_evaluate.csv' % (preName, preName)
            evalueateData = data_process.read_csv(evalueatecsv)
            evalueateDataList = []
            for item in evalueateData:
                item[0] = item[0].replace('', '')
                for i in range(len(item)):
                    # item[i] = item[i].decode('utf8',errors='ignore')
                    item[i] = item[i].decode('utf8')

                evalueateDataList.append(item)
            # print evalueateDataList
            for item in evalueateDataList:
                zstr = ''
                for itemset in item:
                    zstr = zstr + itemset + ','
                # print zstr
            finalaprioriEvaList = []
Esempio n. 29
0
        maxiter = i * 10
        for j in range(0, 4):
            # NOL1_weight_onehot_QRJD_s0.1_50.csv
            # readcsvname = '../formulaData_Experiment/NOL1_weight_onehot_QRJD_s0.1_'+str(maxiter)+'.csv'
            if (qq == 1):
                lamda = 0.7
            if (qq == 2):
                lamda = 1.3
            if (qq == 3):
                lamda = 2.7
            if (qq == 4):
                lamda = 4.3
            print 'lamda', lamda
            readcsvname = '../formulaData_Experiment/weight_onehot_QRJD_s0.1_500_' + str(
                lamda) + '.csv'
            weightdata = data_process.read_csv(readcsvname)
            qq += 1

            csvname = '../formulaData_1/QRJD_medical_count.csv'
            medicaldata = data_process.read_csv(csvname)
            medicallist = []
            importantMedical = []
            weightlist = []
            for item in medicaldata:
                medicallist.append(item[0])

            weightlist = []
            num = 0
            for item in weightdata:
                if num != 0:
                    zz = []
def extractnumwithstr(readcsvname):
    print('extractnumwithstr')
    # readcsvname='csvtest_1.csv'
    # readcsvname='composition_5_2.csv'
    csvdata = data_process.read_csv(readcsvname)

    # i:指示第i个处方
    i = 1
    #正则匹配要用' ur'' '才能正确匹配中文
    #(?:..):(...)的不分组版本,用于使用| 或 后接数量词
    pattern1 = re.compile(
        ur'\d+.\d+(?:g|kg|ml|l|个|钱|片|根|条|份|张|枚|具|朵|只|粒|茎|两|斤|挺|对|头|L|ML|分|节|cm|握|株|两半)'
    )
    pattern2 = re.compile(
        ur'\d+(?:g|kg|ml|l|个|钱|片|根|条|份|张|枚|具|朵|只|粒|茎|两|斤|挺|对|头|L|ML|分|节|cm|握|株|两半)'
    )
    finalmedicallist = []
    for item in csvdata:
        print '****************************************************************** 处方: ', i
        medicallist = []
        point = []
        medicaldict = []
        for itemdata in item:
            weight = ''
            yaowulist = []
            itemdata = itemdata.replace('', '')
            itemdata = itemdata.replace('.', '.')
            itemdata = itemdata.replace('o', '0')
            itemdata = itemdata.decode('utf8')
            # print 'itemdata', itemdata

            #在处方内容中通过正则匹配找出数量单位 start
            weight1 = pattern1.findall(itemdata)
            weight2 = pattern2.findall(itemdata)
            #把正确的值放在变量weight中
            if (weight1):
                weight = weight1[0]
                yaowulist = pattern1.split(itemdata)
            elif (weight2):
                weight = weight2[0]
                yaowulist = pattern2.split(itemdata)
            # print '$$$$$$$$$$$$findal',weight1,weight2,weight
            # 在处方内容中通过正则匹配找出数量单位 end
            # print 'yaowulist',yaowulist
            # for ii in yaowulist:
            #     print ii

            # 把处方的每味药提出来重新放在medicallist列表元素[0]里,同时已经去除了药的数量单位
            if (yaowulist):
                try:
                    yaowulist.remove('')
                    for zz in yaowulist:
                        medicallist.append(zz)
                except:
                    pass
            else:
                medicallist.append(itemdata)

            # 把处方的每味药所对应的数量单位存入medicallist 列表元素[1]的位置列表里
            if (weight):
                medicallist.append(weight)
            else:
                medicallist.append('None')

            #medicallist eg.[u'\u9632\u98ce', u'3l']
            medicaldict.append(medicallist)
            medicallist = []

        # print "medicallist", medicallist

        print 'medicaldict', medicaldict
        #j统计每个处方里的第j味药
        j = 0
        for k, v in medicaldict:
            #用point记录#在哪味药上
            # print k,v
            if (k.find('各') > 0):
                point.append(j)
                medicaldict[j][0] = medicaldict[j][0].replace('各', '')
                # print (medicaldict[j][0])
                print '检测到“各”字,该味药在处方中所处位置:', j, k
            j += 1
        print '该方剂一共配药数量为:', j
        print '该方剂中出现“各”字的位置有:', point

        f = 0
        print '##################### 开始处理所有药的数量单位 ##################'
        # print ('medicaldict', medicaldict)

        for m, n in medicaldict:
            if (point != []):
                for pointnum in point:
                    # print 'test', pointnum
                    if (f > pointnum):
                        continue
                    elif (n == 'None'):
                        # print 'test4',medicaldict[f][1],pointnum
                        medicaldict[f][1] = medicaldict[pointnum][1]
                        break
            f += 1
        # print '@@@@@@~~最后处理结果(列表):', medicaldict

        #重新整理medicaldict数据格式,并存入csv里
        onepiece = []
        for x, y in medicaldict:
            print x, y
            onepiece.append(x)
            onepiece.append(y)
            # print 'onepiece',onepiece

        # data_process.createListCSV('csvtest_2.csv',medicaldict)
        finalmedicallist.append(onepiece)
        #处方数增一
        i += 1

    return finalmedicallist