コード例 #1
0
def deleteblank(readcsvname, writecsvname):
    """Copy a CSV file, leaving out every row that has exactly one field.

    Rows are loaded with ``web_data_process.read_csv`` and the rows that
    survive the filter are saved with ``web_data_process.write_in_csv``.
    The function name and the number of rows kept are printed as a trace.

    Args:
        readcsvname: passed to ``web_data_process.read_csv``.
        writecsvname: passed to ``web_data_process.write_in_csv``.
    """
    print('deleteblank')
    rows = web_data_process.read_csv(readcsvname)
    # Only single-field rows are dropped; rows of any other length
    # (including zero) are kept.
    kept_rows = [row for row in rows if len(row) != 1]
    print('%s %s' % ('lenth', len(kept_rows)))
    web_data_process.write_in_csv(writecsvname, kept_rows)
コード例 #2
0
def webSix(readcsvname, writecsvname):
    """Pass every cell of a CSV file through ``match1`` and save the result.

    Each cell is decoded from UTF-8, handed to ``match1`` (defined elsewhere
    in this module), and the returned value replaces the cell in its row.
    The rows are then written with ``web_data_process.write_in_csv``.

    Changes from the previous version: the trace line printed 'webFive'
    (copied from the sibling function) and now prints this function's own
    name; the local variable no longer shadows the builtin ``str``; the
    unused row counter is gone.

    Args:
        readcsvname: passed to ``web_data_process.read_csv``.
        writecsvname: passed to ``web_data_process.write_in_csv``.
    """
    print('webSix')
    rows = web_data_process.read_csv(readcsvname)
    processed = []
    for row in rows:
        # Rows are updated in place, cell by cell, exactly as before.
        for index, cell in enumerate(row):
            row[index] = match1(cell.decode('utf-8'))
        processed.append(row)
    web_data_process.write_in_csv(writecsvname, processed)
コード例 #3
0
def deletebianhao(readcsvname1, readcsvname2,
                  writecsvname1='webFormula_final_2.csv',
                  writecsvname2='webFunction_3.csv'):
    """Drop the first column of two CSV files and save each under a new name.

    Both inputs are read before anything is written, so an output name may
    safely coincide with the other input.

    Changes from the previous version: the two output names used to be
    hard-coded; they are now optional parameters whose defaults are the old
    names, so existing two-argument calls behave as before.  An empty row
    used to raise ``IndexError`` from ``pop(0)``; it is now kept as an empty
    row.

    Args:
        readcsvname1: first input, passed to ``web_data_process.read_csv``.
        readcsvname2: second input, passed to ``web_data_process.read_csv``.
        writecsvname1: output name for the rows of ``readcsvname1``.
        writecsvname2: output name for the rows of ``readcsvname2``.
    """
    print('deletebianhao')
    # NOTE(review): the name suggests the first column is a serial number
    # ("bianhao"); the code only shows that column 0 is discarded.
    formula_rows = [row[1:] for row in web_data_process.read_csv(readcsvname1)]
    function_rows = [row[1:] for row in web_data_process.read_csv(readcsvname2)]

    web_data_process.write_in_csv(writecsvname1, formula_rows)
    web_data_process.write_in_csv(writecsvname2, function_rows)
コード例 #4
0
def webFive(readcsvname, writecsvname):
    """Clean every cell of a CSV file with ``wordmatch`` and ``match``.

    For each cell: decode it from UTF-8, keep only the text before the first
    '去' (if present), pass that through ``wordmatch`` and then ``match``
    (both defined elsewhere in this module), and store the result back into
    the row.  The rows are written with ``web_data_process.write_in_csv``.

    Args:
        readcsvname: passed to ``web_data_process.read_csv``.
        writecsvname: passed to ``web_data_process.write_in_csv``.
    """
    print('webFive')
    rows = web_data_process.read_csv(readcsvname)
    cleaned_rows = []
    for row in rows:
        for index, raw_cell in enumerate(row):
            text = raw_cell.decode('utf-8')
            cut = text.find('去')
            if cut != -1:
                text = text[:cut]

            # wordmatch() stands in for an earlier chain of str.replace()
            # calls that removed individual words from the cell.
            stripped = wordmatch(text)

            # Per the original note: match() uses a regular expression to
            # remove surplus units, keeping only "<number>g".
            row[index] = match(stripped)
        cleaned_rows.append(row)
    web_data_process.write_in_csv(writecsvname, cleaned_rows)
コード例 #5
0
def UnifiedDose(readcsvname, writecsvname):
    """Rewrite dose cells of a CSV file as gram values where possible.

    Every cell is decoded from UTF-8 and passed to ``_dose_to_grams``; the
    converted rows are written with ``web_data_process.write_in_csv``.

    Changes from the previous version:
      * The '分' branch split the text on '斤', so ``float()`` always failed
        and '分' doses were never converted.  It now splits on '分'.
      * The six copy-pasted branches are replaced by one lookup table.
      * The bare ``except:`` is narrowed to ``ValueError`` (what ``float()``
        raises for a non-numeric prefix).
      * Unit markers are unicode literals, so searching the decoded text no
        longer relies on implicit str -> unicode coercion.

    Args:
        readcsvname: passed to ``web_data_process.read_csv``.
        writecsvname: passed to ``web_data_process.write_in_csv``.
    """
    print('UnifiedDose')
    rows = web_data_process.read_csv(readcsvname)
    converted_rows = [
        [_dose_to_grams(cell.decode('utf8')) for cell in row]
        for row in rows
    ]
    web_data_process.write_in_csv(writecsvname, converted_rows)


# (unit marker, multiplier to grams), tried in this order; the first marker
# found in a cell decides the conversion.  The multipliers are carried over
# unchanged from the original code.
_GRAMS_PER_UNIT = (
    (u'两', 50),
    (u'钱', 3.125),
    (u'kg', 1000),
    (u'Kg', 1000),
    (u'斤', 500),
    (u'分', 0.3),
)


def _dose_to_grams(text):
    """Return *text* as '<float>g' if it is '<number><unit>...', else as-is."""
    for marker, grams_per_unit in _GRAMS_PER_UNIT:
        # "> 0" rather than ">= 0": a marker at index 0 has no number
        # in front of it, so there is nothing to convert.
        if text.find(marker) > 0:
            try:
                amount = float(text.split(marker)[0])
            except ValueError:
                # Prefix is not a plain number: keep the cell untouched and,
                # as before, do not try the remaining units.
                return text
            return str(amount * grams_per_unit) + 'g'
    return text
コード例 #6
0
def webProcessNum(readcsvname, writecsvname):
    """Split the ingredient cells of a CSV file into name / amount entries.

    For every row read via ``web_data_process.read_csv`` a new flat list is
    built.  The row's first cell is copied unchanged; each following cell is
    decoded from UTF-8 and contributes entries according to the cases below.
    The string ``'None'`` is the placeholder for "amount not known yet"; a
    later bare amount, or a cell containing '各', fills every placeholder
    collected so far in that row.  The resulting rows are written with
    ``web_data_process.write_in_csv``.

    The cases are independent ``if`` checks, evaluated in this order, so one
    cell may trigger more than one of them:
      1. no ``<digits><unit>`` in the cell    -> append cell, 'None'
      3. cell is only a parenthesised amount  -> fill placeholders with it
      5. cell contains '各'                   -> append every name found
         around the amount together with that amount, then fill placeholders
      2. name followed by a parenthesis       -> append name, bracketed text
      4. parenthesis followed by a name       -> append bracketed text, name

    Changes from the previous version:
      * The patterns alternate between an ASCII parenthesis and a second
        parenthesis character that appears to have been a full-width one.
        With two ASCII parentheses several patterns cannot be compiled, so
        the full-width forms are written as ``\\uff08`` / ``\\uff09`` escapes.
      * The placeholder-filling loops reused the name ``item`` and thereby
        overwrote the cell being processed, so the later cases tested an
        entry of the output list instead of the cell.
      * The list of names split out in case 5 is reset for every cell; it
        used to carry over from an earlier cell when no amount was found.
      * ``except:`` is narrowed to ``ValueError`` around ``list.remove``; the
        ``try`` around the case-5 append loop guarded nothing and is gone.

    Args:
        readcsvname: passed to ``web_data_process.read_csv``.
        writecsvname: passed to ``web_data_process.write_in_csv``.
    """
    print('webProcessNum')
    csvdata = web_data_process.read_csv(readcsvname)
    finaldata = []

    # Shared regex fragments.  No unit is a prefix of another, so the order
    # inside the alternation does not affect what is matched.
    units = (u'g|kg|ml|l|个|Kg|钱|片|根|条|份|张|枚|具|朵|只|粒|茎|两|斤'
             u'|挺|对|头|L|ML|分|节|cm|握|株')
    cjk = u'[\u4e00-\u9fa5]'
    open_paren = u'(?:\\(|\uff08)'
    close_paren = u'(?:\\)|\uff09)'

    # <digits><unit>
    int_amount_re = re.compile(u'\\d+(?:' + units + u')')
    # <digits><any char><digits><unit>.
    # NOTE(review): the "." is unescaped in the original and is kept that
    # way; besides "1.5g" it also matches e.g. "3-6g" -- confirm intent.
    decimal_amount_re = re.compile(u'\\d+.\\d+(?:' + units + u')')
    # CJK text immediately followed by an opening parenthesis
    name_then_paren_re = re.compile(cjk + u'+' + open_paren)
    # any single parenthesis, ASCII or full-width
    paren_re = re.compile(u'[()\uff08\uff09]')
    # a parenthesised integer amount with no CJK text directly around it
    bare_amount_re = re.compile(
        u'(?<!' + cjk + u')' + open_paren + u'\\d+(?:' + units + u')'
        + close_paren + u'(?!' + cjk + u')'
    )
    # a closing parenthesis immediately followed by CJK text
    paren_then_name_re = re.compile(close_paren + u'(?=' + cjk + u'+)')
    open_paren_re = re.compile(open_paren)

    for content in csvdata:
        medicallist = []
        try:
            # Drops the first empty cell only, as before.
            content.remove('')
        except ValueError:
            pass

        for index, raw_cell in enumerate(content):
            item = raw_cell.decode('utf-8')
            if index == 0:
                # The first cell is copied through undecoded.
                medicallist.append(content[0])
                continue

            # Case 1: text only, e.g. 升麻
            if not int_amount_re.search(item):
                medicallist.append(item)
                medicallist.append('None')

            # Case 3: amount only, e.g. (6g)
            if bare_amount_re.search(item):
                amount = paren_re.sub(u'', item)
                for pos, entry in enumerate(medicallist):
                    if entry == 'None':
                        medicallist[pos] = amount

            # Case 5: handle '各', e.g. 各(30g) or 焦栀各(各9g)
            if item.find(u'各') > -1:
                item = paren_re.sub(u'', item.replace(u'各', u''))
                # Look for a decimal amount first, then an integer one.
                decimal_hits = decimal_amount_re.findall(item)
                integer_hits = int_amount_re.findall(item)
                amount = None
                names = []
                if decimal_hits:
                    amount = decimal_hits[0]
                    names = decimal_amount_re.split(item)
                elif integer_hits:
                    amount = integer_hits[0]
                    names = int_amount_re.split(item)
                # When no amount is found the only placeholders that could
                # be filled would receive 'None' again, so nothing is done.
                if amount is not None:
                    # Each name split out of the cell (amount and unit
                    # already removed) gets the shared amount.
                    for name in names:
                        if name != u'':
                            medicallist.append(name)
                            medicallist.append(amount)
                    for pos, entry in enumerate(medicallist):
                        if entry == 'None':
                            medicallist[pos] = amount

            # Case 2: name then bracket, e.g. 麻黄(6g)
            if name_then_paren_re.search(item):
                opening = open_paren_re.search(item)
                word = item[:opening.start()]
                wordnumber = paren_re.sub(u'', item[opening.end():])
                medicallist.append(word)
                medicallist.append(wordnumber)

            # Case 4: bracket then name, e.g. (6g)麻黄
            closing = paren_then_name_re.search(item)
            if closing:
                wordnumber = paren_re.sub(u'', item[:closing.start()])
                word = item[closing.end():]
                # NOTE(review): appended as amount, name -- the reverse of
                # every other case.  Kept as in the original; confirm.
                medicallist.append(wordnumber)
                medicallist.append(word)

        finaldata.append(medicallist)
    web_data_process.write_in_csv(writecsvname, finaldata)
コード例 #7
0
def webFour(readcsvname, writecsvname):
    """Save the output of ``composition_process`` for a CSV file.

    ``web_dataDetailProcess.composition_process`` is called with
    *readcsvname*; whatever it returns is handed to
    ``web_data_process.write_in_csv`` together with *writecsvname*.
    """
    print('webFour')
    processed = web_dataDetailProcess.composition_process(readcsvname)
    web_data_process.write_in_csv(writecsvname, processed)