Exemples Python de readListFromTxt (utils.io.readListFromTxt)

Exemple #1

0

Afficher le fichier

Fichier : topic_extract_theme_ai.py Projet : michaelwangtd/graphToolset

def extractTheme(tagList,tagbaseFilePath):
    '''
        Return the entries of tagList that also appear in the tag base file.

        tagList: iterable of candidate tags; order and duplicates are kept.
        tagbaseFilePath: path handed to io.readListFromTxt to load the tag base.
        Returns a new list holding the matching tags in their original order.
    '''
    # Build the lookup set once: membership tests against the raw list would
    # rescan the whole tag base for every candidate.
    # NOTE(review): assumes the loaded entries and the candidates are hashable
    # (presumably strings) - confirm against io.readListFromTxt.
    knownTags = set(io.readListFromTxt(tagbaseFilePath))
    return [tag for tag in tagList if tag in knownTags]

Exemple #2

0

Afficher le fichier

Fichier : tag_extract_itjz.py Projet : michaelwangtd/graphToolset

def filterTagFromTagbase(content,tagbaseFilePath):
    '''
        Return the tag base entries that are contained in content.

        content: object tested with the `in` operator for each tag base entry.
        tagbaseFilePath: path handed to io.readListFromTxt to load the tag base.
        Returns the matching entries in tag base order.
    '''
    # Load the tag base list
    candidates = io.readListFromTxt(tagbaseFilePath)
    return [tag for tag in candidates if tag in content]

Exemple #3

0

Afficher le fichier

def extractTheme(tagList,tagbaseFilePath):
    '''
        Return the entries of tagList that are in the tag base and are not
        listed in index.TAGBASE_STOP_WORD_LIST.

        tagList: iterable of candidate tags; order and duplicates are kept.
        tagbaseFilePath: path handed to io.readListFromTxt to load the tag base.
        Returns a new list holding the accepted tags in their original order.
    '''
    # Build the lookup set once: membership tests against the raw list would
    # rescan the whole tag base for every candidate.
    # NOTE(review): assumes the loaded entries and the candidates are hashable
    # (presumably strings) - confirm against io.readListFromTxt.
    knownTags = set(io.readListFromTxt(tagbaseFilePath))
    stopWords = index.TAGBASE_STOP_WORD_LIST
    # The stop-word test stays first, as in the original nesting.
    return [tag for tag in tagList
            if tag not in stopWords and tag in knownTags]

Exemple #4

0

Afficher le fichier

Fichier : topic_extract_theme_ai.py Projet : michaelwangtd/graphToolset

def cleanTheme(tagList):
    '''
        Return the entries of tagList that also appear in tagbase.txt.

        tagList: iterable of candidate tags; order and duplicates are kept.
        Returns a new list holding the matching tags in their original order.
    '''
    # Load the tags stored in the tag base
    filePath = io.getSourceFilePath('tagbase.txt')
    # Build the lookup set once: membership tests against the raw list would
    # rescan the whole tag base for every candidate.
    # NOTE(review): assumes the loaded entries and the candidates are hashable
    # (presumably strings) - confirm against io.readListFromTxt.
    knownTags = set(io.readListFromTxt(filePath))
    return [tag for tag in tagList if tag in knownTags]

Exemple #5

0

Afficher le fichier

Fichier : topic_extract_theme_ai.py Projet : michaelwangtd/graphToolset

def updateTagbase():
    '''
        Standalone maintenance step that adjusts tagbase.txt.

        Reads the tag base, drops duplicate entries, and writes the result
        back through io.writeList2Txt. The first occurrence of each entry is
        kept, so the written order is stable from run to run.
    '''
    # Deduplicate the tag base
    tagbaseFilePath = io.getSourceFilePath('tagbase.txt')

    tagbaseList = io.readListFromTxt(tagbaseFilePath)   # 68638

    # Order-preserving dedup: list(set(...)) would write the entries back in
    # an arbitrary order that can change between runs.
    # NOTE(review): assumes entries are hashable (presumably strings).
    seenTags = set()
    cleanTagbaseList = []   # 67523
    for tag in tagbaseList:
        if tag not in seenTags:
            seenTags.add(tag)
            cleanTagbaseList.append(tag)

    # NOTE(review): the write uses the bare file name while the read uses the
    # resolved path; presumably io.writeList2Txt resolves it itself - confirm.
    io.writeList2Txt('tagbase.txt',cleanTagbaseList)

Exemple #6

0

Afficher le fichier

Fichier : tag_extract_itjz.py Projet : michaelwangtd/graphToolset

def scanTheme2Tag(themeList,tagbaseFilePath):
    '''
        Filter tags out of the tag base: return the entries of themeList
        that also appear in the tag base file.

        themeList: iterable of candidate themes; order and duplicates are kept.
        tagbaseFilePath: path handed to io.readListFromTxt to load the tag base.
        Returns a new list holding the matching entries in their original order.
    '''
    # Build the lookup set once: membership tests against the raw list would
    # rescan the whole tag base for every candidate.
    # NOTE(review): assumes the loaded entries and the candidates are hashable
    # (presumably strings) - confirm against io.readListFromTxt.
    knownTags = set(io.readListFromTxt(tagbaseFilePath))
    return [theme for theme in themeList if theme in knownTags]

Exemple #7

0

Afficher le fichier

Fichier : iron_tag_all_info.py Projet : michaelwangtd/graphToolset

 inputFilePath = io.getSourceFilePath('investEvents_20161227144154.txt')
 outputFilePath = io.getSourceFilePath(
     'investEvents_taged_20161227144154.txt')
 tagbaseFilePath = io.getSourceFilePath(
     'tagbase_iron_tag_all_product_company.txt')
 newseedInfoOutputFilePath = io.getProcessedFilePath(
     'newseed_taged_info.csv')
 # get infoList
 infoList = io.loadData2Json(inputFilePath)
 # persist tagbase from redis
 tagbaseDic = util.getTagbaseDicFromRedis(initDic, tagbaseNameList)
 util.persistentTagbase(tagbaseDic, tagbaseFilePath)
 # load cut word user dict
 jieba.load_userdict(tagbaseFilePath)
 # get tagbaseList
 tagbaseList = io.readListFromTxt(tagbaseFilePath)
 # prepare for output
 fw = open(outputFilePath, 'w', encoding='utf-8')
 i = 1
 j = 0
 # traverse infoList
 for item in infoList:
     if item['startup']['productDesc']:
         productDesc = item['startup']['productDesc']
         # get cleaned desc
         cleanedDesc = getCleanedDesc(productDesc)
         # get cut word list
         cutWordList = getCutWordList(cleanedDesc)
         # extract tag
         ironTagList = extractTag(cutWordList, tagbaseList)
         print(i, 'extracted tag:', ironTagList)