예제 #1
0
def testCollectCrossDomainLinks():
    """Exercise ``medGraphSupOpt.collectCrossDomainLinks`` on two dictionaries.

    Entities are loaded from the ``jieba_shicai_cta.txt`` and
    ``jieba_yixuebaike.txt`` dictionary files and handed to the collector
    together with the word2vec model path. The number of keys in the
    returned object is printed.
    """
    source_dict_file = ROOT_PATH.seg_dictwin64 + u'jieba_shicai_cta.txt'
    target_dict_file = ROOT_PATH.seg_dictwin64 + u'jieba_yixuebaike.txt'
    source_entities = medGraphSupOpt.loadEntitiesFromDict(source_dict_file)
    target_entities = medGraphSupOpt.loadEntitiesFromDict(target_dict_file)

    w2v_model_file = ROOT_PATH.root_win64 + u'model\\word2vec\\zongheword2vecModel.vector'

    # NOTE(review): the trailing "=a" looks like a "<path>=<file mode>"
    # convention consumed by collectCrossDomainLinks -- confirm there.
    link_output_spec = ROOT_PATH.root_win64 + u'model_cache\\shicai2bingzheng_links.txt=a'

    # NOTE(review): [5, 20, 300] and 20 are presumably scan-range / top-N
    # settings; their exact meaning is defined by the callee -- verify.
    link_map = medGraphSupOpt.collectCrossDomainLinks(
        w2v_model_file,
        source_entities,
        target_entities,
        [5, 20, 300],
        20,
        link_output_spec,
    )
    key_count = len(link_map.keys())
    print('dic size: ' + str(key_count))
예제 #2
0
def testFindCrossLinkedWords():
    """Exercise ``medGraphSupOpt.findCrossLinkedWords`` for one source entity.

    Loads entities from ``jieba_shicai.txt`` and ``jieba_yixuebaike.txt``,
    queries the cross-linked words of a single fixed entity string, and
    prints the entity followed by one line per result (first element, then
    the second element converted with ``str``).
    """
    source_dict_file = ROOT_PATH.seg_dictwin64 + u'jieba_shicai.txt'
    target_dict_file = ROOT_PATH.seg_dictwin64 + u'jieba_yixuebaike.txt'
    source_entities = medGraphSupOpt.loadEntitiesFromDict(source_dict_file)
    target_entities = medGraphSupOpt.loadEntitiesFromDict(target_dict_file)

    w2v_model_file = ROOT_PATH.root_win64 + u'model\\word2vec\\zongheword2vecModel.vector'

    query_entity = u'莲子/n'
    # NOTE(review): [10, 50, 500] is presumably a set of scan ranges; its
    # exact meaning is defined by findCrossLinkedWords -- verify.
    linked = medGraphSupOpt.findCrossLinkedWords(
        w2v_model_file,
        query_entity,
        source_entities,
        target_entities,
        [10, 50, 500],
    )

    print('source: ' + query_entity)
    print('linked to: ')
    for item in linked:
        label = item[0]
        value = item[1]
        print(label + ': ' + str(value))
예제 #3
0
def testFindLinkedWords():
    """Exercise ``medGraphSupOpt.findLinkedWords`` within a single dictionary.

    Loads entities from ``jieba_yixuebaike.txt``, looks up the words linked
    to one fixed entity string with ``scanRange=10``, prints one line per
    result and finishes with a separator line.
    """
    domain_dict_file = ROOT_PATH.seg_dictwin64 + u'jieba_yixuebaike.txt'
    entities_in_domain = medGraphSupOpt.loadEntitiesFromDict(domain_dict_file)

    w2v_model_file = ROOT_PATH.root_win64 + u'model\\word2vec\\zongheword2vecModel.vector'

    query_entity = u'肺炎/n'
    linked = medGraphSupOpt.findLinkedWords(
        w2v_model_file,
        query_entity,
        entities_in_domain,
        scanRange=10,
    )

    for item in linked:
        label = item[0]
        value = item[1]
        print(label + ': ' + str(value))
    print('--------------------------------------')
예제 #4
0
def testCrossLinkedWordsPrecess():
    """Walk through the two-stage similar-word scan for one source entity.

    Stage 1 runs ``seekSimWords`` for the entity alone and splits the result
    with ``WordTypeFilter.ditInOutWordFilter`` into an 'out' part and an
    'in' part relative to the source dictionary entities. Stage 2 runs
    ``seekSimWords`` again, seeded with the entity plus the 'out' words and
    with the 'in' words passed as ``pureCleanWords``. The stage-2 results
    are printed one per line, followed by a separator line.
    """
    source_dict_file = ROOT_PATH.seg_dictwin64 + u'jieba_shicai.txt'
    source_entities = medGraphSupOpt.loadEntitiesFromDict(source_dict_file)

    w2v_model_file = ROOT_PATH.root_win64 + u'model\\word2vec\\zongheword2vecModel.vector'

    query_entity = u'党参/n'

    # Stage 1: scan around the entity itself; the result is used to refine
    # the stage-2 scan.
    # NOTE: an earlier variant obtained this via findLinkedWords instead.
    first_pass = medGraphSupOpt.seekSimWords(
        w2v_model_file,
        entities=[query_entity],
        pureCleanWords=[],
        scanRange=10,
    )
    outside_hits = WordTypeFilter().ditInOutWordFilter(first_pass, source_entities, 'out')
    inside_hits = WordTypeFilter().ditInOutWordFilter(first_pass, source_entities, 'in')

    seed_words = [query_entity] + [hit[0] for hit in outside_hits]
    words_to_clean = [hit[0] for hit in inside_hits]

    # Stage 2: scan seeded with the entity plus the 'out' words.
    # Open question kept from the original author: whether feeding the
    # 'out' words into the positive word list is appropriate still needs
    # testing (the alternative was to seed with the entity alone).
    second_pass = medGraphSupOpt.seekSimWords(
        w2v_model_file,
        entities=seed_words,
        pureCleanWords=words_to_clean,
        scanRange=20,
    )

    for item in second_pass:
        label = item[0]
        value = item[1]
        print(label + ': ' + str(value))
    print('--------------------------------------')