コード例 #1
0
def _tokens_of(lines):
    """Return the space-separated tokens of every string in *lines*, in order."""
    tokens = []
    for line in lines:
        tokens.extend(line.split(' '))
    return tokens


def reBOWModel2(attrlist):
    """Build models at three granularities for every attribute in *attrlist*.

    For each attribute name the function reads, below the hard-coded
    ``preurl`` directory:

    * ``outdatas_baseline/train_<attr>.csv`` via ``writereadFile.readContent``,
    * ``outdatas_baseline/cluster_<attr>.json`` (cluster key -> strings),
    * ``outdatas_baseline/second_<attr>.json``
      (cluster key -> sub-cluster key -> strings).

    Every string is split on single spaces and the tokens of one group are
    concatenated into one flat list.  The token lists are then handed to
    ``reBOWModelist``.

    Args:
        attrlist: sequence of attribute names used to build the file names.

    Returns:
        A tuple ``(model1, model2, model3)``:

        * ``model1`` -- ``reBOWModelist`` applied to one token list per
          attribute, in ``attrlist`` order.
        * ``model2`` -- dict mapping the attribute's index in ``attrlist`` to
          ``reBOWModelist`` applied to one token list per cluster, clusters
          taken in sorted key order.
        * ``model3`` -- dict mapping attribute index -> cluster key ->
          ``reBOWModelist`` applied to one token list per sub-cluster,
          sub-clusters taken in sorted key order.

    Raises:
        KeyError: if a cluster key of the ``cluster_`` file is missing from
            the ``second_`` file.
        Errors raised while opening or parsing the files propagate unchanged.
    """
    preurl = '/home/ren/remote/ruleMining2/'
    alldatas1, alldatas2, alldatas3 = [], {}, {}
    for attr_i, attr in enumerate(attrlist):
        datas = writereadFile.readContent(preurl + 'outdatas_baseline/train_' +
                                          attr + '.csv')
        alldatas1.append(_tokens_of(datas))

        # Context managers make sure the JSON files are closed again; the
        # previous json.load(open(...)) form left the handles open.
        with open(preurl + 'outdatas_baseline/cluster_' + attr +
                  '.json') as cluster_file:
            clusDict2 = json.load(cluster_file)
        with open(preurl + 'outdatas_baseline/second_' + attr +
                  '.json') as second_file:
            clusDict3 = json.load(second_file)

        alldatas2[attr_i] = []
        alldatas3[attr_i] = {}
        # NOTE(review): JSON object keys are strings, so this sort is
        # lexicographic ('10' comes before '2').  If callers translate a list
        # position back into a key with str(position), that only lines up
        # while the keys sort the same way numerically -- confirm.
        for j in sorted(clusDict2):
            alldatas2[attr_i].append(_tokens_of(clusDict2[j]))
            clus3_j = clusDict3[j]
            alldatas3[attr_i][j] = [
                _tokens_of(clus3_j[k]) for k in sorted(clus3_j)
            ]

    model1 = reBOWModelist(alldatas1)
    model2, model3 = {}, {}
    for key2 in alldatas2:
        model2[key2] = reBOWModelist(alldatas2[key2])
        model3[key2] = {}
        for key3 in alldatas3[key2]:
            model3[key2][key3] = reBOWModelist(alldatas3[key2][key3])
    return model1, model2, model3
コード例 #2
0
def reRulesDict(wikiDatas, attrlist):
    """Mine rules per attribute at root, cluster and sub-cluster level.

    For each attribute name the function reads, below the hard-coded
    ``preurl`` directory, ``outdatas_baseline/train_<attr>.csv`` (via
    ``writereadFile.readContent``), ``outdatas_baseline/cluster_<attr>.json``
    and ``outdatas_baseline/second_<attr>.json``.  Rules are obtained with
    ``rulesGet.lzx_SCsearch_conf_filt``; elements 0 and 1 of its result are
    combined with the parent level through ``newRulesFirst`` and
    ``newRulesSecond`` respectively.

    Args:
        wikiDatas: passed through unchanged as the first argument of
            ``rulesGet.lzx_SCsearch_conf_filt``.
        attrlist: sequence of attribute names used to build the file names.

    Returns:
        A tuple ``(root, cluster, subcluster)`` of dicts keyed by the
        attribute's index in ``attrlist``:

        * ``root[i]`` -- the raw result of ``lzx_SCsearch_conf_filt`` on the
          training data.
        * ``cluster[i][j]`` -- ``[first, second]`` rules for cluster key ``j``.
        * ``subcluster[i][j][k]`` -- ``[first, second]`` rules for
          sub-cluster key ``k`` of cluster ``j``.

    Raises:
        KeyError: if a cluster key of the ``cluster_`` file is missing from
            the ``second_`` file.
        Errors raised while opening or parsing the files propagate unchanged.
    """
    preurl = '/home/ren/remote/ruleMining2/'
    rulesDcit = {}
    rulesDcit2 = {}
    rulesDcit3 = {}
    for attr_i, attr in enumerate(attrlist):
        datas = writereadFile.readContent(preurl + 'outdatas_baseline/train_' +
                                          attr + '.csv')
        # Holds the first-order rules and the second-order rules.
        rulesRootlist = rulesGet.lzx_SCsearch_conf_filt(wikiDatas, datas)
        rulesDcit[attr_i] = rulesRootlist
        rulesDcit2[attr_i] = {}
        rulesDcit3[attr_i] = {}

        # Context managers make sure the JSON files are closed again; the
        # previous json.load(open(...)) form left the handles open.
        with open(preurl + 'outdatas_baseline/cluster_' + attr +
                  '.json') as cluster_file:
            clusDict2 = json.load(cluster_file)
        with open(preurl + 'outdatas_baseline/second_' + attr +
                  '.json') as second_file:
            clusDict3 = json.load(second_file)

        for j in clusDict2:
            r2_rulesList = rulesGet.lzx_SCsearch_conf_filt(
                wikiDatas, clusDict2[j])
            newr2 = newRulesFirst(rulesRootlist[0], r2_rulesList[0])
            newr2_2 = newRulesSecond(rulesRootlist[1], r2_rulesList[1])
            rulesDcit2[attr_i][j] = [newr2, newr2_2]

            rulesDcit3[attr_i][j] = {}
            clus3_j = clusDict3[j]
            for k in clus3_j:
                if clus3_j[k] == clusDict2[j]:
                    # The sub-cluster has the same content as its parent
                    # cluster, so the parent's rules are reused (the very
                    # same objects) instead of searching again.
                    new_rules3 = newr2
                    new_rules3_2 = newr2_2
                else:
                    r3_rulesList = rulesGet.lzx_SCsearch_conf_filt(
                        wikiDatas, clus3_j[k])
                    new_rules3 = newRulesFirst(newr2, r3_rulesList[0])
                    new_rules3_2 = newRulesSecond(newr2_2, r3_rulesList[1])
                rulesDcit3[attr_i][j][k] = [new_rules3, new_rules3_2]

    return rulesDcit, rulesDcit2, rulesDcit3
コード例 #3
0
def testMarch_BOW(attrlist):
    """Group test sentences by the models they are matched to.

    Models are obtained from ``reBOWModel2(attrlist)``.  For each attribute
    the sentences of ``outdatas_baseline/test_<attr>.csv`` (read below the
    hard-coded base directory with ``writereadFile.readContent``) are passed
    to ``testSents`` three times: against the top-level model, against the
    second-level model selected by the first result, and against the
    third-level model selected by the first two results.  Each call yields an
    ``(index, score)`` pair.

    Args:
        attrlist: sequence of attribute names used to build the file names.

    Returns:
        A tuple of three dicts:

        * first -- keyed by the level-1 index; a key exists only once a
          sentence scored above 0 for it.
        * second -- keyed by ``(level-1 index, level-2 index)``; the key is
          created for every sentence, the sentence is stored only when its
          level-2 score is above 0.
        * third -- keyed by ``(level-1, level-2, level-3 index)``; the key is
          created for every sentence, the sentence is stored only when its
          level-3 score is above 0.
    """
    base_dir = '/home/ren/remote/ruleMining2/'
    level1_hits, level2_hits, level3_hits = {}, {}, {}
    model1, model2, model3 = reBOWModel2(attrlist)

    for attr in attrlist:
        test_path = base_dir + 'outdatas_baseline/test_' + attr + '.csv'
        for sentence in writereadFile.readContent(test_path):
            idx1, score1 = testSents(sentence, model1)
            if score1 > 0:
                level1_hits.setdefault(idx1, []).append(sentence)

            # The deeper models are chosen by the level-1 result even when
            # its score was not positive.
            second_level = model2[idx1]
            third_level = model3[idx1]
            idx2, score2 = testSents(sentence, second_level)
            # Third-level models are looked up by the string form of idx2.
            idx3, score3 = testSents(sentence, third_level[str(idx2)])

            bucket2 = level2_hits.setdefault((idx1, idx2), [])
            if score2 > 0:
                bucket2.append(sentence)

            bucket3 = level3_hits.setdefault((idx1, idx2, idx3), [])
            if score3 > 0:
                bucket3.append(sentence)

    return level1_hits, level2_hits, level3_hits
コード例 #4
0
    url1 = 'outdatas_baseline'
    print(url1)
    allpredictsets_lvl2_ignore = []
    allpredictsets_lvl3_ignore = []
    allpredictsets_lvl2_fault = []
    allpredictsets_lvl3_fault = []

    allrelBoD = {}
    alltestsent = []
    allroot = {}
    allroot_2 = {}
    for attr_i in range(len(attrlist)):
        attr = attrlist[attr_i]
        print(attr)
        wikiDatas = writereadFile.readWikidatas('/home/ren/remote/ruleMining2/wikidatas/' + attr + '_wikidata.csv')
        trains = writereadFile.readContent('/home/ren/remote/ruleMining2/outdatas_baseline/train_' + attr + '.csv')
        templateDictsRoot = {}
        templateRoot = reBetaPattern.reTemplate(trains)
        allrelBoD[attr_i] = templateRoot
        templateDictsRoot[0] = templateRoot
        # print templateDictsRoot

        rulesRootlist = rulesGet.lzx_SCsearch_conf_filt(wikiDatas, trains)
        rulesRoot = rulesRootlist[0]
        allroot[attr_i] = rulesRoot
        numbrRoot = len(rulesRoot)

        rulesRoot_2 = rulesRootlist[1]
        allroot_2[attr_i] = rulesRoot_2
        numbrRoot_2 = len(rulesRoot_2)