def main(inputFileName1, alignFileName1, inputFileName2, alignFileName2,
         mono1FileName, mono2FileName, outputFileName, numClusInit,
         typeClusInit, fileLength, monoPower, biPower, edgeThresh1,
         edgeThresh2):
    """Cluster three languages from two bilingual corpora sharing a pivot.

    The first corpus (``inputFileName1`` / ``alignFileName1``) pairs
    language 1 with language 2; the second (``inputFileName2`` /
    ``alignFileName2``) pairs language 3 with language 2.  The original
    comments use en, de and fr as the example, with de as the shared one.

    Pipeline: read both corpora with ``readBilingualData``, build the
    language and language-pair objects with ``initializeLanguagePairObjets``,
    run ``runOchClustering`` on them, then pass the three language objects
    and ``outputFileName`` to ``printClusters``.

    Both reads receive the same ``mono1FileName`` and ``mono2FileName``.
    """
    # Start from empty unigram/bigram counters for every language.
    first_words, first_bigrams = Counter(), Counter()
    pivot_words, pivot_bigrams = Counter(), Counter()
    third_words, third_bigrams = Counter(), Counter()

    # Corpus 1: language 1 <-> pivot.
    align_first_pivot, first_words, first_bigrams, pivot_words, pivot_bigrams = \
        readBilingualData(fileLength, inputFileName1, alignFileName1,
                          mono1FileName, mono2FileName,
                          first_words, first_bigrams,
                          pivot_words, pivot_bigrams)

    # Corpus 2: language 3 <-> pivot.  The pivot counters returned by the
    # first read are the ones handed in here.
    align_third_pivot, third_words, third_bigrams, pivot_words, pivot_bigrams = \
        readBilingualData(fileLength, inputFileName2, alignFileName2,
                          mono1FileName, mono2FileName,
                          third_words, third_bigrams,
                          pivot_words, pivot_bigrams)

    (lang1, lang2, lang3,
     lang12, lang21, lang32, lang23) = initializeLanguagePairObjets(
        align_first_pivot, align_third_pivot,
        first_words, first_bigrams,
        pivot_words, pivot_bigrams,
        third_words, third_bigrams,
        numClusInit, typeClusInit, edgeThresh1, edgeThresh2)

    # Drop the local references to the raw inputs before clustering starts.
    del align_first_pivot, align_third_pivot
    del first_words, first_bigrams
    del pivot_words, pivot_bigrams
    del third_words, third_bigrams

    # Cluster, then emit the result for the three languages.
    runOchClustering(lang1, lang2, lang3,
                     lang12, lang21, lang32, lang23,
                     monoPower, biPower)

    printClusters(outputFileName, lang1, lang2, lang3, None, None)
# Example #2
# 0
def main(inputFileName1, alignFileName1,
         inputFileName2, alignFileName2,
         mono1FileName, mono2FileName,
         outputFileName, numClusInit, typeClusInit, fileLength,
         monoPower, biPower, edgeThresh1, edgeThresh2):
    """Run clustering over three languages joined through a common one.

    Languages are numbered 1, 2 and 3, with 2 being the language common to
    both corpora (the original comments use en, de, fr as the example).
    ``inputFileName1`` / ``alignFileName1`` describe the 1-2 corpus and
    ``inputFileName2`` / ``alignFileName2`` the 3-2 corpus; the same
    ``mono1FileName`` / ``mono2FileName`` are passed to both reads.

    The data is read with ``readBilingualData``, wrapped by
    ``initializeLanguagePairObjets``, clustered by ``runOchClustering`` and
    finally handed to ``printClusters`` along with ``outputFileName``.
    """
    # Read the 1-2 corpus into fresh, empty counters.
    align_12, words_1, bigrams_1, words_2, bigrams_2 = readBilingualData(
        fileLength, inputFileName1, alignFileName1,
        mono1FileName, mono2FileName,
        Counter(), Counter(), Counter(), Counter())

    # Read the 3-2 corpus: fresh counters for language 3, while language 2
    # reuses the counters returned by the previous read.
    align_32, words_3, bigrams_3, words_2, bigrams_2 = readBilingualData(
        fileLength, inputFileName2, alignFileName2,
        mono1FileName, mono2FileName,
        Counter(), Counter(), words_2, bigrams_2)

    # Build the per-language and per-pair objects.
    lang1, lang2, lang3, lang12, lang21, lang32, lang23 = \
        initializeLanguagePairObjets(
            align_12, align_32,
            words_1, bigrams_1, words_2, bigrams_2, words_3, bigrams_3,
            numClusInit, typeClusInit, edgeThresh1, edgeThresh2)

    # The raw counts and alignments are not referenced again below.
    del align_12, align_32
    del words_1, bigrams_1, words_2, bigrams_2, words_3, bigrams_3

    # Run the clustering algorithm and get new clusters.
    runOchClustering(lang1, lang2, lang3, lang12, lang21, lang32, lang23,
                     monoPower, biPower)

    # Print the clusters.
    printClusters(outputFileName, lang1, lang2, lang3, None, None)
# Example #3
# 0
def main(inputFileName, alignFileName, mono1FileName, mono2FileName,
         outputFileName, numClusInit, typeClusInit, fileLength,
         monoPower, biPower, edgeThresh):
    """Cluster the two languages of a single bilingual corpus.

    Reads the corpus with ``readBilingualData``, builds the two language
    objects and the two language-pair objects with
    ``initializeLanguagePairObjets``, runs ``runOchClustering`` on them and
    passes both language objects plus ``outputFileName`` to
    ``printClusters``.
    """
    # Empty unigram/bigram counters for each side of the corpus.
    first_words, first_bigrams = Counter(), Counter()
    second_words, second_bigrams = Counter(), Counter()

    # Load the alignments together with the word and bigram counts.
    alignments, first_words, first_bigrams, second_words, second_bigrams = \
        readBilingualData(fileLength, inputFileName, alignFileName,
                          mono1FileName, mono2FileName,
                          first_words, first_bigrams,
                          second_words, second_bigrams)

    lang1, lang2, lang12, lang21 = initializeLanguagePairObjets(
        alignments,
        first_words, first_bigrams,
        second_words, second_bigrams,
        numClusInit, typeClusInit, edgeThresh)

    # Drop the local references to the raw inputs before clustering starts.
    del alignments
    del first_words, first_bigrams, second_words, second_bigrams

    # Cluster, then emit the result; the unused language slots stay None.
    runOchClustering(lang1, lang2, lang12, lang21, monoPower, biPower)

    printClusters(outputFileName, lang1, lang2, None, None, None)
def main(
    inputFileName1,
    alignFileName1,
    inputFileName2,
    alignFileName2,
    inputFileName3,
    alignFileName3,
    inputFileName4,
    alignFileName4,
    mono1FileName,
    mono2FileName,
    outputFileName,
    numClusInit,
    typeClusInit,
    fileLength,
    monoPower,
    biPower,
    edgeThresh1,
    edgeThresh2,
    edgeThresh3,
    edgeThresh4,
):
    """Cluster five languages from four bilingual corpora sharing a pivot.

    Language numbering: 1 = en, 2 = de (the pivot, present in every corpus),
    3 = fr, 4 = fourth, 5 = fifth.  Corpus ``k`` is described by
    ``inputFileName<k>`` / ``alignFileName<k>`` and pairs the pivot with
    language 1, 3, 4 and 5 respectively.  All four reads receive the same
    ``mono1FileName`` / ``mono2FileName``.

    Pipeline: read each corpus with ``readBilingualData``, build the language
    and language-pair objects with ``initializeLanguagePairObjets``, run
    ``runOchClustering`` and pass the five language objects together with
    ``outputFileName`` to ``printClusters``.
    """
    # One unigram counter and one bigram counter per language.
    enWordDict = Counter()
    enBigramDict = Counter()
    deWordDict = Counter()
    deBigramDict = Counter()
    frWordDict = Counter()
    frBigramDict = Counter()
    fourthWordDict = Counter()
    fourthBigramDict = Counter()
    fifthWordDict = Counter()
    fifthBigramDict = Counter()

    # Corpus 1: en <-> de.
    alignDictEnDe, enWordDict, enBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName1,
        alignFileName1,
        mono1FileName,
        mono2FileName,
        enWordDict,
        enBigramDict,
        deWordDict,
        deBigramDict,
    )

    # Corpus 2: fr <-> de.  The pivot counters returned by the previous read
    # are passed on to each subsequent read.
    alignDictFrDe, frWordDict, frBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName2,
        alignFileName2,
        mono1FileName,
        mono2FileName,
        frWordDict,
        frBigramDict,
        deWordDict,
        deBigramDict,
    )

    # Corpus 3: fourth <-> de.
    # Fix: this call used to receive the fr counters, so the third corpus was
    # read into language 3's counters and the fresh fourth-language counters
    # were never used.
    alignDictFourthDe, fourthWordDict, fourthBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName3,
        alignFileName3,
        mono1FileName,
        mono2FileName,
        fourthWordDict,
        fourthBigramDict,
        deWordDict,
        deBigramDict,
    )

    # Corpus 4: fifth <-> de.
    alignDictFifthDe, fifthWordDict, fifthBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName4,
        alignFileName4,
        mono1FileName,
        mono2FileName,
        fifthWordDict,
        fifthBigramDict,
        deWordDict,
        deBigramDict,
    )

    lang1, lang2, lang3, lang4, lang5, lang12, lang21, lang32, lang23, lang42, lang24, lang52, lang25 = initializeLanguagePairObjets(
        alignDictEnDe,
        alignDictFrDe,
        alignDictFourthDe,
        alignDictFifthDe,
        enWordDict,
        enBigramDict,
        deWordDict,
        deBigramDict,
        frWordDict,
        frBigramDict,
        fourthWordDict,
        fourthBigramDict,
        fifthWordDict,
        fifthBigramDict,
        numClusInit,
        typeClusInit,
        edgeThresh1,
        edgeThresh2,
        edgeThresh3,
        edgeThresh4,
    )

    # Drop the local references to the raw inputs before clustering starts.
    del alignDictEnDe, alignDictFrDe, alignDictFourthDe, alignDictFifthDe
    del enWordDict, enBigramDict, deWordDict, deBigramDict, frWordDict, frBigramDict, fourthWordDict, fourthBigramDict
    del fifthWordDict, fifthBigramDict

    # Run the clustering algorithm and get new clusters
    runOchClustering(
        lang1,
        lang2,
        lang3,
        lang4,
        lang5,
        lang12,
        lang21,
        lang32,
        lang23,
        lang42,
        lang24,
        lang52,
        lang25,
        monoPower,
        biPower,
    )

    # Print the clusters
    printClusters(outputFileName, lang1, lang2, lang3, lang4, lang5)