def main(inputFileName1, alignFileName1, inputFileName2, alignFileName2,
         mono1FileName, mono2FileName, outputFileName, numClusInit,
         typeClusInit, fileLength, monoPower, biPower, edgeThresh1,
         edgeThresh2):
    """Drive the clustering pipeline for three languages sharing one pivot.

    Language numbering used below is 1: en, 2: de, 3: fr, where de is the
    language common to both bilingual data sets (en-de and fr-de).

    Args:
        inputFileName1, alignFileName1: first (en-de) data set, forwarded to
            ``readBilingualData``.
        inputFileName2, alignFileName2: second (fr-de) data set, forwarded to
            ``readBilingualData``.
        mono1FileName, mono2FileName: forwarded unchanged to both
            ``readBilingualData`` calls.
        outputFileName: forwarded to ``printClusters``.
        numClusInit, typeClusInit, edgeThresh1, edgeThresh2: forwarded to
            ``initializeLanguagePairObjets``.
        fileLength: forwarded to ``readBilingualData``.
        monoPower, biPower: forwarded to ``runOchClustering``.

    Returns:
        None.
    """
    # en-de: every counter starts out empty.
    enDeAlign, enWords, enBigrams, deWords, deBigrams = readBilingualData(
        fileLength, inputFileName1, alignFileName1, mono1FileName, mono2FileName,
        Counter(), Counter(), Counter(), Counter())

    # fr-de: new counters for fr, while the de counters returned by the
    # first read are handed back in.
    frDeAlign, frWords, frBigrams, deWords, deBigrams = readBilingualData(
        fileLength, inputFileName2, alignFileName2, mono1FileName, mono2FileName,
        Counter(), Counter(), deWords, deBigrams)

    lang1, lang2, lang3, lang12, lang21, lang32, lang23 = initializeLanguagePairObjets(
        enDeAlign, frDeAlign,
        enWords, enBigrams, deWords, deBigrams, frWords, frBigrams,
        numClusInit, typeClusInit, edgeThresh1, edgeThresh2)

    # Drop the local references to the raw alignments and counts before the
    # clustering step (presumably to keep memory usage down).
    del (enDeAlign, frDeAlign,
         enWords, enBigrams, deWords, deBigrams, frWords, frBigrams)

    # Run the clustering algorithm, then write the resulting clusters.
    runOchClustering(lang1, lang2, lang3, lang12, lang21, lang32, lang23,
                     monoPower, biPower)
    printClusters(outputFileName, lang1, lang2, lang3, None, None)
def main(inputFileName, alignFileName, mono1FileName, mono2FileName, outputFileName,
         numClusInit, typeClusInit, fileLength, monoPower, biPower, edgeThresh):
    """Drive the clustering pipeline for a single language pair.

    Args:
        inputFileName, alignFileName, mono1FileName, mono2FileName, fileLength:
            forwarded unchanged to ``readBilingualData``.
        outputFileName: forwarded to ``printClusters``.
        numClusInit, typeClusInit, edgeThresh: forwarded to
            ``initializeLanguagePairObjets``.
        monoPower, biPower: forwarded to ``runOchClustering``.

    Returns:
        None.
    """
    # Read the input and collect word / bigram counts; all four counters
    # start out empty.
    alignments, enWords, enBigrams, frWords, frBigrams = readBilingualData(
        fileLength, inputFileName, alignFileName, mono1FileName, mono2FileName,
        Counter(), Counter(), Counter(), Counter())

    lang1, lang2, lang12, lang21 = initializeLanguagePairObjets(
        alignments, enWords, enBigrams, frWords, frBigrams,
        numClusInit, typeClusInit, edgeThresh)

    # Drop the local references to the raw alignments and counts before the
    # clustering step (presumably to keep memory usage down).
    del alignments, enWords, enBigrams, frWords, frBigrams

    # Run the clustering algorithm, then write the resulting clusters.
    runOchClustering(lang1, lang2, lang12, lang21, monoPower, biPower)
    printClusters(outputFileName, lang1, lang2, None, None, None)
def main(
    inputFileName1,
    alignFileName1,
    inputFileName2,
    alignFileName2,
    inputFileName3,
    alignFileName3,
    inputFileName4,
    alignFileName4,
    mono1FileName,
    mono2FileName,
    outputFileName,
    numClusInit,
    typeClusInit,
    fileLength,
    monoPower,
    biPower,
    edgeThresh1,
    edgeThresh2,
    edgeThresh3,
    edgeThresh4,
):
    """Drive the clustering pipeline for five languages sharing one pivot.

    Four bilingual data sets are read, each pairing one language with the
    common pivot language de: en-de, fr-de, fourth-de and fifth-de.
    Language numbering is 1: en, 2: de, 3: fr, 4: fourth, 5: fifth.

    Args:
        inputFileName1..4, alignFileName1..4: the four bilingual data sets,
            forwarded to ``readBilingualData`` in the order en-de, fr-de,
            fourth-de, fifth-de.
        mono1FileName, mono2FileName: forwarded unchanged to every
            ``readBilingualData`` call.
        outputFileName: forwarded to ``printClusters``.
        numClusInit, typeClusInit, edgeThresh1..4: forwarded to
            ``initializeLanguagePairObjets``.
        fileLength: forwarded to ``readBilingualData``.
        monoPower, biPower: forwarded to ``runOchClustering``.

    Returns:
        None.
    """
    # Read the input files and get word counts.
    # 5 languages: en, de, fr, fourth, fifth; de is the common language in
    # en-de, fr-de, fourth-de and fifth-de.
    enWordDict = Counter()
    enBigramDict = Counter()
    deWordDict = Counter()
    deBigramDict = Counter()
    frWordDict = Counter()
    frBigramDict = Counter()
    fourthWordDict = Counter()
    fourthBigramDict = Counter()
    fifthWordDict = Counter()
    fifthBigramDict = Counter()

    # The de counters are threaded through every call; each other language
    # is given its own counters.
    alignDictEnDe, enWordDict, enBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName1,
        alignFileName1,
        mono1FileName,
        mono2FileName,
        enWordDict,
        enBigramDict,
        deWordDict,
        deBigramDict,
    )
    alignDictFrDe, frWordDict, frBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName2,
        alignFileName2,
        mono1FileName,
        mono2FileName,
        frWordDict,
        frBigramDict,
        deWordDict,
        deBigramDict,
    )
    # The fourth language must be read into its own counters, not the French
    # ones; otherwise (presumably, if readBilingualData accumulates into the
    # counters it receives) French and fourth-language counts get mixed.
    alignDictFourthDe, fourthWordDict, fourthBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName3,
        alignFileName3,
        mono1FileName,
        mono2FileName,
        fourthWordDict,
        fourthBigramDict,
        deWordDict,
        deBigramDict,
    )
    alignDictFifthDe, fifthWordDict, fifthBigramDict, deWordDict, deBigramDict = readBilingualData(
        fileLength,
        inputFileName4,
        alignFileName4,
        mono1FileName,
        mono2FileName,
        fifthWordDict,
        fifthBigramDict,
        deWordDict,
        deBigramDict,
    )

    (
        lang1,
        lang2,
        lang3,
        lang4,
        lang5,
        lang12,
        lang21,
        lang32,
        lang23,
        lang42,
        lang24,
        lang52,
        lang25,
    ) = initializeLanguagePairObjets(
        alignDictEnDe,
        alignDictFrDe,
        alignDictFourthDe,
        alignDictFifthDe,
        enWordDict,
        enBigramDict,
        deWordDict,
        deBigramDict,
        frWordDict,
        frBigramDict,
        fourthWordDict,
        fourthBigramDict,
        fifthWordDict,
        fifthBigramDict,
        numClusInit,
        typeClusInit,
        edgeThresh1,
        edgeThresh2,
        edgeThresh3,
        edgeThresh4,
    )

    # Drop the local references to the raw alignments and counts before the
    # clustering step (presumably to keep memory usage down).
    del alignDictEnDe, alignDictFrDe, alignDictFourthDe, alignDictFifthDe
    del enWordDict, enBigramDict, deWordDict, deBigramDict, frWordDict, frBigramDict
    del fourthWordDict, fourthBigramDict, fifthWordDict, fifthBigramDict

    # Run the clustering algorithm and get new clusters.
    runOchClustering(
        lang1,
        lang2,
        lang3,
        lang4,
        lang5,
        lang12,
        lang21,
        lang32,
        lang23,
        lang42,
        lang24,
        lang52,
        lang25,
        monoPower,
        biPower,
    )

    # Print the clusters.
    printClusters(outputFileName, lang1, lang2, lang3, lang4, lang5)