def consolidateRules(cntsFile):
    """Merge duplicate rule counts in ``cntsFile`` and rewrite it in place.

    Every non-blank line of the input is expected to have the form
    ``src ||| tgt ||| count``.  Counts of lines that share the same
    ``src ||| tgt`` rule are summed, usage statistics are written to
    stderr, and the file is then overwritten with one ``rule ||| count``
    line per unique rule, sorted by ascending count.

    Args:
        cntsFile: Path of the rule-counts file.  It is read in full and
            then overwritten with the consolidated counts.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            " ||| "-separated fields, or if its count is not an integer.
    """
    sys.stderr.write("Consolidating rules from file : %s ...\n" % (cntsFile))

    rulesUsedDict = {}
    with open(cntsFile, "r") as rC:
        for line in rC:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the three-way unpack below.
            if not line.strip():
                continue
            (src, tgt, cnts) = line.split(" ||| ")
            rule = src + " ||| " + tgt
            # dict.get() works on both Python 2 and 3, unlike has_key().
            # int() ignores the trailing newline carried by the last field.
            rulesUsedDict[rule] = rulesUsedDict.get(rule, 0) + int(cnts)

    tot_used_rules = len(rulesUsedDict)
    tot_PT_rules = PhraseTable.getTotalRules()
    # Guard the ratio: a zero total would otherwise raise ZeroDivisionError
    # and abort before the consolidated counts are written back.
    if tot_PT_rules:
        pct_used = (float(tot_used_rules) * 100.0) / float(tot_PT_rules)
    else:
        pct_used = 0.0

    sys.stderr.write("Total SCFG rules found for the set : %g\n" % (tot_PT_rules))
    sys.stderr.write("# of unique rules used in N-best derivations : %g\n" % (tot_used_rules))
    sys.stderr.write("%% of rules used in the N-best list : %g\n" % (pct_used))

    # The input was read completely above, so it is safe to truncate and
    # rewrite the same path.  The context manager closes the handle even if
    # a write fails.
    with open(cntsFile, "w") as wC:
        # items() instead of iteritems() keeps this runnable on Python 3.
        for rule, r_cnt in sorted(rulesUsedDict.items(), key=operator.itemgetter(1)):
            wC.write("%s ||| %d\n" % (rule, r_cnt))
def consolidateRules(cntsFile):
    """Merge duplicate rule counts in ``cntsFile`` and rewrite it in place.

    Every non-blank line of the input is expected to have the form
    ``src ||| tgt ||| count``.  Counts of lines that share the same
    ``src ||| tgt`` rule are summed, usage statistics are written to
    stderr, and the file is then overwritten with one ``rule ||| count``
    line per unique rule, sorted by ascending count.

    Args:
        cntsFile: Path of the rule-counts file.  It is read in full and
            then overwritten with the consolidated counts.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            " ||| "-separated fields, or if its count is not an integer.
    """
    sys.stderr.write("Consolidating rules from file : %s ...\n" % (cntsFile))

    rulesUsedDict = {}
    with open(cntsFile, "r") as rC:
        for line in rC:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the three-way unpack below.
            if not line.strip():
                continue
            (src, tgt, cnts) = line.split(" ||| ")
            rule = src + " ||| " + tgt
            # dict.get() works on both Python 2 and 3, unlike has_key().
            # int() ignores the trailing newline carried by the last field.
            rulesUsedDict[rule] = rulesUsedDict.get(rule, 0) + int(cnts)

    tot_used_rules = len(rulesUsedDict)
    tot_PT_rules = PhraseTable.getTotalRules()
    # Guard the ratio: a zero total would otherwise raise ZeroDivisionError
    # and abort before the consolidated counts are written back.
    if tot_PT_rules:
        pct_used = (float(tot_used_rules) * 100.0) / float(tot_PT_rules)
    else:
        pct_used = 0.0

    sys.stderr.write("Total SCFG rules found for the set : %g\n" % (tot_PT_rules))
    sys.stderr.write("# of unique rules used in N-best derivations : %g\n" % (tot_used_rules))
    sys.stderr.write("%% of rules used in the N-best list : %g\n" % (pct_used))

    # The input was read completely above, so it is safe to truncate and
    # rewrite the same path.  The context manager closes the handle even if
    # a write fails.
    with open(cntsFile, "w") as wC:
        # items() instead of iteritems() keeps this runnable on Python 3.
        for rule, r_cnt in sorted(rulesUsedDict.items(), key=operator.itemgetter(1)):
            wC.write("%s ||| %d\n" % (rule, r_cnt))