Example #1
0
def consolidateRules(cntsFile):
    """Merge duplicate rule counts in `cntsFile` and rewrite the file in place.

    Each non-blank input line must have the form ``src ||| tgt ||| count``.
    Counts of lines sharing the same ``src ||| tgt`` rule are summed, usage
    statistics are reported on stderr, and the file is then overwritten with
    one ``rule ||| count`` line per unique rule, in ascending order of count.

    Args:
        cntsFile: Path of the rule-counts file; it is read and then overwritten.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            `` ||| ``-separated fields, or its count field is not an integer.
    """
    sys.stderr.write("Consolidating rules from file : %s ...\n" % (cntsFile))
    rulesUsedDict = {}
    with open(cntsFile, "r") as rC:
        for line in rC:
            # A blank line (e.g. a stray trailing newline) carries no rule.
            if not line.strip():
                continue
            (src, tgt, cnts) = line.split(" ||| ")
            rule = src + " ||| " + tgt
            # dict.get works on Python 2 and 3; dict.has_key is Python 2 only.
            rulesUsedDict[rule] = rulesUsedDict.get(rule, 0) + int(cnts)

    tot_used_rules = len(rulesUsedDict)
    # NOTE(review): PhraseTable is defined outside this block; its result is
    # only used here as the numeric denominator of the usage percentage.
    tot_PT_rules = PhraseTable.getTotalRules()
    # Report 0% rather than aborting on division by zero before the rewrite.
    if tot_PT_rules:
        pct_used = (float(tot_used_rules) * 100.0) / float(tot_PT_rules)
    else:
        pct_used = 0.0
    sys.stderr.write("Total SCFG rules found for the set           : %g\n" % (tot_PT_rules))
    sys.stderr.write("# of unique rules used in N-best derivations : %g\n" % (tot_used_rules))
    sys.stderr.write("%% of rules used in the N-best list           : %g\n" % (pct_used))

    # `with` guarantees the output handle is closed even if a write fails.
    with open(cntsFile, "w") as wC:
        for rule, r_cnt in sorted(rulesUsedDict.items(), key=operator.itemgetter(1)):
            wC.write("%s ||| %d\n" % (rule, r_cnt))
Example #2
0
def consolidateRules(cntsFile):
    """Sum the counts of repeated rules in `cntsFile` and rewrite it in place.

    Every non-blank line of the input is expected to look like
    ``src ||| tgt ||| count``. Lines with the same ``src ||| tgt`` rule have
    their counts added together. Usage statistics are written to stderr and
    the file is then overwritten with one ``rule ||| count`` line per unique
    rule, ordered by ascending count.

    Args:
        cntsFile: Path of the rule-counts file; it is read and then overwritten.

    Raises:
        ValueError: If a non-blank line does not split into exactly three
            `` ||| ``-separated fields, or its count field is not an integer.
    """
    sys.stderr.write("Consolidating rules from file : %s ...\n" % (cntsFile))
    rulesUsedDict = {}
    with open(cntsFile, 'r') as rC:
        for line in rC:
            # Skip blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            (src, tgt, cnts) = line.split(" ||| ")
            rule = src + " ||| " + tgt
            # dict.has_key was removed in Python 3; dict.get is portable.
            rulesUsedDict[rule] = rulesUsedDict.get(rule, 0) + int(cnts)

    tot_used_rules = len(rulesUsedDict)
    # NOTE(review): PhraseTable comes from outside this block; its result is
    # only used here as the numeric denominator of the usage percentage.
    tot_PT_rules = PhraseTable.getTotalRules()
    # Avoid ZeroDivisionError so the counts file is still rewritten.
    if tot_PT_rules:
        pct_used = (float(tot_used_rules) * 100.0) / float(tot_PT_rules)
    else:
        pct_used = 0.0
    sys.stderr.write("Total SCFG rules found for the set           : %g\n" %
                     (tot_PT_rules))
    sys.stderr.write("# of unique rules used in N-best derivations : %g\n" %
                     (tot_used_rules))
    sys.stderr.write("%% of rules used in the N-best list           : %g\n" %
                     (pct_used))

    # The context manager closes the output file even if a write raises.
    with open(cntsFile, 'w') as wC:
        for rule, r_cnt in sorted(rulesUsedDict.items(),
                                  key=operator.itemgetter(1)):
            wC.write("%s ||| %d\n" % (rule, r_cnt))