Ejemplo n.º 1
0
    def loadRules(self):
        '''Load SCFG rules from settings.opts.ruleFile into the PhraseTable.

        Every non-blank line must hold three ' ||| '-separated fields
        (source, target, probabilities) -- the Kriya phrase table layout.
        A Moses phrase table has five fields (src, tgt, f_align, r_align,
        probs) and is not handled here.

        Rules are grouped by runs of consecutive lines sharing the same source
        side. When a run ends, its options are sorted by descending
        `prob_e_f`, cut to the first settings.opts.ttl entries when ttl > 0,
        scored with scoreRule() and stored in PhraseTable.ruleDict[src].
        A source side that re-appears in a later, non-adjacent run replaces
        the list stored earlier. Each new source side is also added to
        PhraseTable.src_trie (created on first use).

        Rules are skipped when force decoding is on and the target does not
        match the reference, or when one_nt_decode is on and the source
        contains 'X__2'.

        Side effects: resets PhraseTable.tot_rule_pairs (it counts every rule
        that passes the filters, including those later dropped by ttl), fills
        PhraseTable.ruleDict and writes loading statistics to stderr.

        Returns:
            None

        Raises:
            ValueError: a non-blank line does not split into exactly three
                fields.
        '''

        PhraseTable.tot_rule_pairs = 0
        prev_src = ''
        uniq_src_rules = 0
        entriesLst = []

        def storeEntries(src_key):
            '''Sort the buffered options, keep the best ones under src_key and empty the buffer.'''
            entriesLst.sort(key=operator.attrgetter("prob_e_f"), reverse=True)
            kept_rules = []
            PhraseTable.ruleDict[src_key] = kept_rules
            for tgt_options, trans_option in enumerate(entriesLst, 1):
                rule_obj = trans_option.rule
                # Only the rules that survive the ttl cut are scored
                rule_obj.scoreRule()
                kept_rules.append(rule_obj)
                if settings.opts.ttl > 0 and tgt_options >= settings.opts.ttl:
                    break
            del entriesLst[:]

        t_beg = time.time()
        rF = open(settings.opts.ruleFile, 'r')
        sys.stderr.write("Loading SCFG rules from file     : %s\n" %
                         (settings.opts.ruleFile))
        try:
            for line in rF:
                line = line.strip()
                # Blank lines (e.g. a trailing newline) carry no rule; skip
                # them instead of failing on the 3-way unpack below
                if not line:
                    continue
                (src, tgt, probs) = line.split(' ||| ')

                if settings.opts.force_decode and not PhraseTable.tgtMatchesRef(
                        tgt):
                    continue
                if settings.opts.one_nt_decode and src.find('X__2') >= 0:
                    continue
                PhraseTable.tot_rule_pairs += 1

                if prev_src != src:
                    uniq_src_rules += 1
                    if PhraseTable.src_trie is None:
                        PhraseTable.src_trie = SimpleSuffixTree(
                            src, settings.opts.fr_rule_terms)
                    else:
                        PhraseTable.src_trie.addText(src)

                    # A new source side starts: store the finished group
                    if prev_src:
                        storeEntries(prev_src)

                rule = RuleItem.initRule(src, tgt, probs)
                entriesLst.append(TransOption(rule.getScore4TTL(), rule))
                prev_src = src

            # Store the last group; prev_src is still '' when no rule was
            # accepted, in which case nothing must be added to ruleDict
            if prev_src:
                storeEntries(prev_src)

        finally:
            rF.close()
            t_end = time.time()
            sys.stderr.write(
                "Unique source rules found                     : %d\n" %
                (uniq_src_rules))
            sys.stderr.write(
                "Total pairs of SCFG rules loaded              : %d\n" %
                (PhraseTable.tot_rule_pairs))
            sys.stderr.write(
                "Time taken for loading rules in dict and Trie : %1.3f sec\n\n"
                % (t_end - t_beg))

        return None
Ejemplo n.º 2
0
    def loadRules(self):
        '''Load SCFG rules from settings.opts.ruleFile into the PhraseTable.

        Every non-blank line must hold three ' ||| '-separated fields
        (source, target, probabilities) -- the Kriya phrase table layout.
        A Moses phrase table has five fields (src, tgt, f_align, r_align,
        probs) and is not handled here.

        Rules are grouped by runs of consecutive lines sharing the same source
        side. When a run ends, its options are sorted by descending
        `prob_e_f`, cut to the first settings.opts.ttl entries when ttl > 0,
        scored with scoreRule() and stored in PhraseTable.ruleDict[src].
        A source side that re-appears in a later, non-adjacent run replaces
        the list stored earlier. Each new source side is also added to
        PhraseTable.src_trie (created on first use).

        Rules are skipped when force decoding is on and the target does not
        match the reference, or when one_nt_decode is on and the source
        contains 'X__2'.

        Side effects: resets PhraseTable.tot_rule_pairs (it counts every rule
        that passes the filters, including those later dropped by ttl), fills
        PhraseTable.ruleDict and writes loading statistics to stderr.

        Returns:
            None

        Raises:
            ValueError: a non-blank line does not split into exactly three
                fields.
        '''

        PhraseTable.tot_rule_pairs = 0
        prev_src = ''
        uniq_src_rules = 0
        entriesLst = []

        def storeEntries(src_key):
            '''Sort the buffered options, keep the best ones under src_key and empty the buffer.'''
            entriesLst.sort(key=operator.attrgetter("prob_e_f"), reverse=True)
            kept_rules = []
            PhraseTable.ruleDict[src_key] = kept_rules
            for tgt_options, trans_option in enumerate(entriesLst, 1):
                rule_obj = trans_option.rule
                # Only the rules that survive the ttl cut are scored
                rule_obj.scoreRule()
                kept_rules.append(rule_obj)
                if settings.opts.ttl > 0 and tgt_options >= settings.opts.ttl:
                    break
            del entriesLst[:]

        t_beg = time.time()
        rF = open(settings.opts.ruleFile, 'r')
        sys.stderr.write( "Loading SCFG rules from file     : %s\n" % (settings.opts.ruleFile) )
        try:
            for line in rF:
                line = line.strip()
                # Blank lines (e.g. a trailing newline) carry no rule; skip
                # them instead of failing on the 3-way unpack below
                if not line:
                    continue
                (src, tgt, probs) = line.split(' ||| ')

                if settings.opts.force_decode and not PhraseTable.tgtMatchesRef(tgt):
                    continue
                if settings.opts.one_nt_decode and src.find('X__2') >= 0:
                    continue
                PhraseTable.tot_rule_pairs += 1

                if prev_src != src:
                    uniq_src_rules += 1
                    if PhraseTable.src_trie is None:
                        PhraseTable.src_trie = SimpleSuffixTree(src, settings.opts.fr_rule_terms)
                    else:
                        PhraseTable.src_trie.addText(src)

                    # A new source side starts: store the finished group
                    if prev_src:
                        storeEntries(prev_src)

                rule = RuleItem.initRule(src, tgt, probs)
                entriesLst.append( TransOption(rule.getScore4TTL(), rule) )
                prev_src = src

            # Store the last group; prev_src is still '' when no rule was
            # accepted, in which case nothing must be added to ruleDict
            if prev_src:
                storeEntries(prev_src)

        finally:
            rF.close()
            t_end = time.time()
            sys.stderr.write( "Unique source rules found                     : %d\n" % (uniq_src_rules) )
            sys.stderr.write( "Total pairs of SCFG rules loaded              : %d\n" % (PhraseTable.tot_rule_pairs) )
            sys.stderr.write( "Time taken for loading rules in dict and Trie : %1.3f sec\n\n" % (t_end - t_beg) )

        return None