Ejemplo n.º 1
0
 def em_step(self, iteration):
     """Run one training pass over the aligned corpus, then renormalize.

     Every alignment read from the three corpus files is handed to a fresh
     ``Extractor``. When a hypergraph is produced, expected counts are
     accumulated from it, ``hg.root.inside`` is added to the running
     likelihood, and the hypergraph is passed to ``write_viterbi_tree``
     with the path ``<outputdir>/iter_NNN/tree_NNNNNNNN``. Alignments for
     which no hypergraph is extracted are skipped. After the pass the
     counter is normalized with ``self.alpha`` and ``self.threshold``.

     Args:
         iteration: Index of this iteration; the directory name and the
             log messages use ``iteration + 1``.

     Raises:
         OSError: If a corpus file cannot be opened or the per-iteration
             directory cannot be created (``os.mkdir`` refuses to reuse
             an existing directory).
     """
     # Keep the corpus files open only for the duration of the pass so the
     # handles are released even if extraction raises part-way through.
     with open(self.ffilename) as ffile, \
             open(self.efilename) as efile, \
             open(self.afilename) as afile:
         alignments = Alignment.reader_pharaoh(ffile, efile, afile)
         dirname = os.path.join(
             self.outputdir,
             'iter_%s' % str(iteration + 1).rjust(3, '0'))
         os.mkdir(dirname)
         if logger.level >= 1:
             logger.writeln('\niteration %s' % (iteration + 1))
         likelihood = 0
         log_interval = FLAGS.emtrain_log_interval
         starttime = time.time()
         # Sentence numbers are 1-based, both in logs and in tree names.
         for i, alignment in enumerate(alignments, 1):
             if i % log_interval == 0:
                 # The timer is restarted at every report, so the elapsed
                 # time covers one interval's worth of sentences, not all
                 # ``i`` sentences seen so far.
                 logger.writeln(
                     '%s sentences at %s secs/sent' %
                     (i, (time.time() - starttime) / log_interval))
                 starttime = time.time()
             extractor = Extractor(
                 maxabslen=100000,
                 maxlen=10000,
                 minhole=1,
                 maxvars=100000,
                 lexical_weighter=self.lexical_weighter,
                 forbid_adjacent=self.forbid_adjacent,
                 maximize_derivation=self.maximize_derivation,
                 require_aligned_terminal=self.require_aligned_terminal)
             hg = extractor.extract_hypergraph(alignment)
             if hg is None:
                 continue
             # compute expected counts
             self.compute_expected_counts(hg)
             likelihood += hg.root.inside
             treefilename = os.path.join(
                 dirname, 'tree_%s' % str(i).rjust(8, '0'))
             self.write_viterbi_tree(hg, treefilename)
     if logger.level >= 1:
         logger.writeln('likelihood: %s' % likelihood)
         logger.writeln('normalizing...')
     self.counter.normalize_vbdp(self.alpha, self.threshold)
     if logger.level >= 1:
         logger.writeln('prob table size: %s' % len(self.counter.prob))
Ejemplo n.º 2
0
 def em_step(self, iteration):
     """Run one training pass over the aligned corpus, then renormalize.

     Every alignment read from the three corpus files is handed to a fresh
     ``Extractor``. When a hypergraph is produced, expected counts are
     accumulated from it, ``hg.root.inside`` is added to the running
     likelihood, and the hypergraph is passed to ``write_viterbi_tree``
     with the path ``<outputdir>/iter_NNN/tree_NNNNNNNN``. Alignments for
     which no hypergraph is extracted are skipped. After the pass the
     counter is normalized with ``self.alpha`` and ``self.threshold``.

     Args:
         iteration: Index of this iteration; the directory name and the
             log messages use ``iteration + 1``.

     Raises:
         OSError: If a corpus file cannot be opened or the per-iteration
             directory cannot be created (``os.mkdir`` refuses to reuse
             an existing directory).
     """
     # Keep the corpus files open only for the duration of the pass so the
     # handles are released even if extraction raises part-way through.
     with open(self.ffilename) as ffile, \
             open(self.efilename) as efile, \
             open(self.afilename) as afile:
         alignments = Alignment.reader_pharaoh(ffile, efile, afile)
         percent_counter = PercentCounter(total=self.corpus_size)
         dirname = os.path.join(
             self.outputdir,
             'iter_%s' % str(iteration + 1).rjust(3, '0'))
         os.mkdir(dirname)
         if logger.level >= 1:
             logger.writeln('\niteration %s' % (iteration + 1))
         likelihood = 0
         for i, alignment in enumerate(alignments):
             # Progress is reported with the zero-based index; tree file
             # names below use the one-based sentence number.
             percent_counter.print_percent(i)
             extractor = Extractor(
                 lexical_weighter=self.lexical_weighter,
                 maximize_derivation=self.maximize_derivation)
             hg = extractor.extract_hypergraph(alignment)
             if hg is None:
                 continue
             # compute expected counts
             self.compute_expected_counts(hg)
             likelihood += hg.root.inside
             treefilename = os.path.join(
                 dirname, 'tree_%s' % str(i + 1).rjust(8, '0'))
             self.write_viterbi_tree(hg, treefilename)
     if logger.level >= 1:
         logger.writeln('likelihood: %s' % likelihood)
         logger.writeln('normalizing...')
     self.counter.normalize_vbdp(self.alpha, self.threshold)
     if logger.level >= 1:
         logger.writeln('prob table size: %s' % len(self.counter.prob))
Ejemplo n.º 3
0
#!/usr/bin/env python3

import sys

from extractor import Extractor
from alignment import Alignment
import hypergraph
import logger

if __name__ == '__main__':
    # Show the hypergraph extracted from the n-th (zero-based) sentence
    # pair of a corpus given as three parallel files.
    if len(sys.argv) < 5:
        sys.exit('usage: %s FFILE EFILE AFILE N' % sys.argv[0])
    ffilename = sys.argv[1]
    efilename = sys.argv[2]
    afilename = sys.argv[3]
    n = int(sys.argv[4])
    # Context managers close all three files however the script exits.
    with open(ffilename) as ffile, \
            open(efilename) as efile, \
            open(afilename) as afile:
        alignments = Alignment.reader_pharaoh(ffile, efile, afile)
        for i, alignment in enumerate(alignments):
            if i != n:
                continue
            extractor = Extractor(maximize_derivation=True)
            hg = extractor.extract_hypergraph(alignment)
            if hg is None:
                # Extraction can yield no hypergraph; report that instead
                # of failing with an AttributeError on ``hg.show()``.
                sys.exit('no hypergraph extracted for sentence pair %s' % n)
            hg.show()
            # Only one pair is requested, so skip the rest of the corpus.
            break