def create_decoder(config_str, weights):
    """
    Create a cdec.Decoder and copy the given weights into it.

    :param config_str: cdec's configuration string
    :param dict weights: parameters of the instrumental distribution; every
        (key, value) pair is assigned into ``decoder.weights``
    :return: an instance of cdec.Decoder
    """
    # TODO: perhaps make sure formalism=scfg and intersection_strategy=full, i.e.
    # cdec.Decoder(config_str=config_str, formalism='scfg', intersection_strategy='Full')
    decoder = cdec.Decoder(config_str=config_str)
    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() only exists on Python 2.
    for name, value in weights.items():
        decoder.weights[name] = value
    return decoder
def create_decoder_from_files(cdec_ini, weights_file, scaling=1.0):
    """
    Build a cdec.Decoder from a configuration file and a weights file.

    The configuration is logged (one tab-indented line per config line)
    before the decoder is constructed.

    :param cdec_ini: cdec's configuration file
    :param weights_file: parameters of the instrumental distribution
    :param scaling: scaling factor handed to ``read_weights`` (defaults to 1.0)
    :return: an instance of cdec.Decoder
    """
    with open(cdec_ini) as ini_handle:
        config_str = ini_handle.read()

    # Indent every line of the configuration so it stands out in the log.
    indented_config = '\n\t'.join(config_str.strip().split('\n'))
    logging.info('cdec.ini:\n\t%s', indented_config)

    # perhaps make sure formalism=scfg and intersection_strategy=full?
    decoder = cdec.Decoder(config_str=config_str)

    logging.info('Loading weights: %s', weights_file)
    decoder.read_weights(weights_file, scaling)
    return decoder
def main():
    """
    Command-line entry point.

    Reads the cdec configuration, the initial weights, the source file and
    the reference file named on the command line, runs ``rampion`` on them
    and prints the decoder's weights as ``name<TAB>value`` lines sorted by
    feature name.

    :raises ValueError: if the source and reference files differ in their
        number of lines
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', help='cdec config', required=True)
    parser.add_argument('-w', '--weights', help='initial weights', required=True)
    parser.add_argument('-r', '--reference', help='reference file', required=True)
    parser.add_argument('-s', '--source', help='source file', required=True)
    args = parser.parse_args()

    with open(args.config) as fp:
        config = fp.read()

    decoder = cdec.Decoder(config)
    decoder.read_weights(args.weights)

    with open(args.reference) as fp:
        references = fp.readlines()
    with open(args.source) as fp:
        sources = fp.readlines()

    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would let mismatched inputs through silently.
    if len(references) != len(sources):
        raise ValueError(
            'source and reference files differ in length: '
            '{0} sources vs {1} references'.format(len(sources), len(references)))

    rampion(decoder, sources, references)

    # items() works on Python 2 and 3; iteritems() is Python 2 only.
    for fname, fval in sorted(dict(decoder.weights).items()):
        print('{0}\t{1}'.format(fname, fval))
def setUp(self):
    """
    Create an SCFG decoder with weights loaded and read the gzipped grammar.

    Stores the decoder on ``self.decoder`` and the grammar file's contents
    on ``self.grammar``. ``weights`` and ``grammar_file`` are names defined
    outside this method.
    """
    # Bind the attribute first so it is set even if loading weights fails.
    scfg_decoder = self.decoder = cdec.Decoder(formalism='scfg')
    scfg_decoder.read_weights(weights)

    # The grammar is stored gzip-compressed; keep its decompressed contents.
    with gzip.open(grammar_file) as grammar_fh:
        self.grammar = grammar_fh.read()
#coding: utf8
import cdec
import gzip
from math import exp

weights = '../tests/system_tests/australia/weights'
grammar_file = '../tests/system_tests/australia/australia.scfg.gz'


def _show(label, text):
    """Print ``label`` and ``text`` separated by a single space."""
    # On Python 2 the text is a unicode object and is encoded to UTF-8
    # explicitly before printing; when it is already a native str it is
    # printed unchanged.
    if not isinstance(text, str):
        text = text.encode('utf8')
    # Single-argument print(...) is valid syntax on Python 2 and Python 3.
    print(label + ' ' + text)


# Load decoder with configuration
decoder = cdec.Decoder(formalism='scfg')

# Read weights
decoder.read_weights(weights)
print(dict(decoder.weights))

# Read grammar
with gzip.open(grammar_file) as f:
    grammar = f.read()

# Input sentence
sentence = u'澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。'
_show(' Input:', sentence)

# Decode
forest = decoder.translate(sentence, grammar=grammar)

# Get viterbi translation
_show('Output[0]:', forest.viterbi())
f_tree, e_tree = forest.viterbi_trees()
_show(' FTree[0]:', f_tree)
_show(' ETree[0]:', e_tree)
def make_decoder(config, weights):
    """
    Create the module-level ``decoder`` and load weights into it.

    :param config: configuration passed to ``cdec.Decoder``
    :param weights: argument passed on to ``read_weights``
    """
    global decoder
    # Rebind the global before loading weights, so the new decoder is
    # visible module-wide even if read_weights raises.
    new_decoder = cdec.Decoder(config)
    decoder = new_decoder
    new_decoder.read_weights(weights)