Esempio n. 1
0
def create_decoder(config_str, weights):
    """
    Creates an instance of cdec.Decoder and copies the given weights into it.

    :param config_str: cdec's configuration string
    :param dict weights: parameters of the instrumental distribution; every
        (key, value) item is assigned to ``decoder.weights[key]``
    :return: an instance of cdec.Decoder
    """
    # perhaps make sure formalism=scfg and intersection_strategy=full?
    # decoder = cdec.Decoder(config_str=config_str, formalism='scfg', intersection_strategy='Full')
    decoder = cdec.Decoder(config_str=config_str)
    # items() instead of iteritems(): iteritems() only exists on Python 2
    # dicts, while items() is available on both Python 2 and Python 3.
    for name, value in weights.items():
        decoder.weights[name] = value

    return decoder
Esempio n. 2
0
def create_decoder_from_files(cdec_ini, weights_file, scaling=1.0):
    """
    Builds a cdec.Decoder from a configuration file and a weights file.

    The configuration is read from ``cdec_ini`` and logged (one tab-indented
    line per configuration line) before the decoder is constructed; the
    weights are then read through ``Decoder.read_weights``.

    :param cdec_ini: cdec's configuration file
    :param weights_file: parameters of the instrumental distribution
    :param scaling: scaling factor passed to ``read_weights`` (defaults to 1.0)
    :return: an instance of cdec.Decoder
    """
    with open(cdec_ini) as ini_stream:
        ini_text = ini_stream.read()
        ini_lines = ini_text.strip().split('\n')
        logging.info('cdec.ini:\n\t%s', '\n\t'.join(ini_lines))
        # perhaps make sure formalism=scfg and intersection_strategy=full?
        # (e.g. by passing formalism='scfg', intersection_strategy='Full')
        instance = cdec.Decoder(config_str=ini_text)

    logging.info('Loading weights: %s', weights_file)
    instance.read_weights(weights_file, scaling)
    return instance
Esempio n. 3
0
def main():
    """
    Command-line entry point.

    Parses the required ``--config``, ``--weights``, ``--reference`` and
    ``--source`` arguments, builds a cdec.Decoder from the configuration
    file, reads the initial weights, runs ``rampion`` over the source and
    reference lines, and finally prints the decoder's weights sorted by
    name, one tab-separated ``name<TAB>value`` pair per line.

    :raises ValueError: if the reference and source files do not contain
        the same number of lines
    """
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', help='cdec config', required=True)
    parser.add_argument('-w', '--weights', help='initial weights', required=True)
    parser.add_argument('-r', '--reference', help='reference file', required=True)
    parser.add_argument('-s', '--source', help='source file', required=True)
    args = parser.parse_args()

    with open(args.config) as fp:
        config = fp.read()

    decoder = cdec.Decoder(config)
    decoder.read_weights(args.weights)
    with open(args.reference) as fp:
        references = fp.readlines()
    with open(args.source) as fp:
        sources = fp.readlines()
    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would let mismatched inputs through silently.
    if len(references) != len(sources):
        raise ValueError(
            'reference file has {0} lines but source file has {1} lines'.format(
                len(references), len(sources)))
    # NOTE(review): rampion presumably updates decoder.weights in place,
    # since the weights are printed afterwards -- confirm against its definition.
    rampion(decoder, sources, references)

    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for fname, fval in sorted(dict(decoder.weights).items()):
        print('{0}\t{1}'.format(fname, fval))
Esempio n. 4
0
 def setUp(self):
     """Create the SCFG decoder fixture and load the gzipped grammar.

     Stores the decoder (with weights read from the module-level
     ``weights`` path) on ``self.decoder`` and the contents of
     ``grammar_file`` on ``self.grammar``.
     """
     scfg_decoder = cdec.Decoder(formalism='scfg')
     self.decoder = scfg_decoder
     scfg_decoder.read_weights(weights)
     with gzip.open(grammar_file) as grammar_stream:
         self.grammar = grammar_stream.read()
Esempio n. 5
0
#coding: utf8
import cdec
import gzip
from math import exp

# Demo script: translate one sentence with a cdec SCFG decoder and print the
# Viterbi translation and trees.
# NOTE: this script uses Python 2 print statements.

# Relative paths to the weights file and the gzipped grammar
weights = '../tests/system_tests/australia/weights'
grammar_file = '../tests/system_tests/australia/australia.scfg.gz'

# Load decoder with configuration
decoder = cdec.Decoder(formalism='scfg')
# Read weights
decoder.read_weights(weights)

# Show the loaded weights as a plain dict
print dict(decoder.weights)

# Read grammar (decompressed contents of the gzip file)
with gzip.open(grammar_file) as f:
    grammar = f.read()

# Input sentence
sentence = u'澳洲 是 与 北韩 有 邦交 的 少数 国家 之一 。'
print '    Input:', sentence.encode('utf8')

# Decode: translate the sentence using the grammar read above
forest = decoder.translate(sentence, grammar=grammar)

# Get viterbi translation
print 'Output[0]:', forest.viterbi().encode('utf8')
# Viterbi trees; presumably f_tree is the source side and e_tree the
# target side -- TODO confirm against the cdec bindings
f_tree, e_tree = forest.viterbi_trees()
print ' FTree[0]:', f_tree.encode('utf8')
print ' ETree[0]:', e_tree.encode('utf8')
Esempio n. 6
0
def make_decoder(config, weights):
    """
    (Re)initialise the module-level ``decoder``.

    :param config: configuration passed to ``cdec.Decoder``
    :param weights: weights argument passed to ``Decoder.read_weights``
    """
    global decoder
    fresh_decoder = cdec.Decoder(config)
    # Bind the global before reading the weights, so the module-level name
    # is set even if read_weights raises.
    decoder = fresh_decoder
    fresh_decoder.read_weights(weights)