Beispiel #1
0
    def _START_MODEL_(config, default=None, overwrite_q=False):
        """
        Build the initial joint weight map from the configuration.

        Requires: config, config_learning
        Produces: wmap

        NOTE(review): config_learning is listed above but is not read here.

        :param config: configuration object whose 'proxy' and 'target'
            sections are read through ``config.items``
        :param default: when not None, every proxy and target feature keeps
            its name but has its weight replaced by this value
        :param overwrite_q: when true, each proxy feature takes the weight
            that the target assigns to the feature of the same name
        :returns: JointWMap(proxy WMap, target WMap), each WMap built from
            the (feature, weight) pairs ordered by feature name
        """

        def by_name(weights):
            # wrap the (feature, weight) pairs, ordered by feature name only
            return WMap(sorted(weights.items(), key=lambda pair: pair[0]))

        # parameters of the instrumental distribution
        proxy_weights = scaled_fmap(config.items('proxy'))
        # parameters of the target distribution
        target_weights = scaled_fmap(config.items('target'))

        if default is not None:
            # keep the feature names, discard the configured values
            proxy_weights = {name: default for name, _ in proxy_weights.items()}
            target_weights = {name: default for name, _ in target_weights.items()}

        if overwrite_q:
            # q takes p's weights for the features q already has.
            # NOTE: a check that the features in q(d) are a subset of the
            # features in p(d) used to follow this step and is disabled; the
            # lookup below is therefore the only place where a proxy feature
            # missing from the target is noticed.
            proxy_weights = {
                name: target_weights[name]
                for name, _ in proxy_weights.items()
            }

        return JointWMap(by_name(proxy_weights), by_name(target_weights))
Beispiel #2
0
    def _START_MODEL_(config):
        """
        Build the initial joint weight map from the configuration.

        Requires: config, config_learning
        Produces: wmap

        NOTE(review): config_learning is listed above but is not read here.

        :param config: configuration object whose 'proxy' and 'target'
            sections are read through ``config.items``
        :returns: JointWMap(proxy WMap, target WMap), each WMap built from
            the (feature, weight) pairs ordered by feature name
        """

        def by_name(weights):
            # wrap the (feature, weight) pairs, ordered by feature name only
            return WMap(sorted(weights.items(), key=lambda pair: pair[0]))

        # parameters of the instrumental distribution
        proxy_weights = scaled_fmap(config.items('proxy'))
        # parameters of the target distribution
        target_weights = scaled_fmap(config.items('target'))

        return JointWMap(by_name(proxy_weights), by_name(target_weights))
Beispiel #3
0
    def configure(self, path):
        """
        Load the proxy and target weights stored in a configuration file.

        :param path: path to a configuration file containing weights
        :returns: JointWMap(proxy WMap, target WMap), each WMap built from
            the (feature, weight) pairs ordered by feature name
        :raises IOError: if nothing exists at `path`
        """

        # report a missing file up front, with a hint about --resume
        if not os.path.exists(path):
            raise IOError(
                'Config file not found: %s\nPerhaps you used --resume incorrectly?'
                % path)
        config = Config(path)

        def by_name(weights):
            # wrap the (feature, weight) pairs, ordered by feature name only
            return WMap(sorted(weights.items(), key=lambda pair: pair[0]))

        # parameters of the instrumental distribution
        proxy_weights = scaled_fmap(config.items('proxy'))

        # parameters of the target distribution
        target_weights = scaled_fmap(config.items('target'))

        return JointWMap(by_name(proxy_weights), by_name(target_weights))
Beispiel #4
0
def main():
    """
    Command-line entry point.

    Reads options and configuration via ``argparse_and_config``, prepares
    ``<workspace>/samples`` as the output directory, builds the cdec
    configuration string and the proxy/target weights, loads and configures
    the scorers (both the ``ff`` and the ``ffpp`` frameworks), parses one
    segment per line of standard input, maps ``sample_and_save`` over the
    segments with a pool of ``options.jobs`` workers and finally prints the
    collected feedback as a table on standard output.
    """
    options, config = argparse_and_config()

    # make output dir
    output_dir = '{0}/samples'.format(options.workspace)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    logging.info('Writing samples to: %s', output_dir)

    # cdec configuration string
    # TODO: I need to allow duplicates in cdec-features, currently duplicates overwrite each other
    # perhaps the easiest to do is to separate cdec-features from chisel.ini
    cdec_cfg_string = cdeclib.make_cdec_config_string(config.items('cdec'), config.items('cdec:features'))
    logging.debug('cdec.ini: %s', repr(cdec_cfg_string))

    # parameters of the instrumental distribution
    proxy_weights = scaled_fmap(config.items('proxy'), options.proxy_scaling)
    logging.debug('proxy (scaling=%f): %s', options.proxy_scaling, dict2str(proxy_weights, sort=True))

    # parameters of the target distribution
    target_weights = scaled_fmap(config.items('target'), options.target_scaling)
    logging.debug('target (scaling=%f): %s', options.target_scaling, dict2str(target_weights, sort=True))

    # loads scorer modules
    if config.has_section('chisel:scorers'):
        ff.load_scorers(config.items('chisel:scorers'))

    # scorers' configuration (empty when the section is absent)
    if config.has_section('chisel:scorers:config'):
        scorers_config = dict(config.items('chisel:scorers:config'))
    else:
        scorers_config = {}
    logging.debug('chisel:scorers:config: %s', scorers_config)
    # configure scorers
    ff.configure_scorers(scorers_config)

    # FF++: an improved FF framework
    # 1. load implementations
    if config.has_section('chisel:scorers++'):
        scorerspp_map = dict(config.items('chisel:scorers++'))
        # hand over an iterator, as iteritems() did
        ffpp.load_scorers(iter(scorerspp_map.items()))
    # 2. config scorers (empty when the section is absent)
    if config.has_section('chisel:scorers++:config'):
        scorerspp_config = dict(config.items('chisel:scorers++:config'))
    else:
        scorerspp_config = {}
    logging.info('chisel:scorers++:config: %s', scorerspp_config)
    ffpp.configure_scorers(scorerspp_config)
    # FF++ done

    # logs the target features that have no counterpart in the proxy
    extra_features = {k: v for k, v in target_weights.items() if k not in proxy_weights}
    logging.debug('Extra features: %s', extra_features)

    # reads segments from input, one per line
    segments = [SegmentMetaData.parse(line.strip(),
                                      'cdec',
                                      grammar_dir=options.grammars)
                for line in sys.stdin]

    # sample and save results
    logging.info('Distributing %d segments to %d jobs', len(segments), options.jobs)
    pool = Pool(options.jobs)
    feedback = pool.map(partial(sample_and_save,
                                proxy_weights=proxy_weights,
                                target_weights=target_weights,
                                cdec_cfg_str=cdec_cfg_string,
                                output_dir=output_dir,
                                options=options),
                        segments)
    feedback.sort(key=lambda t: t[0])  # sort by segment id

    # single-argument call form: prints the same text on Python 2 and 3
    print(tabulate(feedback, headers=('job', 'derivations', 'strings'), tablefmt='pipe'))