Esempio n. 1
0
    def prepareSGNMT(self, args):
        """Set up SGNMT for agent training.

        Loads the maps referenced by ``args.src_wmap``, ``args.trg_wmap``
        and ``args.trg_cmap``, runs the base initialisation of the decoding
        utilities, and stores the decoder, its first predictor and the
        output handlers on this instance. Finally the initial hypotheses
        are loaded via ``_load_all_initial_hypos``.

        Hidden states are only produced when ``_get_hidden_states()`` is
        called; nothing is returned from this method.

        Args:
            args: Parsed configuration object providing at least the
                ``src_wmap``, ``trg_wmap`` and ``trg_cmap`` attributes.
        """
        # Python 2 only: wrap the standard streams so they handle UTF-8.
        if sys.version_info[0] < 3:
            utf8_writer = codecs.getwriter('UTF-8')
            sys.stderr = utf8_writer(sys.stderr)
            sys.stdout = utf8_writer(sys.stdout)
            sys.stdin = codecs.getreader('UTF-8')(sys.stdin)

        # The maps must be in place before the decoder is built.
        utils.load_src_wmap(args.src_wmap)
        utils.load_trg_wmap(args.trg_wmap)
        utils.load_trg_cmap(args.trg_cmap)
        decode_utils.base_init(args)

        self.decoder = decode_utils.create_decoder()
        # Only the sim_t2t predictor is configured, so take the first
        # element of the first predictor entry.
        first_entry = self.decoder.predictors[0]
        self.predictor = first_entry[0]

        self.outputs = decode_utils.create_output_handlers()
        self._load_all_initial_hypos(args)
Esempio n. 2
0
    print("                             may require loading the decoder from ")
    print(
        "                             scratch, some (like changing predictor")
    print("                             weights) can be done on the fly. For ")
    print("                             printing help text for all available")
    print("                             parameters use")
    print("                               !sgnmt config (without arguments)")
    print("!sgnmt decode <file_name>     Decode sentences in the given file")
    print("!sgnmt reset                  Reset predictors, e.g. set sentence")
    print("                             counter to 1 for fst predictor.")
    print("!sgnmt quit                   Quit SGNMT")
    print("!sgnmt help                   Print this help")


# Load the maps referenced by the src_wmap / trg_wmap / trg_cmap arguments,
# then build the decoder and the output handlers.
utils.load_src_wmap(args.src_wmap)
utils.load_trg_wmap(args.trg_wmap)
utils.load_trg_cmap(args.trg_cmap)
decoder = decode_utils.create_decoder(args)
outputs = decode_utils.create_output_handlers()

if args.input_method == 'file':
    # Check for additional input files. The explicit ``None`` default makes
    # this a real optional lookup: without it ``getattr`` is plain attribute
    # access and raises AttributeError when ``src_test2`` is not defined.
    if getattr(args, "src_test2", None):
        decode_utils.do_decode(decoder, outputs,
                               _process_inputs(args.sync_symbol))
    else:
        decode_utils.do_decode(decoder, outputs,
                               _process_input(args.sync_symbol))
#        with codecs.open(args.src_test, encoding='utf-8') as f:
#            decode_utils.do_decode(decoder,
#                                   outputs,
Esempio n. 3
0
    """Print help text for shell usage in interactive mode."""
    print("Available SGNMT directives:")
    print("!sgnmt config <name> <value>  Update the configuration. Some changes")
    print("                             may require loading the decoder from ")
    print("                             scratch, some (like changing predictor")
    print("                             weights) can be done on the fly. For ")
    print("                             printing help text for all available")
    print("                             parameters use")
    print("                               !sgnmt config (without arguments)")
    print("!sgnmt decode <file_name>     Decode sentences in the given file")
    print("!sgnmt quit                   Quit SGNMT")
    print("!sgnmt help                   Print this help")


# Load the maps referenced by the src_wmap / trg_wmap / trg_cmap arguments,
# then create the decoder and the output handlers shared by all input modes.
utils.load_src_wmap(args.src_wmap)
utils.load_trg_wmap(args.trg_wmap)
utils.load_trg_cmap(args.trg_cmap)
decoder = decode_utils.create_decoder()
outputs = decode_utils.create_output_handlers()

if args.input_method == 'file':
    # Read the UTF-8 test file and decode it in one call; each line is
    # stripped and split on whitespace into a list of tokens.
    with codecs.open(args.src_test, encoding='utf-8') as f:
        decode_utils.do_decode(decoder,
                               outputs,
                               [line.strip().split() for line in f])
elif args.input_method == 'dummy':
    # NOTE(review): False is passed where the 'file' branch passes a list of
    # sentences; presumably do_decode treats it as "no source input" - confirm.
    decode_utils.do_decode(decoder, outputs, False)
else: # Interactive mode: shell or stdin
    # Announce interactive mode and print the process id of this run.
    print("Start interactive mode.")
    print("PID: %d" % os.getpid())
    print("Test sentences are read directly from stdin.")
Esempio n. 4
0
    def __init__(self,
                 source_sentence,
                 samples,
                 model,
                 data_stream,
                 config,
                 n_best=1,
                 track_n_models=1,
                 normalize=True,
                 store_full_main_loop=False,
                 **kwargs):
        """Create an extension which adds model selection based on the
        accuracy score to the training main loop.

        Besides storing its arguments, the constructor picks the beam
        search implementation (sparse or plain) from the configuration,
        creates the ``saveto`` directory if needed, optionally reloads
        previously stored validation scores, and loads the target word map.

        Args:
            source_sentence (Variable): Input variable to the sampling
                computation graph.
            samples (Variable): Samples variable of the CG.
            model (NMTModel): See the model module.
            data_stream (DataStream): Data stream to the development set.
            config (dict): NMT configuration. The keys read here are
                ``src_sparse_feat_map``, ``trg_sparse_feat_map``,
                ``saveto``, ``reload``, ``trg_wmap`` and, optionally,
                ``val_set_out``.
            n_best (int): Beam size.
            track_n_models (int): Number of n-best models for which to
                create checkpoints.
            normalize (boolean): Enables length normalization.
            store_full_main_loop (boolean): Stores the iteration state in
                the old style of Blocks 0.1. Not recommended.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super(AccValidator, self).__init__(**kwargs)
        self.store_full_main_loop = store_full_main_loop
        self.source_sentence = source_sentence
        self.samples = samples
        self.model = model
        self.data_stream = data_stream
        self.config = config
        self.n_best = n_best
        self.track_n_models = track_n_models
        self.normalize = normalize
        self.best_models = []
        self.val_bleu_curve = []

        # Fall back to flat feature maps when none is configured; a
        # configured target map additionally selects the sparse beam search.
        self.src_sparse_feat_map = (config['src_sparse_feat_map']
                                    or FlatSparseFeatMap())
        if config['trg_sparse_feat_map']:
            self.trg_sparse_feat_map = config['trg_sparse_feat_map']
            self.beam_search = SparseBeamSearch(
                samples=samples,
                trg_sparse_feat_map=self.trg_sparse_feat_map)
        else:
            self.trg_sparse_feat_map = FlatSparseFeatMap()
            self.beam_search = BeamSearch(samples=samples)

        # Create saving directory if it does not exist
        if not os.path.exists(self.config['saveto']):
            os.makedirs(self.config['saveto'])

        if self.config['reload']:
            # Reloading is best effort: any failure is logged and training
            # starts with an empty score history. ``Exception`` (rather than
            # a bare ``except``) lets KeyboardInterrupt/SystemExit through.
            try:
                scores_path = os.path.join(self.config['saveto'],
                                           'val_bleu_scores.npz')
                # The context manager closes the archive's file handle.
                with numpy.load(scores_path) as bleu_score:
                    self.val_bleu_curve = bleu_score['bleu_scores'].tolist()
                # Track n best previous bleu scores
                ranked_scores = sorted(self.val_bleu_curve, reverse=True)
                for rank, bleu in enumerate(ranked_scores):
                    if rank >= self.track_n_models:
                        break
                    self.best_models.append(ModelInfo(bleu))
                logging.info("BleuScores Reloaded")
            except Exception:
                logging.info("BleuScores not Found")

        self.verbose = self.config.get('val_set_out', None)
        utils.load_trg_wmap(self.config['trg_wmap'])
        self.trg_wmap = utils.trg_wmap