Example #1
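    # NB: excerpt of an experiment setup routine; assumes `import os` plus
    # the library's ensure_dirs / get_exp_identifier / get_next_runid /
    # Logger / cleanup / get_device helpers are available at module level.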
    ensure_dirs([model_args.save_path])

    # Create a unique experiment identifier string
    exp_id = get_exp_identifier(train_args, model_args, suffix=suffix)
    # Get unique run identifier (starts from 1)
    run_id = get_next_runid(model_args.save_path, exp_id)
    # Get log file name
    log_fname = None
    if not nolog:
        log_fname = os.path.join(model_args.save_path,
                                 "%s.%d.log" % (exp_id, run_id))

    # Start logging module (both to terminal and to file)
    Logger.setup(log_file=log_fname, timestamp=tstamp)
    log = Logger.get()
    cleanup.register_handler(log)

    # Update save_path
    model_args.save_path = os.path.join(model_args.save_path,
                                        "%s.%d" % (exp_id, run_id))

    # Ensure the validation hypotheses folder exists if valid_save_hyp is enabled
    if train_args.valid_save_hyp:
        ensure_dirs([model_args.save_path + '.valid_hyps'])

    ###################################
    # Set device for Theano
    if 'THEANO_FLAGS' not in os.environ:
        train_args.device_id = get_device(train_args.device_id)

        # Check for GPUARRAY to switch to new Theano backend
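
For context, a minimal sketch of what a run-id helper like get_next_runid could
do, assuming it simply scans save_path for earlier runs of the same experiment
and returns the next free integer; the library's actual implementation may
differ:

import os
import re

def get_next_runid(save_path, exp_id):
    # Hypothetical sketch: match entries named "<exp_id>.<N>" or
    # "<exp_id>.<N>.log" and return max(N) + 1 (1 for the first run)
    pattern = re.compile(re.escape(exp_id) + r'\.(\d+)')
    run_ids = [0]
    for fname in os.listdir(save_path):
        match = pattern.match(fname)
        if match:
            run_ids.append(int(match.group(1)))
    return max(run_ids) + 1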
Example #2
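    # NB: excerpt; assumes `from multiprocessing import Process, Queue`
    # and `import sys, time` at module level.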
    def start(self):
        # create input and output queues for processes
        write_queue = Queue()
        read_queue = Queue()

        # Create processes
        for idx in range(self.n_jobs):
            self.processes[idx] = Process(
                target=translate_model,
                args=(write_queue, read_queue, idx, self.models,
                      self.beam_size, self.nbest, self.suppress_unks,
                      self.export, self.seed, self.mode))
            # Start process and register for cleanup
            self.processes[idx].start()
            cleanup.register_proc(self.processes[idx].pid)

        # Hand the logger to the cleanup module so it can log during teardown
        cleanup.register_handler(log)

        # Send data to worker processes
        for idx in range(self.n_sentences):
            write_queue.put((idx, next(self.iterator)))

        log.info("Distributed %d sentences to worker processes." %
                 self.n_sentences)

        # Receive the results
        self.trans = [None] * self.n_sentences
        self.scores = [None] * self.n_sentences

        # Will be filled if --export is passed
        self.att_weights = [None] * self.n_sentences

        # Timers for overall and per-100-sentence throughput
        start_time = per100_time = time.time()

        for i in range(self.n_sentences):
            # Get response from worker
            resp = read_queue.get()

            if resp is None:
                # Worker(s) failed
                log.info('One or more of the workers failed, exiting.')
                sys.exit(1)

            # Index of the sample this response belongs to
            sample_idx = resp[0]

            # Get the hypotheses, scores and attention weights if any
            hyps, self.scores[sample_idx], attw = resp[1:]

            # Did we receive attention weights from beam search?
            if attw is not None:
                self.att_weights[sample_idx] = attw[0]

            # Place the hypotheses into their relevant places
            self.trans[sample_idx] = [
                idx_to_sent(self.trg_idict, hyp) for hyp in hyps
            ]

            # Print progress
            if (i + 1) % 100 == 0:
                elapsed = time.time() - per100_time
                log.info("%4d/%d sentences completed (%.2f seconds)" %
                         ((i + 1), self.n_sentences, elapsed))
                per100_time = time.time()

        # Total time spent during beam search
        total_time = time.time() - start_time
        sent_per_sec = int(self.n_sentences / total_time)

        log.info("-------------------------------------------")
        log.info("Total decoding time: %3.3f seconds (%d sentences / sec)" %
                 (total_time, sent_per_sec))

        # Compute word-based time statistics as well
        if self.nbest == 1:
            n_words = float(sum(len(s[0].split(' ')) for s in self.trans))
            word_per_sec = int(n_words / total_time)
            log.info("~%d words / sec" % word_per_sec)

        # Stop workers
        for pidx in range(self.n_jobs):
            self.processes[pidx].terminate()
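
For reference, a minimal sketch of the worker side of this queue protocol,
inferred only from how start() unpacks read_queue responses above: each
translate_model process would pull (sample_idx, sentence) pairs from
write_queue, push (sample_idx, hyps, score, attw) tuples to read_queue, and
signal failure with None. beam_search below is a placeholder stub, not the
library's decoder:

def beam_search(models, sentence, beam_size):
    # Placeholder for the real decoder: returns n-best hypotheses,
    # a score and optional attention weights
    return [sentence], 0.0, None

def translate_model(write_queue, read_queue, idx, models, beam_size,
                    nbest, suppress_unks, export, seed, mode):
    try:
        while True:
            # Block until the parent sends the next sample
            sample_idx, sentence = write_queue.get()
            hyps, score, attw = beam_search(models, sentence, beam_size)
            # Tuple layout mirrors what start() expects: resp[0] is the
            # sample index, resp[1:] is (hyps, score, attw)
            read_queue.put((sample_idx, hyps, score, attw))
    except Exception:
        # The parent exits as soon as it reads None from the queue
        read_queue.put(None)

Since the parent stops workers with terminate(), the loop needs no stop
sentinel.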