Esempio n. 1
0
        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue.

            Runs until a ``None`` job is pulled from ``jobs``. For every job the
            learning rate is recomputed from the shared ``word_count`` progress,
            each sentence in the job is trained, and the shared counters are
            updated (and progress optionally logged) while holding ``lock``.
            """
            work = matutils.zeros_aligned(self.layer1_size, dtype=REAL)  # each thread must have its own work memory

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # update the learning rate before every job
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
                # how many words did we train on? out-of-vocabulary (unknown) words do not count
                if self.sampler:
                    # Counting words is a separate step here: the number of truthy
                    # entries in the sentence is passed alongside the sampled sentence.
                    # A generator count is used instead of len(filter(...)), which
                    # fails on Python 3 where filter() returns an iterator.
                    job_words = sum(
                        train_sentence_sampler(
                            self,
                            self.sampler(sentence),
                            sum(1 for word in sentence if word),
                            alpha,
                            work,
                        )
                        for sentence in job
                    )
                else:
                    job_words = sum(train_sentence(self, sentence, alpha, work) for sentence in job)

                with lock:
                    word_count[0] += job_words
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                            (100.0 * word_count[0] / total_words, alpha, word_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports
Esempio n. 2
0
        def worker_train():
            """Pull batches of sentences off the jobs queue and train on each until told to stop."""
            # scratch memory is allocated inside the worker, so each thread has its own
            scratch = matutils.zeros_aligned(self.layer1_size, dtype=REAL)

            while True:
                batch = jobs.get()
                if batch is None:
                    # sentinel received: no more data, this worker is done
                    return

                # refresh the learning rate ahead of every batch, based on shared progress
                progress = 1.0 * word_count[0] / total_words
                alpha = max(self.min_alpha, self.alpha * (1 - progress))

                # add up what each training call reports as trained words
                # (per the original note, out-of-vocabulary words do not count)
                trained = 0
                for sentence in batch:
                    trained = trained + train_sentence(self, sentence, alpha, scratch)

                with lock:
                    word_count[0] += trained
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        percent_done = 100.0 * word_count[0] / total_words
                        words_per_sec = word_count[0] / elapsed if elapsed else 0.0
                        logger.info(
                            "PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s"
                            % (percent_done, alpha, words_per_sec))
                        # throttle the log: at least one second between progress reports
                        next_report[0] = elapsed + 1.0
Esempio n. 3
0
        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue.

            Runs until a ``None`` job is pulled from ``jobs``. Progress is written
            to stderr on a single carriage-return-refreshed line, at most once per
            second, followed by one newline-terminated summary when the worker exits.
            """
            # each thread must have its own work memory
            work = zeros_aligned(self.layer1_size, dtype=REAL)
            prog_msg = "\rPROGRESS: {:5.2%}  {:.5f}α  {:6.0f}w/s"
            # Pre-bind the rate so the final summary cannot hit an unbound local
            # when this worker receives the stop sentinel before any real job.
            alpha = self.alpha

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # update the learning rate before every job
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
                # how many words did we train on? out-of-vocabulary (unknown) words do not count
                job_words = sum(train_sentence(self, sentence, alpha, work) for sentence in job)
                with lock:
                    word_count[0] += job_words
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        print(prog_msg.format(word_count[0] / total_words, alpha,
                                              word_count[0] / elapsed if elapsed else 0.0),
                              end='', file=sys.stderr)
                        next_report[0] = elapsed + 1.0  # don't flood the log,
                        # wait at least a second between progress reports

            # Final summary: recompute the elapsed time here instead of relying on a
            # value that only exists if at least one job was processed, and read the
            # shared counter under the same lock used for the in-loop reports.
            with lock:
                elapsed = time.time() - start
                print(prog_msg.format(word_count[0] / total_words, alpha,
                                      word_count[0] / elapsed if elapsed else 0.0), file=sys.stderr)