Example #1
    # zeros, sqrt, random, REAL, matutils and train_sent_vec are module-level
    # imports/helpers of the original file and are assumed to be in scope.
    def train_single_sent_id(self, sentences, iteration, work=None, neu1=None):
        """Build a fresh sentence vector and refine it over `iteration` passes."""
        # Scratch buffers for train_sent_vec; callers may supply their own.
        if work is None:
            work = zeros(self.layer1_size, dtype=REAL)
        if neu1 is None:
            neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL)

        # Per-dimension gradient state; its size depends on the update mode
        # (mode 0 is plain SGD and keeps no extra state).
        num_of_grad = 0
        if self.update_mode == 1:
            num_of_grad = self.layer1_size
        elif self.update_mode == 2:
            num_of_grad = 2 * self.layer1_size
        elif self.update_mode == 3:
            num_of_grad = 2 * self.layer1_size + 3
        sent_grad = zeros(num_of_grad, dtype=REAL)

        # Scale of the random initialisation for the new sentence vector.
        if self.init_adjust:
            denom = sqrt(self.layer1_size)
        else:
            denom = self.layer1_size
        new_sent = (random.rand(self.layer1_size).astype(REAL) - 0.5) / denom

        for i in range(iteration):
            # Decay the learning rate linearly only for plain SGD (update_mode 0).
            if self.update_mode == 0:
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * i / iteration))
            else:
                alpha = self.alpha
            for sentence in sentences:
                # Look up vocab entries; out-of-vocabulary words become None.
                sampled = [self.vocab.get(word, None) for word in sentence]
                train_sent_vec(self, new_sent, sampled, alpha, work, neu1, sent_grad)

        return new_sent
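
The method above builds one randomly initialised sentence vector and refines it with `iteration` passes of train_sent_vec over the supplied sentences. A minimal usage sketch follows; the `model` variable, the example sentence, and the chosen iteration count are illustrative assumptions, not part of the snippet.

# Minimal usage sketch (hypothetical): infer a vector for an unseen sentence.
# `model` is assumed to be an already trained instance of the class that
# defines train_single_sent_id; nothing below is prescribed by the snippet.
new_sentences = [["the", "quick", "brown", "fox", "jumps"]]

# One fresh vector is drawn at random, then refined for 20 passes over the
# sentences; words missing from model.vocab are passed through as None.
vec = model.train_single_sent_id(new_sentences, iteration=20)

print(vec.shape)   # (model.layer1_size,)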
Example #2
        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            # Each thread must have its own work memory: per-thread scratch
            # buffers passed to train_sent_vec.
            work = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)
            neu1 = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)

            while True:
                job = jobs.get()
                if job is None:  # a None sentinel means the data is finished, exit
                    break

                # Update the learning rate before every job: linear decay over the
                # corpus for plain SGD (update_mode 0), constant base alpha otherwise.
                if self.update_mode == 0:
                    alpha = max(self.min_alpha,
                                self.alpha * (1 - 1.0 * word_count[0] / total_words))
                else:
                    alpha = self.alpha

                # Train every (sent_no, sentence) pair in the job; train_sent_vec
                # returns the number of words it actually processed.
                job_words = sum(train_sent_vec(self, self.sents[sent_no], sentence, alpha,
                                               work, neu1, self.sents_grad[sent_no])
                                for sent_no, sentence in job)

                # Update the shared progress counters and report at most once a second.
                with lock:
                    word_count[0] += job_words
                    sent_count[0] += chunksize
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% sents, alpha %.05f, %.0f words/s" %
                                    (100.0 * sent_count[0] / total_sents, alpha,
                                     word_count[0] / elapsed if elapsed else 0.0))
                        # Don't flood the log: wait at least a second between reports.
                        next_report[0] = elapsed + 1.0
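
worker_train is a nested function: jobs, lock, word_count, sent_count, total_words, total_sents, chunksize, start and next_report all come from the enclosing training method. The sketch below shows the kind of producer/consumer wiring such a worker expects; the thread count, the chunking loop and the sentinel handling are illustrative assumptions, not the project's actual train() code.

import threading
import queue

# Sketch of the surrounding wiring (assumed, not taken from the project):
# several workers consume chunks of (sent_no, sentence) pairs from a queue.
# `worker_train` and `sentences` are assumed to be defined as in the snippet above.
workers = 4
chunksize = 100
jobs = queue.Queue(maxsize=2 * workers)
lock = threading.Lock()            # guards the shared word_count / sent_count updates

threads = [threading.Thread(target=worker_train) for _ in range(workers)]
for t in threads:
    t.daemon = True                # workers exit with the main thread
    t.start()

# Producer: enqueue chunks of (sent_no, sentence) pairs ...
chunk = []
for sent_no, sentence in enumerate(sentences):
    chunk.append((sent_no, sentence))
    if len(chunk) == chunksize:
        jobs.put(chunk)
        chunk = []
if chunk:
    jobs.put(chunk)

# ... then one None sentinel per worker so every worker_train loop breaks.
for _ in range(workers):
    jobs.put(None)
for t in threads:
    t.join()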