def train_single_sent_id(self, sentences, iteration, work=None, neu1=None):
    """Train and return a brand-new sentence vector on *sentences*.

    Parameters
    ----------
    sentences : iterable of token lists
        Sentence(s) the new vector is fitted to; tokens missing from
        ``self.vocab`` are passed through as ``None``.
    iteration : int
        Number of passes over *sentences*.
    work, neu1 : ndarray, optional
        Pre-allocated scratch buffers; fresh ones are allocated when omitted.

    Returns
    -------
    ndarray
        The trained sentence vector (length ``self.layer1_size``).
    """
    # FIX: match worker_train, which allocates its scratch buffers as
    # zeros_aligned(layer1_size + 8).  The previous unaligned/under-sized
    # buffers here risked out-of-bounds writes inside the native
    # train_sent_vec routine, which is handed the same buffers.
    if work is None:
        work = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)
    if neu1 is None:
        neu1 = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)

    # Size of the per-sentence gradient cache depends on the update rule;
    # mode 0 needs none.  (Exact optimizer semantics of modes 1-3 are
    # defined by train_sent_vec -- sizes taken from the branches below.)
    if self.update_mode == 1:
        num_of_grad = self.layer1_size
    elif self.update_mode == 2:
        num_of_grad = 2 * self.layer1_size
    elif self.update_mode == 3:
        num_of_grad = 2 * self.layer1_size + 3
    else:
        num_of_grad = 0
    sent_grad = zeros(num_of_grad, dtype=REAL)

    # Random start in [-0.5, 0.5) scaled down by the layer size
    # (sqrt scaling when init_adjust is set).
    denom = sqrt(self.layer1_size) if self.init_adjust else self.layer1_size
    new_sent = (random.rand(self.layer1_size).astype(REAL) - 0.5) / denom

    for i in range(iteration):
        # Only plain SGD (update_mode 0) decays alpha linearly per epoch;
        # the adaptive modes keep the base learning rate.
        if self.update_mode == 0:
            alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * i / iteration))
        else:
            alpha = self.alpha
        for sentence in sentences:
            sampled = [self.vocab.get(word, None) for word in sentence]
            train_sent_vec(self, new_sent, sampled, alpha, work, neu1, sent_grad)
    return new_sent
def worker_train():
    """Train the model, lifting lists of sentences from the jobs queue."""
    # Each worker thread owns its own scratch memory for train_sent_vec.
    work = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)
    neu1 = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)

    while True:
        job = jobs.get()
        if job is None:
            # Sentinel: the data stream is exhausted, shut this worker down.
            break

        # Refresh the learning rate once per job; only plain SGD
        # (update_mode 0) decays alpha with overall training progress.
        if self.update_mode == 0:
            progress = 1 - 1.0 * word_count[0] / total_words
            alpha = max(self.min_alpha, self.alpha * progress)
        else:
            alpha = self.alpha

        job_words = 0
        for sent_no, sentence in job:
            job_words += train_sent_vec(self, self.sents[sent_no], sentence,
                                        alpha, work, neu1,
                                        self.sents_grad[sent_no])

        with lock:
            word_count[0] += job_words
            sent_count[0] += chunksize
            elapsed = time.time() - start
            if elapsed >= next_report[0]:
                logger.info("PROGRESS: at %.2f%% sents, alpha %.05f, %.0f words/s" %
                            (100.0 * sent_count[0] / total_sents, alpha,
                             word_count[0] / elapsed if elapsed else 0.0))
                # Throttle logging: at most one progress report per second.
                next_report[0] = elapsed + 1.0
def train_single_sent_id(self, sentences, iteration, work=None, neu1=None):
    """Fit and return a fresh sentence vector trained on *sentences*.

    Performs *iteration* passes of ``train_sent_vec`` over the given
    sentences, starting from a small random vector.  *work* and *neu1*
    are optional scratch buffers, allocated on demand when omitted.
    """
    work = zeros(self.layer1_size, dtype=REAL) if work is None else work
    neu1 = (matutils.zeros_aligned(self.layer1_size, dtype=REAL)
            if neu1 is None else neu1)

    # Gradient-cache size is dictated by the update rule in use;
    # mode 0 (and any unknown mode) needs no cache.
    grad_sizes = {
        1: self.layer1_size,
        2: 2 * self.layer1_size,
        3: 2 * self.layer1_size + 3,
    }
    sent_grad = zeros(grad_sizes.get(self.update_mode, 0), dtype=REAL)

    # Small random start in [-0.5, 0.5), scaled down by the layer size
    # (sqrt scaling when init_adjust is set).
    scale = sqrt(self.layer1_size) if self.init_adjust else self.layer1_size
    new_sent = (random.rand(self.layer1_size).astype(REAL) - 0.5) / scale

    for epoch in range(iteration):
        # Plain SGD decays alpha linearly per epoch; adaptive modes don't.
        if self.update_mode == 0:
            alpha = max(self.min_alpha,
                        self.alpha * (1 - 1.0 * epoch / iteration))
        else:
            alpha = self.alpha
        for sentence in sentences:
            tokens = [self.vocab.get(word, None) for word in sentence]
            train_sent_vec(self, new_sent, tokens, alpha, work, neu1, sent_grad)
    return new_sent
def worker_train():
    """Train the model, lifting lists of sentences from the jobs queue."""
    # Every worker thread must hold its own scratch buffers.
    work = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)
    neu1 = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)

    for job in iter(jobs.get, None):  # None is the shutdown sentinel
        # Update the learning rate before every job: plain-SGD mode
        # decays alpha as training progresses, the adaptive update
        # modes run with a fixed base rate.
        if self.update_mode == 0:
            remaining = 1 - 1.0 * word_count[0] / total_words
            alpha = max(self.min_alpha, self.alpha * remaining)
        else:
            alpha = self.alpha

        job_words = sum(
            train_sent_vec(self, self.sents[sent_no], sentence, alpha,
                           work, neu1, self.sents_grad[sent_no])
            for sent_no, sentence in job)

        with lock:
            word_count[0] += job_words
            sent_count[0] += chunksize
            elapsed = time.time() - start
            if elapsed >= next_report[0]:
                logger.info("PROGRESS: at %.2f%% sents, alpha %.05f, %.0f words/s" %
                            (100.0 * sent_count[0] / total_sents, alpha,
                             word_count[0] / elapsed if elapsed else 0.0))
                # don't flood the log: wait at least a second between reports
                next_report[0] = elapsed + 1.0