예제 #1
0
 def get_datum_in_doc(self, doc, i):
     """Return datum ``i`` of document ``doc``, passed through ``asvector``.

     The most recently read document is kept on the instance
     (``current_doc``, ``current_base_index``, ``doc_data``), so repeated
     lookups within the same document do not call ``read_doc`` again.

     Args:
         doc: Document identifier; used as the row key into
             ``self.doc_index`` and as the argument to ``self.read_doc``.
         i: Index of the datum. It is offset by the document's base
             index (``self.doc_index[doc, 0]``) before being used as a
             column index -- presumably a corpus-wide index; confirm
             against callers.

     Returns:
         ``asvector`` applied to the selected column of the document data
         (the original author describes this as a column vector).
     """
     if self.current_doc != doc:
         # Cache miss: switch the cached document and load its contents.
         self.current_doc = doc
         self.current_base_index = self.doc_index[doc, 0]
         self.doc_data = self.read_doc(doc)

     loaded = self.doc_data
     # Translate the caller's index into a column offset within this doc.
     offset = i - self.current_base_index
     return asvector(loaded[:, offset])
예제 #2
0
def corpus_mean(reader):
    """Compute the mean datum over every document exposed by ``reader``.

    Each document is read once via ``reader.read_doc``, summed along
    axis 1, and accumulated into a float64 vector of length ``reader.dim``.
    The accumulated total is divided by ``reader.num_data`` and passed
    through ``asvector``.

    Args:
        reader: Object providing ``dim``, ``num_docs``, ``num_data`` and
            ``read_doc(d)``. Presumably each document is a
            ``(dim, n_items)`` array with one datum per column -- confirm
            against the reader implementation.

    Returns:
        ``asvector(total / reader.num_data)``.
    """
    total = np.zeros((reader.dim,), dtype='float64')
    for doc_id in range(reader.num_docs):
        doc_total = reader.read_doc(doc_id).sum(axis=1)
        # Accumulate in place so no new array is allocated per document.
        np.add(total, doc_total, out=total)
    mean = total / reader.num_data
    return asvector(mean)