def tf_fn(ctx_sbls):
    """
    Map function for vsm.model.TfMulti.

    Cuts the span covered by the given document slices out of the
    module-level `_corpus`, shifts every slice so that it indexes into
    that span, and hands the result to `count_matrix`.

    NOTE(review): assumes `ctx_sbls` is non-empty and that its first
    element has the smallest `start` and its last the largest `stop`
    -- confirm against the caller.

    :param ctx_sbls: list of documents as slices.
    :type ctx_sbls: list of slices

    :returns: a count matrix, as produced by `count_matrix`.
    """
    base = ctx_sbls[0].start
    window = _corpus[base:ctx_sbls[-1].stop]

    # Re-express each slice relative to the start of the extracted window.
    rebased = []
    for doc in ctx_sbls:
        rebased.append(slice(doc.start - base, doc.stop - base))

    return count_matrix(window, rebased, _V.value)
def tf_fn(ctx_sbls):
    """
    Map function for vsm.model.TfMulti.

    Takes a list of documents expressed as slices, extracts the part of
    the module-level `_corpus` running from the first slice's start to
    the last slice's stop, and returns the count matrix computed over
    that part.

    NOTE(review): presumably the slices are sorted by position and the
    list is non-empty -- verify against the caller.

    :param ctx_sbls: list of documents as slices.
    :type ctx_sbls: list of slices

    :returns: a count matrix, as produced by `count_matrix`.
    """
    shift = ctx_sbls[0].start
    segment = _corpus[shift:ctx_sbls[-1].stop]

    def _rebase(sl):
        # Translate a slice into coordinates local to `segment`.
        return slice(sl.start - shift, sl.stop - shift)

    local_slices = list(map(_rebase, ctx_sbls))
    return count_matrix(segment, local_slices, _V.value)
def train(self):
    """
    Counts word-type occurrences per context and stores the results in
    `self.matrix`.

    The counts come from `count_matrix`, called with `self.corpus`,
    `self.docs` and `self.V`. Nothing is returned.
    """
    counts = count_matrix(self.corpus, self.docs, self.V)
    self.matrix = counts