Ejemplo n.º 1
0
 def test_sampling(self):
     """Smoke-run ``Sampling`` on a freshly initialised topic model.

     Creates the initialiser via ``initilaze_topic_model()``, runs its
     ``initilize()`` set-up, builds a ``Sampling`` object from the
     initialiser's ``xcorpus`` / ``ycorpus`` and calls ``sampling`` with its
     ``TOPICS``, ``xcounts``, ``ycounts``, ``docid`` and ``different_word``
     attributes.

     The two corpora held by the sampler are printed afterwards for manual
     inspection; this test makes no assertions.
     """
     init = initilaze_topic_model()
     init.initilize()
     sampleman = Sampling(init.xcorpus, init.ycorpus)
     sampleman.sampling(
         init.TOPICS, init.xcounts, init.ycounts, init.docid,
         init.different_word)
     # Single-argument print(...) produces the same output on Python 2 and
     # Python 3, whereas the bare ``print x`` statement is a SyntaxError on
     # Python 3.
     print(sampleman.xcorpus)
     print(sampleman.ycorpus)
Ejemplo n.º 2
0
 def test_sampling(self):
     """Run the sampler once and print the corpora it holds afterwards.

     Steps, in order:

     1. ``initilaze_topic_model()`` is instantiated and ``initilize()`` is
        called on it.
     2. A ``Sampling`` object is built from the initialiser's ``xcorpus``
        and ``ycorpus``.
     3. ``sampling`` is invoked with the initialiser's ``TOPICS``,
        ``xcounts``, ``ycounts``, ``docid`` and ``different_word``.

     Nothing is asserted; the output is meant to be inspected by hand.
     """
     init = initilaze_topic_model()
     init.initilize()
     sampleman = Sampling(init.xcorpus, init.ycorpus)
     sampleman.sampling(init.TOPICS, init.xcounts, init.ycounts, init.docid,
                        init.different_word)
     # Use the call form of print: with exactly one argument it is valid and
     # equivalent on both Python 2 and Python 3 (the statement form only
     # parses on Python 2).
     print(sampleman.xcorpus)
     print(sampleman.ycorpus)
Ejemplo n.º 3
0
        with open(path, "r", encoding=encoding) as f:
            for line in f:
                words = tokenize(line.strip())
                if len(words) < window_size + 1:
                    continue
                for i in range(len(words)):
                    example = (
                        words[max(0, i - window_size):i] +
                        words[min(i + 1, len(words)
                                  ):min(len(words), i + window_size) + 1],
                        words[i])
                    examples.append(Example.fromlist(example, fields))
        super(CBOWDataset, self).__init__(examples, fields, **kwargs)


if __name__ == '__main__':
    # Manual demo: load a corpus into a CBOWDataset, show its size and first
    # example, build the TARGET vocabulary and print a draw from Sampling.
    import sys

    # The corpus path may be given as the first command-line argument; the
    # original hard-coded location is kept as the default so existing
    # invocations without arguments behave exactly as before.
    default_path = '/home/lightsmile/NLP/corpus/novel/test.txt'
    test_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    dataset = CBOWDataset(test_path, Fields)
    print(len(dataset))

    # Look the first example up once and reuse it for all three prints.
    first_example = dataset[0]
    print(first_example)
    print(first_example.context)
    print(first_example.target)

    TARGET.build_vocab(dataset)

    # Imported here, as in the original script, so the module is only needed
    # when this file is run directly.
    from sampling import Sampling

    samp = Sampling(TARGET.vocab)

    # NOTE(review): presumably draws 3 samples from the vocabulary; confirm
    # against sampling.Sampling.
    print(samp.sampling(3))