コード例 #1
0
ファイル: example.py プロジェクト: ejlb/glove-python
    args = parser.parse_args()


    if args.create:
        # Build the corpus dictionary and the cooccurrence matrix.
        # NOTE: print(...) with a single argument produces the same output
        # as the Python 2 print statement and also parses on Python 3.
        print('Pre-processing corpus')

        # Pick the reader callable that will be applied to args.create.
        if args.wiki:
            print('Using wikipedia corpus')
            get_data = read_wikipedia_corpus
        else:
            get_data = read_corpus

        # args.create serves both as the "create" switch and as the argument
        # handed to the reader (presumably the corpus path -- confirm against
        # the parser definition). The reader is invoked afresh for each of the
        # two fitting passes instead of reusing one result; presumably it
        # returns a one-shot iterator -- TODO confirm.
        corpus_model = Corpus()
        corpus_model.fit_dictionary(get_data(args.create))
        corpus_model.fit_matrix(get_data(args.create), window=10)
        # Persist the fitted corpus under a fixed file name.
        corpus_model.save('corpus.model')

        print('Dict size: %s' % len(corpus_model.dictionary))
        print('Collocations: %s' % corpus_model.matrix.nnz)

    if args.train:
        # Train the GloVe model and save it to disk.

        if not args.create:
            # No corpus was built during this run, so try to load one from
            # disk under the fixed file name 'corpus.model'.
            # NOTE: print(...) with a single argument produces the same
            # output as the Python 2 print statement and also parses on
            # Python 3.
            print('Reading corpus statistics')
            corpus_model = Corpus.load('corpus.model')

            print('Dict size: %s' % len(corpus_model.dictionary))
            print('Collocations: %s' % corpus_model.matrix.nnz)