def test_BeagleEnvironment():

    from inphosemantics import load_picklez, dump_matrix

    root = 'test-data/iep/plato/'

    corpus_filename =\
        root + 'corpus/iep-plato.pickle.bz2'

    matrix_filename =\
        root + 'models/iep-plato-beagleenviroment-sentences.npy'


    print 'Loading corpus\n'\
          '  ', corpus_filename
    c = load_picklez(corpus_filename)

    print 'Training model'
    m = BeagleEnvironment()
    m.train(c, n_columns=256)

    print 'Row norms', np.sum(m.matrix**2, axis=1)**(1./2)

    print 'Dumping matrix to\n'\
          '  ', matrix_filename
    m.dump_matrix(matrix_filename)
    
    return m
Example #2
0
def test_TfModel():
    """
    Train a TfModel on the articles of the IEP Plato test corpus.

    The trained matrix is written to disk and immediately read back.
    Returns the tuple (corpus, model, document_type).
    """
    from inphosemantics import load_picklez, dump_matrix

    document_type = 'articles'
    corpus_filename = 'test-data/iep/selected/corpus/iep-plato.pickle.bz2'
    matrix_filename = (
        'test-data/iep/selected/models/iep-plato-tf-word-article.npy')

    corpus = load_picklez(corpus_filename)

    tf_model = TfModel()
    tf_model.train(corpus, document_type)

    # Round-trip the matrix through the file: dump it, then load it back.
    tf_model.dump_matrix(matrix_filename)
    tf_model.load_matrix(matrix_filename)

    return corpus, tf_model, document_type
def test_BeagleComposite_2():

    from inphosemantics import load_picklez, dump_matrix

    root = 'test-data/iep/plato/'

    corpus_filename =\
        root + 'corpus/iep-plato.pickle.bz2'

    env_filename =\
        root + 'models/iep-plato-beagleenviroment-sentences.npy'

    matrix_filename =\
        root + 'models/iep-plato-beaglecomposite-sentences.npy'


    print 'Loading corpus\n'\
          '  ', corpus_filename
    c = load_picklez(corpus_filename)
    

    print 'Loading environment model\n'\
          '  ', env_filename
    e = BeagleEnvironment()
    e.load_matrix(env_filename)
    print e.matrix

    print 'Training model'
    m = BeagleComposite()
    m.train(c, env_matrix=e.matrix)
    print m.matrix


    print 'Dumping matrix to\n'\
          '  ', matrix_filename
    m.dump_matrix(matrix_filename)
    
    return m
Example #4
0
    def __init__(self,
                 corpus=None,
                 corpus_filename=None,
                 model=None,
                 model_type=None,
                 matrix=None,
                 matrix_filename=None,
                 token_type=None,
                 stoplist=None):

        if corpus:
            if corpus_filename:
                raise Exception("Both a corpus and a "
                                "corpus filename were given.")
            self.corpus = corpus

        elif corpus_filename:
            
            print 'Loading corpus from\n'\
                  '  ', corpus_filename
            self.corpus = load_picklez(corpus_filename)

        else:
            raise Exception("Neither a corpus nor a "
                            "corpus filename were given.")
            


        if model:
            if matrix:
                raise Exception("Both a model and a "
                                "matrix were given.")
            elif matrix_filename:
                raise Exception("Both a model and a "
                                "matrix filename were given.")
            elif model_type:
                raise Exception("Both a model and a "
                                "model type were given.")
            else:
                self.model = model


        elif matrix:
            if matrix_filename:
                raise Exception("Both a matrix and a "
                                "matrix filename were given.")
            else:
                self.model = model_type(matrix)


        elif matrix_filename:
            
            self.model = model_type()
            print 'Loading matrix from\n'\
                  '  ', matrix_filename
            self.model.load_matrix(matrix_filename)

        else:
            raise Exception("Neither a model, matrix nor "
                            "matrix filename were given.")

        self.token_type = token_type

        if stoplist:
            self.stoplist = self.corpus.encode_tokens_str(stoplist)
            print 'Applying stoplist to matrix'
            self.model.filter_rows(self.stoplist)