def test_BeagleEnvironment(): from inphosemantics import load_picklez, dump_matrix root = 'test-data/iep/plato/' corpus_filename =\ root + 'corpus/iep-plato.pickle.bz2' matrix_filename =\ root + 'models/iep-plato-beagleenviroment-sentences.npy' print 'Loading corpus\n'\ ' ', corpus_filename c = load_picklez(corpus_filename) print 'Training model' m = BeagleEnvironment() m.train(c, n_columns=256) print 'Row norms', np.sum(m.matrix**2, axis=1)**(1./2) print 'Dumping matrix to\n'\ ' ', matrix_filename m.dump_matrix(matrix_filename) return m
def test_TfModel():
    """
    Train a TfModel on the selected IEP test corpus and round-trip
    its matrix through disk.

    The model is trained with the document type 'articles', its
    matrix is dumped to a .npy file and then loaded back from that
    same file.

    Returns the tuple ``(corpus, model, document_type)``.
    """
    # NOTE: dump_matrix is imported but never referenced in this
    # function; only the model's dump_matrix method is called.
    from inphosemantics import load_picklez, dump_matrix

    document_type = 'articles'
    corpus_path = 'test-data/iep/selected/corpus/iep-plato.pickle.bz2'
    matrix_path = ('test-data/iep/selected/models/'
                   'iep-plato-tf-word-article.npy')

    loaded_corpus = load_picklez(corpus_path)

    tf_model = TfModel()
    tf_model.train(loaded_corpus, document_type)

    # Write the matrix out, then read it straight back in.
    tf_model.dump_matrix(matrix_path)
    tf_model.load_matrix(matrix_path)

    return loaded_corpus, tf_model, document_type
def test_BeagleComposite_2(): from inphosemantics import load_picklez, dump_matrix root = 'test-data/iep/plato/' corpus_filename =\ root + 'corpus/iep-plato.pickle.bz2' env_filename =\ root + 'models/iep-plato-beagleenviroment-sentences.npy' matrix_filename =\ root + 'models/iep-plato-beaglecomposite-sentences.npy' print 'Loading corpus\n'\ ' ', corpus_filename c = load_picklez(corpus_filename) print 'Loading environment model\n'\ ' ', env_filename e = BeagleEnvironment() e.load_matrix(env_filename) print e.matrix print 'Training model' m = BeagleComposite() m.train(c, env_matrix=e.matrix) print m.matrix print 'Dumping matrix to\n'\ ' ', matrix_filename m.dump_matrix(matrix_filename) return m
def __init__(self, corpus=None, corpus_filename=None,
             model=None, model_type=None,
             matrix=None, matrix_filename=None,
             token_type=None, stoplist=None):
    """
    Attach a corpus and a model to this instance.

    Exactly one corpus source (`corpus` or `corpus_filename`) and
    exactly one model source (`model`, `matrix` or `matrix_filename`)
    must be supplied.

    Parameters:
        corpus: an already-loaded corpus object.
        corpus_filename: path handed to `load_picklez` to load the
            corpus.
        model: an already-built model object. Must not be combined
            with `matrix`, `matrix_filename` or `model_type`.
        model_type: callable used to build the model; called as
            `model_type(matrix)` or `model_type()`. Required with
            `matrix` or `matrix_filename`.
        matrix: matrix passed to `model_type`.
        matrix_filename: path passed to the new model's
            `load_matrix`.
        token_type: stored unchanged on `self.token_type`.
        stoplist: if non-empty, encoded with the corpus's
            `encode_tokens_str`, stored on `self.stoplist` and passed
            to the model's `filter_rows`.

    Raises:
        Exception: if no corpus source or no model source is given,
            or if conflicting sources are given.
        TypeError: if `matrix` or `matrix_filename` is given without
            a `model_type`.
    """
    # Objects (corpus, model, matrix) are compared against None
    # instead of being tested for truthiness: a NumPy array with more
    # than one element raises ValueError when used in a boolean
    # context, and an empty container-like object would otherwise be
    # mistaken for "not given".  Filenames keep the truthiness test
    # so that an empty string still counts as "not given".
    if corpus is not None:
        if corpus_filename:
            raise Exception("Both a corpus and a "
                            "corpus filename were given.")
        self.corpus = corpus

    elif corpus_filename:
        # A single-argument print() call produces the same output as
        # the two-item print statement under Python 2, and also
        # parses under Python 3.
        print('%s %s' % ('Loading corpus from\n'
                         ' ', corpus_filename))
        self.corpus = load_picklez(corpus_filename)

    else:
        raise Exception("Neither a corpus nor a "
                        "corpus filename were given.")

    if model is not None:
        if matrix is not None:
            raise Exception("Both a model and a "
                            "matrix were given.")
        elif matrix_filename:
            raise Exception("Both a model and a "
                            "matrix filename were given.")
        elif model_type is not None:
            raise Exception("Both a model and a "
                            "model type were given.")
        else:
            self.model = model

    elif matrix is not None:
        if matrix_filename:
            raise Exception("Both a matrix and a "
                            "matrix filename were given.")
        if model_type is None:
            # Same exception type as calling None, but explicit.
            raise TypeError("A model type is required when a "
                            "matrix is given.")
        self.model = model_type(matrix)

    elif matrix_filename:
        if model_type is None:
            # Same exception type as calling None, but explicit.
            raise TypeError("A model type is required when a "
                            "matrix filename is given.")
        self.model = model_type()
        print('%s %s' % ('Loading matrix from\n'
                         ' ', matrix_filename))
        self.model.load_matrix(matrix_filename)

    else:
        raise Exception("Neither a model, matrix nor "
                        "matrix filename were given.")

    self.token_type = token_type

    # self.stoplist is only set when a non-empty stoplist is given.
    if stoplist:
        self.stoplist = self.corpus.encode_tokens_str(stoplist)
        print('Applying stoplist to matrix')
        self.model.filter_rows(self.stoplist)