def load_matrix(matrix_name):
    """Load the saved matrix registered under ``matrix_name``.

    The registry returned by ``bk._get_matrices()`` is queried for the
    entry's ``filename`` and ``model_type`` options (it is presumably a
    ConfigParser-style object keyed by section and option -- confirm
    against ``bk``).

    Entries whose ``model_type`` is ``'lsa'`` are loaded through
    ``lsa.LsaModel.load_matrix``; every other type goes through
    ``model.Model.load_matrix``.
    """
    registry = bk._get_matrices()
    path = registry.get(matrix_name, 'filename')
    kind = registry.get(matrix_name, 'model_type')

    # Only the selected loader class is looked up.
    loader = lsa.LsaModel if kind == 'lsa' else model.Model
    return loader.load_matrix(path)
def viewer(matrix_name):
    """Build the viewer that matches the model type of ``matrix_name``.

    The corpus named by the entry's ``vsm_corpus`` option and the matrix
    itself are loaded first; the entry's ``model_type`` option then selects
    which viewer class is instantiated with them.

    Recognised model types are ``tf``, ``tfidf``, ``lsa``,
    ``beagle-environment``, ``beagle-context``, ``beagle-order`` and
    ``beagle-composite``.  For any other value the function returns
    ``None``.
    """
    registry = bk._get_matrices()
    corpus = corpus_io.load_corpus(registry.get(matrix_name, 'vsm_corpus'))
    matrix = model_io.load_matrix(matrix_name)
    kind = registry.get(matrix_name, 'model_type')

    # Each factory is deferred behind a lambda so that only the viewer class
    # for the selected model type is looked up and constructed.
    factories = {
        'tf': lambda: tfviewer.TfViewer(corpus=corpus, matrix=matrix),
        'tfidf': lambda: tfidfviewer.TfIdfViewer(corpus=corpus,
                                                 matrix=matrix),
        # The LSA viewer takes its data under a different keyword.
        'lsa': lambda: lsaviewer.LsaViewer(corpus=corpus,
                                           svd_matrices=matrix),
        'beagle-environment': lambda: envv.BeagleEnvironmentViewer(
            corpus=corpus, matrix=matrix),
        'beagle-context': lambda: ctxv.BeagleContextViewer(
            corpus=corpus, matrix=matrix),
        'beagle-order': lambda: ordv.BeagleOrderViewer(
            corpus=corpus, matrix=matrix),
        'beagle-composite': lambda: comv.BeagleCompositeViewer(
            corpus=corpus, matrix=matrix),
    }

    factory = factories.get(kind)
    if factory is None:
        # Unrecognised model type: no viewer is available.
        return None
    return factory()
def _load_input_matrix(matrices, matrix_name, option):
    """Load the saved matrix whose filename is stored under ``option``."""
    return model.Model.load_matrix(matrices.get(matrix_name, option))


def train_model(matrix_name):
    """Train the model registered under ``matrix_name`` and save its matrix.

    The registry returned by ``bk._get_matrices()`` supplies the entry's
    ``vsm_corpus``, ``filename`` and ``model_type`` options, plus the
    type-specific options listed below.  After training, the result is
    written with ``save_matrix(filename)``.

    Type-specific options read from the entry:

    * ``tf``: ``tok_name``
    * ``tfidf``: ``tf_matrix`` (filename of a saved matrix)
    * ``lsa``: ``td_matrix`` (filename of a saved matrix), ``factors`` (int)
    * ``beagle-environment``: ``n_columns`` (int)
    * ``beagle-context``: ``env_matrix`` (filename of a saved matrix)
    * ``beagle-order``: ``env_matrix`` (filename), ``lambda`` (int)
    * ``beagle-composite``: ``ctx_matrix`` and ``ord_matrix`` (filenames)

    Raises:
        ValueError: if the entry's ``model_type`` is not one of the types
            listed above.  Previously this case fell through the dispatch
            and failed on an unbound local variable when saving.
    """
    matrices = bk._get_matrices()
    corpus_name = matrices.get(matrix_name, 'vsm_corpus')
    # NOTE(review): the corpus is loaded for every model type, although the
    # tfidf and lsa branches below train only from saved matrices.
    corpus = corpus_io.load_corpus(corpus_name)
    filename = matrices.get(matrix_name, 'filename')
    model_type = matrices.get(matrix_name, 'model_type')

    if model_type == 'tf':
        m = tf.TfModel()
        tok_name = matrices.get(matrix_name, 'tok_name')
        m.train(corpus, tok_name)
    elif model_type == 'tfidf':
        m = tfidf.TfIdfModel()
        tf_matrix = _load_input_matrix(matrices, matrix_name, 'tf_matrix')
        m.train(tf_matrix=tf_matrix)
    elif model_type == 'lsa':
        m = lsa.LsaModel()
        td_matrix = _load_input_matrix(matrices, matrix_name, 'td_matrix')
        k_factors = matrices.getint(matrix_name, 'factors')
        m.train(td_matrix=td_matrix, k_factors=k_factors)
    elif model_type == 'beagle-environment':
        m = beagleenvironment.BeagleEnvironment()
        n_columns = matrices.getint(matrix_name, 'n_columns')
        m.train(corpus, n_columns=n_columns)
    elif model_type == 'beagle-context':
        m = beaglecontext.BeagleContext()
        env_matrix = _load_input_matrix(matrices, matrix_name, 'env_matrix')
        m.train(corpus, env_matrix=env_matrix)
    elif model_type == 'beagle-order':
        m = beagleorder.BeagleOrder()
        env_matrix = _load_input_matrix(matrices, matrix_name, 'env_matrix')
        lmda = matrices.getint(matrix_name, 'lambda')
        m.train(corpus, env_matrix=env_matrix, lmda=lmda)
    elif model_type == 'beagle-composite':
        m = beaglecomposite.BeagleComposite()
        ctx_matrix = _load_input_matrix(matrices, matrix_name, 'ctx_matrix')
        ord_matrix = _load_input_matrix(matrices, matrix_name, 'ord_matrix')
        m.train(corpus, ctx_matrix=ctx_matrix, ord_matrix=ord_matrix)
    else:
        # Fail with the actual cause instead of an UnboundLocalError on `m`.
        raise ValueError(
            'unknown model_type %r for matrix %r' % (model_type, matrix_name))

    m.save_matrix(filename)
def matrix_names():
    """Return the section names of the registry from ``bk._get_matrices()``.

    Each section presumably corresponds to one registered matrix -- confirm
    against ``bk``.
    """
    return bk._get_matrices().sections()