def _get_vae_estimator(self, config, reporter, ctx):
    """Take a model configuration (specified by a config file or determined by
    model selection) and return a VAE topic model ready for training.

    Parameters:
        config (dict): an autogluon configuration/argument object, instantiated
            with particular hyperparameter values
        reporter (`autogluon.core.scheduler.reporter.Reporter`): object for
            reporting model evaluations to the scheduler
        ctx (`mxnet.context.Context`): MXNet compute context

    Returns:
        Estimator (:class:`tmnt.estimator.BaseEstimator`): either a BowEstimator
            or a MetaBowEstimator
    """
    ## scalar hyperparameters pulled off the config object
    lr = config.lr
    optimizer = config.optimizer
    n_latent = int(config.n_latent)
    enc_hidden_dim = int(config.enc_hidden_dim)
    coherence_reg_penalty = float(config.coherence_loss_wt)
    redundancy_reg_penalty = float(config.redundancy_loss_wt)
    batch_size = int(config.batch_size)
    epochs = int(config.epochs)
    n_encoding_layers = config.num_enc_layers
    enc_dr = config.enc_dr
    covar_net_layers = config.covar_net_layers
    ## embedding sub-configuration
    embedding_source = config.embedding.source
    fixed_embedding = config.embedding.get('fixed') == True
    ## latent distribution sub-configuration: kappa applies only to the
    ## von Mises-Fisher ('vmf') prior, alpha only to the logistic-Gaussian prior
    ldist_def = config.latent_distribution
    latent_distrib = ldist_def.dist_type
    kappa = 0.0
    alpha = 1.0
    if latent_distrib == 'vmf':
        kappa = ldist_def.kappa
    elif latent_distrib == 'logistic_gaussian':
        alpha = ldist_def.alpha
    ## build the vocabulary; emb_size < 0 means the embedding source did not
    ## determine a size, so fall back to the configured size if one is given
    vocab, emb_size = self._initialize_vocabulary(embedding_source)
    if emb_size < 0 and 'size' in config.embedding:
        emb_size = config.embedding.size
    if self.c_args.use_labels_as_covars:
        ## labels act as covariates: use the covariate-aware estimator
        model = \
            MetaBowEstimator(vocab, coherence_coefficient=8.0, reporter=reporter,
                             num_val_words=self.total_tst_words, wd_freqs=self.wd_freqs,
                             label_map=self.label_map, covar_net_layers=covar_net_layers,
                             ctx=ctx, lr=lr, latent_distribution=latent_distrib,
                             optimizer=optimizer, n_latent=n_latent, kappa=kappa,
                             alpha=alpha, enc_hidden_dim=enc_hidden_dim,
                             coherence_reg_penalty=coherence_reg_penalty,
                             redundancy_reg_penalty=redundancy_reg_penalty,
                             batch_size=batch_size, embedding_source=embedding_source,
                             embedding_size=emb_size, fixed_embedding=fixed_embedding,
                             num_enc_layers=n_encoding_layers, enc_dr=enc_dr,
                             seed_matrix=self.seed_matrix, hybridize=False,
                             epochs=epochs, log_method='log')
    else:
        print("Encoder coherence = {}".format(self.c_args.encoder_coherence))
        model = \
            BowEstimator(vocab, coherence_coefficient=8.0, reporter=reporter,
                         num_val_words=self.total_tst_words, wd_freqs=self.wd_freqs,
                         ctx=ctx, lr=lr, latent_distribution=latent_distrib,
                         optimizer=optimizer, n_latent=n_latent, kappa=kappa,
                         alpha=alpha, enc_hidden_dim=enc_hidden_dim,
                         coherence_reg_penalty=coherence_reg_penalty,
                         redundancy_reg_penalty=redundancy_reg_penalty,
                         batch_size=batch_size, embedding_source=embedding_source,
                         embedding_size=emb_size, fixed_embedding=fixed_embedding,
                         num_enc_layers=n_encoding_layers, enc_dr=enc_dr,
                         seed_matrix=self.seed_matrix, hybridize=False,
                         epochs=epochs, log_method='log',
                         coherence_via_encoder=self.c_args.encoder_coherence,
                         pretrained_param_file=self.pretrained_param_file)
    model.validate_each_epoch = self.validate_each_epoch
    return model
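## Illustrative sketch (an assumption, not a config shipped with this repo):
## the fields that _get_vae_estimator reads from the autogluon config object,
## shown here as a plain dict with made-up values. 'kappa' is consulted only
## when dist_type is 'vmf', and 'alpha' only when it is 'logistic_gaussian'.
EXAMPLE_VAE_CONFIG = {
    'lr': 0.005,
    'optimizer': 'adam',
    'n_latent': 20,
    'enc_hidden_dim': 150,
    'num_enc_layers': 1,
    'enc_dr': 0.1,
    'coherence_loss_wt': 0.0,
    'redundancy_loss_wt': 0.0,
    'batch_size': 200,
    'epochs': 40,
    'covar_net_layers': 1,
    'embedding': {'source': 'glove.6B.100d', 'fixed': True, 'size': 100},
    'latent_distribution': {'dist_type': 'vmf', 'kappa': 64.0},
}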
def test_train_and_topics_categorical():
    ## smoke test: fitting with covariates and extracting topic vectors
    ## should complete without raising
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    model.get_topic_vectors()
    assert True


def test_train_and_npmi_categorical():
    ## per-covariate NPMI over the top 10 words is expected to be exactly 0
    ## on this toy corpus
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    assert model._npmi_per_covariate(X_scipy, y_numpy, 10) == 0


def test_train_and_transform_categorical():
    ## every transformed row should match the first on this toy corpus
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    trans = model.transform(X_scipy, y_numpy)
    assert np.all(trans == trans[0])


def test_train_and_perplexity_categorical():
    ## smoke test: perplexity computation should complete without raising
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    model.perplexity(X_scipy, y_numpy)
    assert True
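## The four tests above rely on module-level fixtures (vocabulary, X_scipy,
## y_numpy) defined elsewhere in the test module. A minimal sketch of what
## they might look like, assuming a gluonnlp Vocab with special tokens
## disabled so its length matches the bag-of-words column count:
import numpy as np
import gluonnlp as nlp
from scipy.sparse import csr_matrix

words = ['red', 'green', 'blue', 'cyan', 'magenta', 'yellow']
vocabulary = nlp.Vocab(nlp.data.count_tokens(words),
                       unknown_token=None, padding_token=None,
                       bos_token=None, eos_token=None)

## toy bag-of-words counts: four documents over the six-word vocabulary
X_scipy = csr_matrix(np.array([[1, 0, 2, 0, 0, 1],
                               [0, 3, 0, 1, 0, 0],
                               [2, 0, 1, 0, 1, 0],
                               [0, 1, 0, 0, 2, 1]], dtype='float32'))

## one categorical covariate label per document
y_numpy = np.array([0, 0, 1, 1], dtype='int64')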