def test_get_model_parameter(self):
    """Verify that ``get_model_parameter`` yields at least one entry.

    The generator is configured with every algorithm name found in
    ``CLUSTER_ALGORITHMS`` and without an explicit model name or
    cluster parameters.
    """
    _all_models = list(CLUSTER_ALGORITHMS.keys())
    _generator = ClusteringGenerator(predictor='tweet',
                                     model_name=None,
                                     cluster_params=None,
                                     models=_all_models
                                     )
    _param_names = _generator.get_model_parameter().keys()
    self.assertTrue(expr=len(_param_names) > 0)
def test_train_non_negative_matrix_factorization(self):
    """Train the ``'nmf'`` model and check its clustering results.

    A generator restricted to ``models=['nmf']`` is created with
    ``train_data_path=DATA_FILE_PATH`` and ``sep=','``. After ``train()``
    the generated model must expose a non-empty ``cluster_label`` and an
    ``nmi`` value greater than zero.
    """
    _model_generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                                model_name=None,
                                                                cluster_params=None,
                                                                models=['nmf'],
                                                                sep=',',
                                                                train_data_path=DATA_FILE_PATH,
                                                                )
    _model = _model_generator.generate_model()
    _model.train()
    # Two separate assertions so a failure report names the condition that
    # broke instead of a bare "False is not true".
    self.assertGreater(len(_model.cluster_label), 0)
    self.assertGreater(_model.nmi, 0.0)
def test_train_latent_semantic_indexing(self):
    """Train the ``'lsi'`` model and check its clustering results.

    A generator restricted to ``models=['lsi']`` is created with
    ``train_data_path=DATA_FILE_PATH`` and ``sep=','``. After ``train()``
    the generated model must expose a non-empty ``cluster_label`` and an
    ``nmi`` value greater than zero.
    """
    _model_generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                                model_name=None,
                                                                cluster_params=None,
                                                                models=['lsi'],
                                                                sep=',',
                                                                train_data_path=DATA_FILE_PATH,
                                                                )
    _model = _model_generator.generate_model()
    _model.train()
    # Two separate assertions so a failure report names the condition that
    # broke instead of a bare "False is not true".
    self.assertGreater(len(_model.cluster_label), 0)
    self.assertGreater(_model.nmi, 0.0)
def test_train_gibbs_sampling_dirichlet_multinomial_model(self):
    """Train the ``'gsdmm'`` model and check its clustering results.

    A generator restricted to ``models=['gsdmm']`` is created with
    ``train_data_path=DATA_FILE_PATH`` and ``sep=','``. After ``train()``
    the generated model must expose a non-empty ``cluster_label`` and an
    ``nmi`` value greater than zero.
    """
    _model_generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                                model_name=None,
                                                                cluster_params=None,
                                                                models=['gsdmm'],
                                                                sep=',',
                                                                train_data_path=DATA_FILE_PATH,
                                                                )
    _model = _model_generator.generate_model()
    _model.train()
    # Two separate assertions so a failure report names the condition that
    # broke instead of a bare "False is not true".
    self.assertGreater(len(_model.cluster_label), 0)
    self.assertGreater(_model.nmi, 0.0)
def test_generate_latent_semantic_indexing_model(self):
    """Check that ``models=['lsi']`` generates a ``LatentSemanticIndexing`` model.

    The object returned by ``generate_model()`` must carry a ``model``
    attribute that is an instance of ``LatentSemanticIndexing``.
    """
    # No ``object`` annotation here: the ``.model`` attribute is read below,
    # which a static type checker would reject on a plain ``object``.
    _net_gen = ClusteringGenerator(predictor='tweet',
                                   model_name=None,
                                   cluster_params=None,
                                   models=['lsi'],
                                   tokenize=False,
                                   random=True,
                                   sep=',',
                                   cloud=None,
                                   train_data_path=DATA_FILE_PATH
                                   ).generate_model()
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(_net_gen.model, LatentSemanticIndexing)
def test_generate_latent_dirichlet_allocation_model(self):
    """Check that ``models=['lda']`` generates a ``LatentDirichletAllocation`` model.

    The object returned by ``generate_model()`` must carry a ``model``
    attribute that is an instance of ``LatentDirichletAllocation``.
    """
    # No ``object`` annotation here: the ``.model`` attribute is read below,
    # which a static type checker would reject on a plain ``object``.
    _net_gen = ClusteringGenerator(predictor='tweet',
                                   model_name=None,
                                   cluster_params=None,
                                   models=['lda'],
                                   tokenize=False,
                                   random=True,
                                   sep=',',
                                   cloud=None,
                                   train_data_path=DATA_FILE_PATH
                                   ).generate_model()
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(_net_gen.model, LatentDirichletAllocation)
def test_generate_gibbs_sampling_dirichlet_multinomial_model(self):
    """Check that ``models=['gsdmm']`` generates a GSDMM model.

    The object returned by ``generate_model()`` must carry a ``model``
    attribute that is an instance of
    ``GibbsSamplingDirichletMultinomialModeling``. Unlike the sibling
    generation tests visible here, this one passes ``tokenize=True``.
    """
    # No ``object`` annotation here: the ``.model`` attribute is read below,
    # which a static type checker would reject on a plain ``object``.
    _net_gen = ClusteringGenerator(predictor='tweet',
                                   model_name=None,
                                   cluster_params=None,
                                   models=['gsdmm'],
                                   tokenize=True,
                                   random=True,
                                   sep=',',
                                   cloud=None,
                                   train_data_path=DATA_FILE_PATH
                                   ).generate_model()
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(_net_gen.model, GibbsSamplingDirichletMultinomialModeling)
def test_generate_params(self):
    """Verify that ``generate_params`` increases the number of mutated parameters.

    A deep copy of ``model_param_mutated`` is taken from the object returned
    by ``generate_model()``; after ``generate_params(param_rate=0.1)`` the
    generator's ``model_param_mutated`` must hold strictly more keys than
    that snapshot.
    """
    _generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                          model_name=None,
                                                          cluster_params=None,
                                                          models=list(CLUSTER_ALGORITHMS.keys()),
                                                          sep=',',
                                                          train_data_path=DATA_FILE_PATH,
                                                          )
    _generated = _generator.generate_model()
    # Deep copy so the later generate_params() call cannot alter the snapshot.
    _params_before: dict = copy.deepcopy(_generated.model_param_mutated)
    _generator.generate_params(param_rate=0.1, force_param=None)
    # NOTE(review): the snapshot is read from the generate_model() result while
    # the comparison reads from the generator itself - presumably the same
    # object; confirm against ClusteringGenerator.generate_model.
    _n_before: int = len(_params_before.keys())
    _n_after: int = len(_generator.model_param_mutated.keys())
    self.assertTrue(expr=_n_before < _n_after)
def test_generate_non_negative_matrix_factorization_model(self):
    """Check that ``models=['nmf']`` generates a ``NonNegativeMatrixFactorization`` model.

    The object returned by ``generate_model()`` must carry a ``model``
    attribute that is an instance of ``NonNegativeMatrixFactorization``.
    """
    # No ``object`` annotation here: the ``.model`` attribute is read below,
    # which a static type checker would reject on a plain ``object``.
    _net_gen = ClusteringGenerator(predictor='tweet',
                                   model_name=None,
                                   cluster_params=None,
                                   models=['nmf'],
                                   tokenize=False,
                                   random=True,
                                   sep=',',
                                   cloud=None,
                                   train_data_path=DATA_FILE_PATH
                                   ).generate_model()
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(_net_gen.model, NonNegativeMatrixFactorization)