Beispiel #1
0
 def test_get_model_parameter(self):
     """Check that ``get_model_parameter`` returns at least one entry.

     The generator is configured with every key of ``CLUSTER_ALGORITHMS``
     as its ``models`` list.
     """
     _generator = ClusteringGenerator(predictor='tweet',
                                      model_name=None,
                                      cluster_params=None,
                                      models=list(CLUSTER_ALGORITHMS.keys())
                                      )
     _model_parameter = _generator.get_model_parameter()
     # assertGreater reports both operands on failure, whereas
     # assertTrue(expr=...) only reports "False is not true".
     self.assertGreater(len(_model_parameter.keys()), 0)
Beispiel #2
0
 def test_train_non_negative_matrix_factorization(self):
     """Train a model generated with ``models=['nmf']`` and check its results.

     After ``train()`` the model must expose a non-empty ``cluster_label``
     and an ``nmi`` value greater than zero.
     """
     _model_generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                                 model_name=None,
                                                                 cluster_params=None,
                                                                 models=['nmf'],
                                                                 sep=',',
                                                                 train_data_path=DATA_FILE_PATH,
                                                                 )
     _model = _model_generator.generate_model()
     _model.train()
     # Two separate assertions: a failure names the exact check that broke
     # and shows the offending value instead of a bare "False is not true".
     self.assertGreater(len(_model.cluster_label), 0)
     self.assertGreater(_model.nmi, 0.0)
Beispiel #3
0
 def test_train_latent_semantic_indexing(self):
     """Train a model generated with ``models=['lsi']`` and check its results.

     After ``train()`` the model must expose a non-empty ``cluster_label``
     and an ``nmi`` value greater than zero.
     """
     _model_generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                                 model_name=None,
                                                                 cluster_params=None,
                                                                 models=['lsi'],
                                                                 sep=',',
                                                                 train_data_path=DATA_FILE_PATH,
                                                                 )
     _model = _model_generator.generate_model()
     _model.train()
     # Two separate assertions: a failure names the exact check that broke
     # and shows the offending value instead of a bare "False is not true".
     self.assertGreater(len(_model.cluster_label), 0)
     self.assertGreater(_model.nmi, 0.0)
Beispiel #4
0
 def test_train_gibbs_sampling_dirichlet_multinomial_model(self):
     """Train a model generated with ``models=['gsdmm']`` and check its results.

     After ``train()`` the model must expose a non-empty ``cluster_label``
     and an ``nmi`` value greater than zero.
     """
     _model_generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                                 model_name=None,
                                                                 cluster_params=None,
                                                                 models=['gsdmm'],
                                                                 sep=',',
                                                                 train_data_path=DATA_FILE_PATH,
                                                                 )
     _model = _model_generator.generate_model()
     _model.train()
     # Two separate assertions: a failure names the exact check that broke
     # and shows the offending value instead of a bare "False is not true".
     self.assertGreater(len(_model.cluster_label), 0)
     self.assertGreater(_model.nmi, 0.0)
Beispiel #5
0
 def test_generate_latent_semantic_indexing_model(self):
     """Check that ``models=['lsi']`` yields a ``LatentSemanticIndexing`` model.

     The object returned by ``generate_model()`` must carry the generated
     estimator in its ``model`` attribute.
     """
     # No ``object`` annotation here: it would contradict the ``.model``
     # attribute access below for any static type checker.
     _generated = ClusteringGenerator(predictor='tweet',
                                      model_name=None,
                                      cluster_params=None,
                                      models=['lsi'],
                                      tokenize=False,
                                      random=True,
                                      sep=',',
                                      cloud=None,
                                      train_data_path=DATA_FILE_PATH
                                      ).generate_model()
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(_generated.model, LatentSemanticIndexing)
Beispiel #6
0
 def test_generate_latent_dirichlet_allocation_model(self):
     """Check that ``models=['lda']`` yields a ``LatentDirichletAllocation`` model.

     The object returned by ``generate_model()`` must carry the generated
     estimator in its ``model`` attribute.
     """
     # No ``object`` annotation here: it would contradict the ``.model``
     # attribute access below for any static type checker.
     _generated = ClusteringGenerator(predictor='tweet',
                                      model_name=None,
                                      cluster_params=None,
                                      models=['lda'],
                                      tokenize=False,
                                      random=True,
                                      sep=',',
                                      cloud=None,
                                      train_data_path=DATA_FILE_PATH
                                      ).generate_model()
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(_generated.model, LatentDirichletAllocation)
Beispiel #7
0
 def test_generate_gibbs_sampling_dirichlet_multinomial_model(self):
     """Check that ``models=['gsdmm']`` yields a GSDMM model instance.

     The object returned by ``generate_model()`` must carry a
     ``GibbsSamplingDirichletMultinomialModeling`` instance in its
     ``model`` attribute. This test passes ``tokenize=True``.
     """
     # No ``object`` annotation here: it would contradict the ``.model``
     # attribute access below for any static type checker.
     _generated = ClusteringGenerator(predictor='tweet',
                                      model_name=None,
                                      cluster_params=None,
                                      models=['gsdmm'],
                                      tokenize=True,
                                      random=True,
                                      sep=',',
                                      cloud=None,
                                      train_data_path=DATA_FILE_PATH
                                      ).generate_model()
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(_generated.model, GibbsSamplingDirichletMultinomialModeling)
Beispiel #8
0
 def test_generate_params(self):
     """Check that ``generate_params`` increases the number of mutated parameters.

     A deep copy of ``model_param_mutated`` is taken after ``generate_model()``,
     then ``generate_params(param_rate=0.1, force_param=None)`` is called and
     the key count afterwards must be strictly larger than in the snapshot.
     """
     _model_generator: ClusteringGenerator = ClusteringGenerator(predictor='tweet',
                                                                 model_name=None,
                                                                 cluster_params=None,
                                                                 models=list(CLUSTER_ALGORITHMS.keys()),
                                                                 sep=',',
                                                                 train_data_path=DATA_FILE_PATH,
                                                                 )
     _model = _model_generator.generate_model()
     # Deep copy so later mutation by generate_params cannot alter the snapshot.
     # NOTE(review): the snapshot is read from ``_model`` while the comparison
     # below reads ``_model_generator``; presumably generate_model() returns
     # the generator itself - confirm against ClusteringGenerator.
     _params_before: dict = copy.deepcopy(_model.model_param_mutated)
     _model_generator.generate_params(param_rate=0.1, force_param=None)
     # assertLess reports both key counts on failure.
     self.assertLess(len(_params_before.keys()),
                     len(_model_generator.model_param_mutated.keys()))
Beispiel #9
0
 def test_generate_non_negative_matrix_factorization_model(self):
     """Check that ``models=['nmf']`` yields a ``NonNegativeMatrixFactorization`` model.

     The object returned by ``generate_model()`` must carry the generated
     estimator in its ``model`` attribute.
     """
     # No ``object`` annotation here: it would contradict the ``.model``
     # attribute access below for any static type checker.
     _generated = ClusteringGenerator(predictor='tweet',
                                      model_name=None,
                                      cluster_params=None,
                                      models=['nmf'],
                                      tokenize=False,
                                      random=True,
                                      sep=',',
                                      cloud=None,
                                      train_data_path=DATA_FILE_PATH
                                      ).generate_model()
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(_generated.model, NonNegativeMatrixFactorization)