Example #1
0
def check_db_status():
    """Seed the database when it contains no articles.

    Asks ``DBController.get_all_articles()`` for the stored articles and,
    if it yields nothing, prints a notice and calls
    ``DatabaseSeeder.seed()``. Does nothing when at least one article
    exists.
    """
    # Only emptiness matters here, so stop at the first article instead of
    # walking (and counting) the whole collection.
    has_articles = any(True for _ in DBController.get_all_articles())
    if not has_articles:
        print('Seeding database...')
        DatabaseSeeder.seed()
Example #2
0
    def load_model(cls, path: str, model_name: str,
                   model_type: str) -> BaseModel:
        """Load a saved model and its companion files from ``path``.

        Files read (all located directly under ``path``):

        * ``{model_name}.dict`` and ``{model_name}.mm`` -- dictionary and
          corpus, for every model type except ``'d2v'``.
        * ``{model_name}.mat.npz`` -- similarity matrix, ``'ft'`` only.
        * ``{model_name}.{model_type}`` -- the serialized model itself.

        The training sample is rebuilt from the articles whose
        ``serial_id`` is among the ids returned by
        ``cls.load_articles_id(path)``.

        Args:
            path: Directory holding the saved model files.
            model_name: Base file name shared by all the model's files.
            model_type: One of ``'lsi'``, ``'lda'``, ``'d2v'`` or ``'ft'``.

        Returns:
            The wrapper produced by the matching ``<Model>.trained(...)``
            constructor.

        Raises:
            ValueError: If ``model_type`` is not one of the supported
                types. Checked before any file or database access.
        """
        supported_types = ('lsi', 'lda', 'd2v', 'ft')
        if model_type not in supported_types:
            # Fail fast: previously an unknown type did all the loading
            # work and then silently returned None.
            raise ValueError(
                f'Unsupported model type {model_type!r}; '
                f'expected one of {supported_types}')

        print(f'{model_name}.{model_type} model loading...')

        # 'd2v' models are restored without a dictionary or corpus.
        dictionary = None
        corpus = None
        if model_type != 'd2v':
            dictionary = corpora.Dictionary.load(
                os.path.join(path, f'{model_name}.dict'))
            corpus = corpora.MmCorpus(os.path.join(path, f'{model_name}.mm'))

        # Only 'ft' models carry a precomputed similarity matrix.
        similarity_matrix = None
        if model_type == 'ft':
            similarity_matrix = sparse.load_npz(
                os.path.join(path, f'{model_name}.mat.npz'))

        # Rebuild the training sample from the article ids saved with the
        # model.
        articles_id = cls.load_articles_id(path)
        articles = DBController.get_all_articles(
            {'serial_id': {
                '$in': articles_id
            }})
        training_sample = TrainingSample(articles)

        model_path = os.path.join(path, f'{model_name}.{model_type}')
        if model_type == 'lsi':
            raw_model = models.lsimodel.LsiModel.load(model_path)
            model = LsiModel.trained(name=model_name,
                                     model=raw_model,
                                     corpus=corpus,
                                     dictionary=dictionary,
                                     training_sample=training_sample)
        elif model_type == 'lda':
            raw_model = models.ldamodel.LdaModel.load(model_path)
            model = LdaModel.trained(name=model_name,
                                     model=raw_model,
                                     corpus=corpus,
                                     dictionary=dictionary,
                                     training_sample=training_sample)
        elif model_type == 'd2v':
            raw_model = models.doc2vec.Doc2Vec.load(model_path)
            model = D2vModel.trained(name=model_name,
                                     model=raw_model,
                                     corpus=None,
                                     dictionary=None,
                                     training_sample=training_sample)
        else:  # 'ft' -- the only remaining supported type
            raw_model = models.FastText.load(model_path)
            model = FastTextModel.trained(
                name=model_name,
                model=raw_model,
                corpus=corpus,
                dictionary=dictionary,
                similarity_matrix=similarity_matrix,
                training_sample=training_sample)

        print('Loaded')

        return model
Example #3
0
from Storage import Storage


def check_db_status():
    """Seed the database when it contains no articles.

    Asks ``DBController.get_all_articles()`` for the stored articles and,
    if it yields nothing, prints a notice and calls
    ``DatabaseSeeder.seed()``. Does nothing when at least one article
    exists.
    """
    # Only emptiness matters here, so stop at the first article instead of
    # walking (and counting) the whole collection.
    has_articles = any(True for _ in DBController.get_all_articles())
    if not has_articles:
        print('Seeding database...')
        DatabaseSeeder.seed()


if __name__ == "__main__":
    # Script entry point: train the LSI model on the stored articles and
    # save it under 'out/lsi'.

    # Seed the database first if it holds no articles.
    check_db_status()

    # NOTE(review): limit=None presumably means "no cap on the number of
    # articles" -- confirm against DBController.get_all_articles.
    articles = DBController.get_all_articles(limit=None)

    # Despite the variable name, this sample is what the model is trained
    # on below.
    testing_sample = TrainingSample(articles)

    # Only the LSI model is enabled; the other model types are left
    # commented out.
    lsi = LsiModel(model_name='phyge')
    # lda = LdaModel(model_name='phyge')
    # d2v = D2vModel(model_name='phyge')
    # fast_text = FastTextModel(model_name='phyge')

    lsi.train_model(testing_sample)
    # lda.train_model(testing_sample)
    # d2v.train_model(testing_sample)
    # fast_text.train_model(testing_sample)

    # Save the trained model under 'out/lsi' via Storage.
    Storage.save_model(lsi, path='out/lsi')
    # Storage.save_model(lda, path='out/lda')