예제 #1
0
from params.core import Core as Params
from dataset.core import Core as Dataset
from corpus.meta import Meta

# Script body: loads the script parameters, fetches the requested dataset
# and runs the corpus Meta processor over it.
#
# `logging` and `sys` are used below but were not imported anywhere in the
# visible part of this script, so they are imported here to keep it runnable.
import logging
import sys

logging.basicConfig(level=logging.INFO)
logging.info("# This script generates meta information about the corpus")

logging.info("# 1. Loading script params ")
logging.info("# ================================")
scriptParams = Params()
params = scriptParams.get()
# save() receives the data directory taken from the loaded params.
scriptParams.save(params.data_directory)

logging.info("# 2. Preprocessing data")
logging.info("# ================================")

dataset = Dataset(params.dataset_name, params.data_directory)
# The percentage and item count are coerced to float / int before being
# passed on (presumably they arrive as strings -- TODO confirm in Params).
datasetToProcess = dataset.get(float(params.dataset_percentage),
                               int(params.total_items))

if not datasetToProcess:
    logging.error('No dataset found')
    # Exit with a non-zero status so callers (shell, CI, notebooks) can tell
    # that the run failed; a bare sys.exit() would report success.
    sys.exit(1)

# NOTE(review): `data` is not used below; the call is kept in case
# getTrainingSet() has side effects the Meta step relies on -- confirm.
data = datasetToProcess.getTrainingSet()

metaManager = Meta(datasetToProcess)
# remove() is called before process() on every run.
metaManager.remove()
metaManager.process()
print('Finished')
예제 #2
0
파일: runLDA.py 프로젝트: ishrat2003/TS
import sys
packagesPath = "/content/drive/My Drive/Colab Notebooks/packages/TextMining"
sys.path.append(packagesPath)

from dataset.core import Core as Dataset
from topic.lda import LDA
from topic.evaluate import Evaluate
from params.core import Core as Params
import os

# Load the script parameters and pass the data directory to save().
scriptParams = Params()
params = scriptParams.get()
scriptParams.save(params.data_directory)

# Build the dataset wrapper and fetch its processor in one step.
dataProcessor = Dataset(params.dataset_name, params.data_directory).get()

if params.type != 'lda':
    # Any type other than 'lda' trains a fresh LDA model.
    print(":::::::::::::: Train ::::::::::::::")
    modelPath = os.path.join(params.data_directory, params.dataset_name)
    lda = LDA(dataProcessor, modelPath)
    lda.remove()
    # Fixed training settings used by this script.
    lda.setPerplexity(5)
    lda.setNumberOfTopics(6)
    lda.setNumberOfTotalTopFrequencyWord(10000)
    lda.setNumberOfIterations(1000)
    lda.train()
else:
    # type == 'lda' runs the evaluation path instead of training.
    print(":::::::::::::: Evaluating ::::::::::::::")
    Evaluate(dataProcessor, params).process()