def test_docker(self): inputFilePath = 'resources/inputSample.csv' dockerId = 'aa2685b94082' dockerSudo = False pathConfigFile = 'resources/ldaConfig.ini' pathStopWords = 'resources/stopWord.csv' param_object = Params(projectName='exmaple', lang='ja', inputFile=inputFilePath, targetColumnName='contents', indexColumnName='docIndex', encoding='utf-8', sheetName='', min=2, max=5, model='lda', nSentence=2, nTopWords=15, pathNeologdDict='/usr/local/lib/mecab/dic/mecab-ipadic-neologd/', pathUserDict='', osType='mac', dockerId=dockerId, dockerSudo=dockerSudo, mailTo='', mailFrom='', subject='', workingDir='easyTopicClustering/tmpDir', pathParamConfig=pathConfigFile, pathStopWord=pathStopWords) file_parser = Parser(param_object) targetSentenceFrame = file_parser.load_data() documents, dictionaryObj = pre_process(targetSentenceFrame.targetColumnName.tolist(), param_object)
def test_parser_csvfile(self): inputFilePath = 'resources/inputSample.xlsx' projectName = 'example' pathConfigFile = 'resources/ldaConfig.ini' pathStopWords = 'resources/stopWord.csv' docker_sudo = False dockerID = 'aa2685b94082' pathNeologd = "/usr/local/lib/mecab/dic/mecab-ipadic-neologd/" pathUserDict = "" param_object = Params(projectName=projectName, lang='ja', inputFile=inputFilePath, targetColumnName='contents', indexColumnName='docIndex', encoding='utf-8', sheetName='Sheet1', min=2, max=5, model='lda', nSentence=2, nTopWords=15, pathNeologdDict=pathNeologd, pathUserDict=pathUserDict, osType='mac', dockerId=dockerID, dockerSudo=docker_sudo, mailTo='', mailFrom='', subject='', workingDir='easyTopicClustering/tmpDir', pathParamConfig=pathConfigFile, pathStopWord=pathStopWords) file_parser = Parser(param_object) targetSentenceFrame = file_parser.load_data() assert isinstance(targetSentenceFrame, DataFrame)