def on_Save_clicked(self):
    """Save the text of the four result panes to files under ``./OUTPUT``.

    The target directory is
    ``./OUTPUT/minsup_<minsupp>_minconf_<minconf>/<inputName>/``, built from
    the two threshold spin boxes and the input-name label. When all four
    panes are empty nothing is written and no directory is created; the log
    label reports that there is no result. Otherwise each pane is written to
    its own UTF-8 file (overwriting any previous one) and the log label
    reports success.
    """
    minsupp = float(self.doubleSpinBoxMinsupp.text())
    minconf = float(self.doubleSpinBoxMinconf.text())
    inputName = self.labelInputName.text()
    outDir = './OUTPUT/minsup_{}_minconf_{}/{}/'.format(minsupp, minconf, inputName)

    # (output file name, text currently shown in the matching pane)
    outputs = (
        ('Tap_Phu_Pho_Bien.txt', self.plainTextFreqIS.toPlainText()),
        ('Tap_Phu_Toi_Dai.txt', self.plainTextMaxIS.toPlainText()),
        ('Luat_Ket_Hop.txt', self.plainTextRules.toPlainText()),
        ('10_Luat_conf_Cao_Nhat.txt', self.plainTextTopten.toPlainText()),
    )

    if all(text == "" for _, text in outputs):
        self.labelLog.setText('Chưa có kết quả.')
        return

    # Create the directory only once there is something to save, so an empty
    # save attempt does not leave an empty folder behind.
    make_folder.create_folder(outDir)
    for fileName, text in outputs:
        with open(outDir + fileName, 'w', encoding='utf-8') as fout:
            fout.write(str(text))
    self.labelLog.setText('Đã lưu.')
def on_Vectorize_clicked(self):
    """Train a Word2Vec model on the loaded sentences and save the vectors.

    If ``self.listRawSents`` is empty, only the log label is updated.
    Otherwise the method:

    1. tokenises every raw sentence with ``word_tokenize`` and keeps the
       tokens that start with a word character,
    2. trains ``Word2Vec`` on that corpus with the vector size taken from
       the spin box and saves the model under ``outfile/<fileInitName>/``,
    3. writes one sentence vector per raw sentence to ``Sent2Vect.txt`` and
       the tokenised sentences to ``WordTokenize.txt`` in the same folder,
    4. marks vectorisation as done and enables the K-means group box.
    """
    vectorSize = int(self.spinBoxVecSize.text())
    if len(self.listRawSents) == 0:
        self.labelLog.setText('Chưa có câu.')
        return

    linkFolder = 'outfile/{0}'.format(self.fileInitName)
    make_folder.create_folder(linkFolder)
    linkModel = linkFolder + '/word2vec.model'

    # Rebuild the tokenised corpus from scratch on every run. Appending to
    # the existing list would duplicate every sentence on a second click and
    # skew both the training data and WordTokenize.txt. Slice assignment
    # keeps the same list object in case it is referenced elsewhere.
    self.listSentToWord[:] = [
        [
            token
            for token in word_tokenize(sent, format='text').split()
            if re.match(r'^\w+', token)
        ]
        for sent in self.listRawSents
    ]

    # Train word2vec on the tokenised sentence list.
    # NOTE(review): `size=` is the gensim < 4.0 keyword (renamed
    # `vector_size` in 4.0) -- confirm against the pinned gensim version.
    model = Word2Vec(self.listSentToWord, size=vectorSize, min_count=1)
    model.save(linkModel)

    sent2vec = Sentence2Vec(linkModel)
    listVect = [sent2vec.get_vector(sent).tolist() for sent in self.listRawSents]

    write_file.list_to_txt(listVect, linkFolder, 'Sent2Vect.txt')
    write_file.list_to_txt(self.listSentToWord, linkFolder, 'WordTokenize.txt')
    self.labelLog.setText('Đã lưu file vector.')
    self.checkVectorize = 1
    self.groupBoxKmeans.setEnabled(True)
def list_to_txt(List: list, folderName, name):
    """Write every element of ``List`` on its own line to ``folderName/name``.

    The folder is created through ``make_folder.create_folder`` before
    anything else. An empty list only prints a notice and leaves no file;
    otherwise the file is (over)written as UTF-8.
    """
    make_folder.create_folder(folderName)
    if List:
        target = folderName + '/' + name
        with open(target, 'w', encoding='utf-8') as fout:
            fout.writelines('{0}\n'.format(entry) for entry in List)
    else:
        print('Danh sach rong! ' + name)
def dict_to_txt(Dict: dict, folderName, name):
    """Write ``Dict`` to ``folderName/name`` as one ``key:value`` line per item.

    The folder is created through ``make_folder.create_folder`` before
    anything else. An empty dict only prints a notice and leaves no file;
    otherwise the file is (over)written as UTF-8.
    """
    make_folder.create_folder(folderName)
    if Dict:
        target = folderName + '/' + name
        with open(target, 'w', encoding='utf-8') as fout:
            for pair in Dict.items():
                fout.write('{0}:{1}\n'.format(*pair))
    else:
        print('Dict rỗng!' + name)
def doubleList_to_txt_continuos(List: list, folderName, name):
    """Write a list of lists to a text file, one inner item per line.

    Each inner list is followed by a blank line. The folder is created
    through ``make_folder.create_folder`` first; an empty outer list only
    prints a notice and writes nothing. The file is overwritten (UTF-8).

    Args:
        List: iterable of iterables to dump.
        folderName: destination folder, with or without a trailing slash.
        name: file name inside ``folderName``.
    """
    make_folder.create_folder(folderName)
    if not List:
        print('Danh sach rong! ' + name)
        return

    # Plain concatenation put the file next to (not inside) the folder when
    # the caller omitted the trailing slash; insert a separator only when
    # neither side already supplies one.
    separators = ('/', '\\')
    if not folderName or folderName.endswith(separators) or name.startswith(separators):
        path = folderName + name
    else:
        path = folderName + '/' + name

    with open(path, 'w', encoding='utf-8') as fout:
        for iList in List:
            fout.writelines('{0}\n'.format(item) for item in iList)
            fout.write('\n')
def list_to_txt_continuos_with_last_comma(List: list, folderName: str, name: str, seperateType: str):
    """Append ``List`` to a text file as a single separator-joined line.

    Items are joined with ``seperateType``; despite the function name, no
    separator follows the last item. The folder is created through
    ``make_folder.create_folder`` first; an empty list only prints a notice
    and writes nothing. The file is opened in append mode (UTF-8).

    Args:
        List: items to write.
        folderName: destination folder, with or without a trailing slash.
        name: file name inside ``folderName``.
        seperateType: literal separator placed between items.
    """
    make_folder.create_folder(folderName)
    if not List:
        print('Danh sach rong! ' + name)
        return

    # Plain concatenation put the file next to (not inside) the folder when
    # the caller omitted the trailing slash; insert a separator only when
    # neither side already supplies one.
    separators = ('/', '\\')
    if not folderName or folderName.endswith(separators) or name.startswith(separators):
        path = folderName + name
    else:
        path = folderName + '/' + name

    with open(path, 'a', encoding='utf-8') as fout:
        # join() treats the separator as literal text; splicing it into a
        # format template would misread any '{' or '}' it contains.
        fout.write(seperateType.join('{0}'.format(item) for item in List))
        fout.write('\n')
def list_to_txt_continuos(List: list, folderName: str, name: str, seperateType: str):
    """Append ``List`` to a text file as one line, each item plus separator.

    Every item, including the last, is followed by ``seperateType``; the
    line is then terminated with a newline. The folder is created through
    ``make_folder.create_folder`` first; an empty list only prints a notice
    and writes nothing. The file is opened in append mode (UTF-8).

    Args:
        List: items to write.
        folderName: destination folder, with or without a trailing slash.
        name: file name inside ``folderName``.
        seperateType: literal separator written after every item.
    """
    make_folder.create_folder(folderName)
    if not List:
        print('Danh sach rong! ' + name)
        return

    # Plain concatenation put the file next to (not inside) the folder when
    # the caller omitted the trailing slash; insert a separator only when
    # neither side already supplies one.
    separators = ('/', '\\')
    if not folderName or folderName.endswith(separators) or name.startswith(separators):
        path = folderName + name
    else:
        path = folderName + '/' + name

    with open(path, 'a', encoding='utf-8') as fout:
        for item in List:
            # Pass the separator as a format argument so that '{' or '}'
            # inside it is written literally instead of being parsed.
            fout.write('{0}{1}'.format(item, seperateType))
        fout.write('\n')
def dict_list_to_txt(Dict: dict, folderName, name):
    """Write a dict of sequences to ``folderName/name``, one key per line.

    Each line has the form ``key:v1, v2, ..., vn``. A key whose value
    sequence is empty produces ``key:`` instead of raising ``IndexError``.
    The folder is created through ``make_folder.create_folder`` first; an
    empty dict only prints a notice and writes nothing. The file is
    overwritten (UTF-8).
    """
    make_folder.create_folder(folderName)
    if not Dict:
        print('Dict rỗng!' + name)
        return

    with open(folderName + '/' + name, 'w', encoding='utf-8') as fout:
        for key, values in Dict.items():
            joined = ', '.join('{0}'.format(value) for value in values)
            fout.write('{0}:{1}\n'.format(key, joined))
def list_to_txt_with_last_comma(List: list, folderName, name, splitType: str):
    """Write each item set in ``List`` as one ``splitType``-joined line.

    No separator follows the last item of a line. An empty item set yields
    an empty line instead of raising ``IndexError``, so line numbers keep
    matching positions in ``List``. The folder is created through
    ``make_folder.create_folder`` first; an empty ``List`` only prints a
    notice and writes nothing. The file is overwritten (UTF-8).

    Args:
        List: iterable of item sets (each itself iterable).
        folderName: destination folder, with or without a trailing slash.
        name: file name inside ``folderName``.
        splitType: literal separator placed between items of a line.
    """
    make_folder.create_folder(folderName)
    if not List:
        print('Danh sach rong! ' + name)
        return

    # Plain concatenation put the file next to (not inside) the folder when
    # the caller omitted the trailing slash; insert a separator only when
    # neither side already supplies one.
    separators = ('/', '\\')
    if not folderName or folderName.endswith(separators) or name.startswith(separators):
        path = folderName + name
    else:
        path = folderName + '/' + name

    with open(path, 'w', encoding='utf-8') as fout:
        for itemSet in List:
            fout.write(splitType.join('{0}'.format(item) for item in itemSet))
            fout.write('\n')