import dash_html_components as html
import plotly.express as px
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.figure_factory as ff
from statsmodels.graphics.gofplots import qqplot
import pandas as pd
import numpy as np

# Dash application styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level corpus shared by the app; everything below runs at import time.
corpus = Corpus()  # initialise
dirr = "resources/vn_express.txt"  # path of the corpus to load
corpus.read(dirr)  # read the corpus from that path
corpus.preprocess()  # preprocessing
corpus.read_word2vec()  # load the word2vec model used to look up synonyms
corpus.read_ner()  # load the data already processed for named entities and parts of speech


def genResult(res):
    """Wrap every item of ``res`` in a styled ``html.P`` paragraph."""
    result = []
    for sen in res:
        paragraph = html.P(
            children=sen,
            style={
                'backgroundColor': 'white',
                'borderBottom': '2px solid #4F2992',
                'margin': '30px',
                'padding': '10px',
            },
        )
        result.append(paragraph)
    # NOTE(review): the line break is appended to the *input* list ``res``,
    # not to ``result``, and no ``return`` is visible in this excerpt --
    # confirm the intent against the complete file.
    res.append(html.Br())
# NOTE(review): statement nesting below was reconstructed from a
# whitespace-collapsed source; in particular, confirm that the final
# "save" step is meant to run regardless of the testing flag.
#
# Every print(...) call takes exactly one argument, so it behaves the same
# as the Python 2 print statement and as the Python 3 print function.

# --- Stage 1: load the input files -------------------------------------
print("\nLoading %i files" % nb_files)
print("-------------------------------------------")

docs = []
for i, f in enumerate(files):
    # "\r" returns to the start of the console line so the counter is
    # rewritten in place; the name is left-justified and padded to 70
    # columns so a shorter name fully overwrites a longer previous one.
    padded_name = '{:<70}'.format(f)
    progress = "\r%3i/%i %s" % (i + 1, nb_files, padded_name)
    sys.stdout.write(progress)
    sys.stdout.flush()
    docs.append(Document(f))

corpus = Corpus(docs, termino)

# --- Stage 2: preprocess the corpus ------------------------------------
print("\n\nCorpus preprocessing")
print("-------------------------------------------")
corpus.preprocess()

# --- Stage 3: keyword extraction ---------------------------------------
print("\n\nExtracting the keywords")
print("-------------------------------------------")
corpus.process()

# --- Stage 4: report and/or save ---------------------------------------
# NOTE(review): the testing flag is read from a fresh Config() while the
# other settings come from the existing `config` name -- presumably the
# same shared configuration; verify.
if Config().testing:
    average_kind = "Macro" if config.macro_average else "Micro"
    print("\n\nResults (%s average)" % average_kind)
    print("-------------------------------------------")
    corpus.results()
else:
    print("\n")

if config.save_file != "":
    print("\nResults saved in %s" % config.save_file)
    corpus.save(config.save_file)
import sys

from Corpus import Corpus

# Interactive script: ask for the corpus location, then call the Corpus
# steps in order (read -> preprocess -> ner -> train_word2vec).
corpus = Corpus()

dirr = input("Input directory of corpus:")
# Hard-coded alternative kept for reference:
# dirr = "resources/corpus_mini.txt"

corpus.read(dirr)
corpus.preprocess()
corpus.ner()
corpus.train_word2vec()