import dash_html_components as html
import plotly.express as px
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.figure_factory as ff
from statsmodels.graphics.gofplots import qqplot

import pandas as pd
import numpy as np

# Create the Dash application and load the Bootstrap theme stylesheet.
# NOTE(review): `dash`, `dbc` and `Corpus` are not imported in the visible
# part of this file — confirm they are imported elsewhere.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level corpus setup; the calls below run once, at import time, in
# this order.
corpus = Corpus()  # initialise
dirr = "resources/vn_express.txt"  # path of the corpus file to use
corpus.read(dirr)  # read the corpus from that path
corpus.preprocess()  # preprocessing
corpus.read_word2vec()  # load the word2vec model used to find synonyms
corpus.read_ner()  # load the data already processed for named entities and parts of speech


def genResult(res):
    """Build one styled ``html.P`` paragraph per entry of ``res``.

    Args:
        res: Iterable whose items are each used as the ``children`` of a
            paragraph. It must also support ``append``, because a
            ``html.Br()`` is appended to it after the paragraphs are built.

    NOTE(review): no ``return`` statement is visible, so as shown the
    function returns ``None`` and the ``result`` list is discarded. The
    definition may be truncated in this view — confirm against the full file.
    """
    # One paragraph per entry, all sharing the same inline style.
    result = [
        html.P(children=sen,
               style={
                   'backgroundColor': 'white',
                   'borderBottom': '2px solid #4F2992',
                   'margin': '30px',
                   'padding': '10px'
               }) for sen in res
    ]
    # NOTE(review): this mutates the caller's `res` rather than `result`;
    # possibly `result.append(...)` was intended — verify before changing.
    res.append(html.Br())
Beispiel #2
0
	print "\nLoading %i files"%nb_files
	print "-------------------------------------------"
	
	docs = []
	
	# Load the files
	for i, f in enumerate(files):
		sys.stdout.write( "\r%3i/%i %s"%( i+1, nb_files, '{:<70}'.format(f) ) )
		sys.stdout.flush()
		docs.append(Document(f))
	
	corpus = Corpus(docs, termino)
	
	print "\n\nCorpus preprocessing"
	print "-------------------------------------------"
	corpus.preprocess()
	
	print "\n\nExtracting the keywords"
	print "-------------------------------------------"
	corpus.process()
	
	if Config().testing:
		print "\n\nResults (%s average)"%("Macro" if config.macro_average else "Micro")
		print "-------------------------------------------"
		corpus.results()
	else:
		print "\n"

	if config.save_file != "":
		print "\nResults saved in %s"%config.save_file
		corpus.save(config.save_file)
import sys
from Corpus import Corpus

# Script section: create a Corpus, read it from a user-supplied path, then
# call preprocess(), ner() and train_word2vec() in that order.
corpus = Corpus()
# Ask on stdin for the corpus location; the answer is passed unchanged to
# corpus.read().
# NOTE(review): the prompt says "directory" but the commented-out example
# below is a .txt file — confirm which one corpus.read() expects.
dirr = input("Input directory of corpus:")
# Hard-coded alternative path, kept for reference:
# dirr = "resources/corpus_mini.txt"
corpus.read(dirr)
corpus.preprocess()
# NOTE(review): presumably ner() produces the named-entity data and
# train_word2vec() trains the word2vec model — confirm against Corpus.
corpus.ner()
corpus.train_word2vec()