Python Corpus.read 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Corpus

클래스/타입: Corpus

메소드/함수: read

hotexamples.com에서의 예제들: 3

Python Corpus.read - 3개의 예제를 찾았습니다. 다음은 오픈소스 프로젝트에서 추출한 Python Corpus.Corpus.read의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Corpus(30)

find(5)

get_postag_set(4)

read(3)

__init__(2)

verificarPlagio(2)

add_source_document(2)

add_target_document(2)

get_file_name(2)

buildCorpus(2)

emails_as_string(2)

dump(2)

preprocess(2)

get_data(2)

read_ner(2)

outputWords(1)

pickledumpwords(1)

output_rules(1)

ner(1)

outputPOStags(1)

nettoyer_texte(1)

most_frequent_word_by_year(1)

most_frequent_word_by_month(1)

most_frequent_word_by_day(1)

most_frequent_word(1)

most_frequent_trigrams(1)

most_frequent_content_words(1)

picklegetwords(1)

read_label(1)

prepapre_to_matrix(1)

search_ambiguous(1)

vectoriserDocCorpus(1)

url_to_dir(1)

train_word2vec(1)

tag_words_with_most_likely_parses(1)

spanishTags(1)

set_lista_texto(1)

save_json(1)

process(1)

save(1)

results(1)

resetSentStats(1)

read_word2vec(1)

read_prediction(1)

load_json(1)

read_data(1)

most_frequent_bigrams(1)

get_instances(1)

lemmatiserCorpus(1)

calculSimilarite(1)

예제 #1

파일 보기

파일: create_NER_pickle.py 프로젝트: Tuan-Lee-23/Vietnamese-corpus-search-and-analysis-Web-app

import sys
from Corpus import Corpus

# Script body: prompt for a corpus location, then run the Corpus steps
# below in order (read -> preprocess -> ner -> train_word2vec).
corpus = Corpus()
# The location is asked for interactively at run time.
# NOTE(review): the prompt says "directory" but the sample value below is a
# single .txt file path — confirm what Corpus.read() expects.
dirr = input("Input directory of corpus:")
# dirr = "resources/corpus_mini.txt"
corpus.read(dirr)
corpus.preprocess()
# NOTE(review): the file name (create_NER_pickle.py) suggests ner() persists
# its output as a pickle — confirm in the Corpus class.
corpus.ner()
corpus.train_word2vec()

예제 #2

파일 보기

파일: app.py 프로젝트: Tuan-Lee-23/Vietnamese-corpus-search-and-analysis-Web-app

import dash_bootstrap_components as dbc
import dash_html_components as html
import plotly.express as px
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.figure_factory as ff
from statsmodels.graphics.gofplots import qqplot

import pandas as pd
import numpy as np

# Create the Dash application, styled with the Bootstrap theme shipped by
# dash_bootstrap_components.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level corpus setup; the calls below run once at import time, in order.
corpus = Corpus()  # initialize
dirr = "resources/vn_express.txt"  # choose the corpus path
corpus.read(dirr)  # read from that path
corpus.preprocess()  # preprocessing
corpus.read_word2vec()  # load the word2vec model used to find synonyms
corpus.read_ner()  # load the already-processed named-entity and part-of-speech data


def genResult(res):
    """Wrap every item of *res* in a styled ``html.P`` paragraph component.

    Each item becomes the ``children`` of its own paragraph, rendered on a
    white background with a purple bottom border, a 30px margin and 10px
    of padding.

    NOTE(review): the visible excerpt only builds the list and ends without
    a ``return`` statement, so the function yields ``None`` as shown here.
    The original source presumably continues past this point — confirm
    before relying on the return value.
    """
    result = []
    for sen in res:
        # Build a fresh style dict per paragraph so that no two components
        # share the same mutable object.
        paragraph_style = {
            'backgroundColor': 'white',
            'borderBottom': '2px solid #4F2992',
            'margin': '30px',
            'padding': '10px'
        }
        paragraph = html.P(children=sen, style=paragraph_style)
        result.append(paragraph)

예제 #3

파일 보기

파일: app.py 프로젝트: Tuan-Lee-23/Vietnamese-corpus-search-and-analysis-Web-app

import dash_core_components as dcc
import dash_bootstrap_components as dbc
import dash_html_components as html
import plotly.express as px
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.figure_factory as ff
from statsmodels.graphics.gofplots import qqplot

import pandas as pd
import numpy as np

# Create the Dash application, styled with the Bootstrap theme shipped by
# dash_bootstrap_components.
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

# Module-level corpus setup; the calls below run once at import time, in order.
corpus = Corpus()
corpus.read('resources/vn_express.txt')
corpus.preprocess()
corpus.read_ner()
# corpus.read_word2vec()

# One-column DataFrame ('sentences') built from corpus.data_sent_segment.
# NOTE(review): presumably one segmented sentence per row — confirm in Corpus.
df = pd.DataFrame(corpus.data_sent_segment, columns=['sentences'])

# # count by syllable
# df['len_tieng'] = df['sentences'].str.split()
# df['len_tieng'] = df['len_tieng'].apply(len)

# so_tieng = df['len_tieng'].sum()
# cau_tu_tieng = pd.DataFrame(np.array([len(corpus.data_sent_segment), len(corpus.vocab.keys()), so_tieng]))
# print(cau_tu_tieng)

# Sentence length in syllables