Python NNSplit.load 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: nnsplit

클래스/타입: NNSplit

메소드/함수: load

hotexamples.com에서의 예제들: 4

Python NNSplit.load - 4개의 예제를 찾았습니다. 오픈소스 프로젝트에서 추출한, Python nnsplit.NNSplit.load의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

NNSplit(8)

load(4)

split(4)

자주 사용되는 메소드들

NNSplit (8)

load (4)

split (4)

예제 #1

파일 보기

def segment_setences(words, lang="en"):
    """Group timed words into sentence records using an NNSplit model.

    The ``"text"`` of every entry in ``words`` is joined with single spaces
    and split by ``NNSplit.load(lang)``. Each sentence produced by the
    splitter is then matched back onto a run of ``words`` so that it can
    take its ``"start"`` and ``"end"`` values from them.

    Args:
        words: Sequence of dicts; the ``"text"``, ``"start"`` and ``"end"``
            keys are read.
        lang: Model identifier forwarded to ``NNSplit.load``.

    Returns:
        A list of dicts with ``"start"``, ``"end"`` and ``"text"`` keys, one
        per sentence, in the order the splitter produced them.
    """
    joined = " ".join(word["text"] for word in words)
    model = NNSplit.load(lang)

    sentences = []
    left = 0  # index of the first word not yet assigned to a sentence

    for sentence_group in tqdm(model.split([joined])):
        for tokens in sentence_group:
            text = "".join(map(str, tokens)).strip()

            # First guess for the last word of this sentence: one word per
            # token, clamped to the end of ``words``.
            # NOTE(review): presumably tokens line up with words — confirm.
            right = min(len(words), left + len(tokens)) - 1

            # Step back until the sentence text ends with the candidate
            # word's text; the search never goes below index 0.
            while right > 0:
                if text.endswith(words[right]["text"]):
                    break
                right -= 1

            sentences.append({
                "start": words[left]["start"],
                "end": words[right]["end"],
                "text": text,
            })

            left = right + 1

    return sentences

예제 #2

파일 보기

 def __init__(self, keyword, channel, contents_id):
     """Create the database engine, load the splitter and store the arguments.

     Args:
         keyword: Stored unchanged on ``self.keyword``.
         channel: Stored unchanged on ``self.channel``.
         contents_id: Stored unchanged on ``self.contents_id``.
     """
     # NOTE(review): user, password, host, port and database name are
     # hard-coded here — consider moving them to configuration.
     db_url = "mysql+pymysql://{}:{}@{}:{}/{}?charset=utf8mb4".format(
         'root', 'robot369', '1.221.75.76', 3306, 'datacast2')
     self.engine = create_engine(db_url)
     self.keyword = keyword
     self.channel = channel
     # English NNSplit model, loaded once per instance.
     self.splitter = NNSplit.load("en")
     self.contents_id = contents_id

예제 #3

파일 보기

파일: create_vector_script.py 프로젝트: TTDS-Dream-Team/Preprocessing

from nnsplit import NNSplit
from sentence_transformers import SentenceTransformer
import numpy as np
import h5py
from tqdm.auto import tqdm
import zlib
import pymongo
from mongo_proxy import MongoProxy
import json
from bson import ObjectId
import time
from threading import Thread, Lock
import gc
from guppy import hpy

# English NNSplit model, loaded once at import time with use_cuda=True.
splitter = NNSplit.load("en", use_cuda=True)

# Module-level threading.Lock; what it guards is not visible in this excerpt.
lock = Lock()


class JSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``ObjectId`` values as their string form."""

    def default(self, o):
        """Return ``str(o)`` for an ``ObjectId``; otherwise defer to the base class."""
        if not isinstance(o, ObjectId):
            # Anything else goes to the stock encoder's default handling.
            return super().default(o)
        return str(o)


# NOTE(review): this credential is hard-coded in source and is interpolated
# into the MongoDB connection URI built just below — consider loading it
# from configuration or the environment instead.
db_pwd = "LTEG2pfoDiKfH29M"
client = MongoProxy(
    MongoClient(
        f"mongodb+srv://cdminix:{db_pwd}@cluster0.pdjrf.mongodb.net/Reviews_Data?retryWrites=true&w=majority"

예제 #4

파일 보기

    return DEFAULT_LANGUAGE_MODEL.split(s)


c = 'wethepeopleoftheunitedstatesinordertoformamoreperfectunionestablishjusticeinsuredomestictranquilityprovideforthecommondefencepromotethegeneralwelfareandsecuretheblessingsoflibertytoourselvesandourposteritydoordainandestablishthisconstitutionfortheunitedstatesofamerica'
d = 'WeholdthesetruthstobeselfevidentthatallmenarecreatedequalthattheyareendowedbytheirCreatorwithcertainunalienableRightsthatamongtheseareLifeLibertyandthepursuitofHappinessThattosecuretheserightsGovernmentsareinstitutedamongMenderivingtheirjustpowersfromtheconsentofthegovernedThatwheneveranyFormofGovernmentbecomesdestructiveoftheseendsitistheRightofthePeopletoalterortoabolishitandtoinstitutenewGovernmentlayingitsfoundationonsuchprinciplesandorganizingitspowersinsuchformastothemshallseemmostlikelytoeffecttheirSafetyandHappinessPrudenceindeedwilldictatethatGovernmentslongestablishedshouldnotbechangedforlightandtransientcausesandaccordinglyallexperiencehathshewnthatmankindaremoredisposedtosufferwhileevilsaresufferablethantorightthemselvesbyabolishingtheformstowhichtheyareaccustomedButwhenalongtrainofabusesandusurpationspursuinginvariablythesameObjectevincesadesigntoreducethemunderabsoluteDespotismitistheirrightitistheirdutytothrowoffsuchGovernmentandtoprovidenewGuardsfortheirfuturesecuritSuchhasbeenthepatientsufferanceoftheseColoniesandsuchisnowthenecessitywhichconstrainsthemtoaltertheirformerSystemsofGovernmentThehistoryofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpationsallhavingindirectobjecttheestablishmentofanabsoluteTyrannyovertheseStatesToprovethisletFactsbesubmittedtoacandidworld'
r = 'HowdymynameisBrittanyPitcherandiamanelectricalengineeringmajorfromspringtxbutmostimportantlyiamtheloudestandproudestmemberofthefightingtexasaggieclassoftwentytwentyoneawhoop'
z = 'hellomynameisbrittanypitcherandmyfavoritecolorismarooniaminseniordesignrightnowthisiswhyiamworkingonthisprojectitismeanttohelpthosewhoarehardofhearingordeaftoovercomelanguagebarrierswiththeirpeersiamexcitedforittobefinishedandtodeterminghowwellitworks'

# Rebuild each unspaced sample as a string, putting a single space between
# the pieces that split() returns for it.
c, d, r, z = (" ".join(split(sample)) for sample in (c, d, r, z))

# Split the text into sentences with the English NNSplit model.

from nnsplit import NNSplit

splitter = NNSplit.load("en")

# split() takes a list of texts; only one is passed, so keep its result.
splits = splitter.split([res])[0]

# Number of sentences still to be followed by another one.
i = len(splits) - 1
# The split result can be iterated sentence by sentence.
for sentence in splits:
    print(sentence, end='')
    if i <= 0:
        # Last sentence: just terminate it with a period.
        print('.')
    else:
        # Emit a backspace before the period, then count this sentence off.
        print("\b.")
        i -= 1