예제 #1
0
def test_TTS():
    """Check the speech-oriented annotations produced for ``text5``.

    The four base operations (sentence splitting, tokenization, POS tagging
    and lemmatization) are configured to ``TeproAlgo.algoTTL``. ``pcExec``
    is then asked for hyphenation, stress identification, phonetic
    transcription, abbreviation rewriting and numeral rewriting, and a
    handful of tokens of the resulting sentence are verified.
    """
    # Base operations are configured one after another, in this order.
    base_op_getters = (
        TeproAlgo.getSentenceSplittingOperName,
        TeproAlgo.getTokenizationOperName,
        TeproAlgo.getPOSTaggingOperName,
        TeproAlgo.getLemmatizationOperName,
    )
    for get_op_name in base_op_getters:
        tepro.configure(get_op_name(), TeproAlgo.algoTTL)

    requested_ops = [
        TeproAlgo.getHyphenationOperName(),
        TeproAlgo.getStressIdentificationOperName(),
        TeproAlgo.getPhoneticTranscriptionOperName(),
        TeproAlgo.getAbbreviationRewritingOperName(),
        TeproAlgo.getNumeralRewritingOperName(),
    ]
    result = tepro.pcExec(text5, requested_ops)

    # Exactly one sentence is expected in the output.
    assert result.getNumberOfSentences() == 1

    # All remaining checks look at tokens of that single sentence.
    tokens = result.getSentenceTokens(0)

    # Expansion of the token at index 3.
    first_number = tokens[3]
    assert first_number.getExpansion() == 'o sută douăzeci și trei'

    # Syllabification and phonetic form of the token at index 0.
    opening_word = tokens[0]
    assert opening_word.getSyllables() == "a-'ceas-ta"
    assert opening_word.getPhonetical() == "a ch e@ a s t a"

    # Syllabification and phonetic form of the token at index 11.
    comma_word = tokens[11]
    assert comma_word.getSyllables() == "'vir-gu-lă"
    assert comma_word.getPhonetical() == "v i r g u l @"

    # Expansion of the token at index 14.
    second_number = tokens[14]
    assert second_number.getExpansion() == \
        'patruzeci și cinci virgulă șase sute treizeci și unu'
예제 #2
0
def test_TTL():
    """Check word forms, lemmas, tags and chunks produced for ``text``.

    The four base operations (sentence splitting, tokenization, POS tagging
    and lemmatization) are configured to ``TeproAlgo.algoTTL``. ``pcExec``
    is then asked for the chunking operation, and selected tokens of the
    two resulting sentences are verified.
    """
    # Base operations are configured one after another, in this order.
    base_op_getters = (
        TeproAlgo.getSentenceSplittingOperName,
        TeproAlgo.getTokenizationOperName,
        TeproAlgo.getPOSTaggingOperName,
        TeproAlgo.getLemmatizationOperName,
    )
    for get_op_name in base_op_getters:
        tepro.configure(get_op_name(), TeproAlgo.algoTTL)

    requested_ops = [TeproAlgo.getChunkingOperName()]
    result = tepro.pcExec(text, requested_ops)

    # Two sentences are expected in the output.
    assert result.getNumberOfSentences() == 2

    # --- First sentence ---
    first_sentence = result.getSentenceTokens(0)

    opening_token = first_sentence[0]
    assert opening_token.getWordForm() == 'La'
    assert opening_token.getMSD() == 'Spsa'
    assert opening_token.getLemma() == 'la'

    assert first_sentence[1].getWordForm() == '7'

    proper_noun = first_sentence[5]
    assert proper_noun.getWordForm() == 'Brașovului'
    assert proper_noun.getLemma() == 'Brașov'
    assert proper_noun.getMSD() == 'Npmsoy'
    assert proper_noun.getChunk() == 'Pp#2,Np#2'

    closing_token = first_sentence[22]
    assert closing_token.getWordForm() == '.'
    assert closing_token.getCTAG() == 'PERIOD'

    # --- Second sentence ---
    # Fetched only now, after every first-sentence check has passed.
    second_sentence = result.getSentenceTokens(1)

    assert second_sentence[0].getWordForm() == 'Amplasarea'

    unit_token = second_sentence[22]
    assert unit_token.getWordForm() == 'metri'
    assert unit_token.getCTAG() == 'NPN'
예제 #3
0
def test_NLPCube():
    """Check the dependency annotations produced for ``text``.

    The four base operations (sentence splitting, tokenization, POS tagging
    and lemmatization) are configured to ``TeproAlgo.algoCube``. ``pcExec``
    is then asked for the dependency parsing operation, and the head and
    dependency relation of selected tokens in both sentences are verified.
    """
    # Base operations are configured one after another, in this order.
    base_op_getters = (
        TeproAlgo.getSentenceSplittingOperName,
        TeproAlgo.getTokenizationOperName,
        TeproAlgo.getPOSTaggingOperName,
        TeproAlgo.getLemmatizationOperName,
    )
    for get_op_name in base_op_getters:
        tepro.configure(get_op_name(), TeproAlgo.algoCube)

    requested_ops = [TeproAlgo.getDependencyParsingOperName()]
    result = tepro.pcExec(text, requested_ops)

    # Two sentences are expected in the output.
    assert result.getNumberOfSentences() == 2

    # --- First sentence: tokens at indices 7 and 13 ---
    first_sentence = result.getSentenceTokens(0)

    subject_token = first_sentence[7]
    assert subject_token.getWordForm() == 'imobilul'
    assert subject_token.getHead() == 13
    assert subject_token.getDepRel() == 'nsubj'

    object_token = first_sentence[13]
    assert object_token.getWordForm() == 'celor'
    assert object_token.getHead() == 13
    assert object_token.getDepRel() == 'iobj'

    # --- Second sentence: tokens at indices 0 and 1 ---
    # Fetched only now, after every first-sentence check has passed.
    second_sentence = result.getSentenceTokens(1)

    leading_token = second_sentence[0]
    assert leading_token.getWordForm() == 'Amplasarea'
    assert leading_token.getHead() == 5
    assert leading_token.getDepRel() == 'nsubj'

    modifier_token = second_sentence[1]
    assert modifier_token.getWordForm() == 'construcției'
    assert modifier_token.getHead() == 1
    assert modifier_token.getDepRel() == 'nmod'
예제 #4
0
def test_MWEsAndDepTransfer():
    """Check the annotations of the joined token ``o_să`` in ``text2``.

    The four base operations (sentence splitting, tokenization, POS tagging
    and lemmatization) are configured to ``TeproAlgo.algoTTL``. ``pcFull``
    is then run on ``text2``, and the token at index 3 of the first sentence
    is checked for its word form, MSD, head and dependency relation.
    """
    # Base operations are configured one after another, in this order.
    base_op_getters = (
        TeproAlgo.getSentenceSplittingOperName,
        TeproAlgo.getTokenizationOperName,
        TeproAlgo.getPOSTaggingOperName,
        TeproAlgo.getLemmatizationOperName,
    )
    for get_op_name in base_op_getters:
        tepro.configure(get_op_name(), TeproAlgo.algoTTL)

    result = tepro.pcFull(text2)

    # Every check targets the same token: index 3 of the first sentence.
    joined_token = result.getSentenceTokens(0)[3]
    assert joined_token.getWordForm() == 'o_să'
    assert joined_token.getMSD() == 'Qf'
    assert joined_token.getHead() == 7
    assert joined_token.getDepRel() == 'mark'
예제 #5
0
def test_UDPipe():
    """Check the annotations produced for ``text4``.

    The four base operations (sentence splitting, tokenization, POS tagging
    and lemmatization) are configured to ``TeproAlgo.algoUDPipe``.
    ``pcExec`` is then asked for the dependency parsing operation, and the
    sentence count, token count and selected token annotations are verified.
    """
    # Base operations are configured one after another, in this order.
    base_op_getters = (
        TeproAlgo.getSentenceSplittingOperName,
        TeproAlgo.getTokenizationOperName,
        TeproAlgo.getPOSTaggingOperName,
        TeproAlgo.getLemmatizationOperName,
    )
    for get_op_name in base_op_getters:
        tepro.configure(get_op_name(), TeproAlgo.algoUDPipe)

    requested_ops = [TeproAlgo.getDependencyParsingOperName()]
    dto = tepro.pcExec(text4, requested_ops)

    # Exactly one sentence, made of 21 tokens, is expected.
    assert dto.getNumberOfSentences() == 1
    assert len(dto.getSentenceTokens(0)) == 21

    # Dependency annotation of the token at index 5.
    sixth_token = dto.getSentenceTokens(0)[5]
    assert sixth_token.getHead() == 5
    assert sixth_token.getDepRel() == 'acl'

    # Full annotation of the token at index 10.
    noun_token = dto.getSentenceTokens(0)[10]
    assert noun_token.getWordForm() == 'concentrației'
    assert noun_token.getLemma() == 'concentrație'
    assert noun_token.getCTAG() == 'NOUN'
    assert noun_token.getMSD() == 'Ncfsoy'
    assert noun_token.getHead() == 8
    assert noun_token.getDepRel() == 'nmod'

    # Word form of the token at index 17.
    assert dto.getSentenceTokens(0)[17].getWordForm() == 'și'