Python CAS 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pycas.cas.core.CAS

클래스/타입: CAS

hotexamples.com에서의 예제들: 4

Python CAS - 예제 4개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, 평가가 가장 높은 pycas.cas.core.CAS.CAS의 실제 Python 사용 예제입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

CAS(4)

addToIndex(2)

createAnnotation(2)

documentText(2)

sofaMimeType(2)

createFS(1)

getAnnotation(1)

getAnnotationIndex(1)

예제 #1

파일 보기

def rebuilt2xmi(ci, output_dir, typesystem_path):
    """
    Serialize a rebuilt ContentItem as an Apache UIMA/XMI file.

    The content item's full text becomes the CAS document text. One
    Sentence annotation is created per entry of ``ci.lines``, spanning from
    the previous entry (0 for the first one) to that entry, and one
    ImpressoImages annotation is created per ``(link, begin, end)`` triple
    returned by ``compute_image_links(ci)``. The CAS is then written to
    ``<output_dir>/<ci.id>.xmi``.

    :param ci: the content item to be converted
    :type ci: `impresso_commons.classes.ContentItem`
    :param output_dir: the path to the output directory
    :type output_dir: str
    :param typesystem_path: TypeSystem file containing definitions of the
        annotation layers.
    :type typesystem_path: str
    """
    sentence_type = (
        'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence'
    )
    image_link_type = 'webanno.custom.ImpressoImages'

    factory = TypeSystemFactory()
    typesystem = factory.readTypeSystem(typesystem_path)
    cas = CAS(typesystem)
    cas.documentText = ci.fulltext
    cas.sofaMimeType = 'text'

    # Sentence layer: each entry of ci.lines closes the span opened by the
    # previous one; the very first span opens at offset 0.
    previous_break = 0
    for line_break in ci.lines:
        span = {'begin': previous_break, 'end': line_break}
        previous_break = line_break
        cas.addToIndex(cas.createAnnotation(sentence_type, span))

    # Image-link layer: one annotation per (link, begin, end) triple.
    for link, begin, end in compute_image_links(ci):
        features = {'begin': begin, 'end': end, 'link': link}
        cas.addToIndex(cas.createAnnotation(image_link_type, features))

    xmi_writer = XmiWriter()
    xmi_writer.write(cas, os.path.join(output_dir, f'{ci.id}.xmi'))

예제 #2

파일 보기

파일: CasFactory.py 프로젝트: Maximilian-Fuchs/dkpro-pycas

 def buildCAS(self, xmifilepath, typefilepath):
     """Build a CAS from an XMI file and a type system file.

     :param xmifilepath: path of the XMI file handed to the XMI parser
     :param typefilepath: path of the type system file handed to
         ``TypeSystemFactory.readTypeSystem``
     :return: whatever ``self.__build`` returns for the new CAS and parser
     """
     # Load the type system that the new CAS is constructed with.
     # NOTE(review): readTypeSystem is called through the class with this
     # object passed as its `self` -- confirm that is intended.
     type_system = TypeSystemFactory.readTypeSystem(self, typefilepath)
     new_cas = CAS(type_system)
     # The XMI parser is the source the elements are fetched from.
     xmi_parser = CasXmiParser()
     xmi_parser.setXmiAsFile(xmifilepath)
     return self.__build(new_cas, xmi_parser)

예제 #3

파일 보기

파일: CasFactory.py 프로젝트: Maximilian-Fuchs/dkpro-pycas

 def buildCASfromStrings(self, xmistring, typesysstemString):
     """Build a CAS from an XMI string and a type system string.

     :param xmistring: XMI content handed to the XMI parser as a string
     :param typesysstemString: type system content handed to
         ``TypeSystemFactory.readTypeSystemString``
     :return: whatever ``self.__build`` returns for the new CAS and parser
     """
     # Load the type system that the new CAS is constructed with.
     # NOTE(review): readTypeSystemString is called through the class with
     # this object passed as its `self` -- confirm that is intended.
     type_system = TypeSystemFactory.readTypeSystemString(
         self, typesysstemString)
     new_cas = CAS(type_system)
     # The XMI parser is the source the elements are fetched from.
     xmi_parser = CasXmiParser()
     xmi_parser.setXmiAsString(xmistring)
     return self.__build(new_cas, xmi_parser)

예제 #4

파일 보기

    def test_Cas(self):
        """Exercise feature-structure creation, indexing and the Sofa of a CAS.

        The test builds a CAS from ``typesystem.xml``, then checks:

        * a feature structure created without features reports no values;
        * ``createFS`` / ``createAnnotation`` reject an already-used or
          too-low id, a non-dict feature argument, and an annotation with
          ``begin`` but no ``end``;
        * the ids and type names of indexed feature structures agree
          (Sofa -> 1, Sentence -> 2, Token -> 7);
        * the Sofa feature structure mirrors the CAS text and mime type;
        * the covered text of the first Token annotation.

        The order of the ``create*`` calls matters: the expected ids above
        depend on it.
        """
        # NOTE(review): readTypeSystem is called through the class with the
        # test case passed as its `self` -- confirm that is intended.
        typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
        cas = CAS(typesystem)
        cas.documentText = 'These steps install the basis system requirements'
        cas.sofaMimeType = 'text'

        sofa_type = 'uima.cas.Sofa'
        sentence_type = (
            'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence')
        token_type = 'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'
        pos_type = 'de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS'
        tag_desc_type = (
            'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription')
        tagset_desc_type = (
            'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription')

        # A feature structure created without features has no values.
        sentence = cas.createFS(sentence_type)
        cas.addToIndex(sentence)
        self.assertEqual(sentence.getFeatureValsAsDictList(), [])

        # Id 1 must be rejected; per the original author's note it is
        # already occupied by the auto-generated Sofa feature structure.
        with self.assertRaises(ValueError):
            cas.createFS(sentence_type, {'begin': 10, 'end': 20}, 1)

        explicit_id_pos = cas.createFS(pos_type, {'PosValue': 'NN'}, 3)
        cas.addToIndex(explicit_id_pos)
        explicit_id_pos.PosValue = 'Noun'

        # The features must be given as a dictionary in the second argument.
        with self.assertRaises(TypeError):
            cas.createAnnotation(pos_type, 2)
        # Id 3 has been handed out above, so the lower id 2 must be rejected.
        with self.assertRaises(ValueError):
            cas.createAnnotation(pos_type, {'PosValue': 'NN'}, 2)
        # An annotation needs both `begin` and `end`.
        with self.assertRaises(ValueError):
            cas.createAnnotation(pos_type, {'begin': 10, 'PosValue': 'NN'}, 4)

        # Valid annotations: features given up front ...
        pos = cas.createAnnotation(
            pos_type, {'begin': 0, 'end': 5, 'PosValue': 'NN'})
        cas.addToIndex(pos)
        # ... or set after creation.
        pos_set_later = cas.createAnnotation(pos_type, {'begin': 0, 'end': 5})
        pos_set_later.PosValue = 'NN'
        cas.addToIndex(pos_set_later)
        # A feature value may itself be a feature structure.
        token = cas.createAnnotation(
            token_type, {'begin': 0, 'end': 5, 'pos': pos})
        cas.addToIndex(token)

        # Tag descriptions are created but only the tagset that references
        # them is added to the index.
        tag_descriptions = [
            cas.createFS(tag_desc_type, {'name': tag_name})
            for tag_name in ('#', '$', '-LRB-')
        ]
        tagset = cas.createAnnotation(
            tagset_desc_type, {
                'begin': 0,
                'end': 152,
                'layer': pos_type,
                'name': 'ptb',
                'tags': tag_descriptions,
            })
        cas.addToIndex(tagset)

        # Every indexed feature structure must agree with the expected
        # (type name, id) pairs in both directions.
        expected_ids = (
            (sofa_type, 1),
            (sentence_type, 2),
            (token_type, 7),
        )
        for fs in cas.getAnnotationIndex():
            # type name -> id
            for type_name, fs_id in expected_ids:
                if fs.FStype.name == type_name:
                    self.assertEqual(fs.FSid, fs_id)
            # id -> type name
            for type_name, fs_id in expected_ids:
                if fs.FSid == fs_id:
                    self.assertEqual(fs.FStype.name, type_name)

        # The Sofa feature structure mirrors the CAS-level text settings.
        self.assertEqual(cas.sofaMimeType, cas.sofaFS.mimeType)
        self.assertEqual(1, cas.sofaFS.sofaNum)
        self.assertEqual('_InitialView', cas.sofaFS.sofaID)
        self.assertEqual(cas.documentText, cas.sofaFS.sofaString)

        # NOTE(review): the expected covered text includes the trailing
        # space although the token was created with end=5 -- confirm how
        # getCoveredText treats the `end` offset.
        tokens = cas.getAnnotation(token_type)
        self.assertEqual(tokens[0].getCoveredText(), "These ")