Beispiel #1
0
    def test_setListVal_Fail(self):
        """Check that ``setListValue`` raises ``TypeError`` for bad arguments.

        A ``TagDescription`` feature structure and a ``name`` feature are set
        up once; every ``with`` block then either passes a wrong argument or
        reconfigures the feature before calling ``setListValue``.
        """
        typeSystemFilePath = 'typesystem.xml'
        typesystem = TypeSystemFactory.readTypeSystem(self, typeSystemFilePath)
        tagDescType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription')
        tagDesc = TOP(tagDescType, 1, typesystem)
        nameFeature = Feature(tagDesc.FStype, 'name')
        nameFeature.description = ''
        nameFeature.rangeType = 'uima.cas.String'
        values = ['#']

        # the value argument is an int instead of a list
        with self.assertRaises(TypeError):
            nonList = 1
            tagDesc.setListValue(nameFeature, nameFeature.rangeType, nonList)

        # the feature argument is an int instead of a Feature
        with self.assertRaises(TypeError):
            nonFeature = 1
            tagDesc.setListValue(nonFeature, nameFeature.rangeType, values)

        # NOTE(review): 'Inetger' is kept verbatim from the original test data;
        # presumably a deliberately unknown type name -- confirm.
        with self.assertRaises(TypeError):
            nameFeature.rangeType = 'uima.cas.Inetger'
            tagDesc.setListValue(nameFeature, nameFeature.rangeType, values)

        # the range type is a type object (POS) while the values are strings
        with self.assertRaises(TypeError):
            nameFeature.rangeType = typesystem.getType(
                'de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS')
            tagDesc.setListValue(nameFeature, nameFeature.rangeType, values)

        # range type Integer on the feature, element type String passed in
        with self.assertRaises(TypeError):
            nameFeature.rangeType = 'uima.cas.Integer'
            nameFeature.elementType = 'uima.cas.String'
            tagDesc.setListValue(nameFeature, nameFeature.elementType, values)
Beispiel #2
0
    def test_Annotation(self):
        """Check begin/end/sofa round-trip on two Sentence annotations.

        Both annotations are attached to the same ``SofaFS`` instance and the
        values written to ``begin``, ``end`` and ``sofa`` are read back.
        """
        typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
        # looked up here as in the original test; the result is not used below
        typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription')
        fsSentenceType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence')

        sharedSofa = SofaFS(typesystem.getType('uima.cas.Sofa'), 2)
        sharedSofa.sofaNum = 1
        sharedSofa.sofaID = '_InitialView'
        sharedSofa.mimeType = 'text'
        sharedSofa.sofaString = 'These steps install the basis system requirements'

        # (FS id, begin offset, end offset) for each sentence under test
        sentenceSpecs = ((102, 0, 115), (107, 116, 152))
        for fsId, beginOffset, endOffset in sentenceSpecs:
            sentence = Annotation(fsSentenceType, fsId, typesystem)
            sentence.begin = beginOffset
            sentence.end = endOffset
            # the sofa object itself is assigned to the sofa feature
            sentence.sofa = sharedSofa

            self.assertEqual(sentence.begin, beginOffset)
            self.assertEqual(sentence.end, endOffset)
            self.assertEqual(sentence.sofa, sharedSofa)
Beispiel #3
0
def rebuilt2xmi(ci, output_dir, typesystem_path):
    """
    Serialize a rebuilt ContentItem as an Apache UIMA/XMI file.

    The output file is placed in ``output_dir`` and named after the content
    item's ID with the ``.xmi`` extension appended.

    :param ci: the content item to be converted
    :type ci: `impresso_commons.classes.ContentItem`
    :param output_dir: the path to the output directory
    :type output_dir: str
    :param typesystem_path: TypeSystem file containing definitions of the
        annotation layers.
    :type typesystem_path: str
    """
    typesystem = TypeSystemFactory().readTypeSystem(typesystem_path)
    cas = CAS(typesystem)
    cas.documentText = ci.fulltext
    cas.sofaMimeType = 'text'

    sentence_type = 'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence'
    image_link_type = 'webanno.custom.ImpressoImages'

    # One sentence annotation per entry of ``ci.lines``: each spans from the
    # previous offset (0 for the first) up to the current offset.
    span_begin = 0
    for span_end in ci.lines:
        sentence = cas.createAnnotation(
            sentence_type, {'begin': span_begin, 'end': span_end})
        cas.addToIndex(sentence)
        span_begin = span_end

    # One image-link annotation per (link, begin, end) triple.
    for link, link_begin, link_end in compute_image_links(ci):
        image_annotation = cas.createAnnotation(
            image_link_type,
            {'begin': link_begin, 'end': link_end, 'link': link})
        cas.addToIndex(image_annotation)

    XmiWriter().write(cas, os.path.join(output_dir, f'{ci.id}.xmi'))
 def buildCASfromStrings(self, xmistring, typesysstemString):
     """Build a CAS from an XMI document and a type system given as strings.

     :param xmistring: XMI content handed to ``CasXmiParser.setXmiAsString``
     :param typesysstemString: type system content handed to
         ``TypeSystemFactory.readTypeSystemString``
     :return: the result of ``self.__build`` for the new CAS and the parser
     """
     # type system object parsed from the given string
     parsedTypeSystem = TypeSystemFactory.readTypeSystemString(
         self, typesysstemString)
     targetCas = CAS(parsedTypeSystem)
     # parser used to fetch the elements of the XMI content
     xmiSource = CasXmiParser()
     xmiSource.setXmiAsString(xmistring)
     return self.__build(targetCas, xmiSource)
 def buildCAS(self, xmifilepath, typefilepath):
     """Build a CAS from an XMI file and a type system file.

     :param xmifilepath: path handed to ``CasXmiParser.setXmiAsFile``
     :param typefilepath: path handed to ``TypeSystemFactory.readTypeSystem``
     :return: the result of ``self.__build`` for the new CAS and the parser
     """
     # type system object read from the given file
     parsedTypeSystem = TypeSystemFactory.readTypeSystem(self, typefilepath)
     targetCas = CAS(parsedTypeSystem)
     # parser used to fetch the elements of the XMI file
     xmiSource = CasXmiParser()
     xmiSource.setXmiAsFile(xmifilepath)
     return self.__build(targetCas, xmiSource)
Beispiel #6
0
 def test_BoolValsetNget(self):
     """Store ``False`` with ``setBoolValue`` and read it back."""
     typesystem = TypeSystemFactory.readTypeSystem(
         self, 'tests/testing_data/typesystem.xml')
     tagDescType = typesystem.getType(
         'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription')
     tagDesc = TOP(tagDescType, 1, typesystem)

     boolFeature = Feature(tagDesc.FStype, 'name')
     boolFeature.description = ''
     boolFeature.elementType = 'uima.cas.Boolean'

     tagDesc.setBoolValue(boolFeature, False)
     storedValue = tagDesc.getBoolValue(boolFeature)
     self.assertEqual(storedValue, False)
Beispiel #7
0
    def test_ComplexValsetNget(self):
        """Store ``1j`` with ``setComplexValue`` and read it back."""
        typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
        tagDescType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription')
        tagDesc = TOP(tagDescType, 1, typesystem)

        complexFeature = Feature(tagDesc.FStype, 'name')
        complexFeature.description = ''
        complexFeature.elementType = 'uima.cas.String'

        tagDesc.setComplexValue(complexFeature, 1j)
        # the getter's result is passed through complex() before comparing
        storedValue = complex(tagDesc.getComplexValue(complexFeature))
        self.assertEqual(storedValue, 1j)
Beispiel #8
0
    def test_ListValsetNget(self):
        """Store a one-element string list with ``setListValue`` and read it back."""
        typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
        tagDescType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription')
        tagDesc = TOP(tagDescType, 1, typesystem)

        listFeature = Feature(tagDesc.FStype, 'name')
        listFeature.description = ''
        listFeature.rangeType = 'uima.cas.String'

        values = ['#']
        tagDesc.setListValue(listFeature, listFeature.rangeType, values)

        # the stored items are joined with spaces before comparing
        joined = ' '.join(tagDesc.getListValue(listFeature))
        self.assertEqual(joined, '#')
Beispiel #9
0
    def test_feature(self):
        """Check that a ``Feature`` exposes the values assigned to it."""
        anomalyDomain = 'de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.Anomaly'

        typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
        fsArrayType = typesystem.getType('uima.cas.FSArray')
        suggestedActionType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.SuggestedAction')

        suggestions = Feature(anomalyDomain, 'suggestions')
        suggestions.description = ''
        suggestions.rangeType = fsArrayType
        suggestions.elementType = suggestedActionType

        self.assertEqual(suggestions.domain, anomalyDomain)
        self.assertEqual(suggestions.name, 'suggestions')
        self.assertEqual(suggestions.description, '')
        self.assertEqual(suggestions.rangeType, fsArrayType)
        # element types are compared by name rather than by object
        self.assertEqual(suggestions.elementType.name,
                         suggestedActionType.name)
Beispiel #10
0
    def test_FeatureValsetNget(self):
        """Store a POS annotation as a feature value on a Token and read it back."""
        posTypeName = 'de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS'
        tokenTypeName = 'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'

        typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
        fsTokenType = typesystem.getType(tokenTypeName)
        fsPosType = typesystem.getType(posTypeName)

        token = TOP(fsTokenType, 1, typesystem)

        posFeature = Feature(token.FStype, 'pos')
        posFeature.description = ''
        # the POS type is looked up a second time, as in the original test
        posFeature.rangeType = typesystem.getType(posTypeName)

        posAnnotation = Annotation(fsPosType, 1, typesystem)
        posAnnotation.PosValue = "DT"

        token.setFeatureValue(posFeature, posAnnotation)
        self.assertEqual(token.getFeatureValue(posFeature), posAnnotation)
Beispiel #11
0
 def test_setComplexVal_Fail(self):
     """Check that ``setComplexValue`` raises ``TypeError`` for bad arguments.

     A ``TagDescription`` feature structure and a ``name`` feature are set
     up once; every ``with`` block then either passes a wrong argument or
     reconfigures the feature before calling ``setComplexValue``.
     """
     typeSystemFilePath = 'typesystem.xml'
     typesystem = TypeSystemFactory.readTypeSystem(self, typeSystemFilePath)
     tagDescType = typesystem.getType(
         'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription')
     tagDesc = TOP(tagDescType, 1, typesystem)
     nameFeature = Feature(tagDesc.FStype, 'name')
     nameFeature.description = ''
     nameFeature.elementType = 'uima.cas.String'

     # the value is a float instead of a complex number
     with self.assertRaises(TypeError):
         tagDesc.setComplexValue(nameFeature, 10.1)

     # the feature argument is an int instead of a Feature
     with self.assertRaises(TypeError):
         nonFeature = 1
         tagDesc.setComplexValue(nonFeature, 1j)

     # the feature has no element type
     with self.assertRaises(TypeError):
         nameFeature.elementType = None
         tagDesc.setComplexValue(nameFeature, 1j)

     # the feature's element type is Integer
     with self.assertRaises(TypeError):
         nameFeature.elementType = 'uima.cas.Integer'
         tagDesc.setComplexValue(nameFeature, 1j)
Beispiel #12
0
    def test_getFeature(self):
        """Check attribute-style feature access on POS, ART and Token annotations."""
        typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
        fsPosType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS')
        fsArtType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.ART')
        fsTokenType = typesystem.getType(
            'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token')

        sofa = SofaFS(typesystem.getType('uima.cas.Sofa'), 2)
        sofa.sofaNum = 1
        sofa.sofaID = '_InitialView'
        sofa.mimeType = 'text'
        sofa.sofaString = 'These steps install the basis system requirements'

        plainPos = Annotation(fsPosType, 1, typesystem)
        plainPos.PosValue = "DT"
        self.assertEqual(plainPos.PosValue, "DT")

        article = Annotation(fsArtType, 540, typesystem)
        article.begin = 0
        article.end = 5
        article.sofa = sofa
        # Original note: the setter wrapper of FeatureStructure_Impl turns an
        # attribute assigned like this into a Feature internally.
        article.PosValue = "DT"
        self.assertEqual(article.PosValue, "DT")

        token = Annotation(fsTokenType, 112, typesystem)
        token.begin = 0
        token.end = 5
        token.sofa = sofa
        token.pos = article
        # the assigned annotation is expected back wrapped in a list
        self.assertEqual(token.pos, [article])
Beispiel #13
0
 def test_typeysfact(self):
     typesystem = TypeSystemFactory.readTypeSystem(self, 'typesystem.xml')
     """
Beispiel #14
0
    def test_Cas(self):
        """Exercise feature-structure creation, indexing and Sofa data on a CAS.

        The test creates feature structures and annotations (valid and
        invalid), adds the valid ones to the index, and then checks FS ids,
        FS types, the Sofa attributes and the covered text of the first Token.
        The creation order is significant because the expected FS ids below
        depend on it.
        """
        typeSystemFilePath = 'typesystem.xml'
        typesystem = TypeSystemFactory.readTypeSystem(self, typeSystemFilePath)
        cas = CAS(typesystem)
        cas.documentText = 'These steps install the basis system requirements'
        cas.sofaMimeType = 'text'

        sofaType = 'uima.cas.Sofa'
        sentenceType = 'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence'
        tokenType = 'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token'
        posType = 'de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS'
        tagDescType = 'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription'
        tagSetDescType = 'de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription'

        fsSentence1 = cas.createFS(sentenceType)
        cas.addToIndex(fsSentence1)
        # an FS created without features reports no feature values
        self.assertEqual(fsSentence1.getFeatureValsAsDictList(), [])

        # FS id 1 is already occupied (assigned to the auto generated Sofa FS)
        with self.assertRaises(ValueError):
            cas.createFS(sentenceType, {'begin': 10, 'end': 20}, 1)

        fsPOS = cas.createFS(posType, {'PosValue': 'NN'}, 3)
        cas.addToIndex(fsPOS)
        fsPOS.PosValue = 'Noun'

        # the features must be given as a dictionary in the second argument
        with self.assertRaises(TypeError):
            cas.createAnnotation(posType, 2)
        # 3 is already in use as an FS id; a lower id cannot be set by the user
        with self.assertRaises(ValueError):
            cas.createAnnotation(posType, {'PosValue': 'NN'}, 2)
        # an annotation needs both begin and end
        with self.assertRaises(ValueError):
            cas.createAnnotation(posType, {'begin': 10, 'PosValue': 'NN'}, 4)

        # create a valid annotation FS and add it to the index
        fsPOS = cas.createAnnotation(posType, {
            'begin': 0,
            'end': 5,
            'PosValue': 'NN'
        })
        cas.addToIndex(fsPOS)

        fsPOS1 = cas.createAnnotation(posType, {'begin': 0, 'end': 5})
        fsPOS1.PosValue = 'NN'
        cas.addToIndex(fsPOS1)

        fsToken1 = cas.createAnnotation(tokenType, {
            'begin': 0,
            'end': 5,
            'pos': fsPOS
        })
        cas.addToIndex(fsToken1)

        # tag descriptions are created in this order and are not indexed
        tdlist = [
            cas.createFS(tagDescType, {'name': tagName})
            for tagName in ('#', '$', '-LRB-')
        ]

        fstagSetDesc = cas.createAnnotation(
            tagSetDescType, {
                'begin': 0,
                'end': 152,
                'layer':
                'de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS',
                'name': 'ptb',
                'tags': tdlist
            })
        cas.addToIndex(fstagSetDesc)

        # (type name, expected FS id) pairs checked in both directions
        expectedIds = (
            (sofaType, 1),
            (sentenceType, 2),
            (tokenType, 7),
        )
        for fs in cas.getAnnotationIndex():
            # check FS id for the known types
            for typeName, fsId in expectedIds:
                if fs.FStype.name == typeName:
                    self.assertEqual(fs.FSid, fsId)
            # check FS type for the known ids
            for typeName, fsId in expectedIds:
                if fs.FSid == fsId:
                    self.assertEqual(fs.FStype.name, typeName)

        # check sofa
        self.assertEqual(cas.sofaMimeType, cas.sofaFS.mimeType)
        self.assertEqual(1, cas.sofaFS.sofaNum)
        self.assertEqual('_InitialView', cas.sofaFS.sofaID)
        self.assertEqual(cas.documentText, cas.sofaFS.sofaString)

        tokens = cas.getAnnotation(
            'de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token')
        self.assertEqual(tokens[0].getCoveredText(), "These ")
Beispiel #15
0
 def test_typeysfact(self):
     typesystem = TypeSystemFactory.readTypeSystem(
         self, 'tests/testing_data/typesystem.xml')
     """