def change_to_variant(myAnaInfo, my_irr_infl_var_map):

    oldCap = myAnaInfo.getCapitalization()
    pfxs = myAnaInfo.getAnalysisPrefixes().split()
    num_pfxs = len(pfxs)
    sfxs = myAnaInfo.getAnalysisSuffixes().split()
    tags = pfxs+sfxs
    
    # loop through the irr. infl. form variant list for this main entry
    varList = my_irr_infl_var_map[myAnaInfo.getPreDotRoot()]
    
    # Make sure variant_matches is defined even if the variant list is empty
    variant_matches = False

    for varTuple in varList: # each tuple has the form (entry, feat_abbr_list)
        e = varTuple[0]
        feat_abbr_list = varTuple[1]

        # See if there is a variant that has inflection features that match the tags in this entry
        variant_matches = False
        featList = [y[1] for y in sorted(feat_abbr_list, key=lambda x: x[0])]
        num_features = len(featList)
        
        # There have to be at least as many tags as features
        if len(tags) >= num_features:
            # Loop through slices of the tag list
            for i in range(0,len(tags)-num_features+1):
                # See if we match regardless of order
                if sorted(tags[i:i+num_features]) == sorted(featList):
                    variant_matches = True
                    break
            if variant_matches:
                break
    
    if variant_matches:
        
        # Set the headword value and the homograph #
        headWord = ITsString(e.HeadWord).Text
            
        # If there is not a homograph # at the end, make it 1
        headWord = Utils.add_one(headWord)
            
        # Remove the matched tags
        del pfxs[i:i+num_features]
        beg = i-num_pfxs
        if beg < 0:
            beg = 0
        end = i-num_pfxs+num_features
        if end < 0:
            end = 0
        del sfxs[beg:end]
        
        # We are intentionally not adding the sense number.
        myAnaInfo.setAnalysisByPart(' '.join(pfxs), "_variant_", headWord, ' '.join(sfxs))
        # Change the case as necessary
        myAnaInfo.setCapitalization(oldCap)
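
# Illustrative sketch (not part of the original module): the matching above slides a
# window of len(featList) over the tag list and compares sorted slices, so a variant's
# inflection features must appear as a contiguous run of tags, in any order. A minimal
# standalone version of that check, assuming plain lists of strings:
def _tags_match_features(tags, featList):
    num_features = len(featList)
    if len(tags) < num_features:
        return None
    for i in range(0, len(tags) - num_features + 1):
        # order-insensitive comparison of the slice against the feature list
        if sorted(tags[i:i + num_features]) == sorted(featList):
            return i # start index of the matched slice
    return None

# e.g. _tags_match_features(['v', 'pst', '3sg'], ['3sg', 'pst']) returns 1
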
def get_ana_data_from_entry(comp_e):
    
    # default to 1st sense. At the moment this isn't a big deal because we aren't doing anything with target senses. But eventually this needs to be gleaned somehow from the complex form.
    sense_num = '1'
    
    # The thing the component lexeme points to could be a sense rather than an entry
    if comp_e.ClassName == 'LexSense':
        comp_sense = comp_e
        # Get the headword text of the owning entry
        owning_e = comp_e.Owner # Assumption here that this isn't a subsense
        
        a = ITsString(owning_e.HeadWord).Text
        a = Utils.add_one(a)
        
        posObj = comp_sense.MorphoSyntaxAnalysisRA.PartOfSpeechRA

        # Default the abbreviation in case no POS is set (parallels the entry branch below)
        abbrev = 'NULL'
        if posObj:
            abbrev = ITsString(posObj.Abbreviation.BestAnalysisAlternative).Text
                   
        # Get the sense # from the sense Headword E.g. xxx 2 (keep.pst) or xxx (foot)
        sense_num = re.search(r'(\d*) \(',ITsString(comp_sense.HeadWord).Text).group(1)
        
        # No number found, so use sense 1
        if sense_num == '':
            sense_num = '1'
        
    else: # entry
        comp_e = GetEntryWithSense(comp_e)
        
        a = ITsString(comp_e.HeadWord).Text
        a = Utils.add_one(a)
        #print a   
        # Get POS
        abbrev = 'NULL'
        if comp_e.SensesOS.Count > 0:
            posObj = comp_e.SensesOS.ToArray()[0].MorphoSyntaxAnalysisRA.PartOfSpeechRA
            if posObj:            
                abbrev = ITsString(posObj.Abbreviation.BestAnalysisAlternative).Text
    
    return (a, abbrev, sense_num)
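
# Illustrative sketch (hypothetical helper, not in the original source): the sense number
# above is pulled from the sense headword with r'(\d*) \('. The capture group holds the
# digits (possibly none) that precede the space before the gloss parenthesis. Assumes
# 're' is imported at module level, as the code above already relies on.
def _sense_num_from_headword(senseHeadword):
    m = re.search(r'(\d*) \(', senseHeadword)
    if m is None or m.group(1) == '':
        return '1' # no number found, fall back to sense 1
    return m.group(1)

# e.g. _sense_num_from_headword('xxx 2 (keep.pst)') -> '2'
#      _sense_num_from_headword('xxx (foot)')       -> '1'
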
Example No. 4
def MainFunction(DB, report, modifyAllowed):
    
    # Read the configuration file which we assume is in the current directory.
    configMap = ReadConfig.readConfig(report)
    if not configMap:
        return

    # Build the output path from the configured file name. (The temp-directory version is commented out below.)
    outFileVal = ReadConfig.getConfigVal(configMap, 'AnalyzedTextOutputFile', report)
    if not outFileVal:
        return
    #fullPathTextOutputFile = os.path.join(tempfile.gettempdir(), outFileVal)
    fullPathTextOutputFile = outFileVal
    try:
        f_out = open(fullPathTextOutputFile, 'w')
    except IOError:
        report.Error('There is a problem with the Analyzed Text Output File path: '+fullPathTextOutputFile+'. Please check the configuration file setting.')
        return
    
    # Find the desired text
    text_desired_eng = ReadConfig.getConfigVal(configMap, 'SourceTextName', report)
    if not text_desired_eng:
        return
    
    foundText = False
    for text in DB.ObjectsIn(ITextRepository):
        if text_desired_eng == ITsString(text.Name.BestAnalysisAlternative).Text:
            foundText = True
            break
        
    if not foundText:
        report.Error('The text named: '+text_desired_eng+' was not found.')
        return
    
    # Get punctuation string
    sent_punct = ReadConfig.getConfigVal(configMap, 'SentencePunctuation', report)
    
    if not sent_punct:
        return
    
    prev_pv_list = []
    prev_e = None
    outputStrList = []
    savedTags = ''
    ccc = 0 # current complex-form component count
    
    # Process the text
    report.Info("Exporting analyses...")

    typesList = ReadConfig.getConfigVal(configMap, 'SourceComplexTypes', report)
    if not typesList:
        typesList = []
    elif not ReadConfig.configValIsList(configMap, 'SourceComplexTypes', report):
        return

    prevEndOffset = 0
    
    # count analysis objects (for progress reporting)
    obj_cnt = -1 # stays -1 if the text has no analysis occurrences
    ss = SegmentServices.StTextAnnotationNavigator(text.ContentsOA)
    for obj_cnt,analysisOccurance in enumerate(ss.GetAnalysisOccurrencesAdvancingInStText()):
        pass
    
    report.ProgressStart(obj_cnt+1)
    ss = SegmentServices.StTextAnnotationNavigator(text.ContentsOA)
    for prog_cnt,analysisOccurance in enumerate(ss.GetAnalysisOccurrencesAdvancingInStText()):
       
        report.ProgressUpdate(prog_cnt)
        outStr = affixStr = ''
        
        if prevEndOffset > 0:
            numSpaces = analysisOccurance.GetMyBeginOffsetInPara() - prevEndOffset
            if numSpaces > 0:
                outputStrList.append(' '*numSpaces)
            elif numSpaces < 0: # new paragraph
                outputStrList.append('\n')
        
        prevEndOffset = analysisOccurance.GetMyEndOffsetInPara()
            
        if analysisOccurance.Analysis.ClassName == "PunctuationForm":
            
            text_punct = ITsString(analysisOccurance.Analysis.Form).Text
            
            # See if one or more symbols are part of the user-defined sentence punctuation. If so, output the
            # punctuation as part of the data stream along with the symbol/tag <sent>.
            # Take the set intersection of the two character sequences.
            if set(text_punct).intersection(set(sent_punct)):
                outStr = "^"+text_punct+"<sent>$"
                
            # If not, assume this is non-sentence punctuation and just output the punctuation without a "symbol" e.g. <xxx>
            else:
                outStr = text_punct
            
            outputStrList.append(outStr)        
            continue
        if analysisOccurance.Analysis.ClassName == "WfiGloss":
            wfiAnalysis = analysisOccurance.Analysis.Analysis   # Same as Owner
        elif analysisOccurance.Analysis.ClassName == "WfiAnalysis":
            wfiAnalysis = analysisOccurance.Analysis
        # We get into this block if there are no analyses for the word or an analysis suggestion hasn't been accepted.
        elif analysisOccurance.Analysis.ClassName == "WfiWordform":
            outStr = ITsString(analysisOccurance.Analysis.Form.BestVernacularAlternative).Text
            report.Warning('No analysis found for the word: '+ outStr + '. Treating this as an unknown word.')
            outStr += '<UNK>'
            outputStrList.append('^'+outStr+'$')
            continue
        else:
            wfiAnalysis = None
            
        # Go through each morpheme in the word (i.e. bundle)
        for bundle in wfiAnalysis.MorphBundlesOS:
            if bundle.SenseRA:
                if bundle.MsaRA:
                    # Get the LexEntry object
                    e = bundleEntry = bundle.MorphRA.Owner
                        
                    # For a stem we just want the headword and its POS
                    if bundle.MsaRA.ClassName == 'MoStemMsa':
                        
                        # Check for valid POS
                        if not bundle.MsaRA.PartOfSpeechRA:
                            outStr = ITsString(wfiAnalysis.Owner.Form.BestVernacularAlternative).Text
                            report.Warning('No POS found for the word: '+ outStr + '. Treating this as an unknown word.', DB.BuildGotoURL(e))
                            outStr += '<UNK>'
                            break
                        if bundle.MorphRA:
                            
                            # Go from variant(s) to entry/variant that has a sense
                            # We are only dealing with senses, so we have to get to one.
                            # Along the way collect inflection features associated with
                            # irregularly inflected variant forms so they can be outputted
                            inflFeatAbbrevs = []
                            e = GetEntryWithSense(e, inflFeatAbbrevs)
                            
                            # See if we have an enclitic or proclitic
                            if ITsString(e.LexemeFormOA.MorphTypeRA.Name.BestAnalysisAlternative).Text in ('proclitic','enclitic'):
                                # Get the clitic gloss. Substitute periods with underscores to make it easier in Apertium.
                                affixStr += '<' + re.sub(r'\.', r'_',ITsString(bundle.SenseRA.Gloss.BestAnalysisAlternative).Text) +'>'
                            
                            # Otherwise we have a root or stem or phrase
                            else:
                                pv_list = []
                                shared_complex_e = None
                                
                                # Check for adjacent words that point to the same complex form
                                # If the form is a phrasal verb use it as the headword to output
                                if e.ComplexFormEntries.Count > 0:
                                    # each word could be part of multiple complex forms (e.g. ra -> char ra, ra raftan
                                    for complex_e in e.ComplexFormEntries:
                                        if complex_e.EntryRefsOS:
                                            # find the complex entry ref (there could be one or more variant entry refs listed alongside the complex entry)
                                            for entryRef in complex_e.EntryRefsOS:
                                                if entryRef.RefType == 1: # 1=complex form, 0=variant
                                                    if entryRef.ComplexEntryTypesRS:
                                                        # there could be multiple types assigned to a complex form (e.g. Phrasal Verb, Derivative)
                                                        # just see if one of them is Phrasal Verb
                                                        for complexType in entryRef.ComplexEntryTypesRS:
                                                            if ITsString(complexType.Name.BestAnalysisAlternative).Text in typesList:
                                                                pos_in_list = get_position_in_component_list(e, complex_e)
                                                                # The entry we are on has to be at the right position in the complex form's component list
                                                                if pos_in_list == ccc:
                                                                    pv_list.append(complex_e)
                                                                    break
                                    # See if we ended up with any phrasal verbs
                                    if len(pv_list) == 0: # no phrasal verbs
                                        prev_pv_list = []
                                        ccc = 0
                                    else: # yes, we have phrasal verbs
                                        if ccc == 0:
                                            saved1stbaselineWord = ITsString(analysisOccurance.BaselineText).Text
                                        ccc += 1
                                        # First make sure that the entry of the last word isn't the same as this word. In that case, of course there are going to be shared complex forms, but we are only interested in different entries forming a phrasal verb.
                                        # See if the previous word had a link to a complex phrasal verb
                                        if prev_e != e and len(prev_pv_list) > 0:
                                            found = False
                                            # See if there is a match between something on the list for the
                                            # previous word and this word.
                                            for i in range(0, len(prev_pv_list)):
                                                for j in range(0, len(pv_list)):
                                                    if prev_pv_list[i].Guid == pv_list[j].Guid:
                                                        shared_complex_e = pv_list[j]
                                                        found = True
                                                        break
                                                if found:
                                                    break
                                            # If we found a match, we remove the previous word from the output and use the complex form
                                            if found:
                                                component_count = get_component_count(shared_complex_e)
                                                if ccc == component_count:
                                                    ccc = 0
                                                    savedTags = ''
                                                    pv_list = []
                                                    
                                                # remove the n/adj/... and its tag from being output
                                                saveStr = outputStrList.pop()
                                                # the first pop may have just popped punctuation or spacing
                                                if len(outputStrList) > 0:
                                                    saveStr = outputStrList.pop() 
                                                    
                                                
                                                # The first component(s) could have tags (from affixes or inflection info.)
                                                # Save these tags so they can be put on the end of the complex form.
                                                # This kind of assumes that inflection isn't happening on multiple components
                                                # because that might give a mess when it's all duplicated on the complex form.
                                                g = re.search(r'.+?<\w+>(<.+>)', saveStr)
                                                if (g): 
                                                    savedTags += g.group(1)
                                                
                                        prev_pv_list = copy.copy(pv_list) 
                                        prev_e = e
                                else:
                                    ccc = 0
                                    
                                if shared_complex_e:
                                    
                                    if shared_complex_e.SensesOS:
                                        senseNum = 0 # assume the complex form has only one sense
                                        
                                        # Get headword and set homograph # if necessary
                                        headWord = ITsString(shared_complex_e.HeadWord).Text
                                        headWord = Utils.do_capitalization(headWord, saved1stbaselineWord)
                                        headWord = Utils.add_one(headWord)
                                                                    
                                        outStr += headWord + '.' + str(senseNum+1)
                                        
                                        senseOne = shared_complex_e.SensesOS.ToArray()[0]
                                        
                                        # Get the POS
                                        if senseOne.MorphoSyntaxAnalysisRA.PartOfSpeechRA:
                                            outStr += '<' + ITsString(senseOne.MorphoSyntaxAnalysisRA.PartOfSpeechRA.Abbreviation.BestAnalysisAlternative).Text + '>'
                                        else:
                                            report.Warning("PartOfSpeech object is null.")
                                        
                                        # Get inflection class abbreviation  
                                        if senseOne.MorphoSyntaxAnalysisRA.InflectionClassRA:
                                            outStr += '<'+ITsString(senseOne.MorphoSyntaxAnalysisRA.InflectionClassRA.\
                                                                  Abbreviation.BestAnalysisAlternative).Text+'>'         

                                        # Get any features the stem or root might have
                                        if senseOne.MorphoSyntaxAnalysisRA.MsFeaturesOA:
                                            feat_abbr_list = []
                                            # The features might be complex, make a recursive function call to find all features
                                            get_feat_abbr_list(senseOne.MorphoSyntaxAnalysisRA.MsFeaturesOA.FeatureSpecsOC, feat_abbr_list)
                                            
                                            # This sort will keep the groups in order e.g. 'gender' features will come before 'number' features 
                                            for grpName, abb in sorted(feat_abbr_list, key=lambda x: x[0]):
                                                outStr += '<' + abb + '>'
                                        
                                        # Get any features that come from irregularly inflected forms        
                                        # This sort will keep the groups in order e.g. 'gender' features will come before 'number' features 
                                        for grpName, abb in sorted(inflFeatAbbrevs, key=lambda x: x[0]):
                                            outStr += '<' + abb + '>'
                                            
                                        # Add the saved tags from a previous complex form component
                                        outStr += savedTags
                                    else:
                                        report.Warning("No senses found for the complex form.")
                                else:
                                    # Go through each sense and identify which sense number we have
                                    foundSense = False
                                    senseNum = 0
                                    for i, mySense in enumerate(e.SensesOS):
                                        if mySense.Guid == bundle.SenseRA.Guid:
                                            foundSense = True
                                            break
                                    if foundSense:
                                        senseNum = i
                                    else:
                                        report.Warning("Couldn't find the sense for headword: "+ITsString(e.HeadWord).Text)    
                                        
                                    # Get headword and set homograph # if necessary
                                    headWord = ITsString(e.HeadWord).Text
                                    headWord = Utils.do_capitalization(headWord, ITsString(analysisOccurance.BaselineText).Text)
                                    headWord = Utils.add_one(headWord)
                                    outStr += headWord + '.' + str(senseNum+1)
                                 
                                    # Get the POS
                                    if bundle.MsaRA.PartOfSpeechRA:
                                        outStr += '<' + ITsString(bundle.MsaRA.PartOfSpeechRA.Abbreviation.BestAnalysisAlternative).Text + '>'
                                    else:
                                        report.Warning("PartOfSpeech object is null.")
                                        
                                    # Get inflection class abbreviation  
                                    if bundle.MsaRA.InflectionClassRA:
                                        outStr += '<'+ITsString(bundle.MsaRA.InflectionClassRA.\
                                                              Abbreviation.BestAnalysisAlternative).Text+'>'         

                                    # Get any features the stem or root might have
                                    if bundle.MsaRA.MsFeaturesOA:
                                        feat_abbr_list = []
                                        # The features might be complex, make a recursive function call to find all features
                                        get_feat_abbr_list(bundle.MsaRA.MsFeaturesOA.FeatureSpecsOC, feat_abbr_list)
                                        
                                        # This sort will keep the groups in order e.g. 'gender' features will come before 'number' features 
                                        for grpName, abb in sorted(feat_abbr_list, key=lambda x: x[0]):
                                            outStr += '<' + abb + '>'
                                    
                                    # Get any features that come from irregularly inflected forms        
                                    # This sort will keep the groups in order e.g. 'gender' features will come before 'number' features 
                                    for grpName, abb in sorted(inflFeatAbbrevs, key=lambda x: x[0]):
                                        outStr += '<' + abb + '>'
                        else:
                            report.Warning("Morph object is null.")    
                    # We have an affix
                    else:
                        if bundle.SenseRA:
                            # Get the affix gloss. Substitute periods with underscores to make it easier in Apertium.
                            affixStr += '<' + re.sub(r'\.', r'_',ITsString(bundle.SenseRA.Gloss.BestAnalysisAlternative).Text) +'>'
                        else:
                            #e = GetEntryWithSense(e)
                            report.Warning("Sense object for affix is null.")
                else:
                    outStr = ITsString(wfiAnalysis.Owner.Form.BestVernacularAlternative).Text
                    report.Warning('No morphosyntactic analysis found for some part of the word: '+ outStr + '. Treating this as an unknown word.')
                    outStr += '<UNK>'
                    break # go on to the next word    
            else:
                # Part of the word has not been tied to a lexical entry-sense
                outStr = ITsString(wfiAnalysis.Owner.Form.BestVernacularAlternative).Text
                report.Warning('No sense found for some part of the word: '+ outStr + '. Treating this as an unknown word.')
                outStr += '<UNK>'
                break # go on to the next word    
        outStr += affixStr
        outputStrList.append('^'+outStr+'$')
    
    # Write out all the words
    for outStr in outputStrList:
        # Split compound words
        outStr = split_compounds(outStr)
        f_out.write(outStr.encode('utf-8'))

    f_out.close()
    report.Info('Export of '+str(obj_cnt+1)+' analyses complete to the file: '+fullPathTextOutputFile+'.')
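
# Illustrative note (an assumption about the format, not taken from the original source):
# the strings collected in outputStrList above follow the Apertium stream format, built
# piece by piece as the code walks each analysis. Roughly:
#
#   ^kick1.2<v><pst>$    an analyzed word: headword.senseNum, POS abbreviation, then tags
#   ^foo<UNK>$           a wordform with no accepted analysis
#   ^.<sent>$            sentence punctuation, tagged <sent>
#   ,                    other punctuation, passed through untagged
#
# A minimal builder for one analyzed word, mirroring the concatenation done above
# (the example headword and tags are hypothetical):
def _lexical_unit(headWord, senseNum, posAbbrev, tags=()):
    outStr = headWord + '.' + str(senseNum) + '<' + posAbbrev + '>'
    for tag in tags:
        outStr += '<' + tag + '>'
    return '^' + outStr + '$'

# e.g. _lexical_unit('kick1', 2, 'v', ['pst']) -> '^kick1.2<v><pst>$'
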
Example No. 5
    def readLexicalInfo(self):
        
        configMap = ReadConfig.readConfig(self.report)

        morphNames = ReadConfig.getConfigVal(configMap, 'TargetMorphNamesCountedAsRoots', self.report)
        
        if not morphNames: 
            self.report.Warning('Configuration File Problem. Morphnames not found.')
            return 

        # Loop through all the entries
        for i,e in enumerate(self.db.LexiconAllEntries()):
        
            morphType = ITsString(e.LexemeFormOA.MorphTypeRA.Name.BestAnalysisAlternative).Text
            
            # If no senses, skip it
            if e.SensesOS.Count == 0:
                continue
                
            else: # Entry with senses
                # Loop through senses
                for i, mySense in enumerate(e.SensesOS):
                    
                    gloss = ITsString(mySense.Gloss.BestAnalysisAlternative).Text
                    
                    # Process roots
                    # Don't process clitics in this block
                    if e.LexemeFormOA and \
                       e.LexemeFormOA.ClassName == 'MoStemAllomorph' and \
                       e.LexemeFormOA.MorphTypeRA and morphType in morphNames:
                    
                        # Set the headword value and the homograph #, if necessary
                        headWord = ITsString(e.HeadWord).Text
                        headWord = Utils.add_one(headWord)
    
                        # Only take word senses that have a grammatical category set.
                        if mySense.MorphoSyntaxAnalysisRA.ClassName == 'MoStemMsa':
                            
                            if mySense.MorphoSyntaxAnalysisRA.PartOfSpeechRA:            
                                                      
                                # build the word sense and add it to the map
                                wordSense = headWord+'.'+str(i+1)
                                wordSense = re.sub(' ', '_', wordSense) # change spaces to underscores
                                self.mapWordSenses[wordSense] = 7 # dummy value

                    # Now process non-roots
                    else:
                        if gloss is None:
                            continue
                        elif e.LexemeFormOA is None:
                            continue
                        elif e.LexemeFormOA.MorphTypeRA is None:
                            continue
                        elif e.LexemeFormOA.ClassName != 'MoStemAllomorph':
                            if e.LexemeFormOA.ClassName == 'MoAffixAllomorph':
                                gloss = re.sub(r'\.', '_', gloss)
                                self.__saveAffixGloss(gloss)
                            else:
                                continue # err_list.append(('Skipping entry since the lexeme is of type: '+e.LexemeFormOA.ClassName, 1, TargetDB.BuildGotoURL(e)))
                        elif morphType not in morphNames:
                            if morphType == 'proclitic' or morphType == 'enclitic':
                                gloss = re.sub(r'\.', '_', gloss)
                                self.__saveAffixGloss(gloss)
                            else:
                                continue # err_list.append(('Skipping entry because the morph type is: ' + morphType, 1, TargetDB.BuildGotoURL(e)))
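
    # Illustrative sketch (hypothetical helper, not in the original class): the keys stored
    # in self.mapWordSenses above have the form '<headword>.<senseNumber>' with spaces
    # replaced by underscores, e.g. 'kick_the_bucket1.2' for the second sense of the entry
    # whose headword is 'kick the bucket1'. Assumes 're' is imported, as the code above does.
    def __buildWordSenseKey(self, headWord, senseIndex):
        wordSense = headWord + '.' + str(senseIndex + 1) # sense numbers are 1-based
        return re.sub(' ', '_', wordSense) # change spaces to underscores
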
Example No. 6
def create_stamp_dictionaries(TargetDB, f_rt, f_pf, f_if, f_sf, morphNames, report):
    err_list = []
        
    if report is not None:
        report.ProgressStart(TargetDB.LexiconNumberOfEntries())
    
    pf_cnt = sf_cnt = if_cnt = rt_cnt = 0
    
    # Loop through all the entries
    for i,e in enumerate(TargetDB.LexiconAllEntries()):
    
        if report is not None:
            report.ProgressUpdate(i)
            
        morphType = ITsString(e.LexemeFormOA.MorphTypeRA.Name.BestAnalysisAlternative).Text
        
        # If no senses, check if this entry is an inflectional variant and output it
        if e.SensesOS.Count == 0:
            
            got_one = False
            
            # Process roots
            # Don't process clitics in this block
            if e.LexemeFormOA and \
               e.LexemeFormOA.ClassName == 'MoStemAllomorph' and \
               e.LexemeFormOA.MorphTypeRA and morphType in morphNames:
            
                for entryRef in e.EntryRefsOS:
                    if entryRef.RefType == 0: # we have a variant
                        
                        # we are only going to output inflectional variants
                        for varType in entryRef.VariantEntryTypesRS:
                            if varType.ClassName == "LexEntryInflType":
                                got_one = True
                                break
                        
                        if got_one:
                            break
                
                if got_one:                
                    # Set the headword value and the homograph #, if necessary
                    headWord = ITsString(e.HeadWord).Text
                    headWord = Utils.add_one(headWord)
                    headWord = headWord.lower()
                    # change spaces to underscores
                    headWord = re.sub(r'\s', '_', headWord)
    
                    # Write out morphname field (no sense number for variants)
                    f_rt.write('\\m '+headWord.encode('utf-8')+'\n')
                    f_rt.write('\\c '+"_variant_"+'\n')

                    # Process all allomorphs and their environments
                    process_allomorphs(e, f_rt, "", report, 'stem', TargetDB)
                    rt_cnt +=1

        else: # Entry with senses
            # Loop through senses
            for i, mySense in enumerate(e.SensesOS):
                
                gloss = ITsString(mySense.Gloss.BestAnalysisAlternative).Text
                
                # Process roots
                # Don't process clitics in this block
                if e.LexemeFormOA and \
                   e.LexemeFormOA.ClassName == 'MoStemAllomorph' and \
                   e.LexemeFormOA.MorphTypeRA and morphType in morphNames:
                
                    # Set the headword value and the homograph #, if necessary
                    headWord = ITsString(e.HeadWord).Text
                    headWord = Utils.add_one(headWord)
                    headWord = headWord.lower()
                    
                    # change spaces to underscores
                    headWord = re.sub(r'\s', '_', headWord)

                    if mySense.MorphoSyntaxAnalysisRA:
                        
                        # Get the POS abbreviation for the current sense, assuming we have a stem
                        if mySense.MorphoSyntaxAnalysisRA.ClassName == 'MoStemMsa':
                            
                            if mySense.MorphoSyntaxAnalysisRA.PartOfSpeechRA:            
                                abbrev = ITsString(mySense.MorphoSyntaxAnalysisRA.PartOfSpeechRA.\
                                                      Abbreviation.BestAnalysisAlternative).Text
                            else:
                                err_list.append(('Skipping sense because the POS is unknown'+\
                                               ' while processing target headword: '+ITsString(e.HeadWord).Text, 1, TargetDB.BuildGotoURL(e)))
                                continue
                                                      
                        else:
                            err_list.append(('Skipping sense that is of class: '+mySense.MorphoSyntaxAnalysisRA.ClassName+\
                                           ' for headword: '+ITsString(e.HeadWord).Text, 1, TargetDB.BuildGotoURL(e)))
                            continue
                    else:
                        err_list.append(('Skipping sense that has no Morpho-syntax analysis.'+\
                                       ' Headword: '+ITsString(e.HeadWord).Text, 1, TargetDB.BuildGotoURL(e)))
                        continue
    
                    # Write out morphname field
                    f_rt.write('\\m '+headWord.encode('utf-8')+'.'+str(i+1)+'\n')
                    
                    # change spaces to underscores
                    abbrev = re.sub(r'\s', '_', abbrev)

                    # remove periods
                    abbrev = re.sub(r'\.', '', abbrev)

                    f_rt.write('\\c '+abbrev+'\n')
                    
                    # Process all allomorphs and their environments 
                    process_allomorphs(e, f_rt, gloss, report, 'stem', TargetDB)
                    rt_cnt +=1

                # Now process non-roots
                else:
                    if gloss is None:
                        err_list.append(('No gloss. Skipping. Headword: '+ITsString(e.HeadWord).Text, 1, TargetDB.BuildGotoURL(e)))
                    elif e.LexemeFormOA is None:
                        err_list.append(('No lexeme form. Skipping. Headword: '+ITsString(e.HeadWord).Text, 1, TargetDB.BuildGotoURL(e)))
                    elif e.LexemeFormOA.MorphTypeRA is None:
                        err_list.append(('No Morph Type. Skipping. Headword: '+ITsString(e.HeadWord).Text+' Best Vern: '+\
                                       ITsString(e.LexemeFormOA.Form.VernacularDefaultWritingSystem).Text, 1, TargetDB.BuildGotoURL(e)))
                    elif e.LexemeFormOA.ClassName != 'MoStemAllomorph':
                        if e.LexemeFormOA.ClassName == 'MoAffixAllomorph':
                            if morphType in ['prefix', 'prefixing interfix']:
                                process_allomorphs(e, f_pf, gloss, report, 'non-stem', TargetDB)
                                pf_cnt += 1
                            elif morphType in ['suffix', 'suffixing interfix']:
                                process_allomorphs(e, f_sf, gloss, report, 'non-stem', TargetDB)
                                sf_cnt += 1
                            elif morphType in ['infix', 'infixing interfix']:
                                process_allomorphs(e, f_if, gloss, report, 'non-stem', TargetDB)
                                if_cnt += 1
                            elif morphType == 'circumfix':
                                process_circumfix(e, f_pf, f_sf, gloss, report, 'non-stem', TargetDB)
                                pf_cnt += 1
                                sf_cnt += 1
                            else:
                                err_list.append(('Skipping entry because the morph type is: ' + morphType, 1, TargetDB.BuildGotoURL(e)))
                        else:
                            err_list.append(('Skipping entry since the lexeme is of type: '+e.LexemeFormOA.ClassName, 1, TargetDB.BuildGotoURL(e)))
                    elif morphType not in morphNames:
                        if morphType == 'proclitic':
                            process_allomorphs(e, f_pf, gloss, report, 'non-stem', TargetDB)
                            pf_cnt += 1
                        elif morphType == 'enclitic':
                            process_allomorphs(e, f_sf, gloss, report, 'non-stem', TargetDB)
                            sf_cnt += 1
                        else:
                            err_list.append(('Skipping entry because the morph type is: ' + morphType, 1, TargetDB.BuildGotoURL(e)))
    
    err_list.append(('STAMP dictionaries created.', 0))
    err_list.append((str(pf_cnt)+' prefixes in the prefix dictionary.', 0))
    err_list.append((str(sf_cnt)+' suffixes in the suffix dictionary.', 0))
    err_list.append((str(if_cnt)+' infixes in the infix dictionary.', 0))
    err_list.append((str(rt_cnt)+' roots in the root dictionary.', 0))

    return err_list
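
# Illustrative sketch (an assumption about the record layout, not taken from the original
# source): each root-dictionary record written above is a pair of standard format marker
# lines, \m (the morphname: headword plus sense number, or the bare headword for an
# inflectional variant) followed by \c (the category: a cleaned POS abbreviation, or the
# literal "_variant_"). A simplified writer, without the utf-8 encoding step used above:
def _write_root_record(f_rt, morphname, category):
    # e.g. morphname='kick1.1', category='v'   or   morphname='went1', category='_variant_'
    f_rt.write('\\m ' + morphname + '\n')
    f_rt.write('\\c ' + category + '\n')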