Esempio n. 1
0
    def GetCustomFieldValue(self, senseOrEntryOrHvo, fieldID):
        """
        Fetch the value stored in a custom field of an object.

        senseOrEntryOrHvo is either an object exposing an Hvo attribute
        or the hvo value itself; fieldID identifies the field.

        Returns:
            - an ITsString for field types in FLExLCM.CellarStringTypes;
            - an ITsString (the BestAnalysisVernacularAlternative of the
              multi-string) for types in FLExLCM.CellarUnicodeTypes;
            - the value of get_IntProp for Integer fields;
            - None for any other field type.

        Raises FP_NullParameterError if either parameter is empty.
        """

        if not (senseOrEntryOrHvo and fieldID):
            raise FP_NullParameterError()

        # An object is reduced to its Hvo; anything without that
        # attribute is taken to be the hvo already.
        hvo = getattr(senseOrEntryOrHvo, "Hvo", senseOrEntryOrHvo)

        # Adapted from XDumper.cs::GetCustomFieldValue
        fieldType = self.project.MetaDataCacheAccessor.GetFieldType(fieldID)

        if fieldType in FLExLCM.CellarStringTypes:
            value = self.project.DomainDataByFlid.get_StringProp(hvo, fieldID)
            return ITsString(value)

        if fieldType in FLExLCM.CellarUnicodeTypes:
            multi = self.project.DomainDataByFlid.get_MultiStringProp(
                hvo, fieldID)
            return ITsString(multi.BestAnalysisVernacularAlternative)

        if fieldType == CellarPropertyType.Integer:
            return self.project.DomainDataByFlid.get_IntProp(hvo, fieldID)

        return None
Esempio n. 2
0
 def TextsGetAll(self, supplyName=True, supplyText=True):
     """
     Generator over every text in ITextRepository.

     By default each item is a tuple (Name, Text) where:

         - Name is the Text of the BestVernacularAnalysisAlternative
           of the text's name.
         - Text is the non-empty paragraphs joined with newlines.

     With supplyText=False only the names are yielded (supplyName is
     not consulted in that case); with supplyName=False only the
     joined paragraph text is yielded.
     """

     for text in self.ObjectsIn(ITextRepository):
         if not supplyText:
             yield ITsString(text.Name.BestVernacularAnalysisAlternative).Text
             continue

         # Gather the paragraphs that actually contain something.
         paragraphs = []
         if text.ContentsOA:
             for para in text.ContentsOA.ParagraphsOS:
                 paraText = ITsString(para.Contents).Text
                 if paraText:
                     paragraphs.append(paraText)

         if not supplyName:
             yield u"\n".join(paragraphs)
         else:
             title = ITsString(text.Name.BestVernacularAnalysisAlternative).Text
             yield title, u"\n".join(paragraphs)
Esempio n. 3
0
 def BestStr(self, stringObj):
     """
     Extract the Text of stringObj.BestAnalysisVernacularAlternative.

     The placeholder value "***" is mapped to an empty string.
     Raises FP_NullParameterError if stringObj is empty.
     """
     if not stringObj:
         raise FP_NullParameterError()

     best = ITsString(stringObj.BestAnalysisVernacularAlternative).Text
     if best == "***":
         return u""
     return best
Esempio n. 4
0
 def ReversalGetForm(self, entry, languageTagOrHandle=None):
     """
     Returns the ReversalForm text of the reversal entry, or an empty
     string if there is none.

     The writing system is resolved from languageTagOrHandle by the
     analysis writing-system helper (__WSHandleAnalysis).
     NOTE(review): this method was formerly documented as using the
     Default Vernacular WS; the code resolves an analysis handle --
     confirm which is intended.
     """
     handle = self.__WSHandleAnalysis(languageTagOrHandle)

     text = ITsString(entry.ReversalForm.get_String(handle)).Text
     if text:
         return text
     return u""
Esempio n. 5
0
 def LexiconGetSenseDefinition(self, sense, languageTagOrHandle=None):
     """
     Returns the text of the sense's Definition, or an empty string if
     there is none.

     The writing system is the one resolved from languageTagOrHandle by
     the analysis writing-system helper (the Default Analysis WS when
     languageTagOrHandle is not given).
     """
     handle = self.__WSHandleAnalysis(languageTagOrHandle)

     # Definition is a MultiString: pick the alternative for the handle.
     definition = sense.Definition.get_String(handle)
     text = ITsString(definition).Text
     if text:
         return text
     return u""
Esempio n. 6
0
 def LexiconGetSenseGloss(self, sense, languageTagOrHandle=None):
     """
     Returns the text of the sense's Gloss, or an empty string if there
     is none.

     The writing system is the one resolved from languageTagOrHandle by
     the analysis writing-system helper (the Default Analysis WS when
     languageTagOrHandle is not given).
     """
     handle = self.__WSHandleAnalysis(languageTagOrHandle)

     # MultiUnicodeAccessor: pick the alternative for the handle.
     glossString = sense.Gloss.get_String(handle)
     text = ITsString(glossString).Text
     if text:
         return text
     return u""
Esempio n. 7
0
 def LexiconGetExample(self, example, languageTagOrHandle=None):
     """
     Returns the text of the example sentence, or an empty string if
     there is none.

     The writing system is the one resolved from languageTagOrHandle by
     the vernacular writing-system helper (the Default Vernacular WS
     when languageTagOrHandle is not given).
     """
     handle = self.__WSHandleVernacular(languageTagOrHandle)

     # Example is a MultiString: pick the alternative for the handle.
     sentence = example.Example.get_String(handle)
     text = ITsString(sentence).Text
     if text:
         return text
     return u""
Esempio n. 8
0
    def LexiconGetPronunciation(self, pronunciation, languageTagOrHandle=None):
        """
        Returns the text of the Pronunciation's Form, or an empty string
        if there is none.

        The writing system is the one resolved from languageTagOrHandle
        by the vernacular writing-system helper (the Default Vernacular
        WS when languageTagOrHandle is not given).
        """
        handle = self.__WSHandleVernacular(languageTagOrHandle)

        # MultiUnicodeAccessor: pick the alternative for the handle.
        formString = pronunciation.Form.get_String(handle)
        text = ITsString(formString).Text
        if text:
            return text
        return u""
Esempio n. 9
0
    def LexiconGetCitationForm(self, entry, languageTagOrHandle=None):
        """
        Returns the text of the entry's CitationForm, or an empty string
        if there is none.

        The writing system is the one resolved from languageTagOrHandle
        by the vernacular writing-system helper (the Default Vernacular
        WS when languageTagOrHandle is not given).
        """
        handle = self.__WSHandleVernacular(languageTagOrHandle)

        # MultiUnicodeAccessor: pick the alternative for the handle.
        citation = entry.CitationForm.get_String(handle)
        text = ITsString(citation).Text
        if text:
            return text
        return u""
Esempio n. 10
0
def MainFunction(DB, report, modifyAllowed):
    """
    Report every wordform whose spelling status is undecided and whose
    best vernacular form matches NumberFormRegEx; when modifyAllowed is
    true, also set the spelling status of those wordforms to correct.
    """

    report.Info("Approving spelling of numbers...")
    if not modifyAllowed:
        report.Info(
            "Run with database changes allowed to actually change the status.")

    for wordform in DB.ObjectsIn(IWfiWordformRepository):
        # Only wordforms that nobody has ruled on yet are of interest.
        if wordform.SpellingStatus != SpellingStatusStates.undecided:
            continue

        text = ITsString(wordform.Form.BestVernacularAlternative).Text
        if not NumberFormRegEx.match(text):
            continue

        report.Info(text, DB.BuildGotoURL(wordform))
        if modifyAllowed:
            wordform.SpellingStatus = SpellingStatusStates.correct
Esempio n. 11
0
    def LexiconGetExampleTranslation(self, translation, languageTagOrHandle=None):
        """
        Returns the text of one translation of an example, or an empty
        string if there is none.

        The writing system is the one resolved from languageTagOrHandle
        by the analysis writing-system helper (the Default Analysis WS
        when languageTagOrHandle is not given).

        NOTE: the translations of an example sentence are held in a
        collection, so this is called once per item. E.g.::

            for translation in example.TranslationsOC:
                print (project.LexiconGetExampleTranslation(translation))
        """
        handle = self.__WSHandleAnalysis(languageTagOrHandle)

        # Translation is a MultiString: pick the alternative for the handle.
        translated = translation.Translation.get_String(handle)
        text = ITsString(translated).Text
        if text:
            return text
        return u""
Esempio n. 12
0
def MergeEntries(DB, report, modifyAllowed):
    """
    Merge and delete lexical entries according to the tags found in the
    entry-level custom field "FTFlags".

    Every entry is scanned and its lower-cased tag is interpreted:

        - a tag starting with TAG_MergeDelete queues the entry for
          deletion;
        - a tag in ALL_MERGE_TAGS groups the entry with the other tagged
          entries that share its homograph form, morph type name and set
          of grammatical categories. Affixes are ignored.

    Within a group, the single TAG_MergeTarget entry (or, if there is
    none, one of the TAG_Merge entries) receives the others through
    MergeObject: TAG_Merge entries are merged appending data,
    TAG_MergeDiscard entries are merged discarding conflicting data.
    Groups with several targets, no possible target, or nothing to merge
    are reported with a warning and skipped.

    Parameters:
        DB: the project object supplying the Lexicon* accessors, BestStr
            and BuildGotoURL.
        report: the reporter supplying Info, Warning, Error,
            ProgressStart and ProgressUpdate.
        modifyAllowed: when false the planned actions are only reported;
            when true the merges and deletions are carried out.

    Returns None. If the FTFlags field is missing or is not a string
    field, an error is reported and nothing else is done.
    """

    # --------------------------------------------------------------------
    def __POSList(entry):
        # Sorted so that equal sets of categories always give the same
        # text: two equal sets are not guaranteed to iterate in the same
        # order, and this text is part of the grouping key.
        return "; ".join(
            sorted(set([x.ShortName for x in entry.MorphoSyntaxAnalysesOC])))

    # --------------------------------------------------------------------
    def __EntryMessage(entry, message, reportFunc=report.Info):
        # The morph type is read from the entry being reported. Relying
        # on a variable left over from the scanning loop would label
        # messages issued after the loop with the morph type of whichever
        # entry happened to be scanned last.
        morphType = entry.LexemeFormOA.MorphTypeRA
        reportFunc(
            "   %s [%s][%s] %s" %
            (entry.HomographForm, DB.BestStr(morphType.Name),
             __POSList(entry), message), DB.BuildGotoURL(entry))

    # --------------------------------------------------------------------
    def __WarningMessage(entry, message):
        __EntryMessage(entry, message, report.Warning)

    # --------------------------------------------------------------------
    numEntries = DB.LexiconNumberOfEntries()
    report.Info("Scanning %s entries for merge commands..." % numEntries)
    report.ProgressStart(numEntries)

    tagsField = DB.LexiconGetEntryCustomFieldNamed("FTFlags")
    if not tagsField:
        report.Error("FTFlags custom field doesn't exist at entry level")
    elif not DB.LexiconFieldIsStringType(tagsField):
        report.Error("FTFlags custom field is not of type Single-line Text")
        tagsField = None
    if not tagsField:
        report.Warning("Please read the instructions")
        return

    DoCommands = modifyAllowed

    # mergeList is a dictionary of dictionaries:
    #   grouping key -> tag -> list of entries carrying that tag

    def dict_list_factory():
        return defaultdict(list)

    mergeList = defaultdict(dict_list_factory)
    deleteList = list()

    for entryNumber, entry in enumerate(DB.LexiconAllEntries()):
        report.ProgressUpdate(entryNumber)

        tag = DB.LexiconGetFieldText(entry, tagsField)
        if not tag: continue
        tag = tag.lower()

        if tag.startswith(TAG_MergeDelete):
            deleteList.append(entry)

            # Usage count in the text corpus is different to the information given in
            # the deletion warning message.
            usageCount = DB.LexiconEntryAnalysesCount(entry)

            __EntryMessage(
                entry, "to be deleted (used %i time%s in text corpus)" %
                (usageCount, "" if usageCount == 1 else "s"))

            # Only the part of the deletion text from "1." onwards is
            # passed on, and only when that marker is present.
            deleteWarningMessage = ITsString(entry.DeletionTextTSS).Text
            cut = deleteWarningMessage.find("1.")
            if cut >= 0:
                deleteWarningMessage = deleteWarningMessage[cut:]\
                                       .replace(StringUtils.kChHardLB, "\n")
                report.Warning("%s is in use. See tooltip for more info."\
                               % entry.HomographForm,
                               deleteWarningMessage)

        elif tag in ALL_MERGE_TAGS:

            # Looked up only for tagged entries, so untagged entries are
            # never dereferenced here.
            morphType = entry.LexemeFormOA.MorphTypeRA

            # Ignore affixes
            if morphType.IsAffixType:
                continue

            # Handle Grammatical Categories as sets: so senses of (Noun, Verb) will match (Verb, Noun)
            # Record this entry for merging
            key = "{} [{}][{}]".format(entry.HomographForm,
                                       DB.BestStr(morphType.Name),
                                       __POSList(entry))
            mergeList[key][tag].append(entry)

    if DoCommands:
        report.Info("Actioning merge commands...")
    else:
        report.Info(
            "Run again with 'Modify enabled' to carry out these actions:")

    # MERGE
    totalMerged = 0
    totalTarget = 0

    report.ProgressStart(len(mergeList) + len(deleteList))
    progressCount = 0

    for key, mergeData in mergeList.items():
        progressCount += 1
        report.ProgressUpdate(progressCount)

        # Validity checks
        if len(mergeData[TAG_MergeTarget]) > 1:
            __WarningMessage(mergeData[TAG_MergeTarget][0],
                             "Multiple merge targets: ignoring")
            continue

        if len(mergeData[TAG_MergeTarget]) == 0:
            if len(mergeData[TAG_Merge]) == 0:
                __WarningMessage(mergeData[TAG_MergeDiscard][0],
                                 "No merge target specified: ignoring")
                continue

            # No explicit target: one of the entries tagged for merging
            # becomes the target and is removed from the list to merge.
            targetEntry = mergeData[TAG_Merge].pop()
        else:
            targetEntry = mergeData[TAG_MergeTarget][0]

        merged = False
        # Do the merges
        for entry in mergeData[TAG_Merge]:
            if DoCommands:
                targetEntry.MergeObject(entry, True)  # Append data
            merged = True
            totalMerged += 1

        for entry in mergeData[TAG_MergeDiscard]:
            if DoCommands:
                targetEntry.MergeObject(entry,
                                        False)  # Discard conflicting data
            merged = True
            totalMerged += 1

        if merged:
            if DoCommands:
                DB.LexiconSetFieldText(targetEntry, tagsField,
                                       TAG_MergeComplete)
                __EntryMessage(targetEntry, "merged")
            else:
                __EntryMessage(targetEntry, "to be merged")
            totalTarget += 1
        else:
            __WarningMessage(targetEntry,
                             "Only one entry tagged for merging: ignoring")

    if DoCommands:
        report.Info("%i %s merged into %i merge target%s" %
                    (totalMerged, "entry" if totalMerged == 1 else "entries",
                     totalTarget, "" if totalTarget == 1 else "s"))

    # DELETE
    if DoCommands and deleteList:
        report.Info("Deleting %i entries" % len(deleteList))

        for entry in deleteList:
            entry.Delete(
            )  # OnBeforeObjectDeleted() will fix homograph numbering

            progressCount += 1
            report.ProgressUpdate(progressCount)
Esempio n. 13
0
def MergeEntries(DB, report, modifyAllowed):
    """
    Merge and delete lexical entries according to the tags found in the
    entry-level custom field "FTFlags".

    Every entry is scanned and its lower-cased tag is interpreted:

        - a tag starting with TAG_MergeDelete queues the entry for
          deletion;
        - a tag in ALL_MERGE_TAGS groups the entry with the other tagged
          entries that share its homograph form, morph type name and set
          of grammatical categories. Affixes are ignored.

    Within a group, the single TAG_MergeTarget entry (or, if there is
    none, one of the TAG_Merge entries) receives the others through
    MergeObject: TAG_Merge entries are merged appending data,
    TAG_MergeDiscard entries are merged discarding conflicting data.
    Groups with several targets, no possible target, or nothing to merge
    are reported with a warning and skipped.

    Parameters:
        DB: the project object supplying the Lexicon* accessors, BestStr
            and BuildGotoURL.
        report: the reporter supplying Info, Warning, Error,
            ProgressStart and ProgressUpdate.
        modifyAllowed: when false the planned actions are only reported;
            when true the merges and deletions are carried out.

    Returns None. If the FTFlags field is missing or is not a string
    field, an error is reported and nothing else is done.
    """

    # --------------------------------------------------------------------
    def __POSList(entry):
        # Sorted so that equal sets of categories always give the same
        # text: two equal sets are not guaranteed to iterate in the same
        # order, and this text is part of the grouping key.
        return u"; ".join(
            sorted(set([x.ShortName for x in entry.MorphoSyntaxAnalysesOC])))

    # --------------------------------------------------------------------
    def __EntryMessage(entry, message, reportFunc=report.Info):
        # The morph type is read from the entry being reported. Relying
        # on a variable left over from the scanning loop would label
        # messages issued after the loop with the morph type of whichever
        # entry happened to be scanned last.
        morphType = entry.LexemeFormOA.MorphTypeRA
        reportFunc(u"   %s [%s][%s] %s" % (entry.HomographForm,
                                           DB.BestStr(morphType.Name),
                                           __POSList(entry),
                                           message),
                    DB.BuildGotoURL(entry))

    # --------------------------------------------------------------------
    def __WarningMessage(entry, message):
        __EntryMessage(entry, message, report.Warning)

    # --------------------------------------------------------------------
    numEntries = DB.LexiconNumberOfEntries()
    report.Info(u"Scanning %s entries for merge commands..." % numEntries)
    report.ProgressStart(numEntries)

    tagsField = DB.LexiconGetEntryCustomFieldNamed(u"FTFlags")
    if not tagsField:
        report.Error(u"FTFlags custom field doesn't exist at entry level")
    elif not DB.LexiconFieldIsStringType(tagsField):
        report.Error(u"FTFlags custom field is not of type Single-line Text")
        tagsField = None
    if not tagsField:
        report.Warning(u"Please read the instructions")
        return

    DoCommands = modifyAllowed

    # mergeList is a dictionary of dictionaries:
    #   grouping key -> tag -> list of entries carrying that tag

    def dict_list_factory():
        return defaultdict(list)

    mergeList = defaultdict(dict_list_factory)
    deleteList = list()

    for entryNumber, entry in enumerate(DB.LexiconAllEntries()):
        report.ProgressUpdate(entryNumber)

        tag = DB.LexiconGetFieldText(entry, tagsField)
        if not tag: continue
        tag = tag.lower()

        if tag.startswith(TAG_MergeDelete):
            deleteList.append(entry)

            # Usage count in the text corpus is different to the information given in
            # the deletion warning message.
            usageCount = DB.LexiconEntryAnalysesCount(entry)

            __EntryMessage(entry, u"to be deleted (used %i time%s in text corpus)" %
                                  (usageCount, u"" if usageCount == 1 else u"s"))

            # Only the part of the deletion text from "1." onwards is
            # passed on, and only when that marker is present.
            deleteWarningMessage = ITsString(entry.DeletionTextTSS).Text
            cut = deleteWarningMessage.find(u"1.")
            if cut >= 0:
                deleteWarningMessage = deleteWarningMessage[cut:]\
                                       .replace(StringUtils.kChHardLB, "\n")
                report.Warning("%s is in use. See tooltip for more info."\
                               % entry.HomographForm,
                               deleteWarningMessage)

        elif tag in ALL_MERGE_TAGS:

            # Looked up only for tagged entries, so untagged entries are
            # never dereferenced here.
            morphType = entry.LexemeFormOA.MorphTypeRA

            # Ignore affixes
            if morphType.IsAffixType:
                continue

            # Handle Grammatical Categories as sets: so senses of (Noun, Verb) will match (Verb, Noun)
            # Record this entry for merging
            key = u"{} [{}][{}]".format(entry.HomographForm,
                                        DB.BestStr(morphType.Name),
                                        __POSList(entry))
            mergeList[key][tag].append(entry)

    if DoCommands:
        report.Info(u"Actioning merge commands...")
    else:
        report.Info(u"Run again with 'Modify enabled' to carry out these actions:")

    # MERGE
    totalMerged = 0
    totalTarget = 0

    report.ProgressStart(len(mergeList) + len(deleteList))
    progressCount = 0

    for key, mergeData in mergeList.items():
        progressCount += 1
        report.ProgressUpdate(progressCount)

        # Validity checks
        if len(mergeData[TAG_MergeTarget]) > 1:
            __WarningMessage(mergeData[TAG_MergeTarget][0],
                             u"Multiple merge targets: ignoring")
            continue

        if len(mergeData[TAG_MergeTarget]) == 0:
            if len(mergeData[TAG_Merge]) == 0:
                __WarningMessage(mergeData[TAG_MergeDiscard][0],
                                 u"No merge target specified: ignoring")
                continue

            # No explicit target: one of the entries tagged for merging
            # becomes the target and is removed from the list to merge.
            targetEntry = mergeData[TAG_Merge].pop()
        else:
            targetEntry = mergeData[TAG_MergeTarget][0]

        merged = False
        # Do the merges
        for entry in mergeData[TAG_Merge]:
            if DoCommands:
                targetEntry.MergeObject(entry, True)    # Append data
            merged = True
            totalMerged += 1

        for entry in mergeData[TAG_MergeDiscard]:
            if DoCommands:
                targetEntry.MergeObject(entry, False)   # Discard conflicting data
            merged = True
            totalMerged += 1

        if merged:
            if DoCommands:
                DB.LexiconSetFieldText(targetEntry, tagsField, TAG_MergeComplete)
                __EntryMessage(targetEntry, "merged")
            else:
                __EntryMessage(targetEntry, "to be merged")
            totalTarget += 1
        else:
            __WarningMessage(targetEntry, u"Only one entry tagged for merging: ignoring")

    if DoCommands:
        report.Info(u"%i %s merged into %i merge target%s" %
                    (totalMerged, u"entry" if totalMerged == 1 else u"entries",
                     totalTarget, u"" if totalTarget == 1 else u"s"))

    # DELETE
    if DoCommands and deleteList:
        report.Info("Deleting %i entries" % len(deleteList))

        for entry in deleteList:
            entry.Delete()      # OnBeforeObjectDeleted() will fix homograph numbering

            progressCount += 1
            report.ProgressUpdate(progressCount)