Python clGraphonolev Examples

Programming Language: Python

Namespace/Package Name: md060graphonoLev

Method/Function: clGraphonolev

Examples at hotexamples.com: 4

Python clGraphonolev - 4 examples found. These are the top rated real world Python examples of md060graphonoLev.clGraphonolev extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: md070crosslevenshteinPhon.py Project: bogdanbabych/bogdan

    def __init__(self, SFInA, SFInB, SLangIDa, SLangIDb):
        '''
        Constructor: compare every word of list A with every word of list B
        and report the close matches on stdout.

        Both lists are loaded with self.readWordList(); each entry is
        unpacked as (word, PoS, frequency).  For every A/B pair
        OGraphonolev.computeLevenshtein() returns
        (Lev0, Lev1, Lev0Norm, Lev1Norm); a B word is kept as a candidate
        for a score when the corresponding normalised distance is <= 0.36.
        (Presumably Lev0 is the baseline and Lev1 the graphonological
        distance -- confirm against md060graphonoLev.)

        For each A word with at least one candidate, a summary line is
        written to stdout, followed by both candidate lists via
        self.printCognates().

        SFInA, SFInB -- handed to self.readWordList()
            (presumably input file names -- confirm against readWordList)
        SLangIDa, SLangIDb -- passed through to computeLevenshtein()
            (presumably language identifiers of list A and list B)
        '''
        # NOTE(review): this handle is neither written to nor closed in the
        # visible part of the method; opening it truncates the debug file.
        # Confirm whether it is still needed, and close it if so.
        FDebug = open('md050crosslevenshtein.debug', 'w')
        LWordsA = self.readWordList(SFInA)
        LWordsB = self.readWordList(SFInB)
        # graphonological object with phonological features over graphemes
        OGraphonolev = md060graphonoLev.clGraphonolev()

        LDistances = []
        ICounter = 0  # A words processed so far
        ICounterRec = 0  # A words that had at least one candidate
        for (SWordA, SPoSA, IFrqA) in LWordsA:
            # math.log() raises ValueError for zero/negative input and
            # TypeError for non-numeric input; fall back to 0 only for those.
            try:
                LogFrqA = math.log(IFrqA)
            except (ValueError, TypeError):
                LogFrqA = 0
            LCognates = []  # candidates by Lev0Norm
            LCognates1 = []  # candidates by Lev1Norm
            ICounter += 1
            if ICounter % 5 == 0:
                # progress report on stderr for every 5th word
                sys.stderr.write(SWordA + ' ' + str(ICounter) + '\n')

            # An earlier variant (since disabled) used self.computeLevenshtein()
            # normalised by the average word length with a 0.30 threshold.
            for (SWordB, SPoSB, IFrqB) in LWordsB:
                (Lev0, Lev1, Lev0Norm,
                 Lev1Norm) = OGraphonolev.computeLevenshtein(
                     SWordA, SWordB, SLangIDa, SLangIDb)
                if Lev0Norm <= 0.36:
                    LCognates.append((Lev0Norm, Lev0, SWordB, SPoSB, IFrqB))
                if Lev1Norm <= 0.36:
                    LCognates1.append((Lev1Norm, Lev1, SWordB, SPoSB, IFrqB))

            LDistances.append((SWordA, SPoSA, IFrqA, LCognates))
            if LCognates or LCognates1:
                ICounterRec += 1
                # share of the A words seen so far that had candidates;
                # read by the format string below through locals()
                ACognPerCent = ICounterRec / ICounter
                sys.stdout.write(
                    '\t{, %(ICounterRec)d, %(ICounter)d, %(SWordA)s, %(SPoSA)s, frq=%(IFrqA)d, ln=%(LogFrqA).2f, have-cognates: %(ACognPerCent).2f : \n'
                    % locals())
                # sys.stdout.flush()
                self.printCognates(LCognates, LogFrqA, SPoSA)
                sys.stdout.write('\t\n')
                self.printCognates(LCognates1, LogFrqA, SPoSA)
        '''

Example #2

Show file

File: md070crosslevenshteinPhonV05.py Project: bogdanbabych/morphosyntax

    def __init__(self, SFInA, SFInB, SLangIDa, SLangIDb):
        '''
        Constructor: compare every word of list A with every word of list B
        and print the matches found by two distance measures.

        Both lists are loaded with self.readWordList(); each entry is
        unpacked as (word, PoS, frequency).  For every A/B pair
        OGraphonolev.computeLevenshtein() returns
        (Lev0, Lev1, Lev0Norm, Lev1Norm), where Lev0 is the baseline and
        Lev1 the graphonological Levenshtein distance.  A B word becomes a
        candidate for a measure when its normalised distance is <= 0.4.

        For each A word with at least one candidate, stdout receives a header
        line, then 'BASELINE:' and 'GRAPHONOLOGICAL:' sections, each produced
        by self.printCognate().

        SFInA, SFInB -- handed to self.readWordList(); SFInA is also used to
            derive the (currently unused) debug file name
        SLangIDa, SLangIDb -- passed through to computeLevenshtein()
            (presumably language identifiers of list A and list B)
        '''
        SNameIName, SNameIExt = os.path.splitext(SFInA)  # generate the debug using the first file name
        SFDebug = SNameIName + '-md050crosslevenshtein.debug'
        # FDebug = open(SFDebug, 'w') # debug file for each of the input files..
        LWordsA = self.readWordList(SFInA)
        LWordsB = self.readWordList(SFInB)
        # graphonological object with phonological features over graphemes
        # OGraphonolev = md060graphonoLev.clGraphonolev(Debug = True, DebugFile = SFDebug)
        OGraphonolev = md060graphonoLev.clGraphonolev()

        LDistances = []
        ICounter = 0  # A words processed so far
        ICounterRec = 0  # A words that had at least one candidate
        for (SWordA, SPoSA, IFrqA) in LWordsA:
            # math.log() raises ValueError for zero/negative input and
            # TypeError for non-numeric input; fall back to 0 only for those.
            try:
                LogFrqA = math.log(IFrqA)
            except (ValueError, TypeError):
                LogFrqA = 0
            LCognates = []  # baseline candidates
            LCognates1 = []  # graphonological candidates
            ICounter += 1
            # modulus 1 is always true: progress is reported for every word
            if ICounter % 1 == 0:
                sys.stderr.write(SWordA + ' ' + str(ICounter) + '\n')

            # An earlier variant (since disabled) used self.computeLevenshtein()
            # normalised by the average word length with a 0.30 threshold.
            for (SWordB, SPoSB, IFrqB) in LWordsB:
                # Lev0 is baseline Levenshtein distance
                # Lev1 is graphonological Levenshtein distance
                (Lev0, Lev1, Lev0Norm, Lev1Norm) = OGraphonolev.computeLevenshtein(SWordA, SWordB, SLangIDa, SLangIDb)
                # if Lev0Norm <= 0.36:
                if Lev0Norm <= 0.4:
                    LCognates.append((Lev0Norm, Lev0, SWordB, SPoSB, IFrqB))  # baseline Lev
                # if Lev1Norm <= 0.36:
                if Lev1Norm <= 0.4:
                    LCognates1.append((Lev1Norm, Lev1, SWordB, SPoSB, IFrqB))  # graphonological Lev

            LDistances.append((SWordA, SPoSA, IFrqA, LCognates))
            if LCognates or LCognates1:
                ICounterRec += 1
                # only referenced by the verbose header kept in the comment below
                ACognPerCent = ICounterRec / ICounter
                # now restricted to writing only one cognate...
                # sys.stdout.write('\t{, %(ICounterRec)d, %(ICounter)d, %(SWordA)s, %(SPoSA)s, frq=%(IFrqA)d, ln=%(LogFrqA).2f, have-cognates: %(ACognPerCent).2f : \n' % locals())
                sys.stdout.write('%(SWordA)s\t%(SPoSA)s\tfrq=%(IFrqA)d\t\n' % locals())
                sys.stdout.flush()
                sys.stdout.write('BASELINE:\n')
                self.printCognate(LCognates, LogFrqA, SPoSA)
                # sys.stdout.write('\t')
                sys.stdout.write('GRAPHONOLOGICAL:\n')
                self.printCognate(LCognates1, LogFrqA, SPoSA)
                # removed (s) --> printCognate(s) in function call : simple production version
                sys.stdout.write('\n\n')
                sys.stdout.flush()
            
        '''

Example #3

Show file

    def __init__(self, SFInA, SFInB, SLangIDa, SLangIDb):
        '''
        Constructor: for every word of list A, check how its reference
        translations rank among the same-PoS words of list B.

        self.readWordList() is expected to return a pair (entries, index);
        each entry is unpacked as (word, PoS, frequency, evaluation targets).
        Per A word the method:

        1. prints the word and its evaluation targets, computes the distances
           to each target with OGraphonolev.computeLevenshtein() and stores
           them in DTargetsEval keyed by (target, PoS of the A word);
        2. skips the word when none of its targets occurs in the index of
           list B;
        3. computes the distances to every B word with the same PoS and
           stores them in DCognates keyed by (word, PoS);
        4. walks the targets in ascending order of their baseline normalised
           distance and, for each one present in DCognates, prints the rank
           lists returned by self.findRanks() followed by a '-- found' line;
           otherwise prints a '-- NOT found' line.

        Distance tuples are stored as (Lev0Norm, Lev1Norm, Lev0, Lev1), where
        Lev0 is the baseline and Lev1 the graphonological distance.

        SFInA, SFInB -- handed to self.readWordList(); SFInA is also used to
            derive the (currently unused) debug file name
        SLangIDa, SLangIDb -- passed through to computeLevenshtein()
            (presumably language identifiers of list A and list B)
        '''
        SNameIName, SNameIExt = os.path.splitext(
            SFInA)  # generate the debug using the first file name
        SFDebug = SNameIName + '-md050crosslevenshtein.debug'
        # FDebug = open(SFDebug, 'w') # debug file for each of the input files..
        LWordsA, LWordsIndexA = self.readWordList(SFInA)
        LWordsB, LWordsIndexB = self.readWordList(SFInB)
        # graphonological object with phonological features over graphemes
        # OGraphonolev = md060graphonoLev.clGraphonolev(Debug = True, DebugFile = SFDebug)
        OGraphonolev = md060graphonoLev.clGraphonolev()

        # NOTE(review): LDistances and ICounterRec are not updated by the
        # active code below (only by a threshold-based report that has been
        # disabled); kept because the rest of the method is not visible here.
        LDistances = []
        ICounter = 0
        ICounterRec = 0
        for (SWordA, SPoSA, IFrqA, LTargetsEval) in LWordsA:
            ICounter += 1
            # modulus 1 is always true: progress is reported for every word
            if ICounter % 1 == 0:
                sys.stderr.write(SWordA + ' ' + str(ICounter) + '\n')
            print(SWordA)
            print(str(LTargetsEval))

            # count if references can be found in the target?
            ICountEvalInTarget = 0
            # dictionary of translation equivalents to find:
            DTargetsEval = {}
            for STargetE in LTargetsEval:
                (Lev0E, Lev1E, Lev0NormE,
                 Lev1NormE) = OGraphonolev.computeLevenshtein(
                     SWordA, STargetE, SLangIDa, SLangIDb)
                print(
                    '%(SWordA)s,%(STargetE)s,L-G:%(Lev0E)f,%(Lev1E)f,NL-G:%(Lev0NormE)f,%(Lev1NormE)f'
                    % locals())
                # not printing but recording into memory
                DTargetsEval[(STargetE, SPoSA)] = (
                    Lev0NormE, Lev1NormE, Lev0E, Lev1E
                )  # then we should be able to sort by the top candidate
                # if found the evaluation target in reference
                if STargetE in LWordsIndexB:
                    ICountEvalInTarget += 1

            if ICountEvalInTarget == 0:
                print('\t: not in the reference list!')
                continue
            # -- this stops search for items, which where reference is not in Russian list, so cannot be found...
            # can be treated a bit differently, statistics collected, etc.!!!

            # LTargetsEval2 not used -- usually large reference word list is not expected to have references
            # create a dictionary to store the equivalents, sort differently;
            DCognates = {}
            for (SWordB, SPoSB, IFrqB, LTargetsEval2) in LWordsB:
                # Lev0 is baseline Levenshtein distance
                # Lev1 is graphonological Levenshtein distance
                if SPoSB != SPoSA:
                    continue  # restrict search to the same pos space
                (Lev0, Lev1, Lev0Norm,
                 Lev1Norm) = OGraphonolev.computeLevenshtein(
                     SWordA, SWordB, SLangIDa, SLangIDb)
                # Threshold filtering (<= 0.5 on the normalised scores) was
                # removed on 14/04/2017 in favour of this dictionary.
                DCognates[(SWordB, SPoSB)] = (
                    Lev0Norm, Lev1Norm, Lev0, Lev1
                )  # expand to account for variations of the scores; then evaluate rank accoding to different scores

            # list of cognates is formed
            # Order the targets by their stored baseline normalised distance.
            # The previous key, k[1][0], indexed into the key tuple itself and
            # therefore yielded the first character of the PoS tag -- identical
            # for every entry, so nothing was actually sorted.
            for (STargetE, SPoSA) in sorted(DTargetsEval,
                                            key=lambda k: DTargetsEval[k][0]):
                if (STargetE, SPoSA) in DCognates:
                    # find rank and top-n list:
                    # (IRankL, ITopNL, IRankG, ITopNG) = self.findRanks((STargetE, SPoSA), DCognates)
                    LLSortCog = self.findRanks((STargetE, SPoSA), DCognates,
                                               [0, 1])
                    for (ICogRank, ILenLSortCog, LSortCog) in LLSortCog:
                        sys.stdout.write(
                            '%(SWordA)s\t%(STargetE)s\t%(ICogRank)d\t%(ILenLSortCog)d\t\n'
                            % locals())
                        # ICogRank is deliberately rebound here: the format
                        # string below reads it through locals()
                        for (ICogRank, TKey, TVals) in LSortCog:  # corrected
                            SKey = str(TKey)
                            SVals = str(TVals)
                            sys.stdout.write(
                                '\t%(ICogRank)d\t%(SKey)s\t%(SVals)s\n' %
                                locals())

                    (Lev0Norm, Lev1Norm, Lev0, Lev1) = DCognates[(STargetE,
                                                                  SPoSA)]
                    sys.stdout.write(
                        '%(STargetE)s\t%(SPoSA)s\t%(Lev0Norm)f\t%(Lev1Norm)f\t -- found \n'
                        % locals())
                    sys.stdout.flush()
                else:
                    (Lev0NormE, Lev1NormE, Lev0E,
                     Lev1E) = DTargetsEval[(STargetE, SPoSA)]
                    sys.stdout.write(
                        '%(STargetE)s\t%(SPoSA)s\t%(Lev0NormE)f\t%(Lev1NormE)f\t -- NOT found \n'
                        % locals())
                    sys.stdout.flush()
        '''

Example #4

Show file

File: md070crosslevenshteinPhonV06.py Project: bogdanbabych/morphosyntax

    def __init__(self, SFInA, SFInB, SLangIDa, SLangIDb):
        '''
        Constructor: compare the words of list A that have a reachable
        reference translation with every word of list B and print the
        matches found by two distance measures.

        self.readWordList() is expected to return a pair (entries, index);
        each entry is unpacked as (word, PoS, frequency, evaluation targets).
        Per A word the method:

        1. prints the word, its evaluation targets and the distances to each
           target as returned by OGraphonolev.computeLevenshtein();
        2. skips the word when none of its targets occurs in the index of
           list B;
        3. collects every B word whose normalised distance is <= 0.5,
           separately for the baseline (Lev0) and the graphonological (Lev1)
           measure;
        4. if either list is non-empty, writes a header line followed by
           'BASELINE:' and 'GRAPHONOLOGICAL:' sections, each produced by
           self.printCognate().

        SFInA, SFInB -- handed to self.readWordList(); SFInA is also used to
            derive the (currently unused) debug file name
        SLangIDa, SLangIDb -- passed through to computeLevenshtein()
            (presumably language identifiers of list A and list B)
        '''
        SNameIName, SNameIExt = os.path.splitext(
            SFInA)  # generate the debug using the first file name
        SFDebug = SNameIName + '-md050crosslevenshtein.debug'
        # FDebug = open(SFDebug, 'w') # debug file for each of the input files..
        LWordsA, LWordsIndexA = self.readWordList(SFInA)
        LWordsB, LWordsIndexB = self.readWordList(SFInB)
        # graphonological object with phonological features over graphemes
        # OGraphonolev = md060graphonoLev.clGraphonolev(Debug = True, DebugFile = SFDebug)
        OGraphonolev = md060graphonoLev.clGraphonolev()

        LDistances = []
        ICounter = 0  # A words processed so far
        ICounterRec = 0  # A words that had at least one candidate
        for (SWordA, SPoSA, IFrqA, LTargetsEval) in LWordsA:
            # math.log() raises ValueError for zero/negative input and
            # TypeError for non-numeric input; fall back to 0 only for those.
            try:
                LogFrqA = math.log(IFrqA)
            except (ValueError, TypeError):
                LogFrqA = 0
            LCognates = []  # baseline candidates
            LCognates1 = []  # graphonological candidates
            ICounter += 1
            # modulus 1 is always true: progress is reported for every word
            if ICounter % 1 == 0:
                sys.stderr.write(SWordA + ' ' + str(ICounter) + '\n')
            print(SWordA)
            print(str(LTargetsEval))

            # count if references can be found in the target?
            ICountEvalInTarget = 0
            for STargetE in LTargetsEval:
                (Lev0E, Lev1E, Lev0NormE,
                 Lev1NormE) = OGraphonolev.computeLevenshtein(
                     SWordA, STargetE, SLangIDa, SLangIDb)
                print(
                    '%(SWordA)s,%(STargetE)s,L-G:%(Lev0E)f,%(Lev1E)f,NL-G:%(Lev0NormE)f,%(Lev1NormE)f'
                    % locals())
                # if found the evaluation target in reference
                if STargetE in LWordsIndexB:
                    ICountEvalInTarget += 1

            if ICountEvalInTarget == 0:
                print('\t: not in the reference list!')
                continue
            # -- this stops search for items, which where reference is not in Russian list, so cannot be found...
            # can be treated a bit differently, statistics collected, etc.!!!

            # An earlier variant (since disabled) used self.computeLevenshtein()
            # normalised by the average word length with a 0.30 threshold.
            # LTargetsEval2 not used -- usually large reference word list is not expected to have references
            for (SWordB, SPoSB, IFrqB, LTargetsEval2) in LWordsB:
                # Lev0 is baseline Levenshtein distance
                # Lev1 is graphonological Levenshtein distance
                (Lev0, Lev1, Lev0Norm,
                 Lev1Norm) = OGraphonolev.computeLevenshtein(
                     SWordA, SWordB, SLangIDa, SLangIDb)
                # if Lev0Norm <= 0.36:
                if Lev0Norm <= 0.5:
                    LCognates.append(
                        (Lev0Norm, Lev0, SWordB, SPoSB, IFrqB))  # baseline Lev
                # if Lev1Norm <= 0.36:
                if Lev1Norm <= 0.5:
                    LCognates1.append((Lev1Norm, Lev1, SWordB, SPoSB,
                                       IFrqB))  # graphonological Lev

            LDistances.append((SWordA, SPoSA, IFrqA, LCognates))
            if LCognates or LCognates1:
                ICounterRec += 1
                # only referenced by the verbose header kept in the comment below
                ACognPerCent = ICounterRec / ICounter
                # now restricted to writing only one cognate...
                # sys.stdout.write('\t{, %(ICounterRec)d, %(ICounter)d, %(SWordA)s, %(SPoSA)s, frq=%(IFrqA)d, ln=%(LogFrqA).2f, have-cognates: %(ACognPerCent).2f : \n' % locals())
                sys.stdout.write('%(SWordA)s\t%(SPoSA)s\tfrq=%(IFrqA)d\t\n' %
                                 locals())
                sys.stdout.flush()
                sys.stdout.write('BASELINE:\n')
                self.printCognate(LCognates, LogFrqA, SPoSA)
                # sys.stdout.write('\t')
                sys.stdout.write('GRAPHONOLOGICAL:\n')
                self.printCognate(LCognates1, LogFrqA, SPoSA)
                # removed (s) --> printCognate(s) in function call : simple production version
                sys.stdout.write('\n\n')
                sys.stdout.flush()
        '''