Example #1
0
    def testvalidation(self):
        """Report which forms fail validation against a character inventory.

        Reads ``validationId`` from the request parameters, picks the
        matching inventory object from ``app_globals`` together with the Form
        field it applies to, and returns a JSON string with these keys:

        - ``formCount``: number of forms in the database
        - ``invalidCount``: number of forms whose field does not match the
          inventory's regex according to the database
        - ``NFDFixes``: how many of those match once the value is passed
          through ``h.NFD``
        - ``invalid``: ``[id, NFD value, code points]`` for the remainder
        - ``validationInventory``: the inventory's ``inputList``

        Raises KeyError when the request has no ``validationId`` parameter
        and ValueError when its value is not a known validation id.
        """
        response.headers['Content-Type'] = 'application/json'
        validationId = dict(request.params)['validationId']

        # Map each validation id to (app_globals attribute, Form field).
        #  The inventory objects are saved by
        #  functions/applicationSettingsToAppGlobals when the application
        #  settings are saved
        validationTargets = {
            u'testOrthographicValidation':
                ('orthTranscrInvObj', u'transcription'),
            u'testNarrPhonValidation':
                ('narrPhonInvObj', u'narrowPhoneticTranscription'),
            u'testBroadPhonValidation':
                ('broadPhonInvObj', u'phoneticTranscription'),
            u'testMorphophonValidation':
                ('morphBreakInvObj', u'morphemeBreak'),
            u'testOrthographicMBValidation':
                ('morphBreakInvObj', u'morphemeBreak'),
        }
        try:
            inventoryAttr, fieldName = validationTargets[validationId]
        except KeyError:
            # Previously an unknown id fell through the if/elif chain and
            #  surfaced later as an unbound-local error; fail with a message
            #  that names the offending id instead
            raise ValueError(
                'Unrecognized validationId: %r' % (validationId,))
        inventory = getattr(app_globals, inventoryAttr)

        # regex is the regular expression that valid fields will match
        regex = inventory.getRegexValidator()

        # Count the forms in the db
        formCount = meta.Session.query(model.Form).count()

        # Get the invalid forms using regex and SQLAlchemy
        invalidForms = meta.Session.query(model.Form).filter(not_(
            getattr(model.Form, fieldName).op('regexp')(regex))).all()

        # Build the result to return to the user
        result = {'formCount': formCount, 'invalidCount': len(invalidForms),
                  'NFDFixes': 0, 'invalid': [],
                  'validationInventory': inventory.inputList}

        # Compile once rather than once per form, and only when there is
        #  something to re-check so that a query returning no invalid forms
        #  never touches the regex on the Python side
        pattern = re.compile(regex) if invalidForms else None

        for form in invalidForms:
            # Normalize a single time; the value is needed both for the
            #  re-check and for the report
            normalized = h.NFD(getattr(form, fieldName))
            if pattern.match(normalized):
                # Unicode canonical decompositional normalization makes
                #  this value valid
                result['NFDFixes'] += 1
            else:
                result['invalid'].append([
                    form.id,
                    normalized,
                    h.getUnicodeCodePoints(normalized)])

        return json.dumps(result)
Example #2
0
 def getCharacterCodesAndNames(self):
     """Return JSON describing the characters of the ``input`` parameter.

     The parameter is passed through ``h.NFD`` and the response body is a
     two-item JSON array: the result of ``h.getUnicodeCodePoints`` followed
     by the result of ``h.getUnicodeNames`` for that normalized value.
     """
     # The parameter is read before the header is set, so a missing
     # ``input`` raises without having modified the response
     normalized = h.NFD(dict(request.params)['input'])
     response.headers['Content-Type'] = 'application/json'

     codePoints = h.getUnicodeCodePoints(normalized)
     names = h.getUnicodeNames(normalized)
     return json.dumps([codePoints, names])
Example #3
0
    def testvalidation(self):
        """Report which forms fail validation against a character inventory.

        Reads ``validationId`` from the request parameters, picks the
        matching inventory object from ``app_globals`` together with the Form
        field it applies to, and returns a JSON string with these keys:

        - ``formCount``: number of forms in the database
        - ``invalidCount``: number of forms whose field does not match the
          inventory's regex according to the database
        - ``NFDFixes``: how many of those match once the value is passed
          through ``h.NFD``
        - ``invalid``: ``[id, NFD value, code points]`` for the remainder
        - ``validationInventory``: the inventory's ``inputList``

        Raises KeyError when the request has no ``validationId`` parameter
        and ValueError when its value is not a known validation id.
        """
        response.headers['Content-Type'] = 'application/json'
        validationId = dict(request.params)['validationId']

        # Map each validation id to (app_globals attribute, Form field).
        #  The inventory objects are saved by
        #  functions/applicationSettingsToAppGlobals when the application
        #  settings are saved
        validationTargets = {
            u'testOrthographicValidation':
                ('orthTranscrInvObj', u'transcription'),
            u'testNarrPhonValidation':
                ('narrPhonInvObj', u'narrowPhoneticTranscription'),
            u'testBroadPhonValidation':
                ('broadPhonInvObj', u'phoneticTranscription'),
            u'testMorphophonValidation':
                ('morphBreakInvObj', u'morphemeBreak'),
            u'testOrthographicMBValidation':
                ('morphBreakInvObj', u'morphemeBreak'),
        }
        try:
            inventoryAttr, fieldName = validationTargets[validationId]
        except KeyError:
            # Previously an unknown id fell through the if/elif chain and
            #  surfaced later as an unbound-local error; fail with a message
            #  that names the offending id instead
            raise ValueError(
                'Unrecognized validationId: %r' % (validationId,))
        inventory = getattr(app_globals, inventoryAttr)

        # regex is the regular expression that valid fields will match
        regex = inventory.getRegexValidator()

        # Count the forms in the db
        formCount = meta.Session.query(model.Form).count()

        # Get the invalid forms using regex and SQLAlchemy
        invalidForms = meta.Session.query(model.Form).filter(
            not_(getattr(model.Form, fieldName).op('regexp')(regex))).all()

        # Build the result to return to the user
        result = {
            'formCount': formCount,
            'invalidCount': len(invalidForms),
            'NFDFixes': 0,
            'invalid': [],
            'validationInventory': inventory.inputList
        }

        # Compile once rather than once per form, and only when there is
        #  something to re-check so that a query returning no invalid forms
        #  never touches the regex on the Python side
        pattern = re.compile(regex) if invalidForms else None

        for form in invalidForms:
            # Normalize a single time; the value is needed both for the
            #  re-check and for the report
            normalized = h.NFD(getattr(form, fieldName))
            if pattern.match(normalized):
                # Unicode canonical decompositional normalization makes
                #  this value valid
                result['NFDFixes'] += 1
            else:
                result['invalid'].append([
                    form.id,
                    normalized,
                    h.getUnicodeCodePoints(normalized)
                ])

        return json.dumps(result)
Example #4
0
 def getCharacterCodesAndNames(self):
     """Return JSON describing the characters of the ``input`` parameter.

     The parameter is passed through ``h.NFD`` and the response body is a
     two-item JSON array: the result of ``h.getUnicodeCodePoints`` followed
     by the result of ``h.getUnicodeNames`` for that normalized value.
     """
     # The parameter is read before the header is set, so a missing
     # ``input`` raises without having modified the response
     normalized = h.NFD(dict(request.params)['input'])
     response.headers['Content-Type'] = 'application/json'

     codePoints = h.getUnicodeCodePoints(normalized)
     names = h.getUnicodeNames(normalized)
     return json.dumps([codePoints, names])
Example #5
0
            result = schema.to_python(values)
        except Invalid, e:
            result = {'valid': False, 'errors': e.unpack_errors()}
        else:
            # Count all the characters tokens by type
            result = {}
            forms = meta.Session.query(model.Form).all()
            field = values['field']
            fieldBag = ''.join([getattr(f, field) for f in forms
                                if getattr(f, field)])
            for c in fieldBag:
                try:
                    result[c] += 1
                except KeyError:
                    result[c] = 1

            # Sort the tokens by count in descending order
            result = [[k, result[k]] for k in result]
            result = sorted(result, key=lambda x: x[1], reverse=True)

            # Add some character information
            result = [[c[0], c[1], h.getUnicodeNames(c[0]),
                       h.getUnicodeCodePoints(c[0]), ud.normalize('NFC', c[0]),
                       h.getUnicodeCodePoints(ud.normalize('NFC', c[0]))]
                     for c in result]

            result = {'valid': True, 'response': result}

        response.headers['Content-Type'] = 'application/json'
        return json.dumps(result)
Example #6
0
        else:
            # Count all the characters tokens by type
            result = {}
            forms = meta.Session.query(model.Form).all()
            field = values['field']
            fieldBag = ''.join(
                [getattr(f, field) for f in forms if getattr(f, field)])
            for c in fieldBag:
                try:
                    result[c] += 1
                except KeyError:
                    result[c] = 1

            # Sort the tokens by count in descending order
            result = [[k, result[k]] for k in result]
            result = sorted(result, key=lambda x: x[1], reverse=True)

            # Add some character information
            result = [[
                c[0], c[1],
                h.getUnicodeNames(c[0]),
                h.getUnicodeCodePoints(c[0]),
                ud.normalize('NFC', c[0]),
                h.getUnicodeCodePoints(ud.normalize('NFC', c[0]))
            ] for c in result]

            result = {'valid': True, 'response': result}

        response.headers['Content-Type'] = 'application/json'
        return json.dumps(result)