예제 #1
0
def gwords(text):
    """(string) -> int

    Count how many different gword forms appear in ``text`` as
    whitespace-separated words. Repeated occurrences of the same form are
    counted once. The text is passed through ``araby.stripTashkeel`` before
    it is split, and the resulting words are compared against the
    module-level ``GWORDS_FORMS`` set.

    >>> gwords('')
    0
    >>> gwords(' abc ')
    0
    >>> first, second = TEST_FIXTURES['gwords'][0], TEST_FIXTURES['gwords'][1]
    >>> gwords(first + ' ' + second)
    2
    >>> gwords(' '.join([first, second, second]))
    2
    >>> gwords(first + second + ' ' + second)
    1
    >>> gwords(first + araby.DAMMA + ' ' + second + ' ' + second)
    2
    >>> gwords(first + araby.DAMMA + ' abc' + second + ' ' + second)
    2
    """
    # Strip first so that a vocalised word still equals its bare form.
    bare_text = araby.stripTashkeel(text)
    # A set collapses repeats: we count variants, not occurrences.
    distinct_tokens = set(bare_text.split())
    matched_forms = distinct_tokens & GWORDS_FORMS
    return len(matched_forms)
예제 #2
0
def gwords(text):
    """(string) -> int

    Return the number of distinct gword variants found in ``text``.

    Only whole, whitespace-delimited words are considered, and each variant
    is counted once no matter how often it occurs. Words are compared with
    the module-level ``GWORDS_FORMS`` after the text has gone through
    ``araby.stripTashkeel``.

    >>> gwords('')
    0
    >>> gwords(' abc ')
    0
    >>> g0 = TEST_FIXTURES['gwords'][0]
    >>> g1 = TEST_FIXTURES['gwords'][1]
    >>> gwords(g0 + ' ' + g1)
    2
    >>> gwords(g0 + ' ' + g1 + ' ' + g1)
    2
    >>> gwords(g0 + g1 + ' ' + g1)
    1
    >>> gwords(g0 + araby.DAMMA + ' ' + g1 + ' ' + g1)
    2
    >>> gwords(g0 + araby.DAMMA + ' ' + 'abc' + g1 + ' ' + g1)
    2
    """
    tokens = araby.stripTashkeel(text).split()
    # Set intersection keeps one entry per variant present in the text.
    variants_found = set(tokens) & GWORDS_FORMS
    return len(variants_found)
예제 #3
0
def gwords(text):
    """Count the distinct gword forms that occur as whole words in ``text``.

    The text is first passed through ``araby.stripTashkeel`` (presumably
    removing diacritics -- confirm against the araby documentation). A
    regular expression then collects every whitespace-delimited token that
    contains the core letters, and those candidates are filtered against the
    set of known forms.

    The pattern has to capture the *whole* token. Matching only the bare
    core would reduce every form to the same substring, so the result could
    never exceed 1, and a core embedded in an unrelated token would be
    counted as well.

    Args:
        text: the string to scan.

    Returns:
        The number of different known forms found (variants, not
        occurrences).
    """
    core = u"لله"
    # re.UNICODE keeps \S Unicode-aware on Python 2; it is already the
    # default for str patterns on Python 3.
    token_pattern = re.compile(u"\\S*" + core + u"\\S*", re.UNICODE)
    known_forms = {
        u"أبالله",
        u"وتالله",
        u"بالله",
        u"تالله",
        u"والله",
        u"الله",
        u"ولله",
        u"اللهم",
        u"آلله",
        u"فلله",
        u"لله",
        u"فالله",
    }
    candidates = token_pattern.findall(araby.stripTashkeel(text))
    # The set drops repeated tokens; the intersection drops tokens that merely
    # contain the core without being a known form.
    return len(set(candidates) & known_forms)
예제 #4
0
def gwords(text):
    """Return how many different gword forms appear as whole words in ``text``.

    Steps:
      1. Run the text through ``araby.stripTashkeel`` (presumably strips
         diacritics -- confirm against the araby documentation).
      2. Use a regular expression to pull out every whitespace-delimited
         token that contains the core letters.
      3. Keep only the tokens that are exactly one of the known forms and
         count the distinct ones.

    Searching for the bare core alone is not enough: every match would be
    the identical substring, so the count would be capped at 1 and tokens
    that merely embed the core would be counted too. The pattern therefore
    extends to the surrounding non-whitespace characters.

    Args:
        text: the string to scan.

    Returns:
        The number of distinct known forms present (not the number of
        occurrences).
    """
    GWORDS_FORMS = {
        u"أبالله", u"وتالله", u"بالله", u"تالله",
        u"والله", u"الله", u"ولله", u"اللهم",
        u"آلله", u"فلله", u"لله", u"فالله",
    }
    core = u"لله"
    # Whole-token match; re.UNICODE makes \S Unicode-aware on Python 2 and is
    # a no-op for str patterns on Python 3.
    gword_pattern = re.compile(u"\\S*" + core + u"\\S*", re.UNICODE)
    tokens = gword_pattern.findall(araby.stripTashkeel(text))
    results = set(tokens) & GWORDS_FORMS
    return len(results)