def gwords( text ):
    '''
    (string) -> int

    Count how many distinct gword forms (variants, not occurrences)
    appear in the given text.

    The text has its diacritics removed with araby.stripTashkeel, is split
    on whitespace, and the resulting distinct tokens are intersected with
    the module-level GWORDS_FORMS collection.

    >>> gwords( '' )
    0
    >>> gwords( ' abc ' )
    0
    >>> gwords( TEST_FIXTURES['gwords'][0] + ' ' + TEST_FIXTURES['gwords'][1] )
    2
    >>> gwords( "%s %s %s" % (TEST_FIXTURES['gwords'][0],
    ...     TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    2
    >>> gwords( "%s%s %s" % (TEST_FIXTURES['gwords'][0],
    ...     TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    1
    >>> gwords( "%s%s %s %s" % (TEST_FIXTURES['gwords'][0],
    ...     araby.DAMMA, TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    2
    >>> gwords( "%s%s %s%s %s" % (TEST_FIXTURES['gwords'][0],
    ...     araby.DAMMA, 'abc', TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    2
    '''
    # Diacritics are dropped first so vocalized spellings compare equal to
    # the bare forms.
    bare_text = araby.stripTashkeel( text )
    # A set collapses repeated tokens: variants are counted, not occurrences.
    distinct_tokens = set( bare_text.split() )
    matched_forms = distinct_tokens & GWORDS_FORMS
    return len( matched_forms )
def gwords(text):
    '''
    (string) -> int

    Return the number of different gword variants present in the text;
    repeated occurrences of the same variant count once.

    Steps: strip diacritics via araby.stripTashkeel, tokenize on
    whitespace, then keep only the tokens that belong to GWORDS_FORMS.

    >>> gwords( '' )
    0
    >>> gwords( ' abc ' )
    0
    >>> gwords( TEST_FIXTURES['gwords'][0] + ' ' + TEST_FIXTURES['gwords'][1] )
    2
    >>> gwords( "%s %s %s" % (TEST_FIXTURES['gwords'][0],
    ...     TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    2
    >>> gwords( "%s%s %s" % (TEST_FIXTURES['gwords'][0],
    ...     TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    1
    >>> gwords( "%s%s %s %s" % (TEST_FIXTURES['gwords'][0],
    ...     araby.DAMMA, TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    2
    >>> gwords( "%s%s %s%s %s" % (TEST_FIXTURES['gwords'][0],
    ...     araby.DAMMA, 'abc', TEST_FIXTURES['gwords'][1], TEST_FIXTURES['gwords'][1]) )
    2
    '''
    tokens = araby.stripTashkeel(text).split()
    # Deduplicate before intersecting so each variant is counted at most once.
    unique_tokens = set(tokens)
    found = unique_tokens & GWORDS_FORMS
    return len(found)
def gwords(text):
    """
    Return the number of distinct gword forms found in the text.

    Each form is counted once however often it occurs. The text has its
    diacritics removed with araby.stripTashkeel and is split on whitespace;
    a token counts only if it is exactly one of the known forms listed
    below.

    Args:
        text: the string to scan.

    Returns:
        int: how many different known forms appear as whole tokens.
    """
    # The previous implementation ran re.findall with the wildcard-free
    # pattern u"لله", which can only ever produce that one literal string,
    # so the result was capped at 1 and any word merely containing the
    # substring was counted. Matching whole tokens against the set of forms
    # yields the real variant count.
    gword_forms = set([
        u"أبالله", u"وتالله", u"بالله", u"تالله", u"والله", u"الله",
        u"ولله", u"اللهم", u"آلله", u"فلله", u"لله", u"فالله",
    ])
    # NOTE(review): tokens are whitespace-delimited only; a form with
    # punctuation attached will not match - confirm this is acceptable.
    words = set(araby.stripTashkeel(text).split())
    return len(words & gword_forms)
def gwords( text ):
    """
    Count the distinct gword forms (variants, not occurrences) in the text.

    Diacritics are stripped with araby.stripTashkeel, the text is split on
    whitespace, and the distinct tokens are intersected with the set of
    known forms defined in this function.

    Args:
        text: the string to scan.

    Returns:
        int: the number of different known forms present as whole tokens.
    """
    # Bug fix: the old code collected matches of the literal regex u"لله".
    # findall on a pattern without wildcards returns only that literal, so
    # the intersection below could never hold more than one element, and a
    # substring hit inside an unrelated word was counted too. Whole tokens
    # are compared instead.
    gword_forms = set( [
        u"أبالله", u"وتالله", u"بالله", u"تالله", u"والله", u"الله",
        u"ولله", u"اللهم", u"آلله", u"فلله", u"لله", u"فالله",
    ] )
    # NOTE(review): splitting is on whitespace only, so tokens with
    # adjacent punctuation are not recognized - verify against callers.
    stripped = araby.stripTashkeel( text )
    tokens = set( stripped.split() )
    return len( tokens & gword_forms )