コード例 #1
0
ファイル: get_templates.py プロジェクト: daghistani/aruudy
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import sys
import pattern
from trans.buckwalter import Buckwalter

if __name__ == '__main__':
    # Interactive console loop: read a word from stdin, pass it through
    # Buckwalter.translaterate, ask pattern.getTemplateNoDiac for its
    # template(s) and print the result after Buckwalter.untranslaterate.
    #
    # Python 2 only: reload(sys) brings back sys.setdefaultencoding, which is
    # set to UTF-8 so that unicode(word) below (and printing unicode results)
    # does not fall back to the ASCII codec.
    reload(sys)
    sys.setdefaultencoding('utf8')

    # A single-argument print(...) behaves exactly like the print statement
    # under Python 2.
    print("To exit enter an empty word")

    while True:
        sys.stdout.write("Please enter an Arabic word: ")
        try:
            word = raw_input()
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input). Treat it like
            # an empty word so the loop ends through the normal farewell
            # path instead of crashing with a traceback.
            sys.stdout.write("\n")
            word = ''

        # raw_input() returns a byte string here, so this is true for empty
        # input and for a single one-byte character.
        # NOTE(review): presumably "< 2" rather than "< 1" is intentional
        # because Arabic letters take two bytes in UTF-8 -- confirm.
        if len(word) < 2:
            print("Thank you for using me")
            break

        wtrans = Buckwalter.translaterate(unicode(word))
        #print wtrans # Translateration
        templates = pattern.getTemplateNoDiac(wtrans)
        print(Buckwalter.untranslaterate(templates))
コード例 #2
0
    # Hand the destination table object over to the destination database
    # (both are created before this point, outside the visible code).
    dstdb.addTable(tab)

    # Source table that supplies the rows to process.
    src = srcdb.getTable('ardict')

    print src.getSqlQuery()

    rows = src.getData()

    # Indices of the 'vocal' and 'unvocal' columns; used below to index
    # into each row.
    vocalized = src.getColumnIndex('vocal')
    unvocalized = src.getColumnIndex('unvocal')
    # Number of rows handled since the last commit.
    i = 0
    for row in rows:
        i = i + 1
        print "processing " + row[unvocalized]
        # The template is computed from the output of
        # Buckwalter.translaterate on the 'vocal' column value.
        wtranslate = Buckwalter.translaterate(row[vocalized])
        #print deleteDiacritics(deleteRoot(wtranslate))
        wpattern = getTemplate(wtranslate)
        #wpattern_u = Buckwalter.untranslaterate(wpattern)
        #print deleteDiacritics(deleteRoot(wpattern))
        #pattern.getPattern(row[vocalized], row[unvocalized])
        # Build the value list as one string, in the same order as the
        # column list passed to insertData below.
        # NOTE(review): the values are wrapped in single quotes by plain
        # string formatting; a value containing "'" would presumably break
        # (or alter) the generated statement -- check how insertData uses
        # this string and whether it can take parameters instead.
        data = u"'%s', '%s', '%s'" % (row[unvocalized], wpattern,
                                      row[vocalized])
        print data
        tab.insertData(data, u'word, pattern, vocalized')
        # Commit in batches of 5000 rows and restart the counter.
        if i == 5000:
            dstdb.commit()
            i = 0

    # Final commit for the rows left over after the last full batch.
    dstdb.commit()
コード例 #3
0
ファイル: get_templates.py プロジェクト: kariminf/ArArud
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#   
import os
import sys
import pattern
from trans.buckwalter import Buckwalter

if __name__ == '__main__':
	# Interactive console loop: read a word from stdin, pass it through
	# Buckwalter.translaterate, ask pattern.getTemplateNoDiac for its
	# template(s) and print the result after Buckwalter.untranslaterate.
	#
	# Python 2 only: reload(sys) brings back sys.setdefaultencoding, which is
	# set to UTF-8 so that unicode(word) below (and printing unicode results)
	# does not fall back to the ASCII codec.
	reload(sys)
	sys.setdefaultencoding('utf8')

	# A single-argument print(...) behaves exactly like the print statement
	# under Python 2.
	print("To exit enter an empty word")

	while True:
		sys.stdout.write("Please enter an Arabic word: ")
		try:
			word = raw_input()
		except EOFError:
			# stdin was closed (Ctrl-D or end of piped input). Treat it like
			# an empty word so the loop ends through the normal farewell
			# path instead of crashing with a traceback.
			sys.stdout.write("\n")
			word = ''

		# raw_input() returns a byte string here, so this is true for empty
		# input and for a single one-byte character.
		# NOTE(review): presumably "< 2" rather than "< 1" is intentional
		# because Arabic letters take two bytes in UTF-8 -- confirm.
		if len(word) < 2:
			print("Thank you for using me")
			break

		wtrans = Buckwalter.translaterate(unicode(word))
		#print wtrans # Translateration
		templates = pattern.getTemplateNoDiac(wtrans)
		print(Buckwalter.untranslaterate(templates))
		
コード例 #4
0
ファイル: ardicfilter.py プロジェクト: kariminf/ArArud
	# print tab.getSqlQuery()
	
	# Hand the destination table object over to the destination database
	# (both are created before this point, outside the visible code).
	dstdb.addTable(tab)
	
	# Source table that supplies the rows to process.
	src = srcdb.getTable('ardict')
	
	print src.getSqlQuery()
	
	rows = src.getData()
	
	# Indices of the 'vocal' and 'unvocal' columns; used below to index
	# into each row.
	vocalized = src.getColumnIndex('vocal')
	unvocalized = src.getColumnIndex('unvocal')
	# Number of rows handled since the last commit.
	i=0
	for row in rows:
		i = i + 1
		print "processing " + row[unvocalized]
		# The template is computed from the output of
		# Buckwalter.translaterate on the 'vocal' column value.
		wtranslate = Buckwalter.translaterate(row[vocalized])
		#print deleteDiacritics(deleteRoot(wtranslate))
		wpattern = getTemplate(wtranslate)
		#wpattern_u = Buckwalter.untranslaterate(wpattern)
		#print deleteDiacritics(deleteRoot(wpattern))
		#pattern.getPattern(row[vocalized], row[unvocalized])
		# Build the value list as one string, in the same order as the
		# column list passed to insertData below.
		# NOTE(review): the values are wrapped in single quotes by plain
		# string formatting; a value containing "'" would presumably break
		# (or alter) the generated statement -- check how insertData uses
		# this string and whether it can take parameters instead.
		data = u"'%s', '%s', '%s'" % (row[unvocalized], wpattern, row[vocalized])
		print data
		tab.insertData(data, u'word, pattern, vocalized')
		# Commit in batches of 5000 rows and restart the counter.
		if i == 5000:
			dstdb.commit() 
			i=0
	
	# Final commit for the rows left over after the last full batch.
	dstdb.commit()