def arabic_to_latin(input_file_path, output_file_path, sep_type, column_n):
    """Transliterate one column of a delimited file from Arabic to Latin.

    Every input line is split on ``sep_type``. The field at index
    ``column_n`` is replaced by
    ``translitr.transliter(field).arabic_to_unicode()``; all other fields are
    copied through unchanged. The fields are re-joined with ``sep_type`` and
    written, followed by a single newline, to ``output_file_path`` (UTF-8).

    When the global ``args.preper`` flag equals ``True`` the input is first
    run through ``ruby1.8 ./pre_per2.rb`` (written to ``./tmp_file_1``) and
    then through ``nkf -w`` (written to ``conved``); the ``conved`` file is
    the one that gets transliterated. Both temporary files are removed
    afterwards, including when a step fails.

    Args:
        input_file_path: Path of the UTF-8 input file.
        output_file_path: Path of the file to create or overwrite.
        sep_type: Field separator string used for splitting and joining.
        column_n: Index of the field to transliterate; any valid list index
            (including negative ones) is accepted.

    Raises:
        SystemExit: If a preprocessing command exits with a non-zero status.
        IndexError: If a line has no field at ``column_n``.
    """
    print('Transliterate from Arabic to Latin')
    # Explicit comparison kept on purpose: only a value equal to True
    # enables preprocessing, exactly as before.
    use_preprocess = args.preper == True
    with codecs.open(output_file_path, 'w', 'utf-8') as out:
        try:
            if use_preprocess:
                # NOTE(review): the input path is interpolated into a shell
                # command unquoted; paths containing spaces or shell
                # metacharacters will not work.
                status_tuple = commands.getstatusoutput(
                    'ruby1.8 ./pre_per2.rb {0} > {1}'.format(
                        input_file_path, './tmp_file_1'))
                if status_tuple[0] != 0:
                    sys.exit(status_tuple)
                status_tuple = commands.getstatusoutput(
                    'nkf -w ./tmp_file_1 > conved')
                if status_tuple[0] != 0:
                    sys.exit(status_tuple)
                # Read the nkf output rather than the intermediate file.
                source_path = './conved'
            else:
                source_path = input_file_path

            with codecs.open(source_path, 'r', 'utf-8') as lines:
                for line in lines:
                    # Strip the line terminator first so it neither sticks
                    # to the last field nor gets doubled on output.
                    items = line.rstrip(u'\r\n').split(sep_type)
                    converter = translitr.transliter(items[column_n])
                    items[column_n] = converter.arabic_to_unicode()
                    out.write(sep_type.join(items) + u'\n')
        finally:
            if use_preprocess:
                # Either file may be missing if preprocessing failed early.
                for tmp_path in ('./conved', './tmp_file_1'):
                    if os.path.exists(tmp_path):
                        os.remove(tmp_path)
def arabic_to_latin(input_file_path, output_file_path, sep_type, column_n):
    """Transliterate one column of a delimited file from Arabic to Latin.

    Every input line is split on ``sep_type``. The field at index
    ``column_n`` is replaced by
    ``translitr.transliter(field).arabic_to_unicode()``; all other fields are
    copied through unchanged. The fields are re-joined with ``sep_type`` and
    written, followed by a single newline, to ``output_file_path`` (UTF-8).

    When the global ``args.preper`` flag equals ``True`` the input is first
    run through ``ruby1.8 ./pre_per2.rb`` (written to ``./tmp_file_1``) and
    then through ``nkf -w`` (written to ``conved``); the ``conved`` file is
    the one that gets transliterated. Both temporary files are removed
    afterwards, including when a step fails.

    Args:
        input_file_path: Path of the UTF-8 input file.
        output_file_path: Path of the file to create or overwrite.
        sep_type: Field separator string used for splitting and joining.
        column_n: Index of the field to transliterate; any valid list index
            (including negative ones) is accepted.

    Raises:
        SystemExit: If a preprocessing command exits with a non-zero status.
        IndexError: If a line has no field at ``column_n``.
    """
    print('Transliterate from Arabic to Latin')
    # Explicit comparison kept on purpose: only a value equal to True
    # enables preprocessing, exactly as before.
    use_preprocess = args.preper == True
    with codecs.open(output_file_path, 'w', 'utf-8') as out:
        try:
            if use_preprocess:
                # NOTE(review): the input path is interpolated into a shell
                # command unquoted; paths containing spaces or shell
                # metacharacters will not work.
                status_tuple = commands.getstatusoutput(
                    'ruby1.8 ./pre_per2.rb {0} > {1}'.format(
                        input_file_path, './tmp_file_1'))
                if status_tuple[0] != 0:
                    sys.exit(status_tuple)
                status_tuple = commands.getstatusoutput(
                    'nkf -w ./tmp_file_1 > conved')
                if status_tuple[0] != 0:
                    sys.exit(status_tuple)
                # Read the nkf output rather than the intermediate file.
                source_path = './conved'
            else:
                source_path = input_file_path

            with codecs.open(source_path, 'r', 'utf-8') as lines:
                for line in lines:
                    # Strip the line terminator first so it neither sticks
                    # to the last field nor gets doubled on output.
                    items = line.rstrip(u'\r\n').split(sep_type)
                    converter = translitr.transliter(items[column_n])
                    items[column_n] = converter.arabic_to_unicode()
                    out.write(sep_type.join(items) + u'\n')
        finally:
            if use_preprocess:
                # Either file may be missing if preprocessing failed early.
                for tmp_path in ('./conved', './tmp_file_1'):
                    if os.path.exists(tmp_path):
                        os.remove(tmp_path)
def transliteration_whole(input_file_path, output_file_path, args):
    """Transliterate every line of a file, in the direction given by args.mode.

    ``args.mode == 'a_l'`` writes
    ``translitr.transliter(line).arabic_to_unicode()`` for each line;
    ``args.mode == 'l_a'`` writes
    ``translitr.transliter(line).unicode_to_arabic()``. Lines are passed to
    the transliterator whole and the result is written without adding a
    newline. Any other mode leaves the output file empty.

    In ``'a_l'`` mode, when ``args.preper`` equals ``True`` the input is
    first run through ``ruby1.8 ./pre_per2.rb`` (written to
    ``./tmp_file_1``) and ``nkf -w`` (written to ``conved``), and ``conved``
    is transliterated; both temporary files are removed afterwards. Without
    preprocessing, a ``UnicodeDecodeError`` while reading the input is
    reported on stdout and processing stops; lines already transliterated
    remain in the output.

    Args:
        input_file_path: Path of the UTF-8 input file.
        output_file_path: Path of the file to create or overwrite.
        args: Object providing ``mode`` and ``preper`` attributes.

    Raises:
        SystemExit: If a preprocessing command exits with a non-zero status.
    """
    print('Whole transliteration')
    print('Mode is {}'.format(args.mode))
    with codecs.open(output_file_path, 'w', 'utf-8') as out:
        if args.mode == 'a_l':
            if args.preper == True:
                try:
                    # NOTE(review): the input path is interpolated into a
                    # shell command unquoted.
                    status_tuple = commands.getstatusoutput(
                        'ruby1.8 ./pre_per2.rb {0} > {1}'.format(
                            input_file_path, './tmp_file_1'))
                    if status_tuple[0] != 0:
                        sys.exit(status_tuple)
                    status_tuple = commands.getstatusoutput(
                        'nkf -w ./tmp_file_1 > conved')
                    if status_tuple[0] != 0:
                        sys.exit(status_tuple)
                    with codecs.open('./conved', 'r', 'utf-8') as converted:
                        for line in converted:
                            Ins = translitr.transliter(line)
                            out.write(Ins.arabic_to_unicode())
                finally:
                    # Temp files exist only on this path; remove them here
                    # and tolerate a missing one after an early failure.
                    for tmp_path in ('./tmp_file_1', './conved'):
                        if os.path.exists(tmp_path):
                            os.remove(tmp_path)
            else:
                # Pre-bind so the error report below never hits an unbound
                # name, even when the very first read fails to decode.
                line_index = -1
                line = u''
                try:
                    with codecs.open(input_file_path, 'rb', 'utf-8') as file_obj:
                        # Iterate lazily: decoding everything up front would
                        # fail before any line position is known.
                        for line_index, line in enumerate(file_obj):
                            Ins = translitr.transliter(line)
                            out.write(Ins.arabic_to_unicode())
                except UnicodeDecodeError:
                    # Reports the last line decoded successfully and its
                    # 0-based index (u'' and -1 if none was decoded).
                    print(u'Different Chara set is found.')
                    print(u'Original sentence is :{} line at:{}'.format(
                        line, line_index))
        elif args.mode == 'l_a':
            with codecs.open(input_file_path, 'r', 'utf-8') as input_line:
                for line in input_line:
                    # Uses the same translitr module as the 'a_l' branch.
                    Ins = translitr.transliter(line)
                    out.write(Ins.unicode_to_arabic())
def latin_to_arabic(input_file_path, output_file_path, sep_type, column_n):
    """Transliterate one column of a delimited file from Latin to Arabic.

    Every input line is split on ``sep_type``. The field at index
    ``column_n`` is replaced by
    ``translitr.transliter(field).unicode_to_arabic()``; all other fields are
    copied through unchanged. The fields are re-joined with ``sep_type`` and
    written, followed by a single newline, to ``output_file_path`` (UTF-8).

    Args:
        input_file_path: Path of the UTF-8 input file.
        output_file_path: Path of the file to create or overwrite.
        sep_type: Field separator string used for splitting and joining.
        column_n: Index of the field to transliterate; any valid list index
            (including negative ones) is accepted.

    Raises:
        IndexError: If a line has no field at ``column_n``.
    """
    print('Transliterate from Latin to Arabic')
    with codecs.open(output_file_path, 'w', 'utf-8') as out:
        with codecs.open(input_file_path, 'r', 'utf-8') as input_line:
            for line in input_line:
                # Strip the line terminator first so it neither sticks to
                # the last field nor gets doubled on output.
                items = line.rstrip(u'\r\n').split(sep_type)
                converter = translitr.transliter(items[column_n])
                items[column_n] = converter.unicode_to_arabic()
                out.write(sep_type.join(items) + u'\n')
def transliteration_whole(input_file_path, output_file_path, args):
    """Transliterate every line of a file, in the direction given by args.mode.

    ``args.mode == 'a_l'`` writes
    ``translitr.transliter(line).arabic_to_unicode()`` for each line;
    ``args.mode == 'l_a'`` writes
    ``translitr.transliter(line).unicode_to_arabic()``. Lines are passed to
    the transliterator whole and the result is written without adding a
    newline. Any other mode leaves the output file empty.

    In ``'a_l'`` mode, when ``args.preper`` equals ``True`` the input is
    first run through ``ruby1.8 ./pre_per2.rb`` (written to
    ``./tmp_file_1``) and ``nkf -w`` (written to ``conved``), and ``conved``
    is transliterated; both temporary files are removed afterwards. Without
    preprocessing, a ``UnicodeDecodeError`` while reading the input is
    reported on stdout and processing stops; lines already transliterated
    remain in the output.

    Args:
        input_file_path: Path of the UTF-8 input file.
        output_file_path: Path of the file to create or overwrite.
        args: Object providing ``mode`` and ``preper`` attributes.

    Raises:
        SystemExit: If a preprocessing command exits with a non-zero status.
    """
    print('Whole transliteration')
    print('Mode is {}'.format(args.mode))
    with codecs.open(output_file_path, 'w', 'utf-8') as out:
        if args.mode == 'a_l':
            if args.preper == True:
                try:
                    # NOTE(review): the input path is interpolated into a
                    # shell command unquoted.
                    status_tuple = commands.getstatusoutput(
                        'ruby1.8 ./pre_per2.rb {0} > {1}'.format(
                            input_file_path, './tmp_file_1'))
                    if status_tuple[0] != 0:
                        sys.exit(status_tuple)
                    status_tuple = commands.getstatusoutput(
                        'nkf -w ./tmp_file_1 > conved')
                    if status_tuple[0] != 0:
                        sys.exit(status_tuple)
                    with codecs.open('./conved', 'r', 'utf-8') as converted:
                        for line in converted:
                            Ins = translitr.transliter(line)
                            out.write(Ins.arabic_to_unicode())
                finally:
                    # Temp files exist only on this path; remove them here
                    # and tolerate a missing one after an early failure.
                    for tmp_path in ('./tmp_file_1', './conved'):
                        if os.path.exists(tmp_path):
                            os.remove(tmp_path)
            else:
                # Pre-bind so the error report below never hits an unbound
                # name, even when the very first read fails to decode.
                line_index = -1
                line = u''
                try:
                    with codecs.open(input_file_path, 'rb', 'utf-8') as file_obj:
                        # Iterate lazily: decoding everything up front would
                        # fail before any line position is known.
                        for line_index, line in enumerate(file_obj):
                            Ins = translitr.transliter(line)
                            out.write(Ins.arabic_to_unicode())
                except UnicodeDecodeError:
                    # Reports the last line decoded successfully and its
                    # 0-based index (u'' and -1 if none was decoded).
                    print(u'Different Chara set is found.')
                    print(u'Original sentence is :{} line at:{}'.format(
                        line, line_index))
        elif args.mode == 'l_a':
            with codecs.open(input_file_path, 'r', 'utf-8') as input_line:
                for line in input_line:
                    # Uses the same translitr module as the 'a_l' branch.
                    Ins = translitr.transliter(line)
                    out.write(Ins.unicode_to_arabic())
def latin_to_arabic(input_file_path, output_file_path, sep_type, column_n):
    """Transliterate one column of a delimited file from Latin to Arabic.

    Every input line is split on ``sep_type``. The field at index
    ``column_n`` is replaced by
    ``translitr.transliter(field).unicode_to_arabic()``; all other fields are
    copied through unchanged. The fields are re-joined with ``sep_type`` and
    written, followed by a single newline, to ``output_file_path`` (UTF-8).

    Args:
        input_file_path: Path of the UTF-8 input file.
        output_file_path: Path of the file to create or overwrite.
        sep_type: Field separator string used for splitting and joining.
        column_n: Index of the field to transliterate; any valid list index
            (including negative ones) is accepted.

    Raises:
        IndexError: If a line has no field at ``column_n``.
    """
    print('Transliterate from Latin to Arabic')
    with codecs.open(output_file_path, 'w', 'utf-8') as out:
        with codecs.open(input_file_path, 'r', 'utf-8') as input_line:
            for line in input_line:
                # Strip the line terminator first so it neither sticks to
                # the last field nor gets doubled on output.
                items = line.rstrip(u'\r\n').split(sep_type)
                converter = translitr.transliter(items[column_n])
                items[column_n] = converter.unicode_to_arabic()
                out.write(sep_type.join(items) + u'\n')