コード例 #1
0
def main(args):
    """Walk through the tagMaker API: encode, decode, inflect, add, has_tag.

    Every section creates a new ``tagMaker`` instance before using it and
    prints its result UTF-8 encoded (except ``has_tag``, whose return value
    is printed directly).

    Args:
        args: accepted for the script entry point; not used here.
    """
    import mysam.tagmaker as tagmaker

    # Example: encode a list of tags, then print the maker's string form.
    tag_maker = tagmaker.tagMaker()
    taglist = [u'اسم', u'مضير متصل', u'مجرور']
    tag_maker.encode(taglist)
    tagstr = str(tag_maker)
    print(tagstr.encode('utf8'))

    # Example: decode a tag code and print its representation.
    print("***exmaple for decode***")
    tag_maker = tagmaker.tagMaker()
    tagcode = 'N--;--I-;----;---'
    print(tag_maker.repr(tag_maker.decode(tagcode)).encode('utf8'))

    # Example: inflect a tag code.
    print("***exmaple for inflect***")
    tag_maker = tagmaker.tagMaker()
    tagcode = 'N--;M3IY;---H;---'
    print(tag_maker.inflect(tagcode).encode('utf8'))

    # Example: add tags one at a time to an empty maker.
    # (No tag code is needed here; the maker starts empty and is printed
    # after both additions.)
    print("***exmaple for add tag***")
    tag_maker = tagmaker.tagMaker()
    tag_new = u"تعريف"
    tag_maker.add(tag_new)
    tag_new = u"اسم"
    tag_maker.add(tag_new)
    print(str(tag_maker).encode('utf8'))

    # Example: ask whether a tag code contains a given tag.
    print("***exmaple for has tag***")
    tag_maker = tagmaker.tagMaker()
    tagcode = 'N--;--I-;----;---'
    tag_search = u"مجرور"
    print(tag_maker.has_tag(tag_search, tagcode))
コード例 #2
0
ファイル: tagsdict.py プロジェクト: veeshi/arramooz
    def __init__(self, wordtype, version="N/A"):
        """
        Initialise the dict and precompute the accepted affix combinations.

        Calls the ``CsvDict`` initialiser, builds a tag maker from the
        ``config/tag.config`` file located next to this module, then fills
        ``self.affixes_list`` with every ``(procletic, encletic, suffix)``
        triple for which:

        * the key ``"<procletic>-<encletic>"`` (both passed through
          ``araby.strip_tashkeel``) is in ``snconst.COMP_NOUN_AFFIXES``, and
        * ``nspell.verify_proaffix_affix(procletic, encletic, suffix)``
          returns a true value.

        The two counters (candidate triples, accepted triples) are printed
        on one line once the scan is finished.

        Args:
            wordtype: forwarded unchanged to ``csvdict.CsvDict.__init__``.
            version: forwarded unchanged to ``csvdict.CsvDict.__init__``.
        """
        csvdict.CsvDict.__init__(self, wordtype, version)
        self.affixes_list = []
        file_conf = os.path.join(os.path.dirname(__file__),
                                 "config/tag.config")
        self.tagmaker = tagmaker.tagMaker(file_conf)

        nb_candidates = 0  # triples whose affix key is a known noun affix
        nb_accepted = 0  # candidates that also passed verification
        for procletic in snconst.COMP_PREFIX_LIST_MODEL:
            # Depends only on the procletic: compute once per procletic.
            pro_nm = araby.strip_tashkeel(procletic)
            for encletic in snconst.COMP_SUFFIX_LIST_MODEL:
                enc_nm = araby.strip_tashkeel(encletic)
                # The key does not involve the suffix, so test it before
                # entering the suffix loop instead of once per suffix.
                affix_key = u"-".join([pro_nm, enc_nm])
                if affix_key not in snconst.COMP_NOUN_AFFIXES:
                    continue
                for suffix in snconst.CONJ_SUFFIX_LIST:
                    nb_candidates += 1
                    if nspell.verify_proaffix_affix(
                            procletic, encletic, suffix):
                        nb_accepted += 1
                        self.affixes_list.append(
                            (procletic, encletic, suffix))
        # Single pre-formatted argument: prints "<n1> <n2>" identically
        # under both the Python 2 print statement and the print function.
        print("%d %d" % (nb_candidates, nb_accepted))
コード例 #3
0
 def __init__(self, version="N/A"):
     """
     Set up the dictionary with its tag maker and verb affixer.

     The parent ``CsvDict`` initialiser receives ``version``; the tag
     maker is built from the ``config/tag.config`` file that sits in the
     same directory as this module.
     """
     csvdict.CsvDict.__init__(self, version)
     # Resolve the configuration file relative to this module's directory.
     module_dir = os.path.dirname(__file__)
     config_path = os.path.join(module_dir, "config/tag.config")
     self.tagmaker = tagmaker.tagMaker(config_path)
     self.affixer = verb_affixer.verb_affixer()
コード例 #4
0
def main(args):
    """Encode several sample tag lists and print each round trip.

    For every sample a new ``tagMaker`` is created; the function prints the
    tags joined with "+", the maker's string form after ``encode``, and the
    representation of what ``decode()`` returns, all UTF-8 encoded.

    Args:
        args: accepted for the script entry point; not used here.
    """
    # Samples given as colon-separated strings; empty fields are kept by split.
    colon_separated = (
        u'تعريف::مرفوع:متحرك:ينون:::',
        u'المضارع المعلوم:هو:::n:',
        u':مضاف:مجرور:متحرك:ينون:::',
    )
    samples = [[u'اسم', u'هاء', u'مجرور']]
    samples.extend(text.split(":") for text in colon_separated)

    for tags in samples:
        maker = tagmaker.tagMaker()
        maker.encode(tags)
        joined = u"+".join(tags)
        print(joined.encode('utf8'))
        encoded = str(maker)
        print(encoded.encode('utf8'))
        # Decode the unified tag string held by the maker.
        decoded = maker.decode()
        print(maker.repr(decoded).encode('utf8'))
コード例 #5
0
def main(args):
    """Add every known tag to one tag maker and report the effect.

    Iterates over ``tag_const.TAGSDICT``; for each tag the maker's string
    form is captured before and after ``add``. An unchanged string form is
    reported as an error line, otherwise the tag and the new string form
    are printed. Each iteration then prints the decoded tags as a pandas
    DataFrame and the result of ``inflect()`` after adding the noun tag and
    again after adding the verb tag. The same maker instance is reused for
    all iterations.

    Args:
        args: accepted for the script entry point; not used here.
    """
    import pandas as pd

    # Test all existing tags.
    tag_maker = tagmaker.tagMaker()
    print("----")
    for tag in tag_const.TAGSDICT:
        tagstr = str(tag_maker)
        tag_maker.add(tag)
        tagstr_new = str(tag_maker)
        if tagstr == tagstr_new:
            # Adding the tag left the encoded form untouched.
            report = u" ".join(
                ["error:old\t", tagstr, tag, "\n     new:\t", tagstr_new])
            print(report.encode('utf8'))
        else:
            print(u" ".join([tag, tagstr_new]).encode('utf8'))
        decode_tags = tag_maker.decode()
        df = pd.DataFrame(decode_tags)
        print(df)
        tag_maker.add(u"اسم")
        print("******Inflect", tag_maker.inflect().encode('utf8'))
        tag_maker.add(u"فعل")
        print("***Verb***Inflect", tag_maker.inflect().encode('utf8'))