def test_sandhi_join(sandhiobj, join_reference):
    """Check that joining the reference words yields the expected form.

    ``join_reference[0]`` holds the Devanagari words handed to
    ``sandhiobj.join``; ``join_reference[1]`` is the Devanagari result whose
    canonical form must be among the returned joins.
    """
    words = [
        SanskritImmutableString(word, encoding=DEVANAGARI)
        for word in join_reference[0]
    ]
    joined = sandhiobj.join(*words)
    target = SanskritImmutableString(join_reference[1], encoding=DEVANAGARI)
    expected = target.canonical()
    assert expected in joined, u"Join, {}, {}, {}, {}".format(*join_reference)
 def main():
     """Command-line entry point: look up a dhatu and print what is found.

     Reads the options from ``getArgs()``, logs to ``DhatuWrapper.log`` (DEBUG
     level when ``args.debug`` is set, INFO otherwise), converts the input to
     its canonical form and prints the entries returned by the wrapper:
     complete entries when ``args.tags == "all"``, otherwise only the
     requested field of each entry. Finally prints whether the dhatu is
     sakarmaka according to ``DhatuWrapper.is_sakarmaka``.
     """
     args = getArgs()
     print("Input Dhatu:", args.dhatu)
     # Only the level depends on the debug flag, so configure logging once.
     logging.basicConfig(filename='DhatuWrapper.log',
                         filemode='w',
                         level=logging.DEBUG if args.debug else logging.INFO)
     logger = logging.getLogger(__name__)
     ie = None if args.input_encoding is None else SCHEMES[args.input_encoding]
     i = SanskritImmutableString(args.dhatu, encoding=ie)
     it = i.canonical()
     print("Input String in SLP1:", it)
     logger.info("Input String in SLP1: {}".format(it))
     w = DhatuWrapper(logger=logger)
     dhatus = w._get_dhatus(it)
     if args.tags == "all":
         res = dhatus
     else:
         # Build a real list: on Python 3 map() returns a lazy iterator, so
         # printing/logging it would show "<map object at ...>" rather than
         # the extracted values.
         res = [entry[args.tags] for entry in dhatus]
     print(res)
     print("Is {} sakarmaka?: {}".format(it, w.is_sakarmaka(it)))
     logger.info("Reported {}".format(res))
Beispiel #3
0
def isInPratyahara(p, s):
    """Ask ``ms.isInPratyahara`` whether ``s`` belongs to pratyahara ``p``.

    An empty ``s`` is answered with ``False`` straight away. Arguments given
    as plain ``str`` are wrapped as SLP1 ``SanskritImmutableString`` objects
    before delegating; anything else is passed through unchanged.
    """
    if s == "":
        return False
    pratyahara, varna = [
        SanskritImmutableString(arg, SLP1) if isinstance(arg, str) else arg
        for arg in (p, s)
    ]
    return ms.isInPratyahara(pratyahara, varna)
Beispiel #4
0
def test_is_sakarmaka_for_dvikarmaka():
    """The dhatu "nI" (transcoded to SLP1) must be reported as sakarmaka."""
    dhatu = SanskritImmutableString("nI").transcoded(SLP1)
    wrapper = DhatuWrapper.DhatuWrapper()

    assert wrapper.is_sakarmaka(dhatu) is True
def test_sandhi_split(sandhiobj, split_reference):
    """Check that the expected split is among all splits of the reference.

    ``split_reference[0]`` is the Devanagari text handed to
    ``sandhiobj.split_all``; ``split_reference[1]`` lists the Devanagari
    parts whose canonical forms, as a tuple, must appear in the result.
    """
    joined = SanskritImmutableString(split_reference[0], encoding=DEVANAGARI)
    all_splits = sandhiobj.split_all(joined)
    expected = tuple(
        SanskritImmutableString(part, encoding=DEVANAGARI).canonical()
        for part in split_reference[1]
    )
    failure_note = u"Split, {}, {}, {}, {}"
    assert expected in all_splits, failure_note.format(*split_reference)
Beispiel #6
0
def isSavarna(p, s):
    """Ask ``ms.isSavarna`` whether ``p`` and ``s`` are savarna.

    Two empty strings count as savarna (``True``); an empty string paired
    with a non-empty value does not (``False``). Otherwise arguments given as
    plain ``str`` are wrapped as SLP1 ``SanskritImmutableString`` objects and
    the decision is delegated to ``ms.isSavarna``.
    """
    if s == "" and p == "":
        return True
    if s == "" or p == "":
        return False
    wrapped = [
        SanskritImmutableString(arg, SLP1) if isinstance(arg, str) else arg
        for arg in (p, s)
    ]
    return ms.isSavarna(*wrapped)
 def map_verb(self, obj):
     """Map a verb object to ``(root, tagset)``.

     The object is first passed through ``self.refresh``. Its mode, voice,
     person and number ids (1-based) index into ``self.lakAra``,
     ``self.pada_prayoga``, ``self.puruSha`` and ``self.vacanam``; each
     looked-up value becomes a Devanagari ``SanskritImmutableString`` tag.
     The root name is returned as an SLP1 ``SanskritImmutableString``.
     """
     verb = self.refresh(obj)
     lookups = (
         (self.lakAra, verb.mode),
         (self.pada_prayoga, verb.voice),
         (self.puruSha, verb.person),
         (self.vacanam, verb.number),
     )
     tagset = {
         SanskritImmutableString(table[feature.id - 1], DEVANAGARI)
         for table, feature in lookups
     }
     return (SanskritImmutableString(verb.root.name, SLP1), tagset)
Beispiel #8
0
def _inriaTagsToDb(tag):
    """Convert one ``(inria_tag, our_tag)`` pair into a lookup entry.

    Returns a tuple of the set of hyphen-separated parts of the INRIA tag
    and our tag wrapped as a Devanagari ``SanskritImmutableString``.
    """
    inria_tag, our_tag = tag[0], tag[1]
    # split() yields [inria_tag] when there is no '-', so a single call
    # covers both plain and hyphenated tags.
    parts = set(inria_tag.split('-'))
    devanagari_tag = SanskritImmutableString(our_tag,
                                             encoding=SCHEMES['Devanagari'])
    return (parts, devanagari_tag)
    def main(self, args):
        """Print the tags ``self.get_tags`` finds for ``args.word``.

        The word is read in the scheme named by ``args.input_encoding``
        (``None`` is passed through as the encoding when unset) and
        converted to canonical form before lookup. Nothing is printed after
        the header line when ``get_tags`` returns ``None``.
        """
        scheme_name = args.input_encoding
        ie = SCHEMES[scheme_name] if scheme_name is not None else None

        word_in = SanskritImmutableString(args.word, encoding=ie).canonical()
        print("Getting tags for", word_in)
        tags = self.get_tags(word_in, tmap=args.map_tags)
        if tags is None:
            return
        for found in tags:
            print(found)
 def map_nominal(self, obj):
     """Map a nominal object to ``(stem, tagset)``.

     After ``self.refresh``, the gender id (1-based) selects a tag from
     ``self.lingam``. A compounded form gets the fixed compound tag;
     otherwise number and case ids select tags from ``self.vacanam`` and
     ``self.vibhakti``. These become Devanagari tags. When the stem is
     exactly a ``ParticipleStem``, the tags listed in ``self.kRdanta`` for
     its ``(mode.abbr, voice.abbr)`` are added as SLP1 tags. The stem name
     is returned as an SLP1 ``SanskritImmutableString``.
     """
     self.logger.debug("map_nominal %s", obj)
     nominal = self.refresh(obj)
     labels = [self.lingam[nominal.gender_id - 1]]
     if nominal.compounded:
         labels.append("समासपूर्वपदनामपदम्")
     else:
         labels.append(self.vacanam[nominal.number_id - 1])
         labels.append(self.vibhakti[nominal.case_id - 1])
     tagset = {SanskritImmutableString(label, DEVANAGARI) for label in labels}
     stem = nominal.stem
     if type(stem) == ParticipleStem:
         key = (stem.mode.abbr, stem.voice.abbr)
         tagset.update(
             SanskritImmutableString(t, SLP1) for t in self.kRdanta[key])
     return (SanskritImmutableString(stem.name, SLP1), tagset)
Beispiel #11
0
def inriaMapTag(tag):
    ''' Translate an INRIA tag tuple into our tag format

    Every ``inriatagdb`` entry whose INRIA tag set is a subset of the given
    tag set contributes its mapped tag to the result.

    Params:
       tag(tuple) : (<stem>,set([inria tags]))
    Returns
       tuple : (<stem>,set([our tags])), the stem wrapped as an SLP1
               SanskritImmutableString
    '''
    stem, inria_tags = tag[0], tag[1]
    ours = {entry[1] for entry in inriatagdb if entry[0] <= inria_tags}
    return (SanskritImmutableString(stem, SCHEMES['SLP1']), ours)
Beispiel #12
0
    def main():
        """Look up ``args.word`` via ``InriaXMLWrapper`` and print its tags.

        Options come from ``getArgs()``. ``args.loglevel``, when given, must
        name a level of the ``logging`` module, otherwise ``ValueError`` is
        raised. Nothing is printed after the header line when ``get_tags``
        returns ``None``.
        """
        args = getArgs()
        scheme_name = args.input_encoding
        ie = SCHEMES[scheme_name] if scheme_name is not None else None

        if args.loglevel:
            level = getattr(logging, args.loglevel.upper(), None)
            if not isinstance(level, int):
                raise ValueError('Invalid log level: %s' % args.loglevel)
            logging.basicConfig(level=level)

        word_in = SanskritImmutableString(args.word, encoding=ie).canonical()
        xmlDB = InriaXMLWrapper()
        print("Getting tags for", word_in)
        tags = xmlDB.get_tags(word_in, tmap=args.map_tags)
        if tags is None:
            return
        for found in tags:
            print(found)
Beispiel #13
0
    def add_rules_from_file(self, path):
        """
        Load sandhi rules from a UTF-8 text file.
        The file holds one rule per line, e.g. अ + अ = आ
        Blank lines and lines beginning with # are skipped.
        Every expansion of a rule is registered through add_rule, annotated
        with "<file name>:<1-based line number>".

        :param path: file to read rules from

        See also add_rules_from_dir
        """
        source_name = os.path.basename(path)
        with codecs.open(path, "rb", 'utf-8') as rules_file:
            for lineno, raw_line in enumerate(rules_file, start=1):
                text = raw_line.strip()
                if text == '' or text.startswith('#'):
                    continue
                self.logger.debug("Processing rule %s", text)
                canonical_rule = SanskritImmutableString(text).canonical()
                origin = "%s:%d" % (source_name, lineno)
                for expanded in self.expand_rule(canonical_rule):
                    self.add_rule(*expanded, annotation=origin)
Beispiel #14
0
 def __init__(self, name, aps, optional=False, overrides=None):
     """Store the name, the a.p.s. reference and the flags.

     Args:
         name: a plain ``str`` is wrapped in ``SanskritImmutableString``;
             any other object is stored as given.
         aps: the reference, either a dotted string such as ``"1.2.3"``
             (each component must parse as ``int``) or a tuple of numbers.
             When there are more than three components, the last one is
             treated as a sub-sutra/vartika index and contributes a
             fractional part to ``self._aps_num``.
         optional: stored on ``self.optional``.
         overrides: stored on ``self.overrides``.

     Raises:
         TypeError: if ``aps`` is neither a ``str`` nor a ``tuple``.
     """
     if isinstance(name, str):
         self.name = SanskritImmutableString(name)
     else:
         self.name = name
     if isinstance(aps, str):
         self.aps = aps  # Adhyaya.pada.sutra
         aps_t = [int(_x) for _x in aps.split(".")]
     elif isinstance(aps, tuple):
         aps_t = aps
         self.aps = '.'.join([str(x) for x in aps_t])
     else:
         raise TypeError(
             "aps must be a str or a tuple, not %s" % type(aps).__name__)
     self._aps_tuple = aps_t
     # Compute the fractional part for both input forms; previously it was
     # only set for string input, so tuple input failed with
     # UnboundLocalError on the line below.
     if len(aps_t) > 3:  # Subsutra/Vartikam
         aps_sub = Decimal("0." + str(aps_t[-1]))
     else:
         aps_sub = 0
     self._aps_num = aps_t[2] + aps_t[1] * 1000 + aps_t[0] * 10000 + aps_sub
     self.overrides = overrides
     self.optional = optional
     logger.info(
         f"Initialized {self}:  {self._aps_num} Optional:{self.optional}")
 def map_tags(self, tags):
     """Convert a sequence of tag objects into a list of mapped entries.

     Dispatch is on the exact type of each item:
     ``Nominal`` -> ``self.map_nominal`` (skipped if an equal entry is
     already in the output), ``Verb`` -> ``self.map_verb``,
     ``Indeclinable`` -> ``(name, {'avyayam'})``, ``Gerund`` ->
     ``(root name, {'ktvA'})`` and ``Infinitive`` ->
     ``(root name, {'tumun'})``, the last two after ``self.refresh``.
     Items of any other type are passed through unchanged.
     """
     mapped = []
     for item in tags:
         kind = type(item)
         if kind == Nominal:
             entry = self.map_nominal(item)
             if entry in mapped:
                 continue
         elif kind == Verb:
             entry = self.map_verb(item)
         elif kind == Indeclinable:
             entry = (SanskritImmutableString(item.name, SLP1),
                      {SanskritImmutableString('avyayam', SLP1)})
         elif kind == Gerund or kind == Infinitive:
             suffix = 'ktvA' if kind == Gerund else 'tumun'
             refreshed = self.refresh(item)
             entry = (SanskritImmutableString(refreshed.root.name, SLP1),
                      {SanskritImmutableString(suffix, SLP1)})
         else:
             entry = item
         mapped.append(entry)
     return mapped
Beispiel #16
0
def _env(s1, s2):
    """Build the execution environment dict for a left/right pair.

    Keys, all derived from the canonical forms of ``s1`` and ``s2``:
      lp / rp : ``s1`` / ``s2`` as given
      l  / r  : last character of ``s1`` / first character of ``s2``
      ll / rr : second-to-last character of ``s1`` / second of ``s2``
      lc / rc : ``s1`` without its last character / ``s2`` without its first
    Pieces that do not exist are empty ``SanskritImmutableString`` objects.
    """
    left = s1.canonical()
    right = s2.canonical()
    env = {"lp": s1, "rp": s2}
    env["l"] = (SanskritImmutableString(left[-1], SLP1)
                if left != "" else SanskritImmutableString(""))
    env["r"] = (SanskritImmutableString(right[0], SLP1)
                if right != "" else SanskritImmutableString(""))
    if len(left) > 1:
        env["ll"] = SanskritImmutableString(left[-2], SLP1)
        env["lc"] = SanskritImmutableString(left[:-1], SLP1)
    else:
        env["ll"] = SanskritImmutableString("")
        env["lc"] = SanskritImmutableString("")
    if len(right) > 1:
        env["rr"] = SanskritImmutableString(right[1], SLP1)
        env["rc"] = SanskritImmutableString(right[1:], SLP1)
    else:
        env["rr"] = SanskritImmutableString("", SLP1)
        env["rc"] = SanskritImmutableString("", SLP1)
    return env
Beispiel #17
0
    def expand_rule(self, rule):
        """
        Expands a given sandhi rule from the rules file to generate all possible combinations

        As parsed below, a rule has the shape ``<left> + <right> = <after>``,
        where several after forms may be separated by ``/``. Left and right
        may contain bracketed classes ``[...]``: a class starting with ``*``
        is resolved through ``MaheshvaraSutras.getPratyahara``, any other
        class is a comma-separated list of alternatives. Each after form is a
        ``str.format`` template that receives the chosen left alternatives
        followed by the chosen right alternatives as positional arguments.

        :param rule: Rule to expand
        :return: A generator of all possible expanded rules, each of the
                 form ``((left, right), after)``
        """
        self.logger.debug("Expanding rule %s", rule)

        ms = MaheshvaraSutras()

        # "before = after": exactly one "=" is expected, otherwise the
        # two-name unpacking below raises ValueError.
        b, afters = map(six.text_type.strip, rule.split("="))
        # Only the first "+" separates left from right.
        before = list(map(six.text_type.strip, b.split("+", 1)))
        # The capturing group keeps the bracket contents in the result; text
        # outside the brackets is kept as separate elements.
        left_classes = re.split(r'\[(.*?)\]', before[0])
        self.logger.debug("Left classes = %s", left_classes)

        # Split after forms into individual forms
        afters = map(six.text_type.strip, afters.split("/"))

        # One entry (an iterable of alternatives) per non-empty left class.
        before_left = []
        for c in left_classes:
            if c != '':
                if c.startswith("*"):
                    # This is a mAheswara sUtra pratyAhAra
                    # "*<name>-<chars>": the characters after "-" are removed
                    # from the set obtained for <name>.
                    splits = list(map(six.text_type.strip, c.split('-')))
                    varnas = set(
                        ms.getPratyahara(SanskritImmutableString(
                            splits[0][1:], encoding=SLP1),
                                         longp=False,
                                         remove_a=True,
                                         dirghas=True).canonical())
                    if len(splits) == 2:
                        varnas -= set(splits[1])
                    self.logger.debug("Found pratyAhAra %s = %s", c, varnas)
                    before_left.append(varnas)
                else:
                    # Plain class: comma-separated alternatives (a lazy map).
                    before_left.append(map(six.text_type.strip, c.split(",")))
        self.logger.debug("before_left iterator = %s", before_left)

        right_classes = re.split(r'\[(.*?)\]', before[1])
        # Could have used list comprehension, but this is easier to read
        self.logger.debug("right_classes = %s", right_classes)
        # NOTE(review): re.split() returns at least one element even for an
        # empty string, so this condition looks always true and the else
        # branch unreachable - confirm.
        if right_classes:
            before_right = []
            for c in right_classes:
                if c != '':
                    if c.startswith("*"):
                        # This is a mAheswara sUtra pratyAhAra
                        # Unlike the left side, the separator is captured so
                        # that both "+<chars>" (add) and "-<chars>" (remove)
                        # can be told apart.
                        splits = list(
                            map(six.text_type.strip, re.split('([+-])', c)))
                        varnas = set(
                            ms.getPratyahara(SanskritImmutableString(
                                splits[0][1:], encoding=SLP1),
                                             longp=False,
                                             remove_a=True,
                                             dirghas=True).canonical())
                        if len(splits) == 3:
                            if splits[1] == '-':
                                varnas -= set(splits[2])
                            elif splits[1] == '+':
                                varnas |= set(splits[2])
                        self.logger.debug("Found pratyAhAra %s (%s) = %s", c,
                                          splits[0][1:], varnas)
                        before_right.append(varnas)
                    else:
                        # Plain class: comma-separated alternatives.
                        before_right.append(
                            map(six.text_type.strip, c.split(",")))
        else:
            before_right = [before[1].strip()]
        self.logger.debug("before_right iterator = %s", before_right)

        # Every after form is combined with every choice of one alternative
        # per left class and one alternative per right class.
        for after, before_l, before_r in itertools.product(
                afters, itertools.product(*before_left),
                itertools.product(*before_right)):
            left = ''.join(before_l)
            right = ''.join(before_r)
            list_before_r = list(before_r)
            left_right = (left, right)
            # Fill the after template: left choices first, then right ones.
            a = after.format(*(list(before_l) + list_before_r))
            # The below is just too much logging - should be silenced in production:
            # self.logger.debug("Final rule = %s -> %s", left_right, a)
            yield (left_right, a)
def jedge(pred, node, label, strict_io=False):
    """Return the tuple describing an edge between ``pred`` and ``node``.

    The tuple holds, in order: the canonical pada of ``node``, the result of
    ``jtag`` on the node's morphological tags, the canonical form of
    ``label`` (read as SLP1) and the canonical pada of ``pred``.
    ``strict_io`` is forwarded to every ``canonical`` call and to ``jtag``.
    """
    node_pada = node.pada.canonical(strict_io=strict_io)
    node_tags = jtag(node.getMorphologicalTags(), strict_io)
    edge_label = SanskritImmutableString(label, encoding=SLP1)
    return (node_pada,
            node_tags,
            edge_label.canonical(strict_io=strict_io),
            pred.pada.canonical(strict_io=strict_io))