def to_devanagari(obj):
    """ Render a string, a SanskritObject, or a collection of them in Devanagari

    - A string is first wrapped in a ``SanskritObject`` using the ``SLP1``
      encoding constant.
    - A ``SanskritObject`` is rendered through its ``devanagari()`` method.
    - Anything else is treated as an iterable and converted element by
      element, recursively, so nested collections keep their shape.

    :param obj: string, SanskritObject, or (possibly nested) iterable of those
    :return: the Devanagari rendering for a single item, otherwise a list
        of converted items
    """
    if isinstance(obj, (six.text_type, six.string_types)):
        obj = SanskritObject(obj, encoding=SLP1)
    if isinstance(obj, SanskritObject):
        return obj.devanagari()
    # Build a real list: on Python 3 ``map`` is a lazy one-shot iterator,
    # which cannot be indexed, measured or serialised the way the Python 2
    # list result could.
    return [to_devanagari(item) for item in obj]
def get(self, v):
    """ Parse a presegmented sentence

    Strict I/O is enabled unless ``request.args`` carries
    ``strict=false``.  The parser is configured for SLP1 input and
    Devanagari output and is run on the canonical form of ``v`` with
    ``pre_segmented=True``.

    :param v: the presegmented input sentence
    :return: dict with the keys ``input``, ``devanagari``, ``analysis``
        (serialisable parses), ``split_dot`` (dot text of the split, or
        ``None`` when there is no split) and ``parse_dots`` (dot text of
        every parse)
    """
    strict_p = request.args.get("strict") != "false"
    vobj = SanskritObject(v, strict_io=strict_p, replace_ending_visarga=None)
    parser = Parser(input_encoding="SLP1",
                    output_encoding="Devanagari",
                    replace_ending_visarga='s')
    # Defaults keep the response well-formed when the loop below never runs;
    # without them the dict construction fails on unbound names.
    mres = []
    sdot = None
    pdots = []
    # Trace the raw request input on stdout
    print(v)
    splits = parser.split(vobj.canonical(), limit=10, pre_segmented=True)
    # ``or []`` tolerates a None result (presumably returned when no split
    # is found -- confirm against the Parser API).
    for split in splits or []:
        parses = list(split.parse(limit=10))
        # Each iteration overwrites the previous one, so the response
        # reports the last split only.
        sdot = split.to_dot()
        mres = [x.serializable() for x in parses]
        pdots = [x.to_dot() for x in parses]
    return {
        "input": v,
        "devanagari": vobj.devanagari(),
        "analysis": mres,
        "split_dot": sdot,
        "parse_dots": pdots,
    }
def get(self, v):
    """ Get morphological tags for v

    Finds up to 10 scored sandhi splits of ``v``, builds a ``VakyaGraph``
    for each, and serialises every parse of that graph as a list of
    ``jedge`` (node with a predecessor) / ``jnode`` (node without one)
    entries.  A dot file for each split is written to ``static/`` on a
    best-effort basis.

    :param v: the input text
    :return: dict with the keys ``input``, ``devanagari``, ``analysis``
        (underscore-joined Devanagari split -> list of serialised parses)
        and ``plotbase`` (same key -> base name of the dot file)
    """
    import logging  # local import: only needed to report dot-file failures

    vobj = SanskritObject(v, strict_io=False, replace_ending_visarga=None)
    g = analyzer.getSandhiSplits(vobj, tag=True)
    splits = g.find_all_paths(10, score=True) if g else []
    mres = {}
    plotbase = {}
    for sp in splits:
        # Zero-padded so the name is always four digits; plain ``:4`` pads
        # with spaces and yields file names such as "static/  42.dot".
        # NOTE(review): only 10000 distinct names, so files can collide.
        bn = f"{randint(0, 9999):04}"
        vg = VakyaGraph(sp, max_parse_dc=5)
        sl = "_".join([n.devanagari(strict_io=False) for n in sp])
        for p in vg.parses:
            t = []
            for n in p:
                preds = list(p.predecessors(n))
                if preds:
                    # Only the first predecessor is used (presumably a
                    # node has at most one in a parse).
                    pred = preds[0]
                    lbl = p.edges[pred, n]['label']
                    t.append(jedge(pred, n, lbl))
                else:
                    t.append(jnode(n))
            # A split with no parses gets no "analysis" entry at all.
            mres.setdefault(sl, []).append(t)
        plotbase[sl] = bn
        try:
            vg.write_dot(f"static/{bn}.dot")
        except Exception:
            # Plot generation is best-effort: keep serving the analysis,
            # but leave a trace instead of hiding the failure.
            logging.getLogger(__name__).warning(
                "Could not write static/%s.dot", bn, exc_info=True)
    return {
        "input": v,
        "devanagari": vobj.devanagari(),
        "analysis": mres,
        "plotbase": plotbase,
    }
def get(self, p):
    """ Look up the lexical tags of p

    :param p: the input word
    :return: dict with the keys ``input``, ``devanagari`` and ``tags``;
        ``tags`` is an empty list when the analyzer returns ``None``
    """
    word = SanskritObject(p, strict_io=False)
    found = analyzer.getLexicalTags(word)
    # ``None`` from the analyzer is reported as an empty list
    ptags = [] if found is None else jtags(found)
    return {
        "input": p,
        "devanagari": word.devanagari(),
        "tags": ptags,
    }
def get(self, v):
    """ Get the sandhi splits of v

    Asks the analyzer for the split graph of ``v`` and renders up to 10
    of its paths in Devanagari.

    :param v: the input text
    :return: dict with the keys ``input``, ``devanagari`` and ``splits``
        (one list of Devanagari words per path; empty when the analyzer
        returns no graph)
    """
    sentence = SanskritObject(v, strict_io=False, replace_ending_visarga=None)
    graph = analyzer.getSandhiSplits(sentence)
    jsplits = []
    if graph:
        for path in graph.find_all_paths(10):
            jsplits.append(
                [word.devanagari(strict_io=False) for word in path])
    return {
        "input": v,
        "devanagari": sentence.devanagari(),
        "splits": jsplits,
    }
def get(self, v):
    """ Presegmented Split

    Runs the parser (SLP1 input, Devanagari output) on the canonical form
    of ``v`` with ``pre_segmented=True`` and reports the ``'split'`` entry
    of each resulting split.  Strict I/O is always on here.

    :param v: the presegmented input text
    :return: dict with the keys ``input``, ``devanagari`` and ``splits``
        (empty list when the parser finds nothing)
    """
    vobj = SanskritObject(v, strict_io=True, replace_ending_visarga=None)
    parser = Parser(input_encoding="SLP1",
                    output_encoding="Devanagari",
                    replace_ending_visarga='s')
    splits = parser.split(vobj.canonical(), limit=10, pre_segmented=True)
    # ``or []`` tolerates a None result (presumably returned when no split
    # is found -- confirm against the Parser API) instead of failing with
    # a TypeError in the comprehension.
    return {
        "input": v,
        "devanagari": vobj.devanagari(),
        "splits": [x.serializable()['split'] for x in splits or []],
    }
def getannotation(v):
    """ Get morphological tags for v

    For each of up to 10 sandhi split paths, the analyzer's
    ``constrainPath`` result is expanded into one list of
    ``(Devanagari word, tag)`` tuples per constrained alternative.
    Paths for which ``constrainPath`` returns nothing are left out.

    :param v: the input text
    :return: dict with the keys ``input``, ``devanagari`` and ``analysis``
        (underscore-joined Devanagari split -> list of tagged word lists)
    """
    sentence = SanskritObject(v, strict_io=False, replace_ending_visarga=None)
    graph = analyzer.getSandhiSplits(sentence, tag=True)
    paths = graph.findAllPaths(10) if graph else []
    analysis = {}
    for path in paths:
        constrained = analyzer.constrainPath(path)
        if not constrained:
            continue
        key = "_".join([word.devanagari(strict_io=False) for word in path])
        # Each alternative maps a word's canonical form to its tag
        analysis[key] = [
            [(word.devanagari(strict_io=False),
              jtag(choice[word.canonical()]))
             for word in path]
            for choice in constrained
        ]
    return {
        "input": v,
        "devanagari": sentence.devanagari(),
        "analysis": analysis,
    }