Code example #1
 def print_r(r):
     import sagas
     df = sagas.to_df(
         r['domains'],
         ['rel', 'index', 'text', 'lemma', 'children', 'features'])
     print('%s(%s)' % (r['type'], r['lemma']))
     sagas.print_df(df)
     print_stem_chunks(r)
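For reference, `print_r` expects a parse-result dict of the shape produced by the verb-domain helpers in examples #11 and #17 below. A minimal sketch of that input, with hypothetical values (the key names and the six-column row layout are taken from the calls above):

    # Hypothetical parse result; 'domains' rows follow the to_df column order.
    r = {
        'type': 'verb_domains',
        'lemma': 'bear',
        'domains': [
            # (rel, index, text, lemma, children, features)
            ('nsubj:pass', 1, 'Obama', 'Obama', [], []),
            ('obl', 5, 'Hawaii', 'Hawaii', [], []),
        ],
        'stems': [('obj', ['Hawaii'])],  # consumed by print_stem_chunks
    }
    print_r(r)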
Code example #2
    def deconstructing(self, text, target='ar'):
        """
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student'
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' de
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' fr
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' es
        $ python -m sagas.ar.arabic_processor deconstructing 'I am a student' vi

        ## other langs: ru, ja, zh

        :param text:
        :param target:
        :return:
        """
        import sagas
        source = 'en'
        options = {'get_pronounce', 'get_translations'}
        res, t = translate(text,
                           source=source,
                           target=target,
                           trans_verbose=False,
                           options=options)
        print('✁', '%s(%s %s)' % (text, res, ''.join(t.pronounce)))
        for sent in text.split(' '):
            res, t = translate(sent,
                               source=source,
                               target=target,
                               trans_verbose=False,
                               options=options)
            # print('%s(%s%s)' % (sent, res, marks_th(t.pronounce)), end=" ")
            print('%s(%s%s)' % (sent, res, marks_th(t.pronounce)))
            sagas.print_df(t.translations)
        print('.')
Code example #3
    def parse_print(self, sents, format='default'):
        """
        $ python -m sagas.nlu.stanford_helper parse_print 'جارك رجل طيب'
        :param sents:
        :param format:
        :return:
        """

        import json
        import sagas

        # TEXT = 'جارك رجل طيب'
        # host = 'pc'
        ann = self.invoke_server(
            sents,
            'arabic',
            output_format='text' if format == 'default' else 'json')
        if format == 'default':
            print(ann.strip())
        elif format == 'json':
            print(', '.join(ann['sentences'][0].keys()))
            print(json.dumps(ann, indent=2, ensure_ascii=False))
        elif format == 'df':
            tokens = ann['sentences'][0]['tokens']
            sagas.print_df(sagas.dict_df(tokens))
Code example #4
def analyse_ar(text, disp_df=False):
    import sagas
    target = 'en'
    if disp_df:
        sagas.print_df(process_df('ar', target, text, with_styles=False))
    else:
        process('ar', target, text)
    NluTools().say(text, 'ar')
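`NluTools().say` presumably voices the sentence via text-to-speech. A minimal sketch of such a helper built on pyttsx3, reusing the voice-matching convention from all_voices in example #18 below (this is an illustration, not the actual NluTools implementation):

    import pyttsx3

    def say(text, lang):
        # Pick the first installed voice whose language code starts with `lang`.
        engine = pyttsx3.init()
        for voice in engine.getProperty('voices'):
            if voice.languages and voice.languages[0].startswith(lang):
                engine.setProperty('voice', voice.id)
                break
        engine.say(text)
        engine.runAndWait()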
Code example #5
File: knp_helper.py Project: CoderOverflow/stack
def display(df, col_defs=None):
    import sagas
    if outputer == 'console':  # `outputer`: module-level output mode, not shown in this snippet
        if col_defs is not None:
            for col in col_defs:
                crop_column(df, col[0], col[1])
        sagas.print_df(df)
    else:
        from IPython.display import display
        display(df)
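Judging from the crop_column loop, col_defs is a sequence of (column_name, max_width) pairs. A hypothetical call (the column names and widths are invented for illustration):

    # Crop long columns for console output; notebook output is left untouched.
    display(df, col_defs=[('text', 40), ('features', 20)])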
Code example #6
 def all_sources(self, s):
     """
     $ python -m sagas.nlu.trans_cacher all_sources vi
     :param s:
     :return:
     """
     import sagas
     rs = []
     for r in cacher.coll.find({'source': s}):
         rs.append((r['text'], r['target']))
     sagas.print_df(sagas.to_df(rs, ['text', 'target']))
Code example #7
File: corenlp_procs.py Project: CoderOverflow/stack
 def testings(self):
     """
     $ python -m sagas.nlu.corenlp_procs testings
     :return:
     """
     ds = [
         words_table('عمري تسعة عشر عاماً.', 'ar'),
         words_table('آخرین کسی که به کامپیوتر وصل شد، کی بود؟', 'fa')
     ]
     for df in ds:
         sagas.print_df(df)
Code example #8
File: test_mongo.py Project: CoderOverflow/stack
def simple():
    from pymongo import MongoClient
    import sagas

    # uri = 'mongodb://samlet.com/langs'
    uri = 'mongodb://localhost/langs'
    # uri = 'mongodb://192.168.0.101/langs'
    client = MongoClient(uri)
    db = client.get_default_database()
    print(db.name)

    rs = []
    for r in db.trans.find({'source': 'id'}):
        rs.append((r['text'], r['target']))
    sagas.print_df(sagas.to_df(rs, ['text', 'target']))
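The rows-plus-columns pattern above recurs in most of these examples: sagas.to_df builds a DataFrame from a list of tuples and sagas.print_df renders it to the console. Assuming the helpers are thin wrappers over pandas, a rough equivalent would be:

    import pandas as pd

    rs = [('buku', 'book'), ('meja', 'table')]  # sample (text, target) rows
    df = pd.DataFrame(rs, columns=['text', 'target'])
    print(df.to_string())  # roughly what sagas.print_df is assumed to do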
Code example #9
    def tests(self):
        """
        $ python -m sagas.nlu.translit_ar tests
        :return:
        """
        import pandas as pd
        import sagas

        inputfile1 = 'data/phrases.csv'
        inputfile2 = 'data/nouns.csv'  # not used in this snippet

        inputdf = pd.read_csv(inputfile1)
        outputdf = self.transliterate_df(inputdf)
        d = pd.concat([inputdf, outputdf], axis=1)
        sagas.print_df(d)
Code example #10
 def ents(self, sents, lang='en', simple=True):
     """
     $ python -m sagas.nlu.spacy_procs ents 'New York'
     $ python -m sagas.nlu.spacy_procs ents 'I am from China'
     $ python -m sagas.nlu.spacy_procs ents "Ada Lovelace was born in London"
     :param sents:
     :param lang:
     :param simple:
     :return:
     """
     import sagas
     rs = []
     doc = self.spacy_doc(sents, lang, simple=simple)
     for ent in doc.ents:
         rs.append((ent.text, ent.start_char, ent.end_char, ent.label_,
                    ent.kb_id_))
     r = sagas.to_df(rs, ['word', 'start', 'end', 'entity', 'kb'])
     sagas.print_df(r)
Code example #11
File: uni_jsonifier.py Project: CoderOverflow/stack
def rs_summary(rs, console=True):
    from sagas.tool.misc import print_stem_chunks
    from IPython.display import display
    import sagas
    for serial, r in enumerate(rs):
        df = sagas.to_df(r['domains'], ['rel', 'index', 'text', 'lemma', 'children', 'features'])
        if 'head' in r:
            cla = "%s(%s)" % (r['head'], r['head_pos'])
        else:
            cla = '_'
        print('%s(%s)' % (r['type'], r['lemma']), cla)
        # sagas.print_df(df)
        if not console:
            display(df)
        else:
            sagas.print_df(df)
        print_stem_chunks(r)
Code example #12
File: chunk_entities.py Project: CoderOverflow/stack
    def list_chunk_entities(self, sents, lang='en'):
        """
        $ python -m sagas.nlu.chunk_entities list_chunk_entities 'Apple is looking at buying U.K. startup for $1 billion.'
        $ python -m sagas.nlu.chunk_entities list_chunk_entities "Where's the president?"
        $ python -m sagas.nlu.chunk_entities list_chunk_entities "διαμένω στη Νέα Υόρκη" el

        :param sents:
        :param lang:
        :return:
        """
        import sagas

        doc = self.core_nlp(sents)
        doc_s = doc.sentences[0]

        tokens = tokenize(sents, doc_s)
        for tok in tokens:
            print(tok.index, '\t', tok.word, tok.word_offset, tok.positions)
        ent_pos = self.entity_positions(sents, lang)
        print(ent_pos)
        # process spans and overlaps
        chunks = []
        r = self.get_verb_domain(doc.sentences[0])
        # r = self.get_chunks(doc.sentences[0])
        if len(r) > 0:
            for el in r[0]['domains']:
                span_id = el[0]
                span_pos = el[4]
                start_mark = tokens[span_pos[0] - 1]
                end_mark = tokens[span_pos[-1] - 1]
                word_range = [
                    start_mark.positions['start'], end_mark.positions['end']
                ]
                entities = get_included_entities(word_range, ent_pos)
                chunks.append((span_id, span_pos, word_range,
                               sents[word_range[0]:word_range[1]],
                               [ent['entity'] for ent in entities]))
            df = sagas.to_df(
                chunks,
                ['rel', 'positions', 'range', 'chunk text', 'entities'])
            sagas.print_df(df[['rel', 'chunk text', 'entities']])
        else:
            # print("no chunks.")
            print("no verbs.")
Code example #13
    def nltk_locales(self):
        """
        $ python -m sagas.nlu.locales nltk_locales
        :return:
        """
        from nltk.corpus import wordnet as wn
        from iso639 import languages
        import sagas
        langs = wn.langs()
        print(len(langs), sorted(langs))
        rs = []
        excepts = ['qcn']
        for lang in langs:
            if lang not in excepts:
                loc = languages.get(part3=lang)
                rs.append((loc.part3, loc.macro, loc.name))

        df = sagas.to_df(rs, ['code', 'macro', 'name'])
        sagas.print_df(df)
Code example #14
File: nlu_cli.py Project: CoderOverflow/stack
 def get_word_trans(self, word, lang, pos='*'):
     import sagas
     from sagas.nlu.translator import translate, with_words, WordsObserver
     r, t = translate(word,
                      source=lang,
                      target='en',
                      options={'get_pronounce'},
                      tracker=with_words())
     if r:
         word_r = r.lower()
         tc.emp('cyan', f"1. translate: {word_r}")
         obs: WordsObserver = t.observer(WordsObserver)
         dfs = obs.trans_dfs
         if dfs:
             tc.emp('cyan', f"2. candidates: {obs.get_axis(word_r, pos)}")
             for k, df in dfs.items():
                 print(f"- pos:{k} -")
                 sagas.print_df(df)
         else:
             tc.emp('cyan', f"2. no candidates.")
         return word_r
     return ''
Code example #15
    def disp_by_offset(self, lang, offset, pos='n'):
        """
        $ python -m sagas.nlu.omw_extended disp_by_offset ru 9918554
        $ python -m sagas.nlu.omw_extended disp_by_offset de 9918554
        :param lang:
        :param offset:
        :param pos:
        :return:
        """
        import sagas

        offset = str(offset)
        id = '%s-%s' % (offset.zfill(8), pos)
        rs = []
        print('search for', id)
        if lang in langsets:
            data = self.load_dicts(lang)
            for row in data:
                if row[0] == id:
                    rs.append((row[0], row[2]))
            df = sagas.to_df(rs, ['id', 'word'])
            sagas.print_df(df)
        else:
            print('no data.')
Code example #16
File: ltp_meta.py Project: CoderOverflow/stack
 def defs(self):
     """
     $ python -m sagas.zh.ltp_meta defs
     :return:
     """
     sagas.print_df(self.roles_df)
     sagas.print_df(self.dep_defs)
     sagas.print_df(self.pos_defs)
Code example #17
File: uni_parser.py Project: CoderOverflow/stack
    def verb_domains(self, sents, lang='en'):
        """
        $ python -m sagas.nlu.uni_parser verb_domains "Barack Obama was born in Hawaii." en
        # Chinese version of the sentence below: 我有一只阿比西尼亚猫
        $ python -m sagas.nlu.uni_parser verb_domains "I have an Abyssinian cat." en

        $ python -m sagas.nlu.uni_parser verb_domains 'Что ты обычно ешь на ужин?' ru
        $ python -m sagas.nlu.uni_parser verb_domains 'Die Zeitschrift erscheint monatlich.' de

        # Test multiple verbs (filtering out the verbs of subordinate clauses):
        $ python -m sagas.nlu.uni_parser verb_domains 'Tu as choisi laquelle tu vas manger ?' fr
        :param sents:
        :param lang:
        :return:
        """
        import sagas
        from sagas.nlu.corenlp_helper import get_nlp
        serial_numbers = '❶❷❸❹❺❻❼❽❾❿'
        nlp = get_nlp(lang)
        doc = nlp(sents)
        # Analyze dependency relations bottom-up; this can be used to extract the
        # child-node sets of given relations, e.g. 'nsubj:pass' and 'obl' here.
        # word.governor is the parent of the current word.
        sent = doc.sentences[0]
        rs = get_verb_domain(sent)
        # r=rs[0]
        for num, r in enumerate(rs):
            # print(json.dumps(r, indent=2, ensure_ascii=False))
            print(serial_numbers[num], '-' * 50)
            # print(r['verb'], r['index'])
            print(r['word'], r['index'])
            # df=sagas.to_df(r[0]['domains'], ['rel', 'index', 'text', 'children'])
            df = sagas.to_df(
                r['domains'],
                ['rel', 'index', 'text', 'lemma', 'children', 'features'])
            sagas.print_df(df)
            for stem in r['stems']:
                if stem[0] == 'obj':
                    print('object ->', ' '.join(stem[1]))
Code example #18
File: nlu_tools.py Project: CoderOverflow/stack
 def all_voices(self, lang=None):
     """
     $ python -m sagas.nlu.nlu_tools all_voices
     $ nlu all-voices ru
     :param lang:
     :return:
     """
     import pyttsx3
     import sagas
     from collections.abc import Iterable
     engine = pyttsx3.init()
     voices: Iterable = engine.getProperty('voices')
     rs = []
     for voice in voices:
         if lang is not None:
             if voice.languages[0].startswith(lang):
                 print(voice)
         else:
             print(voice, voice.id, voice.languages[0])
             rs.append((voice.id.replace('com.apple.speech.synthesis.',''),
                        voice.name,
                        voice.languages,
                        voice.gender
                        ))
     rs = sorted(rs, key=lambda el: el[2][0])
     sagas.print_df(sagas.to_df(rs, ['id', 'name', 'lang', 'gender']))
Code example #19
 def dfs(self, *args):
     # Print any number of DataFrames in sequence.
     import sagas
     for arg in args:
         sagas.print_df(arg)