Code example #1
def text_rank(text, language):
    sentences = []
    a = []
    if language == 'ukrainian':
        morph = MorphAnalyzer(lang='uk')
        sentences = sent_tokenizer_ua(text)
        if len(sentences) < 2:
            s = sentences[0]
            return [(1, 0, s)]
        a = tfidf(text, language, sent_tokenizer_ua, stop_words_ua)
    else:
        morph = MorphAnalyzer()
        sentences = sent_tokenizer_ru(text)
        if len(sentences) < 2:
            s = sentences[0]
            return [(1, 0, s)]
        a = tfidf(text, language, sent_tokenizer_ru, stop_words_ru)

    pairs = combinations(range(len(sentences)), 2)
    scores = [(i, j, similarity(a[i, :], a[j, :])) for i, j in pairs]
    scores = filter(lambda x: x[2], scores)

    pr = rank_graph(scores)

    return sorted(((i, pr[i], s) for i, s in enumerate(sentences) if i in pr),
                  key=lambda x: pr[x[0]], reverse=True)  # sort the (index, rank, sentence) triples by rank, descending
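The `similarity` helper is not shown in this example. Since `a[i, :]` is a tf-idf row vector here, a plausible stand-in is cosine similarity; a minimal sketch (the name and signature are assumptions, not the repo's actual code):

import numpy as np

def similarity(v1, v2):
    # Hypothetical cosine similarity between two tf-idf row vectors.
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    return float(v1 @ v2) / denom if denom else 0.0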
Code example #2
def text_rank(text, language):
    sentences = []
    words = []

    if language == 'ukrainian':
        morph = MorphAnalyzer(lang='uk')
        sentences = sent_tokenizer_ua(text)
        if len(sentences) < 2:
            s = sentences[0]
            return [(1, 0, s)]
        words = [set(morph.parse(word)[0].normalized for word in word_tokenizer.tokenize(sentence.lower())
                    if word not in stop_words_ua) for sentence in sentences]
    else:
        morph = MorphAnalyzer()
        sentences = sent_tokenizer_ru(text)
        if len(sentences) < 2:
            s = sentences[0]
            return [(1, 0, s)]
        words = [set(morph.parse(word)[0].normalized for word in word_tokenizer.tokenize(sentence.lower())
                     if word not in stop_words_ru) for sentence in sentences]

    pairs = combinations(range(len(sentences)), 2)
    scores = [(i, j, similarity(words[i], words[j])) for i, j in pairs]
    scores = filter(lambda x: x[2], scores)
    pr = rank_graph(scores)

    return sorted(((i, pr[i], s) for i, s in enumerate(sentences) if i in pr),
                  key=lambda x: pr[x[0]], reverse=True)
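In this variant `similarity` receives sets of lemmas rather than vectors. A minimal sketch in the spirit of the TextRank sentence-similarity measure (normalized overlap; again an assumption, not the original helper):

from math import log10

def similarity(s1, s2):
    # Hypothetical normalized overlap of two lemma sets,
    # in the spirit of the TextRank sentence-similarity formula.
    denom = log10(len(s1) or 1) + log10(len(s2) or 1)
    return len(s1 & s2) / denom if denom else 0.0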
Code example #3
    def connectUI(self):
        name_ui = 'ui_windows/main_window2.ui'
        uic.loadUi(name_ui, self.parent)
        self.setFixedSize(1010, 750)

        self.connect_buttons()
        self.update_info()
        self.create_labels(['', '', '', ''])
        self.set_group_companies()

        DATABASE['currency'].name = 'exchange_rates_month'
        money = DATABASE['currency'].draw_exchange_rates(
            self.parent.value.currentText(), my_type='Month')
        self.parent.chart.setPixmap(
            QPixmap(DATABASE['currency'].directory +
                    DATABASE['currency'].name + '.png'))

        morph = MorphAnalyzer()  # build once: the analyzer loads its dictionaries on init
        morph_country = morph.parse('Российский')[0]
        morph_currency = morph.parse('Рубль')[0]

        text = [
            str(money),
            morph_country.make_agree_with_number(money).word.capitalize() +
            ' ' +
            morph_currency.make_agree_with_number(money).word.capitalize()
        ]
        self.parent.currensy.setText(
            "<html><head/><body><p align=\"center\"><span>" + text[0] +
            "</span></p></body></html>")
        self.parent.currensy_2.setText(
            "<html><head/><body><p align=\"center\"><span>" + text[1] +
            "</span></p></body></html>")
        self.parent.value.currentTextChanged.connect(
            self.change_type_exchange_rates)
        self.parent.current_company_2.currentTextChanged.connect(
            self.set_group_companies)
        self.parent.current_company.currentTextChanged.connect(
            self.set_company)
        try:
            company = self.parent.current_company.currentText().strip().lower()
            if not company:
                return
            img = open(DATABASE[company].directory + DATABASE[company].name +
                       '.png')
            img.close()
            self.parent.chart_3.setPixmap(
                QPixmap(DATABASE[company].directory + DATABASE[company].name +
                        '.png'))

        except FileNotFoundError:
            self.set_company()
Code example #4
File: merge-file.py Project: creaciond/coreference
def main():
    morph = MorphAnalyzer()
    reg_new_name = re.compile('[0-9]{,9}-#')

    tokens_path = '..' + os.sep + '..' + os.sep + 'RuCor' + os.sep + '!new_tokens.txt'
    documents_path = tokens_path.replace('!new_tokens', 'Documents')
    nlc_folder = '..' + os.sep + '..' + os.sep + 'RuCor' + os.sep + '!all-in-one'

    all_tokens = read_info(tokens_path, header=True)
    files_and_ids = filenames_ids(documents_path)
    filenames = set(files_and_ids.keys())

    counter = 1
    total = len(os.listdir(nlc_folder))

    for item in os.listdir(nlc_folder):
        if item.endswith('.csv'):
            original_name = reg_new_name.sub('', item)[:-len('.csv')]  # str.strip('.csv') would also eat leading/trailing 'c'/'s'/'v'/'.' characters
            if original_name in filenames:
                # NLC
                nlc_path = nlc_folder + os.sep + item
                nlc = NLC_to_dict(read_info(nlc_path, header=False))
                # RuCor
                original_id = files_and_ids[original_name]
                rucor = tokens_to_dict(all_tokens, doc_id=original_id)
                # merging
                rucor = text_and_tokens(rucor, nlc)
                rucor = do_morphology(rucor, morph)
                # save
                save_dataset(rucor, original_id)
                # kinda logging print
                now = datetime.now()
                print('{0:02d}:{1:02d}:{2:02d}\t{3:3d}/{4:3d}\tfile: {5}\t\t\tdone: {6:.2f}%'.format(
                    now.hour, now.minute, now.second, counter, total, nlc_path, counter / total * 100))
                counter += 1
Code example #5
def pymorphying(filename):
    dictionary = dict()
    morph = MorphAnalyzer()
    words_and_grams = list()
    with open(filename, 'r', encoding='utf-8') as file:
        text = file.read()
        tokenized = word_tokenize(text)
        for one in tokenized:
            parsed = morph.parse(one)
            parsed = parsed[0]
            original_word = one
            gram_info = str(parsed.tag).split(',')
            first_gram = str(gram_info[0]).split()[0]
            if first_gram == 'PNCT' or first_gram == 'UNKN':
                continue
            if len(gram_info) == 1:
                continue
            loop = dictionary
            for counter, gram in enumerate(gram_info, 1):
                if gram not in loop:
                    add_to_dict(gram, loop, counter, len(gram_info))
                if not isinstance(loop, list):
                    loop = loop[gram]
            try:
                loop.append(original_word)
                loop.sort()
            except AttributeError:
                loop = list()
                loop.append(original_word)
                loop.sort()
    print(dictionary)
Code example #6
 def __init__(
     self,
     vocab: Vocab,
     model: Optional[Model],
     name: str = "lemmatizer",
     *,
     mode: str = "pymorphy2",
     overwrite: bool = False,
     scorer: Optional[Callable] = lemmatizer_score,
 ) -> None:
     if mode == "pymorphy2":
         try:
             from pymorphy2 import MorphAnalyzer
         except ImportError:
             raise ImportError(
                 "The Ukrainian lemmatizer mode 'pymorphy2' requires the "
                 "pymorphy2 library and dictionaries. Install them with: "
                 "pip install pymorphy2 pymorphy2-dicts-uk") from None
         if getattr(self, "_morph", None) is None:
             self._morph = MorphAnalyzer(lang="uk")
     super().__init__(vocab,
                      model,
                      name,
                      mode=mode,
                      overwrite=overwrite,
                      scorer=scorer)
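A quick way to verify that the Ukrainian dictionaries this lemmatizer depends on are installed (the output shown is what pymorphy2 is expected to produce):

from pymorphy2 import MorphAnalyzer

morph = MorphAnalyzer(lang='uk')  # requires pymorphy2-dicts-uk
print(morph.parse('кішки')[0].normal_form)  # expected: 'кішка'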
Code example #7
File: main.py Project: VadzimIlyukevich/vocabulary
def addWord():
    global vocabulary
    start_time = time.time()
    words = {}
    analyzer = MorphAnalyzer()
    vocabulary.append(inputText.get(1.0, END))
    tokenize_sentence = word_tokenize(vocabulary[0])
    for word in tokenize_sentence:
        parse_word = analyzer.parse(word)[0]
        word_word = parse_word.word
        word_lemma = parse_word.normal_form
        word_tags = parse_word.tag.cyr_repr
        word_ending = list(set(word_word) - set(word_lemma))
        if word_word != word_lemma:  # 'is not' compares identity, not equality
            words.update({
                word_word: {
                    'lemma': word_lemma,
                    'tag': word_tags,
                    'ending': word_ending
                }
            })
    sorted_words = sorted(words)
    for key in sorted_words:
        lexeme = Lexeme((words[key]['lemma']), (words[key]['tag']),
                        (words[key]['ending']))
        outputText.insert(0, str(lexeme.lemma) + '      ' + str(lexeme.tags) + '      ' \
                          + str(lexeme.endings))
    end_time = time.time()
    result_time = end_time - start_time
    print(str(result_time) + " seconds")
    vocabulary.clear()
Code example #8
File: util.py Project: turchaev/nltk4russian
def read_tab_corpus(inc):
    m = MorphAnalyzer()
    sent = []
    for t in inc:
        # try:
        #     t = t.rstrip().decode('utf-8')
        # except AttributeError:
        t = t.rstrip()
        if not t:
            continue
        if t == u'sent':
            sent = []
            continue
        if t == u'/sent' or t == u'SENT':
            sent = [x[0] for x in sent]
            parses = [m.parse(token) for token in sent]
            if sent:
                yield [(p[0].word, p[0].tag) for p in parses]
            continue
        t = t.split('\t')
        try:
            token = (t[1], ' '.join(t[2].split(' ')[2:]))
            sent.append(token)
        except IndexError:
            continue
Code example #9
def search(query):
    relevance = defaultdict(float)
    m = MorphAnalyzer()
    inverted_index, articles, avdl = get_indices()
    N = len(articles)
    words = [
        x.lower().strip(string.punctuation + '»«–…')
        for x in word_tokenize(query)
    ]
    russian_stopwords = set(stopwords.words('russian'))  # build the stopword set once, not per token
    lemmas = [
        m.parse(x)[0].normal_form for x in words
        if x and x not in russian_stopwords
    ]
    for lemma in lemmas:
        if lemma in inverted_index:
            articles_w_lemma = inverted_index[lemma]
            n = len(articles_w_lemma)
            for a in articles_w_lemma:
                a_info = articles[a[0]]
                qf = a[1]
                dl = a_info[2]
                relevance[(a_info[0],
                           a_info[1])] += score_BM25(n, qf, N, dl, avdl)
    res = sorted(relevance.items(), key=lambda x: x[1], reverse=True)
    res = [x[0] for x in res]
    return res
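`score_BM25` is defined elsewhere in that project; a sketch of the standard Okapi BM25 term weight it presumably computes (the parameter values k1 and b are conventional defaults, not taken from the repo):

from math import log

def score_BM25(n, qf, N, dl, avdl, k1=2.0, b=0.75):
    # n: documents containing the term, qf: term frequency in the document,
    # N: collection size, dl: document length, avdl: average document length.
    idf = log((N - n + 0.5) / (n + 0.5))
    return idf * qf * (k1 + 1) / (qf + k1 * (1 - b + b * dl / avdl))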
Code example #10
def load_russe_lemm_df(part='bts-rnc/train'):
    df = load_russe_df(part)
    from pymorphy2.tokenizers import simple_word_tokenize
    df['lctx'] = df.apply(lambda r: r.context[:r.positions[0]], axis=1)
    df['rctx'] = df.apply(lambda r: r.context[r.positions[1]:], axis=1)
    from pymorphy2 import MorphAnalyzer
    _ma = MorphAnalyzer()
    _ma_cache = {}

    def ma(s):
        # strip spaces around the token first: pymorphy2 doesn't handle them correctly
        s = s.strip()
        if s not in _ma_cache:
            _ma_cache[s] = _ma.parse(s)
        return _ma_cache[s]

    def sent_ma(tokens):
        return [ma(t)[0] for t in tokens]

    for col in ('lctx', 'rctx'):
        df[col] = df[col].apply(simple_word_tokenize). \
          apply(sent_ma). \
          apply(lambda l: [s.normal_form for s in l if 'PNCT' not in s.tag]). \
          str.join(' ')
    df.context = df.lctx + ' ' + df.word + ' ' + df.rctx
    df.positions = df.apply(lambda r:
                            (len(r.lctx) + 1, len(r.lctx) + 1 + len(r.word)),
                            axis=1)
    df['word_at'] = df.apply(lambda r: r.context[slice(*r.positions)], axis=1)
    return df
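The hand-rolled `_ma_cache` dict could equally be written with `functools.lru_cache`; a minimal sketch with equivalent behaviour:

from functools import lru_cache
from pymorphy2 import MorphAnalyzer

_ma = MorphAnalyzer()

@lru_cache(maxsize=None)
def ma(s):
    # strip() first: pymorphy2 doesn't handle surrounding spaces correctly
    return _ma.parse(s.strip())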
Code example #11
def make_tags(sentence):
    morph = MorphAnalyzer()
    tags = {'NOUN': 'N', 'NPRO': 'N', 'ADJF': 'A', 'ADJS': 'A', 'PRTF': 'A', 'PRTS': 'V', 'NUMR': 'A', 'VERB': 'V',
            'INFN': 'V', 'GRND': 'V', 'ADVB': 'D', 'PREP': 'P', 'PRCL': 'P', 'CONJ': 'P'}
    tokens = nltk.word_tokenize(sentence)
    # .get with a fallback (assumed 'P' here) avoids a KeyError for POS values
    # missing from the map, e.g. None for punctuation parses
    tokens_tags = [tags.get(morph.parse(token)[0].tag.POS, 'P') for token in tokens]
    return [tokens, tokens_tags]
Code example #12
    def parse_text(self, string, flag):
        morph = MorphAnalyzer()
        FURTHER_DEVELOPMENT = morph.parse('дальнейшие'.lower())[0].normal_form
        FURTHER_IMPROVEMENTS = morph.parse('улучшения'.lower())[0].normal_form
        self.sentences = []
        self.find_further_development = False
        self.filtered_docs = []
        stop_words = set(stopwords.words("russian"))
        filtered_doc = []
        self.get_sentences(string, flag)

        for sent in self.sentences:
            token_sent = [
                w.lower() for w in word_tokenize(sent)
                if w.lower() not in stop_words
            ]
            for word in token_sent:
                w = morph.parse(word)[0].normal_form
                filtered_doc.append(w)
                if w in (FURTHER_DEVELOPMENT, FURTHER_IMPROVEMENTS) and not flag:
                    self.find_further_development = True
                    self.further_dev_sentence = sent
            self.filtered_docs.append(filtered_doc)
            filtered_doc = []
Code example #13
File: bot.py Project: dashajarikova/homework2year
def result():
    morph = MorphAnalyzer()
    if request.args:
        sent = request.args['sentence']
        m = Mystem()
        ana = m.analyze(sent)
        new_sent = open('sentence.txt', 'w', encoding='utf-8')
        for word in ana:
            if 'analysis' in word:
                forma_slova = word['analysis'][0]['gr']
                sent2 = clear_words
                for w in sent2:
                    ana2 = m.analyze(w)
                    try:
                        an_word = ana2[0]
                        if 'analysis' in an_word:
                            print(an_word)
                            forma_slova2 = an_word['analysis'][0]['gr']
                            if forma_slova == forma_slova2:
                                new_sent.write(w + ' ')
                                break
                    except IndexError:
                        pass
        new_sent.close()
        with open('sentence.txt', 'r', encoding='utf-8') as f:
            read_sent = f.read()
        return render_template('result.html', sentence=read_sent)
    return render_template('result.html')
Code example #14
File: OntologyExtractor.py Project: amkatyshev/OE
 def __init__(self):
     self.device = None
     self.model = None
     self.tokenizer = None
     self.morph = MorphAnalyzer()
     self.module_path = os.path.dirname(os.path.abspath(__file__))
     self.data = Struct()
Code example #15
File: lama_bot.py Project: soon/Lama-Reporter
    def __init__(self, app_id, mail_manager,
                 chat_id=1, number_of_seconds_for_the_rest=60, chat_id_for_mails=None, admins=None, **kwargs):
        """
        Initializes Lama Bot.

        Expects login/password or access_token as named parameters

        :param mail_manager: A manager for retrieving mails
        :type mail_manager: AbstractMailManager

        :param chat_id: Chat identifier
        :type chat_id: int

        :param chat_id_for_mails: Chat for mails. Defaults to chat_id if not provided
        :type chat_id_for_mails: int

        :raise ValueError: When neither login/password nor access_token was provided
        """
        self.exit_event = Event()
        self.morph = MorphAnalyzer()
        self.version = '0.1.1'
        self.app_id = app_id
        self.access_token = None
        self.password = None
        self.login = None
        self.vkapi = ThreadSafeVkApi(app_id=app_id, **kwargs)
        self.commands = {}
        self._plugins = []
        self.mail_manager = mail_manager
        self.number_of_seconds_for_the_rest = number_of_seconds_for_the_rest
        self.chat_id = chat_id
        self.chat_id_for_mails = chat_id_for_mails or self.chat_id
        self.admins = admins or []

        self.initialize_commands()
Code example #16
File: market_operations.py Project: gt005/it-class
def get_correct_form_of_points_number_name(number: int) -> str:
    """ Возвращает верное слово (Баллов/Балла/Балл) для правильного написания """
    if not isinstance(number, int):  # Ввелось не число
        return "Балл"

    analysis = MorphAnalyzer().parse("Балл")[0]
    return analysis.make_agree_with_number(number).word
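For reference, `make_agree_with_number` picks the form required by Russian numeral agreement; the expected behaviour for this word:

from pymorphy2 import MorphAnalyzer

ball = MorphAnalyzer().parse('балл')[0]
for n in (1, 2, 5):
    print(n, ball.make_agree_with_number(n).word)
# expected: 1 балл / 2 балла / 5 баллов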
Code example #17
def to_normal_form(file_text):
    morph = MorphAnalyzer()
    out = []
    for word in word_tokenize(file_text.lower()):
        if word.isalnum():
            out.append(morph.parse(word)[0].normal_form)
    return " ".join(out)
Code example #18
    def __init__(self,
                 token_pat="[а-я]+",
                 mode="normal",
                 counter=None,
                 threshold=3,
                 allowed_pos=None,
                 stop_words=None,
                 stop_cities=False):
        self.token = token_pat
        self.mode = mode

        if self.mode not in {"normal", "nospace"}:
            raise ValueError("Unknown mode")
        elif self.mode == "nospace":
            if not isinstance(counter, Counter):
                raise ValueError(
                    "In 'nospace' mode the counter attribute should be passed")
            self.counter = counter
            self.nospace = NoSpaceSplitter(counter)
            self.threshold = threshold

        self.morph = MorphAnalyzer()
        self.allowed_pos = allowed_pos
        self.stop_words = stop_words or STOPWORDS
        if stop_cities:
            self.stop_words = self.stop_words.union(CITIES)  # union() returns a new set, so keep the result
Code example #19
 def persistent_load(self, id):
     if id == "pymorphy2.MorphAnalyzer":
         return MorphAnalyzer()
     elif id == "russtress.Accent":
         return Accent()
     else:
         raise pickle.UnpicklingError("unsupported persistent object")
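`persistent_load` is the unpickling half of pickle's persistent-ID protocol; the matching pickling side would look roughly like this (a sketch, assuming `MorphAnalyzer` and `Accent` are imported as in the example):

import pickle

class AnalyzerPickler(pickle.Pickler):
    def persistent_id(self, obj):
        # Objects tagged here are written as references instead of being
        # serialized; persistent_load above rebuilds them on load.
        if isinstance(obj, MorphAnalyzer):
            return "pymorphy2.MorphAnalyzer"
        if isinstance(obj, Accent):
            return "russtress.Accent"
        return None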
Code example #20
    def feminine_checker(self, w): 
        '''
        Check if the word is feminine. Necessary for some variants of hieroglyphs
        Args:
            w: str, input Russian word
        Returns:
            sex: str, 'M' or 'F' - gender of a word
        '''

        morph = MorphAnalyzer()
        w = self.input_word.split(' ')[0]
        ana = morph.parse(w)[0]
        gram = str(ana.tag).split(',')
#        print(gram)
        try:
            if 'femn' in gram[2]:
                sex = 'F'
            else:
                sex = 'M'
        except IndexError:  # fewer than three grammemes: fall back to a last-letter heuristic
            if w[-1] == 'а' or w[-1] == 'я':
                sex = 'F'
            else:
                sex = 'M'
        self.sex = sex
        return self.sex
Code example #21
File: morph.py Project: alex2304/pizza_bot_test
class MorphParser:
    _morph = MorphAnalyzer()

    @classmethod
    def parse(cls, tokens: List[str]) -> List[ParsedToken]:
        tokens_parsings = []

        for token in tokens:
            pymorphy_parsings = cls._morph.parse(token)

            token_parsings = [
                Parsing(pp.word, pp.normal_form, pp.tag)
                for pp in pymorphy_parsings
            ]

            tokens_parsings.append(ParsedToken(token, token_parsings))

        return tokens_parsings

    @classmethod
    def lemmatize(cls, tokens: List[str]) -> List[str]:
        if isinstance(tokens, str):
            tokens = [tokens]

        lemmas = [str(parsed_token) for parsed_token in cls.parse(tokens)]

        return lemmas
Code example #22
    def analyzeWord(self, word):
        morph = MorphAnalyzer()
        analysisResults = []

        for p in morph.parse(word):
            curAnalysis = {
                'исходное слово': word,
                'нормальная форма': p.normal_form,
                'часть речи': p.tag.POS,
                'одушевленность': p.tag.animacy,
                'вид': p.tag.aspect,
                'падеж': p.tag.case,
                'род': p.tag.gender,
                'включенность': p.tag.involvement,
                'наклонение': p.tag.mood,
                'число': p.tag.number,
                'лицо': p.tag.person,
                'время': p.tag.tense,
                'переходность': p.tag.transitivity,
                'залог': p.tag.voice,
                'лексема': [lexeme[0] for lexeme in p.lexeme]
            }
            analysisResults.append(curAnalysis)

        return analysisResults
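All of these `tag` attributes come straight from pymorphy2's `OpencorporaTag`; attributes that don't apply to a given parse are simply `None`. A quick check:

from pymorphy2 import MorphAnalyzer

p = MorphAnalyzer().parse('книгами')[0]
print(p.normal_form, p.tag.POS, p.tag.case, p.tag.number)  # expected: книга NOUN ablt plur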
Code example #23
    def __init__(self,
                 data_name,
                 lemmatizing_method,
                 max_examples=None,
                 delete_word_parts=False,
                 drop_duplicates=True,
                 count_lemmas_weights=False,
                 limit=None):
        self.data_name = data_name
        self.lemmatizing_method = lemmatizing_method
        self.max_examples = max_examples
        self.delete_word_parts = delete_word_parts
        self.drop_duplicates = drop_duplicates
        self.count_lemmas_weights = count_lemmas_weights
        self.translation = str.maketrans('', '', string.punctuation)

        self.dfs = dict()
        self.nf_cnts = dict()
        self.cache = dict()
        self.pattern = re.compile(r'\b\w+\b')

        if lemmatizing_method is not None and lemmatizing_method != 'none':
            if 'ru' in data_name:
                self.analyzer = MorphAnalyzer()
            elif 'german' in data_name:
                self.analyzer = spacy.load("de_core_news_sm",
                                           disable=['ner', 'parser'])
            elif 'english' in data_name:
                self.analyzer = spacy.load("en_core_web_sm",
                                           disable=['ner', 'parser'])
            else:
                assert "unknown data name %s" % data_name
Code example #24
def merge_files():
    morph = MorphAnalyzer()
    reg_new_name = re.compile('[0-9]{,9}-#')

    tokens_path = '..' + os.sep + '..' + os.sep + 'RuCor' + os.sep + '!new_tokens.txt'
    documents_path = tokens_path.replace('!new_tokens', 'Documents')
    nlc_folder = '..' + os.sep + '..' + os.sep + 'RuCor' + os.sep + '!all-in-one'

    all_tokens = read_info(tokens_path, header=True)
    files_and_ids = filenames_ids(documents_path)
    filenames = set(files_and_ids.keys())

    emb_dict = embeddings()
    par, bin_par = syntpar()

    for item in os.listdir(nlc_folder):
        if item.endswith('.csv'):
            original_name = reg_new_name.sub('', item)[:-len('.csv')]  # avoid str.strip('.csv'), which strips characters, not the suffix
            if original_name in filenames:
                nlc_path = nlc_folder + os.sep + item
                nlc = NLC_to_dict(read_info(nlc_path, header=False))

                original_id = files_and_ids[original_name]
                rucor = tokens_to_dict(all_tokens, doc_id=original_id)

                united_annotation = add_nlc_to_rucor(rucor, nlc)
                united_annotation = do_morphology(united_annotation, morph)

                data = [
                    dataset(united_annotation[token])
                    for token in united_annotation
                ]
                add_embeddings_save(emb_dict, bin_par, data, original_name)
        break  # note: stops after the first file; likely a debugging leftover
Code example #25
 def __init__(self):
     self.grammeme_vectorizer_input = GrammemeVectorizer()
     self.grammeme_vectorizer_output = GrammemeVectorizer()
     self.word_dictionary = WordDictionary()
     self.char_set = set()
     self.morph = MorphAnalyzer() # pyMorphy2
     self.converter = converters.converter('opencorpora-int', 'ud14')
Code example #26
def thanks():
    morph = MorphAnalyzer()
    if request.args:
        a = request.args['sentence']
        with open('words.txt', 'r', encoding='utf-8') as f:
            words = f.readlines()
        reg = re.compile('[^а-яА-Я ]')
        a = a.split()
        new_sent = open('sentence.txt', 'w', encoding='utf-8')
        for i in a:
            ana = morph.parse(i)[0]
            random.shuffle(words)
            for word in words:
                word = reg.sub('', word)
                word = morph.parse(word)[0]
                if word.tag == ana.tag:
                    new_sent.write(word.word)
                    new_sent.write(' ')
                    break
        new_sent.close()
        with open('sentence.txt', 'r', encoding='utf-8') as f:
            new_sent1 = f.read()
        return render_template('thanks.html', sentence_answer=new_sent1)
    return redirect(url_for(''))
Code example #27
def load_ae(paths):
    analyzer = MorphAnalyzer()
    for path in paths:
        for a, b, weight in load_ae_(path):
            a_pos = get_pos(a, analyzer)
            b_pos = get_pos(b, analyzer)
            yield Sim(a, b, a_pos, b_pos, weight)
Code example #28
def place(message):
    global places
    m = MorphAnalyzer()
    word = m.parse(message)[0]
    if 'гео' in word.tag.cyr_repr:
        if message not in places:
            places[message] = r.choice([
                "Отличное место! Бывало, что я заползал туда иногда, раз в месяцок",
                "О да, знаю, там подают таки-и-ие блюда!",
                "Ну, знаешь, насчет этого места. Тут точно дело вкуса, обычному туристу лучше сюда не соваться...",
                "Место, откровенно говоря, так себе...",
                "Это одно из моих любимых мест на планете! Когда будет возможность, обязательно посети",
                "Это место меня отталкивает, даже не планируй туда поездку",
                "Да ладно, нашел место для отдыха!", "Погодка там так себе",
                "Ну, ничего, норм выбор",
                "Как тебе вообще в голову пришло туда захотеть поехать?!",
                "Для питона как раз!)",
                "Как-то одним морозным дням я замечательно отдохнул там, но общее впечатление оставляет желать лучшего",
                "Там бывает мокро, но для меня, питона, это естественная среда)",
                "Брррррррр, не нада", "Питон одобряет",
                "Не трать время на это", "Конечно, там прекрасно!",
                "Что ты там будешь делать?", "Хммм, ничего!"
            ])
        return places[message]
    return False
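The 'гео' check works because `cyr_repr` spells grammemes in Cyrillic; the Latin equivalent is the `Geox` grammeme, which can be tested directly on the tag:

from pymorphy2 import MorphAnalyzer

word = MorphAnalyzer().parse('Москва')[0]
print('Geox' in word.tag)  # expected: True for a toponym parse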
Code example #29
def agree(w1, w2, t1, t2):
    morph = MorphAnalyzer()
    raw_cur_tags = morph.tag(w1)[0]
    raw_next_tags = morph.tag(w2)[0]  # was morph.tag(w1); the second word's tags are needed here

    cur_tags = re.findall(r"\w+", str(raw_cur_tags))  # tag() returns OpencorporaTag objects, not strings
    next_tags = re.findall(r"\w+", str(raw_next_tags))

    if t1 == "person":
        if t2 == "verb_right":
            if next_tags[3] == "tran":
                cur_tags[-1] = "nomn"
            else:
                cur_tags[-1] = "datv"

    if t1 == "verb_right":
        if t2 == "property":
            pass

    if t1 == "adjective":
        if t2 == "property":
            pass

    if t1 == "property":
        if t2 == "person":
            pass
        if t2 == "adjective":
            pass


    #w1 = morph.parse(w1)[0].inflect({}).word
    return w1, w2
Code example #30
def generate_answer(input_sentence):
    morph = MorphAnalyzer()

    words = input_sentence.split()
    answer = ''

    for word in words:
        analyze = morph.parse(word)[0]

        pos_tag = analyze.tag.POS
        filename = str(pos_tag) + '.txt'

        with open(filename, 'r', encoding='utf-8') as file:
            lemmas = file.readlines()

        changed_word = None

        while changed_word is None:
            new_analyze, inf_tags = collect_inf_tags(analyze, pos_tag, lemmas,
                                                     morph)
            changed_word = new_analyze.inflect(inf_tags)

        answer = answer + changed_word.word + ' '

    print(answer)
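The `is None` loop guards against `Parse.inflect` returning `None` when the requested form doesn't exist. For a word where it does exist:

from pymorphy2 import MorphAnalyzer

p = MorphAnalyzer().parse('слово')[0]
form = p.inflect({'plur', 'gent'})
print(form.word if form else None)  # expected: 'слов'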