Example #1
def make_response(self, text, user_info=DEFAULT_USER, global_context=[]):
    text = normalize.normalize(text)
    response = ''
    stop_make_response = False
    for method in RESPONDING_METHODS:
        for response in method(text, user_info):
            # Responding methods may yield either a dict with a 'text' key
            # or a plain string; keep the first candidate that has not
            # already been sent to this user or seen in the global context.
            if isinstance(response, dict):
                response['text'] = response.get('text', '').strip()
                if not (response['text'] in user_info['replies']
                        or response['text'] in global_context):
                    stop_make_response = True
                    break
            else:
                response = response.rstrip()
                if not (response in user_info['replies']
                        or response in global_context):
                    stop_make_response = True
                    break
        if stop_make_response:
            break
    if not response:
        response = {'text': 'ああ(;´Д`)'}  # fallback when nothing was produced
    elif isinstance(response, str):
        response = {'text': response}
    response['text'] = self.replace_name(response['text'], user_info)
    return response
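The loop above accepts candidates from generator-style responding methods, which may yield either a dict with a 'text' key or a plain string. A minimal sketch of a compatible method (the name echo_method is hypothetical; real methods are registered in RESPONDING_METHODS):

def echo_method(text, user_info):
    # Yield candidates in priority order; make_response keeps the first one
    # not already in user_info['replies'] or the global context.
    yield {'text': text}  # dict form; the caller strips the 'text' value
    yield text            # plain-string form is also accepted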
Example #2
def run(self):
    _filter = kuzuha.build_hour_filter(1)
    for post in kuzuha.search(_filter=_filter, sort=[('_score', 'desc')]):
        # Strip HTML tags and URLs before trying to extract a 5-7-5 verse
        text = normalize.normalize(post['text'], repeat=4)
        text = regex.re_html_tag.sub('', text)
        text = regex.re_url.sub('', text)
        result = self.extract575(text)
        if result:
            return result + ' #みさお川柳'
Example #3
def _extract_oshiete_answer(query, posts):
    for case_marking_particle in (u'って', u'は', u'の', ''):
        # Capture "<query><noun suffix><particle>..." plus at least two
        # trailing characters as the candidate answer
        extract_rule = re.compile('(%s%s%s.{2,})' % (query, NOUN_SUFFIXES,
                                                     case_marking_particle))
        for post in posts:
            text = normalize.normalize(post['text'].strip())
            match = extract_rule.search(text)
            if match:
                answer = match.group(1)
                if not answer or any(w in answer for w in NG_SUBSTRS):
                    continue
                if 3 < len(answer) < 120:
                    yield answer
Example #4
def _get_title(self, url):
    title = ''
    root, ext = os.path.splitext(url)
    if ext in image_extensions:
        time.sleep(3)  # to avoid being treated as spam by Google
        logger.info('Search by google: %s' % url)
        results = google_image.search(url, best_kwds_max_length=18)
        keywords = filter(lambda x: not x.isdigit(),
                          results['best_keywords'])
        title = ''.join(keywords)
    elif ext not in ignore_extensions:
        logger.info('Retrieve web resource: %s' % url)
        html = web.open_url(url)
        soup = BeautifulSoup(html, "html5lib")
        if soup.title and soup.title.string:
            title = soup.title.string
            title = normalize.normalize(title)
            title = self._shorten_title(title)
    return title
Example #5
def run(self, interval=60, min_length=40):
    m_generator = markov.MarkovGenerater()
    m = mecab.MeCabWrapper()
    posts = kuzuha.search('', _filter=kuzuha.build_date_filter_by_range({'minutes': interval}), sort=[])

    words = []
    for post in posts:
        if 'text' not in post:
            continue
        text = regex.re_a_tag.sub('', post['text'])
        text = normalize.normalize(text)
        if 'アニメ時報' in text:
            continue
        # One token list per post; every line is wrapped in BOS/EOS markers
        # and each token is stored as 'surface,<first five POS features>'
        words.append([])
        for line in text.splitlines():
            words[-1].append(BOS)
            for n in m.parse_to_node(line):
                words[-1].append('%s,%s' % (n.surface, ''.join(n.feature.split(',')[:5])))
            words[-1].append(EOS)
    return m_generator.generate(words, min_length)
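BOS and EOS are not defined in this excerpt. A plausible minimal definition for the sentence boundary markers fed to the Markov generator (an assumption; the source project may use other values):

BOS = '<s>'   # hypothetical begin-of-sentence marker
EOS = '</s>'  # hypothetical end-of-sentence marker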
Example #6
def give_present(*arg):
    present_list = file_io.read('present.txt', data=True)
    sentence = misc.choice(present_list)
    # Redraw until the sentence is usable: no tally words, contains 'を',
    # does not end with '萌え', and is at least 3 characters long
    while ('集計' in sentence or 'シュウケイ' in sentence or 'を' not in sentence or
            sentence.endswith('萌え') or len(sentence) < 3):
        sentence = misc.choice(present_list)
    present = normalize.remove_emoticon(sentence)
    present = present.replace('!', '').replace('!', '')  # ASCII and full-width '!'
    present = present.replace('漏れの', '').replace('俺の', '').replace('俺が', '')
    present = present[:-1] if present.endswith('を') else present
    search_result = google_image.search(present)
    if 'images' in search_result:
        for url in search_result['images']:
            if url.endswith(('.jpg', '.gif', '.png')):
                try:
                    web.download(url, '/tmp/present')
                    break
                except Exception:
                    continue
    sentence = normalize.normalize(sentence)
    return {'text': '%nameに' + sentence, 'media[]': '/tmp/present'}
Example #7
def test_normalize():
    got = normalize.normalize(u'あいぼんのおまんこを指で開いてクチュクチュしたいよおおーう')
    assert got == u'あいぼんのおまんこを指で開いてクチュクチュしたいよーう'
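The test shows normalize.normalize shortening a drawn-out ending ('よおおーう' becomes 'よーう'), and Examples #2, #8 and #9 pass a repeat= keyword. A minimal sketch of one plausible ingredient, run shortening via a regex backreference (the real normalize module may work differently):

import re

def shorten_repeat(text, repeat=2):
    # Collapse any character repeated more than `repeat` times in a row,
    # e.g. shorten_repeat('すごーーーーい') -> 'すごーーい'
    return re.sub(r'(.)\1{%d,}' % repeat, r'\1' * repeat, text)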
Example #8
def prepare_for_counting(text):
    text = regex.re_a_tag.sub('', text)
    text = normalize.normalize(text, emoticon=False, repeat=3)
    return text
Example #9
def cleansing(text):
    text = text.strip()
    text = text.replace('\n', '')
    text = regex.re_a_tag.sub('', text)
    text = normalize.remove_emoticon(text)
    return normalize.normalize(text, repeat=3)
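A hypothetical call illustrating the pipeline; the input string is invented, and the expected effect assumes remove_emoticon strips kaomoji:

raw = '<a href="http://example.com">link</a> わーーーーい(^o^)\nおしまい'
print(cleansing(raw))
# anchor tag, newline and emoticon removed; character runs longer than
# three are shortened by the repeat=3 normalization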