Example #1
0
 def retrieve_from_syobocal(self):
     """Scrape anime titles and their readings from Syoboi Calendar.

     Walks the title index page, fetches each title's detail page for
     its yomi (reading), converts every pair into MeCab dictionary
     format, and writes out whatever was collected.
     """
     index_html = web.open_url(SYOBOCAL_START_URL, params={'cat': 1})
     mecab_lines = []
     for url_part, name in re_title_url.findall(index_html):
         if not self.is_valid_word(name):
             continue
         detail_html = web.open_url(SYOBOCAL_BASE_URL + url_part)
         yomi = ''.join(re_title_yomi.findall(detail_html))
         mecab_lines.append(self.to_mecab_format(name, yomi, 'SC'))
         time.sleep(INTERVAL)  # throttle requests to the remote site
     if mecab_lines:
         self.write(mecab_lines)
Example #2
0
 def retrieve_from_syobocal(self):
     """Collect title/yomi pairs from Syoboi Calendar as MeCab entries."""
     html = web.open_url(SYOBOCAL_START_URL, params={'cat': 1})
     entries = []
     for url_part, title in re_title_url.findall(html):
         # Inverted guard: only valid titles are processed.
         if self.is_valid_word(title):
             page = web.open_url(SYOBOCAL_BASE_URL + url_part)
             reading = ''.join(re_title_yomi.findall(page))
             entries.append(self.to_mecab_format(title, reading, 'SC'))
             time.sleep(INTERVAL)  # throttle between page fetches
     if entries:
         self.write(entries)
Example #3
0
 def get_trend_search_query(self, url):
     """Yield search keywords extracted from the trend RSS feed at *url*.

     Each <item>'s <title> is stripped of 【…】 brackets; multi-word
     titles are yielded word by word.
     """
     html = web.open_url(url)
     # Name the parser explicitly: a bare BeautifulSoup(html) picks
     # whichever parser happens to be installed, which can change parse
     # results across machines (and warns since bs4 4.4).  "html5lib"
     # matches the parser used by the other scrapers in this file.
     soup = BeautifulSoup(html, "html5lib")
     for item in soup.find_all('item'):
         title = item.find('title')
         keyword = str(title.string).strip()
         keyword = re_sumikakko.sub('', keyword)
         if ' ' in keyword:
             for kwd in keyword.split(' '):
                 yield kwd
         else:
             yield keyword
Example #4
0
 def get_trend_search_query(self, url):
     """Yield trend search keywords from the RSS feed at *url*."""
     soup = BeautifulSoup(web.open_url(url), "html5lib")
     for item in soup.find_all('item'):
         text = str(item.find('title').string).strip()
         text = re_sumikakko.sub('', text)
         # str.split(' ') returns [text] when no space is present, so a
         # single 'yield from' covers the single-word and multi-word
         # cases exactly as the original if/else did.
         yield from text.split(' ')
Example #5
0
def give_valentine_present(*arg):
    """Reply with a Valentine's "present".

    Roughly 1 time in 4, style-transfers the sender's avatar into a
    "chocolate" image; otherwise posts a random chocolate picture
    fetched from Safebooru.  Returns a dict with 'text' and 'media[]'
    keys for the bot reply.
    """
    if random.randint(0, 11) > 8:
        # Rare branch: run the style-transfer script over the avatar.
        avatar_url = arg[1]['icon'].replace('_normal', '')
        basename = avatar_url.split('/')[-1]
        local_path = '/tmp/%s' % (basename)
        web.download(avatar_url, local_path)
        cmd = ('%s evaluate.py --checkpoint ../../data/ckpt ' % (PYTHON_EXE_PATH) +
               '--in-path /tmp/%s --out-path /tmp/%s' % (basename, basename))
        misc.command(cmd, shell=True, allow_err=True, cwd=STYLE_TRANSFER_PATH)
        return {'text': '%nameをチョコにしてやろうか!(゚Д゚)', 'media[]': local_path}
    # Common branch: grab a random post from a random result page.
    page = random.randint(0, 59)
    xml = web.open_url(SAFEBOORU_URL % page)
    soup = BeautifulSoup(xml, 'lxml')
    chosen = misc.choice(soup.find_all('post'))
    web.download('https:' + chosen['file_url'], '/tmp/present')
    bangs = '!' * random.randint(0, 59)
    return {'text': '%nameにチョコをヽ(´ー`)ノ' + bangs, 'media[]': '/tmp/present'}
Example #6
0
 def _get_title(self, url):
     """Return a short title for *url*, or '' when none can be derived.

     Image URLs are reverse-searched via Google Images and the best
     non-numeric keywords are concatenated; URLs with ignored
     extensions yield ''; anything else is fetched and its HTML
     <title> is normalized and shortened.
     """
     title = ''
     # Only the extension matters; the unused 'root' local is dropped.
     ext = os.path.splitext(url)[1]
     if ext in image_extensions:
         time.sleep(3)  # for avoiding to be treated as spam by Google
         # Lazy %-args: the message is only formatted if the record is emitted.
         logger.info('Search by google: %s', url)
         results = google_image.search(url, best_kwds_max_length=18)
         # Purely numeric keywords carry no meaning for a title.
         keywords = filter(lambda x: not x.isdigit(), results['best_keywords'])
         title = ''.join(keywords)
     elif ext not in ignore_extensions:  # PEP 8: 'not in' over 'not x in'
         logger.info('Retrieve web resource: %s', url)
         html = web.open_url(url)
         soup = BeautifulSoup(html, "html5lib")
         if soup.title and soup.title.string:
             title = soup.title.string
             title = normalize.normalize(title)
             title = self._shorten_title(title)
     return title
Example #7
0
 def _get_title(self, url):
     """Derive a title string for *url* (empty string when unavailable)."""
     root, ext = os.path.splitext(url)
     if ext in image_extensions:
         # Reverse-image-search the picture and join the best keywords.
         time.sleep(3)  # for avoiding to be treated as spam by Google
         logger.info('Search by google: %s' % url)
         results = google_image.search(url, best_kwds_max_length=18)
         non_numeric = [w for w in results['best_keywords'] if not w.isdigit()]
         return ''.join(non_numeric)
     if ext in ignore_extensions:
         return ''
     # Ordinary web page: use its (normalized, shortened) <title>.
     logger.info('Retrieve web resource: %s' % url)
     soup = BeautifulSoup(web.open_url(url), "html5lib")
     if soup.title and soup.title.string:
         return self._shorten_title(normalize.normalize(soup.title.string))
     return ''
Example #8
0
def test_open_url():
    """web.open_url should return the page body as decoded text."""
    url = 'http://qwerty.on.arena.ne.jp/'
    got = web.open_url(url)
    # Plain assert (pytest idiom) instead of nose's assert_true, matching
    # the other tests in this file; nose is unmaintained.
    assert u'あやしいわーるど' in got
Example #9
0
def test_open_url():
    """Fetching the BBS CGI endpoint returns its body text."""
    content = web.open_url('http://misao.on.arena.ne.jp/cgi-bin/bbs.cgi')
    assert 'あやしいわーるど' in content