Beispiel #1
0
    def parse_title(self, html, video_info):
        """Extract the title and sub-title from the raw page *html*.

        Matches the site's delimiter markers around each field and stores
        the captured text on *video_info* (``title`` / ``sub_title``).
        """
        found = re.search(u'影片名称开始代码(.*)影片名称结束代码', html)
        if found:
            title = found.group(1)
            video_info.title = title
            Log.write_stdout('影片名称: %s', title)

        found = re.search(u'影片副标开始代码(.*)影片副标结束代码', html)
        if found:
            sub_title = found.group(1)
            video_info.sub_title = sub_title
            Log.write_stdout('影片副标: %s', sub_title)
Beispiel #2
0
 def add(self, data):
     """Append *data* to the in-memory cache, flushing when it fills up.

     Side effects: once the cache holds at least ``self._maxSize`` items,
     the first ``_maxSize`` items are written via ``writeToPersistence``
     and removed from the cache.
     """
     Log.write_stdout("cache add")
     self._cachedItems.append(data)
     max_size = self._maxSize  # renamed: ``max`` shadowed the builtin
     # ``>=`` instead of ``==``: if the list were ever grown elsewhere the
     # equality check could be skipped once and then never trigger again,
     # letting the cache grow without bound.
     if len(self._cachedItems) >= max_size:
         Log.write_stdout("cache fulled, write to persistence")
         self.writeToPersistence(self._cachedItems[:max_size])
         del self._cachedItems[:max_size]
Beispiel #3
0
    def parse(self, soup):
        if not soup:
            Log.write_stdout('ERROR:soup is None')
            return

        Log.write_stdout('#####################')
        video_info = Video_Info()
        self.video_parser.parse(soup, video_info)

        for qhash in video_info.qhash_list:
            print qhash

        if len(video_info.qhash_list) > 0:
            self.video_list.append(video_info)
Beispiel #4
0
def search(keyword):
    '''
        search content for keyword
        @author: douzifly
    '''
    if not keyword:
        return
    search_url = "http://www.hakuzy.com/search.asp"
    # the site expects the query GBK-encoded in a POST form field
    params = {"searchword": Utils.to_unicode(keyword).encode("gbk")}
    html = WebTool.request(search_url, params, "post") # replace with other lib
    if not html:
        Log.write_stderr('ERROR:cant get html')
        return
    Log.write_stdout(html) # this html only contain search result, no hash
    parser = HakuzyVideoParser() # do not create parse every time

    # fetch and parse every detail page linked from the result list,
    # caching each record; cache flushes itself in batches
    cache = VideoCache()
    for link in parser.parse_search_page(html):
        page = WebTool.request(link)
        soup = BeautifulSoup(page)
        info = Video_Info()
        info.ref_url = link
        parser.parse(soup, info)
        Log.write_stdout("###################")
        Log.write_stdout(info)
        cache.add(info)
        # throttle so we don't hammer the site
        time.sleep(Config.NETWORK_REQUST_INTERVAL)
    cache.flush()  # write left items to persistence
Beispiel #5
0
 def writeToPersistence(self, datas):
     """Insert every record in *datas* into the video table and commit.

     Opens the DB, bails out with a log message if it cannot be opened,
     and always closes the handle — even when an insert or commit raises.
     """
     # move to thread pool later
     Log.write_stdout("writeToPersistence len:%d", len(datas))
     videoTb = VideoTb()
     ret = videoTb.open()
     if not ret:
         Log.write_stdout("cant open db")
         return
     try:
         # enumerate() index was unused — plain iteration is enough
         for video in datas:
             videoTb.insert(video)
         videoTb.commit()
     finally:
         # release the DB handle on both success and failure (the original
         # leaked it if insert/commit raised)
         videoTb.close()
     Log.write_stdout("write finished")
Beispiel #6
0
 def parse_language(self, html, video_info):
     """Extract the language field from *html* into ``video_info.language``."""
     found = re.search(u'影片语言开始代码(.*)影片语言结束代码', html)
     if found is None:
         return
     language = found.group(1)
     video_info.language = language
     Log.write_stdout('影片语言: %s', language)
Beispiel #7
0
 def parse_type(self, html, video_info):
     """Extract the genre/type field from *html* into ``video_info.type``."""
     found = re.search(u'影片类型开始代码(.*)影片类型结束代码', html)
     if found is None:
         return
     kind = found.group(1)
     video_info.type = kind
     Log.write_stdout('影片类型: %s', kind)
Beispiel #8
0
 def parse_director(self, html, video_info):
     """Extract the director field from *html* into ``video_info.director``."""
     found = re.search(u'影片导演开始代码(.*)影片导演结束代码', html)
     if found is None:
         return
     director = found.group(1)
     video_info.director = director
     Log.write_stdout('导演: %s', director)
Beispiel #9
0
 def parse_actors(self, html, video_info):
     """Extract the cast field from *html* into ``video_info.actors``."""
     found = re.search(u'影片演员开始代码(.*)影片演员结束代码', html)
     if found is None:
         return
     actors = found.group(1)
     video_info.actors = actors
     Log.write_stdout('演员: %s', actors)
Beispiel #10
0
 def __init__(self, webSiteURL, entryFilter=None, yieldFilter=None, identifier=None, enableStatusSupport=False):
     """Set up the Hakuzy spider on top of the generic Koala crawler."""
     Koala.__init__(self, webSiteURL, entryFilter, yieldFilter, identifier, enableStatusSupport)
     self.video_parser = HakuzyVideoParser()  # shared page parser
     self.video_list = []                     # parsed Video_Info records
     self.__total_size = 0
     Log.write_stdout('Hakuzy.__init__')
Beispiel #11
0
 def parse_img(self, soup, video_info):
     """Grab the cover image URL from the first <img> carrying an onerror attribute."""
     tag = soup.find('img', onerror=True)
     if not tag:
         return
     src = tag.get('src')
     video_info.img = src
     Log.write_stdout('img = %s', src)
Beispiel #12
0
 def parse_brief(self, html, video_info):
     """Extract the synopsis field from *html* into ``video_info.brief``."""
     found = re.search(u'影片介绍开始代码(.*)影片介绍结束代码', html)
     if found is None:
         return
     brief = found.group(1)
     video_info.brief = brief
     Log.write_stdout('影片介绍: %s', brief)
Beispiel #13
0
 def parse_statues(self, html, video_info):
     """Extract the status field from *html* into ``video_info.status``.

     NOTE: the method name is a typo for "status" but is kept — it is the
     public interface callers use.
     """
     found = re.search(u'影片状态:(.*)', html)
     if found is None:
         return
     status = found.group(1)
     video_info.status = status
     Log.write_stdout('影片状态: %s', status)
Beispiel #14
0
 def parse_update_time(self, html, video_info):
     """Extract the last-update time from *html* into ``video_info.update_time``."""
     found = re.search(u'影片更新时间开始代码(.*)影片更新时间结束代码', html)
     if found is None:
         return
     updated = found.group(1)
     video_info.update_time = updated
     Log.write_stdout('更新时间: %s', updated)
Beispiel #15
0
 def parse_public_time(self, html, video_info):
     """Extract the release date from *html* into ``video_info.public_time``."""
     found = re.search(u'上映日期开始代码(.*)上映日期结束代码', html)
     if found is None:
         return
     released = found.group(1)
     video_info.public_time = released
     Log.write_stdout('上映日期: %s', released)
Beispiel #16
0
 def parse_area(self, html, video_info):
     """Extract the region field from *html* into ``video_info.area``."""
     found = re.search(u'影片地区开始代码(.*)影片地区结束代码', html)
     if found is None:
         return
     area = found.group(1)
     video_info.area = area
     Log.write_stdout('影片地区: %s', area)