def get_movie(self, url):
    """Scrape one movie detail page and store it in the ``bs_movie`` table.

    The movie id is the run of digits before ``.html`` in ``url``. Pages
    whose id is already in ``self.idList`` are skipped. Otherwise the page
    is fetched through ``Grab.get_content``, parsed with ``pq``, and the
    title, release date, rating, synopsis and download link are extracted
    from it.

    A row matching the same name or id is looked up first. If one exists it
    is replaced only when this page's id is greater than the first column
    of that row (which is also used as the id in the ``delete``); otherwise
    nothing is written. A delete plus insert is handed to ``DB.doTrans``, a
    lone insert to ``DB.execute``.

    Args:
        url: Address of the detail page, expected to end in
            ``<digits>.html``.

    Returns:
        None. This is a per-page best effort: any exception is printed
        together with ``url`` and swallowed.
    """

    def quote(value):
        # Escape text for a single-quoted SQL literal. Backslashes are
        # doubled first so an input such as \' cannot close the literal.
        # NOTE(review): assumes a backend with MySQL-style backslash
        # escapes, as the original quote escaping implied - confirm.
        return value.replace('\\', '\\\\').replace('\'', '\\\'')

    try:
        movie_id = re.search(r'.*/(?P<id>\d*)\.html', url).group('id')
        if movie_id in self.idList:
            return

        # The xmlns attribute is removed before parsing.
        # NOTE(review): replace-then-decode only works if get_content
        # returns a Python 2 style byte string - confirm.
        html = Grab.get_content(url).replace(
            'xmlns="http://www.w3.org/1999/xhtml" /', '').replace(
            'xmlns="http://www.w3.org/1999/xhtml"', '').decode(
            'gb2312', 'ignore')
        doc = pq(html)

        download = doc('#Zoom table td:first-child').eq(0).text()
        title = doc('.bd3r .co_area2 .title_all h1').text()
        # A title without the book-title brackets raises here and is
        # reported by the handler below.
        name = re.search(r'.*《(.*)》.*', title).group(1)

        content = doc('#Zoom p:first-child')
        if not content:
            return
        content = content.remove('br').html()
        # Remove only the padding inside the field labels. ASCII spaces must
        # stay: the rating patterns below contain ' from ' and ' users' and
        # could never match once those spaces were gone.
        # NOTE(review): assumes labels are padded with full-width (U+3000)
        # or non-breaking spaces - confirm against a live page.
        for padding in (u'\u3000', u'\xa0'):
            content = content.replace(padding, '')

        release_date = None
        found = re.match(
            r'.*◎年代.*(?:.*/)?(?P<date>\d{4}[-年]\d{2}[-月]\d{2}日?)\(.*',
            content)
        if found:
            # The trailing day marker is dropped, not turned into '-',
            # so the result is YYYY-MM-DD without a dangling dash.
            release_date = found.group('date').strip().replace(
                '年', '-').replace('月', '-').replace('日', '')

        # Douban rating first; IMDb is consulted only when Douban is absent.
        score = '0'
        found = re.match(
            r'.*◎豆瓣评分(?P<score>.*)/10 from .* users.*', content)
        if not found:
            found = re.match(
                r'.*◎IMDb评分(?P<score>.*)/10 from .* users.*',
                content, re.I)
        if found:
            score = found.group('score').strip().replace(',', '.') or '0'

        # The synopsis ends at the awards section when there is one,
        # otherwise at the first image tag.
        introduction = ''
        found = re.match(
            r'.*◎简介(?P<introduction>.*)◎获奖情况.*<img.*', content)
        if not found:
            found = re.match(
                r'.*◎简介(?P<introduction>.*).*<img.*', content)
        if found:
            introduction = found.group('introduction').strip()
        if len(introduction) >= 1024:
            # Over-long synopses are dropped rather than truncated.
            introduction = ''

        # TODO(review): switch to bound parameters if the DB helper supports
        # them; its signature is not visible here, so values are escaped.
        existing = DB.fetchone(
            "select * from bs_movie where name='%s' or id='%s'" % (
                quote(name), movie_id))

        statements = []
        if existing:
            if int(movie_id) > existing[0]:
                statements.append(
                    "delete from bs_movie where id='%s'" % existing[0])
            else:
                return

        date_literal = "'%s'" % release_date if release_date else 'NULL'
        statements.append(
            "insert into bs_movie "
            "(id,name,date,score,introduction,url,download) "
            "values ('%s','%s',%s,'%s','%s','%s','%s')" % (
                movie_id, quote(name), date_literal, quote(score),
                quote(introduction), quote(url), quote(download)))

        if len(statements) == 1:
            DB.execute(statements[0])
        else:
            DB.doTrans(statements)
    except Exception as ex:
        print(str(ex), url)