def getUrl(url, page):
    # Build the list-page URL for this page number and queue every detail link it contains.
    url = url + str(page) + '.html'
    print(url)
    mysqlDao = MysqlDao()
    # Retry the request with a growing back-off until it succeeds.
    n = 1
    while True:
        try:
            headers = Headers.getHeaders()
            req = requests.get(url, headers=headers, timeout=10)
            break
        except Exception as e:
            print(e)
            print('sleep')
            time.sleep(n * 10)
            n = n + 1
    if req.status_code == 200:
        html = req.content
        selector = etree.HTML(html)
        url_contents = selector.xpath('//div[@class="box3"]/descendant::a/@href')
        for url_content in url_contents:
            sql = 'insert ignore into loldytt_url (`category_id`,`url`,`status`,`created_at`) VALUES (%s,%s,%s,%s)'
            created_at = time.strftime('%Y-%m-%d %H:%M:%S')
            # category_id is expected to be defined at module level by the caller.
            values = (category_id, url_content, 0, created_at)
            print(values)
            mysqlDao.executeValues(sql, values)
    mysqlDao.close()
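# getUrl depends on the MysqlDao / Headers helpers from this repo and on a
# module-level category_id. The driver below is a hypothetical sketch: the base
# URL, page range and category id are illustrative assumptions, not values taken
# from the original code.
if __name__ == '__main__':
    category_id = 1  # assumed module-level variable read inside getUrl
    base_url = 'http://www.example.com/dongzuopian/list_'  # placeholder list-page prefix
    for page in range(1, 11):
        getUrl(base_url, page)
        time.sleep(5)  # pause between list pages to stay polite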
def run(self):
    mysqlDao = MysqlDao()
    while True:
        print(self.name)
        # Claim one unprocessed keyword (status 0 -> 2 while it is being expanded).
        sql = 'select * from allsearch_key_word WHERE `status`=0 limit 0,1'
        ret = mysqlDao.execute(sql)
        if len(ret) > 0:
            res = ret[0]
            id = res[0]
            sql = 'update allsearch_key_word set `status`=2 where `id`=' + str(id)
            mysqlDao.execute(sql)
            word = res[1]
            # Expand the keyword into related search terms and queue them as children.
            sql_values = self.getSearch(word)
            for sql_value in sql_values:
                created_at = time.strftime('%Y-%m-%d %H:%M:%S')
                values = (sql_value, id, 0, created_at)
                sql = 'insert ignore into allsearch_key_word (`word`,`parent_id`,`status`,`created_at`) VALUES (%s,%s,%s,%s)'
                mysqlDao.executeValues(sql, values)
            # Mark the keyword as processed (status 1), matching the other workers.
            sql = 'update allsearch_key_word set `status`=1 where `id`=' + str(id)
            mysqlDao.execute(sql)
        else:
            print(self.name + ' sleep')
            time.sleep(3600)
    mysqlDao.close()
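# self.getSearch(word) is not included in this snippet. Assuming it expands a
# keyword through Baidu's public suggestion endpoint (an assumption about what
# the real method does), a minimal sketch of such a method on the worker class
# could look like this:
import re

import requests

def getSearch(self, word):
    # Fetch related search terms; the endpoint and JSONP format are assumptions.
    try:
        resp = requests.get('http://suggestion.baidu.com/su',
                            params={'wd': word}, timeout=10)
        resp.encoding = 'gbk'
        # The reply looks like: window.baidu.sug({q:"...",p:false,s:["a","b"]});
        match = re.search(r's:\[(.*?)\]', resp.text)
        if match:
            return [s.strip('"') for s in match.group(1).split(',') if s]
    except Exception as e:
        print(e)
    return []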
def run(self):
    # Simple load test: insert 1000 random values into yingshi_test.
    mysqlDao = MysqlDao()
    for one in xrange(0, 1000):
        v = random.randint(1, 10000)
        sql = 'insert into yingshi_test (`name`) values (%s)'
        print(sql)
        # executeValues expects a parameter sequence, so wrap the single value.
        mysqlDao.executeValues(sql, (v,))
    mysqlDao.close()
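# Every snippet here relies on the MysqlDao class imported from mysqlpooldao,
# which exposes execute, executeValues and close. Its source is not included in
# this section; the sketch below is a minimal stand-in built directly on MySQLdb
# (the real class presumably draws connections from a pool, and the connection
# settings here are placeholders).
import MySQLdb

class MysqlDao(object):
    def __init__(self):
        self.conn = MySQLdb.connect(host='127.0.0.1', user='root',
                                    passwd='password', db='spider',
                                    charset='utf8')
        self.cursor = self.conn.cursor()

    def execute(self, sql):
        # Run a literal SQL statement; returns all rows (empty tuple for updates).
        self.cursor.execute(sql)
        self.conn.commit()
        return self.cursor.fetchall()

    def executeValues(self, sql, values):
        # Run a parameterized statement; values is a tuple of bind parameters.
        self.cursor.execute(sql, values)
        self.conn.commit()
        return self.cursor.fetchall()

    def close(self):
        self.cursor.close()
        self.conn.close()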
def run(self):
    while True:
        print(self.name)
        mysqlDao = MysqlDao()
        # Claim one unprocessed ygdy8 detail URL (status 0 -> 2 while working on it).
        sql = 'select * from ygdy8_url WHERE `status`=0 limit 0,1'
        ret = mysqlDao.execute(sql)
        if len(ret) > 0:
            res = ret[0]
            id = res[0]
            sql = 'update ygdy8_url set `status`=2 where `id`=' + str(id)
            mysqlDao.execute(sql)
            category_id = res[1]
            url = res[2]
            sql_values = self.getContent(url, category_id)
            if sql_values is not None:
                sql_pattern = 'insert ignore INTO `ygdy8_content`(`category_id`,`name`, `content`, `img`,`created_at`, `url`) VALUES(%s, %s, %s, %s, %s, %s)'
                mysqlDao.executeValues(sql_pattern, sql_values)
                # Mark the URL as done only after the content row has been written.
                sql = 'update ygdy8_url set `status`=1 where `id`=' + str(id)
                mysqlDao.execute(sql)
            mysqlDao.close()
        else:
            # Nothing left to crawl: close the connection and let the thread finish.
            mysqlDao.close()
            break
import sys
import time

import requests

from headers import Headers
from lxml import etree
from mysqlpooldao import MysqlDao

reload(sys)
sys.setdefaultencoding('utf8')

# Seed the keyword table from the Baidu top-search page every six hours.
while True:
    try:
        url = 'http://top.baidu.com/'
        headers = Headers.getHeaders()
        req = requests.get(url, headers=headers, timeout=30)
        if req.status_code == 200:
            html = req.content.decode('gb2312', 'ignore')
            selector = etree.HTML(html)
            words = selector.xpath('//div[@id="box-cont"]/descendant::a/@title')
            for word in words:
                print(word)
                mysqlDao = MysqlDao()
                sql = 'insert ignore into allsearch_key_word (`word`,`parent_id`,`status`,`created_at`) VALUES (%s,%s,%s,%s)'
                created_at = time.strftime('%Y-%m-%d %H:%M:%S')
                values = (word, 0, 0, created_at)
                mysqlDao.executeValues(sql, values)
                mysqlDao.close()
        else:
            print('code error')
    except Exception as e:
        print(e)
    time.sleep(21600)
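# Headers.getHeaders() comes from the repo's headers module, which is not shown
# here; presumably it returns a request-header dict with a rotating User-Agent.
# A minimal stand-in (the User-Agent strings are only examples) could be:
import random

class Headers(object):
    USER_AGENTS = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
    ]

    @staticmethod
    def getHeaders():
        # Rotate the User-Agent so repeated requests look less uniform.
        return {'User-Agent': random.choice(Headers.USER_AGENTS)}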
def run(self):
    while True:
        print(self.name)
        mysqlDao = MysqlDao()
        sql = 'select * from loldytt_url WHERE `status`=0 limit 0,1'
        ret = mysqlDao.execute(sql)
        if len(ret) == 0:
            mysqlDao.close()
            """ Exit instead of sleeping; crontab will relaunch the worker. """
            print('game over')
            sys.exit()
        else:
            res = ret[0]
            id = res[0]
            category_id = res[1]
            url = res[2]
            sql = 'update loldytt_url set `status`=2 where `id`=' + str(id)
            mysqlDao.execute(sql)
            headers = Headers.getHeaders()
            # Retry the detail page up to five times until the title can be parsed.
            titles = []
            n = 0
            while n < 5:
                req = requests.get(url, headers=headers)
                req.encoding = "gbk"
                if req.status_code == 200:
                    html = req.text.encode("utf-8", "ignore").decode("utf-8", "ignore")
                    try:
                        selector = etree.HTML(html)
                    except Exception as e:
                        print(e)
                    titles = selector.xpath('//div[contains(@class,"lm")]/h1/a/text()')
                    if len(titles) > 0:
                        break
                n = n + 1
            if len(titles) > 0:
                title = titles[0]
            else:
                # Could not parse this page; leave it at status 2 and move on.
                continue
            casts = selector.xpath('//div[contains(@class,"zhuyan")]/ul[1]/li/text()')
            imgs = selector.xpath('//div[contains(@class,"haibao")]/a[1]/img/@src')
            cast = ''
            img = ''
            content = ''
            if len(casts) > 0:
                cast = casts[0].split(':')[1]
            if len(imgs) > 0:
                img = imgs[0]
            contents = selector.xpath('//div[@class="neirong"]/descendant::text()')
            if len(contents) > 0:
                content = simplejson.dumps(contents)
            created_at = time.strftime('%Y-%m-%d %H:%M:%S')
            # Collect Thunder, BT and magnet download links from the page.
            xunlei_download_keys = selector.xpath('//*[contains(@id,"jishu")]/descendant::a[contains(@href,"thunder")]/text()')
            xunlei_download_values = selector.xpath('//*[contains(@id,"jishu")]/descendant::a[contains(@href,"thunder")]/@href')
            bt_download_keys = selector.xpath('//*[contains(@id,"bt")]/descendant::a[contains(@href,"thunder")]/text()')
            bt_download_values = selector.xpath('//*[contains(@id,"bt")]/descendant::a[contains(@href,"thunder")]/@href')
            magnet_download_keys = selector.xpath('//a[contains(@href,"magnet")]/text()')
            magnet_download_values = selector.xpath('//a[contains(@href,"magnet")]/@href')
            xunlei_download = []
            bt_download = []
            magnet_download = []
            try:
                xn = 0
                for x in xunlei_download_keys:
                    xunlei_download.append({xunlei_download_keys[xn]: xunlei_download_values[xn]})
                    xn = xn + 1
                bn = 0
                for b in bt_download_keys:
                    bt_download.append({bt_download_keys[bn]: bt_download_values[bn]})
                    bn = bn + 1
                mn = 0
                for m in magnet_download_keys:
                    magnet_download.append({magnet_download_keys[mn]: magnet_download_values[mn]})
                    mn = mn + 1
            except Exception as e:
                print(e)
            xunlei_download_json = simplejson.dumps(xunlei_download)
            bt_download_json = simplejson.dumps(bt_download)
            magnet_download_json = simplejson.dumps(magnet_download)
            sql_pattern = 'insert ignore INTO `loldytt_content`(`category_id`, `title`,`cast`,`img`,`xunlei_download`, `bt_download`, `magnet_download`, `content`, `url`,`created_at`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
            sql_values = (category_id, title, cast, img, xunlei_download_json, bt_download_json, magnet_download_json, content, url, created_at)
            print(title)
            mysqlDao.executeValues(sql_pattern, sql_values)
            sql = 'update loldytt_url set `status`=1 where `id`=' + str(id)
            mysqlDao.execute(sql)
            mysqlDao.close()
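# The thunder:// links collected above wrap the real address as
# base64('AA' + url + 'ZZ'). If the stored values ever need to be turned back
# into plain HTTP/FTP links, a helper along these lines would do it; how the
# repo actually post-processes them is not shown, so this is only a sketch.
import base64

def decode_thunder(link):
    # Convert a thunder:// link back to the underlying URL.
    if not link.startswith('thunder://'):
        return link
    decoded = base64.b64decode(link[len('thunder://'):])
    # Strip the 'AA' prefix and 'ZZ' suffix that Xunlei adds around the URL.
    if decoded.startswith('AA') and decoded.endswith('ZZ'):
        decoded = decoded[2:-2]
    return decoded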
def run(self):
    while True:
        print(self.name)
        mysqlDao = MysqlDao()
        sql = 'select * from bttiantang_url WHERE `status`=0 limit 0,1'
        ret = mysqlDao.execute(sql)
        res = []
        for r in ret:
            res = r
        print(res)
        if len(res) == 0:
            print('sleep')
            # sql = 'update yingshi_bttiantang_url set `status`=0 WHERE `status`=2'
            # database.mysqlExecute(sql)
            mysqlDao.close()
            # time.sleep(21600)
            # continue
            """ Exit instead of sleeping; crontab will relaunch the worker. """
            print('game over')
            sys.exit()
        else:
            id = res[0]
            url = res[1]
            sql = 'update bttiantang_url set `status`=2 where `id`=' + str(id)
            mysqlDao.execute(sql)
            headers = Headers.getHeaders()
            # Retry the detail page up to five times until the detail list is found.
            contents = []
            n = 0
            while n < 5:
                req = requests.get(url, headers=headers)
                if req.status_code == 200:
                    html = req.content
                    selector = etree.HTML(html)
                    contents = selector.xpath('//ul[contains(@class,"moviedteail_list")]')
                    if len(contents) > 0:
                        break
                n = n + 1
            if len(contents) > 0:
                content = contents[0]
            else:
                # Could not parse this page; leave it at status 2 and move on.
                continue
            names_chn = selector.xpath('//div[contains(@class,"moviedteail_tt")]/h1/text()')
            names_eng = selector.xpath('//div[contains(@class,"moviedteail_tt")]/span/text()')
            name_chn = ''
            name_eng = ''
            if len(names_chn) > 0:
                name_chn = names_chn[0]
            if len(names_eng) > 0:
                name_eng = names_eng[0]
            names_nick = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'又名'))
            if len(names_nick) > 0:
                names_nick_new = ",".join(names_nick)
            else:
                names_nick_new = ""
            imgs = simplejson.dumps(selector.xpath('//div[contains(@class,"moviedteail_img")]/a/img/@src'))
            tags = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'标签'))
            if len(tags) > 0:
                tags_new = ",".join(tags)
            else:
                tags_new = ""
            areas = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'地区'))
            if len(areas) > 0:
                areas_new = ",".join(areas)
            else:
                areas_new = ""
            years = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'年份'))
            if len(years) > 0:
                years_new = ",".join(years)
            else:
                years_new = ""
            directors = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'导演'))
            if len(directors) > 0:
                directors_new = ",".join(directors)
            else:
                directors_new = ""
            writers = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'编剧'))
            if len(writers) > 0:
                writers_new = ",".join(writers)
            else:
                writers_new = ""
            casts = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'主演'))
            if len(casts) > 0:
                casts_new = ",".join(casts)
            else:
                casts_new = ""
            imdbs = content.xpath('li[contains(text(),"%s")]/a/text()' % (u'imdb'))
            if len(imdbs) > 0:
                imdbs_new = ",".join(imdbs)
            else:
                imdbs_new = ""
            details = self.getDetails(content.xpath('li[contains(text(),"%s")]/a/@href' % (u'详情')))
            if len(details) > 0:
                details_new = details[0]
            else:
                details_new = ""
            created_at = time.strftime('%Y-%m-%d %H:%M:%S')
            # Each tinfo block holds one downloadable release: title -> link.
            downloads = selector.xpath('//div[contains(@class,"tinfo")]')
            download = []
            for d in downloads:
                try:
                    dn_text = d.xpath('a[1]/@title')[0]
                    dn_url = d.xpath('a[1]/@href')[0]
                    download.append({dn_text: dn_url})
                except IndexError:
                    pass
            download_json = simplejson.dumps(download)
            sql_pattern = 'insert ignore INTO `bttiantang_content`(`names_chn`, `names_eng`,`names_nick`,`imgs`,`tags`, `areas`, `years`, `directors`, `writers`,`casts`, `imdbs`,`details`, `download`,`created_at`, `url`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
            sql_values = (name_chn, name_eng, names_nick_new, imgs, tags_new, areas_new, years_new, directors_new, writers_new, casts_new, imdbs_new, details_new, download_json, created_at, url)
            mysqlDao.executeValues(sql_pattern, sql_values)
            sql = 'update bttiantang_url set `status`=1 where `id`=' + str(id)
            mysqlDao.execute(sql)
            mysqlDao.close()
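# The run() methods above are written as threading.Thread subclasses (they use
# self.name and loop until their queue table is empty). The launcher below is a
# self-contained illustration of that pattern; ExampleWorker is a made-up stub,
# not one of the repo's real crawler classes.
import threading
import time

class ExampleWorker(threading.Thread):
    def run(self):
        # A real worker would poll its *_url table here, exactly like the
        # run() methods above; this stub only identifies itself.
        print(self.name)
        time.sleep(1)

if __name__ == '__main__':
    workers = [ExampleWorker(name='worker-%d' % i) for i in range(3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()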