Example #1
0
def getUrl(url, page):
    """Fetch one listing page and queue every detail link found on it.

    The page address is ``url + str(page) + '.html'``.  The HTTP request is
    retried until it succeeds, sleeping ``n * 10`` seconds after the n-th
    failure.  On an HTTP 200 response every ``href`` found under
    ``div[@class="box3"]`` is inserted into ``loldytt_url`` with ``status`` 0.
    Any other status code is silently ignored.

    ``category_id`` is not a parameter: it is read from the enclosing scope
    (presumably a module-level global -- confirm against the caller).

    Errors raised while parsing or inserting propagate to the caller, but only
    after the database handle has been closed.
    """
    url = url + str(page) + '.html'
    print(url)
    mysqlDao = MysqlDao()
    try:
        attempt = 1
        while True:
            try:
                headers = Headers.getHeaders()
                req = requests.get(url, headers=headers, timeout=10)
                break
            except Exception as e:
                print('%s : %s' % (Exception, e))
                print('sleep')
                # Linear back-off: 10s, 20s, 30s, ... with no upper bound.
                time.sleep(attempt * 10)
                attempt += 1
        if req.status_code == 200:
            selector = etree.HTML(req.content)
            sql = 'insert ignore into loldytt_url (`category_id`,`url`,`status`,created_at) VALUES (%s,%s,%s,%s)'
            for url_content in selector.xpath(
                    '//div[@class="box3"]/descendant::a/@href'):
                created_at = time.strftime('%Y-%m-%d %H:%M:%S')
                values = (category_id, url_content, 0, created_at)
                print(values)
                mysqlDao.executeValues(sql, values)
    finally:
        # Release the handle on every path, not only after a 200 response.
        mysqlDao.close()
Example #2
0
 def run(self):
     """Expand pending keywords from ``allsearch_key_word`` forever.

     Each pass picks one row with ``status`` 0, sets it to 2, calls
     ``self.getSearch(word)`` and inserts every returned value as a new row
     whose ``parent_id`` is the picked row's id.  When no pending row exists
     the worker sleeps for an hour and polls again.

     The loop never ends on its own; the database handle is closed only when
     an exception unwinds out of it.
     """
     select_sql = 'select * from allsearch_key_word WHERE `status`=0 limit 0,1'
     insert_sql = 'insert ignore into allsearch_key_word (`word`,`parent_id`,`status`,`created_at`) VALUES (%s,%s,%s,%s)'
     mysqlDao = MysqlDao()
     try:
         while True:
             print(self.name)
             ret = mysqlDao.execute(select_sql)
             if not ret:
                 print(self.name + 'sleep')
                 time.sleep(3600)
                 continue
             row = ret[0]
             row_id = row[0]
             word = row[1]
             mark_sql = ('update allsearch_key_word set `status`=2 where `id`='
                         + str(row_id))
             # Mark the row before the slow search so it is not picked again.
             mysqlDao.execute(mark_sql)
             for sql_value in self.getSearch(word):
                 created_at = time.strftime('%Y-%m-%d %H:%M:%S')
                 values = (sql_value, row_id, 0, created_at)
                 mysqlDao.executeValues(insert_sql, values)
             # NOTE(review): the row is written with status 2 a second time
             # here, so it never reaches a distinct "done" value -- confirm
             # whether status 1 was intended.
             mysqlDao.execute(mark_sql)
     finally:
         # Previously unreachable after the endless loop; now runs whenever
         # an exception terminates the worker.
         mysqlDao.close()
Example #3
0
 def run(self):
     """Insert 1000 random integers from 1 to 10000 into ``yingshi_test``.

     The statement text is printed before every insert, and the database
     handle is closed once all rows have been written.
     """
     dao = MysqlDao()
     statement = 'insert into yingshi_test (`name`) values (%s)'
     remaining = 1000
     while remaining > 0:
         number = random.randint(1, 10000)
         print(statement)
         dao.executeValues(statement, number)
         remaining -= 1
     dao.close()
Example #4
0
 def run(self):
     """Process pending ``ygdy8_url`` rows one at a time until none remain.

     For each row with ``status`` 0 the row is first set to ``status`` 2,
     then ``self.getContent(url, category_id)`` is called.  When it returns a
     value, that value is inserted into ``ygdy8_content`` and the row is set
     to ``status`` 1; when it returns ``None`` the row stays at 2.

     A fresh database handle is opened for every row and is always closed,
     including when scraping or a query raises.
     """
     select_sql = 'select * from ygdy8_url WHERE `status`=0 limit 0,1'
     insert_sql = 'insert ignore INTO `ygdy8_content`(`category_id`,`name`, `content`, `img`,`created_at`, `url`) VALUES( %s, %s, %s, %s, %s, %s)'
     while True:
         print(self.name)
         mysqlDao = MysqlDao()
         try:
             ret = mysqlDao.execute(select_sql)
             if not ret:
                 # Nothing left to do: leave the loop (handle closed below).
                 break
             row = ret[0]
             row_id = row[0]
             category_id = row[1]
             url = row[2]
             mysqlDao.execute(
                 'update ygdy8_url set `status`=2 where `id`=' + str(row_id))
             sql_values = self.getContent(url, category_id)
             if sql_values is not None:
                 mysqlDao.executeValues(insert_sql, sql_values)
                 mysqlDao.execute(
                     'update ygdy8_url set `status`=1 where `id`='
                     + str(row_id))
         finally:
             mysqlDao.close()
Example #5
0
from headers import Headers
from lxml import etree
from mysqlpooldao import MysqlDao

# Python 2 only: reload() restores sys.setdefaultencoding so UTF-8 can be
# made the implicit codec for str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8')

# Poll the Baidu "top searches" page every six hours and store each keyword
# as a pending (status 0) root row (parent_id 0) in allsearch_key_word.
while True:
    try:
        url = 'http://top.baidu.com/'
        headers = Headers.getHeaders()
        req = requests.get(url, headers=headers, timeout=30)
        if req.status_code == 200:
            html = req.content.decode('gb2312', 'ignore')
            selector = etree.HTML(html)
            words = selector.xpath(
                '//div[@id="box-cont"]/descendant::a/@title')
            sql = 'insert ignore into allsearch_key_word (`word`,`parent_id`,`status`,`created_at`) VALUES (%s,%s,%s,%s)'
            for word in words:
                print(word)
                mysqlDao = MysqlDao()
                try:
                    created_at = time.strftime('%Y-%m-%d %H:%M:%S')
                    values = (word, 0, 0, created_at)
                    mysqlDao.executeValues(sql, values)
                finally:
                    # Close the handle even when the insert fails.
                    mysqlDao.close()
        else:
            print('code error')
    except Exception as e:
        # Still best effort -- one failed round must not stop the poller --
        # but report the failure instead of hiding it, and let
        # KeyboardInterrupt / SystemExit through so the script can be stopped.
        print('%s : %s' % (type(e).__name__, e))
    time.sleep(21600)
Example #6
0
 def run(self):
     """Scrape pending ``loldytt_url`` rows into ``loldytt_content``.

     Each pass opens a database handle, picks one row with ``status`` 0 and
     sets it to 2, then downloads the page (up to five attempts).  When a
     title is found, the title, cast, poster image, description and the
     thunder / bt / magnet link lists are inserted and the row is set to
     ``status`` 1.  When no attempt yields a title the row stays at 2 and the
     next row is processed.

     When no pending row exists the method calls ``sys.exit()``.

     The database handle is closed at the end of every pass, including on
     ``sys.exit()`` and on errors.
     """
     select_sql = 'select * from loldytt_url WHERE `status`=0 limit 0,1'
     insert_sql = 'insert ignore INTO `loldytt_content`(`category_id`, `title`,`cast`,`img`,`xunlei_download`, `bt_download`, `magnet_download`, `content`, `url`,`created_at`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'

     def link_pairs(selector, anchor_xpath):
         """Return ``[{text: href}, ...]`` for the anchors matched."""
         keys = selector.xpath(anchor_xpath + '/text()')
         values = selector.xpath(anchor_xpath + '/@href')
         # zip() stops at the shorter list, so a length mismatch can no
         # longer raise and cut the remaining link lists short.
         return [{key: value} for key, value in zip(keys, values)]

     while True:
         print(self.name)
         mysqlDao = MysqlDao()
         try:
             ret = mysqlDao.execute(select_sql)
             if not ret:
                 # No sleeping here: exit and let crontab start the next run.
                 print('game over')
                 sys.exit()
             row = ret[0]
             row_id = row[0]
             category_id = row[1]
             url = row[2]
             mysqlDao.execute(
                 'update loldytt_url set `status`=2 where `id`=' + str(row_id))
             headers = Headers.getHeaders()

             # Reset per URL so a page that never loads cannot reuse the
             # previous row's parse result (or hit an unbound name).
             selector = None
             titles = []
             for _ in range(5):
                 try:
                     req = requests.get(url, headers=headers, timeout=30)
                 except requests.RequestException as e:
                     print('%s : %s' % (type(e).__name__, e))
                     continue
                 req.encoding = "gbk"
                 if req.status_code != 200:
                     continue
                 html = req.text.encode(encoding="utf-8",
                                        errors="ignore").decode(
                                            "utf-8", errors="ignore")
                 try:
                     selector = etree.HTML(html)
                 except Exception as e:
                     print('%s : %s' % (type(e).__name__, e))
                     continue
                 if selector is None:
                     continue
                 titles = selector.xpath(
                     '//div[contains(@class,"lm")]/h1/a/text()')
                 if titles:
                     break
             if not titles:
                 # Row stays at status 2; move on to the next pending row.
                 continue
             title = titles[0]

             casts = selector.xpath(
                 '//div[contains(@class,"zhuyan")]/ul[1]/li/text()')
             imgs = selector.xpath(
                 '//div[contains(@class,"haibao")]/a[1]/img/@src')
             contents = selector.xpath(
                 '//div[@class="neirong"]/descendant::text()')
             cast = ''
             if casts:
                 # Keep the segment after the first ':' (presumably the text
                 # is "<label>:<names>" -- confirm); text without a colon
                 # leaves cast empty instead of raising IndexError.
                 cast_parts = casts[0].split(':')
                 if len(cast_parts) > 1:
                     cast = cast_parts[1]
             img = imgs[0] if imgs else ''
             content = simplejson.dumps(contents) if contents else ''
             created_at = time.strftime('%Y-%m-%d %H:%M:%S')

             xunlei_download = link_pairs(
                 selector,
                 '//*[contains(@id,"jishu")]/descendant::a[contains(@href,"thunder")]'
             )
             bt_download = link_pairs(
                 selector,
                 '//*[contains(@id,"bt")]/descendant::a[contains(@href,"thunder")]'
             )
             magnet_download = link_pairs(
                 selector, '//a[contains(@href,"magnet")]')

             sql_values = (category_id, title, cast, img,
                           simplejson.dumps(xunlei_download),
                           simplejson.dumps(bt_download),
                           simplejson.dumps(magnet_download), content, url,
                           created_at)
             print(title)
             mysqlDao.executeValues(insert_sql, sql_values)
             mysqlDao.execute(
                 'update loldytt_url set `status`=1 where `id`=' + str(row_id))
         finally:
             mysqlDao.close()
Example #7
0
 def run(self):
     """Scrape pending ``bttiantang_url`` rows into ``bttiantang_content``.

     Each pass opens a database handle, picks one row with ``status`` 0 and
     sets it to 2, then downloads the page (up to five attempts).  When the
     detail list (``ul`` whose class contains ``moviedteail_list``) is found,
     the names, labelled fields, images, details and download links are
     inserted and the row is set to ``status`` 1.  When no attempt yields the
     detail list the row stays at 2 and the next row is processed.

     When no pending row exists the method calls ``sys.exit()``.

     The database handle is closed at the end of every pass, including on
     ``sys.exit()`` and on errors.
     """
     select_sql = 'select * from bttiantang_url WHERE `status`=0 limit 0,1'
     insert_sql = 'insert ignore INTO `bttiantang_content`(`names_chn`, `names_eng`,`names_nick`,`imgs`,`tags`, `areas`, `years`, `directors`, `writers`,`casts`, `imdbs`,`details`, `download`,`created_at`, `url`) VALUES(%s, %s, %s,%s,%s,%s, %s, %s,%s, %s,%s, %s,%s, %s, %s)'

     def joined_links(content, label):
         """Comma-join the link texts of the ``li`` whose text has *label*."""
         # join() of an empty list is already "", so no emptiness check.
         return ",".join(
             content.xpath('li[contains(text(),"%s")]/a/text()' % label))

     while True:
         print(self.name)
         mysqlDao = MysqlDao()
         try:
             ret = mysqlDao.execute(select_sql)
             res = []
             for r in ret:
                 res = r
             print(res)
             if len(res) == 0:
                 print('sleep')
                 # No sleeping here: exit and let crontab start the next run.
                 print('game over')
                 sys.exit()
             row_id = res[0]
             url = res[1]
             mysqlDao.execute(
                 'update bttiantang_url set `status`=2 where `id`='
                 + str(row_id))
             headers = Headers.getHeaders()

             # Reset per URL so a page that never loads cannot reuse the
             # previous row's parse result (or hit an unbound name).
             selector = None
             contents = []
             for _ in range(5):
                 try:
                     req = requests.get(url, headers=headers, timeout=30)
                 except requests.RequestException as e:
                     print('%s : %s' % (type(e).__name__, e))
                     continue
                 if req.status_code != 200:
                     continue
                 selector = etree.HTML(req.content)
                 if selector is None:
                     continue
                 contents = selector.xpath(
                     '//ul[contains(@class,"moviedteail_list")]')
                 if contents:
                     break
             if not contents:
                 # Row stays at status 2; move on to the next pending row.
                 continue
             content = contents[0]

             names_chn = selector.xpath(
                 '//div[contains(@class,"moviedteail_tt")]/h1/text()')
             names_eng = selector.xpath(
                 '//div[contains(@class,"moviedteail_tt")]/span/text()')
             name_chn = names_chn[0] if names_chn else ''
             name_eng = names_eng[0] if names_eng else ''

             names_nick_new = joined_links(content, u'又名')
             imgs = simplejson.dumps(
                 selector.xpath(
                     '//div[contains(@class,"moviedteail_img")]/a/img/@src'))
             tags_new = joined_links(content, u'标签')
             areas_new = joined_links(content, u'地区')
             years_new = joined_links(content, u'年份')
             directors_new = joined_links(content, u'导演')
             writers_new = joined_links(content, u'编剧')
             casts_new = joined_links(content, u'主演')
             imdbs_new = joined_links(content, u'imdb')

             details = self.getDetails(
                 content.xpath('li[contains(text(),"%s")]/a/@href' %
                               (u'详情')))
             if len(details) > 0:
                 details_new = details[0]
             else:
                 details_new = ""
             created_at = time.strftime('%Y-%m-%d %H:%M:%S')

             download = []
             for d in selector.xpath('//div[contains(@class,"tinfo")]'):
                 link_titles = d.xpath('a[1]/@title')
                 link_hrefs = d.xpath('a[1]/@href')
                 # Skip entries lacking a title or href instead of hiding
                 # every possible error behind a bare except.
                 if link_titles and link_hrefs:
                     download.append({link_titles[0]: link_hrefs[0]})
             download_json = simplejson.dumps(download)

             sql_values = (name_chn, name_eng, names_nick_new, imgs,
                           tags_new, areas_new, years_new, directors_new,
                           writers_new, casts_new, imdbs_new, details_new,
                           download_json, created_at, url)
             mysqlDao.executeValues(insert_sql, sql_values)
             mysqlDao.execute(
                 'update bttiantang_url set `status`=1 where `id`='
                 + str(row_id))
         finally:
             mysqlDao.close()