def getProxyLs(self):
    """Pull the proxy list from the pool service and store it in ``proxyls``.

    The pool endpoint is requested with ``self.timeout``. On a non-OK HTTP
    status a message is printed and the method returns at once, which also
    skips the validation step. Otherwise each element of the response's
    ``data`` array becomes one row written with ``replace into``; the batch
    is committed when the write succeeds and rolled back when it raises.
    Any other exception is printed and swallowed, after which
    ``self.testbythread()`` is invoked.
    """
    try:
        response = requests.get(
            'http://47.97.7.119:8080/proxypool/proxys/0',
            timeout=self.timeout)
        db.conn()
        if response.status_code != requests.codes.ok:
            print(u'获取代理失败!')
            return

        entries = json.loads(response.text)['data']
        # Column order: type, address, port, add time (ms), check time
        # (not checked yet -> None), md5 of the address.
        rows = [
            [
                entry['proxyType'],
                entry['proxyAddress'],
                str(entry['proxyPort']),
                round(time.time() * 1000),
                None,
                self.md5Encode(entry['proxyAddress']),
            ]
            for entry in entries
        ]

        try:
            db.executemany(
                'replace into proxyls(PROXY_TYPE,PROXY_ADDR,PROXY_PORT,'
                'PROXY_ADD_TIME,PROXY_CHECK_TIME,PROXY_MD5) '
                'values (%s,%s,%s,%s,%s,%s)',
                rows)
        except Exception:
            traceback.print_exc()
            db.rollback()
        else:
            db.commit()

        time.sleep(3)
        print(u'爬取完毕')
    except Exception:
        traceback.print_exc()

    time.sleep(2)
    print(u'开始检验代理...')
    self.testbythread()
def complete(self, animeid):
    """Mark an anime's info download as finished (status 2) in ``anime_home``.

    The id is bound as a query parameter rather than formatted into the SQL
    text, so it is quoted/escaped by the database driver.

    :param animeid: value matched against ``anime_home.ANIME_ID``.
    :raises Exception: whatever the database layer raised, re-raised after
        the transaction has been rolled back.
    """
    try:
        db.execute(
            'UPDATE anime_home a SET a.ANIME_INFO_DOWNLOAD_STATUS = 2 '
            'where a.ANIME_ID = %s',
            (animeid,))
    except Exception:
        db.rollback()
        raise
    else:
        # Commit only on success; after a rollback there is nothing to commit.
        db.commit()
def getData(self, url):
    """Fetch one timetable page and sync its entries into ``anime_timetable``.

    The response body is parsed as JSON. Nothing is written unless its
    ``status`` is 200 and ``data.page.list`` is non-empty. Entries whose
    ``bid`` is not yet present in the table are collected and inserted in a
    single batch; for entries that already exist only the play date and the
    episode are updated.

    Every value taken from the response is bound as a query parameter instead
    of being formatted into the SQL text, so quotes or SQL fragments in the
    remote data can neither break the statement nor be executed.

    :param url: address of the timetable page, fetched through
        ``self.requestbyproxy``.
    """
    content = self.requestbyproxy(url)
    dataJson = json.loads(content.text)
    if dataJson['status'] != 200:
        return
    entries = dataJson['data']['page']['list']
    if not entries:
        return

    values_insert = []
    for item in entries:
        anime_bid = int(item['bid'])
        anime_play_date = item['playDate']
        anime_play_episode = item['episode']

        # db.execute's return value is compared with 0 to detect "no such
        # row" (presumably the affected/selected row count).
        known = db.execute(
            'select 1 from anime_timetable a where a.ANIME_BID = %s limit 1',
            (anime_bid,))
        if known == 0:
            # Same order as the column list of the INSERT below; ANIME_ID is
            # not known at this point and is stored as NULL.
            values_insert.append([
                anime_bid,
                None,
                item['title'],
                item['cover'],
                item['verticalCover'],
                anime_play_date,
                item['playTime'],
                item['originTime'],
                item['playSite'],
                item['originStation'],
                item['playUrl'],
                anime_play_episode,
            ])
            continue

        try:
            db.execute(
                'update anime_timetable t set t.ANIME_PLAY_DATE = %s,'
                't.ANIME_PLAY_EPISODE = %s where t.ANIME_BID = %s',
                (anime_play_date, anime_play_episode, anime_bid))
        except Exception:
            # Best effort: report the failure and carry on with the next entry.
            db.rollback()
            traceback.print_exc()

    try:
        if values_insert:
            db.executemany(
                'insert into anime_timetable(ANIME_BID,ANIME_ID,ANIME_NAME,'
                'ANIME_COVER,ANIME_VERTICAL_COVER,ANIME_PLAY_DATE,'
                'ANIME_PLAY_TIME,ANIME_ORIGIN_TIME,ANIME_PLAY_SITE,'
                'ANIME_ORIGIN_STATION,ANIME_PLAY_URL,ANIME_PLAY_EPISODE) '
                'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',
                values_insert)
    except Exception:
        db.rollback()
        traceback.print_exc()
    else:
        # Persists the pending updates together with the batch insert.
        db.commit()

    # Pause between pages.
    time.sleep(1)
def repair(self):
    """Reconcile ``ANIME_INFO_DOWNLOAD_STATUS`` in ``anime_home`` with ``anime_info``.

    Every ``anime_home`` row that has a matching ``anime_info`` row is set to
    status 2; afterwards every row still at status 1 is put back to status 0.
    The changes are committed together, or rolled back if any statement
    fails.

    :raises Exception: whatever the database layer raised, re-raised after
        the transaction has been rolled back.
    """
    try:
        db.execute(
            'SELECT h.* FROM anime_home h LEFT JOIN anime_info i on (h.ANIME_ID = i.ANIME_ID) WHERE i.ANIME_ID IS NOT NULL',
            None)
        records = db.fetchall()
        for r in records or ():
            # NOTE(review): r[0] is used as anime_home.ID, i.e. ID is assumed
            # to be the first column returned by ``h.*`` - confirm.
            db.execute(
                'UPDATE anime_home a SET a.ANIME_INFO_DOWNLOAD_STATUS = 2 '
                'WHERE a.ID = %s',
                (r[0],))
        # NOTE(review): the reset is issued whether or not any row matched
        # above - confirm this is the intended scope.
        db.execute(
            'UPDATE anime_home h SET h.ANIME_INFO_DOWNLOAD_STATUS = 0 WHERE h.ANIME_INFO_DOWNLOAD_STATUS =1',
            None)
    except Exception:
        db.rollback()
        raise
    else:
        # Commit only on success; after a rollback there is nothing to commit.
        db.commit()
def pop(self):
    """Load every status-0 row of ``anime_home`` into ``self.queue``.

    The rows (``ID``, ``ANIME_LINE``, ``ANIME_INFO_DOWNLOAD_STATUS``) are
    read first, then all status-0 rows are switched to status 1. The commit
    is issued on every exit path, including after a rollback.

    :returns: ``self.queue`` when at least one row was read, otherwise
        ``None`` (nothing is enqueued in that case).
    :raises Exception: whatever the database layer raised, re-raised after
        the transaction has been rolled back.
    """
    select_pending = (
        'SELECT a.ID,a.ANIME_LINE,a.ANIME_INFO_DOWNLOAD_STATUS '
        'FROM anime_home a WHERE a.ANIME_INFO_DOWNLOAD_STATUS = 0'
    )
    claim_pending = (
        'UPDATE anime_home a SET a.ANIME_INFO_DOWNLOAD_STATUS = 1 '
        'WHERE a.ANIME_INFO_DOWNLOAD_STATUS = 0'
    )
    try:
        db.execute(select_pending, None)
        pending = db.fetchall()
        db.execute(claim_pending, None)
        if not pending:
            return None
        for row in pending:
            self.queue.put(row)
        return self.queue
    except Exception:
        db.rollback()
        raise
    finally:
        db.commit()
def getxiciProxy(self, i):
    """Scrape page ``i`` of the xicidaili listing and insert its proxies.

    The page is fetched with ``self.request`` and the cells selected by the
    XPath are grouped in threes, one group per proxy. The resulting rows are
    inserted into ``proxyls`` while holding ``self.lock``; the batch is
    committed on success and rolled back (with the traceback printed) on
    failure. The lock is released on every exit path.

    :param i: page number; appended to the listing URL and also passed as
        the second argument to ``self.request``.
    """
    print(u'获取第', i, '页数据')
    html = self.request('http://www.xicidaili.com/nn/' + str(i), i)
    cells = self.xpathResolve(
        html,
        "//table[@id='ip_list']/tr[@class][position()>1]//td[6]"
        "|//table[@id='ip_list']/tr[@class][position()>1]//td[2]"
        "|//table[@id='ip_list']/tr[@class][position()>1]//td[3]")

    # Presumably the cells come back in document order, i.e. td[2], td[3],
    # td[6] for each table row; the indexing below relies on that:
    # group[0] -> address, group[1] -> port, group[2] -> type.
    proxyLs = []
    for start in range(0, len(cells), 3):
        group = cells[start:start + 3]
        proxyLs.append([
            group[2].text.lower(),
            group[0].text,
            str(group[1].text),
            round(time.time() * 1000),
            None,  # PROXY_CHECK_TIME: not checked yet
            self.md5Encode(group[0].text),
        ])
    if not proxyLs:
        return

    self.lock.acquire()
    try:
        try:
            db.executemany(
                'insert into proxyls(PROXY_TYPE,PROXY_ADDR,PROXY_PORT,'
                'PROXY_ADD_TIME,PROXY_CHECK_TIME,PROXY_MD5) '
                'values (%s,%s,%s,%s,%s,%s)',
                proxyLs)
        except Exception:
            traceback.print_exc()
            db.rollback()
        else:
            db.commit()
    finally:
        # Release even if rollback()/commit() raise, so that other threads
        # waiting on the lock are not blocked forever.
        self.lock.release()
def delproxy(self, id):
    """Delete the row with the given ``ID`` from ``proxyls``.

    Best effort: a failure is reported on stderr and the transaction is
    rolled back, but nothing is raised to the caller. The id is bound as a
    query parameter rather than formatted into the SQL text.

    This method does not commit.
    NOTE(review): confirm that the caller (or autocommit) persists the delete.

    :param id: value matched against ``proxyls.ID``.
    """
    try:
        db.execute('delete from proxyls where ID = %s', (id,))
    except Exception:
        # Report instead of failing silently; KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        traceback.print_exc()
        db.rollback()
def writeproxy(self, id):
    """Stamp the proxy row with the given ``ID`` with the current time.

    ``PROXY_CHECK_TIME`` is set to the current Unix time in milliseconds.
    Best effort: a failure is reported on stderr and the transaction is
    rolled back, but nothing is raised to the caller. Both values are bound
    as query parameters rather than formatted into the SQL text.

    This method does not commit.
    NOTE(review): confirm that the caller (or autocommit) persists the update.

    :param id: value matched against ``proxyls.ID``.
    """
    checked_at = round(time.time() * 1000)
    try:
        db.execute(
            'update proxyls p set p.PROXY_CHECK_TIME = %s where p.ID = %s',
            (checked_at, id))
    except Exception:
        # Report instead of failing silently; KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        traceback.print_exc()
        db.rollback()