def back_up_opt():
    """Rotate the proxy table: copy valid rows into the backup table, then clear the live one.

    When ip_proxy_info is non-empty:
      1. truncate ip_proxy_info_bak
      2. copy rows with isvalid='1' into ip_proxy_info_bak
      3. truncate ip_proxy_info and commit
    Errors are logged and swallowed (best-effort maintenance job).
    """
    print(utils.get_time_now(), 'Working on table back_up job...')
    conn = None
    try:
        # NOTE(review): host/credentials are hard-coded placeholders — move to config.
        conn = mysql.connector.connect(host="10.122.202.19", user="******",
                                       password="******", db='ip_proxy_db')
        cursor = conn.cursor()
        cursor.execute('select * from ip_proxy_info;')
        if cursor.fetchall():  # only rotate when there is data to back up
            cursor.execute('truncate table ip_proxy_info_bak')
            cursor.execute(
                "insert into ip_proxy_info_bak select * from ip_proxy_info where isvalid='1'"
            )
            cursor.execute('truncate table ip_proxy_info')
            conn.commit()
        cursor.close()
    except Exception:
        traceback.print_exc()
    finally:
        # Close the connection even on failure (the original leaked it when an error hit).
        if conn is not None:
            conn.close()
    print(utils.get_time_now(), 'Job is Done.')
def process_request(self, request, spider):
    """Attach a random proxy to a first-attempt request; bump the retry counter otherwise.

    On the first pass a proxy dict (keys 'ip', 'port') is drawn from
    get_ip_proxy() and stored in request.meta; later passes only increment
    'tried' and reuse the proxy already in meta. Does nothing when no
    proxies are available.
    """
    proxies = get_ip_proxy()  # hoisted: the original called get_ip_proxy() twice
    if not proxies:
        return
    if 'tried' in request.meta:  # has_key() was removed in Python 3
        request.meta['tried'] = int(request.meta['tried']) + 1
    else:
        proxy = random.choice(proxies)
        request.meta['proxy'] = "http://%s:%s" % (proxy['ip'], proxy['port'])
        request.meta['ip'] = proxy['ip']
        request.meta['port'] = proxy['port']
        request.meta['tried'] = 1
    print(utils.get_time_now(), "use ip: %s proxy: %s, try for %s times" % (
        request.url, request.meta['proxy'], request.meta['tried']))
def process_request(self, request, spider):
    """Assign a random proxy on the request's first attempt; count retries after that.

    First pass: pick a proxy from get_ip_proxy() and record proxy/ip/port/tried
    in request.meta. Retry pass: only increment 'tried' (the earlier proxy
    stays in meta). No-op when get_ip_proxy() yields nothing.
    """
    available = get_ip_proxy()  # call once instead of twice as the original did
    if not available:
        return
    if 'tried' in request.meta:  # Python 3: dict.has_key no longer exists
        request.meta['tried'] = int(request.meta['tried']) + 1
    else:
        proxy = random.choice(available)
        request.meta['proxy'] = "http://%s:%s" % (proxy['ip'], proxy['port'])
        request.meta['ip'] = proxy['ip']
        request.meta['port'] = proxy['port']
        request.meta['tried'] = 1
    print(utils.get_time_now(), "use ip: %s proxy: %s, try for %s times" % (
        request.url, request.meta['proxy'], request.meta['tried']))
def process_request(self, request, spider):
    """Give a first-attempt request a random proxy; on retries just bump the counter.

    Proxy dicts come from get_ip_proxy() (keys "ip", "port"). First attempt
    stores proxy/ip/port/tried in request.meta; subsequent attempts only
    increment "tried". Does nothing when no proxies are available.
    """
    pool = get_ip_proxy()  # fetched once; the original evaluated it twice
    if not pool:
        return
    if "tried" in request.meta:  # replaces Python-2-only dict.has_key
        request.meta["tried"] = int(request.meta["tried"]) + 1
    else:
        proxy = random.choice(pool)
        request.meta["proxy"] = "http://%s:%s" % (proxy["ip"], proxy["port"])
        request.meta["ip"] = proxy["ip"]
        request.meta["port"] = proxy["port"]
        request.meta["tried"] = 1
    print(utils.get_time_now(), "use ip: %s proxy: %s, try for %s times" % (
        request.url, request.meta["proxy"], request.meta["tried"]))
def get_metoffice_weston():
    """Fetch the Met Office forecast for the configured location and write it as timestamped JSON.

    Queries the DataPoint endpoint built from the module-level URL/LOCATION_ID/
    RESOLUTION/key settings and dumps the JSON payload into
    FORECAST_OUTPUT_DIR/<OUTPUT_PREFIX>_<timestamp>.json.
    """
    resp = requests.get(f"{URL}{LOCATION_ID}?res={RESOLUTION}&key={key}")
    payload = resp.json()
    # Timestamp the output file so successive runs never overwrite each other.
    out_path = Path(FORECAST_OUTPUT_DIR, f"{OUTPUT_PREFIX}_{get_time_now()}.json")
    with open(out_path, "w") as fh:
        json.dump(payload, fh, indent=4)
def scrape_xc_weston():
    """Scrape the forecast box from the configured URL and save its text to a timestamped file.

    Launches a Firefox WebDriver, reads the element with id "fcastbox", and
    writes its text to FORECAST_OUTPUT_DIR/<OUTPUT_PREFIX>_<timestamp>.txt.
    """
    driver = webdriver.Firefox()
    try:
        driver.get(URL)
        forecast = driver.find_element(By.ID, "fcastbox")
        time_now = get_time_now()
        with open(Path(FORECAST_OUTPUT_DIR, f"{OUTPUT_PREFIX}_{time_now}.txt"), "w") as f:
            f.write(forecast.text)
    finally:
        # Always shut the browser down — the original leaked it when scraping raised.
        # NOTE(review): close() only closes the window; quit() would also end the
        # driver session — confirm which is intended before changing.
        driver.close()
def back_up_opt():
    """Back up valid proxy rows into ip_proxy_info_bak and clear the live table.

    If ip_proxy_info holds any rows: truncate the backup table, copy rows
    with isvalid='1' into it, truncate the live table, and commit. Any
    error is printed and swallowed (best-effort job).
    """
    print(utils.get_time_now(), 'Working on table back_up job...')
    conn = None
    try:
        # NOTE(review): connection parameters are hard-coded placeholders — externalize.
        conn = mysql.connector.connect(host="10.122.202.19", user="******",
                                       password="******", db='ip_proxy_db')
        cursor = conn.cursor()
        cursor.execute('select * from ip_proxy_info;')
        if cursor.fetchall():  # skip rotation when the live table is empty
            cursor.execute('truncate table ip_proxy_info_bak')
            cursor.execute(
                "insert into ip_proxy_info_bak select * from ip_proxy_info where isvalid='1'"
            )
            cursor.execute('truncate table ip_proxy_info')
            conn.commit()
        cursor.close()
    except Exception:
        traceback.print_exc()
    finally:
        # Release the connection on every path (the original leaked it on error).
        if conn is not None:
            conn.close()
    print(utils.get_time_now(), 'Job is Done.')
def _do_insert(self, conn, item, spider):
    """Upsert one proxy record keyed by item['ip'].

    If a row with the same ip exists, update its port/anonymous/protocol/
    location/latency/last_verify_time/source columns; otherwise insert a new
    row. `conn` is a cursor-like object (execute/fetchone); `item` supplies
    the column values (note: the 'protocol' column comes from
    item['http_type']). Errors are logged and swallowed.
    """
    try:
        conn.execute("select * from ip_proxy_info where ip=%s", (item['ip'],))
        if conn.fetchone():
            print(utils.get_time_now(), "do db update, ip ==>", item['ip'])
            conn.execute(
                "update ip_proxy_info set port=%s, anonymous=%s, protocol=%s, location=%s, latency=%s, last_verify_time=%s, source=%s where ip=%s",
                (item['port'], item['anonymous'], item['http_type'],
                 item['location'], item['latency'], item['last_verify_time'],
                 item['source'], item['ip']))
        else:
            print(utils.get_time_now(), "do db insert, ip ==>", item['ip'])
            conn.execute(
                "insert into ip_proxy_info (ip, port, anonymous, protocol, location, latency, last_verify_time, source) values (%s, %s, %s, %s, %s, %s, %s, %s)",
                (item['ip'], item['port'], item['anonymous'], item['http_type'],
                 item['location'], item['latency'], item['last_verify_time'],
                 item['source']))
    except Exception:
        # The original had a no-op tuple expression here; actually log the traceback.
        print(utils.get_time_now(), traceback.format_exc())
def days_since_last_pmt(self):
    """Return the number of whole days since the last payment, or None if none is recorded."""
    last = self.lastPaymentDate
    if last:
        elapsed = get_time_now() - parse_time(last)
        return elapsed.days
    return None
def _handle_error(self, failure, item, spider):
    """Log a failed deferred DB operation (item/spider kept to satisfy the errback signature)."""
    print(utils.get_time_now(), "Error ==>", failure)