def schedule_getter(self, cycle=GETTER_CYCLE):
    """Fetch proxies on a fixed schedule.

    Runs the getter forever, sleeping ``cycle`` seconds between rounds.
    """
    proxy_getter = Getter()
    while True:
        print('开始抓取代理')
        proxy_getter.run()
        time.sleep(cycle)
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Cycle get proxy: run the Getter coroutine every ``cycle`` seconds.

    :param cycle: seconds to sleep between runs
    :return: never returns (infinite loop)
    """
    coroutine = Getter()
    # FIX: the event loop is loop-invariant — obtain it once instead of
    # looking it up on every iteration (asyncio.get_event_loop returns
    # the same loop each time in this thread anyway).
    loop = asyncio.get_event_loop()
    while True:
        # Getter.run() produces a fresh coroutine object per call, so
        # run_until_complete gets a new awaitable each iteration.
        loop.run_until_complete(coroutine.run())
        time.sleep(cycle)
def run_getter(self, cycle=GETTER_CYCLE):
    """Fetch cookies on a fixed schedule, sleeping ``cycle`` seconds between rounds."""
    cookie_getter = Getter()
    while True:
        print('开始抓取cookies')
        cookie_getter.run()
        time.sleep(cycle)
class WebStash:
    """Cache-backed store of scraped web pages (HTML plus screenshot)."""

    def __init__(self, getterType='urlopen', waitTimeBeforeScraping=0):
        self.cacher = Cacher()
        self.config = Config()
        self.getter = Getter(getterType, waitTimeBeforeScraping=waitTimeBeforeScraping)

    def get_web_data(self, url):
        """Return cached WebData for *url*, fetching and caching on a miss.

        EAFP: try the cache first; on KeyError scrape the page, store it,
        and return the freshly cached entry.
        """
        try:
            return self.cacher[url]
        except KeyError:
            self.config.debugPrint('Getting webData...')
            filename = self.cacher.getFilename(url)
            html = self.getter.get_html(url)
            screenshotLocation = self.getter.get_screenshot(url, filename + '.png')
            webData = WebData(filename, url, html, screenshotLocation=screenshotLocation)
            self.cacher[url] = webData
            return self.cacher[url]

    def delete(self, url):
        """Remove *url* from the cache.

        FIX: the original signature was ``def delete(url):`` — missing
        ``self`` — so ``self.cacher`` inside raised NameError when called.
        """
        del self.cacher[url]

    def clean(self):
        """Purge the underlying cache."""
        self.cacher.clean()
def schdule_getter(self, cycle=GETTER_CYCLE):
    """Crawl proxies periodically.

    NOTE: the public name keeps the original "schdule" spelling so
    existing callers continue to work.
    """
    crawler = Getter()
    while True:  # same endless loop the original wrote as ``while 1``
        print('start crawl proxy...')
        crawler.run()
        time.sleep(cycle)
def schedule_getter(self, cycle=settings.GETTER_CYCLE):
    """Run the getter on a timer, pausing ``cycle`` seconds between runs."""
    worker = Getter()
    while True:
        worker.run()
        time.sleep(cycle)
def scheduler_getter(self, cycle=GETTER_CYCLE):
    """Scheduler for the getter module: crawl proxies every ``cycle`` seconds."""
    crawler = Getter()
    while True:
        print('开始抓取代理')
        crawler.run()
        time.sleep(cycle)
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Run the proxy getter every ``cycle`` seconds for as long as
    GETTER_ENABLED stays truthy (returns immediately if it is falsy)."""
    proxy_getter = Getter()
    while GETTER_ENABLED:
        print('抓取器开始运行')
        proxy_getter.run()
        time.sleep(cycle)
def scheduler_getter(self):
    """Fetch cookies once every ``self.getter_cycle`` seconds.

    FIX: the original constructed a brand-new ``Getter()`` on every loop
    iteration; the construction is loop-invariant and is hoisted out,
    matching how the other schedulers in this codebase reuse one instance.

    :return: never returns (infinite loop)
    """
    getter = Getter()
    while True:
        getter.run()
        time.sleep(self.getter_cycle)
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Run the proxy getter forever, printing a 1-based iteration counter."""
    getter = Getter()
    iteration = 0
    while True:
        iteration += 1
        print('获取器开始运行', iteration)
        getter.run()
        time.sleep(cycle)
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Crawl proxies endlessly, pausing ``cycle`` seconds between rounds."""
    crawler = Getter()
    while True:
        print('Start crawling proxy')
        crawler.run()
        time.sleep(cycle)
def run(self, q):
    """Publish this resource's latest queued observation.

    Should be invoked at an acceptable update interval.
    """
    # pull this resource's entry out of the shared queue
    payload = q[self.name]
    # TODO: format data for publisher
    # hand off to the parent Getter for the actual publish
    Getter.run(self, payload)
def init(self, ros, hostname): self.hostname = hostname # create publisher self.pub = ros.Publisher('/' + hostname + '/lidar/' + resource_name(self.name), Int16MultiArray, queue_size=1) # pass to parent to enble probes #self.mp.write_probe(self.codes,1) Getter.init(self)
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Get the agent regularly: grab proxies every ``cycle`` seconds forever."""
    grabber = Getter()
    while True:
        print('Start to grab the proxy')
        logger.log('INFOR', 'Start to Grab the proxy...')
        grabber.run()
        time.sleep(cycle)
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Fetch proxies on a timer.

    Runs the getter module in a loop; after each run it sleeps for an
    interval given by ``cycle`` (defaults to the GETTER_CYCLE setting).
    """
    fetcher = Getter()
    while True:
        print('开始抓取代理')
        fetcher.run()
        time.sleep(cycle)
def getter_scheduler(self, cycle=GETTER_CYCLE):
    """Run the proxy getter forever, sleeping ``cycle`` seconds between runs.

    FIX: the original accepted ``cycle`` but then slept (and printed)
    ``GETTER_CYCLE`` unconditionally, so a caller-supplied cycle had no
    effect.  Both now honor the parameter.

    :param cycle: seconds to pause between getter runs
    :return: never returns (infinite loop)
    """
    print('获取器开始执行!')
    getter = Getter()
    while True:
        getter.run()
        print('休息', cycle, '秒')
        time.sleep(cycle)
def run_getter(self, cycle=20):
    """Run the getter in an endless loop.

    Generalization: the pause between runs was a hard-coded
    ``time.sleep(20)``; it is now the ``cycle`` parameter, whose default
    preserves the original behavior for existing callers.

    :param cycle: seconds to sleep between getter runs
    :return: never returns (infinite loop)
    """
    getter = Getter()
    loop = 0
    while True:
        logger.debug(f'getter loop {loop} start...')
        getter.run()
        loop += 1
        time.sleep(cycle)
def login_getter(self):
    """Log into the getter.

    Should have no problems if one can log into poster with the same
    credentials.

    :return: the status value produced by ``Getter.login``
    """
    self.getter = Getter(self.username, self.password)
    return self.getter.login()
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Crawl proxies periodically.

    :param cycle: seconds to sleep between crawl rounds
    :return: never returns (infinite loop)
    """
    crawler = Getter()
    while True:
        print("开始抓取代理")
        crawler.run()
        time.sleep(cycle)
def process_getter(self):
    """Getter worker subprocess: crawl proxies until the pool reaches MAXPOOL."""
    crawler = Getter()
    store = db()
    while True:
        # guard clause: stop as soon as the pool is at capacity
        if store.count_proxie() >= MAXPOOL:
            print("代理池已经达到最大容量")
            break
        crawler.run()
        time.sleep(GETTER_CYCLE)
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Fetch proxies on a schedule.

    :param cycle: pause (seconds) between getter runs
    :return: never returns (infinite loop)
    """
    proxy_getter = Getter()
    while True:
        print('开始抓取代理')
        proxy_getter.run()
        time.sleep(cycle)
def run(self, q, current_time):
    """Publish the first queued value for this resource's label(s).

    Should be invoked at an acceptable update interval.

    FIX: the original wrote ``labels = (labels)``, which is NOT a tuple —
    parentheses alone don't create one — so a plain string name was then
    iterated character by character.  ``(labels,)`` wraps it correctly.
    """
    labels = self.name
    if type(labels) != tuple:
        labels = (labels,)
    # gather one value per label from the shared queue
    data = [q[label] for label in labels]
    # publish data (only the first label's value, as before)
    Getter.run(self, data[0])
def DBwriter():
    """Persist the latest Getter() statistics row into mydatabase.db.

    Creates the ``results`` table on first use, inserts one row with a
    parameterized statement, commits, and prints the whole table as a
    sanity check.

    Expected result shape (from the original comment):
    ``[('start_day', req_count, req_per_sec, req_per_min, req_per_hour)]``
    """
    result = Getter()
    # The connection context manager commits on success / rolls back on error.
    with sqlite3.connect("mydatabase.db") as conn:
        cursor = conn.cursor()
        # FIX: CREATE TABLE IF NOT EXISTS replaces the fragile
        # os.path.exists() branch — the original crashed if the file
        # existed but the table did not.
        cursor.execute('''CREATE TABLE IF NOT EXISTS results
                          (analize_date text,
                           requests_count real,
                           request_per_second real,
                           request_per_minute real,
                           request_per_hour real)''')
        # Insert data safely (placeholders, not string formatting).
        cursor.execute(
            'INSERT INTO results VALUES (?, ?, ?, ?, ?)',
            (result[0][0], result[0][1], result[0][2], result[0][3], result[0][4]))
        conn.commit()
        # Checking (output) results
        cursor.execute("SELECT * FROM results")
        print(cursor.fetchall())
def init(self, ros, hostname): self.hostname = hostname # create publisher if type(self.name) != tuple: self.pub = ros.Publisher('/' + hostname + '/odom/' + resource_name(self.name), Odometry, queue_size=1) else: self.pub = ros.Publisher( '/' + hostname + '/odom/' + resource_name(os.path.commonprefix(self.name)), Odometry, queue_size=1) # pass to parent to enble probes Getter.init(self)
def test_getterTypes():
    """Each getter type must return HTML of its documented Python type."""
    testurl = 'https://news.ycombinator.com/news'
    # same three getters, same order, table-driven instead of unrolled
    for getter_type, expected_type in (('urlopen', bytes),
                                       ('chromedriver', str),
                                       ('requests', bytes)):
        html = Getter(getter_type).get_html(testurl)
        assert isinstance(html, expected_type)
def test_getter_wait_before_scraping():
    """The getter must wait ``waitTimeBeforeScraping`` seconds per fetch,
    and an unknown getter type must raise GetterImplementationError."""
    import datetime
    waitTimeBeforeScraping = 1
    testSleep = Getter('urlopen', waitTimeBeforeScraping=waitTimeBeforeScraping)
    startTime = datetime.datetime.now()
    for i in range(3):
        testSleep.get_html('https://news.ycombinator.com/news')
    endTime = datetime.datetime.now()
    assert (endTime - startTime).seconds > 3 * waitTimeBeforeScraping
    # FIX: the original try/except let a non-raising construction pass
    # silently; the ``else`` branch now fails the test explicitly when
    # the expected exception is not raised.
    try:
        Getter('this is not a getter type')
    except GetterImplementationError as e:
        assert str(
            e) == 'this is not a getter type is not a supported getter type'
    else:
        raise AssertionError('GetterImplementationError was not raised')
def run(self, q, current_time):
    """Parse this resource's raw lidar payload and publish it as a PointCloud2.

    Should be invoked at an acceptable update interval.  The payload is the
    text after ``v=``, a ':'-separated list of base-16 values interpreted
    as alternating x,y pairs.
    """
    # get data from queue and strip everything up to the 'v=' marker
    raw = q[self.name]
    payload = raw.split('v=')[-1]
    if payload == '':
        return
    # FIX: the original used a self-referencing lambda (calling an
    # undefined name ``fx``) and ``list.size`` (lists have no ``.size``);
    # a plain pair-wise loop does what was clearly intended.
    strarr = payload.split(':')
    x = []
    y = []
    for i in range(0, len(strarr) - 1, 2):
        x.append(int(strarr[i], 16))
        y.append(int(strarr[i + 1], 16))
    # FIX: the original created ``dataout = PointCloud2()`` but populated
    # an undefined name ``msg`` — and then published the empty ``dataout``.
    msg = PointCloud2()
    # add timestamp based on ros time
    msg.header.stamp = current_time
    # add frame
    msg.header.frame_id = self.hostname
    # format message
    N = len(x)
    # FIX: interleave (x0,y0,x1,y1,...) — np.hstack([x, y]) laid out all
    # x values then all y values, which contradicts point_step=8 with the
    # 'x' field at offset 0 and 'y' at offset 4.
    xy = np.column_stack([x, y]).astype(np.float32)
    msg.height = 1
    msg.width = N
    msg.fields = [
        PointField('x', 0, PointField.FLOAT32, 1),
        PointField('y', 4, PointField.FLOAT32, 1),
    ]
    msg.is_bigendian = False
    msg.point_step = 8
    msg.row_step = msg.point_step * N
    msg.is_dense = True
    msg.data = xy.tobytes()  # FIX: ndarray.tostring() is removed in modern numpy
    # publish data
    Getter.run(self, msg)
class GetterThread(Thread):
    """Background thread that polls a Getter and broadcasts each sample via socketio."""

    getter = None

    def __init__(self):
        super().__init__()
        self.getter = Getter()

    def update(self):
        """
        Generate a random number every 1 second and emit to a socketio instance (broadcast)
        Ideally to be run in a separate thread?
        """
        # keep polling until the global stop event is set
        while not thread_stop_event.isSet():
            self.getter.update()
            payload = json.loads(self.getter.data.decode('utf-8'))
            print(payload)
            socketio.emit('newnumber', payload, namespace='/test')

    def run(self):
        self.update()
def schedule_getter(self):
    """Run the getter on a fixed cycle while GETTER_ENABLED stays truthy.

    FIX: the original used a bare ``except:``, which also swallows
    KeyboardInterrupt/SystemExit; narrowed to ``Exception``.  The single
    unguarded retry is kept from the original, so a second consecutive
    failure still propagates to the caller.
    """
    getter = Getter()
    while GETTER_ENABLED:
        try:
            getter.run()
        except Exception:
            # one retry on failure; if this also raises, let it propagate
            getter.run()
        # NOTE(review): 'GETTER_CYLE' looks like a typo for GETTER_CYCLE —
        # confirm which constant actually exists before renaming it here.
        time.sleep(GETTER_CYLE)
def run(self, q, current_time):
    """Assemble and publish an Odometry message from queued state values.

    Should be invoked at an acceptable update interval.  The labels in
    ``self.name`` map, in order, to: x, y, heading, linear velocity,
    angular velocity.
    """
    odom = Odometry()
    odom.header.stamp = current_time
    odom.header.frame_id = self.hostname
    labels = self.name
    # FIX: ``labels = (labels)`` is NOT a tuple — the comma makes the
    # tuple, not the parentheses — so a plain string label was iterated
    # character by character.  ``(labels,)`` wraps it correctly.
    if type(labels) != tuple:
        labels = (labels,)
    data = [q[label] for label in labels]
    heading = tf.transformations.quaternion_from_euler(0., 0., data[2])
    odom.pose.pose = Pose(
        Point(data[0], data[1], 0.),
        Quaternion(heading[0], heading[1], heading[2], heading[3]))
    odom.twist.twist = Twist(Vector3(data[3], 0, 0), Vector3(0, 0, data[4]))
    # publish data
    Getter.run(self, odom)