Example 1
import random
import urllib2
import urlparse

# assumes a Throttle helper class is available (see the sketch after this example)


class Downloader:
    def __init__(self,
                 delay=5,
                 user_agent='wswp',
                 proxies=None,
                 num_tries=1,
                 catch=None):
        self.throttle = Throttle(delay)
        self.user_agent = user_agent
        self.proxies = proxies
        self.num_tries = num_tries
        self.catch = catch

    def __call__(self, url):
        result = None
        if self.catch:
            try:
                result = self.catch[url]
            except KeyError:
                pass
            else:
                if self.num_tries > 0 and 500 <= result['code'] < 600:
                    result = None
        if result is None:
            self.throttle.wait(url)
            proxy = random.choice(self.proxies) if self.proxies else None
            headers = {
                'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
            }
            result = self.download(url, headers, proxy, self.num_tries)
            if self.catch:
                self.catch[url] = result
        return result['html']

    def download(self, url, headers, proxy, num_tries, data=None):
        print 'downloading:', url
        request = urllib2.Request(url, headers=headers)
        opener = urllib2.build_opener()
        if proxy:
            proxy_params = {urlparse.urlparse(url).scheme: proxy}
            opener.add_handler(urllib2.ProxyHandler(proxy_params))

        html, code = None, None
        try:
            response = opener.open(request, timeout=30)
            html = response.read()
            code = response.code
        except urllib2.URLError as e:
            print 'Url error:', e
            if hasattr(e, 'code'):
                code = e.code
                if num_tries > 0 and 500 <= code < 600:
                    # retry server errors (5xx) instead of returning None, so the
                    # caller always receives a {'html', 'code'} dict
                    return self.download(url, headers, proxy, num_tries - 1, data)
        except Exception as e:
            print 'error:', e
        return {'html': html, 'code': code}
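
Every example in this listing relies on a small Throttle helper that spaces out requests to the same domain, but its implementation is never shown. The following is a minimal sketch of what such a class might look like, based only on how the snippets call it (this example uses it as Throttle(delay); others import it as a module and use Throttle.Throttle(delay)):

import time
import urlparse  # urllib.parse in Python 3


class Throttle:
    """Delay downloads so requests to one domain are at least delay seconds apart."""

    def __init__(self, delay):
        self.delay = delay    # minimum seconds between requests to a single domain
        self.domains = {}     # domain -> timestamp of the last request

    def wait(self, url):
        domain = urlparse.urlparse(url).netloc
        last_accessed = self.domains.get(domain)
        if self.delay > 0 and last_accessed is not None:
            sleep_secs = self.delay - (time.time() - last_accessed)
            if sleep_secs > 0:
                time.sleep(sleep_secs)
        self.domains[domain] = time.time()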
Example 2
    def __init__(self,
                 delay=5,
                 user_agent='wswp',
                 proxies=None,
                 num_tries=1,
                 catch=None):
        self.throttle = Throttle(delay)
        self.user_agent = user_agent
        self.proxies = proxies
        self.num_tries = num_tries
        self.catch = catch
def link_crawler(seed_url, link_regex, max_depth=1, scrape_callback=None):
    crawl_queue = [seed_url]
    seen = {seed_url: 0}
    rp = robotparser.RobotFileParser()
    # create the throttle once, outside the loop, so the per-domain delay is kept
    throttle = Throttle.Throttle(5)
    user_agent = 'wswp'
    while crawl_queue:
        url = crawl_queue.pop()
        # point the parser at the site's robots.txt rather than url + '/robots.txt'
        rp.set_url(urlparse.urljoin(url, '/robots.txt'))
        rp.read()

        if rp.can_fetch(user_agent, url):
            throttle.wait(url)
            html = download(url)
            links = []
            if scrape_callback:
                links.extend(scrape_callback(url, html) or [])
            depth = seen[url]
            if depth != max_depth:
                for link in get_links(html):
                    if re.match(link_regex, link):
                        link = urlparse.urljoin(seed_url, link)
                        if link not in seen:
                            seen[link] = depth + 1
                            crawl_queue.append(link)
        else:
            print 'Blocked by robots.txt:', url
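
An illustrative call for this crawler (the seed URL and regex below are assumptions chosen for demonstration, not part of the original snippet):

link_crawler('http://example.webscraping.com', '/(index|view)', max_depth=2)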
Example 4
    def __call__(self, url):
        '''
        First try to load the result for this url from the cache; a cache hit
        needs no download and no throttling.
        If the url is not cached, download it, throttling before the request.
        '''
        result = None
        if self.cache:
            try:
                result = self.cache[url]
            except KeyError:
                # this url is not in the cache yet
                pass
            else:
                if self.num_retries > 0 and 500 <= result['code'] < 600:
                    # the cached result recorded a server error, so it is not
                    # usable; with num_retries > 0, download the page again
                    result = None
        if result is None:
            # an actual download happens here (not a cache hit), so throttle
            # first to avoid getting banned
            self.throttle.wait(url)
            proxy = random.choice(self.proxies) if self.proxies else None
            headers = {'User-agent': self.user_agent}
            self.num_retries = 1
            result = self.download(url,
                                   headers,
                                   proxy=proxy,
                                   num_retries=self.num_retries)
            if self.cache:
                # store the downloaded page in the cache
                self.cache[url] = result
        return result['html']
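
Because the cache is only read and written with self.cache[url], any mapping object will do; a plain dict gives a simple in-memory cache. Illustrative usage, assuming this __call__ belongs to a Downloader class whose __init__ matches Example 8:

downloader = Downloader(delay=5, user_agent='wswp', num_retries=1, cache={})
html = downloader('http://example.webscraping.com')
html = downloader('http://example.webscraping.com')  # second call is served from the cache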
Example 5
def link_crawler(seed_url, link_regex):
    """
     crawlfrom the given seed URL following links matched by link_regex
     :param seed_url: 
     :param link_regex: 
     :return: 
     """
    # read the robots.txt
    rp = robotparser.RobotFileParser()
    rp.set_url('http://example.webscraping.com/robots.txt')
    rp.read()
    # set the agent's name
    user_agent = "667's Python Spider"
    # set the delay between downloads to 5 seconds
    th = Throttle.Throttle(5)

    # initialise the crawl queue with the seed url
    crawl_queue = [seed_url]
    visited = set(crawl_queue)
    while crawl_queue:
        url = crawl_queue.pop()
        if rp.can_fetch(user_agent, url):
            th.wait(url)
            html = download_network_page(url)
            print html
            # filter for links matching our regular expression
            for link in get_links(html):
                if re.match(link_regex, link):
                    link = urlparse.urljoin(seed_url, link)

                    if link not in visited:
                        visited.add(link)
                        crawl_queue.append(link)
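
Examples 2 and 5 both call a get_links helper that is not shown. A minimal regex-based sketch of what it might look like, with the name and approach inferred only from how it is called:

import re


def get_links(html):
    """Return every href value found in the page (assumed helper)."""
    webpage_regex = re.compile('<a[^>]+href=["\'](.*?)["\']', re.IGNORECASE)
    return webpage_regex.findall(html)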
Example 6
import re
import urllib.parse
import urllib.request

# helpers assumed to exist elsewhere in this project: Throttle, download,
# get_linked_url and get_robots


def linked_download(seed_url,
                    linked_rex=None,
                    user_agent='wswp',
                    proxy=None,
                    max_depth=2,
                    delay=3):
    # download every linked page whose url matches the given regex

    print("linked_download start")

    # throttle object that enforces a delay between requests
    throttle = Throttle.Throttle(delay)

    # dict of urls already seen, mapped to their crawl depth
    searched_urls = {}
    # queue of urls still to visit
    url_list = [seed_url]

    # set the user-agent and proxy
    opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
    opener.addheaders = [('User-agent', user_agent)]
    urllib.request.install_opener(opener)

    # read robots.txt
    rp = get_robots(seed_url)

    # walk the queue
    while url_list:
        # pop the next url
        url = url_list.pop()

        # does robots.txt allow this agent to fetch the url?
        if rp.can_fetch(user_agent, url):
            # crawl depth recorded for this url (defaults to 0)
            depth = searched_urls.get(url, 0)

            # only continue if the maximum depth has not been reached
            if depth != max_depth:
                # wait if this download needs to be delayed
                throttle.wait(url)

                # fetch the url and get the html
                html = download(url, user_agent, proxy)

                # extract every <a> link from the html
                linked_urls = get_linked_url(html.decode('utf-8'))

                # queue the links that match the pattern
                for url_item in linked_urls:
                    # does the link match the supplied regex?
                    if re.search(linked_rex, url_item):
                        # resolve to an absolute url before the seen-check, so the
                        # recorded key matches the key looked up later
                        url_item = urllib.parse.urljoin(seed_url, url_item)
                        # not crawled yet?
                        if url_item not in searched_urls:
                            # record the link one level deeper than the current page
                            searched_urls[url_item] = depth + 1
                            # add it to the queue
                            url_list.append(url_item)
        else:
            # rejected by robots.txt
            print('Blocked by robots.txt:' + url)
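
This example calls a get_robots helper that is not shown. A minimal Python 3 sketch of what it might do, using the standard urllib.robotparser module (the helper's exact signature is an assumption):

import urllib.parse
import urllib.robotparser


def get_robots(seed_url):
    """Build and return a parser for the site's robots.txt (assumed helper)."""
    rp = urllib.robotparser.RobotFileParser()
    rp.set_url(urllib.parse.urljoin(seed_url, '/robots.txt'))
    rp.read()
    return rp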
Example 7
# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import pandas as pd
from data_pre import *
from Throttle import *

# In[2]:

data = load_data()

# In[3]:

m = Throttle(data)
m.update_vars(m.data)
m.filter_obs()

# In[4]:

m.update_vars(m.data2)
m.fit_by_batches()

# In[5]:

# Estimated parameters for the first 5 batches
m.mus[:5], m.Sigs[:5]

Example 8
    def __init__(self, delay=5, user_agent='wswp', proxies=None, num_retries=2, cache=None):
        self.throttle = Throttle.Throttle(delay)
        self.user_agent = user_agent
        self.proxies = proxies
        self.num_retries = num_retries
        self.cache = cache
# coding=utf-8
import urllib2
import re
import urlparse
import Throttle

# throttle downloads (seconds between requests to the same domain)
throttle = Throttle.Throttle(5)
data_list = []


def download(url, user_agent='wswp', proxy=None, re_times=2):
    '''Download a page, with a configurable user agent.'''
    print 'DownLoad....', url

    # rate-limit the download
    throttle.wait(url)

    # set the request headers
    headers = {'User-agent': user_agent}
    request = urllib2.Request(url, headers=headers)

    opener = urllib2.build_opener()
    # add proxy support
    if proxy:
        proxy_params = {urlparse.urlparse(url).scheme: proxy}
        opener.add_handler(urllib2.ProxyHandler(proxy_params))
    html = None
    try:
        html = opener.open(request).read()
        # html = urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        print "DownLoad Error: ", e.reason
        # retry server errors (5xx) a few times before giving up
        if re_times > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            return download(url, user_agent, proxy, re_times - 1)
    return html
Example 10
        sleep(0.2)
        self.setLights(kOff)
        sleep(0.2)

if __name__ == "__main__":
    gLog.open()

    #  Start the simulator and controller
    sak = StartAndKill()
    sak.start("simulator")
    sak.start("controller")

    # Create the communication resources for 1 user
    comRes = CommunicationResources(name = 'throttle-test', host = 'localhost', port = 1235, numberOfPackages = 1)
    
    myThrottle = Throttle(name = 'Bill', comPkg = comRes.getNextPackage())
    
    # Tell the throttle to read the layout file
    gLog.print("Main reading layout")
    msg = myThrottle.readLayout("../../runSoftware/Layout.xml")
    sleep(2)

    testing = 9  #<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< set test case
    print ("Testing option == {0}". format(testing))

    if testing == 1:

        # Initialize train 1111
        gLog.print("Main initializing train")
        msg = myThrottle.initTrain(1111, [5, 1])
        gLog.print("physAdd = {0}, physSlot = {1}, virtAdd = {2}, virtSlot = {3}".