def setUp(self):
     self.conf = CONFIG
     self.db = {}
     self.input_queue = PriorityQueue()
     self.filtered_queue = PriorityQueue()
     self.filter = StatuslistFilter(self.conf, self.input_queue,
                                    self.filtered_queue, self.db)
Example #2
 def __init__(self, target, threads=100, mode=False):
     self.start_time = time.time()
     self.target = target.strip()
     self.threads = threads
     self.file = "subnames.txt"
     self.full_scan = mode
     self.ignore_intranet = False
     self.scan_count = self.found_count = 0
     self.console_width = getTerminalSize()[0] - 2
     self.resolvers = [
         dns.resolver.Resolver(configure=False) for _ in range(self.threads)
     ]
     for _ in self.resolvers:
         _.lifetime = _.timeout = 10.0
     self.print_count = 0
     self.basedir = os.path.dirname(os.path.dirname(__file__))
     self.STOP_ME = False
     self._load_dns_servers()
     self._load_next_sub()
     self.queue = PriorityQueue()
     self.priority = 0
     self._load_sub_names()
     self.ip_dict = {}
     self.found_subs = set()
     self.ex_resolver = dns.resolver.Resolver(configure=False)
     self.ex_resolver.nameservers = self.dns_servers
     self.result_domains = []
     self.result_ips = []
Example #3
 def __init__(self, *params):
     (
         self.domain,
         self.options,
         self.process_num,
         self.dns_servers,
         self.next_subs,
         self.scan_count,
         self.found_count,
         self.queue_size_array,
         tmp_dir,
     ) = params
     self.dns_count = len(self.dns_servers)
     self.scan_count_local = 0
     self.found_count_local = 0
     self.resolvers = [
         dns.resolver.Resolver(configure=False)
         for _ in range(self.options.threads)
     ]
     for r in self.resolvers:
         r.lifetime = r.timeout = 10.0
     self.queue = PriorityQueue()
     self.priority = 0
     self.ip_dict = {}
     self.found_subs = set()
     self.timeout_subs = {}
     self.count_time = time.time()
     self.outfile = open(
         "%s/%s_part_%s.txt" % (tmp_dir, self.domain, self.process_num),
         "w")
     self.normal_names_set = set()
     self.load_sub_names()
     self.lock = RLock()
Example #4
 def __init__(self, target, subdomainfile=None):
     self.start_time = time.time()
     self.target = target.strip()
     self.ignore_intranet = config.ignore_intranet
     self.scan_count = self.found_count = 0
     self.console_width = getTerminalSize()[0] - 2
     self.threads = config.threads
     self.resolvers = [
         dns.resolver.Resolver(configure=False) for _ in range(self.threads)
     ]
     for _ in self.resolvers:
         _.lifetime = _.timeout = 10.0
     self.print_count = 0
     self.STOP_ME = False
     try:
         self.full_scan = config.full_scan
     except:
         self.full_scan = False
     self.subdomainfile = subdomainfile if subdomainfile is not None else config.subnamefile
     self.basedir = os.path.dirname(
         os.path.dirname(__file__))  #Teemo home dir
     self._load_dns_servers()
     self._load_next_sub()
     self.queue = PriorityQueue()
     self.priority = 0
     self._load_sub_names()
     self.ip_dict = {}
     self.found_subs = set()
     self.ex_resolver = dns.resolver.Resolver(configure=False)
     self.ex_resolver.nameservers = self.dns_servers
     self.result_lines = []
     self.result_domains = []
     self.result_ips = []
Example #5
 def __init__(self, **kwargs):
     self.connection_cls = kwargs.get('connection_cls', AWSConnection)
     self.aws_region = kwargs.get('aws_region')
     self.aws_access_key_id = kwargs.get('aws_access_key_id')
     self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
     self.log_group_name = kwargs.get('log_group_name')
     self.log_stream_name = kwargs.get('log_stream_name')
     self.watch = kwargs.get('watch')
     self.color_enabled = kwargs.get('color_enabled')
     self.output_stream_enabled = kwargs.get('output_stream_enabled')
     self.output_group_enabled = kwargs.get('output_group_enabled')
     self.start = self.parse_datetime(kwargs.get('start'))
     self.end = self.parse_datetime(kwargs.get('end'))
     self.pool_size = max(kwargs.get('pool_size', 0), 10)
     self.max_group_length = 0
     self.max_stream_length = 0
     self.publishers = []
     self.events_queue = Queue()
     self.raw_events_queue = PriorityQueue()
     self.publishers_queue = PriorityQueue()
     self.publishers = []
     self.stream_status = {}
     self.stream_max_timestamp = {}
     self.connection = self.connection_cls(
         self.aws_region,
         aws_access_key_id=self.aws_access_key_id,
         aws_secret_access_key=self.aws_secret_access_key
     )
Example #6
    def __init__(self, **kwargs):
        self.connection_cls = kwargs.get("connection_cls", AWSConnection)
        self.aws_region = kwargs.get("aws_region")
        self.aws_access_key_id = kwargs.get("aws_access_key_id")
        self.aws_secret_access_key = kwargs.get("aws_secret_access_key")
        self.log_group_name = kwargs.get("log_group_name")
        self.log_stream_name = kwargs.get("log_stream_name")
        self.watch = kwargs.get("watch")
        self.color_enabled = kwargs.get("color_enabled")
        self.output_stream_enabled = kwargs.get("output_stream_enabled")
        self.output_group_enabled = kwargs.get("output_group_enabled")
        self.start = self.parse_datetime(kwargs.get("start"))
        self.end = self.parse_datetime(kwargs.get("end"))
        self.pool_size = max(kwargs.get("pool_size", 0), 10)
        self.max_group_length = 0
        self.max_stream_length = 0
        self.publishers = []
        self.events_queue = Queue()
        self.raw_events_queue = PriorityQueue()
        self.publishers_queue = PriorityQueue()
        self.publishers = []
        self.stream_status = {}
        self.stream_max_timestamp = {}

        self.connection = self.connection_cls(
            self.aws_region, aws_access_key_id=self.aws_access_key_id, aws_secret_access_key=self.aws_secret_access_key
        )
Example #7
 def __init__(self, name, fsm_id, states, initial_state, tracer,
              channel_tracer, fsm_registry, fsm_id_seq, inventory,
              play_header, outputs):
     self.shutting_down = False
     self.is_shutdown = False
     self.fsm_registry = fsm_registry
     self.name = name
     self.fsm_id = fsm_id
     self.tracer = tracer
     self.channel_tracer = channel_tracer
     self.state = initial_state
     self.states = states
     self.inbox = PriorityQueue()
     self.message_buffer = Queue()
     self.self_channel = Channel(self, self, tracer, self.inbox)
     self.worker = AnsibleTaskWorker(tracer, next(fsm_id_seq), inventory,
                                     play_header)
     self.worker_output_queue = Queue()
     self.worker.controller.outboxes['output'] = self.worker_output_queue
     self.worker.queue.put(Inventory(0, inventory))
     self.outboxes = dict(default=None)
     self.last_event = NULL_EVENT
     self.task_id_seq = count(0)
     self.failure_count = 0
     if outputs:
         self.outboxes.update({name: None for name in outputs})
     self.thread = gevent.spawn(self.receive_messages)
Example #8
 def __init__(self, target, options):
     self.start_time = time.time()
     self.target = target.strip()
     self.options = options
     self.ignore_intranet = options.i
     self.scan_count = self.found_count = 0
     self.console_width = getTerminalSize()[0] - 2
     self.resolvers = [
         dns.resolver.Resolver(configure=False)
         for _ in range(options.threads)
     ]
     for _ in self.resolvers:
         _.lifetime = _.timeout = 10.0
     self.print_count = 0
     self.STOP_ME = False
     self._load_dns_servers()
     self._load_next_sub()
     self.queue = PriorityQueue()
     self.priority = 0
     self._load_sub_names()
     if options.taskid:
         self.taskid = options.taskid
     else:
         self.taskid = 0
     self.ip_dict = {}
     self.found_subs = set()
     self.sub_domain = []
     self.ex_resolver = dns.resolver.Resolver(configure=False)
     self.ex_resolver.nameservers = self.dns_servers
Example #9
 def __init__(self, target, options):
     self.start_time = time.time()
     self.target = target.strip()
     self.options = options
     self.ignore_intranet = options.i
     self.scan_count = self.found_count = 0
     self.console_width = getTerminalSize()[0] - 2
     self.resolvers = [
         dns.resolver.Resolver(configure=False)
         for _ in range(options.threads)
     ]
     for _ in self.resolvers:
         _.lifetime = _.timeout = 10.0
     self.print_count = 0
     self.STOP_ME = False
     self._load_dns_servers()
     self._load_next_sub()
     self.queue = PriorityQueue()
     self.priority = 0
     self._load_sub_names()
     if options.output:
         outfile = options.output
     else:
         _name = os.path.basename(self.options.file).replace('subnames', '')
         if _name != '.txt':
             _name = '_' + _name
         outfile = target + _name if not options.full_scan else target + '_full' + _name
     self.outfile = open(outfile, 'w')
     self.ip_dict = {}
     self.found_subs = set()
     self.ex_resolver = dns.resolver.Resolver(configure=False)
     self.ex_resolver.nameservers = self.dns_servers
     self.result_lines = []
     self.result_domains = []
     self.result_ips = []
Example #10
    def __init__(self, **kwargs):
        self.connection_cls = kwargs.get('connection_cls', AWSConnection)
        self.aws_region = kwargs.get('aws_region')
        self.aws_access_key_id = kwargs.get('aws_access_key_id')
        self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
        self.log_group_name = kwargs.get('log_group_name')
        self.log_stream_name = kwargs.get('log_stream_name')
        self.watch = kwargs.get('watch')
        self.color_enabled = kwargs.get('color_enabled')
        self.output_stream_enabled = kwargs.get('output_stream_enabled')
        self.output_group_enabled = kwargs.get('output_group_enabled')
        self.start = self.parse_datetime(kwargs.get('start'))
        self.end = self.parse_datetime(kwargs.get('end'))
        self.pool_size = max(kwargs.get('pool_size', 0), 10)
        self.max_group_length = 0
        self.max_stream_length = 0
        self.publishers = []
        self.events_queue = Queue()
        self.raw_events_queue = PriorityQueue()
        self.publishers_queue = PriorityQueue()
        self.publishers = []
        self.stream_status = {}
        self.stream_max_timestamp = {}

        self.connection = self.connection_cls(
            self.aws_region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key)
Example #11
    def __init__(self, target, options, process_num, dns_servers, cdns,next_subs,
                 scan_count, found_count, queue_size_list, tmp_dir):
        self.target = target.strip()
        self.options = options
        self.process_num = process_num
        self.dns_servers = dns_servers
        self.cdns = cdns

        self.dns_count = len(dns_servers)
        self.next_subs = next_subs
        self.scan_count = scan_count
        self.scan_count_local = 0
        self.found_count = found_count
        self.found_count_local = 0
        self.queue_size_list = queue_size_list

        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
        for _r in self.resolvers:
            _r.lifetime = _r.timeout = 6.0
        self.queue = PriorityQueue()
        self.item_index = 0
        self.priority = 0
        self._load_sub_names()
        self.ip_dict = {}
        self.found_subs = set()
        self.ex_resolver = dns.resolver.Resolver(configure=False)
        self.ex_resolver.nameservers = dns_servers
        self.local_time = time.time()
        self.outfile = open('%s/%s_part_%s.txt' % (tmp_dir, target, process_num), 'w')
Example #12
 def __init__(self, l, func, num=20):
     self.queue = PriorityQueue()
     for item in l:
         self.queue.put(item)
     self.num = num
     self.func = func
     self.stop = False
     self.results = PriorityQueue()
Example #13
 def __init__(self, from_fsm, to_fsm, tracer, queue=None):
     if queue is None:
         self.queue = PriorityQueue()
     else:
         self.queue = queue
     self.from_fsm = from_fsm
     self.to_fsm = to_fsm
     self.tracer = tracer
Example #14
 def __init__(self):
     self.start_time = time.time()
     self.queue = PriorityQueue()
     self.history = []
     self.total_count = 0
     self.scan_count = 0
     self._load_target()
     self.outfile = open("log.log", 'w')
     self.console_width = getTerminalSize()[0] - 2
Example #15
 def __init__(self, factory, maxsize=200, timeout=60):
     self.factory = factory
     self.maxsize = maxsize
     self.timeout = timeout
     self.clients = PriorityQueue(maxsize)
     # If there is a maxsize, prime the queue with empty slots.
     if maxsize is not None:
         for _ in xrange(maxsize):
             self.clients.put(EMPTY_SLOT)
Example #16
    def __init__(self, start_requests):
        self.start_request = start_requests
        self.domain = tldextract.extract(self.start_request.url).domain

        self.request_queue = PriorityQueue()
        self.result = {
            start_requests.url: 0,
        }
        self.gl_list = []
        self.stop_flag = False
Example #17
 def __init__(self, zoomeye_results, threads_num):
     self.threads_num = threads_num
     self.targets = PriorityQueue()
     self.zoomeye_results = zoomeye_results
     self.result = []
     
     for zoomeye_result in zoomeye_results:
         self.targets.put(zoomeye_result)
     self.total = self.targets.qsize()
     self.pbar = tqdm(total=self.total,ascii=True)
Example #18
    def test__check_bulk(self):
        input_queue = PriorityQueue()
        queue = PriorityQueue()
        old_date_modified = datetime.now().isoformat()
        id_1 = uuid4().hex
        date_modified_1 = datetime.now().isoformat()
        id_2 = uuid4().hex
        date_modified_2 = datetime.now().isoformat()
        id_3 = uuid4().hex
        date_modified_3 = datetime.now().isoformat()
        db = MagicMock()
        bulk = {
            id_1: date_modified_1,
            id_2: date_modified_2,
            id_3: date_modified_3
        }
        priority_cache = {id_1: 1, id_2: 1, id_3: 1}
        return_value = {
            u'docs': [
                {
                    u'_type': u'Tender',
                    u'_source': {
                        u'dateModified': date_modified_1
                    },
                    u'_index': u'bridge_tenders',
                    u'_version': 1,
                    u'found': True,
                    u'_id': id_1
                },
                {
                    u'_type': u'Tender',
                    u'_source': {
                        u'dateModified': old_date_modified
                    },
                    u'_index': u'bridge_tenders',
                    u'_version': 1,
                    u'found': True,
                    u'_id': id_2
                },
                {
                    u'found': False,
                    u'_type': u'Tender',
                    u'_id': id_3,
                    u'_index': u'bridge_tenders'
                }
            ]
        }
        db.mget.return_value = return_value
        elastic_filter = BasicElasticSearchFilter(self.config, input_queue, queue, db)
        self.assertEqual(queue.qsize(), 0)

        elastic_filter._check_bulk(bulk, priority_cache)
        self.assertEqual(queue.qsize(), 2)
Example #19
    def __init__(self, target, options):
        # Set up the priority queue
        self.queue = PriorityQueue()
        self.priority = 0

        # Basic settings taken from the parameters
        self.target = target.strip()
        self.options = options
        self.ignore_intranet = options.get('ignore_intranet')

        # The result file name is derived from the main domain (or from an explicit 'file' option)
        outfile_name = options.get('file') if options.get('file') else target

        # Append a suffix when the full (large) dictionaries are used
        if self.options.get('subnames_full'):
            outfile_name += '_sfull'
        if self.options.get('next_sub_full'):
            outfile_name += '_nfull'

        self.fname = 'results/' + outfile_name + '.txt'
        self.outfile = open('results/' + outfile_name + '.txt', 'wb')
        self.outfile_ips = open('results/' + outfile_name + '_ip.txt', 'w')

        # Set up DNS resolvers (one per configured thread)
        # QUESTION: why configure=False instead of the regular /etc/resolv.conf configuration?
        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.get('threads'))]
        for _ in self.resolvers:
            '''
            dns.resolver.Resolver: http://www.dnspython.org/docs/1.14.0/dns.resolver.Resolver-class.html
            dns.resolver.Resolver.lifetime: The total number of seconds to spend trying to get an answer to the question.
            dns.resolver.Resolver.timeout: The number of seconds to wait for a response from a server, before timing out.
            '''
            # QUESTION: what is the difference between lifetime and timeout?
            _.lifetime = _.timeout = 10.0

        # Load the DNS server list
        self._load_dns_servers()
        # self.ex_resolver is a fallback resolver used when an exception occurs
        self.ex_resolver = dns.resolver.Resolver(configure=False)
        self.ex_resolver.nameservers = self.dns_servers
        self.logfile = open('results/' + target + '_log.txt', 'a')

        # load the subdomain dictionaries
        self._load_next_sub()
        self._load_sub_names()

        # initial state
        self.start_time = time.time()
        self.scan_count = 0
        self.found_count = 0  # prefixes confirmed to exist as subdomains
        self.STOP_ME = False
        self.ip_dict = {}
        self.found_subs = set()
Example #20
 def __init__(self,host,keyword,ips,timeout):
     self.threads = 100
     self.queue = PriorityQueue()
     self.host = host
     self.keyword = keyword
     self.result = []
     for ip in ips:
         self.queue.put(ip)
     self.num = self.queue.qsize()
     self.i = 0
     self.success = 0
     self.timeout = timeout
     self.filename = os.path.join(rootPath,"result",host + ".log")
     self.outfile = open(self.filename, 'w')
Example #21
    def __init__(self, service):
        self.service = service
        self._queue = PriorityQueue()
        # done keeps the tasks that have been extracted from the queue
        # so we can inspect them later

        # keep the done task on disk, not in memory.
        # now we use the filesystem, but we could plug any key-value stor or database behind
        # check TaskStorageBase to see the interface your storage needs to have
        # to be used to store tasks
        # self._done = TaskStorageFile(self)
        self._done = TaskStorageSqlite(self)
        # pointer to current task
        self._current = None
        self._current_mu = Semaphore()
Example #22
def Channel(from_fsm, to_fsm, tracer, queue=None):
    if settings.instrumented:
        return _Channel(from_fsm, to_fsm, tracer, queue)
    if queue is not None:
        return queue
    else:
        return PriorityQueue()
Example #23
def main(n_processor = 3, n_picker = 3, queue_size = 10, *a, **kw):
    """
    Queue processor simulator.
    
    Parameters
    ----------
    n_processor : int
        Number of processors working simultaneously
    n_picker : int
        Number of pickers working simultaneously
    queue_size : int
        Maximum allowed size of queue
        
    Returns
    ----------
    None

    """
    pages = page_generator()
    global queue
    queue = PriorityQueue(maxsize = queue_size)
    spawn_list = []
    for i in range(n_processor):
        greenlet = gevent.spawn(page_processor, 'Processor {0}'.format(i+1), pages, next_step, queue)
        spawn_list.append(greenlet)
    for j in range(n_picker):
        greenlet = gevent.spawn(pick_page, 'Picker {0}'.format(j+1), pages, queue)
        spawn_list.append(greenlet)
    gevent.joinall(spawn_list)
    print('####################### END #######################')
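The helper functions used by main() (page_generator, page_processor, pick_page, next_step) are not shown in this snippet. Below is a minimal, self-contained sketch of the same producer/consumer pattern with a bounded gevent PriorityQueue; all names in it are illustrative, not the original helpers.

import gevent
from gevent.queue import Empty, PriorityQueue

queue = PriorityQueue(maxsize=10)

def producer(name, count):
    # put() blocks when the bounded queue is full, throttling the producer
    for i in range(count):
        queue.put((i % 3, '%s-page-%d' % (name, i)))
        gevent.sleep(0)

def consumer(name):
    # drain items in priority order; give up after 1 second of inactivity
    while True:
        try:
            priority, page = queue.get(timeout=1.0)
        except Empty:
            return
        print('%s handled %s (priority %d)' % (name, page, priority))

workers = [gevent.spawn(producer, 'picker-1', 20)]
workers += [gevent.spawn(consumer, 'processor-%d' % (i + 1)) for i in range(3)]
gevent.joinall(workers)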
Example #24
    def __init__(self, target, options, process_num, dns_servers, next_subs,
                 scan_count, found_count, queue_size_list, tmp_dir):
        self.target = target.strip()
        self.options = options
        self.process_num = process_num
        self.dns_servers = dns_servers
        self.dns_count = len(dns_servers)
        self.next_subs = next_subs
        self.scan_count = scan_count
        self.scan_count_local = 0
        self.found_count = found_count
        self.found_count_local = 0
        self.queue_size_list = queue_size_list

        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
        for _r in self.resolvers:
            _r.lifetime = _r.timeout = 6.0
        self.queue = PriorityQueue()
        self.item_index = 0
        self.priority = 0
        self._load_sub_names()
        self.ip_dict = {}
        self.found_subs = set()
        self.ex_resolver = dns.resolver.Resolver(configure=False)
        self.ex_resolver.nameservers = dns_servers
        self.local_time = time.time()
        self.outfile = open('%s/%s_part_%s.txt' % (tmp_dir, target, process_num), 'w')
Example #25
 def __init__(self, target, options):
     self.start_time = time.time()
     self.target = target.strip()
     self.options = options
     self.ignore_intranet = options.i
     self.scan_count = self.found_count = 0
     self.console_width = getTerminalSize()[0] - 2
     self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
     for _ in self.resolvers:
         _.lifetime = _.timeout = 10.0
     self.print_count = 0
     self.STOP_ME = False
     self._load_dns_servers()
     self._load_next_sub()
     self.queue = PriorityQueue()
     self.priority = 0
     self._load_sub_names()
     if options.output:
         outfile = options.output
     else:
         _name = os.path.basename(self.options.file).replace('subnames', '')
         if _name != '.txt':
             _name = '_' + _name
         outfile = target + _name if not options.full_scan else target + '_full' + _name
     self.outfile = open(outfile, 'w')
     self.ip_dict = {}
     self.found_subs = set()
     self.ex_resolver = dns.resolver.Resolver(configure=False)
     self.ex_resolver.nameservers = self.dns_servers
Example #26
 def __init__(self, crawler):
     self._crawler = crawler
     self.proxy_pool = Queue()
     self._proxy_lock = RLock()
     max_connections = crawler.max_connections
     self._request_queue = PriorityQueue()
     self._request_semaphore = BoundedSemaphore(max_connections)
Example #27
def worker():
    q = PriorityQueue()
    q.put(Job(5, 'mid job'))
    q.put(Job(10, 'low job'))
    q.put(Job(1, 'high job'))

    while not q.empty():
        job = q.get()
        print(job)
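The Job class used by worker() is not shown here. A hedged sketch of a compatible class, assuming its constructor takes a priority and a description and that jobs order by priority, could look like this:

from functools import total_ordering

@total_ordering
class Job(object):
    def __init__(self, priority, description):
        self.priority = priority
        self.description = description

    def __eq__(self, other):
        return self.priority == other.priority

    def __lt__(self, other):
        # PriorityQueue orders the items themselves, so Jobs must be comparable
        return self.priority < other.priority

    def __repr__(self):
        return 'Job(%d, %r)' % (self.priority, self.description)

With such a class, worker() above prints the high job first (priority 1), then the mid job (5), then the low job (10).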
Example #28
    def __init__(self, target, options):
        self.start_time = time.time()
        self.target = target.strip()
        self.options = options
        self.scan_count = self.found_count = 0
        self.console_width = os.get_terminal_size()[0] - 2

        # create dns resolver pool ~ workers
        self.resolvers = [
            dns.resolver.Resolver(configure=False)
            for _ in range(options.threads)
        ]
        for resolver in self.resolvers:
            resolver.lifetime = resolver.timeout = 10.0

        self.print_count = 0
        self.STOP_ME = False

        # load dns servers and check whether they work fine
        self._load_dns_servers()

        # load sub names
        self.subs = []  # subs in file
        self.goodsubs = []  # checks ok for further exploitation
        self._load_subname('dict/subnames.txt', self.subs)

        # load sub.sub names
        self.subsubs = []
        self._load_subname('dict/next_sub.txt', self.subsubs)

        # results will be saved to target.txt

        global path

        path = os.path.join("results", target)
        if not os.path.exists(path):
            os.makedirs(path)

        self.outfile = open('%s/%s.txt' % (path, target), 'w')

        self.ip_dict = set()  #
        self.found_sub = set()

        # task queue
        self.queue = PriorityQueue()
        for sub in self.subs:
            self.queue.put(sub)
Example #29
class HttpTest(object):

    def __init__(self,host,keyword,ips,timeout):
        self.threads = 100
        self.queue = PriorityQueue()
        self.host = host
        self.keyword = keyword
        self.result = []
        for ip in ips:
            self.queue.put(ip)
        self.num = self.queue.qsize()
        self.i = 0
        self.success = 0
        self.timeout = timeout
        self.filename = os.path.join(rootPath,"result",host + ".log")
        self.outfile = open(self.filename, 'w')


    def _scan(self,j):
        while not self.queue.empty():
            try:
                item = self.queue.get(timeout=3.0)
                if config.HTTPS_Support:
                    host, domain, port = item, self.host , 443
                else:
                    host, domain, port = item, self.host , 80
                html = httpServer((host, domain, port),self.timeout)
                if html  is not None and self.keyword in html:
                    self.outfile.write(item + '\n')
                    self.outfile.flush()
                    self.success += 1
            except:
                pass
            finally:
                self.i += 1
                msg = '[*] %s found, %s scanned , %s groups left'%(self.success,self.i,self.num - self.i)
                print_msg(msg)
            time.sleep(1.0)

    def run(self):
        threads = [gevent.spawn(self._scan, i) for i in range(self.threads)]
        gevent.joinall(threads)

        msg = '[+] All Done. Success:%d Saved in:%s'%(self.success,self.filename)
        print_msg(msg, line_feed=True)
Example #30
 def __init__(self, fsm_registry, connector_registry, configuration):
     self.fsm_registry = fsm_registry
     self.connector_registry = connector_registry
     self.context = zmq.Context.instance()
     self.socket = self.context.socket(zmq.ROUTER)
     if 'bind_port' in configuration:
         self.socket_port = configuration.get('bind_port')
         self.socket.bind('tcp://{0}:{1}'.format(configuration.get('bind_address', '127.0.0.1'),
                                                 self.socket_port))
     else:
         self.socket_port = self.socket.bind_to_random_port('tcp://{0}'.format(configuration.get('bind_address', '127.0.0.1')))
     logger.info('starting zmq_thread')
     self.zmq_thread = gevent.spawn(self.receive_external_messages)
     self.inbox_thread = gevent.spawn(self.receive_internal_messages)
     self.inbox = PriorityQueue()
     self.message_id_seq = count(0)
     self.client_id_seq = count(0)
     self.clients = dict()
Example #31
    def test__get_resource_item_from_queue(self):
        items_queue = PriorityQueue()
        item = (1, uuid.uuid4().hex)
        items_queue.put(item)

        # Success test
        worker = ResourceItemWorker(resource_items_queue=items_queue,
                                    config_dict=self.worker_config)
        self.assertEqual(worker.resource_items_queue.qsize(), 1)
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertEqual((priority, resource_item), item)
        self.assertEqual(worker.resource_items_queue.qsize(), 0)

        # Empty queue test
        priority, resource_item = worker._get_resource_item_from_queue()
        self.assertEqual(resource_item, None)
        self.assertEqual(priority, None)
        del worker
Example #32
 def __init__(self, factory, maxsize=200, timeout=60):
     self.factory = factory
     self.maxsize = maxsize
     self.timeout = timeout
     self.clients = PriorityQueue(maxsize)
     # If there is a maxsize, prime the queue with empty slots.
     if maxsize is not None:
         for _ in xrange(maxsize):
             self.clients.put(EMPTY_SLOT)
Example #33
    def test_add_to_retry_queue(self, mocked_logger):
        retry_items_queue = PriorityQueue()
        worker = AgreementWorker(config_dict=self.worker_config,
                                 retry_resource_items_queue=retry_items_queue)
        resource_item = {'id': uuid.uuid4().hex}
        priority = 1000
        self.assertEqual(retry_items_queue.qsize(), 0)

        # Add to retry_resource_items_queue
        worker.add_to_retry_queue(resource_item, priority=priority)

        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1001, resource_item))

        resource_item = {'id': 0}
        # Add to retry_resource_items_queue with status_code '429'
        worker.add_to_retry_queue(resource_item, priority, status_code=429)
        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1001, resource_item))

        priority = 1002
        worker.add_to_retry_queue(resource_item, priority=priority)
        sleep(worker.config['retry_default_timeout'] * 2)
        self.assertEqual(retry_items_queue.qsize(), 1)
        priority, retry_resource_item = retry_items_queue.get()
        self.assertEqual((priority, retry_resource_item),
                         (1003, resource_item))

        worker.add_to_retry_queue(resource_item, priority=priority)
        self.assertEqual(retry_items_queue.qsize(), 0)
        mocked_logger.critical.assert_called_once_with(
            'Tender {} reached limit retries count {} and droped from '
            'retry_queue.'.format(resource_item['id'],
                                  worker.config['retries_count']),
            extra={
                'MESSAGE_ID': 'dropped_documents',
                'JOURNAL_TENDER_ID': resource_item['id']
            })
        del worker
Example #34
 def __init__(self, signer, message_broker, trader_client, fee_rate=None):
     self._sign = signer.sign
     self.address = signer.address
     self.swaps = dict()  # offer_hash -> CommitmentTuple
     self.trader_client = trader_client
     # FIXME fee_rate should be int representation (int(float_rate/uint32.max_int)) for CSAdvertisements
     self.fee_rate = fee_rate
     self.message_broker = message_broker
     self.refund_queue = PriorityQueue()  # type: (TransferReceipt, substract_fee <bool>)
     self.message_queue = Queue()  # type: (messages.Signed, recipient (str) or None)
Example #35
    def __init__(self,
                 host_url,
                 resource,
                 auth=None,
                 params={},
                 headers=None,
                 retrievers_params=DEFAULT_RETRIEVERS_PARAMS,
                 adaptive=False,
                 with_priority=False):
        LOGGER.info(f'Init SyncClient for resource {resource}')
        self.host = host_url
        self.auth = auth
        self.resource = resource
        self.adaptive = adaptive
        self.headers = headers

        self.params = params
        self.retrievers_params = retrievers_params
        self.queue = PriorityQueue(maxsize=retrievers_params['queue_size'])
Example #36
 def __init__(self,
              factory,
              retry_max=3,
              retry_delay=.1,
              timeout=-1,
              max_lifetime=600.,
              max_size=10,
              options=None):
     self.max_size = max_size
     self.pool = PriorityQueue()
     self.size = 0
     self.factory = factory
     self.retry_max = retry_max
     self.retry_delay = retry_delay
     self.timeout = timeout
     self.max_lifetime = max_lifetime
     if options is None:
         self.options = {}
     else:
         self.options = options
Example #37
 def test_run(self):
     result = self.mox.CreateMock(AsyncResult)
     env = Envelope('*****@*****.**', ['*****@*****.**'])
     env.parse('From: [email protected]\r\n\r\ntest test\r\n')
     queue = PriorityQueue()
     queue.put((1, result, env))
     self.sock.recv(IsA(int)).AndReturn('220 Welcome\r\n')
     self.sock.sendall('EHLO test\r\n')
     self.sock.recv(IsA(int)).AndReturn('250-Hello\r\n250 PIPELINING\r\n')
     self.sock.sendall('MAIL FROM:<*****@*****.**>\r\nRCPT TO:<*****@*****.**>\r\nDATA\r\n')
     self.sock.recv(IsA(int)).AndReturn('250 Ok\r\n250 Ok\r\n354 Go ahead\r\n')
     self.sock.sendall('From: [email protected]\r\n\r\ntest test\r\n.\r\n')
     self.sock.recv(IsA(int)).AndReturn('250 Ok\r\n')
     result.set(True)
     self.sock.sendall('QUIT\r\n')
     self.sock.recv(IsA(int)).AndReturn('221 Goodbye\r\n')
     self.sock.close()
     self.mox.ReplayAll()
     client = SmtpRelayClient(None, queue, socket_creator=self._socket_creator, ehlo_as='test')
     client._run()
Example #38
 def __init__(self, host, port=25, pool_size=None, client_class=None,
                    **client_kwargs):
     super(StaticSmtpRelay, self).__init__()
     if client_class:
         self.client_class = client_class
     else:
         from slimta.relay.smtp.client import SmtpRelayClient
         self.client_class = SmtpRelayClient
     self.host = host
     self.port = port
     self.queue = PriorityQueue()
     self.pool = set()
     self.pool_size = pool_size
     self.client_kwargs = client_kwargs
Example #39
class Actor(Greenlet):
    """Simple implementation of the Actor pattern
    """

    def __init__(self):
        self.inbox = PriorityQueue()
        self._handlers = {ShutdownRequest: self.receive_shutdown}
        Greenlet.__init__(self)

    def receive(self, msg):
        """Dispatch a received message to the appropriate type handler
        """
        #log.debug("Received a message: " + repr(msg))
        cls = msg.__class__
        if cls in self._handlers:
            self._handlers[cls](msg)
        else:
            raise NotImplementedError()

    def receive_shutdown(self, msg):
        self.running = False

    def send(self, msg, priority=50):
        """Place a message into the actor's inbox
        """
        self.inbox.put((priority, msg))

    def _run(self):
        """Run the Actor in a blocking event loop
        """
        self.running = True

        while self.running:
            prio, msg = self.inbox.get()
            self.receive(msg)
            del msg
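A hedged usage sketch for the Actor above (not part of the original source). It assumes the ShutdownRequest message class referenced in the handler table is defined elsewhere in that project; Ping is an illustrative message type added here.

class Ping(object):
    def __init__(self, text):
        self.text = text

class PingActor(Actor):
    def __init__(self):
        Actor.__init__(self)
        self._handlers[Ping] = self.receive_ping

    def receive_ping(self, msg):
        print(msg.text)

actor = PingActor()
actor.start()
actor.send(Ping('handled second'), priority=90)
actor.send(Ping('handled first'), priority=10)   # lower number = drained first
actor.send(ShutdownRequest(), priority=99)       # assumed no-argument constructor
actor.join()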
Example #40
 def __init__(self, factory,
              retry_max=3, retry_delay=.1,
              timeout=-1, max_lifetime=600.,
              max_size=10, options=None):
     self.max_size = max_size
     self.pool = PriorityQueue()
     self.size = 0
     self.factory = factory
     self.retry_max = retry_max
     self.retry_delay = retry_delay
     self.timeout = timeout
     self.max_lifetime = max_lifetime
     if options is None:
         self.options = {}
     else:
         self.options = options
Example #41
 def __init__(self, target, options):
     self.start_time = time.time()
     self.target = target.strip()
     self.options = options
     self.client = MongoClient(connect=False)
     self.host_db = self.client['orangescan']['subdomain']['host']
     self.ip_db = self.client['orangescan']['subdomain']['ip']
     self.ignore_intranet = options.i
     self.scan_count = self.found_count = 0
     self.console_width = getTerminalSize()[0] - 2
     self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
     for _ in self.resolvers:
         _.lifetime = _.timeout = 10.0
     self.print_count = 0
     self.STOP_ME = False
     self._load_dns_servers()
     self._load_next_sub()
     self.queue = PriorityQueue()
     self.priority = 0
     self._load_sub_names()
     self.ip_dict = {}
     self.found_subs = set()
     self.ex_resolver = dns.resolver.Resolver(configure=False)
     self.ex_resolver.nameservers = self.dns_servers
Example #42
class StaticSmtpRelay(Relay):
    """Manages the relaying of messages to a specific ``host:port``. Connections
    may be recycled when possible, to send multiple messages over a single
    channel.

    :param host: Host string to connect to.
    :param port: Port to connect to.
    :param pool_size: At most this many simultaneous connections will be open to
                      the destination. If this limit is reached and no
                      connections are idle, new attempts will block.
    :param tls: Optional dictionary of TLS settings passed directly as
                keyword arguments to :class:`gevent.ssl.SSLSocket`.
    :param tls_required: If given and True, it should be considered a delivery
                         failure if TLS cannot be negotiated by the client.
    :param connect_timeout: Timeout in seconds to wait for a client connection
                            to be successful before issuing a transient failure.
    :param command_timeout: Timeout in seconds to wait for a reply to each SMTP
                            command before issuing a transient failure.
    :param data_timeout: Timeout in seconds to wait for a reply to message data
                         before issuing a transient failure.
    :param idle_timeout: Timeout in seconds after a message is delivered before
                         a QUIT command is sent and the connection terminated.
                         If another message should be delivered before this
                         timeout expires, the connection will be re-used. By
                         default, QUIT is sent immediately and connections are
                         never re-used.


    """

    def __init__(self, host, port=25, pool_size=None, client_class=None,
                       **client_kwargs):
        super(StaticSmtpRelay, self).__init__()
        if client_class:
            self.client_class = client_class
        else:
            from slimta.relay.smtp.client import SmtpRelayClient
            self.client_class = SmtpRelayClient
        self.host = host
        self.port = port
        self.queue = PriorityQueue()
        self.pool = set()
        self.pool_size = pool_size
        self.client_kwargs = client_kwargs

    def _remove_client(self, client):
        self.pool.remove(client)
        if not self.queue.empty() and not self.pool:
            self._add_client()

    def _add_client(self):
        client = self.client_class((self.host, self.port), self.queue,
                                   **self.client_kwargs)
        client.start()
        client.link(self._remove_client)
        self.pool.add(client)

    def _check_idle(self):
        for client in self.pool:
            if client.idle:
                return
        if not self.pool_size or len(self.pool) < self.pool_size:
            self._add_client()

    def attempt(self, envelope, attempts):
        self._check_idle()
        result = AsyncResult()
        self.queue.put((1, result, envelope))
        return result.get()
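A hedged usage sketch for the relay above, not taken from the original project. It assumes a reachable SMTP server on 127.0.0.1:25 and slimta's Envelope class; the addresses are placeholders.

from slimta.envelope import Envelope

relay = StaticSmtpRelay('127.0.0.1', port=25, pool_size=4)

env = Envelope('sender@example.com', ['rcpt@example.com'])
env.parse('From: sender@example.com\r\n\r\nhello\r\n')

# attempt() puts (1, AsyncResult, envelope) on the PriorityQueue and blocks on
# the AsyncResult until one of the pooled SmtpRelayClient greenlets delivers it.
reply = relay.attempt(env, 0)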
Example #43
class NetworkManager(object):

    """网络控制类"""

    logger = logging.getLogger('Crawler.NetworkManager')

    def __init__(self, crawler):
        self._crawler = crawler
        self.proxy_pool = Queue()
        self._proxy_lock = RLock()
        max_connections = crawler.max_connections
        self._request_queue = PriorityQueue()
        self._request_semaphore = BoundedSemaphore(max_connections)

    def join(self):
        """等待队列里面的请求发送完成"""
        while not self._request_queue.empty():
            # self._process_request_from_queue()
            gevent.sleep(5)

    def request(self, method, url, **kwargs):
        """阻塞请求一个url。

        :param method:
        :param url:
        :param kwargs: 同add_request
        :return: :rtype: :raise err:
        """

        # Build the default HTTP headers
        default_header = {
            'Accept': self._crawler.accept_mine,
            'Accept-Language': self._crawler.accept_language,
            'User-Agent': self._crawler.user_agent,
            'Connection': 'keep-alive',
            'Accept-Encoding': 'gzip, deflate'
        }

        # Fall back to the global header settings when none are provided
        kwargs['headers'] = kwargs.pop('headers', {})
        default_header.update(kwargs['headers'])
        kwargs['headers'] = default_header

        # Fall back to the global timeout when none is provided
        kwargs['timeout'] = kwargs.pop('timeout',
                                       self._crawler.timeout)

        session = requests.Session()
        session.max_redirects = self._crawler.max_redirects

        kwargs['cookies'] = kwargs.pop('cookies', {})

        # Set up proxies
        kwargs['proxies'] = kwargs.pop('proxies', self._crawler.proxies)

        try_times = 0

        while try_times <= self._crawler.max_retries:
            try_times += 1
            try:
                self.logger.debug('[%s]>> %s' % (method.upper(), url))
                response = session.request(method, url, **kwargs)

                if self._crawler.retry_with_no_content and not response.content:
                    self.logger.warning('Page has no content.')
                    raise NoContent

                if self._crawler.retry_with_broken_content and '</html>' not in response.content:
                    self.logger.warning('Page content has been broken.')
                    raise BreakenContent

                if response.status_code in self._crawler.do_not_retry_with_server_error_code:
                    self.logger.warning(
                        'Something is wrong with the server, but we DO NOT retry on it.')
                    raise ServerErrorWithoutRetry(
                        'Error Code:%s' % response.status_code)

                # Hit a non-200 status code
                if response.status_code != 200 and response.status_code not in self._crawler.ignore_server_error_code:
                    self._crawler.on_server_error(response)

                    # self.logger.warning('Something wrong with server.')
                    # raise ServerError, 'Error Code:%s' % response.status_code

            except (ConnectionError, Timeout, socket.timeout, socket.error, TryAgain,), err:
                # Ugly workaround: proxy failures do not raise a dedicated exception type
                if kwargs['proxies'] and any(
                        urlsplit(proxy).hostname in str(err.message) for proxy in kwargs['proxies'].values()):
                    # The proxy looks broken, so switch to another one
                    self.logger.debug(
                        'Proxy %s seems to be down.', kwargs['proxies'])
                    self.switch_proxy(kwargs['proxies'].values()[0])

                    # self._crawler.on_proxies_error(kwargs['proxies'][0])

                # Retry on retryable / blank-page exceptions; otherwise re-raise
                if isinstance(err, ConnectionError) and not isinstance(err.message, MaxRetryError):
                    raise err

                sleep_time = self._crawler.sleep_seconds * try_times

                self.logger.debug(err)

                self.logger.info('Try again with %s after %s '
                                 'seconds' % (url, sleep_time))

                gevent.sleep(sleep_time)
            except BaseException, err:
                # TODO: not sure whether an uncatchable gevent timeout can occur here; remove once stable.
                self.logger.error(type(err))
                self.logger.error(err)
            else:
Example #44
class FactoryPool(object):

    def __init__(self, factory, maxsize=200, timeout=60):
        self.factory = factory
        self.maxsize = maxsize
        self.timeout = timeout
        self.clients = PriorityQueue(maxsize)
        # If there is a maxsize, prime the queue with empty slots.
        if maxsize is not None:
            for _ in xrange(maxsize):
                self.clients.put(EMPTY_SLOT)

    @contextlib.contextmanager
    def reserve(self):
        """Context-manager to obtain a Client object from the pool."""
        ts, client = self._checkout_connection()
        try:
            yield client
        finally:
            self._checkin_connection(ts, client)

    def _checkout_connection(self):
        # If there's no maxsize, no need to block waiting for a connection.
        blocking = self.maxsize is not None

        # Loop until we get a non-stale connection, or we create a new one.
        while True:
            try:
                ts, client = self.clients.get(blocking)
            except Empty:
                # No maxsize and no free connections, create a new one.
                # XXX TODO: we should be using a monotonic clock here.
                # see http://www.python.org/dev/peps/pep-0418/
                now = int(time.time())
                return now, self.factory()
            else:
                now = int(time.time())
                # If we got an empty slot placeholder, create a new connection.
                if client is None:
                    return now, self.factory()
                # If the connection is not stale, go ahead and use it.
                if ts + self.timeout > now:
                    return ts, client
                # Otherwise, the connection is stale.
                # Close it, push an empty slot onto the queue, and retry.
                if hasattr(client, 'disconnect'):
                    client.disconnect()

                self.clients.put(EMPTY_SLOT)
                continue

    def _checkin_connection(self, ts, client):
        """Return a connection to the pool."""
        if hasattr(client, '_closed') and client._closed:
            self.clients.put(EMPTY_SLOT)
            return

        # If the connection is now stale, don't return it to the pool.
        # Push an empty slot instead so that it will be refreshed when needed.
        now = int(time.time())
        if ts + self.timeout > now:
            self.clients.put((ts, client))
        else:
            if self.maxsize is not None:
                self.clients.put(EMPTY_SLOT)
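A hedged usage sketch for the FactoryPool above (not from the original source); DummyClient stands in for whatever the factory would normally build.

class DummyClient(object):
    def disconnect(self):
        pass

pool = FactoryPool(DummyClient, maxsize=4, timeout=30)

with pool.reserve() as client:
    # the client is checked out of the PriorityQueue here, and checked back in
    # (or replaced by EMPTY_SLOT if it has gone stale) when the block exits
    print(client)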
Example #45
class SubNameBrute:
    def __init__(self, target, options):
        self.start_time = time.time()
        self.target = target.strip()
        self.options = options
        self.ignore_intranet = options.i
        self.scan_count = self.found_count = 0
        self.console_width = getTerminalSize()[0] - 2
        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
        for _ in self.resolvers:
            _.lifetime = _.timeout = 10.0
        self.print_count = 0
        self.STOP_ME = False
        self._load_dns_servers()
        self._load_next_sub()
        self.queue = PriorityQueue()
        self.priority = 0
        self._load_sub_names()
        if options.output:
            outfile = options.output
        else:
            _name = os.path.basename(self.options.file).replace('subnames', '')
            if _name != '.txt':
                _name = '_' + _name
            outfile = target + _name if not options.full_scan else target + '_full' + _name
        self.outfile = open(outfile, 'w')
        self.ip_dict = {}
        self.found_subs = set()
        self.ex_resolver = dns.resolver.Resolver(configure=False)
        self.ex_resolver.nameservers = self.dns_servers

    def _load_dns_servers(self):
        print '[+] Validate DNS servers ...'
        self.dns_servers = []
        pool = Pool(30)
        for server in open('dict/dns_servers.txt').xreadlines():
            server = server.strip()
            if server:
                pool.apply_async(self._test_server, (server,))
        pool.join()

        self.dns_count = len(self.dns_servers)
        sys.stdout.write('\n')
        print '[+] Found %s available DNS Servers in total' % self.dns_count
        if self.dns_count == 0:
            print '[ERROR] No DNS Servers available.'
            sys.exit(-1)

    def _test_server(self, server):
        resolver = dns.resolver.Resolver()
        resolver.lifetime = resolver.timeout = 10.0
        try:
            resolver.nameservers = [server]
            answers = resolver.query('public-dns-a.baidu.com')    # test lookup of a known existing domain
            if answers[0].address != '180.76.76.76':
                raise Exception('incorrect DNS response')
            try:
                resolver.query('test.bad.dns.lijiejie.com')    # non-existent domain test
                with open('bad_dns_servers.txt', 'a') as f:
                    f.write(server + '\n')
                self._print_msg('[+] Bad DNS Server found %s' % server)
            except:
                self.dns_servers.append(server)
            self._print_msg('[+] Check DNS Server %s < OK >   Found %s' % (server.ljust(16), len(self.dns_servers)))
        except:
            self._print_msg('[+] Check DNS Server %s <Fail>   Found %s' % (server.ljust(16), len(self.dns_servers)))

    def _load_sub_names(self):
        self._print_msg('[+] Load sub names ...')
        if self.options.full_scan and self.options.file == 'subnames.txt':
            _file = 'dict/subnames_full.txt'
        else:
            if os.path.exists(self.options.file):
                _file = self.options.file
            elif os.path.exists('dict/%s' % self.options.file):
                _file = 'dict/%s' % self.options.file
            else:
                self._print_msg('[ERROR] Names file not exists: %s' % self.options.file)
                exit(-1)

        normal_lines = []
        wildcard_lines = []
        wildcard_list = []
        regex_list = []
        lines = set()
        with open(_file) as f:
            for line in f.xreadlines():
                sub = line.strip()
                if not sub or sub in lines:
                    continue
                lines.add(sub)

                if sub.find('{alphnum}') >= 0 or sub.find('{alpha}') >= 0 or sub.find('{num}') >= 0:
                    wildcard_lines.append(sub)
                    sub = sub.replace('{alphnum}', '[a-z0-9]')
                    sub = sub.replace('{alpha}', '[a-z]')
                    sub = sub.replace('{num}', '[0-9]')
                    if sub not in wildcard_list:
                        wildcard_list.append(sub)
                        regex_list.append('^' + sub + '$')
                else:
                    normal_lines.append(sub)
        pattern = '|'.join(regex_list)
        if pattern:
            _regex = re.compile(pattern)
            if _regex:
                for line in normal_lines[:]:
                    if _regex.search(line):
                        normal_lines.remove(line)

        for item in normal_lines:
            self.priority += 1
            self.queue.put((self.priority, item))

        for item in wildcard_lines:
            self.queue.put((88888888, item))

    def _load_next_sub(self):
        self._print_msg('[+] Load next level subs ...')
        self.next_subs = []
        _set = set()
        _file = 'dict/next_sub.txt' if not self.options.full_scan else 'dict/next_sub_full.txt'
        with open(_file) as f:
            for line in f:
                sub = line.strip()
                if sub and sub not in self.next_subs:
                    tmp_set = {sub}
                    while len(tmp_set) > 0:
                        item = tmp_set.pop()
                        if item.find('{alphnum}') >= 0:
                            for _letter in 'abcdefghijklmnopqrstuvwxyz0123456789':
                                tmp_set.add(item.replace('{alphnum}', _letter, 1))
                        elif item.find('{alpha}') >= 0:
                            for _letter in 'abcdefghijklmnopqrstuvwxyz':
                                tmp_set.add(item.replace('{alpha}', _letter, 1))
                        elif item.find('{num}') >= 0:
                            for _letter in '0123456789':
                                tmp_set.add(item.replace('{num}', _letter, 1))
                        elif item not in _set:
                            _set.add(item)
                            self.next_subs.append(item)

    def _print_msg(self, _msg=None, _found_msg=False):
        if _msg is None:
            self.print_count += 1
            if self.print_count < 100:
                return
            self.print_count = 0
            msg = '%s Found| %s Groups| %s scanned in %.1f seconds' % (
                self.found_count, self.queue.qsize(), self.scan_count, time.time() - self.start_time)
            sys.stdout.write('\r' + ' ' * (self.console_width - len(msg)) + msg)
        elif _msg.startswith('[+] Check DNS Server'):
            sys.stdout.write('\r' + _msg + ' ' * (self.console_width - len(_msg)))
        else:
            sys.stdout.write('\r' + _msg + ' ' * (self.console_width - len(_msg)) + '\n')
            if _found_msg:
                msg = '%s Found| %s Groups| %s scanned in %.1f seconds' % (
                    self.found_count, self.queue.qsize(), self.scan_count, time.time() - self.start_time)
                sys.stdout.write('\r' + ' ' * (self.console_width - len(msg)) + msg)
        sys.stdout.flush()

    @staticmethod
    def is_intranet(ip):
        ret = ip.split('.')
        if len(ret) != 4:
            return True
        if ret[0] == '10':
            return True
        if ret[0] == '172' and 16 <= int(ret[1]) <= 31:
            return True
        if ret[0] == '192' and ret[1] == '168':
            return True
        return False

    def put_item(self, item):
        num = item.count('{alphnum}') + item.count('{alpha}') + item.count('{num}')
        if num == 0:
            self.priority += 1
            self.queue.put((self.priority, item))
        else:
            self.queue.put((self.priority + num * 10000000, item))

    def _scan(self, j):
        self.resolvers[j].nameservers = [self.dns_servers[j % self.dns_count]]
        while not self.queue.empty():
            try:
                item = self.queue.get(timeout=1.0)[1]
                self.scan_count += 1
            except:
                break
            self._print_msg()
            try:
                if item.find('{alphnum}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz0123456789':
                        self.put_item(item.replace('{alphnum}', _letter, 1))
                    continue
                elif item.find('{alpha}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz':
                        self.put_item(item.replace('{alpha}', _letter, 1))
                    continue
                elif item.find('{num}') >= 0:
                    for _letter in '0123456789':
                        self.put_item(item.replace('{num}', _letter, 1))
                    continue
                elif item.find('{next_sub}') >= 0:
                    for _ in self.next_subs:
                        self.queue.put((0, item.replace('{next_sub}', _, 1)))
                    continue
                else:
                    sub = item

                if sub in self.found_subs:
                    continue

                cur_sub_domain = sub + '.' + self.target
                _sub = sub.split('.')[-1]
                try:
                    answers = self.resolvers[j].query(cur_sub_domain)
                except dns.resolver.NoAnswer, e:
                    answers = self.ex_resolver.query(cur_sub_domain)

                if answers:
                    self.found_subs.add(sub)
                    ips = ', '.join(sorted([answer.address for answer in answers]))
                    if ips in ['1.1.1.1', '127.0.0.1', '0.0.0.0']:
                        continue

                    if self.ignore_intranet and SubNameBrute.is_intranet(answers[0].address):
                        continue

                    try:
                        self.scan_count += 1
                        answers = self.resolvers[j].query(cur_sub_domain, 'cname')
                        cname = answers[0].target.to_unicode().rstrip('.')
                        if cname.endswith(self.target) and cname not in self.found_subs:
                            self.found_subs.add(cname)
                            cname_sub = cname[:len(cname) - len(self.target) - 1]    # new sub
                            self.queue.put((0, cname_sub))

                    except:
                        pass

                    if (_sub, ips) not in self.ip_dict:
                        self.ip_dict[(_sub, ips)] = 1
                    else:
                        self.ip_dict[(_sub, ips)] += 1

                    if ips not in self.ip_dict:
                        self.ip_dict[ips] = 1
                    else:
                        self.ip_dict[ips] += 1

                    if self.ip_dict[(_sub, ips)] > 3 or self.ip_dict[ips] > 6:
                        continue

                    self.found_count += 1
                    msg = cur_sub_domain.ljust(30) + ips
                    self._print_msg(msg, _found_msg=True)
                    self._print_msg()
                    self.outfile.write(cur_sub_domain.ljust(30) + '\t' + ips + '\n')
                    self.outfile.flush()
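                    # probe a name that should not exist; NXDOMAIN/NoAnswer means
                    # no wildcard at this level, so queue a deeper {next_sub} scan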
                    try:
                        self.resolvers[j].query('lijiejietest.' + cur_sub_domain)
                    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
                        self.queue.put((999999999, '{next_sub}.' + sub))
                    except:
                        pass

            except (dns.resolver.NXDOMAIN, dns.name.EmptyLabel) as e:
                pass
            except (dns.resolver.NoNameservers, dns.resolver.NoAnswer, dns.exception.Timeout) as e:
                pass
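
A minimal standalone sketch (not part of the scanner above; the hostnames are made up) of how put_item's priority scheme orders work in a PriorityQueue: plain names come out in FIFO order, while names still carrying {alphnum}/{alpha}/{num} placeholders come out last. gevent's PriorityQueue is assumed here; the stdlib Queue.PriorityQueue behaves the same for this purpose.

from gevent.queue import PriorityQueue


class PrioritySketch(object):
    def __init__(self):
        self.queue = PriorityQueue()
        self.priority = 0

    def put_item(self, item):
        # same idea as put_item above: each unresolved placeholder pushes the
        # item 10,000,000 slots back so concrete names are dequeued first
        num = item.count('{alphnum}') + item.count('{alpha}') + item.count('{num}')
        if num == 0:
            self.priority += 1
            self.queue.put((self.priority, item))
        else:
            self.queue.put((self.priority + num * 10000000, item))


sketch = PrioritySketch()
for name in ('www', 'mail', 'db{num}', 'api-{alphnum}{alphnum}'):
    sketch.put_item(name)

while not sketch.queue.empty():
    print(sketch.queue.get())
# -> (1, 'www'), (2, 'mail'), (10000002, 'db{num}'), (20000002, 'api-{alphnum}{alphnum}')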
Example #46
File: fw.py Project: andreypopp/fw
class WaveGenerator(object):

    def __init__(self, u):
        self.uid = u.uid
        self.access_token = u.access_token
        self.db = DB()
        self.results = PriorityQueue()

    def fetch_friends(self):
        friends = self.db.friends(self.uid)
        if not friends:
            friends = [User(x["id"], None, x["name"]) for x
                in unpage_par(fb.me.using(self.access_token).friends.get)]
            self.db.store_friends(self.uid, friends)
        return friends

    def rt_listens_for(self, u):
        rt_results = users.subscribe(u.uid, self.access_token)
        for listen in rt_results:
            if not self.db.has_song(listen.sid):
                self.db.store_song(self.fetch_song(listen.sid))
            t = self.db.match_song(listen.sid)
            if t:
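                # priority 1: real-time listens are delivered before the
                # backfilled history put with priority 10 in fetch_listens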
                self.results.put((1, {
                        "trackId": t.ztid,
                        "userId": u.uid,
                        "userName": u.uname,
                        "src": t.surl,
                        "songName": t.title,
                        "artistName": t.artist_name,
                        "artistPhoto": t.aimgurl,
                        "coverSrc": t.rimgurl,
                        "timestamp": listen.ts.strftime("%Y-%m-%dT%H:%M:%S+0000")
                        }))

    def listens_for(self, u, num=50):
        last_ts, last_cts = self.db.last_listen(u.uid)
       #if not last_cts or (
       #        last_cts and datetime.utcnow() - last_ts > timedelta(seconds=300)):
       #    for listen in unpage_seq(
       #            fb[u.uid]["music.listens"].using(self.access_token).get, num):
       #        ts = fb_datetime(listen.get("end_time"))
       #        if last_ts and last_ts >= ts:
       #            break
       #        listen = Listen(
       #            lid=listen.get("id"),
       #            uid=u.uid,
       #            sid=listen.get("data", {}).get("song", {}).get("id"),
       #            ts=ts)
       #        self.db.store_listen(listen)
       #        yield listen
       #    self.db.update_cts(u.uid)
        time.sleep(2)
        for n, listen in enumerate(self.db.listens(u.uid, last_ts)):
            if n % 3 == 0:
                time.sleep(1)
            yield listen

    def fetch_song(self, sid):
        data = fb[sid].using(self.access_token).get()
        return Song(
            sid=sid,
            title=data.get("title"),
            artist_name=data.get("data", {}).get("musician", [{}])[0].get("name"),
            site_name=data.get("site_name"))

    def fetch_listens(self, u):
        for listen in self.listens_for(u):
            if not self.db.has_song(listen.sid):
                self.db.store_song(self.fetch_song(listen.sid))
            t = self.db.match_song(listen.sid)
            if t:
                self.results.put((10, {
                    "trackId": t.ztid,
                    "userId": u.uid,
                    "userName": u.uname,
                    "src": t.surl,
                    "songName": t.title,
                    "artistName": t.artist_name,
                    "artistPhoto": t.aimgurl,
                    "coverSrc": t.rimgurl,
                    "timestamp": listen.ts.strftime("%Y-%m-%dT%H:%M:%S+0000")
                    }))

    def fetch(self):
        friends = self.fetch_friends()
        for f in friends:
            spawn(self.rt_listens_for, f)
            spawn(self.fetch_listens, f)

    def __call__(self):
        spawn(self.fetch)
        return self.results
Example #47
File: fw.py Project: andreypopp/fw
 def __init__(self, u):
     self.uid = u.uid
     self.access_token = u.access_token
     self.db = DB()
     self.results = PriorityQueue()
Example #48
class DatasetDownloader(object):
    _queue_item_type = namedtuple("queue_item",
                                    ("hour", "sleep_until", "filename",
                                     "expect_pressures", "bad_downloads"))

    def __init__(self, directory, ds_time, timeout=120,
                 first_file_timeout=600,
                 bad_download_retry_limit=3,
                 write_dataset=True, write_gribmirror=True,
                 deadline=None,
                 dataset_host="ftp.ncep.noaa.gov",
                 dataset_path="/pub/data/nccf/com/gfs/prod/gfs.{0}/"):

        # set these ASAP for close() via __del__ if __init__ raises something
        self.success = False
        self._dataset = None
        self._gribmirror = None
        self._tmp_directory = None

        assert ds_time.hour in (0, 6, 12, 18)
        assert ds_time.minute == ds_time.second == ds_time.microsecond == 0

        if not (write_dataset or write_gribmirror):
            raise ValueError("Choose write_datset or write_gribmirror "
                                "(or both)")

        if deadline is None:
            deadline = max(datetime.now() + timedelta(hours=2),
                           ds_time + timedelta(hours=9, minutes=30))

        self.directory = directory
        self.ds_time = ds_time

        self.timeout = timeout
        self.first_file_timeout = first_file_timeout
        self.write_dataset = write_dataset
        self.write_gribmirror = write_gribmirror
        self.bad_download_retry_limit = bad_download_retry_limit

        self.deadline = deadline
        self.dataset_host = dataset_host
        self.dataset_path = dataset_path

        self.have_first_file = False

        self.files_complete = 0
        self.files_count = 0
        self.completed = Event()

        ds_time_str = self.ds_time.strftime("%Y%m%d%H")
        self.remote_directory = dataset_path.format(ds_time_str)

        self._greenlets = Group()
        self.unpack_lock = RLock()

        # Items in the queue are
        #   (hour, sleep_until, filename, ...)
        # so items sort by hour first; if a not-found response adds a delay
        # to a specific file, other files from that hour without a delay are
        # tried first
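        #
        # e.g. (3, 0, "gfs.t00z.pgrb2f03", ...) is tried before
        # (3, 60, "gfs.t00z.pgrb2bf03", ...), and both before any hour-6 item
        # (filenames and delays here are illustrative only)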
        self._files = PriorityQueue()

        # areas in self.dataset.array are considered 'undefined' until
        #   self.checklist[index[:3]] is True, since unpack_grib may
        #   write to them, and then abort via ValueError before updating
        #   the checklist if the file later turns out to be bad

        # the checklist also serves as a sort of final sanity check:
        #   we also have "does this file contain all the records we think it
        #   should" checklists; see Worker._download_file

        self._checklist = make_checklist()

    def open(self):
        logger.info("downloader: opening files for dataset %s", self.ds_time)

        self._tmp_directory = \
                tempfile.mkdtemp(dir=self.directory, prefix="download.")
        os.chmod(self._tmp_directory, 0o775)
        logger.debug("Temporary directory is %s", self._tmp_directory)

        if self.write_dataset:
            self._dataset = \
                Dataset(self.ds_time, directory=self._tmp_directory, new=True)

        if self.write_gribmirror:
            fn = Dataset.filename(self.ds_time,
                                  directory=self._tmp_directory,
                                  suffix=Dataset.SUFFIX_GRIBMIRROR)
            logger.debug("Opening gribmirror (truncate and write) %s %s",
                                self.ds_time, fn)
            self._gribmirror = open(fn, "w+")

    def download(self):
        logger.info("download of %s starting", self.ds_time)

        ttl, addresses = resolve_ipv4(self.dataset_host)
        logger.debug("Resolved to %s IPs", len(addresses))

        addresses = [inet_ntoa(x) for x in addresses]

        total_timeout = self.deadline - datetime.now()
        total_timeout_secs = total_timeout.total_seconds()
        if total_timeout_secs < 0:
            raise ValueError("Deadline already passed")
        else:
            logger.debug("Deadline in %s", total_timeout)

        self._add_files()
        self._run_workers(addresses, total_timeout_secs)

        if not self.completed.is_set():
            raise ValueError("timed out")

        if not self._checklist.all():
            raise ValueError("incomplete: records missing")

        self.success = True
        logger.debug("downloaded %s successfully", self.ds_time)

    def _add_files(self):
        filename_prefix = self.ds_time.strftime("gfs.t%Hz.pgrb2")
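        # e.g. for a 00Z dataset and hour 3 this yields "gfs.t00z.pgrb2f03"
        # and "gfs.t00z.pgrb2bf03" (illustrative)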

        for hour in Dataset.axes.hour:
            hour_str = "{0:02}".format(hour)

            for bit, exp_pr in (("f", Dataset.pressures_pgrb2f),
                                ("bf", Dataset.pressures_pgrb2bf)):
                self._files.put(self._queue_item_type(
                    hour, 0, filename_prefix + bit + hour_str, exp_pr, 0))
                self.files_count += 1

        logger.info("Need to download %s files", self.files_count)

    def _run_workers(self, addresses, total_timeout_secs):
        logger.debug("Spawning %s workers", len(addresses) * 2)

        # don't ask _join_all to raise the first exception it catches
        # if we're already raising something in the except block
        raising = False

        try:
            for worker_id, address in enumerate(addresses * 2):
                w = DownloadWorker(self, worker_id, address)
                w.start()
                w.link()
                self._greenlets.add(w)

            # worker unhandled exceptions are raised in this greenlet
            # via link(). They can appear in completed.wait and
            # greenlets.kill(block=True) only (the only times that this
            # greenlet will yield)
            self.completed.wait(timeout=total_timeout_secs)

        except:
            # includes LinkedCompleted - a worker should not exit cleanly
            # until we .kill them below
            logger.debug("_run_workers catch %s (will reraise)",
                         sys.exc_info()[1])
            raising = True
            raise

        finally:
            # don't leak workers.
            self._join_all(raise_exception=(not raising))

    def _join_all(self, raise_exception=False):
        # we need the loop to run to completion and so have it catch and
        # hold or discard exceptions for later.
        # track the first exception caught and re-raise that
        exc_info = None

        while len(self._greenlets):
            try:
                self._greenlets.kill(block=True)
            except greenlet.LinkedCompleted:
                # now that we've killed workers, these are expected.
                # ignore.
                pass
            except greenlet.LinkedFailed as e:
                if exc_info is None and raise_exception:
                    logger.debug("_join_all catch %s "
                                 "(will reraise)", e)
                    exc_info = sys.exc_info()
                else:
                    logger.debug("_join_all discarding %s "
                                 "(already have exc)", e)

        if exc_info is not None:
            try:
                raise exc_info[1], None, exc_info[2]
            finally:
                # avoid circular reference
                del exc_info

    def _file_complete(self):
        self.files_complete += 1
        self.have_first_file = True

        if self.files_complete == self.files_count:
            self.completed.set()

        logger.info("progress %s/%s %s%%",
                    self.files_complete, self.files_count,
                    self.files_complete / self.files_count * 100)

    def close(self, move_files=None):
        if move_files is None:
            move_files = self.success

        if self._dataset is not None or self._gribmirror is not None or \
                self._tmp_directory is not None:
            if move_files:
                logger.info("moving downloaded files")
            else:
                logger.info("deleting failed download files")

        if self._dataset is not None:
            self._dataset.close()
            self._dataset = None
            if move_files:
                self._move_file()
            else:
                self._delete_file()

        if self._gribmirror is not None:
            self._gribmirror.close()
            self._gribmirror = None
            if move_files:
                self._move_file(Dataset.SUFFIX_GRIBMIRROR)
            else:
                self._delete_file(Dataset.SUFFIX_GRIBMIRROR)

        if self._tmp_directory is not None:
            self._remove_download_directory()
            self._tmp_directory = None

    def __del__(self):
        self.close()

    def _remove_download_directory(self):
        l = os.listdir(self._tmp_directory)
        if l:
            logger.warning("cleaning %s unknown file%s in temporary directory",
                           len(l), '' if len(l) == 1 else 's')

        logger.debug("removing temporary directory")
        shutil.rmtree(self._tmp_directory)

    def _move_file(self, suffix=''):
        fn1 = Dataset.filename(self.ds_time,
                               directory=self._tmp_directory,
                               suffix=suffix)
        fn2 = Dataset.filename(self.ds_time,
                               directory=self.directory,
                               suffix=suffix)
        logger.debug("renaming %s to %s", fn1, fn2)
        os.rename(fn1, fn2)

    def _delete_file(self, suffix=''):
        fn = Dataset.filename(self.ds_time,
                              directory=self._tmp_directory,
                              suffix=suffix)
        logger.warning("deleting %s", fn)
        os.unlink(fn)
Example #49
class AWSLogs(object):

    ACTIVE = 1
    EXHAUSTED = 2
    WATCH_SLEEP = 2

    def __init__(self, **kwargs):
        self.connection_cls = kwargs.get('connection_cls', AWSConnection)
        self.aws_region = kwargs.get('aws_region')
        self.aws_access_key_id = kwargs.get('aws_access_key_id')
        self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
        self.log_group_name = kwargs.get('log_group_name')
        self.log_stream_name = kwargs.get('log_stream_name')
        self.watch = kwargs.get('watch')
        self.color_enabled = kwargs.get('color_enabled')
        self.output_stream_enabled = kwargs.get('output_stream_enabled')
        self.output_group_enabled = kwargs.get('output_group_enabled')
        self.start = self.parse_datetime(kwargs.get('start'))
        self.end = self.parse_datetime(kwargs.get('end'))
        self.pool_size = max(kwargs.get('pool_size', 0), 10)
        self.max_group_length = 0
        self.max_stream_length = 0
        self.publishers = []
        self.events_queue = Queue()
        self.raw_events_queue = PriorityQueue()
        self.publishers_queue = PriorityQueue()
        self.publishers = []
        self.stream_status = {}
        self.stream_max_timestamp = {}
        self.connection = self.connection_cls(
            self.aws_region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key
        )

    def _get_streams_from_patterns(self, log_group_pattern, log_stream_pattern):
        """Returns pairs of group, stream matching ``log_group_pattern`` and
        ``log_stream_pattern``."""
        for group in self._get_groups_from_pattern(log_group_pattern):
            for stream in self._get_streams_from_pattern(group,
                                                         log_stream_pattern):
                yield group, stream

    def _get_groups_from_pattern(self, pattern):
        """Returns groups matching ``pattern``."""
        pattern = '.*' if pattern == 'ALL' else pattern
        reg = re.compile('^{0}'.format(pattern))
        for group in self.get_groups():
            if re.match(reg, group):
                yield group

    def _get_streams_from_pattern(self, group, pattern):
        """Returns streams in ``group`` matching ``pattern``."""
        pattern = '.*' if pattern == 'ALL' else pattern
        reg = re.compile('^{0}'.format(pattern))
        for stream in self.get_streams(group):
            if re.match(reg, stream):
                yield stream

    def _publisher_queue_consumer(self):
        """Consume ``publishers_queue`` api calls, run them and publish log
        events to ``raw_events_queue``. If ``nextForwardToken`` is present
        register a new api call into ``publishers_queue`` using as weight
        the timestamp of the latest event."""
        while True:
            try:
                _, (log_group_name, log_stream_name, next_token) = self.publishers_queue.get(block=False)
            except Empty:
                if self.watch:
                    gevent.sleep(self.WATCH_SLEEP)
                else:
                    break

            response = self.connection.get_log_events(
                next_token=next_token,
                log_group_name=log_group_name,
                log_stream_name=log_stream_name,
                start_time=self.start,
                end_time=self.end,
                start_from_head=True
            )

            if not len(response['events']):
                self.stream_status[(log_group_name, log_stream_name)] = self.EXHAUSTED
                continue

            self.stream_status[(log_group_name, log_stream_name)] = self.ACTIVE

            for event in response['events']:
                event['group'] = log_group_name
                event['stream'] = log_stream_name
                self.raw_events_queue.put((event['timestamp'], event))
                self.stream_max_timestamp[(log_group_name, log_stream_name)] = event['timestamp']

            if 'nextForwardToken' in response:
                self.publishers_queue.put(
                    (response['events'][-1]['timestamp'],
                     (log_group_name, log_stream_name, response['nextForwardToken']))
                )

    def _get_min_timestamp(self):
        """Return the minimum timestamp available across all active streams."""
        pending = [self.stream_max_timestamp[k] for k, v in self.stream_status.iteritems() if v != self.EXHAUSTED]
        return min(pending) if pending else None

    def _get_all_streams_exhausted(self):
        """Return if all streams are exhausted."""
        return all((s == self.EXHAUSTED for s in self.stream_status.itervalues()))

    def _raw_events_queue_consumer(self):
        """Consume events from ``raw_events_queue`` if all active streams
        have already publish events up to the ``_get_min_timestamp`` and
        register them in order into ``events_queue``."""
        while True:
            if self._get_all_streams_exhausted() and self.raw_events_queue.empty():
                if self.watch:
                    gevent.sleep(self.WATCH_SLEEP)
                    continue
                self.events_queue.put(NO_MORE_EVENTS)
                break

            try:
                timestamp, line = self.raw_events_queue.peek(timeout=1)
            except Empty:
                continue

            min_timestamp = self._get_min_timestamp()
            if min_timestamp and min_timestamp < timestamp:
                gevent.sleep(0.3)
                continue

            timestamp, line = self.raw_events_queue.get()

            output = [line['message']]
            if self.output_stream_enabled:
                output.insert(
                    0,
                    self.color(
                        line['stream'].ljust(self.max_stream_length, ' '),
                        'cyan'
                    )
                )
            if self.output_group_enabled:
                output.insert(
                    0,
                    self.color(
                        line['group'].ljust(self.max_group_length, ' '),
                        'green'
                    )
                )
            self.events_queue.put("{0}\n".format(' '.join(output)))

    def _events_consumer(self):
        """Print events from ``events_queue`` as soon as they are available."""
        while True:
            event = self.events_queue.get(True)
            if event == NO_MORE_EVENTS:
                break
            sys.stdout.write(event)
            sys.stdout.flush()

    def list_logs(self):
        self.register_publishers()

        pool = Pool(size=self.pool_size)
        pool.spawn(self._raw_events_queue_consumer)
        pool.spawn(self._events_consumer)

        if self.watch:
            pool.spawn(self.register_publishers_periodically)

        for i in xrange(self.pool_size):
            pool.spawn(self._publisher_queue_consumer)
        pool.join()

    def register_publishers(self):
        """Register publishers into ``publishers_queue``."""
        for group, stream in self._get_streams_from_patterns(self.log_group_name, self.log_stream_name):
            if (group, stream) in self.publishers:
                continue
            self.publishers.append((group, stream))
            self.max_group_length = max(self.max_group_length, len(group))
            self.max_stream_length = max(self.max_stream_length, len(stream))
            self.publishers_queue.put((0, (group, stream, None)))
            self.stream_status[(group, stream)] = self.ACTIVE
            self.stream_max_timestamp[(group, stream)] = -1

    def register_publishers_periodically(self):
        while True:
            self.register_publishers()
            gevent.sleep(2)

    def list_groups(self):
        """Lists available CloudWatch logs groups"""
        for group in self.get_groups():
            print group

    def list_streams(self, *args, **kwargs):
        """Lists available CloudWatch logs streams in ``log_group_name``."""
        for stream in self.get_streams(*args, **kwargs):
            print stream

    def get_groups(self):
        """Returns available CloudWatch logs groups"""
        next_token = None
        while True:
            response = self.connection.describe_log_groups(next_token=next_token)

            for group in response.get('logGroups', []):
                yield group['logGroupName']

            if 'nextToken' in response:
                next_token = response['nextToken']
            else:
                break

    def get_streams(self, log_group_name=None):
        """Returns available CloudWatch logs streams in ``log_group_name``."""
        log_group_name = log_group_name or self.log_group_name
        next_token = None

        while True:
            response = self.connection.describe_log_streams(
                log_group_name=log_group_name,
                next_token=next_token
            )

            for stream in response.get('logStreams', []):
                yield stream['logStreamName']

            if 'nextToken' in response:
                next_token = response['nextToken']
            else:
                break

    def color(self, text, color):
        """Returns coloured version of ``text`` if ``color_enabled``."""
        if self.color_enabled:
            return colored(text, color)
        return text

    def parse_datetime(self, datetime_text):
        """Parse ``datetime_text`` into a ``datetime``."""
        if not datetime_text:
            return None

        ago_match = re.match(r'(\d+)\s?(m|minute|minutes|h|hour|hours|d|day|days|w|week|weeks)(?: ago)?', datetime_text)
        if ago_match:
            amount, unit = ago_match.groups()
            amount = int(amount)
            unit = {'m': 60, 'h': 3600, 'd': 86400, 'w': 604800}[unit[0]]
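            # e.g. "2 hours ago" -> amount=2, unit=3600 -> now - 7200 seconds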
            date = datetime.now() + timedelta(seconds=unit * amount * -1)
        else:
            try:
                date = parse(datetime_text)
            except ValueError:
                raise exceptions.UnknownDateError(datetime_text)

        return int(date.strftime("%s")) * 1000
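
A minimal standalone sketch (stream names, messages and timestamps are made up) of the merge idea used by _publisher_queue_consumer and _raw_events_queue_consumer above: publishers put (timestamp, event) pairs into a PriorityQueue, so the consumer can drain them in timestamp order no matter which stream produced them first. gevent's PriorityQueue is assumed, as in the class above.

from gevent.queue import PriorityQueue

raw_events_queue = PriorityQueue()

# each publisher would put (timestamp, event); these values are made up
raw_events_queue.put((1300, {'stream': 'web-2', 'message': 'GET /health'}))
raw_events_queue.put((1100, {'stream': 'web-1', 'message': 'GET /'}))
raw_events_queue.put((1200, {'stream': 'worker', 'message': 'job done'}))

# drained in timestamp order, regardless of the order they were put in
while not raw_events_queue.empty():
    timestamp, event = raw_events_queue.get()
    print('%s %s %s' % (timestamp, event['stream'], event['message']))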
Example #50
    def __init__(self, directory, ds_time, timeout=120,
                 first_file_timeout=600,
                 bad_download_retry_limit=3,
                 write_dataset=True, write_gribmirror=True,
                 deadline=None,
                 dataset_host="ftp.ncep.noaa.gov",
                 dataset_path="/pub/data/nccf/com/gfs/prod/gfs.{0}/"):

        # set these ASAP for close() via __del__ if __init__ raises something
        self.success = False
        self._dataset = None
        self._gribmirror = None
        self._tmp_directory = None

        assert ds_time.hour in (0, 6, 12, 18)
        assert ds_time.minute == ds_time.second == ds_time.microsecond == 0

        if not (write_dataset or write_gribmirror):
            raise ValueError("Choose write_datset or write_gribmirror "
                                "(or both)")

        if deadline is None:
            deadline = max(datetime.now() + timedelta(hours=2),
                           ds_time + timedelta(hours=9, minutes=30))

        self.directory = directory
        self.ds_time = ds_time

        self.timeout = timeout
        self.first_file_timeout = first_file_timeout
        self.write_dataset = write_dataset
        self.write_gribmirror = write_gribmirror
        self.bad_download_retry_limit = bad_download_retry_limit

        self.deadline = deadline
        self.dataset_host = dataset_host
        self.dataset_path = dataset_path

        self.have_first_file = False

        self.files_complete = 0
        self.files_count = 0
        self.completed = Event()

        ds_time_str = self.ds_time.strftime("%Y%m%d%H")
        self.remote_directory = dataset_path.format(ds_time_str)

        self._greenlets = Group()
        self.unpack_lock = RLock()

        # Items in the queue are
        #   (hour, sleep_until, filename, ...)
        # so items sort by hour first; if a not-found response adds a delay
        # to a specific file, other files from that hour without a delay are
        # tried first
        self._files = PriorityQueue()

        # areas in self.dataset.array are considered 'undefined' until
        #   self.checklist[index[:3]] is True, since unpack_grib may
        #   write to them, and then abort via ValueError before updating
        #   the checklist if the file later turns out to be bad

        # the checklist also serves as a sort of final sanity check:
        #   we also have "does this file contain all the records we think it
        #   should" checklists; see Worker._download_file

        self._checklist = make_checklist()
Example #51
class ConnectionPool(object):

    def __init__(self, factory,
                 retry_max=3, retry_delay=.1,
                 timeout=-1, max_lifetime=600.,
                 max_size=10, options=None):
        self.max_size = max_size
        self.pool = PriorityQueue()
        self.size = 0
        self.factory = factory
        self.retry_max = retry_max
        self.retry_delay = retry_delay
        self.timeout = timeout
        self.max_lifetime = max_lifetime
        if options is None:
            self.options = {}
        else:
            self.options = options

    def too_old(self, conn):
        return time.time() - conn.get_lifetime() > self.max_lifetime

    def release_connection(self, conn):
        connected = conn.is_connected()
        if connected and not self.too_old(conn):
            self.pool.put((conn.get_lifetime(), conn))
        else:
            conn.invalidate()

    def get(self, **options):
        pool = self.pool

        # first let's try to find a matching one
        found = None
        if self.size >= self.max_size or pool.qsize():
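            # walk the pooled connections: too-old ones are discarded,
            # non-matching ones are put back, and the first match is reused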
            for priority, candidate in pool:
                if self.too_old(candidate):
                    # let's drop it
                    continue

                matches = candidate.matches(**options)
                if not matches:
                    # let's put it back
                    pool.put((priority, candidate))
                else:
                    found = candidate
                    break

        # we got one.. we use it
        if found is not None:
            return found

        # we build a new one and send it back
        tries = 0
        last_error = None

        while tries < self.retry_max:
            self.size += 1
            try:
                new_item = self.factory(**options)
            except Exception as e:
                self.size -= 1
                last_error = e
            else:
                # we should be connected now
                if new_item.is_connected():
                    return new_item

            tries += 1
            gevent.sleep(self.retry_delay)

        if last_error is None:
            raise MaxTriesError()
        else:
            raise last_error
Example #52
 def __init__(self):
     self.inbox = PriorityQueue()
     self._handlers = {ShutdownRequest: self.receive_shutdown}
     Greenlet.__init__(self)
Example #53
class SubNameBrute:
    def __init__(self, target, options, process_num, dns_servers, next_subs,
                 scan_count, found_count, queue_size_list, tmp_dir):
        self.target = target.strip()
        self.options = options
        self.process_num = process_num
        self.dns_servers = dns_servers
        self.dns_count = len(dns_servers)
        self.next_subs = next_subs
        self.scan_count = scan_count
        self.scan_count_local = 0
        self.found_count = found_count
        self.found_count_local = 0
        self.queue_size_list = queue_size_list

        self.resolvers = [dns.resolver.Resolver(configure=False) for _ in range(options.threads)]
        for _r in self.resolvers:
            _r.lifetime = _r.timeout = 6.0
        self.queue = PriorityQueue()
        self.item_index = 0
        self.priority = 0
        self._load_sub_names()
        self.ip_dict = {}
        self.found_subs = set()
        self.ex_resolver = dns.resolver.Resolver(configure=False)
        self.ex_resolver.nameservers = dns_servers
        self.local_time = time.time()
        self.outfile = open('%s/%s_part_%s.txt' % (tmp_dir, target, process_num), 'w')

    def _load_sub_names(self):
        if self.options.full_scan and self.options.file == 'subnames.txt':
            _file = 'dict/subnames_full.txt'
        else:
            if os.path.exists(self.options.file):
                _file = self.options.file
            elif os.path.exists('dict/%s' % self.options.file):
                _file = 'dict/%s' % self.options.file
            else:
                print_msg('[ERROR] Names file not found: %s' % self.options.file)
                exit(-1)

        normal_lines = []
        wildcard_lines = []
        wildcard_list = []
        regex_list = []
        lines = set()
        with open(_file) as f:
            for line in f.xreadlines():
                sub = line.strip()
                if not sub or sub in lines:
                    continue
                lines.add(sub)

                if sub.find('{alphnum}') >= 0 or sub.find('{alpha}') >= 0 or sub.find('{num}') >= 0:
                    wildcard_lines.append(sub)
                    sub = sub.replace('{alphnum}', '[a-z0-9]')
                    sub = sub.replace('{alpha}', '[a-z]')
                    sub = sub.replace('{num}', '[0-9]')
                    if sub not in wildcard_list:
                        wildcard_list.append(sub)
                        regex_list.append('^' + sub + '$')
                else:
                    normal_lines.append(sub)
        if regex_list:
            pattern = '|'.join(regex_list)
            _regex = re.compile(pattern)
            for line in normal_lines[:]:
                if _regex.search(line):
                    normal_lines.remove(line)

        for item in normal_lines[self.process_num::self.options.process]:
            self.priority += 1
            self.queue.put((self.priority, item))

        for item in wildcard_lines[self.process_num::self.options.process]:
            self.queue.put((88888888, item))

    def put_item(self, item):
        num = item.count('{alphnum}') + item.count('{alpha}') + item.count('{num}')
        if num == 0:
            self.priority += 1
            self.queue.put((self.priority, item))
        else:
            self.queue.put((self.priority + num * 10000000, item))

    def _scan(self, j):
        self.resolvers[j].nameservers = [self.dns_servers[j % self.dns_count]]
        while not self.queue.empty():
            try:
                item = self.queue.get(timeout=3.0)[1]
                self.scan_count_local += 1
                if time.time() - self.local_time > 3.0:
                    self.scan_count.value += self.scan_count_local
                    self.scan_count_local = 0
                    self.queue_size_list[self.process_num] = self.queue.qsize()
            except Exception as e:
                break
            try:
                if item.find('{alphnum}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz0123456789':
                        self.put_item(item.replace('{alphnum}', _letter, 1))
                    continue
                elif item.find('{alpha}') >= 0:
                    for _letter in 'abcdefghijklmnopqrstuvwxyz':
                        self.put_item(item.replace('{alpha}', _letter, 1))
                    continue
                elif item.find('{num}') >= 0:
                    for _letter in '0123456789':
                        self.put_item(item.replace('{num}', _letter, 1))
                    continue
                elif item.find('{next_sub}') >= 0:
                    for _ in self.next_subs:
                        self.queue.put((0, item.replace('{next_sub}', _, 1)))
                    continue
                else:
                    sub = item

                if sub in self.found_subs:
                    continue

                cur_sub_domain = sub + '.' + self.target
                _sub = sub.split('.')[-1]
                try:
                    answers = self.resolvers[j].query(cur_sub_domain)
                except dns.resolver.NoAnswer as e:
                    answers = self.ex_resolver.query(cur_sub_domain)

                if answers:
                    self.found_subs.add(sub)
                    ips = ', '.join(sorted([answer.address for answer in answers]))
                    if ips in ['1.1.1.1', '127.0.0.1', '0.0.0.0']:
                        continue

                    if self.options.i and is_intranet(answers[0].address):
                        continue

                    try:
                        self.scan_count_local += 1
                        answers = self.resolvers[j].query(cur_sub_domain, 'cname')
                        cname = answers[0].target.to_unicode().rstrip('.')
                        if cname.endswith(self.target) and cname not in self.found_subs:
                            self.found_subs.add(cname)
                            cname_sub = cname[:len(cname) - len(self.target) - 1]    # new sub
                            self.queue.put((0, cname_sub))

                    except:
                        pass

                    if (_sub, ips) not in self.ip_dict:
                        self.ip_dict[(_sub, ips)] = 1
                    else:
                        self.ip_dict[(_sub, ips)] += 1
                        if self.ip_dict[(_sub, ips)] > 30:
                            continue

                    self.found_count_local += 1
                    if time.time() - self.local_time > 3.0:
                        self.found_count.value += self.found_count_local
                        self.found_count_local = 0
                        self.queue_size_list[self.process_num] = self.queue.qsize()
                        self.local_time = time.time()

                    msg = cur_sub_domain.ljust(30) + ips
                    # print_msg(msg, line_feed=True)

                    self.outfile.write(cur_sub_domain.ljust(30) + '\t' + ips + '\n')
                    self.outfile.flush()
                    try:
                        self.resolvers[j].query('lijiejietest.' + cur_sub_domain)
                    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer) as e:
                        self.queue.put((999999999, '{next_sub}.' + sub))
                    except:
                        pass

            except (dns.resolver.NXDOMAIN, dns.name.EmptyLabel) as e:
                pass
            except (dns.resolver.NoNameservers, dns.resolver.NoAnswer, dns.exception.Timeout) as e:
                pass