Example No. 1
def upload_photos_in_pending(with_failed=True):
    q_filter = ['pending']
    if with_failed:
        q_filter.append('failed')

    photos = (Photo.select(Photo.local_path, Photo.ext_album_key).where(
        (Photo.status << q_filter)))
    photos = list(photos)

    def worker():
        logger.info('[New worker started]')
        while True:
            item = q.get()
            try:
                upload_photo(item)
            finally:
                q.task_done()

    q = JoinableQueue(maxsize=10)
    for i in range(UPLOADING_WORKERS_COUNT):
        gevent.spawn(worker)

    for p in photos:
        q.put((p.local_path, p.ext_album_key))

    q.join()
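
A minimal, self-contained sketch of the JoinableQueue worker-pool pattern used above, including the gevent imports the snippet omits; run_pool, process, and items are illustrative placeholders rather than names from the original code:

from gevent import monkey; monkey.patch_all()   # patch the stdlib so blocking I/O yields to other greenlets

import gevent
from gevent.queue import JoinableQueue


def run_pool(items, process, workers=10):
    q = JoinableQueue(maxsize=workers)

    def worker():
        while True:
            item = q.get()              # block until an item is available
            try:
                process(item)
            finally:
                q.task_done()           # always mark the item as handled

    for _ in range(workers):
        gevent.spawn(worker)

    for item in items:
        q.put(item)                     # blocks while the bounded queue is full

    q.join()                            # returns once every item has been task_done()


if __name__ == '__main__':
    run_pool(range(20), print, workers=5)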
Example No. 2
def update_keywords():
    sm_api = SmugmugAPI()

    def worker():
        logger.info('[Worker started]')
        while True:
            item = q.get()
            try:
                sm_api.update_image_keywords(*item)
            finally:
                q.task_done()

    q = JoinableQueue(maxsize=100)
    for i in range(50):
        gevent.spawn(worker)

    photos = (Photo.select(Photo.local_path, Photo.ext_key).where(
        (Photo.status == 'uploaded')))
    photos = list(photos)
    print("Total photos to update:", len(photos))
    cnt = 0
    for p in photos:
        cnt += 1
        print(cnt)
        keywords = get_keywords(p.local_path)
        q.put((p.ext_key, keywords))

    q.join()
Example No. 3
    def on_search(self, query):

        log.debug('search for %r', query)

        queue = JoinableQueue()
        task_group = g.api.search(query, queue)

        while True:
            finished = all(
                [t.ready() for t in task_group]
            )
            try:
                item = queue.get(timeout=1.0)
            except Empty:

                if finished:
                    break

                continue

            try:
                self.emit('result', item._asdict())
            finally:
                queue.task_done()

        queue.join()
        task_group.join()

        self.emit('done', query)
Example No. 4
def process_24_network(net, port):
    q = JoinableQueue()
    r = JoinableQueue()
    gevent.spawn(prepare_list, q, net)

    tasks = []
    for x in range(0, CONCURRENT_GROUPS):
        #print "spawning %i" % x
        tasks += [gevent.spawn(scan_network, q, r, port)]

    q.join()
    gevent.joinall(tasks)

    if not r.empty():
        with open(str(net.ip) + '_' + str(port) + ".m3u", "w+") as f:
            f.write("#EXTM3U\n")
            while not r.empty():
                try:
                    group = r.get(timeout=10)
                    f.write(
                        '#EXTINF:-1 tvg-logo="" tvg-name="" group-title="",ChannelName'
                        + "\n")
                    f.write('udp://@' + str(group) + ':' + str(port) + "\n")
                    logging.info("Ok ====> %s" % group)
                except gevent.queue.Empty:
                    break
Example No. 5
class GeventPoolExecutor2(LoggerMixin):
    def __init__(
        self,
        max_works,
    ):
        self._q = JoinableQueue(maxsize=max_works)
        # self._q = Queue(maxsize=max_works)
        for _ in range(max_works):
            gevent.spawn(self.__worker)
        # atexit.register(self.__atexit)
        self._q.join(timeout=100)

    def __worker(self):
        while True:
            fn, args, kwargs = self._q.get()
            try:
                fn(*args, **kwargs)
            except Exception as exc:
                self.logger.exception(
                    f'Error in function {fn.__name__}; cause: {type(exc)} {exc}')
            finally:
                self._q.task_done()

    def submit(self, fn: Callable, *args, **kwargs):
        self._q.put((fn, args, kwargs))

    def __atexit(self):
        self.logger.critical('About to exit the program.')
        self._q.join()
Example No. 6
class GQueue(object):
    def __init__(self):
        self.__QUEUE = JoinableQueue()

    def job(self, func):
        @functools.wraps(func)
        def f(*args, **kwargs):
            self.__QUEUE.put([func, args, kwargs])

        return f

    def join(self):
        self.__QUEUE.join()

    def work(self):
        while True:
            func, args, kwargs = self.__QUEUE.get()
            try:
                func(*args, **kwargs)
            finally:
                self.__QUEUE.task_done()

    def run_worker(self, num=1):
        for i in range(num):
            gevent.spawn(self.work)
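
A usage sketch for the GQueue helper above, assuming the class (and its gevent/functools imports) is available as shown; the fetch function and its URLs are illustrative only:

gq = GQueue()

@gq.job
def fetch(url):
    print('fetching', url)          # runs inside a worker greenlet

gq.run_worker(num=5)                # start five consumer greenlets

for u in ['http://a.example', 'http://b.example']:
    fetch(u)                        # the decorated call only enqueues (func, args, kwargs)

gq.join()                           # block until every queued job has been processed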
Example No. 7
    def test_api(self):

        queue = JoinableQueue()
        task_group = self.api.search('terminator', queue)

        while True:
            finished = all(
                [greenlet.ready() for greenlet in task_group.greenlets]
            )
            try:
                item = queue.get(timeout=1.0)
            except Empty:

                if finished:
                    log.info('queue is empty and all jobs are done, quitting')
                    break

                log.info(
                    'queue was empty and jobs are still running, retrying'
                )

                continue

            try:
                log.info('%r', item)
            finally:
                queue.task_done()

        task_group.join()
        queue.join()

        log.info('joined everything')
Example No. 8
def extract(input_dir, output_path, func):
    with open(output_path, 'w') as output:

        tasks = JoinableQueue()
        for file_name in os.listdir(input_dir):
            tasks.put(file_name)

        def _extract(file_name):
            file_path = os.path.join(input_dir, file_name)

            with open(file_path) as f:
                try:
                    json = simplejson.load(f)
                except Exception as e:
                    print(str(e))
                    print('Failed to load json file {}'.format(file_path))
                    return  # skip files that fail to parse

                for pair in func(json):
                    output.write('\t'.join([str(x) for x in pair]) + '\n')

        def worker():
            while True:
                file_name = tasks.get()
                try:
                    _extract(file_name)
                    print(file_name)
                finally:
                    tasks.task_done()

        for i in range(10):
            gevent.spawn(worker)

        tasks.join()
Example No. 9
def handle():
    connection = create_postgresql_connection()

    cursor = connection.cursor()
    cursor.execute("BEGIN;")
    cursor.execute("DELETE FROM core_ratequery;")
    cursor.execute("COMMIT;")
    cursor.close()

    queue = JoinableQueue()
    event = Event()

    age_ids = list(age_map(connection).values()) + [None]
    sex_ids = list(sex_map(connection).values()) + [None]
    education_ids = list(education_map(connection).values()) + [None]
    province_ids = list(province_map(connection).values()) + [None]

    cursor = connection.cursor()
    cursor.execute("SELECT DISTINCT cycle FROM core_microdata;");
    cycles = [row[0] for row in cursor]
    cursor.close()

    greenlets = []

    for i in range(50):
        gv = gevent.spawn(worker, queue, event)
        greenlets.append(gv)

    combs = itertools.product(age_ids, sex_ids, province_ids, education_ids, cycles)
    for c in combs:
        queue.put(c)

    queue.join()
    event.set()
    gevent.joinall(greenlets)
Example No. 10
def processor(data):
    """
    Each launched process(=NUM_CORES) executes 1 item in the list map_data as data.
    For given start_id and batch_size, launches gevent consumers to scrape data for the given ID
    Also, the main thread acts as a producer to produce the data for the workers to use 
    """
    try:
        NUM_GREENLETS = 8  # Depending on how much I/O block is expected. Varies for each problem.
        process_id = multiprocessing.current_process()
        monkey.patch_all()  # Patch the standard library so blocking I/O yields to other greenlets

        start_id = data["start_id"]
        batch_size = data["batch_size"]

        joinable_queue = JoinableQueue()

        # Launch NUM_GREENLETS workers
        for i in range(NUM_GREENLETS):
            gevent.spawn(worker,
                         joinable_queue=joinable_queue,
                         greenlet_id=i,
                         process_id=process_id)

        # Producer
        for id in range(start_id, start_id + batch_size):
            joinable_queue.put(id)

        joinable_queue.join()

    except:
        # Uncaught errors inside a forked worker process do not reach the parent's
        # stderr (each fork gets its own pipe), so print the traceback explicitly.
        print(traceback.format_exc())
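
The worker consumed above is not shown in this example; under the producer/consumer design described in the docstring it would presumably look something like the sketch below, where scrape_item is a placeholder for the real I/O-bound call:

def worker(joinable_queue, greenlet_id, process_id):
    # Assumed shape of the consumer; not part of the original example.
    while True:
        item_id = joinable_queue.get()        # block until the producer supplies an ID
        try:
            scrape_item(item_id)              # placeholder for the actual scraping call
        except Exception:
            print(traceback.format_exc())     # keep the worker alive on per-item failures
        finally:
            joinable_queue.task_done()        # lets joinable_queue.join() return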
Example No. 11
class GeventPoolExecutor2(LoggerMixin):
    def __init__(
        self,
        max_works,
    ):
        check_gevent_monkey_patch()
        self._q = JoinableQueue(maxsize=max_works)
        # self._q = Queue(maxsize=max_works)
        for _ in range(max_works):
            # self.logger.debug('yyyyyy')
            gevent.spawn(self.__worker)
        atexit.register(self.__atexit)

    def __worker(self):
        while True:
            fn, args, kwargs = self._q.get()
            # noinspection PyBroadException
            try:
                fn(*args, **kwargs)
            except Exception as exc:
                self.logger.exception(
                    f'Error in function {fn.__name__}; cause: {type(exc)} {exc}')
            finally:
                self._q.task_done()

    def submit(self, fn: Callable, *args, **kwargs):
        # self.logger.debug(self._q.qsize())
        self._q.put((fn, args, kwargs))

    def __atexit(self):
        self.logger.critical('About to exit the program.')
        self._q.join()
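
A usage sketch for GeventPoolExecutor2, assuming gevent monkey patching is already applied and the LoggerMixin/check_gevent_monkey_patch dependencies are importable; the work function is illustrative:

import gevent

def work(n):
    gevent.sleep(0.1)            # stand-in for real I/O-bound work
    print('done', n)

pool = GeventPoolExecutor2(max_works=20)
for n in range(100):
    pool.submit(work, n)         # blocks once the bounded queue is full
pool._q.join()                   # or rely on the atexit hook registered in __init__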
Example No. 12
    def test_api(self):

        queue = JoinableQueue()
        task_group = self.api.search('terminator', queue)

        while True:
            finished = all(
                [greenlet.ready() for greenlet in task_group.greenlets])
            try:
                item = queue.get(timeout=1.0)
            except Empty:

                if finished:
                    log.info('queue is empty and all jobs are done, quitting')
                    break

                log.info(
                    'queue was empty and jobs are still running, retrying')

                continue

            try:
                log.info('%r', item)
            finally:
                queue.task_done()

        task_group.join()
        queue.join()

        log.info('joined everything')
Example No. 13
    def start(self):
        if not self.__threads:
            self.__threads = len(IPNetwork(self.__ip)) if len(IPNetwork(self.__ip)) <= 10 else 10
        if len(IPNetwork(self.__ip)) < int(self.__threads):
            print "Please decrease number of threads to number of hosts <= %s" % len(IPNetwork(self.__ip))
            exit()

        queue = JoinableQueue()
        [queue.put(str(ip)) for ip in IPNetwork(self.__ip)]

        workers = [spawn(self.get_ip_info, queue, self.__apis) for t in range(int(self.__threads))]

        queue.join()
Example No. 14
    def start(self):
        if not self.__threads:
            self.__threads = len(IPNetwork(
                self.__ip)) if len(IPNetwork(self.__ip)) <= 10 else 10
        if len(IPNetwork(self.__ip)) < int(self.__threads):
            print "Please decrease number of threads to number of hosts <= %s" % len(
                IPNetwork(self.__ip))
            exit()

        queue = JoinableQueue()
        [queue.put(str(ip)) for ip in IPNetwork(self.__ip)]

        workers = [
            spawn(self.get_ip_info, queue, self.__apis)
            for t in range(int(self.__threads))
        ]

        queue.join()
Example No. 15
def main():
    if "-v" in argv:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Starting up")

    q = JoinableQueue()
    gevent.spawn(feeder, q)

    tasks = []
    for x in range(0, CONCURRENT_GROUPS):
        #print "spawning %i" % x
        tasks += [gevent.spawn(poolworker, q)]

    q.join()
    gevent.joinall(tasks)

    logging.info("Finished.")
Example No. 16
def spider(start_url, max_depth=1, no_of_workers=10, page_fn=check_page_for_profanities):
    """
    Concurrently spider the web, starting from web page, executing page_fn
    on each page.

    start_url specifies the document the spider starts from.
    max_depth specifies the maximum link depth from the start_url that
    processing will occur.
    no_of_workers specifies how many concurrent workers process the job queue.
    page_fn is a function that takes BeautifulSoup-parsed HTML and a URL and
    processes them as required (an assumed job_worker sketch follows this
    example).
    """
    seen_urls = set((start_url,))
    job_queue = JoinableQueue()
    job_queue.put((start_url, max_depth))

    for i in range(no_of_workers):
        gevent.spawn(job_worker, job_queue, seen_urls, page_fn)

    job_queue.join()
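
The job_worker used above is not shown; a plausible shape, assuming requests and BeautifulSoup for fetching and parsing (neither is imported in the original snippet), might be:

import requests
from bs4 import BeautifulSoup

def job_worker(job_queue, seen_urls, page_fn):
    # Assumed implementation; not part of the original example.
    while True:
        url, depth = job_queue.get()
        try:
            soup = BeautifulSoup(requests.get(url).text, 'html.parser')
            page_fn(soup, url)
            if depth > 0:
                for a in soup.find_all('a', href=True):
                    link = a['href']
                    if link.startswith('http') and link not in seen_urls:
                        seen_urls.add(link)
                        job_queue.put((link, depth - 1))
        except Exception as e:
            print('failed on', url, e)
        finally:
            job_queue.task_done()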
Example No. 17
def handle():
    #The expected format is:
    #ciclo	edad	sexo	nforma	prov	aoi	factorel
    csv_path = sys.argv[1]

    queue = JoinableQueue()
    event = Event()

    greenlets = []

    for i in range(90):
        gv = gevent.spawn(worker, queue, event)
        greenlets.append(gv)

    with io.open(csv_path, 'r') as f:
        for line in f:
            queue.put(line)

    queue.join()
    event.set()
    gevent.joinall(greenlets)
Example No. 18
def recursive_crawl(url):
    all_urls = set()
    processed_urls = set()
    task_queue = JoinableQueue()

    def add_to_all(url):
        if url not in all_urls:
            print("Record url {}".format(url))
            all_urls.add(url)

    task_queue.put_nowait(url)

    # Start workers
    workers = []
    for i in range(10):
        workers.append(gevent.spawn(url_worker, i, processed_urls, add_to_all, task_queue))
    print("workers", len(workers))

    task_queue.join()

    print("Processed", len(processed_urls), "All", len(all_urls))
    print("Total latency", demo_helpers.TOTAL_LATENCY)
Example No. 19
class TaskList:
    def __init__(self):
        self.queue = JoinableQueue()
        self.all_tasks = {}

    def add_task(self, task):
        self.all_tasks[task.get_id()] = task
        self.queue.put(task)

    def get_queue(self):
        return self.queue

    def join(self, timeout=None):
        return self.queue.join(timeout)
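
A usage sketch for TaskList; the Task class and the worker loop below are illustrative assumptions, since the example only defines the queue wrapper itself:

import gevent

class Task:
    def __init__(self, task_id):
        self.task_id = task_id

    def get_id(self):
        return self.task_id

    def run(self):
        gevent.sleep(0.1)            # stand-in for real work

task_list = TaskList()

def worker():
    queue = task_list.get_queue()
    while True:
        task = queue.get()
        try:
            task.run()
        finally:
            queue.task_done()

for _ in range(4):
    gevent.spawn(worker)
for i in range(10):
    task_list.add_task(Task(i))
task_list.join()                     # wait for all queued tasks to finish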
Example No. 20
class MassGet(FastGet):
    def __init__(self, urls, dic, threads=10, report_db=False, keepalive=None, each_threads=10):
        self.dic = dic
        self.report_db = report_db
        self.table = None
        if report_db:
            self.sql_conn(report_db)
        self.keepalive = keepalive
        self.each_threads = each_threads
        self.queue = JoinableQueue()
        [self.queue.put(x.strip()) for x in urls]
        [spawn(self.worker) for _ in range(threads)]
        self.queue.join()

    def worker(self):
        while not self.queue.empty():
            url = self.queue.get()
            try:
                FastGet(url, self.dic, self.each_threads, self.report_db, self.keepalive, self.table)
            except Exception as e:
                logging.error('Worker global exception for %s: %s' % (url, e))
            finally:
                self.queue.task_done()
Example No. 21
def handle():
    connection = create_postgresql_connection()

    cursor = connection.cursor()
    cursor.execute("BEGIN;")
    cursor.execute("DELETE FROM core_ratequery;")
    cursor.execute("COMMIT;")
    cursor.close()

    queue = JoinableQueue()
    event = Event()

    age_ids = list(age_map(connection).values()) + [None]
    sex_ids = list(sex_map(connection).values()) + [None]
    education_ids = list(education_map(connection).values()) + [None]
    province_ids = list(province_map(connection).values()) + [None]

    cursor = connection.cursor()
    cursor.execute("SELECT DISTINCT cycle FROM core_microdata;")
    cycles = [row[0] for row in cursor]
    cursor.close()

    greenlets = []

    for i in range(50):
        gv = gevent.spawn(worker, queue, event)
        greenlets.append(gv)

    combs = itertools.product(age_ids, sex_ids, province_ids, education_ids,
                              cycles)
    for c in combs:
        queue.put(c)

    queue.join()
    event.set()
    gevent.joinall(greenlets)
Example No. 22
        else:
            sleep(5)


if __name__ == '__main__':
    t_status = spawn_link_exception(status_thread)
    t_item_queue = spawn_link_exception(add_to_item_queue)
    for i in range(80):
        spawn_link_exception(run_find_item)
    #t_index_items = spawn_link_exception(index_items)
    for i in range(8):
        spawn_link_exception(run_solr_queue, i)

    #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr])

    sleep(1)
    print('join item_queue thread')
    t_item_queue.join()
    print('item_queue thread complete')
    #print 'join item_and_host_queue:', item_and_host_queue.qsize()
    #item_and_host_queue.join()
    #print 'item_and_host_queue complete'
    for host, host_queue in host_queues.items():
        qsize = host_queue.qsize()
        print('host:', host, qsize)
        host_queue.join()

    print('join solr_queue:', solr_queue.qsize())
    solr_queue.join()
    print('solr_queue complete')
Example No. 23
class FastGet:
    def __init__(self, url, dic, threads=100, report_db=False, keepalive=None, table_name=None):
        self.url = url
        parts = urlparse(url)
        self.scheme, self.host, self.port = parts.scheme, parts.hostname, parts.port
        if not self.port:
            self.port = 443 if self.scheme == 'https' else 80

        self.keepalive = keepalive
        try:
            instance = HehReq(self.host, int(self.port), self.scheme, self.keepalive)
        except Exception as e:
            logging.error('Init exception for %s: %s' % (self.url, e))
            return
        if not keepalive:
            self.keepalive = instance.detect_keepalive()
        if self.keepalive == 0:
            logging.error('Keep-Alive value for %s appears to be 0, check the connection' % url)
            return
        logging.warning('Calculated Keep-Alive for %s: %s' % (url, self.keepalive))

        self.report_db = report_db
        if report_db:
            self.table = table_name
            self.sql_conn(report_db)

        self.queue = JoinableQueue()
        [self.queue.put(dic[i:i + self.keepalive]) for i in range(0, len(dic), self.keepalive)]
        [spawn(self.worker) for _ in range(threads)]
        self.queue.join()

    def sql_conn(self, report_db):
        self.conn = MySQLdb.connect(report_db['host'], report_db['user'], report_db['passwd'], report_db['db'])
        self.cur = self.conn.cursor()
        if not self.table:
            self.table = 'scan_%s' % datetime.strftime(datetime.now(), '%Y_%m_%d_%H%M%S')
            self.cur.execute(
                'create table %s(scheme varchar(16), host varchar(128), port smallint, uri varchar(128),\
                code smallint, size int, type varchar(128))' % self.table)

    def report(self, result):
        if result[1] not in [302, 404]:
            logging.warning('Path %s://%s:%s/%s, response code %s, content-length %s, content-type %s' % (
                self.scheme, self.host, self.port, result[0], result[1], result[2], result[3]))
        if self.report_db:
            p = [self.scheme, self.host, self.port] + list(result)
            self.cur.execute('insert into %s values(%%s,%%s,%%s,%%s,%%s,%%s,%%s)' % self.table, p)

    def worker(self):
        try:
            instance = HehReq(self.host, int(self.port), self.scheme, self.keepalive)
        except Exception as e:
            logging.error('Worker init exception for %s: %s' % (self.url, e))
            return
        while not self.queue.empty():
            paths = self.queue.get()
            try:
                for x in instance.bulk_get(paths):
                    self.report(x)
            except Exception as e:
                logging.error('Worker loop exception for %s: %s' % (self.url, e))
            finally:
                if self.report_db:
                    self.conn.commit()
                self.queue.task_done()
Example No. 24
class Importer(object):
    def __init__(self, creds, pool_size=POOL_SIZE):
        self.client = get_session(creds['host'],
                                  creds['key'],
                                  creds['secret'])
        self.queue = JoinableQueue(maxsize=pool_size * 2)
        for i in range(pool_size):
            gevent.spawn(self.worker)

    def worker(self):
        while True:
            job = self.queue.get()
            typ = job.get('type')
            try:
                if typ == 'device':
                    self._process_device(job['data'])
                elif typ == 'datapoints':
                    self._process_datapoints(job['data'])
            finally:
                self.queue.task_done()

    def write_devices(self, devices):
        for device in devices:
            self.queue.put({'type': 'device', 'data': device})
        self.queue.join()

    def write_datapoints_from_file(self, infile):
        points = {}
        lineno = 0
        for line in infile:
            lineno += 1
            (device, sensor, ts, val) = line.split('\t')
            pts = points.setdefault(device, {}).setdefault(sensor, [])
            pts.append({'t': ts, 'v': float(val)})

            if lineno % 1000 == 0:
                self.queue.put({'type': 'datapoints', 'data': points})
                points = {}

        if points:
            self.queue.put({'type': 'datapoints', 'data': points})
        self.queue.join()

    def _process_device(self, device, retries=5):
        res = self.client.create_device(device)
        if res.successful != tempoiq.response.SUCCESS:
            if 'A device with that key already exists' in res.body:
                print("Skipping creating existing device {}"
                      .format(device['key']))
                return

            if retries > 0:
                print("Retrying device create {}, error {}"
                      .format(device['key'], res.body))
                self._process_device(device, retries - 1)
            else:
                print("Retries exceeded; couldn't create device {}"
                      .format(device['key']))

    def _process_datapoints(self, write_request, retries=5):
        try:
            res = self.client.write(write_request)
        except Exception as e:
            print("ERROR with request: --->")
            print(json.dumps(write_request, default=WriteEncoder().default))
            raise e

        if res.successful != tempoiq.response.SUCCESS:
            if retries > 0:
                print("Retrying write, error was: {}".format(res.body))
                return self._process_datapoints(write_request, retries - 1)
            else:
                print("Retries exceeded; lost data!")
                print(json.dumps(write_request, default=WriteEncoder().default))
                return True
        return False
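
A usage sketch for Importer, assuming creds holds the TempoIQ host/key/secret and that the input file uses the tab-separated device/sensor/timestamp/value layout expected by write_datapoints_from_file; the values shown are placeholders:

creds = {'host': 'https://example.backend.tempoiq.com',
         'key': 'my-key',
         'secret': 'my-secret'}
importer = Importer(creds, pool_size=4)

importer.write_devices([{'key': 'device-1'}, {'key': 'device-2'}])

with open('datapoints.tsv') as infile:   # lines: device<TAB>sensor<TAB>timestamp<TAB>value
    importer.write_datapoints_from_file(infile)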
Example No. 25
class Worker(object):
    # http://www.gevent.org/gevent.wsgi.html
    # http://toastdriven.com/blog/2011/jul/31/gevent-long-polling-you/
    # http://blog.pythonisito.com/2012/07/gevent-and-greenlets.html

    DEFAULT_PORT = "9311"

    def __init__(self, port=DEFAULT_PORT):
        # REST services
        monkey.patch_all()
        signal(SIGQUIT, shutdown)
        self.is_config = False
        self.server = wsgi.WSGIServer(('', int(port)),
                                      self._response_handler,
                                      log=None)

        # sharding
        self.prefix = None
        self.shard_id = None
        self.ring = None

        # concurrency based on message passing / barrier pattern
        self._task_event = None
        self._task_queue = None

        # UnitOfWork
        self._uow = None

    def shard_start(self):
        """start the worker service for this shard"""
        self.server.serve_forever()

    def shard_stop(self, *args, **kwargs):
        """stop the worker service for this shard"""
        payload = args[0]

        if (self.prefix == payload["prefix"]) and (self.shard_id
                                                   == payload["shard_id"]):
            logging.info(
                "worker service stopping... you can safely ignore any exceptions that follow"
            )
            self.server.stop()
        else:
            # returns incorrect response in this case, to avoid exception
            logging.error("incorrect shard %s prefix %s", payload["shard_id"],
                          payload["prefix"])

    ######################################################################
    ## authentication methods

    def auth_request(self, payload, start_response, body):
        """test the authentication credentials for a REST call"""
        if (self.prefix == payload["prefix"]) and (self.shard_id
                                                   == payload["shard_id"]):
            return True
        else:
            # UoW caller did not provide correct credentials to access shard
            start_response('403 Forbidden', [('Content-Type', 'text/plain')])
            body.put("Forbidden, incorrect credentials for this shard\r\n")
            body.put(StopIteration)

            logging.error("incorrect credentials shard %s prefix %s",
                          payload["shard_id"], payload["prefix"])
            return False

    def shard_config(self, *args, **kwargs):
        """configure the service to run a shard"""
        payload, start_response, body = self.get_response_context(args)

        if self.is_config:
            # hey, somebody call security...
            start_response('403 Forbidden', [('Content-Type', 'text/plain')])
            body.put("Forbidden, shard is already in a configured state\r\n")
            body.put(StopIteration)

            logging.warning("denied configuring shard %s prefix %s",
                            self.shard_id, self.prefix)
        else:
            self.is_config = True
            self.prefix = payload["prefix"]
            self.shard_id = payload["shard_id"]

            # dependency injection for UnitOfWork
            uow_name = payload["uow_name"]
            logging.info("initializing unit of work based on %s", uow_name)

            ff = instantiate_class(uow_name)
            self._uow = ff.instantiate_uow(uow_name, self.prefix)

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            logging.info("configuring shard %s prefix %s", self.shard_id,
                         self.prefix)

    ######################################################################
    ## barrier pattern methods

    @contextmanager
    def wrap_task_event(self):
        """initialize a gevent.Event, to which the UnitOfWork will wait as a listener"""
        self._task_event = Event()
        yield

        # complete the Event, notifying the UnitOfWork which waited
        self._task_event.set()
        self._task_event = None

    def _consume_task_queue(self):
        """consume/serve requests until the task_queue empties"""
        while True:
            payload = self._task_queue.get()

            try:
                self._uow.perform_task(payload)
            finally:
                self._task_queue.task_done()

    def prep_task_queue(self):
        """prepare task_queue for another set of distributed tasks"""
        self._task_queue = JoinableQueue()
        spawn(self._consume_task_queue)

    def put_task_queue(self, payload):
        """put the given task definition into the task_queue"""
        self._task_queue.put_nowait(payload)

    def queue_wait(self, *args, **kwargs):
        """wait until all shards finished sending task_queue requests"""
        payload, start_response, body = self.get_response_context(args)

        if self.auth_request(payload, start_response, body):
            if self._task_event:
                self._task_event.wait()

            # HTTP response first, then initiate long-running task
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

    def queue_join(self, *args, **kwargs):
        """join on the task_queue, as a barrier to wait until it empties"""
        payload, start_response, body = self.get_response_context(args)

        if self.auth_request(payload, start_response, body):
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("join queue...\r\n")

            ## NB: TODO this step of emptying out the task_queue on
            ## shards could take a while on a large run... perhaps use
            ## a long-polling HTTP request or websocket instead?
            self._task_queue.join()

            body.put("done\r\n")
            body.put(StopIteration)

    ######################################################################
    ## hash ring methods

    def ring_init(self, *args, **kwargs):
        """initialize the HashRing"""
        payload, start_response, body = self.get_response_context(args)

        if self.auth_request(payload, start_response, body):
            self.ring = payload["ring"]

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            logging.info("setting hash ring %s", self.ring)

    ######################################################################
    ## WSGI handler for REST endpoints

    def get_response_context(self, args):
        """decode the WSGI response context from the Greenlet args"""
        env = args[0]
        msg = env["wsgi.input"].read()
        payload = loads(msg)
        start_response = args[1]
        body = args[2]

        return payload, start_response, body

    def _response_handler(self, env, start_response):
        """handle HTTP request/response"""
        uri_path = env["PATH_INFO"]
        body = JoinableQueue()

        if self._uow and self._uow.handle_endpoints(self, uri_path, env,
                                                    start_response, body):
            pass

        ##########################################
        # Worker endpoints

        elif uri_path == '/shard/config':
            # configure the service to run a shard
            Greenlet(self.shard_config, env, start_response, body).start()

        elif uri_path == '/shard/stop':
            # shutdown the service
            ## NB: must parse POST data specially, to avoid exception
            payload = loads(env["wsgi.input"].read())
            Greenlet(self.shard_stop, payload).start_later(1)

            # HTTP response starts first, to avoid error after server stops
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Goodbye\r\n")
            body.put(StopIteration)

        elif uri_path == '/queue/wait':
            # wait until all shards have finished sending task_queue requests
            Greenlet(self.queue_wait, env, start_response, body).start()

        elif uri_path == '/queue/join':
            # join on the task_queue, as a barrier to wait until it empties
            Greenlet(self.queue_join, env, start_response, body).start()

        elif uri_path == '/check/persist':
            ## NB: TODO checkpoint the service state to durable storage
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        elif uri_path == '/check/recover':
            ## NB: TODO restart the service, recovering from most recent checkpoint
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        ##########################################
        # HashRing endpoints

        elif uri_path == '/ring/init':
            # initialize the HashRing
            Greenlet(self.ring_init, env, start_response, body).start()

        elif uri_path == '/ring/add':
            ## NB: TODO add a node to the HashRing
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        elif uri_path == '/ring/del':
            ## NB: TODO delete a node from the HashRing
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        ##########################################
        # utility endpoints

        elif uri_path == '/':
            # dump info about the service in general
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put(str(env) + "\r\n")
            body.put(StopIteration)

        else:
            # unknown endpoint
            start_response('404 Not Found', [('Content-Type', 'text/plain')])
            body.put('Not Found\r\n')
            body.put(StopIteration)

        return body
Example No. 26
class Worker (object):
    # http://www.gevent.org/gevent.wsgi.html
    # http://toastdriven.com/blog/2011/jul/31/gevent-long-polling-you/
    # http://blog.pythonisito.com/2012/07/gevent-and-greenlets.html

    DEFAULT_PORT = "9311"


    def __init__ (self, port=DEFAULT_PORT):
        monkey.patch_all()
        self.server = wsgi.WSGIServer(('', int(port)), self._response_handler)
        self.is_config = False
        self.prefix = None
        self.shard_id = None
        self.ring = None
        self.ff_name = None
        self.pop = None
        self.evt = None
        self.reify_queue = None


    def start (self):
        """start the service"""
        self.server.serve_forever()


    def stop (self, *args, **kwargs):
        """stop the service"""
        payload = args[0]
        body = args[1]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            logging.info("executor service stopping... you can safely ignore any exceptions that follow")
            self.server.stop()
        else:
            # NB: you have dialed a wrong number!
            # returns incorrect response in this case, to avoid exception
            logging.error("incorrect shard %s prefix %s", payload["shard_id"], payload["prefix"])


    def _bad_auth (self, payload, body, start_response):
        """Framework did not provide the correct credentials to access this shard"""
        start_response('403 Forbidden', [('Content-Type', 'text/plain')])
        body.put('Forbidden\r\n')
        body.put(StopIteration)

        logging.error("incorrect shard %s prefix %s", payload["shard_id"], payload["prefix"])


    def reify_consumer (self):
        """consume/serve reify requests until the queue empties"""

        while True:
            payload = self.reify_queue.get()

            try:
                key = payload["key"]
                gen = payload["gen"]
                feature_set = payload["feature_set"]
                self.pop.receive_reify(key, gen, feature_set)
            finally:
                self.reify_queue.task_done()


    def shard_config (self, *args, **kwargs):
        """configure the service to run a shard"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if self.is_config:
            # somebody contact security...
            start_response('403 Forbidden', [('Content-Type', 'text/plain')])
            body.put("Forbidden, executor already in a configured state\r\n")
            body.put(StopIteration)

            logging.warning("denied configuring shard %s prefix %s", self.shard_id, self.prefix)
        else:
            self.is_config = True
            self.prefix = payload["prefix"]
            self.shard_id = payload["shard_id"]

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            logging.info("configuring shard %s prefix %s", self.shard_id, self.prefix)


    def ring_init (self, *args, **kwargs):
        """initialize the HashRing"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            self.ring = payload["ring"]

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            logging.info("setting hash ring %s", self.ring)
        else:
            self._bad_auth(payload, body, start_response)


    def pop_init (self, *args, **kwargs):
        """initialize a Population of unique Individuals on this shard"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            self.ff_name = payload["ff_name"]
            logging.info("initializing population based on %s", self.ff_name)

            self.pop = Population(Individual(), self.ff_name, self.prefix)
            self.pop.set_ring(self.shard_id, self.ring)

            self.reify_queue = JoinableQueue()
            spawn(self.reify_consumer)

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)
        else:
            self._bad_auth(payload, body, start_response)


    def pop_gen (self, *args, **kwargs):
        """create generation 0 of Individuals in this shard of the Population"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            self.evt = Event()

            # HTTP response first, then initiate long-running task
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            self.pop.populate(0)

            self.evt.set()
            self.evt = None
        else:
            self._bad_auth(payload, body, start_response)


    def pop_wait (self, *args, **kwargs):
        """wait until all shards finished sending reify requests"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            if self.evt:
                self.evt.wait()

            # HTTP response first, then initiate long-running task
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)
        else:
            self._bad_auth(payload, body, start_response)


    def pop_join (self, *args, **kwargs):
        """join on the reify queue, to wait until it empties"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            self.reify_queue.join()

            ## NB: perhaps use a long-polling HTTP request or websocket instead?
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)
        else:
            self._bad_auth(payload, body, start_response)


    def pop_hist (self, *args, **kwargs):
        """calculate a partial histogram for the fitness distribution"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            start_response('200 OK', [('Content-Type', 'application/json')])
            body.put(dumps(self.pop.get_part_hist()))
            body.put("\r\n")
            body.put(StopIteration)
        else:
            self._bad_auth(payload, body, start_response)


    def pop_next (self, *args, **kwargs):
        """iterate N times or until a 'good enough' solution is found"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            self.evt = Event()

            # HTTP response first, then initiate long-running task
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            current_gen = payload["current_gen"]
            fitness_cutoff = payload["fitness_cutoff"]
            self.pop.next_generation(current_gen, fitness_cutoff)

            self.evt.set()
            self.evt = None
        else:
            self._bad_auth(payload, body, start_response)


    def pop_enum (self, *args, **kwargs):
        """enumerate the Individuals in this shard of the Population"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            fitness_cutoff = payload["fitness_cutoff"]

            start_response('200 OK', [('Content-Type', 'application/json')])
            body.put(dumps(self.pop.enum(fitness_cutoff)))
            body.put("\r\n")
            body.put(StopIteration)
        else:
            self._bad_auth(payload, body, start_response)


    def pop_reify (self, *args, **kwargs):
        """test/add a newly generated Individual into the Population (birth)"""
        payload = args[0]
        body = args[1]
        start_response = args[2]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            self.reify_queue.put_nowait(payload)

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)
        else:
            self._bad_auth(payload, body, start_response)


    def _response_handler (self, env, start_response):
        """handle HTTP request/response"""
        uri_path = env['PATH_INFO']
        body = Queue()

        ## NB: these handler cases can be collapsed into a common pattern
        ## except for config/stop -- later

        ##########################################
        # shard lifecycle endpoints

        if uri_path == '/shard/config':
            # configure the service to run a shard
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.shard_config, payload, body, start_response)
            gl.start()

        elif uri_path == '/shard/persist':
            # checkpoint the service state to durable storage
            payload = loads(env['wsgi.input'].read())
            print "POST", payload
            ## TODO
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        elif uri_path == '/shard/recover':
            # restart the service, recovering from the most recent checkpoint
            payload = loads(env['wsgi.input'].read())
            print "POST", payload
            ## TODO
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        ##########################################
        # HashRing endpoints

        elif uri_path == '/ring/init':
            # initialize the HashRing
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.ring_init, payload, body, start_response)
            gl.start()

        elif uri_path == '/ring/add':
            # add a node to the HashRing
            payload = loads(env['wsgi.input'].read())
            print "POST", payload
            ## TODO
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        elif uri_path == '/ring/del':
            # delete a node from the HashRing
            payload = loads(env['wsgi.input'].read())
            print "POST", payload
            ## TODO
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        ##########################################
        # evolution endpoints

        elif uri_path == '/pop/init':
            # initialize the Population subset on this shard
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_init, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/gen':
            # create generation 0 of Individuals in this shard of the Population
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_gen, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/wait':
            # wait until all shards have finished sending reify requests
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_wait, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/join':
            # join on the reify queue, to wait until it empties
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_join, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/hist':
            # calculate a partial histogram for the fitness distribution
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_hist, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/next':
            # attempt to run another generation
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_next, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/enum':
            # enumerate the Individuals in this shard of the Population
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_enum, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/reify':
            # test/add a newly generated Individual into the Population (birth)
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.pop_reify, payload, body, start_response)
            gl.start()

        elif uri_path == '/pop/evict':
            # remove an Individual from the Population (death)
            payload = loads(env['wsgi.input'].read())
            print "POST", payload
            ## TODO
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        ##########################################
        # utility endpoints

        elif uri_path == '/':
            # dump info about the service in general
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put(str(env) + "\r\n")
            body.put(StopIteration)

        elif uri_path == '/stop':
            # shutdown the service
            payload = loads(env['wsgi.input'].read())
            gl = Greenlet(self.stop, payload, body)
            gl.start_later(1)
            # HTTP response must start here, to avoid failure when server stops
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Goodbye\r\n")
            body.put(StopIteration)

        else:
            # unknown endpoint
            start_response('404 Not Found', [('Content-Type', 'text/plain')])
            body.put('Not Found\r\n')
            body.put(StopIteration)

        return body
Example No. 27
class Migrator:
    def __init__(self,
                 scheme,
                 create_devices=True,
                 write_data=True,
                 start_date="2000-01-01T00:00:00Z",
                 end_date="2014-12-31T00:00:00Z",
                 pool_size=3):
        self.scheme = scheme
        self.create_devices = create_devices
        self.should_write_data = write_data
        self.start_date = start_date
        self.end_date = end_date
        self.tdb = TDBClient(scheme.db_key,
                             scheme.db_key,
                             scheme.db_secret,
                             base_url=scheme.db_baseurl)

        iq_endpoint = HTTPEndpoint(scheme.iq_baseurl, scheme.iq_key,
                                   scheme.iq_secret)
        self.tiq = TIQClient(iq_endpoint)
        self.queue = JoinableQueue()
        self.lock = Lock()
        self.dp_count = 0
        self.req_count = 0
        self.dp_reset = time.time()
        for i in range(pool_size):
            gevent.spawn(self.worker)

    def worker(self):
        while True:
            series = self.queue.get()
            try:
                self.migrate_series(series)
            finally:
                self.queue.task_done()

    def migrate_all_series(self, start_key="", limit=None):
        start_time = time.time()

        (keys, tags, attrs) = self.scheme.identity_series_filter()
        series_set = self.tdb.list_series(keys, tags, attrs)

        # Keep our own state of whether we passed the resume point, so we don't
        # need to assume client and server sort strings the same.
        found_first_series = False

        series_count = 0

        for series in series_set:
            if not found_first_series and series.key < start_key:
                continue
            else:
                found_first_series = True

            if limit and series_count >= limit:
                print("Reached limit of %d devices, stopping." % (limit))
                break

            if self.scheme.identity_series_client_filter(series):
                # If the series looks like an identity series,
                # queue it to be processed by the threadpool
                self.queue.put(series)
                series_count += 1

        self.queue.join()

        end_time = time.time()
        print("Exporting {} devices took {} seconds".format(
            series_count, end_time - start_time))

    def migrate_series(self, series):
        print("  Beginning to migrate series: %s" % (series.key))
        error = False
        try:
            if self.create_devices:
                error = self.create_device(series)

            if self.should_write_data and not error:
                error = self.write_data(series)
        except Exception as e:
            logging.exception(e)
            error = True

        if not error:
            print("COMPLETED migrating for series %s" % (series.key))
        else:
            print("ERROR migrating series %s" % (series.key))
Example No. 28
class HttpScanner(object):
    def __init__(self, args):
        """
        Initialise HTTP scanner
        :param args:
        :return:
        """
        self.args = args
        self.output = HttpScannerOutput(args)
        self._init_scan_options()

        # Reading files
        self.output.write_log("Reading files and deduplicating.", logging.INFO)
        self.hosts = self._file_to_list(args.hosts)
        self.urls = self._file_to_list(args.urls)

        #
        self._calc_urls()
        out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count)
        if self.args.ports is not None:
            out += ' %i ports' % len(self.args.ports)
        self.output.print_and_log(out)

        if self.args.ports is not None and not self.args.syn:
            new_hosts = []
            for host in self.hosts:
                for port in self.args.ports:
                    # print(host, port)
                    new_hosts.append(helper.generate_url(host, port))
            self.hosts = new_hosts

        #
        self._calc_urls()
        self.output.print_and_log('%i full urls to scan' %
                                  self.full_urls_count)

        # Queue and workers
        self.hosts_queue = JoinableQueue()
        self.workers = []

    def _file_to_list(self, filename, dedup=True):
        """
        Get list from file
        :param filename: file to read
        :return: list of lines
        """
        if not path.exists(filename) or not path.isfile(filename):
            self.output.print_and_log('File %s not found!' % filename,
                                      logging.ERROR)
            exit(-1)

        # Preparing lines list
        lines = list(filter(lambda line: line is not None and len(line) > 0,
                            open(filename).read().split('\n')))
        if len(lines) == 0:
            self.output.print_and_log('File %s is empty!' % filename,
                                      logging.ERROR)
            exit(-1)

        return helper.deduplicate(lines) if dedup else lines

    def _init_scan_options(self):
        # Session
        self.session = session()
        self.session.timeout = self.args.timeout
        self.session.verify = False

        # TODO: debug and check
        # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries))
        # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries))
        # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
        # Max retries
        adapters.DEFAULT_RETRIES = self.args.max_retries

        # TOR
        if self.args.tor:
            self.output.write_log("TOR usage detected. Making some checks.")
            self.session.proxies = {
                'http': 'socks5://127.0.0.1:9050',
                'https': 'socks5://127.0.0.1:9050'
            }

            url = 'http://ifconfig.me/ip'
            real_ip, tor_ip = None, None

            # Get the real IP address
            try:
                real_ip = get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log(
                    "Couldn't get real IP address. Check yout internet connection.",
                    logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Get TOR IP address
            try:
                tor_ip = self.session.get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log(
                    "TOR socks proxy doesn't seem to be working.",
                    logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Show IP addresses
            self.output.print_and_log('Real IP: %s TOR IP: %s' %
                                      (real_ip, tor_ip))
            if real_ip == tor_ip:
                self.output.print_and_log(
                    "TOR doesn't work! Stop to be secure.", logging.ERROR)
                exit(-1)

        # Proxy
        if self.args.proxy is not None:
            self.session.proxies = {
                "https": self.args.proxy,
                "http": self.args.proxy
            }

        # Auth
        if self.args.auth is not None:
            items = self.args.auth.split(':')
            self.session.auth = (items[0], items[1])

        # Cookies
        self.cookies = {}
        if self.args.cookies is not None:
            self.cookies = Cookies.from_request(self.args.cookies)

        # Cookies from file
        if self.args.load_cookies is not None:
            if not path.exists(self.args.load_cookies) or not path.isfile(
                    self.args.load_cookies):
                self.output.print_and_log(
                    'Could not find cookie file: %s' % self.args.load_cookies,
                    logging.ERROR)
                exit(-1)

            self.cookies = MozillaCookieJar(self.args.load_cookies)
            self.cookies.load()

        self.session.cookies = self.cookies

        # User-Agent
        self.ua = UserAgent() if self.args.random_agent else None

    def worker(self, worker_id):
        self.output.write_log('Worker %i started.' % worker_id)
        while not self.hosts_queue.empty():
            host = self.hosts_queue.get()
            try:
                self.scan_host(worker_id, host)
            finally:
                self.output.write_log('Worker %i finished.' % worker_id)
                self.hosts_queue.task_done()

    def _head_available(self, host):
        """
        Determine if HEAD requests is allowed
        :param host:
        :return:
        """
        # Trying to use OPTIONS request
        try:
            response = self.session.options(host, headers=self._fill_headers())
            o = response.headers[
                'allow'] if 'allow' in response.headers else None
            if o is not None and o.find('HEAD') != -1:
                return True
        except:
            # TODO: fix
            pass

        try:
            return False if self.session.head(
                host,
                headers=self._fill_headers()).status_code == 405 else True
        except:
            # TODO: fix
            return False

    def scan_host(self, worker_id, host):
        # check if resolvable
        ip = helper.url_to_ip(host)
        if ip is None:
            self.output.write_log('Could not resolve %s  Skipping...' % host,
                                  logging.WARNING)
            self.output.urls_scanned += len(self.urls)
            return

        # Check for HEAD
        host_url = helper.host_to_url(host)
        head_available = False
        if self.args.head:
            head_available = self._head_available(host)
            if head_available:
                self.output.write_log('HEAD is supported for %s' % host)

        errors_count, urls_scanned = 0, 0
        for url in self.urls:
            full_url = urljoin(host_url, url)
            r = self.scan_url(full_url, head_available)
            urls_scanned += 1
            self.output.urls_scanned += 1

            # Output
            r['worker'] = worker_id
            self.output.write(**r)
            if r['exception'] is not None:
                errors_count += 1

            # Skip host on errors
            if self.args.skip is not None and errors_count == self.args.skip:
                self.output.write_log(
                    'Errors limit reached on %s Skipping other urls.' % host,
                    logging.WARNING)
                self.output.urls_scanned += len(self.urls) - urls_scanned
                break

        # cookies bugfix?
        self.session.cookies.clear()

    def _fill_headers(self):
        # Fill UserAgent in headers
        headers = {}
        if self.args.user_agent is not None:
            headers['User-agent'] = self.args.user_agent
        elif self.args.random_agent:
            headers['User-agent'] = self.ua.random

        # Fill Referer in headers
        if self.args.referer is not None:
            headers['Referer'] = self.args.referer

        return headers

    def _parse_response(self, url, response, exception):
        res = {'url': url, 'response': response, 'exception': exception}

        if response is None or exception is not None:
            res.update({
                'status': -1,
                'length': -1,
            })
            return res

        try:
            length = int(response.headers['content-length']
                         ) if 'content-length' in response.headers else len(
                             response.text)
        except Exception as exception:
            self.output.write_log(
                "Exception while getting content length for URL: %s Exception: %s"
                % (url, str(exception)), logging.ERROR)
            length = 0

        res.update({
            'status': response.status_code,
            'length': length,
        })
        return res

    def scan_url(self, url, use_head=False):
        self.output.write_log('Scanning %s' % url, logging.DEBUG)

        # Query URL and handle exceptions
        response, exception = None, None
        method = 'HEAD' if use_head else 'GET'
        try:
            # TODO: add support for user:password in URL
            response = self.session.request(
                method,
                url,
                headers=self._fill_headers(),
                allow_redirects=self.args.allow_redirects)
        except ConnectionError as ex:
            self.output.write_log('Connection error while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except HTTPError as ex:
            self.output.write_log('HTTP error while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except Timeout as ex:
            self.output.write_log('Timeout while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except TooManyRedirects as ex:
            self.output.write_log('Too many redirects while querying %s' % url,
                                  logging.ERROR)
            exception = ex
        except Exception as ex:
            self.output.write_log('Unknown exception while querying %s' % url,
                                  logging.ERROR)
            exception = ex

        # print('cookies: %s' % self.cookies)
        print('session.cookies: %s' % self.session.cookies)
        # self.session.cookies = self.cookies

        return self._parse_response(url, response, exception)

    def signal_handler(self):
        """
        Signal handler
        :return:
        """
        # TODO: add saving status via pickle
        self.output.print_and_log('Signal caught. Stopping...',
                                  logging.WARNING)
        self.stop()
        exit(signal.SIGINT)

    def _calc_urls(self):
        # Calculations
        self.urls_count = len(self.urls)
        self.hosts_count = len(self.hosts)
        self.full_urls_count = len(self.urls) * len(self.hosts)
        self.output.args.urls_count = self.full_urls_count

    def start(self):
        """
        Start multithreaded scan
        :return:
        """
        # Set signal handler
        gevent.signal(signal.SIGTERM, self.signal_handler)
        gevent.signal(signal.SIGINT, self.signal_handler)
        gevent.signal(signal.SIGQUIT, self.signal_handler)

        # ICMP scan
        if self.args.icmp:
            if geteuid() != 0:
                self.output.print_and_log(
                    'To use ICMP scan option you must run as root. Skipping ICMP scan',
                    logging.WARNING)
            else:
                self.output.print_and_log('Starting ICMP scan.')
                self.hosts = helper.icmp_scan(self.hosts, self.args.timeout)
                self._calc_urls()
                self.output.print_and_log(
                    'After ICMP scan %i hosts %i urls loaded, %i urls to scan'
                    %
                    (self.hosts_count, self.urls_count, self.full_urls_count))

        # SYN scan
        if self.args.syn:
            if self.args.tor or self.args.proxy is not None:
                self.output.print_and_log(
                    'SYN scan via tor or proxy is impossible!',
                    logging.WARNING)
                self.output.print_and_log(
                    'Stopping to prevent deanonymization!', logging.WARNING)
                exit(-1)

            if geteuid() != 0:
                self.output.print_and_log(
                    'To use SYN scan option you must run as root. Skipping SYN scan',
                    logging.WARNING)
            else:
                self.output.print_and_log('Starting SYN scan.')
                self.hosts = helper.syn_scan(self.hosts, self.args.ports,
                                             self.args.timeout)
                self._calc_urls()
                self.output.print_and_log(
                    'After SYN scan %i hosts %i urls loaded, %i urls to scan' %
                    (self.hosts_count, self.urls_count, self.full_urls_count))

        # Check threads count vs hosts count
        if self.args.threads > self.hosts_count:
            self.output.write_log(
                'Too many threads! Fixing threads count to %i' %
                self.hosts_count, logging.WARNING)
            threads_count = self.hosts_count
        else:
            threads_count = self.args.threads

        # Output urls count
        self.output.args.urls_count = self.full_urls_count

        # Start workers
        self.workers = [spawn(self.worker, i) for i in range(threads_count)]

        # Fill and join queue
        [self.hosts_queue.put(host) for host in self.hosts]
        self.hosts_queue.join()

    def stop(self):
        """
        Stop scan
        :return:
        """
        # TODO: stop correctly
        gevent.killall(self.workers)
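For reference, the spawn/fill/join pattern that start() and stop() rely on can be boiled down to a few lines. The sketch below is a hedged, self-contained reduction of that pattern: handle_host and the host names are illustrative stand-ins for scan_host() and the real host list, not part of the scanner itself.

import gevent
from gevent.queue import JoinableQueue

hosts_queue = JoinableQueue()

def handle_host(worker_id):
    # Stand-in for HttpScanner.worker(): drain the queue, ack every item.
    while not hosts_queue.empty():
        host = hosts_queue.get()
        try:
            print('worker %i processing %s' % (worker_id, host))
        finally:
            hosts_queue.task_done()

# Fill first, then spawn, so the empty() check cannot race the producer.
for host in ('host-a.example', 'host-b.example'):
    hosts_queue.put(host)

workers = [gevent.spawn(handle_host, i) for i in range(2)]
hosts_queue.join()       # returns once every host has been task_done()'d
gevent.killall(workers)  # mirrors HttpScanner.stop()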
Ejemplo n.º 29
0
    words = sys.argv[1:]

    queue_start = JoinableQueue()
    queue_to_search = JoinableQueue()
    queue_to_download = JoinableQueue()

    pbar = ProgressBar(maxval=len(words) * 3).start()

    for word in words:
        queue_start.put_nowait(word)

        CheckWord(queue_start, queue_to_search, pbar).start()
        SearchWord(queue_to_search, queue_to_download, pbar).start()
        Downloading(queue_to_download, pbar).start()

    queue_start.join()
    queue_to_search.join()
    queue_to_download.join()

    pbar.finish()

    exists = ', '.join(WORDS_STATUS['exists'])
    not_found = ', '.join(WORDS_STATUS['not_found'])
    downloaded = ', '.join(WORDS_STATUS['downloaded'])

    if exists:
        cprint('Files exists: {0}'.format(exists), 'green')

    if not_found:
        cprint('Files not_found: {0}'.format(not_found), 'red')

    if downloaded:
        cprint('Files downloaded: {0}'.format(downloaded), 'green')
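The three chained queues above form a check -> search -> download pipeline. A hedged, self-contained sketch of the same idea follows; the stage() helper and the string transforms are illustrative replacements for the CheckWord/SearchWord/Downloading greenlets, which are defined elsewhere in this project.

import gevent
from gevent.queue import JoinableQueue

q_check, q_search, q_download = JoinableQueue(), JoinableQueue(), JoinableQueue()

def stage(src, dst, transform):
    # Pull from one queue and feed the next stage, acking each item when done.
    while True:
        item = src.get()
        try:
            result = transform(item)
            if dst is not None:
                dst.put(result)
            else:
                print('finished: %s' % result)
        finally:
            src.task_done()

gevent.spawn(stage, q_check, q_search, str.upper)             # "check" stage
gevent.spawn(stage, q_search, q_download, lambda w: w + '!')  # "search" stage
gevent.spawn(stage, q_download, None, lambda w: w)            # "download" stage

for word in ('alpha', 'beta'):
    q_check.put_nowait(word)

for q in (q_check, q_search, q_download):
    q.join()   # drain each stage in order, as in the snippet above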
Ejemplo n.º 30
0
class GeventConsumer(object):

    def __init__(
        self,
        consumer_config=None,
        topic=None,
        parse_func=None,
        num=8,
        auto_commit_offset=False,
        is_debug=False,
    ):
        if not parse_func:
            raise Exception("not parse func, system exit")

        self.parse = parse_func
        self.queue = Queue(100)
        self.stop_flag = Event()
        self.num = num
        self.debug = is_debug
        if not self.debug:
            self.auto_commit_offset = auto_commit_offset
            if isinstance(consumer_config, dict):
                consumer_config.update({'enable.auto.commit':self.auto_commit_offset})
            self.consumer = Consumer(consumer_config)
            self.topic = topic
            self.consumer.subscribe(self.topic)

    def sign_handler(self, sig, frame):
        print(" >>> Termination_signal:[{}] to stop".format(sig))
        self.stop_flag.set()

    def kafka_to_queue(self):
        logger.info("Start Producer thread")
        m = 0
        time_diff = 0
        start_time = time.time()
        while not self.stop_flag.is_set():
            msg = self.consumer.poll(1)
            if msg is None:
                time.sleep(0.001)
                continue  # no message yet: keep polling instead of exiting the producer loop
            err = msg.error()
            if err:
                if err.code() == KafkaError._PARTITION_EOF:
                    logger.debug(
                        '%s [%s] reached end at offset %s',
                        msg.topic(), msg.partition(), msg.offset()
                    )
                else:
                    logger.error('kafka failed, system exit')
                    self.stop_flag.set()

            self.queue.put(msg)

            # consumption-rate statistics
            m += 1
            current_time = time.time()
            time_diff = current_time - start_time
            if time_diff > 10:
                rate = m / time_diff
                start_time = current_time
                m = 0
                logger.info('consumer_rate:[%.2f]p/s, queue_size:[%d]' % (rate, self.queue.qsize()))
        logger.info("Producer thread has stopped")

    def consume(self):
        logger.info('Start Thread To Consumer')
        data = dict()
        stop = False
        while True:
            stop = self.stop_flag.is_set()
            if stop and self.queue.empty():
                break
            msg = self.queue.get()
            try:
                data = self.parse(msg.value())
                if data:
                    self.handle_data(data, stop)
            finally:
                self.queue.task_done()
                if not stop and not self.auto_commit_offset:
                    self.consumer.commit(msg)
        logger.info('Thread Consumer has stopped')

    def handle_data(self, data, stop):
        raise NotImplementedError

    def consume_forever(self):
        """
        start consume forever
        """
        signal(SIGTERM, self.sign_handler)
        signal(SIGINT, self.sign_handler)

        if self.debug:
            consume_func = self.mock_consume
            produce_func = self.mock_kafka
        else:
            consume_func = self.consume
            produce_func = self.kafka_to_queue

        task_list = []
        for _ in range(self.num):
            task_list.append(gevent.spawn(consume_func))

        produce_func()
        self.queue.join()
        if not self.debug:
            logger.info("closing kafka...")
            self.consumer.close()
        gevent.joinall(task_list, timeout=5)
        logger.info('Exiting with qsize:%d' % self.queue.qsize())

    # ===========mock kafka and consumer=======================
    def mock_kafka(self):
        logger.info("Start Producer thread")
        m = 0
        time_diff = 0
        start_time = time.time()
        # jing5 msg
        msg = "23230254455354325631393046433232323232320101008e14080b0e0c38426e0101008422551354455354325631393046433232323232323131313131313131313131313131313131313131313131313131313131313131313130010000000002803365818a91eb00010002fffe050018fffe2eeb596f50830005e91efd02649c6b7eb1ac0d80000043c497fd0022f90a3d057b2403032581373635343332310082e99f008a06".decode('hex')
        while not self.stop_flag.is_set():
            self.queue.put(msg)
            m += 1

            # consumption-rate statistics
            current_time = time.time()
            time_diff = current_time - start_time
            if time_diff > 5:
                rate = m / time_diff
                start_time = current_time
                m = 0
                logger.info('consumer_rate:[%.2f]p/s, queue_size:[%d]' % (rate, self.queue.qsize()))
        logger.info("closing produce...")
        logger.info("Producer thread has stopped")

    def mock_consume(self):
        logger.info('Start Thread To Consumer')
        data = dict()
        stop = False
        while True:
            stop = self.stop_flag.is_set()
            if stop and self.queue.empty():
                break
            msg = self.queue.get()
            try:
                data = self.parse(msg)
                self.handle_data(data, stop)
            except Exception as err:
                logger.error("consumer:{} error:{}".format(getcurrent(), err))
            finally:
                self.queue.task_done()
        logger.info('Thread Consumer has stopped')
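GeventConsumer is designed to be subclassed: handle_data() is the only abstract piece, and the parse function is injected through the constructor. Below is a hedged usage sketch; the broker address, group id and topic name are placeholders, and it assumes confluent_kafka is installed and a broker is reachable.

class PrintingConsumer(GeventConsumer):
    def handle_data(self, data, stop):
        # A real consumer would write to a store or hand off to another queue.
        logger.info('parsed record: %r (stopping=%s)', data, stop)

consumer = PrintingConsumer(
    consumer_config={
        'bootstrap.servers': 'localhost:9092',   # placeholder broker
        'group.id': 'example-group',             # placeholder group id
    },
    topic=['example-topic'],                     # subscribe() expects a list
    parse_func=lambda raw: {'raw': raw},
    num=4,
)
consumer.consume_forever()   # blocks until SIGTERM/SIGINT sets stop_flag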
Ejemplo n.º 31
0
class ConcurrentBase(object):
    """
    Provides the following useful methods to its inheriting classes:

    + _debug()
    + _notify()
    + _put()
    + finish()

    """
    
    def __init__(self, monitor, workers=1):
        self.klass = type(self)
        self.klass_name = self.klass.__name__
        self.FINISHED_PROCESSING = '{0}: finished processing'.format(self.klass_name)
        self._monitor = monitor
        self._workers_to_start = workers
        self._read_commands_q, self._write_commands_q = None, None
        self._setup_command_system()
        gevent.sleep(0)

    def _debug(self, msg, debug_level=None):
        self._monitor.debug('{0}: {1}'.format(self.klass_name, msg), debug_level)

    def finish(self):
        self._prevent_new_requests_from_being_processed()
        gevent.spawn(self._wait_for_processing_to_finish)
        gevent.sleep(0)

    def _notify(self, notification_msg):
        self._monitor.notify(self.klass, notification_msg)

    def _prevent_new_requests_from_being_processed(self):
        # don't accept new commands after receiving a finish command
        self._write_commands_q = ThrowawayCommandsQueue()
    
    def _process_commands(self):
        while True:
            try:
                ## do arbitrary command
                func, args = self._read_commands_q.get()
                func(args)
            finally:
                self._read_commands_q.task_done()

    def _put(self, method, args):
        ## tell some worker to do arbitrary command
        self._write_commands_q.put((method, args))
        gevent.sleep(0)

    def _setup_command_system(self):
        # we have two refs to the commands queue,
        # but write_commands_q will switch to throwaway
        # after we receive a finish command
        self._read_commands_q = JoinableQueue(None)
        self._write_commands_q = self._read_commands_q 
        for x in range(self._workers_to_start):
            gevent.spawn(self._process_commands)

    def _wait_for_processing_to_finish(self):
        self._read_commands_q.join()
        self._monitor.notify(self.klass, self.FINISHED_PROCESSING)
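A hedged usage sketch for the class above: the monitor only needs debug() and notify() methods, and work is funnelled through _put(). PrintMonitor and Greeter are illustrative names; finish() additionally relies on ThrowawayCommandsQueue, which is defined elsewhere in the same module.

class PrintMonitor(object):
    def debug(self, msg, debug_level=None):
        print('[debug] %s' % msg)

    def notify(self, klass, msg):
        print('[notify] %s' % msg)

class Greeter(ConcurrentBase):
    def greet(self, name):
        self._put(self._do_greet, name)

    def _do_greet(self, name):
        self._debug('hello, {0}'.format(name))

g = Greeter(PrintMonitor(), workers=2)
g.greet('world')
g.finish()          # swaps in the throwaway queue, then notifies once drained
gevent.sleep(0.1)   # give the workers and the finish watcher time to run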
Ejemplo n.º 32
0
class Migrator:
    def __init__(self, scheme, create_devices=True,
                 write_data=True,
                 start_date="2000-01-01T00:00:00Z",
                 end_date="2014-12-31T00:00:00Z",
                 pool_size=3):
        self.scheme = scheme
        self.create_devices = create_devices
        self.should_write_data = write_data
        self.start_date = start_date
        self.end_date = end_date
        self.tdb = TDBClient(scheme.db_key, scheme.db_key,
                             scheme.db_secret,
                             base_url=scheme.db_baseurl)

        iq_endpoint = HTTPEndpoint(scheme.iq_baseurl,
                                   scheme.iq_key,
                                   scheme.iq_secret)
        self.tiq = TIQClient(iq_endpoint)
        self.queue = JoinableQueue()
        self.lock = Lock()
        self.dp_count = 0
        self.req_count = 0
        self.dp_reset = time.time()
        for i in range(pool_size):
            gevent.spawn(self.worker)

    def worker(self):
        while True:
            series = self.queue.get()
            try:
                self.migrate_series(series)
            finally:
                self.queue.task_done()

    def migrate_all_series(self, start_key="", limit=None):
        start_time = time.time()

        (keys, tags, attrs) = self.scheme.identity_series_filter()
        series_set = self.tdb.list_series(keys, tags, attrs)

        # Keep our own state of whether we passed the resume point, so we don't
        # need to assume client and server sort strings the same.
        found_first_series = False

        series_count = 0

        for series in series_set:
            if not found_first_series and series.key < start_key:
                continue
            else:
                found_first_series = True

            if limit and series_count >= limit:
                print("Reached limit of %d devices, stopping." % (limit))
                break

            if self.scheme.identity_series_client_filter(series):
                # If the series looks like an identity series,
                # queue it to be processed by the threadpool
                self.queue.put(series)
                series_count += 1

        self.queue.join()

        end_time = time.time()
        print("Exporting {} devices took {} seconds".format(series_count, end_time - start_time))

    def migrate_series(self, series):
        print("  Beginning to migrate series: %s" % (series.key))
        error = False
        try:
            if self.create_devices:
                error = self.create_device(series)

            if self.should_write_data and not error:
                error = self.write_data(series)
        except Exception, e:
            logging.exception(e)
            error = True

        if not error:
            print("COMPLETED migrating for series %s" % (series.key))
        else:
            print("ERROR migrating series %s" % (series.key))
Ejemplo n.º 33
0
class InterceptedStreamsMixin(object):
    """
    Mixin class for GethProcess instances that feeds all of the stdout and
    stderr lines into some set of provided callback functions.
    """
    stdout_callbacks = None
    stderr_callbacks = None

    def __init__(self, *args, **kwargs):
        super(InterceptedStreamsMixin, self).__init__(*args, **kwargs)
        self.stdout_callbacks = []
        self.stdout_queue = JoinableQueue()

        self.stderr_callbacks = []
        self.stderr_queue = JoinableQueue()

    def register_stdout_callback(self, callback_fn):
        self.stdout_callbacks.append(callback_fn)

    def register_stderr_callback(self, callback_fn):
        self.stderr_callbacks.append(callback_fn)

    def produce_stdout_queue(self):
        for line in iter(self.proc.stdout.readline, b''):
            self.stdout_queue.put(line)
            gevent.sleep(0)

    def produce_stderr_queue(self):
        for line in iter(self.proc.stderr.readline, b''):
            self.stderr_queue.put(line)
            gevent.sleep(0)

    def consume_stdout_queue(self):
        while True:
            line = self.stdout_queue.get()
            for fn in self.stdout_callbacks:
                fn(line.strip())
            gevent.sleep(0)

    def consume_stderr_queue(self):
        while True:
            line = self.stderr_queue.get()
            for fn in self.stderr_callbacks:
                fn(line.strip())
            gevent.sleep(0)

    def start(self):
        super(InterceptedStreamsMixin, self).start()

        gevent.spawn(self.produce_stdout_queue)
        gevent.spawn(self.produce_stderr_queue)

        gevent.spawn(self.consume_stdout_queue)
        gevent.spawn(self.consume_stderr_queue)

    def stop(self):
        super(InterceptedStreamsMixin, self).stop()

        try:
            self.stdout_queue.join(5)
        except Timeout:
            pass

        try:
            self.stderr_queue.join(5)
        except Timeout:
            pass
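A hedged, self-contained illustration of how the mixin attaches to a process object: all it needs is a proc attribute exposing stdout/stderr pipes plus start()/stop() methods. DemoProcess is a stand-in, not the real GethProcess, and echo is used purely as a demo payload.

import subprocess
import gevent

class DemoProcess(object):
    def start(self):
        self.proc = subprocess.Popen(
            ['echo', 'hello from the child process'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    def stop(self):
        self.proc.wait()

class LoggingDemoProcess(InterceptedStreamsMixin, DemoProcess):
    pass

def show_line(line):
    # Callback registered with the mixin; line arrives already stripped.
    print('stdout: %s' % line.decode())

p = LoggingDemoProcess()
p.register_stdout_callback(show_line)
p.start()           # also spawns the produce/consume greenlets
gevent.sleep(0.5)   # let them drain the pipes before the script exits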
Ejemplo n.º 34
0
        log.error("database initialization error: %s" % (e, ))
        exit()

    #---------------------------------
    # module initialization
    #---------------------------------
    try:

        #the update manager maintains a queue of messages to be sent to connected clients.

        um = UpdateManager()
        pm = ProcessingModule(datadb, resourcedb, um)
        im = InstallationModule(RESOURCE_NAME, RESOURCE_URI, datadb)
        gevent.signal(signal.SIGQUIT, gevent.shutdown)
        gevent.spawn(worker)
        pqueue.join()

        log.info("module initialization completed... [SUCCESS]")
    except Exception, e:
        log.error("module initialization error: %s" % (e, ))

    #---------------------------------
    # Web Server initialization
    #---------------------------------
    try:
        debug(True)
        run(host=HOST, port=PORT, server='gevent')
    except Exception, e:
        log.error("Web Server Exception: %s" % (e, ))
        exit()
Ejemplo n.º 35
0
		#arbitrage_dg = async_result_opportunity.get()
		#print 'transactionTracking() received arbitrage polygon:', arbitrage_dg, '\n'
		print '         Ready to begin transaction....'		
	
	# Input Tracking
	while 1:
		gevent.joinall([
			gevent.spawn(ratesTracking),
			gevent.spawn(arbiterTracking),
			#gevent.spawn(transactionTracking),
		])

		stdscr.addstr(0, 1, 'Arbitrage Program created by Glen Baker', curses.A_REVERSE)
		stdscr.addstr(1, 1, "- Press any key to continue, 'q' to quit")
		c = stdscr.getch()
		if c == ord('q'):
			# Shutdown Curses UI
			curses.nocbreak(); stdscr.keypad(0); curses.echo()
			curses.endwin()

			rates_q = JoinableQueue() # Clear Queue
			break
		#if c == ord('r'):
		#	if rates_tracking:
		#		rates_tracking = False
		#	else:
		#		rates_tracking = True
		#		gevent.spawn(ratesTracking)

	rates_q.join()
Ejemplo n.º 36
0
class BaseLogger(Collected,Jobber):
	"""\
		This class implements one particular way to log things.
		"""
	storage = Loggers.storage
	q = None
	job = None
	ready = False
	_in_flush = False
	def __init__(self, level):
		self.level = level

		global logger_nr
		logger_nr += 1

		if not hasattr(self,"name") or self.name is None:
			self.name = Name(self.__class__.__name__, "x"+str(logger_nr))

		super(BaseLogger,self).__init__()
		self._init()

	def _init(self):
		"""Fork off the writer thread.
		   Override this to do nothing if you don't have one."""

		self.q = JoinableQueue(100)
		self.start_job("job",self._writer)
		self.job.link(self.delete)
		if self.ready is False:
			self.ready = True
		else:
			self.stop_job("job") # concurrency issues?

	def _writer(self):
		errs = 0
		for r in self.q:
			try:
				if r is FlushMe:
					self._flush()
				else:
					self._log(*r)
			except Exception as ex:
				errs += 1
				fix_exception(ex)
				from moat.run import process_failure
				process_failure(ex)
				if errs > 10:
					reraise(ex)
			else:
				if errs:
					errs -= 1
			finally:
				self.q.task_done()
		self.q.task_done() # for the StopIter

	# Collection stuff
	def list(self):
		yield super(BaseLogger,self)
		yield ("Type",self.__class__.__name__)
		yield ("Level",LogNames[self.level])
		yield ("Queue",self.q.qsize())

	def info(self):
		return LogNames[self.level]+": "+self.__class__.__name__

	def delete(self, ctx=None):
		if self.ready:
			self.ready = None
			super(BaseLogger,self).delete(ctx)
		try:
			if self.q:
				self.q.put(StopIteration,block=False)
		except Full:
			## panic?
			pass
		if self.job is not None:
			self.job.join(timeout=1)
			self.stop_job("job")

	def _wlog(self, *a):
		try:
			self.q.put(a, block=False)
		except Full:
			## panic?
			self.delete()

	def _log(self, level, *a):
		a=" ".join(( x if isinstance(x,six.string_types) else str(x)  for x in a))
		self._slog(level,a)

	def _slog(self, a):
		raise NotImplementedError("You need to override %s._log or ._slog" % (self.__class__.__name__,))

	def _flush(self):
		pass

	def log(self, level, *a):
		if LogLevels[level] >= self.level:
			self._wlog(level,*a)
			if TESTING and not (hasattr(a[0],"startswith") and a[0].startswith("TEST")):
				self.flush()
			else:
				gevent.sleep(0)

	def log_event(self, event, level):
		if level >= self.level:
			for r in report_(event,99):
				self._wlog(LogNames[level],r)
			if TESTING:
				self.flush()

	def log_failure(self, err, level=WARN):
		if level >= self.level:
			self._wlog(LogNames[level],format_exception(err))
			if TESTING:
				self.flush()
	
	def flush(self):
		if self._in_flush: return
		if self.q is not None:
			try:
				self._in_flush = True
				self.q.put(FlushMe)
				self.q.join()
			finally:
				self._in_flush = False

	def end_logging(self):
		self.flush()
		self.delete()
Ejemplo n.º 37
0
class ConcurrentBase(object):
    """
    Provides the following useful methods to its inheriting classes:

    + _debug()
    + _notify()
    + _put()
    + finish()

    """
    def __init__(self, monitor, workers=1):
        self.klass = type(self)
        self.klass_name = self.klass.__name__
        self.FINISHED_PROCESSING = '{0}: finished processing'.format(
            self.klass_name)
        self._monitor = monitor
        self._workers_to_start = workers
        self._setup_command_system()
        gevent.sleep(0)

    def _debug(self, msg, debug_level=None):
        self._monitor.debug('{0}: {1}'.format(self.klass_name, msg),
                            debug_level)

    def finish(self):
        self._prevent_new_requests_from_being_processed()
        gevent.spawn(self._wait_for_processing_to_finish)
        gevent.sleep(0)

    def _notify(self, notification_msg):
        self._monitor.notify(self.klass, notification_msg)

    def _prevent_new_requests_from_being_processed(self):
        # don't accept new commands after receiving a finish command
        self._write_commands_q = ThrowawayCommandsQueue()

    def _process_commands(self):
        while True:
            try:
                ## do arbitrary command
                func, args = self._read_commands_q.get()
                func(args)
            finally:
                self._read_commands_q.task_done()

    def _put(self, method, args):
        ## tell some worker to do arbitrary command
        self._write_commands_q.put((method, args))
        gevent.sleep(0)

    def _setup_command_system(self):
        # we have two refs to the commands queue,
        # but write_commands_q will switch to throwaway
        # after we receive a finish command
        self._read_commands_q = JoinableQueue(None)
        self._write_commands_q = self._read_commands_q
        for x in range(self._workers_to_start):
            gevent.spawn(self._process_commands)

    def _wait_for_processing_to_finish(self):
        self._read_commands_q.join()
        self._monitor.notify(self.klass, self.FINISHED_PROCESSING)
Ejemplo n.º 38
0
def start_fluud():
    parser = argparse.ArgumentParser()
    parser.add_argument('host', help='mongo host')
    parser.add_argument('port', help='mongo port')
    parser.add_argument('--login', help='mongo login')
    parser.add_argument('--password', help='mongo password')
    args = parser.parse_args()

    if args.login and args.password:
        login = urllib.quote_plus(args.login)
        password = urllib.quote_plus(args.password)
        uri = 'mongodb://{}:{}@{}:{}/'.format(login, password, args.host, args.port)
    else:
        uri = 'mongodb://{}:{}/'.format(args.host, args.port)

    client = MongoClient(uri)

    template = {
        "first_sample_timestamp": dateutil.parser.parse("2015-09-02T13:08:20.314Z"),
        "last_sample_timestamp":  dateutil.parser.parse("2015-09-02T13:08:20.314Z"),
        "metadata": {
            "typeURI": "http://schemas.dmtf.org/cloud/audit/1.0/event",
            "initiator": {
                "typeURI": "service/security/account/user",
                "host": {
                    "address": "192.168.0.2"
                },
                "id": "openstack:610e7d74-16af-4358-9b77-5275194fa6e4",
                "name": "8b07b49216d243d2b49561759bd104f4"
            },
            "target": {
                "typeURI": "service/security/account/user",
                "id": "openstack:fc43ddcf-d147-466c-adfe-d60bd2b773ba"
            },
            "observer": {
                "typeURI": "service/security",
                "id": "openstack:a256def4-0a36-472e-95e5-e456db4e0681"
            },
            "eventType": "activity",
            "eventTime": "2015-09-02T13:08:20.256770+0000",
            "host": "identity.node-1",
            "action": "authenticate",
            "outcome": "success",
            "id": "openstack:00244b9a-1a43-48a5-b75e-9d68dd647487",
            "event_type": "identity.authenticate"
        },
        "meter": [
            {
                "counter_name": "identity.authenticate.success",
                "counter_unit": "user",
                "counter_type": "delta"
            }
        ],
        "project_id": None,
        "source": "openstack",
        "user_id": "openstack:610e7d74-16af-4358-9b77-5275194fa6e4"
    }

    data = [copy.deepcopy(template) for _ in range(10000)]

    def progress():
        while True:
            print client.ceilometer.resource.count()
            sys.stdout.flush()
            sleep(2)

    spawn(progress)

    def worker():
        while True:
            q.get()
            try:
                client.ceilometer.resource.insert_many(copy.deepcopy(data), False)
            finally:
                q.task_done()

    q = JoinableQueue()
    for i in range(10):
        spawn(worker)

    for i in range(100):
        q.put(0)

    q.join()
Ejemplo n.º 39
0
class Worker (object):
    # http://www.gevent.org/gevent.wsgi.html
    # http://toastdriven.com/blog/2011/jul/31/gevent-long-polling-you/
    # http://blog.pythonisito.com/2012/07/gevent-and-greenlets.html

    DEFAULT_PORT = "9311"


    def __init__ (self, port=DEFAULT_PORT):
        # REST services
        monkey.patch_all()
        signal(SIGQUIT, shutdown)
        self.is_config = False
        self.server = wsgi.WSGIServer(('', int(port)), self._response_handler, log=None)

        # sharding
        self.prefix = None
        self.shard_id = None
        self.ring = None

        # concurrency based on message passing / barrier pattern
        self._task_event = None
        self._task_queue = None

        # UnitOfWork
        self._uow = None


    def shard_start (self):
        """start the worker service for this shard"""
        self.server.serve_forever()


    def shard_stop (self, *args, **kwargs):
        """stop the worker service for this shard"""
        payload = args[0]

        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            logging.info("worker service stopping... you can safely ignore any exceptions that follow")
            self.server.stop()
        else:
            # returns incorrect response in this case, to avoid exception
            logging.error("incorrect shard %s prefix %s", payload["shard_id"], payload["prefix"])


    ######################################################################
    ## authentication methods

    def auth_request (self, payload, start_response, body):
        """test the authentication credentials for a REST call"""
        if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]):
            return True
        else:
            # UoW caller did not provide correct credentials to access shard
            start_response('403 Forbidden', [('Content-Type', 'text/plain')])
            body.put("Forbidden, incorrect credentials for this shard\r\n")
            body.put(StopIteration)

            logging.error("incorrect credentials shard %s prefix %s", payload["shard_id"], payload["prefix"])
            return False


    def shard_config (self, *args, **kwargs):
        """configure the service to run a shard"""
        payload, start_response, body = self.get_response_context(args)

        if self.is_config:
            # hey, somebody call security...
            start_response('403 Forbidden', [('Content-Type', 'text/plain')])
            body.put("Forbidden, shard is already in a configured state\r\n")
            body.put(StopIteration)

            logging.warning("denied configuring shard %s prefix %s", self.shard_id, self.prefix)
        else:
            self.is_config = True
            self.prefix = payload["prefix"]
            self.shard_id = payload["shard_id"]

            # dependency injection for UnitOfWork
            uow_name = payload["uow_name"]
            logging.info("initializing unit of work based on %s", uow_name)

            ff = instantiate_class(uow_name)
            self._uow = ff.instantiate_uow(uow_name, self.prefix)

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            logging.info("configuring shard %s prefix %s", self.shard_id, self.prefix)


    ######################################################################
    ## barrier pattern methods

    @contextmanager
    def wrap_task_event (self):
        """initialize a gevent.Event, to which the UnitOfWork will wait as a listener"""
        self._task_event = Event()
        yield

        # complete the Event, notifying the UnitOfWork which waited
        self._task_event.set()
        self._task_event = None


    def _consume_task_queue (self):
        """consume/serve requests until the task_queue empties"""
        while True:
            payload = self._task_queue.get()

            try:
                self._uow.perform_task(payload)
            finally:
                self._task_queue.task_done()


    def prep_task_queue (self):
        """prepare task_queue for another set of distributed tasks"""
        self._task_queue = JoinableQueue()
        spawn(self._consume_task_queue)


    def put_task_queue (self, payload):
        """put the given task definition into the task_queue"""
        self._task_queue.put_nowait(payload)


    def queue_wait (self, *args, **kwargs):
        """wait until all shards finished sending task_queue requests"""
        payload, start_response, body = self.get_response_context(args)

        if self.auth_request(payload, start_response, body):
            if self._task_event:
                self._task_event.wait()

            # HTTP response first, then initiate long-running task
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)


    def queue_join (self, *args, **kwargs):
        """join on the task_queue, as a barrier to wait until it empties"""
        payload, start_response, body = self.get_response_context(args)

        if self.auth_request(payload, start_response, body):
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("join queue...\r\n")

            ## NB: TODO this step of emptying out the task_queue on
            ## shards could take a while on a large run... perhaps use
            ## a long-polling HTTP request or websocket instead?
            self._task_queue.join()

            body.put("done\r\n")
            body.put(StopIteration)


    ######################################################################
    ## hash ring methods

    def ring_init (self, *args, **kwargs):
        """initialize the HashRing"""
        payload, start_response, body = self.get_response_context(args)

        if self.auth_request(payload, start_response, body):
            self.ring = payload["ring"]

            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

            logging.info("setting hash ring %s", self.ring)


    ######################################################################
    ## WSGI handler for REST endpoints

    def get_response_context (self, args):
        """decode the WSGI response context from the Greenlet args"""
        env = args[0]
        msg = env["wsgi.input"].read()
        payload = loads(msg)
        start_response = args[1]
        body = args[2]

        return payload, start_response, body


    def _response_handler (self, env, start_response):
        """handle HTTP request/response"""
        uri_path = env["PATH_INFO"]
        body = JoinableQueue()

        if self._uow and self._uow.handle_endpoints(self, uri_path, env, start_response, body):
            pass

        ##########################################
        # Worker endpoints

        elif uri_path == '/shard/config':
            # configure the service to run a shard
            Greenlet(self.shard_config, env, start_response, body).start()

        elif uri_path == '/shard/stop':
            # shutdown the service
            ## NB: must parse POST data specially, to avoid exception
            payload = loads(env["wsgi.input"].read())
            Greenlet(self.shard_stop, payload).start_later(1)

            # HTTP response starts first, to avoid error after server stops
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Goodbye\r\n")
            body.put(StopIteration)

        elif uri_path == '/queue/wait':
            # wait until all shards have finished sending task_queue requests
            Greenlet(self.queue_wait, env, start_response, body).start()

        elif uri_path == '/queue/join':
            # join on the task_queue, as a barrier to wait until it empties
            Greenlet(self.queue_join, env, start_response, body).start()

        elif uri_path == '/check/persist':
            ## NB: TODO checkpoint the service state to durable storage
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        elif uri_path == '/check/recover':
            ## NB: TODO restart the service, recovering from most recent checkpoint
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        ##########################################
        # HashRing endpoints

        elif uri_path == '/ring/init':
            # initialize the HashRing
            Greenlet(self.ring_init, env, start_response, body).start()

        elif uri_path == '/ring/add':
            ## NB: TODO add a node to the HashRing
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        elif uri_path == '/ring/del':
            ## NB: TODO delete a node from the HashRing
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put("Bokay\r\n")
            body.put(StopIteration)

        ##########################################
        # utility endpoints

        elif uri_path == '/':
            # dump info about the service in general
            start_response('200 OK', [('Content-Type', 'text/plain')])
            body.put(str(env) + "\r\n")
            body.put(StopIteration)

        else:
            # unknown path
            start_response('404 Not Found', [('Content-Type', 'text/plain')])
            body.put('Not Found\r\n')
            body.put(StopIteration)

        return body
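The barrier cycle behind /queue/wait and /queue/join reduces to a JoinableQueue used as a rendezvous point. The standalone sketch below mirrors prep_task_queue / put_task_queue / queue_join under the assumption that perform_task() is replaced by a print; the payloads are illustrative.

from gevent import spawn
from gevent.queue import JoinableQueue

task_queue = JoinableQueue()

def consume_task_queue():
    # Mirrors _consume_task_queue(): serve requests until the queue empties.
    while True:
        payload = task_queue.get()
        try:
            print('performing task: %r' % (payload,))   # stand-in for uow.perform_task()
        finally:
            task_queue.task_done()

spawn(consume_task_queue)               # prep_task_queue()
for n in range(3):
    task_queue.put_nowait({'task': n})  # put_task_queue()
task_queue.join()                       # queue_join(): barrier until every task is done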
Ejemplo n.º 40
0
class LeakQueue(object):
    def __init__(self, maxsize=0, workers=10):
        """ Setup the gevent queue and the workers.

        :param int maxsize: the max lenght of the queue, default the queue size is infinite.
        :param int workers: the number of workers, default=10.
        """
        self.queue = JoinableQueue(maxsize=maxsize)
        [spawn(self.worker) for x in xrange(workers)]

    def __repr__(self):
        return u'{} items in queue'.format(self.queue.qsize())

    def put(self, operation, item, date=None):
        """ Each item are queued for a later processing.

        :param str operation: the operation name.
        :param item: the item to queued.
        :param date date: when the item is trigger.

        :returns: True if insertions succeeds, False otherwise.
        """
        try:
            self.queue.put({
                "operation": operation,
                "item": item,
                "date": date or datetime.utcnow()
            })
            self.flush()
        except Exception as e:
            logger.critical(
                'unable to put an item in the queue :: {}'.format(e))
            return False
        else:
            return True

    def flush(self, force=False):
        """ Flush the queue and block until all tasks are done.

        :param boolean force: force the queue flushing

        :returns: True if the flush occurs, False otherwise.
        """
        if self.queue.full() or force:
            logger.info('queue is full ({} items) :: flush it !'.format(
                self.queue.qsize()))
            self.queue.join()
            return True
        return False

    def worker(self):
        while True:
            try:
                item = self.queue.get()
                logger.info('get item :: {}'.format(item))

                if not self.worker_process(item):
                    logger.info('re-queue item :: {}'.format(item))
                    self.queue.put(item)
            except Empty:
                logger.info('queue is empty')
            else:
                self.queue.task_done()

    def worker_process(self, item):
        """ Default action execute by each worker.
            Must return a True statement to remove the item,
            otherwise the worker put the item into the queue.
        """
        g_sleep()
        return item
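A hedged usage sketch for LeakQueue: subclass it and override worker_process() with the real work; a truthy return value acknowledges the item, a falsy one re-queues it. EmailQueue and the item names are illustrative, and the snippet assumes the module-level logger used above is configured.

class EmailQueue(LeakQueue):
    def worker_process(self, item):
        print('sending {operation} for {item}'.format(**item))
        return True   # truthy: the item is considered handled, not re-queued

eq = EmailQueue(maxsize=5, workers=2)
for n in range(5):
    eq.put('send', 'message-%d' % n)   # the fifth put fills the queue and triggers a flush
eq.flush(force=True)                   # block until the workers have drained everything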
Ejemplo n.º 41
0
class BaseLogger(Collected, Jobber):
    """\
		This class implements one particular way to log things.
		"""
    storage = Loggers.storage
    q = None
    job = None
    ready = False
    _in_flush = False

    def __init__(self, level):
        self.level = level

        global logger_nr
        logger_nr += 1

        if not hasattr(self, "name") or self.name is None:
            self.name = Name(self.__class__.__name__, "x" + str(logger_nr))

        super(BaseLogger, self).__init__()
        self._init()

    def _init(self):
        """Fork off the writer thread.
		   Override this to do nothing if you don't have one."""

        self.q = JoinableQueue(100)
        self.start_job("job", self._writer)
        self.job.link(self.delete)
        if self.ready is False:
            self.ready = True
        else:
            self.stop_job("job")  # concurrency issues?

    def _writer(self):
        errs = 0
        for r in self.q:
            try:
                if r is FlushMe:
                    self._flush()
                else:
                    self._log(*r)
            except Exception as ex:
                errs += 1
                fix_exception(ex)
                from moat.run import process_failure
                process_failure(ex)
                if errs > 10:
                    reraise(ex)
            else:
                if errs:
                    errs -= 1
            finally:
                self.q.task_done()
        self.q.task_done()  # for the StopIter

    # Collection stuff
    def list(self):
        yield super(BaseLogger, self)
        yield ("Type", self.__class__.__name__)
        yield ("Level", LogNames[self.level])
        yield ("Queue", self.q.qsize())

    def info(self):
        return LogNames[self.level] + ": " + self.__class__.__name__

    def delete(self, ctx=None):
        if self.ready:
            self.ready = None
            super(BaseLogger, self).delete(ctx)
        try:
            if self.q:
                self.q.put(StopIteration, block=False)
        except Full:
            ## panic?
            pass
        if self.job is not None:
            self.job.join(timeout=1)
            self.stop_job("job")

    def _wlog(self, *a):
        try:
            self.q.put(a, block=False)
        except Full:
            ## panic?
            self.delete()

    def _log(self, level, *a):
        a = " ".join(
            (x if isinstance(x, six.string_types) else str(x) for x in a))
        self._slog(level, a)

    def _slog(self, a):
        raise NotImplementedError("You need to override %s._log or ._slog" %
                                  (self.__class__.__name__, ))

    def _flush(self):
        pass

    def log(self, level, *a):
        if LogLevels[level] >= self.level:
            self._wlog(level, *a)
            if TESTING and not (hasattr(a[0], "startswith")
                                and a[0].startswith("TEST")):
                self.flush()
            else:
                gevent.sleep(0)

    def log_event(self, event, level):
        if level >= self.level:
            for r in report_(event, 99):
                self._wlog(LogNames[level], r)
            if TESTING:
                self.flush()

    def log_failure(self, err, level=WARN):
        if level >= self.level:
            self._wlog(LogNames[level], format_exception(err))
            if TESTING:
                self.flush()

    def flush(self):
        if self._in_flush: return
        if self.q is not None:
            try:
                self._in_flush = True
                self.q.put(FlushMe)
                self.q.join()
            finally:
                self._in_flush = False

    def end_logging(self):
        self.flush()
        self.delete()
Ejemplo n.º 42
0
            sleep(1)
        else:
            sleep(5)

if __name__ == '__main__':
    t_status = spawn_link_exception(status_thread)
    t_item_queue = spawn_link_exception(add_to_item_queue)
    for i in range(80):
        spawn_link_exception(run_find_item)
    #t_index_items = spawn_link_exception(index_items)
    for i in range(8):
        spawn_link_exception(run_solr_queue, i)

    #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr])

    sleep(1)
    print 'join item_queue thread'
    t_item_queue.join()
    print 'item_queue thread complete'
    #print 'join item_and_host_queue:', item_and_host_queue.qsize()
    #item_and_host_queue.join()
    #print 'item_and_host_queue complete'
    for host, host_queue in host_queues.items():
        qsize = host_queue.qsize()
        print 'host:', host, qsize
        host_queue.join()

    print 'join solr_queue:', solr_queue.qsize()
    solr_queue.join()
    print 'solr_queue complete'
Ejemplo n.º 43
0
        signal.setitimer(signal.ITIMER_REAL,delay + delay * threshold)
        gevent.sleep(delay)


if __name__ == '__main__':

    if len(sys.argv) < 3:
        sys.exit('Usage: %s worker_id concurrency' % sys.argv[0])

    wd = gevent.spawn(watchdog)
    worker_id = sys.argv[1]
    concurrency = int(sys.argv[2])
    queue = JoinableQueue(maxsize=concurrency)
    pool = Pool(concurrency)

    context = zmq.Context()
    # Socket to receive ssh hosts on
    receiver = context.socket(zmq.PULL)
    #receiver.setsockopt(zmq.RCVHWM, concurrency)
    receiver.connect("tcp://localhost:5557")

    # Socket to send uptime results to
    sender = context.socket(zmq.PUSH)
    sender.connect("tcp://localhost:5558")

    ssh_workers = [
        pool.spawn(uptime, i, worker_id, queue, sender) for i in xrange(concurrency)
    ]
    recv_hosts(receiver, queue)
    queue.join()
Ejemplo n.º 44
0
class HttpScanner(object):
    def __init__(self, args):
        """
        Initialise HTTP scanner
        :param args:
        :return:
        """
        self.args = args
        self.output = HttpScannerOutput(args)
        self._init_scan_options()

        # Reading files
        self.output.write_log("Reading files and deduplicating.", logging.INFO)
        self.hosts = self._file_to_list(args.hosts)
        self.urls = self._file_to_list(args.urls)

        #
        self._calc_urls()
        out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count)
        if self.args.ports is not None:
            out += ' %i ports' % len(self.args.ports)
        self.output.print_and_log(out)

        if self.args.ports is not None and not self.args.syn:
            new_hosts = []
            for host in self.hosts:
                for port in self.args.ports:
                    # print(host, port)
                    new_hosts.append(helper.generate_url(host, port))
            self.hosts = new_hosts

        #
        self._calc_urls()
        self.output.print_and_log('%i full urls to scan' % self.full_urls_count)

        # Queue and workers
        self.hosts_queue = JoinableQueue()
        self.workers = []

    def _file_to_list(self, filename, dedup=True):
        """
        Get list from file
        :param filename: file to read
        :return: list of lines
        """
        if not path.exists(filename) or not path.isfile(filename):
            self.output.print_and_log('File %s not found!' % filename, logging.ERROR)
            exit(-1)

        # Preparing lines list
        lines = filter(lambda line: line is not None and len(line) > 0, open(filename).read().split('\n'))
        if len(lines) == 0:
            self.output.print_and_log('File %s is empty!' % filename, logging.ERROR)
            exit(-1)

        return helper.deduplicate(lines) if dedup else lines

    def _init_scan_options(self):
        # Session
        self.session = session()
        self.session.timeout = self.args.timeout
        self.session.verify = False

        # TODO: debug and check
        # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries))
        # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries))
        # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request
        # Max retries
        adapters.DEFAULT_RETRIES = self.args.max_retries

        # TOR
        if self.args.tor:
            self.output.write_log("TOR usage detected. Making some checks.")
            self.session.proxies = {
                'http': 'socks5://127.0.0.1:9050',
                'https': 'socks5://127.0.0.1:9050'
            }

            url = 'http://ifconfig.me/ip'
            real_ip, tor_ip = None, None

            # Get real IP address
            try:
                real_ip = get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log("Couldn't get real IP address. Check yout internet connection.",
                                          logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Get TOR IP address
            try:
                tor_ip = self.session.get(url).text.strip()
            except Exception as exception:
                self.output.print_and_log("TOR socks proxy doesn't seem to be working.", logging.ERROR)
                self.output.write_log(str(exception), logging.ERROR)
                exit(-1)

            # Show IP addresses
            self.output.print_and_log('Real IP: %s TOR IP: %s' % (real_ip, tor_ip))
            if real_ip == tor_ip:
                self.output.print_and_log("TOR doesn't work! Stop to be secure.", logging.ERROR)
                exit(-1)

        # Proxy
        if self.args.proxy is not None:
            self.session.proxies = {"https": self.args.proxy,
                                    "http": self.args.proxy}

        # Auth
        if self.args.auth is not None:
            items = self.args.auth.split(':')
            self.session.auth = (items[0], items[1])

        # Cookies
        self.cookies = {}
        if self.args.cookies is not None:
            self.cookies = Cookies.from_request(self.args.cookies)

        # Cookies from file
        if self.args.load_cookies is not None:
            if not path.exists(self.args.load_cookies) or not path.isfile(self.args.load_cookies):
                self.output.print_and_log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR)
                exit(-1)

            self.cookies = MozillaCookieJar(self.args.load_cookies)
            self.cookies.load()

        self.session.cookies = self.cookies

        # User-Agent
        self.ua = UserAgent() if self.args.random_agent else None

    def worker(self, worker_id):
        self.output.write_log('Worker %i started.' % worker_id)
        while not self.hosts_queue.empty():
            host = self.hosts_queue.get()
            try:
                self.scan_host(worker_id, host)
            finally:
                self.output.write_log('Worker %i finished.' % worker_id)
                self.hosts_queue.task_done()

    def _head_available(self, host):
        """
        Determine if HEAD requests are allowed
        :param host:
        :return:
        """
        # Trying to use OPTIONS request
        try:
            response = self.session.options(host, headers=self._fill_headers())
            o = response.headers['allow'] if 'allow' in response.headers else None
            if o is not None and o.find('HEAD') != -1:
                return True
        except:
            # TODO: fix
            pass

        try:
            return False if self.session.head(host, headers=self._fill_headers()).status_code == 405 else True
        except:
            # TODO: fix
            return False

    def scan_host(self, worker_id, host):
        # check if resolvable
        ip = helper.url_to_ip(host)
        if ip is None:
            self.output.write_log('Could not resolve %s  Skipping...' % host, logging.WARNING)
            self.output.urls_scanned += len(self.urls)
            return

        # Check for HEAD
        host_url = helper.host_to_url(host)
        head_available = False
        if self.args.head:
            head_available = self._head_available(host)
            if head_available:
                self.output.write_log('HEAD is supported for %s' % host)

        errors_count, urls_scanned = 0, 0
        for url in self.urls:
            full_url = urljoin(host_url, url)
            r = self.scan_url(full_url, head_available)
            urls_scanned += 1
            self.output.urls_scanned += 1

            # Output
            r['worker'] = worker_id
            self.output.write(**r)
            if r['exception'] is not None:
                errors_count += 1

            # Skip host on errors
            if self.args.skip is not None and errors_count == self.args.skip:
                self.output.write_log('Errors limit reached on %s Skipping other urls.' % host, logging.WARNING)
                self.output.urls_scanned += len(self.urls) - urls_scanned
                break

        # cookies bugfix?
        self.session.cookies.clear()

    def _fill_headers(self):
        # Fill UserAgent in headers
        headers = {}
        if self.args.user_agent is not None:
            headers['User-agent'] = self.args.user_agent
        elif self.args.random_agent:
            headers['User-agent'] = self.ua.random

        # Fill Referer in headers
        if self.args.referer is not None:
            headers['Referer'] = self.args.referer

        return headers

    def _parse_response(self, url, response, exception):
        res = {'url': url,
               'response': response,
               'exception': exception}

        if response is None or exception is not None:
            res.update({
                'status': -1,
                'length': -1,
            })
            return res

        try:
            if 'content-length' in response.headers:
                length = int(response.headers['content-length'])
            else:
                length = len(response.text)
        except Exception as ex:
            self.output.write_log(
                "Exception while getting content length for URL: %s Exception: %s" % (url, str(ex)),
                logging.ERROR)
            length = 0

        res.update({
            'status': response.status_code,
            'length': length,
        })
        return res

    def scan_url(self, url, use_head=False):
        self.output.write_log('Scanning %s' % url, logging.DEBUG)

        # Query URL and handle exceptions
        response, exception = None, None
        method = 'HEAD' if use_head else 'GET'
        try:
            # TODO: add support for user:password in URL
            response = self.session.request(method, url, headers=self._fill_headers(),
                                            allow_redirects=self.args.allow_redirects)
        except ConnectionError as ex:
            self.output.write_log('Connection error while querying %s' % url, logging.ERROR)
            exception = ex
        except HTTPError as ex:
            self.output.write_log('HTTP error while querying %s' % url, logging.ERROR)
            exception = ex
        except Timeout as ex:
            self.output.write_log('Timeout while querying %s' % url, logging.ERROR)
            exception = ex
        except TooManyRedirects as ex:
            self.output.write_log('Too many redirects while querying %s' % url, logging.ERROR)
            exception = ex
        except Exception as ex:
            self.output.write_log('Unknown exception while querying %s' % url, logging.ERROR)
            exception = ex


        self.output.write_log('Session cookies after request: %s' % self.session.cookies, logging.DEBUG)

        return self._parse_response(url, response, exception)

    def signal_handler(self):
        """
        Signal handler
        :return:
        """
        # TODO: add saving status via pickle
        self.output.print_and_log('Signal caught. Stopping...', logging.WARNING)
        self.stop()
        exit(signal.SIGINT)

    def _calc_urls(self):
        # Calculations
        self.urls_count = len(self.urls)
        self.hosts_count = len(self.hosts)
        self.full_urls_count = len(self.urls) * len(self.hosts)
        self.output.args.urls_count = self.full_urls_count

    def start(self):
        """
        Start multithreaded scan
        :return:
        """
        # Set signal handler
        gevent.signal(signal.SIGTERM, self.signal_handler)
        gevent.signal(signal.SIGINT, self.signal_handler)
        gevent.signal(signal.SIGQUIT, self.signal_handler)

        # ICMP scan
        if self.args.icmp:
            if geteuid() != 0:
                self.output.print_and_log('To use ICMP scan option you must run as root. Skipping ICMP scan', logging.WARNING)
            else:
                self.output.print_and_log('Starting ICMP scan.')
                self.hosts = helper.icmp_scan(self.hosts, self.args.timeout)
                self._calc_urls()
                self.output.print_and_log('After ICMP scan %i hosts %i urls loaded, %i urls to scan' %
                                          (self.hosts_count, self.urls_count, self.full_urls_count))

        # SYN scan
        if self.args.syn:
            if self.args.tor or self.args.proxy is not None:
                self.output.print_and_log('SYN scan via tor or proxy is impossible!', logging.WARNING)
                self.output.print_and_log('Stopping to prevent deanonymization!', logging.WARNING)
                exit(-1)

            if geteuid() != 0:
                self.output.print_and_log('To use SYN scan option you must run as root. Skipping SYN scan', logging.WARNING)
            else:
                self.output.print_and_log('Starting SYN scan.')
                self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout)
                self._calc_urls()
                self.output.print_and_log('After SYN scan %i hosts %i urls loaded, %i urls to scan' %
                                          (self.hosts_count, self.urls_count, self.full_urls_count))

        # Check thread count against host count
        if self.args.threads > self.hosts_count:
            self.output.write_log('Too many threads! Reducing thread count to %i' % self.hosts_count, logging.WARNING)
            threads_count = self.hosts_count
        else:
            threads_count = self.args.threads

        # Output urls count
        self.output.args.urls_count = self.full_urls_count

        # Start workers
        self.workers = [spawn(self.worker, i) for i in range(threads_count)]

        # Fill and join queue
        for host in self.hosts:
            self.hosts_queue.put(host)
        self.hosts_queue.join()

    def stop(self):
        """
        Stop scan
        :return:
        """
        # TODO: stop correctly
        gevent.killall(self.workers)
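
The OPTIONS/HEAD probe in _head_available above is easy to exercise in isolation. A minimal, self-contained sketch of the same idea, assuming only the requests library (the URL and timeout below are placeholders, not values taken from the scanner):

import requests

def head_available(url, timeout=5):
    # Best-effort check whether a server accepts HEAD requests.
    session = requests.Session()
    # Ask via OPTIONS first and inspect the Allow header.
    try:
        allow = session.options(url, timeout=timeout).headers.get('Allow', '')
        if 'HEAD' in allow:
            return True
    except requests.RequestException:
        pass
    # Fall back to an actual HEAD request; 405 means the method is not allowed.
    try:
        return session.head(url, timeout=timeout).status_code != 405
    except requests.RequestException:
        return False
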
Ejemplo n.º 45
0
class AsynSpiderWithGevent(MySpider):
    def __init__(self, out=BasicAnalysis(), **kwargs):
        super(AsynSpiderWithGevent, self).__init__(out, **kwargs)
        self.q = JoinableQueue()
        self.fetching, self.fetched = set(), set()

    def assign_jobs(self, jobs):
        for job in jobs:
            self.q.put(job)

    def run(self):
        if self.q.empty():
            url = LIST_URL + urllib.urlencode(self.list_query)
            self.q.put(url)
        for _ in range(CONCURRENCY):
            gevent.spawn(self.worker)
        self.q.join()
        assert self.fetching == self.fetched
        self._out.finish()

    def worker(self):
        while True:
            self.fetch_url()

    def fetch_url(self):
        current_url = self.q.get()
        try:
            if current_url in self.fetching:
                return
            self.fetching.add(current_url)
            resp = requests.get(current_url, headers=HEADERS)
            self.fetched.add(current_url)
            xml = etree.fromstring(resp.content)
            has_total_count = xml.xpath("//totalcount/text()")
            if has_total_count:  # non-empty means a list page, otherwise a detail page
                total_count = int(has_total_count[0])
                if total_count == 0:
                    return  # list page out of range
                if self.list_query["pageno"] == 1:
                    pageno = 2
                    # ceiling division so the last, partially filled page is not missed
                    last_page = (total_count + PAGE_SIZE - 1) // PAGE_SIZE
                    while pageno <= last_page:
                        self.list_query["pageno"] = pageno
                        next_list_url = LIST_URL + urllib.urlencode(
                            self.list_query)
                        self.q.put(next_list_url)
                        # logging.info(next_list_url)
                        pageno += 1
                job_ids = xml.xpath("//jobid/text()")
                job_detail_urls = []
                for ID in job_ids:
                    new_detail_query = DETAIL_QUERY.copy()
                    new_detail_query["jobid"] = ID
                    job_detail_urls.append(DETAIL_URL +
                                           urllib.urlencode(new_detail_query))
                for detail_url in job_detail_urls:
                    self.q.put(detail_url)
                    # logging.info(detail_url)

            else:
                self._out.collect(xml)
        finally:
            self.q.task_done()
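
The fetching/fetched sets above are what keep this spider from requesting the same URL twice. A stripped-down sketch of that de-duplication pattern, assuming only gevent and requests (CONCURRENCY, the timeout and the crawl() helper are placeholders, not part of the original spider):

from gevent import monkey
monkey.patch_all()

import gevent
import requests
from gevent.queue import JoinableQueue

CONCURRENCY = 10
q = JoinableQueue()
seen = set()

def worker():
    while True:
        url = q.get()
        try:
            if url in seen:
                continue              # already crawled; finally still marks the task done
            seen.add(url)
            response = requests.get(url, timeout=10)
            print('%s -> %s' % (url, response.status_code))
            # a real spider would parse the response here and q.put() any new URLs
        finally:
            q.task_done()

def crawl(start_urls):
    for _ in range(CONCURRENCY):
        gevent.spawn(worker)
    for url in start_urls:
        q.put(url)
    q.join()                          # block until every queued URL has been handled
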
Ejemplo n.º 46
0
                else:
                    print name, 'exists'


def filewalk(path):
    # return ['file', ...]
    w = os.walk(path)
    files = []
    [files.append(x) for i in w
     for x in i[2]]  # must not be `for x in i for i in w`; that just returns a pile of None
    #for i in w:
    #    for x in i[2]:
    #        files.append(x)
    return files


START = time.time()
FILEEXISTS = filewalk('./')
while True:
    tasks = JoinableQueue(maxsize=10)  # debugged
    #tasks = Queue()
    tasks.join()

    #tag = u"rina+aizawa"  #tag
    recs = [gevent.spawn(tr, tag) for tag in tags]

    workers = [gevent.spawn(worker, n) for n in xrange(20)]

    gevent.joinall(workers)
    gevent.joinall(recs)
Ejemplo n.º 47
0
      log.error( "database initialization error: %s" % ( e, ) )
      exit()
       
  #---------------------------------
  # module initialization
  #---------------------------------
  try:    
     
      #the update manager maintains a queue of messages to be sent to connected clients.
       
      um = UpdateManager()
      pm = ProcessingModule( datadb, resourcedb, um )
      im = InstallationModule( RESOURCE_NAME, RESOURCE_URI, datadb )
      gevent.signal(signal.SIGQUIT, gevent.shutdown) 
      gevent.spawn(worker)
      pqueue.join()
     
      log.info( "module initialization completed... [SUCCESS]" );
  except Exception, e:
      log.error( "module initialization error: %s" % ( e, ) )
    
  #---------------------------------
  # Web Server initialization
  #---------------------------------
  try:
      debug( True )
      run( host=HOST, port=PORT, server='gevent')
  except Exception, e:  
      log.error( "Web Server Exception: %s" % ( e, ) )
      exit()
 
Ejemplo n.º 48
0
            save_queue.put((word, direction, data))

    conn.close()



arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--dict', type=str, help='dict path')
arg_parser.add_argument('--direction', type=str, help='direction')

args = arg_parser.parse_args()

word_list = get_words(args.dict)
print('word list size = %d' % (len(word_list)))
print(word_list[0:10])

fetch_queue = JoinableQueue()

save_queue = JoinableQueue()

for i in range(100):
    gevent.spawn(fetch_worker, fetch_queue, save_queue, args.direction)

gevent.spawn(save_worker, DSN, save_queue)

for word in word_list:
    fetch_queue.put(word)

fetch_queue.join()
save_queue.join()
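
The fetch_worker and save_worker used above are defined earlier in the original script and are not shown in this excerpt. A hypothetical sketch of what such workers could look like, assuming the same get/task_done queue contract; the example.com lookup and the file-backed save_worker are stand-ins, not the original implementations:

import requests

def fetch_worker(fetch_queue, save_queue, direction):
    # Pull a word off the fetch queue, look it up, and hand the result to the save queue.
    while True:
        word = fetch_queue.get()
        try:
            # placeholder lookup; the real worker presumably queries a dictionary service
            resp = requests.get('https://example.com/lookup',
                                params={'word': word, 'direction': direction},
                                timeout=10)
            save_queue.put((word, direction, resp.text))
        finally:
            fetch_queue.task_done()

def save_worker(path, save_queue):
    # Persist results; a plain file stands in for the database connection built from DSN.
    with open(path, 'a') as fh:
        while True:
            word, direction, data = save_queue.get()
            try:
                fh.write('%s\t%s\t%r\n' % (word, direction, data))
            finally:
                save_queue.task_done()
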
Ejemplo n.º 49
0
import gevent
from gevent.queue import JoinableQueue

def worker():
    while True:
        item = q.get()
        try:
            do_work(item)
        finally:
            q.task_done()

num_worker_threads = 3
q = JoinableQueue()
for i in range(num_worker_threads):
    gevent.spawn(worker)

for item in source():
    q.put(item)

q.join()  # block until all tasks are done
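
The pattern above leaves the workers blocked on q.get() forever once the queue drains; the program only exits because the main greenlet returns after q.join(). A variant with an explicit sentinel so the workers terminate cleanly (the sentinel object and the toy work items are illustrative, not part of the original snippet):

import gevent
from gevent.queue import JoinableQueue

_SENTINEL = object()

def worker(q):
    while True:
        item = q.get()
        try:
            if item is _SENTINEL:
                return                    # exit instead of looping forever
            print('processing %r' % (item,))
        finally:
            q.task_done()

def main():
    q = JoinableQueue()
    num_workers = 3
    greenlets = [gevent.spawn(worker, q) for _ in range(num_workers)]
    for item in range(10):
        q.put(item)
    for _ in range(num_workers):
        q.put(_SENTINEL)                  # one sentinel per worker
    q.join()                              # all items and sentinels processed
    gevent.joinall(greenlets)             # every worker has returned

if __name__ == '__main__':
    main()
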
Ejemplo n.º 50
0
    def scrape(self,
               url=None,
               scraper_name='index',
               session=None,
               burst_limit=None,
               rate_limit=None,
               receivers=None,
               initial_scope=None,
               exception_handler=None):
        # avoid shared mutable default arguments
        receivers = receivers or []
        initial_scope = initial_scope or {}
        pool = Pool(10000)  # almost no limit, limit connections instead
        job_queue = JoinableQueue()
        data_queue = JoinableQueue()

        scope = Scope()
        scope['log'] = logbook.Logger(self.name)
        scope['push_data'] = lambda name, data:\
            data_queue.put((name, data))

        rs = session or requests.Session()
        rs.hooks['response'] = lambda r: glocal.log.info(r.url)
        cticket_gen = TicketGenerator(rate_limit, burst_limit)
        adapter = TicketBoundHTTPAdapter(cticket_gen)
        rs.mount('http://', adapter)
        rs.mount('https://', adapter)
        scope['requests'] = rs
        scope.update(initial_scope)

        job_queue.put(Job(self, scraper_name, url, scope))

        aborted = False

        def run_job(job):
            # runs a single job in the current greenlet
            try:
                # setup new log
                for val in job.run():
                    job_queue.put(job.from_yield(val))
            except CapacityError as e:
                job.log.warning('CapacityError: %s, backing off' % e)
                job.log.debug(traceback.format_exc())
                # FIXME: throttle
            except TemporaryError as e:
                job.log.warning('Temporary failure, rescheduling: %s' % e)
                job.log.debug(traceback.format_exc())
                job_queue.put(job.retry())
                # FIXME: add limit for retries
            except PermanentError as e:
                job.log.error(e)
                job.log.debug(traceback.format_exc())
            except CriticalError as e:
                job.log.critical(e)
                job.log.debug(traceback.format_exc())
                job.log.debug('Aborting scrape...')
            except Exception as e:
                job.log.error('Error handling job "%s" "%s": %s' %
                              (scraper_name, url, e))
                job.log.debug(traceback.format_exc())
                if exception_handler:
                    exception_handler(sys.exc_info())
            finally:
                job_queue.task_done()

        def job_spawner():
            # using the pool, spawns a new job for every job in the queue
            while not aborted:
                job = job_queue.get()
                if job is None:
                    break
                pool.spawn(run_job, job)

        def receiver_spawner():
            while not aborted:
                record = data_queue.get()
                if record is None:
                    break

                for receiver in receivers:
                    pool.spawn(receiver.process, record, scope)

                data_queue.task_done()

        spawner_greenlet = pool.spawn(job_spawner)
        receiver_greenlet = pool.spawn(receiver_spawner)

        # join queue
        job_queue.join()
        data_queue.join()

        # tell spawner to exit
        job_queue.put(None)
        data_queue.put(None)

        pool.join()

        # now perform all post-processing
        for receiver in receivers:
            if receiver._post_process:
                post_scope = scope.new_child()
                post_scope['log'] = logbook.Logger('%s-post_process' % self.name)
                post_scope.inject_and_call(receiver._post_process)
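
The shutdown sequence above (join both queues, then push None so the spawner loops can break before pool.join()) is the key piece of this example. A minimal sketch of that two-queue pattern on its own, with placeholder handle/process callables instead of the Job/Scope machinery:

import gevent
from gevent.pool import Pool
from gevent.queue import JoinableQueue

def run_pipeline(jobs, handle, process):
    pool = Pool(100)
    job_queue = JoinableQueue()
    data_queue = JoinableQueue()

    def run_job(job):
        try:
            for record in handle(job):   # handle() yields records for this job
                data_queue.put(record)
        finally:
            job_queue.task_done()

    def job_spawner():
        # spawn a greenlet per queued job; None is the stop signal
        while True:
            job = job_queue.get()
            if job is None:
                break
            pool.spawn(run_job, job)

    def receiver_spawner():
        # consume produced records; None is the stop signal
        while True:
            record = data_queue.get()
            if record is None:
                break
            try:
                process(record)
            finally:
                data_queue.task_done()

    pool.spawn(job_spawner)
    pool.spawn(receiver_spawner)

    for job in jobs:
        job_queue.put(job)

    job_queue.join()        # every job handled and all of its records queued
    data_queue.join()       # every record processed
    job_queue.put(None)     # unblock the spawner loops so pool.join() can return
    data_queue.put(None)
    pool.join()
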
Ejemplo n.º 51
0
    zfile = '%s.zip' % site_id
    with zipfile.ZipFile(zfile) as z:
        z.extractall('tmp')

    file_workers = [
        pool.spawn(upload_files, i, worker_id, file_queue) for i in xrange(concurrency)
    ]

    for dirname, dirnames, filenames in os.walk('tmp/%s' % site_id):
        # print path to all subdirectories first.
        files = []
        for filename in filenames:
            files.append(os.path.join(dirname, filename))
        for f in files:
            file_queue.put(f, block=False)
        print "START_DIRS"
        dirs = []
        for subdirname in dirnames:
            dirs.append(os.path.join(dirname, subdirname))
        if dirs:
            print "POOLING:", dirs
            dir_pool.imap(mkdirs, dirs)
        print "END"
    #joinall(dir_jobs)
    #joinall([
    #    spawn([s_dir] + dirs) for s_dir, dirs in skel_dirs.iteritems()
    #])

    file_queue.join()
Ejemplo n.º 53
0
    for i in range(NUM_THEME_WORKER_THREADS):
        gevent.spawn(theme_worker)

    for i in range(NUM_PROJECT_WORKER_THREADS):
        gevent.spawn(project_worker)

#    i = 0
    for item in get_themes():
        q.put(item)
#        i += 1
#        if i >= 1:
#            break

    try:
        q.join()  # block until all tasks are done
        project_queue.join()
    except KeyboardInterrupt:
        logging.info('CTRL-C: save before exit')
        raise

    length_queue.put(StopIteration)
    max_length = 0
    for length in length_queue:
        if max_length < length:
            max_length = length

    out_queue.put(StopIteration)
    data = None

    headers = ["Theme", "Activities (research area)", "Project Acronym", "Start Date", "End Date", "Project Cost", "Project Funding", "Project Status", "Contract Type", "Coordinator", "Project Reference", "Record"]