Example No. 1
def crawl2csv(filename, start, end):
    """sleep sec 可以用random生成在一个范围的正态分布更好些
    start, end: up主mid范围"""
    Q = Queue()

    with open(filename, 'w', encoding='utf8', newline='') as fwriter:
        mycsvwriter = csv.writer(fwriter)
        mythreads = []
        pthread = Producer(Q,
                           start=start,
                           end=end,
                           func=BiliUser.getVideoList,
                           sleepsec=0.1)
        mythreads.append(pthread)
        consumer_num = 4  # 4 consumer threads
        for _ in range(consumer_num):
            cthread = Consumer(Q,
                               csvwriter=mycsvwriter,
                               func=BiliVideo.store_video_simpleajax,
                               sleepsec=0.01)
            mythreads.append(cthread)
        with Timer() as t:
            for thread in mythreads:
                thread.start()
            for thread in mythreads:
                thread.join()

        print('runtime - (%i_%i) - : %s' % (start, end, t.elapsed))
        print('======= All Done! =======')
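The docstring above suggests drawing the sleep interval from a normal distribution over a range instead of using a fixed value. A minimal sketch of that idea, clamping random.gauss to the range (the helper name rand_sleepsec is hypothetical):

import random

def rand_sleepsec(low=0.05, high=0.2):
    """Hypothetical helper: draw a sleep duration from a normal
    distribution centered on [low, high] and clamp it to that range."""
    mu = (low + high) / 2     # center of the range
    sigma = (high - low) / 6  # ~99.7% of draws land inside the range
    return min(max(random.gauss(mu, sigma), low), high)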
Example No. 2
def crawl2db(getsession, start, end):
    """多线程只使用一个连接会存在一些问题,建立一个session池每个线程一个session
    视频访问速率有很严格的限制,请调大sleepsec"""
    Q = Queue()
    mythreads = []
    pthread = Producer(Q,
                       start=start,
                       end=end,
                       func=BiliUser.getVideoList,
                       sleepsec=0.1)
    mythreads.append(pthread)
    consumer_num = 4  # 4 consumer threads
    sessions = [getsession() for _ in range(consumer_num)]
    for i in range(consumer_num):
        db_session = sessions[i]  # one session per thread
        cthread = Consumer(Q,
                           session=db_session,
                           func=BiliVideo.store_video_simpleajax,
                           sleepsec=0.01)
        mythreads.append(cthread)
    with Timer() as t:
        for thread in mythreads:
            thread.start()
        for thread in mythreads:
            thread.join()
    for session in sessions:
        session.close()

    print('runtime - (%i_%i) - : %s' % (start, end, t.elapsed))
    print('======= All Done! =======')
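The one-session-per-thread note implies getsession is a factory that returns a fresh session on every call. A minimal sketch of such a factory, assuming SQLAlchemy (the engine URL is a placeholder, not the project's real configuration):

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine('sqlite:///bili.db')  # placeholder URL
Session = sessionmaker(bind=engine)

def getsession():
    # Each call returns a new session, so every consumer thread
    # works with its own connection state instead of sharing one.
    return Session()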
Example No. 3
def produce_consume():
    real_path, word_path, config_path = paths()
    check_paths(word_path, config_path)
    config = get_config(config_path)
    try:
        error = check_config(config)
    except Exception as e:
        print(type(e).__name__, e)
        exit(1)
    else:
        if error is not None:
            print(error)
            exit(1)
    q = Queue()
    consumer = Consumer(q)
    for _ in range(16):  # 16 consumer worker threads
        t = Thread(target=consumer.consume_domains)
        t.daemon = True  # daemon threads exit with the main thread
        t.start()
    Producer(q, config, word_path).get_doms()
    q.join()  # block until every queued domain has been processed
    if config['write_to_file']:
        print_red('writing to domains.json')
        # write results in a separate process so this loop is not blocked
        p = Process(target=add_data, args=(real_path, consumer.get_domains()))
        p.start()
    print_red('sleeping zzzzz...')
    sleep(config['interval'])
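Note that q.join() only returns once every queued item has been matched by a q.task_done() call, so consume_domains must follow the standard worker-loop shape. A hypothetical sketch of that contract (not the real Consumer internals):

def consume_domains(self):
    # Hypothetical worker loop: q.join() above unblocks only after
    # task_done() has been called once per item put on the queue.
    while True:
        domain = self.q.get()
        try:
            self.check(domain)  # hypothetical per-domain work
        finally:
            self.q.task_done()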
Example No. 4
class Administrator:
    """
    Receives and logs every message sent within the whole system.
    Is allowed to broadcast messages to all other workers.
    """
    def __init__(self):
        self._log_consumer = Consumer('hospital', 'topic', 'localhost')
        self._log_queue = self._log_consumer.add_queue(
            routing_key='#', callback=self.process_log)
        self._log_consumer.start(new_thread=True)
        self._info_producer = Producer('info', 'fanout', 'localhost')

    def send_info(self, message):
        print('sending info:', message)
        self._info_producer.send_message(message=message)

    def process_log(self, ch, method, properties, body):
        body = body.decode()
        log = colored('LOG: ' + body, 'yellow')
        print(log)
        # acknowledge explicitly so the broker can discard the message
        ch.basic_ack(delivery_tag=method.delivery_tag)
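The Consumer and Producer wrappers are presumably built on an AMQP client such as pika: the '#' routing key on a topic exchange captures every message, while a fanout exchange broadcasts to all bound queues. A rough sketch of that wiring, assuming pika (the names below are assumptions, not the wrappers' real internals):

import pika

conn = pika.BlockingConnection(pika.ConnectionParameters('localhost'))
ch = conn.channel()

# Topic exchange + '#' binding: this queue receives every message.
ch.exchange_declare(exchange='hospital', exchange_type='topic')
qname = ch.queue_declare(queue='', exclusive=True).method.queue
ch.queue_bind(exchange='hospital', queue=qname, routing_key='#')

# Fanout exchange: one publish reaches all bound worker queues.
ch.exchange_declare(exchange='info', exchange_type='fanout')
ch.basic_publish(exchange='info', routing_key='', body=b'broadcast')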
Example No. 5
def crawl2csv(filename, start, end):
    """sleep sec 可以用random生成在一个范围的正态分布更好些
    start,end: aid范围"""
    Q = Queue()
    
    with open(filename, 'w', encoding='utf8', newline='') as fwriter:
        mycsvwriter = csv.writer(fwriter)
        mythreads = []
        pthread = Producer(Q, start=start, end=end, func=lambda x: (x, ), sleepsec=0.5)
        mythreads.append(pthread)
        consumer_num = 4  # 4 consumer threads
        for _ in range(consumer_num):
            cthread = Consumer(Q, csvwriter=mycsvwriter, func=BiliVideo.store_video, sleepsec=0.5)
            mythreads.append(cthread)
        with Timer() as t:
            for thread in mythreads:
                thread.start()
            for thread in mythreads:
                thread.join()
        
        print('runtime: %s' % t.elapsed)
        print('======= All Done! =======')
Example No. 6
def crawl2db(getsession, start, end):
    """多线程只使用一个连接会存在一些问题,建立一个session池每个线程一个session
    视频访问速率有很严格的限制,请调大sleepsec"""
    Q = Queue()
    mythreads = []
    pthread = Producer(Q, start=start, end=end, func=lambda x: (x,), sleepsec=0.5)
    mythreads.append(pthread)
    consumer_num = 4  # 4 consumer threads
    sessions = [getsession() for _ in range(consumer_num)]
    for i in range(consumer_num):
        db_session = sessions[i]  # one session per thread
        cthread = Consumer(Q, session=db_session, func=TddAddFocusVideo.store_video, sleepsec=0.5)
        mythreads.append(cthread)
    with Timer() as t:
        for thread in mythreads:
            thread.start()
        for thread in mythreads:
            thread.join()
    for session in sessions:
        session.close()
        
    print('runtime: %s' % t.elapsed)
    print('======= All Done! =======')
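Since the docstring warns that video requests are strictly rate-limited, an alternative to simply raising sleepsec is a throttle shared by all consumer threads. A minimal sketch (this Throttle class is hypothetical, not part of the project):

import threading
import time

class Throttle:
    """Hypothetical limiter: at most one request per `interval`
    seconds across all threads that call wait()."""
    def __init__(self, interval):
        self.interval = interval
        self._lock = threading.Lock()
        self._next = 0.0

    def wait(self):
        with self._lock:
            now = time.monotonic()
            delay = max(0.0, self._next - now)
            self._next = max(now, self._next) + self.interval
        if delay:
            time.sleep(delay)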