コード例 #1
0
class ProducerConsumerThreadTwitterAsync(object):
    """
    Crawl Twitter timelines with an asyncio producer/consumer pair.

    The producer puts the ``twitter_txt_dict`` built by
    ``TwitterGetUserTimelineModule`` on the module-level ``twiteet_queue``;
    the consumer takes those dictionaries off the queue and issues one
    request per key, using the key as the ``screen_name`` parameter.

    Reference
        Python Consumer Producer pattern
            http://agiliq.com/blog/2013/10/producer-consumer-problem-in-python/
        Multi Thread Design pattern
    """

    def __init__(self):
        """Create the timeline helper and store its initial request result."""
        self.twitter_get_user_timeline = TwitterGetUserTimelineModule()
        self.req = self.twitter_get_user_timeline.twitter.get(
            self.twitter_get_user_timeline.url,
            params=self.twitter_get_user_timeline.params)

    @asyncio.coroutine
    def producer_run(self):
        """
        Running Producer

        Passes the initial request to ``twitter_method`` once, then puts
        ``twitter_txt_dict`` on ``twiteet_queue`` in an endless loop, pausing
        between puts. This coroutine never returns on its own.
        """
        self.twitter_get_user_timeline.twitter_method(self.req)
        while True:
            # Queue.put() waits for a free slot when the queue is full; it
            # does not raise QueueFull (only put_nowait() does), so no
            # handler is needed here.
            yield from twiteet_queue.put(
                self.twitter_get_user_timeline.twitter_txt_dict)
            yield from asyncio.sleep(0.5 + random.random())
            print("Produced ")
            # Pause with asyncio.sleep, not time.sleep: a blocking sleep
            # would freeze the event loop and starve the consumer.
            yield from asyncio.sleep(random.uniform(5, 8))

    @asyncio.coroutine
    def consumer_run(self):
        """
        Running Consumer

        Drains ``twiteet_queue`` until it is empty. For every dictionary
        taken from the queue, one request is made per key and handed to
        ``twitter_method`` together with the key's value; the connection is
        committed after each dictionary.

        The connection is closed once, when the consumer stops, instead of
        after every item, so later items can still use it. A run that finds
        the queue already empty leaves the connection open.
        """
        timeline = self.twitter_get_user_timeline
        consumed_any = False
        try:
            while not twiteet_queue.empty():
                twitter_txt_dict = yield from twiteet_queue.get()
                consumed_any = True
                print("Consume ")
                for screen_name, dict_value in twitter_txt_dict.items():
                    params = {
                        "screen_name": screen_name,
                        "exclude_replies": False,
                        "count": 10000,
                    }
                    req = timeline.twitter.get(timeline.url, params=params)
                    timeline.twitter_method(req,
                                            dict_flag=False,
                                            dict_value=dict_value)
                # SQLite: commit after each dictionary taken from the queue.
                timeline.conn.commit()
                twiteet_queue.task_done()
                yield from asyncio.sleep(0.5)
        finally:
            if consumed_any:
                timeline.conn.close()
class ProducerConsumerThreadTwitterAsync(object):
    """
    Crawl Twitter timelines with an asyncio producer/consumer pair.

    The producer puts the ``twitter_txt_dict`` built by
    ``TwitterGetUserTimelineModule`` on the module-level ``twiteet_queue``;
    the consumer takes those dictionaries off the queue and issues one
    request per key, using the key as the ``screen_name`` parameter.

    Reference
        Python Consumer Producer pattern
            http://agiliq.com/blog/2013/10/producer-consumer-problem-in-python/
        Multi Thread Design pattern
    """

    def __init__(self):
        """Create the timeline helper and store its initial request result."""
        self.twitter_get_user_timeline = TwitterGetUserTimelineModule()
        self.req = self.twitter_get_user_timeline.twitter.get(
            self.twitter_get_user_timeline.url,
            params=self.twitter_get_user_timeline.params)

    @asyncio.coroutine
    def producer_run(self):
        """
        Running Producer

        Passes the initial request to ``twitter_method`` once, then puts
        ``twitter_txt_dict`` on ``twiteet_queue`` in an endless loop, pausing
        between puts. This coroutine never returns on its own.
        """
        self.twitter_get_user_timeline.twitter_method(self.req)
        while True:
            # Queue.put() waits for a free slot when the queue is full; it
            # does not raise QueueFull (only put_nowait() does), so no
            # handler is needed here.
            yield from twiteet_queue.put(
                self.twitter_get_user_timeline.twitter_txt_dict)
            yield from asyncio.sleep(0.5 + random.random())
            print("Produced ")
            # Pause with asyncio.sleep, not time.sleep: a blocking sleep
            # would freeze the event loop and starve the consumer.
            yield from asyncio.sleep(random.uniform(5, 8))

    @asyncio.coroutine
    def consumer_run(self):
        """
        Running Consumer

        Drains ``twiteet_queue`` until it is empty. For every dictionary
        taken from the queue, one request is made per key and handed to
        ``twitter_method`` together with the key's value; the connection is
        committed after each dictionary.

        The connection is closed once, when the consumer stops, instead of
        after every item, so later items can still use it. A run that finds
        the queue already empty leaves the connection open.
        """
        timeline = self.twitter_get_user_timeline
        consumed_any = False
        try:
            while not twiteet_queue.empty():
                twitter_txt_dict = yield from twiteet_queue.get()
                consumed_any = True
                print("Consume ")
                for screen_name, dict_value in twitter_txt_dict.items():
                    params = {
                        "screen_name": screen_name,
                        "exclude_replies": False,
                        "count": 10000,
                    }
                    req = timeline.twitter.get(timeline.url, params=params)
                    timeline.twitter_method(req,
                                            dict_flag=False,
                                            dict_value=dict_value)
                # SQLite: commit after each dictionary taken from the queue.
                timeline.conn.commit()
                twiteet_queue.task_done()
                yield from asyncio.sleep(0.5)
        finally:
            if consumed_any:
                timeline.conn.close()
コード例 #3
0
 def __init__(self):
     """
     Create the timeline helper and store its initial request result.

     ``self.req`` holds whatever the helper's ``twitter.get`` returns for
     the helper's own ``url`` and ``params``.
     """
     timeline = TwitterGetUserTimelineModule()
     self.twitter_get_user_timeline = timeline
     self.req = timeline.twitter.get(timeline.url, params=timeline.params)
コード例 #4
0
class ProducerConsumerThreadTwitter(object):
    """
    Crawl Twitter timelines with a blocking producer/consumer pair.

    The producer puts the ``twitter_txt_dict`` built by
    ``TwitterGetUserTimelineModule`` on the module-level ``twiteet_queue``
    (and on ``check_queue``); the consumer takes those dictionaries off
    ``twiteet_queue`` and issues one request per key, using the key as the
    ``screen_name`` parameter.

    Reference
        Python Consumer Producer pattern
            http://agiliq.com/blog/2013/10/producer-consumer-problem-in-python/
        Multi Thread Design pattern
    """

    def __init__(self):
        """Create the timeline helper and store its initial request result."""
        self.twitter_get_user_timeline = TwitterGetUserTimelineModule()
        self.req = self.twitter_get_user_timeline.twitter.get(
            self.twitter_get_user_timeline.url,
            params=self.twitter_get_user_timeline.params)

    def producer_run(self):
        """
        Running Producer

        Passes the initial request to ``twitter_method`` once, then loops
        forever putting ``twitter_txt_dict`` on both queues and sleeping
        5-8 seconds after each put. This method never returns on its own.
        """
        timeline = self.twitter_get_user_timeline
        timeline.twitter_method(self.req)
        while True:
            # NOTE(review): this tests whether the queue object itself is
            # stored in check_queue. This class only ever puts
            # twitter_txt_dict there, so the test looks always true; it was
            # presumably meant to test twitter_txt_dict. Confirm before
            # changing it: the other branch has no sleep and would spin.
            if twiteet_queue not in check_queue.queue:
                # A blocking put() waits for a free slot and never raises
                # Full, so there is nothing to catch here.
                twiteet_queue.put(timeline.twitter_txt_dict)
                check_queue.put(timeline.twitter_txt_dict)
                print("Produced ")
                time.sleep(random.uniform(5, 8))

    def consumer_run(self):
        """
        Running Consumer

        Loops forever: blocks until a dictionary is available on
        ``twiteet_queue``, makes one request per key, hands each result to
        ``twitter_method`` together with the key's value, commits, and
        sleeps 5-8 seconds.

        The connection is closed once, when the loop is left through an
        exception, instead of after every item, so later items can still
        use it.
        """
        timeline = self.twitter_get_user_timeline
        try:
            while True:
                # A blocking get() waits for an item and never raises Empty.
                twitter_txt_dict = twiteet_queue.get()
                print("Consume ")
                for screen_name, dict_value in twitter_txt_dict.items():
                    params = {
                        "screen_name": screen_name,
                        "exclude_replies": False,
                        "count": 10000,
                    }
                    req = timeline.twitter.get(timeline.url, params=params)
                    timeline.twitter_method(
                        req, dict_flag=False, dict_value=dict_value)
                # SQLite: commit after each dictionary taken from the queue.
                timeline.conn.commit()
                twiteet_queue.task_done()
                # The wait time was chosen with reference to the link below
                #  https://www.w3.org/Protocols/HTTP-NG/http-prob.html
                time.sleep(random.uniform(5, 8))
        finally:
            timeline.conn.close()
 def __init__(self):
     """
     Create the timeline helper and store its initial request result.

     ``self.req`` holds whatever the helper's ``twitter.get`` returns for
     the helper's own ``url`` and ``params``.
     """
     timeline = TwitterGetUserTimelineModule()
     self.twitter_get_user_timeline = timeline
     self.req = timeline.twitter.get(timeline.url, params=timeline.params)
class ProducerConsumerThreadTwitter(object):
    """
    Crawl Twitter timelines with a blocking producer/consumer pair.

    The producer puts the ``twitter_txt_dict`` built by
    ``TwitterGetUserTimelineModule`` on the module-level ``twiteet_queue``
    (and on ``check_queue``); the consumer takes those dictionaries off
    ``twiteet_queue`` and issues one request per key, using the key as the
    ``screen_name`` parameter.

    Reference
        Python Consumer Producer pattern
            http://agiliq.com/blog/2013/10/producer-consumer-problem-in-python/
        Multi Thread Design pattern
    """

    def __init__(self):
        """Create the timeline helper and store its initial request result."""
        self.twitter_get_user_timeline = TwitterGetUserTimelineModule()
        self.req = self.twitter_get_user_timeline.twitter.get(
            self.twitter_get_user_timeline.url, params=self.twitter_get_user_timeline.params
        )

    def producer_run(self):
        """
        Running Producer

        Passes the initial request to ``twitter_method`` once, then loops
        forever putting ``twitter_txt_dict`` on both queues and sleeping
        5-8 seconds after each put. This method never returns on its own.
        """
        timeline = self.twitter_get_user_timeline
        timeline.twitter_method(self.req)
        while True:
            # NOTE(review): this tests whether the queue object itself is
            # stored in check_queue. This class only ever puts
            # twitter_txt_dict there, so the test looks always true; it was
            # presumably meant to test twitter_txt_dict. Confirm before
            # changing it: the other branch has no sleep and would spin.
            if twiteet_queue not in check_queue.queue:
                # A blocking put() waits for a free slot and never raises
                # Full, so there is nothing to catch here.
                twiteet_queue.put(timeline.twitter_txt_dict)
                check_queue.put(timeline.twitter_txt_dict)
                print("Produced ")
                time.sleep(random.uniform(5, 8))

    def consumer_run(self):
        """
        Running Consumer

        Loops forever: blocks until a dictionary is available on
        ``twiteet_queue``, makes one request per key, hands each result to
        ``twitter_method`` together with the key's value, commits, and
        sleeps 5-8 seconds.

        The connection is closed once, when the loop is left through an
        exception, instead of after every item, so later items can still
        use it.
        """
        timeline = self.twitter_get_user_timeline
        try:
            while True:
                # A blocking get() waits for an item and never raises Empty.
                twitter_txt_dict = twiteet_queue.get()
                print("Consume ")
                for screen_name, dict_value in twitter_txt_dict.items():
                    params = {"screen_name": screen_name, "exclude_replies": False, "count": 10000}
                    req = timeline.twitter.get(timeline.url, params=params)
                    timeline.twitter_method(req, dict_flag=False, dict_value=dict_value)
                # SQLite: commit after each dictionary taken from the queue.
                timeline.conn.commit()
                twiteet_queue.task_done()
                # The wait time was chosen with reference to the link below
                #  https://www.w3.org/Protocols/HTTP-NG/http-prob.html
                time.sleep(random.uniform(5, 8))
        finally:
            timeline.conn.close()