class ParserWorkerThread(threading.Thread):
    """
    Worker thread that examines messages and fills in the title for any urls in the message.

    Messages are read from ``in_q``; a message whose details changed as a result of the
    title lookups is put on ``out_q``.
    """

    def __init__(self, thread_id, in_q, out_q, timeout=1):
        """
        :param thread_id: number used to build the thread (and logger) name, "Worker <id>"
        :param in_q: queue the messages to process are read from
        :param out_q: queue the updated messages are written to
        :param timeout: seconds to block on ``in_q`` before re-checking the stop flag
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self.name = "Worker %d" % thread_id
        self._logger = logging.getLogger(self.name)
        self._in_q = in_q
        self._out_q = out_q
        self._timeout = timeout
        self._stopped = threading.Event()

        # Use a parser to do this lookup
        self._parser = HipChatParser()

    def run(self):
        """
        Process messages from the input queue until the stop flag is set.

        A message whose processing raises is logged and skipped, so one bad message
        cannot kill the worker, and ``task_done()`` is called for every item taken
        from the queue whether or not processing succeeded.
        """
        self._logger.debug('Worker starting')
        while not self._stopped.is_set():
            # Only the get() can legitimately time out; keep the handler narrow so an
            # exception raised while processing is never mistaken for an empty queue.
            try:
                item = self._in_q.get(True, self._timeout)
            except Queue.Empty:
                # After sufficient time with no items being on the queue, we should probably reclaim this thread
                continue

            try:
                self._worker_process(item)
            except Exception:
                # Thread boundary: log with traceback and carry on with the next message.
                self._logger.exception('Failed to process: %s', item)
            finally:
                # Always balance the get(), otherwise a join() on the queue would block forever.
                self._in_q.task_done()
        self._logger.debug('Worker stopping')

    def join(self, timeout=None):
        """
        Stop all processing on this thread.

        Sets the stop flag and then waits for the thread to finish; ``timeout`` is
        passed on to ``threading.Thread.join``.
        """
        self._stopped.set()
        super(ParserWorkerThread, self).join(timeout)

    def _worker_process(self, msg):
        """
        Do the actual work of looking up titles of urls in the given message.
        If the message changes as a result of those lookups, dispatch an updated version of the message.
        """
        self._logger.debug('Processing: %s', msg)

        # Remember the serialized details so a change can be detected afterwards
        original_json = msg.details_as_json
        self._lookup_costly_details(msg)
        msg.details_as_json = self._parser.dict_to_json(msg.details)

        # Only dispatch an update if the details have changed
        if original_json != msg.details_as_json:
            self._out_q.put(msg)

    def _lookup_costly_details(self, msg):
        """
        Fill in whatever details we can about the message.

        Every entry under the links detail gets its title replaced by the result of
        the parser's ``fetch_title`` for that entry's url.
        """
        for d in msg.details[HipChatParser.DETAIL_LINKS]:
            d[HipChatParser.DETAIL_TITLE] = self._parser.fetch_title(d[HipChatParser.DETAIL_URL])
 def test_Parse_Mentions_Single_AtEnd(self):
     # A single @mention at the very end of the message must be listed under "mentions".
     expected = ('{\n'
                 '  "mentions": [\n'
                 '    "chris"\n'
                 '  ]\n'
                 '}')
     parser = HipChatParser()
     self.assertEqual(parser.parse('you around? @chris'), expected)
 def test_Parse_Emoticons_Multiple(self):
     # Two emoticons in one message must both be listed, in the order they appear.
     expected = ('{\n'
                 '  "emoticons": [\n'
                 '    "megusta", \n'
                 '    "coffee"\n'
                 '  ]\n'
                 '}')
     parser = HipChatParser()
     self.assertEqual(parser.parse('Good morning! (megusta) (coffee)'), expected)
 def test_Parse_Links_Single(self):
     # A single url must produce one "links" entry whose title comes from the
     # <title> element of the page served by the fake fetcher.
     pages = {
         "http://www.nbcolympics.com": "<title>NBC Olympics | 2014 NBC Olympics in Sochi Russia</title>"}
     parser = HipChatParser(url_fetcher=FakeUrlFetcher(pages))
     message = 'Olympics are starting soon; http://www.nbcolympics.com'
     expected = ('{\n'
                 '  "links": [\n'
                 '    {\n'
                 '      "title": "NBC Olympics | 2014 NBC Olympics in Sochi Russia", \n'
                 '      "url": "http://www.nbcolympics.com"\n'
                 '    }\n'
                 '  ]\n'
                 '}')
     self.assertEqual(parser.parse(message), expected)
    def __init__(self, number_workers=5):
        # The "fast" parser is an ordinary HipChatParser given a NullUrlFetcher, i.e. a
        # url fetcher that returns an empty string, injected through the constructor.
        self._fastParser = HipChatParser(NullUrlFetcher())

        # Worker bookkeeping: how many workers to run, and the ones created so far
        self._number_workers = number_workers
        self._threads = []

        # Messages flow in through the private worker queue and out through out_q
        self._worker_q = Queue.Queue()
        self.out_q = Queue.Queue()
    def __init__(self, thread_id, in_q, out_q, timeout=1):
        threading.Thread.__init__(self)

        # Thread identity; the logger is named after the thread
        self.name = "Worker %d" % thread_id
        self.daemon = True
        self._logger = logging.getLogger(self.name)

        # Queues to read from / write to, and how long to wait on the input queue
        self._in_q, self._out_q = in_q, out_q
        self._timeout = timeout

        # Event used as the stop flag
        self._stopped = threading.Event()

        # Parser that performs the lookups
        self._parser = HipChatParser()
 def test_Parse_Everything(self):
     # One message carrying mentions, an emoticon and a link. The &quot; entities in
     # the fetched <title> are expected to come back as escaped quotes in the JSON.
     pages = {
         "https://twitter.com/jdorfman/status/430511497475670016":
             "<title>Justin Dorfman on Twitter: &quot;nice @littlebigdetail from @HipChat (shows hex "
             "colors when pasted in chat). http://t.co/7cI6Gjy5pq&quot;</title>"}
     parser = HipChatParser(url_fetcher=FakeUrlFetcher(pages))
     message = '@bob @john (success) such a cool feature; https://twitter.com/jdorfman/status/430511497475670016'
     expected = ('{\n'
                 '  "emoticons": [\n'
                 '    "success"\n'
                 '  ], \n'
                 '  "links": [\n'
                 '    {\n'
                 '      "title": "Justin Dorfman on Twitter: \\\"nice @littlebigdetail from @HipChat (shows hex '
                 'colors when pasted in chat). http://t.co/7cI6Gjy5pq\\\"\", \n'
                 '      "url": "https://twitter.com/jdorfman/status/430511497475670016"\n'
                 '    }\n'
                 '  ], \n'
                 '  "mentions": [\n'
                 '    "bob", \n'
                 '    "john"\n'
                 '  ]\n'
                 '}')
     self.assertMultiLineEqual(parser.parse(message), expected)
def test2(strings):
    """
    Parse every string in ``strings`` once, passing each parse call to a
    PerformanceStatistician via ``measure``, then print the statistician's report.

    :param strings: iterable of message strings to parse
    """
    parser = HipChatParser(url_fetcher=FakeUrlFetcher({}))
    stats = PerformanceStatistician()
    for x in strings:
        # NOTE(review): the lambda closes over the loop variable, which is only correct
        # if measure() invokes the callable before returning -- confirm.
        stats.measure(lambda: parser.parse(x))
    # A parenthesised single argument prints the same on Python 2 and is valid on Python 3.
    print(stats.report())
def test1(strings, iterations=1000):
    """
    Time parsing of all of ``strings`` with ``timeit`` and print the total.

    :param strings: sequence of message strings to parse (``len()`` is taken of it)
    :param iterations: how many times the whole sequence is parsed; defaults to 1000
    """
    parser = HipChatParser(url_fetcher=FakeUrlFetcher({}))

    def parse_all():
        # all() stops at the first parse that returns None
        return all(parser.parse(x) is not None for x in strings)

    elapsed = timeit.timeit(parse_all, number=iterations)
    # A parenthesised single argument prints the same on Python 2 and is valid on Python 3.
    print('{} messages: {:f} seconds'.format(len(strings) * iterations, elapsed))
 def test_Parse_Emoticons_TooLongIdentifier_EmptyJsonString(self):
     # A parenthesised identifier that is too long to be an emoticon must yield no details.
     parser = HipChatParser()
     message = 'Good morning! (thisIsTooLongToBeAnEmoticon)'
     self.assertEqual(parser.parse(message), '{}')
 def test_Parse_Emoticons_UnmatchedBrackets_EmptyJsonString(self):
     # Parenthesised text containing a space must not be reported as an emoticon.
     parser = HipChatParser()
     message = 'Good morning! (megusta coffee)'
     self.assertEqual(parser.parse(message), '{}')
 def test_Parse_Mentions_BareAt_EmptyJsonString(self):
     # An "@" with no name attached must not be reported as a mention.
     parsed = HipChatParser().parse('this string contains a lonely @ sign')
     self.assertEqual(parsed, '{}')
 def test_Parse_StringWithoutAnyMarkup_EmptyJsonString(self):
     # Plain text without mentions, emoticons or links must yield an empty JSON object.
     parsed = HipChatParser().parse('this string contains no interesting markup')
     self.assertEqual(parsed, '{}')
 def test_Parse_EmptyString_EmptyJsonString(self):
     # The empty message must yield an empty JSON object.
     parsed = HipChatParser().parse('')
     self.assertEqual(parsed, '{}')
class AsyncParser(object):
    """
    Create a message parser which decodes details about the provided messages and dispatches
    the resulting augmented messages to an output queue (``out_q``).
    """

    _logger = logging.getLogger('AsyncParser')

    def __init__(self, number_workers=5):
        """
        :param number_workers: how many worker threads ``start()`` creates
        """
        self._worker_q = Queue.Queue()
        self.out_q = Queue.Queue()
        self._number_workers = number_workers
        self._threads = []

        # Make a "fast" parser by simply installing a url fetcher that returns an empty string.
        # (sometimes you just have to love the power of dependency injection :)
        self._fastParser = HipChatParser(NullUrlFetcher())

    def start(self):
        """
        Create and start the worker threads that pull messages from the worker queue
        and dispatch updated messages to the out queue.
        """
        self._logger.debug('Starting...')
        # In a real app, we would manage these threads more intelligently
        self._threads = [self._create_worker(i) for i in range(self._number_workers)]
        self._logger.info('Started')

    def stop(self):
        """
        Shutdown this processor in an orderly fashion.

        Every worker is asked to stop before any of them is waited for, so the workers
        wind down concurrently rather than one after another.
        """
        self._logger.debug('Stopping...')
        # A worker's join() raises its stop flag before waiting, so a zero-timeout join
        # signals the worker and returns immediately without blocking on it.
        for t in self._threads:
            t.join(0)
        # Every worker has now been told to stop; wait for each to actually finish.
        for t in self._threads:
            t.join()
        self._logger.info('Stopped')

    def parse(self, msg):
        """
        Parses the given message and sends the result to the output queue.

        The details that need no url lookup are decoded immediately and the message is
        put on ``out_q``. If the message has links it is also handed to the workers.

        NOTE(review): the same ``msg`` object is placed on both queues, so a consumer
        of ``out_q`` may see it being modified by a worker -- confirm this is intended.
        """
        self._logger.debug('Parsing: %s', msg)

        # Quickly decode the details that we can do without delay
        msg.details = self._fastParser.parse_to_dict(msg.text)
        msg.details_as_json = self._fastParser.dict_to_json(msg.details)

        # Pumps out the message. "slow" details are not yet filled in
        self.out_q.put(msg)

        # If the message had links, send it to the workers, which will
        # produce an updated message once the details are filled in
        if HipChatParser.DETAIL_LINKS in msg.details:
            self._worker_q.put(msg)

    def _create_worker(self, worker_id):
        """
        Create and start a worker that will collect more costly message details.

        :param worker_id: number passed to the worker as its thread id
        :return: the started ParserWorkerThread
        """
        w = ParserWorkerThread(worker_id, self._worker_q, self.out_q)
        w.start()
        return w