コード例 #1
0
def main():
    ''' build the processing pipeline and serve it over a socket

    A KalmanFilter, a GetDistance and a DrawSingle stage are appended, in
    that order, to a TaskQueue; the queue's ``update`` callable is then
    handed to ``socketRun`` on port 8070.
    '''
    pipeline = TaskQueue()

    # Stages are constructed first and registered afterwards, in this order.
    stages = (
        KalmanFilter(),
        GetDistance(),
        DrawSingle(ylim_min=0, ylim_max=10, key='distance'),
    )
    for stage in stages:
        pipeline.append(stage)

    socketRun(pipeline.update, port=8070)
コード例 #2
0
    def test_page_crawler_init(self):
        ''' test generic page crawler initialization

        One crawler is constructed per URL variant: a plain string, a
        unicode literal, and a URL containing a non-ASCII character.  A
        fresh Page is built for every URL so that each variant actually
        reaches the crawler constructor.
        '''

        queue = TaskQueue()
        keywords = ['nyu', 'poly']
        urls = [
            'http://www.nyu.edu/engineering',
            u'http://www.nyu.edu/engineering',
            u'http://www.google.com/search?q=♥',
        ]

        for url in urls:
            page = Page(url, depth=1, score=9)
            # Construction itself is the check: it must not raise.
            GenericPageCrawler(page, queue, None, None, keywords, fake=True)
コード例 #3
0
def main():
    ''' main routine: build the collaborators and run the dispatcher '''

    settings = Settings()          # argument passing and config file reading
    task_queue = TaskQueue()       # queue service
    dedupe_cache = DeDupeCache()   # de-duplicate hash

    # kick off dispatcher
    Dispatcher(task_queue, dedupe_cache, settings).run()
コード例 #4
0
    def test_simple_enqueue_dequeue(self):
        ''' en-queue a single page, inspect the counters, de-queue it again '''
        q = TaskQueue()
        task = Page('http://www.google.com', 1, 80)
        q.en_queue(task)

        # assertEqual reports both operands when a check fails, which
        # assertTrue(a == b) cannot do.
        self.assertEqual(q.total_task_cnt, 1)
        self.assertEqual(q.prio_task_cnt[0], 1)
        self.assertEqual(q.prio_task_list[0], [task])

        outtask = q.de_queue()
        self.assertEqual(outtask.depth, 1)
        self.assertEqual(outtask.score, 80)
        self.assertEqual(outtask.url, 'http://www.google.com')

        self.check_empty_queue(q)
コード例 #5
0
    def test_normalize_url(self):
        ''' test normalize url function

        Both inputs carry one or more ``#...`` suffixes and are expected to
        normalize to the same URL without them.
        '''

        expected = 'http://www.poly.edu/admission/page.html'

        url = 'http://www.poly.edu/admission/page.html#tuition'
        self.assertEqual(vc.normalize_link(url), expected)

        url2 = 'http://www.poly.edu/admission/page.html#tuition#abc'
        self.assertEqual(vc.normalize_link(url2), expected)
コード例 #6
0
    def test_simplify_url(self):
        ''' test simplify url function

        Each case pairs an input URL with the value ``vc.simplify_link`` is
        expected to return for it.  The cases cover ``.`` and ``..`` path
        segments (including more ``..`` than there are directories) and
        trailing index pages.
        '''
        root = 'http://www.poly.edu'

        cases = [
            # '.' and '..' segments
            ('http://www.poly.edu/admission/../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/./page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/../../../../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/aa/bb/cc/../page.html',
             'http://www.poly.edu/aa/bb/page.html'),
            ('http://www.poly.edu/aa/bb/cc/../../../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/aa/bb/cc/../../../../page.html',
             'http://www.poly.edu/page.html'),
            ('http://www.poly.edu/./././aa/././././bb/./cc/.././././page.html',
             'http://www.poly.edu/aa/bb/page.html'),
            # trailing index pages
            ('http://www.poly.edu/index.html', root),
            ('http://www.poly.edu/index.htm', root),
            ('http://www.poly.edu/index.jsp', root),
            ('http://www.poly.edu/index.asp', root),
            ('http://www.poly.edu/index.aspx', root),
            ('http://www.poly.edu/index.php', root),
            # both combined
            ('http://www.poly.edu/a/../../b/index.html',
             'http://www.poly.edu/b'),
        ]

        for link, expected in cases:
            self.assertEqual(vc.simplify_link(link), expected)
コード例 #7
0
    def test_bulk_enqueue_dequeue(self):
        ''' en-queue 10000 identical pages, then drain the queue '''
        q = TaskQueue()

        for _ in range(10000):
            q.en_queue(Page('http://www.nyu.edu/engineering', 2, 60))

        self.assertEqual(q.total_task_cnt, 10000)
        self.assertEqual(q.prio_task_cnt[0], 10000)
        self.assertEqual(len(q.prio_task_list[0]), 10000)

        # Drain until de_queue() hands back a falsy value.
        while True:
            outtask = q.de_queue()
            if not outtask:
                break

            self.assertEqual(outtask.url, 'http://www.nyu.edu/engineering')
            self.assertEqual(outtask.depth, 2)
            self.assertEqual(outtask.score, 60)

        self.check_empty_queue(q)
コード例 #8
0
 def test_init(self):
     ''' a freshly constructed queue must pass the empty-queue check '''
     self.check_empty_queue(TaskQueue())