Example #1
0
class TestProcessThreads(unittest.TestCase):

    def setUp(self):
        mockweb = MockWebClient(HTMLFactory.tablamensajes_html())
        self.threadfactory = AsuntoFactory(mockweb)
        self.mongo = MockMongo()
        self.processthread = ProcessThreads(self.mongo, MockMsgPageFactory())
        self.navhtml = HTMLFactory.tablamensajes_html() +  HTMLFactory.navigation_url()

    def test_getlistofthreads(self):
        self.assertEquals("", self.threadfactory.nextUrl())
        self.processthread.storeThreads(self.threadfactory)

        self.assertEquals(2, self.mongo.treadssaved)

    def test_createThreadStruct(self):
        self.assertEquals("", self.threadfactory.nextUrl())
        thread = self.threadfactory.create(HTMLFactory.asunto())
        msgs = list()
        struct = self.processthread._createThreadStruct(thread, msgs)

        self.assertEquals("LaBSK", struct['source'])
        self.assertEquals(thread['title'], struct['title'])
        self.assertIsInstance(struct['msgs'], list)

    def test_createThreadStruct_with_msgs(self):
        self.processthread.storeThreads(self.threadfactory)

        self.assertEquals(2, len(self.mongo.listofthreads))
        thread = self.mongo.listofthreads[0]
        self.assertEqual(2, len(thread['msgs']))

    def test_default_pagelimit_is_1(self):
        mockweb = MockWebClient(self.navhtml)
        self.processthread.storeThreads(AsuntoFactory(mockweb))

        self.assertEquals(2, len(self.mongo.listofthreads))

    def test_two_loops(self):
        mockweb = MockWebClient(self.navhtml)
        self.processthread.pagelimit = 2
        self.processthread.storeThreads(AsuntoFactory(mockweb))

        self.assertEquals(4, len(self.mongo.listofthreads))

    def test_generate_new_factory(self):
        oldpage = AsuntoFactory( MockWebClient(self.navhtml))
        page = self.processthread._nextPage(oldpage)

        self.assertEquals(u"http://labsk.net/index.php?board=18.20", page.webclient.url)
Example #2
0
        """ Old thread seems to be the same one than the new thread        """
        self.thread -= 1
        print "Skipping ", new.title, ", ", new.date, " | ", new.link

    def __str__(self):
        """Return a short text summary containing the current value of ``self.thread``."""
        # No trailing line continuation: the expression is complete on this line.
        return ". Threads: " + str(self.thread)


# Open the Mongo wrapper, then pass "labsk_merge" to query() and a
# timestamped "labsk_<now>" name to insert().
# NOTE(review): presumably these select the merge collection and a fresh
# per-run collection -- confirm against the MongoDB wrapper.
db = MongoDB()
db.query("labsk_merge")
db.insert("labsk_" + str(datetime.now()))

# Wall-clock start, used to report the total run time below.
starttime = datetime.now()

# Wire a listener into the thread processor and configure its limits.
listener = StdListener()
threads = ProcessThreads(db, MsgPageFactory())
threads.setListener(listener)
threads.setPageLimit(1)
threads.setMsgPageLimit(250)  # Never lower this value or you will lose messages; at least keep it the same

# Run the scrape over labsk_urls (defined outside this section) and time it.
threads.scrapListOfURL(labsk_urls)
delta = datetime.now() - starttime

# Report elapsed time, the configured limits and the listener's summary.
print "----------------------------------------------"
print "Total time: ", delta
print "Page limit ", threads.pagelimit, " Msg page limit ", threads.msgpagelimit
print str(listener)

# NOTE(review): merge('link') presumably merges stored threads keyed by their
# 'link' field -- confirm; its result is printed as-is.
mr = db.merge('link')
print str(mr)
Example #3
0
 def setUp(self):
     """Create the mock-backed fixtures shared by every test in this case."""
     # Thread factory reading the thread-table HTML through a mock web client.
     table_html = HTMLFactory.tablamensajes_html()
     self.threadfactory = AsuntoFactory(MockWebClient(table_html))

     # Mock database handed to the processor; also kept on self for assertions.
     storage = MockMongo()
     self.mongo = storage
     self.processthread = ProcessThreads(storage, MockMsgPageFactory())

     # Thread-table HTML followed by the output of HTMLFactory.navigation_url().
     paged_table = HTMLFactory.tablamensajes_html()
     nav_part = HTMLFactory.navigation_url()
     self.navhtml = paged_table + nav_part
Example #4
0
class TestProcessThreads(unittest.TestCase):

    def setUp(self):
        mockweb = MockWebClient(HTMLFactory.tablamensajes_html())
        self.threadfactory = AsuntoFactory(mockweb)
        self.mongo = MockMongo()
        self.processthread = ProcessThreads(self.mongo, MockMsgPageFactory())
        self.navhtml = HTMLFactory.tablamensajes_html() +  HTMLFactory.navigation_url()

    def test_getlistofthreads(self):
        self.assertEquals("", self.threadfactory.nextUrl())
        self.processthread.storeThreads(self.threadfactory)

        self.assertEquals(2, self.mongo.treadssaved)

    def test_createThreadStruct_with_msgs(self):
        self.processthread.storeThreads(self.threadfactory)

        self.assertEquals(2, len(self.mongo.listofthreads))
        thread = self.mongo.listofthreads[0]
        self.assertEqual(2, len(thread['msgs']))

    def test_createThreadStruct_stores_locaton(self):
        urlobject = mock()
        when(urlobject).description().thenReturn("X")
        self.threadfactory.urlobject = urlobject

        self.processthread.storeThreads(self.threadfactory)

        self.assertEquals(2, len(self.mongo.listofthreads))
        thread = self.mongo.listofthreads[0]
        self.assertEqual("X", thread['location'])


    def test_default_pagelimit_is_1(self):
        mockweb = MockWebClient(self.navhtml)
        self.processthread.storeThreads(AsuntoFactory(mockweb))

        self.assertEquals(2, len(self.mongo.listofthreads))

    def test_two_loops(self):
        mockweb = MockWebClient(self.navhtml)
        self.processthread.pagelimit = 2
        self.processthread.storeThreads(AsuntoFactory(mockweb))

        self.assertEquals(4, len(self.mongo.listofthreads))

    def test_generate_new_factory(self):
        oldpage = AsuntoFactory( MockWebClient(self.navhtml))
        page = self.processthread._nextPage(oldpage)

        self.assertEquals(u"http://labsk.net/index.php?board=18.20", page.webclient.url)