def scrapeMessages(topicID, pageNum):
    """Scrape all messages on the specified topic, page combination.

    The page number is converted to a request offset of
    ``(pageNum - 1) * 20`` (so page 1 maps to offset 0). The page is
    fetched with ``bitcointalk.requestTopicPage``, parsed with
    ``bitcointalk.parseTopicPage``, and the parsed ``'messages'`` entry is
    passed to ``pg.insertMessages`` and then returned.

    CAVEAT: Messages are not memoized.
    """
    pageIndex = pageNum - 1
    pageOffset = pageIndex * 20
    parsedPage = bitcointalk.parseTopicPage(
        bitcointalk.requestTopicPage(topicID, pageOffset))
    messages = parsedPage['messages']
    pg.insertMessages(messages)
    return messages
def scrapeMessages(topicId, pageNum):
    """Scrape all messages on the specified topic, page combination.

    The page number is converted to a request offset of
    ``(pageNum - 1) * 20`` (so page 1 maps to offset 0). The raw HTML
    returned by ``bitcointalk.requestTopicPage`` is first handed to
    ``_saveToFile`` under the label ``"topicpage"`` with a
    ``"<topicId>.<offset>"`` identifier, then parsed. The parsed
    ``'messages'`` entry is passed to ``pg.insertMessages`` and returned.

    CAVEAT: Messages are not memoized.
    """
    pageOffset = (pageNum - 1) * 20
    rawHtml = bitcointalk.requestTopicPage(topicId, pageOffset)
    # Identifier combines the topic and the offset of the requested page.
    snapshotId = "{0}.{1}".format(topicId, pageOffset)
    _saveToFile(rawHtml, "topicpage", snapshotId)
    messages = bitcointalk.parseTopicPage(rawHtml)['messages']
    pg.insertMessages(messages)
    return messages
def scrapeTopic(topicID):
    """Scrape information on the specified topic.

    Fetches the topic with ``bitcointalk.requestTopicPageAll`` and parses
    it. The ``'messages'`` entry is removed from the parsed result and
    passed to ``pg.insertMessages``. The remaining topic data is passed to
    ``pg.insertTopic`` only when ``topicID`` is not already recorded in
    ``memo["topics"]``, after which the ID is added there.

    Returns the parsed topic data without its ``'messages'`` entry.
    """
    topic = bitcointalk.parseTopicPage(
        bitcointalk.requestTopicPageAll(topicID))
    pg.insertMessages(topic.pop('messages'))

    # Topic already recorded in the memo: skip the topic insert.
    if topicID in memo["topics"]:
        return topic

    pg.insertTopic(topic)
    memo["topics"].add(topicID)
    return topic
def testTopic(self):
    """Test insert and select topic functions.

    Loads the ``example/topic_14.html`` fixture located next to this file,
    parses it, drops the ``'messages'`` entry, inserts the remaining topic
    datum twice, and asserts that ``selectTopic(14)`` returns an equal
    datum.
    """
    fixturePath = "{0}/example/topic_14.html".format(
        os.path.dirname(os.path.abspath(__file__)))
    # The context manager closes the fixture even if read() raises.
    with codecs.open(fixturePath, 'r', 'utf-8') as f:
        html = f.read()

    datum = bitcointalk.parseTopicPage(html)
    del datum['messages']

    insertTopic(datum)
    # Make sure a second insert doesn't cause problems
    insertTopic(datum)

    selectDatum = selectTopic(14)
    self.assertEqual(datum, selectDatum)
def testMessages(self):
    """Test insert and select messages functions.

    Loads the ``example/topic_602041.12400.html`` fixture located next to
    this file, parses it, inserts its ``'messages'`` entry twice, and
    asserts that ``selectMessages`` for the listed message IDs returns
    data equal to the parsed messages.
    """
    fixturePath = "{0}/example/topic_602041.12400.html".format(
        os.path.dirname(os.path.abspath(__file__)))
    # The context manager closes the fixture even if read() raises.
    with codecs.open(fixturePath, 'r', 'utf-8') as f:
        html = f.read()

    data = bitcointalk.parseTopicPage(html)['messages']

    insertMessages(data)
    # Make sure a second insert doesn't cause problems
    insertMessages(data)

    selectData = selectMessages([
        8125509, 8125667, 8125970, 8126348,
        8126542, 8126615, 8126655, 8126666,
    ])
    self.assertEqual(data, selectData)
def testMessages(self):
    """Test insert and select messages functions.

    Reads the ``example/topic_602041.12400.html`` fixture from the
    directory containing this file and parses it. The parsed
    ``'messages'`` entry is inserted twice, then compared for equality
    against what ``selectMessages`` returns for the listed message IDs.
    """
    exampleDir = os.path.dirname(os.path.abspath(__file__))
    fixturePath = "{0}/example/topic_602041.12400.html".format(exampleDir)
    # Use a context manager so the file handle is released on any error.
    with codecs.open(fixturePath, 'r', 'utf-8') as f:
        html = f.read()

    parsed = bitcointalk.parseTopicPage(html)
    data = parsed['messages']

    insertMessages(data)
    # Make sure a second insert doesn't cause problems
    insertMessages(data)

    messageIds = [
        8125509, 8125667, 8125970, 8126348,
        8126542, 8126615, 8126655, 8126666,
    ]
    selectData = selectMessages(messageIds)
    self.assertEqual(data, selectData)