def scrapeMessages(topicID, pageNum):
    """Fetch one page of a topic, store its messages, and return them.

    The page is requested at offset ``(pageNum - 1) * 20`` (presumably 20
    messages per page -- confirm against the site's pagination).

    CAVEAT: Messages are not memoized.

    Returns whatever the parsed page holds under its 'messages' key.
    """
    start = (pageNum - 1) * 20
    page_html = bitcointalk.requestTopicPage(topicID, start)
    # NOTE: the raw HTML is not written to disk in this variant.
    messages = bitcointalk.parseTopicPage(page_html)['messages']
    pg.insertMessages(messages)
    return messages
def scrapeMessages(topicId, pageNum):
    """Download, archive, parse, and store one page of a topic's messages.

    The page is requested at offset ``(pageNum - 1) * 20`` (presumably 20
    messages per page -- confirm against the site's pagination). The raw
    HTML is handed to ``_saveToFile`` under the "topicpage" category with a
    "<topicId>.<offset>" label before it is parsed.

    CAVEAT: Messages are not memoized.

    Returns whatever the parsed page holds under its 'messages' key.
    """
    start = (pageNum - 1) * 20
    page_html = bitcointalk.requestTopicPage(topicId, start)

    # Keep a copy of the raw page before parsing it.
    label = "{0}.{1}".format(topicId, start)
    _saveToFile(page_html, "topicpage", label)

    parsed = bitcointalk.parseTopicPage(page_html)
    messages = parsed['messages']
    pg.insertMessages(messages)
    return messages
def scrapeTopic(topicID):
    """Fetch a whole topic, store its messages, and record the topic once.

    The messages are always inserted. The topic record itself is inserted
    only when ``topicID`` is not yet in ``memo["topics"]``, after which the
    ID is added there.

    Returns the parsed topic data with its 'messages' entry removed.
    """
    topic = bitcointalk.parseTopicPage(
        bitcointalk.requestTopicPageAll(topicID)
    )
    pg.insertMessages(topic.pop('messages'))

    known_topics = memo["topics"]
    if topicID not in known_topics:
        pg.insertTopic(topic)
        known_topics.add(topicID)

    return topic
def _insertTopicPage(data):
    """Store a parsed topic page: its messages first, then the topic itself.

    The "messages" entry is removed from ``data`` in place, so the caller's
    object no longer contains it afterwards and only the remaining fields
    are passed to ``pg.insertTopic``.
    """
    messages = data.pop("messages")
    pg.insertMessages(messages)
    pg.insertTopic(data)
def _insertTopicPage(data):
    """Split the messages off ``data`` and insert both parts.

    ``data`` is mutated: its 'messages' entry is popped and inserted via
    ``pg.insertMessages``; what is left of ``data`` is then inserted via
    ``pg.insertTopic``.
    """
    topic_messages = data.pop('messages')
    pg.insertMessages(topic_messages)
    # ``data`` no longer carries 'messages' at this point.
    pg.insertTopic(data)