Exemple #1
0
def scrapeMessages(topicID, pageNum):
    """Scrape all messages on the specified topic, page combination.

    CAVEAT: Messages are not memoized.

    The page is requested from ``bitcointalk``, parsed, and its
    ``'messages'`` entry is handed to ``pg.insertMessages`` before being
    returned.

    Args:
        topicID: Identifier of the topic to request.
        pageNum: Page number; converted to a message offset as
            ``(pageNum - 1) * 20``.

    Returns:
        The ``'messages'`` value of the parsed topic page.
    """
    # NOTE(review): 20 is presumably the number of messages per topic page,
    # with pageNum being 1-based -- confirm against bitcointalk.
    offset = (pageNum - 1) * 20
    html = bitcointalk.requestTopicPage(topicID, offset)
    # Debug aid (disabled):
    # _saveToFile(html, "topicpage", "{0}.{1}".format(topicID, offset))
    messages = bitcointalk.parseTopicPage(html)['messages']
    pg.insertMessages(messages)
    return messages
def scrapeMessages(topicId, pageNum):
    """Scrape all messages on the specified topic, page combination.

    CAVEAT: Messages are not memoized.

    The page is requested from ``bitcointalk``, saved via ``_saveToFile``,
    parsed, and its ``'messages'`` entry is handed to ``pg.insertMessages``
    before being returned.

    Args:
        topicId: Identifier of the topic to request.
        pageNum: Page number; converted to a message offset as
            ``(pageNum - 1) * 20``.

    Returns:
        The ``'messages'`` value of the parsed topic page.
    """
    # NOTE(review): 20 is presumably the number of messages per topic page,
    # with pageNum being 1-based -- confirm against bitcointalk.
    offset = (pageNum - 1) * 20
    html = bitcointalk.requestTopicPage(topicId, offset)
    # Keep a copy of the raw page, labelled "<topicId>.<offset>".
    _saveToFile(html, "topicpage", "{0}.{1}".format(topicId, offset))
    messages = bitcointalk.parseTopicPage(html)['messages']
    pg.insertMessages(messages)
    return messages
Exemple #3
0
def scrapeTopic(topicID):
    """Fetch a topic, store its messages, and record the topic once.

    The parsed page's ``'messages'`` entry is removed and passed to
    ``pg.insertMessages``. The remaining topic data is passed to
    ``pg.insertTopic`` only when ``topicID`` is not yet in
    ``memo["topics"]``; the ID is then added to that collection.

    Returns:
        The parsed topic data with the ``'messages'`` key removed.
    """
    topic = bitcointalk.parseTopicPage(
        bitcointalk.requestTopicPageAll(topicID))
    # Messages are stored separately from the topic-level data.
    pg.insertMessages(topic.pop('messages'))

    # Topic already recorded during this run: nothing more to insert.
    if topicID in memo["topics"]:
        return topic

    pg.insertTopic(topic)
    memo["topics"].add(topicID)
    return topic
Exemple #4
0
 def testTopic(self):
     """Test insert and select topic functions.

     Parses the example page for topic 14, drops its messages, inserts
     the topic twice, and checks that selecting topic 14 returns the
     parsed data.
     """
     fixturePath = "{0}/example/topic_14.html".format(
         os.path.dirname(os.path.abspath(__file__)))
     # The context manager closes the fixture even if read() raises.
     with codecs.open(fixturePath, 'r', 'utf-8') as f:
         html = f.read()
     datum = bitcointalk.parseTopicPage(html)
     del datum['messages']
     insertTopic(datum)
     # Make sure a second insert doesn't cause problems
     insertTopic(datum)
     selectDatum = selectTopic(14)
     self.assertEqual(datum, selectDatum)
Exemple #5
0
 def testTopic(self):
     """Test insert and select topic functions.

     Parses the example page for topic 14, drops its messages, inserts
     the topic twice, and checks that selecting topic 14 returns the
     parsed data.
     """
     fixturePath = "{0}/example/topic_14.html".format(
         os.path.dirname(os.path.abspath(__file__)))
     # The context manager closes the fixture even if read() raises.
     with codecs.open(fixturePath, 'r', 'utf-8') as f:
         html = f.read()
     datum = bitcointalk.parseTopicPage(html)
     del datum['messages']
     insertTopic(datum)
     # Make sure a second insert doesn't cause problems
     insertTopic(datum)
     selectDatum = selectTopic(14)
     self.assertEqual(datum, selectDatum)
Exemple #6
0
 def testMessages(self):
     """Test insert and select messages functions.

     Parses the example topic page, inserts its messages twice, and
     checks that selecting the listed message IDs returns the parsed
     messages.
     """
     fixturePath = "{0}/example/topic_602041.12400.html".format(
         os.path.dirname(os.path.abspath(__file__)))
     # The context manager closes the fixture even if read() raises.
     with codecs.open(fixturePath, 'r', 'utf-8') as f:
         html = f.read()
     data = bitcointalk.parseTopicPage(html)['messages']
     insertMessages(data)
     # Make sure a second insert doesn't cause problems
     insertMessages(data)
     selectData = selectMessages([
         8125509, 8125667, 8125970, 8126348, 8126542, 8126615, 8126655,
         8126666
     ])
     self.assertEqual(data, selectData)
Exemple #7
0
 def testMessages(self):
     """Test insert and select messages functions.

     Parses the example topic page, inserts its messages twice, and
     checks that selecting the listed message IDs returns the parsed
     messages.
     """
     fixturePath = "{0}/example/topic_602041.12400.html".format(
         os.path.dirname(os.path.abspath(__file__)))
     # The context manager closes the fixture even if read() raises.
     with codecs.open(fixturePath, 'r', 'utf-8') as f:
         html = f.read()
     data = bitcointalk.parseTopicPage(html)['messages']
     insertMessages(data)
     # Make sure a second insert doesn't cause problems
     insertMessages(data)
     selectData = selectMessages(
         [
             8125509,
             8125667,
             8125970,
             8126348,
             8126542,
             8126615,
             8126655,
             8126666
         ])
     self.assertEqual(data, selectData)