Esempio n. 1
0
  def handleQuit(self):
    """Shut down: signal every worker, wait for them to exit, stop tracing.

    Sets ``self.quit``, sends a 'shutdown' message to each worker link, then
    polls ``link.process.is_alive()`` until no worker reports alive. While
    waiting, a status line is redrawn on ``self.screen`` at most once per
    second. Finally stops the stacktracer and clears the screen.
    """
    self.quit = True
    self.log.info("Shutting down.")

    #tell all our threads to stop
    for link in self.workers.values():
      self.sendMessage(link, 'shutdown')

    #wait for all our threads to stop
    threads = len(self.workers)
    lastUpdate = 0
    while threads > 0:
      # Recount across ALL workers on each pass. The counter used to be reset
      # inside the per-worker loop, so only the last worker visited was
      # counted and the wait could end while other workers were still alive.
      threads = sum(1 for link in self.workers.values() if link.process.is_alive())

      # Redraw the status at most once per second.
      if time.time() - lastUpdate > 1:
        self.screen.erase()
        self.screen.addstr("%s\n\n" % time.asctime())
        self.screen.addstr("Waiting for worker threads to shut down (%d/%d)" % (threads, len(self.workers)))
        self.screen.refresh()
        lastUpdate = time.time()

      # Yield the CPU between polls instead of spinning while workers exit.
      if threads > 0:
        time.sleep(0.05)

    #stop our thread tracking.
    stacktracer.trace_stop()

    self.screen.erase()
                topic_list.append(line)
                #if len(topic_list) >= MAX_TOPIC_NUM:
                #    break
        f.close()
        
        time_now = datetime.now()
        topic_path = 'tables/' + group_id + '/TopicInfo-' + group_id + '-' + str(time_now) + '-raw-' + str(index)
        comment_path = 'tables/' + group_id + '/CommentInfo-' + group_id + '-' + str(time_now) + '-raw-' + str(index)
        comment_crawler = CommentCrawler(group_id, topic_list, 5, topic_path, comment_path)
        comment_crawler.start()
    """
    base_path = '/home/kqc/dataset/douban-group/'
    # Crawl insidestory: read the topic list file for this group, keeping one
    # stripped, non-empty entry per line.
    topic_list = []
    # The with-block closes the file even if reading fails part-way through.
    with open(base_path + 'TopicList-' + group_id + '.txt', 'r') as f:
        for line in f:
            line = line.strip()
            # Test for emptiness by value; `is not ""` compared object
            # identity, which is not a reliable way to detect an empty string.
            if line:
                topic_list.append(line)

    #time_now = datetime.now()
    # Output files live under the per-group directory below base_path.
    topic_path = base_path + group_id + '/TopicInfo-' + group_id + '-raw-all.txt'
    comment_path = base_path + group_id + '/CommentInfo-' + group_id + '-raw-all.txt'

    comment_crawler = CommentCrawler(group_id, topic_list, 5, base_path, topic_path, comment_path)
    comment_crawler.start()

    # Parenthesised form prints the same text under Python 2 and Python 3.
    print("Done")
    stacktracer.trace_stop()
Esempio n. 3
0
#!/usr/bin/env python
# coding: utf-8
r"""Start with Trace"""

from stacktracer import trace_start, trace_stop

if __name__ == '__main__':
    # Tracing is switched on before cvlab is imported -- presumably so that
    # import-time work is sampled as well as the application run itself.
    trace_start("trace.html", interval=5, auto=True)

    from cvlab import main as run_cvlab
    run_cvlab()

    # Reached only when the application returns normally.
    trace_stop()