def crawl(self):
    """Open a tweepy stream and filter it on a fixed keyword list.

    Builds a ``tweepy.Stream`` with a new ``self.StreamingLib()`` instance
    and a 60 second timeout, then calls ``stream.filter`` with the keywords
    in ``track_list``.

    Any ordinary exception raised while filtering is reported through
    ``crawl_logs`` and the method then returns normally.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught so
    the process can still be stopped from outside.
    """
    # NOTE(review): the first two positional arguments look like a
    # hard-coded account name and password -- confirm, and if so move them
    # to configuration/environment and rotate the exposed secret.
    stream = tweepy.Stream('cscyberspace1', 'zxcrty09()', self.StreamingLib(),
                           timeout=60.0)
    # NOTE(review): 'beiber' is kept exactly as written; possibly a
    # misspelling of 'bieber' -- confirm before changing what is tracked.
    track_list = ['aapl', 'apple', 'goog', 'google', 'msft', 'microsoft',
                  'obama', 'beiber', 'justin', 'soccer', 'south africa']

    try:
        stream.filter(None, track=track_list)
    except Exception:
        # Catch Exception rather than everything: a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        crawl_logs(['a fatal exception occured', 'ending stream...'])
Example #2
0
 def _loadData(self, filename):
    """Load and return the object stored in ``filename``.

    ``filename`` is resolved relative to the directory that contains this
    module. The file is opened in binary mode and passed to ``load``.

    Returns the value produced by ``load``. If opening or loading fails,
    a message is sent to ``logger.crawl_logs`` and ``None`` is returned.
    """
    absolute_path = os.path.join(os.path.dirname(__file__), filename)
    try:
       # ``with`` closes the handle even when ``load`` raises; the previous
       # explicit close() was skipped on failure and leaked the file.
       # NOTE(review): if ``load`` is pickle's loader, only read files from
       # a trusted source -- confirm what ``load`` is bound to.
       with open(absolute_path, "rb") as data_file:
          return load(data_file)
    except Exception:
       # Exception (not a bare except) so KeyboardInterrupt and SystemExit
       # are not silently turned into a ``None`` result.
       logger.crawl_logs("A problem occured when reading  " + filename)
       return None
Example #3
0
   def __init__(self, train_data=None):
      '''Set up the classifier state and derive its informative words.

      train_data, when given, is a list of tuples each of the form
      ('label', 'sentence belonging to this class'). Every sentence is
      lower-cased and tokenized, and each token is counted both under its
      label (conditional frequency distribution) and overall (frequency
      distribution).

      The two distributions, which stay empty when train_data is falsy,
      are then handed to self._computeInstanceInformativeWords.

      If an ordinary exception is raised while counting, its type is
      reported through logger.crawl_logs and counting stops there; counts
      gathered up to that point are kept.
      '''

      self.classifier = None
      c_dist = ConditionalFreqDist()
      f_dist = FreqDist()
      self._setDefaultInformativeFeatures()

      # Generate conditional freq and freq dist for each word
      if train_data:
         try:
            for (tag, sentence) in train_data:
               for word in word_tokenize(sentence.lower()):
                  c_dist[tag].inc(word)
                  f_dist.inc(word)
         except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not swallowed during construction.
            logger.crawl_logs(["ERROR: ", str(exc_info()[0])])

      self._computeInstanceInformativeWords(c_dist, f_dist)
 def on_timeout(self):
    """Record in the crawl log that the connection timed out.

    Nothing is returned explicitly, so the result is ``None``.
    """
    timeout_notice = ['connection timed out.']
    crawl_logs(timeout_notice)
 def on_error(self, status_code):
    """Record the reported status code in the crawl log.

    Always returns ``True``.
    NOTE(review): presumably ``True`` tells the stream to keep running
    after an error -- confirm against the tweepy version in use.
    """
    message = 'an error with status code %s occured' % status_code
    crawl_logs([message])
    return True
 def __init__(self):
    """Run the ``tweepy.StreamListener`` initializer, then log the creation."""
    # Explicit base-class call kept as in the original form.
    tweepy.StreamListener.__init__(self)
    creation_notice = ['instantiated new Stream Listener']
    crawl_logs(creation_notice)