def crawl(self):
    '''Open a tweepy stream and filter it on a fixed list of keywords.

    Builds a ``tweepy.Stream`` with a fresh ``self.StreamingLib()`` listener
    and a 60 second timeout, then calls ``filter`` with the keyword list.
    If ``filter`` raises, the failure is reported through ``crawl_logs`` and
    the method returns normally.
    '''
    # NOTE(review): the account name and password are hard-coded in source.
    # They should be moved to configuration / environment and rotated.
    stream = tweepy.Stream('cscyberspace1', 'zxcrty09()', self.StreamingLib(), timeout=60.0)
    track_list = ['aapl', 'apple', 'goog', 'google', 'msft', 'microsoft',
                  'obama', 'beiber', 'justin', 'soccer', 'south africa']
    try:
        stream.filter(None, track=track_list)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate and the process can be stopped.
        crawl_logs(['a fatal exception occured', 'ending stream...'])
def _loadData(self, filename):
    '''Load and return the object stored in ``filename``, or None on failure.

    ``filename`` is joined onto the directory that contains this module, the
    resulting file is opened in binary mode and passed to ``load``.

    Returns the value produced by ``load``. If opening or loading raises,
    the problem is reported through ``logger.crawl_logs`` and None is
    returned instead.
    '''
    absolute_path = os.path.join(os.path.dirname(__file__), filename)
    try:
        # The context manager closes the handle even when load() raises.
        # NOTE(review): `load` is presumably pickle's loader -- if so it must
        # only be pointed at trusted files; confirm against the imports.
        with open(absolute_path, "rb") as data_file:
            return load(data_file)
    except Exception:
        # Message is wrapped in a list to match every other crawl_logs call.
        logger.crawl_logs(["A problem occured when reading " + filename])
        return None
# Classifier constructor. Sets self.classifier to None, creates an empty
# ConditionalFreqDist and FreqDist, and calls
# self._setDefaultInformativeFeatures(). When train_data is truthy, each
# (tag, sentence) pair is lower-cased and tokenized with word_tokenize, and
# every word is counted both under its tag and in the overall distribution.
# An exception raised while counting is caught and only its type is logged
# through logger.crawl_logs. Both distributions are passed to
# self._computeInstanceInformativeWords(c_dist, f_dist).
# NOTE(review): the original indentation is lost on this line, so it cannot
# be told from here whether _computeInstanceInformativeWords runs only when
# train_data is truthy or unconditionally -- confirm before reformatting.
# NOTE(review): the bare `except:` also traps KeyboardInterrupt/SystemExit;
# consider narrowing it to `except Exception:`.
def __init__(self, train_data=None): '''initializes classifier with important features required for classification. Accepts a list of tuples each of the form ('label', 'sentence belonging to this class')''' self.classifier = None c_dist = ConditionalFreqDist() f_dist = FreqDist() self._setDefaultInformativeFeatures() #Generate conditional freq and freq dist for each word if train_data: try: for (tag, sentence) in train_data: for word in word_tokenize(sentence.lower()): c_dist[tag].inc(word) f_dist.inc(word) except: logger.crawl_logs(["ERROR: ",str(exc_info()[0])]) self._computeInstanceInformativeWords(c_dist, f_dist)
def on_timeout(self):
    '''Record in the crawl log that the connection timed out.'''
    timeout_notice = ['connection timed out.']
    crawl_logs(timeout_notice)
def on_error(self, status_code):
    '''Log the status code reported for a stream error and return True.

    ``status_code`` is interpolated into the log message with ``%s``.
    Always returns True.
    '''
    # NOTE(review): returning True presumably tells tweepy to keep the
    # stream open -- confirm against the StreamListener documentation.
    # One-element tuple so that a tuple-valued status_code is formatted as a
    # single value instead of being unpacked by the % operator.
    crawl_logs(['an error with status code %s occured' % (status_code,)])
    return True
def __init__(self):
    '''Run the tweepy.StreamListener initializer, then log the creation.'''
    tweepy.StreamListener.__init__(self)
    creation_notice = ['instantiated new Stream Listener']
    crawl_logs(creation_notice)