def on_data(self, data):
    """Extract keywords from an incoming tweet and store them in the host cache.

    The tweet text is obtained with ``GetTweets.get(data)``.  The first entry
    of ``self.host.tags`` that occurs in the tweet is used as the tag, and its
    position is the cache slot.  The keywords are labelled ``1`` when the tag
    equals ``self.host.correct_tag`` and ``0`` otherwise, then written to
    ``self.host.cache[indx]``.

    Returns ``None`` in every case; nothing is stored when no tweet or no
    keyword words could be extracted.
    """
    tweet = GetTweets.get(data)
    if not tweet:
        return

    tags = self.host.tags
    # Fallback when no tag occurs in the tweet: the first tag is used for
    # keyword extraction while the *last* slot is used for caching.
    # NOTE(review): this pairing is kept from the original implementation;
    # confirm that it is intentional.
    tag = tags[0]
    indx = len(tags) - 1
    for position, candidate in enumerate(tags):
        if candidate in tweet:
            tag = candidate
            indx = position
            break

    keywords = self.host.getKeywords.get(tweet, tag)
    if not keywords['words']:
        return

    keywords["label"] = 1 if tag == self.host.correct_tag else 0

    try:
        self.host.cache[indx] = keywords
    except Exception:
        # Best-effort write: a failing cache update must not interrupt the
        # caller.  ``Exception`` (instead of a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
def on_data(self, data):
    """Print the tweet contained in ``data`` and run keyword extraction on it.

    The tweet is obtained with ``GetTweets.get(data)``; when it is falsy
    nothing happens.  Otherwise the tweet is printed and
    ``GetKeywords.get`` is called with the tweet and ``sys.argv[1]``.
    Always returns ``None``.
    """
    tweet = GetTweets.get(data)
    if tweet:
        print(tweet)
        extracted = GetKeywords.get(tweet, sys.argv[1])
        if not extracted['words']:
            # No words were extracted; there is no further processing in
            # either case, the lookup is only evaluated.
            return None
    return None
def on_data(self, data):
    """Classify one incoming tweet with the winnow model and print running metrics.

    The tweet is obtained with ``GetTweets.get(data)`` and its keywords with
    ``GetKeywords.get(tweet, sys.argv[1])``.  Tweets without text or without
    keyword words are ignored.  Otherwise the words are added to
    ``self.winnow``, a label is predicted, the model is updated, and the
    confusion-matrix counters ``self.tp``, ``self.fp``, ``self.tn`` and
    ``self.fn`` are refreshed before accuracy, precision and recall are
    printed.

    Counter meaning (prediction vs. ``keywords['label']``):
      * ``tp`` - predicted 1, actual 1
      * ``fp`` - predicted 1, actual 0
      * ``fn`` - predicted 0, actual 1
      * ``tn`` - predicted 0, actual 0

    Returns ``None``.
    """
    tweet = GetTweets.get(data)
    if not tweet:
        return
    keywords = GetKeywords.get(tweet, sys.argv[1])
    if not keywords["words"]:
        return
    print(keywords)

    # winnow
    words = keywords['words']
    self.winnow.add(words)
    label = self.winnow.predict(words)
    self.winnow.learn(label, keywords)

    # Evaluation
    fact = keywords['label']
    # Scale the previous counts by ``c`` before counting this sample.
    # NOTE(review): ``c`` is not defined in this method; presumably a
    # module-level decay factor - confirm.
    self.fp *= c
    self.tp *= c
    self.fn *= c
    self.tn *= c
    # assumes labels are 0/1 - TODO confirm against GetKeywords / winnow.
    if label > fact:
        self.fp += 1
    elif label < fact:
        # Predicted negative but actually positive: a false negative.
        self.fn += 1
    elif label == 1:
        self.tp += 1
    else:
        # Predicted negative and actually negative: a true negative.
        self.tn += 1

    # The "+ 1" in each denominator avoids division by zero while the
    # counters are still empty.  float() keeps the ratios from being
    # floor-divided when the counters are integers under Python 2.
    total = self.tp + self.fn + self.tn + self.fp + 1
    acc = float(self.tp + self.tn) / total
    pre = float(self.tp) / (self.tp + self.fp + 1)
    rec = float(self.tp) / (self.tp + self.fn + 1)

    # Truncate (not round) to two decimals for display.
    acc = int(acc * 100) / 100.0
    pre = int(pre * 100) / 100.0
    rec = int(rec * 100) / 100.0

    print(">>>> Get %s but actually was %s" % (label, fact))
    print(">>>> Accuracy: %s,\t Precision: %s,\t Recall: %s" % (acc, pre, rec))
    print(">>>> List length: %s" % len(self.winnow.stack))