def label_learned_set(vectorizer, clfl, threshold, validation_table):
    """Classify every tweet in `validation_table` and store the predicted class.

    Each row yielded by `sql_convenience.extract_classifications_and_tweets`
    is vectorized, classified with `clfl`, and the resulting class is written
    back with `sql_convenience.update_class`. A class-1 prediction whose
    probability is below `threshold` is downgraded to class 0.

    Args:
        vectorizer: object whose `transform` turns a list of texts into a
            sparse matrix (presumably a fitted scikit-learn vectorizer).
        clfl: classifier exposing `predict` and `predict_proba`.
        threshold: minimum probability needed to keep a class-1 prediction.
        validation_table: table identifier, passed unchanged to both
            `sql_convenience` calls.
    """
    rows = sql_convenience.extract_classifications_and_tweets(validation_table)
    # The stored classification in each row is not needed here; only the
    # tweet id and text are used.
    for _stored_cls, tweet_id, tweet_text in rows:
        # toarray() yields a plain ndarray; todense() yields numpy.matrix,
        # which recent scikit-learn releases refuse as estimator input.
        features = vectorizer.transform([tweet_text]).toarray()
        predicted_class = clfl.predict(features)[0]  # one sample in -> one label out
        # NOTE(review): the probability row is indexed by the label itself,
        # which assumes the classifier's classes are 0 and 1 in that order
        # -- confirm against how clfl was trained.
        predicted_proba = clfl.predict_proba(features)[0][predicted_class]
        if predicted_proba < threshold and predicted_class == 1:
            predicted_class = 0  # force to out-of-class if we don't trust our answer
        sql_convenience.update_class(tweet_id, validation_table, predicted_class)
def annotate_all_messages(self):
    """Annotate messages one at a time until none are left unannotated.

    For each message returned by `get_unannotated_message`, the tweet text is
    sent through `call_api`, the raw response is stored, and the tweet is
    written to `self.destination_table` as in-class or out-of-class depending
    on `is_brand_of`. The loop ends when `get_unannotated_message` returns
    None.
    """
    while True:
        message = self.get_unannotated_message()
        if message is None:
            break  # nothing left to annotate

        tweet_id = message[b"tweet_id"]
        tweet_text = message[b"tweet_text"]
        config.logging.info(
            'Asking API for results for "%r"' % (repr(tweet_text))
        )
        api_response = self.call_api(tweet_text)
        self.store_raw_response(message, api_response)

        # NOTE(review): is_brand_of only receives the brand and tweet id, so
        # it presumably reads the response stored just above -- confirm.
        if self.is_brand_of(self.brand, tweet_id):
            assigned_class = sql_convenience.CLASS_IN
        else:
            assigned_class = sql_convenience.CLASS_OUT
        # assign class to this tweet
        sql_convenience.update_class(
            tweet_id, self.destination_table, assigned_class
        )
def annotate_all_messages(self):
    """Process every unannotated message and record a class for its tweet.

    Messages are pulled with `get_unannotated_message` until it returns None.
    Each message's text goes through `call_api`, the raw response is saved
    with `store_raw_response`, and `is_brand_of` decides whether the tweet is
    stored in `self.destination_table` as CLASS_IN or CLASS_OUT.
    """
    message = self.get_unannotated_message()
    while message is not None:
        tweet_id = message[b'tweet_id']
        tweet_text = message[b'tweet_text']
        config.logging.info(
            'Asking API for results for "%r"' % (repr(tweet_text))
        )
        self.store_raw_response(message, self.call_api(tweet_text))

        # Pick the class to assign to this tweet; only the chosen constant
        # is looked up.
        in_brand = self.is_brand_of(self.brand, tweet_id)
        tweet_class = (
            sql_convenience.CLASS_IN if in_brand else sql_convenience.CLASS_OUT
        )
        sql_convenience.update_class(tweet_id, self.destination_table, tweet_class)

        # Fetch the next candidate; None ends the loop.
        message = self.get_unannotated_message()