コード例 #1
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import exists
from db_tables import Base, Message, create_sqlite_db

# Set up the connection to the message database.
print('Connect to database.')
engine = create_engine('sqlite:///data/testuser.db')
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()
print('Connect to database. Done.')

# Load the trained classifier. At this point it is trained on the
# newsgroup data, but it will be trained on a data set of the domain of
# interest.
# NOTE(review): MessageClassifier, time, np and sp are used below but are not
# imported in the visible part of this snippet -- confirm they are in scope.
print('Load classifier.')
mc = MessageClassifier()
print('Load classifier. Done.')

# Fetch every stored message; the classifier is handed UTF-8 encoded text.
messages = [row.text.encode('utf-8') for row in session.query(Message).all()]

# Time the batch prediction only (the database fetch above is excluded).
t0 = time()
probabilities = mc.predict_proba(messages)
print('Predicted %d messages in %fs.' % (len(messages), (time()-t0)))

# Report only the predictions whose probability vector has an entropy
# below 1.0; each message is printed with its most probable label.
for message, probs in zip(messages, probabilities):
    # Compute the entropy once and reuse it for both the test and the report.
    entropy = sp.stats.entropy(probs)
    if entropy < 1.0:
        print('%s with a probability of\t%f\t%r' % (mc.labels[np.argmax(probs)], np.max(probs), message))
        print('Entropy of prediction: %f' % entropy)
コード例 #2
from sqlalchemy.orm import sessionmaker 
from sqlalchemy.sql import exists
from db_tables import Base, User, Connection, Message, Location, create_sqlite_db

from tweepy import Stream
from twitter_stream_classifier import TwitterStreamClassifier
from twitter_auth import Twitter_auth

# Keywords to filter on. The list is empty here: the former ['data'] value
# was overwritten on the very next line before it was ever read.
keywords = []

# Load the trained classifier. At this point it is trained on the
# newsgroup data, but it will be trained on a data set of the domain of
# interest.
# NOTE(review): MessageClassifier is not imported in the visible part of this
# snippet -- confirm it is in scope.
print('Load classifier.')
mc = MessageClassifier()
print('Load classifier. Done.')

# Create the engine for the SQLite database and open a session on it.
db_path = 'sqlite:///data/twitter_stream.db'
engine = create_engine(db_path)
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()

# Build the stream listener; it is given the database session and the
# classifier, plus the classes and probability threshold to act on.
lister = TwitterStreamClassifier(
    db_session=session,
    classifier=mc,
    classes_of_interest=['sci.med'],
    probability_threshold=0.90,
)

# Get login from Twitter_auth module.