def learn(archivepath, brain, **kwargs):
    """Teach a brain every text yielded from a tweet archive.

    ``brain`` is handed to ``Brain(...)`` to open the brain (presumably a
    brain file path -- confirm against callers).  The stemmer is set from
    the ``language`` keyword argument, defaulting to ``'english'``.  All
    keyword arguments are forwarded unchanged to ``tweet_generator``.

    Returns the number of texts that were passed to ``Brain.learn``.
    """
    # start brain. Batch saves us from lots of I/O
    engine = Brain(brain)
    engine.set_stemmer(kwargs.get('language', 'english'))
    engine.start_batch_learning()

    learned = 0  # stays 0 when the archive yields nothing
    for learned, text in enumerate(tweet_generator(archivepath, **kwargs), 1):
        engine.learn(text)

    engine.stop_batch_learning()
    return learned
def train_brain(self, channel):
    """
    Create a cobe brain file based on the db.

    This file is used by cobe to generate responses.  Any existing
    ``brain.ai`` is removed first, then a new brain is batch-trained on
    the logged messages for ``channel`` whose nick is not in ``IGNORED``
    and whose text does not start with ``.``, ``,`` or ``!``.
    """
    logger.debug('starting training')
    logger.debug('ignored: {}'.format(IGNORED))

    # replace the current brain
    try:
        os.remove('brain.ai')
    except OSError:
        # Best effort: the file may simply not exist yet.  Only OS-level
        # failures are ignored so KeyboardInterrupt/SystemExit still
        # propagate.
        pass

    # NOTE(review): without a ``global BRAIN`` declaration this name is
    # local to the method; if a module-level BRAIN is meant to be
    # replaced, that does not happen here -- confirm intent.
    BRAIN = Brain('brain.ai')
    logger.debug('created brain.ai')

    start = time.time()
    BRAIN.start_batch_learning()

    logger_lines = db.logger.find({
        'channel': channel,
        'nick': {'$nin': IGNORED},
        # Raw string: same pattern text, without invalid-escape warnings.
        'message': {'$regex': r'^(?!\.|\,|\!)'},
    })
    logger.debug('log total: {}'.format(logger_lines.count()))

    for line in logger_lines:
        BRAIN.learn(line['message'])

    BRAIN.stop_batch_learning()
    logger.debug('learned stuff. Took {:.2f}s'.format(time.time() - start))
from os import path

from cobe.brain import Brain

# The brain database is placed in the same directory as this script.
script_dir = path.dirname(path.abspath(__file__))
b = Brain("%s/shakespeare.sqlite" % script_dir)

# Learn the whole corpus inside a single batch, one line at a time.
b.start_batch_learning()
# NOTE: the corpus path is relative to the current working directory,
# unlike the database path above.
with open("shakespeare.txt") as corpus:
    for line in corpus:
        b.learn(line)
b.stop_batch_learning()
# A training file given on the command line overrides the current value of
# training_file (which is assigned before this point in the script).
if args.trainingfile:
    training_file = args.trainingfile

# Instantiate a copy of the Cobe brain and try to load the database.  If the
# brain file doesn't exist Cobe will create it.
brain = Brain(brainfile)
if training_file:
    if os.path.exists(training_file):
        logger.info(
            "Initializing a new personality matrix... this could take a while..."
        )
        brain.start_batch_learning()
        # The context manager closes the file even if learning raises, and
        # iterating the handle avoids loading the whole file into memory.
        with open(training_file) as corpus:
            for line in corpus:
                brain.learn(line)
        brain.stop_batch_learning()
        logger.info("Done!")
    else:
        # logger.warn() is a deprecated alias; warning() is the supported name.
        logger.warning(
            "Unable to open specified training file %s. The construct's "
            "going to have to learn the hard way.",
            training_file,
        )

# Turn on SSL/TLS support.  IRCS is port 6697/tcp by default, so the port the
# bot tries to log in on is silently reset whenever SSL is requested.
# NOTE(review): this overrides irc_port unconditionally when SSL is on; no
# check for a port explicitly given on the command line is visible here.
if args.ssl:
    usessl = args.ssl
    irc_port = 6697
'screen_name': account, 'count': 200, 'exclude_replies': True, 'include_rts': False } if account in state['accounts']: last_tweet = long(state['accounts'][account]) params['since_id'] = last_tweet else: last_tweet = 0 timeline = api.statuses.user_timeline(**params) for tweet in timeline: b.learn(tweet['text']) #add it to the db db_manager.insert_tweet(tweet['text'].encode('utf-8', 'replace'), False) last_tweet = max(tweet['id'], last_tweet) tweets += 1 print "%d found..." % tweets state['accounts'][account] = str(last_tweet) print "Learning %d tweets" % tweets b.stop_batch_learning() #close the learned txt open(os.path.join(os.path.dirname(__file__), '.state'), 'w').write(dumps(state))