def untrained(self, cr, uid, ids, context=None):
    """Remove each record's description from its category's Bayes data.

    For every id in ``ids`` whose record has a description, the training
    data stored on the category's group is loaded into a ``Bayes`` guesser,
    the description is untrained from the category, and the updated data is
    written back to the group together with a recomputed ``automate_test``
    percentage. The category's ``train_messages`` counter is decremented and
    the record's ``state_bayes`` is set to ``'untrained'``.

    Records without a description are skipped.

    :return: always True
    """
    group_obj = self.pool.get('crm.bayes.group')
    cat_obj = self.pool.get('crm.bayes.categories')
    bay_path = file_path + "crm_bayes.bay"

    for rec_id in ids:
        record = self.read(cr, uid, rec_id, ['category_id', 'description'])
        if not record['description']:
            continue

        category_id = record['category_id'][0]
        cat_rec = cat_obj.read(cr, uid, category_id, [])
        group_id = cat_rec['group_id'][0]

        data = ""
        for group in group_obj.browse(cr, uid, [group_id]):
            if group['train_data']:
                data += group['train_data']

        # The guesser can only load from a file, so the stored training
        # data is round-tripped through ``bay_path``.
        guesser = Bayes()
        if data:
            with open(bay_path, 'w') as bay_file:
                bay_file.write(data)
            guesser.load(bay_path)
        guesser.untrain(cat_rec['name'], record['description'])
        guesser.save(bay_path)
        with open(bay_path, 'r') as bay_file:
            data = bay_file.read()

        cat_obj.write(cr, uid, category_id,
                      {'train_messages': int(cat_rec['train_messages']) - 1})

        # Let the cursor bind the parameter instead of interpolating it
        # into the SQL text.
        cr.execute(
            "select sum(train_messages) as tot_train,"
            "sum(guess_messages) as tot_guess "
            "from crm_bayes_categories where group_id=%s", (group_id,))
        totals = cr.dictfetchall()[0]
        if totals['tot_guess']:
            percantage = (float(totals['tot_guess'] * 100)
                          / float(totals['tot_guess'] + totals['tot_train']))
        else:
            percantage = 0.0

        # Single write: the training data and the statistic are stored
        # together (the data used to be written twice).
        group_obj.write(cr, uid, group_id,
                        {'train_data': data, 'automate_test': percantage})
        self.write(cr, uid, rec_id, {'state_bayes': 'untrained'})
    return True
def main():
    """
    Build aggregator report pages with Bayes rating links.

    Loads the saved Bayes data if present, fetches new entries for the
    subscribed feeds, scores them with the guesser, writes the report page,
    then closes the databases and saves the guesser state.
    """
    # Create a new Bayes guesser
    guesser = Bayes()

    # Attempt to load Bayes data, ignoring IOError on first run.
    try:
        guesser.load(BAYES_DATA_FN)
    except IOError:
        pass

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    try:
        with open(FEEDS_FN, "r") as feeds_file:
            feeds = [line.strip() for line in feeds_file]
        entries = getNewFeedEntries(feeds, feed_db, entry_db)

        # Score the new entries using the Bayesian guesser
        entries = scoreEntries(guesser, entries)

        # Write out the current run's aggregator report.
        out_fn = time.strftime(HTML_FN)
        writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL,
                            ENTRY_TMPL, PAGE_TMPL)
    finally:
        # Close the databases even when fetching, scoring or writing fails.
        closeDBs(feed_db, entry_db)

    # Save the current guesser's state to disk.
    guesser.save(BAYES_DATA_FN)
def _load_guesser(self):
    """Create a ``Bayes`` guesser and load ``commands.bays`` into it.

    Returns None when ``Bayes`` is None. Otherwise the new guesser and its
    attribute listing are passed to ``self.display`` before the data file is
    loaded, and the guesser is returned.
    """
    if Bayes is not None:
        classifier = Bayes()
        self.display(classifier)
        self.display(dir(classifier))
        classifier.load("commands.bays")
        return classifier
    return None
def _load_guesser(self):
    """Create a ``Bayes`` guesser and load ``commands.bays`` into it.

    Returns None when ``Bayes`` is None. Otherwise the new guesser and its
    attribute listing are printed before the data file is loaded, and the
    guesser is returned.
    """
    if Bayes is not None:
        classifier = Bayes()
        print(classifier)
        print(dir(classifier))
        classifier.load('commands.bays')
        return classifier
    return None
def get_bayes(id=GLOBAL):
    """Return the classifier cached under ``id``, building it on first use.

    A new ``Bayes`` (tokenized with ``statustok``) is created when ``id`` is
    not yet in ``guessers``; its data is loaded from ``filename(id=id)`` if
    that file exists, and the instance is stored in ``guessers``.
    """
    if id in guessers:
        return guessers[id]

    classifier = Bayes(tokenizer=statustok)
    path = filename(id=id)
    if os.path.exists(path):
        classifier.load(path)
    log.debug("Created classifier for '%s' at '%s'" % (id, path))
    guessers[id] = classifier
    return guessers[id]
def __init__(self, name):
    """Initialise the classifier and load, or create, its brain file.

    The brain is stored in ``<name>.bay``. When it cannot be loaded, an
    empty brain is saved to that path and then loaded.

    :param name: base name of the brain file, without the ``.bay`` suffix
    """
    Bayes.__init__(self)
    self.brain = name + '.bay'
    try:
        Bayes.load(self, self.brain)
    except Exception:
        # ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        print("[Alert] Failed to load bayesian brain - %s, creating it now" % self.brain)
        Bayes.save(self, self.brain)
        Bayes.load(self, self.brain)
    else:
        print("[Bayes] Brain loaded ok")
def __init__(self, name):
    """Set up the Bayes base class and attach the ``<name>.bay`` brain.

    If loading the brain fails, an empty one is written to the same path
    and loaded instead.

    :param name: base name of the brain file, without the ``.bay`` suffix
    """
    Bayes.__init__(self)
    self.brain = name + '.bay'
    try:
        Bayes.load(self, self.brain)
    except Exception:
        # Narrowed from a bare ``except:``, which also caught
        # KeyboardInterrupt and SystemExit.
        print("[Alert] Failed to load bayesian brain - %s, creating it now" % self.brain)
        Bayes.save(self, self.brain)
        Bayes.load(self, self.brain)
    else:
        print("[Bayes] Brain loaded ok")
def action_train(self, cr, uid, ids, context=None):
    """Train the Bayes data of a category's group with each wizard message.

    For every id in ``ids`` the selected category is read; when ``result``
    (a list defined outside this function) is non-empty and no category was
    selected, the best-scoring entry of ``result`` is used to look one up.
    The group's ``train_data`` is then loaded into a ``Bayes`` guesser,
    trained with the message text, and written back together with a
    recomputed ``automate_test`` percentage.

    Raises ``osv.except_osv`` when no category is available.
    Returns an ``ir.actions.act_window`` dict opening
    ``crm.bayes.train.message``.

    NOTE(review): the nesting below was reconstructed from flattened source;
    confirm the ``if result`` / ``if cat_id`` block boundaries against the
    original file.
    """
    cat_obj = self.pool.get('crm.bayes.categories')
    group_obj = self.pool.get('crm.bayes.group')
    message_obj = self.pool.get('crm.bayes.test.guess')
    for id in ids:
        cat_id = self.read(cr, uid, id, ['category_id','name'])
        cat_id = cat_id[0]['category_id']
        if result :
            # Entry of ``result`` with the highest second element.
            max_list = max(result, key=lambda k: k[1])
            if cat_id:
                # A category was chosen explicitly: count it as trained.
                cat_guess_msg = cat_obj.read(cr, uid, cat_id, ['train_messages'])
                cat_obj.write(cr, uid, cat_id, {'train_messages' :cat_guess_msg['train_messages'] + 1})
            if max_list[1] > 0 and not cat_id:
                # No explicit category: adopt the best entry's category and
                # count it as a guess.
                cat_id = cat_obj.search(cr, uid, [('name','=',max_list[0])])[0]
                cat_guess_msg = cat_obj.read(cr, uid, cat_id, ['guess_messages'])
                cat_obj.write(cr, uid, cat_id, {'guess_messages' :cat_guess_msg['guess_messages'] + 1})
                self.write(cr, uid, ids, {'category_id':cat_id})
        if cat_id :
            cat_rec = cat_obj.read(cr, uid, cat_id, [])
            guesser = Bayes()
            data = ""
            for rec in group_obj.browse(cr, uid, [cat_rec['group_id'][0]]):
                if rec['train_data']:
                    data += rec['train_data']
            if data :
                # The stored training data is written to a file so the
                # guesser can load it.
                myfile = file(file_path+"crm_bayes.bay", 'w')
                myfile.write(data)
                myfile.close()
                guesser.load(file_path+"crm_bayes.bay")
            guesser.train(cat_rec['name'], message_obj.read(cr, uid, id)[0]['name'])
            guesser.save(file_path+"crm_bayes.bay")
            # NOTE(review): this handle is never closed explicitly.
            myfile = file(file_path+"crm_bayes.bay", 'r')
            data=""
            for fi in myfile.readlines():
                data += fi
            # NOTE(review): the group id is interpolated into the SQL text
            # rather than passed as a query parameter.
            cr.execute("select sum(train_messages) as tot_train,sum(guess_messages) as tot_guess from crm_bayes_categories where group_id=%d"% cat_rec['group_id'][0])
            rec = cr.dictfetchall()
            if not rec[0]['tot_guess']:
                rec[0]['tot_guess'] =0
            # Share of guessed messages among guessed + trained, in percent.
            percantage = float(rec[0]['tot_guess'] *100) / float(rec[0]['tot_guess'] + rec[0]['tot_train'])
            group_obj.write(cr, uid, cat_rec['group_id'][0], {'train_data': data,'automate_test':percantage})
        else :
            raise osv.except_osv(_('Error !'),_('Please Select Category! '))
    return {
        'view_type': 'form',
        "view_mode": 'form',
        'res_model': 'crm.bayes.train.message',
        'type': 'ir.actions.act_window',
        'target':'new',
    }
def check_junk(phrase):
    """Return the top guessed pool for ``phrase`` as an int, or -1.

    The classifier data is loaded from ``config/kikoo.bot`` on every call
    and the raw guess list is printed. -1 is returned when there is no
    guess or when anything goes wrong (reverend not installed, data file
    missing, pool name not convertible to int, ...).

    :param phrase: text to classify
    :return: ``int`` of the best-scoring pool name, or -1
    """
    try:
        from reverend.thomas import Bayes
        g = Bayes()
        g.load("config/kikoo.bot")
        result = g.guess(phrase)
        print(result)
        if result:
            return int(result[0][0])
        return -1
    except Exception:
        # Best-effort by design, but ``Exception`` instead of a bare
        # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
        return -1
def get_db(private_path, username):
    """Return ``(guesser, path)`` for a user's spam filter database.

    The database lives at ``<private_path>/<username>/spam.bayes``. If
    loading it raises IOError, a message is printed, the parent directory
    is created when missing, and the (empty) guesser is saved to that path.
    """
    db_path = os.path.join(private_path, username, 'spam.bayes')
    spam_filter = Bayes()
    try:
        # load the spam DB
        spam_filter.load(db_path)
    except IOError:
        print("Creating a new spam filter database")
        db_dir = os.path.dirname(db_path)
        if not os.path.isdir(db_dir):
            os.makedirs(db_dir)
        spam_filter.save(db_path)
    return spam_filter, db_path
def trained(self, cr, uid, ids, context=None):
    """Train each record's description into its category's Bayes data.

    For every id in ``ids`` the training data stored on the category's
    group is loaded into a ``Bayes`` guesser, the record's description is
    trained under the category name, and the updated data is written back
    to the group together with a recomputed ``automate_test`` percentage.
    The category's ``train_messages`` counter is incremented and the
    record's ``state_bayes`` is set to ``'trained'``.

    :raises osv.except_osv: when a record has no description or no category
    :return: always True
    """
    bay_path = file_path + "crm_bayes.bay"

    for rec_id in ids:
        record = self.read(cr, uid, rec_id, ['category_id', 'description'])
        if not record['description']:
            raise osv.except_osv(_('Error!'), _("Description Not Define!"))
        if not record['category_id']:
            raise osv.except_osv(_('Error!'), _("Statistics Category Not Define!"))

        group_obj = self.pool.get('crm.bayes.group')
        cat_obj = self.pool.get('crm.bayes.categories')
        category_id = record['category_id'][0]
        cat_rec = cat_obj.read(cr, uid, category_id, [])
        group_id = cat_rec['group_id'][0]

        data = ""
        for group in group_obj.browse(cr, uid, [group_id]):
            if group['train_data']:
                data += group['train_data']

        # The guesser can only load from a file, so the stored training
        # data is round-tripped through ``bay_path``.
        guesser = Bayes()
        if data:
            with open(bay_path, 'w') as bay_file:
                bay_file.write(data)
            guesser.load(bay_path)
        guesser.train(cat_rec['name'], record['description'])
        guesser.save(bay_path)
        with open(bay_path, 'r') as bay_file:
            data = bay_file.read()

        cat_obj.write(cr, uid, category_id,
                      {'train_messages': int(cat_rec['train_messages']) + 1})

        # Let the cursor bind the parameter instead of interpolating it
        # into the SQL text.
        cr.execute(
            "select sum(train_messages) as tot_train,"
            "sum(guess_messages) as tot_guess "
            "from crm_bayes_categories where group_id=%s", (group_id,))
        totals = cr.dictfetchall()[0]
        tot_guess = totals['tot_guess'] or 0
        percantage = float(tot_guess * 100) / float(tot_guess + totals['tot_train'])

        group_obj.write(cr, uid, group_id,
                        {'train_data': data, 'automate_test': percantage})
        self.write(cr, uid, rec_id, {'state_bayes': 'trained'})
    return True
def guess_message(self,cr,uid,ids,context={}):
    """Guess categories for a case description.

    The ``train_data`` of all active ``crm.bayes.group`` records is
    concatenated, written to ``/tmp/crm_bayes.bay`` and loaded into a
    ``Bayes`` guesser, which is then asked about the case description.

    Returns a list of ``(name, score * 100)`` tuples; empty when the case
    has no description or there is no training data.
    """
    cases = self.browse(cr, uid, ids)
    guesses = []
    if cases.description:
        guesser = Bayes()
        group_obj = self.pool.get('crm.bayes.group')
        active_ids = group_obj.search(cr, uid, [('active', '=', True)])
        data = ""
        for group in group_obj.browse(cr, uid, active_ids):
            if group['train_data']:
                data += group['train_data']
        if data:
            bay_file = open("/tmp/crm_bayes.bay", 'w')
            bay_file.write(data)
            bay_file.close()
            guesser.load('/tmp/crm_bayes.bay')
            guesses = guesser.guess(cases.description)
    return [(guess[0], guess[1] * 100) for guess in guesses]
def action_guess(self, cr, uid, ids, context=None):
    """Guess the category of the wizard message and open the train wizard.

    The ``train_data`` of the groups listed in ``context['active_ids']`` is
    loaded into a ``Bayes`` guesser, which scores the message ``name``. The
    list ``result`` (defined outside this function) is emptied in place and
    refilled with one entry per category: the guesser's ``(name, score)``
    tuple when the category was guessed, ``(name, 0)`` otherwise.

    :return: an ``ir.actions.act_window`` dict for ``crm.bayes.test.train``
    """
    # ``context`` defaults to None; normalise it so the lookups below do
    # not fail on a missing context.
    if context is None:
        context = {}

    guesser = Bayes()
    group_obj = self.pool.get('crm.bayes.group')
    cat_obj = self.pool.get('crm.bayes.categories')

    # Empty the shared list in place (other code holds a reference to it).
    del result[:]

    data = ""
    for rec in group_obj.browse(cr, uid, context.get('active_ids', [])):
        if rec['train_data']:
            data += rec['train_data']

    result_lang = []
    if data:
        with open("/tmp/crm_bayes.bay", 'w') as bay_file:
            bay_file.write(data)
        guesser.load('/tmp/crm_bayes.bay')
        message = self.read(cr, uid, ids, ['name'])
        result_lang = guesser.guess(message[0]['name'])

    cat_ids = cat_obj.search(cr, uid, [])
    for category in cat_obj.read(cr, uid, cat_ids, ['name']):
        for guess in result_lang:
            if guess[0] == category['name']:
                result.append(guess)
                break
        else:
            # Category not present in the guess list.
            result.append((category['name'], 0))

    context_new = {
        'from_wiz': True,
        'group_id': context.get('active_id', False),
    }
    return {
        'context': context_new,
        'view_type': 'form',
        "view_mode": 'form',
        'res_model': 'crm.bayes.test.train',
        'type': 'ir.actions.act_window',
        'target': 'new',
    }
def action_guess(self, cr, uid, ids, context=None):
    """Score the wizard message against every category.

    Training data from the groups in ``context['active_ids']`` is loaded
    into a ``Bayes`` guesser and used to guess the message ``name``. The
    list ``result`` (defined outside this function) is cleared in place and
    then receives, for each category, either the guesser's
    ``(name, score)`` tuple or ``(name, 0)`` when the category was not
    guessed.

    :return: an ``ir.actions.act_window`` dict for ``crm.bayes.test.train``
    """
    # The signature allows ``context=None``; fall back to an empty dict so
    # the ``active_ids`` / ``active_id`` lookups are safe.
    if context is None:
        context = {}

    guesser = Bayes()
    group_obj = self.pool.get('crm.bayes.group')
    cat_obj = self.pool.get('crm.bayes.categories')

    # In-place clear keeps the shared list object intact.
    del result[:]

    data = ""
    for rec in group_obj.browse(cr, uid, context.get('active_ids', [])):
        if rec['train_data']:
            data += rec['train_data']

    result_lang = []
    if data:
        with open("/tmp/crm_bayes.bay", 'w') as bay_file:
            bay_file.write(data)
        guesser.load('/tmp/crm_bayes.bay')
        message = self.read(cr, uid, ids, ['name'])
        result_lang = guesser.guess(message[0]['name'])

    cat_ids = cat_obj.search(cr, uid, [])
    for category in cat_obj.read(cr, uid, cat_ids, ['name']):
        for guess in result_lang:
            if guess[0] == category['name']:
                result.append(guess)
                break
        else:
            # No guess for this category: report it with a zero score.
            result.append((category['name'], 0))

    context_new = {
        'from_wiz': True,
        'group_id': context.get('active_id', False),
    }
    return {
        'context': context_new,
        'view_type': 'form',
        "view_mode": 'form',
        'res_model': 'crm.bayes.test.train',
        'type': 'ir.actions.act_window',
        'target': 'new',
    }
def guess_message(self, cr, uid, ids, context={}):
    """Return category guesses for a case description as percentages.

    Training data of every active ``crm.bayes.group`` is concatenated,
    dumped to ``/tmp/crm_bayes.bay`` and loaded into a ``Bayes`` guesser
    that is queried with the case description.

    The result is a list of ``(name, score * 100)`` tuples; it is empty
    when the case has no description or no training data exists.
    """
    cases = self.browse(cr, uid, ids)
    raw_guesses = []
    if cases.description:
        guesser = Bayes()
        group_obj = self.pool.get('crm.bayes.group')
        group_ids = group_obj.search(cr, uid, [('active', '=', True)])
        data = ""
        for group in group_obj.browse(cr, uid, group_ids):
            if group['train_data']:
                data += group['train_data']
        if data:
            dump = open("/tmp/crm_bayes.bay", 'w')
            dump.write(data)
            dump.close()
            guesser.load('/tmp/crm_bayes.bay')
            raw_guesses = guesser.guess(cases.description)
    return [(name_score[0], name_score[1] * 100) for name_score in raw_guesses]
def classificar():
    """Classify every file in CAMINHO_TOCLASSIFY and print the guesses.

    The knowledge base is loaded from CAMINHO_CONHECIMENTO; if that raises
    IOError an error message is printed and the process exits with status
    1. Each file in the directory (ignoring a ``.svn`` entry) is read and
    passed to the guesser, and the guess is printed. Any error while
    listing or classifying prints a generic error message.
    """
    print(">>> Instanciando classificador")
    guesser = Bayes()

    print(">>> Carregando base de conhecimento")
    try:
        guesser.load(CAMINHO_CONHECIMENTO)
    except IOError:
        print("Erro. Não foi possível carregar a base. Certifique-se de que existe o arquivo %s." % CAMINHO_CONHECIMENTO)
        sys.exit(1)

    try:
        arquivos = os.listdir("%s/" % CAMINHO_TOCLASSIFY)
        if '.svn' in arquivos:
            arquivos.remove('.svn')
        for nome_arquivo in arquivos:
            # ``with`` closes each input file as soon as it has been read.
            with open('%s/%s' % (CAMINHO_TOCLASSIFY, nome_arquivo), 'r') as arquivo:
                texto = arquivo.read()
            guess = guesser.guess(texto)
            print("\n>>> Arquivo %s:\n %s\n" % (nome_arquivo, guess))
    except Exception:
        # ``Exception`` rather than a bare ``except:`` so Ctrl-C and
        # SystemExit are not reported as classification errors.
        print("Erro. Não foi possível classificar.")
def main():
    """
    Perform a test run of the FeedFilter using defaults.

    Loads the Bayes data, fetches new entries for the subscribed feeds,
    and writes the filtered feed out in both RSS and Atom form.
    """
    # Create a new Bayes guesser and load its data.
    guesser = Bayes()
    guesser.load(BAYES_DATA_FN)

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    try:
        with open(FEEDS_FN, "r") as feeds_file:
            feeds = [line.strip() for line in feeds_file]
        entries = getNewFeedEntries(feeds, feed_db, entry_db)

        # Build the feed filter.
        f = BayesFilter(guesser, entries)
        f.FEED_META['feed.title'] = FEED_TITLE
        f.FEED_META['feed.tagline'] = FEED_TAGLINE

        # Output the feed as both RSS and Atom.
        for fmt, scrape in (('rss', f.scrape_rss), ('atom', f.scrape_atom)):
            with open(FEED_NAME_FN % fmt, 'w') as out:
                out.write(scrape())
    finally:
        # Close the databases even on failure. The guesser is not modified
        # here, so its state is not saved.
        closeDBs(feed_db, entry_db)
def main():
    """
    Perform a test run of the FeedFilter using defaults.

    The Bayes data is loaded, new entries are fetched for the subscribed
    feeds, and the filtered feed is written as RSS and as Atom.
    """
    # Create a new Bayes guesser and load its data.
    guesser = Bayes()
    guesser.load(BAYES_DATA_FN)

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    try:
        with open(FEEDS_FN, "r") as subscriptions:
            feeds = [url.strip() for url in subscriptions]
        entries = getNewFeedEntries(feeds, feed_db, entry_db)

        # Build the feed filter.
        f = BayesFilter(guesser, entries)
        f.FEED_META['feed.title'] = FEED_TITLE
        f.FEED_META['feed.tagline'] = FEED_TAGLINE

        # Output the feed as both RSS and Atom, closing each file.
        with open(FEED_NAME_FN % 'rss', 'w') as rss_out:
            rss_out.write(f.scrape_rss())
        with open(FEED_NAME_FN % 'atom', 'w') as atom_out:
            atom_out.write(f.scrape_atom())
    finally:
        # Always release the databases. Nothing trains the guesser in this
        # run, so there is no state to save.
        closeDBs(feed_db, entry_db)
class BayesianClassifier:
    """Sentiment classifier built on a ``Bayes`` guesser.

    ``train`` feeds the guesser the supplied example tweets followed by a
    fixed list of positive and negative seed phrases. ``classify`` returns
    the best-scoring label for a sentence, or NEUTRAL when there is no
    guess or the two best scores are too close.
    """

    POSITIVE = POSITIVE
    NEGATIVE = NEGATIVE
    NEUTRAL = NEUTRAL

    # Minimum gap between the two best scores for ``classify`` to commit to
    # the top label.
    THRESHHOLD = 0.1

    guesser = None

    def __init__(self):
        self.guesser = Bayes()

    def train(self, example_tweets):
        """Train on ``example_tweets`` and then on the built-in seed phrases.

        Each tweet is trained as ``(tweet.sentiment, tweet.text)``. The seed
        phrases are trained in the order listed, repeats included.
        """
        for tweet in example_tweets:
            self.guesser.train(tweet.sentiment, tweet.text)

        positive_phrases = (
            "cool", "Woo", "quite amazing", "thks", "looking forward to",
            "damn good", "frickin ruled", "frickin rules", "Way to go",
            "cute", "comeback", "not suck", "prop", "kinda impressed",
            "props", "come on", "congratulation", "gtd", "proud", "thanks",
            "can help", "thanks!", "pumped", "integrate", "really like",
            "loves it", "yay", "amazing", "epic flail", "flail",
            "good luck", "fail", "life saver", "piece of cake",
            "good thing", "hawt", "hawtness", "highly positive", "my hero",
            "yummy", "awesome", "congrats",
            "would recommend", "intellectual vigor", "really neat", "yay",
            "ftw", "I want", "best looking", "imrpessive", "positive",
            "thx", "thanks", "thank you", "endorse", "clearly superior",
            "superior", "really love", "woot", "w00t", "super",
            "wonderful", "leaning towards", "rally", "incredible",
            "the best", "is the best", "strong", "would love", "rally",
            "very quickly", "very cool", "absolutely love",
            "very exceptional", "so proud", "funny", "recommend",
            "so proud", "so great", "so cool", "cool", "wowsers", "plus",
            "liked it", "make a difference", "moves me", "inspired", "OK",
            "love it", "LOL",
            ":)", ";)", ":-)", ";-)", ":D", ";]", ":]", ":p", ";p",
            "voting for", "great", "agreeable", "amused", "brave", "calm",
            "charming", "cheerful", "comfortable", "cooperative",
            "courageous", "delightful", "determined", "eager", "elated",
            "enchanting", "encouraging", "energetic", "enthusiastic",
            "excited", "exuberant", "excellent", "I like", "fine", "fair",
            "faithful", "fantastic", "fine", "friendly", "fun ", "funny",
            "gentle", "glorious", "good", "pretty good", "happy",
            "healthy", "helpful", "high", "agile", "responsive",
            "hilarious", "jolly", "joyous", "kind", "lively", "lovely",
            "lucky", "nice", "nicely", "obedient", "perfect", "pleasant",
            "proud", "relieved", "silly", "smiling", "splendid",
            "successful", "thankful", "thoughtful", "victorious",
            "vivacious", "witty", "wonderful", "zealous", "zany", "rocks",
            "comeback", "pleasantly surprised", "pleasantly", "surprised",
            "love", "glad", "yum", "interesting",
        )
        negative_phrases = (
            "FTL", "irritating", "not that good", "suck", "lying",
            "duplicity", "angered", "dumbfounding", "dumbifying",
            "not as good", "not impressed", "stomach it", "pw", "pwns",
            "pwnd", "pwning", "in a bad way", "horrifying", "wrong",
            "flailing", "failing", "fallen way behind", "fallen behind",
            "lose", "fallen", "self-deprecating", "hunker down", "duh",
            "get killed by", "got killed by", "hated us",
            "only works in safari", "must have ie", "fuming and frothing",
            "heavy", "buggy", "unusable", "nothing is", "is great until",
            "don't support", "despise", "pos", "hindrance", "sucks",
            "problems", "not working", "fuming", "annoying", "frothing",
            "poorly", "headache", "completely wrong", "sad news",
            "didn't last", "lame", "pet peeves", "pet peeve", "can't send",
            "bullshit", "fail", "so terrible",
            "negative", "anooying", "an issue", "drop dead", "trouble",
            "brainwashed", "smear", "commie", "communist", "anti-women",
            "WTF", "anxiety", "STING", "nobody spoke", "yell", "Damn",
            "aren't", "anti", "i hate", "hate", "dissapointing",
            "doesn't recommend", "the worst", "worst", "expensive", "crap",
            "socialist", "won't", "wont", ":(", ":-(", "Thanks",
            "smartass", "don't like", "too bad", "frickin", "snooty",
            "knee jerk", "jerk", "reactionist", "MUST DIE", "no more",
            "hypocrisy", "ugly", "too slow", "not reliable", "noise",
            "crappy", "horrible",
            "bad quality", "angry", "annoyed", "anxious", "arrogant",
            "ashamed", "awful", "bad", "bewildered", "blues", "bored",
            "clumsy", "combative", "condemned", "confused", "crazy",
            "flipped-out", "creepy", "cruel", "dangerous", "defeated",
            "defiant", "depressed", "disgusted", "disturbed", "dizzy",
            "dull", "embarrassed", "envious", "evil", "fierce", "foolish",
            "frantic", "frightened", "grieving", "grumpy", "helpless",
            "homeless", "hungry", "hurt", "ill", "itchy", "jealous",
            "jittery", "lazy", "lonely", "mysterious", "nasty", "rape",
            "naughty",
            "nervous", "nutty", "obnoxious", "outrageous", "panicky",
            "f*****g up", "repulsive", "scary", "selfish", "sore", "tense",
            "terrible", "testy", "thoughtless", "tired", "troubled",
            "upset", "uptight", "weary", "wicked", "worried", "is a fool",
            "painful", "pain", "gross",
        )

        for phrase in positive_phrases:
            self.guesser.train(POSITIVE, phrase)
        for phrase in negative_phrases:
            self.guesser.train(NEGATIVE, phrase)

    def classify(self, sentence):
        """Return the guessed label for ``sentence``.

        NEUTRAL is returned when the guesser yields nothing, or when the
        best score does not beat the second one by more than THRESHHOLD.
        A single guess is returned as is.
        """
        ranked = self.guesser.guess(sentence)
        if not ranked:
            return NEUTRAL

        top_label, top_score = ranked[0]
        if len(ranked) == 1:
            return top_label

        _second_label, second_score = ranked[1]
        if top_score - second_score > self.THRESHHOLD:
            return top_label
        return NEUTRAL

    def save(self):
        """Save the guesser using its default file name."""
        self.guesser.save()

    def load(self):
        """Load the guesser using its default file name."""
        self.guesser.load()
class Guesser():
    """Spam/ham classifier for one user and one feed, persisted on disk.

    The Bayes data lives in
    ``<config['bayes_dir']>/users/<user.id>/feed_<feed.id>.bayes``; the
    tokenizer ignores the stopwords stored for the feed.
    """

    def __init__(self, feed, user, config):
        """Load the stored classifier, or start one with empty pools.

        The per-user directory is created when it does not exist. When no
        data file exists yet, empty ``ham`` and ``spam`` pools are created.
        """
        import os.path
        self.user = user
        self.filename = config['bayes_dir']
        self.filename += "/users/%s" % user.id
        if not os.path.exists(self.filename):
            os.makedirs(self.filename)
        self.filename += '/feed_%s.bayes' % str(feed.id)
        log.debug("filename:%s" % self.filename)

        stopwords = meta.Session\
            .query(model.Stopword)\
            .filter_by(feed_id=feed.id).all()
        self.stopwords = [stopword.word for stopword in stopwords]

        self.trainer = self._new_trainer()
        if os.path.exists(self.filename):
            self.trainer.load(self.filename)
        else:
            self.trainer.newPool('ham')
            self.trainer.newPool('spam')

    def _new_trainer(self):
        """Return a fresh ``Bayes`` whose tokenizer skips ``self.stopwords``."""
        trainer = Bayes()
        trainer.getTokens = lambda x: my_tokenize(x, self.stopwords)
        return trainer

    def save(self):
        """Write the classifier data to ``self.filename``."""
        self.trainer.save(self.filename)

    def clear(self):
        """Replace the classifier with an untrained one (empty ham/spam)."""
        self.trainer = self._new_trainer()
        self.trainer.newPool('ham')
        self.trainer.newPool('spam')

    def is_spam(self, entry, use_classified=True):
        """Return True when ``entry`` is considered spam.

        With ``use_classified`` an existing ``Classification`` row for this
        user and entry takes precedence over the Bayes guess.

        :raises ValueError: when a stored classification has a pool other
            than ``'spam'`` or ``'ham'``
        """
        if use_classified:
            classy = meta.Session\
                .query(model.Classification)\
                .filter_by(user_id=self.user.id, entry_id=entry.id).first()
            if classy:
                if classy.pool == 'spam':
                    return True
                elif classy.pool == 'ham':
                    return False
                else:
                    # Was ``raise "bad pool"``: string exceptions are not
                    # valid and surface as a TypeError instead.
                    raise ValueError("bad pool")

        g = self.guess(entry)
        if g['spam'] and not g['ham']:
            return True
        if not g['spam'] and g['ham']:
            return False
        return (g['spam'] > g['ham'])

    def guess(self, entry):
        """Return the guess for ``entry`` as a dict.

        The dict always has ``'spam'`` and ``'ham'`` keys; a pool missing
        from the guess is mapped to None.
        """
        from rssmonster.controllers.bayes import __relevant__

        # Extract the text and run the classifier once; the same values
        # feed both the debug log and the result.
        relevant = __relevant__(entry)
        scores = self.trainer.guess(relevant)
        log.debug("__relevant__(entry) %s" % relevant)
        log.debug("__relevant__(entry) %s" % scores)
        log.debug('self.filename: %s' % self.filename)

        ret = dict(scores)
        log.debug("ret: %s" % ret)
        if 'spam' not in ret:
            ret['spam'] = None
        if 'ham' not in ret:
            ret['ham'] = None
        return ret
def load_csv_to_bayes(filename): reader = csv.reader(file(filename)) reader.next() counts = defaultdict(int) for line in reader: body = line[1] if line[2] == "visible": status = "visible" else: status = "moderated" clean_body = re.sub("<[^>]*>","",body) guesser.train(status, clean_body) try: guesser.load('dataset.dat') except IOError as e: load_csv_to_bayes('good.csv') load_csv_to_bayes('bad.csv') guesser.save('dataset.dat') from flask import Flask, request from flask import render_template app = Flask(__name__) @app.route("/moderate") def moderate(): if request.args.has_key('callback'): wrapper = request.args.get('callback')+"(%s)" else:
#!/usr/bin/env python import socket, random from reverend.thomas import Bayes guesser = Bayes() guesser.load('spam.bay') host = 'maxhodak.com' port = 11911 backlog = 5 size = 1024 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind((host,port)) s.listen(backlog) messages = [] next_msg = "Nothing here!" f = open("/tmp/asktell.notspam.log",'r') i = 0 for line in f: if i > 50: break i += 1 messages.append(line.strip()) spamlog = open('/tmp/asktell.spam.log','a+') notspamlog = open('/tmp/asktell.notspam.log','a+') while 1: client, address = s.accept() data = client.recv(size) if data: if len(data) > 300:
def run(corpus, verbose=False,
        hkap_file=os.path.join(software, 'libs/PACManData.bay'),
        train=False, authors=False, exact_names=False, first_only=False,
        nyears=10, plotit=False, hst=False, clobber=False, rs_exceptions=''):
    """Classify a corpus, or every author of a person report, with Bayes data.

    The classifier is loaded from ``hkap_file``; category synonyms are read
    from ``category_synonyms.txt`` under ``software`` and stopwords come from
    ``load_stopwords()``.

    Parameters used by this function:
      corpus        -- path handed to the proposal parsers when ``hst`` is
                       true; otherwise the path of a text file to read.
      hkap_file     -- Bayes data file to load.
      authors       -- when true, ``corpus`` is a tab-separated person report
                       and each author is classified separately.
      hst           -- when true (and ``authors`` is false), text comes from
                       the abstract and science-justification parsers.
      nyears, rs_exceptions -- forwarded to ``util.adscrawl.run_authors``.
      clobber       -- when true, rebuild the cached author pickle even if
                       it already exists.
    ``verbose``, ``train``, ``exact_names``, ``first_only`` and ``plotit``
    are accepted but not used in this function body.

    Returns a tuple ``(result, uber_categories)``. Without ``authors``,
    ``result`` is the normalized guess for the corpus. With ``authors`` it is
    a dict mapping each author to ``{'hkap': ..., 'cites': ...}``.
    ``uber_categories`` maps each synonym-file key to its list of values.
    """
    with open(os.path.join(software, 'category_synonyms.txt'), 'r') as f:
        lines = f.readlines()
    acronyms = {}
    for line in lines:
        if line.startswith('#'):
            continue
        # Skip blank lines, as the corpus readers below do, instead of
        # failing on the key/value unpacking.
        if not line.strip():
            continue
        key, value = line.split('=')
        acronyms[key.strip()] = value.strip().split(',')
    uber_categories = acronyms

    stopwords = load_stopwords()
    dguesser = Bayes()
    dguesser.load(hkap_file)

    if not authors:
        if hst:
            ## Below, proposals are retrieved, then parsed.
            abstracts = parse_abstracts_proposals(corpus)
            text = parse_science_justification_proposals(corpus)
            justification = abstracts + text
            bayesString = " " + justification
        else:
            with open(corpus) as f:
                lines = f.readlines()
            text = ''
            for line in lines:
                if line.startswith('#'):
                    continue
                if not line.strip():
                    continue
                text += line.strip() + ' '
            bayesString = text
        bayesString = work_string(bayesString, stopwords)
        result = dguesser.guess(bayesString)
        result = normalize_result(result)
    else:
        ## assumes input is a person report
        ## if .pkl report not available, creates new one
        import util
        records = []
        results_dict = {}
        # NOTE(review): str.replace swaps every occurrence of the extension
        # text in the path, not only the suffix. Left unchanged so existing
        # cache files keep being found.
        results_pkl = corpus.replace(corpus.split('.')[-1], 'pkl')
        if not os.path.isfile(results_pkl) or clobber:
            with open(corpus) as f:
                lines = f.readlines()
            for line in lines:
                if line.startswith('#'):
                    continue
                if not line.strip():
                    continue
                info = line.rstrip().split("\t")
                if info[0] == '':
                    continue
                records.append(
                    info[0].replace('"', '').replace("'", '').lower())
            author_dict, cite_dict = util.adscrawl.run_authors(
                records, nyears=nyears, rs_exceptions=rs_exceptions)
            with open(results_pkl, 'wb') as pkl_file:
                pickle.dump(author_dict, pkl_file)
            with open('cites.pkl', 'wb') as pkl_file:
                pickle.dump(cite_dict, pkl_file)
        else:
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only safe while these caches are produced locally.
            with open(results_pkl, 'rb') as pkl_file:
                author_dict = pickle.load(pkl_file)
            with open('cites.pkl', 'rb') as pkl_file:
                cite_dict = pickle.load(pkl_file)

        for author in author_dict.keys():
            # Accumulate every abstract of the author; plain assignment in
            # the loop kept only the last one.
            bayesString = ''.join(
                ' ' + abstract for abstract in author_dict[author])
            bayesString = work_string(bayesString, stopwords)
            # Unlike the corpus branch, the guess is not normalized here.
            result = dguesser.guess(bayesString)
            results_dict[author] = {}
            results_dict[author]['hkap'] = rec.fromrecords(result)
            try:
                results_dict[author]['cites'] = sorted(cite_dict[author],
                                                       reverse=True)
            except Exception:
                # Best-effort: fall back to a single zero citation count,
                # but no longer swallow KeyboardInterrupt/SystemExit.
                results_dict[author]['cites'] = [0]
        result = results_dict
    return (result, uber_categories)
text = "%s %s %s" % (post.title, post.author, post.summary) brain.train(tag, text) print "%s :: %s" % (tag, post.title) retrain() bayes.data = base64.encodestring(brain.saves()) bayes.save() from BeautifulSoup import BeautifulSoup from mainapp.models import Post from reverend.thomas import Bayes brain = Bayes() brain.load('fish.db') tag = 'Dead' posts = Post.objects.filter(read=read) posts = posts.filter(tags__in=tag) #brain.train('Dead', post.summary) t1 = Tag.objects.get(id=flag) for post in posts: t1 = Tag.objects.get(id=flag) if t1 in post.tags.all() and not feed: post.tags.remove(t1) post.read = not t1.read brain.untrain(t1.name, post.summary) else: post.tags.add(t1)
) neg_file = open(BASE_DIR+"/data/rt-polarity.neg").read() pos_file = open(BASE_DIR+"/data/rt-polarity.pos").read() neg_tweets_list = str(neg_file).split('\n') pos_tweets_list = str(pos_file).split('\n') neg_cutoff = int(neg_tweets_list.__len__()*3/4) pos_cutoff = int(pos_tweets_list.__len__()*3/4) neg_train = neg_tweets_list[:neg_cutoff] pos_train = pos_tweets_list[:neg_cutoff] neg_test = neg_tweets_list[neg_cutoff:] pos_test = pos_tweets_list[pos_cutoff:] tweet_data = {'neg_train':neg_train,'pos_train':pos_train,'neg_test':neg_test,'pos_test':pos_test} bestwords = get_best_words(pos_train, neg_train) single_classifier = Bayes() single_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_classifier.dat") non_stop_classifier = Bayes(tokenizer=non_stop_tokenizer()) non_stop_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_stop_classifier.dat") best_classifier = Bayes(tokenizer=best_tokenizer(best_words=bestwords)) best_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_best_classifier.dat") bigram_best_classifier = Bayes(tokenizer=best_bigram_tokenizer(best_words=bestwords)) bigram_best_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_bi_classifier.dat")
def action_train(self, cr, uid, ids, context=None):
    """Train the Bayes data of each record's category group with its message.

    For every id in ``ids`` the record's category is read. When a guess
    ``result`` is available, the train/guess counters of the category are
    updated and, if no category was set, the best-scoring guess is written
    to the records. The group's stored ``train_data`` is then loaded into a
    Bayes instance via ``crm_bayes.bay`` under ``file_path``, trained with
    the message name, saved, and written back to the group together with
    the ``automate_test`` percentage.

    Returns an ``ir.actions.act_window`` dict opening
    ``crm.bayes.train.message`` in a new form view.

    Raises ``osv.except_osv`` when a record ends up without a category.
    """
    cat_obj = self.pool.get('crm.bayes.categories')
    group_obj = self.pool.get('crm.bayes.group')
    message_obj = self.pool.get('crm.bayes.test.guess')
    bayes_file = file_path + "crm_bayes.bay"

    for record_id in ids:
        cat_id = self.read(cr, uid, record_id, ['category_id', 'name'])
        cat_id = cat_id[0]['category_id']
        # NOTE(review): `result` is not assigned inside this method; it is
        # presumably a module-level name holding (category, score) pairs --
        # confirm where it is defined.
        if result:
            max_list = max(result, key=lambda k: k[1])
            if cat_id:
                cat_guess_msg = cat_obj.read(cr, uid, cat_id,
                                             ['train_messages'])
                cat_obj.write(cr, uid, cat_id, {
                    'train_messages': cat_guess_msg['train_messages'] + 1
                })
            if max_list[1] > 0 and not cat_id:
                cat_id = cat_obj.search(
                    cr, uid, [('name', '=', max_list[0])])[0]
                cat_guess_msg = cat_obj.read(cr, uid, cat_id,
                                             ['guess_messages'])
                cat_obj.write(cr, uid, cat_id, {
                    'guess_messages': cat_guess_msg['guess_messages'] + 1
                })
                self.write(cr, uid, ids, {'category_id': cat_id})

        if cat_id:
            cat_rec = cat_obj.read(cr, uid, cat_id, [])
            group_id = cat_rec['group_id'][0]
            guesser = Bayes()

            data = ""
            for rec in group_obj.browse(cr, uid, [group_id]):
                if rec['train_data']:
                    data += rec['train_data']
            if data:
                # Bayes loads from a file, so stage the stored data on disk.
                with open(bayes_file, 'w') as myfile:
                    myfile.write(data)
                guesser.load(bayes_file)

            guesser.train(cat_rec['name'],
                          message_obj.read(cr, uid, record_id)[0]['name'])
            guesser.save(bayes_file)
            # Read the saved state back; the handle is closed on exit.
            with open(bayes_file, 'r') as myfile:
                data = myfile.read()

            # Let the database driver bind the id instead of formatting it
            # into the SQL text.
            cr.execute(
                "select sum(train_messages) as tot_train,"
                "sum(guess_messages) as tot_guess "
                "from crm_bayes_categories where group_id=%s",
                (group_id,))
            rec = cr.dictfetchall()
            # SUM() yields NULL/None when there is nothing to add up.
            tot_guess = rec[0]['tot_guess'] or 0
            tot_train = rec[0]['tot_train'] or 0
            total = tot_guess + tot_train
            if total:
                percantage = float(tot_guess * 100) / float(total)
            else:
                # No messages counted yet: avoid dividing by zero.
                percantage = 0.0
            group_obj.write(cr, uid, group_id, {
                'train_data': data,
                'automate_test': percantage
            })
        else:
            raise osv.except_osv(_('Error !'),
                                 _('Please Select Category! \n'))

    return {
        'view_type': 'form',
        "view_mode": 'form',
        'res_model': 'crm.bayes.train.message',
        'type': 'ir.actions.act_window',
        'target': 'new',
    }