def treinar():
    """Train a Bayes guesser from the category folders and save it to disk.

    Category names are the entries of ``./data``; the training files of each
    category are read from ``CAMINHO_CATEGORIAS/<category>``. ``.svn``
    entries are skipped at both levels. Every file's full content is trained
    under its category name and the result is saved to ``conhecimento.bay``.

    Any failure while training or saving is reported with a single message
    (best effort); nothing is returned.
    """
    print(">>> Carregando categorias...")
    # NOTE(review): categories are listed from './data' but files are read
    # from CAMINHO_CATEGORIAS -- confirm both point at the same directory.
    categorias = os.listdir('./data')
    if '.svn' in categorias:
        categorias.remove('.svn')

    print(">>> Instanciando treinador\n")
    guesser = Bayes()
    try:
        for categoria in categorias:
            print(">>> Treinando categoria %s" % categoria)
            pasta = "%s/%s" % (CAMINHO_CATEGORIAS, categoria)
            for nome in os.listdir(pasta):
                if nome == '.svn':
                    continue
                # The context manager closes each training file; previously
                # every handle stayed open until garbage collection.
                with open('%s/%s' % (pasta, nome), 'r') as arquivo:
                    texto = arquivo.read()
                guesser.train(categoria, texto)
        print("\n>>> Salvando base de conhecimento...\n")
        guesser.save("conhecimento.bay")
        print("Voil?!\n")
    except Exception:
        # Still best effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way the bare ``except:`` did.
        print("N?o foi poss?vel treinar a base")
def retrain(request): # Retrain your brain user = User.objects.get(user=request.user) posts = Post.objects.filter(user=user) bayes = Brain.objects.get(user=user) brain = Bayes() #brain.loads(base64.decodestring(bayes.data)) tagcount = 0 # retrain the brain based on existing tags for post in posts: print post.title, "::", for tag in post.tags.all(): text = "%s %s %s" % (post.title, post.author, post.summary) brain.train(tag, text) tagcount += 1 print tag, print brain.save('%s.db' % user) bayes.data = base64.encodestring(brain.saves()) bayes.save() message = 'Found %s tags' % tagcount params = {'Messages': [message,]} return response(request, 'mainapp/index.html', params)
class Guesser(object):
    """Classify a project's unclassified sentences with a Bayes guesser.

    The guesser is trained at construction time from
    ``project.classified()``. ``guess()`` collects raw guesses for
    ``project.to_classify()`` in ``self.data``; ``best_matches()`` reduces
    them to the single most certain guess per sentence in ``self.best``.
    """

    def __init__(self, project):
        self.project = project
        self.bayes = Bayes()
        self._train()
        self.data = []
        self.best = []

    def _train(self):
        """Train the guesser with every already classified sentence."""
        for sentence in self.project.classified():
            self.bayes.train(sentence.get_classification(), sentence.sentence)

    def guess(self):
        """Append the guesses for each sentence to classify; return ``self.data``.

        Each entry is ``{'sentence_id': ..., 'guesses': ...}``. Entries
        accumulate across calls.
        """
        for sentence in self.project.to_classify():
            self.data.append({
                'sentence_id': sentence.id,
                'guesses': self.bayes.guess(sentence.sentence),
            })
        return self.data

    def best_matches(self):
        """Return the highest-certainty guess for every entry of ``self.data``.

        Returns a new empty list when there is no data, otherwise
        ``self.best``: a list of ``{'id', 'guess', 'certainty'}`` dicts.
        Entries without a usable guess get ``None`` for both values.

        Unlike the previous implementation this neither rewrites
        ``self.data`` nor keeps appending to ``self.best``, so calling it
        repeatedly yields the same result instead of duplicated and
        ``(None, None)`` matches.
        """
        if not self.data:
            return []
        # Clear in place so references to ``self.best`` held elsewhere stay valid.
        del self.best[:]
        for entry in self.data:
            try:
                # max() returns the first maximal item, matching what a
                # stable descending sort followed by [0] selected.
                top = max(entry['guesses'], key=lambda guess: guess[1])
            except (ValueError, TypeError, IndexError):
                # Empty, missing or malformed guesses.
                top = (None, None)
            self.best.append({
                'id': entry['sentence_id'],
                'guess': top[0],
                'certainty': top[1],
            })
        return self.best
def train(self, bucket, words):
    """Nominate a bucket to which the words apply, and train accordingly.

    Does nothing when ``bucket`` or ``words`` is the empty string. Otherwise
    trains through ``Bayes.train`` and then persists through ``Bayes.save``
    to ``self.brain``. Failures are reported with a message instead of being
    raised (best effort). Always returns None.
    """
    if bucket == "" or words == "":
        return None
    try:
        Bayes.train(self, bucket, words)
        Bayes.save(self, self.brain)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        print("Failed to learn")
    return None
def train(self, bucket, words):
    """Nominate a bucket to which the words apply, and train accordingly.

    An empty ``bucket`` or empty ``words`` string is ignored. Otherwise the
    words are trained via ``Bayes.train`` and the result is saved via
    ``Bayes.save`` to ``self.brain``. Errors are reported with a message
    rather than propagated (best effort). Always returns None.
    """
    if bucket == "" or words == "":
        return None
    try:
        Bayes.train(self, bucket, words)
        Bayes.save(self, self.brain)
    except Exception:
        # A bare ``except:`` would also trap KeyboardInterrupt/SystemExit.
        print("Failed to learn")
    return None
def action_train(self, cr, uid, ids, context=None):
    """Train the category group's Bayes data with the selected message(s).

    For each id the record's category is resolved (falling back to the best
    entry of ``result`` when the record has none), the group's stored
    training data is loaded into a ``Bayes`` guesser, the message text is
    trained under the category name, and the updated data plus the guess
    percentage are written back to the group.

    Returns an ``ir.actions.act_window`` description opening
    ``crm.bayes.train.message`` in a new window.
    Raises ``osv.except_osv`` when no category could be determined.

    NOTE(review): the original indentation was lost; the nesting below is a
    reconstruction and should be confirmed against the real module.
    """
    cat_obj = self.pool.get('crm.bayes.categories')
    group_obj = self.pool.get('crm.bayes.group')
    message_obj = self.pool.get('crm.bayes.test.guess')
    for id in ids:
        cat_id = self.read(cr, uid, id, ['category_id','name'])
        cat_id = cat_id[0]['category_id']
        # NOTE(review): ``result`` is not assigned anywhere in this function
        # as shown -- presumably a list of (category name, score) pairs
        # produced by code that is not visible here; verify.
        if result :
            max_list = max(result, key=lambda k: k[1])
            if cat_id:
                # Category chosen explicitly: count it as a trained message.
                cat_guess_msg = cat_obj.read(cr, uid, cat_id, ['train_messages'])
                cat_obj.write(cr, uid, cat_id, {'train_messages' :cat_guess_msg['train_messages'] + 1})
            if max_list[1] > 0 and not cat_id:
                # No category on the record: adopt the best-scoring one by
                # name and count it as a guessed message.
                cat_id = cat_obj.search(cr, uid, [('name','=',max_list[0])])[0]
                cat_guess_msg = cat_obj.read(cr, uid, cat_id, ['guess_messages'])
                cat_obj.write(cr, uid, cat_id, {'guess_messages' :cat_guess_msg['guess_messages'] + 1})
                self.write(cr, uid, ids, {'category_id':cat_id})
        if cat_id :
            cat_rec = cat_obj.read(cr, uid, cat_id, [])
            guesser = Bayes()
            data = ""
            # Collect the training data stored on the category's group.
            for rec in group_obj.browse(cr, uid, [cat_rec['group_id'][0]]):
                if rec['train_data']:
                    data += rec['train_data']
            if data :
                # The guesser loads from a file path, so the stored data is
                # first written out to the .bay file.
                myfile = file(file_path+"crm_bayes.bay", 'w')
                myfile.write(data)
                myfile.close()
                guesser.load(file_path+"crm_bayes.bay")
            guesser.train(cat_rec['name'], message_obj.read(cr, uid, id)[0]['name'])
            guesser.save(file_path+"crm_bayes.bay")
            # Read the saved file back so it can be stored on the group.
            # NOTE(review): this handle is never closed.
            myfile = file(file_path+"crm_bayes.bay", 'r')
            data=""
            for fi in myfile.readlines():
                data += fi
            cr.execute("select sum(train_messages) as tot_train,sum(guess_messages) as tot_guess from crm_bayes_categories where group_id=%d"% cat_rec['group_id'][0])
            rec = cr.dictfetchall()
            if not rec[0]['tot_guess']:
                rec[0]['tot_guess'] =0
            # Share of guessed messages among all guessed + trained messages.
            percantage = float(rec[0]['tot_guess'] *100) / float(rec[0]['tot_guess'] + rec[0]['tot_train'])
            group_obj.write(cr, uid, cat_rec['group_id'][0], {'train_data': data,'automate_test':percantage})
        else :
            raise osv.except_osv(_('Error !'),_('Please Select Category! '))
    return {
        'view_type': 'form',
        "view_mode": 'form',
        'res_model': 'crm.bayes.train.message',
        'type': 'ir.actions.act_window',
        'target':'new',
    }
def getCategoryGuesses(self, corpus1, corpus2, corpus3):
    """Guess which of two reference corpora ``corpus3`` resembles.

    A new ``Bayes`` guesser is trained with ``corpus1`` under
    'first reference text' and ``corpus2`` under 'second reference text';
    the guesser's guesses for ``corpus3`` are returned unchanged.
    """
    from reverend.thomas import Bayes

    references = (
        ('first reference text', corpus1),
        ('second reference text', corpus2),
    )

    guesser = Bayes()
    for label, reference in references:
        guesser.train(label, reference)

    return guesser.guess(corpus3)
def treino (self): banco_do_jornal = Server() genero=[banco_do_jornal[doc] for doc in GENEROS] #treinando o reverend from reverend.thomas import Bayes guesser = Bayes() guesser.train('artigo', ' '.join(genero[0][doc]['texto'] for doc in genero[0])) guesser.train('resenha',' '.join(genero[6][doc]['texto'] for doc in genero[6])) guesser.train('noticia',' '.join(genero[1][doc]['texto'] for doc in genero [1])) guesser.train('cronica',' '.join(genero[5][doc]['texto']for doc in genero[5] if 'texto' in genero[5][doc] )) guesser.train('horoscopo',' '.join(genero[3][doc]['texto']for doc in genero[3])) guesser.train('manchete',' '.join(genero[2][doc]['titulo']for doc in genero[2])) guesser.train('receita',' '.join(genero[4][doc]['texto']for doc in genero[4])) guesser.save('my_guesser.bay') variavel = guesser.guess('Cidad?o se descuidou e roubaram seu celular. Como era um executivo e n?o sabia mais viver sem celular, ficou furioso. Deu parte do roubo, de Quara?.? Pois ?.? Carol.? Hein?? Meu nome. ? Carol.? Ah. Voc?s s?o...? N?o, n?o. Nos conhecemos h? pouco.? Escute Carol. Eu trouxe uma encomenda para o Amleto. De Quara?. Uma pessegada, mas n?o me lembro do endere?o.') print 'Resultado = ', variavel
def treino (self): banco_do_jornal = Server() genero=[banco_do_jornal[doc] for doc in GENEROS] #treinando o reverend from reverend.thomas import Bayes guesser = Bayes() guesser.train('artigo', ' '.join(genero[0][doc]['texto'] for doc in genero[0])) guesser.train('resenha',' '.join(genero[6][doc]['texto'] for doc in genero[6])) guesser.train('noticia',' '.join(genero[1][doc]['texto'] for doc in genero [1])) guesser.train('cronica',' '.join(genero[5][doc]['texto']for doc in genero[5] if 'texto' in genero[5][doc] )) guesser.train('horoscopo',' '.join(genero[3][doc]['texto']for doc in genero[3])) guesser.train('manchete',' '.join(genero[2][doc]['titulo']for doc in genero[2])) guesser.train('receita',' '.join(genero[4][doc]['texto']for doc in genero[4])) guesser.save('my_guesser.bay') variavel = guesser.guess('Bolo de chocolate :ingredientes : 6 ovos, 2 xicaras de farinha, 1 colher de achocolatado, 1 lata de leite condensado, 2 copos de leite e 3 colheres de açucar') print 'Resultado = ', variavel
def trained(self, cr, uid, ids, context=None):
    """Train the category group's Bayes data with each record's description.

    For every id in ``ids``: the group's stored ``train_data`` is loaded
    into a ``Bayes`` guesser (through the ``crm_bayes.bay`` file under
    ``file_path``), the record's description is trained under its category
    name, the category's ``train_messages`` counter is incremented, the
    group's ``train_data`` / ``automate_test`` are rewritten and the record
    is put in state ``trained``.

    Returns True.
    Raises ``osv.except_osv`` when a record has no description or no
    category.
    """
    group_obj = self.pool.get('crm.bayes.group')
    cat_obj = self.pool.get('crm.bayes.categories')
    bay_path = file_path + "crm_bayes.bay"

    for rec_id in ids:
        record = self.read(cr, uid, rec_id, ['category_id', 'description'])
        if not record['description']:
            raise osv.except_osv(_('Error!'), _("Description Not Define!"))
        if not record['category_id']:
            raise osv.except_osv(_('Error!'), _("Statistics Category Not Define!"))

        category_id = record['category_id'][0]
        cat_rec = cat_obj.read(cr, uid, category_id, [])
        group_id = cat_rec['group_id'][0]

        guesser = Bayes()
        data = "".join(
            group['train_data']
            for group in group_obj.browse(cr, uid, [group_id])
            if group['train_data'])
        if data:
            # The guesser loads from a path, so dump the stored data first.
            with open(bay_path, 'w') as bay_file:
                bay_file.write(data)
            guesser.load(bay_path)

        guesser.train(cat_rec['name'], record['description'])
        guesser.save(bay_path)

        # Read the saved file back in one go; the context manager closes the
        # handle, which the previous version left open.
        with open(bay_path, 'r') as bay_file:
            data = bay_file.read()

        cat_obj.write(
            cr, uid, category_id,
            {'train_messages': int(cat_rec['train_messages']) + 1})

        cr.execute(
            "select sum(train_messages) as tot_train,sum(guess_messages) as tot_guess from crm_bayes_categories where group_id=%d"
            % group_id)
        totals = cr.dictfetchall()[0]
        # SUM() over NULLs yields NULL/None; treat that as zero.
        tot_guess = totals['tot_guess'] or 0
        tot_train = totals['tot_train'] or 0
        total = tot_guess + tot_train
        # Share of guessed messages among all messages; 0.0 when none exist.
        percantage = float(tot_guess * 100) / float(total) if total else 0.0

        group_obj.write(cr, uid, group_id, {
            'train_data': data,
            'automate_test': percantage
        })
        self.write(cr, uid, rec_id, {'state_bayes': 'trained'})
    return True
def mark(request, flag):
    """Mark posts read/unread, or toggle a tag on them and (un)train the brain.

    The posts are selected by the ``feed`` query parameter when present,
    otherwise by the ``post`` id. When ``flag`` is 'read' or 'unread' the
    posts' ``read`` field is updated; any other ``flag`` is used as a ``Tag``
    id that is toggled on each post, training or untraining the user's brain
    with the post's title, author and summary.

    Redirects back to the category, feed or tag listing named in the query
    string, or to '/' when none is given.

    NOTE(review): the original indentation was lost; in particular whether
    the brain is saved only in the tagging branch (as laid out here) or on
    every call should be confirmed.
    """
    id = request.GET.get('post', None)
    feed = request.GET.get('feed', None)
    category = request.GET.get('category')
    tag = request.GET.get('tag') or None
    try:
        if feed:
            posts = Post.objects.filter(feed=feed)
        else:
            posts = Post.objects.filter(id=id)
    # NOTE(review): Django's filter() normally returns a queryset rather
    # than raising DoesNotExist -- confirm this handler is reachable.
    except Post.DoesNotExist:
        return HttpResponseRedirect('/')
    bayes = Brain.objects.get(user=request.user) #login required
    brain = Bayes()
    brain.loads(base64.decodestring(bayes.data))
    if flag in ('read', 'unread'):
        # Turn the textual flag into the boolean stored on the posts.
        flag = flag == 'read'
        posts.update(read=flag)
    else:
        for post in posts:
            text = "%s %s %s" % (post.title, post.author, post.summary)
            t1 = Tag.objects.get(id=flag)
            # A tag already present is removed (and untrained) only when a
            # single post was addressed; with ``feed`` set it is always added.
            if t1 in post.tags.all() and not feed:
                post.tags.remove(t1)
                brain.untrain(t1.name, text)
            else:
                post.tags.add(t1)
                brain.train(t1.name, text)
            post.save()
        # Persist the updated brain base64-encoded on the user's Brain row.
        bayes.data = base64.encodestring(brain.saves())
        bayes.save()
    if category:
        return HttpResponseRedirect('/?category=%s' % category)
    elif feed:
        return HttpResponseRedirect('/?feed=%s' % feed)
    elif tag:
        return HttpResponseRedirect('/?tag=%s' % tag)
    else:
        return HttpResponseRedirect('/')
def read(request, id):
    """Mark a post as read, train it as 'Interesting' and redirect to its link.

    Training failures are printed and otherwise ignored, so the redirect
    still happens.

    NOTE(review): the outer ``try`` has no visible ``except``/``finally``;
    the snippet appears to be truncated before its handler.
    """
    try:
        post = Post.objects.get(id=id)
        post.read = True
        post.save()
        try:
            bayes = Brain.objects.get(user=request.user) #login required
            brain = Bayes()
            brain.loads(base64.decodestring(bayes.data))
            # NOTE(review): there is no separator between author and summary
            # here, so the last author word and first summary word fuse into
            # one token -- confirm whether a space was intended.
            text = post.title + ' ' + post.author + post.summary
            brain.train('Interesting', text)
            # Store the updated brain base64-encoded on the user's Brain row.
            bayes.data = base64.encodestring(brain.saves())
            bayes.save()
        except Exception, e:
            print "Couldn't train %s because %s" % (post.title, e)
        return HttpResponseRedirect(post.link)
def getLanguageGuesses(self, stopWords, corpus, languages):
    """Guess the language of ``corpus`` from per-language stop-word lists.

    A new ``Bayes`` guesser is trained, for every entry of ``languages``
    except 'automatic', with that language's stop-word string from
    ``stopWords.getStopWordString``. Both the stop words and the corpus are
    encoded to 'us-ascii' with unencodable characters replaced. Returns the
    guesser's guesses for the corpus.
    """
    from reverend.thomas import Bayes

    charset = 'us-ascii'

    def to_charset(text):
        # Encode to the working charset, replacing unencodable characters.
        return text.encode(charset, 'replace')

    guesser = Bayes()
    for selectLanguage in languages:
        if selectLanguage == 'automatic':
            # 'automatic' is a selector, not a trainable language.
            continue
        stopWordString = stopWords.getStopWordString(selectLanguage)
        guesser.train(selectLanguage, to_charset(stopWordString))

    return guesser.guess(to_charset(corpus))
def trained(self, cr, uid, ids, context=None):
    """Train the category group's Bayes data with each record's description.

    For every id in ``ids``: the group's stored ``train_data`` is loaded
    into a ``Bayes`` guesser (through the ``crm_bayes.bay`` file under
    ``file_path``), the record's description is trained under its category
    name, the category's ``train_messages`` counter is incremented, the
    group's ``train_data`` / ``automate_test`` are rewritten and the record
    is put in state ``trained``.

    Returns True.
    Raises ``osv.except_osv`` when a record has no description or no
    category.
    """
    group_obj = self.pool.get('crm.bayes.group')
    cat_obj = self.pool.get('crm.bayes.categories')
    bay_path = file_path + "crm_bayes.bay"

    for rec_id in ids:
        record = self.read(cr, uid, rec_id, ['category_id','description'])
        if not record['description']:
            raise osv.except_osv(_('Error!'),_("Description Not Define!"))
        if not record['category_id']:
            raise osv.except_osv(_('Error!'),_("Statistics Category Not Define!"))

        category_id = record['category_id'][0]
        cat_rec = cat_obj.read(cr, uid, category_id, [])
        group_id = cat_rec['group_id'][0]

        guesser = Bayes()
        data = "".join(
            group['train_data']
            for group in group_obj.browse(cr, uid, [group_id])
            if group['train_data'])
        if data:
            # The guesser loads from a path, so dump the stored data first.
            with open(bay_path, 'w') as bay_file:
                bay_file.write(data)
            guesser.load(bay_path)

        guesser.train(cat_rec['name'], record['description'])
        guesser.save(bay_path)

        # Read the saved file back in one go; the context manager closes the
        # handle, which the previous version left open.
        with open(bay_path, 'r') as bay_file:
            data = bay_file.read()

        cat_obj.write(cr, uid, category_id, {'train_messages':int(cat_rec['train_messages']) + 1})

        cr.execute("select sum(train_messages) as tot_train,sum(guess_messages) as tot_guess from crm_bayes_categories where group_id=%d"% group_id)
        totals = cr.dictfetchall()[0]
        # SUM() over NULLs yields NULL/None; treat that as zero.
        tot_guess = totals['tot_guess'] or 0
        tot_train = totals['tot_train'] or 0
        total = tot_guess + tot_train
        # Share of guessed messages among all messages; 0.0 when none exist.
        percantage = float(tot_guess * 100) / float(total) if total else 0.0

        group_obj.write(cr, uid, group_id, {'train_data': data,'automate_test':percantage})
        self.write(cr, uid, rec_id, {'state_bayes':'trained'})
    return True
# Train a positive/negative guesser from two text files (one example per
# line), save it, then classify the latest public tweets and print a label
# only when the two guesses differ by more than ``threshold``.
# NOTE(review): this snippet ends on a dangling ``else:`` -- the remainder of
# the script is not visible here.
from reverend.thomas import Bayes
import twitter

guesser = Bayes()
# train on the full set minus 500 reviews of each type
# NOTE(review): neither training file handle is ever closed.
pos = open('pos_smiles_5k')
for sent in pos:
    guesser.train('pos', sent.rstrip())
neg = open('neg_smiles_5k')
for sent in neg:
    guesser.train('neg', sent.rstrip())
guesser.save('twitter_guesser.bay')
api = twitter.Api()
latest = api.GetPublicTimeline()
# Minimum gap between the two guesses' scores for a tweet to be labelled.
threshold = 0.1
for tweet in latest:
    classif = guesser.guess(tweet.text)
    # Only tweets that got a guess for both pools are compared.
    if len(classif) == 2:
        if classif[0][1] - classif[1][1] > threshold:
            print "%s : %s!" % (tweet.text, classif[0][0])
        else:
            print "%s : NOISE" % tweet.text
    else:
class NaiveBayesClassifier(object):
    """Spam / non-spam text classifier built on a ``Bayes`` guesser.

    Texts are trained under the labels 'nonspam' and 'spam'. The instance
    keeps running counters: ``total_num_train_files`` (files used for
    directory-based training), ``total_num_test_files`` and ``num_misclass``
    (updated by ``test``). The counters accumulate across calls.
    """

    def __init__(self, non_spam_train_dir, spam_train_dir):
        self.non_spam_train_dir = non_spam_train_dir
        self.spam_train_dir = spam_train_dir
        self.naive_bayes_classifier = Bayes()
        self.total_num_train_files = 0
        self.total_num_test_files = 0
        self.num_misclass = 0

    def make_single_line_from_body_of_file(self, filename):
        """Return the file's lines, each stripped, concatenated without separator."""
        # The context manager closes the handle; it used to be leaked.
        with open(filename) as fd:
            return ''.join(line.strip() for line in fd)

    def _iter_files(self, root_dir):
        """Yield the path of every file found below ``root_dir``."""
        for subdir, _dirs, files in os.walk(root_dir):
            for file_i in files:
                yield os.path.join(subdir, file_i)

    def _train_dir(self, label, root_dir):
        """Train ``label`` with every file below ``root_dir``, counting the files."""
        for filename in self._iter_files(root_dir):
            self.total_num_train_files += 1
            full_text_line = self.make_single_line_from_body_of_file(filename)
            self.naive_bayes_classifier.train(label, full_text_line)

    def _train_lines(self, label, path):
        """Train ``label`` with each line of the file at ``path``."""
        with open(path, 'r') as fd:
            for line in fd:
                self.naive_bayes_classifier.train(label, line)

    def _evaluate_dir(self, expected_label, root_dir, echo_guesses):
        """Classify every file below ``root_dir`` and count the misclassified ones."""
        for filename in self._iter_files(root_dir):
            full_text_line = self.make_single_line_from_body_of_file(filename)
            class_prob_vec = self.naive_bayes_classifier.guess(full_text_line)
            self.total_num_test_files += 1
            # An empty guess falls back to 'nonspam', as in classify(),
            # instead of raising IndexError.
            y_hat = class_prob_vec[0][0] if class_prob_vec else 'nonspam'
            if echo_guesses:
                print(class_prob_vec)
            if y_hat != expected_label:
                self.num_misclass += 1

    def train(self):
        """Train from the two directories given at construction time."""
        self._train_dir('nonspam', self.non_spam_train_dir)
        self._train_dir('spam', self.spam_train_dir)

    def train_for_given_dirs(self, non_spam_train_dir, spam_train_dir):
        """Train from the given non-spam and spam directories."""
        self._train_dir('nonspam', non_spam_train_dir)
        self._train_dir('spam', spam_train_dir)

    def train_two_files(self):
        """Treat the two constructor paths as files and train on each of their lines."""
        self._train_lines('nonspam', self.non_spam_train_dir)
        self._train_lines('spam', self.spam_train_dir)

    def train_for_two_exogenous_files(self, non_spam_train_file, spam_train_file):
        """Train on each line of the given files; an empty path is skipped."""
        if non_spam_train_file != '':
            self._train_lines('nonspam', non_spam_train_file)
        if spam_train_file != '':
            self._train_lines('spam', spam_train_file)

    def test(self, non_spam_test_dir, spam_test_dir):
        """Classify every file in both directories and print error rate and accuracy.

        Updates ``total_num_test_files`` and ``num_misclass``. Test files are
        no longer added to ``total_num_train_files`` (a copy-paste slip in
        the earlier version), and an empty test set reports a
        misclassification rate of 0 instead of dividing by zero. The guesses
        for the spam files are printed as before.
        """
        self._evaluate_dir('nonspam', non_spam_test_dir, False)
        self._evaluate_dir('spam', spam_test_dir, True)
        if self.total_num_test_files:
            misclass_rate = self.num_misclass / float(self.total_num_test_files)
        else:
            misclass_rate = 0.0
        accuracy = 1 - misclass_rate
        print('Misclassification rate is %f' % misclass_rate)
        print('Accuracy is %f' % accuracy)

    def make_single_line_from_body(self, text_body):
        """Return the stripped lines of ``text_body`` joined by single spaces."""
        return ' '.join(line.strip() for line in text_body)

    def make_single_line_from_body2(self, text_body):
        """Return the lines of ``text_body``, each followed by one space, concatenated."""
        return ''.join(line + ' ' for line in text_body)

    def create_nonspam_spam_datasets(self, text_body):
        """Return ``text_body`` unchanged."""
        return text_body

    def classify(self, text_body):
        """Return the top guessed label for ``text_body``, or 'nonspam' without a guess."""
        class_prob_vec = self.naive_bayes_classifier.guess(text_body)
        if class_prob_vec:
            return class_prob_vec[0][0]
        return 'nonspam'
'Goal: Build a language recognizer using a naive bayesian classifier' # Make a 50 language reconizer trained on 10 books per language at: # http://www.gutenberg.org/browse/languages/en # http://www.gutenberg.org/files/1342/1342-0.txt from reverend.thomas import Bayes # Train the classifier language_sniffer = Bayes() for lang in ['en', 'es', 'fr', 'de', 'it']: filename = 'notes/proverbs_%s.txt' % lang with open(filename) as f: data = f.read().decode('utf-8') language_sniffer.train(lang, data) # Apply the classifier phrases = u'''\ All the leaves are brown and the sky is gray. I've been for a walk on a winter's day. De colores, todos los colores. De colores se visten los campos en la primavera. Jingle bells, jingle all the way. Oh what fun it is to ride in a one horse open sleigh. Casca belles, hoy es navidad. Es un dia, de allegria y felicidad. '''.splitlines() for phrase in phrases: best_guess = language_sniffer.guess(phrase)[0][0] print best_guess, '<--', phrase[:30]
# Demo: train a guesser with a few function words per language, try two
# guesses, add one more English sample and save the result.
from reverend.thomas import Bayes

guesser = Bayes()

for language, sample in [
    ('french', 'le la les du un une je il elle de en'),
    ('german', 'der die das ein eine'),
    ('spanish', 'el uno una las de la en'),
    ('english', 'the it she he they them are were to'),
]:
    guesser.train(language, sample)

# The results of these guesses are not used.
guesser.guess('they went to el cantina')
guesser.guess('they were flying planes')

guesser.train('english', 'the rain in spain falls mainly on the plain')
guesser.save('my_guesser.bay')
# Demo: train two pools, guess once, then untrain part of the 'fish' pool.
from reverend.thomas import Bayes

guesser = Bayes()

for pool, words in (
    ('fish', 'salmon trout cod carp'),
    ('fowl', 'hen chicken duck goose'),
):
    guesser.train(pool, words)

# The result of this guess is not used.
guesser.guess('chicken tikka marsala')

guesser.untrain('fish', 'salmon carp')
# instantiate URL retriever class urlRetriever = retriever.URLRetriever() # try retrieval of url try: corpusSet = urlRetriever.retrieveURL(url) corpus = corpusSet['corpus'] charset = corpusSet['charset'] except IOError: error = 1 errorMessage = 'URL could not be retrieved' # stop word object stopWords = retriever.StopWords() # guess language guesser = Bayes() for selectLanguage in languages: if selectLanguage != 'automatic': stopWordString = stopWords.getStopWordString(selectLanguage) guesser.train(selectLanguage, stopWordString) language = guesser.guess(corpus) # print stopword string print stopWordString # print language # print language.pop(0)[0] print language
class BayesianClassifier:
    """Sentiment classifier for short texts, backed by a ``Bayes`` guesser.

    ``train`` feeds the guesser the given example tweets followed by a fixed
    vocabulary of positive and negative phrases. ``classify`` returns the top
    guessed sentiment when it clearly leads, and ``NEUTRAL`` otherwise.
    """

    # Sentiment labels re-exported from the module-level constants.
    POSITIVE = POSITIVE
    NEGATIVE = NEGATIVE
    NEUTRAL = NEUTRAL

    # Minimum lead of the best guess over the runner-up.
    THRESHHOLD = 0.1

    guesser = None

    def __init__(self):
        self.guesser = Bayes()

    def train(self, example_tweets):
        """Train with ``example_tweets`` and then the built-in phrase lists.

        Each example contributes its ``text`` under its ``sentiment``. The
        built-in phrases are trained one call per phrase, positives first,
        in the listed order; repeated phrases are trained repeatedly.
        """
        positive_phrases = (
            "cool", "Woo", "quite amazing", "thks", "looking forward to",
            "damn good", "frickin ruled", "frickin rules", "Way to go",
            "cute", "comeback", "not suck", "prop", "kinda impressed",
            "props", "come on", "congratulation", "gtd", "proud", "thanks",
            "can help", "thanks!", "pumped", "integrate", "really like",
            "loves it", "yay", "amazing", "epic flail", "flail", "good luck",
            "fail", "life saver", "piece of cake", "good thing", "hawt",
            "hawtness", "highly positive", "my hero", "yummy", "awesome",
            "congrats",
            "would recommend", "intellectual vigor", "really neat", "yay",
            "ftw", "I want", "best looking", "imrpessive", "positive", "thx",
            "thanks", "thank you", "endorse", "clearly superior", "superior",
            "really love", "woot", "w00t", "super", "wonderful",
            "leaning towards", "rally", "incredible", "the best",
            "is the best", "strong", "would love", "rally", "very quickly",
            "very cool", "absolutely love", "very exceptional", "so proud",
            "funny", "recommend", "so proud", "so great", "so cool", "cool",
            "wowsers", "plus", "liked it", "make a difference", "moves me",
            "inspired", "OK", "love it", "LOL",
            ":)", ";)", ":-)", ";-)", ":D", ";]", ":]", ":p", ";p",
            "voting for", "great", "agreeable", "amused", "brave", "calm",
            "charming", "cheerful", "comfortable", "cooperative",
            "courageous", "delightful", "determined", "eager", "elated",
            "enchanting", "encouraging", "energetic", "enthusiastic",
            "excited", "exuberant", "excellent", "I like", "fine", "fair",
            "faithful", "fantastic", "fine", "friendly", "fun ", "funny",
            "gentle", "glorious", "good", "pretty good", "happy", "healthy",
            "helpful", "high", "agile", "responsive",
            "hilarious", "jolly", "joyous", "kind", "lively", "lovely",
            "lucky", "nice", "nicely", "obedient", "perfect", "pleasant",
            "proud", "relieved", "silly", "smiling", "splendid",
            "successful", "thankful", "thoughtful", "victorious",
            "vivacious", "witty", "wonderful", "zealous", "zany", "rocks",
            "comeback", "pleasantly surprised", "pleasantly", "surprised",
            "love", "glad", "yum", "interesting",
        )
        negative_phrases = (
            "FTL", "irritating", "not that good", "suck", "lying",
            "duplicity", "angered", "dumbfounding", "dumbifying",
            "not as good", "not impressed", "stomach it", "pw", "pwns",
            "pwnd", "pwning", "in a bad way", "horrifying", "wrong",
            "flailing", "failing", "fallen way behind", "fallen behind",
            "lose", "fallen", "self-deprecating", "hunker down", "duh",
            "get killed by", "got killed by", "hated us",
            "only works in safari", "must have ie", "fuming and frothing",
            "heavy", "buggy", "unusable", "nothing is", "is great until",
            "don't support", "despise", "pos", "hindrance", "sucks",
            "problems", "not working", "fuming", "annoying", "frothing",
            "poorly", "headache", "completely wrong", "sad news",
            "didn't last", "lame", "pet peeves", "pet peeve", "can't send",
            "bullshit", "fail", "so terrible",
            "negative", "anooying", "an issue", "drop dead", "trouble",
            "brainwashed", "smear", "commie", "communist", "anti-women",
            "WTF", "anxiety", "STING", "nobody spoke", "yell", "Damn",
            "aren't", "anti", "i hate", "hate", "dissapointing",
            "doesn't recommend", "the worst", "worst", "expensive", "crap",
            "socialist", "won't", "wont", ":(", ":-(", "Thanks", "smartass",
            "don't like", "too bad", "frickin", "snooty", "knee jerk",
            "jerk", "reactionist", "MUST DIE", "no more", "hypocrisy",
            "ugly", "too slow", "not reliable", "noise", "crappy",
            "horrible",
            "bad quality", "angry", "annoyed", "anxious", "arrogant",
            "ashamed", "awful", "bad", "bewildered", "blues", "bored",
            "clumsy", "combative", "condemned", "confused", "crazy",
            "flipped-out", "creepy", "cruel", "dangerous", "defeated",
            "defiant", "depressed", "disgusted", "disturbed", "dizzy",
            "dull", "embarrassed", "envious", "evil", "fierce", "foolish",
            "frantic", "frightened", "grieving", "grumpy", "helpless",
            "homeless", "hungry", "hurt", "ill", "itchy", "jealous",
            "jittery", "lazy", "lonely", "mysterious", "nasty", "rape",
            "naughty",
            "nervous", "nutty", "obnoxious", "outrageous", "panicky",
            "f*****g up", "repulsive", "scary", "selfish", "sore", "tense",
            "terrible", "testy", "thoughtless", "tired", "troubled", "upset",
            "uptight", "weary", "wicked", "worried", "is a fool", "painful",
            "pain", "gross",
        )

        for tweet in example_tweets:
            self.guesser.train(tweet.sentiment, tweet.text)
        for phrase in positive_phrases:
            self.guesser.train(POSITIVE, phrase)
        for phrase in negative_phrases:
            self.guesser.train(NEGATIVE, phrase)

    def classify(self, sentence):
        """Return the sentiment guessed for ``sentence``.

        No guess gives ``NEUTRAL``; a single guess is returned as is; with
        two or more, the first guess is returned only when its score exceeds
        the second's by more than ``THRESHHOLD``, otherwise ``NEUTRAL``.
        """
        ranked = self.guesser.guess(sentence)
        count = len(ranked)
        if count == 0:
            return NEUTRAL

        (top_sentiment, top_score) = ranked[0]
        if count == 1:
            return top_sentiment

        (_next_sentiment, next_score) = ranked[1]
        if top_score - next_score > self.THRESHHOLD:
            return top_sentiment
        return NEUTRAL
def local_search(self, cid, term_unstemmed, recent):
    """Search for posts related to a term using a throw-away Bayes model.

    The term is stemmed and its exemplar posts are looked up.  With fewer
    than four exemplars the search is delegated to ``self.fulltext``.
    Otherwise a ``Bayes`` guesser is trained on the exemplars ("relevant")
    and on an equal number of random posts ("random"); the tokens that are
    markedly more frequent in the relevant pool drive a xapian query, and
    each hit is scored by its "relevant" probability, its broad support and
    (optionally) its age.

    Args:
        cid: identifier passed through to ``get_term_exemplars`` and
            ``fulltext``.
        term_unstemmed: the raw search term; it is stemmed before use.
        recent: when truthy, each score is also multiplied by
            ``_post_age_score(post)``.

    Returns:
        The result of ``self.fulltext`` when there are too few exemplars,
        otherwise the ten best ``SearchResult`` objects, highest score first.
    """
    term = self.stem(term_unstemmed)
    exemplar_pids = self.get_term_exemplars(cid, term)
    if len(exemplar_pids) < 4:
        return self.fulltext(cid, term, recent)
    log_tmp("SEARCH: %s exemplars" % len(exemplar_pids))

    guesser = Bayes()
    for ex_pid in exemplar_pids:
        ex = state.the.get_post(ex_pid, content=True)
        log_tmp("SEARCH: exemplar tokens: [%s]" % ex.tokens())
        guesser.train("relevant", ex.tokens())  # get normalized content from p.
        # TODO Toss in other factors, if possible.
    # probably cacheable, if we use a bigger pool
    for neg_ex_pid in state.the.get_random_pids(len(exemplar_pids)):
        guesser.train("random",
                      state.the.get_post(neg_ex_pid, content=True).tokens())
    log_tmp("SEARCH: trained")

    # Smoothed ratio of each token's count in "relevant" versus "random".
    proportions = [
        (tok, (count + 1) / (1.0 * guesser.pools["random"].get(tok, 0) + 1))
        for (tok, count) in guesser.poolData("relevant")
    ]
    # knock out the weak and irrelevant ones before sorting
    proportions = [(tok, prop) for (tok, prop) in proportions if prop > 2]

    fulltext_fallback = len(proportions) < 3
    if fulltext_fallback:
        query = xapian.Query(xapian.Query.OP_AND, [term])
    else:
        proportions.sort(key=operator.itemgetter(1), reverse=True)
        log_tmp("SEARCH: proportions: " + str(proportions))
        # search for the twelve best words
        query = xapian.Query(xapian.Query.OP_OR,
                             [tok for (tok, prop) in proportions[:12]])
    log_tmp("SEARCH: query: " + str(query))

    enq = xapian.Enquire(self.mainabase)
    # The original wrapped `query` in commented-out placeholders for
    # broad-support and recency scoring; only the plain query is active.
    enq.set_query(query)
    mset = enq.get_mset(0, 25)

    results = []
    for m in mset:
        doc = m.get_document()
        post = state.the.get_post(int(doc.get_data()), True)
        # Reset for every post: previously a guess without a "relevant"
        # entry raised NameError on the first post or silently reused the
        # probability of the previous post.
        rel_prob = 0.0
        for (pool, prob) in guesser.guess(post.tokens()):
            if pool == "relevant":
                rel_prob = prob
        score = rel_prob
        score *= post.broad_support
        if recent:
            score *= _post_age_score(post)
        results.append(SearchResult(post, term, score))

    # Key-based sort replaces the cmp-function sort; ordering is unchanged.
    results.sort(key=operator.attrgetter("score"), reverse=True)
    return results[:10]
>>> >>> y = big_func(10) Doing hard work INFO:root:Called big_func() with (10,) giving 11 in 1.074376 seconds >>> y = big_func(20) Doing hard work INFO:root:Called big_func() with (20,) giving 21 in 1.100503 seconds >>> show_cache(big_func) {10: 11, 20: 21} SyntaxError: invalid syntax >>> >>> >>> from reverend.thomas import Bayes >>> gender = Bayes() >>> gender.train('male', 'bill hank chris mark martin pat adam hank chris zack sean') >>> gender.train('female', 'mindy shelly pat mary daisy amber chris pat becky sue') >>> gender.guess('hank') [('male', 0.9999)] >>> gender.guess('mindy') [('female', 0.9999)] >>> gender.guess('pat') [('female', 0.6451612903225806), ('male', 0.35483870967741926)] >>> gender.guess('chris') [('male', 0.6875000000000001), ('female', 0.3125)] >>> gender.train('male', 'red red orange yellow red orange blue black brown blue red yellow') >>> gender.train('female', 'pink red green green blue blue chartreuse green blue yellow orange blue green') >>> gender.guess('red') [('male', 0.8), ('female', 0.19999999999999996)] >>> gender.guess('pink') [('female', 0.9999)]
# Train a Bayes guesser that maps free-text player commands to the handler
# names 'do_move' and 'do_take', then save it to 'commands.bays'.
from reverend.thomas import Bayes
guesser = Bayes()
# Three phrasings of a move command for every compass direction.
for each in ('north','south','east','west'):
    guesser.train('do_move', 'to the %s' % each)
    guesser.train('do_move', 'head %s' % each)
    guesser.train('do_move', 'spin %s' % each)
# if we don't do this 'do_move' is going to get too dominant
# NOTE(review): the source was whitespace-collapsed; whether the 'do_take'
# calls below originally sat inside the loop above could not be determined.
guesser.train('do_take', 'grab')
guesser.train('do_take', 'grab the')
guesser.train('do_take', 'pick up')
guesser.train('do_take', 'pick up the')
guesser.train('do_take', 'lift')
guesser.train('do_take', 'lift the')
guesser.train('do_take', 'fetch')
guesser.train('do_take', 'fetch the')
# NOTE(review): this literal presumably held one phrase per line before the
# file was collapsed; as written, splitlines() yields a single line - confirm.
bulk = """wear suit of armour put on suit of armour use armor climb into armour wear the armor place armour on me place armor on self"""
# Every line of the bulk text is trained into the 'do_take' pool.
for line in bulk.splitlines():
    guesser.train('do_take', line)
# Persist the trained pools.
guesser.save('commands.bays')
tag = 'Dead' posts = Post.objects.filter(read=read) posts = posts.filter(tags__in=tag) #brain.train('Dead', post.summary) t1 = Tag.objects.get(id=flag) for post in posts: t1 = Tag.objects.get(id=flag) if t1 in post.tags.all() and not feed: post.tags.remove(t1) post.read = not t1.read brain.untrain(t1.name, post.summary) else: post.tags.add(t1) post.read = t1.read brain.train(t1.name, post.summary) post.save() flag = "Weather" t1 = Tag.objects.get(name=flag) keyword = "weather" for post in posts: if keyword in post.title.lower(): post.tags.add(t1) post.dead = True brain.train(t1.name, post.title+post.summary) print "Tagging " + t1.name post.save()
('philosophie', 'je sais que je ne sais rien'), ('philosophie', 'les phénomènes sont réels à \ condition que nous le souhaitions'), ('philosophie', 'la raison est-elle toujours \ raisonnable ?'), ('philosophie', 'le cerveau peut-il être compris ?'), ('philosophie', "l'univers peut-il être l'objet de \ connaissance ?"), ('philosophie', 'le calcul a-t-il des limites \ intrinsèques ?'), ('philosophie', "une relation peut être durable si \ l'homme la souhaite")] for uneCategorie, uneProposition in uneListeDePropositions: # entrainement du réseau unReseauBayesiens.train(uneCategorie, uneProposition) phraseAnalyz1 = 'voici un résultat : 66/6 = 11 ' phraseAnalyz2 = "je ne saurais dire s'il pourra tout comprendre ... " phraseAnalyz3 = "le phénomène de la pluie pourrait être d'origine divine" phraseAnalyz4 = 'la représentation bourbakiste des chiffres assure leur \ détermination' for unePhrase in (phraseAnalyz1, phraseAnalyz2, phraseAnalyz3, phraseAnalyz4): # calculs de la catégorie solutions = unReseauBayesiens.guess(unePhrase) categorie = solutions[0][0] probabilite = solutions[0][1] print "la phrase '%s' est de catégorie '%s' avec une \ proba de '%d /100' " % (unePhrase, categorie, probabilite * 100)
# Guess the language of subtitle files.
#
# Usage: the directory given as the first command-line argument holds one
# training file per language (the file name is used as the pool name);
# subtitle file paths are read from stdin, one per line.  For every subtitle
# file one tab-separated line "pool, mean score, path" is printed per pool,
# best pool first.
from pysrt import SubRipFile
import sys
import os
import itertools
import operator
from reverend.thomas import Bayes
from collections import defaultdict

guesser = Bayes()
for f in os.listdir(sys.argv[1]):
    f_train = os.path.join(sys.argv[1], f)
    # `with` closes the training file; it was previously left open.
    with open(f_train, "r") as training_file:
        for line in training_file:
            line = line.strip()
            if not line:
                continue
            guesser.train(os.path.basename(f), line)

for line in sys.stdin:
    accum, count = defaultdict(float), defaultdict(int)
    line = line.strip()
    lines = SubRipFile.open(line, encoding='iso-8859-1')
    # A real list (not a lazy map object) so the emptiness test below works
    # on every Python version.
    lines = [k.text.lower().strip() for k in lines]
    if not lines:
        continue
    for line_ in lines:
        for lang, precision in guesser.guess(line_):
            accum[lang] += precision
            count[lang] += 1
    # Mean score per pool, best first.  itemgetter replaces the
    # tuple-parameter lambda, which is Python 2 only syntax.
    langs = sorted([(k, accum[k] / count[k]) for k in accum],
                   key=operator.itemgetter(1), reverse=True)
    for lang in langs:
        print("\t".join(map(str, [lang[0], lang[1], line])))
# Train a Bayes guesser on 80% of the labelled tweets and classify the rest.
from db import models
from peewee import fn
from reverend.thomas import Bayes

# Only tweets labelled 'ok' or 'harassing' take part, in random order.
is_labelled = (
    (models.Tweet.classification == 'ok') |
    (models.Tweet.classification == 'harassing')
)
tweets = models.Tweet.select().where(is_labelled).order_by(fn.Random())

# First 80% for training, remainder for testing.
countTrain = int(len(tweets) * 0.8)
train = tweets[:countTrain]
test = tweets[countTrain:]

guesser = Bayes()
for tweet in train:
    guesser.train(tweet.classification, tweet.text.lower())

# Evaluation counters (not updated within the visible part of the script).
correct = correctHarassing = totalHarassing = 0
incorrect = falsePos = falseNeg = 0

for tweet in test:
    resultsRaw = guesser.guess(tweet.text.lower())
    # pool name -> probability
    results = dict(resultsRaw)
    # A tweet counts as harassing once that pool scores above 0.4.
    if results.get('harassing', 0.0) > 0.4:
        guess = 'harassing'
    else:
        guess = 'ok'
from build import generatePostList from renderer import readFileContents from reverend.thomas import Bayes import json guesser = Bayes() for controlFilename in generatePostList("posts"): control = json.loads(readFileContents(controlFilename), encoding='utf-8') content = readFileContents(controlFilename.replace(".control", "")) for cat in control["categories"]: guesser.train(cat, content) for controlFilename in generatePostList("posts"): control = json.loads(readFileContents(controlFilename), encoding='utf-8') content = readFileContents(controlFilename.replace(".control", "")) for guess in guesser.guess(content): if guess[1] > 0.05 and guess[0] not in control["categories"] and not guess[0] == "personal": print control["title"], guess[0]
"""
pip install reverend
pip install sets
Source Code :https://laslabs.github.io/python-reverend/_modules/reverend/thomas.html
Overview of Bayes Rule: https://towardsdatascience.com/bayes-rule-with-a-simple-and-practical-example-2bce3d0f4ad0
"""
from reverend.thomas import Bayes

# (pool, sample sentence) pairs, trained in this order.
TRAINING_SAMPLES = (
    ('french', 'La souris est rentre dans son trou.'),
    ('english', 'my tailor is rich.'),
    ('french', 'Je ne sais pas si je viendrai demain.'),
    ('english', 'I do not plan to update my website soon and I would really like some help from the rest of you idiots.'),
)

g = Bayes()  # guesser
for pool, sample in TRAINING_SAMPLES:
    g.train(pool, sample)

# Show the guess for an English sentence.
print(g.guess('Jumping out of cliffs it not a good idea.'))
# print(g.guess('Demain il fera trs probablement chaud.'))
class Trainer(Frame):
    """Tk frame for training and testing a guesser on a directory of files.

    The frame shows a fixed number of item rows per page.  Each row offers
    one radio button per pool of the guesser; choosing one trains the
    guesser with that row's item and refreshes the guesses of all rows.
    """

    def __init__(self, parent, guesser=None, itemClass=None):
        """Build the widget tree.

        Args:
            parent: the Tk parent widget.
            guesser: the classifier to train; a fresh
                ``reverend.thomas.Bayes`` is created when omitted.
            itemClass: class used to load items from files (it must provide
                ``fromFile`` and ``columnDefs``); defaults to ``TextItem``.
        """
        self.status = StatusBar(parent)
        self.status.pack(side=BOTTOM, fill=X)
        Frame.__init__(self, parent)
        self.pack(side=TOP, fill=BOTH)
        self.itemsPerPage = 20
        self.rows = [ItemRow() for i in range(self.itemsPerPage)]
        self.items = []
        self.files = []
        self.cursor = 0      # index into self.files of the first shown file
        self.dirty = False   # set once a pool has been added
        if guesser is None:
            from reverend.thomas import Bayes
            self.guesser = Bayes()
        else:
            self.guesser = guesser
        if itemClass is None:
            self.itemClass = TextItem
        else:
            self.itemClass = itemClass
        for row in self.rows:
            row.summary.set('foo')
        self.initViews()

    def initViews(self):
        """Create the notebook with its Training, Testing and Quit screens."""
        self.nb = Notebook(self)
##        frame1 = Frame(self.nb())
##        self.poolView = PoolView(frame1, guesser=self.guesser, app=self)
##        self.poolView.pack(side=TOP)
        frame2 = Frame(self.nb())
        self.poolView = PoolView(frame2, guesser=self.guesser, app=self)
        self.poolView.pack(side=TOP)
        self.listView = Canvas(frame2, relief=GROOVE)
        self.listView.pack(padx=3)
        bn = Button(self.listView, text="Load training",
                    command=self.loadCorpus)
        bn.pack(side=RIGHT, anchor=NE, fill=X)
        self.columnHeadings()
        self.addNextPrev()

        frame3 = Frame(self.nb())
        self.testView = TestView(frame3, guesser=self.guesser, app=self)
        self.testView.pack()

        frame4 = Frame(self.nb())
        bp = Button(frame4, text="Quit", command=self.quitNow)
        bp.pack(side=BOTTOM)

        #self.nb.add_screen(frame1, 'Reverend')
        self.nb.add_screen(frame2, 'Training')
        self.nb.add_screen(frame3, 'Testing')
        self.nb.add_screen(frame4, 'Quit')

    def addNextPrev(self):
        """Add the "Prev Page" / "Next Page" buttons below the list view."""
        npFrame = Frame(self.listView)
        npFrame.pack(side=BOTTOM, fill=X)
        bn = Button(npFrame, text="Prev Page", command=self.prevPage)
        bn.grid(row=0, column=0)
        bn = Button(npFrame, text="Next Page", command=self.nextPage)
        bn.grid(row=0, column=1)

    def loadCorpus(self):
        """Ask for a directory and show its first page of items."""
        path = tkFileDialog.askdirectory()
        if not path:
            return
        self.loadFileList(path)
        self.displayItems()
        self.displayRows()

    def bulkTest(self):
        """Ask for one directory per pool and print the chosen pairs."""
        dirs = []
        for pool in self.guesser.poolNames():
            path = tkFileDialog.askdirectory()
            dirs.append((pool, path))
        for pool, path in dirs:
            print("%s %s" % (pool, path))

    def displayList(self):
        """Call ``self.itemRow`` for every loaded item."""
        # NOTE(review): no itemRow method is defined in this class - confirm
        # whether a subclass provides it or this method is dead code.
        for item in self.items:
            self.itemRow(item)

    def displayRows(self):
        """Build the widgets of every row."""
        for row in self.rows:
            self.displayRow(row)

    def loadFileList(self, path):
        """Remember the full paths of all entries in *path*; reset paging."""
        listing = os.listdir(path)
        self.files = [os.path.join(path, name) for name in listing]
        self.cursor = 0

    def prevPage(self):
        """Move one page back (never before the first file) and redisplay."""
        self.cursor = max(0, self.cursor - self.itemsPerPage)
        self.displayItems()

    def nextPage(self):
        """Move one page forward (never past the file count) and redisplay."""
        self.cursor = min(len(self.files), self.cursor + self.itemsPerPage)
        self.displayItems()

    def displayItems(self):
        """Load the current page of files into the rows.

        Files for which ``itemClass.fromFile`` returns ``None`` are skipped;
        the row paired with such a file keeps its previous content.
        """
        theseFiles = self.files[self.cursor:self.cursor + self.itemsPerPage]
        items = []
        for path, row in zip(theseFiles, self.rows):
            with open(path, 'rb') as fp:
                item = self.itemClass.fromFile(fp)
            if item is None:
                continue
            items.append(item)
            guesses = self.guesser.guess(item)
            summary = item.summary()
            cols = item.columnDefs()
            # One space-terminated field per column definition.
            s = ''.join(summary[c] + ' ' for c, ignore in cols)
            row.initialize(item, s, guesses, self.guesser.poolNames())
        self.items = items

    def quitNow(self):
        """Quit, asking for confirmation first when there are unsaved changes.

        Previously the application quit even when the user answered "No" to
        the confirmation prompt, because ``self.quit()`` was called
        unconditionally after it.
        """
        if self.dirty and not tkMessageBox.askyesno(
                "You have unsaved changes!", "Quit without saving?"):
            return
        self.quit()

    def columnHeadings(self):
        """Add a heading label for every column definition of the item class."""
        # FIXME - Something better for columns and rows in general
        line = Frame(self.listView, relief=RAISED, borderwidth=1)
        line.pack(side=TOP, padx=2, pady=1)
        colHeadings = self.itemClass.columnDefs()
        for currCol, (cHdr, width) in enumerate(colHeadings):
            l = Label(line, text=cHdr, width=width, bg='lightblue')
            l.grid(row=0, column=currCol)
        line = Frame(self)
        line.pack(fill=X)

    def training(self, row):
        """Train the guesser with *row*'s item under the selected pool."""
        sel = row.selection.get()
        self.guesser.train(sel, row.original)
        row.current = sel
        self.guessAll()

    def guessAll(self):
        """Refresh the pool view and recompute the guess of every row."""
        self.poolView.refresh()
        pools = self.guesser.poolNames()
        for row in self.rows:
            row.setGuess(self.guesser.guess(row.original), pools)

    def displayRow(self, row, bgc=None):
        """Create the frame, radio buttons, summary and colour stripe of a row."""
        # UGH - REWRITE!
        line = Frame(self.listView, bg=bgc)
        line.pack(pady=1)
        row.line = line
        self.insertRadios(row)
        Label(line, text=row.summary.get(), textvariable=row.summary,
              width=60, bg=bgc, anchor=W).grid(row=0, column=2)
        #Label(line, text=row.guess, width=7, bg=bgc, anchor=W).grid(row=0, column=1)
        colourStripe = Label(line, text=' ', width=1, bg=bgc, anchor=W,
                             relief=GROOVE)
        colourStripe.grid(row=0, column=1)
        line.colourStripe = colourStripe
        pools = self.guesser.poolNames()
        row.refreshColour(pools)

    def poolAdded(self):
        """Rebuild every row's radio buttons after a pool was added."""
        if not self.items:
            return
        pools = self.guesser.poolNames()
        for row in self.rows:
            for r in row.radios:
                r.destroy()
            self.insertRadios(row)
            row.refreshColour(pools)
        self.dirty = True

    def insertRadios(self, row):
        """Create one radio button per pool in *row* and store them on it."""
        radioFrame = Frame(row.line)
        radioFrame.grid(row=0, column=0)
        radios = []
        v = row.selection
        # Result unused; the call is kept in case it has side effects.
        colours = row.defaultColours()
        pools = self.guesser.poolNames()
        for currCol, pool in enumerate(pools):
            rb = Radiobutton(radioFrame, text=pool, variable=v, value=pool,
                             command=Command(self.training, row), bg=None)
            rb.grid(row=0, column=currCol)
            radios.append(rb)
        row.radios = radios
def treino(self):
    """Train a Bayes guesser on the newspaper genres and classify a sample.

    One database per entry of ``GENEROS`` is fetched from ``Server()``.
    The texts (titles for 'manchete') of all documents of a genre are joined
    into one training string per genre, the trained guesser is saved to
    'my_guesser.bay', and the guess for a fixed sample text is printed.
    """
    banco_do_jornal = Server()
    genero = [banco_do_jornal[doc] for doc in GENEROS]

    # dictionaries
    # Each value is now a list of terms.  The original wrote the terms as
    # adjacent string literals, which Python silently concatenates into one
    # single string.  The dictionaries are not used further in this method.
    dicionario_artigo = {
        'enunciados de opiniao': ['eu acredito', 'eu acho',
                                  'nós entendemos que'],
    }
    dicionario_resenha = {
        'auxiliar modal': ['pode', 'deve'],
        'lexico': ['filme', 'peça', 'livro', 'artista'],
    }
    dicionario_horoscopo = {
        'lexico': ['signo', 'peixes', 'áries', 'capricórnio', 'escorpião',
                   'cancer', 'gêmeos', 'touro', 'libra', 'sargitário',
                   'aquario', 'planeta', 'mercurio', 'vênus', 'marte',
                   'jupter', 'saturno', 'urano', 'netuno', 'ascendente',
                   'amor', 'saúde', 'trabalho', 'carta', 'sorte',
                   'dinheiro'],
    }
    dicionario_noticia = {
        'marcadores de data': ['janeiro', 'fevereiro', 'março', 'abril',
                               'maio', 'junho', 'julho', 'agosto',
                               'setembro', 'outubro', 'novembro',
                               'dezenbro'],
    }

    # training the reverend classifier
    from reverend.thomas import Bayes
    guesser = Bayes()
    guesser.train('artigo', ' '.join(genero[0][doc]['texto'] for doc in genero[0]))
    guesser.train('resenha', ' '.join(genero[6][doc]['texto'] for doc in genero[6]))
    guesser.train('noticia', ' '.join(genero[1][doc]['texto'] for doc in genero[1]))
    # Only this genre skips documents without a 'texto' field.
    guesser.train('cronica', ' '.join(genero[5][doc]['texto'] for doc in genero[5]
                                      if 'texto' in genero[5][doc]))
    guesser.train('horoscopo', ' '.join(genero[3][doc]['texto'] for doc in genero[3]))
    guesser.train('manchete', ' '.join(genero[2][doc]['titulo'] for doc in genero[2]))
    guesser.train('receita', ' '.join(genero[4][doc]['texto'] for doc in genero[4]))
    guesser.save('my_guesser.bay')

    variavel = guesser.guess('Lía, Claudia e Dourado se enfrentam no oitavo paredão do BBB10, que acontecerá nesta terça (2)Lia foi a escolha do líder Michel, que justificou que sua opinião vem sendo formada ao longo do jogo. Cacau foi eliminada, pois foram 80% dos votos contra ela, então ela saiu, muitas pessoas não queriam que ela saisse mais foram os votos que decidiram a derrota da cacau (Cláudia)')
    # Same output as the statement form `print 'Resultado = ', variavel`
    # (which puts a space between its two items), but valid on Python 3 too.
    print('Resultado =  %s' % (variavel,))
# Train a two-pool spam filter from two log files (one example per line)
# and save it to 'spam.bay'.
from reverend.thomas import Bayes

guesser = Bayes()

# `with` closes each log file; both were previously left open.
with open("spam.log", 'r') as f:
    for line in f:
        guesser.train('spam', line.strip())

with open("notspam.log", 'r') as f:
    for line in f:
        guesser.train('notspam', line.strip())

guesser.save('spam.bay')
# Minimal reverend session: train two pools, guess once, then untrain.
from reverend.thomas import Bayes

guesser = Bayes()

# (pool, space-separated words) pairs, trained in this order.
for pool, words in (
    ('fish', 'salmon trout cod carp'),
    ('fowl', 'hen chicken duck goose'),
):
    guesser.train(pool, words)

# The result of the guess is not used.
guesser.guess('chicken tikka marsala')

# Take two of the words back out of the 'fish' pool.
guesser.untrain('fish','salmon carp')
def action_train(self, cr, uid, ids, context=None):
    """Train the Bayes knowledge base of a category group with messages.

    For every record in *ids* the category is read, the per-category
    message counters are updated, the group's stored training data is
    loaded into a ``Bayes`` guesser, the message is trained under the
    category name, and the new training data plus the share of guessed
    messages is written back to the group.

    Args:
        cr: database cursor.
        uid: id of the acting user.
        ids: ids of the records to train with.
        context: unused.

    Returns:
        A window action dictionary opening 'crm.bayes.train.message'.

    Raises:
        osv.except_osv: when a record has no category.
    """
    # NOTE(review): the source was whitespace-collapsed; the nesting below
    # is a reconstruction - confirm against the original module.
    cat_obj = self.pool.get('crm.bayes.categories')
    group_obj = self.pool.get('crm.bayes.group')
    message_obj = self.pool.get('crm.bayes.test.guess')
    bay_path = file_path + "crm_bayes.bay"

    for message_id in ids:
        cat_id = self.read(cr, uid, message_id, ['category_id', 'name'])
        cat_id = cat_id[0]['category_id']
        # NOTE(review): `result` is not assigned anywhere in this method;
        # presumably a guess over the message text - confirm its origin.
        if result:
            max_list = max(result, key=lambda k: k[1])
            if cat_id:
                cat_guess_msg = cat_obj.read(cr, uid, cat_id,
                                             ['train_messages'])
                cat_obj.write(cr, uid, cat_id, {
                    'train_messages': cat_guess_msg['train_messages'] + 1
                })
            if max_list[1] > 0 and not cat_id:
                # No category chosen: adopt the best guess and count it.
                cat_id = cat_obj.search(
                    cr, uid, [('name', '=', max_list[0])])[0]
                cat_guess_msg = cat_obj.read(cr, uid, cat_id,
                                             ['guess_messages'])
                cat_obj.write(cr, uid, cat_id, {
                    'guess_messages': cat_guess_msg['guess_messages'] + 1
                })
                self.write(cr, uid, ids, {'category_id': cat_id})

        if not cat_id:
            raise osv.except_osv(_('Error !'),
                                 _('Please Select Category! '))

        cat_rec = cat_obj.read(cr, uid, cat_id, [])
        group_id = cat_rec['group_id'][0]
        guesser = Bayes()

        # Collect the training data already stored on the group.
        data = ""
        for rec in group_obj.browse(cr, uid, [group_id]):
            if rec['train_data']:
                data += rec['train_data']
        if data:
            # The guesser loads from a file, so round-trip through disk.
            with open(bay_path, 'w') as myfile:
                myfile.write(data)
            guesser.load(bay_path)

        guesser.train(cat_rec['name'],
                      message_obj.read(cr, uid, message_id)[0]['name'])
        guesser.save(bay_path)
        # Read the saved state back; `with` closes the handle, which was
        # previously left open.
        with open(bay_path, 'r') as myfile:
            data = myfile.read()

        # Parameterized query instead of %-formatting the id into the SQL.
        cr.execute(
            "select sum(train_messages) as tot_train,"
            "sum(guess_messages) as tot_guess "
            "from crm_bayes_categories where group_id=%s",
            (group_id,))
        rec = cr.dictfetchall()
        tot_guess = rec[0]['tot_guess'] or 0
        tot_train = rec[0]['tot_train'] or 0
        total = tot_guess + tot_train
        # Guard against an empty or NULL aggregate, which raised before.
        percentage = float(tot_guess * 100) / float(total) if total else 0.0
        group_obj.write(cr, uid, group_id, {
            'train_data': data,
            'automate_test': percentage
        })

    return {
        'view_type': 'form',
        "view_mode": 'form',
        'res_model': 'crm.bayes.train.message',
        'type': 'ir.actions.act_window',
        'target': 'new',
    }
class BayesianClassifier:
    """Sentiment classifier backed by a ``Bayes`` guesser.

    ``train`` feeds the guesser with example tweets plus a fixed vocabulary
    of positive and negative phrases; ``classify`` maps a sentence to one of
    the POSITIVE / NEGATIVE / NEUTRAL constants.
    """

    POSITIVE = POSITIVE
    NEGATIVE = NEGATIVE
    NEUTRAL = NEUTRAL

    # Minimum gap between the first two guesses for a non-neutral verdict.
    THRESHHOLD = 0.1
    guesser = None

    def __init__(self):
        self.guesser = Bayes()

    def train(self, example_tweets):
        """Train on *example_tweets*, then on the built-in phrase lists.

        Each example must expose ``sentiment`` and ``message``.  The phrase
        lists keep the original order and the original duplicates.
        """
        for t in example_tweets:
            self.guesser.train(t.sentiment, t.message)

        # NOTE(review): "fail" is listed as positive and negative, and
        # "Thanks" is listed as negative - kept exactly as found.
        positive_phrases = (
            "cool", "Woo", "quite amazing", "thks", "looking forward to",
            "damn good", "frickin ruled", "frickin rules", "Way to go",
            "cute", "comeback", "not suck", "prop", "kinda impressed",
            "props", "come on", "congratulation", "gtd", "proud", "thanks",
            "can help", "thanks!", "pumped", "integrate", "really like",
            "loves it", "yay", "amazing", "epic flail", "flail",
            "good luck", "fail", "life saver", "piece of cake",
            "good thing", "hawt", "hawtness", "highly positive", "my hero",
            "yummy", "awesome", "congrats",
            "would recommend", "intellectual vigor", "really neat", "yay",
            "ftw", "I want", "best looking", "imrpessive", "positive",
            "thx", "thanks", "thank you", "endorse", "clearly superior",
            "superior", "really love", "woot", "w00t", "super", "wonderful",
            "leaning towards", "rally", "incredible", "the best",
            "is the best", "strong", "would love", "rally", "very quickly",
            "very cool", "absolutely love", "very exceptional", "so proud",
            "funny", "recommend", "so proud", "so great", "so cool", "cool",
            "wowsers", "plus", "liked it", "make a difference", "moves me",
            "inspired", "OK", "love it", "LOL",
            ":)", ";)", ":-)", ";-)", ":D", ";]", ":]", ":p", ";p",
            "voting for", "great", "agreeable", "amused", "brave", "calm",
            "charming", "cheerful", "comfortable", "cooperative",
            "courageous", "delightful", "determined", "eager", "elated",
            "enchanting", "encouraging", "energetic", "enthusiastic",
            "excited", "exuberant", "excellent", "I like", "fine", "fair",
            "faithful", "fantastic", "fine", "friendly", "fun ", "funny",
            "gentle", "glorious", "good", "pretty good", "happy", "healthy",
            "helpful", "high", "agile", "responsive",
            "hilarious", "jolly", "joyous", "kind", "lively", "lovely",
            "lucky", "nice", "nicely", "obedient", "perfect", "pleasant",
            "proud", "relieved", "silly", "smiling", "splendid",
            "successful", "thankful", "thoughtful", "victorious",
            "vivacious", "witty", "wonderful", "zealous", "zany", "rocks",
            "comeback", "pleasantly surprised", "pleasantly", "surprised",
            "love", "glad", "yum", "interesting",
        )
        negative_phrases = (
            "FTL", "f**k", "irritating", "not that good", "suck", "lying",
            "duplicity", "angered", "dumbfounding", "dumbifying",
            "not as good", "not impressed", "stomach it", "pw",
            "pwns", "pwnd", "pwning", "in a bad way", "horrifying", "wrong",
            "flailing", "failing", "fallen way behind", "fallen behind",
            "lose", "fallen", "self-deprecating", "hunker down", "duh",
            "get killed by", "got killed by", "hated us",
            "only works in safari", "must have ie", "fuming and frothing",
            "heavy", "buggy", "unusable", "nothing is", "is great until",
            "don't support", "despise", "pos", "hindrance", "sucks",
            "problems", "not working", "fuming", "annoying", "frothing",
            "poorly", "headache", "completely wrong", "sad news",
            "didn't last", "lame", "pet peeves", "pet peeve", "can't send",
            "bullshit", "fail",
            "so terrible", "negative", "anooying", "an issue", "drop dead",
            "trouble", "brainwashed", "smear", "commie", "communist",
            "anti-women", "WTF", "anxiety", "STING", "nobody spoke", "yell",
            "Damn", "aren't", "anti", "i hate", "hate", "dissapointing",
            "doesn't recommend", "the worst", "worst", "expensive", "crap",
            "socialist", "won't", "wont", ":(", ":-(", "Thanks", "smartass",
            "don't like", "too bad", "frickin", "snooty", "knee jerk",
            "jerk", "reactionist", "MUST DIE", "no more", "hypocrisy",
            "ugly", "too slow", "not reliable", "noise", "crappy",
            "horrible", "bad quality", "angry", "annoyed", "anxious",
            "arrogant", "ashamed", "awful", "bad", "bewildered", "blues",
            "bored", "clumsy", "combative", "condemned", "confused",
            "crazy", "flipped-out", "creepy", "cruel", "dangerous",
            "defeated", "defiant", "depressed", "disgusted", "disturbed",
            "dizzy", "dull", "embarrassed", "envious", "evil", "fierce",
            "foolish", "frantic", "frightened", "grieving", "grumpy",
            "helpless", "homeless", "hungry", "hurt", "ill", "itchy",
            "jealous", "jittery", "lazy", "lonely", "mysterious", "nasty",
            "rape",
            "naughty", "nervous", "nutty", "obnoxious", "outrageous",
            "panicky", "f*****g up", "repulsive", "scary", "selfish",
            "sore", "tense", "terrible", "testy", "thoughtless", "tired",
            "troubled", "upset", "uptight", "weary", "wicked", "worried",
            "is a fool", "painful", "pain", "gross",
        )

        # Positive vocabulary first, then negative, as before.
        for label, phrases in ((POSITIVE, positive_phrases),
                               (NEGATIVE, negative_phrases)):
            for phrase in phrases:
                self.guesser.train(label, phrase)

    def classify(self, sentence):
        """Return the sentiment constant for *sentence*.

        No guess gives NEUTRAL; a single guess is returned as is; otherwise
        the first guess wins only when its value exceeds the second one by
        more than ``THRESHHOLD``.
        """
        # presumably the guesser orders its results best-first - TODO confirm
        ranked = self.guesser.guess(sentence)
        if not ranked:
            return NEUTRAL
        top_sentiment, top_value = ranked[0]
        if len(ranked) == 1:
            return top_sentiment
        runner_up_sentiment, runner_up_value = ranked[1]
        if top_value - runner_up_value > self.THRESHHOLD:
            return top_sentiment
        return NEUTRAL
# NOTE(review): the original line breaks and indentation of this line were
# lost, so the structure described below is inferred from statement order
# only -- TODO confirm against the original layout before relying on it.
#
# 1. `N2 = len(T2)` ... `return AGM` is the tail of a function whose header is
#    not visible here.  It computes AGM = (N1 * G1 - N2 * G2) / N, where
#    G1 / G2 are geo_mean(T1) / geo_mean(T2) and N = N1 + N2 + N3 (N1 and N3
#    are assigned before this point).
# 2. `# reduce(...)` is a commented-out expression: the product of `numbers`
#    raised to 1.0 / len(numbers).  Because everything now sits on a single
#    physical line, that `#` also comments out every statement after it.
# 3. The remaining statements bind nick = 'nthcolumn' and fetch
#    posts = getPosts(nick, n=500).  For each post in posts[10:] they append
#    post + os.linesep to './corpora/<nick>.txt'; ai.train(nick, post) follows
#    (presumably once per post -- verify).  `posts` is then rebound to
#    `tweets`, and two textually identical loops encode each post with
#    .encode('ascii', 'ignore') and append the non-empty results to the same
#    file -- presumably the duplicate is unintended or a repeated excerpt;
#    confirm.
#    Observations for a future cleanup:
#      - the corpus file is reopened in append mode for every single post;
#      - `text` only receives the return value of file.write() and is never
#        read afterwards;
#      - `file` shadows the Python 2 builtin of the same name.
N2 = len(T2) G1 = geo_mean(T1) G2 = geo_mean(T2) N = N1 + N2 + N3 AGM = (N1 * G1 - N2 * G2) / N return AGM # reduce(lambda x, y: x*y, numbers)**(1.0/len(numbers)) nick = 'nthcolumn' posts = getPosts(nick, n=500) for post in posts[10:]: with open('./corpora/%s.txt' % nick, 'a') as file: text = file.write(post + os.linesep) ai.train(nick, post) posts = tweets for post in posts: with open('./corpora/%s.txt' % nick, 'a') as file: post = post.encode('ascii', 'ignore') if post: text = file.write(post + os.linesep) for post in posts: with open('./corpora/%s.txt' % nick, 'a') as file: post = post.encode('ascii', 'ignore') if post: text = file.write(post + os.linesep)