def untrained(self, cr, uid, ids, context=None):
    """Untrain each record's description from its category's Bayes data.

    For every id in ``ids`` whose record has a description, the training
    data stored on the category's group is loaded into a ``Bayes`` guesser,
    the description is untrained from the pool named after the category,
    and the resulting data is written back to the group.  The category's
    ``train_messages`` counter is decremented, the group's
    ``automate_test`` percentage is recomputed and the record's
    ``state_bayes`` is set to ``'untrained'``.  Records without a
    description are left untouched.

    :param cr: database cursor
    :param uid: id of the user performing the operation
    :param ids: ids of the records to untrain
    :param context: unused
    :return: True
    """
    group_obj = self.pool.get('crm.bayes.group')
    cat_obj = self.pool.get('crm.bayes.categories')
    for rec_id in ids:
        record = self.read(cr, uid, rec_id, ['category_id', 'description'])
        if not record['description']:
            continue
        bayes_file = file_path + "crm_bayes.bay"
        category_id = record['category_id'][0]
        cat_rec = cat_obj.read(cr, uid, category_id, [])
        group_id = cat_rec['group_id'][0]
        guesser = Bayes()
        data = ""
        for rec in group_obj.browse(cr, uid, [group_id]):
            if rec['train_data']:
                data += rec['train_data']
        if data:
            # The guesser loads from a file name, so the stored data is
            # staged on disk first.
            myfile = open(bayes_file, 'w')
            try:
                myfile.write(data)
            finally:
                myfile.close()
            guesser.load(bayes_file)
        guesser.untrain(cat_rec['name'], record['description'])
        guesser.save(bayes_file)
        # Read the saved state back so it can be stored on the group;
        # the handle is always closed, even if the read fails.
        myfile = open(bayes_file, 'r')
        try:
            data = myfile.read()
        finally:
            myfile.close()
        group_obj.write(cr, uid, group_id, {'train_data': data})
        cat_obj.write(cr, uid, category_id,
                      {'train_messages': int(cat_rec['train_messages']) - 1})
        # Let the database driver bind the parameter instead of formatting
        # it into the SQL text.
        cr.execute(
            "select sum(train_messages) as tot_train,"
            "sum(guess_messages) as tot_guess "
            "from crm_bayes_categories where group_id=%s",
            (group_id,))
        totals = cr.dictfetchall()[0]
        tot_guess = totals['tot_guess'] or 0
        tot_train = totals['tot_train'] or 0
        total = tot_guess + tot_train
        # Share of guessed messages among all counted messages; 0.0 when
        # nothing was guessed or the counters cancel out.
        if tot_guess and total:
            percantage = float(tot_guess * 100) / float(total)
        else:
            percantage = 0.0
        group_obj.write(cr, uid, group_id,
                        {'train_data': data, 'automate_test': percantage})
        self.write(cr, uid, rec_id, {'state_bayes': 'untrained'})
    return True
Beispiel #2
0
def main():
    """
    Build aggregator report pages with Bayes rating links.

    Loads the saved guesser state (if any), fetches new entries for the
    subscribed feeds, scores them, writes the report page and saves the
    guesser state back to disk.
    """
    # Create a new Bayes guesser
    guesser = Bayes()

    # Attempt to load Bayes data, ignoring IOError on first run.
    try:
        guesser.load(BAYES_DATA_FN)
    except IOError:
        pass

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds_file = open(FEEDS_FN, "r")
    try:
        # One feed per line; surrounding whitespace is dropped.
        feeds = [line.strip() for line in feeds_file.readlines()]
    finally:
        # Close the subscription list explicitly instead of leaking it.
        feeds_file.close()
    entries = getNewFeedEntries(feeds, feed_db, entry_db)

    # Score the new entries using the Bayesian guesser
    entries = scoreEntries(guesser, entries)

    # Write out the current run's aggregator report.
    out_fn = time.strftime(HTML_FN)
    writeAggregatorPage(entries, out_fn, DATE_HDR_TMPL, FEED_HDR_TMPL,
                        ENTRY_TMPL, PAGE_TMPL)

    # Close the databases and save the current guesser's state to disk.
    closeDBs(feed_db, entry_db)
    guesser.save(BAYES_DATA_FN)
Beispiel #3
0
 def _load_guesser(self):
     """Return a Bayes guesser loaded from ``commands.bays``.

     Returns None when the module-level ``Bayes`` name is None.  The new
     guesser and its attribute listing are passed to ``self.display``
     before the data file is loaded.
     """
     if Bayes is not None:
         loaded = Bayes()
         self.display(loaded)
         self.display(dir(loaded))
         loaded.load("commands.bays")
         return loaded
     return None
Beispiel #4
0
 def _load_guesser(self):
     """Return a Bayes guesser loaded from ``commands.bays``.

     Returns None when the module-level ``Bayes`` name is None.  The new
     guesser and its attribute listing are printed before the data file
     is loaded.
     """
     if Bayes is not None:
         loaded = Bayes()
         print(loaded)
         print(dir(loaded))
         loaded.load('commands.bays')
         return loaded
     return None
Beispiel #5
0
def get_bayes(id=GLOBAL):
    """Return the classifier registered under ``id``, creating it on first use.

    A new classifier is built with the ``statustok`` tokenizer, loaded from
    the file returned by ``filename(id=id)`` when that file exists, and
    stored in the module-level ``guessers`` mapping.
    """
    if id in guessers:
        return guessers[id]

    classifier = Bayes(tokenizer=statustok)
    path = filename(id=id)
    if os.path.exists(path):
        classifier.load(path)
    log.debug("Created classifier for '%s' at '%s'" % (id, path))
    guessers[id] = classifier
    return guessers[id]
Beispiel #6
0
  def __init__(self,name):
    """Initialise the classifier and load (or create) its brain file.

    The brain file is ``name + '.bay'``.  If loading it fails, the current
    (empty) state is saved to that file and loaded back.
    """
    Bayes.__init__(self)

    self.brain = name + '.bay'

    try:
      Bayes.load(self,self.brain)
      print("[Bayes] Brain loaded ok")
    except Exception:
      # Any ordinary load failure means "start a new brain"; interpreter
      # exits and Ctrl-C are no longer swallowed here.
      print("[Alert] Failed to load bayesian brain - %s, creating it now" % self.brain)
      Bayes.save(self,self.brain)
      Bayes.load(self,self.brain)
Beispiel #7
0
    def __init__(self, name):
        """Initialise the classifier and load (or create) its brain file.

        The brain file is ``name + '.bay'``.  If loading it fails, the
        current (empty) state is saved to that file and loaded back.
        """
        Bayes.__init__(self)

        self.brain = name + '.bay'

        try:
            Bayes.load(self, self.brain)
            print("[Bayes] Brain loaded ok")
        except Exception:
            # Any ordinary load failure means "start a new brain";
            # interpreter exits and Ctrl-C are no longer swallowed here.
            print("[Alert] Failed to load bayesian brain - %s, creating it now" % self.brain)
            Bayes.save(self, self.brain)
            Bayes.load(self, self.brain)
 def action_train(self, cr, uid, ids, context=None):
     """Train the group's Bayes data with each message and open the next wizard.

     For every id in ``ids`` the record's category is read.  When the
     module-level ``result`` list is non-empty, its highest-scoring entry
     is used: if a category is already set its ``train_messages`` counter
     is incremented; otherwise, when the best score is positive, the
     category with that name is looked up, its ``guess_messages`` counter
     is incremented and it is written on all ``ids``.  The message text is
     then trained into the category's pool, the data is stored on the
     group and the group's ``automate_test`` percentage is recomputed.

     :raises osv.except_osv: when no category could be determined
     :return: an ``ir.actions.act_window`` description for
         ``crm.bayes.train.message``
     """
     cat_obj = self.pool.get('crm.bayes.categories')
     group_obj = self.pool.get('crm.bayes.group')
     message_obj = self.pool.get('crm.bayes.test.guess')

     for id in ids:
         cat_id = self.read(cr, uid, id, ['category_id','name'])
         cat_id = cat_id[0]['category_id']
         if result:
             # NOTE(review): ``result`` is a module-level list; its entries
             # are used here as (category name, score) pairs.
             max_list = max(result, key=lambda k: k[1])
             if cat_id:
                 cat_guess_msg = cat_obj.read(cr, uid, cat_id, ['train_messages'])
                 cat_obj.write(cr, uid, cat_id, {'train_messages' :cat_guess_msg['train_messages'] + 1})
             if max_list[1] > 0 and not cat_id:
                 cat_id = cat_obj.search(cr, uid, [('name','=',max_list[0])])[0]
                 cat_guess_msg = cat_obj.read(cr, uid, cat_id, ['guess_messages'])
                 cat_obj.write(cr, uid, cat_id, {'guess_messages' :cat_guess_msg['guess_messages'] + 1})
                 self.write(cr, uid, ids, {'category_id':cat_id})
         if cat_id:
             bayes_file = file_path + "crm_bayes.bay"
             cat_rec = cat_obj.read(cr, uid, cat_id, [])
             group_id = cat_rec['group_id'][0]
             guesser = Bayes()
             data = ""
             for rec in group_obj.browse(cr, uid, [group_id]):
                 if rec['train_data']:
                     data += rec['train_data']
             if data:
                 # The guesser loads from a file name, so the stored data
                 # is staged on disk first.
                 myfile = open(bayes_file, 'w')
                 try:
                     myfile.write(data)
                 finally:
                     myfile.close()
                 guesser.load(bayes_file)

             guesser.train(cat_rec['name'], message_obj.read(cr, uid, id)[0]['name'])
             guesser.save(bayes_file)
             # Read the saved state back; the handle is always closed.
             myfile = open(bayes_file, 'r')
             try:
                 data = myfile.read()
             finally:
                 myfile.close()
             # Let the database driver bind the parameter instead of
             # formatting it into the SQL text.
             cr.execute(
                 "select sum(train_messages) as tot_train,"
                 "sum(guess_messages) as tot_guess "
                 "from crm_bayes_categories where group_id=%s",
                 (group_id,))
             totals = cr.dictfetchall()[0]
             tot_guess = totals['tot_guess'] or 0
             tot_train = totals['tot_train'] or 0
             total = tot_guess + tot_train
             # Avoid dividing by zero when no message has been counted yet.
             if total:
                 percantage = float(tot_guess * 100) / float(total)
             else:
                 percantage = 0.0
             group_obj.write(cr, uid, group_id, {'train_data': data,'automate_test':percantage})
         else:
             raise osv.except_osv(_('Error !'),_('Please Select Category! '))
     return {
         'view_type': 'form',
         "view_mode": 'form',
         'res_model': 'crm.bayes.train.message',
         'type': 'ir.actions.act_window',
         'target':'new',
      }
Beispiel #9
0
def check_junk(phrase):
    """Classify ``phrase`` with the Bayes data stored in ``config/kikoo.bot``.

    Returns the first element of the best guess converted to ``int``, or
    -1 when there is no guess or anything goes wrong (library missing,
    data file unreadable, non-numeric pool name, ...).
    """
    try:
        from reverend.thomas import Bayes
        g = Bayes()
        g.load("config/kikoo.bot")
        result = g.guess(phrase)
        print(result)
        if result:
            return int(result[0][0])
        return -1
    except Exception:
        # Best-effort lookup: ordinary failures map to -1, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return -1
Beispiel #10
0
def get_db(private_path, username):
    """Return ``(guesser, path)`` for the user's spam filter database.

    The database lives at ``<private_path>/<username>/spam.bayes``.  When
    it cannot be loaded (IOError), its directory is created if necessary
    and the empty guesser is saved there.
    """
    path = os.path.join(private_path, username, 'spam.bayes')
    guesser = Bayes()

    try:
        guesser.load(path)
    except IOError:
        # First use for this user: start from an empty database.
        print("Creating a new spam filter database")

        db_dir = os.path.dirname(path)
        if not os.path.isdir(db_dir):
            os.makedirs(db_dir)

        guesser.save(path)

    return guesser, path
Beispiel #11
0
def get_db(private_path, username):
  """Return ``(guesser, path)`` for the user's spam filter database.

  The database lives at ``<private_path>/<username>/spam.bayes``.  When it
  cannot be loaded (IOError), its directory is created if necessary and the
  empty guesser is saved there.
  """
  path = os.path.join(private_path, username, 'spam.bayes')
  guesser = Bayes()

  try:
    guesser.load(path)
  except IOError:
    # First use for this user: start from an empty database.
    print("Creating a new spam filter database")

    db_dir = os.path.dirname(path)
    if not os.path.isdir(db_dir):
      os.makedirs(db_dir)

    guesser.save(path)

  return guesser, path
Beispiel #12
0
    def trained(self, cr, uid, ids, context=None):
        """Train each record's description into its category's Bayes data.

        For every id in ``ids`` the training data stored on the category's
        group is loaded into a ``Bayes`` guesser, the description is
        trained into the pool named after the category, and the result is
        written back to the group together with the recomputed
        ``automate_test`` percentage.  The category's ``train_messages``
        counter is incremented and the record's ``state_bayes`` is set to
        ``'trained'``.

        :param cr: database cursor
        :param uid: id of the user performing the operation
        :param ids: ids of the records to train
        :param context: unused
        :raises osv.except_osv: when a record has no description or no
            category
        :return: True
        """
        group_obj = self.pool.get('crm.bayes.group')
        cat_obj = self.pool.get('crm.bayes.categories')
        for rec_id in ids:
            record = self.read(cr, uid, rec_id, ['category_id', 'description'])
            if not record['description']:
                raise osv.except_osv(_('Error!'), _("Description Not Define!"))
            if not record['category_id']:
                raise osv.except_osv(_('Error!'),
                                     _("Statistics Category Not Define!"))
            bayes_file = file_path + "crm_bayes.bay"
            category_id = record['category_id'][0]
            cat_rec = cat_obj.read(cr, uid, category_id, [])
            group_id = cat_rec['group_id'][0]
            guesser = Bayes()
            data = ""
            for rec in group_obj.browse(cr, uid, [group_id]):
                if rec['train_data']:
                    data += rec['train_data']
            if data:
                # The guesser loads from a file name, so the stored data is
                # staged on disk first.
                myfile = open(bayes_file, 'w')
                try:
                    myfile.write(data)
                finally:
                    myfile.close()
                guesser.load(bayes_file)
            guesser.train(cat_rec['name'], record['description'])
            guesser.save(bayes_file)
            # Read the saved state back; the handle is always closed.
            myfile = open(bayes_file, 'r')
            try:
                data = myfile.read()
            finally:
                myfile.close()
            cat_obj.write(
                cr, uid, category_id,
                {'train_messages': int(cat_rec['train_messages']) + 1})
            # Let the database driver bind the parameter instead of
            # formatting it into the SQL text.
            cr.execute(
                "select sum(train_messages) as tot_train,"
                "sum(guess_messages) as tot_guess "
                "from crm_bayes_categories where group_id=%s",
                (group_id,))
            totals = cr.dictfetchall()[0]
            tot_guess = totals['tot_guess'] or 0
            tot_train = totals['tot_train'] or 0
            total = tot_guess + tot_train
            # Avoid dividing by zero when the counters sum to nothing.
            if total:
                percantage = float(tot_guess * 100) / float(total)
            else:
                percantage = 0.0
            group_obj.write(cr, uid, group_id, {
                'train_data': data,
                'automate_test': percantage
            })

            self.write(cr, uid, rec_id, {'state_bayes': 'trained'})
        return True
 def guess_message(self, cr, uid, ids, context=None):
     """Guess categories for the case description using all active groups.

     The ``train_data`` of every active ``crm.bayes.group`` is concatenated,
     staged in ``/tmp/crm_bayes.bay`` and loaded into a ``Bayes`` guesser,
     which is then asked to classify the description.

     :param context: unused (defaults to None rather than a shared dict)
     :return: list of ``(name, score * 100)`` tuples; empty when there is no
         description or no training data
     """
     cases = self.browse(cr, uid, ids)
     result_lang = []
     if cases.description:
         guesser = Bayes()
         group_obj = self.pool.get('crm.bayes.group')
         active_ids = group_obj.search(cr, uid, [('active', '=', True)])
         data = ""
         for rec in group_obj.browse(cr, uid, active_ids):
             if rec['train_data']:
                 data += rec['train_data']
         if data:
             # NOTE(review): fixed, shared path under /tmp - concurrent
             # calls overwrite each other's file.
             myfile = open("/tmp/crm_bayes.bay", 'w')
             try:
                 myfile.write(data)
             finally:
                 # Close the handle even if the write fails.
                 myfile.close()
             guesser.load('/tmp/crm_bayes.bay')
             result_lang = guesser.guess(cases.description)
     # Scale each score to a percentage.
     return [(le[0], le[1] * 100) for le in result_lang]
 def action_guess(self, cr, uid, ids, context=None):
     """Guess the message's categories and open the train wizard.

     The module-level ``result`` list is emptied in place and refilled with
     one entry per ``crm.bayes.categories`` record: the guesser's entry
     for that category name when there is one, otherwise ``(name, 0)``.
     Training data comes from the groups listed in
     ``context['active_ids']``.

     :return: an ``ir.actions.act_window`` description for
         ``crm.bayes.test.train``
     """
     guesser = Bayes()
     group_obj = self.pool.get('crm.bayes.group')
     # Reset the shared list without rebinding it.
     if result:
         del result[:]
     # NOTE(review): ``context`` defaults to None but is subscripted here,
     # so callers must pass a context containing 'active_ids'.
     data = ""
     for group in group_obj.browse(cr, uid, context['active_ids']):
         stored = group['train_data']
         if stored:
             data += stored
     result_lang = []
     if data:
         tmp_file = file("/tmp/crm_bayes.bay", 'w')
         tmp_file.write(data)
         tmp_file.close()
         guesser.load('/tmp/crm_bayes.bay')
         message = self.read(cr, uid, ids, ['name'])
         result_lang = guesser.guess(message[0]['name'])

     cat_obj = self.pool.get('crm.bayes.categories')
     all_cat_ids = cat_obj.search(cr, uid, [])
     for category in cat_obj.read(cr, uid, all_cat_ids, ['name']):
         for guess in result_lang:
             if guess[0] == category['name']:
                 result.append(guess)
                 break
         else:
             # No guess for this category: record it with a zero score.
             result.append((category['name'],0))
     context_new = {
         'from_wiz': True,
         'group_id': context.get('active_id',False),
     }
     return {
         'context': context_new,
         'view_type': 'form',
         "view_mode": 'form',
         'res_model': 'crm.bayes.test.train',
         'type': 'ir.actions.act_window',
         'target':'new',
      }
Beispiel #15
0
    def action_guess(self, cr, uid, ids, context=None):
        """Guess the message's categories and open the train wizard.

        The module-level ``result`` list is emptied in place and refilled
        with one entry per ``crm.bayes.categories`` record: the guesser's
        entry for that category name when there is one, otherwise
        ``(name, 0)``.  Training data comes from the groups listed in
        ``context['active_ids']``.

        :return: an ``ir.actions.act_window`` description for
            ``crm.bayes.test.train``
        """
        guesser = Bayes()
        group_obj = self.pool.get('crm.bayes.group')
        # Reset the shared list without rebinding it.
        if result:
            del result[:]
        # NOTE(review): ``context`` defaults to None but is subscripted
        # here, so callers must pass a context containing 'active_ids'.
        data = ""
        for group in group_obj.browse(cr, uid, context['active_ids']):
            stored = group['train_data']
            if stored:
                data += stored
        result_lang = []
        if data:
            tmp_file = file("/tmp/crm_bayes.bay", 'w')
            tmp_file.write(data)
            tmp_file.close()
            guesser.load('/tmp/crm_bayes.bay')
            message = self.read(cr, uid, ids, ['name'])
            result_lang = guesser.guess(message[0]['name'])

        cat_obj = self.pool.get('crm.bayes.categories')
        all_cat_ids = cat_obj.search(cr, uid, [])
        for category in cat_obj.read(cr, uid, all_cat_ids, ['name']):
            for guess in result_lang:
                if guess[0] == category['name']:
                    result.append(guess)
                    break
            else:
                # No guess for this category: record it with a zero score.
                result.append((category['name'], 0))
        context_new = {
            'from_wiz': True,
            'group_id': context.get('active_id', False),
        }
        return {
            'context': context_new,
            'view_type': 'form',
            "view_mode": 'form',
            'res_model': 'crm.bayes.test.train',
            'type': 'ir.actions.act_window',
            'target': 'new',
        }
Beispiel #16
0
 def guess_message(self, cr, uid, ids, context=None):
     """Guess categories for the case description using all active groups.

     The ``train_data`` of every active ``crm.bayes.group`` is concatenated,
     staged in ``/tmp/crm_bayes.bay`` and loaded into a ``Bayes`` guesser,
     which is then asked to classify the description.

     :param context: unused (defaults to None rather than a shared dict)
     :return: list of ``(name, score * 100)`` tuples; empty when there is no
         description or no training data
     """
     cases = self.browse(cr, uid, ids)
     result_lang = []
     if cases.description:
         guesser = Bayes()
         group_obj = self.pool.get('crm.bayes.group')
         active_ids = group_obj.search(cr, uid, [('active', '=', True)])
         data = ""
         for rec in group_obj.browse(cr, uid, active_ids):
             if rec['train_data']:
                 data += rec['train_data']
         if data:
             # NOTE(review): fixed, shared path under /tmp - concurrent
             # calls overwrite each other's file.
             myfile = open("/tmp/crm_bayes.bay", 'w')
             try:
                 myfile.write(data)
             finally:
                 # Close the handle even if the write fails.
                 myfile.close()
             guesser.load('/tmp/crm_bayes.bay')
             result_lang = guesser.guess(cases.description)
     # Scale each score to a percentage.
     return [(le[0], le[1] * 100) for le in result_lang]
Beispiel #17
0
def classificar():
    """Classify every file in ``CAMINHO_TOCLASSIFY`` and print the guesses.

    The knowledge base is loaded from ``CAMINHO_CONHECIMENTO``; if that
    fails with IOError an error is printed and the process exits with
    status 1.  A ``.svn`` entry in the directory is skipped.  Any ordinary
    error during classification is reported with a single message.
    """
    print(">>> Instanciando classificador")
    guesser = Bayes()
    print(">>> Carregando base de conhecimento")
    try:
        guesser.load(CAMINHO_CONHECIMENTO)
    except IOError:
        print("Erro. Não foi possível carregar a base. Certifique-se de que existe o arquivo %s." % CAMINHO_CONHECIMENTO)
        sys.exit(1)

    try:
        arquivos = os.listdir("%s/" % CAMINHO_TOCLASSIFY)
        if '.svn' in arquivos:
            arquivos.remove('.svn')
        for nome_arquivo in arquivos:
            arquivo = open('%s/%s' % (CAMINHO_TOCLASSIFY, nome_arquivo), 'r')
            try:
                texto = arquivo.read()
            finally:
                # Release each input file as soon as it has been read.
                arquivo.close()
            guess = guesser.guess(texto)
            print("\n>>> Arquivo %s:\n %s\n" % (nome_arquivo, guess))
    except Exception:
        # Best-effort run: report ordinary failures, but let
        # KeyboardInterrupt/SystemExit propagate.
        print("Erro. Não foi possível classificar.")
def main():
    """
    Perform a test run of the FeedFilter using defaults.

    Loads the Bayes data, fetches new entries for the subscribed feeds and
    writes the filtered feed in both RSS and Atom form.
    """
    # Create a new Bayes guesser, attempt to load data
    guesser = Bayes()
    guesser.load(BAYES_DATA_FN)

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds_file = open(FEEDS_FN, "r")
    try:
        feeds = [line.strip() for line in feeds_file.readlines()]
    finally:
        feeds_file.close()
    entries = getNewFeedEntries(feeds, feed_db, entry_db)

    # Build the feed filter.
    f = BayesFilter(guesser, entries)
    f.FEED_META['feed.title']   = FEED_TITLE
    f.FEED_META['feed.tagline'] = FEED_TAGLINE

    # Output the feed as both RSS and Atom, closing each file explicitly
    # so the content is flushed to disk.
    for fmt, render in (('rss', f.scrape_rss), ('atom', f.scrape_atom)):
        out = open(FEED_NAME_FN % fmt, 'w')
        try:
            out.write(render())
        finally:
            out.close()

    # Close the databases.
    closeDBs(feed_db, entry_db)
Beispiel #19
0
def main():
    """
    Perform a test run of the FeedFilter using defaults.

    Loads the Bayes data, fetches new entries for the subscribed feeds and
    writes the filtered feed in both RSS and Atom form.
    """
    # Create a new Bayes guesser, attempt to load data
    guesser = Bayes()
    guesser.load(BAYES_DATA_FN)

    # Open up the databases, load the subscriptions, get new entries.
    feed_db, entry_db = openDBs(FEED_DB_FN, ENTRY_DB_FN)
    feeds_file = open(FEEDS_FN, "r")
    try:
        feeds = [line.strip() for line in feeds_file.readlines()]
    finally:
        feeds_file.close()
    entries = getNewFeedEntries(feeds, feed_db, entry_db)

    # Build the feed filter.
    f = BayesFilter(guesser, entries)
    f.FEED_META['feed.title'] = FEED_TITLE
    f.FEED_META['feed.tagline'] = FEED_TAGLINE

    # Output the feed as both RSS and Atom, closing each file explicitly
    # so the content is flushed to disk.
    for fmt, render in (('rss', f.scrape_rss), ('atom', f.scrape_atom)):
        out = open(FEED_NAME_FN % fmt, 'w')
        try:
            out.write(render())
        finally:
            out.close()

    # Close the databases.
    closeDBs(feed_db, entry_db)
Beispiel #20
0
class BayesianClassifier:
  """Sentiment classifier built on a ``Bayes`` guesser.

  ``train`` feeds the guesser with example tweets plus a fixed list of
  positive and negative seed phrases; ``classify`` maps a sentence to
  POSITIVE, NEGATIVE or NEUTRAL.
  """

  # Sentiment labels re-exported from the module-level constants.
  POSITIVE = POSITIVE
  NEGATIVE = NEGATIVE
  NEUTRAL  = NEUTRAL

  # Minimum gap between the two best scores for a non-neutral verdict.
  THRESHHOLD = 0.1
  guesser = None

  def __init__(self):
    self.guesser = Bayes()

  def train(self, example_tweets):
    """Train on ``example_tweets`` and then on the built-in seed phrases.

    Each tweet contributes ``tweet.text`` to the pool ``tweet.sentiment``.
    The seed phrases are trained in the listed order; repeated phrases are
    trained once per occurrence.
    """
    for tweet in example_tweets:
      self.guesser.train(tweet.sentiment, tweet.text)

    positive_seeds = (
      "cool", "Woo", "quite amazing", "thks", "looking forward to",
      "damn good", "frickin ruled", "frickin rules", "Way to go", "cute",
      "comeback", "not suck", "prop", "kinda impressed", "props",
      "come on", "congratulation", "gtd", "proud", "thanks",
      "can help", "thanks!", "pumped", "integrate", "really like",
      "loves it", "yay", "amazing", "epic flail", "flail",
      "good luck", "fail", "life saver", "piece of cake", "good thing",
      "hawt", "hawtness", "highly positive", "my hero", "yummy",
      "awesome", "congrats", "would recommend", "intellectual vigor", "really neat",
      "yay", "ftw", "I want", "best looking", "imrpessive",
      "positive", "thx", "thanks", "thank you", "endorse",
      "clearly superior", "superior", "really love", "woot", "w00t",
      "super", "wonderful", "leaning towards", "rally", "incredible",
      "the best", "is the best", "strong", "would love", "rally",
      "very quickly", "very cool", "absolutely love", "very exceptional", "so proud",
      "funny", "recommend", "so proud", "so great", "so cool",
      "cool", "wowsers", "plus", "liked it", "make a difference",
      "moves me", "inspired", "OK", "love it", "LOL",
      ":)", ";)", ":-)", ";-)", ":D",
      ";]", ":]", ":p", ";p", "voting for",
      "great", "agreeable", "amused", "brave", "calm",
      "charming", "cheerful", "comfortable", "cooperative", "courageous",
      "delightful", "determined", "eager", "elated", "enchanting",
      "encouraging", "energetic", "enthusiastic", "excited", "exuberant",
      "excellent", "I like", "fine", "fair", "faithful",
      "fantastic", "fine", "friendly", "fun ", "funny",
      "gentle", "glorious", "good", "pretty good", "happy",
      "healthy", "helpful", "high", "agile", "responsive",
      "hilarious", "jolly", "joyous", "kind", "lively",
      "lovely", "lucky", "nice", "nicely", "obedient",
      "perfect", "pleasant", "proud", "relieved", "silly",
      "smiling", "splendid", "successful", "thankful", "thoughtful",
      "victorious", "vivacious", "witty", "wonderful", "zealous",
      "zany", "rocks", "comeback", "pleasantly surprised", "pleasantly",
      "surprised", "love", "glad", "yum", "interesting",
    )

    negative_seeds = (
      "FTL", "irritating", "not that good", "suck", "lying",
      "duplicity", "angered", "dumbfounding", "dumbifying", "not as good",
      "not impressed", "stomach it", "pw", "pwns", "pwnd",
      "pwning", "in a bad way", "horrifying", "wrong", "flailing",
      "failing", "fallen way behind", "fallen behind", "lose", "fallen",
      "self-deprecating", "hunker down", "duh", "get killed by", "got killed by",
      "hated us", "only works in safari", "must have ie", "fuming and frothing", "heavy",
      "buggy", "unusable", "nothing is", "is great until", "don't support",
      "despise", "pos", "hindrance", "sucks", "problems",
      "not working", "fuming", "annoying", "frothing", "poorly",
      "headache", "completely wrong", "sad news", "didn't last", "lame",
      "pet peeves", "pet peeve", "can't send", "bullshit", "fail",
      "so terrible", "negative", "anooying", "an issue", "drop dead",
      "trouble", "brainwashed", "smear", "commie", "communist",
      "anti-women", "WTF", "anxiety", "STING", "nobody spoke",
      "yell", "Damn", "aren't", "anti", "i hate",
      "hate", "dissapointing", "doesn't recommend", "the worst", "worst",
      "expensive", "crap", "socialist", "won't", "wont",
      ":(", ":-(", "Thanks", "smartass", "don't like",
      "too bad", "frickin", "snooty", "knee jerk", "jerk",
      "reactionist", "MUST DIE", "no more", "hypocrisy", "ugly",
      "too slow", "not reliable", "noise", "crappy", "horrible",
      "bad quality", "angry", "annoyed", "anxious", "arrogant",
      "ashamed", "awful", "bad", "bewildered", "blues",
      "bored", "clumsy", "combative", "condemned", "confused",
      "crazy", "flipped-out", "creepy", "cruel", "dangerous",
      "defeated", "defiant", "depressed", "disgusted", "disturbed",
      "dizzy", "dull", "embarrassed", "envious", "evil",
      "fierce", "foolish", "frantic", "frightened", "grieving",
      "grumpy", "helpless", "homeless", "hungry", "hurt",
      "ill", "itchy", "jealous", "jittery", "lazy",
      "lonely", "mysterious", "nasty", "rape", "naughty",
      "nervous", "nutty", "obnoxious", "outrageous", "panicky",
      "f*****g up", "repulsive", "scary", "selfish", "sore",
      "tense", "terrible", "testy", "thoughtless", "tired",
      "troubled", "upset", "uptight", "weary", "wicked",
      "worried", "is a fool", "painful", "pain", "gross",
    )

    for phrase in positive_seeds:
      self.guesser.train(POSITIVE, phrase)

    for phrase in negative_seeds:
      self.guesser.train(NEGATIVE, phrase)

  def classify(self, sentence):
    """Return the sentiment label guessed for ``sentence``.

    NEUTRAL is returned when the guesser yields nothing, or when the first
    two guesses' scores differ by no more than ``THRESHHOLD``.  A single
    guess is returned as-is; otherwise the first guess's label wins.
    """
    ranked = self.guesser.guess(sentence)
    count = len(ranked)
    if count == 0:
      return NEUTRAL

    top_label, top_score = ranked[0]
    if count == 1:
      return top_label

    _second_label, second_score = ranked[1]
    if top_score - second_score > self.THRESHHOLD:
      return top_label
    return NEUTRAL


  def save(self):
    """Save the guesser's state using the guesser's default arguments."""
    self.guesser.save()


  def load(self):
    """Load the guesser's state using the guesser's default arguments."""
    self.guesser.load()
0
class Guesser():
    """Per-user, per-feed spam/ham guesser backed by a ``Bayes`` trainer.

    The trainer state lives in
    ``<config['bayes_dir']>/users/<user.id>/feed_<feed.id>.bayes``; the
    directory is created on demand.  Tokenizing goes through
    ``my_tokenize`` with the stopwords stored for the feed.
    """

    def __init__(self, feed, user, config):
        """Load the saved trainer for ``(user, feed)`` or start an empty one.

        Args:
            feed: object with an ``id``; selects the stopwords and the file.
            user: object with an ``id``; selects the per-user directory.
            config: mapping providing ``'bayes_dir'``.
        """
        import os.path

        self.user = user
        self.filename = config['bayes_dir']
        self.filename += "/users/%s" % user.id
        if not os.path.exists(self.filename):
            os.makedirs(self.filename)
        self.filename += '/feed_%s.bayes' % str(feed.id)
        log.debug("filename:%s" % self.filename)

        stopwords = meta.Session\
                .query(model.Stopword)\
                .filter_by(feed_id=feed.id).all()
        # A real list (not a one-shot ``map`` iterator) because the
        # tokenizer consults the stopwords on every call.
        self.stopwords = [stopword.word for stopword in stopwords]

        self.trainer = self._new_trainer()
        if os.path.exists(self.filename):
            self.trainer.load(self.filename)
        else:
            self._add_default_pools()

    def _new_trainer(self):
        """Return a fresh ``Bayes`` whose tokenizer filters ``self.stopwords``."""
        trainer = Bayes()
        trainer.getTokens = lambda x: my_tokenize(x, self.stopwords)
        return trainer

    def _add_default_pools(self):
        """Create the empty 'ham' and 'spam' pools on the current trainer."""
        self.trainer.newPool('ham')
        self.trainer.newPool('spam')

    def save(self):
        """Write the trainer state to ``self.filename``."""
        self.trainer.save(self.filename)

    def clear(self):
        """Replace the trainer with an empty one (in memory only)."""
        self.trainer = self._new_trainer()
        self._add_default_pools()

    def is_spam(self, entry, use_classified=True):
        """Return True when ``entry`` is considered spam.

        Args:
            entry: the entry to judge; needs an ``id`` when
                ``use_classified`` is true.
            use_classified: when true, a stored ``Classification`` for this
                user and entry takes precedence over the Bayes guess.

        Raises:
            ValueError: the stored classification has a pool other than
                'spam' or 'ham'.
        """
        if use_classified:
            classy = meta.Session\
                    .query(model.Classification)\
                    .filter_by(user_id = self.user.id, entry_id=entry.id).first()
            if classy:
                if classy.pool == 'spam':
                    return True
                elif classy.pool == 'ham':
                    return False
                else:
                    # The original raised a plain string, which is itself a
                    # TypeError; raise a real exception instead.
                    raise ValueError("bad pool: %r" % (classy.pool,))

        g = self.guess(entry)

        # Only one pool produced a score: that pool wins outright.
        if g['spam'] and not g['ham']:
            return True

        if not g['spam'] and g['ham']:
            return False

        return (g['spam'] > g['ham'])

    def guess(self, entry):
        """Return the trainer's guess for ``entry`` as a dict.

        The dict always has 'spam' and 'ham' keys; a pool the trainer did
        not report is mapped to ``None``.
        """
        # Imported here as in the original, presumably to avoid an import
        # cycle with the controller module -- TODO confirm.
        from rssmonster.controllers.bayes import __relevant__

        # Compute the relevant text and the guess once; the original
        # recomputed both just to log them.
        relevant = __relevant__(entry)
        guesses = self.trainer.guess(relevant)
        log.debug("__relevant__(entry) %s" % relevant)
        log.debug("__relevant__(entry) %s" % guesses)
        log.debug('self.filename: %s' % self.filename)

        ret = dict(guesses)
        log.debug("ret: %s" % ret)
        ret.setdefault('spam', None)
        ret.setdefault('ham', None)

        return ret
Beispiel #22
0
class BayesianClassifier:
    """Sentiment classifier wrapping a ``Bayes`` guesser.

    ``train`` feeds the guesser caller-supplied examples followed by a
    built-in list of positive and negative phrases; ``classify`` turns the
    guesser's ranked result into POSITIVE, NEGATIVE or NEUTRAL.
    """

    POSITIVE = POSITIVE
    NEGATIVE = NEGATIVE
    NEUTRAL = NEUTRAL

    # Minimum lead the best guess needs over the second one.
    THRESHHOLD = 0.1
    guesser = None

    def __init__(self):
        self.guesser = Bayes()

    def train(self, example_tweets):
        """Train on ``example_tweets`` and then on the built-in phrases.

        Each example must expose ``sentiment`` and ``text``.  The built-in
        phrases are fed in the order listed below, positives first; repeated
        entries are deliberate in the sense that every entry results in one
        ``guesser.train`` call.
        """
        positive_phrases = (
            "cool",
            "Woo",
            "quite amazing",
            "thks",
            "looking forward to",
            "damn good",
            "frickin ruled",
            "frickin rules",
            "Way to go",
            "cute",
            "comeback",
            "not suck",
            "prop",
            "kinda impressed",
            "props",
            "come on",
            "congratulation",
            "gtd",
            "proud",
            "thanks",
            "can help",
            "thanks!",
            "pumped",
            "integrate",
            "really like",
            "loves it",
            "yay",
            "amazing",
            "epic flail",
            "flail",
            "good luck",
            "fail",
            "life saver",
            "piece of cake",
            "good thing",
            "hawt",
            "hawtness",
            "highly positive",
            "my hero",
            "yummy",
            "awesome",
            "congrats",
            "would recommend",
            "intellectual vigor",
            "really neat",
            "yay",
            "ftw",
            "I want",
            "best looking",
            "imrpessive",
            "positive",
            "thx",
            "thanks",
            "thank you",
            "endorse",
            "clearly superior",
            "superior",
            "really love",
            "woot",
            "w00t",
            "super",
            "wonderful",
            "leaning towards",
            "rally",
            "incredible",
            "the best",
            "is the best",
            "strong",
            "would love",
            "rally",
            "very quickly",
            "very cool",
            "absolutely love",
            "very exceptional",
            "so proud",
            "funny",
            "recommend",
            "so proud",
            "so great",
            "so cool",
            "cool",
            "wowsers",
            "plus",
            "liked it",
            "make a difference",
            "moves me",
            "inspired",
            "OK",
            "love it",
            "LOL",
            ":)",
            ";)",
            ":-)",
            ";-)",
            ":D",
            ";]",
            ":]",
            ":p",
            ";p",
            "voting for",
            "great",
            "agreeable",
            "amused",
            "brave",
            "calm",
            "charming",
            "cheerful",
            "comfortable",
            "cooperative",
            "courageous",
            "delightful",
            "determined",
            "eager",
            "elated",
            "enchanting",
            "encouraging",
            "energetic",
            "enthusiastic",
            "excited",
            "exuberant",
            "excellent",
            "I like",
            "fine",
            "fair",
            "faithful",
            "fantastic",
            "fine",
            "friendly",
            "fun ",
            "funny",
            "gentle",
            "glorious",
            "good",
            "pretty good",
            "happy",
            "healthy",
            "helpful",
            "high",
            "agile",
            "responsive",
            "hilarious",
            "jolly",
            "joyous",
            "kind",
            "lively",
            "lovely",
            "lucky",
            "nice",
            "nicely",
            "obedient",
            "perfect",
            "pleasant",
            "proud",
            "relieved",
            "silly",
            "smiling",
            "splendid",
            "successful",
            "thankful",
            "thoughtful",
            "victorious",
            "vivacious",
            "witty",
            "wonderful",
            "zealous",
            "zany",
            "rocks",
            "comeback",
            "pleasantly surprised",
            "pleasantly",
            "surprised",
            "love",
            "glad",
            "yum",
            "interesting",
        )
        negative_phrases = (
            "FTL",
            "irritating",
            "not that good",
            "suck",
            "lying",
            "duplicity",
            "angered",
            "dumbfounding",
            "dumbifying",
            "not as good",
            "not impressed",
            "stomach it",
            "pw",
            "pwns",
            "pwnd",
            "pwning",
            "in a bad way",
            "horrifying",
            "wrong",
            "flailing",
            "failing",
            "fallen way behind",
            "fallen behind",
            "lose",
            "fallen",
            "self-deprecating",
            "hunker down",
            "duh",
            "get killed by",
            "got killed by",
            "hated us",
            "only works in safari",
            "must have ie",
            "fuming and frothing",
            "heavy",
            "buggy",
            "unusable",
            "nothing is",
            "is great until",
            "don't support",
            "despise",
            "pos",
            "hindrance",
            "sucks",
            "problems",
            "not working",
            "fuming",
            "annoying",
            "frothing",
            "poorly",
            "headache",
            "completely wrong",
            "sad news",
            "didn't last",
            "lame",
            "pet peeves",
            "pet peeve",
            "can't send",
            "bullshit",
            "fail",
            "so terrible",
            "negative",
            "anooying",
            "an issue",
            "drop dead",
            "trouble",
            "brainwashed",
            "smear",
            "commie",
            "communist",
            "anti-women",
            "WTF",
            "anxiety",
            "STING",
            "nobody spoke",
            "yell",
            "Damn",
            "aren't",
            "anti",
            "i hate",
            "hate",
            "dissapointing",
            "doesn't recommend",
            "the worst",
            "worst",
            "expensive",
            "crap",
            "socialist",
            "won't",
            "wont",
            ":(",
            ":-(",
            "Thanks",
            "smartass",
            "don't like",
            "too bad",
            "frickin",
            "snooty",
            "knee jerk",
            "jerk",
            "reactionist",
            "MUST DIE",
            "no more",
            "hypocrisy",
            "ugly",
            "too slow",
            "not reliable",
            "noise",
            "crappy",
            "horrible",
            "bad quality",
            "angry",
            "annoyed",
            "anxious",
            "arrogant",
            "ashamed",
            "awful",
            "bad",
            "bewildered",
            "blues",
            "bored",
            "clumsy",
            "combative",
            "condemned",
            "confused",
            "crazy",
            "flipped-out",
            "creepy",
            "cruel",
            "dangerous",
            "defeated",
            "defiant",
            "depressed",
            "disgusted",
            "disturbed",
            "dizzy",
            "dull",
            "embarrassed",
            "envious",
            "evil",
            "fierce",
            "foolish",
            "frantic",
            "frightened",
            "grieving",
            "grumpy",
            "helpless",
            "homeless",
            "hungry",
            "hurt",
            "ill",
            "itchy",
            "jealous",
            "jittery",
            "lazy",
            "lonely",
            "mysterious",
            "nasty",
            "rape",
            "naughty",
            "nervous",
            "nutty",
            "obnoxious",
            "outrageous",
            "panicky",
            "f*****g up",
            "repulsive",
            "scary",
            "selfish",
            "sore",
            "tense",
            "terrible",
            "testy",
            "thoughtless",
            "tired",
            "troubled",
            "upset",
            "uptight",
            "weary",
            "wicked",
            "worried",
            "is a fool",
            "painful",
            "pain",
            "gross",
        )

        for tweet in example_tweets:
            self.guesser.train(tweet.sentiment, tweet.text)

        for phrase in positive_phrases:
            self.guesser.train(POSITIVE, phrase)

        for phrase in negative_phrases:
            self.guesser.train(NEGATIVE, phrase)

    def classify(self, sentence):
        """Return the sentiment label for ``sentence``.

        No guess yields NEUTRAL; a single guess wins outright; otherwise the
        first guess wins only when its score exceeds the second one's by
        more than ``THRESHHOLD``.
        """
        ranked = self.guesser.guess(sentence)
        candidates = len(ranked)
        if candidates == 0:
            return NEUTRAL

        (best_sentiment, best_score) = ranked[0]
        if candidates == 1:
            return best_sentiment

        (_, second_score) = ranked[1]
        margin = best_score - second_score
        return best_sentiment if margin > self.THRESHHOLD else NEUTRAL

    def save(self):
        """Delegate to ``self.guesser.save()`` (called without arguments)."""
        self.guesser.save()

    def load(self):
        """Delegate to ``self.guesser.load()`` (called without arguments)."""
        self.guesser.load()
def load_csv_to_bayes(filename):
    """Train the module-level ``guesser`` from a CSV file.

    The first row is treated as a header and skipped.  For every other row,
    column 1 is the message body (HTML tags are stripped) and column 2 is
    its status: the literal ``"visible"`` is kept, anything else is trained
    as ``"moderated"``.

    Args:
        filename: path of the CSV file to read.
    """
    # ``with`` guarantees the handle is closed; ``open`` and ``next`` work on
    # both Python 2 and 3, unlike ``file`` and ``reader.next``.
    with open(filename) as csv_file:
        reader = csv.reader(csv_file)
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        for line in reader:
            status = "visible" if line[2] == "visible" else "moderated"
            clean_body = re.sub("<[^>]*>", "", line[1])
            guesser.train(status, clean_body)


# Reuse the saved training data when present; otherwise rebuild it from the
# two CSV exports and save it for the next start.
try:
    guesser.load('dataset.dat')
except IOError:
    for csv_name in ('good.csv', 'bad.csv'):
        load_csv_to_bayes(csv_name)
    guesser.save('dataset.dat')


from flask import Flask, request
from flask import render_template
app = Flask(__name__)

@app.route("/moderate")
def moderate():
    if request.args.has_key('callback'):
        wrapper = request.args.get('callback')+"(%s)"
    else:
Beispiel #24
0
#!/usr/bin/env python

import socket, random
from reverend.thomas import Bayes

# Classifier restored from a previously saved state file.
guesser = Bayes()
guesser.load('spam.bay')

# Listening TCP socket; `size` is the per-recv byte limit used further down.
host = 'maxhodak.com'
port = 11911
backlog = 5
size = 1024
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((host,port))
s.listen(backlog)
messages = []
next_msg = "Nothing here!"
# Seed `messages` with at most the first 51 lines of the not-spam log
# (the counter is checked before it is incremented, so i runs 0..50).
# NOTE(review): `f` is not closed anywhere in the visible part of the script.
f = open("/tmp/asktell.notspam.log",'r')
i = 0
for line in f:
  if i > 50:
    break
  i += 1
  messages.append(line.strip())
# Both logs are opened in append mode ('a+') for the rest of the script.
spamlog = open('/tmp/asktell.spam.log','a+')
notspamlog = open('/tmp/asktell.notspam.log','a+')
while 1:
  client, address = s.accept()
  data = client.recv(size)
  if data:
    if len(data) > 300:
Beispiel #25
0
def run(corpus,
        verbose=False,
        hkap_file=os.path.join(software, 'libs/PACManData.bay'),
        train=False,
        authors=False,
        exact_names=False,
        first_only=False,
        nyears=10,
        plotit=False,
        hst=False,
        clobber=False,
        rs_exceptions=''):
    """Classify a corpus, or the abstracts of a list of authors, with Bayes.

    Args:
        corpus: path of the input.  A text file (or, with ``hst``, whatever
            the proposal parsers accept) when ``authors`` is false; a
            tab-separated person report whose first column is the author
            name when ``authors`` is true.
        hkap_file: saved Bayes state to load.
        authors: classify per author instead of the corpus as a whole.
        nyears, rs_exceptions: forwarded to ``util.adscrawl.run_authors``.
        hst: read the corpus through the proposal parsers.
        clobber: rebuild the author cache even when a pickle exists.
        verbose, train, exact_names, first_only, plotit: accepted for
            interface compatibility; not used in this function body.

    Returns:
        ``(result, uber_categories)``.  ``result`` is the normalized guess
        for a corpus, or a dict ``author -> {'hkap': ..., 'cites': ...}``
        in author mode.  ``uber_categories`` maps each key of
        ``category_synonyms.txt`` to its list of comma-separated values.
    """
    # Synonyms file: one "key = v1,v2,..." per line, '#' lines are comments.
    acronyms = {}
    with open(os.path.join(software, 'category_synonyms.txt'), 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue
            if not line.strip():
                # A blank line has no '=' and would break the unpacking.
                continue
            key, value = line.split('=')
            acronyms[key.strip()] = value.strip().split(',')
    uber_categories = acronyms

    stopwords = load_stopwords()

    dguesser = Bayes()
    dguesser.load(hkap_file)

    if not authors:
        if hst:
            ## Below, proposals are retrieved, then parsed.
            abstracts = parse_abstracts_proposals(corpus)
            text = parse_science_justification_proposals(corpus)
            justification = abstracts + text
            bayesString = " " + justification
        else:
            with open(corpus) as f:
                # Keep every non-comment, non-blank line, space-terminated.
                bayesString = ''.join(
                    line.strip() + ' '
                    for line in f
                    if not line.startswith('#') and line.strip())
        bayesString = work_string(bayesString, stopwords)
        result = dguesser.guess(bayesString)
        result = normalize_result(result)

    else:
        ## assumes input is a person report
        ## if .pkl report not available, creates new one
        import util

        records = []
        results_dict = {}
        # Swap only the real extension; str.replace would also rewrite any
        # earlier occurrence of the extension text inside the path.
        results_pkl = os.path.splitext(corpus)[0] + '.pkl'
        # NOTE(review): 'cites.pkl' is a fixed name in the working directory,
        # shared by every corpus -- confirm this is intended.
        if not os.path.isfile(results_pkl) or clobber:
            with open(corpus) as f:
                for line in f:
                    if line.startswith('#'):
                        continue
                    if not line.strip():
                        continue
                    info = line.rstrip().split("\t")
                    if info[0] == '':
                        continue
                    records.append(
                        info[0].replace('"', '').replace("'", '').lower())
            author_dict, cite_dict = util.adscrawl.run_authors(
                records, nyears=nyears, rs_exceptions=rs_exceptions)
            with open(results_pkl, 'wb') as pkl:
                pickle.dump(author_dict, pkl)
            with open('cites.pkl', 'wb') as pkl:
                pickle.dump(cite_dict, pkl)
        else:
            # NOTE: unpickling executes arbitrary code; these cache files
            # must only ever come from this program.
            with open(results_pkl, 'rb') as pkl:
                author_dict = pickle.load(pkl)
            with open('cites.pkl', 'rb') as pkl:
                cite_dict = pickle.load(pkl)
        for author in author_dict.keys():
            # Classify ALL of the author's abstracts; the original assigned
            # instead of appending, so only the last abstract was used.
            bayesString = ''.join(
                ' ' + abstract for abstract in author_dict[author])

            bayesString = work_string(bayesString, stopwords)
            result = dguesser.guess(bayesString)
            ## result = normalize_result(result)
            results_dict[author] = {}
            results_dict[author]['hkap'] = rec.fromrecords(result)
            try:
                results_dict[author]['cites'] = sorted(cite_dict[author],
                                                       reverse=True)
            except (KeyError, TypeError):
                # No (usable) citation list for this author.
                results_dict[author]['cites'] = [0]
        result = results_dict
    return (result, uber_categories)
Beispiel #26
0
      text = "%s %s %s" % (post.title, post.author, post.summary)
      brain.train(tag, text)
      print "%s :: %s" % (tag, post.title)

# Run the training pass defined above.
retrain()

# Store the classifier state on `bayes` as base64 text and save it.
# NOTE(review): `brain.saves()` presumably returns the serialized state as a
# string, and `bayes` is defined outside this chunk -- confirm both.
bayes.data = base64.encodestring(brain.saves())
bayes.save()

from BeautifulSoup import BeautifulSoup

from mainapp.models import Post
from reverend.thomas import Bayes

# Classifier restored from 'fish.db'.
brain = Bayes()
brain.load('fish.db')

tag = 'Dead'
# NOTE(review): `read`, `flag`, `feed` and `Tag` are not defined or imported in
# the visible part of this script -- confirm where they come from.
posts = Post.objects.filter(read=read)
# NOTE(review): `tags__in` is given the string 'Dead' rather than a list of
# tags -- verify this is the intended lookup.
posts = posts.filter(tags__in=tag)
#brain.train('Dead', post.summary)
t1 = Tag.objects.get(id=flag)

# Toggle tag `t1` on every selected post: when the post already carries it
# (and `feed` is falsy) the tag is removed and the classifier is untrained on
# the post's summary; otherwise the tag is added.
for post in posts:
  # The same tag is fetched again on every iteration.
  t1 = Tag.objects.get(id=flag)
  if t1 in post.tags.all() and not feed:
    post.tags.remove(t1) 
    post.read = not t1.read
    brain.untrain(t1.name, post.summary)
  else:
    post.tags.add(t1)
Beispiel #27
0
)



# Load both polarity corpora (one example per line); `with` closes the
# handles instead of leaving them to the garbage collector.
with open(BASE_DIR+"/data/rt-polarity.neg") as neg_handle:
    neg_file = neg_handle.read()
with open(BASE_DIR+"/data/rt-polarity.pos") as pos_handle:
    pos_file = pos_handle.read()
neg_tweets_list = neg_file.split('\n')
pos_tweets_list = pos_file.split('\n')

# The first three quarters of each list train, the remainder tests.
neg_cutoff = int(len(neg_tweets_list)*3/4)
pos_cutoff = int(len(pos_tweets_list)*3/4)

neg_train = neg_tweets_list[:neg_cutoff]
# Fixed: this slice used neg_cutoff, so whenever the two corpora differ in
# length the positive train and test splits overlapped or left a gap.
pos_train = pos_tweets_list[:pos_cutoff]


neg_test = neg_tweets_list[neg_cutoff:]
pos_test = pos_tweets_list[pos_cutoff:]
tweet_data = {'neg_train':neg_train,'pos_train':pos_train,'neg_test':neg_test,'pos_test':pos_test}



# Word ranking computed from the training splits only.
# NOTE(review): get_best_words and the *_tokenizer factories are defined
# outside this chunk; their exact semantics are not visible here.
bestwords = get_best_words(pos_train, neg_train)
# Each classifier is constructed (optionally with a custom tokenizer) and then
# restored from its saved .dat file under data/rt_polarity_classifiers/.
single_classifier = Bayes()
single_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_classifier.dat")
non_stop_classifier = Bayes(tokenizer=non_stop_tokenizer())
non_stop_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_stop_classifier.dat")
best_classifier = Bayes(tokenizer=best_tokenizer(best_words=bestwords))
best_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_best_classifier.dat")
bigram_best_classifier = Bayes(tokenizer=best_bigram_tokenizer(best_words=bestwords))
bigram_best_classifier.load(fname=BASE_DIR+"/data/rt_polarity_classifiers/single_bi_classifier.dat")
Beispiel #28
0
    def action_train(self, cr, uid, ids, context=None):
        """Train the group's Bayes data with each record's message.

        For every id the record's category is read; when the guess
        ``result`` is non-empty the category counters are updated, and a
        record without a category adopts the best-scoring guessed one.  The
        group's stored training data is then loaded, trained with the
        message text, saved back, and the group's ``automate_test``
        percentage is recomputed.

        Args:
            cr: database cursor.
            uid: id of the acting user.
            ids: ids of the records to train on.
            context: unused.

        Returns:
            An ``ir.actions.act_window`` description that opens
            ``crm.bayes.train.message`` in a new form window.

        Raises:
            osv.except_osv: a record ends up without a category.
        """
        cat_obj = self.pool.get('crm.bayes.categories')
        group_obj = self.pool.get('crm.bayes.group')
        message_obj = self.pool.get('crm.bayes.test.guess')

        for record_id in ids:
            cat_id = self.read(cr, uid, record_id, ['category_id', 'name'])
            cat_id = cat_id[0]['category_id']
            # NOTE(review): `result` is not assigned in this method;
            # presumably a module-level list filled by the guess action --
            # confirm.
            if result:
                max_list = max(result, key=lambda k: k[1])
                if cat_id:
                    cat_guess_msg = cat_obj.read(cr, uid, cat_id,
                                                 ['train_messages'])
                    cat_obj.write(cr, uid, cat_id, {
                        'train_messages':
                        cat_guess_msg['train_messages'] + 1
                    })
                if max_list[1] > 0 and not cat_id:
                    # No category chosen: adopt the best guess and count it
                    # as a guessed (not trained) message.
                    cat_id = cat_obj.search(cr, uid,
                                            [('name', '=', max_list[0])])[0]
                    cat_guess_msg = cat_obj.read(cr, uid, cat_id,
                                                 ['guess_messages'])
                    cat_obj.write(cr, uid, cat_id, {
                        'guess_messages':
                        cat_guess_msg['guess_messages'] + 1
                    })
                    # NOTE(review): writes to every id in `ids`, not only
                    # the current one -- confirm this is intended.
                    self.write(cr, uid, ids, {'category_id': cat_id})
            if cat_id:
                cat_rec = cat_obj.read(cr, uid, cat_id, [])
                group_id = cat_rec['group_id'][0]
                guesser = Bayes()
                data = ""
                for rec in group_obj.browse(cr, uid, [group_id]):
                    if rec['train_data']:
                        data += rec['train_data']

                # The Bayes state is round-tripped through a scratch file
                # because the guesser is loaded from and saved to a path.
                bay_path = file_path + "crm_bayes.bay"
                if data:
                    with open(bay_path, 'w') as bay_file:
                        bay_file.write(data)
                    guesser.load(bay_path)

                guesser.train(cat_rec['name'],
                              message_obj.read(cr, uid, record_id)[0]['name'])
                guesser.save(bay_path)
                # Read the saved state back; the original never closed this
                # handle.
                with open(bay_path, 'r') as bay_file:
                    data = bay_file.read()

                cr.execute(
                    "select sum(train_messages) as tot_train,sum(guess_messages) as tot_guess from crm_bayes_categories where group_id=%d"
                    % group_id)
                totals = cr.dictfetchall()[0]
                # SUM() yields NULL/None when there is nothing to add up.
                tot_guess = totals['tot_guess'] or 0
                tot_train = totals['tot_train'] or 0
                tot_messages = tot_guess + tot_train
                if tot_messages:
                    percantage = float(tot_guess * 100) / float(tot_messages)
                else:
                    # Avoid dividing by zero when no message was counted yet.
                    percantage = 0.0
                group_obj.write(cr, uid, group_id, {
                    'train_data': data,
                    'automate_test': percantage
                })
            else:
                raise osv.except_osv(_('Error !'),
                                     _('Please Select Category! '))
        return {
            'view_type': 'form',
            "view_mode": 'form',
            'res_model': 'crm.bayes.train.message',
            'type': 'ir.actions.act_window',
            'target': 'new',
        }