def learnNaiveBayesText(): print '... Learning Text ... \n' vocabulary = helpers.getVocabulary() vocabulary_count = len(vocabulary); examples = helpers.getExamplesCount() counter = 0 # Start learning for v in helpers.getV(): docs = helpers.getDocsByClass(v) docs_count = len(docs) docs_terms = helpers.getDocsTermsByClass(docs) # probability of class v p_v = (float)(docs_count) / (float)(examples) # total of distinct word in docs n = len(docs_terms) # foreach word wk in Vocabulary for w in vocabulary: nk = docs_terms.count(w) p_w_v = (float)(nk + 1) / (float)(n + vocabulary_count) print '#' + str(counter) + ':' + str(p_w_v) counter += 1 # save term helpers.saveTerms(w,v,p_w_v) # End learning print '!!!! Finished !!!!\n' return
def best(s, regenf, canRegen, scoref):
    """Evolve candidates from seed sentence `s`; return the chosen (sentence, score).

    Maintains one Niche per distinct verb (h.getV). Each live niche steps to
    produce children; children are scored in one batch via `scoref`, and a
    child whose verb has no niche yet spawns a new one, otherwise it is pushed
    into the existing niche. When every niche is dead and no children were
    produced, the loop ends and a final pick is drawn by weighted choice over
    the best member of each niche flagged `intrans`.

    Args:
        s: seed sentence (string).
        regenf: regeneration callback stored in each Node's Settings.
        canRegen: flag stored alongside regenf in Settings.
        scoref: batch scorer; maps a list of sentence strings to a list of
            numeric scores in the same order.

    Returns:
        (best.s, best.score) for the weighted-choice winner.
    """
    niches = {}
    verb = h.getV(s)
    root = Node(s, Settings(regenf, canRegen))
    # Score the seed on its own so the root niche starts with a valid score.
    root.score = scoref([s])[0]
    ni = Niche(verb, root)
    niches[verb] = ni
    while True:
        #print "--------------------------------"
        children = []
        allDead = True
        for k in niches:
            n = niches[k]
            if not n.isDead:
                allDead = False
                children += n.step()
        # Terminate only when nothing is alive AND this pass produced nothing.
        if allDead and not children:
            break
        # NOTE(review): if live niches repeatedly step to empty child lists,
        # this loop never terminates — confirm Niche.step()/isDead guarantee
        # progress toward allDead.
        if not children:
            continue
        raw = [" ".join(c.words) for c in children]
        #speed up by preventing generation of stories with verbs that match stale > strikes or intrans niches!
        scores = scoref(raw)
        for i, child in enumerate(children):
            child.score = scores[i]
            v = h.getV(child.s)
            if v not in niches:
                ni2 = Niche(v, child)
                niches[v] = ni2
            else:
                niches[v].push(child)
    # Collect the best candidate of every intransitive niche as draw choices.
    choices = []
    for v in niches:
        n = niches[v]
        if not n.intrans:
            continue
        print n.bestch.s, n.bestsc
        choices.append((n.bestch, n.bestsc))
    # NOTE(review): min() raises ValueError if no niche is intrans — assumes at
    # least one always exists; confirm with Niche's semantics.
    m = min([c[1] for c in choices])
    # Shift: presumably weighted_choice adds the offset (-m) so all weights are
    # non-negative; no shift needed when the minimum is already >= 0.
    if m >= 0:
        m = 0
    i = h.weighted_choice(choices, -m)
    best = choices[i][0]
    return best.s, best.score
def doit(topic,noun,w2v,pens,retries=0): #if not stanford.check(): # print "START THE SERVER" # raw_input('Press Enter...') f = random.choice(formats) form = f[0] axis = f[1] canRegen = f[2] s = form(topic,noun,w2v) regenf = lambda lock: form(topic,noun,w2v,lock) scoref = lambda x: h.getSkipScores(axis[0],axis[1][0],axis[1][1],x,pens) if s is None or isBad(h.getV(s)): if retries > 20: return None print "RETRYING" return doit(topic,noun,w2v,pens,retries+1) else: #instead of just randomly genning one story, randomly gen one for each verb (species) to get started? best = priority.best(s,regenf,canRegen,scoref)[0] raw = h.strip(best).split()[:3] notraw = best.split() best = ". ".join([h.firstCharUp(h.makePlural(r)) for r in raw])+". "+" ".join(notraw[3:]) print best,"\n" return best
def classifyNaiveBayesText(): print '... Classifying document ... \n' # start classification vocabulary = helpers.getVocabulary() words = helpers.getTestFileToArray() positions = set(vocabulary) & set(words) results = [] for v in helpers.getV(): vnb = 1 for i in positions: vnb *= helpers.getTermScore(i,v) results.append(vnb) print str(v) + ':' + str(vnb) print 'The result is : ' + str(max(results)) # end classification print '!!!! Finished !!!!\n' print '===== Result =====\n' print '==================\n' return