Example #1
import helpers


def learnNaiveBayesText():
    print('... Learning Text ...\n')

    vocabulary = helpers.getVocabulary()
    vocabulary_count = len(vocabulary)
    examples = helpers.getExamplesCount()

    counter = 0

    # Start learning: estimate P(w | v) for every word w and class v
    for v in helpers.getV():
        docs = helpers.getDocsByClass(v)
        docs_count = len(docs)
        docs_terms = helpers.getDocsTermsByClass(docs)
        # prior probability of class v (computed here but never persisted by this snippet)
        p_v = float(docs_count) / float(examples)
        # total number of word positions in the documents of class v
        n = len(docs_terms)

        # for each word wk in Vocabulary, compute the Laplace-smoothed likelihood
        for w in vocabulary:
            nk = docs_terms.count(w)
            p_w_v = float(nk + 1) / float(n + vocabulary_count)
            print('#' + str(counter) + ':' + str(p_w_v))
            counter += 1
            # save the term's conditional probability
            helpers.saveTerms(w, v, p_w_v)
    # End learning

    print('!!!! Finished !!!!\n')
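
The inner loop above implements Laplace (add-one) smoothing: P(wk | v) = (nk + 1) / (n + |Vocabulary|). Here is a minimal self-contained sketch of the same estimate on hypothetical toy data, independent of the project's helpers module; Counter also avoids the repeated O(n) docs_terms.count(w) scan:

from collections import Counter

def smoothed_likelihoods(docs_terms, vocabulary):
    """Laplace-smoothed P(w | v) for one class, mirroring the loop above."""
    n = len(docs_terms)           # total word positions in the class
    counts = Counter(docs_terms)  # per-word counts in a single pass
    return {w: float(counts[w] + 1) / (n + len(vocabulary)) for w in vocabulary}

# hypothetical toy data: two-word vocabulary, three observed word positions
print(smoothed_likelihoods(["spam", "spam", "ham"], ["spam", "ham"]))
# {'spam': 0.6, 'ham': 0.4}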
Example #2
def best(s, regenf, canRegen, scoref):
    # assumes project-local names: h (helper module), Node, Settings, Niche
    niches = {}
    verb = h.getV(s)
    root = Node(s, Settings(regenf, canRegen))
    root.score = scoref([s])[0]
    niches[verb] = Niche(verb, root)
    # evolve one niche per verb until every niche is dead and produces no children
    while True:
        children = []
        allDead = True
        for n in niches.values():
            if not n.isDead:
                allDead = False
                children += n.step()
        if allDead and not children:
            break
        if not children:
            continue
        raw = [" ".join(c.words) for c in children]
        # TODO: speed up by preventing generation of stories with verbs that
        # match stale (> strikes) or intrans niches
        scores = scoref(raw)
        for child, score in zip(children, scores):
            child.score = score
            v = h.getV(child.s)
            if v not in niches:
                niches[v] = Niche(v, child)
            else:
                niches[v].push(child)
    # gather the best candidate from each intransitive niche
    choices = []
    for n in niches.values():
        if not n.intrans:
            continue
        print(n.bestch.s, n.bestsc)
        choices.append((n.bestch, n.bestsc))
    # shift scores so every weight passed to weighted_choice is non-negative
    m = min(c[1] for c in choices)
    if m >= 0:
        m = 0
    i = h.weighted_choice(choices, -m)
    best = choices[i][0]
    return best.s, best.score
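
h.weighted_choice is project-specific and not shown in these examples. A plausible minimal sketch, assuming it takes (item, score) pairs plus an offset that makes every weight non-negative and returns the chosen index:

import random

def weighted_choice(choices, offset):
    """Pick an index with probability proportional to score + offset (assumed semantics)."""
    weights = [score + offset for _, score in choices]
    total = sum(weights)
    r = random.uniform(0, total)
    acc = 0.0
    for i, w in enumerate(weights):
        acc += w
        if r <= acc:
            return i
    return len(weights) - 1  # guard against floating-point rounding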
Example #3
import random


def doit(topic, noun, w2v, pens, retries=0):
    # assumes project-local names: formats, isBad, h (helper module), priority
    # if not stanford.check():
    #     print("START THE SERVER")
    #     raw_input('Press Enter...')
    form, axis, canRegen = random.choice(formats)
    s = form(topic, noun, w2v)
    regenf = lambda lock: form(topic, noun, w2v, lock)
    scoref = lambda x: h.getSkipScores(axis[0], axis[1][0], axis[1][1], x, pens)
    if s is None or isBad(h.getV(s)):
        # bad seed sentence: retry with a fresh random format, up to 20 times
        if retries > 20:
            return None
        print("RETRYING")
        return doit(topic, noun, w2v, pens, retries + 1)
    else:
        # instead of just randomly generating one story, randomly generate one
        # for each verb (species) to get started?
        best = priority.best(s, regenf, canRegen, scoref)[0]
        # pluralize and capitalize the first three words, turning each into
        # its own one-word sentence
        raw = h.strip(best).split()[:3]
        notraw = best.split()
        best = ". ".join([h.firstCharUp(h.makePlural(r)) for r in raw]) + ". " + " ".join(notraw[3:])
        print(best, "\n")
        return best
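
The final reassembly depends on three project helpers (h.strip, h.makePlural, h.firstCharUp) whose implementations are not shown. A toy sketch with naive stand-ins, just to illustrate the transformation:

def first_char_up(w):
    return w[:1].upper() + w[1:]               # naive stand-in for h.firstCharUp

def make_plural(w):
    return w if w.endswith("s") else w + "s"   # naive stand-in for h.makePlural

story = "cat dog bird flies over the moon"
raw = story.split()[:3]   # first three words become one-word sentences
rest = story.split()[3:]
print(". ".join(first_char_up(make_plural(r)) for r in raw) + ". " + " ".join(rest))
# Cats. Dogs. Birds. flies over the moon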
Example #4
import helpers


def classifyNaiveBayesText():
    print('... Classifying document ...\n')

    # start classification
    vocabulary = helpers.getVocabulary()
    words = helpers.getTestFileToArray()
    # only test-document words that appear in the learned vocabulary contribute
    positions = set(vocabulary) & set(words)

    results = []

    for v in helpers.getV():
        vnb = 1
        for i in positions:
            vnb *= helpers.getTermScore(i, v)
        results.append((vnb, v))
        print(str(v) + ':' + str(vnb))

    # the predicted class is the argmax, not the bare maximum score
    best_score, best_class = max(results)
    print('The result is : ' + str(best_class))
    # end classification

    print('!!!! Finished !!!!\n')
    print('===== Result =====\n')
    print('==================\n')
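
Multiplying many probabilities below 1, as vnb *= ... does, underflows to 0.0 on long documents, making all classes tie at zero. The standard remedy is to sum logarithms instead, which preserves the argmax. A sketch, independent of the helpers module:

import math

def log_score(term_probs):
    """Sum of log-probabilities: same argmax as the product, but no underflow."""
    return sum(math.log(p) for p in term_probs)

tiny = [1e-200] * 3
print(tiny[0] * tiny[1] * tiny[2])  # 0.0, the product underflows
print(log_score(tiny))              # about -1381.6, still comparable across classes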