Ejemplo n.º 1
0
def do_stats(num_sents, benchmarkincr=.05, status=1):
    """Generate random sentences and print continuity-break statistics.

    Each iteration draws a sentence with ``rsg.random_sentence(data, length)``,
    drops its final item, and counts breaks with ``num_cbreaks``.  Totals are
    accumulated over all sentences and a summary is printed to stdout.

    num_sents: number of sentences to run the analysis on
    benchmarkincr: fraction of the run between two progress lines
                   (the default .05 prints roughly every 5%)
    status: truthy to show the progress indicator, falsy to suppress it

    Relies on ``rsg``, ``data``, ``length`` and ``num_cbreaks`` being defined
    outside this function.  Returns None.

    Any average whose denominator is zero (no sentences requested, or no
    words generated) is reported as 0.0 instead of raising
    ZeroDivisionError.
    """
    def ratio(numerator, denominator):
        # Float division that degrades to 0.0 when there is nothing to
        # average over, so an empty run still produces a report.
        if not denominator:
            return 0.0
        return numerator * 1.0 / denominator

    total_breaks = 0
    total_words = 0
    total_nobreaks = 0
    lastbenchmark = 0.0
    for i in xrange(num_sents):
        # num_sents is necessarily > 0 inside the loop, so these divisions
        # are safe without the guard.
        if status and 1.0 * i / num_sents > lastbenchmark + benchmarkincr:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and also parses on Python 3.
            print("%d%% done, %d sentences analyzed"
                  % (100.0 * i / num_sents, i))
            lastbenchmark += benchmarkincr
        # The last item yielded by random_sentence is discarded.
        sent = list(rsg.random_sentence(data, length))[:-1]
        num_breaks = num_cbreaks(sent)
        if num_breaks == 0:
            total_nobreaks += 1
        total_breaks += num_breaks
        total_words += len(sent)

    # Build the report mapping explicitly.  Merging globals() over locals()
    # would let any module-level name that happens to match a key here
    # silently replace the value computed above.
    report = {
        'length': length,
        'num_sents': num_sents,
        'perc_total_nobreaks': ratio(total_nobreaks, num_sents),
        'avg_words_per_sent': ratio(total_words, num_sents),
        'avg_breaks_per_sent': ratio(total_breaks, num_sents),
        'breaks_per_word': ratio(total_breaks, total_words),
    }
    print("------------------- Results -----------------------")
    print("""
length=%(length)s
num_sents=%(num_sents)s
perc_total_nobreaks=%(perc_total_nobreaks)s #Straight-copied sentences; indicator of sparseness
avg_words_per_sent=%(avg_words_per_sent)s
avg_breaks_per_sent=%(avg_breaks_per_sent)s
breaks_per_word=%(breaks_per_word)s
""" % report)
Ejemplo n.º 2
0
        
        breaks = 0
        for i in range(0,length):
            end_of_ngram = sent[i]
            word, posls = end_of_ngram
            print "%-25s: n/a" %word
        for i in range(length, len(sent)):

            end_of_prev_ngram = sent[i-1]
            word,posls = end_of_prev_ngram
            prev_absolute_wordpositions = [pos[2] for pos in posls]

            end_of_ngram = sent[i]
            word, posls = end_of_ngram
            cur_absolute_wordpositions = [pos[2] for pos in posls]

            for cur_absolute_wordpos in cur_absolute_wordpositions:
                if cur_absolute_wordpos - 1 in prev_absolute_wordpositions:
                    print "%-25s: continuous.." %word
                    break #No continuity break!
            else:
                print "%-25s: Continuity break over the n-1-gram: %s " \
                        %(word, words[i-(length-1):i])
    elif opt=='':
        print '-'*60
        sent = list(rsg.random_sentence(data, length))[:-1]
        print ' '.join([itm[0] for itm in sent])
    else:
        #read-eval-print loop by default
        print eval(opt)