def stoch(histo):
    ''' for lists
    Compute the percentage share of every entry in a list-based histogram.

    Each entry is indexed as a pair. When the element at index 0 is an int
    it is taken as the count and index 1 as the word; otherwise index 1 is
    the count and index 0 the word.

    Returns a list of (word, percentage) tuples in the order of histo, where
    percentage is frequency(word, histo) divided by the summed counts,
    multiplied by 100. An empty histo yields an empty list.
    '''

    def count_slot(entry):
        # Position of the count inside the entry: 0 for count-first entries.
        return 0 if type(entry[0]) is int else 1

    grand_total = sum(int(entry[count_slot(entry)]) for entry in histo)

    def share_of(entry):
        # The word occupies whichever slot the count does not.
        word = entry[1 - count_slot(entry)]
        return (word, (frequency(word, histo) / grand_total) * 100)

    return [share_of(entry) for entry in histo]
def stoch(histo):
    ''' Lists
    Turn a list-based histogram into (word, percentage) tuples.

    Entries may be laid out count-first or word-first; an int at index 0
    marks the count-first layout. The percentage for a word is
    frequency(word, histo) over the total of all counts, times 100.
    The result keeps the order of histo.
    '''

    # Pass 1: add up every count to get the total word count.
    word_total = 0
    for pair in histo:
        count_first = type(pair[0]) is int  # counts_list vs list_hist layout
        word_total += int(pair[0] if count_first else pair[1])

    # Pass 2: express each word's frequency as a share of that total.
    result = []
    for pair in histo:
        word = pair[1] if type(pair[0]) is int else pair[0]
        share = (frequency(word, histo) / word_total) * 100
        result.append((word, share))

    return result
def stoch(histo):
    ''' Map every histogram entry to its fraction of the total count.

    Entries are indexed as (word, count). Returns a list of (word, fraction)
    tuples in the order of histo, where fraction is frequency(word, histo)
    divided by the sum of all counts (not scaled to a percentage).
    '''

    count_sum = sum(int(pair[1]) for pair in histo)

    return [
        (pair[0], frequency(pair[0], histo) / count_sum)
        for pair in histo
    ]
Example #4
0
def stoch(histo):
    ''' Lists - list_hist
    Express each word of a (word, count) list histogram as a percentage.

    Returns a list of (word, percentage) tuples in the order of histo; the
    percentage is frequency(word, histo) divided by the sum of all counts,
    multiplied by 100.
    '''

    # Total word count across every entry.
    counts = [int(entry[1]) for entry in histo]
    overall = sum(counts)

    shares = []
    for entry in histo:
        word = entry[0]
        shares.append((word, (frequency(word, histo) / overall) * 100))

    return shares
Example #5
0
import random
from histogram import open_file, histogram, frequency

# Look up "the" in the histogram built from open_file() called with no
# argument. This has to run BEFORE `histogram` is rebound below: after the
# rebinding the name refers to the computed histogram, not the imported
# function, so calling it would fail.
print(frequency("the", histogram(open_file())))

# Rebind `histogram` to the histogram built from test.txt.
# NOTE(review): presumably a word -> count mapping such as
# {"one": 1, "fish": 4, "two": 1, "blue": 1, "red": 1} -- confirm against
# the histogram module.
histogram = histogram(open_file('test.txt'))

# The original called total_words(), which is neither imported nor defined
# before this point. Summing the counts relies on the mapping shape assumed
# above.
total_words = sum(histogram.values())

print(total_words)

def stochastic(dic, total_words):
    """Pick a key from ``dic`` using its values as weights.

    A random integer is drawn uniformly from 1..total_words and the keys
    are walked in iteration order while the weights seen so far are
    accumulated; the first key whose weight range covers the drawn number
    is returned. When total_words equals the sum of the values, each key is
    therefore returned with probability value / total_words.

    Returns None if the drawn number lies beyond the accumulated weights
    (for example when dic is empty or total_words exceeds the sum of values).
    """
    target = random.randint(1, total_words)
    seen_weight = 0
    for word, weight in dic.items():
        remaining = target - weight - seen_weight
        if remaining <= 0:
            return word
        seen_weight += weight
    return None
Example #6
0
import random
from histogram import open_file, histogram, frequency

# Build the histogram from open_file() with no argument first: once
# `histogram` is rebound below it no longer names the imported function,
# so this call has to come before the rebinding.
histogram1 = histogram(open_file())

# Rebind `histogram` to the histogram built from words.txt.
histogram = histogram(open_file('words.txt'))

print(frequency("the", histogram1))

#histogram(open_file())))
# print(frequency("of", histogram(open_file())))
# print(total_words(histogram(open_file())))
# total_words = total_words(histogram)