def main(argv):
    """Build a word histogram from the file named in argv[1] and print one sample.

    Raises:
        Exception: if no input filename was supplied on the command line.
    """
    # argv[0] is the program name; the filename lives at argv[1], so we need
    # at least two entries (the old `len(argv) == 0` test still allowed an
    # IndexError below when only the program name was present).
    if len(argv) < 2:
        raise Exception("No file specified")
    h = frequency.histogram(argv[1])
    # Draw and display a single frequency-weighted sample.
    print(sample(h))
def smpl_dstr(filename):
    """Return one word drawn from *filename*, weighted by word frequency.

    Expands the histogram into a pool where each word appears once per
    occurrence, then picks a uniformly random pool entry — equivalent to a
    frequency-weighted draw.
    """
    hist = frequency.histogram(filename)
    pool = []
    for word in hist:
        occurrences = frequency.frequency(word, hist)
        pool.extend([word] * occurrences)
    # Single randint call, same as before, so the RNG sequence is unchanged.
    chosen = random.randint(0, len(pool) - 1)
    return pool[chosen]
def gen_word():
    """Generate a sentence of 10 words via a Markov chain over ./words.txt.

    Returns:
        str: the sentence produced by MarkovChain.walk.
    """
    # `with` guarantees the file handle is closed; the original left it open.
    with open("./words.txt", "r") as words_file:
        lines = words_file.readlines()
    num_words = 10
    # The earlier histogram/sample approach was dead (commented-out) code and
    # its `histogram(lines)` result was never used — removed.
    markovchain = MarkovChain(lines)
    return markovchain.walk(num_words)
def generate_random_word(source_text):
    """Return one word from *source_text*, weighted by its frequency.

    Walks the (word, count) pairs of the histogram, subtracting each count
    from a random target until it crosses zero — a standard weighted pick.
    Assumes histogram() yields (word, count) pairs — TODO confirm upstream.
    """
    histogram_sample = histogram(source_text)
    # randint's upper bound is inclusive of the total weight, so the loop is
    # guaranteed to cross zero on or before the final pair.
    random_num = random.randint(0, int(total_weight(histogram_sample)))
    for element in histogram_sample:
        random_num -= element[1]
        if random_num <= 0:
            # Original had an unreachable `break` after this return, an
            # unused `count` accumulator, and a redundant `else: continue`.
            return element[0]
def median(inlist: List[float]) -> float:
    """
    Return the computed median value of a list of numbers.

    Uses a 1000-bin histogram; more bins brings the computed value closer to
    the exact median score. See G.W. Heiman's Basic Stats (1st Edition), or
    CRC Probability & Statistics.

    Usage:   lmedian(inlist, numbins=1000)
    """
    # NOTE: `List(float)` in the original signature was a *call* on the typing
    # alias and raises TypeError the moment the def executes; `List[float]`
    # is the subscripted form that was intended.
    numbins = 1000
    # Histogram spanning the full data range.
    (hist, smallest, binsize, extras) = frequency.histogram(
        inlist, numbins, [min(inlist), max(inlist)])
    cumhist = support.cumsum(hist)  # cumulative histogram
    for i in range(len(cumhist)):  # get 1st(!) index holding 50%ile score
        if cumhist[i] >= len(inlist) / 2.0:
            cfbin = i
            break
    LRL = smallest + binsize * cfbin  # lower real limit of that bin
    # Guard the bin-0 case: cumhist[-1] would silently wrap to the *last*
    # cumulative count; below the first bin there are zero observations.
    cfbelow = cumhist[cfbin - 1] if cfbin > 0 else 0
    freq = float(hist[cfbin])  # frequency IN the 50%ile bin
    # Standard interpolated-median formula.
    _median = LRL + ((len(inlist) / 2.0 - cfbelow) / float(freq)) * binsize
    return _median
length = len(hist) num = random.randint(0, length) for word in hist: num -= hist[word] if num <= 0: return word def test_probability(hist): """Checks to see if the weighted_select has a non-uniform selection probability; returns a dictionary mapping keys to probabilities dictionary -> dictionary """ keys = {} for key in hist.keys(): keys[key] = 0 for i in range(10000): key = weighted_select(hist) keys[key] += 1 for key in keys: keys[key] = keys[key]/10000 return keys if __name__ == "__main__": args = sys.argv[1:] text = ' '.join(args) hist = histogram(text) print(test_probability(hist))
def main(argv):
    """Print 50 lines, each holding three frequency-weighted samples drawn
    from the corpus file named in argv[1]."""
    corpus = frequency.histogram(argv[1])
    for _ in range(50):
        # Three samples per line; join reproduces the single-space separator
        # the original built by string concatenation.
        triple = [stochastic.sample(corpus) for _ in range(3)]
        print(" ".join(triple))
# Smoke-test probes for the frequency module. Every probe below exercises the
# integer list `l` and the float list `lf` (defined earlier in this file) in
# the pattern l, lf[, l, lf].
print(moment.describe(lf))

print('\nFREQUENCY')
print('freqtable:')
print('itemfreq:')
print(frequency.itemfreq(l))
# Was itemfreq(l) twice — the second call now uses lf to match the l/lf
# pattern every other probe in this script follows.
print(frequency.itemfreq(lf))
print('scoreatpercentile:',
      frequency.scoreatpercentile(l, 40),
      frequency.scoreatpercentile(lf, 40),
      frequency.scoreatpercentile(l, 40),
      frequency.scoreatpercentile(lf, 40))
print('percentileofscore:',
      frequency.percentileofscore(l, 12),
      frequency.percentileofscore(lf, 12),
      frequency.percentileofscore(l, 12),
      frequency.percentileofscore(lf, 12))
# Was histogram(l, ...) twice — second call switched to the lf variant.
print('histogram:',
      frequency.histogram(l, 10, [0, max(l)]),
      frequency.histogram(lf, 10, [0, max(lf)]))
print('cumfreq:')
print(frequency.cumfreq(l))
print(frequency.cumfreq(lf))
print(frequency.cumfreq(l))
print(frequency.cumfreq(lf))
print('relfreq:')
print(frequency.relfreq(l))
print(frequency.relfreq(lf))
print(frequency.relfreq(l))
print(frequency.relfreq(lf))

print('\nVARIATION')
print('obrientransform:')